From 19fcec84d8d7d21e796c7624e521b60d28ee21ed Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Sun, 7 Apr 2024 20:45:59 +0200
Subject: Adding upstream version 16.2.11+ds.

Signed-off-by: Daniel Baumann
---
 src/isa-l/erasure_code/Makefile.am                 |  167 +
 src/isa-l/erasure_code/aarch64/Makefile.am         |   45 +
 .../erasure_code/aarch64/ec_aarch64_dispatcher.c   |   69 +
 .../aarch64/ec_aarch64_highlevel_func.c            |  127 +
 .../erasure_code/aarch64/ec_multibinary_arm.S      |   36 +
 .../erasure_code/aarch64/gf_2vect_dot_prod_neon.S  |  399 ++
 src/isa-l/erasure_code/aarch64/gf_2vect_mad_neon.S |  401 ++
 .../erasure_code/aarch64/gf_3vect_dot_prod_neon.S  |  358 ++
 src/isa-l/erasure_code/aarch64/gf_3vect_mad_neon.S |  381 ++
 .../erasure_code/aarch64/gf_4vect_dot_prod_neon.S  |  421 ++
 src/isa-l/erasure_code/aarch64/gf_4vect_mad_neon.S |  455 ++
 .../erasure_code/aarch64/gf_5vect_dot_prod_neon.S  |  481 ++
 src/isa-l/erasure_code/aarch64/gf_5vect_mad_neon.S |  534 ++
 src/isa-l/erasure_code/aarch64/gf_6vect_mad_neon.S |  609 ++
 .../erasure_code/aarch64/gf_vect_dot_prod_neon.S   |  298 +
 src/isa-l/erasure_code/aarch64/gf_vect_mad_neon.S  |  314 +
 src/isa-l/erasure_code/aarch64/gf_vect_mul_neon.S  |  235 +
 src/isa-l/erasure_code/ec_base.c                   |  371 ++
 src/isa-l/erasure_code/ec_base.h                   | 6680 ++++++++++++++++++++
 src/isa-l/erasure_code/ec_base_aliases.c           |   61 +
 src/isa-l/erasure_code/ec_highlevel_func.c         |  374 ++
 src/isa-l/erasure_code/ec_multibinary.asm          |   95 +
 src/isa-l/erasure_code/erasure_code_base_perf.c    |  176 +
 src/isa-l/erasure_code/erasure_code_base_test.c    |  764 +++
 src/isa-l/erasure_code/erasure_code_perf.c         |  177 +
 src/isa-l/erasure_code/erasure_code_test.c         |  764 +++
 src/isa-l/erasure_code/erasure_code_update_perf.c  |  281 +
 src/isa-l/erasure_code/erasure_code_update_test.c  |  959 +++
 src/isa-l/erasure_code/gen_rs_matrix_limits.c      |  115 +
 src/isa-l/erasure_code/gf_2vect_dot_prod_avx.asm   |  337 +
 src/isa-l/erasure_code/gf_2vect_dot_prod_avx2.asm  |  356 ++
 .../erasure_code/gf_2vect_dot_prod_avx512.asm      |  245 +
 src/isa-l/erasure_code/gf_2vect_dot_prod_sse.asm   |  339 +
 .../erasure_code/gf_2vect_dot_prod_sse_test.c      |  480 ++
 src/isa-l/erasure_code/gf_2vect_mad_avx.asm        |  236 +
 src/isa-l/erasure_code/gf_2vect_mad_avx2.asm       |  247 +
 src/isa-l/erasure_code/gf_2vect_mad_avx512.asm     |  230 +
 src/isa-l/erasure_code/gf_2vect_mad_sse.asm        |  239 +
 src/isa-l/erasure_code/gf_3vect_dot_prod_avx.asm   |  377 ++
 src/isa-l/erasure_code/gf_3vect_dot_prod_avx2.asm  |  397 ++
 .../erasure_code/gf_3vect_dot_prod_avx512.asm      |  270 +
 src/isa-l/erasure_code/gf_3vect_dot_prod_sse.asm   |  378 ++
 .../erasure_code/gf_3vect_dot_prod_sse_test.c      |  586 ++
 src/isa-l/erasure_code/gf_3vect_mad_avx.asm        |  288 +
 src/isa-l/erasure_code/gf_3vect_mad_avx2.asm       |  317 +
 src/isa-l/erasure_code/gf_3vect_mad_avx512.asm     |  247 +
 src/isa-l/erasure_code/gf_3vect_mad_sse.asm        |  298 +
 src/isa-l/erasure_code/gf_4vect_dot_prod_avx.asm   |  441 ++
 src/isa-l/erasure_code/gf_4vect_dot_prod_avx2.asm  |  460 ++
 .../erasure_code/gf_4vect_dot_prod_avx512.asm      |  301 +
 src/isa-l/erasure_code/gf_4vect_dot_prod_sse.asm   |  443 ++
 .../erasure_code/gf_4vect_dot_prod_sse_test.c      |  695 ++
 src/isa-l/erasure_code/gf_4vect_mad_avx.asm        |  336 +
 src/isa-l/erasure_code/gf_4vect_mad_avx2.asm       |  342 +
 src/isa-l/erasure_code/gf_4vect_mad_avx512.asm     |  267 +
 src/isa-l/erasure_code/gf_4vect_mad_sse.asm        |  342 +
 src/isa-l/erasure_code/gf_5vect_dot_prod_avx.asm   |  303 +
 src/isa-l/erasure_code/gf_5vect_dot_prod_avx2.asm  |  315 +
 .../erasure_code/gf_5vect_dot_prod_avx512.asm      |  335 +
 src/isa-l/erasure_code/gf_5vect_dot_prod_sse.asm   |  304 +
 .../erasure_code/gf_5vect_dot_prod_sse_test.c      |  805 +++
 src/isa-l/erasure_code/gf_5vect_mad_avx.asm        |  365 ++
 src/isa-l/erasure_code/gf_5vect_mad_avx2.asm       |  363 ++
 src/isa-l/erasure_code/gf_5vect_mad_avx512.asm     |  287 +
 src/isa-l/erasure_code/gf_5vect_mad_sse.asm        |  373 ++
 src/isa-l/erasure_code/gf_6vect_dot_prod_avx.asm   |  315 +
 src/isa-l/erasure_code/gf_6vect_dot_prod_avx2.asm  |  326 +
 .../erasure_code/gf_6vect_dot_prod_avx512.asm      |  354 ++
 src/isa-l/erasure_code/gf_6vect_dot_prod_sse.asm   |  315 +
 .../erasure_code/gf_6vect_dot_prod_sse_test.c      |  911 +++
 src/isa-l/erasure_code/gf_6vect_mad_avx.asm        |  394 ++
 src/isa-l/erasure_code/gf_6vect_mad_avx2.asm       |  400 ++
 src/isa-l/erasure_code/gf_6vect_mad_avx512.asm     |  321 +
 src/isa-l/erasure_code/gf_6vect_mad_sse.asm        |  406 ++
 src/isa-l/erasure_code/gf_inverse_test.c           |  225 +
 src/isa-l/erasure_code/gf_vect_dot_prod_1tbl.c     |  152 +
 src/isa-l/erasure_code/gf_vect_dot_prod_avx.asm    |  271 +
 src/isa-l/erasure_code/gf_vect_dot_prod_avx2.asm   |  280 +
 src/isa-l/erasure_code/gf_vect_dot_prod_avx512.asm |  240 +
 .../erasure_code/gf_vect_dot_prod_base_test.c      |  290 +
 src/isa-l/erasure_code/gf_vect_dot_prod_perf.c     |  174 +
 src/isa-l/erasure_code/gf_vect_dot_prod_sse.asm    |  271 +
 src/isa-l/erasure_code/gf_vect_dot_prod_test.c     |  525 ++
 src/isa-l/erasure_code/gf_vect_mad_avx.asm         |  196 +
 src/isa-l/erasure_code/gf_vect_mad_avx2.asm        |  203 +
 src/isa-l/erasure_code/gf_vect_mad_avx512.asm      |  193 +
 src/isa-l/erasure_code/gf_vect_mad_sse.asm         |  197 +
 src/isa-l/erasure_code/gf_vect_mad_test.c          |  519 ++
 src/isa-l/erasure_code/gf_vect_mul_avx.asm         |  164 +
 src/isa-l/erasure_code/gf_vect_mul_base_test.c     |  129 +
 src/isa-l/erasure_code/gf_vect_mul_perf.c          |   90 +
 src/isa-l/erasure_code/gf_vect_mul_sse.asm         |  170 +
 src/isa-l/erasure_code/gf_vect_mul_test.c          |  158 +
 src/isa-l/erasure_code/ppc64le/Makefile.am         |   15 +
 src/isa-l/erasure_code/ppc64le/ec_base_vsx.c       |   97 +
 src/isa-l/erasure_code/ppc64le/ec_base_vsx.h       |  338 +
 .../erasure_code/ppc64le/gf_2vect_dot_prod_vsx.c   |   83 +
 src/isa-l/erasure_code/ppc64le/gf_2vect_mad_vsx.c  |   65 +
 .../erasure_code/ppc64le/gf_3vect_dot_prod_vsx.c   |  104 +
 src/isa-l/erasure_code/ppc64le/gf_3vect_mad_vsx.c  |   84 +
 .../erasure_code/ppc64le/gf_4vect_dot_prod_vsx.c   |  124 +
 src/isa-l/erasure_code/ppc64le/gf_4vect_mad_vsx.c  |  103 +
 .../erasure_code/ppc64le/gf_5vect_dot_prod_vsx.c   |  145 +
 src/isa-l/erasure_code/ppc64le/gf_5vect_mad_vsx.c  |  122 +
 .../erasure_code/ppc64le/gf_6vect_dot_prod_vsx.c   |  166 +
 src/isa-l/erasure_code/ppc64le/gf_6vect_mad_vsx.c  |  142 +
 .../erasure_code/ppc64le/gf_vect_dot_prod_vsx.c    |   85 +
 src/isa-l/erasure_code/ppc64le/gf_vect_mad_vsx.c   |   48 +
 src/isa-l/erasure_code/ppc64le/gf_vect_mul_vsx.c   |   61 +
 109 files changed, 39062 insertions(+)
 create mode 100644 src/isa-l/erasure_code/Makefile.am
 create mode 100644 src/isa-l/erasure_code/aarch64/Makefile.am
 create mode 100644 src/isa-l/erasure_code/aarch64/ec_aarch64_dispatcher.c
 create mode 100644 src/isa-l/erasure_code/aarch64/ec_aarch64_highlevel_func.c
 create mode 100644 src/isa-l/erasure_code/aarch64/ec_multibinary_arm.S
 create mode 100644 src/isa-l/erasure_code/aarch64/gf_2vect_dot_prod_neon.S
 create mode 100644 src/isa-l/erasure_code/aarch64/gf_2vect_mad_neon.S
 create mode 100644 src/isa-l/erasure_code/aarch64/gf_3vect_dot_prod_neon.S
 create mode 100644 src/isa-l/erasure_code/aarch64/gf_3vect_mad_neon.S
 create mode 100644 src/isa-l/erasure_code/aarch64/gf_4vect_dot_prod_neon.S
 create mode 100644 src/isa-l/erasure_code/aarch64/gf_4vect_mad_neon.S
 create mode 100644 src/isa-l/erasure_code/aarch64/gf_5vect_dot_prod_neon.S
 create mode 100644 src/isa-l/erasure_code/aarch64/gf_5vect_mad_neon.S
 create mode 100644 src/isa-l/erasure_code/aarch64/gf_6vect_mad_neon.S
 create mode 100644 src/isa-l/erasure_code/aarch64/gf_vect_dot_prod_neon.S
 create mode 100644 src/isa-l/erasure_code/aarch64/gf_vect_mad_neon.S
 create mode 100644 src/isa-l/erasure_code/aarch64/gf_vect_mul_neon.S
 create mode 100644 src/isa-l/erasure_code/ec_base.c
 create mode 100644 src/isa-l/erasure_code/ec_base.h
 create mode 100644 src/isa-l/erasure_code/ec_base_aliases.c
 create mode 100644 src/isa-l/erasure_code/ec_highlevel_func.c
 create mode 100644 src/isa-l/erasure_code/ec_multibinary.asm
 create mode 100644 src/isa-l/erasure_code/erasure_code_base_perf.c
 create mode 100644 src/isa-l/erasure_code/erasure_code_base_test.c
 create mode 100644 src/isa-l/erasure_code/erasure_code_perf.c
 create mode 100644 src/isa-l/erasure_code/erasure_code_test.c
 create mode 100644 src/isa-l/erasure_code/erasure_code_update_perf.c
 create mode 100644 src/isa-l/erasure_code/erasure_code_update_test.c
 create mode 100644 src/isa-l/erasure_code/gen_rs_matrix_limits.c
 create mode 100644 src/isa-l/erasure_code/gf_2vect_dot_prod_avx.asm
 create mode 100644 src/isa-l/erasure_code/gf_2vect_dot_prod_avx2.asm
 create mode 100644 src/isa-l/erasure_code/gf_2vect_dot_prod_avx512.asm
 create mode 100644 src/isa-l/erasure_code/gf_2vect_dot_prod_sse.asm
 create mode 100644 src/isa-l/erasure_code/gf_2vect_dot_prod_sse_test.c
 create mode 100644 src/isa-l/erasure_code/gf_2vect_mad_avx.asm
 create mode 100644 src/isa-l/erasure_code/gf_2vect_mad_avx2.asm
 create mode 100644 src/isa-l/erasure_code/gf_2vect_mad_avx512.asm
 create mode 100644 src/isa-l/erasure_code/gf_2vect_mad_sse.asm
 create mode 100644 src/isa-l/erasure_code/gf_3vect_dot_prod_avx.asm
 create mode 100644 src/isa-l/erasure_code/gf_3vect_dot_prod_avx2.asm
 create mode 100644 src/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm
 create mode 100644 src/isa-l/erasure_code/gf_3vect_dot_prod_sse.asm
 create mode 100644 src/isa-l/erasure_code/gf_3vect_dot_prod_sse_test.c
 create mode 100644 src/isa-l/erasure_code/gf_3vect_mad_avx.asm
 create mode 100644 src/isa-l/erasure_code/gf_3vect_mad_avx2.asm
 create mode 100644 src/isa-l/erasure_code/gf_3vect_mad_avx512.asm
 create mode 100644 src/isa-l/erasure_code/gf_3vect_mad_sse.asm
 create mode 100644 src/isa-l/erasure_code/gf_4vect_dot_prod_avx.asm
 create mode 100644 src/isa-l/erasure_code/gf_4vect_dot_prod_avx2.asm
 create mode 100644 src/isa-l/erasure_code/gf_4vect_dot_prod_avx512.asm
 create mode 100644 src/isa-l/erasure_code/gf_4vect_dot_prod_sse.asm
 create mode 100644 src/isa-l/erasure_code/gf_4vect_dot_prod_sse_test.c
 create mode 100644 src/isa-l/erasure_code/gf_4vect_mad_avx.asm
 create mode 100644 src/isa-l/erasure_code/gf_4vect_mad_avx2.asm
 create mode 100644 src/isa-l/erasure_code/gf_4vect_mad_avx512.asm
 create mode 100644 src/isa-l/erasure_code/gf_4vect_mad_sse.asm
 create mode 100644 src/isa-l/erasure_code/gf_5vect_dot_prod_avx.asm
 create mode 100644 src/isa-l/erasure_code/gf_5vect_dot_prod_avx2.asm
 create mode 100644 src/isa-l/erasure_code/gf_5vect_dot_prod_avx512.asm
 create mode 100644 src/isa-l/erasure_code/gf_5vect_dot_prod_sse.asm
 create mode 100644 src/isa-l/erasure_code/gf_5vect_dot_prod_sse_test.c
 create mode 100644 src/isa-l/erasure_code/gf_5vect_mad_avx.asm
 create mode 100644 src/isa-l/erasure_code/gf_5vect_mad_avx2.asm
 create mode 100644 src/isa-l/erasure_code/gf_5vect_mad_avx512.asm
 create mode 100644 src/isa-l/erasure_code/gf_5vect_mad_sse.asm
 create mode 100644 src/isa-l/erasure_code/gf_6vect_dot_prod_avx.asm
 create mode 100644 src/isa-l/erasure_code/gf_6vect_dot_prod_avx2.asm
 create mode 100644 src/isa-l/erasure_code/gf_6vect_dot_prod_avx512.asm
 create mode 100644 src/isa-l/erasure_code/gf_6vect_dot_prod_sse.asm
 create mode 100644 src/isa-l/erasure_code/gf_6vect_dot_prod_sse_test.c
 create mode 100644 src/isa-l/erasure_code/gf_6vect_mad_avx.asm
 create mode 100644 src/isa-l/erasure_code/gf_6vect_mad_avx2.asm
 create mode 100644 src/isa-l/erasure_code/gf_6vect_mad_avx512.asm
 create mode 100644 src/isa-l/erasure_code/gf_6vect_mad_sse.asm
 create mode 100644 src/isa-l/erasure_code/gf_inverse_test.c
 create mode 100644 src/isa-l/erasure_code/gf_vect_dot_prod_1tbl.c
 create mode 100644 src/isa-l/erasure_code/gf_vect_dot_prod_avx.asm
 create mode 100644 src/isa-l/erasure_code/gf_vect_dot_prod_avx2.asm
 create mode 100644 src/isa-l/erasure_code/gf_vect_dot_prod_avx512.asm
 create mode 100644 src/isa-l/erasure_code/gf_vect_dot_prod_base_test.c
 create mode 100644 src/isa-l/erasure_code/gf_vect_dot_prod_perf.c
 create mode 100644 src/isa-l/erasure_code/gf_vect_dot_prod_sse.asm
 create mode 100644 src/isa-l/erasure_code/gf_vect_dot_prod_test.c
 create mode 100644 src/isa-l/erasure_code/gf_vect_mad_avx.asm
 create mode 100644 src/isa-l/erasure_code/gf_vect_mad_avx2.asm
 create mode 100644 src/isa-l/erasure_code/gf_vect_mad_avx512.asm
 create mode 100644 src/isa-l/erasure_code/gf_vect_mad_sse.asm
 create mode 100644 src/isa-l/erasure_code/gf_vect_mad_test.c
 create mode 100644 src/isa-l/erasure_code/gf_vect_mul_avx.asm
 create mode 100644 src/isa-l/erasure_code/gf_vect_mul_base_test.c
 create mode 100644 src/isa-l/erasure_code/gf_vect_mul_perf.c
 create mode 100644 src/isa-l/erasure_code/gf_vect_mul_sse.asm
 create mode 100644 src/isa-l/erasure_code/gf_vect_mul_test.c
 create mode 100644 src/isa-l/erasure_code/ppc64le/Makefile.am
 create mode 100644 src/isa-l/erasure_code/ppc64le/ec_base_vsx.c
 create mode 100644 src/isa-l/erasure_code/ppc64le/ec_base_vsx.h
 create mode 100644 src/isa-l/erasure_code/ppc64le/gf_2vect_dot_prod_vsx.c
 create mode 100644 src/isa-l/erasure_code/ppc64le/gf_2vect_mad_vsx.c
 create mode 100644 src/isa-l/erasure_code/ppc64le/gf_3vect_dot_prod_vsx.c
 create mode 100644 src/isa-l/erasure_code/ppc64le/gf_3vect_mad_vsx.c
 create mode 100644 src/isa-l/erasure_code/ppc64le/gf_4vect_dot_prod_vsx.c
 create mode 100644 src/isa-l/erasure_code/ppc64le/gf_4vect_mad_vsx.c
 create mode 100644 src/isa-l/erasure_code/ppc64le/gf_5vect_dot_prod_vsx.c
 create mode 100644 src/isa-l/erasure_code/ppc64le/gf_5vect_mad_vsx.c
 create mode 100644 src/isa-l/erasure_code/ppc64le/gf_6vect_dot_prod_vsx.c
 create mode 100644 src/isa-l/erasure_code/ppc64le/gf_6vect_mad_vsx.c
 create mode 100644 src/isa-l/erasure_code/ppc64le/gf_vect_dot_prod_vsx.c
 create mode 100644 src/isa-l/erasure_code/ppc64le/gf_vect_mad_vsx.c
 create mode 100644 src/isa-l/erasure_code/ppc64le/gf_vect_mul_vsx.c

(limited to 'src/isa-l/erasure_code')

diff --git a/src/isa-l/erasure_code/Makefile.am b/src/isa-l/erasure_code/Makefile.am
new file mode 100644
index 000000000..12a31852b
--- /dev/null
+++ b/src/isa-l/erasure_code/Makefile.am
@@ -0,0 +1,167 @@
+########################################################################
+# Copyright(c) 2011-2019 Intel Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#   * Redistributions of source code must retain the above copyright
+#     notice, this list of conditions and the following disclaimer.
+#   * Redistributions in binary form must reproduce the above copyright
+#     notice, this list of conditions and the following disclaimer in
+#     the documentation and/or other materials provided with the
+#     distribution.
+#   * Neither the name of Intel Corporation nor the names of its
+#     contributors may be used to endorse or promote products derived
+#     from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+########################################################################
+
+include erasure_code/aarch64/Makefile.am
+
+include erasure_code/ppc64le/Makefile.am
+
+lsrc += erasure_code/ec_base.c
+
+lsrc_base_aliases += erasure_code/ec_base_aliases.c
+lsrc_x86_64 += \
+	erasure_code/ec_highlevel_func.c \
+	erasure_code/gf_vect_mul_sse.asm \
+	erasure_code/gf_vect_mul_avx.asm \
+	erasure_code/gf_vect_dot_prod_sse.asm \
+	erasure_code/gf_vect_dot_prod_avx.asm \
+	erasure_code/gf_vect_dot_prod_avx2.asm \
+	erasure_code/gf_2vect_dot_prod_sse.asm \
+	erasure_code/gf_3vect_dot_prod_sse.asm \
+	erasure_code/gf_4vect_dot_prod_sse.asm \
+	erasure_code/gf_5vect_dot_prod_sse.asm \
+	erasure_code/gf_6vect_dot_prod_sse.asm \
+	erasure_code/gf_2vect_dot_prod_avx.asm \
+	erasure_code/gf_3vect_dot_prod_avx.asm \
+	erasure_code/gf_4vect_dot_prod_avx.asm \
+	erasure_code/gf_5vect_dot_prod_avx.asm \
+	erasure_code/gf_6vect_dot_prod_avx.asm \
+	erasure_code/gf_2vect_dot_prod_avx2.asm \
+	erasure_code/gf_3vect_dot_prod_avx2.asm \
+	erasure_code/gf_4vect_dot_prod_avx2.asm \
+	erasure_code/gf_5vect_dot_prod_avx2.asm \
+	erasure_code/gf_6vect_dot_prod_avx2.asm \
+	erasure_code/gf_vect_mad_sse.asm \
+	erasure_code/gf_2vect_mad_sse.asm \
+	erasure_code/gf_3vect_mad_sse.asm \
+	erasure_code/gf_4vect_mad_sse.asm \
+	erasure_code/gf_5vect_mad_sse.asm \
+	erasure_code/gf_6vect_mad_sse.asm \
+	erasure_code/gf_vect_mad_avx.asm \
+	erasure_code/gf_2vect_mad_avx.asm \
+	erasure_code/gf_3vect_mad_avx.asm \
+	erasure_code/gf_4vect_mad_avx.asm \
+	erasure_code/gf_5vect_mad_avx.asm \
+	erasure_code/gf_6vect_mad_avx.asm \
+	erasure_code/gf_vect_mad_avx2.asm \
+	erasure_code/gf_2vect_mad_avx2.asm \
+	erasure_code/gf_3vect_mad_avx2.asm \
+	erasure_code/gf_4vect_mad_avx2.asm \
+	erasure_code/gf_5vect_mad_avx2.asm \
+	erasure_code/gf_6vect_mad_avx2.asm \
+	erasure_code/ec_multibinary.asm
+
+#if HAVE_AVX512
+lsrc_x86_64 += \
+	erasure_code/gf_vect_dot_prod_avx512.asm \
+	erasure_code/gf_2vect_dot_prod_avx512.asm \
+	erasure_code/gf_3vect_dot_prod_avx512.asm \
+	erasure_code/gf_4vect_dot_prod_avx512.asm \
+	erasure_code/gf_5vect_dot_prod_avx512.asm \
+	erasure_code/gf_6vect_dot_prod_avx512.asm \
+	erasure_code/gf_vect_mad_avx512.asm \
+	erasure_code/gf_2vect_mad_avx512.asm \
+	erasure_code/gf_3vect_mad_avx512.asm \
+	erasure_code/gf_4vect_mad_avx512.asm \
+	erasure_code/gf_5vect_mad_avx512.asm \
+	erasure_code/gf_6vect_mad_avx512.asm
+
+lsrc_x86_32 += \
+	erasure_code/ec_highlevel_func.c \
+	erasure_code/ec_multibinary.asm \
+	erasure_code/gf_vect_dot_prod_avx.asm \
+	erasure_code/gf_2vect_dot_prod_avx.asm \
+	erasure_code/gf_3vect_dot_prod_avx.asm \
+	erasure_code/gf_4vect_dot_prod_avx.asm \
+	erasure_code/gf_vect_dot_prod_sse.asm \
+	erasure_code/gf_2vect_dot_prod_sse.asm \
+	erasure_code/gf_3vect_dot_prod_sse.asm \
+	erasure_code/gf_4vect_dot_prod_sse.asm \
+	erasure_code/gf_vect_dot_prod_avx2.asm \
+	erasure_code/gf_2vect_dot_prod_avx2.asm \
+	erasure_code/gf_3vect_dot_prod_avx2.asm \
+	erasure_code/gf_4vect_dot_prod_avx2.asm
+
+unit_tests32 += erasure_code/erasure_code_base_test \
+	erasure_code/erasure_code_test \
+	erasure_code/gf_vect_mul_test \
+	erasure_code/gf_vect_mul_base_test \
+	erasure_code/gf_vect_dot_prod_base_test \
+	erasure_code/gf_vect_dot_prod_test
+
+perf_tests32 += erasure_code/gf_vect_mul_perf \
+	erasure_code/gf_vect_dot_prod_perf \
+	erasure_code/erasure_code_perf \
+	erasure_code/erasure_code_base_perf \
+	erasure_code/gf_vect_dot_prod_1tbl
+
+src_include += -I $(srcdir)/erasure_code
+extern_hdrs += include/erasure_code.h \
+	include/gf_vect_mul.h
+
+other_src += erasure_code/ec_base.h \
+	include/multibinary.asm \
+	include/reg_sizes.asm
+
+check_tests += erasure_code/gf_vect_mul_test \
+	erasure_code/erasure_code_test \
+	erasure_code/gf_inverse_test \
+	erasure_code/erasure_code_update_test
+
+unit_tests += \
+	erasure_code/gf_vect_mul_base_test \
+	erasure_code/gf_vect_dot_prod_base_test \
+	erasure_code/gf_vect_dot_prod_test \
+	erasure_code/gf_vect_mad_test \
+	erasure_code/erasure_code_base_test
+
+perf_tests += erasure_code/gf_vect_mul_perf \
+	erasure_code/gf_vect_dot_prod_perf \
+	erasure_code/gf_vect_dot_prod_1tbl \
+	erasure_code/erasure_code_perf \
+	erasure_code/erasure_code_base_perf \
+	erasure_code/erasure_code_update_perf
+
+other_tests += erasure_code/gen_rs_matrix_limits
+
+other_tests_x86_64 += \
+	erasure_code/gf_2vect_dot_prod_sse_test \
+	erasure_code/gf_3vect_dot_prod_sse_test \
+	erasure_code/gf_4vect_dot_prod_sse_test \
+	erasure_code/gf_5vect_dot_prod_sse_test \
+	erasure_code/gf_6vect_dot_prod_sse_test
+
+other_tests_x86_32 += \
+	erasure_code/gf_2vect_dot_prod_sse_test \
+	erasure_code/gf_3vect_dot_prod_sse_test \
+	erasure_code/gf_4vect_dot_prod_sse_test \
+	erasure_code/gf_5vect_dot_prod_sse_test \
+	erasure_code/gf_6vect_dot_prod_sse_test
+
+other_src += include/test.h \
+	include/types.h
diff --git a/src/isa-l/erasure_code/aarch64/Makefile.am b/src/isa-l/erasure_code/aarch64/Makefile.am
new file mode 100644
index 000000000..94bb5a139
--- /dev/null
+++ b/src/isa-l/erasure_code/aarch64/Makefile.am
@@ -0,0 +1,45 @@
+##################################################################
+# Copyright (c) 2019 Huawei Technologies Co., Ltd.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#   * Redistributions of source code must retain the above copyright
+#     notice, this list of conditions and the following disclaimer.
+#   * Redistributions in binary form must reproduce the above copyright
+#     notice, this list of conditions and the following disclaimer in
+#     the documentation and/or other materials provided with the
+#     distribution.
+#   * Neither the name of Huawei Corporation nor the names of its
+#     contributors may be used to endorse or promote products derived
+#     from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+########################################################################
+
+lsrc_aarch64 += \
+	erasure_code/aarch64/ec_aarch64_highlevel_func.c \
+	erasure_code/aarch64/ec_aarch64_dispatcher.c \
+	erasure_code/aarch64/gf_vect_dot_prod_neon.S \
+	erasure_code/aarch64/gf_2vect_dot_prod_neon.S \
+	erasure_code/aarch64/gf_3vect_dot_prod_neon.S \
+	erasure_code/aarch64/gf_4vect_dot_prod_neon.S \
+	erasure_code/aarch64/gf_5vect_dot_prod_neon.S \
+	erasure_code/aarch64/gf_vect_mad_neon.S \
+	erasure_code/aarch64/gf_2vect_mad_neon.S \
+	erasure_code/aarch64/gf_3vect_mad_neon.S \
+	erasure_code/aarch64/gf_4vect_mad_neon.S \
+	erasure_code/aarch64/gf_5vect_mad_neon.S \
+	erasure_code/aarch64/gf_6vect_mad_neon.S \
+	erasure_code/aarch64/gf_vect_mul_neon.S \
+	erasure_code/aarch64/ec_multibinary_arm.S
diff --git a/src/isa-l/erasure_code/aarch64/ec_aarch64_dispatcher.c b/src/isa-l/erasure_code/aarch64/ec_aarch64_dispatcher.c
new file mode 100644
index 000000000..ba6634785
--- /dev/null
+++ b/src/isa-l/erasure_code/aarch64/ec_aarch64_dispatcher.c
@@ -0,0 +1,69 @@
+/**************************************************************
+  Copyright (c) 2019 Huawei Technologies Co., Ltd.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Huawei Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#include <aarch64_multibinary.h>
+
+DEFINE_INTERFACE_DISPATCHER(gf_vect_dot_prod)
+{
+	if (getauxval(AT_HWCAP) & HWCAP_ASIMD)
+		return PROVIDER_INFO(gf_vect_dot_prod_neon);
+	return PROVIDER_BASIC(gf_vect_dot_prod);
+
+}
+
+DEFINE_INTERFACE_DISPATCHER(gf_vect_mad)
+{
+	if (getauxval(AT_HWCAP) & HWCAP_ASIMD)
+		return PROVIDER_INFO(gf_vect_mad_neon);
+	return PROVIDER_BASIC(gf_vect_mad);
+
+}
+
+DEFINE_INTERFACE_DISPATCHER(ec_encode_data)
+{
+	if (getauxval(AT_HWCAP) & HWCAP_ASIMD)
+		return PROVIDER_INFO(ec_encode_data_neon);
+	return PROVIDER_BASIC(ec_encode_data);
+
+}
+
+DEFINE_INTERFACE_DISPATCHER(ec_encode_data_update)
+{
+	if (getauxval(AT_HWCAP) & HWCAP_ASIMD)
+		return PROVIDER_INFO(ec_encode_data_update_neon);
+	return PROVIDER_BASIC(ec_encode_data_update);
+
+}
+
+DEFINE_INTERFACE_DISPATCHER(gf_vect_mul)
+{
+	if (getauxval(AT_HWCAP) & HWCAP_ASIMD)
+		return PROVIDER_INFO(gf_vect_mul_neon);
+	return PROVIDER_BASIC(gf_vect_mul);
+
+}
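
Editor's note: the dispatcher above binds each public entry point at load time by testing
the kernel-reported CPU feature bits. Below is a minimal standalone sketch of the same
probe, assuming a Linux/aarch64 target where getauxval() and HWCAP_ASIMD come from
<sys/auxv.h> and <asm/hwcap.h>; it is an illustration, not part of the patch.

```c
#include <stdio.h>
#include <sys/auxv.h>   /* getauxval() */
#include <asm/hwcap.h>  /* HWCAP_ASIMD on aarch64 Linux */

int main(void)
{
	/* AT_HWCAP is a bitmask of CPU features the kernel reports to userspace. */
	if (getauxval(AT_HWCAP) & HWCAP_ASIMD)
		printf("ASIMD (NEON) present: dispatcher would select the *_neon kernels\n");
	else
		printf("no ASIMD: dispatcher falls back to the portable base routines\n");
	return 0;
}
```
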
diff --git a/src/isa-l/erasure_code/aarch64/ec_aarch64_highlevel_func.c b/src/isa-l/erasure_code/aarch64/ec_aarch64_highlevel_func.c
new file mode 100644
index 000000000..dd23702ce
--- /dev/null
+++ b/src/isa-l/erasure_code/aarch64/ec_aarch64_highlevel_func.c
@@ -0,0 +1,127 @@
+/**************************************************************
+  Copyright (c) 2019 Huawei Technologies Co., Ltd.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Huawei Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#include "erasure_code.h"
+
+/*external function*/
+extern void gf_vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls,
+				  unsigned char **src, unsigned char *dest);
+extern void gf_2vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls,
+				   unsigned char **src, unsigned char **dest);
+extern void gf_3vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls,
+				   unsigned char **src, unsigned char **dest);
+extern void gf_4vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls,
+				   unsigned char **src, unsigned char **dest);
+extern void gf_5vect_dot_prod_neon(int len, int vlen, unsigned char *gftbls,
+				   unsigned char **src, unsigned char **dest);
+extern void gf_vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls,
+			     unsigned char *src, unsigned char *dest);
+extern void gf_2vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls,
+			      unsigned char *src, unsigned char **dest);
+extern void gf_3vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls,
+			      unsigned char *src, unsigned char **dest);
+extern void gf_4vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls,
+			      unsigned char *src, unsigned char **dest);
+extern void gf_5vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls,
+			      unsigned char *src, unsigned char **dest);
+extern void gf_6vect_mad_neon(int len, int vec, int vec_i, unsigned char *gftbls,
+			      unsigned char *src, unsigned char **dest);
+
+void ec_encode_data_neon(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
+			 unsigned char **coding)
+{
+	if (len < 16) {
+		ec_encode_data_base(len, k, rows, g_tbls, data, coding);
+		return;
+	}
+
+	while (rows > 5) {
+		gf_5vect_dot_prod_neon(len, k, g_tbls, data, coding);
+		g_tbls += 5 * k * 32;
+		coding += 5;
+		rows -= 5;
+	}
+	switch (rows) {
+	case 5:
+		gf_5vect_dot_prod_neon(len, k, g_tbls, data, coding);
+		break;
+	case 4:
+		gf_4vect_dot_prod_neon(len, k, g_tbls, data, coding);
+		break;
+	case 3:
+		gf_3vect_dot_prod_neon(len, k, g_tbls, data, coding);
+		break;
+	case 2:
+		gf_2vect_dot_prod_neon(len, k, g_tbls, data, coding);
+		break;
+	case 1:
+		gf_vect_dot_prod_neon(len, k, g_tbls, data, *coding);
+		break;
+	case 0:
+		break;
+	default:
+		break;
+	}
+}
+
+void ec_encode_data_update_neon(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
+				unsigned char *data, unsigned char **coding)
+{
+	if (len < 16) {
+		ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
+		return;
+	}
+	while (rows > 6) {
+		gf_6vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
+		g_tbls += 6 * k * 32;
+		coding += 6;
+		rows -= 6;
+	}
+	switch (rows) {
+	case 6:
+		gf_6vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
+		break;
+	case 5:
+		gf_5vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
+		break;
+	case 4:
+		gf_4vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
+		break;
+	case 3:
+		gf_3vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
+		break;
+	case 2:
+		gf_2vect_mad_neon(len, k, vec_i, g_tbls, data, coding);
+		break;
+	case 1:
+		gf_vect_mad_neon(len, k, vec_i, g_tbls, data, *coding);
+		break;
+	case 0:
+		break;
+	}
+}
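
Editor's note: ec_encode_data_neon() walks the parity rows in groups of five (the widest
dot-product kernel covers five outputs per pass) and advances g_tbls by 5 * k * 32 because
each source/destination pair consumes a 32-byte lookup table. A hedged usage sketch of the
public entry point these routines sit behind, following the pattern of the isa-l test and
perf programs; the fragment counts and buffer size here are arbitrary:

```c
#include <stdlib.h>
#include "erasure_code.h"

#define K   10    /* data fragments */
#define P   4     /* parity fragments */
#define LEN 4096  /* bytes per fragment */

int encode_example(void)
{
	unsigned char *frag[K + P];
	unsigned char a[(K + P) * K];      /* encode matrix */
	unsigned char g_tbls[K * P * 32];  /* 32 bytes per (src, parity) pair */
	int i;

	for (i = 0; i < K + P; i++)
		if ((frag[i] = calloc(1, LEN)) == NULL)
			return -1;
	/* ... fill frag[0..K-1] with user data ... */

	gf_gen_rs_matrix(a, K + P, K);            /* Reed-Solomon coefficients */
	ec_init_tables(K, P, &a[K * K], g_tbls);  /* expand into nibble tables */

	/* The multibinary dispatcher routes this to ec_encode_data_neon()
	 * on aarch64 CPUs with ASIMD. Parity lands in frag[K..K+P-1]. */
	ec_encode_data(LEN, K, P, g_tbls, frag, &frag[K]);

	for (i = 0; i < K + P; i++)
		free(frag[i]);
	return 0;
}
```
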
diff --git a/src/isa-l/erasure_code/aarch64/ec_multibinary_arm.S b/src/isa-l/erasure_code/aarch64/ec_multibinary_arm.S
new file mode 100644
index 000000000..0b75a4902
--- /dev/null
+++ b/src/isa-l/erasure_code/aarch64/ec_multibinary_arm.S
@@ -0,0 +1,36 @@
+/**************************************************************
+  Copyright (c) 2019 Huawei Technologies Co., Ltd.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Huawei Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include "aarch64_multibinary.h"
+
+mbin_interface ec_encode_data
+mbin_interface gf_vect_mul
+mbin_interface gf_vect_dot_prod
+mbin_interface gf_vect_mad
+mbin_interface ec_encode_data_update
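
Editor's note: each mbin_interface line emits a public symbol that jumps through a
function pointer, which the matching DEFINE_INTERFACE_DISPATCHER() from
ec_aarch64_dispatcher.c fills in on first use. Roughly the same lazy-binding pattern
rendered in C; every name below is illustrative only, not isa-l API:

```c
#include <stdio.h>

static int work_base(int x) { return x * 2; }  /* stand-in portable kernel */
static int work_neon(int x) { return x * 2; }  /* stand-in SIMD kernel */

static int cpu_has_asimd(void) { return 1; }   /* placeholder feature probe */

static int work_resolve(int x);
static int (*work)(int) = work_resolve;        /* the exported "symbol" */

static int work_resolve(int x)
{
	/* First call: choose an implementation, patch the pointer, forward. */
	work = cpu_has_asimd() ? work_neon : work_base;
	return work(x);
}

int main(void)
{
	printf("%d\n", work(21));  /* resolves, then runs the chosen kernel */
	printf("%d\n", work(21));  /* subsequent calls go straight through */
	return 0;
}
```
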
diff --git a/src/isa-l/erasure_code/aarch64/gf_2vect_dot_prod_neon.S b/src/isa-l/erasure_code/aarch64/gf_2vect_dot_prod_neon.S
new file mode 100644
index 000000000..33a28501d
--- /dev/null
+++ b/src/isa-l/erasure_code/aarch64/gf_2vect_dot_prod_neon.S
@@ -0,0 +1,399 @@
+/**************************************************************
+  Copyright (c) 2019 Huawei Technologies Co., Ltd.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Huawei Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+.text
+
+.global gf_2vect_dot_prod_neon
+.type gf_2vect_dot_prod_neon, %function
+
+
+/* arguments */
+x_len	.req	x0
+x_vec	.req	x1
+x_tbl	.req	x2
+x_src	.req	x3
+x_dest	.req	x4
+
+/* returns */
+w_ret	.req	w0
+
+/* local variables */
+x_vec_i	.req	x5
+x_ptr	.req	x6
+x_pos	.req	x7
+x_tmp	.req	x8
+x_tbl1	.req	x9
+x_tbl2	.req	x10
+x_dest1	.req	x11
+x_dest2	.req	x12
+
+/* vectors */
+v_gft1_lo	.req	v0
+v_gft1_hi	.req	v1
+v_gft2_lo	.req	v2
+v_gft2_hi	.req	v3
+q_gft1_lo	.req	q0
+q_gft1_hi	.req	q1
+q_gft2_lo	.req	q2
+q_gft2_hi	.req	q3
+
+v_mask0f	.req	v4
+q_mask0f	.req	q4
+
+v_tmp1_lo	.req	v5
+v_tmp1_hi	.req	v6
+v_tmp1		.req	v7
+
+v_data_0	.req	v8
+v_data_1	.req	v9
+v_data_2	.req	v10
+v_data_3	.req	v11
+v_data_4	.req	v12
+v_data_5	.req	v13
+v_data_6	.req	v14
+v_data_7	.req	v15
+q_data_0	.req	q8
+q_data_1	.req	q9
+q_data_2	.req	q10
+q_data_3	.req	q11
+q_data_4	.req	q12
+q_data_5	.req	q13
+q_data_6	.req	q14
+q_data_7	.req	q15
+
+v_p1_0	.req	v16
+v_p1_1	.req	v17
+v_p1_2	.req	v18
+v_p1_3	.req	v19
+v_p1_4	.req	v20
+v_p1_5	.req	v21
+v_p1_6	.req	v22
+v_p1_7	.req	v23
+v_p2_0	.req	v24
+v_p2_1	.req	v25
+v_p2_2	.req	v26
+v_p2_3	.req	v27
+v_p2_4	.req	v28
+v_p2_5	.req	v29
+v_p2_6	.req	v30
+v_p2_7	.req	v31
+
+q_p1_0	.req	q16
+q_p1_1	.req	q17
+q_p1_2	.req	q18
+q_p1_3	.req	q19
+q_p1_4	.req	q20
+q_p1_5	.req	q21
+q_p1_6	.req	q22
+q_p1_7	.req	q23
+q_p2_0	.req	q24
+q_p2_1	.req	q25
+q_p2_2	.req	q26
+q_p2_3	.req	q27
+q_p2_4	.req	q28
+q_p2_5	.req	q29
+q_p2_6	.req	q30
+q_p2_7	.req	q31
+
+v_p1	.req	v_p1_0
+q_p1	.req	q_p1_0
+v_p2	.req	v_p2_0
+q_p2	.req	q_p2_0
+v_data	.req	v_p1_1
+q_data	.req	q_p1_1
+v_data_lo	.req	v_p1_2
+v_data_hi	.req	v_p1_3
+
+gf_2vect_dot_prod_neon:
+	/* less than 16 bytes, return_fail */
+	cmp	x_len, #16
+	blt	.return_fail
+
+	movi	v_mask0f.16b, #0x0f
+	mov	x_pos, #0
+	lsl	x_vec, x_vec, #3
+	ldr	x_dest1, [x_dest, #8*0]
+	ldr	x_dest2, [x_dest, #8*1]
+
+.Lloop128_init:
+	/* less than 128 bytes, goto Lloop16_init */
+	cmp	x_len, #128
+	blt	.Lloop16_init
+
+	/* save d8 ~ d15 to stack */
+	sub	sp, sp, #64
+	stp	d8, d9, [sp]
+	stp	d10, d11, [sp, #16]
+	stp	d12, d13, [sp, #32]
+	stp	d14, d15, [sp, #48]
+
+	sub	x_len, x_len, #128
+
+.Lloop128:
+	movi	v_p1_0.16b, #0
+	movi	v_p1_1.16b, #0
+	movi	v_p1_2.16b, #0
+	movi	v_p1_3.16b, #0
+	movi	v_p1_4.16b, #0
+	movi	v_p1_5.16b, #0
+	movi	v_p1_6.16b, #0
+	movi	v_p1_7.16b, #0
+
+	movi	v_p2_0.16b, #0
+	movi	v_p2_1.16b, #0
+	movi	v_p2_2.16b, #0
+	movi	v_p2_3.16b, #0
+	movi	v_p2_4.16b, #0
+	movi	v_p2_5.16b, #0
+	movi	v_p2_6.16b, #0
+	movi	v_p2_7.16b, #0
+
+	mov	x_tbl1, x_tbl
+	add	x_tbl2, x_tbl, x_vec, lsl #2
+	mov	x_vec_i, #0
+
+.Lloop128_vects:
+	ldr	x_ptr, [x_src, x_vec_i]
+	add	x_vec_i, x_vec_i, #8
+	add	x_ptr, x_ptr, x_pos
+
+	ldp	q_data_0, q_data_1, [x_ptr], #32
+	ldp	q_data_2, q_data_3, [x_ptr], #32
+
+	ldp	q_gft1_lo, q_gft1_hi, [x_tbl1], #32
+	ldp	q_gft2_lo, q_gft2_hi, [x_tbl2], #32
+	ldp	q_data_4, q_data_5, [x_ptr], #32
+	ldp	q_data_6, q_data_7, [x_ptr], #32
+	prfm	pldl1strm, [x_ptr]
+	prfm	pldl1keep, [x_tbl1]
+	prfm	pldl1keep, [x_tbl2]
+
+	/* data_0 */
+	and	v_tmp1.16b, v_data_0.16b, v_mask0f.16b
+	ushr	v_data_0.16b, v_data_0.16b, #4
+
+	tbl	v_tmp1_lo.16b, {v_gft1_lo.16b}, v_tmp1.16b
+	tbl	v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_0.16b
+	eor	v_p1_0.16b, v_tmp1_lo.16b, v_p1_0.16b
+	eor	v_p1_0.16b, v_p1_0.16b, v_tmp1_hi.16b
+
+	tbl	v_tmp1_lo.16b, {v_gft2_lo.16b}, v_tmp1.16b
+	tbl	v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_0.16b
+	eor	v_p2_0.16b, v_tmp1_lo.16b, v_p2_0.16b
+	eor	v_p2_0.16b, v_p2_0.16b, v_tmp1_hi.16b
+
+	/* data_1 */
+	and	v_tmp1.16b, v_data_1.16b, v_mask0f.16b
+	ushr	v_data_1.16b, v_data_1.16b, #4
+
+	tbl	v_tmp1_lo.16b, {v_gft1_lo.16b}, v_tmp1.16b
+	tbl	v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_1.16b
+	eor	v_p1_1.16b, v_tmp1_lo.16b, v_p1_1.16b
+	eor	v_p1_1.16b, v_p1_1.16b, v_tmp1_hi.16b
+
+	tbl	v_tmp1_lo.16b, {v_gft2_lo.16b}, v_tmp1.16b
+	tbl	v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_1.16b
+	eor	v_p2_1.16b, v_tmp1_lo.16b, v_p2_1.16b
+	eor	v_p2_1.16b, v_p2_1.16b, v_tmp1_hi.16b
+
+	/* data_2 */
+	and	v_tmp1.16b, v_data_2.16b, v_mask0f.16b
+	ushr	v_data_2.16b, v_data_2.16b, #4
+
+	tbl	v_tmp1_lo.16b, {v_gft1_lo.16b}, v_tmp1.16b
+	tbl	v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_2.16b
+	eor	v_p1_2.16b, v_tmp1_lo.16b, v_p1_2.16b
+	eor	v_p1_2.16b, v_p1_2.16b, v_tmp1_hi.16b
+
+	tbl	v_tmp1_lo.16b, {v_gft2_lo.16b}, v_tmp1.16b
+	tbl	v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_2.16b
+	eor	v_p2_2.16b, v_tmp1_lo.16b, v_p2_2.16b
+	eor	v_p2_2.16b, v_p2_2.16b, v_tmp1_hi.16b
+
+	/* data_3 */
+	and	v_tmp1.16b, v_data_3.16b, v_mask0f.16b
+	ushr	v_data_3.16b, v_data_3.16b, #4
+
+	tbl	v_tmp1_lo.16b, {v_gft1_lo.16b}, v_tmp1.16b
+	tbl	v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_3.16b
+	eor	v_p1_3.16b, v_tmp1_lo.16b, v_p1_3.16b
+	eor	v_p1_3.16b, v_p1_3.16b, v_tmp1_hi.16b
+
+	tbl	v_tmp1_lo.16b, {v_gft2_lo.16b}, v_tmp1.16b
+	tbl	v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_3.16b
+	eor	v_p2_3.16b, v_tmp1_lo.16b, v_p2_3.16b
+	eor	v_p2_3.16b, v_p2_3.16b, v_tmp1_hi.16b
+
+	/* data_4 */
+	and	v_tmp1.16b, v_data_4.16b, v_mask0f.16b
+	ushr	v_data_4.16b, v_data_4.16b, #4
+
+	tbl	v_tmp1_lo.16b, {v_gft1_lo.16b}, v_tmp1.16b
+	tbl	v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_4.16b
+	eor	v_p1_4.16b, v_tmp1_lo.16b, v_p1_4.16b
+	eor	v_p1_4.16b, v_p1_4.16b, v_tmp1_hi.16b
+
+	tbl	v_tmp1_lo.16b, {v_gft2_lo.16b}, v_tmp1.16b
+	tbl	v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_4.16b
+	eor	v_p2_4.16b, v_tmp1_lo.16b, v_p2_4.16b
+	eor	v_p2_4.16b, v_p2_4.16b, v_tmp1_hi.16b
+
+	/* data_5 */
+	and	v_tmp1.16b, v_data_5.16b, v_mask0f.16b
+	ushr	v_data_5.16b, v_data_5.16b, #4
+
+	tbl	v_tmp1_lo.16b, {v_gft1_lo.16b}, v_tmp1.16b
+	tbl	v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_5.16b
+	eor	v_p1_5.16b, v_tmp1_lo.16b, v_p1_5.16b
+	eor	v_p1_5.16b, v_p1_5.16b, v_tmp1_hi.16b
+
+	tbl	v_tmp1_lo.16b, {v_gft2_lo.16b}, v_tmp1.16b
+	tbl	v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_5.16b
+	eor	v_p2_5.16b, v_tmp1_lo.16b, v_p2_5.16b
+	eor	v_p2_5.16b, v_p2_5.16b, v_tmp1_hi.16b
+
+	/* data_6 */
+	and	v_tmp1.16b, v_data_6.16b, v_mask0f.16b
+	ushr	v_data_6.16b, v_data_6.16b, #4
+
+	tbl	v_tmp1_lo.16b, {v_gft1_lo.16b}, v_tmp1.16b
+	tbl	v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_6.16b
+	eor	v_p1_6.16b, v_tmp1_lo.16b, v_p1_6.16b
+	eor	v_p1_6.16b, v_p1_6.16b, v_tmp1_hi.16b
+
+	tbl	v_tmp1_lo.16b, {v_gft2_lo.16b}, v_tmp1.16b
+	tbl	v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_6.16b
+	eor	v_p2_6.16b, v_tmp1_lo.16b, v_p2_6.16b
+	eor	v_p2_6.16b, v_p2_6.16b, v_tmp1_hi.16b
+
+	/* data_7 */
+	and	v_tmp1.16b, v_data_7.16b, v_mask0f.16b
+	ushr	v_data_7.16b, v_data_7.16b, #4
+
+	tbl	v_tmp1_lo.16b, {v_gft1_lo.16b}, v_tmp1.16b
+	tbl	v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_7.16b
+	eor	v_p1_7.16b, v_tmp1_lo.16b, v_p1_7.16b
+	eor	v_p1_7.16b, v_p1_7.16b, v_tmp1_hi.16b
+
+	tbl	v_tmp1_lo.16b, {v_gft2_lo.16b}, v_tmp1.16b
+	tbl	v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_7.16b
+	eor	v_p2_7.16b, v_tmp1_lo.16b, v_p2_7.16b
+	eor	v_p2_7.16b, v_p2_7.16b, v_tmp1_hi.16b
+
+	cmp	x_vec_i, x_vec
+	blt	.Lloop128_vects
+
+.Lloop128_vects_end:
+	add	x_ptr, x_dest1, x_pos
+	stp	q_p1_0, q_p1_1, [x_ptr], #32
+	stp	q_p1_2, q_p1_3, [x_ptr], #32
+	stp	q_p1_4, q_p1_5, [x_ptr], #32
+	stp	q_p1_6, q_p1_7, [x_ptr]
+
+	add	x_ptr, x_dest2, x_pos
+	stp	q_p2_0, q_p2_1, [x_ptr], #32
+	stp	q_p2_2, q_p2_3, [x_ptr], #32
+	stp	q_p2_4, q_p2_5, [x_ptr], #32
+	stp	q_p2_6, q_p2_7, [x_ptr]
+
+	add	x_pos, x_pos, #128
+	cmp	x_pos, x_len
+	ble	.Lloop128
+
+.Lloop128_end:
+	/* restore d8 ~ d15 */
+	ldp	d8, d9, [sp]
+	ldp	d10, d11, [sp, #16]
+	ldp	d12, d13, [sp, #32]
+	ldp	d14, d15, [sp, #48]
+	add	sp, sp, #64
+
+	add	x_len, x_len, #128
+	cmp	x_pos, x_len
+	beq	.return_pass
+
+.Lloop16_init:
+	sub	x_len, x_len, #16
+	cmp	x_pos, x_len
+	bgt	.lessthan16_init
+
+.Lloop16:
+	movi	v_p1.16b, #0
+	movi	v_p2.16b, #0
+	mov	x_tbl1, x_tbl
+	add	x_tbl2, x_tbl, x_vec, lsl #2
+	mov	x_vec_i, #0
+
+.Lloop16_vects:
+	ldr	x_ptr, [x_src, x_vec_i]
+	ldr	q_data, [x_ptr, x_pos]
+	add	x_vec_i, x_vec_i, #8
+
+	ldp	q_gft1_lo, q_gft1_hi, [x_tbl1], #32
+	ldp	q_gft2_lo, q_gft2_hi, [x_tbl2], #32
+
+	and	v_data_lo.16b, v_data.16b, v_mask0f.16b
+	ushr	v_data_hi.16b, v_data.16b, #4
+
+	tbl	v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b
+	tbl	v_tmp1_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b
+	eor	v_p1.16b, v_tmp1_lo.16b, v_p1.16b
+	eor	v_p1.16b, v_p1.16b, v_tmp1_hi.16b
+
+	tbl	v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_hi.16b
+	tbl	v_tmp1_lo.16b, {v_gft2_lo.16b}, v_data_lo.16b
+	eor	v_p2.16b, v_tmp1_lo.16b, v_p2.16b
+	eor	v_p2.16b, v_p2.16b, v_tmp1_hi.16b
+
+	cmp	x_vec_i, x_vec
+	bne	.Lloop16_vects
+
+.Lloop16_vects_end:
+	str	q_p1, [x_dest1, x_pos]
+	str	q_p2, [x_dest2, x_pos]
+	add	x_pos, x_pos, #16
+	cmp	x_pos, x_len
+	ble	.Lloop16
+
+.Lloop16_end:
+	sub	x_tmp, x_pos, x_len
+	cmp	x_tmp, #16
+	beq	.return_pass
+
+.lessthan16_init:
+	mov	x_pos, x_len
+	b	.Lloop16
+
+.return_pass:
+	mov	w_ret, #0
+	ret
+
+.return_fail:
+	mov	w_ret, #1
+	ret
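
Editor's note: the inner loop above is GF(2^8) arithmetic done with byte shuffles. Each
input byte is split into nibbles (and with mask0f, ushr #4), each nibble indexes a
16-entry tbl lookup into the expanded coefficient table, and the two halves XOR-fold into
the accumulator. A hedged scalar C model of one such multiply-accumulate, assuming the
32-byte-per-coefficient table layout that ec_init_tables() emits (16 low-nibble products,
then 16 high-nibble products):

```c
/* One GF(2^8) multiply via split nibble tables: mirrors and/ushr/tbl/eor. */
static inline unsigned char
gf_mul_tbl(const unsigned char gftbl[32], unsigned char data)
{
	unsigned char lo = gftbl[data & 0x0f];       /* and v, mask0f; tbl */
	unsigned char hi = gftbl[16 + (data >> 4)];  /* ushr #4; tbl */
	return lo ^ hi;                              /* eor */
}

/* Dot product across vlen source fragments for one output byte position,
 * i.e. what the vector loop computes 16 (or 128) bytes at a time. */
static unsigned char
gf_dot(int vlen, const unsigned char *gftbls /* vlen * 32 bytes */,
       unsigned char *const *src, int pos)
{
	unsigned char p = 0;
	for (int v = 0; v < vlen; v++)
		p ^= gf_mul_tbl(&gftbls[32 * v], src[v][pos]);
	return p;
}
```

The 2vect variant simply keeps two table sets and two accumulators live so both parity
outputs are produced from a single pass over the source data.
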
diff --git a/src/isa-l/erasure_code/aarch64/gf_2vect_mad_neon.S b/src/isa-l/erasure_code/aarch64/gf_2vect_mad_neon.S
new file mode 100644
index 000000000..92c19f549
--- /dev/null
+++ b/src/isa-l/erasure_code/aarch64/gf_2vect_mad_neon.S
@@ -0,0 +1,401 @@
+/**************************************************************
+  Copyright (c) 2019 Huawei Technologies Co., Ltd.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Huawei Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+.text
+
+.global gf_2vect_mad_neon
+.type gf_2vect_mad_neon, %function
+
+
+/* arguments */
+x_len	.req	x0
+x_vec	.req	x1
+x_vec_i	.req	x2
+x_tbl	.req	x3
+x_src	.req	x4
+x_dest	.req	x5
+
+/* returns */
+w_ret	.req	w0
+
+/* local variables */
+x_src_end	.req	x6
+x_dest1		.req	x7
+x_dest2		.req	x8
+x_tmp		.req	x9
+x_tbl1		.req	x10
+x_tbl2		.req	x11
+x_const		.req	x12
+
+/* vectors */
+v_mask0f	.req	v0
+v_tmp_lo	.req	v1
+v_tmp_hi	.req	v2
+v_tmp		.req	v3
+q_tmp		.req	q3
+
+v_gft1_lo	.req	v4
+v_gft1_hi	.req	v5
+v_gft2_lo	.req	v6
+v_gft2_hi	.req	v7
+q_gft1_lo	.req	q4
+q_gft1_hi	.req	q5
+q_gft2_lo	.req	q6
+q_gft2_hi	.req	q7
+
+v_data_0	.req	v8
+v_data_1	.req	v9
+v_data_2	.req	v10
+v_data_3	.req	v11
+v_data_4	.req	v12
+v_data_5	.req	v13
+v_data_6	.req	v14
+v_data_7	.req	v15
+q_data_0	.req	q8
+q_data_1	.req	q9
+q_data_2	.req	q10
+q_data_3	.req	q11
+q_data_4	.req	q12
+q_data_5	.req	q13
+q_data_6	.req	q14
+q_data_7	.req	q15
+
+v_data_0_lo	.req	v16
+v_data_1_lo	.req	v17
+v_data_2_lo	.req	v18
+v_data_3_lo	.req	v19
+v_data_4_lo	.req	v20
+v_data_5_lo	.req	v21
+v_data_6_lo	.req	v22
+v_data_7_lo	.req	v23
+v_data_0_hi	.req	v_data_0
+v_data_1_hi	.req	v_data_1
+v_data_2_hi	.req	v_data_2
+v_data_3_hi	.req	v_data_3
+v_data_4_hi	.req	v_data_4
+v_data_5_hi	.req	v_data_5
+v_data_6_hi	.req	v_data_6
+v_data_7_hi	.req	v_data_7
+
+v_d0	.req	v24
+v_d1	.req	v25
+v_d2	.req	v26
+v_d3	.req	v27
+v_d4	.req	v28
+v_d5	.req	v29
+v_d6	.req	v30
+v_d7	.req	v31
+q_d0	.req	q24
+q_d1	.req	q25
+q_d2	.req	q26
+q_d3	.req	q27
+q_d4	.req	q28
+q_d5	.req	q29
+q_d6	.req	q30
+q_d7	.req	q31
+
+v_data		.req	v16
+q_data		.req	q16
+v_data_lo	.req	v17
+v_data_hi	.req	v18
+
+
+gf_2vect_mad_neon:
+	/* less than 16 bytes, return_fail */
+	cmp	x_len, #16
+	blt	.return_fail
+
+	movi	v_mask0f.16b, #0x0f
+	lsl	x_vec_i, x_vec_i, #5
+	lsl	x_vec, x_vec, #5
+	add	x_tbl1, x_tbl, x_vec_i
+	add	x_tbl2, x_tbl1, x_vec
+	add	x_src_end, x_src, x_len
+
+	ldr	x_dest1, [x_dest]
+	ldr	x_dest2, [x_dest, #8]
+	ldr	q_gft1_lo, [x_tbl1]
+	ldr	q_gft1_hi, [x_tbl1, #16]
+	ldr	q_gft2_lo, [x_tbl2]
+	ldr	q_gft2_hi, [x_tbl2, #16]
+
+.Lloop128_init:
+	/* less than 128 bytes, goto Lloop16_init */
+	cmp	x_len, #128
+	blt	.Lloop16_init
+
+	/* save d8 ~ d15 to stack */
+	sub	sp, sp, #64
+	stp	d8, d9, [sp]
+	stp	d10, d11, [sp, #16]
+	stp	d12, d13, [sp, #32]
+	stp	d14, d15, [sp, #48]
+
+	sub	x_src_end, x_src_end, #128
+
+.Lloop128:
+	ldr	q_data_0, [x_src, #16*0]
+	ldr	q_data_1, [x_src, #16*1]
+	ldr	q_data_2, [x_src, #16*2]
+	ldr	q_data_3, [x_src, #16*3]
+	ldr	q_data_4, [x_src, #16*4]
+	ldr	q_data_5, [x_src, #16*5]
+	ldr	q_data_6, [x_src, #16*6]
+	ldr	q_data_7, [x_src, #16*7]
+
+	ldr	q_d0, [x_dest1, #16*0]
+	ldr	q_d1, [x_dest1, #16*1]
+	ldr	q_d2, [x_dest1, #16*2]
+	ldr	q_d3, [x_dest1, #16*3]
+	ldr	q_d4, [x_dest1, #16*4]
+	ldr	q_d5, [x_dest1, #16*5]
+	ldr	q_d6, [x_dest1, #16*6]
+	ldr	q_d7, [x_dest1, #16*7]
+
+	and	v_data_0_lo.16b, v_data_0.16b, v_mask0f.16b
+	and	v_data_1_lo.16b, v_data_1.16b, v_mask0f.16b
+	and	v_data_2_lo.16b, v_data_2.16b, v_mask0f.16b
+	and	v_data_3_lo.16b, v_data_3.16b, v_mask0f.16b
+	and	v_data_4_lo.16b, v_data_4.16b, v_mask0f.16b
+	and	v_data_5_lo.16b, v_data_5.16b, v_mask0f.16b
+	and	v_data_6_lo.16b, v_data_6.16b, v_mask0f.16b
+	and	v_data_7_lo.16b, v_data_7.16b, v_mask0f.16b
+
+	ushr	v_data_0_hi.16b, v_data_0.16b, #4
+	ushr	v_data_1_hi.16b, v_data_1.16b, #4
+	ushr	v_data_2_hi.16b, v_data_2.16b, #4
+	ushr	v_data_3_hi.16b, v_data_3.16b, #4
+	ushr	v_data_4_hi.16b, v_data_4.16b, #4
+	ushr	v_data_5_hi.16b, v_data_5.16b, #4
+	ushr	v_data_6_hi.16b, v_data_6.16b, #4
+	ushr	v_data_7_hi.16b, v_data_7.16b, #4
+
+	tbl	v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_0_lo.16b
+	tbl	v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_0_hi.16b
+	eor	v_d0.16b, v_tmp_lo.16b, v_d0.16b
+	eor	v_d0.16b, v_d0.16b, v_tmp_hi.16b
+
+	tbl	v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_1_lo.16b
+	tbl	v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_1_hi.16b
+	eor	v_d1.16b, v_tmp_lo.16b, v_d1.16b
+	eor	v_d1.16b, v_d1.16b, v_tmp_hi.16b
+
+	tbl	v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_2_lo.16b
+	tbl	v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_2_hi.16b
+	eor	v_d2.16b, v_tmp_lo.16b, v_d2.16b
+	eor	v_d2.16b, v_d2.16b, v_tmp_hi.16b
+
+	tbl	v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_3_lo.16b
+	tbl	v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_3_hi.16b
+	eor	v_d3.16b, v_tmp_lo.16b, v_d3.16b
+	eor	v_d3.16b, v_d3.16b, v_tmp_hi.16b
+
+	tbl	v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_4_lo.16b
+	tbl	v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_4_hi.16b
+	eor	v_d4.16b, v_tmp_lo.16b, v_d4.16b
+	eor	v_d4.16b, v_d4.16b, v_tmp_hi.16b
+
+	tbl	v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_5_lo.16b
+	tbl	v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_5_hi.16b
+	eor	v_d5.16b, v_tmp_lo.16b, v_d5.16b
+	eor	v_d5.16b, v_d5.16b, v_tmp_hi.16b
+
+	tbl	v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_6_lo.16b
+	tbl	v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_6_hi.16b
+	eor	v_d6.16b, v_tmp_lo.16b, v_d6.16b
+	eor	v_d6.16b, v_d6.16b, v_tmp_hi.16b
+
+	tbl	v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_7_lo.16b
+	tbl	v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_7_hi.16b
+	eor	v_d7.16b, v_tmp_lo.16b, v_d7.16b
+	eor	v_d7.16b, v_d7.16b, v_tmp_hi.16b
+
+	str	q_d0, [x_dest1, #16*0]
+	str	q_d1, [x_dest1, #16*1]
+	str	q_d2, [x_dest1, #16*2]
+	str	q_d3, [x_dest1, #16*3]
+	str	q_d4, [x_dest1, #16*4]
+	str	q_d5, [x_dest1, #16*5]
+	str	q_d6, [x_dest1, #16*6]
+	str	q_d7, [x_dest1, #16*7]
+
+	ldr	q_d0, [x_dest2, #16*0]
+	ldr	q_d1, [x_dest2, #16*1]
+	ldr	q_d2, [x_dest2, #16*2]
+	ldr	q_d3, [x_dest2, #16*3]
+	ldr	q_d4, [x_dest2, #16*4]
+	ldr	q_d5, [x_dest2, #16*5]
+	ldr	q_d6, [x_dest2, #16*6]
+	ldr	q_d7, [x_dest2, #16*7]
+
+	tbl	v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_0_lo.16b
+	tbl	v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_0_hi.16b
+	eor	v_d0.16b, v_tmp_lo.16b, v_d0.16b
+	eor	v_d0.16b, v_d0.16b, v_tmp_hi.16b
+
+	tbl	v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_1_lo.16b
+	tbl	v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_1_hi.16b
+	eor	v_d1.16b, v_tmp_lo.16b, v_d1.16b
+	eor	v_d1.16b, v_d1.16b, v_tmp_hi.16b
+
+	tbl	v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_2_lo.16b
+	tbl	v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_2_hi.16b
+	eor	v_d2.16b, v_tmp_lo.16b, v_d2.16b
+	eor	v_d2.16b, v_d2.16b, v_tmp_hi.16b
+
+	tbl	v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_3_lo.16b
+	tbl	v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_3_hi.16b
+	eor	v_d3.16b, v_tmp_lo.16b, v_d3.16b
+	eor	v_d3.16b, v_d3.16b, v_tmp_hi.16b
+
+	tbl	v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_4_lo.16b
+	tbl	v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_4_hi.16b
+	eor	v_d4.16b, v_tmp_lo.16b, v_d4.16b
+	eor	v_d4.16b, v_d4.16b, v_tmp_hi.16b
+
+	tbl	v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_5_lo.16b
+	tbl	v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_5_hi.16b
+	eor	v_d5.16b, v_tmp_lo.16b, v_d5.16b
+	eor	v_d5.16b, v_d5.16b, v_tmp_hi.16b
+
+	tbl	v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_6_lo.16b
+	tbl	v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_6_hi.16b
+	eor	v_d6.16b, v_tmp_lo.16b, v_d6.16b
+	eor	v_d6.16b, v_d6.16b, v_tmp_hi.16b
+
+	tbl	v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_7_lo.16b
+	tbl	v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_7_hi.16b
+	eor	v_d7.16b, v_tmp_lo.16b, v_d7.16b
+	eor	v_d7.16b, v_d7.16b, v_tmp_hi.16b
+
+	str	q_d0, [x_dest2, #16*0]
+	str	q_d1, [x_dest2, #16*1]
+	str	q_d2, [x_dest2, #16*2]
+	str	q_d3, [x_dest2, #16*3]
+	str	q_d4, [x_dest2, #16*4]
+	str	q_d5, [x_dest2, #16*5]
+	str	q_d6, [x_dest2, #16*6]
+	str	q_d7, [x_dest2, #16*7]
+
+	add	x_src, x_src, #128
+	add	x_dest1, x_dest1, #128
+	add	x_dest2, x_dest2, #128
+	cmp	x_src, x_src_end
+	bls	.Lloop128
+
+.Lloop128_end:
+	/* restore d8 ~ d15 */
+	ldp	d8, d9, [sp]
+	ldp	d10, d11, [sp, #16]
+	ldp	d12, d13, [sp, #32]
+	ldp	d14, d15, [sp, #48]
+	add	sp, sp, #64
+	add	x_src_end, x_src_end, #128
+
+.Lloop16_init:
+	sub	x_src_end, x_src_end, #16
+	cmp	x_src, x_src_end
+	bhi	.lessthan16_init
+
+.Lloop16:
+	ldr	q_data, [x_src]
+
+	ldr	q_d0, [x_dest1]
+	ldr	q_d1, [x_dest2]
+
+	and	v_data_lo.16b, v_data.16b, v_mask0f.16b
+	ushr	v_data_hi.16b, v_data.16b, #4
+
+	tbl	v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b
+	tbl	v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b
+	eor	v_d0.16b, v_tmp_lo.16b, v_d0.16b
+	eor	v_d0.16b, v_d0.16b, v_tmp_hi.16b
+
+	tbl	v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_lo.16b
+	tbl	v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_hi.16b
+	eor	v_d1.16b, v_tmp_lo.16b, v_d1.16b
+	eor	v_d1.16b, v_d1.16b, v_tmp_hi.16b
+
+	str	q_d0, [x_dest1]
+	str	q_d1, [x_dest2]
+
+	add	x_dest1, x_dest1, #16
+	add	x_dest2, x_dest2, #16
+	add	x_src, x_src, #16
+	cmp	x_src, x_src_end
+	bls	.Lloop16
+
+.lessthan16_init:
+	sub	x_tmp, x_src, x_src_end
+	cmp	x_tmp, #16
+	beq	.return_pass
+
+.lessthan16:
+	mov	x_src, x_src_end
+	sub	x_dest1, x_dest1, x_tmp
+	sub	x_dest2, x_dest2, x_tmp
+
+	ldr	x_const, =const_tbl
+	sub	x_const, x_const, x_tmp
+	ldr	q_tmp, [x_const, #16]
+
+	ldr	q_data, [x_src]
+	ldr	q_d0, [x_dest1]
+	ldr	q_d1, [x_dest2]
+
+	and	v_data_lo.16b, v_data.16b, v_mask0f.16b
+	ushr	v_data_hi.16b, v_data.16b, #4
+
+	tbl	v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b
+	tbl	v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b
+	eor	v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b
+	and	v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b
+	eor	v_d0.16b, v_d0.16b, v_tmp_hi.16b
+
+	tbl	v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_lo.16b
+	tbl	v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_hi.16b
+	eor	v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b
+	and	v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b
+	eor	v_d1.16b, v_d1.16b, v_tmp_hi.16b
+
+	str	q_d0, [x_dest1]
+	str	q_d1, [x_dest2]
+
+.return_pass:
+	mov	w_ret, #0
+	ret
+
+.return_fail:
+	mov	w_ret, #1
+	ret
+
+.section .data
+.balign 8
+const_tbl:
+	.dword 0x0000000000000000, 0x0000000000000000
+	.dword 0xffffffffffffffff, 0xffffffffffffffff
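
Editor's note: unlike the dot-product kernel, the mad (multiply-add) kernel updates the
destinations in place, so .lessthan16 handles a ragged tail by backing the pointers up and
reprocessing the final full 16-byte vector with a 0x00/0xff mask taken from const_tbl:
bytes already updated have their product masked to zero, and x ^ 0 leaves them untouched.
A rough C rendering of that trick, under the same 32-byte nibble-table assumption as
before (tail is the count of bytes still outstanding, 1..15):

```c
/* Reprocess the last 16 bytes at an overlapping offset; mask out the
 * contribution for bytes the main loop already finished. */
static void mad_tail(unsigned char *dest16 /* last 16 bytes of dest */,
		     const unsigned char *src16 /* last 16 bytes of src */,
		     const unsigned char gftbl[32], int tail)
{
	for (int i = 0; i < 16; i++) {
		unsigned char b = src16[i];
		unsigned char prod = gftbl[b & 0x0f] ^ gftbl[16 + (b >> 4)];
		/* leading bytes were already updated: mask their product off */
		unsigned char mask = (i < 16 - tail) ? 0x00 : 0xff;
		dest16[i] ^= prod & mask;
	}
}
```
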
diff --git a/src/isa-l/erasure_code/aarch64/gf_3vect_dot_prod_neon.S b/src/isa-l/erasure_code/aarch64/gf_3vect_dot_prod_neon.S
new file mode 100644
index 000000000..becca90e2
--- /dev/null
+++ b/src/isa-l/erasure_code/aarch64/gf_3vect_dot_prod_neon.S
@@ -0,0 +1,358 @@
+/**************************************************************
+  Copyright (c) 2019 Huawei Technologies Co., Ltd.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Huawei Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+.text
+
+.global gf_3vect_dot_prod_neon
+.type gf_3vect_dot_prod_neon, %function
+
+
+/* arguments */
+x_len	.req	x0
+x_vec	.req	x1
+x_tbl	.req	x2
+x_src	.req	x3
+x_dest	.req	x4
+
+/* returns */
+w_ret	.req	w0
+
+/* local variables */
+x_vec_i	.req	x5
+x_ptr	.req	x6
+x_pos	.req	x7
+x_tmp	.req	x8
+x_dest1	.req	x9
+x_tbl1	.req	x10
+x_dest2	.req	x11
+x_tbl2	.req	x12
+x_dest3	.req	x13
+x_tbl3	.req	x14
+
+/* vectors */
+v_gft1_lo	.req	v0
+v_gft1_hi	.req	v1
+v_gft2_lo	.req	v2
+v_gft2_hi	.req	v3
+v_gft3_lo	.req	v4
+v_gft3_hi	.req	v5
+q_gft1_lo	.req	q0
+q_gft1_hi	.req	q1
+q_gft2_lo	.req	q2
+q_gft2_hi	.req	q3
+q_gft3_lo	.req	q4
+q_gft3_hi	.req	q5
+
+v_mask0f	.req	v6
+q_mask0f	.req	q6
+v_tmp1		.req	v7
+
+v_data_0	.req	v8
+v_data_1	.req	v9
+v_data_2	.req	v10
+v_data_3	.req	v11
+q_data_0	.req	q8
+q_data_1	.req	q9
+q_data_2	.req	q10
+q_data_3	.req	q11
+
+v_tmp1_lo	.req	v12
+v_tmp1_hi	.req	v13
+
+v_p1_0	.req	v20
+v_p1_1	.req	v21
+v_p1_2	.req	v22
+v_p1_3	.req	v23
+v_p2_0	.req	v24
+v_p2_1	.req	v25
+v_p2_2	.req	v26
+v_p2_3	.req	v27
+v_p3_0	.req	v28
+v_p3_1	.req	v29
+v_p3_2	.req	v30
+v_p3_3	.req	v31
+
+q_p1_0	.req	q20
+q_p1_1	.req	q21
+q_p1_2	.req	q22
+q_p1_3	.req	q23
+q_p2_0	.req	q24
+q_p2_1	.req	q25
+q_p2_2	.req	q26
+q_p2_3	.req	q27
+q_p3_0	.req	q28
+q_p3_1	.req	q29
+q_p3_2	.req	q30
+q_p3_3	.req	q31
+
+v_data		.req	v_p1_1
+q_data		.req	q_p1_1
+v_data_lo	.req	v_p1_2
+v_data_hi	.req	v_p1_3
+
+
+gf_3vect_dot_prod_neon:
+	/* less than 16 bytes, return_fail */
+	cmp	x_len, #16
+	blt	.return_fail
+
+	movi	v_mask0f.16b, #0x0f
+	mov	x_pos, #0
+	lsl	x_vec, x_vec, #3
+	ldr	x_dest1, [x_dest, #8*0]
+	ldr	x_dest2, [x_dest, #8*1]
+	ldr	x_dest3, [x_dest, #8*2]
+
+.Lloop64_init:
+	/* less than 64 bytes, goto Lloop16_init */
+	cmp	x_len, #64
+	blt	.Lloop16_init
+
+	/* save d8 ~ d15 to stack */
+	sub	sp, sp, #64
+	stp	d8, d9, [sp]
+	stp	d10, d11, [sp, #16]
+	stp	d12, d13, [sp, #32]
+	stp	d14, d15, [sp, #48]
+
+	sub	x_len, x_len, #64
+
+.Lloop64:
+	movi	v_p1_0.16b, #0
+	movi	v_p1_1.16b, #0
+	movi	v_p1_2.16b, #0
+	movi	v_p1_3.16b, #0
+	movi	v_p2_0.16b, #0
+	movi	v_p2_1.16b, #0
+	movi	v_p2_2.16b, #0
+	movi	v_p2_3.16b, #0
+	movi	v_p3_0.16b, #0
+	movi	v_p3_1.16b, #0
+	movi	v_p3_2.16b, #0
+	movi	v_p3_3.16b, #0
+
+	mov	x_tbl1, x_tbl
+	add	x_tbl2, x_tbl1, x_vec, lsl #2
+	add	x_tbl3, x_tbl2, x_vec, lsl #2
+	mov	x_vec_i, #0
+
+.Lloop64_vects:
+	ldr	x_ptr, [x_src, x_vec_i]
+	add	x_vec_i, x_vec_i, #8
+	add	x_ptr, x_ptr, x_pos
+
+	ldr	q_data_0, [x_ptr], #16
+	ldr	q_data_1, [x_ptr], #16
+
+	ldp	q_gft1_lo, q_gft1_hi, [x_tbl1], #32
+	ldp	q_gft2_lo, q_gft2_hi, [x_tbl2], #32
+	ldp	q_gft3_lo, q_gft3_hi, [x_tbl3], #32
+
+	ldr	q_data_2, [x_ptr], #16
+	ldr	q_data_3, [x_ptr], #16
+	prfm	pldl1strm, [x_ptr]
+	prfm	pldl1keep, [x_tbl1]
+	prfm	pldl1keep, [x_tbl2]
+	prfm	pldl1keep, [x_tbl3]
+
+	/* data_0 */
+	and	v_tmp1.16b, v_data_0.16b, v_mask0f.16b
+	ushr	v_data_0.16b, v_data_0.16b, #4
+
+	tbl	v_tmp1_lo.16b, {v_gft1_lo.16b}, v_tmp1.16b
+	tbl	v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_0.16b
+	eor	v_p1_0.16b, v_tmp1_lo.16b, v_p1_0.16b
+	eor	v_p1_0.16b, v_p1_0.16b, v_tmp1_hi.16b
+
+	tbl	v_tmp1_lo.16b, {v_gft2_lo.16b}, v_tmp1.16b
+	tbl	v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_0.16b
+	eor	v_p2_0.16b, v_tmp1_lo.16b, v_p2_0.16b
+	eor	v_p2_0.16b, v_p2_0.16b, v_tmp1_hi.16b
+
+	tbl	v_tmp1_lo.16b, {v_gft3_lo.16b}, v_tmp1.16b
+	tbl	v_tmp1_hi.16b, {v_gft3_hi.16b}, v_data_0.16b
+	eor	v_p3_0.16b, v_tmp1_lo.16b, v_p3_0.16b
+	eor	v_p3_0.16b, v_p3_0.16b, v_tmp1_hi.16b
+
+	/* data_1 */
+	and	v_tmp1.16b, v_data_1.16b, v_mask0f.16b
+	ushr	v_data_1.16b, v_data_1.16b, #4
+
+	tbl	v_tmp1_lo.16b, {v_gft1_lo.16b}, v_tmp1.16b
+	tbl	v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_1.16b
+	eor	v_p1_1.16b, v_tmp1_lo.16b, v_p1_1.16b
+	eor	v_p1_1.16b, v_p1_1.16b, v_tmp1_hi.16b
+
+	tbl	v_tmp1_lo.16b, {v_gft2_lo.16b}, v_tmp1.16b
+	tbl	v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_1.16b
+	eor	v_p2_1.16b, v_tmp1_lo.16b, v_p2_1.16b
+	eor	v_p2_1.16b, v_p2_1.16b, v_tmp1_hi.16b
+
+	tbl	v_tmp1_lo.16b, {v_gft3_lo.16b}, v_tmp1.16b
+	tbl	v_tmp1_hi.16b, {v_gft3_hi.16b}, v_data_1.16b
+	eor	v_p3_1.16b, v_tmp1_lo.16b, v_p3_1.16b
+	eor	v_p3_1.16b, v_p3_1.16b, v_tmp1_hi.16b
+
+	/* data_2 */
+	and	v_tmp1.16b, v_data_2.16b, v_mask0f.16b
+	ushr	v_data_2.16b, v_data_2.16b, #4
+
+	tbl	v_tmp1_lo.16b, {v_gft1_lo.16b}, v_tmp1.16b
+	tbl	v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_2.16b
+	eor	v_p1_2.16b, v_tmp1_lo.16b, v_p1_2.16b
+	eor	v_p1_2.16b, v_p1_2.16b, v_tmp1_hi.16b
+
+	tbl	v_tmp1_lo.16b, {v_gft2_lo.16b}, v_tmp1.16b
+	tbl	v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_2.16b
+	eor	v_p2_2.16b, v_tmp1_lo.16b, v_p2_2.16b
+	eor	v_p2_2.16b, v_p2_2.16b, v_tmp1_hi.16b
+
+	tbl	v_tmp1_lo.16b, {v_gft3_lo.16b}, v_tmp1.16b
+	tbl	v_tmp1_hi.16b, {v_gft3_hi.16b}, v_data_2.16b
+	eor	v_p3_2.16b, v_tmp1_lo.16b, v_p3_2.16b
+	eor	v_p3_2.16b, v_p3_2.16b, v_tmp1_hi.16b
+
+	/* data_3 */
+	and	v_tmp1.16b, v_data_3.16b, v_mask0f.16b
+	ushr	v_data_3.16b, v_data_3.16b, #4
+
+	tbl	v_tmp1_lo.16b, {v_gft1_lo.16b}, v_tmp1.16b
+	tbl	v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_3.16b
+	eor	v_p1_3.16b, v_tmp1_lo.16b, v_p1_3.16b
+	eor	v_p1_3.16b, v_p1_3.16b, v_tmp1_hi.16b
+
+	tbl	v_tmp1_lo.16b, {v_gft2_lo.16b}, v_tmp1.16b
+	tbl	v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_3.16b
+	eor	v_p2_3.16b, v_tmp1_lo.16b, v_p2_3.16b
+	eor	v_p2_3.16b, v_p2_3.16b, v_tmp1_hi.16b
+
+	tbl	v_tmp1_lo.16b, {v_gft3_lo.16b}, v_tmp1.16b
+	tbl	v_tmp1_hi.16b, {v_gft3_hi.16b}, v_data_3.16b
+	eor	v_p3_3.16b, v_tmp1_lo.16b, v_p3_3.16b
+	eor	v_p3_3.16b, v_p3_3.16b, v_tmp1_hi.16b
+
+	cmp	x_vec_i, x_vec
+	blt	.Lloop64_vects
+
+.Lloop64_vects_end:
+	add	x_ptr, x_dest1, x_pos
+	stp	q_p1_0, q_p1_1, [x_ptr], #32
+	stp	q_p1_2, q_p1_3, [x_ptr]
+
+	add	x_ptr, x_dest2, x_pos
+	stp	q_p2_0, q_p2_1, [x_ptr], #32
+	stp	q_p2_2, q_p2_3, [x_ptr]
+
+	add	x_ptr, x_dest3, x_pos
+	stp	q_p3_0, q_p3_1, [x_ptr], #32
+	stp	q_p3_2, q_p3_3, [x_ptr]
+
+	add	x_pos, x_pos, #64
+	cmp	x_pos, x_len
+	ble	.Lloop64
+
+.Lloop64_end:
+	/* restore d8 ~ d15 */
+	ldp	d8, d9, [sp]
+	ldp	d10, d11, [sp, #16]
+	ldp	d12, d13, [sp, #32]
+	ldp	d14, d15, [sp, #48]
+	add	sp, sp, #64
+
+	add	x_len, x_len, #64
+	cmp	x_pos, x_len
+	beq	.return_pass
+
+.Lloop16_init:
+	sub	x_len, x_len, #16
+	cmp	x_pos, x_len
+	bgt	.lessthan16_init
+
+.Lloop16:
+	movi	v_p1_0.16b, #0
+	movi	v_p2_0.16b, #0
+	movi	v_p3_0.16b, #0
+	mov	x_tbl1, x_tbl
+	add	x_tbl2, x_tbl1, x_vec, lsl #2
+	add	x_tbl3, x_tbl2, x_vec, lsl #2
+	mov	x_vec_i, #0
+
+.Lloop16_vects:
+	ldr	x_ptr, [x_src, x_vec_i]
+	add	x_vec_i, x_vec_i, #8
+	ldr	q_data, [x_ptr, x_pos]
+
+	ldp	q_gft1_lo, q_gft1_hi, [x_tbl1], #32
+	ldp	q_gft2_lo, q_gft2_hi, [x_tbl2], #32
+	ldp	q_gft3_lo, q_gft3_hi, [x_tbl3], #32
+
+	and	v_data_lo.16b, v_data.16b, v_mask0f.16b
+	ushr	v_data_hi.16b, v_data.16b, #4
+
+	tbl	v_gft1_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b
+	tbl	v_gft1_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b
+	tbl	v_gft2_lo.16b, {v_gft2_lo.16b}, v_data_lo.16b
+	tbl	v_gft2_hi.16b, {v_gft2_hi.16b}, v_data_hi.16b
+	tbl	v_gft3_lo.16b, {v_gft3_lo.16b}, v_data_lo.16b
+	tbl	v_gft3_hi.16b, {v_gft3_hi.16b}, v_data_hi.16b
+
+	eor	v_p1_0.16b, v_gft1_hi.16b, v_p1_0.16b
+	eor	v_p1_0.16b, v_p1_0.16b, v_gft1_lo.16b
+	eor	v_p2_0.16b, v_gft2_hi.16b, v_p2_0.16b
+	eor	v_p2_0.16b, v_p2_0.16b, v_gft2_lo.16b
+	eor	v_p3_0.16b, v_gft3_hi.16b, v_p3_0.16b
+	eor	v_p3_0.16b, v_p3_0.16b, v_gft3_lo.16b
+
+	cmp	x_vec_i, x_vec
+	bne	.Lloop16_vects
+
+.Lloop16_vects_end:
+	str	q_p1_0, [x_dest1, x_pos]
+	str	q_p2_0, [x_dest2, x_pos]
+	str	q_p3_0, [x_dest3, x_pos]
+	add	x_pos, x_pos, #16
+	cmp	x_pos, x_len
+	ble	.Lloop16
+
+.Lloop16_end:
+	sub	x_tmp, x_pos, x_len
+	cmp	x_tmp, #16
+	beq	.return_pass
+
+.lessthan16_init:
+	mov	x_pos, x_len
+	b	.Lloop16
+
+.return_pass:
+	mov	w_ret, #0
+	ret
+
+.return_fail:
+	mov	w_ret, #1
+	ret
diff --git a/src/isa-l/erasure_code/aarch64/gf_3vect_mad_neon.S b/src/isa-l/erasure_code/aarch64/gf_3vect_mad_neon.S
new file mode 100644
index 000000000..4a041e053
--- /dev/null
+++ b/src/isa-l/erasure_code/aarch64/gf_3vect_mad_neon.S
@@ -0,0 +1,381 @@
+/**************************************************************
+  Copyright (c) 2019 Huawei Technologies Co., Ltd.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Huawei Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ +.text + +.global gf_3vect_mad_neon +.type gf_3vect_mad_neon, %function + + +/* arguments */ +x_len .req x0 +x_vec .req x1 +x_vec_i .req x2 +x_tbl .req x3 +x_src .req x4 +x_dest .req x5 + +/* returns */ +w_ret .req w0 + +/* local variables */ +x_src_end .req x6 +x_dest1 .req x7 +x_dest2 .req x8 +x_dest3 .req x_dest +x_tmp .req x10 +x_tbl1 .req x11 +x_tbl2 .req x12 +x_tbl3 .req x13 +x_const .req x14 + +/* vectors */ +v_mask0f .req v0 +v_tmp_lo .req v1 +v_tmp_hi .req v2 +v_tmp .req v3 +q_tmp .req q3 + +v_gft1_lo .req v4 +v_gft1_hi .req v5 +v_gft2_lo .req v6 +v_gft2_hi .req v7 +v_gft3_lo .req v16 +v_gft3_hi .req v17 +q_gft1_lo .req q4 +q_gft1_hi .req q5 +q_gft2_lo .req q6 +q_gft2_hi .req q7 +q_gft3_lo .req q16 +q_gft3_hi .req q17 + +v_data_0 .req v8 +v_data_1 .req v9 +v_data_2 .req v10 +v_data_3 .req v11 +q_data_0 .req q8 +q_data_1 .req q9 +q_data_2 .req q10 +q_data_3 .req q11 + +v_data_0_lo .req v12 +v_data_1_lo .req v13 +v_data_2_lo .req v14 +v_data_3_lo .req v15 +v_data_0_hi .req v_data_0 +v_data_1_hi .req v_data_1 +v_data_2_hi .req v_data_2 +v_data_3_hi .req v_data_3 + +v_d1_0 .req v20 +v_d1_1 .req v21 +v_d1_2 .req v22 +v_d1_3 .req v23 +v_d2_0 .req v24 +v_d2_1 .req v25 +v_d2_2 .req v26 +v_d2_3 .req v27 +v_d3_0 .req v28 +v_d3_1 .req v29 +v_d3_2 .req v30 +v_d3_3 .req v31 +q_d1_0 .req q20 +q_d1_1 .req q21 +q_d1_2 .req q22 +q_d1_3 .req q23 +q_d2_0 .req q24 +q_d2_1 .req q25 +q_d2_2 .req q26 +q_d2_3 .req q27 +q_d3_0 .req q28 +q_d3_1 .req q29 +q_d3_2 .req q30 +q_d3_3 .req q31 + +v_data .req v21 +q_data .req q21 +v_data_lo .req v22 +v_data_hi .req v23 + +gf_3vect_mad_neon: + /* less than 16 bytes, return_fail */ + cmp x_len, #16 + blt .return_fail + + movi v_mask0f.16b, #0x0f + lsl x_vec_i, x_vec_i, #5 + lsl x_vec, x_vec, #5 + add x_tbl1, x_tbl, x_vec_i + add x_tbl2, x_tbl1, x_vec + add x_tbl3, x_tbl2, x_vec + add x_src_end, x_src, x_len + ldr x_dest1, [x_dest] + ldr x_dest2, [x_dest, #8] + ldr x_dest3, [x_dest, #16] + ldr q_gft1_lo, [x_tbl1] + ldr q_gft1_hi, [x_tbl1, #16] + ldr q_gft2_lo, [x_tbl2] + ldr q_gft2_hi, [x_tbl2, #16] + ldr q_gft3_lo, [x_tbl3] + ldr q_gft3_hi, [x_tbl3, #16] + +.Lloop64_init: + /* less than 64 bytes, goto Lloop16_init */ + cmp x_len, #64 + blt .Lloop16_init + + /* save d8 ~ d15 to stack */ + sub sp, sp, #64 + stp d8, d9, [sp] + stp d10, d11, [sp, #16] + stp d12, d13, [sp, #32] + stp d14, d15, [sp, #48] + + sub x_src_end, x_src_end, #64 + +.Lloop64: + ldr q_data_0, [x_src, #16*0] + ldr q_data_1, [x_src, #16*1] + ldr q_data_2, [x_src, #16*2] + ldr q_data_3, [x_src, #16*3] + add x_src, x_src, #64 + + ldr q_d1_0, [x_dest1, #16*0] + ldr q_d1_1, [x_dest1, #16*1] + ldr q_d1_2, [x_dest1, #16*2] + ldr q_d1_3, [x_dest1, #16*3] + + ldr q_d2_0, [x_dest2, #16*0] + ldr q_d2_1, [x_dest2, #16*1] + ldr q_d2_2, [x_dest2, #16*2] + ldr q_d2_3, [x_dest2, #16*3] + + ldr q_d3_0, [x_dest3, #16*0] + ldr q_d3_1, [x_dest3, #16*1] + ldr q_d3_2, [x_dest3, #16*2] + ldr q_d3_3, [x_dest3, #16*3] + + and 
v_data_0_lo.16b, v_data_0.16b, v_mask0f.16b + and v_data_1_lo.16b, v_data_1.16b, v_mask0f.16b + and v_data_2_lo.16b, v_data_2.16b, v_mask0f.16b + and v_data_3_lo.16b, v_data_3.16b, v_mask0f.16b + + ushr v_data_0_hi.16b, v_data_0.16b, #4 + ushr v_data_1_hi.16b, v_data_1.16b, #4 + ushr v_data_2_hi.16b, v_data_2.16b, #4 + ushr v_data_3_hi.16b, v_data_3.16b, #4 + + tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_0_lo.16b + tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_0_hi.16b + eor v_d1_0.16b, v_tmp_lo.16b, v_d1_0.16b + eor v_d1_0.16b, v_d1_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_1_lo.16b + tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_1_hi.16b + eor v_d1_1.16b, v_tmp_lo.16b, v_d1_1.16b + eor v_d1_1.16b, v_d1_1.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_2_lo.16b + tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_2_hi.16b + eor v_d1_2.16b, v_tmp_lo.16b, v_d1_2.16b + eor v_d1_2.16b, v_d1_2.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_3_lo.16b + tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_3_hi.16b + eor v_d1_3.16b, v_tmp_lo.16b, v_d1_3.16b + eor v_d1_3.16b, v_d1_3.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_0_lo.16b + tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_0_hi.16b + eor v_d2_0.16b, v_tmp_lo.16b, v_d2_0.16b + eor v_d2_0.16b, v_d2_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_1_lo.16b + tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_1_hi.16b + eor v_d2_1.16b, v_tmp_lo.16b, v_d2_1.16b + eor v_d2_1.16b, v_d2_1.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_2_lo.16b + tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_2_hi.16b + eor v_d2_2.16b, v_tmp_lo.16b, v_d2_2.16b + eor v_d2_2.16b, v_d2_2.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_3_lo.16b + tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_3_hi.16b + eor v_d2_3.16b, v_tmp_lo.16b, v_d2_3.16b + eor v_d2_3.16b, v_d2_3.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_0_lo.16b + tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_0_hi.16b + eor v_d3_0.16b, v_tmp_lo.16b, v_d3_0.16b + eor v_d3_0.16b, v_d3_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_1_lo.16b + tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_1_hi.16b + eor v_d3_1.16b, v_tmp_lo.16b, v_d3_1.16b + eor v_d3_1.16b, v_d3_1.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_2_lo.16b + tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_2_hi.16b + eor v_d3_2.16b, v_tmp_lo.16b, v_d3_2.16b + eor v_d3_2.16b, v_d3_2.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_3_lo.16b + tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_3_hi.16b + eor v_d3_3.16b, v_tmp_lo.16b, v_d3_3.16b + eor v_d3_3.16b, v_d3_3.16b, v_tmp_hi.16b + + str q_d1_0, [x_dest1, #16*0] + str q_d1_1, [x_dest1, #16*1] + str q_d1_2, [x_dest1, #16*2] + str q_d1_3, [x_dest1, #16*3] + add x_dest1, x_dest1, #64 + + str q_d2_0, [x_dest2, #16*0] + str q_d2_1, [x_dest2, #16*1] + str q_d2_2, [x_dest2, #16*2] + str q_d2_3, [x_dest2, #16*3] + add x_dest2, x_dest2, #64 + + str q_d3_0, [x_dest3, #16*0] + str q_d3_1, [x_dest3, #16*1] + str q_d3_2, [x_dest3, #16*2] + str q_d3_3, [x_dest3, #16*3] + add x_dest3, x_dest3, #64 + + cmp x_src, x_src_end + bls .Lloop64 + +.Lloop64_end: + /* restore d8 ~ d15 */ + ldp d8, d9, [sp] + ldp d10, d11, [sp, #16] + ldp d12, d13, [sp, #32] + ldp d14, d15, [sp, #48] + add sp, sp, #64 + add x_src_end, x_src_end, #64 + +.Lloop16_init: + sub x_src_end, x_src_end, #16 + cmp x_src, x_src_end + bhi .lessthan16_init + +.Lloop16: + ldr q_data, [x_src] + + ldr q_d1_0, [x_dest1] + ldr q_d2_0, 
[x_dest2] + ldr q_d3_0, [x_dest3] + + and v_data_lo.16b, v_data.16b, v_mask0f.16b + ushr v_data_hi.16b, v_data.16b, #4 + + tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b + eor v_d1_0.16b, v_tmp_lo.16b, v_d1_0.16b + eor v_d1_0.16b, v_d1_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_hi.16b + eor v_d2_0.16b, v_tmp_lo.16b, v_d2_0.16b + eor v_d2_0.16b, v_d2_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_hi.16b + eor v_d3_0.16b, v_tmp_lo.16b, v_d3_0.16b + eor v_d3_0.16b, v_d3_0.16b, v_tmp_hi.16b + + str q_d1_0, [x_dest1] + str q_d2_0, [x_dest2] + str q_d3_0, [x_dest3] + + add x_src, x_src, #16 + add x_dest1, x_dest1, #16 + add x_dest2, x_dest2, #16 + add x_dest3, x_dest3, #16 + cmp x_src, x_src_end + bls .Lloop16 + +.lessthan16_init: + sub x_tmp, x_src, x_src_end + cmp x_tmp, #16 + beq .return_pass + +.lessthan16: + mov x_src, x_src_end + sub x_dest1, x_dest1, x_tmp + sub x_dest2, x_dest2, x_tmp + sub x_dest3, x_dest3, x_tmp + + ldr x_const, =const_tbl + sub x_const, x_const, x_tmp + ldr q_tmp, [x_const, #16] + + ldr q_data, [x_src] + ldr q_d1_0, [x_dest1] + ldr q_d2_0, [x_dest2] + ldr q_d3_0, [x_dest3] + + and v_data_lo.16b, v_data.16b, v_mask0f.16b + ushr v_data_hi.16b, v_data.16b, #4 + + tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b + eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b + and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b + eor v_d1_0.16b, v_d1_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_hi.16b + eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b + and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b + eor v_d2_0.16b, v_d2_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_hi.16b + eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b + and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b + eor v_d3_0.16b, v_d3_0.16b, v_tmp_hi.16b + + str q_d1_0, [x_dest1] + str q_d2_0, [x_dest2] + str q_d3_0, [x_dest3] + +.return_pass: + mov w_ret, #0 + ret + +.return_fail: + mov w_ret, #1 + ret + +.section .data +.balign 8 +const_tbl: + .dword 0x0000000000000000, 0x0000000000000000 + .dword 0xffffffffffffffff, 0xffffffffffffffff diff --git a/src/isa-l/erasure_code/aarch64/gf_4vect_dot_prod_neon.S b/src/isa-l/erasure_code/aarch64/gf_4vect_dot_prod_neon.S new file mode 100644 index 000000000..2cfe5aab0 --- /dev/null +++ b/src/isa-l/erasure_code/aarch64/gf_4vect_dot_prod_neon.S @@ -0,0 +1,421 @@ +/************************************************************** + Copyright (c) 2019 Huawei Technologies Co., Ltd. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Huawei Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ +.text + +.global gf_4vect_dot_prod_neon +.type gf_4vect_dot_prod_neon, %function + + +/* arguments */ +x_len .req x0 +x_vec .req x1 +x_tbl .req x2 +x_src .req x3 +x_dest .req x4 + +/* returns */ +w_ret .req w0 + +/* local variables */ +x_vec_i .req x5 +x_ptr .req x6 +x_pos .req x7 +x_tmp .req x8 +x_dest1 .req x9 +x_tbl1 .req x10 +x_dest2 .req x11 +x_tbl2 .req x12 +x_dest3 .req x13 +x_tbl3 .req x14 +x_dest4 .req x_dest +x_tbl4 .req x15 + +/* vectors */ +v_mask0f .req v0 +q_mask0f .req q0 +v_tmp1_lo .req v1 +v_tmp1_hi .req v2 +v_tmp1 .req v3 +q_tmp1 .req q3 + +v_p1_0 .req v4 +v_p2_0 .req v5 +v_p3_0 .req v6 +v_p4_0 .req v7 + +q_p1_0 .req q4 +q_p2_0 .req q5 +q_p3_0 .req q6 +q_p4_0 .req q7 + +v_data_0 .req v8 +v_data_1 .req v9 +v_data_2 .req v10 +v_data_3 .req v11 +q_data_0 .req q8 +q_data_1 .req q9 +q_data_2 .req q10 +q_data_3 .req q11 + +v_p1_3 .req v12 +v_p2_3 .req v13 +v_p3_3 .req v14 +v_p4_3 .req v15 +q_p1_3 .req q12 +q_p2_3 .req q13 +q_p3_3 .req q14 +q_p4_3 .req q15 + +v_gft1_lo .req v16 +v_gft1_hi .req v17 +v_gft2_lo .req v18 +v_gft2_hi .req v19 +v_gft3_lo .req v20 +v_gft3_hi .req v21 +v_gft4_lo .req v22 +v_gft4_hi .req v23 +q_gft1_lo .req q16 +q_gft1_hi .req q17 +q_gft2_lo .req q18 +q_gft2_hi .req q19 +q_gft3_lo .req q20 +q_gft3_hi .req q21 +q_gft4_lo .req q22 +q_gft4_hi .req q23 + +v_p1_1 .req v24 +v_p1_2 .req v25 +v_p2_1 .req v26 +v_p2_2 .req v27 +v_p3_1 .req v28 +v_p3_2 .req v29 +v_p4_1 .req v30 +v_p4_2 .req v31 + +q_p1_1 .req q24 +q_p1_2 .req q25 +q_p2_1 .req q26 +q_p2_2 .req q27 +q_p3_1 .req q28 +q_p3_2 .req q29 +q_p4_1 .req q30 +q_p4_2 .req q31 + +v_data .req v_tmp1 +q_data .req q_tmp1 +v_data_lo .req v_tmp1_lo +v_data_hi .req v_tmp1_hi + +gf_4vect_dot_prod_neon: + /* less than 16 bytes, return_fail */ + cmp x_len, #16 + blt .return_fail + + movi v_mask0f.16b, #0x0f + mov x_pos, #0 + lsl x_vec, x_vec, #3 + ldr x_dest1, [x_dest, #8*0] + ldr x_dest2, [x_dest, #8*1] + ldr x_dest3, [x_dest, #8*2] + ldr x_dest4, [x_dest, #8*3] + +.Lloop64_init: + /* less than 64 bytes, goto Lloop16_init */ + cmp x_len, #64 + blt .Lloop16_init + + /* save d8 ~ d15 to stack */ + sub sp, sp, #64 + stp d8, d9, [sp] + stp d10, d11, [sp, #16] + stp d12, d13, [sp, #32] + stp d14, d15, [sp, #48] + + sub x_len, x_len, #64 + +.Lloop64: + movi v_p1_0.16b, #0 + movi v_p1_1.16b, #0 + movi v_p1_2.16b, #0 + movi v_p1_3.16b, #0 + movi v_p2_0.16b, #0 + movi v_p2_1.16b, #0 + movi v_p2_2.16b, #0 + movi v_p2_3.16b, #0 + movi v_p3_0.16b, #0 + movi v_p3_1.16b, #0 + movi v_p3_2.16b, #0 + movi v_p3_3.16b, #0 + movi v_p4_0.16b, #0 + movi v_p4_1.16b, #0 + movi v_p4_2.16b, #0 + movi v_p4_3.16b, #0 + + mov x_tbl1, x_tbl + add x_tbl2, x_tbl1, x_vec, lsl #2 + add x_tbl3, x_tbl2, x_vec, lsl #2 + 
add x_tbl4, x_tbl3, x_vec, lsl #2 + mov x_vec_i, #0 + prfm pldl1keep, [x_tbl1] + prfm pldl1keep, [x_tbl2] + prfm pldl1keep, [x_tbl3] + prfm pldl1keep, [x_tbl4] + +.Lloop64_vects: + ldr x_ptr, [x_src, x_vec_i] + add x_vec_i, x_vec_i, #8 + add x_ptr, x_ptr, x_pos + + ldr q_data_0, [x_ptr], #16 + ldr q_data_1, [x_ptr], #16 + ldp q_gft1_lo, q_gft1_hi, [x_tbl1], #32 + ldp q_gft2_lo, q_gft2_hi, [x_tbl2], #32 + ldp q_gft3_lo, q_gft3_hi, [x_tbl3], #32 + ldp q_gft4_lo, q_gft4_hi, [x_tbl4], #32 + ldr q_data_2, [x_ptr], #16 + ldr q_data_3, [x_ptr], #16 + + prfm pldl1strm, [x_ptr] + prfm pldl1keep, [x_tbl1] + prfm pldl1keep, [x_tbl2] + prfm pldl1keep, [x_tbl3] + prfm pldl1keep, [x_tbl4] + + /* data_0 */ + and v_tmp1.16b, v_data_0.16b, v_mask0f.16b + ushr v_data_0.16b, v_data_0.16b, #4 + + tbl v_tmp1_lo.16b, {v_gft1_lo.16b}, v_tmp1.16b + tbl v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_0.16b + eor v_p1_0.16b, v_tmp1_lo.16b, v_p1_0.16b + eor v_p1_0.16b, v_p1_0.16b, v_tmp1_hi.16b + + tbl v_tmp1_lo.16b, {v_gft2_lo.16b}, v_tmp1.16b + tbl v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_0.16b + eor v_p2_0.16b, v_tmp1_lo.16b, v_p2_0.16b + eor v_p2_0.16b, v_p2_0.16b, v_tmp1_hi.16b + + tbl v_tmp1_lo.16b, {v_gft3_lo.16b}, v_tmp1.16b + tbl v_tmp1_hi.16b, {v_gft3_hi.16b}, v_data_0.16b + eor v_p3_0.16b, v_tmp1_lo.16b, v_p3_0.16b + eor v_p3_0.16b, v_p3_0.16b, v_tmp1_hi.16b + + tbl v_tmp1_lo.16b, {v_gft4_lo.16b}, v_tmp1.16b + tbl v_tmp1_hi.16b, {v_gft4_hi.16b}, v_data_0.16b + eor v_p4_0.16b, v_tmp1_lo.16b, v_p4_0.16b + eor v_p4_0.16b, v_p4_0.16b, v_tmp1_hi.16b + + /* data_1 */ + and v_tmp1.16b, v_data_1.16b, v_mask0f.16b + ushr v_data_1.16b, v_data_1.16b, #4 + + tbl v_tmp1_lo.16b, {v_gft1_lo.16b}, v_tmp1.16b + tbl v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_1.16b + eor v_p1_1.16b, v_tmp1_lo.16b, v_p1_1.16b + eor v_p1_1.16b, v_p1_1.16b, v_tmp1_hi.16b + + tbl v_tmp1_lo.16b, {v_gft2_lo.16b}, v_tmp1.16b + tbl v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_1.16b + eor v_p2_1.16b, v_tmp1_lo.16b, v_p2_1.16b + eor v_p2_1.16b, v_p2_1.16b, v_tmp1_hi.16b + + tbl v_tmp1_lo.16b, {v_gft3_lo.16b}, v_tmp1.16b + tbl v_tmp1_hi.16b, {v_gft3_hi.16b}, v_data_1.16b + eor v_p3_1.16b, v_tmp1_lo.16b, v_p3_1.16b + eor v_p3_1.16b, v_p3_1.16b, v_tmp1_hi.16b + + tbl v_tmp1_lo.16b, {v_gft4_lo.16b}, v_tmp1.16b + tbl v_tmp1_hi.16b, {v_gft4_hi.16b}, v_data_1.16b + eor v_p4_1.16b, v_tmp1_lo.16b, v_p4_1.16b + eor v_p4_1.16b, v_p4_1.16b, v_tmp1_hi.16b + + /* data_2 */ + and v_tmp1.16b, v_data_2.16b, v_mask0f.16b + ushr v_data_2.16b, v_data_2.16b, #4 + + tbl v_tmp1_lo.16b, {v_gft1_lo.16b}, v_tmp1.16b + tbl v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_2.16b + eor v_p1_2.16b, v_tmp1_lo.16b, v_p1_2.16b + eor v_p1_2.16b, v_p1_2.16b, v_tmp1_hi.16b + + tbl v_tmp1_lo.16b, {v_gft2_lo.16b}, v_tmp1.16b + tbl v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_2.16b + eor v_p2_2.16b, v_tmp1_lo.16b, v_p2_2.16b + eor v_p2_2.16b, v_p2_2.16b, v_tmp1_hi.16b + + tbl v_tmp1_lo.16b, {v_gft3_lo.16b}, v_tmp1.16b + tbl v_tmp1_hi.16b, {v_gft3_hi.16b}, v_data_2.16b + eor v_p3_2.16b, v_tmp1_lo.16b, v_p3_2.16b + eor v_p3_2.16b, v_p3_2.16b, v_tmp1_hi.16b + + tbl v_tmp1_lo.16b, {v_gft4_lo.16b}, v_tmp1.16b + tbl v_tmp1_hi.16b, {v_gft4_hi.16b}, v_data_2.16b + eor v_p4_2.16b, v_tmp1_lo.16b, v_p4_2.16b + eor v_p4_2.16b, v_p4_2.16b, v_tmp1_hi.16b + + /* data_3 */ + and v_tmp1.16b, v_data_3.16b, v_mask0f.16b + ushr v_data_3.16b, v_data_3.16b, #4 + + tbl v_tmp1_lo.16b, {v_gft1_lo.16b}, v_tmp1.16b + tbl v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_3.16b + eor v_p1_3.16b, v_tmp1_lo.16b, v_p1_3.16b + eor v_p1_3.16b, v_p1_3.16b, v_tmp1_hi.16b 
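+
+	/* The tbl/eor pairs above are the split-nibble GF(2^8)
+	 * multiply used throughout this file: the low four bits of
+	 * each data byte index the gft*_lo table, the high four bits
+	 * index gft*_hi, and both partial products are xor-ed into
+	 * the running parity accumulators.  Per byte, the equivalent
+	 * C sketch (names illustrative only) is:
+	 *
+	 *	p[i] ^= gft_lo[data[i] & 0x0f] ^ gft_hi[data[i] >> 4];
+	 */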
+ + tbl v_tmp1_lo.16b, {v_gft2_lo.16b}, v_tmp1.16b + tbl v_tmp1_hi.16b, {v_gft2_hi.16b}, v_data_3.16b + eor v_p2_3.16b, v_tmp1_lo.16b, v_p2_3.16b + eor v_p2_3.16b, v_p2_3.16b, v_tmp1_hi.16b + + tbl v_tmp1_lo.16b, {v_gft3_lo.16b}, v_tmp1.16b + tbl v_tmp1_hi.16b, {v_gft3_hi.16b}, v_data_3.16b + eor v_p3_3.16b, v_tmp1_lo.16b, v_p3_3.16b + eor v_p3_3.16b, v_p3_3.16b, v_tmp1_hi.16b + + tbl v_tmp1_lo.16b, {v_gft4_lo.16b}, v_tmp1.16b + tbl v_tmp1_hi.16b, {v_gft4_hi.16b}, v_data_3.16b + eor v_p4_3.16b, v_tmp1_lo.16b, v_p4_3.16b + eor v_p4_3.16b, v_p4_3.16b, v_tmp1_hi.16b + + cmp x_vec_i, x_vec + blt .Lloop64_vects + +.Lloop64_vects_end: + add x_ptr, x_dest1, x_pos + stp q_p1_0, q_p1_1, [x_ptr], #32 + stp q_p1_2, q_p1_3, [x_ptr] + + add x_ptr, x_dest2, x_pos + stp q_p2_0, q_p2_1, [x_ptr], #32 + stp q_p2_2, q_p2_3, [x_ptr] + + add x_ptr, x_dest3, x_pos + stp q_p3_0, q_p3_1, [x_ptr], #32 + stp q_p3_2, q_p3_3, [x_ptr] + + add x_ptr, x_dest4, x_pos + stp q_p4_0, q_p4_1, [x_ptr], #32 + stp q_p4_2, q_p4_3, [x_ptr] + + add x_pos, x_pos, #64 + cmp x_pos, x_len + ble .Lloop64 + +.Lloop64_end: + /* restore d8 ~ d15 */ + ldp d8, d9, [sp] + ldp d10, d11, [sp, #16] + ldp d12, d13, [sp, #32] + ldp d14, d15, [sp, #48] + add sp, sp, #64 + + add x_len, x_len, #64 + cmp x_pos, x_len + beq .return_pass + +.Lloop16_init: + sub x_len, x_len, #16 + cmp x_pos, x_len + bgt .lessthan16_init + +.Lloop16: + movi v_p1_0.16b, #0 + movi v_p2_0.16b, #0 + movi v_p3_0.16b, #0 + movi v_p4_0.16b, #0 + mov x_tbl1, x_tbl + add x_tbl2, x_tbl1, x_vec, lsl #2 + add x_tbl3, x_tbl2, x_vec, lsl #2 + add x_tbl4, x_tbl3, x_vec, lsl #2 + mov x_vec_i, #0 + +.Lloop16_vects: + ldr x_ptr, [x_src, x_vec_i] + add x_vec_i, x_vec_i, #8 + ldr q_data, [x_ptr, x_pos] + + ldp q_gft1_lo, q_gft1_hi, [x_tbl1], #32 + ldp q_gft2_lo, q_gft2_hi, [x_tbl2], #32 + ldp q_gft3_lo, q_gft3_hi, [x_tbl3], #32 + ldp q_gft4_lo, q_gft4_hi, [x_tbl4], #32 + + prfm pldl1keep, [x_tbl1] + prfm pldl1keep, [x_tbl2] + prfm pldl1keep, [x_tbl3] + prfm pldl1keep, [x_tbl4] + + and v_data_lo.16b, v_data.16b, v_mask0f.16b + ushr v_data_hi.16b, v_data.16b, #4 + + tbl v_gft1_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b + tbl v_gft1_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b + tbl v_gft2_lo.16b, {v_gft2_lo.16b}, v_data_lo.16b + tbl v_gft2_hi.16b, {v_gft2_hi.16b}, v_data_hi.16b + tbl v_gft3_lo.16b, {v_gft3_lo.16b}, v_data_lo.16b + tbl v_gft3_hi.16b, {v_gft3_hi.16b}, v_data_hi.16b + tbl v_gft4_lo.16b, {v_gft4_lo.16b}, v_data_lo.16b + tbl v_gft4_hi.16b, {v_gft4_hi.16b}, v_data_hi.16b + + eor v_p1_0.16b, v_gft1_hi.16b, v_p1_0.16b + eor v_p1_0.16b, v_p1_0.16b, v_gft1_lo.16b + eor v_p2_0.16b, v_gft2_hi.16b, v_p2_0.16b + eor v_p2_0.16b, v_p2_0.16b, v_gft2_lo.16b + eor v_p3_0.16b, v_gft3_hi.16b, v_p3_0.16b + eor v_p3_0.16b, v_p3_0.16b, v_gft3_lo.16b + eor v_p4_0.16b, v_gft4_hi.16b, v_p4_0.16b + eor v_p4_0.16b, v_p4_0.16b, v_gft4_lo.16b + + cmp x_vec_i, x_vec + bne .Lloop16_vects + +.Lloop16_vects_end: + str q_p1_0, [x_dest1, x_pos] + str q_p2_0, [x_dest2, x_pos] + str q_p3_0, [x_dest3, x_pos] + str q_p4_0, [x_dest4, x_pos] + add x_pos, x_pos, #16 + cmp x_pos, x_len + ble .Lloop16 + +.Lloop16_end: + sub x_tmp, x_pos, x_len + cmp x_tmp, #16 + beq .return_pass + +.lessthan16_init: + mov x_pos, x_len + b .Lloop16 + +.return_pass: + mov w_ret, #0 + ret + +.return_fail: + mov w_ret, #1 + ret diff --git a/src/isa-l/erasure_code/aarch64/gf_4vect_mad_neon.S b/src/isa-l/erasure_code/aarch64/gf_4vect_mad_neon.S new file mode 100644 index 000000000..d56cece4c --- /dev/null +++ 
b/src/isa-l/erasure_code/aarch64/gf_4vect_mad_neon.S @@ -0,0 +1,455 @@ +/************************************************************** + Copyright (c) 2019 Huawei Technologies Co., Ltd. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Huawei Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +.text + +.global gf_4vect_mad_neon +.type gf_4vect_mad_neon, %function + + +/* arguments */ +x_len .req x0 +x_vec .req x1 +x_vec_i .req x2 +x_tbl .req x3 +x_src .req x4 +x_dest .req x5 + +/* returns */ +w_ret .req w0 + +/* local variables */ +x_src_end .req x6 +x_dest1 .req x7 +x_dest2 .req x8 +x_dest3 .req x9 +x_dest4 .req x_dest +x_tmp .req x10 +x_tbl1 .req x11 +x_tbl2 .req x12 +x_tbl3 .req x13 +x_tbl4 .req x14 +x_const .req x15 + +/* vectors */ +v_mask0f .req v0 +v_tmp_lo .req v1 +v_tmp_hi .req v2 +v_tmp .req v3 +q_tmp .req q3 + +v_gft1_lo .req v4 +v_gft1_hi .req v5 +v_gft2_lo .req v6 +v_gft2_hi .req v7 +v_gft3_lo .req v16 +v_gft3_hi .req v17 +v_gft4_lo .req v18 +v_gft4_hi .req v19 +q_gft1_lo .req q4 +q_gft1_hi .req q5 +q_gft2_lo .req q6 +q_gft2_hi .req q7 +q_gft3_lo .req q16 +q_gft3_hi .req q17 +q_gft4_lo .req q18 +q_gft4_hi .req q19 + +v_data_0 .req v8 +v_data_1 .req v9 +v_data_2 .req v10 +v_data_3 .req v11 +q_data_0 .req q8 +q_data_1 .req q9 +q_data_2 .req q10 +q_data_3 .req q11 + +v_data_0_lo .req v12 +v_data_1_lo .req v13 +v_data_2_lo .req v14 +v_data_3_lo .req v15 +v_data_0_hi .req v_data_0 +v_data_1_hi .req v_data_1 +v_data_2_hi .req v_data_2 +v_data_3_hi .req v_data_3 + +v_d1_0 .req v20 +v_d1_1 .req v21 +v_d1_2 .req v22 +v_d1_3 .req v23 +v_d2_0 .req v24 +v_d2_1 .req v25 +v_d2_2 .req v26 +v_d2_3 .req v27 +v_d3_0 .req v28 +v_d3_1 .req v29 +v_d3_2 .req v30 +v_d3_3 .req v31 +q_d1_0 .req q20 +q_d1_1 .req q21 +q_d1_2 .req q22 +q_d1_3 .req q23 +q_d2_0 .req q24 +q_d2_1 .req q25 +q_d2_2 .req q26 +q_d2_3 .req q27 +q_d3_0 .req q28 +q_d3_1 .req q29 +q_d3_2 .req q30 +q_d3_3 .req q31 + +v_d4_0 .req v_d1_0 +v_d4_1 .req v_d1_1 +v_d4_2 .req v_d1_2 +v_d4_3 .req v_d1_3 +q_d4_0 .req q_d1_0 +q_d4_1 .req q_d1_1 +q_d4_2 .req q_d1_2 +q_d4_3 .req q_d1_3 + +v_data .req v21 +q_data .req q21 +v_data_lo .req 
v22 +v_data_hi .req v23 + +gf_4vect_mad_neon: + /* less than 16 bytes, return_fail */ + cmp x_len, #16 + blt .return_fail + + movi v_mask0f.16b, #0x0f + lsl x_vec_i, x_vec_i, #5 + lsl x_vec, x_vec, #5 + add x_tbl1, x_tbl, x_vec_i + add x_tbl2, x_tbl1, x_vec + add x_tbl3, x_tbl2, x_vec + add x_tbl4, x_tbl3, x_vec + add x_src_end, x_src, x_len + ldr x_dest1, [x_dest, #8*0] + ldr x_dest2, [x_dest, #8*1] + ldr x_dest3, [x_dest, #8*2] + ldr x_dest4, [x_dest, #8*3] + ldr q_gft1_lo, [x_tbl1] + ldr q_gft1_hi, [x_tbl1, #16] + ldr q_gft2_lo, [x_tbl2] + ldr q_gft2_hi, [x_tbl2, #16] + ldr q_gft3_lo, [x_tbl3] + ldr q_gft3_hi, [x_tbl3, #16] + ldr q_gft4_lo, [x_tbl4] + ldr q_gft4_hi, [x_tbl4, #16] + +.Lloop64_init: + /* less than 64 bytes, goto Lloop16_init */ + cmp x_len, #64 + blt .Lloop16_init + + /* save d8 ~ d15 to stack */ + sub sp, sp, #64 + stp d8, d9, [sp] + stp d10, d11, [sp, #16] + stp d12, d13, [sp, #32] + stp d14, d15, [sp, #48] + + sub x_src_end, x_src_end, #64 + +.Lloop64: + ldr q_data_0, [x_src, #16*0] + ldr q_data_1, [x_src, #16*1] + ldr q_data_2, [x_src, #16*2] + ldr q_data_3, [x_src, #16*3] + add x_src, x_src, #64 + + ldr q_d1_0, [x_dest1, #16*0] + ldr q_d1_1, [x_dest1, #16*1] + ldr q_d1_2, [x_dest1, #16*2] + ldr q_d1_3, [x_dest1, #16*3] + + ldr q_d2_0, [x_dest2, #16*0] + ldr q_d2_1, [x_dest2, #16*1] + ldr q_d2_2, [x_dest2, #16*2] + ldr q_d2_3, [x_dest2, #16*3] + + and v_data_0_lo.16b, v_data_0.16b, v_mask0f.16b + and v_data_1_lo.16b, v_data_1.16b, v_mask0f.16b + and v_data_2_lo.16b, v_data_2.16b, v_mask0f.16b + and v_data_3_lo.16b, v_data_3.16b, v_mask0f.16b + + ushr v_data_0_hi.16b, v_data_0.16b, #4 + ushr v_data_1_hi.16b, v_data_1.16b, #4 + ushr v_data_2_hi.16b, v_data_2.16b, #4 + ushr v_data_3_hi.16b, v_data_3.16b, #4 + + /* dest1 */ + tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_0_lo.16b + tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_0_hi.16b + eor v_d1_0.16b, v_tmp_lo.16b, v_d1_0.16b + eor v_d1_0.16b, v_d1_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_1_lo.16b + tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_1_hi.16b + eor v_d1_1.16b, v_tmp_lo.16b, v_d1_1.16b + eor v_d1_1.16b, v_d1_1.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_2_lo.16b + tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_2_hi.16b + eor v_d1_2.16b, v_tmp_lo.16b, v_d1_2.16b + eor v_d1_2.16b, v_d1_2.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_3_lo.16b + tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_3_hi.16b + eor v_d1_3.16b, v_tmp_lo.16b, v_d1_3.16b + eor v_d1_3.16b, v_d1_3.16b, v_tmp_hi.16b + + /* dest2 */ + tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_0_lo.16b + tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_0_hi.16b + eor v_d2_0.16b, v_tmp_lo.16b, v_d2_0.16b + eor v_d2_0.16b, v_d2_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_1_lo.16b + tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_1_hi.16b + eor v_d2_1.16b, v_tmp_lo.16b, v_d2_1.16b + eor v_d2_1.16b, v_d2_1.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_2_lo.16b + tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_2_hi.16b + eor v_d2_2.16b, v_tmp_lo.16b, v_d2_2.16b + eor v_d2_2.16b, v_d2_2.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_3_lo.16b + tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_3_hi.16b + eor v_d2_3.16b, v_tmp_lo.16b, v_d2_3.16b + eor v_d2_3.16b, v_d2_3.16b, v_tmp_hi.16b + + str q_d1_0, [x_dest1, #16*0] + str q_d1_1, [x_dest1, #16*1] + str q_d1_2, [x_dest1, #16*2] + str q_d1_3, [x_dest1, #16*3] + add x_dest1, x_dest1, #64 + + str q_d2_0, [x_dest2, #16*0] + str q_d2_1, [x_dest2, #16*1] 
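+
+	/* dest1/dest2 results are written back before dest3/dest4 are
+	 * processed: v_d4_x aliases v_d1_x (see the .req aliases
+	 * above), so those accumulators must be flushed before they
+	 * are reused for the fourth destination.
+	 */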
+ str q_d2_2, [x_dest2, #16*2] + str q_d2_3, [x_dest2, #16*3] + add x_dest2, x_dest2, #64 + + ldr q_d3_0, [x_dest3, #16*0] + ldr q_d3_1, [x_dest3, #16*1] + ldr q_d3_2, [x_dest3, #16*2] + ldr q_d3_3, [x_dest3, #16*3] + + ldr q_d4_0, [x_dest4, #16*0] + ldr q_d4_1, [x_dest4, #16*1] + ldr q_d4_2, [x_dest4, #16*2] + ldr q_d4_3, [x_dest4, #16*3] + + /* dest3 */ + tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_0_lo.16b + tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_0_hi.16b + eor v_d3_0.16b, v_tmp_lo.16b, v_d3_0.16b + eor v_d3_0.16b, v_d3_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_1_lo.16b + tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_1_hi.16b + eor v_d3_1.16b, v_tmp_lo.16b, v_d3_1.16b + eor v_d3_1.16b, v_d3_1.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_2_lo.16b + tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_2_hi.16b + eor v_d3_2.16b, v_tmp_lo.16b, v_d3_2.16b + eor v_d3_2.16b, v_d3_2.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_3_lo.16b + tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_3_hi.16b + eor v_d3_3.16b, v_tmp_lo.16b, v_d3_3.16b + eor v_d3_3.16b, v_d3_3.16b, v_tmp_hi.16b + + /* dest4 */ + tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_0_lo.16b + tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_0_hi.16b + eor v_d4_0.16b, v_tmp_lo.16b, v_d4_0.16b + eor v_d4_0.16b, v_d4_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_1_lo.16b + tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_1_hi.16b + eor v_d4_1.16b, v_tmp_lo.16b, v_d4_1.16b + eor v_d4_1.16b, v_d4_1.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_2_lo.16b + tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_2_hi.16b + eor v_d4_2.16b, v_tmp_lo.16b, v_d4_2.16b + eor v_d4_2.16b, v_d4_2.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_3_lo.16b + tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_3_hi.16b + eor v_d4_3.16b, v_tmp_lo.16b, v_d4_3.16b + eor v_d4_3.16b, v_d4_3.16b, v_tmp_hi.16b + + str q_d3_0, [x_dest3, #16*0] + str q_d3_1, [x_dest3, #16*1] + str q_d3_2, [x_dest3, #16*2] + str q_d3_3, [x_dest3, #16*3] + add x_dest3, x_dest3, #64 + + str q_d4_0, [x_dest4, #16*0] + str q_d4_1, [x_dest4, #16*1] + str q_d4_2, [x_dest4, #16*2] + str q_d4_3, [x_dest4, #16*3] + add x_dest4, x_dest4, #64 + + cmp x_src, x_src_end + bls .Lloop64 + +.Lloop64_end: + /* restore d8 ~ d15 */ + ldp d8, d9, [sp] + ldp d10, d11, [sp, #16] + ldp d12, d13, [sp, #32] + ldp d14, d15, [sp, #48] + add sp, sp, #64 + add x_src_end, x_src_end, #64 + +.Lloop16_init: + sub x_src_end, x_src_end, #16 + cmp x_src, x_src_end + bhi .lessthan16_init + +.Lloop16: + ldr q_data, [x_src] + + ldr q_d1_0, [x_dest1] + ldr q_d2_0, [x_dest2] + + and v_data_lo.16b, v_data.16b, v_mask0f.16b + ushr v_data_hi.16b, v_data.16b, #4 + + tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b + eor v_d1_0.16b, v_tmp_lo.16b, v_d1_0.16b + eor v_d1_0.16b, v_d1_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_hi.16b + eor v_d2_0.16b, v_tmp_lo.16b, v_d2_0.16b + eor v_d2_0.16b, v_d2_0.16b, v_tmp_hi.16b + + str q_d1_0, [x_dest1] + str q_d2_0, [x_dest2] + ldr q_d3_0, [x_dest3] + ldr q_d4_0, [x_dest4] + + tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_hi.16b + eor v_d3_0.16b, v_tmp_lo.16b, v_d3_0.16b + eor v_d3_0.16b, v_d3_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_hi.16b + eor v_d4_0.16b, v_tmp_lo.16b, v_d4_0.16b + eor 
v_d4_0.16b, v_d4_0.16b, v_tmp_hi.16b + + str q_d3_0, [x_dest3] + str q_d4_0, [x_dest4] + + add x_src, x_src, #16 + add x_dest1, x_dest1, #16 + add x_dest2, x_dest2, #16 + add x_dest3, x_dest3, #16 + add x_dest4, x_dest4, #16 + cmp x_src, x_src_end + bls .Lloop16 + +.lessthan16_init: + sub x_tmp, x_src, x_src_end + cmp x_tmp, #16 + beq .return_pass + +.lessthan16: + mov x_src, x_src_end + sub x_dest1, x_dest1, x_tmp + sub x_dest2, x_dest2, x_tmp + sub x_dest3, x_dest3, x_tmp + sub x_dest4, x_dest4, x_tmp + + ldr x_const, =const_tbl + sub x_const, x_const, x_tmp + ldr q_tmp, [x_const, #16] + + ldr q_data, [x_src] + ldr q_d1_0, [x_dest1] + ldr q_d2_0, [x_dest2] + + and v_data_lo.16b, v_data.16b, v_mask0f.16b + ushr v_data_hi.16b, v_data.16b, #4 + + tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b + eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b + and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b + eor v_d1_0.16b, v_d1_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_hi.16b + eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b + and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b + eor v_d2_0.16b, v_d2_0.16b, v_tmp_hi.16b + + str q_d1_0, [x_dest1] + str q_d2_0, [x_dest2] + ldr q_d3_0, [x_dest3] + ldr q_d4_0, [x_dest4] + + tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_hi.16b + eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b + and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b + eor v_d3_0.16b, v_d3_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_hi.16b + eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b + and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b + eor v_d4_0.16b, v_d4_0.16b, v_tmp_hi.16b + + str q_d3_0, [x_dest3] + str q_d4_0, [x_dest4] + +.return_pass: + mov w_ret, #0 + ret + +.return_fail: + mov w_ret, #1 + ret + +.section .data +.balign 8 +const_tbl: + .dword 0x0000000000000000, 0x0000000000000000 + .dword 0xffffffffffffffff, 0xffffffffffffffff diff --git a/src/isa-l/erasure_code/aarch64/gf_5vect_dot_prod_neon.S b/src/isa-l/erasure_code/aarch64/gf_5vect_dot_prod_neon.S new file mode 100644 index 000000000..2e4dea4ad --- /dev/null +++ b/src/isa-l/erasure_code/aarch64/gf_5vect_dot_prod_neon.S @@ -0,0 +1,481 @@ +/************************************************************** + Copyright (c) 2019 Huawei Technologies Co., Ltd. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Huawei Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +.text + +.global gf_5vect_dot_prod_neon +.type gf_5vect_dot_prod_neon, %function + + +/* arguments */ +x_len .req x0 +x_vec .req x1 +x_tbl .req x2 +x_src .req x3 +x_dest .req x4 + +/* returns */ +w_ret .req w0 + +/* local variables */ +x_vec_i .req x5 +x_ptr .req x6 +x_pos .req x7 +x_tmp .req x8 +x_dest1 .req x9 +x_dest2 .req x10 +x_dest3 .req x11 +x_dest4 .req x12 +x_dest5 .req x13 + +/* vectors */ +v_tmp1 .req v0 +q_tmp1 .req q0 +v_tmp2 .req v1 +q_tmp2 .req q1 + +v_mask0f .req v_tmp1 +q_mask0f .req q_tmp1 +v_tmp_lo .req v_tmp1 +v_tmp_hi .req v_tmp2 + +v_gft_lo .req v2 +v_gft_hi .req v3 +q_gft_lo .req q2 +q_gft_hi .req q3 + +v_p1_0 .req v4 +v_p2_0 .req v5 +v_p3_0 .req v6 +v_p4_0 .req v7 + +q_p1_0 .req q4 +q_p2_0 .req q5 +q_p3_0 .req q6 +q_p4_0 .req q7 + +v_data_0 .req v8 +v_data_1 .req v9 +v_data_2 .req v10 +v_data_3 .req v11 +q_data_0 .req q8 +q_data_1 .req q9 +q_data_2 .req q10 +q_data_3 .req q11 + +v_data_0_lo .req v12 +v_data_1_lo .req v13 +v_data_2_lo .req v14 +v_data_3_lo .req v15 +v_data_0_hi .req v_data_0 +v_data_1_hi .req v_data_1 +v_data_2_hi .req v_data_2 +v_data_3_hi .req v_data_3 + +v_p5_0 .req v16 +v_p1_1 .req v17 +v_p2_1 .req v18 +v_p3_1 .req v19 +v_p4_1 .req v20 +v_p5_1 .req v21 +v_p1_2 .req v22 +v_p2_2 .req v23 +v_p3_2 .req v24 +v_p4_2 .req v25 +v_p5_2 .req v26 +v_p1_3 .req v27 +v_p2_3 .req v28 +v_p3_3 .req v29 +v_p4_3 .req v30 +v_p5_3 .req v31 + +q_p5_0 .req q16 +q_p1_1 .req q17 +q_p2_1 .req q18 +q_p3_1 .req q19 +q_p4_1 .req q20 +q_p5_1 .req q21 +q_p1_2 .req q22 +q_p2_2 .req q23 +q_p3_2 .req q24 +q_p4_2 .req q25 +q_p5_2 .req q26 +q_p1_3 .req q27 +q_p2_3 .req q28 +q_p3_3 .req q29 +q_p4_3 .req q30 +q_p5_3 .req q31 + +v_data .req v_p1_1 +q_data .req q_p1_1 +v_data_lo .req v_p2_1 +v_data_hi .req v_p3_1 + +v_gft1_lo .req v_p4_1 +v_gft1_hi .req v_p5_1 +v_gft2_lo .req v_p1_2 +v_gft2_hi .req v_p2_2 +v_gft3_lo .req v_p3_2 +v_gft3_hi .req v_p4_2 +v_gft4_lo .req v_p5_2 +v_gft4_hi .req v_p1_3 +v_gft5_lo .req v_p2_3 +v_gft5_hi .req v_p3_3 +q_gft1_lo .req q_p4_1 +q_gft1_hi .req q_p5_1 +q_gft2_lo .req q_p1_2 +q_gft2_hi .req q_p2_2 +q_gft3_lo .req q_p3_2 +q_gft3_hi .req q_p4_2 +q_gft4_lo .req q_p5_2 +q_gft4_hi .req q_p1_3 +q_gft5_lo .req q_p2_3 +q_gft5_hi .req q_p3_3 + + +gf_5vect_dot_prod_neon: + /* less than 16 bytes, return_fail */ + cmp x_len, #16 + blt .return_fail + + mov x_pos, #0 + lsl x_vec, x_vec, #3 + ldr x_dest1, [x_dest, #8*0] + ldr x_dest2, [x_dest, #8*1] + ldr x_dest3, [x_dest, #8*2] + ldr x_dest4, [x_dest, #8*3] + ldr x_dest5, [x_dest, #8*4] + +.Lloop64_init: + /* less than 64 bytes, goto Lloop16_init */ + cmp x_len, #64 + blt .Lloop16_init + + /* save d8 ~ d15 to stack */ + sub sp, sp, #64 + stp d8, d9, [sp] + stp d10, d11, [sp, #16] + stp d12, d13, [sp, #32] + stp d14, d15, [sp, #48] + + sub x_len, x_len, #64 + +.Lloop64: + movi v_p1_0.16b, #0 + movi v_p1_1.16b, #0 + movi v_p1_2.16b, #0 + movi v_p1_3.16b, #0 + movi v_p2_0.16b, #0 + movi v_p2_1.16b, #0 + movi v_p2_2.16b, #0 + 
movi v_p2_3.16b, #0 + movi v_p3_0.16b, #0 + movi v_p3_1.16b, #0 + movi v_p3_2.16b, #0 + movi v_p3_3.16b, #0 + movi v_p4_0.16b, #0 + movi v_p4_1.16b, #0 + movi v_p4_2.16b, #0 + movi v_p4_3.16b, #0 + movi v_p5_0.16b, #0 + movi v_p5_1.16b, #0 + movi v_p5_2.16b, #0 + movi v_p5_3.16b, #0 + mov x_vec_i, #0 + +.Lloop64_vects: + ldr x_ptr, [x_src, x_vec_i] + add x_ptr, x_ptr, x_pos + + ldr q_data_0, [x_ptr], #16 + ldr q_data_1, [x_ptr], #16 + ldr q_data_2, [x_ptr], #16 + ldr q_data_3, [x_ptr], #16 + prfm pldl2keep, [x_ptr] + + movi v_mask0f.16b, #0x0f + and v_data_0_lo.16b, v_data_0.16b, v_mask0f.16b + and v_data_1_lo.16b, v_data_1.16b, v_mask0f.16b + and v_data_2_lo.16b, v_data_2.16b, v_mask0f.16b + and v_data_3_lo.16b, v_data_3.16b, v_mask0f.16b + ushr v_data_0_hi.16b, v_data_0.16b, #4 + ushr v_data_1_hi.16b, v_data_1.16b, #4 + ushr v_data_2_hi.16b, v_data_2.16b, #4 + ushr v_data_3_hi.16b, v_data_3.16b, #4 + + /* v_p1_x */ + add x_tmp, x_tbl, x_vec_i, lsl #2 + add x_vec_i, x_vec_i, #8 + ldp q_gft_lo, q_gft_hi, [x_tmp] + prfm pldl3keep, [x_tmp, #32] + add x_tmp, x_tmp, x_vec, lsl #2 + + tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_0_lo.16b + tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_0_hi.16b + eor v_p1_0.16b, v_tmp_lo.16b, v_p1_0.16b + eor v_p1_0.16b, v_p1_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_1_lo.16b + tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_1_hi.16b + eor v_p1_1.16b, v_tmp_lo.16b, v_p1_1.16b + eor v_p1_1.16b, v_p1_1.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_2_lo.16b + tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_2_hi.16b + eor v_p1_2.16b, v_tmp_lo.16b, v_p1_2.16b + eor v_p1_2.16b, v_p1_2.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_3_lo.16b + tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_3_hi.16b + eor v_p1_3.16b, v_tmp_lo.16b, v_p1_3.16b + eor v_p1_3.16b, v_p1_3.16b, v_tmp_hi.16b + + /* v_p2_x */ + ldp q_gft_lo, q_gft_hi, [x_tmp] + prfm pldl3keep, [x_tmp, #32] + add x_tmp, x_tmp, x_vec, lsl #2 + + tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_0_lo.16b + tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_0_hi.16b + eor v_p2_0.16b, v_tmp_lo.16b, v_p2_0.16b + eor v_p2_0.16b, v_p2_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_1_lo.16b + tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_1_hi.16b + eor v_p2_1.16b, v_tmp_lo.16b, v_p2_1.16b + eor v_p2_1.16b, v_p2_1.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_2_lo.16b + tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_2_hi.16b + eor v_p2_2.16b, v_tmp_lo.16b, v_p2_2.16b + eor v_p2_2.16b, v_p2_2.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_3_lo.16b + tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_3_hi.16b + eor v_p2_3.16b, v_tmp_lo.16b, v_p2_3.16b + eor v_p2_3.16b, v_p2_3.16b, v_tmp_hi.16b + + /* v_p3_x */ + ldp q_gft_lo, q_gft_hi, [x_tmp] + prfm pldl3keep, [x_tmp, #32] + add x_tmp, x_tmp, x_vec, lsl #2 + + tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_0_lo.16b + tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_0_hi.16b + eor v_p3_0.16b, v_tmp_lo.16b, v_p3_0.16b + eor v_p3_0.16b, v_p3_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_1_lo.16b + tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_1_hi.16b + eor v_p3_1.16b, v_tmp_lo.16b, v_p3_1.16b + eor v_p3_1.16b, v_p3_1.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_2_lo.16b + tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_2_hi.16b + eor v_p3_2.16b, v_tmp_lo.16b, v_p3_2.16b + eor v_p3_2.16b, v_p3_2.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_3_lo.16b + tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_3_hi.16b + eor 
v_p3_3.16b, v_tmp_lo.16b, v_p3_3.16b + eor v_p3_3.16b, v_p3_3.16b, v_tmp_hi.16b + + /* v_p4_x */ + ldp q_gft_lo, q_gft_hi, [x_tmp] + prfm pldl3keep, [x_tmp, #32] + add x_tmp, x_tmp, x_vec, lsl #2 + + tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_0_lo.16b + tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_0_hi.16b + eor v_p4_0.16b, v_tmp_lo.16b, v_p4_0.16b + eor v_p4_0.16b, v_p4_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_1_lo.16b + tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_1_hi.16b + eor v_p4_1.16b, v_tmp_lo.16b, v_p4_1.16b + eor v_p4_1.16b, v_p4_1.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_2_lo.16b + tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_2_hi.16b + eor v_p4_2.16b, v_tmp_lo.16b, v_p4_2.16b + eor v_p4_2.16b, v_p4_2.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_3_lo.16b + tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_3_hi.16b + eor v_p4_3.16b, v_tmp_lo.16b, v_p4_3.16b + eor v_p4_3.16b, v_p4_3.16b, v_tmp_hi.16b + + /* v_p5_x */ + ldp q_gft_lo, q_gft_hi, [x_tmp] + prfm pldl3keep, [x_tmp, #32] + + tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_0_lo.16b + tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_0_hi.16b + eor v_p5_0.16b, v_tmp_lo.16b, v_p5_0.16b + eor v_p5_0.16b, v_p5_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_1_lo.16b + tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_1_hi.16b + eor v_p5_1.16b, v_tmp_lo.16b, v_p5_1.16b + eor v_p5_1.16b, v_p5_1.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_2_lo.16b + tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_2_hi.16b + eor v_p5_2.16b, v_tmp_lo.16b, v_p5_2.16b + eor v_p5_2.16b, v_p5_2.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft_lo.16b}, v_data_3_lo.16b + tbl v_tmp_hi.16b, {v_gft_hi.16b}, v_data_3_hi.16b + eor v_p5_3.16b, v_tmp_lo.16b, v_p5_3.16b + eor v_p5_3.16b, v_p5_3.16b, v_tmp_hi.16b + + cmp x_vec_i, x_vec + blt .Lloop64_vects + +.Lloop64_vects_end: + add x_ptr, x_dest1, x_pos + stp q_p1_0, q_p1_1, [x_ptr], #32 + stp q_p1_2, q_p1_3, [x_ptr] + + add x_ptr, x_dest2, x_pos + stp q_p2_0, q_p2_1, [x_ptr], #32 + stp q_p2_2, q_p2_3, [x_ptr] + + add x_ptr, x_dest3, x_pos + stp q_p3_0, q_p3_1, [x_ptr], #32 + stp q_p3_2, q_p3_3, [x_ptr] + + add x_ptr, x_dest4, x_pos + stp q_p4_0, q_p4_1, [x_ptr], #32 + stp q_p4_2, q_p4_3, [x_ptr] + + add x_ptr, x_dest5, x_pos + stp q_p5_0, q_p5_1, [x_ptr], #32 + stp q_p5_2, q_p5_3, [x_ptr] + + add x_pos, x_pos, #64 + cmp x_pos, x_len + ble .Lloop64 + +.Lloop64_end: + /* restore d8 ~ d15 */ + ldp d8, d9, [sp] + ldp d10, d11, [sp, #16] + ldp d12, d13, [sp, #32] + ldp d14, d15, [sp, #48] + add sp, sp, #64 + + add x_len, x_len, #64 + cmp x_pos, x_len + beq .return_pass + +.Lloop16_init: + sub x_len, x_len, #16 + cmp x_pos, x_len + bgt .lessthan16_init + +.Lloop16: + movi v_p1_0.16b, #0 + movi v_p2_0.16b, #0 + movi v_p3_0.16b, #0 + movi v_p4_0.16b, #0 + movi v_p5_0.16b, #0 + mov x_vec_i, #0 + +.Lloop16_vects: + ldr x_ptr, [x_src, x_vec_i] + ldr q_data, [x_ptr, x_pos] + + movi v_mask0f.16b, #0x0f + and v_data_lo.16b, v_data.16b, v_mask0f.16b + ushr v_data_hi.16b, v_data.16b, #4 + + add x_tmp, x_tbl, x_vec_i, lsl #2 + add x_vec_i, x_vec_i, #8 + ldp q_gft1_lo, q_gft1_hi, [x_tmp] + add x_tmp, x_tmp, x_vec, lsl #2 + ldp q_gft2_lo, q_gft2_hi, [x_tmp] + add x_tmp, x_tmp, x_vec, lsl #2 + ldp q_gft3_lo, q_gft3_hi, [x_tmp] + add x_tmp, x_tmp, x_vec, lsl #2 + ldp q_gft4_lo, q_gft4_hi, [x_tmp] + add x_tmp, x_tmp, x_vec, lsl #2 + ldp q_gft5_lo, q_gft5_hi, [x_tmp] + + tbl v_gft1_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b + tbl v_gft1_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b + tbl 
v_gft2_lo.16b, {v_gft2_lo.16b}, v_data_lo.16b + tbl v_gft2_hi.16b, {v_gft2_hi.16b}, v_data_hi.16b + tbl v_gft3_lo.16b, {v_gft3_lo.16b}, v_data_lo.16b + tbl v_gft3_hi.16b, {v_gft3_hi.16b}, v_data_hi.16b + tbl v_gft4_lo.16b, {v_gft4_lo.16b}, v_data_lo.16b + tbl v_gft4_hi.16b, {v_gft4_hi.16b}, v_data_hi.16b + tbl v_gft5_lo.16b, {v_gft5_lo.16b}, v_data_lo.16b + tbl v_gft5_hi.16b, {v_gft5_hi.16b}, v_data_hi.16b + + eor v_p1_0.16b, v_gft1_hi.16b, v_p1_0.16b + eor v_p1_0.16b, v_p1_0.16b, v_gft1_lo.16b + eor v_p2_0.16b, v_gft2_hi.16b, v_p2_0.16b + eor v_p2_0.16b, v_p2_0.16b, v_gft2_lo.16b + eor v_p3_0.16b, v_gft3_hi.16b, v_p3_0.16b + eor v_p3_0.16b, v_p3_0.16b, v_gft3_lo.16b + eor v_p4_0.16b, v_gft4_hi.16b, v_p4_0.16b + eor v_p4_0.16b, v_p4_0.16b, v_gft4_lo.16b + eor v_p5_0.16b, v_gft5_hi.16b, v_p5_0.16b + eor v_p5_0.16b, v_p5_0.16b, v_gft5_lo.16b + + cmp x_vec_i, x_vec + bne .Lloop16_vects + +.Lloop16_vects_end: + str q_p1_0, [x_dest1, x_pos] + str q_p2_0, [x_dest2, x_pos] + str q_p3_0, [x_dest3, x_pos] + str q_p4_0, [x_dest4, x_pos] + str q_p5_0, [x_dest5, x_pos] + add x_pos, x_pos, #16 + cmp x_pos, x_len + ble .Lloop16 + +.Lloop16_end: + sub x_tmp, x_pos, x_len + cmp x_tmp, #16 + beq .return_pass + +.lessthan16_init: + mov x_pos, x_len + b .Lloop16 + +.return_pass: + mov w_ret, #0 + ret + +.return_fail: + mov w_ret, #1 + ret diff --git a/src/isa-l/erasure_code/aarch64/gf_5vect_mad_neon.S b/src/isa-l/erasure_code/aarch64/gf_5vect_mad_neon.S new file mode 100644 index 000000000..5cbc6bf92 --- /dev/null +++ b/src/isa-l/erasure_code/aarch64/gf_5vect_mad_neon.S @@ -0,0 +1,534 @@ +/************************************************************** + Copyright (c) 2019 Huawei Technologies Co., Ltd. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Huawei Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ +.text + +.global gf_5vect_mad_neon +.type gf_5vect_mad_neon, %function + + +/* arguments */ +x_len .req x0 +x_vec .req x1 +x_vec_i .req x2 +x_tbl .req x3 +x_src .req x4 +x_dest .req x5 + +/* returns */ +w_ret .req w0 + +/* local variables */ +x_src_end .req x6 +x_dest1 .req x7 +x_dest2 .req x8 +x_dest3 .req x9 +x_dest4 .req x10 +x_dest5 .req x_dest +x_tmp .req x11 +x_tbl1 .req x12 +x_tbl2 .req x13 +x_tbl3 .req x14 +x_tbl4 .req x15 +x_tbl5 .req x16 +x_const .req x17 + +/* vectors */ +v_mask0f .req v0 +v_tmp_lo .req v1 +v_tmp_hi .req v2 +v_tmp .req v3 +q_tmp .req q3 + +v_gft1_lo .req v4 +v_gft1_hi .req v5 +v_gft2_lo .req v6 +v_gft2_hi .req v7 +v_gft3_lo .req v16 +v_gft3_hi .req v17 +q_gft1_lo .req q4 +q_gft1_hi .req q5 +q_gft2_lo .req q6 +q_gft2_hi .req q7 +q_gft3_lo .req q16 +q_gft3_hi .req q17 + +v_gft4_lo .req v18 +v_gft4_hi .req v19 +q_gft4_lo .req q18 +q_gft4_hi .req q19 +v_gft5_lo .req v_gft2_lo +v_gft5_hi .req v_gft2_hi +q_gft5_lo .req q_gft2_lo +q_gft5_hi .req q_gft2_hi + +v_data_0 .req v8 +v_data_1 .req v9 +v_data_2 .req v10 +v_data_3 .req v11 +q_data_0 .req q8 +q_data_1 .req q9 +q_data_2 .req q10 +q_data_3 .req q11 + +v_data_0_lo .req v12 +v_data_1_lo .req v13 +v_data_2_lo .req v14 +v_data_3_lo .req v15 +v_data_0_hi .req v_data_0 +v_data_1_hi .req v_data_1 +v_data_2_hi .req v_data_2 +v_data_3_hi .req v_data_3 + +v_d1_0 .req v20 +v_d1_1 .req v21 +v_d1_2 .req v22 +v_d1_3 .req v23 +v_d2_0 .req v24 +v_d2_1 .req v25 +v_d2_2 .req v26 +v_d2_3 .req v27 +v_d3_0 .req v28 +v_d3_1 .req v29 +v_d3_2 .req v30 +v_d3_3 .req v31 +q_d1_0 .req q20 +q_d1_1 .req q21 +q_d1_2 .req q22 +q_d1_3 .req q23 +q_d2_0 .req q24 +q_d2_1 .req q25 +q_d2_2 .req q26 +q_d2_3 .req q27 +q_d3_0 .req q28 +q_d3_1 .req q29 +q_d3_2 .req q30 +q_d3_3 .req q31 + +v_d4_0 .req v_d1_0 +v_d4_1 .req v_d1_1 +v_d4_2 .req v_d1_2 +v_d4_3 .req v_d1_3 +q_d4_0 .req q_d1_0 +q_d4_1 .req q_d1_1 +q_d4_2 .req q_d1_2 +q_d4_3 .req q_d1_3 +v_d5_0 .req v_d2_0 +v_d5_1 .req v_d2_1 +v_d5_2 .req v_d2_2 +v_d5_3 .req v_d2_3 +q_d5_0 .req q_d2_0 +q_d5_1 .req q_d2_1 +q_d5_2 .req q_d2_2 +q_d5_3 .req q_d2_3 + +v_data .req v21 +q_data .req q21 +v_data_lo .req v22 +v_data_hi .req v23 + +gf_5vect_mad_neon: + /* less than 16 bytes, return_fail */ + cmp x_len, #16 + blt .return_fail + + movi v_mask0f.16b, #0x0f + lsl x_vec_i, x_vec_i, #5 + lsl x_vec, x_vec, #5 + add x_tbl1, x_tbl, x_vec_i + add x_tbl2, x_tbl1, x_vec + add x_tbl3, x_tbl2, x_vec + add x_tbl4, x_tbl3, x_vec + add x_tbl5, x_tbl4, x_vec + add x_src_end, x_src, x_len + ldr x_dest1, [x_dest, #8*0] + ldr x_dest2, [x_dest, #8*1] + ldr x_dest3, [x_dest, #8*2] + ldr x_dest4, [x_dest, #8*3] + ldr x_dest5, [x_dest, #8*4] + ldr q_gft1_lo, [x_tbl1] + ldr q_gft1_hi, [x_tbl1, #16] + ldr q_gft3_lo, [x_tbl3] + ldr q_gft3_hi, [x_tbl3, #16] + ldr q_gft4_lo, [x_tbl4] + ldr q_gft4_hi, [x_tbl4, #16] + +.Lloop64_init: + /* less than 64 bytes, goto Lloop16_init */ + cmp x_len, #64 + blt .Lloop16_init + + /* save d8 ~ d15 to stack */ + sub sp, sp, #64 + stp d8, d9, [sp] + stp d10, d11, [sp, #16] + stp d12, d13, [sp, #32] + stp d14, d15, [sp, #48] + + sub x_src_end, x_src_end, #64 + +.Lloop64: + ldr q_data_0, [x_src, #16*0] + ldr q_data_1, [x_src, #16*1] + ldr q_data_2, [x_src, #16*2] + ldr q_data_3, [x_src, #16*3] + add x_src, x_src, #64 + + ldr q_d1_0, [x_dest1, #16*0] + ldr q_d1_1, [x_dest1, #16*1] + ldr q_d1_2, [x_dest1, #16*2] + ldr q_d1_3, [x_dest1, #16*3] + + ldr q_d2_0, [x_dest2, #16*0] + ldr q_d2_1, [x_dest2, #16*1] + ldr q_d2_2, [x_dest2, #16*2] 
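+
+	/* v_gft5 aliases v_gft2 (see the .req aliases above), so the
+	 * dest5 table load later in the loop overwrites the dest2
+	 * table; q_gft2 is therefore reloaded from x_tbl2 on every
+	 * pass.
+	 */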
+ ldr q_d2_3, [x_dest2, #16*3] + + ldr q_d3_0, [x_dest3, #16*0] + ldr q_d3_1, [x_dest3, #16*1] + ldr q_d3_2, [x_dest3, #16*2] + ldr q_d3_3, [x_dest3, #16*3] + + ldr q_gft2_lo, [x_tbl2] + ldr q_gft2_hi, [x_tbl2, #16] + + and v_data_0_lo.16b, v_data_0.16b, v_mask0f.16b + and v_data_1_lo.16b, v_data_1.16b, v_mask0f.16b + and v_data_2_lo.16b, v_data_2.16b, v_mask0f.16b + and v_data_3_lo.16b, v_data_3.16b, v_mask0f.16b + + ushr v_data_0_hi.16b, v_data_0.16b, #4 + ushr v_data_1_hi.16b, v_data_1.16b, #4 + ushr v_data_2_hi.16b, v_data_2.16b, #4 + ushr v_data_3_hi.16b, v_data_3.16b, #4 + + /* dest1 */ + tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_0_lo.16b + tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_0_hi.16b + eor v_d1_0.16b, v_tmp_lo.16b, v_d1_0.16b + eor v_d1_0.16b, v_d1_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_1_lo.16b + tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_1_hi.16b + eor v_d1_1.16b, v_tmp_lo.16b, v_d1_1.16b + eor v_d1_1.16b, v_d1_1.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_2_lo.16b + tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_2_hi.16b + eor v_d1_2.16b, v_tmp_lo.16b, v_d1_2.16b + eor v_d1_2.16b, v_d1_2.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_3_lo.16b + tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_3_hi.16b + eor v_d1_3.16b, v_tmp_lo.16b, v_d1_3.16b + eor v_d1_3.16b, v_d1_3.16b, v_tmp_hi.16b + + /* dest2 */ + tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_0_lo.16b + tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_0_hi.16b + eor v_d2_0.16b, v_tmp_lo.16b, v_d2_0.16b + eor v_d2_0.16b, v_d2_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_1_lo.16b + tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_1_hi.16b + eor v_d2_1.16b, v_tmp_lo.16b, v_d2_1.16b + eor v_d2_1.16b, v_d2_1.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_2_lo.16b + tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_2_hi.16b + eor v_d2_2.16b, v_tmp_lo.16b, v_d2_2.16b + eor v_d2_2.16b, v_d2_2.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_3_lo.16b + tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_3_hi.16b + eor v_d2_3.16b, v_tmp_lo.16b, v_d2_3.16b + eor v_d2_3.16b, v_d2_3.16b, v_tmp_hi.16b + + /* dest3 */ + tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_0_lo.16b + tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_0_hi.16b + eor v_d3_0.16b, v_tmp_lo.16b, v_d3_0.16b + eor v_d3_0.16b, v_d3_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_1_lo.16b + tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_1_hi.16b + eor v_d3_1.16b, v_tmp_lo.16b, v_d3_1.16b + eor v_d3_1.16b, v_d3_1.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_2_lo.16b + tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_2_hi.16b + eor v_d3_2.16b, v_tmp_lo.16b, v_d3_2.16b + eor v_d3_2.16b, v_d3_2.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_3_lo.16b + tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_3_hi.16b + eor v_d3_3.16b, v_tmp_lo.16b, v_d3_3.16b + eor v_d3_3.16b, v_d3_3.16b, v_tmp_hi.16b + + str q_d1_0, [x_dest1, #16*0] + str q_d1_1, [x_dest1, #16*1] + str q_d1_2, [x_dest1, #16*2] + str q_d1_3, [x_dest1, #16*3] + add x_dest1, x_dest1, #64 + + str q_d2_0, [x_dest2, #16*0] + str q_d2_1, [x_dest2, #16*1] + str q_d2_2, [x_dest2, #16*2] + str q_d2_3, [x_dest2, #16*3] + add x_dest2, x_dest2, #64 + + str q_d3_0, [x_dest3, #16*0] + str q_d3_1, [x_dest3, #16*1] + str q_d3_2, [x_dest3, #16*2] + str q_d3_3, [x_dest3, #16*3] + add x_dest3, x_dest3, #64 + + ldr q_d4_0, [x_dest4, #16*0] + ldr q_d4_1, [x_dest4, #16*1] + ldr q_d4_2, [x_dest4, #16*2] + ldr q_d4_3, [x_dest4, #16*3] + + ldr 
q_d5_0, [x_dest5, #16*0] + ldr q_d5_1, [x_dest5, #16*1] + ldr q_d5_2, [x_dest5, #16*2] + ldr q_d5_3, [x_dest5, #16*3] + + ldr q_gft5_lo, [x_tbl5] + ldr q_gft5_hi, [x_tbl5, #16] + + /* dest4 */ + tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_0_lo.16b + tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_0_hi.16b + eor v_d4_0.16b, v_tmp_lo.16b, v_d4_0.16b + eor v_d4_0.16b, v_d4_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_1_lo.16b + tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_1_hi.16b + eor v_d4_1.16b, v_tmp_lo.16b, v_d4_1.16b + eor v_d4_1.16b, v_d4_1.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_2_lo.16b + tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_2_hi.16b + eor v_d4_2.16b, v_tmp_lo.16b, v_d4_2.16b + eor v_d4_2.16b, v_d4_2.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_3_lo.16b + tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_3_hi.16b + eor v_d4_3.16b, v_tmp_lo.16b, v_d4_3.16b + eor v_d4_3.16b, v_d4_3.16b, v_tmp_hi.16b + + /* dest5 */ + tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_0_lo.16b + tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_0_hi.16b + eor v_d5_0.16b, v_tmp_lo.16b, v_d5_0.16b + eor v_d5_0.16b, v_d5_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_1_lo.16b + tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_1_hi.16b + eor v_d5_1.16b, v_tmp_lo.16b, v_d5_1.16b + eor v_d5_1.16b, v_d5_1.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_2_lo.16b + tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_2_hi.16b + eor v_d5_2.16b, v_tmp_lo.16b, v_d5_2.16b + eor v_d5_2.16b, v_d5_2.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_3_lo.16b + tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_3_hi.16b + eor v_d5_3.16b, v_tmp_lo.16b, v_d5_3.16b + eor v_d5_3.16b, v_d5_3.16b, v_tmp_hi.16b + + str q_d4_0, [x_dest4, #16*0] + str q_d4_1, [x_dest4, #16*1] + str q_d4_2, [x_dest4, #16*2] + str q_d4_3, [x_dest4, #16*3] + add x_dest4, x_dest4, #64 + + str q_d5_0, [x_dest5, #16*0] + str q_d5_1, [x_dest5, #16*1] + str q_d5_2, [x_dest5, #16*2] + str q_d5_3, [x_dest5, #16*3] + add x_dest5, x_dest5, #64 + + cmp x_src, x_src_end + bls .Lloop64 + +.Lloop64_end: + /* restore d8 ~ d15 */ + ldp d8, d9, [sp] + ldp d10, d11, [sp, #16] + ldp d12, d13, [sp, #32] + ldp d14, d15, [sp, #48] + add sp, sp, #64 + add x_src_end, x_src_end, #64 + +.Lloop16_init: + sub x_src_end, x_src_end, #16 + cmp x_src, x_src_end + bhi .lessthan16_init + +.Lloop16: + ldr q_data, [x_src] + + ldr q_d1_0, [x_dest1] + ldr q_d2_0, [x_dest2] + ldr q_d3_0, [x_dest3] + ldr q_gft2_lo, [x_tbl2] + ldr q_gft2_hi, [x_tbl2, #16] + + and v_data_lo.16b, v_data.16b, v_mask0f.16b + ushr v_data_hi.16b, v_data.16b, #4 + + tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b + eor v_d1_0.16b, v_tmp_lo.16b, v_d1_0.16b + eor v_d1_0.16b, v_d1_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_hi.16b + eor v_d2_0.16b, v_tmp_lo.16b, v_d2_0.16b + eor v_d2_0.16b, v_d2_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_hi.16b + eor v_d3_0.16b, v_tmp_lo.16b, v_d3_0.16b + eor v_d3_0.16b, v_d3_0.16b, v_tmp_hi.16b + + str q_d1_0, [x_dest1] + str q_d2_0, [x_dest2] + str q_d3_0, [x_dest3] + + ldr q_d4_0, [x_dest4] + ldr q_d5_0, [x_dest5] + ldr q_gft5_lo, [x_tbl5] + ldr q_gft5_hi, [x_tbl5, #16] + + tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_hi.16b + eor v_d4_0.16b, v_tmp_lo.16b, v_d4_0.16b + eor 
v_d4_0.16b, v_d4_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_hi.16b + eor v_d5_0.16b, v_tmp_lo.16b, v_d5_0.16b + eor v_d5_0.16b, v_d5_0.16b, v_tmp_hi.16b + + str q_d4_0, [x_dest4] + str q_d5_0, [x_dest5] + + add x_src, x_src, #16 + add x_dest1, x_dest1, #16 + add x_dest2, x_dest2, #16 + add x_dest3, x_dest3, #16 + add x_dest4, x_dest4, #16 + add x_dest5, x_dest5, #16 + cmp x_src, x_src_end + bls .Lloop16 + +.lessthan16_init: + sub x_tmp, x_src, x_src_end + cmp x_tmp, #16 + beq .return_pass + +.lessthan16: + mov x_src, x_src_end + sub x_dest1, x_dest1, x_tmp + sub x_dest2, x_dest2, x_tmp + sub x_dest3, x_dest3, x_tmp + sub x_dest4, x_dest4, x_tmp + sub x_dest5, x_dest5, x_tmp + + ldr x_const, =const_tbl + sub x_const, x_const, x_tmp + ldr q_tmp, [x_const, #16] + + ldr q_data, [x_src] + ldr q_d1_0, [x_dest1] + ldr q_d2_0, [x_dest2] + ldr q_d3_0, [x_dest3] + ldr q_gft2_lo, [x_tbl2] + ldr q_gft2_hi, [x_tbl2, #16] + + and v_data_lo.16b, v_data.16b, v_mask0f.16b + ushr v_data_hi.16b, v_data.16b, #4 + + tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b + eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b + and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b + eor v_d1_0.16b, v_d1_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_hi.16b + eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b + and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b + eor v_d2_0.16b, v_d2_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_hi.16b + eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b + and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b + eor v_d3_0.16b, v_d3_0.16b, v_tmp_hi.16b + + str q_d1_0, [x_dest1] + str q_d2_0, [x_dest2] + str q_d3_0, [x_dest3] + + ldr q_d4_0, [x_dest4] + ldr q_d5_0, [x_dest5] + ldr q_gft5_lo, [x_tbl5] + ldr q_gft5_hi, [x_tbl5, #16] + + tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_hi.16b + eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b + and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b + eor v_d4_0.16b, v_d4_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_hi.16b + eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b + and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b + eor v_d5_0.16b, v_d5_0.16b, v_tmp_hi.16b + + str q_d4_0, [x_dest4] + str q_d5_0, [x_dest5] + +.return_pass: + mov w_ret, #0 + ret + +.return_fail: + mov w_ret, #1 + ret + +.section .data +.balign 8 +const_tbl: + .dword 0x0000000000000000, 0x0000000000000000 + .dword 0xffffffffffffffff, 0xffffffffffffffff diff --git a/src/isa-l/erasure_code/aarch64/gf_6vect_mad_neon.S b/src/isa-l/erasure_code/aarch64/gf_6vect_mad_neon.S new file mode 100644 index 000000000..4886440ba --- /dev/null +++ b/src/isa-l/erasure_code/aarch64/gf_6vect_mad_neon.S @@ -0,0 +1,609 @@ +/************************************************************** + Copyright (c) 2019 Huawei Technologies Co., Ltd. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Huawei Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +.text +.global gf_6vect_mad_neon +.type gf_6vect_mad_neon, %function + + +/* arguments */ +x_len .req x0 +x_vec .req x1 +x_vec_i .req x2 +x_tbl .req x3 +x_src .req x4 +x_dest .req x5 + +/* returns */ +w_ret .req w0 + +/* local variables */ +x_src_end .req x6 +x_dest1 .req x7 +x_dest2 .req x8 +x_dest3 .req x9 +x_dest4 .req x10 +x_dest5 .req x11 +x_dest6 .req x_dest +x_tmp .req x12 +x_tbl1 .req x13 +x_tbl2 .req x14 +x_tbl3 .req x15 +x_tbl4 .req x16 +x_tbl5 .req x17 +x_tbl6 .req x_tbl +x_const .req x18 + +/* vectors */ +v_mask0f .req v0 +v_tmp_lo .req v1 +v_tmp_hi .req v2 +v_tmp .req v3 +q_tmp .req q3 + +v_gft1_lo .req v4 +v_gft1_hi .req v5 +v_gft2_lo .req v6 +v_gft2_hi .req v7 +v_gft3_lo .req v16 +v_gft3_hi .req v17 +q_gft1_lo .req q4 +q_gft1_hi .req q5 +q_gft2_lo .req q6 +q_gft2_hi .req q7 +q_gft3_lo .req q16 +q_gft3_hi .req q17 + +v_gft4_lo .req v18 +v_gft4_hi .req v19 +q_gft4_lo .req q18 +q_gft4_hi .req q19 +v_gft5_lo .req v_gft2_lo +v_gft5_hi .req v_gft2_hi +q_gft5_lo .req q_gft2_lo +q_gft5_hi .req q_gft2_hi +v_gft6_lo .req v_gft3_lo +v_gft6_hi .req v_gft3_hi +q_gft6_lo .req q_gft3_lo +q_gft6_hi .req q_gft3_hi + +v_data_0 .req v8 +v_data_1 .req v9 +v_data_2 .req v10 +v_data_3 .req v11 +q_data_0 .req q8 +q_data_1 .req q9 +q_data_2 .req q10 +q_data_3 .req q11 + +v_data_0_lo .req v12 +v_data_1_lo .req v13 +v_data_2_lo .req v14 +v_data_3_lo .req v15 +v_data_0_hi .req v_data_0 +v_data_1_hi .req v_data_1 +v_data_2_hi .req v_data_2 +v_data_3_hi .req v_data_3 + +v_d1_0 .req v20 +v_d1_1 .req v21 +v_d1_2 .req v22 +v_d1_3 .req v23 +v_d2_0 .req v24 +v_d2_1 .req v25 +v_d2_2 .req v26 +v_d2_3 .req v27 +v_d3_0 .req v28 +v_d3_1 .req v29 +v_d3_2 .req v30 +v_d3_3 .req v31 +q_d1_0 .req q20 +q_d1_1 .req q21 +q_d1_2 .req q22 +q_d1_3 .req q23 +q_d2_0 .req q24 +q_d2_1 .req q25 +q_d2_2 .req q26 +q_d2_3 .req q27 +q_d3_0 .req q28 +q_d3_1 .req q29 +q_d3_2 .req q30 +q_d3_3 .req q31 + +v_d4_0 .req v_d1_0 +v_d4_1 .req v_d1_1 +v_d4_2 .req v_d1_2 +v_d4_3 .req v_d1_3 +q_d4_0 .req q_d1_0 +q_d4_1 .req q_d1_1 +q_d4_2 .req q_d1_2 +q_d4_3 .req q_d1_3 +v_d5_0 .req v_d2_0 +v_d5_1 .req v_d2_1 +v_d5_2 .req v_d2_2 +v_d5_3 .req v_d2_3 +q_d5_0 .req q_d2_0 +q_d5_1 .req q_d2_1 +q_d5_2 .req q_d2_2 +q_d5_3 .req q_d2_3 +v_d6_0 .req v_d3_0 +v_d6_1 .req v_d3_1 +v_d6_2 .req v_d3_2 +v_d6_3 .req v_d3_3 
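+/* v_d4_*, v_d5_* and v_d6_* (and the q_ views below) alias v_d1_*,
+   v_d2_* and v_d3_*: with all 32 NEON registers in use, the main loop
+   computes and stores dest1-3 first, then reuses v20-v31 for dest4-6. */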
+q_d6_0 .req q_d3_0 +q_d6_1 .req q_d3_1 +q_d6_2 .req q_d3_2 +q_d6_3 .req q_d3_3 + +v_data .req v21 +q_data .req q21 +v_data_lo .req v22 +v_data_hi .req v23 + +gf_6vect_mad_neon: + /* less than 16 bytes, return_fail */ + cmp x_len, #16 + blt .return_fail + + movi v_mask0f.16b, #0x0f + lsl x_vec_i, x_vec_i, #5 + lsl x_vec, x_vec, #5 + add x_tbl1, x_tbl, x_vec_i + add x_tbl2, x_tbl1, x_vec + add x_tbl3, x_tbl2, x_vec + add x_tbl4, x_tbl3, x_vec + add x_tbl5, x_tbl4, x_vec + add x_tbl6, x_tbl5, x_vec + add x_src_end, x_src, x_len + ldr x_dest1, [x_dest, #8*0] + ldr x_dest2, [x_dest, #8*1] + ldr x_dest3, [x_dest, #8*2] + ldr x_dest4, [x_dest, #8*3] + ldr x_dest5, [x_dest, #8*4] + ldr x_dest6, [x_dest, #8*5] + ldr q_gft1_lo, [x_tbl1] + ldr q_gft1_hi, [x_tbl1, #16] + ldr q_gft4_lo, [x_tbl4] + ldr q_gft4_hi, [x_tbl4, #16] + +.Lloop64_init: + /* less than 64 bytes, goto Lloop16_init */ + cmp x_len, #64 + blt .Lloop16_init + + /* save d8 ~ d15 to stack */ + sub sp, sp, #64 + stp d8, d9, [sp] + stp d10, d11, [sp, #16] + stp d12, d13, [sp, #32] + stp d14, d15, [sp, #48] + + sub x_src_end, x_src_end, #64 + +.Lloop64: + ldr q_data_0, [x_src, #16*0] + ldr q_data_1, [x_src, #16*1] + ldr q_data_2, [x_src, #16*2] + ldr q_data_3, [x_src, #16*3] + add x_src, x_src, #64 + + ldr q_d1_0, [x_dest1, #16*0] + ldr q_d1_1, [x_dest1, #16*1] + ldr q_d1_2, [x_dest1, #16*2] + ldr q_d1_3, [x_dest1, #16*3] + + ldr q_d2_0, [x_dest2, #16*0] + ldr q_d2_1, [x_dest2, #16*1] + ldr q_d2_2, [x_dest2, #16*2] + ldr q_d2_3, [x_dest2, #16*3] + + ldr q_d3_0, [x_dest3, #16*0] + ldr q_d3_1, [x_dest3, #16*1] + ldr q_d3_2, [x_dest3, #16*2] + ldr q_d3_3, [x_dest3, #16*3] + + ldr q_gft2_lo, [x_tbl2] + ldr q_gft2_hi, [x_tbl2, #16] + ldr q_gft3_lo, [x_tbl3] + ldr q_gft3_hi, [x_tbl3, #16] + + and v_data_0_lo.16b, v_data_0.16b, v_mask0f.16b + and v_data_1_lo.16b, v_data_1.16b, v_mask0f.16b + and v_data_2_lo.16b, v_data_2.16b, v_mask0f.16b + and v_data_3_lo.16b, v_data_3.16b, v_mask0f.16b + + ushr v_data_0_hi.16b, v_data_0.16b, #4 + ushr v_data_1_hi.16b, v_data_1.16b, #4 + ushr v_data_2_hi.16b, v_data_2.16b, #4 + ushr v_data_3_hi.16b, v_data_3.16b, #4 + + /* dest1 */ + tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_0_lo.16b + tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_0_hi.16b + eor v_d1_0.16b, v_tmp_lo.16b, v_d1_0.16b + eor v_d1_0.16b, v_d1_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_1_lo.16b + tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_1_hi.16b + eor v_d1_1.16b, v_tmp_lo.16b, v_d1_1.16b + eor v_d1_1.16b, v_d1_1.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_2_lo.16b + tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_2_hi.16b + eor v_d1_2.16b, v_tmp_lo.16b, v_d1_2.16b + eor v_d1_2.16b, v_d1_2.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_3_lo.16b + tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_3_hi.16b + eor v_d1_3.16b, v_tmp_lo.16b, v_d1_3.16b + eor v_d1_3.16b, v_d1_3.16b, v_tmp_hi.16b + + /* dest2 */ + tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_0_lo.16b + tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_0_hi.16b + eor v_d2_0.16b, v_tmp_lo.16b, v_d2_0.16b + eor v_d2_0.16b, v_d2_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_1_lo.16b + tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_1_hi.16b + eor v_d2_1.16b, v_tmp_lo.16b, v_d2_1.16b + eor v_d2_1.16b, v_d2_1.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_2_lo.16b + tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_2_hi.16b + eor v_d2_2.16b, v_tmp_lo.16b, v_d2_2.16b + eor v_d2_2.16b, v_d2_2.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, 
{v_gft2_lo.16b}, v_data_3_lo.16b + tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_3_hi.16b + eor v_d2_3.16b, v_tmp_lo.16b, v_d2_3.16b + eor v_d2_3.16b, v_d2_3.16b, v_tmp_hi.16b + + /* dest3 */ + tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_0_lo.16b + tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_0_hi.16b + eor v_d3_0.16b, v_tmp_lo.16b, v_d3_0.16b + eor v_d3_0.16b, v_d3_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_1_lo.16b + tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_1_hi.16b + eor v_d3_1.16b, v_tmp_lo.16b, v_d3_1.16b + eor v_d3_1.16b, v_d3_1.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_2_lo.16b + tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_2_hi.16b + eor v_d3_2.16b, v_tmp_lo.16b, v_d3_2.16b + eor v_d3_2.16b, v_d3_2.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_3_lo.16b + tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_3_hi.16b + eor v_d3_3.16b, v_tmp_lo.16b, v_d3_3.16b + eor v_d3_3.16b, v_d3_3.16b, v_tmp_hi.16b + + str q_d1_0, [x_dest1, #16*0] + str q_d1_1, [x_dest1, #16*1] + str q_d1_2, [x_dest1, #16*2] + str q_d1_3, [x_dest1, #16*3] + add x_dest1, x_dest1, #64 + + str q_d2_0, [x_dest2, #16*0] + str q_d2_1, [x_dest2, #16*1] + str q_d2_2, [x_dest2, #16*2] + str q_d2_3, [x_dest2, #16*3] + add x_dest2, x_dest2, #64 + + str q_d3_0, [x_dest3, #16*0] + str q_d3_1, [x_dest3, #16*1] + str q_d3_2, [x_dest3, #16*2] + str q_d3_3, [x_dest3, #16*3] + add x_dest3, x_dest3, #64 + + ldr q_d4_0, [x_dest4, #16*0] + ldr q_d4_1, [x_dest4, #16*1] + ldr q_d4_2, [x_dest4, #16*2] + ldr q_d4_3, [x_dest4, #16*3] + + ldr q_d5_0, [x_dest5, #16*0] + ldr q_d5_1, [x_dest5, #16*1] + ldr q_d5_2, [x_dest5, #16*2] + ldr q_d5_3, [x_dest5, #16*3] + + ldr q_d6_0, [x_dest6, #16*0] + ldr q_d6_1, [x_dest6, #16*1] + ldr q_d6_2, [x_dest6, #16*2] + ldr q_d6_3, [x_dest6, #16*3] + + ldr q_gft5_lo, [x_tbl5] + ldr q_gft5_hi, [x_tbl5, #16] + ldr q_gft6_lo, [x_tbl6] + ldr q_gft6_hi, [x_tbl6, #16] + + /* dest4 */ + tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_0_lo.16b + tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_0_hi.16b + eor v_d4_0.16b, v_tmp_lo.16b, v_d4_0.16b + eor v_d4_0.16b, v_d4_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_1_lo.16b + tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_1_hi.16b + eor v_d4_1.16b, v_tmp_lo.16b, v_d4_1.16b + eor v_d4_1.16b, v_d4_1.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_2_lo.16b + tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_2_hi.16b + eor v_d4_2.16b, v_tmp_lo.16b, v_d4_2.16b + eor v_d4_2.16b, v_d4_2.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_3_lo.16b + tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_3_hi.16b + eor v_d4_3.16b, v_tmp_lo.16b, v_d4_3.16b + eor v_d4_3.16b, v_d4_3.16b, v_tmp_hi.16b + + /* dest5 */ + tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_0_lo.16b + tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_0_hi.16b + eor v_d5_0.16b, v_tmp_lo.16b, v_d5_0.16b + eor v_d5_0.16b, v_d5_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_1_lo.16b + tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_1_hi.16b + eor v_d5_1.16b, v_tmp_lo.16b, v_d5_1.16b + eor v_d5_1.16b, v_d5_1.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_2_lo.16b + tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_2_hi.16b + eor v_d5_2.16b, v_tmp_lo.16b, v_d5_2.16b + eor v_d5_2.16b, v_d5_2.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_3_lo.16b + tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_3_hi.16b + eor v_d5_3.16b, v_tmp_lo.16b, v_d5_3.16b + eor v_d5_3.16b, v_d5_3.16b, v_tmp_hi.16b + + /* dest6 */ + tbl v_tmp_lo.16b, {v_gft6_lo.16b}, 
v_data_0_lo.16b + tbl v_tmp_hi.16b, {v_gft6_hi.16b}, v_data_0_hi.16b + eor v_d6_0.16b, v_tmp_lo.16b, v_d6_0.16b + eor v_d6_0.16b, v_d6_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft6_lo.16b}, v_data_1_lo.16b + tbl v_tmp_hi.16b, {v_gft6_hi.16b}, v_data_1_hi.16b + eor v_d6_1.16b, v_tmp_lo.16b, v_d6_1.16b + eor v_d6_1.16b, v_d6_1.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft6_lo.16b}, v_data_2_lo.16b + tbl v_tmp_hi.16b, {v_gft6_hi.16b}, v_data_2_hi.16b + eor v_d6_2.16b, v_tmp_lo.16b, v_d6_2.16b + eor v_d6_2.16b, v_d6_2.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft6_lo.16b}, v_data_3_lo.16b + tbl v_tmp_hi.16b, {v_gft6_hi.16b}, v_data_3_hi.16b + eor v_d6_3.16b, v_tmp_lo.16b, v_d6_3.16b + eor v_d6_3.16b, v_d6_3.16b, v_tmp_hi.16b + + str q_d4_0, [x_dest4, #16*0] + str q_d4_1, [x_dest4, #16*1] + str q_d4_2, [x_dest4, #16*2] + str q_d4_3, [x_dest4, #16*3] + add x_dest4, x_dest4, #64 + + str q_d5_0, [x_dest5, #16*0] + str q_d5_1, [x_dest5, #16*1] + str q_d5_2, [x_dest5, #16*2] + str q_d5_3, [x_dest5, #16*3] + add x_dest5, x_dest5, #64 + + str q_d6_0, [x_dest6, #16*0] + str q_d6_1, [x_dest6, #16*1] + str q_d6_2, [x_dest6, #16*2] + str q_d6_3, [x_dest6, #16*3] + add x_dest6, x_dest6, #64 + + cmp x_src, x_src_end + bls .Lloop64 + +.Lloop64_end: + /* restore d8 ~ d15 */ + ldp d8, d9, [sp] + ldp d10, d11, [sp, #16] + ldp d12, d13, [sp, #32] + ldp d14, d15, [sp, #48] + add sp, sp, #64 + add x_src_end, x_src_end, #64 + +.Lloop16_init: + sub x_src_end, x_src_end, #16 + cmp x_src, x_src_end + bhi .lessthan16_init + +.Lloop16: + ldr q_data, [x_src] + + ldr q_d1_0, [x_dest1] + ldr q_d2_0, [x_dest2] + ldr q_d3_0, [x_dest3] + ldr q_gft2_lo, [x_tbl2] + ldr q_gft2_hi, [x_tbl2, #16] + ldr q_gft3_lo, [x_tbl3] + ldr q_gft3_hi, [x_tbl3, #16] + + and v_data_lo.16b, v_data.16b, v_mask0f.16b + ushr v_data_hi.16b, v_data.16b, #4 + + tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b + eor v_d1_0.16b, v_tmp_lo.16b, v_d1_0.16b + eor v_d1_0.16b, v_d1_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_hi.16b + eor v_d2_0.16b, v_tmp_lo.16b, v_d2_0.16b + eor v_d2_0.16b, v_d2_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_hi.16b + eor v_d3_0.16b, v_tmp_lo.16b, v_d3_0.16b + eor v_d3_0.16b, v_d3_0.16b, v_tmp_hi.16b + + str q_d1_0, [x_dest1] + str q_d2_0, [x_dest2] + str q_d3_0, [x_dest3] + + ldr q_d4_0, [x_dest4] + ldr q_d5_0, [x_dest5] + ldr q_d6_0, [x_dest6] + ldr q_gft5_lo, [x_tbl5] + ldr q_gft5_hi, [x_tbl5, #16] + ldr q_gft6_lo, [x_tbl6] + ldr q_gft6_hi, [x_tbl6, #16] + + tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_hi.16b + eor v_d4_0.16b, v_tmp_lo.16b, v_d4_0.16b + eor v_d4_0.16b, v_d4_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_hi.16b + eor v_d5_0.16b, v_tmp_lo.16b, v_d5_0.16b + eor v_d5_0.16b, v_d5_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft6_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft6_hi.16b}, v_data_hi.16b + eor v_d6_0.16b, v_tmp_lo.16b, v_d6_0.16b + eor v_d6_0.16b, v_d6_0.16b, v_tmp_hi.16b + + str q_d4_0, [x_dest4] + str q_d5_0, [x_dest5] + str q_d6_0, [x_dest6] + + add x_src, x_src, #16 + add x_dest1, x_dest1, #16 + add x_dest2, x_dest2, #16 + add x_dest3, x_dest3, #16 + add x_dest4, x_dest4, #16 + add x_dest5, x_dest5, #16 + add x_dest6, x_dest6, #16 + cmp x_src, x_src_end + bls .Lloop16 + 
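+/*
+ * Tail handling: fewer than 16 bytes remain.  Step x_src and every
+ * dest pointer back by x_tmp so that one full 16-byte vector ends
+ * exactly at the buffer end.  The first x_tmp bytes of that window
+ * were already updated above, so the recomputed products are ANDed
+ * with a mask loaded from const_tbl (x_tmp zero bytes followed by
+ * 0xff bytes): XOR with zero leaves the finished bytes unchanged
+ * while the remaining bytes receive the normal update.
+ */
+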
+.lessthan16_init: + sub x_tmp, x_src, x_src_end + cmp x_tmp, #16 + beq .return_pass + +.lessthan16: + mov x_src, x_src_end + sub x_dest1, x_dest1, x_tmp + sub x_dest2, x_dest2, x_tmp + sub x_dest3, x_dest3, x_tmp + sub x_dest4, x_dest4, x_tmp + sub x_dest5, x_dest5, x_tmp + sub x_dest6, x_dest6, x_tmp + + ldr x_const, =const_tbl + sub x_const, x_const, x_tmp + ldr q_tmp, [x_const, #16] + + ldr q_data, [x_src] + ldr q_d1_0, [x_dest1] + ldr q_d2_0, [x_dest2] + ldr q_d3_0, [x_dest3] + ldr q_gft2_lo, [x_tbl2] + ldr q_gft2_hi, [x_tbl2, #16] + ldr q_gft3_lo, [x_tbl3] + ldr q_gft3_hi, [x_tbl3, #16] + + and v_data_lo.16b, v_data.16b, v_mask0f.16b + ushr v_data_hi.16b, v_data.16b, #4 + + tbl v_tmp_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b + eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b + and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b + eor v_d1_0.16b, v_d1_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft2_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft2_hi.16b}, v_data_hi.16b + eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b + and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b + eor v_d2_0.16b, v_d2_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft3_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft3_hi.16b}, v_data_hi.16b + eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b + and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b + eor v_d3_0.16b, v_d3_0.16b, v_tmp_hi.16b + + str q_d1_0, [x_dest1] + str q_d2_0, [x_dest2] + str q_d3_0, [x_dest3] + + ldr q_d4_0, [x_dest4] + ldr q_d5_0, [x_dest5] + ldr q_d6_0, [x_dest6] + ldr q_gft5_lo, [x_tbl5] + ldr q_gft5_hi, [x_tbl5, #16] + ldr q_gft6_lo, [x_tbl6] + ldr q_gft6_hi, [x_tbl6, #16] + + tbl v_tmp_lo.16b, {v_gft4_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft4_hi.16b}, v_data_hi.16b + eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b + and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b + eor v_d4_0.16b, v_d4_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft5_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft5_hi.16b}, v_data_hi.16b + eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b + and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b + eor v_d5_0.16b, v_d5_0.16b, v_tmp_hi.16b + + tbl v_tmp_lo.16b, {v_gft6_lo.16b}, v_data_lo.16b + tbl v_tmp_hi.16b, {v_gft6_hi.16b}, v_data_hi.16b + eor v_tmp_hi.16b, v_tmp_lo.16b, v_tmp_hi.16b + and v_tmp_hi.16b, v_tmp_hi.16b, v_tmp.16b + eor v_d6_0.16b, v_d6_0.16b, v_tmp_hi.16b + + str q_d4_0, [x_dest4] + str q_d5_0, [x_dest5] + str q_d6_0, [x_dest6] + +.return_pass: + mov w_ret, #0 + ret + +.return_fail: + mov w_ret, #1 + ret + +.section .data +.balign 8 +const_tbl: + .dword 0x0000000000000000, 0x0000000000000000 + .dword 0xffffffffffffffff, 0xffffffffffffffff diff --git a/src/isa-l/erasure_code/aarch64/gf_vect_dot_prod_neon.S b/src/isa-l/erasure_code/aarch64/gf_vect_dot_prod_neon.S new file mode 100644 index 000000000..117110c8a --- /dev/null +++ b/src/isa-l/erasure_code/aarch64/gf_vect_dot_prod_neon.S @@ -0,0 +1,298 @@ +/************************************************************** + Copyright (c) 2019 Huawei Technologies Co., Ltd. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. 
+ * Neither the name of Huawei Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ +.text + +.global gf_vect_dot_prod_neon +.type gf_vect_dot_prod_neon, %function + +/* arguments */ +x_len .req x0 +x_vec .req x1 +x_tbl .req x2 +x_src .req x3 +x_dest1 .req x4 + +/* returns */ +w_ret .req w0 + +/* local variables */ +x_vec_i .req x5 +x_ptr .req x6 +x_pos .req x7 +x_tmp .req x8 +x_tbl1 .req x9 + +/* vectors */ +v_gft1_lo .req v0 +v_gft1_hi .req v1 +q_gft1_lo .req q0 +q_gft1_hi .req q1 +v_mask0f .req v2 +q_mask0f .req q2 + +v_data_0 .req v8 +v_data_1 .req v9 +v_data_2 .req v10 +v_data_3 .req v11 +v_data_4 .req v12 +v_data_5 .req v13 +v_data_6 .req v14 +v_data_7 .req v15 +q_data_0 .req q8 +q_data_1 .req q9 +q_data_2 .req q10 +q_data_3 .req q11 +q_data_4 .req q12 +q_data_5 .req q13 +q_data_6 .req q14 +q_data_7 .req q15 + +v_data_0_lo .req v16 +v_data_1_lo .req v17 +v_data_2_lo .req v18 +v_data_3_lo .req v19 +v_data_4_lo .req v20 +v_data_5_lo .req v21 +v_data_6_lo .req v22 +v_data_7_lo .req v23 +v_data_0_hi .req v_data_0 +v_data_1_hi .req v_data_1 +v_data_2_hi .req v_data_2 +v_data_3_hi .req v_data_3 +v_data_4_hi .req v_data_4 +v_data_5_hi .req v_data_5 +v_data_6_hi .req v_data_6 +v_data_7_hi .req v_data_7 + +v_p0 .req v24 +v_p1 .req v25 +v_p2 .req v26 +v_p3 .req v27 +v_p4 .req v28 +v_p5 .req v29 +v_p6 .req v30 +v_p7 .req v31 +q_p0 .req q24 +q_p1 .req q25 +q_p2 .req q26 +q_p3 .req q27 +q_p4 .req q28 +q_p5 .req q29 +q_p6 .req q30 +q_p7 .req q31 + +v_p .req v_p0 +q_p .req q_p0 +v_data .req v_p1 +q_data .req q_p1 +v_data_lo .req v_p2 +v_data_hi .req v_p3 + + +gf_vect_dot_prod_neon: + /* less than 16 bytes, return_fail */ + cmp x_len, #16 + blt .return_fail + + movi v_mask0f.16b, #0x0f + mov x_pos, #0 + + lsl x_vec, x_vec, #3 + +.Lloop128_init: + /* less than 128 bytes, goto Lloop16_init */ + cmp x_len, #128 + blt .Lloop16_init + + /* save d8 ~ d15 to stack */ + sub sp, sp, #64 + stp d8, d9, [sp] + stp d10, d11, [sp, #16] + stp d12, d13, [sp, #32] + stp d14, d15, [sp, #48] + + sub x_len, x_len, #128 + +.Lloop128: + movi v_p0.16b, #0 + movi v_p1.16b, #0 + movi v_p2.16b, #0 + movi v_p3.16b, #0 + movi v_p4.16b, #0 + movi v_p5.16b, #0 + movi v_p6.16b, #0 + movi v_p7.16b, #0 + + mov x_tbl1, x_tbl + mov x_vec_i, #0 + +.Lloop128_vects: + ldr x_ptr, [x_src, x_vec_i] + add x_vec_i, x_vec_i, #8 + add x_ptr, x_ptr, x_pos + + ldp q_gft1_lo, q_gft1_hi, [x_tbl1], #32 + + ldp q_data_0, q_data_1, [x_ptr], #32 + ldp q_data_2, q_data_3, [x_ptr], #32 + ldp q_data_4, q_data_5, [x_ptr], #32 + ldp q_data_6, q_data_7, [x_ptr] + + prfm pldl1keep, [x_tbl1] + prfm pldl1strm, 
[x_ptr] + + and v_data_0_lo.16b, v_data_0.16b, v_mask0f.16b + and v_data_1_lo.16b, v_data_1.16b, v_mask0f.16b + and v_data_2_lo.16b, v_data_2.16b, v_mask0f.16b + and v_data_3_lo.16b, v_data_3.16b, v_mask0f.16b + and v_data_4_lo.16b, v_data_4.16b, v_mask0f.16b + and v_data_5_lo.16b, v_data_5.16b, v_mask0f.16b + and v_data_6_lo.16b, v_data_6.16b, v_mask0f.16b + and v_data_7_lo.16b, v_data_7.16b, v_mask0f.16b + + ushr v_data_0_hi.16b, v_data_0.16b, #4 + ushr v_data_1_hi.16b, v_data_1.16b, #4 + ushr v_data_2_hi.16b, v_data_2.16b, #4 + ushr v_data_3_hi.16b, v_data_3.16b, #4 + ushr v_data_4_hi.16b, v_data_4.16b, #4 + ushr v_data_5_hi.16b, v_data_5.16b, #4 + ushr v_data_6_hi.16b, v_data_6.16b, #4 + ushr v_data_7_hi.16b, v_data_7.16b, #4 + + tbl v_data_0_lo.16b, {v_gft1_lo.16b}, v_data_0_lo.16b + tbl v_data_1_lo.16b, {v_gft1_lo.16b}, v_data_1_lo.16b + tbl v_data_2_lo.16b, {v_gft1_lo.16b}, v_data_2_lo.16b + tbl v_data_3_lo.16b, {v_gft1_lo.16b}, v_data_3_lo.16b + tbl v_data_4_lo.16b, {v_gft1_lo.16b}, v_data_4_lo.16b + tbl v_data_5_lo.16b, {v_gft1_lo.16b}, v_data_5_lo.16b + tbl v_data_6_lo.16b, {v_gft1_lo.16b}, v_data_6_lo.16b + tbl v_data_7_lo.16b, {v_gft1_lo.16b}, v_data_7_lo.16b + + tbl v_data_0_hi.16b, {v_gft1_hi.16b}, v_data_0_hi.16b + tbl v_data_1_hi.16b, {v_gft1_hi.16b}, v_data_1_hi.16b + tbl v_data_2_hi.16b, {v_gft1_hi.16b}, v_data_2_hi.16b + tbl v_data_3_hi.16b, {v_gft1_hi.16b}, v_data_3_hi.16b + tbl v_data_4_hi.16b, {v_gft1_hi.16b}, v_data_4_hi.16b + tbl v_data_5_hi.16b, {v_gft1_hi.16b}, v_data_5_hi.16b + tbl v_data_6_hi.16b, {v_gft1_hi.16b}, v_data_6_hi.16b + tbl v_data_7_hi.16b, {v_gft1_hi.16b}, v_data_7_hi.16b + + eor v_p0.16b, v_data_0_lo.16b, v_p0.16b + eor v_p0.16b, v_p0.16b, v_data_0_hi.16b + eor v_p1.16b, v_data_1_lo.16b, v_p1.16b + eor v_p1.16b, v_p1.16b, v_data_1_hi.16b + eor v_p2.16b, v_data_2_lo.16b, v_p2.16b + eor v_p2.16b, v_p2.16b, v_data_2_hi.16b + eor v_p3.16b, v_data_3_lo.16b, v_p3.16b + eor v_p3.16b, v_p3.16b, v_data_3_hi.16b + eor v_p4.16b, v_data_4_lo.16b, v_p4.16b + eor v_p4.16b, v_p4.16b, v_data_4_hi.16b + eor v_p5.16b, v_data_5_lo.16b, v_p5.16b + eor v_p5.16b, v_p5.16b, v_data_5_hi.16b + eor v_p6.16b, v_data_6_lo.16b, v_p6.16b + eor v_p6.16b, v_p6.16b, v_data_6_hi.16b + eor v_p7.16b, v_data_7_lo.16b, v_p7.16b + eor v_p7.16b, v_p7.16b, v_data_7_hi.16b + + cmp x_vec_i, x_vec + blt .Lloop128_vects + +.Lloop128_vects_end: + add x_ptr, x_dest1, x_pos + stp q_p0, q_p1, [x_ptr], #32 + stp q_p2, q_p3, [x_ptr], #32 + stp q_p4, q_p5, [x_ptr], #32 + stp q_p6, q_p7, [x_ptr] + + add x_pos, x_pos, #128 + cmp x_pos, x_len + ble .Lloop128 + +.Lloop128_end: + /* restore d8 ~ d15 */ + ldp d8, d9, [sp] + ldp d10, d11, [sp, #16] + ldp d12, d13, [sp, #32] + ldp d14, d15, [sp, #48] + add sp, sp, #64 + + add x_len, x_len, #128 + cmp x_pos, x_len + beq .return_pass + +.Lloop16_init: + sub x_len, x_len, #16 + cmp x_pos, x_len + bgt .lessthan16_init + +.Lloop16: + movi v_p.16b, #0 + mov x_tbl1, x_tbl + mov x_vec_i, #0 + +.Lloop16_vects: + ldr x_ptr, [x_src, x_vec_i] + ldr q_data, [x_ptr, x_pos] + add x_vec_i, x_vec_i, #8 + + ldp q_gft1_lo, q_gft1_hi, [x_tbl1], #32 + + and v_data_lo.16b, v_data.16b, v_mask0f.16b + ushr v_data_hi.16b, v_data.16b, #4 + + tbl v_data_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b + tbl v_data_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b + eor v_p.16b, v_data_lo.16b, v_p.16b + eor v_p.16b, v_p.16b, v_data_hi.16b + + cmp x_vec_i, x_vec + blt .Lloop16_vects + +.Lloop16_vects_end: + str q_p, [x_dest1, x_pos] + add x_pos, x_pos, #16 + cmp x_pos, x_len + ble .Lloop16 + +.Lloop16_end: 
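+ /*
+ * Tail: if x_pos overran x_len by exactly 16, the whole buffer has
+ * been processed; otherwise x_pos is rewound to x_len so the final
+ * full 16-byte vector ends at the buffer end.  The overlapping bytes
+ * are recomputed with the same values, which is safe because
+ * dot_prod overwrites dest rather than accumulating into it.
+ */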
+ sub x_tmp, x_pos, x_len + cmp x_tmp, #16 + beq .return_pass + +.lessthan16_init: + mov x_pos, x_len + b .Lloop16 + +.return_pass: + mov w_ret, #0 + ret + +.return_fail: + mov w_ret, #1 + ret diff --git a/src/isa-l/erasure_code/aarch64/gf_vect_mad_neon.S b/src/isa-l/erasure_code/aarch64/gf_vect_mad_neon.S new file mode 100644 index 000000000..0e25a51e8 --- /dev/null +++ b/src/isa-l/erasure_code/aarch64/gf_vect_mad_neon.S @@ -0,0 +1,314 @@ +/************************************************************** + Copyright (c) 2019 Huawei Technologies Co., Ltd. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Huawei Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ +.text + +.global gf_vect_mad_neon +.type gf_vect_mad_neon, %function + + +/* arguments */ +x_len .req x0 +x_vec .req x1 +x_vec_i .req x2 +x_tbl .req x3 +x_src .req x4 +x_dest .req x5 + +/* returns */ +w_ret .req w0 + +/* local variables */ +x_src_end .req x6 +x_dest1 .req x_dest +x_tmp .req x7 +x_const .req x8 + +/* vectors */ +v_mask0f .req v0 +v_tmp .req v1 +q_tmp .req q1 + +v_tmp1_lo .req v2 +v_tmp1_hi .req v3 +v_tmp2_lo .req v4 +v_tmp2_hi .req v5 + +v_gft1_lo .req v6 +v_gft1_hi .req v7 +q_gft1_lo .req q6 +q_gft1_hi .req q7 + +v_data_0 .req v8 +v_data_1 .req v9 +v_data_2 .req v10 +v_data_3 .req v11 +v_data_4 .req v12 +v_data_5 .req v13 +v_data_6 .req v14 +v_data_7 .req v15 +q_data_0 .req q8 +q_data_1 .req q9 +q_data_2 .req q10 +q_data_3 .req q11 +q_data_4 .req q12 +q_data_5 .req q13 +q_data_6 .req q14 +q_data_7 .req q15 + +v_data_0_lo .req v16 +v_data_1_lo .req v17 +v_data_2_lo .req v18 +v_data_3_lo .req v19 +v_data_4_lo .req v20 +v_data_5_lo .req v21 +v_data_6_lo .req v22 +v_data_7_lo .req v23 +v_data_0_hi .req v_data_0 +v_data_1_hi .req v_data_1 +v_data_2_hi .req v_data_2 +v_data_3_hi .req v_data_3 +v_data_4_hi .req v_data_4 +v_data_5_hi .req v_data_5 +v_data_6_hi .req v_data_6 +v_data_7_hi .req v_data_7 + +v_d1_0 .req v24 +v_d1_1 .req v25 +v_d1_2 .req v26 +v_d1_3 .req v27 +v_d1_4 .req v28 +v_d1_5 .req v29 +v_d1_6 .req v30 +v_d1_7 .req v31 +q_d1_0 .req q24 +q_d1_1 .req q25 +q_d1_2 .req q26 +q_d1_3 .req q27 +q_d1_4 .req q28 +q_d1_5 .req q29 +q_d1_6 .req q30 +q_d1_7 .req q31 + +v_data .req v_d1_1 +q_data .req q_d1_1 +v_data_lo .req v_d1_2 +v_data_hi .req v_d1_3 + + +gf_vect_mad_neon: + /* less than 16 bytes, return_fail */ + cmp x_len, #16 + blt .return_fail + + movi v_mask0f.16b, #0x0f + lsl x_vec_i, x_vec_i, #5 + add x_tbl, x_tbl, x_vec_i + add x_src_end, x_src, x_len + + ldr q_gft1_lo, [x_tbl] + ldr q_gft1_hi, [x_tbl, #16] + +.Lloop128_init: + /* less than 128 bytes, goto Lloop16_init */ + cmp x_len, #128 + blt .Lloop16_init + + /* save d8 ~ d15 to stack */ + sub sp, sp, #64 + stp d8, d9, [sp] + stp d10, d11, [sp, #16] + stp d12, d13, [sp, #32] + stp d14, d15, [sp, #48] + + sub x_src_end, x_src_end, #128 + +.Lloop128: + ldr q_data_0, [x_src, #16*0] + ldr q_data_1, [x_src, #16*1] + ldr q_data_2, [x_src, #16*2] + ldr q_data_3, [x_src, #16*3] + ldr q_data_4, [x_src, #16*4] + ldr q_data_5, [x_src, #16*5] + ldr q_data_6, [x_src, #16*6] + ldr q_data_7, [x_src, #16*7] + + ldr q_d1_0, [x_dest1, #16*0] + ldr q_d1_1, [x_dest1, #16*1] + ldr q_d1_2, [x_dest1, #16*2] + ldr q_d1_3, [x_dest1, #16*3] + ldr q_d1_4, [x_dest1, #16*4] + ldr q_d1_5, [x_dest1, #16*5] + ldr q_d1_6, [x_dest1, #16*6] + ldr q_d1_7, [x_dest1, #16*7] + + and v_data_0_lo.16b, v_data_0.16b, v_mask0f.16b + and v_data_1_lo.16b, v_data_1.16b, v_mask0f.16b + and v_data_2_lo.16b, v_data_2.16b, v_mask0f.16b + and v_data_3_lo.16b, v_data_3.16b, v_mask0f.16b + and v_data_4_lo.16b, v_data_4.16b, v_mask0f.16b + and v_data_5_lo.16b, v_data_5.16b, v_mask0f.16b + and v_data_6_lo.16b, v_data_6.16b, v_mask0f.16b + and v_data_7_lo.16b, v_data_7.16b, v_mask0f.16b + + ushr v_data_0_hi.16b, v_data_0.16b, #4 + ushr v_data_1_hi.16b, v_data_1.16b, #4 + ushr v_data_2_hi.16b, v_data_2.16b, #4 + ushr v_data_3_hi.16b, v_data_3.16b, #4 + ushr v_data_4_hi.16b, v_data_4.16b, #4 + ushr v_data_5_hi.16b, v_data_5.16b, #4 + ushr v_data_6_hi.16b, v_data_6.16b, #4 + ushr v_data_7_hi.16b, v_data_7.16b, #4 + + tbl v_tmp1_lo.16b, {v_gft1_lo.16b}, v_data_0_lo.16b + tbl v_tmp1_hi.16b, 
{v_gft1_hi.16b}, v_data_0_hi.16b + tbl v_tmp2_lo.16b, {v_gft1_lo.16b}, v_data_1_lo.16b + tbl v_tmp2_hi.16b, {v_gft1_hi.16b}, v_data_1_hi.16b + + eor v_d1_0.16b, v_tmp1_lo.16b, v_d1_0.16b + eor v_d1_0.16b, v_d1_0.16b, v_tmp1_hi.16b + eor v_d1_1.16b, v_tmp2_lo.16b, v_d1_1.16b + eor v_d1_1.16b, v_d1_1.16b, v_tmp2_hi.16b + + tbl v_tmp1_lo.16b, {v_gft1_lo.16b}, v_data_2_lo.16b + tbl v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_2_hi.16b + tbl v_tmp2_lo.16b, {v_gft1_lo.16b}, v_data_3_lo.16b + tbl v_tmp2_hi.16b, {v_gft1_hi.16b}, v_data_3_hi.16b + + eor v_d1_2.16b, v_tmp1_lo.16b, v_d1_2.16b + eor v_d1_2.16b, v_d1_2.16b, v_tmp1_hi.16b + eor v_d1_3.16b, v_tmp2_lo.16b, v_d1_3.16b + eor v_d1_3.16b, v_d1_3.16b, v_tmp2_hi.16b + + tbl v_tmp1_lo.16b, {v_gft1_lo.16b}, v_data_4_lo.16b + tbl v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_4_hi.16b + tbl v_tmp2_lo.16b, {v_gft1_lo.16b}, v_data_5_lo.16b + tbl v_tmp2_hi.16b, {v_gft1_hi.16b}, v_data_5_hi.16b + + eor v_d1_4.16b, v_tmp1_lo.16b, v_d1_4.16b + eor v_d1_4.16b, v_d1_4.16b, v_tmp1_hi.16b + eor v_d1_5.16b, v_tmp2_lo.16b, v_d1_5.16b + eor v_d1_5.16b, v_d1_5.16b, v_tmp2_hi.16b + + tbl v_tmp1_lo.16b, {v_gft1_lo.16b}, v_data_6_lo.16b + tbl v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_6_hi.16b + tbl v_tmp2_lo.16b, {v_gft1_lo.16b}, v_data_7_lo.16b + tbl v_tmp2_hi.16b, {v_gft1_hi.16b}, v_data_7_hi.16b + + eor v_d1_6.16b, v_tmp1_lo.16b, v_d1_6.16b + eor v_d1_6.16b, v_d1_6.16b, v_tmp1_hi.16b + eor v_d1_7.16b, v_tmp2_lo.16b, v_d1_7.16b + eor v_d1_7.16b, v_d1_7.16b, v_tmp2_hi.16b + + str q_d1_0, [x_dest1, #16*0] + str q_d1_1, [x_dest1, #16*1] + str q_d1_2, [x_dest1, #16*2] + str q_d1_3, [x_dest1, #16*3] + str q_d1_4, [x_dest1, #16*4] + str q_d1_5, [x_dest1, #16*5] + str q_d1_6, [x_dest1, #16*6] + str q_d1_7, [x_dest1, #16*7] + + add x_src, x_src, #128 + add x_dest1, x_dest1, #128 + cmp x_src, x_src_end + bls .Lloop128 + +.Lloop128_end: + /* restore d8 ~ d15 */ + ldp d8, d9, [sp] + ldp d10, d11, [sp, #16] + ldp d12, d13, [sp, #32] + ldp d14, d15, [sp, #48] + add sp, sp, #64 + add x_src_end, x_src_end, #128 + +.Lloop16_init: + sub x_src_end, x_src_end, #16 + cmp x_src, x_src_end + bhi .lessthan16_init + +.Lloop16: + ldr q_data, [x_src] + ldr q_d1_0, [x_dest1] + + and v_data_lo.16b, v_data.16b, v_mask0f.16b + ushr v_data_hi.16b, v_data.16b, #4 + + tbl v_tmp1_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b + tbl v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b + eor v_d1_0.16b, v_tmp1_lo.16b, v_d1_0.16b + eor v_d1_0.16b, v_d1_0.16b, v_tmp1_hi.16b + + str q_d1_0, [x_dest1] + + add x_dest1, x_dest1, #16 + add x_src, x_src, #16 + cmp x_src, x_src_end + bls .Lloop16 + +.lessthan16_init: + sub x_tmp, x_src, x_src_end + cmp x_tmp, #16 + beq .return_pass + +.lessthan16: + mov x_src, x_src_end + sub x_dest1, x_dest1, x_tmp + + ldr x_const, =const_tbl + sub x_const, x_const, x_tmp + ldr q_tmp, [x_const, #16] + + ldr q_data, [x_src] + ldr q_d1_0, [x_dest1] + + and v_data_lo.16b, v_data.16b, v_mask0f.16b + ushr v_data_hi.16b, v_data.16b, #4 + + tbl v_tmp1_lo.16b, {v_gft1_lo.16b}, v_data_lo.16b + tbl v_tmp1_hi.16b, {v_gft1_hi.16b}, v_data_hi.16b + eor v_tmp1_hi.16b, v_tmp1_lo.16b, v_tmp1_hi.16b + and v_tmp1_hi.16b, v_tmp1_hi.16b, v_tmp.16b + eor v_d1_0.16b, v_d1_0.16b, v_tmp1_hi.16b + + str q_d1_0, [x_dest1] + +.return_pass: + mov w_ret, #0 + ret + +.return_fail: + mov w_ret, #1 + ret + +.section .data +.balign 8 +const_tbl: + .dword 0x0000000000000000, 0x0000000000000000 + .dword 0xffffffffffffffff, 0xffffffffffffffff diff --git a/src/isa-l/erasure_code/aarch64/gf_vect_mul_neon.S 
b/src/isa-l/erasure_code/aarch64/gf_vect_mul_neon.S
new file mode 100644
index 000000000..c88c53b8e
--- /dev/null
+++ b/src/isa-l/erasure_code/aarch64/gf_vect_mul_neon.S
@@ -0,0 +1,235 @@
+/**************************************************************
+ Copyright (c) 2019 Huawei Technologies Co., Ltd.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Huawei Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+.text
+
+.global gf_vect_mul_neon
+.type gf_vect_mul_neon, %function
+
+
+/* arguments */
+x_len .req x0
+x_tbl .req x1
+x_src .req x2
+x_dest .req x3
+
+/* returns */
+w_ret .req w0
+
+/* local variables */
+x_dest1 .req x_dest
+x_src_end .req x4
+x_tmp .req x5
+
+/* vectors */
+v_mask0f .req v0
+
+v_gft1_lo .req v2
+v_gft1_hi .req v3
+q_gft1_lo .req q2
+q_gft1_hi .req q3
+
+v_data_0 .req v16
+v_data_1 .req v17
+v_data_2 .req v18
+v_data_3 .req v19
+v_data_4 .req v20
+v_data_5 .req v21
+v_data_6 .req v22
+v_data_7 .req v23
+q_data_0 .req q16
+q_data_1 .req q17
+q_data_2 .req q18
+q_data_3 .req q19
+q_data_4 .req q20
+q_data_5 .req q21
+q_data_6 .req q22
+q_data_7 .req q23
+
+v_data_0_lo .req v24
+v_data_1_lo .req v25
+v_data_2_lo .req v26
+v_data_3_lo .req v27
+v_data_4_lo .req v28
+v_data_5_lo .req v29
+v_data_6_lo .req v30
+v_data_7_lo .req v31
+v_data_0_hi .req v_data_0
+v_data_1_hi .req v_data_1
+v_data_2_hi .req v_data_2
+v_data_3_hi .req v_data_3
+v_data_4_hi .req v_data_4
+v_data_5_hi .req v_data_5
+v_data_6_hi .req v_data_6
+v_data_7_hi .req v_data_7
+
+
+gf_vect_mul_neon:
+ /* less than 32 bytes, return_fail */
+ cmp x_len, #32
+ blt .return_fail
+
+ movi v_mask0f.16b, #0x0f
+ add x_src_end, x_src, x_len
+ ldr q_gft1_lo, [x_tbl]
+ ldr q_gft1_hi, [x_tbl, #16]
+
+
+.Lloop128_init:
+ /* less than 128 bytes, goto Lloop32_init */
+ cmp x_len, #128
+ blt .Lloop32_init
+
+ /* save d8 ~ d15 to stack */
+ sub sp, sp, #64
+ stp d8, d9, [sp]
+ stp d10, d11, [sp, #16]
+ stp d12, d13, [sp, #32]
+ stp d14, d15, [sp, #48]
+
+ sub x_src_end, x_src_end, #128
+
+.Lloop128:
+ ldr q_data_0, [x_src, #16*0]
+ ldr q_data_1, [x_src, #16*1]
+ ldr q_data_2, [x_src, #16*2]
+ ldr q_data_3,
[x_src, #16*3] + ldr q_data_4, [x_src, #16*4] + ldr q_data_5, [x_src, #16*5] + ldr q_data_6, [x_src, #16*6] + ldr q_data_7, [x_src, #16*7] + + and v_data_0_lo.16b, v_data_0.16b, v_mask0f.16b + and v_data_1_lo.16b, v_data_1.16b, v_mask0f.16b + and v_data_2_lo.16b, v_data_2.16b, v_mask0f.16b + and v_data_3_lo.16b, v_data_3.16b, v_mask0f.16b + and v_data_4_lo.16b, v_data_4.16b, v_mask0f.16b + and v_data_5_lo.16b, v_data_5.16b, v_mask0f.16b + and v_data_6_lo.16b, v_data_6.16b, v_mask0f.16b + and v_data_7_lo.16b, v_data_7.16b, v_mask0f.16b + + ushr v_data_0_hi.16b, v_data_0.16b, #4 + ushr v_data_1_hi.16b, v_data_1.16b, #4 + ushr v_data_2_hi.16b, v_data_2.16b, #4 + ushr v_data_3_hi.16b, v_data_3.16b, #4 + ushr v_data_4_hi.16b, v_data_4.16b, #4 + ushr v_data_5_hi.16b, v_data_5.16b, #4 + ushr v_data_6_hi.16b, v_data_6.16b, #4 + ushr v_data_7_hi.16b, v_data_7.16b, #4 + + tbl v_data_0_lo.16b, {v_gft1_lo.16b}, v_data_0_lo.16b + tbl v_data_1_lo.16b, {v_gft1_lo.16b}, v_data_1_lo.16b + tbl v_data_2_lo.16b, {v_gft1_lo.16b}, v_data_2_lo.16b + tbl v_data_3_lo.16b, {v_gft1_lo.16b}, v_data_3_lo.16b + tbl v_data_4_lo.16b, {v_gft1_lo.16b}, v_data_4_lo.16b + tbl v_data_5_lo.16b, {v_gft1_lo.16b}, v_data_5_lo.16b + tbl v_data_6_lo.16b, {v_gft1_lo.16b}, v_data_6_lo.16b + tbl v_data_7_lo.16b, {v_gft1_lo.16b}, v_data_7_lo.16b + + tbl v_data_0_hi.16b, {v_gft1_hi.16b}, v_data_0_hi.16b + tbl v_data_1_hi.16b, {v_gft1_hi.16b}, v_data_1_hi.16b + tbl v_data_2_hi.16b, {v_gft1_hi.16b}, v_data_2_hi.16b + tbl v_data_3_hi.16b, {v_gft1_hi.16b}, v_data_3_hi.16b + tbl v_data_4_hi.16b, {v_gft1_hi.16b}, v_data_4_hi.16b + tbl v_data_5_hi.16b, {v_gft1_hi.16b}, v_data_5_hi.16b + tbl v_data_6_hi.16b, {v_gft1_hi.16b}, v_data_6_hi.16b + tbl v_data_7_hi.16b, {v_gft1_hi.16b}, v_data_7_hi.16b + + eor v_data_0.16b, v_data_0_hi.16b, v_data_0_lo.16b + eor v_data_1.16b, v_data_1_hi.16b, v_data_1_lo.16b + eor v_data_2.16b, v_data_2_hi.16b, v_data_2_lo.16b + eor v_data_3.16b, v_data_3_hi.16b, v_data_3_lo.16b + eor v_data_4.16b, v_data_4_hi.16b, v_data_4_lo.16b + eor v_data_5.16b, v_data_5_hi.16b, v_data_5_lo.16b + eor v_data_6.16b, v_data_6_hi.16b, v_data_6_lo.16b + eor v_data_7.16b, v_data_7_hi.16b, v_data_7_lo.16b + + str q_data_0, [x_dest1, #16*0] + str q_data_1, [x_dest1, #16*1] + str q_data_2, [x_dest1, #16*2] + str q_data_3, [x_dest1, #16*3] + str q_data_4, [x_dest1, #16*4] + str q_data_5, [x_dest1, #16*5] + str q_data_6, [x_dest1, #16*6] + str q_data_7, [x_dest1, #16*7] + + add x_src, x_src, #128 + add x_dest1, x_dest1, #128 + cmp x_src, x_src_end + bls .Lloop128 + +.Lloop128_end: + /* restore d8 ~ d15 */ + ldp d8, d9, [sp] + ldp d10, d11, [sp, #16] + ldp d12, d13, [sp, #32] + ldp d14, d15, [sp, #48] + add sp, sp, #64 + add x_src_end, x_src_end, #128 + +.Lloop32_init: + sub x_src_end, x_src_end, #32 + cmp x_src, x_src_end + bhi .return_fail + +.Lloop32: + ldr q_data_0, [x_src, #16*0] + ldr q_data_1, [x_src, #16*1] + + and v_data_0_lo.16b, v_data_0.16b, v_mask0f.16b + and v_data_1_lo.16b, v_data_1.16b, v_mask0f.16b + ushr v_data_0_hi.16b, v_data_0.16b, #4 + ushr v_data_1_hi.16b, v_data_1.16b, #4 + tbl v_data_0_lo.16b, {v_gft1_lo.16b}, v_data_0_lo.16b + tbl v_data_1_lo.16b, {v_gft1_lo.16b}, v_data_1_lo.16b + tbl v_data_0_hi.16b, {v_gft1_hi.16b}, v_data_0_hi.16b + tbl v_data_1_hi.16b, {v_gft1_hi.16b}, v_data_1_hi.16b + eor v_data_0.16b, v_data_0_hi.16b, v_data_0_lo.16b + eor v_data_1.16b, v_data_1_hi.16b, v_data_1_lo.16b + str q_data_0, [x_dest1, #16*0] + str q_data_1, [x_dest1, #16*1] + + add x_dest1, x_dest1, #32 + add x_src, x_src, #32 + 
cmp x_src, x_src_end
+ bls .Lloop32
+
+.Lloop32_end:
+ sub x_tmp, x_src, x_src_end
+ cmp x_tmp, #32
+ beq .return_pass
+ b .return_fail
+
+.return_pass:
+ mov w_ret, #0
+ ret
+
+.return_fail:
+ mov w_ret, #1
+ ret
diff --git a/src/isa-l/erasure_code/ec_base.c b/src/isa-l/erasure_code/ec_base.c
new file mode 100644
index 000000000..9d76c8df4
--- /dev/null
+++ b/src/isa-l/erasure_code/ec_base.c
@@ -0,0 +1,371 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <limits.h>
+#include <string.h> // for memset
+#include "erasure_code.h"
+#include "ec_base.h" // for GF tables
+
+void ec_init_tables(int k, int rows, unsigned char *a, unsigned char *g_tbls)
+{
+ int i, j;
+
+ for (i = 0; i < rows; i++) {
+ for (j = 0; j < k; j++) {
+ gf_vect_mul_init(*a++, g_tbls);
+ g_tbls += 32;
+ }
+ }
+}
+
+unsigned char gf_mul(unsigned char a, unsigned char b)
+{
+#ifndef GF_LARGE_TABLES
+ int i;
+
+ if ((a == 0) || (b == 0))
+ return 0;
+
+ return gff_base[(i = gflog_base[a] + gflog_base[b]) > 254 ?
i - 255 : i]; +#else + return gf_mul_table_base[b * 256 + a]; +#endif +} + +unsigned char gf_inv(unsigned char a) +{ +#ifndef GF_LARGE_TABLES + if (a == 0) + return 0; + + return gff_base[255 - gflog_base[a]]; +#else + return gf_inv_table_base[a]; +#endif +} + +void gf_gen_rs_matrix(unsigned char *a, int m, int k) +{ + int i, j; + unsigned char p, gen = 1; + + memset(a, 0, k * m); + for (i = 0; i < k; i++) + a[k * i + i] = 1; + + for (i = k; i < m; i++) { + p = 1; + for (j = 0; j < k; j++) { + a[k * i + j] = p; + p = gf_mul(p, gen); + } + gen = gf_mul(gen, 2); + } +} + +void gf_gen_cauchy1_matrix(unsigned char *a, int m, int k) +{ + int i, j; + unsigned char *p; + + // Identity matrix in high position + memset(a, 0, k * m); + for (i = 0; i < k; i++) + a[k * i + i] = 1; + + // For the rest choose 1/(i + j) | i != j + p = &a[k * k]; + for (i = k; i < m; i++) + for (j = 0; j < k; j++) + *p++ = gf_inv(i ^ j); + +} + +int gf_invert_matrix(unsigned char *in_mat, unsigned char *out_mat, const int n) +{ + int i, j, k; + unsigned char temp; + + // Set out_mat[] to the identity matrix + for (i = 0; i < n * n; i++) // memset(out_mat, 0, n*n) + out_mat[i] = 0; + + for (i = 0; i < n; i++) + out_mat[i * n + i] = 1; + + // Inverse + for (i = 0; i < n; i++) { + // Check for 0 in pivot element + if (in_mat[i * n + i] == 0) { + // Find a row with non-zero in current column and swap + for (j = i + 1; j < n; j++) + if (in_mat[j * n + i]) + break; + + if (j == n) // Couldn't find means it's singular + return -1; + + for (k = 0; k < n; k++) { // Swap rows i,j + temp = in_mat[i * n + k]; + in_mat[i * n + k] = in_mat[j * n + k]; + in_mat[j * n + k] = temp; + + temp = out_mat[i * n + k]; + out_mat[i * n + k] = out_mat[j * n + k]; + out_mat[j * n + k] = temp; + } + } + + temp = gf_inv(in_mat[i * n + i]); // 1/pivot + for (j = 0; j < n; j++) { // Scale row i by 1/pivot + in_mat[i * n + j] = gf_mul(in_mat[i * n + j], temp); + out_mat[i * n + j] = gf_mul(out_mat[i * n + j], temp); + } + + for (j = 0; j < n; j++) { + if (j == i) + continue; + + temp = in_mat[j * n + i]; + for (k = 0; k < n; k++) { + out_mat[j * n + k] ^= gf_mul(temp, out_mat[i * n + k]); + in_mat[j * n + k] ^= gf_mul(temp, in_mat[i * n + k]); + } + } + } + return 0; +} + +// Calculates const table gftbl in GF(2^8) from single input A +// gftbl(A) = {A{00}, A{01}, A{02}, ... , A{0f} }, {A{00}, A{10}, A{20}, ... , A{f0} } + +void gf_vect_mul_init(unsigned char c, unsigned char *tbl) +{ + unsigned char c2 = (c << 1) ^ ((c & 0x80) ? 0x1d : 0); //Mult by GF{2} + unsigned char c4 = (c2 << 1) ^ ((c2 & 0x80) ? 0x1d : 0); //Mult by GF{2} + unsigned char c8 = (c4 << 1) ^ ((c4 & 0x80) ? 0x1d : 0); //Mult by GF{2} + +#if __WORDSIZE == 64 || _WIN64 || __x86_64__ + unsigned long long v1, v2, v4, v8, *t; + unsigned long long v10, v20, v40, v80; + unsigned char c17, c18, c20, c24; + + t = (unsigned long long *)tbl; + + v1 = c * 0x0100010001000100ull; + v2 = c2 * 0x0101000001010000ull; + v4 = c4 * 0x0101010100000000ull; + v8 = c8 * 0x0101010101010101ull; + + v4 = v1 ^ v2 ^ v4; + t[0] = v4; + t[1] = v8 ^ v4; + + c17 = (c8 << 1) ^ ((c8 & 0x80) ? 0x1d : 0); //Mult by GF{2} + c18 = (c17 << 1) ^ ((c17 & 0x80) ? 0x1d : 0); //Mult by GF{2} + c20 = (c18 << 1) ^ ((c18 & 0x80) ? 0x1d : 0); //Mult by GF{2} + c24 = (c20 << 1) ^ ((c20 & 0x80) ? 
0x1d : 0); //Mult by GF{2} + + v10 = c17 * 0x0100010001000100ull; + v20 = c18 * 0x0101000001010000ull; + v40 = c20 * 0x0101010100000000ull; + v80 = c24 * 0x0101010101010101ull; + + v40 = v10 ^ v20 ^ v40; + t[2] = v40; + t[3] = v80 ^ v40; + +#else // 32-bit or other + unsigned char c3, c5, c6, c7, c9, c10, c11, c12, c13, c14, c15; + unsigned char c17, c18, c19, c20, c21, c22, c23, c24, c25, c26, c27, c28, c29, c30, + c31; + + c3 = c2 ^ c; + c5 = c4 ^ c; + c6 = c4 ^ c2; + c7 = c4 ^ c3; + + c9 = c8 ^ c; + c10 = c8 ^ c2; + c11 = c8 ^ c3; + c12 = c8 ^ c4; + c13 = c8 ^ c5; + c14 = c8 ^ c6; + c15 = c8 ^ c7; + + tbl[0] = 0; + tbl[1] = c; + tbl[2] = c2; + tbl[3] = c3; + tbl[4] = c4; + tbl[5] = c5; + tbl[6] = c6; + tbl[7] = c7; + tbl[8] = c8; + tbl[9] = c9; + tbl[10] = c10; + tbl[11] = c11; + tbl[12] = c12; + tbl[13] = c13; + tbl[14] = c14; + tbl[15] = c15; + + c17 = (c8 << 1) ^ ((c8 & 0x80) ? 0x1d : 0); //Mult by GF{2} + c18 = (c17 << 1) ^ ((c17 & 0x80) ? 0x1d : 0); //Mult by GF{2} + c19 = c18 ^ c17; + c20 = (c18 << 1) ^ ((c18 & 0x80) ? 0x1d : 0); //Mult by GF{2} + c21 = c20 ^ c17; + c22 = c20 ^ c18; + c23 = c20 ^ c19; + c24 = (c20 << 1) ^ ((c20 & 0x80) ? 0x1d : 0); //Mult by GF{2} + c25 = c24 ^ c17; + c26 = c24 ^ c18; + c27 = c24 ^ c19; + c28 = c24 ^ c20; + c29 = c24 ^ c21; + c30 = c24 ^ c22; + c31 = c24 ^ c23; + + tbl[16] = 0; + tbl[17] = c17; + tbl[18] = c18; + tbl[19] = c19; + tbl[20] = c20; + tbl[21] = c21; + tbl[22] = c22; + tbl[23] = c23; + tbl[24] = c24; + tbl[25] = c25; + tbl[26] = c26; + tbl[27] = c27; + tbl[28] = c28; + tbl[29] = c29; + tbl[30] = c30; + tbl[31] = c31; + +#endif //__WORDSIZE == 64 || _WIN64 || __x86_64__ +} + +void gf_vect_dot_prod_base(int len, int vlen, unsigned char *v, + unsigned char **src, unsigned char *dest) +{ + int i, j; + unsigned char s; + for (i = 0; i < len; i++) { + s = 0; + for (j = 0; j < vlen; j++) + s ^= gf_mul(src[j][i], v[j * 32 + 1]); + + dest[i] = s; + } +} + +void gf_vect_mad_base(int len, int vec, int vec_i, + unsigned char *v, unsigned char *src, unsigned char *dest) +{ + int i; + unsigned char s; + for (i = 0; i < len; i++) { + s = dest[i]; + s ^= gf_mul(src[i], v[vec_i * 32 + 1]); + dest[i] = s; + } +} + +void ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, + unsigned char **src, unsigned char **dest) +{ + int i, j, l; + unsigned char s; + + for (l = 0; l < dests; l++) { + for (i = 0; i < len; i++) { + s = 0; + for (j = 0; j < srcs; j++) + s ^= gf_mul(src[j][i], v[j * 32 + l * srcs * 32 + 1]); + + dest[l][i] = s; + } + } +} + +void ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v, + unsigned char *data, unsigned char **dest) +{ + int i, l; + unsigned char s; + + for (l = 0; l < rows; l++) { + for (i = 0; i < len; i++) { + s = dest[l][i]; + s ^= gf_mul(data[i], v[vec_i * 32 + l * k * 32 + 1]); + + dest[l][i] = s; + } + } +} + +void gf_vect_mul_base(int len, unsigned char *a, unsigned char *src, unsigned char *dest) +{ + //2nd element of table array is ref value used to fill it in + unsigned char c = a[1]; + while (len-- > 0) + *dest++ = gf_mul(c, *src++); +} + +struct slver { + unsigned short snum; + unsigned char ver; + unsigned char core; +}; + +// Version info +struct slver gf_vect_mul_init_slver_00020035; +struct slver gf_vect_mul_init_slver = { 0x0035, 0x02, 0x00 }; + +struct slver ec_encode_data_base_slver_00010135; +struct slver ec_encode_data_base_slver = { 0x0135, 0x01, 0x00 }; + +struct slver gf_vect_mul_base_slver_00010136; +struct slver gf_vect_mul_base_slver = { 0x0136, 0x01, 0x00 
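};
+
+// Hedged usage sketch (illustrative comment, not upstream ISA-L code):
+// a base-path encode of p parity blocks from k data blocks chains the
+// routines above roughly as
+//
+//	gf_gen_cauchy1_matrix(a, k + p, k);		// (k + p) x k generator
+//	ec_init_tables(k, p, &a[k * k], g_tbls);	// skip the identity rows
+//	ec_encode_data_base(len, k, p, g_tbls, data_ptrs, parity_ptrs);
+//
+// Decode follows the same shape after inverting the k x k matrix of
+// surviving rows with gf_invert_matrix(). The v[j * 32 + 1] indexing in
+// the dot-product loops works because gf_vect_mul_init() stores the raw
+// coefficient c at offset 1 of each 32-byte table (tbl[1] = c), which is
+// also why gf_vect_mul_base() reads its multiplier from a[1].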
+struct slver gf_vect_dot_prod_base_slver_00010137;
+struct slver gf_vect_dot_prod_base_slver = { 0x0137, 0x01, 0x00 };
+
+struct slver gf_mul_slver_00000214;
+struct slver gf_mul_slver = { 0x0214, 0x00, 0x00 };
+
+struct slver gf_invert_matrix_slver_00000215;
+struct slver gf_invert_matrix_slver = { 0x0215, 0x00, 0x00 };
+
+struct slver gf_gen_rs_matrix_slver_00000216;
+struct slver gf_gen_rs_matrix_slver = { 0x0216, 0x00, 0x00 };
+
+struct slver gf_gen_cauchy1_matrix_slver_00000217;
+struct slver gf_gen_cauchy1_matrix_slver = { 0x0217, 0x00, 0x00 };
diff --git a/src/isa-l/erasure_code/ec_base.h b/src/isa-l/erasure_code/ec_base.h
new file mode 100644
index 000000000..070b27665
--- /dev/null
+++ b/src/isa-l/erasure_code/ec_base.h
@@ -0,0 +1,6680 @@
+/**********************************************************************
+  Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/ + +#ifndef _EC_BASE_H_ +#define _EC_BASE_H_ + +// Global GF(256) tables +#ifndef GF_LARGE_TABLES +static const unsigned char gff_base[] = { + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1d, 0x3a, + 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26, 0x4c, 0x98, 0x2d, 0x5a, + 0xb4, 0x75, 0xea, 0xc9, 0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, + 0x60, 0xc0, 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35, + 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23, 0x46, 0x8c, + 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0, 0x5d, 0xba, 0x69, 0xd2, + 0xb9, 0x6f, 0xde, 0xa1, 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, + 0x5e, 0xbc, 0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0, + 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f, 0xfe, 0xe1, + 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2, 0xd9, 0xaf, 0x43, 0x86, + 0x11, 0x22, 0x44, 0x88, 0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, + 0x67, 0xce, 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93, + 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc, 0x85, 0x17, + 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9, 0x4f, 0x9e, 0x21, 0x42, + 0x84, 0x15, 0x2a, 0x54, 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, + 0x55, 0xaa, 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73, + 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e, 0xfc, 0xe5, + 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff, 0xe3, 0xdb, 0xab, 0x4b, + 0x96, 0x31, 0x62, 0xc4, 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, + 0xae, 0x41, 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e, + 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6, 0x51, 0xa2, + 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef, 0xc3, 0x9b, 0x2b, 0x56, + 0xac, 0x45, 0x8a, 0x09, 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, + 0xf4, 0xf5, 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16, + 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83, 0x1b, 0x36, + 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01 +}; + +static const unsigned char gflog_base[] = { + 0x00, 0xff, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6, 0x03, 0xdf, + 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b, 0x04, 0x64, 0xe0, 0x0e, + 0x34, 0x8d, 0xef, 0x81, 0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08, + 0x4c, 0x71, 0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21, + 0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 0x45, 0x1d, 0xb5, + 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9, 0xc9, 0x9a, 0x09, 0x78, + 0x4d, 0xe4, 0x72, 0xa6, 0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, + 0x30, 0xfd, 0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88, + 0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd, 0xf1, 0xd2, + 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40, 0x1e, 0x42, 0xb6, 0xa3, + 0xc3, 0x48, 0x7e, 0x6e, 0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85, + 0xba, 0x3d, 0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b, + 0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57, 0x07, 0x70, + 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d, 0x67, 0x4a, 0xde, 0xed, + 0x31, 0xc5, 0xfe, 0x18, 0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8, + 0xb4, 0x7c, 0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e, + 0x37, 0x3f, 0xd1, 0x5b, 0x95, 0xbc, 0xcf, 0xcd, 0x90, 0x87, + 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61, 0xf2, 0x56, 0xd3, 0xab, + 0x14, 0x2a, 0x5d, 0x9e, 0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, + 0x41, 0xa2, 0x1f, 0x2d, 0x43, 0xd8, 0xb7, 0x7b, 0xa4, 0x76, + 0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6, 0x6c, 0xa1, + 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa, 0xfb, 0x60, 0x86, 0xb1, + 0xbb, 0xcc, 0x3e, 0x5a, 0xcb, 0x59, 0x5f, 0xb0, 0x9c, 0xa9, + 0xa0, 0x51, 0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7, + 0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8, 0x74, 0xd6, + 0xf4, 0xea, 0xa8, 0x50, 0x58, 0xaf +}; +#else +static const unsigned char gf_mul_table_base[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, + 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, + 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, + 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, + 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, + 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, + 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, + 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, + 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, + 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, + 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, + 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, + 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, + 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, + 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, + 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, + 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, + 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, + 0xfe, 0xff, 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, + 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e, 0x20, 0x22, + 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, + 0x38, 0x3a, 0x3c, 0x3e, 0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, + 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e, + 0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, + 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e, 0x80, 0x82, 0x84, 0x86, + 0x88, 
0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, + 0x9c, 0x9e, 0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, + 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe, 0xc0, 0xc2, + 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, + 0xd8, 0xda, 0xdc, 0xde, 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, + 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe, + 0x1d, 0x1f, 0x19, 0x1b, 0x15, 0x17, 0x11, 0x13, 0x0d, 0x0f, + 0x09, 0x0b, 0x05, 0x07, 0x01, 0x03, 0x3d, 0x3f, 0x39, 0x3b, + 0x35, 0x37, 0x31, 0x33, 0x2d, 0x2f, 0x29, 0x2b, 0x25, 0x27, + 0x21, 0x23, 0x5d, 0x5f, 0x59, 0x5b, 0x55, 0x57, 0x51, 0x53, + 0x4d, 0x4f, 0x49, 0x4b, 0x45, 0x47, 0x41, 0x43, 0x7d, 0x7f, + 0x79, 0x7b, 0x75, 0x77, 0x71, 0x73, 0x6d, 0x6f, 0x69, 0x6b, + 0x65, 0x67, 0x61, 0x63, 0x9d, 0x9f, 0x99, 0x9b, 0x95, 0x97, + 0x91, 0x93, 0x8d, 0x8f, 0x89, 0x8b, 0x85, 0x87, 0x81, 0x83, + 0xbd, 0xbf, 0xb9, 0xbb, 0xb5, 0xb7, 0xb1, 0xb3, 0xad, 0xaf, + 0xa9, 0xab, 0xa5, 0xa7, 0xa1, 0xa3, 0xdd, 0xdf, 0xd9, 0xdb, + 0xd5, 0xd7, 0xd1, 0xd3, 0xcd, 0xcf, 0xc9, 0xcb, 0xc5, 0xc7, + 0xc1, 0xc3, 0xfd, 0xff, 0xf9, 0xfb, 0xf5, 0xf7, 0xf1, 0xf3, + 0xed, 0xef, 0xe9, 0xeb, 0xe5, 0xe7, 0xe1, 0xe3, 0x00, 0x03, + 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, + 0x14, 0x17, 0x12, 0x11, 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, + 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21, + 0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, + 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71, 0x50, 0x53, 0x56, 0x55, + 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, + 0x42, 0x41, 0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, + 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1, 0xf0, 0xf3, + 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, + 0xe4, 0xe7, 0xe2, 0xe1, 0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, + 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1, + 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, + 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81, 0x9d, 0x9e, 0x9b, 0x98, + 0x91, 0x92, 0x97, 0x94, 0x85, 0x86, 0x83, 0x80, 0x89, 0x8a, + 0x8f, 0x8c, 0xad, 0xae, 0xab, 0xa8, 0xa1, 0xa2, 0xa7, 0xa4, + 0xb5, 0xb6, 0xb3, 0xb0, 0xb9, 0xba, 0xbf, 0xbc, 0xfd, 0xfe, + 0xfb, 0xf8, 0xf1, 0xf2, 0xf7, 0xf4, 0xe5, 0xe6, 0xe3, 0xe0, + 0xe9, 0xea, 0xef, 0xec, 0xcd, 0xce, 0xcb, 0xc8, 0xc1, 0xc2, + 0xc7, 0xc4, 0xd5, 0xd6, 0xd3, 0xd0, 0xd9, 0xda, 0xdf, 0xdc, + 0x5d, 0x5e, 0x5b, 0x58, 0x51, 0x52, 0x57, 0x54, 0x45, 0x46, + 0x43, 0x40, 0x49, 0x4a, 0x4f, 0x4c, 0x6d, 0x6e, 0x6b, 0x68, + 0x61, 0x62, 0x67, 0x64, 0x75, 0x76, 0x73, 0x70, 0x79, 0x7a, + 0x7f, 0x7c, 0x3d, 0x3e, 0x3b, 0x38, 0x31, 0x32, 0x37, 0x34, + 0x25, 0x26, 0x23, 0x20, 0x29, 0x2a, 0x2f, 0x2c, 0x0d, 0x0e, + 0x0b, 0x08, 0x01, 0x02, 0x07, 0x04, 0x15, 0x16, 0x13, 0x10, + 0x19, 0x1a, 0x1f, 0x1c, 0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, + 0x18, 0x1c, 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c, + 0x40, 0x44, 0x48, 0x4c, 0x50, 0x54, 0x58, 0x5c, 0x60, 0x64, + 0x68, 0x6c, 0x70, 0x74, 0x78, 0x7c, 0x80, 0x84, 0x88, 0x8c, + 0x90, 0x94, 0x98, 0x9c, 0xa0, 0xa4, 0xa8, 0xac, 0xb0, 0xb4, + 0xb8, 0xbc, 0xc0, 0xc4, 0xc8, 0xcc, 0xd0, 0xd4, 0xd8, 0xdc, + 0xe0, 0xe4, 0xe8, 0xec, 0xf0, 0xf4, 0xf8, 0xfc, 0x1d, 0x19, + 0x15, 0x11, 0x0d, 0x09, 0x05, 0x01, 0x3d, 0x39, 0x35, 0x31, + 0x2d, 0x29, 0x25, 0x21, 0x5d, 0x59, 0x55, 0x51, 0x4d, 0x49, + 0x45, 0x41, 0x7d, 0x79, 0x75, 0x71, 0x6d, 0x69, 0x65, 0x61, + 0x9d, 0x99, 0x95, 0x91, 0x8d, 0x89, 0x85, 0x81, 0xbd, 0xb9, + 0xb5, 0xb1, 0xad, 0xa9, 0xa5, 0xa1, 0xdd, 0xd9, 0xd5, 0xd1, + 0xcd, 0xc9, 0xc5, 0xc1, 0xfd, 0xf9, 0xf5, 0xf1, 0xed, 0xe9, + 0xe5, 0xe1, 0x3a, 0x3e, 
0x32, 0x36, 0x2a, 0x2e, 0x22, 0x26, + 0x1a, 0x1e, 0x12, 0x16, 0x0a, 0x0e, 0x02, 0x06, 0x7a, 0x7e, + 0x72, 0x76, 0x6a, 0x6e, 0x62, 0x66, 0x5a, 0x5e, 0x52, 0x56, + 0x4a, 0x4e, 0x42, 0x46, 0xba, 0xbe, 0xb2, 0xb6, 0xaa, 0xae, + 0xa2, 0xa6, 0x9a, 0x9e, 0x92, 0x96, 0x8a, 0x8e, 0x82, 0x86, + 0xfa, 0xfe, 0xf2, 0xf6, 0xea, 0xee, 0xe2, 0xe6, 0xda, 0xde, + 0xd2, 0xd6, 0xca, 0xce, 0xc2, 0xc6, 0x27, 0x23, 0x2f, 0x2b, + 0x37, 0x33, 0x3f, 0x3b, 0x07, 0x03, 0x0f, 0x0b, 0x17, 0x13, + 0x1f, 0x1b, 0x67, 0x63, 0x6f, 0x6b, 0x77, 0x73, 0x7f, 0x7b, + 0x47, 0x43, 0x4f, 0x4b, 0x57, 0x53, 0x5f, 0x5b, 0xa7, 0xa3, + 0xaf, 0xab, 0xb7, 0xb3, 0xbf, 0xbb, 0x87, 0x83, 0x8f, 0x8b, + 0x97, 0x93, 0x9f, 0x9b, 0xe7, 0xe3, 0xef, 0xeb, 0xf7, 0xf3, + 0xff, 0xfb, 0xc7, 0xc3, 0xcf, 0xcb, 0xd7, 0xd3, 0xdf, 0xdb, + 0x00, 0x05, 0x0a, 0x0f, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d, + 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33, 0x50, 0x55, 0x5a, 0x5f, + 0x44, 0x41, 0x4e, 0x4b, 0x78, 0x7d, 0x72, 0x77, 0x6c, 0x69, + 0x66, 0x63, 0xa0, 0xa5, 0xaa, 0xaf, 0xb4, 0xb1, 0xbe, 0xbb, + 0x88, 0x8d, 0x82, 0x87, 0x9c, 0x99, 0x96, 0x93, 0xf0, 0xf5, + 0xfa, 0xff, 0xe4, 0xe1, 0xee, 0xeb, 0xd8, 0xdd, 0xd2, 0xd7, + 0xcc, 0xc9, 0xc6, 0xc3, 0x5d, 0x58, 0x57, 0x52, 0x49, 0x4c, + 0x43, 0x46, 0x75, 0x70, 0x7f, 0x7a, 0x61, 0x64, 0x6b, 0x6e, + 0x0d, 0x08, 0x07, 0x02, 0x19, 0x1c, 0x13, 0x16, 0x25, 0x20, + 0x2f, 0x2a, 0x31, 0x34, 0x3b, 0x3e, 0xfd, 0xf8, 0xf7, 0xf2, + 0xe9, 0xec, 0xe3, 0xe6, 0xd5, 0xd0, 0xdf, 0xda, 0xc1, 0xc4, + 0xcb, 0xce, 0xad, 0xa8, 0xa7, 0xa2, 0xb9, 0xbc, 0xb3, 0xb6, + 0x85, 0x80, 0x8f, 0x8a, 0x91, 0x94, 0x9b, 0x9e, 0xba, 0xbf, + 0xb0, 0xb5, 0xae, 0xab, 0xa4, 0xa1, 0x92, 0x97, 0x98, 0x9d, + 0x86, 0x83, 0x8c, 0x89, 0xea, 0xef, 0xe0, 0xe5, 0xfe, 0xfb, + 0xf4, 0xf1, 0xc2, 0xc7, 0xc8, 0xcd, 0xd6, 0xd3, 0xdc, 0xd9, + 0x1a, 0x1f, 0x10, 0x15, 0x0e, 0x0b, 0x04, 0x01, 0x32, 0x37, + 0x38, 0x3d, 0x26, 0x23, 0x2c, 0x29, 0x4a, 0x4f, 0x40, 0x45, + 0x5e, 0x5b, 0x54, 0x51, 0x62, 0x67, 0x68, 0x6d, 0x76, 0x73, + 0x7c, 0x79, 0xe7, 0xe2, 0xed, 0xe8, 0xf3, 0xf6, 0xf9, 0xfc, + 0xcf, 0xca, 0xc5, 0xc0, 0xdb, 0xde, 0xd1, 0xd4, 0xb7, 0xb2, + 0xbd, 0xb8, 0xa3, 0xa6, 0xa9, 0xac, 0x9f, 0x9a, 0x95, 0x90, + 0x8b, 0x8e, 0x81, 0x84, 0x47, 0x42, 0x4d, 0x48, 0x53, 0x56, + 0x59, 0x5c, 0x6f, 0x6a, 0x65, 0x60, 0x7b, 0x7e, 0x71, 0x74, + 0x17, 0x12, 0x1d, 0x18, 0x03, 0x06, 0x09, 0x0c, 0x3f, 0x3a, + 0x35, 0x30, 0x2b, 0x2e, 0x21, 0x24, 0x00, 0x06, 0x0c, 0x0a, + 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, + 0x24, 0x22, 0x60, 0x66, 0x6c, 0x6a, 0x78, 0x7e, 0x74, 0x72, + 0x50, 0x56, 0x5c, 0x5a, 0x48, 0x4e, 0x44, 0x42, 0xc0, 0xc6, + 0xcc, 0xca, 0xd8, 0xde, 0xd4, 0xd2, 0xf0, 0xf6, 0xfc, 0xfa, + 0xe8, 0xee, 0xe4, 0xe2, 0xa0, 0xa6, 0xac, 0xaa, 0xb8, 0xbe, + 0xb4, 0xb2, 0x90, 0x96, 0x9c, 0x9a, 0x88, 0x8e, 0x84, 0x82, + 0x9d, 0x9b, 0x91, 0x97, 0x85, 0x83, 0x89, 0x8f, 0xad, 0xab, + 0xa1, 0xa7, 0xb5, 0xb3, 0xb9, 0xbf, 0xfd, 0xfb, 0xf1, 0xf7, + 0xe5, 0xe3, 0xe9, 0xef, 0xcd, 0xcb, 0xc1, 0xc7, 0xd5, 0xd3, + 0xd9, 0xdf, 0x5d, 0x5b, 0x51, 0x57, 0x45, 0x43, 0x49, 0x4f, + 0x6d, 0x6b, 0x61, 0x67, 0x75, 0x73, 0x79, 0x7f, 0x3d, 0x3b, + 0x31, 0x37, 0x25, 0x23, 0x29, 0x2f, 0x0d, 0x0b, 0x01, 0x07, + 0x15, 0x13, 0x19, 0x1f, 0x27, 0x21, 0x2b, 0x2d, 0x3f, 0x39, + 0x33, 0x35, 0x17, 0x11, 0x1b, 0x1d, 0x0f, 0x09, 0x03, 0x05, + 0x47, 0x41, 0x4b, 0x4d, 0x5f, 0x59, 0x53, 0x55, 0x77, 0x71, + 0x7b, 0x7d, 0x6f, 0x69, 0x63, 0x65, 0xe7, 0xe1, 0xeb, 0xed, + 0xff, 0xf9, 0xf3, 0xf5, 0xd7, 0xd1, 0xdb, 0xdd, 0xcf, 0xc9, + 0xc3, 0xc5, 0x87, 0x81, 0x8b, 0x8d, 0x9f, 0x99, 0x93, 0x95, + 0xb7, 0xb1, 0xbb, 0xbd, 0xaf, 0xa9, 0xa3, 
0xa5, 0xba, 0xbc, + 0xb6, 0xb0, 0xa2, 0xa4, 0xae, 0xa8, 0x8a, 0x8c, 0x86, 0x80, + 0x92, 0x94, 0x9e, 0x98, 0xda, 0xdc, 0xd6, 0xd0, 0xc2, 0xc4, + 0xce, 0xc8, 0xea, 0xec, 0xe6, 0xe0, 0xf2, 0xf4, 0xfe, 0xf8, + 0x7a, 0x7c, 0x76, 0x70, 0x62, 0x64, 0x6e, 0x68, 0x4a, 0x4c, + 0x46, 0x40, 0x52, 0x54, 0x5e, 0x58, 0x1a, 0x1c, 0x16, 0x10, + 0x02, 0x04, 0x0e, 0x08, 0x2a, 0x2c, 0x26, 0x20, 0x32, 0x34, + 0x3e, 0x38, 0x00, 0x07, 0x0e, 0x09, 0x1c, 0x1b, 0x12, 0x15, + 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d, 0x70, 0x77, + 0x7e, 0x79, 0x6c, 0x6b, 0x62, 0x65, 0x48, 0x4f, 0x46, 0x41, + 0x54, 0x53, 0x5a, 0x5d, 0xe0, 0xe7, 0xee, 0xe9, 0xfc, 0xfb, + 0xf2, 0xf5, 0xd8, 0xdf, 0xd6, 0xd1, 0xc4, 0xc3, 0xca, 0xcd, + 0x90, 0x97, 0x9e, 0x99, 0x8c, 0x8b, 0x82, 0x85, 0xa8, 0xaf, + 0xa6, 0xa1, 0xb4, 0xb3, 0xba, 0xbd, 0xdd, 0xda, 0xd3, 0xd4, + 0xc1, 0xc6, 0xcf, 0xc8, 0xe5, 0xe2, 0xeb, 0xec, 0xf9, 0xfe, + 0xf7, 0xf0, 0xad, 0xaa, 0xa3, 0xa4, 0xb1, 0xb6, 0xbf, 0xb8, + 0x95, 0x92, 0x9b, 0x9c, 0x89, 0x8e, 0x87, 0x80, 0x3d, 0x3a, + 0x33, 0x34, 0x21, 0x26, 0x2f, 0x28, 0x05, 0x02, 0x0b, 0x0c, + 0x19, 0x1e, 0x17, 0x10, 0x4d, 0x4a, 0x43, 0x44, 0x51, 0x56, + 0x5f, 0x58, 0x75, 0x72, 0x7b, 0x7c, 0x69, 0x6e, 0x67, 0x60, + 0xa7, 0xa0, 0xa9, 0xae, 0xbb, 0xbc, 0xb5, 0xb2, 0x9f, 0x98, + 0x91, 0x96, 0x83, 0x84, 0x8d, 0x8a, 0xd7, 0xd0, 0xd9, 0xde, + 0xcb, 0xcc, 0xc5, 0xc2, 0xef, 0xe8, 0xe1, 0xe6, 0xf3, 0xf4, + 0xfd, 0xfa, 0x47, 0x40, 0x49, 0x4e, 0x5b, 0x5c, 0x55, 0x52, + 0x7f, 0x78, 0x71, 0x76, 0x63, 0x64, 0x6d, 0x6a, 0x37, 0x30, + 0x39, 0x3e, 0x2b, 0x2c, 0x25, 0x22, 0x0f, 0x08, 0x01, 0x06, + 0x13, 0x14, 0x1d, 0x1a, 0x7a, 0x7d, 0x74, 0x73, 0x66, 0x61, + 0x68, 0x6f, 0x42, 0x45, 0x4c, 0x4b, 0x5e, 0x59, 0x50, 0x57, + 0x0a, 0x0d, 0x04, 0x03, 0x16, 0x11, 0x18, 0x1f, 0x32, 0x35, + 0x3c, 0x3b, 0x2e, 0x29, 0x20, 0x27, 0x9a, 0x9d, 0x94, 0x93, + 0x86, 0x81, 0x88, 0x8f, 0xa2, 0xa5, 0xac, 0xab, 0xbe, 0xb9, + 0xb0, 0xb7, 0xea, 0xed, 0xe4, 0xe3, 0xf6, 0xf1, 0xf8, 0xff, + 0xd2, 0xd5, 0xdc, 0xdb, 0xce, 0xc9, 0xc0, 0xc7, 0x00, 0x08, + 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, + 0x60, 0x68, 0x70, 0x78, 0x80, 0x88, 0x90, 0x98, 0xa0, 0xa8, + 0xb0, 0xb8, 0xc0, 0xc8, 0xd0, 0xd8, 0xe0, 0xe8, 0xf0, 0xf8, + 0x1d, 0x15, 0x0d, 0x05, 0x3d, 0x35, 0x2d, 0x25, 0x5d, 0x55, + 0x4d, 0x45, 0x7d, 0x75, 0x6d, 0x65, 0x9d, 0x95, 0x8d, 0x85, + 0xbd, 0xb5, 0xad, 0xa5, 0xdd, 0xd5, 0xcd, 0xc5, 0xfd, 0xf5, + 0xed, 0xe5, 0x3a, 0x32, 0x2a, 0x22, 0x1a, 0x12, 0x0a, 0x02, + 0x7a, 0x72, 0x6a, 0x62, 0x5a, 0x52, 0x4a, 0x42, 0xba, 0xb2, + 0xaa, 0xa2, 0x9a, 0x92, 0x8a, 0x82, 0xfa, 0xf2, 0xea, 0xe2, + 0xda, 0xd2, 0xca, 0xc2, 0x27, 0x2f, 0x37, 0x3f, 0x07, 0x0f, + 0x17, 0x1f, 0x67, 0x6f, 0x77, 0x7f, 0x47, 0x4f, 0x57, 0x5f, + 0xa7, 0xaf, 0xb7, 0xbf, 0x87, 0x8f, 0x97, 0x9f, 0xe7, 0xef, + 0xf7, 0xff, 0xc7, 0xcf, 0xd7, 0xdf, 0x74, 0x7c, 0x64, 0x6c, + 0x54, 0x5c, 0x44, 0x4c, 0x34, 0x3c, 0x24, 0x2c, 0x14, 0x1c, + 0x04, 0x0c, 0xf4, 0xfc, 0xe4, 0xec, 0xd4, 0xdc, 0xc4, 0xcc, + 0xb4, 0xbc, 0xa4, 0xac, 0x94, 0x9c, 0x84, 0x8c, 0x69, 0x61, + 0x79, 0x71, 0x49, 0x41, 0x59, 0x51, 0x29, 0x21, 0x39, 0x31, + 0x09, 0x01, 0x19, 0x11, 0xe9, 0xe1, 0xf9, 0xf1, 0xc9, 0xc1, + 0xd9, 0xd1, 0xa9, 0xa1, 0xb9, 0xb1, 0x89, 0x81, 0x99, 0x91, + 0x4e, 0x46, 0x5e, 0x56, 0x6e, 0x66, 0x7e, 0x76, 0x0e, 0x06, + 0x1e, 0x16, 0x2e, 0x26, 0x3e, 0x36, 0xce, 0xc6, 0xde, 0xd6, + 0xee, 0xe6, 0xfe, 0xf6, 0x8e, 0x86, 0x9e, 0x96, 0xae, 0xa6, + 0xbe, 0xb6, 0x53, 0x5b, 0x43, 0x4b, 0x73, 0x7b, 0x63, 0x6b, + 0x13, 0x1b, 0x03, 0x0b, 0x33, 0x3b, 0x23, 0x2b, 0xd3, 0xdb, + 0xc3, 0xcb, 0xf3, 0xfb, 0xe3, 0xeb, 0x93, 0x9b, 0x83, 0x8b, + 
0xb3, 0xbb, 0xa3, 0xab, 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, + 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77, + 0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, + 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7, 0x3d, 0x34, 0x2f, 0x26, + 0x19, 0x10, 0x0b, 0x02, 0x75, 0x7c, 0x67, 0x6e, 0x51, 0x58, + 0x43, 0x4a, 0xad, 0xa4, 0xbf, 0xb6, 0x89, 0x80, 0x9b, 0x92, + 0xe5, 0xec, 0xf7, 0xfe, 0xc1, 0xc8, 0xd3, 0xda, 0x7a, 0x73, + 0x68, 0x61, 0x5e, 0x57, 0x4c, 0x45, 0x32, 0x3b, 0x20, 0x29, + 0x16, 0x1f, 0x04, 0x0d, 0xea, 0xe3, 0xf8, 0xf1, 0xce, 0xc7, + 0xdc, 0xd5, 0xa2, 0xab, 0xb0, 0xb9, 0x86, 0x8f, 0x94, 0x9d, + 0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, + 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30, 0xd7, 0xde, 0xc5, 0xcc, + 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, + 0xa9, 0xa0, 0xf4, 0xfd, 0xe6, 0xef, 0xd0, 0xd9, 0xc2, 0xcb, + 0xbc, 0xb5, 0xae, 0xa7, 0x98, 0x91, 0x8a, 0x83, 0x64, 0x6d, + 0x76, 0x7f, 0x40, 0x49, 0x52, 0x5b, 0x2c, 0x25, 0x3e, 0x37, + 0x08, 0x01, 0x1a, 0x13, 0xc9, 0xc0, 0xdb, 0xd2, 0xed, 0xe4, + 0xff, 0xf6, 0x81, 0x88, 0x93, 0x9a, 0xa5, 0xac, 0xb7, 0xbe, + 0x59, 0x50, 0x4b, 0x42, 0x7d, 0x74, 0x6f, 0x66, 0x11, 0x18, + 0x03, 0x0a, 0x35, 0x3c, 0x27, 0x2e, 0x8e, 0x87, 0x9c, 0x95, + 0xaa, 0xa3, 0xb8, 0xb1, 0xc6, 0xcf, 0xd4, 0xdd, 0xe2, 0xeb, + 0xf0, 0xf9, 0x1e, 0x17, 0x0c, 0x05, 0x3a, 0x33, 0x28, 0x21, + 0x56, 0x5f, 0x44, 0x4d, 0x72, 0x7b, 0x60, 0x69, 0xb3, 0xba, + 0xa1, 0xa8, 0x97, 0x9e, 0x85, 0x8c, 0xfb, 0xf2, 0xe9, 0xe0, + 0xdf, 0xd6, 0xcd, 0xc4, 0x23, 0x2a, 0x31, 0x38, 0x07, 0x0e, + 0x15, 0x1c, 0x6b, 0x62, 0x79, 0x70, 0x4f, 0x46, 0x5d, 0x54, + 0x00, 0x0a, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, + 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66, 0xa0, 0xaa, 0xb4, 0xbe, + 0x88, 0x82, 0x9c, 0x96, 0xf0, 0xfa, 0xe4, 0xee, 0xd8, 0xd2, + 0xcc, 0xc6, 0x5d, 0x57, 0x49, 0x43, 0x75, 0x7f, 0x61, 0x6b, + 0x0d, 0x07, 0x19, 0x13, 0x25, 0x2f, 0x31, 0x3b, 0xfd, 0xf7, + 0xe9, 0xe3, 0xd5, 0xdf, 0xc1, 0xcb, 0xad, 0xa7, 0xb9, 0xb3, + 0x85, 0x8f, 0x91, 0x9b, 0xba, 0xb0, 0xae, 0xa4, 0x92, 0x98, + 0x86, 0x8c, 0xea, 0xe0, 0xfe, 0xf4, 0xc2, 0xc8, 0xd6, 0xdc, + 0x1a, 0x10, 0x0e, 0x04, 0x32, 0x38, 0x26, 0x2c, 0x4a, 0x40, + 0x5e, 0x54, 0x62, 0x68, 0x76, 0x7c, 0xe7, 0xed, 0xf3, 0xf9, + 0xcf, 0xc5, 0xdb, 0xd1, 0xb7, 0xbd, 0xa3, 0xa9, 0x9f, 0x95, + 0x8b, 0x81, 0x47, 0x4d, 0x53, 0x59, 0x6f, 0x65, 0x7b, 0x71, + 0x17, 0x1d, 0x03, 0x09, 0x3f, 0x35, 0x2b, 0x21, 0x69, 0x63, + 0x7d, 0x77, 0x41, 0x4b, 0x55, 0x5f, 0x39, 0x33, 0x2d, 0x27, + 0x11, 0x1b, 0x05, 0x0f, 0xc9, 0xc3, 0xdd, 0xd7, 0xe1, 0xeb, + 0xf5, 0xff, 0x99, 0x93, 0x8d, 0x87, 0xb1, 0xbb, 0xa5, 0xaf, + 0x34, 0x3e, 0x20, 0x2a, 0x1c, 0x16, 0x08, 0x02, 0x64, 0x6e, + 0x70, 0x7a, 0x4c, 0x46, 0x58, 0x52, 0x94, 0x9e, 0x80, 0x8a, + 0xbc, 0xb6, 0xa8, 0xa2, 0xc4, 0xce, 0xd0, 0xda, 0xec, 0xe6, + 0xf8, 0xf2, 0xd3, 0xd9, 0xc7, 0xcd, 0xfb, 0xf1, 0xef, 0xe5, + 0x83, 0x89, 0x97, 0x9d, 0xab, 0xa1, 0xbf, 0xb5, 0x73, 0x79, + 0x67, 0x6d, 0x5b, 0x51, 0x4f, 0x45, 0x23, 0x29, 0x37, 0x3d, + 0x0b, 0x01, 0x1f, 0x15, 0x8e, 0x84, 0x9a, 0x90, 0xa6, 0xac, + 0xb2, 0xb8, 0xde, 0xd4, 0xca, 0xc0, 0xf6, 0xfc, 0xe2, 0xe8, + 0x2e, 0x24, 0x3a, 0x30, 0x06, 0x0c, 0x12, 0x18, 0x7e, 0x74, + 0x6a, 0x60, 0x56, 0x5c, 0x42, 0x48, 0x00, 0x0b, 0x16, 0x1d, + 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, + 0x62, 0x69, 0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, + 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9, 0x7d, 0x76, + 0x6b, 0x60, 0x51, 0x5a, 0x47, 0x4c, 0x25, 0x2e, 0x33, 0x38, + 0x09, 0x02, 0x1f, 0x14, 0xcd, 0xc6, 0xdb, 0xd0, 0xe1, 0xea, + 0xf7, 0xfc, 0x95, 
0x9e, 0x83, 0x88, 0xb9, 0xb2, 0xaf, 0xa4, + 0xfa, 0xf1, 0xec, 0xe7, 0xd6, 0xdd, 0xc0, 0xcb, 0xa2, 0xa9, + 0xb4, 0xbf, 0x8e, 0x85, 0x98, 0x93, 0x4a, 0x41, 0x5c, 0x57, + 0x66, 0x6d, 0x70, 0x7b, 0x12, 0x19, 0x04, 0x0f, 0x3e, 0x35, + 0x28, 0x23, 0x87, 0x8c, 0x91, 0x9a, 0xab, 0xa0, 0xbd, 0xb6, + 0xdf, 0xd4, 0xc9, 0xc2, 0xf3, 0xf8, 0xe5, 0xee, 0x37, 0x3c, + 0x21, 0x2a, 0x1b, 0x10, 0x0d, 0x06, 0x6f, 0x64, 0x79, 0x72, + 0x43, 0x48, 0x55, 0x5e, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, + 0xd3, 0xd8, 0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, + 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68, 0x01, 0x0a, + 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x94, 0x9f, 0x82, 0x89, + 0xb8, 0xb3, 0xae, 0xa5, 0xcc, 0xc7, 0xda, 0xd1, 0xe0, 0xeb, + 0xf6, 0xfd, 0x24, 0x2f, 0x32, 0x39, 0x08, 0x03, 0x1e, 0x15, + 0x7c, 0x77, 0x6a, 0x61, 0x50, 0x5b, 0x46, 0x4d, 0x13, 0x18, + 0x05, 0x0e, 0x3f, 0x34, 0x29, 0x22, 0x4b, 0x40, 0x5d, 0x56, + 0x67, 0x6c, 0x71, 0x7a, 0xa3, 0xa8, 0xb5, 0xbe, 0x8f, 0x84, + 0x99, 0x92, 0xfb, 0xf0, 0xed, 0xe6, 0xd7, 0xdc, 0xc1, 0xca, + 0x6e, 0x65, 0x78, 0x73, 0x42, 0x49, 0x54, 0x5f, 0x36, 0x3d, + 0x20, 0x2b, 0x1a, 0x11, 0x0c, 0x07, 0xde, 0xd5, 0xc8, 0xc3, + 0xf2, 0xf9, 0xe4, 0xef, 0x86, 0x8d, 0x90, 0x9b, 0xaa, 0xa1, + 0xbc, 0xb7, 0x00, 0x0c, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24, + 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44, 0xc0, 0xcc, + 0xd8, 0xd4, 0xf0, 0xfc, 0xe8, 0xe4, 0xa0, 0xac, 0xb8, 0xb4, + 0x90, 0x9c, 0x88, 0x84, 0x9d, 0x91, 0x85, 0x89, 0xad, 0xa1, + 0xb5, 0xb9, 0xfd, 0xf1, 0xe5, 0xe9, 0xcd, 0xc1, 0xd5, 0xd9, + 0x5d, 0x51, 0x45, 0x49, 0x6d, 0x61, 0x75, 0x79, 0x3d, 0x31, + 0x25, 0x29, 0x0d, 0x01, 0x15, 0x19, 0x27, 0x2b, 0x3f, 0x33, + 0x17, 0x1b, 0x0f, 0x03, 0x47, 0x4b, 0x5f, 0x53, 0x77, 0x7b, + 0x6f, 0x63, 0xe7, 0xeb, 0xff, 0xf3, 0xd7, 0xdb, 0xcf, 0xc3, + 0x87, 0x8b, 0x9f, 0x93, 0xb7, 0xbb, 0xaf, 0xa3, 0xba, 0xb6, + 0xa2, 0xae, 0x8a, 0x86, 0x92, 0x9e, 0xda, 0xd6, 0xc2, 0xce, + 0xea, 0xe6, 0xf2, 0xfe, 0x7a, 0x76, 0x62, 0x6e, 0x4a, 0x46, + 0x52, 0x5e, 0x1a, 0x16, 0x02, 0x0e, 0x2a, 0x26, 0x32, 0x3e, + 0x4e, 0x42, 0x56, 0x5a, 0x7e, 0x72, 0x66, 0x6a, 0x2e, 0x22, + 0x36, 0x3a, 0x1e, 0x12, 0x06, 0x0a, 0x8e, 0x82, 0x96, 0x9a, + 0xbe, 0xb2, 0xa6, 0xaa, 0xee, 0xe2, 0xf6, 0xfa, 0xde, 0xd2, + 0xc6, 0xca, 0xd3, 0xdf, 0xcb, 0xc7, 0xe3, 0xef, 0xfb, 0xf7, + 0xb3, 0xbf, 0xab, 0xa7, 0x83, 0x8f, 0x9b, 0x97, 0x13, 0x1f, + 0x0b, 0x07, 0x23, 0x2f, 0x3b, 0x37, 0x73, 0x7f, 0x6b, 0x67, + 0x43, 0x4f, 0x5b, 0x57, 0x69, 0x65, 0x71, 0x7d, 0x59, 0x55, + 0x41, 0x4d, 0x09, 0x05, 0x11, 0x1d, 0x39, 0x35, 0x21, 0x2d, + 0xa9, 0xa5, 0xb1, 0xbd, 0x99, 0x95, 0x81, 0x8d, 0xc9, 0xc5, + 0xd1, 0xdd, 0xf9, 0xf5, 0xe1, 0xed, 0xf4, 0xf8, 0xec, 0xe0, + 0xc4, 0xc8, 0xdc, 0xd0, 0x94, 0x98, 0x8c, 0x80, 0xa4, 0xa8, + 0xbc, 0xb0, 0x34, 0x38, 0x2c, 0x20, 0x04, 0x08, 0x1c, 0x10, + 0x54, 0x58, 0x4c, 0x40, 0x64, 0x68, 0x7c, 0x70, 0x00, 0x0d, + 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, + 0x5c, 0x51, 0x46, 0x4b, 0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, + 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b, + 0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, + 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6, 0x6d, 0x60, 0x77, 0x7a, + 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, + 0x2b, 0x26, 0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, + 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c, 0xb7, 0xba, + 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, + 0xeb, 0xe6, 0xf1, 0xfc, 0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, + 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91, + 0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 
0x24, 0x29, 0x62, 0x6f, + 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41, 0xce, 0xc3, 0xd4, 0xd9, + 0xfa, 0xf7, 0xe0, 0xed, 0xa6, 0xab, 0xbc, 0xb1, 0x92, 0x9f, + 0x88, 0x85, 0x1e, 0x13, 0x04, 0x09, 0x2a, 0x27, 0x30, 0x3d, + 0x76, 0x7b, 0x6c, 0x61, 0x42, 0x4f, 0x58, 0x55, 0x73, 0x7e, + 0x69, 0x64, 0x47, 0x4a, 0x5d, 0x50, 0x1b, 0x16, 0x01, 0x0c, + 0x2f, 0x22, 0x35, 0x38, 0xa3, 0xae, 0xb9, 0xb4, 0x97, 0x9a, + 0x8d, 0x80, 0xcb, 0xc6, 0xd1, 0xdc, 0xff, 0xf2, 0xe5, 0xe8, + 0xa9, 0xa4, 0xb3, 0xbe, 0x9d, 0x90, 0x87, 0x8a, 0xc1, 0xcc, + 0xdb, 0xd6, 0xf5, 0xf8, 0xef, 0xe2, 0x79, 0x74, 0x63, 0x6e, + 0x4d, 0x40, 0x57, 0x5a, 0x11, 0x1c, 0x0b, 0x06, 0x25, 0x28, + 0x3f, 0x32, 0x14, 0x19, 0x0e, 0x03, 0x20, 0x2d, 0x3a, 0x37, + 0x7c, 0x71, 0x66, 0x6b, 0x48, 0x45, 0x52, 0x5f, 0xc4, 0xc9, + 0xde, 0xd3, 0xf0, 0xfd, 0xea, 0xe7, 0xac, 0xa1, 0xb6, 0xbb, + 0x98, 0x95, 0x82, 0x8f, 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, + 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a, + 0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, + 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba, 0xdd, 0xd3, 0xc1, 0xcf, + 0xe5, 0xeb, 0xf9, 0xf7, 0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, + 0x89, 0x87, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17, + 0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0xa7, 0xa9, + 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d, 0xd7, 0xd9, 0xcb, 0xc5, + 0xef, 0xe1, 0xf3, 0xfd, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, + 0x63, 0x6d, 0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d, + 0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, + 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20, 0x9a, 0x94, 0x86, 0x88, + 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, + 0xce, 0xc0, 0x53, 0x5d, 0x4f, 0x41, 0x6b, 0x65, 0x77, 0x79, + 0x23, 0x2d, 0x3f, 0x31, 0x1b, 0x15, 0x07, 0x09, 0xb3, 0xbd, + 0xaf, 0xa1, 0x8b, 0x85, 0x97, 0x99, 0xc3, 0xcd, 0xdf, 0xd1, + 0xfb, 0xf5, 0xe7, 0xe9, 0x8e, 0x80, 0x92, 0x9c, 0xb6, 0xb8, + 0xaa, 0xa4, 0xfe, 0xf0, 0xe2, 0xec, 0xc6, 0xc8, 0xda, 0xd4, + 0x6e, 0x60, 0x72, 0x7c, 0x56, 0x58, 0x4a, 0x44, 0x1e, 0x10, + 0x02, 0x0c, 0x26, 0x28, 0x3a, 0x34, 0xf4, 0xfa, 0xe8, 0xe6, + 0xcc, 0xc2, 0xd0, 0xde, 0x84, 0x8a, 0x98, 0x96, 0xbc, 0xb2, + 0xa0, 0xae, 0x14, 0x1a, 0x08, 0x06, 0x2c, 0x22, 0x30, 0x3e, + 0x64, 0x6a, 0x78, 0x76, 0x5c, 0x52, 0x40, 0x4e, 0x29, 0x27, + 0x35, 0x3b, 0x11, 0x1f, 0x0d, 0x03, 0x59, 0x57, 0x45, 0x4b, + 0x61, 0x6f, 0x7d, 0x73, 0xc9, 0xc7, 0xd5, 0xdb, 0xf1, 0xff, + 0xed, 0xe3, 0xb9, 0xb7, 0xa5, 0xab, 0x81, 0x8f, 0x9d, 0x93, + 0x00, 0x0f, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, + 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55, 0xf0, 0xff, 0xee, 0xe1, + 0xcc, 0xc3, 0xd2, 0xdd, 0x88, 0x87, 0x96, 0x99, 0xb4, 0xbb, + 0xaa, 0xa5, 0xfd, 0xf2, 0xe3, 0xec, 0xc1, 0xce, 0xdf, 0xd0, + 0x85, 0x8a, 0x9b, 0x94, 0xb9, 0xb6, 0xa7, 0xa8, 0x0d, 0x02, + 0x13, 0x1c, 0x31, 0x3e, 0x2f, 0x20, 0x75, 0x7a, 0x6b, 0x64, + 0x49, 0x46, 0x57, 0x58, 0xe7, 0xe8, 0xf9, 0xf6, 0xdb, 0xd4, + 0xc5, 0xca, 0x9f, 0x90, 0x81, 0x8e, 0xa3, 0xac, 0xbd, 0xb2, + 0x17, 0x18, 0x09, 0x06, 0x2b, 0x24, 0x35, 0x3a, 0x6f, 0x60, + 0x71, 0x7e, 0x53, 0x5c, 0x4d, 0x42, 0x1a, 0x15, 0x04, 0x0b, + 0x26, 0x29, 0x38, 0x37, 0x62, 0x6d, 0x7c, 0x73, 0x5e, 0x51, + 0x40, 0x4f, 0xea, 0xe5, 0xf4, 0xfb, 0xd6, 0xd9, 0xc8, 0xc7, + 0x92, 0x9d, 0x8c, 0x83, 0xae, 0xa1, 0xb0, 0xbf, 0xd3, 0xdc, + 0xcd, 0xc2, 0xef, 0xe0, 0xf1, 0xfe, 0xab, 0xa4, 0xb5, 0xba, + 0x97, 0x98, 0x89, 0x86, 0x23, 0x2c, 0x3d, 0x32, 0x1f, 0x10, + 0x01, 0x0e, 0x5b, 0x54, 0x45, 0x4a, 0x67, 0x68, 0x79, 0x76, + 0x2e, 0x21, 0x30, 0x3f, 0x12, 0x1d, 0x0c, 0x03, 0x56, 0x59, + 0x48, 0x47, 0x6a, 0x65, 0x74, 0x7b, 0xde, 0xd1, 0xc0, 
0xcf, + 0xe2, 0xed, 0xfc, 0xf3, 0xa6, 0xa9, 0xb8, 0xb7, 0x9a, 0x95, + 0x84, 0x8b, 0x34, 0x3b, 0x2a, 0x25, 0x08, 0x07, 0x16, 0x19, + 0x4c, 0x43, 0x52, 0x5d, 0x70, 0x7f, 0x6e, 0x61, 0xc4, 0xcb, + 0xda, 0xd5, 0xf8, 0xf7, 0xe6, 0xe9, 0xbc, 0xb3, 0xa2, 0xad, + 0x80, 0x8f, 0x9e, 0x91, 0xc9, 0xc6, 0xd7, 0xd8, 0xf5, 0xfa, + 0xeb, 0xe4, 0xb1, 0xbe, 0xaf, 0xa0, 0x8d, 0x82, 0x93, 0x9c, + 0x39, 0x36, 0x27, 0x28, 0x05, 0x0a, 0x1b, 0x14, 0x41, 0x4e, + 0x5f, 0x50, 0x7d, 0x72, 0x63, 0x6c, 0x00, 0x10, 0x20, 0x30, + 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, + 0xe0, 0xf0, 0x1d, 0x0d, 0x3d, 0x2d, 0x5d, 0x4d, 0x7d, 0x6d, + 0x9d, 0x8d, 0xbd, 0xad, 0xdd, 0xcd, 0xfd, 0xed, 0x3a, 0x2a, + 0x1a, 0x0a, 0x7a, 0x6a, 0x5a, 0x4a, 0xba, 0xaa, 0x9a, 0x8a, + 0xfa, 0xea, 0xda, 0xca, 0x27, 0x37, 0x07, 0x17, 0x67, 0x77, + 0x47, 0x57, 0xa7, 0xb7, 0x87, 0x97, 0xe7, 0xf7, 0xc7, 0xd7, + 0x74, 0x64, 0x54, 0x44, 0x34, 0x24, 0x14, 0x04, 0xf4, 0xe4, + 0xd4, 0xc4, 0xb4, 0xa4, 0x94, 0x84, 0x69, 0x79, 0x49, 0x59, + 0x29, 0x39, 0x09, 0x19, 0xe9, 0xf9, 0xc9, 0xd9, 0xa9, 0xb9, + 0x89, 0x99, 0x4e, 0x5e, 0x6e, 0x7e, 0x0e, 0x1e, 0x2e, 0x3e, + 0xce, 0xde, 0xee, 0xfe, 0x8e, 0x9e, 0xae, 0xbe, 0x53, 0x43, + 0x73, 0x63, 0x13, 0x03, 0x33, 0x23, 0xd3, 0xc3, 0xf3, 0xe3, + 0x93, 0x83, 0xb3, 0xa3, 0xe8, 0xf8, 0xc8, 0xd8, 0xa8, 0xb8, + 0x88, 0x98, 0x68, 0x78, 0x48, 0x58, 0x28, 0x38, 0x08, 0x18, + 0xf5, 0xe5, 0xd5, 0xc5, 0xb5, 0xa5, 0x95, 0x85, 0x75, 0x65, + 0x55, 0x45, 0x35, 0x25, 0x15, 0x05, 0xd2, 0xc2, 0xf2, 0xe2, + 0x92, 0x82, 0xb2, 0xa2, 0x52, 0x42, 0x72, 0x62, 0x12, 0x02, + 0x32, 0x22, 0xcf, 0xdf, 0xef, 0xff, 0x8f, 0x9f, 0xaf, 0xbf, + 0x4f, 0x5f, 0x6f, 0x7f, 0x0f, 0x1f, 0x2f, 0x3f, 0x9c, 0x8c, + 0xbc, 0xac, 0xdc, 0xcc, 0xfc, 0xec, 0x1c, 0x0c, 0x3c, 0x2c, + 0x5c, 0x4c, 0x7c, 0x6c, 0x81, 0x91, 0xa1, 0xb1, 0xc1, 0xd1, + 0xe1, 0xf1, 0x01, 0x11, 0x21, 0x31, 0x41, 0x51, 0x61, 0x71, + 0xa6, 0xb6, 0x86, 0x96, 0xe6, 0xf6, 0xc6, 0xd6, 0x26, 0x36, + 0x06, 0x16, 0x66, 0x76, 0x46, 0x56, 0xbb, 0xab, 0x9b, 0x8b, + 0xfb, 0xeb, 0xdb, 0xcb, 0x3b, 0x2b, 0x1b, 0x0b, 0x7b, 0x6b, + 0x5b, 0x4b, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, + 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, 0x0d, 0x1c, + 0x2f, 0x3e, 0x49, 0x58, 0x6b, 0x7a, 0x85, 0x94, 0xa7, 0xb6, + 0xc1, 0xd0, 0xe3, 0xf2, 0x1a, 0x0b, 0x38, 0x29, 0x5e, 0x4f, + 0x7c, 0x6d, 0x92, 0x83, 0xb0, 0xa1, 0xd6, 0xc7, 0xf4, 0xe5, + 0x17, 0x06, 0x35, 0x24, 0x53, 0x42, 0x71, 0x60, 0x9f, 0x8e, + 0xbd, 0xac, 0xdb, 0xca, 0xf9, 0xe8, 0x34, 0x25, 0x16, 0x07, + 0x70, 0x61, 0x52, 0x43, 0xbc, 0xad, 0x9e, 0x8f, 0xf8, 0xe9, + 0xda, 0xcb, 0x39, 0x28, 0x1b, 0x0a, 0x7d, 0x6c, 0x5f, 0x4e, + 0xb1, 0xa0, 0x93, 0x82, 0xf5, 0xe4, 0xd7, 0xc6, 0x2e, 0x3f, + 0x0c, 0x1d, 0x6a, 0x7b, 0x48, 0x59, 0xa6, 0xb7, 0x84, 0x95, + 0xe2, 0xf3, 0xc0, 0xd1, 0x23, 0x32, 0x01, 0x10, 0x67, 0x76, + 0x45, 0x54, 0xab, 0xba, 0x89, 0x98, 0xef, 0xfe, 0xcd, 0xdc, + 0x68, 0x79, 0x4a, 0x5b, 0x2c, 0x3d, 0x0e, 0x1f, 0xe0, 0xf1, + 0xc2, 0xd3, 0xa4, 0xb5, 0x86, 0x97, 0x65, 0x74, 0x47, 0x56, + 0x21, 0x30, 0x03, 0x12, 0xed, 0xfc, 0xcf, 0xde, 0xa9, 0xb8, + 0x8b, 0x9a, 0x72, 0x63, 0x50, 0x41, 0x36, 0x27, 0x14, 0x05, + 0xfa, 0xeb, 0xd8, 0xc9, 0xbe, 0xaf, 0x9c, 0x8d, 0x7f, 0x6e, + 0x5d, 0x4c, 0x3b, 0x2a, 0x19, 0x08, 0xf7, 0xe6, 0xd5, 0xc4, + 0xb3, 0xa2, 0x91, 0x80, 0x5c, 0x4d, 0x7e, 0x6f, 0x18, 0x09, + 0x3a, 0x2b, 0xd4, 0xc5, 0xf6, 0xe7, 0x90, 0x81, 0xb2, 0xa3, + 0x51, 0x40, 0x73, 0x62, 0x15, 0x04, 0x37, 0x26, 0xd9, 0xc8, + 0xfb, 0xea, 0x9d, 0x8c, 0xbf, 0xae, 0x46, 0x57, 0x64, 0x75, + 0x02, 0x13, 0x20, 0x31, 0xce, 0xdf, 0xec, 0xfd, 0x8a, 0x9b, + 0xa8, 0xb9, 
0x4b, 0x5a, 0x69, 0x78, 0x0f, 0x1e, 0x2d, 0x3c, + 0xc3, 0xd2, 0xe1, 0xf0, 0x87, 0x96, 0xa5, 0xb4, 0x00, 0x12, + 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, + 0xd8, 0xca, 0xfc, 0xee, 0x3d, 0x2f, 0x19, 0x0b, 0x75, 0x67, + 0x51, 0x43, 0xad, 0xbf, 0x89, 0x9b, 0xe5, 0xf7, 0xc1, 0xd3, + 0x7a, 0x68, 0x5e, 0x4c, 0x32, 0x20, 0x16, 0x04, 0xea, 0xf8, + 0xce, 0xdc, 0xa2, 0xb0, 0x86, 0x94, 0x47, 0x55, 0x63, 0x71, + 0x0f, 0x1d, 0x2b, 0x39, 0xd7, 0xc5, 0xf3, 0xe1, 0x9f, 0x8d, + 0xbb, 0xa9, 0xf4, 0xe6, 0xd0, 0xc2, 0xbc, 0xae, 0x98, 0x8a, + 0x64, 0x76, 0x40, 0x52, 0x2c, 0x3e, 0x08, 0x1a, 0xc9, 0xdb, + 0xed, 0xff, 0x81, 0x93, 0xa5, 0xb7, 0x59, 0x4b, 0x7d, 0x6f, + 0x11, 0x03, 0x35, 0x27, 0x8e, 0x9c, 0xaa, 0xb8, 0xc6, 0xd4, + 0xe2, 0xf0, 0x1e, 0x0c, 0x3a, 0x28, 0x56, 0x44, 0x72, 0x60, + 0xb3, 0xa1, 0x97, 0x85, 0xfb, 0xe9, 0xdf, 0xcd, 0x23, 0x31, + 0x07, 0x15, 0x6b, 0x79, 0x4f, 0x5d, 0xf5, 0xe7, 0xd1, 0xc3, + 0xbd, 0xaf, 0x99, 0x8b, 0x65, 0x77, 0x41, 0x53, 0x2d, 0x3f, + 0x09, 0x1b, 0xc8, 0xda, 0xec, 0xfe, 0x80, 0x92, 0xa4, 0xb6, + 0x58, 0x4a, 0x7c, 0x6e, 0x10, 0x02, 0x34, 0x26, 0x8f, 0x9d, + 0xab, 0xb9, 0xc7, 0xd5, 0xe3, 0xf1, 0x1f, 0x0d, 0x3b, 0x29, + 0x57, 0x45, 0x73, 0x61, 0xb2, 0xa0, 0x96, 0x84, 0xfa, 0xe8, + 0xde, 0xcc, 0x22, 0x30, 0x06, 0x14, 0x6a, 0x78, 0x4e, 0x5c, + 0x01, 0x13, 0x25, 0x37, 0x49, 0x5b, 0x6d, 0x7f, 0x91, 0x83, + 0xb5, 0xa7, 0xd9, 0xcb, 0xfd, 0xef, 0x3c, 0x2e, 0x18, 0x0a, + 0x74, 0x66, 0x50, 0x42, 0xac, 0xbe, 0x88, 0x9a, 0xe4, 0xf6, + 0xc0, 0xd2, 0x7b, 0x69, 0x5f, 0x4d, 0x33, 0x21, 0x17, 0x05, + 0xeb, 0xf9, 0xcf, 0xdd, 0xa3, 0xb1, 0x87, 0x95, 0x46, 0x54, + 0x62, 0x70, 0x0e, 0x1c, 0x2a, 0x38, 0xd6, 0xc4, 0xf2, 0xe0, + 0x9e, 0x8c, 0xba, 0xa8, 0x00, 0x13, 0x26, 0x35, 0x4c, 0x5f, + 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1, + 0x2d, 0x3e, 0x0b, 0x18, 0x61, 0x72, 0x47, 0x54, 0xb5, 0xa6, + 0x93, 0x80, 0xf9, 0xea, 0xdf, 0xcc, 0x5a, 0x49, 0x7c, 0x6f, + 0x16, 0x05, 0x30, 0x23, 0xc2, 0xd1, 0xe4, 0xf7, 0x8e, 0x9d, + 0xa8, 0xbb, 0x77, 0x64, 0x51, 0x42, 0x3b, 0x28, 0x1d, 0x0e, + 0xef, 0xfc, 0xc9, 0xda, 0xa3, 0xb0, 0x85, 0x96, 0xb4, 0xa7, + 0x92, 0x81, 0xf8, 0xeb, 0xde, 0xcd, 0x2c, 0x3f, 0x0a, 0x19, + 0x60, 0x73, 0x46, 0x55, 0x99, 0x8a, 0xbf, 0xac, 0xd5, 0xc6, + 0xf3, 0xe0, 0x01, 0x12, 0x27, 0x34, 0x4d, 0x5e, 0x6b, 0x78, + 0xee, 0xfd, 0xc8, 0xdb, 0xa2, 0xb1, 0x84, 0x97, 0x76, 0x65, + 0x50, 0x43, 0x3a, 0x29, 0x1c, 0x0f, 0xc3, 0xd0, 0xe5, 0xf6, + 0x8f, 0x9c, 0xa9, 0xba, 0x5b, 0x48, 0x7d, 0x6e, 0x17, 0x04, + 0x31, 0x22, 0x75, 0x66, 0x53, 0x40, 0x39, 0x2a, 0x1f, 0x0c, + 0xed, 0xfe, 0xcb, 0xd8, 0xa1, 0xb2, 0x87, 0x94, 0x58, 0x4b, + 0x7e, 0x6d, 0x14, 0x07, 0x32, 0x21, 0xc0, 0xd3, 0xe6, 0xf5, + 0x8c, 0x9f, 0xaa, 0xb9, 0x2f, 0x3c, 0x09, 0x1a, 0x63, 0x70, + 0x45, 0x56, 0xb7, 0xa4, 0x91, 0x82, 0xfb, 0xe8, 0xdd, 0xce, + 0x02, 0x11, 0x24, 0x37, 0x4e, 0x5d, 0x68, 0x7b, 0x9a, 0x89, + 0xbc, 0xaf, 0xd6, 0xc5, 0xf0, 0xe3, 0xc1, 0xd2, 0xe7, 0xf4, + 0x8d, 0x9e, 0xab, 0xb8, 0x59, 0x4a, 0x7f, 0x6c, 0x15, 0x06, + 0x33, 0x20, 0xec, 0xff, 0xca, 0xd9, 0xa0, 0xb3, 0x86, 0x95, + 0x74, 0x67, 0x52, 0x41, 0x38, 0x2b, 0x1e, 0x0d, 0x9b, 0x88, + 0xbd, 0xae, 0xd7, 0xc4, 0xf1, 0xe2, 0x03, 0x10, 0x25, 0x36, + 0x4f, 0x5c, 0x69, 0x7a, 0xb6, 0xa5, 0x90, 0x83, 0xfa, 0xe9, + 0xdc, 0xcf, 0x2e, 0x3d, 0x08, 0x1b, 0x62, 0x71, 0x44, 0x57, + 0x00, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c, 0xa0, 0xb4, + 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc, 0x5d, 0x49, 0x75, 0x61, + 0x0d, 0x19, 0x25, 0x31, 0xfd, 0xe9, 0xd5, 0xc1, 0xad, 0xb9, + 0x85, 0x91, 0xba, 0xae, 0x92, 0x86, 0xea, 0xfe, 0xc2, 0xd6, + 0x1a, 0x0e, 0x32, 0x26, 0x4a, 
0x5e, 0x62, 0x76, 0xe7, 0xf3, + 0xcf, 0xdb, 0xb7, 0xa3, 0x9f, 0x8b, 0x47, 0x53, 0x6f, 0x7b, + 0x17, 0x03, 0x3f, 0x2b, 0x69, 0x7d, 0x41, 0x55, 0x39, 0x2d, + 0x11, 0x05, 0xc9, 0xdd, 0xe1, 0xf5, 0x99, 0x8d, 0xb1, 0xa5, + 0x34, 0x20, 0x1c, 0x08, 0x64, 0x70, 0x4c, 0x58, 0x94, 0x80, + 0xbc, 0xa8, 0xc4, 0xd0, 0xec, 0xf8, 0xd3, 0xc7, 0xfb, 0xef, + 0x83, 0x97, 0xab, 0xbf, 0x73, 0x67, 0x5b, 0x4f, 0x23, 0x37, + 0x0b, 0x1f, 0x8e, 0x9a, 0xa6, 0xb2, 0xde, 0xca, 0xf6, 0xe2, + 0x2e, 0x3a, 0x06, 0x12, 0x7e, 0x6a, 0x56, 0x42, 0xd2, 0xc6, + 0xfa, 0xee, 0x82, 0x96, 0xaa, 0xbe, 0x72, 0x66, 0x5a, 0x4e, + 0x22, 0x36, 0x0a, 0x1e, 0x8f, 0x9b, 0xa7, 0xb3, 0xdf, 0xcb, + 0xf7, 0xe3, 0x2f, 0x3b, 0x07, 0x13, 0x7f, 0x6b, 0x57, 0x43, + 0x68, 0x7c, 0x40, 0x54, 0x38, 0x2c, 0x10, 0x04, 0xc8, 0xdc, + 0xe0, 0xf4, 0x98, 0x8c, 0xb0, 0xa4, 0x35, 0x21, 0x1d, 0x09, + 0x65, 0x71, 0x4d, 0x59, 0x95, 0x81, 0xbd, 0xa9, 0xc5, 0xd1, + 0xed, 0xf9, 0xbb, 0xaf, 0x93, 0x87, 0xeb, 0xff, 0xc3, 0xd7, + 0x1b, 0x0f, 0x33, 0x27, 0x4b, 0x5f, 0x63, 0x77, 0xe6, 0xf2, + 0xce, 0xda, 0xb6, 0xa2, 0x9e, 0x8a, 0x46, 0x52, 0x6e, 0x7a, + 0x16, 0x02, 0x3e, 0x2a, 0x01, 0x15, 0x29, 0x3d, 0x51, 0x45, + 0x79, 0x6d, 0xa1, 0xb5, 0x89, 0x9d, 0xf1, 0xe5, 0xd9, 0xcd, + 0x5c, 0x48, 0x74, 0x60, 0x0c, 0x18, 0x24, 0x30, 0xfc, 0xe8, + 0xd4, 0xc0, 0xac, 0xb8, 0x84, 0x90, 0x00, 0x15, 0x2a, 0x3f, + 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, + 0xd6, 0xc3, 0x4d, 0x58, 0x67, 0x72, 0x19, 0x0c, 0x33, 0x26, + 0xe5, 0xf0, 0xcf, 0xda, 0xb1, 0xa4, 0x9b, 0x8e, 0x9a, 0x8f, + 0xb0, 0xa5, 0xce, 0xdb, 0xe4, 0xf1, 0x32, 0x27, 0x18, 0x0d, + 0x66, 0x73, 0x4c, 0x59, 0xd7, 0xc2, 0xfd, 0xe8, 0x83, 0x96, + 0xa9, 0xbc, 0x7f, 0x6a, 0x55, 0x40, 0x2b, 0x3e, 0x01, 0x14, + 0x29, 0x3c, 0x03, 0x16, 0x7d, 0x68, 0x57, 0x42, 0x81, 0x94, + 0xab, 0xbe, 0xd5, 0xc0, 0xff, 0xea, 0x64, 0x71, 0x4e, 0x5b, + 0x30, 0x25, 0x1a, 0x0f, 0xcc, 0xd9, 0xe6, 0xf3, 0x98, 0x8d, + 0xb2, 0xa7, 0xb3, 0xa6, 0x99, 0x8c, 0xe7, 0xf2, 0xcd, 0xd8, + 0x1b, 0x0e, 0x31, 0x24, 0x4f, 0x5a, 0x65, 0x70, 0xfe, 0xeb, + 0xd4, 0xc1, 0xaa, 0xbf, 0x80, 0x95, 0x56, 0x43, 0x7c, 0x69, + 0x02, 0x17, 0x28, 0x3d, 0x52, 0x47, 0x78, 0x6d, 0x06, 0x13, + 0x2c, 0x39, 0xfa, 0xef, 0xd0, 0xc5, 0xae, 0xbb, 0x84, 0x91, + 0x1f, 0x0a, 0x35, 0x20, 0x4b, 0x5e, 0x61, 0x74, 0xb7, 0xa2, + 0x9d, 0x88, 0xe3, 0xf6, 0xc9, 0xdc, 0xc8, 0xdd, 0xe2, 0xf7, + 0x9c, 0x89, 0xb6, 0xa3, 0x60, 0x75, 0x4a, 0x5f, 0x34, 0x21, + 0x1e, 0x0b, 0x85, 0x90, 0xaf, 0xba, 0xd1, 0xc4, 0xfb, 0xee, + 0x2d, 0x38, 0x07, 0x12, 0x79, 0x6c, 0x53, 0x46, 0x7b, 0x6e, + 0x51, 0x44, 0x2f, 0x3a, 0x05, 0x10, 0xd3, 0xc6, 0xf9, 0xec, + 0x87, 0x92, 0xad, 0xb8, 0x36, 0x23, 0x1c, 0x09, 0x62, 0x77, + 0x48, 0x5d, 0x9e, 0x8b, 0xb4, 0xa1, 0xca, 0xdf, 0xe0, 0xf5, + 0xe1, 0xf4, 0xcb, 0xde, 0xb5, 0xa0, 0x9f, 0x8a, 0x49, 0x5c, + 0x63, 0x76, 0x1d, 0x08, 0x37, 0x22, 0xac, 0xb9, 0x86, 0x93, + 0xf8, 0xed, 0xd2, 0xc7, 0x04, 0x11, 0x2e, 0x3b, 0x50, 0x45, + 0x7a, 0x6f, 0x00, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, + 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2, 0x7d, 0x6b, + 0x51, 0x47, 0x25, 0x33, 0x09, 0x1f, 0xcd, 0xdb, 0xe1, 0xf7, + 0x95, 0x83, 0xb9, 0xaf, 0xfa, 0xec, 0xd6, 0xc0, 0xa2, 0xb4, + 0x8e, 0x98, 0x4a, 0x5c, 0x66, 0x70, 0x12, 0x04, 0x3e, 0x28, + 0x87, 0x91, 0xab, 0xbd, 0xdf, 0xc9, 0xf3, 0xe5, 0x37, 0x21, + 0x1b, 0x0d, 0x6f, 0x79, 0x43, 0x55, 0xe9, 0xff, 0xc5, 0xd3, + 0xb1, 0xa7, 0x9d, 0x8b, 0x59, 0x4f, 0x75, 0x63, 0x01, 0x17, + 0x2d, 0x3b, 0x94, 0x82, 0xb8, 0xae, 0xcc, 0xda, 0xe0, 0xf6, + 0x24, 0x32, 0x08, 0x1e, 0x7c, 0x6a, 0x50, 0x46, 0x13, 0x05, + 0x3f, 0x29, 0x4b, 0x5d, 0x67, 0x71, 0xa3, 0xb5, 
0x8f, 0x99, + 0xfb, 0xed, 0xd7, 0xc1, 0x6e, 0x78, 0x42, 0x54, 0x36, 0x20, + 0x1a, 0x0c, 0xde, 0xc8, 0xf2, 0xe4, 0x86, 0x90, 0xaa, 0xbc, + 0xcf, 0xd9, 0xe3, 0xf5, 0x97, 0x81, 0xbb, 0xad, 0x7f, 0x69, + 0x53, 0x45, 0x27, 0x31, 0x0b, 0x1d, 0xb2, 0xa4, 0x9e, 0x88, + 0xea, 0xfc, 0xc6, 0xd0, 0x02, 0x14, 0x2e, 0x38, 0x5a, 0x4c, + 0x76, 0x60, 0x35, 0x23, 0x19, 0x0f, 0x6d, 0x7b, 0x41, 0x57, + 0x85, 0x93, 0xa9, 0xbf, 0xdd, 0xcb, 0xf1, 0xe7, 0x48, 0x5e, + 0x64, 0x72, 0x10, 0x06, 0x3c, 0x2a, 0xf8, 0xee, 0xd4, 0xc2, + 0xa0, 0xb6, 0x8c, 0x9a, 0x26, 0x30, 0x0a, 0x1c, 0x7e, 0x68, + 0x52, 0x44, 0x96, 0x80, 0xba, 0xac, 0xce, 0xd8, 0xe2, 0xf4, + 0x5b, 0x4d, 0x77, 0x61, 0x03, 0x15, 0x2f, 0x39, 0xeb, 0xfd, + 0xc7, 0xd1, 0xb3, 0xa5, 0x9f, 0x89, 0xdc, 0xca, 0xf0, 0xe6, + 0x84, 0x92, 0xa8, 0xbe, 0x6c, 0x7a, 0x40, 0x56, 0x34, 0x22, + 0x18, 0x0e, 0xa1, 0xb7, 0x8d, 0x9b, 0xf9, 0xef, 0xd5, 0xc3, + 0x11, 0x07, 0x3d, 0x2b, 0x49, 0x5f, 0x65, 0x73, 0x00, 0x17, + 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, + 0xe4, 0xf3, 0xca, 0xdd, 0x6d, 0x7a, 0x43, 0x54, 0x31, 0x26, + 0x1f, 0x08, 0xd5, 0xc2, 0xfb, 0xec, 0x89, 0x9e, 0xa7, 0xb0, + 0xda, 0xcd, 0xf4, 0xe3, 0x86, 0x91, 0xa8, 0xbf, 0x62, 0x75, + 0x4c, 0x5b, 0x3e, 0x29, 0x10, 0x07, 0xb7, 0xa0, 0x99, 0x8e, + 0xeb, 0xfc, 0xc5, 0xd2, 0x0f, 0x18, 0x21, 0x36, 0x53, 0x44, + 0x7d, 0x6a, 0xa9, 0xbe, 0x87, 0x90, 0xf5, 0xe2, 0xdb, 0xcc, + 0x11, 0x06, 0x3f, 0x28, 0x4d, 0x5a, 0x63, 0x74, 0xc4, 0xd3, + 0xea, 0xfd, 0x98, 0x8f, 0xb6, 0xa1, 0x7c, 0x6b, 0x52, 0x45, + 0x20, 0x37, 0x0e, 0x19, 0x73, 0x64, 0x5d, 0x4a, 0x2f, 0x38, + 0x01, 0x16, 0xcb, 0xdc, 0xe5, 0xf2, 0x97, 0x80, 0xb9, 0xae, + 0x1e, 0x09, 0x30, 0x27, 0x42, 0x55, 0x6c, 0x7b, 0xa6, 0xb1, + 0x88, 0x9f, 0xfa, 0xed, 0xd4, 0xc3, 0x4f, 0x58, 0x61, 0x76, + 0x13, 0x04, 0x3d, 0x2a, 0xf7, 0xe0, 0xd9, 0xce, 0xab, 0xbc, + 0x85, 0x92, 0x22, 0x35, 0x0c, 0x1b, 0x7e, 0x69, 0x50, 0x47, + 0x9a, 0x8d, 0xb4, 0xa3, 0xc6, 0xd1, 0xe8, 0xff, 0x95, 0x82, + 0xbb, 0xac, 0xc9, 0xde, 0xe7, 0xf0, 0x2d, 0x3a, 0x03, 0x14, + 0x71, 0x66, 0x5f, 0x48, 0xf8, 0xef, 0xd6, 0xc1, 0xa4, 0xb3, + 0x8a, 0x9d, 0x40, 0x57, 0x6e, 0x79, 0x1c, 0x0b, 0x32, 0x25, + 0xe6, 0xf1, 0xc8, 0xdf, 0xba, 0xad, 0x94, 0x83, 0x5e, 0x49, + 0x70, 0x67, 0x02, 0x15, 0x2c, 0x3b, 0x8b, 0x9c, 0xa5, 0xb2, + 0xd7, 0xc0, 0xf9, 0xee, 0x33, 0x24, 0x1d, 0x0a, 0x6f, 0x78, + 0x41, 0x56, 0x3c, 0x2b, 0x12, 0x05, 0x60, 0x77, 0x4e, 0x59, + 0x84, 0x93, 0xaa, 0xbd, 0xd8, 0xcf, 0xf6, 0xe1, 0x51, 0x46, + 0x7f, 0x68, 0x0d, 0x1a, 0x23, 0x34, 0xe9, 0xfe, 0xc7, 0xd0, + 0xb5, 0xa2, 0x9b, 0x8c, 0x00, 0x18, 0x30, 0x28, 0x60, 0x78, + 0x50, 0x48, 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88, + 0x9d, 0x85, 0xad, 0xb5, 0xfd, 0xe5, 0xcd, 0xd5, 0x5d, 0x45, + 0x6d, 0x75, 0x3d, 0x25, 0x0d, 0x15, 0x27, 0x3f, 0x17, 0x0f, + 0x47, 0x5f, 0x77, 0x6f, 0xe7, 0xff, 0xd7, 0xcf, 0x87, 0x9f, + 0xb7, 0xaf, 0xba, 0xa2, 0x8a, 0x92, 0xda, 0xc2, 0xea, 0xf2, + 0x7a, 0x62, 0x4a, 0x52, 0x1a, 0x02, 0x2a, 0x32, 0x4e, 0x56, + 0x7e, 0x66, 0x2e, 0x36, 0x1e, 0x06, 0x8e, 0x96, 0xbe, 0xa6, + 0xee, 0xf6, 0xde, 0xc6, 0xd3, 0xcb, 0xe3, 0xfb, 0xb3, 0xab, + 0x83, 0x9b, 0x13, 0x0b, 0x23, 0x3b, 0x73, 0x6b, 0x43, 0x5b, + 0x69, 0x71, 0x59, 0x41, 0x09, 0x11, 0x39, 0x21, 0xa9, 0xb1, + 0x99, 0x81, 0xc9, 0xd1, 0xf9, 0xe1, 0xf4, 0xec, 0xc4, 0xdc, + 0x94, 0x8c, 0xa4, 0xbc, 0x34, 0x2c, 0x04, 0x1c, 0x54, 0x4c, + 0x64, 0x7c, 0x9c, 0x84, 0xac, 0xb4, 0xfc, 0xe4, 0xcc, 0xd4, + 0x5c, 0x44, 0x6c, 0x74, 0x3c, 0x24, 0x0c, 0x14, 0x01, 0x19, + 0x31, 0x29, 0x61, 0x79, 0x51, 0x49, 0xc1, 0xd9, 0xf1, 0xe9, + 0xa1, 0xb9, 0x91, 0x89, 0xbb, 0xa3, 0x8b, 0x93, 0xdb, 0xc3, + 0xeb, 
0xf3, 0x7b, 0x63, 0x4b, 0x53, 0x1b, 0x03, 0x2b, 0x33, + 0x26, 0x3e, 0x16, 0x0e, 0x46, 0x5e, 0x76, 0x6e, 0xe6, 0xfe, + 0xd6, 0xce, 0x86, 0x9e, 0xb6, 0xae, 0xd2, 0xca, 0xe2, 0xfa, + 0xb2, 0xaa, 0x82, 0x9a, 0x12, 0x0a, 0x22, 0x3a, 0x72, 0x6a, + 0x42, 0x5a, 0x4f, 0x57, 0x7f, 0x67, 0x2f, 0x37, 0x1f, 0x07, + 0x8f, 0x97, 0xbf, 0xa7, 0xef, 0xf7, 0xdf, 0xc7, 0xf5, 0xed, + 0xc5, 0xdd, 0x95, 0x8d, 0xa5, 0xbd, 0x35, 0x2d, 0x05, 0x1d, + 0x55, 0x4d, 0x65, 0x7d, 0x68, 0x70, 0x58, 0x40, 0x08, 0x10, + 0x38, 0x20, 0xa8, 0xb0, 0x98, 0x80, 0xc8, 0xd0, 0xf8, 0xe0, + 0x00, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, + 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87, 0x8d, 0x94, 0xbf, 0xa6, + 0xe9, 0xf0, 0xdb, 0xc2, 0x45, 0x5c, 0x77, 0x6e, 0x21, 0x38, + 0x13, 0x0a, 0x07, 0x1e, 0x35, 0x2c, 0x63, 0x7a, 0x51, 0x48, + 0xcf, 0xd6, 0xfd, 0xe4, 0xab, 0xb2, 0x99, 0x80, 0x8a, 0x93, + 0xb8, 0xa1, 0xee, 0xf7, 0xdc, 0xc5, 0x42, 0x5b, 0x70, 0x69, + 0x26, 0x3f, 0x14, 0x0d, 0x0e, 0x17, 0x3c, 0x25, 0x6a, 0x73, + 0x58, 0x41, 0xc6, 0xdf, 0xf4, 0xed, 0xa2, 0xbb, 0x90, 0x89, + 0x83, 0x9a, 0xb1, 0xa8, 0xe7, 0xfe, 0xd5, 0xcc, 0x4b, 0x52, + 0x79, 0x60, 0x2f, 0x36, 0x1d, 0x04, 0x09, 0x10, 0x3b, 0x22, + 0x6d, 0x74, 0x5f, 0x46, 0xc1, 0xd8, 0xf3, 0xea, 0xa5, 0xbc, + 0x97, 0x8e, 0x84, 0x9d, 0xb6, 0xaf, 0xe0, 0xf9, 0xd2, 0xcb, + 0x4c, 0x55, 0x7e, 0x67, 0x28, 0x31, 0x1a, 0x03, 0x1c, 0x05, + 0x2e, 0x37, 0x78, 0x61, 0x4a, 0x53, 0xd4, 0xcd, 0xe6, 0xff, + 0xb0, 0xa9, 0x82, 0x9b, 0x91, 0x88, 0xa3, 0xba, 0xf5, 0xec, + 0xc7, 0xde, 0x59, 0x40, 0x6b, 0x72, 0x3d, 0x24, 0x0f, 0x16, + 0x1b, 0x02, 0x29, 0x30, 0x7f, 0x66, 0x4d, 0x54, 0xd3, 0xca, + 0xe1, 0xf8, 0xb7, 0xae, 0x85, 0x9c, 0x96, 0x8f, 0xa4, 0xbd, + 0xf2, 0xeb, 0xc0, 0xd9, 0x5e, 0x47, 0x6c, 0x75, 0x3a, 0x23, + 0x08, 0x11, 0x12, 0x0b, 0x20, 0x39, 0x76, 0x6f, 0x44, 0x5d, + 0xda, 0xc3, 0xe8, 0xf1, 0xbe, 0xa7, 0x8c, 0x95, 0x9f, 0x86, + 0xad, 0xb4, 0xfb, 0xe2, 0xc9, 0xd0, 0x57, 0x4e, 0x65, 0x7c, + 0x33, 0x2a, 0x01, 0x18, 0x15, 0x0c, 0x27, 0x3e, 0x71, 0x68, + 0x43, 0x5a, 0xdd, 0xc4, 0xef, 0xf6, 0xb9, 0xa0, 0x8b, 0x92, + 0x98, 0x81, 0xaa, 0xb3, 0xfc, 0xe5, 0xce, 0xd7, 0x50, 0x49, + 0x62, 0x7b, 0x34, 0x2d, 0x06, 0x1f, 0x00, 0x1a, 0x34, 0x2e, + 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, + 0x8c, 0x96, 0xbd, 0xa7, 0x89, 0x93, 0xd5, 0xcf, 0xe1, 0xfb, + 0x6d, 0x77, 0x59, 0x43, 0x05, 0x1f, 0x31, 0x2b, 0x67, 0x7d, + 0x53, 0x49, 0x0f, 0x15, 0x3b, 0x21, 0xb7, 0xad, 0x83, 0x99, + 0xdf, 0xc5, 0xeb, 0xf1, 0xda, 0xc0, 0xee, 0xf4, 0xb2, 0xa8, + 0x86, 0x9c, 0x0a, 0x10, 0x3e, 0x24, 0x62, 0x78, 0x56, 0x4c, + 0xce, 0xd4, 0xfa, 0xe0, 0xa6, 0xbc, 0x92, 0x88, 0x1e, 0x04, + 0x2a, 0x30, 0x76, 0x6c, 0x42, 0x58, 0x73, 0x69, 0x47, 0x5d, + 0x1b, 0x01, 0x2f, 0x35, 0xa3, 0xb9, 0x97, 0x8d, 0xcb, 0xd1, + 0xff, 0xe5, 0xa9, 0xb3, 0x9d, 0x87, 0xc1, 0xdb, 0xf5, 0xef, + 0x79, 0x63, 0x4d, 0x57, 0x11, 0x0b, 0x25, 0x3f, 0x14, 0x0e, + 0x20, 0x3a, 0x7c, 0x66, 0x48, 0x52, 0xc4, 0xde, 0xf0, 0xea, + 0xac, 0xb6, 0x98, 0x82, 0x81, 0x9b, 0xb5, 0xaf, 0xe9, 0xf3, + 0xdd, 0xc7, 0x51, 0x4b, 0x65, 0x7f, 0x39, 0x23, 0x0d, 0x17, + 0x3c, 0x26, 0x08, 0x12, 0x54, 0x4e, 0x60, 0x7a, 0xec, 0xf6, + 0xd8, 0xc2, 0x84, 0x9e, 0xb0, 0xaa, 0xe6, 0xfc, 0xd2, 0xc8, + 0x8e, 0x94, 0xba, 0xa0, 0x36, 0x2c, 0x02, 0x18, 0x5e, 0x44, + 0x6a, 0x70, 0x5b, 0x41, 0x6f, 0x75, 0x33, 0x29, 0x07, 0x1d, + 0x8b, 0x91, 0xbf, 0xa5, 0xe3, 0xf9, 0xd7, 0xcd, 0x4f, 0x55, + 0x7b, 0x61, 0x27, 0x3d, 0x13, 0x09, 0x9f, 0x85, 0xab, 0xb1, + 0xf7, 0xed, 0xc3, 0xd9, 0xf2, 0xe8, 0xc6, 0xdc, 0x9a, 0x80, + 0xae, 0xb4, 0x22, 0x38, 0x16, 0x0c, 0x4a, 0x50, 0x7e, 0x64, + 0x28, 0x32, 0x1c, 0x06, 
0x40, 0x5a, 0x74, 0x6e, 0xf8, 0xe2, + 0xcc, 0xd6, 0x90, 0x8a, 0xa4, 0xbe, 0x95, 0x8f, 0xa1, 0xbb, + 0xfd, 0xe7, 0xc9, 0xd3, 0x45, 0x5f, 0x71, 0x6b, 0x2d, 0x37, + 0x19, 0x03, 0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, + 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99, 0xad, 0xb6, + 0x9b, 0x80, 0xc1, 0xda, 0xf7, 0xec, 0x75, 0x6e, 0x43, 0x58, + 0x19, 0x02, 0x2f, 0x34, 0x47, 0x5c, 0x71, 0x6a, 0x2b, 0x30, + 0x1d, 0x06, 0x9f, 0x84, 0xa9, 0xb2, 0xf3, 0xe8, 0xc5, 0xde, + 0xea, 0xf1, 0xdc, 0xc7, 0x86, 0x9d, 0xb0, 0xab, 0x32, 0x29, + 0x04, 0x1f, 0x5e, 0x45, 0x68, 0x73, 0x8e, 0x95, 0xb8, 0xa3, + 0xe2, 0xf9, 0xd4, 0xcf, 0x56, 0x4d, 0x60, 0x7b, 0x3a, 0x21, + 0x0c, 0x17, 0x23, 0x38, 0x15, 0x0e, 0x4f, 0x54, 0x79, 0x62, + 0xfb, 0xe0, 0xcd, 0xd6, 0x97, 0x8c, 0xa1, 0xba, 0xc9, 0xd2, + 0xff, 0xe4, 0xa5, 0xbe, 0x93, 0x88, 0x11, 0x0a, 0x27, 0x3c, + 0x7d, 0x66, 0x4b, 0x50, 0x64, 0x7f, 0x52, 0x49, 0x08, 0x13, + 0x3e, 0x25, 0xbc, 0xa7, 0x8a, 0x91, 0xd0, 0xcb, 0xe6, 0xfd, + 0x01, 0x1a, 0x37, 0x2c, 0x6d, 0x76, 0x5b, 0x40, 0xd9, 0xc2, + 0xef, 0xf4, 0xb5, 0xae, 0x83, 0x98, 0xac, 0xb7, 0x9a, 0x81, + 0xc0, 0xdb, 0xf6, 0xed, 0x74, 0x6f, 0x42, 0x59, 0x18, 0x03, + 0x2e, 0x35, 0x46, 0x5d, 0x70, 0x6b, 0x2a, 0x31, 0x1c, 0x07, + 0x9e, 0x85, 0xa8, 0xb3, 0xf2, 0xe9, 0xc4, 0xdf, 0xeb, 0xf0, + 0xdd, 0xc6, 0x87, 0x9c, 0xb1, 0xaa, 0x33, 0x28, 0x05, 0x1e, + 0x5f, 0x44, 0x69, 0x72, 0x8f, 0x94, 0xb9, 0xa2, 0xe3, 0xf8, + 0xd5, 0xce, 0x57, 0x4c, 0x61, 0x7a, 0x3b, 0x20, 0x0d, 0x16, + 0x22, 0x39, 0x14, 0x0f, 0x4e, 0x55, 0x78, 0x63, 0xfa, 0xe1, + 0xcc, 0xd7, 0x96, 0x8d, 0xa0, 0xbb, 0xc8, 0xd3, 0xfe, 0xe5, + 0xa4, 0xbf, 0x92, 0x89, 0x10, 0x0b, 0x26, 0x3d, 0x7c, 0x67, + 0x4a, 0x51, 0x65, 0x7e, 0x53, 0x48, 0x09, 0x12, 0x3f, 0x24, + 0xbd, 0xa6, 0x8b, 0x90, 0xd1, 0xca, 0xe7, 0xfc, 0x00, 0x1c, + 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54, 0xe0, 0xfc, 0xd8, 0xc4, + 0x90, 0x8c, 0xa8, 0xb4, 0xdd, 0xc1, 0xe5, 0xf9, 0xad, 0xb1, + 0x95, 0x89, 0x3d, 0x21, 0x05, 0x19, 0x4d, 0x51, 0x75, 0x69, + 0xa7, 0xbb, 0x9f, 0x83, 0xd7, 0xcb, 0xef, 0xf3, 0x47, 0x5b, + 0x7f, 0x63, 0x37, 0x2b, 0x0f, 0x13, 0x7a, 0x66, 0x42, 0x5e, + 0x0a, 0x16, 0x32, 0x2e, 0x9a, 0x86, 0xa2, 0xbe, 0xea, 0xf6, + 0xd2, 0xce, 0x53, 0x4f, 0x6b, 0x77, 0x23, 0x3f, 0x1b, 0x07, + 0xb3, 0xaf, 0x8b, 0x97, 0xc3, 0xdf, 0xfb, 0xe7, 0x8e, 0x92, + 0xb6, 0xaa, 0xfe, 0xe2, 0xc6, 0xda, 0x6e, 0x72, 0x56, 0x4a, + 0x1e, 0x02, 0x26, 0x3a, 0xf4, 0xe8, 0xcc, 0xd0, 0x84, 0x98, + 0xbc, 0xa0, 0x14, 0x08, 0x2c, 0x30, 0x64, 0x78, 0x5c, 0x40, + 0x29, 0x35, 0x11, 0x0d, 0x59, 0x45, 0x61, 0x7d, 0xc9, 0xd5, + 0xf1, 0xed, 0xb9, 0xa5, 0x81, 0x9d, 0xa6, 0xba, 0x9e, 0x82, + 0xd6, 0xca, 0xee, 0xf2, 0x46, 0x5a, 0x7e, 0x62, 0x36, 0x2a, + 0x0e, 0x12, 0x7b, 0x67, 0x43, 0x5f, 0x0b, 0x17, 0x33, 0x2f, + 0x9b, 0x87, 0xa3, 0xbf, 0xeb, 0xf7, 0xd3, 0xcf, 0x01, 0x1d, + 0x39, 0x25, 0x71, 0x6d, 0x49, 0x55, 0xe1, 0xfd, 0xd9, 0xc5, + 0x91, 0x8d, 0xa9, 0xb5, 0xdc, 0xc0, 0xe4, 0xf8, 0xac, 0xb0, + 0x94, 0x88, 0x3c, 0x20, 0x04, 0x18, 0x4c, 0x50, 0x74, 0x68, + 0xf5, 0xe9, 0xcd, 0xd1, 0x85, 0x99, 0xbd, 0xa1, 0x15, 0x09, + 0x2d, 0x31, 0x65, 0x79, 0x5d, 0x41, 0x28, 0x34, 0x10, 0x0c, + 0x58, 0x44, 0x60, 0x7c, 0xc8, 0xd4, 0xf0, 0xec, 0xb8, 0xa4, + 0x80, 0x9c, 0x52, 0x4e, 0x6a, 0x76, 0x22, 0x3e, 0x1a, 0x06, + 0xb2, 0xae, 0x8a, 0x96, 0xc2, 0xde, 0xfa, 0xe6, 0x8f, 0x93, + 0xb7, 0xab, 0xff, 0xe3, 0xc7, 0xdb, 0x6f, 0x73, 0x57, 0x4b, + 0x1f, 0x03, 0x27, 0x3b, 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, + 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb, + 0xcd, 0xd0, 0xf7, 0xea, 0xb9, 0xa4, 0x83, 0x9e, 0x25, 0x38, + 0x1f, 0x02, 0x51, 0x4c, 0x6b, 0x76, 0x87, 
0x9a, 0xbd, 0xa0, + 0xf3, 0xee, 0xc9, 0xd4, 0x6f, 0x72, 0x55, 0x48, 0x1b, 0x06, + 0x21, 0x3c, 0x4a, 0x57, 0x70, 0x6d, 0x3e, 0x23, 0x04, 0x19, + 0xa2, 0xbf, 0x98, 0x85, 0xd6, 0xcb, 0xec, 0xf1, 0x13, 0x0e, + 0x29, 0x34, 0x67, 0x7a, 0x5d, 0x40, 0xfb, 0xe6, 0xc1, 0xdc, + 0x8f, 0x92, 0xb5, 0xa8, 0xde, 0xc3, 0xe4, 0xf9, 0xaa, 0xb7, + 0x90, 0x8d, 0x36, 0x2b, 0x0c, 0x11, 0x42, 0x5f, 0x78, 0x65, + 0x94, 0x89, 0xae, 0xb3, 0xe0, 0xfd, 0xda, 0xc7, 0x7c, 0x61, + 0x46, 0x5b, 0x08, 0x15, 0x32, 0x2f, 0x59, 0x44, 0x63, 0x7e, + 0x2d, 0x30, 0x17, 0x0a, 0xb1, 0xac, 0x8b, 0x96, 0xc5, 0xd8, + 0xff, 0xe2, 0x26, 0x3b, 0x1c, 0x01, 0x52, 0x4f, 0x68, 0x75, + 0xce, 0xd3, 0xf4, 0xe9, 0xba, 0xa7, 0x80, 0x9d, 0xeb, 0xf6, + 0xd1, 0xcc, 0x9f, 0x82, 0xa5, 0xb8, 0x03, 0x1e, 0x39, 0x24, + 0x77, 0x6a, 0x4d, 0x50, 0xa1, 0xbc, 0x9b, 0x86, 0xd5, 0xc8, + 0xef, 0xf2, 0x49, 0x54, 0x73, 0x6e, 0x3d, 0x20, 0x07, 0x1a, + 0x6c, 0x71, 0x56, 0x4b, 0x18, 0x05, 0x22, 0x3f, 0x84, 0x99, + 0xbe, 0xa3, 0xf0, 0xed, 0xca, 0xd7, 0x35, 0x28, 0x0f, 0x12, + 0x41, 0x5c, 0x7b, 0x66, 0xdd, 0xc0, 0xe7, 0xfa, 0xa9, 0xb4, + 0x93, 0x8e, 0xf8, 0xe5, 0xc2, 0xdf, 0x8c, 0x91, 0xb6, 0xab, + 0x10, 0x0d, 0x2a, 0x37, 0x64, 0x79, 0x5e, 0x43, 0xb2, 0xaf, + 0x88, 0x95, 0xc6, 0xdb, 0xfc, 0xe1, 0x5a, 0x47, 0x60, 0x7d, + 0x2e, 0x33, 0x14, 0x09, 0x7f, 0x62, 0x45, 0x58, 0x0b, 0x16, + 0x31, 0x2c, 0x97, 0x8a, 0xad, 0xb0, 0xe3, 0xfe, 0xd9, 0xc4, + 0x00, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a, 0xf0, 0xee, + 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa, 0xfd, 0xe3, 0xc1, 0xdf, + 0x85, 0x9b, 0xb9, 0xa7, 0x0d, 0x13, 0x31, 0x2f, 0x75, 0x6b, + 0x49, 0x57, 0xe7, 0xf9, 0xdb, 0xc5, 0x9f, 0x81, 0xa3, 0xbd, + 0x17, 0x09, 0x2b, 0x35, 0x6f, 0x71, 0x53, 0x4d, 0x1a, 0x04, + 0x26, 0x38, 0x62, 0x7c, 0x5e, 0x40, 0xea, 0xf4, 0xd6, 0xc8, + 0x92, 0x8c, 0xae, 0xb0, 0xd3, 0xcd, 0xef, 0xf1, 0xab, 0xb5, + 0x97, 0x89, 0x23, 0x3d, 0x1f, 0x01, 0x5b, 0x45, 0x67, 0x79, + 0x2e, 0x30, 0x12, 0x0c, 0x56, 0x48, 0x6a, 0x74, 0xde, 0xc0, + 0xe2, 0xfc, 0xa6, 0xb8, 0x9a, 0x84, 0x34, 0x2a, 0x08, 0x16, + 0x4c, 0x52, 0x70, 0x6e, 0xc4, 0xda, 0xf8, 0xe6, 0xbc, 0xa2, + 0x80, 0x9e, 0xc9, 0xd7, 0xf5, 0xeb, 0xb1, 0xaf, 0x8d, 0x93, + 0x39, 0x27, 0x05, 0x1b, 0x41, 0x5f, 0x7d, 0x63, 0xbb, 0xa5, + 0x87, 0x99, 0xc3, 0xdd, 0xff, 0xe1, 0x4b, 0x55, 0x77, 0x69, + 0x33, 0x2d, 0x0f, 0x11, 0x46, 0x58, 0x7a, 0x64, 0x3e, 0x20, + 0x02, 0x1c, 0xb6, 0xa8, 0x8a, 0x94, 0xce, 0xd0, 0xf2, 0xec, + 0x5c, 0x42, 0x60, 0x7e, 0x24, 0x3a, 0x18, 0x06, 0xac, 0xb2, + 0x90, 0x8e, 0xd4, 0xca, 0xe8, 0xf6, 0xa1, 0xbf, 0x9d, 0x83, + 0xd9, 0xc7, 0xe5, 0xfb, 0x51, 0x4f, 0x6d, 0x73, 0x29, 0x37, + 0x15, 0x0b, 0x68, 0x76, 0x54, 0x4a, 0x10, 0x0e, 0x2c, 0x32, + 0x98, 0x86, 0xa4, 0xba, 0xe0, 0xfe, 0xdc, 0xc2, 0x95, 0x8b, + 0xa9, 0xb7, 0xed, 0xf3, 0xd1, 0xcf, 0x65, 0x7b, 0x59, 0x47, + 0x1d, 0x03, 0x21, 0x3f, 0x8f, 0x91, 0xb3, 0xad, 0xf7, 0xe9, + 0xcb, 0xd5, 0x7f, 0x61, 0x43, 0x5d, 0x07, 0x19, 0x3b, 0x25, + 0x72, 0x6c, 0x4e, 0x50, 0x0a, 0x14, 0x36, 0x28, 0x82, 0x9c, + 0xbe, 0xa0, 0xfa, 0xe4, 0xc6, 0xd8, 0x00, 0x1f, 0x3e, 0x21, + 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, + 0xba, 0xa5, 0xed, 0xf2, 0xd3, 0xcc, 0x91, 0x8e, 0xaf, 0xb0, + 0x15, 0x0a, 0x2b, 0x34, 0x69, 0x76, 0x57, 0x48, 0xc7, 0xd8, + 0xf9, 0xe6, 0xbb, 0xa4, 0x85, 0x9a, 0x3f, 0x20, 0x01, 0x1e, + 0x43, 0x5c, 0x7d, 0x62, 0x2a, 0x35, 0x14, 0x0b, 0x56, 0x49, + 0x68, 0x77, 0xd2, 0xcd, 0xec, 0xf3, 0xae, 0xb1, 0x90, 0x8f, + 0x93, 0x8c, 0xad, 0xb2, 0xef, 0xf0, 0xd1, 0xce, 0x6b, 0x74, + 0x55, 0x4a, 0x17, 0x08, 0x29, 0x36, 0x7e, 0x61, 0x40, 0x5f, + 0x02, 0x1d, 0x3c, 0x23, 0x86, 0x99, 0xb8, 0xa7, 0xfa, 0xe5, + 
0xc4, 0xdb, 0x54, 0x4b, 0x6a, 0x75, 0x28, 0x37, 0x16, 0x09, + 0xac, 0xb3, 0x92, 0x8d, 0xd0, 0xcf, 0xee, 0xf1, 0xb9, 0xa6, + 0x87, 0x98, 0xc5, 0xda, 0xfb, 0xe4, 0x41, 0x5e, 0x7f, 0x60, + 0x3d, 0x22, 0x03, 0x1c, 0x3b, 0x24, 0x05, 0x1a, 0x47, 0x58, + 0x79, 0x66, 0xc3, 0xdc, 0xfd, 0xe2, 0xbf, 0xa0, 0x81, 0x9e, + 0xd6, 0xc9, 0xe8, 0xf7, 0xaa, 0xb5, 0x94, 0x8b, 0x2e, 0x31, + 0x10, 0x0f, 0x52, 0x4d, 0x6c, 0x73, 0xfc, 0xe3, 0xc2, 0xdd, + 0x80, 0x9f, 0xbe, 0xa1, 0x04, 0x1b, 0x3a, 0x25, 0x78, 0x67, + 0x46, 0x59, 0x11, 0x0e, 0x2f, 0x30, 0x6d, 0x72, 0x53, 0x4c, + 0xe9, 0xf6, 0xd7, 0xc8, 0x95, 0x8a, 0xab, 0xb4, 0xa8, 0xb7, + 0x96, 0x89, 0xd4, 0xcb, 0xea, 0xf5, 0x50, 0x4f, 0x6e, 0x71, + 0x2c, 0x33, 0x12, 0x0d, 0x45, 0x5a, 0x7b, 0x64, 0x39, 0x26, + 0x07, 0x18, 0xbd, 0xa2, 0x83, 0x9c, 0xc1, 0xde, 0xff, 0xe0, + 0x6f, 0x70, 0x51, 0x4e, 0x13, 0x0c, 0x2d, 0x32, 0x97, 0x88, + 0xa9, 0xb6, 0xeb, 0xf4, 0xd5, 0xca, 0x82, 0x9d, 0xbc, 0xa3, + 0xfe, 0xe1, 0xc0, 0xdf, 0x7a, 0x65, 0x44, 0x5b, 0x06, 0x19, + 0x38, 0x27, 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, + 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd, 0x3a, 0x1a, + 0x7a, 0x5a, 0xba, 0x9a, 0xfa, 0xda, 0x27, 0x07, 0x67, 0x47, + 0xa7, 0x87, 0xe7, 0xc7, 0x74, 0x54, 0x34, 0x14, 0xf4, 0xd4, + 0xb4, 0x94, 0x69, 0x49, 0x29, 0x09, 0xe9, 0xc9, 0xa9, 0x89, + 0x4e, 0x6e, 0x0e, 0x2e, 0xce, 0xee, 0x8e, 0xae, 0x53, 0x73, + 0x13, 0x33, 0xd3, 0xf3, 0x93, 0xb3, 0xe8, 0xc8, 0xa8, 0x88, + 0x68, 0x48, 0x28, 0x08, 0xf5, 0xd5, 0xb5, 0x95, 0x75, 0x55, + 0x35, 0x15, 0xd2, 0xf2, 0x92, 0xb2, 0x52, 0x72, 0x12, 0x32, + 0xcf, 0xef, 0x8f, 0xaf, 0x4f, 0x6f, 0x0f, 0x2f, 0x9c, 0xbc, + 0xdc, 0xfc, 0x1c, 0x3c, 0x5c, 0x7c, 0x81, 0xa1, 0xc1, 0xe1, + 0x01, 0x21, 0x41, 0x61, 0xa6, 0x86, 0xe6, 0xc6, 0x26, 0x06, + 0x66, 0x46, 0xbb, 0x9b, 0xfb, 0xdb, 0x3b, 0x1b, 0x7b, 0x5b, + 0xcd, 0xed, 0x8d, 0xad, 0x4d, 0x6d, 0x0d, 0x2d, 0xd0, 0xf0, + 0x90, 0xb0, 0x50, 0x70, 0x10, 0x30, 0xf7, 0xd7, 0xb7, 0x97, + 0x77, 0x57, 0x37, 0x17, 0xea, 0xca, 0xaa, 0x8a, 0x6a, 0x4a, + 0x2a, 0x0a, 0xb9, 0x99, 0xf9, 0xd9, 0x39, 0x19, 0x79, 0x59, + 0xa4, 0x84, 0xe4, 0xc4, 0x24, 0x04, 0x64, 0x44, 0x83, 0xa3, + 0xc3, 0xe3, 0x03, 0x23, 0x43, 0x63, 0x9e, 0xbe, 0xde, 0xfe, + 0x1e, 0x3e, 0x5e, 0x7e, 0x25, 0x05, 0x65, 0x45, 0xa5, 0x85, + 0xe5, 0xc5, 0x38, 0x18, 0x78, 0x58, 0xb8, 0x98, 0xf8, 0xd8, + 0x1f, 0x3f, 0x5f, 0x7f, 0x9f, 0xbf, 0xdf, 0xff, 0x02, 0x22, + 0x42, 0x62, 0x82, 0xa2, 0xc2, 0xe2, 0x51, 0x71, 0x11, 0x31, + 0xd1, 0xf1, 0x91, 0xb1, 0x4c, 0x6c, 0x0c, 0x2c, 0xcc, 0xec, + 0x8c, 0xac, 0x6b, 0x4b, 0x2b, 0x0b, 0xeb, 0xcb, 0xab, 0x8b, + 0x76, 0x56, 0x36, 0x16, 0xf6, 0xd6, 0xb6, 0x96, 0x00, 0x21, + 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, + 0x91, 0xb0, 0xd3, 0xf2, 0x2a, 0x0b, 0x68, 0x49, 0xae, 0x8f, + 0xec, 0xcd, 0x3f, 0x1e, 0x7d, 0x5c, 0xbb, 0x9a, 0xf9, 0xd8, + 0x54, 0x75, 0x16, 0x37, 0xd0, 0xf1, 0x92, 0xb3, 0x41, 0x60, + 0x03, 0x22, 0xc5, 0xe4, 0x87, 0xa6, 0x7e, 0x5f, 0x3c, 0x1d, + 0xfa, 0xdb, 0xb8, 0x99, 0x6b, 0x4a, 0x29, 0x08, 0xef, 0xce, + 0xad, 0x8c, 0xa8, 0x89, 0xea, 0xcb, 0x2c, 0x0d, 0x6e, 0x4f, + 0xbd, 0x9c, 0xff, 0xde, 0x39, 0x18, 0x7b, 0x5a, 0x82, 0xa3, + 0xc0, 0xe1, 0x06, 0x27, 0x44, 0x65, 0x97, 0xb6, 0xd5, 0xf4, + 0x13, 0x32, 0x51, 0x70, 0xfc, 0xdd, 0xbe, 0x9f, 0x78, 0x59, + 0x3a, 0x1b, 0xe9, 0xc8, 0xab, 0x8a, 0x6d, 0x4c, 0x2f, 0x0e, + 0xd6, 0xf7, 0x94, 0xb5, 0x52, 0x73, 0x10, 0x31, 0xc3, 0xe2, + 0x81, 0xa0, 0x47, 0x66, 0x05, 0x24, 0x4d, 0x6c, 0x0f, 0x2e, + 0xc9, 0xe8, 0x8b, 0xaa, 0x58, 0x79, 0x1a, 0x3b, 0xdc, 0xfd, + 0x9e, 0xbf, 0x67, 0x46, 0x25, 0x04, 0xe3, 0xc2, 0xa1, 0x80, + 0x72, 0x53, 0x30, 
0x11, 0xf6, 0xd7, 0xb4, 0x95, 0x19, 0x38, + 0x5b, 0x7a, 0x9d, 0xbc, 0xdf, 0xfe, 0x0c, 0x2d, 0x4e, 0x6f, + 0x88, 0xa9, 0xca, 0xeb, 0x33, 0x12, 0x71, 0x50, 0xb7, 0x96, + 0xf5, 0xd4, 0x26, 0x07, 0x64, 0x45, 0xa2, 0x83, 0xe0, 0xc1, + 0xe5, 0xc4, 0xa7, 0x86, 0x61, 0x40, 0x23, 0x02, 0xf0, 0xd1, + 0xb2, 0x93, 0x74, 0x55, 0x36, 0x17, 0xcf, 0xee, 0x8d, 0xac, + 0x4b, 0x6a, 0x09, 0x28, 0xda, 0xfb, 0x98, 0xb9, 0x5e, 0x7f, + 0x1c, 0x3d, 0xb1, 0x90, 0xf3, 0xd2, 0x35, 0x14, 0x77, 0x56, + 0xa4, 0x85, 0xe6, 0xc7, 0x20, 0x01, 0x62, 0x43, 0x9b, 0xba, + 0xd9, 0xf8, 0x1f, 0x3e, 0x5d, 0x7c, 0x8e, 0xaf, 0xcc, 0xed, + 0x0a, 0x2b, 0x48, 0x69, 0x00, 0x22, 0x44, 0x66, 0x88, 0xaa, + 0xcc, 0xee, 0x0d, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3, + 0x1a, 0x38, 0x5e, 0x7c, 0x92, 0xb0, 0xd6, 0xf4, 0x17, 0x35, + 0x53, 0x71, 0x9f, 0xbd, 0xdb, 0xf9, 0x34, 0x16, 0x70, 0x52, + 0xbc, 0x9e, 0xf8, 0xda, 0x39, 0x1b, 0x7d, 0x5f, 0xb1, 0x93, + 0xf5, 0xd7, 0x2e, 0x0c, 0x6a, 0x48, 0xa6, 0x84, 0xe2, 0xc0, + 0x23, 0x01, 0x67, 0x45, 0xab, 0x89, 0xef, 0xcd, 0x68, 0x4a, + 0x2c, 0x0e, 0xe0, 0xc2, 0xa4, 0x86, 0x65, 0x47, 0x21, 0x03, + 0xed, 0xcf, 0xa9, 0x8b, 0x72, 0x50, 0x36, 0x14, 0xfa, 0xd8, + 0xbe, 0x9c, 0x7f, 0x5d, 0x3b, 0x19, 0xf7, 0xd5, 0xb3, 0x91, + 0x5c, 0x7e, 0x18, 0x3a, 0xd4, 0xf6, 0x90, 0xb2, 0x51, 0x73, + 0x15, 0x37, 0xd9, 0xfb, 0x9d, 0xbf, 0x46, 0x64, 0x02, 0x20, + 0xce, 0xec, 0x8a, 0xa8, 0x4b, 0x69, 0x0f, 0x2d, 0xc3, 0xe1, + 0x87, 0xa5, 0xd0, 0xf2, 0x94, 0xb6, 0x58, 0x7a, 0x1c, 0x3e, + 0xdd, 0xff, 0x99, 0xbb, 0x55, 0x77, 0x11, 0x33, 0xca, 0xe8, + 0x8e, 0xac, 0x42, 0x60, 0x06, 0x24, 0xc7, 0xe5, 0x83, 0xa1, + 0x4f, 0x6d, 0x0b, 0x29, 0xe4, 0xc6, 0xa0, 0x82, 0x6c, 0x4e, + 0x28, 0x0a, 0xe9, 0xcb, 0xad, 0x8f, 0x61, 0x43, 0x25, 0x07, + 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0xf3, 0xd1, + 0xb7, 0x95, 0x7b, 0x59, 0x3f, 0x1d, 0xb8, 0x9a, 0xfc, 0xde, + 0x30, 0x12, 0x74, 0x56, 0xb5, 0x97, 0xf1, 0xd3, 0x3d, 0x1f, + 0x79, 0x5b, 0xa2, 0x80, 0xe6, 0xc4, 0x2a, 0x08, 0x6e, 0x4c, + 0xaf, 0x8d, 0xeb, 0xc9, 0x27, 0x05, 0x63, 0x41, 0x8c, 0xae, + 0xc8, 0xea, 0x04, 0x26, 0x40, 0x62, 0x81, 0xa3, 0xc5, 0xe7, + 0x09, 0x2b, 0x4d, 0x6f, 0x96, 0xb4, 0xd2, 0xf0, 0x1e, 0x3c, + 0x5a, 0x78, 0x9b, 0xb9, 0xdf, 0xfd, 0x13, 0x31, 0x57, 0x75, + 0x00, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x05, 0x26, + 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec, 0x0a, 0x29, 0x4c, 0x6f, + 0x86, 0xa5, 0xc0, 0xe3, 0x0f, 0x2c, 0x49, 0x6a, 0x83, 0xa0, + 0xc5, 0xe6, 0x14, 0x37, 0x52, 0x71, 0x98, 0xbb, 0xde, 0xfd, + 0x11, 0x32, 0x57, 0x74, 0x9d, 0xbe, 0xdb, 0xf8, 0x1e, 0x3d, + 0x58, 0x7b, 0x92, 0xb1, 0xd4, 0xf7, 0x1b, 0x38, 0x5d, 0x7e, + 0x97, 0xb4, 0xd1, 0xf2, 0x28, 0x0b, 0x6e, 0x4d, 0xa4, 0x87, + 0xe2, 0xc1, 0x2d, 0x0e, 0x6b, 0x48, 0xa1, 0x82, 0xe7, 0xc4, + 0x22, 0x01, 0x64, 0x47, 0xae, 0x8d, 0xe8, 0xcb, 0x27, 0x04, + 0x61, 0x42, 0xab, 0x88, 0xed, 0xce, 0x3c, 0x1f, 0x7a, 0x59, + 0xb0, 0x93, 0xf6, 0xd5, 0x39, 0x1a, 0x7f, 0x5c, 0xb5, 0x96, + 0xf3, 0xd0, 0x36, 0x15, 0x70, 0x53, 0xba, 0x99, 0xfc, 0xdf, + 0x33, 0x10, 0x75, 0x56, 0xbf, 0x9c, 0xf9, 0xda, 0x50, 0x73, + 0x16, 0x35, 0xdc, 0xff, 0x9a, 0xb9, 0x55, 0x76, 0x13, 0x30, + 0xd9, 0xfa, 0x9f, 0xbc, 0x5a, 0x79, 0x1c, 0x3f, 0xd6, 0xf5, + 0x90, 0xb3, 0x5f, 0x7c, 0x19, 0x3a, 0xd3, 0xf0, 0x95, 0xb6, + 0x44, 0x67, 0x02, 0x21, 0xc8, 0xeb, 0x8e, 0xad, 0x41, 0x62, + 0x07, 0x24, 0xcd, 0xee, 0x8b, 0xa8, 0x4e, 0x6d, 0x08, 0x2b, + 0xc2, 0xe1, 0x84, 0xa7, 0x4b, 0x68, 0x0d, 0x2e, 0xc7, 0xe4, + 0x81, 0xa2, 0x78, 0x5b, 0x3e, 0x1d, 0xf4, 0xd7, 0xb2, 0x91, + 0x7d, 0x5e, 0x3b, 0x18, 0xf1, 0xd2, 0xb7, 0x94, 0x72, 0x51, + 0x34, 0x17, 0xfe, 0xdd, 0xb8, 0x9b, 
0x77, 0x54, 0x31, 0x12, + 0xfb, 0xd8, 0xbd, 0x9e, 0x6c, 0x4f, 0x2a, 0x09, 0xe0, 0xc3, + 0xa6, 0x85, 0x69, 0x4a, 0x2f, 0x0c, 0xe5, 0xc6, 0xa3, 0x80, + 0x66, 0x45, 0x20, 0x03, 0xea, 0xc9, 0xac, 0x8f, 0x63, 0x40, + 0x25, 0x06, 0xef, 0xcc, 0xa9, 0x8a, 0x00, 0x24, 0x48, 0x6c, + 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, + 0xe5, 0xc1, 0x7a, 0x5e, 0x32, 0x16, 0xea, 0xce, 0xa2, 0x86, + 0x47, 0x63, 0x0f, 0x2b, 0xd7, 0xf3, 0x9f, 0xbb, 0xf4, 0xd0, + 0xbc, 0x98, 0x64, 0x40, 0x2c, 0x08, 0xc9, 0xed, 0x81, 0xa5, + 0x59, 0x7d, 0x11, 0x35, 0x8e, 0xaa, 0xc6, 0xe2, 0x1e, 0x3a, + 0x56, 0x72, 0xb3, 0x97, 0xfb, 0xdf, 0x23, 0x07, 0x6b, 0x4f, + 0xf5, 0xd1, 0xbd, 0x99, 0x65, 0x41, 0x2d, 0x09, 0xc8, 0xec, + 0x80, 0xa4, 0x58, 0x7c, 0x10, 0x34, 0x8f, 0xab, 0xc7, 0xe3, + 0x1f, 0x3b, 0x57, 0x73, 0xb2, 0x96, 0xfa, 0xde, 0x22, 0x06, + 0x6a, 0x4e, 0x01, 0x25, 0x49, 0x6d, 0x91, 0xb5, 0xd9, 0xfd, + 0x3c, 0x18, 0x74, 0x50, 0xac, 0x88, 0xe4, 0xc0, 0x7b, 0x5f, + 0x33, 0x17, 0xeb, 0xcf, 0xa3, 0x87, 0x46, 0x62, 0x0e, 0x2a, + 0xd6, 0xf2, 0x9e, 0xba, 0xf7, 0xd3, 0xbf, 0x9b, 0x67, 0x43, + 0x2f, 0x0b, 0xca, 0xee, 0x82, 0xa6, 0x5a, 0x7e, 0x12, 0x36, + 0x8d, 0xa9, 0xc5, 0xe1, 0x1d, 0x39, 0x55, 0x71, 0xb0, 0x94, + 0xf8, 0xdc, 0x20, 0x04, 0x68, 0x4c, 0x03, 0x27, 0x4b, 0x6f, + 0x93, 0xb7, 0xdb, 0xff, 0x3e, 0x1a, 0x76, 0x52, 0xae, 0x8a, + 0xe6, 0xc2, 0x79, 0x5d, 0x31, 0x15, 0xe9, 0xcd, 0xa1, 0x85, + 0x44, 0x60, 0x0c, 0x28, 0xd4, 0xf0, 0x9c, 0xb8, 0x02, 0x26, + 0x4a, 0x6e, 0x92, 0xb6, 0xda, 0xfe, 0x3f, 0x1b, 0x77, 0x53, + 0xaf, 0x8b, 0xe7, 0xc3, 0x78, 0x5c, 0x30, 0x14, 0xe8, 0xcc, + 0xa0, 0x84, 0x45, 0x61, 0x0d, 0x29, 0xd5, 0xf1, 0x9d, 0xb9, + 0xf6, 0xd2, 0xbe, 0x9a, 0x66, 0x42, 0x2e, 0x0a, 0xcb, 0xef, + 0x83, 0xa7, 0x5b, 0x7f, 0x13, 0x37, 0x8c, 0xa8, 0xc4, 0xe0, + 0x1c, 0x38, 0x54, 0x70, 0xb1, 0x95, 0xf9, 0xdd, 0x21, 0x05, + 0x69, 0x4d, 0x00, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, + 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce, 0x6a, 0x4f, + 0x20, 0x05, 0xfe, 0xdb, 0xb4, 0x91, 0x5f, 0x7a, 0x15, 0x30, + 0xcb, 0xee, 0x81, 0xa4, 0xd4, 0xf1, 0x9e, 0xbb, 0x40, 0x65, + 0x0a, 0x2f, 0xe1, 0xc4, 0xab, 0x8e, 0x75, 0x50, 0x3f, 0x1a, + 0xbe, 0x9b, 0xf4, 0xd1, 0x2a, 0x0f, 0x60, 0x45, 0x8b, 0xae, + 0xc1, 0xe4, 0x1f, 0x3a, 0x55, 0x70, 0xb5, 0x90, 0xff, 0xda, + 0x21, 0x04, 0x6b, 0x4e, 0x80, 0xa5, 0xca, 0xef, 0x14, 0x31, + 0x5e, 0x7b, 0xdf, 0xfa, 0x95, 0xb0, 0x4b, 0x6e, 0x01, 0x24, + 0xea, 0xcf, 0xa0, 0x85, 0x7e, 0x5b, 0x34, 0x11, 0x61, 0x44, + 0x2b, 0x0e, 0xf5, 0xd0, 0xbf, 0x9a, 0x54, 0x71, 0x1e, 0x3b, + 0xc0, 0xe5, 0x8a, 0xaf, 0x0b, 0x2e, 0x41, 0x64, 0x9f, 0xba, + 0xd5, 0xf0, 0x3e, 0x1b, 0x74, 0x51, 0xaa, 0x8f, 0xe0, 0xc5, + 0x77, 0x52, 0x3d, 0x18, 0xe3, 0xc6, 0xa9, 0x8c, 0x42, 0x67, + 0x08, 0x2d, 0xd6, 0xf3, 0x9c, 0xb9, 0x1d, 0x38, 0x57, 0x72, + 0x89, 0xac, 0xc3, 0xe6, 0x28, 0x0d, 0x62, 0x47, 0xbc, 0x99, + 0xf6, 0xd3, 0xa3, 0x86, 0xe9, 0xcc, 0x37, 0x12, 0x7d, 0x58, + 0x96, 0xb3, 0xdc, 0xf9, 0x02, 0x27, 0x48, 0x6d, 0xc9, 0xec, + 0x83, 0xa6, 0x5d, 0x78, 0x17, 0x32, 0xfc, 0xd9, 0xb6, 0x93, + 0x68, 0x4d, 0x22, 0x07, 0xc2, 0xe7, 0x88, 0xad, 0x56, 0x73, + 0x1c, 0x39, 0xf7, 0xd2, 0xbd, 0x98, 0x63, 0x46, 0x29, 0x0c, + 0xa8, 0x8d, 0xe2, 0xc7, 0x3c, 0x19, 0x76, 0x53, 0x9d, 0xb8, + 0xd7, 0xf2, 0x09, 0x2c, 0x43, 0x66, 0x16, 0x33, 0x5c, 0x79, + 0x82, 0xa7, 0xc8, 0xed, 0x23, 0x06, 0x69, 0x4c, 0xb7, 0x92, + 0xfd, 0xd8, 0x7c, 0x59, 0x36, 0x13, 0xe8, 0xcd, 0xa2, 0x87, + 0x49, 0x6c, 0x03, 0x26, 0xdd, 0xf8, 0x97, 0xb2, 0x00, 0x26, + 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0x0b, 0x61, 0x47, + 0xb5, 0x93, 0xf9, 0xdf, 0x5a, 0x7c, 0x16, 0x30, 0xc2, 
0xe4, + 0x8e, 0xa8, 0x77, 0x51, 0x3b, 0x1d, 0xef, 0xc9, 0xa3, 0x85, + 0xb4, 0x92, 0xf8, 0xde, 0x2c, 0x0a, 0x60, 0x46, 0x99, 0xbf, + 0xd5, 0xf3, 0x01, 0x27, 0x4d, 0x6b, 0xee, 0xc8, 0xa2, 0x84, + 0x76, 0x50, 0x3a, 0x1c, 0xc3, 0xe5, 0x8f, 0xa9, 0x5b, 0x7d, + 0x17, 0x31, 0x75, 0x53, 0x39, 0x1f, 0xed, 0xcb, 0xa1, 0x87, + 0x58, 0x7e, 0x14, 0x32, 0xc0, 0xe6, 0x8c, 0xaa, 0x2f, 0x09, + 0x63, 0x45, 0xb7, 0x91, 0xfb, 0xdd, 0x02, 0x24, 0x4e, 0x68, + 0x9a, 0xbc, 0xd6, 0xf0, 0xc1, 0xe7, 0x8d, 0xab, 0x59, 0x7f, + 0x15, 0x33, 0xec, 0xca, 0xa0, 0x86, 0x74, 0x52, 0x38, 0x1e, + 0x9b, 0xbd, 0xd7, 0xf1, 0x03, 0x25, 0x4f, 0x69, 0xb6, 0x90, + 0xfa, 0xdc, 0x2e, 0x08, 0x62, 0x44, 0xea, 0xcc, 0xa6, 0x80, + 0x72, 0x54, 0x3e, 0x18, 0xc7, 0xe1, 0x8b, 0xad, 0x5f, 0x79, + 0x13, 0x35, 0xb0, 0x96, 0xfc, 0xda, 0x28, 0x0e, 0x64, 0x42, + 0x9d, 0xbb, 0xd1, 0xf7, 0x05, 0x23, 0x49, 0x6f, 0x5e, 0x78, + 0x12, 0x34, 0xc6, 0xe0, 0x8a, 0xac, 0x73, 0x55, 0x3f, 0x19, + 0xeb, 0xcd, 0xa7, 0x81, 0x04, 0x22, 0x48, 0x6e, 0x9c, 0xba, + 0xd0, 0xf6, 0x29, 0x0f, 0x65, 0x43, 0xb1, 0x97, 0xfd, 0xdb, + 0x9f, 0xb9, 0xd3, 0xf5, 0x07, 0x21, 0x4b, 0x6d, 0xb2, 0x94, + 0xfe, 0xd8, 0x2a, 0x0c, 0x66, 0x40, 0xc5, 0xe3, 0x89, 0xaf, + 0x5d, 0x7b, 0x11, 0x37, 0xe8, 0xce, 0xa4, 0x82, 0x70, 0x56, + 0x3c, 0x1a, 0x2b, 0x0d, 0x67, 0x41, 0xb3, 0x95, 0xff, 0xd9, + 0x06, 0x20, 0x4a, 0x6c, 0x9e, 0xb8, 0xd2, 0xf4, 0x71, 0x57, + 0x3d, 0x1b, 0xe9, 0xcf, 0xa5, 0x83, 0x5c, 0x7a, 0x10, 0x36, + 0xc4, 0xe2, 0x88, 0xae, 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, + 0xd2, 0xf5, 0x25, 0x02, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0, + 0x4a, 0x6d, 0x04, 0x23, 0xd6, 0xf1, 0x98, 0xbf, 0x6f, 0x48, + 0x21, 0x06, 0xf3, 0xd4, 0xbd, 0x9a, 0x94, 0xb3, 0xda, 0xfd, + 0x08, 0x2f, 0x46, 0x61, 0xb1, 0x96, 0xff, 0xd8, 0x2d, 0x0a, + 0x63, 0x44, 0xde, 0xf9, 0x90, 0xb7, 0x42, 0x65, 0x0c, 0x2b, + 0xfb, 0xdc, 0xb5, 0x92, 0x67, 0x40, 0x29, 0x0e, 0x35, 0x12, + 0x7b, 0x5c, 0xa9, 0x8e, 0xe7, 0xc0, 0x10, 0x37, 0x5e, 0x79, + 0x8c, 0xab, 0xc2, 0xe5, 0x7f, 0x58, 0x31, 0x16, 0xe3, 0xc4, + 0xad, 0x8a, 0x5a, 0x7d, 0x14, 0x33, 0xc6, 0xe1, 0x88, 0xaf, + 0xa1, 0x86, 0xef, 0xc8, 0x3d, 0x1a, 0x73, 0x54, 0x84, 0xa3, + 0xca, 0xed, 0x18, 0x3f, 0x56, 0x71, 0xeb, 0xcc, 0xa5, 0x82, + 0x77, 0x50, 0x39, 0x1e, 0xce, 0xe9, 0x80, 0xa7, 0x52, 0x75, + 0x1c, 0x3b, 0x6a, 0x4d, 0x24, 0x03, 0xf6, 0xd1, 0xb8, 0x9f, + 0x4f, 0x68, 0x01, 0x26, 0xd3, 0xf4, 0x9d, 0xba, 0x20, 0x07, + 0x6e, 0x49, 0xbc, 0x9b, 0xf2, 0xd5, 0x05, 0x22, 0x4b, 0x6c, + 0x99, 0xbe, 0xd7, 0xf0, 0xfe, 0xd9, 0xb0, 0x97, 0x62, 0x45, + 0x2c, 0x0b, 0xdb, 0xfc, 0x95, 0xb2, 0x47, 0x60, 0x09, 0x2e, + 0xb4, 0x93, 0xfa, 0xdd, 0x28, 0x0f, 0x66, 0x41, 0x91, 0xb6, + 0xdf, 0xf8, 0x0d, 0x2a, 0x43, 0x64, 0x5f, 0x78, 0x11, 0x36, + 0xc3, 0xe4, 0x8d, 0xaa, 0x7a, 0x5d, 0x34, 0x13, 0xe6, 0xc1, + 0xa8, 0x8f, 0x15, 0x32, 0x5b, 0x7c, 0x89, 0xae, 0xc7, 0xe0, + 0x30, 0x17, 0x7e, 0x59, 0xac, 0x8b, 0xe2, 0xc5, 0xcb, 0xec, + 0x85, 0xa2, 0x57, 0x70, 0x19, 0x3e, 0xee, 0xc9, 0xa0, 0x87, + 0x72, 0x55, 0x3c, 0x1b, 0x81, 0xa6, 0xcf, 0xe8, 0x1d, 0x3a, + 0x53, 0x74, 0xa4, 0x83, 0xea, 0xcd, 0x38, 0x1f, 0x76, 0x51, + 0x00, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, + 0x0d, 0x25, 0xfd, 0xd5, 0xad, 0x85, 0xba, 0x92, 0xea, 0xc2, + 0x1a, 0x32, 0x4a, 0x62, 0xe7, 0xcf, 0xb7, 0x9f, 0x47, 0x6f, + 0x17, 0x3f, 0x69, 0x41, 0x39, 0x11, 0xc9, 0xe1, 0x99, 0xb1, + 0x34, 0x1c, 0x64, 0x4c, 0x94, 0xbc, 0xc4, 0xec, 0xd3, 0xfb, + 0x83, 0xab, 0x73, 0x5b, 0x23, 0x0b, 0x8e, 0xa6, 0xde, 0xf6, + 0x2e, 0x06, 0x7e, 0x56, 0xd2, 0xfa, 0x82, 0xaa, 0x72, 0x5a, + 0x22, 0x0a, 0x8f, 0xa7, 0xdf, 0xf7, 0x2f, 0x07, 0x7f, 0x57, + 0x68, 0x40, 
0x38, 0x10, 0xc8, 0xe0, 0x98, 0xb0, 0x35, 0x1d, + 0x65, 0x4d, 0x95, 0xbd, 0xc5, 0xed, 0xbb, 0x93, 0xeb, 0xc3, + 0x1b, 0x33, 0x4b, 0x63, 0xe6, 0xce, 0xb6, 0x9e, 0x46, 0x6e, + 0x16, 0x3e, 0x01, 0x29, 0x51, 0x79, 0xa1, 0x89, 0xf1, 0xd9, + 0x5c, 0x74, 0x0c, 0x24, 0xfc, 0xd4, 0xac, 0x84, 0xb9, 0x91, + 0xe9, 0xc1, 0x19, 0x31, 0x49, 0x61, 0xe4, 0xcc, 0xb4, 0x9c, + 0x44, 0x6c, 0x14, 0x3c, 0x03, 0x2b, 0x53, 0x7b, 0xa3, 0x8b, + 0xf3, 0xdb, 0x5e, 0x76, 0x0e, 0x26, 0xfe, 0xd6, 0xae, 0x86, + 0xd0, 0xf8, 0x80, 0xa8, 0x70, 0x58, 0x20, 0x08, 0x8d, 0xa5, + 0xdd, 0xf5, 0x2d, 0x05, 0x7d, 0x55, 0x6a, 0x42, 0x3a, 0x12, + 0xca, 0xe2, 0x9a, 0xb2, 0x37, 0x1f, 0x67, 0x4f, 0x97, 0xbf, + 0xc7, 0xef, 0x6b, 0x43, 0x3b, 0x13, 0xcb, 0xe3, 0x9b, 0xb3, + 0x36, 0x1e, 0x66, 0x4e, 0x96, 0xbe, 0xc6, 0xee, 0xd1, 0xf9, + 0x81, 0xa9, 0x71, 0x59, 0x21, 0x09, 0x8c, 0xa4, 0xdc, 0xf4, + 0x2c, 0x04, 0x7c, 0x54, 0x02, 0x2a, 0x52, 0x7a, 0xa2, 0x8a, + 0xf2, 0xda, 0x5f, 0x77, 0x0f, 0x27, 0xff, 0xd7, 0xaf, 0x87, + 0xb8, 0x90, 0xe8, 0xc0, 0x18, 0x30, 0x48, 0x60, 0xe5, 0xcd, + 0xb5, 0x9d, 0x45, 0x6d, 0x15, 0x3d, 0x00, 0x29, 0x52, 0x7b, + 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x07, 0x2e, 0xf1, 0xd8, + 0xa3, 0x8a, 0xaa, 0x83, 0xf8, 0xd1, 0x0e, 0x27, 0x5c, 0x75, + 0xff, 0xd6, 0xad, 0x84, 0x5b, 0x72, 0x09, 0x20, 0x49, 0x60, + 0x1b, 0x32, 0xed, 0xc4, 0xbf, 0x96, 0x1c, 0x35, 0x4e, 0x67, + 0xb8, 0x91, 0xea, 0xc3, 0xe3, 0xca, 0xb1, 0x98, 0x47, 0x6e, + 0x15, 0x3c, 0xb6, 0x9f, 0xe4, 0xcd, 0x12, 0x3b, 0x40, 0x69, + 0x92, 0xbb, 0xc0, 0xe9, 0x36, 0x1f, 0x64, 0x4d, 0xc7, 0xee, + 0x95, 0xbc, 0x63, 0x4a, 0x31, 0x18, 0x38, 0x11, 0x6a, 0x43, + 0x9c, 0xb5, 0xce, 0xe7, 0x6d, 0x44, 0x3f, 0x16, 0xc9, 0xe0, + 0x9b, 0xb2, 0xdb, 0xf2, 0x89, 0xa0, 0x7f, 0x56, 0x2d, 0x04, + 0x8e, 0xa7, 0xdc, 0xf5, 0x2a, 0x03, 0x78, 0x51, 0x71, 0x58, + 0x23, 0x0a, 0xd5, 0xfc, 0x87, 0xae, 0x24, 0x0d, 0x76, 0x5f, + 0x80, 0xa9, 0xd2, 0xfb, 0x39, 0x10, 0x6b, 0x42, 0x9d, 0xb4, + 0xcf, 0xe6, 0x6c, 0x45, 0x3e, 0x17, 0xc8, 0xe1, 0x9a, 0xb3, + 0x93, 0xba, 0xc1, 0xe8, 0x37, 0x1e, 0x65, 0x4c, 0xc6, 0xef, + 0x94, 0xbd, 0x62, 0x4b, 0x30, 0x19, 0x70, 0x59, 0x22, 0x0b, + 0xd4, 0xfd, 0x86, 0xaf, 0x25, 0x0c, 0x77, 0x5e, 0x81, 0xa8, + 0xd3, 0xfa, 0xda, 0xf3, 0x88, 0xa1, 0x7e, 0x57, 0x2c, 0x05, + 0x8f, 0xa6, 0xdd, 0xf4, 0x2b, 0x02, 0x79, 0x50, 0xab, 0x82, + 0xf9, 0xd0, 0x0f, 0x26, 0x5d, 0x74, 0xfe, 0xd7, 0xac, 0x85, + 0x5a, 0x73, 0x08, 0x21, 0x01, 0x28, 0x53, 0x7a, 0xa5, 0x8c, + 0xf7, 0xde, 0x54, 0x7d, 0x06, 0x2f, 0xf0, 0xd9, 0xa2, 0x8b, + 0xe2, 0xcb, 0xb0, 0x99, 0x46, 0x6f, 0x14, 0x3d, 0xb7, 0x9e, + 0xe5, 0xcc, 0x13, 0x3a, 0x41, 0x68, 0x48, 0x61, 0x1a, 0x33, + 0xec, 0xc5, 0xbe, 0x97, 0x1d, 0x34, 0x4f, 0x66, 0xb9, 0x90, + 0xeb, 0xc2, 0x00, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, + 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b, 0x9a, 0xb0, + 0xce, 0xe4, 0x32, 0x18, 0x66, 0x4c, 0xd7, 0xfd, 0x83, 0xa9, + 0x7f, 0x55, 0x2b, 0x01, 0x29, 0x03, 0x7d, 0x57, 0x81, 0xab, + 0xd5, 0xff, 0x64, 0x4e, 0x30, 0x1a, 0xcc, 0xe6, 0x98, 0xb2, + 0xb3, 0x99, 0xe7, 0xcd, 0x1b, 0x31, 0x4f, 0x65, 0xfe, 0xd4, + 0xaa, 0x80, 0x56, 0x7c, 0x02, 0x28, 0x52, 0x78, 0x06, 0x2c, + 0xfa, 0xd0, 0xae, 0x84, 0x1f, 0x35, 0x4b, 0x61, 0xb7, 0x9d, + 0xe3, 0xc9, 0xc8, 0xe2, 0x9c, 0xb6, 0x60, 0x4a, 0x34, 0x1e, + 0x85, 0xaf, 0xd1, 0xfb, 0x2d, 0x07, 0x79, 0x53, 0x7b, 0x51, + 0x2f, 0x05, 0xd3, 0xf9, 0x87, 0xad, 0x36, 0x1c, 0x62, 0x48, + 0x9e, 0xb4, 0xca, 0xe0, 0xe1, 0xcb, 0xb5, 0x9f, 0x49, 0x63, + 0x1d, 0x37, 0xac, 0x86, 0xf8, 0xd2, 0x04, 0x2e, 0x50, 0x7a, + 0xa4, 0x8e, 0xf0, 0xda, 0x0c, 0x26, 0x58, 0x72, 0xe9, 0xc3, + 0xbd, 0x97, 0x41, 0x6b, 0x15, 
0x3f, 0x3e, 0x14, 0x6a, 0x40, + 0x96, 0xbc, 0xc2, 0xe8, 0x73, 0x59, 0x27, 0x0d, 0xdb, 0xf1, + 0x8f, 0xa5, 0x8d, 0xa7, 0xd9, 0xf3, 0x25, 0x0f, 0x71, 0x5b, + 0xc0, 0xea, 0x94, 0xbe, 0x68, 0x42, 0x3c, 0x16, 0x17, 0x3d, + 0x43, 0x69, 0xbf, 0x95, 0xeb, 0xc1, 0x5a, 0x70, 0x0e, 0x24, + 0xf2, 0xd8, 0xa6, 0x8c, 0xf6, 0xdc, 0xa2, 0x88, 0x5e, 0x74, + 0x0a, 0x20, 0xbb, 0x91, 0xef, 0xc5, 0x13, 0x39, 0x47, 0x6d, + 0x6c, 0x46, 0x38, 0x12, 0xc4, 0xee, 0x90, 0xba, 0x21, 0x0b, + 0x75, 0x5f, 0x89, 0xa3, 0xdd, 0xf7, 0xdf, 0xf5, 0x8b, 0xa1, + 0x77, 0x5d, 0x23, 0x09, 0x92, 0xb8, 0xc6, 0xec, 0x3a, 0x10, + 0x6e, 0x44, 0x45, 0x6f, 0x11, 0x3b, 0xed, 0xc7, 0xb9, 0x93, + 0x08, 0x22, 0x5c, 0x76, 0xa0, 0x8a, 0xf4, 0xde, 0x00, 0x2b, + 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, + 0xe9, 0xc2, 0xbf, 0x94, 0x8a, 0xa1, 0xdc, 0xf7, 0x26, 0x0d, + 0x70, 0x5b, 0xcf, 0xe4, 0x99, 0xb2, 0x63, 0x48, 0x35, 0x1e, + 0x09, 0x22, 0x5f, 0x74, 0xa5, 0x8e, 0xf3, 0xd8, 0x4c, 0x67, + 0x1a, 0x31, 0xe0, 0xcb, 0xb6, 0x9d, 0x83, 0xa8, 0xd5, 0xfe, + 0x2f, 0x04, 0x79, 0x52, 0xc6, 0xed, 0x90, 0xbb, 0x6a, 0x41, + 0x3c, 0x17, 0x12, 0x39, 0x44, 0x6f, 0xbe, 0x95, 0xe8, 0xc3, + 0x57, 0x7c, 0x01, 0x2a, 0xfb, 0xd0, 0xad, 0x86, 0x98, 0xb3, + 0xce, 0xe5, 0x34, 0x1f, 0x62, 0x49, 0xdd, 0xf6, 0x8b, 0xa0, + 0x71, 0x5a, 0x27, 0x0c, 0x1b, 0x30, 0x4d, 0x66, 0xb7, 0x9c, + 0xe1, 0xca, 0x5e, 0x75, 0x08, 0x23, 0xf2, 0xd9, 0xa4, 0x8f, + 0x91, 0xba, 0xc7, 0xec, 0x3d, 0x16, 0x6b, 0x40, 0xd4, 0xff, + 0x82, 0xa9, 0x78, 0x53, 0x2e, 0x05, 0x24, 0x0f, 0x72, 0x59, + 0x88, 0xa3, 0xde, 0xf5, 0x61, 0x4a, 0x37, 0x1c, 0xcd, 0xe6, + 0x9b, 0xb0, 0xae, 0x85, 0xf8, 0xd3, 0x02, 0x29, 0x54, 0x7f, + 0xeb, 0xc0, 0xbd, 0x96, 0x47, 0x6c, 0x11, 0x3a, 0x2d, 0x06, + 0x7b, 0x50, 0x81, 0xaa, 0xd7, 0xfc, 0x68, 0x43, 0x3e, 0x15, + 0xc4, 0xef, 0x92, 0xb9, 0xa7, 0x8c, 0xf1, 0xda, 0x0b, 0x20, + 0x5d, 0x76, 0xe2, 0xc9, 0xb4, 0x9f, 0x4e, 0x65, 0x18, 0x33, + 0x36, 0x1d, 0x60, 0x4b, 0x9a, 0xb1, 0xcc, 0xe7, 0x73, 0x58, + 0x25, 0x0e, 0xdf, 0xf4, 0x89, 0xa2, 0xbc, 0x97, 0xea, 0xc1, + 0x10, 0x3b, 0x46, 0x6d, 0xf9, 0xd2, 0xaf, 0x84, 0x55, 0x7e, + 0x03, 0x28, 0x3f, 0x14, 0x69, 0x42, 0x93, 0xb8, 0xc5, 0xee, + 0x7a, 0x51, 0x2c, 0x07, 0xd6, 0xfd, 0x80, 0xab, 0xb5, 0x9e, + 0xe3, 0xc8, 0x19, 0x32, 0x4f, 0x64, 0xf0, 0xdb, 0xa6, 0x8d, + 0x5c, 0x77, 0x0a, 0x21, 0x00, 0x2c, 0x58, 0x74, 0xb0, 0x9c, + 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x09, 0xcd, 0xe1, 0x95, 0xb9, + 0xfa, 0xd6, 0xa2, 0x8e, 0x4a, 0x66, 0x12, 0x3e, 0x87, 0xab, + 0xdf, 0xf3, 0x37, 0x1b, 0x6f, 0x43, 0xe9, 0xc5, 0xb1, 0x9d, + 0x59, 0x75, 0x01, 0x2d, 0x94, 0xb8, 0xcc, 0xe0, 0x24, 0x08, + 0x7c, 0x50, 0x13, 0x3f, 0x4b, 0x67, 0xa3, 0x8f, 0xfb, 0xd7, + 0x6e, 0x42, 0x36, 0x1a, 0xde, 0xf2, 0x86, 0xaa, 0xcf, 0xe3, + 0x97, 0xbb, 0x7f, 0x53, 0x27, 0x0b, 0xb2, 0x9e, 0xea, 0xc6, + 0x02, 0x2e, 0x5a, 0x76, 0x35, 0x19, 0x6d, 0x41, 0x85, 0xa9, + 0xdd, 0xf1, 0x48, 0x64, 0x10, 0x3c, 0xf8, 0xd4, 0xa0, 0x8c, + 0x26, 0x0a, 0x7e, 0x52, 0x96, 0xba, 0xce, 0xe2, 0x5b, 0x77, + 0x03, 0x2f, 0xeb, 0xc7, 0xb3, 0x9f, 0xdc, 0xf0, 0x84, 0xa8, + 0x6c, 0x40, 0x34, 0x18, 0xa1, 0x8d, 0xf9, 0xd5, 0x11, 0x3d, + 0x49, 0x65, 0x83, 0xaf, 0xdb, 0xf7, 0x33, 0x1f, 0x6b, 0x47, + 0xfe, 0xd2, 0xa6, 0x8a, 0x4e, 0x62, 0x16, 0x3a, 0x79, 0x55, + 0x21, 0x0d, 0xc9, 0xe5, 0x91, 0xbd, 0x04, 0x28, 0x5c, 0x70, + 0xb4, 0x98, 0xec, 0xc0, 0x6a, 0x46, 0x32, 0x1e, 0xda, 0xf6, + 0x82, 0xae, 0x17, 0x3b, 0x4f, 0x63, 0xa7, 0x8b, 0xff, 0xd3, + 0x90, 0xbc, 0xc8, 0xe4, 0x20, 0x0c, 0x78, 0x54, 0xed, 0xc1, + 0xb5, 0x99, 0x5d, 0x71, 0x05, 0x29, 0x4c, 0x60, 0x14, 0x38, + 0xfc, 0xd0, 0xa4, 0x88, 0x31, 0x1d, 0x69, 0x45, 
0x81, 0xad, + 0xd9, 0xf5, 0xb6, 0x9a, 0xee, 0xc2, 0x06, 0x2a, 0x5e, 0x72, + 0xcb, 0xe7, 0x93, 0xbf, 0x7b, 0x57, 0x23, 0x0f, 0xa5, 0x89, + 0xfd, 0xd1, 0x15, 0x39, 0x4d, 0x61, 0xd8, 0xf4, 0x80, 0xac, + 0x68, 0x44, 0x30, 0x1c, 0x5f, 0x73, 0x07, 0x2b, 0xef, 0xc3, + 0xb7, 0x9b, 0x22, 0x0e, 0x7a, 0x56, 0x92, 0xbe, 0xca, 0xe6, + 0x00, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, + 0x2f, 0x02, 0xc1, 0xec, 0x9b, 0xb6, 0xea, 0xc7, 0xb0, 0x9d, + 0x5e, 0x73, 0x04, 0x29, 0x9f, 0xb2, 0xc5, 0xe8, 0x2b, 0x06, + 0x71, 0x5c, 0xc9, 0xe4, 0x93, 0xbe, 0x7d, 0x50, 0x27, 0x0a, + 0xbc, 0x91, 0xe6, 0xcb, 0x08, 0x25, 0x52, 0x7f, 0x23, 0x0e, + 0x79, 0x54, 0x97, 0xba, 0xcd, 0xe0, 0x56, 0x7b, 0x0c, 0x21, + 0xe2, 0xcf, 0xb8, 0x95, 0x8f, 0xa2, 0xd5, 0xf8, 0x3b, 0x16, + 0x61, 0x4c, 0xfa, 0xd7, 0xa0, 0x8d, 0x4e, 0x63, 0x14, 0x39, + 0x65, 0x48, 0x3f, 0x12, 0xd1, 0xfc, 0x8b, 0xa6, 0x10, 0x3d, + 0x4a, 0x67, 0xa4, 0x89, 0xfe, 0xd3, 0x46, 0x6b, 0x1c, 0x31, + 0xf2, 0xdf, 0xa8, 0x85, 0x33, 0x1e, 0x69, 0x44, 0x87, 0xaa, + 0xdd, 0xf0, 0xac, 0x81, 0xf6, 0xdb, 0x18, 0x35, 0x42, 0x6f, + 0xd9, 0xf4, 0x83, 0xae, 0x6d, 0x40, 0x37, 0x1a, 0x03, 0x2e, + 0x59, 0x74, 0xb7, 0x9a, 0xed, 0xc0, 0x76, 0x5b, 0x2c, 0x01, + 0xc2, 0xef, 0x98, 0xb5, 0xe9, 0xc4, 0xb3, 0x9e, 0x5d, 0x70, + 0x07, 0x2a, 0x9c, 0xb1, 0xc6, 0xeb, 0x28, 0x05, 0x72, 0x5f, + 0xca, 0xe7, 0x90, 0xbd, 0x7e, 0x53, 0x24, 0x09, 0xbf, 0x92, + 0xe5, 0xc8, 0x0b, 0x26, 0x51, 0x7c, 0x20, 0x0d, 0x7a, 0x57, + 0x94, 0xb9, 0xce, 0xe3, 0x55, 0x78, 0x0f, 0x22, 0xe1, 0xcc, + 0xbb, 0x96, 0x8c, 0xa1, 0xd6, 0xfb, 0x38, 0x15, 0x62, 0x4f, + 0xf9, 0xd4, 0xa3, 0x8e, 0x4d, 0x60, 0x17, 0x3a, 0x66, 0x4b, + 0x3c, 0x11, 0xd2, 0xff, 0x88, 0xa5, 0x13, 0x3e, 0x49, 0x64, + 0xa7, 0x8a, 0xfd, 0xd0, 0x45, 0x68, 0x1f, 0x32, 0xf1, 0xdc, + 0xab, 0x86, 0x30, 0x1d, 0x6a, 0x47, 0x84, 0xa9, 0xde, 0xf3, + 0xaf, 0x82, 0xf5, 0xd8, 0x1b, 0x36, 0x41, 0x6c, 0xda, 0xf7, + 0x80, 0xad, 0x6e, 0x43, 0x34, 0x19, 0x00, 0x2e, 0x5c, 0x72, + 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, + 0x89, 0xa7, 0xda, 0xf4, 0x86, 0xa8, 0x62, 0x4c, 0x3e, 0x10, + 0xb7, 0x99, 0xeb, 0xc5, 0x0f, 0x21, 0x53, 0x7d, 0xa9, 0x87, + 0xf5, 0xdb, 0x11, 0x3f, 0x4d, 0x63, 0xc4, 0xea, 0x98, 0xb6, + 0x7c, 0x52, 0x20, 0x0e, 0x73, 0x5d, 0x2f, 0x01, 0xcb, 0xe5, + 0x97, 0xb9, 0x1e, 0x30, 0x42, 0x6c, 0xa6, 0x88, 0xfa, 0xd4, + 0x4f, 0x61, 0x13, 0x3d, 0xf7, 0xd9, 0xab, 0x85, 0x22, 0x0c, + 0x7e, 0x50, 0x9a, 0xb4, 0xc6, 0xe8, 0x95, 0xbb, 0xc9, 0xe7, + 0x2d, 0x03, 0x71, 0x5f, 0xf8, 0xd6, 0xa4, 0x8a, 0x40, 0x6e, + 0x1c, 0x32, 0xe6, 0xc8, 0xba, 0x94, 0x5e, 0x70, 0x02, 0x2c, + 0x8b, 0xa5, 0xd7, 0xf9, 0x33, 0x1d, 0x6f, 0x41, 0x3c, 0x12, + 0x60, 0x4e, 0x84, 0xaa, 0xd8, 0xf6, 0x51, 0x7f, 0x0d, 0x23, + 0xe9, 0xc7, 0xb5, 0x9b, 0x9e, 0xb0, 0xc2, 0xec, 0x26, 0x08, + 0x7a, 0x54, 0xf3, 0xdd, 0xaf, 0x81, 0x4b, 0x65, 0x17, 0x39, + 0x44, 0x6a, 0x18, 0x36, 0xfc, 0xd2, 0xa0, 0x8e, 0x29, 0x07, + 0x75, 0x5b, 0x91, 0xbf, 0xcd, 0xe3, 0x37, 0x19, 0x6b, 0x45, + 0x8f, 0xa1, 0xd3, 0xfd, 0x5a, 0x74, 0x06, 0x28, 0xe2, 0xcc, + 0xbe, 0x90, 0xed, 0xc3, 0xb1, 0x9f, 0x55, 0x7b, 0x09, 0x27, + 0x80, 0xae, 0xdc, 0xf2, 0x38, 0x16, 0x64, 0x4a, 0xd1, 0xff, + 0x8d, 0xa3, 0x69, 0x47, 0x35, 0x1b, 0xbc, 0x92, 0xe0, 0xce, + 0x04, 0x2a, 0x58, 0x76, 0x0b, 0x25, 0x57, 0x79, 0xb3, 0x9d, + 0xef, 0xc1, 0x66, 0x48, 0x3a, 0x14, 0xde, 0xf0, 0x82, 0xac, + 0x78, 0x56, 0x24, 0x0a, 0xc0, 0xee, 0x9c, 0xb2, 0x15, 0x3b, + 0x49, 0x67, 0xad, 0x83, 0xf1, 0xdf, 0xa2, 0x8c, 0xfe, 0xd0, + 0x1a, 0x34, 0x46, 0x68, 0xcf, 0xe1, 0x93, 0xbd, 0x77, 0x59, + 0x2b, 0x05, 0x00, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, + 0x65, 
0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8, 0xca, 0xe5, + 0x94, 0xbb, 0x76, 0x59, 0x28, 0x07, 0xaf, 0x80, 0xf1, 0xde, + 0x13, 0x3c, 0x4d, 0x62, 0x89, 0xa6, 0xd7, 0xf8, 0x35, 0x1a, + 0x6b, 0x44, 0xec, 0xc3, 0xb2, 0x9d, 0x50, 0x7f, 0x0e, 0x21, + 0x43, 0x6c, 0x1d, 0x32, 0xff, 0xd0, 0xa1, 0x8e, 0x26, 0x09, + 0x78, 0x57, 0x9a, 0xb5, 0xc4, 0xeb, 0x0f, 0x20, 0x51, 0x7e, + 0xb3, 0x9c, 0xed, 0xc2, 0x6a, 0x45, 0x34, 0x1b, 0xd6, 0xf9, + 0x88, 0xa7, 0xc5, 0xea, 0x9b, 0xb4, 0x79, 0x56, 0x27, 0x08, + 0xa0, 0x8f, 0xfe, 0xd1, 0x1c, 0x33, 0x42, 0x6d, 0x86, 0xa9, + 0xd8, 0xf7, 0x3a, 0x15, 0x64, 0x4b, 0xe3, 0xcc, 0xbd, 0x92, + 0x5f, 0x70, 0x01, 0x2e, 0x4c, 0x63, 0x12, 0x3d, 0xf0, 0xdf, + 0xae, 0x81, 0x29, 0x06, 0x77, 0x58, 0x95, 0xba, 0xcb, 0xe4, + 0x1e, 0x31, 0x40, 0x6f, 0xa2, 0x8d, 0xfc, 0xd3, 0x7b, 0x54, + 0x25, 0x0a, 0xc7, 0xe8, 0x99, 0xb6, 0xd4, 0xfb, 0x8a, 0xa5, + 0x68, 0x47, 0x36, 0x19, 0xb1, 0x9e, 0xef, 0xc0, 0x0d, 0x22, + 0x53, 0x7c, 0x97, 0xb8, 0xc9, 0xe6, 0x2b, 0x04, 0x75, 0x5a, + 0xf2, 0xdd, 0xac, 0x83, 0x4e, 0x61, 0x10, 0x3f, 0x5d, 0x72, + 0x03, 0x2c, 0xe1, 0xce, 0xbf, 0x90, 0x38, 0x17, 0x66, 0x49, + 0x84, 0xab, 0xda, 0xf5, 0x11, 0x3e, 0x4f, 0x60, 0xad, 0x82, + 0xf3, 0xdc, 0x74, 0x5b, 0x2a, 0x05, 0xc8, 0xe7, 0x96, 0xb9, + 0xdb, 0xf4, 0x85, 0xaa, 0x67, 0x48, 0x39, 0x16, 0xbe, 0x91, + 0xe0, 0xcf, 0x02, 0x2d, 0x5c, 0x73, 0x98, 0xb7, 0xc6, 0xe9, + 0x24, 0x0b, 0x7a, 0x55, 0xfd, 0xd2, 0xa3, 0x8c, 0x41, 0x6e, + 0x1f, 0x30, 0x52, 0x7d, 0x0c, 0x23, 0xee, 0xc1, 0xb0, 0x9f, + 0x37, 0x18, 0x69, 0x46, 0x8b, 0xa4, 0xd5, 0xfa, 0x00, 0x30, + 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, + 0x5d, 0x6d, 0x3d, 0x0d, 0x27, 0x17, 0x47, 0x77, 0xe7, 0xd7, + 0x87, 0xb7, 0xba, 0x8a, 0xda, 0xea, 0x7a, 0x4a, 0x1a, 0x2a, + 0x4e, 0x7e, 0x2e, 0x1e, 0x8e, 0xbe, 0xee, 0xde, 0xd3, 0xe3, + 0xb3, 0x83, 0x13, 0x23, 0x73, 0x43, 0x69, 0x59, 0x09, 0x39, + 0xa9, 0x99, 0xc9, 0xf9, 0xf4, 0xc4, 0x94, 0xa4, 0x34, 0x04, + 0x54, 0x64, 0x9c, 0xac, 0xfc, 0xcc, 0x5c, 0x6c, 0x3c, 0x0c, + 0x01, 0x31, 0x61, 0x51, 0xc1, 0xf1, 0xa1, 0x91, 0xbb, 0x8b, + 0xdb, 0xeb, 0x7b, 0x4b, 0x1b, 0x2b, 0x26, 0x16, 0x46, 0x76, + 0xe6, 0xd6, 0x86, 0xb6, 0xd2, 0xe2, 0xb2, 0x82, 0x12, 0x22, + 0x72, 0x42, 0x4f, 0x7f, 0x2f, 0x1f, 0x8f, 0xbf, 0xef, 0xdf, + 0xf5, 0xc5, 0x95, 0xa5, 0x35, 0x05, 0x55, 0x65, 0x68, 0x58, + 0x08, 0x38, 0xa8, 0x98, 0xc8, 0xf8, 0x25, 0x15, 0x45, 0x75, + 0xe5, 0xd5, 0x85, 0xb5, 0xb8, 0x88, 0xd8, 0xe8, 0x78, 0x48, + 0x18, 0x28, 0x02, 0x32, 0x62, 0x52, 0xc2, 0xf2, 0xa2, 0x92, + 0x9f, 0xaf, 0xff, 0xcf, 0x5f, 0x6f, 0x3f, 0x0f, 0x6b, 0x5b, + 0x0b, 0x3b, 0xab, 0x9b, 0xcb, 0xfb, 0xf6, 0xc6, 0x96, 0xa6, + 0x36, 0x06, 0x56, 0x66, 0x4c, 0x7c, 0x2c, 0x1c, 0x8c, 0xbc, + 0xec, 0xdc, 0xd1, 0xe1, 0xb1, 0x81, 0x11, 0x21, 0x71, 0x41, + 0xb9, 0x89, 0xd9, 0xe9, 0x79, 0x49, 0x19, 0x29, 0x24, 0x14, + 0x44, 0x74, 0xe4, 0xd4, 0x84, 0xb4, 0x9e, 0xae, 0xfe, 0xce, + 0x5e, 0x6e, 0x3e, 0x0e, 0x03, 0x33, 0x63, 0x53, 0xc3, 0xf3, + 0xa3, 0x93, 0xf7, 0xc7, 0x97, 0xa7, 0x37, 0x07, 0x57, 0x67, + 0x6a, 0x5a, 0x0a, 0x3a, 0xaa, 0x9a, 0xca, 0xfa, 0xd0, 0xe0, + 0xb0, 0x80, 0x10, 0x20, 0x70, 0x40, 0x4d, 0x7d, 0x2d, 0x1d, + 0x8d, 0xbd, 0xed, 0xdd, 0x00, 0x31, 0x62, 0x53, 0xc4, 0xf5, + 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x02, + 0x37, 0x06, 0x55, 0x64, 0xf3, 0xc2, 0x91, 0xa0, 0xa2, 0x93, + 0xc0, 0xf1, 0x66, 0x57, 0x04, 0x35, 0x6e, 0x5f, 0x0c, 0x3d, + 0xaa, 0x9b, 0xc8, 0xf9, 0xfb, 0xca, 0x99, 0xa8, 0x3f, 0x0e, + 0x5d, 0x6c, 0x59, 0x68, 0x3b, 0x0a, 0x9d, 0xac, 0xff, 0xce, + 0xcc, 0xfd, 0xae, 0x9f, 0x08, 0x39, 0x6a, 0x5b, 0xdc, 0xed, + 0xbe, 0x8f, 0x18, 0x29, 
0x7a, 0x4b, 0x49, 0x78, 0x2b, 0x1a, + 0x8d, 0xbc, 0xef, 0xde, 0xeb, 0xda, 0x89, 0xb8, 0x2f, 0x1e, + 0x4d, 0x7c, 0x7e, 0x4f, 0x1c, 0x2d, 0xba, 0x8b, 0xd8, 0xe9, + 0xb2, 0x83, 0xd0, 0xe1, 0x76, 0x47, 0x14, 0x25, 0x27, 0x16, + 0x45, 0x74, 0xe3, 0xd2, 0x81, 0xb0, 0x85, 0xb4, 0xe7, 0xd6, + 0x41, 0x70, 0x23, 0x12, 0x10, 0x21, 0x72, 0x43, 0xd4, 0xe5, + 0xb6, 0x87, 0xa5, 0x94, 0xc7, 0xf6, 0x61, 0x50, 0x03, 0x32, + 0x30, 0x01, 0x52, 0x63, 0xf4, 0xc5, 0x96, 0xa7, 0x92, 0xa3, + 0xf0, 0xc1, 0x56, 0x67, 0x34, 0x05, 0x07, 0x36, 0x65, 0x54, + 0xc3, 0xf2, 0xa1, 0x90, 0xcb, 0xfa, 0xa9, 0x98, 0x0f, 0x3e, + 0x6d, 0x5c, 0x5e, 0x6f, 0x3c, 0x0d, 0x9a, 0xab, 0xf8, 0xc9, + 0xfc, 0xcd, 0x9e, 0xaf, 0x38, 0x09, 0x5a, 0x6b, 0x69, 0x58, + 0x0b, 0x3a, 0xad, 0x9c, 0xcf, 0xfe, 0x79, 0x48, 0x1b, 0x2a, + 0xbd, 0x8c, 0xdf, 0xee, 0xec, 0xdd, 0x8e, 0xbf, 0x28, 0x19, + 0x4a, 0x7b, 0x4e, 0x7f, 0x2c, 0x1d, 0x8a, 0xbb, 0xe8, 0xd9, + 0xdb, 0xea, 0xb9, 0x88, 0x1f, 0x2e, 0x7d, 0x4c, 0x17, 0x26, + 0x75, 0x44, 0xd3, 0xe2, 0xb1, 0x80, 0x82, 0xb3, 0xe0, 0xd1, + 0x46, 0x77, 0x24, 0x15, 0x20, 0x11, 0x42, 0x73, 0xe4, 0xd5, + 0x86, 0xb7, 0xb5, 0x84, 0xd7, 0xe6, 0x71, 0x40, 0x13, 0x22, + 0x00, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, + 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13, 0x07, 0x35, 0x63, 0x51, + 0xcf, 0xfd, 0xab, 0x99, 0x8a, 0xb8, 0xee, 0xdc, 0x42, 0x70, + 0x26, 0x14, 0x0e, 0x3c, 0x6a, 0x58, 0xc6, 0xf4, 0xa2, 0x90, + 0x83, 0xb1, 0xe7, 0xd5, 0x4b, 0x79, 0x2f, 0x1d, 0x09, 0x3b, + 0x6d, 0x5f, 0xc1, 0xf3, 0xa5, 0x97, 0x84, 0xb6, 0xe0, 0xd2, + 0x4c, 0x7e, 0x28, 0x1a, 0x1c, 0x2e, 0x78, 0x4a, 0xd4, 0xe6, + 0xb0, 0x82, 0x91, 0xa3, 0xf5, 0xc7, 0x59, 0x6b, 0x3d, 0x0f, + 0x1b, 0x29, 0x7f, 0x4d, 0xd3, 0xe1, 0xb7, 0x85, 0x96, 0xa4, + 0xf2, 0xc0, 0x5e, 0x6c, 0x3a, 0x08, 0x12, 0x20, 0x76, 0x44, + 0xda, 0xe8, 0xbe, 0x8c, 0x9f, 0xad, 0xfb, 0xc9, 0x57, 0x65, + 0x33, 0x01, 0x15, 0x27, 0x71, 0x43, 0xdd, 0xef, 0xb9, 0x8b, + 0x98, 0xaa, 0xfc, 0xce, 0x50, 0x62, 0x34, 0x06, 0x38, 0x0a, + 0x5c, 0x6e, 0xf0, 0xc2, 0x94, 0xa6, 0xb5, 0x87, 0xd1, 0xe3, + 0x7d, 0x4f, 0x19, 0x2b, 0x3f, 0x0d, 0x5b, 0x69, 0xf7, 0xc5, + 0x93, 0xa1, 0xb2, 0x80, 0xd6, 0xe4, 0x7a, 0x48, 0x1e, 0x2c, + 0x36, 0x04, 0x52, 0x60, 0xfe, 0xcc, 0x9a, 0xa8, 0xbb, 0x89, + 0xdf, 0xed, 0x73, 0x41, 0x17, 0x25, 0x31, 0x03, 0x55, 0x67, + 0xf9, 0xcb, 0x9d, 0xaf, 0xbc, 0x8e, 0xd8, 0xea, 0x74, 0x46, + 0x10, 0x22, 0x24, 0x16, 0x40, 0x72, 0xec, 0xde, 0x88, 0xba, + 0xa9, 0x9b, 0xcd, 0xff, 0x61, 0x53, 0x05, 0x37, 0x23, 0x11, + 0x47, 0x75, 0xeb, 0xd9, 0x8f, 0xbd, 0xae, 0x9c, 0xca, 0xf8, + 0x66, 0x54, 0x02, 0x30, 0x2a, 0x18, 0x4e, 0x7c, 0xe2, 0xd0, + 0x86, 0xb4, 0xa7, 0x95, 0xc3, 0xf1, 0x6f, 0x5d, 0x0b, 0x39, + 0x2d, 0x1f, 0x49, 0x7b, 0xe5, 0xd7, 0x81, 0xb3, 0xa0, 0x92, + 0xc4, 0xf6, 0x68, 0x5a, 0x0c, 0x3e, 0x00, 0x33, 0x66, 0x55, + 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, + 0x2f, 0x1c, 0x17, 0x24, 0x71, 0x42, 0xdb, 0xe8, 0xbd, 0x8e, + 0x92, 0xa1, 0xf4, 0xc7, 0x5e, 0x6d, 0x38, 0x0b, 0x2e, 0x1d, + 0x48, 0x7b, 0xe2, 0xd1, 0x84, 0xb7, 0xab, 0x98, 0xcd, 0xfe, + 0x67, 0x54, 0x01, 0x32, 0x39, 0x0a, 0x5f, 0x6c, 0xf5, 0xc6, + 0x93, 0xa0, 0xbc, 0x8f, 0xda, 0xe9, 0x70, 0x43, 0x16, 0x25, + 0x5c, 0x6f, 0x3a, 0x09, 0x90, 0xa3, 0xf6, 0xc5, 0xd9, 0xea, + 0xbf, 0x8c, 0x15, 0x26, 0x73, 0x40, 0x4b, 0x78, 0x2d, 0x1e, + 0x87, 0xb4, 0xe1, 0xd2, 0xce, 0xfd, 0xa8, 0x9b, 0x02, 0x31, + 0x64, 0x57, 0x72, 0x41, 0x14, 0x27, 0xbe, 0x8d, 0xd8, 0xeb, + 0xf7, 0xc4, 0x91, 0xa2, 0x3b, 0x08, 0x5d, 0x6e, 0x65, 0x56, + 0x03, 0x30, 0xa9, 0x9a, 0xcf, 0xfc, 0xe0, 0xd3, 0x86, 0xb5, + 0x2c, 0x1f, 0x4a, 0x79, 0xb8, 0x8b, 0xde, 
0xed, 0x74, 0x47, + 0x12, 0x21, 0x3d, 0x0e, 0x5b, 0x68, 0xf1, 0xc2, 0x97, 0xa4, + 0xaf, 0x9c, 0xc9, 0xfa, 0x63, 0x50, 0x05, 0x36, 0x2a, 0x19, + 0x4c, 0x7f, 0xe6, 0xd5, 0x80, 0xb3, 0x96, 0xa5, 0xf0, 0xc3, + 0x5a, 0x69, 0x3c, 0x0f, 0x13, 0x20, 0x75, 0x46, 0xdf, 0xec, + 0xb9, 0x8a, 0x81, 0xb2, 0xe7, 0xd4, 0x4d, 0x7e, 0x2b, 0x18, + 0x04, 0x37, 0x62, 0x51, 0xc8, 0xfb, 0xae, 0x9d, 0xe4, 0xd7, + 0x82, 0xb1, 0x28, 0x1b, 0x4e, 0x7d, 0x61, 0x52, 0x07, 0x34, + 0xad, 0x9e, 0xcb, 0xf8, 0xf3, 0xc0, 0x95, 0xa6, 0x3f, 0x0c, + 0x59, 0x6a, 0x76, 0x45, 0x10, 0x23, 0xba, 0x89, 0xdc, 0xef, + 0xca, 0xf9, 0xac, 0x9f, 0x06, 0x35, 0x60, 0x53, 0x4f, 0x7c, + 0x29, 0x1a, 0x83, 0xb0, 0xe5, 0xd6, 0xdd, 0xee, 0xbb, 0x88, + 0x11, 0x22, 0x77, 0x44, 0x58, 0x6b, 0x3e, 0x0d, 0x94, 0xa7, + 0xf2, 0xc1, 0x00, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, + 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x05, 0x31, 0x67, 0x53, + 0x0f, 0x3b, 0xb7, 0x83, 0xdf, 0xeb, 0xda, 0xee, 0xb2, 0x86, + 0x0a, 0x3e, 0x62, 0x56, 0xce, 0xfa, 0xa6, 0x92, 0x1e, 0x2a, + 0x76, 0x42, 0x73, 0x47, 0x1b, 0x2f, 0xa3, 0x97, 0xcb, 0xff, + 0xa9, 0x9d, 0xc1, 0xf5, 0x79, 0x4d, 0x11, 0x25, 0x14, 0x20, + 0x7c, 0x48, 0xc4, 0xf0, 0xac, 0x98, 0x81, 0xb5, 0xe9, 0xdd, + 0x51, 0x65, 0x39, 0x0d, 0x3c, 0x08, 0x54, 0x60, 0xec, 0xd8, + 0x84, 0xb0, 0xe6, 0xd2, 0x8e, 0xba, 0x36, 0x02, 0x5e, 0x6a, + 0x5b, 0x6f, 0x33, 0x07, 0x8b, 0xbf, 0xe3, 0xd7, 0x4f, 0x7b, + 0x27, 0x13, 0x9f, 0xab, 0xf7, 0xc3, 0xf2, 0xc6, 0x9a, 0xae, + 0x22, 0x16, 0x4a, 0x7e, 0x28, 0x1c, 0x40, 0x74, 0xf8, 0xcc, + 0x90, 0xa4, 0x95, 0xa1, 0xfd, 0xc9, 0x45, 0x71, 0x2d, 0x19, + 0x1f, 0x2b, 0x77, 0x43, 0xcf, 0xfb, 0xa7, 0x93, 0xa2, 0x96, + 0xca, 0xfe, 0x72, 0x46, 0x1a, 0x2e, 0x78, 0x4c, 0x10, 0x24, + 0xa8, 0x9c, 0xc0, 0xf4, 0xc5, 0xf1, 0xad, 0x99, 0x15, 0x21, + 0x7d, 0x49, 0xd1, 0xe5, 0xb9, 0x8d, 0x01, 0x35, 0x69, 0x5d, + 0x6c, 0x58, 0x04, 0x30, 0xbc, 0x88, 0xd4, 0xe0, 0xb6, 0x82, + 0xde, 0xea, 0x66, 0x52, 0x0e, 0x3a, 0x0b, 0x3f, 0x63, 0x57, + 0xdb, 0xef, 0xb3, 0x87, 0x9e, 0xaa, 0xf6, 0xc2, 0x4e, 0x7a, + 0x26, 0x12, 0x23, 0x17, 0x4b, 0x7f, 0xf3, 0xc7, 0x9b, 0xaf, + 0xf9, 0xcd, 0x91, 0xa5, 0x29, 0x1d, 0x41, 0x75, 0x44, 0x70, + 0x2c, 0x18, 0x94, 0xa0, 0xfc, 0xc8, 0x50, 0x64, 0x38, 0x0c, + 0x80, 0xb4, 0xe8, 0xdc, 0xed, 0xd9, 0x85, 0xb1, 0x3d, 0x09, + 0x55, 0x61, 0x37, 0x03, 0x5f, 0x6b, 0xe7, 0xd3, 0x8f, 0xbb, + 0x8a, 0xbe, 0xe2, 0xd6, 0x5a, 0x6e, 0x32, 0x06, 0x00, 0x35, + 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, + 0x61, 0x54, 0x0b, 0x3e, 0x77, 0x42, 0x1d, 0x28, 0xa3, 0x96, + 0xc9, 0xfc, 0xc2, 0xf7, 0xa8, 0x9d, 0x16, 0x23, 0x7c, 0x49, + 0xee, 0xdb, 0x84, 0xb1, 0x3a, 0x0f, 0x50, 0x65, 0x5b, 0x6e, + 0x31, 0x04, 0x8f, 0xba, 0xe5, 0xd0, 0x99, 0xac, 0xf3, 0xc6, + 0x4d, 0x78, 0x27, 0x12, 0x2c, 0x19, 0x46, 0x73, 0xf8, 0xcd, + 0x92, 0xa7, 0xc1, 0xf4, 0xab, 0x9e, 0x15, 0x20, 0x7f, 0x4a, + 0x74, 0x41, 0x1e, 0x2b, 0xa0, 0x95, 0xca, 0xff, 0xb6, 0x83, + 0xdc, 0xe9, 0x62, 0x57, 0x08, 0x3d, 0x03, 0x36, 0x69, 0x5c, + 0xd7, 0xe2, 0xbd, 0x88, 0x2f, 0x1a, 0x45, 0x70, 0xfb, 0xce, + 0x91, 0xa4, 0x9a, 0xaf, 0xf0, 0xc5, 0x4e, 0x7b, 0x24, 0x11, + 0x58, 0x6d, 0x32, 0x07, 0x8c, 0xb9, 0xe6, 0xd3, 0xed, 0xd8, + 0x87, 0xb2, 0x39, 0x0c, 0x53, 0x66, 0x9f, 0xaa, 0xf5, 0xc0, + 0x4b, 0x7e, 0x21, 0x14, 0x2a, 0x1f, 0x40, 0x75, 0xfe, 0xcb, + 0x94, 0xa1, 0xe8, 0xdd, 0x82, 0xb7, 0x3c, 0x09, 0x56, 0x63, + 0x5d, 0x68, 0x37, 0x02, 0x89, 0xbc, 0xe3, 0xd6, 0x71, 0x44, + 0x1b, 0x2e, 0xa5, 0x90, 0xcf, 0xfa, 0xc4, 0xf1, 0xae, 0x9b, + 0x10, 0x25, 0x7a, 0x4f, 0x06, 0x33, 0x6c, 0x59, 0xd2, 0xe7, + 0xb8, 0x8d, 0xb3, 0x86, 0xd9, 0xec, 0x67, 0x52, 0x0d, 0x38, + 
0x5e, 0x6b, 0x34, 0x01, 0x8a, 0xbf, 0xe0, 0xd5, 0xeb, 0xde, + 0x81, 0xb4, 0x3f, 0x0a, 0x55, 0x60, 0x29, 0x1c, 0x43, 0x76, + 0xfd, 0xc8, 0x97, 0xa2, 0x9c, 0xa9, 0xf6, 0xc3, 0x48, 0x7d, + 0x22, 0x17, 0xb0, 0x85, 0xda, 0xef, 0x64, 0x51, 0x0e, 0x3b, + 0x05, 0x30, 0x6f, 0x5a, 0xd1, 0xe4, 0xbb, 0x8e, 0xc7, 0xf2, + 0xad, 0x98, 0x13, 0x26, 0x79, 0x4c, 0x72, 0x47, 0x18, 0x2d, + 0xa6, 0x93, 0xcc, 0xf9, 0x00, 0x36, 0x6c, 0x5a, 0xd8, 0xee, + 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f, + 0x47, 0x71, 0x2b, 0x1d, 0x9f, 0xa9, 0xf3, 0xc5, 0xea, 0xdc, + 0x86, 0xb0, 0x32, 0x04, 0x5e, 0x68, 0x8e, 0xb8, 0xe2, 0xd4, + 0x56, 0x60, 0x3a, 0x0c, 0x23, 0x15, 0x4f, 0x79, 0xfb, 0xcd, + 0x97, 0xa1, 0xc9, 0xff, 0xa5, 0x93, 0x11, 0x27, 0x7d, 0x4b, + 0x64, 0x52, 0x08, 0x3e, 0xbc, 0x8a, 0xd0, 0xe6, 0x01, 0x37, + 0x6d, 0x5b, 0xd9, 0xef, 0xb5, 0x83, 0xac, 0x9a, 0xc0, 0xf6, + 0x74, 0x42, 0x18, 0x2e, 0x46, 0x70, 0x2a, 0x1c, 0x9e, 0xa8, + 0xf2, 0xc4, 0xeb, 0xdd, 0x87, 0xb1, 0x33, 0x05, 0x5f, 0x69, + 0x8f, 0xb9, 0xe3, 0xd5, 0x57, 0x61, 0x3b, 0x0d, 0x22, 0x14, + 0x4e, 0x78, 0xfa, 0xcc, 0x96, 0xa0, 0xc8, 0xfe, 0xa4, 0x92, + 0x10, 0x26, 0x7c, 0x4a, 0x65, 0x53, 0x09, 0x3f, 0xbd, 0x8b, + 0xd1, 0xe7, 0x02, 0x34, 0x6e, 0x58, 0xda, 0xec, 0xb6, 0x80, + 0xaf, 0x99, 0xc3, 0xf5, 0x77, 0x41, 0x1b, 0x2d, 0x45, 0x73, + 0x29, 0x1f, 0x9d, 0xab, 0xf1, 0xc7, 0xe8, 0xde, 0x84, 0xb2, + 0x30, 0x06, 0x5c, 0x6a, 0x8c, 0xba, 0xe0, 0xd6, 0x54, 0x62, + 0x38, 0x0e, 0x21, 0x17, 0x4d, 0x7b, 0xf9, 0xcf, 0x95, 0xa3, + 0xcb, 0xfd, 0xa7, 0x91, 0x13, 0x25, 0x7f, 0x49, 0x66, 0x50, + 0x0a, 0x3c, 0xbe, 0x88, 0xd2, 0xe4, 0x03, 0x35, 0x6f, 0x59, + 0xdb, 0xed, 0xb7, 0x81, 0xae, 0x98, 0xc2, 0xf4, 0x76, 0x40, + 0x1a, 0x2c, 0x44, 0x72, 0x28, 0x1e, 0x9c, 0xaa, 0xf0, 0xc6, + 0xe9, 0xdf, 0x85, 0xb3, 0x31, 0x07, 0x5d, 0x6b, 0x8d, 0xbb, + 0xe1, 0xd7, 0x55, 0x63, 0x39, 0x0f, 0x20, 0x16, 0x4c, 0x7a, + 0xf8, 0xce, 0x94, 0xa2, 0xca, 0xfc, 0xa6, 0x90, 0x12, 0x24, + 0x7e, 0x48, 0x67, 0x51, 0x0b, 0x3d, 0xbf, 0x89, 0xd3, 0xe5, + 0x00, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, + 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20, 0x57, 0x60, 0x39, 0x0e, + 0x8b, 0xbc, 0xe5, 0xd2, 0xf2, 0xc5, 0x9c, 0xab, 0x2e, 0x19, + 0x40, 0x77, 0xae, 0x99, 0xc0, 0xf7, 0x72, 0x45, 0x1c, 0x2b, + 0x0b, 0x3c, 0x65, 0x52, 0xd7, 0xe0, 0xb9, 0x8e, 0xf9, 0xce, + 0x97, 0xa0, 0x25, 0x12, 0x4b, 0x7c, 0x5c, 0x6b, 0x32, 0x05, + 0x80, 0xb7, 0xee, 0xd9, 0x41, 0x76, 0x2f, 0x18, 0x9d, 0xaa, + 0xf3, 0xc4, 0xe4, 0xd3, 0x8a, 0xbd, 0x38, 0x0f, 0x56, 0x61, + 0x16, 0x21, 0x78, 0x4f, 0xca, 0xfd, 0xa4, 0x93, 0xb3, 0x84, + 0xdd, 0xea, 0x6f, 0x58, 0x01, 0x36, 0xef, 0xd8, 0x81, 0xb6, + 0x33, 0x04, 0x5d, 0x6a, 0x4a, 0x7d, 0x24, 0x13, 0x96, 0xa1, + 0xf8, 0xcf, 0xb8, 0x8f, 0xd6, 0xe1, 0x64, 0x53, 0x0a, 0x3d, + 0x1d, 0x2a, 0x73, 0x44, 0xc1, 0xf6, 0xaf, 0x98, 0x82, 0xb5, + 0xec, 0xdb, 0x5e, 0x69, 0x30, 0x07, 0x27, 0x10, 0x49, 0x7e, + 0xfb, 0xcc, 0x95, 0xa2, 0xd5, 0xe2, 0xbb, 0x8c, 0x09, 0x3e, + 0x67, 0x50, 0x70, 0x47, 0x1e, 0x29, 0xac, 0x9b, 0xc2, 0xf5, + 0x2c, 0x1b, 0x42, 0x75, 0xf0, 0xc7, 0x9e, 0xa9, 0x89, 0xbe, + 0xe7, 0xd0, 0x55, 0x62, 0x3b, 0x0c, 0x7b, 0x4c, 0x15, 0x22, + 0xa7, 0x90, 0xc9, 0xfe, 0xde, 0xe9, 0xb0, 0x87, 0x02, 0x35, + 0x6c, 0x5b, 0xc3, 0xf4, 0xad, 0x9a, 0x1f, 0x28, 0x71, 0x46, + 0x66, 0x51, 0x08, 0x3f, 0xba, 0x8d, 0xd4, 0xe3, 0x94, 0xa3, + 0xfa, 0xcd, 0x48, 0x7f, 0x26, 0x11, 0x31, 0x06, 0x5f, 0x68, + 0xed, 0xda, 0x83, 0xb4, 0x6d, 0x5a, 0x03, 0x34, 0xb1, 0x86, + 0xdf, 0xe8, 0xc8, 0xff, 0xa6, 0x91, 0x14, 0x23, 0x7a, 0x4d, + 0x3a, 0x0d, 0x54, 0x63, 0xe6, 0xd1, 0x88, 0xbf, 0x9f, 0xa8, + 0xf1, 0xc6, 0x43, 
0x74, 0x2d, 0x1a, 0x00, 0x38, 0x70, 0x48, + 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x05, + 0x4d, 0x75, 0xa7, 0x9f, 0xd7, 0xef, 0x47, 0x7f, 0x37, 0x0f, + 0x7a, 0x42, 0x0a, 0x32, 0x9a, 0xa2, 0xea, 0xd2, 0x53, 0x6b, + 0x23, 0x1b, 0xb3, 0x8b, 0xc3, 0xfb, 0x8e, 0xb6, 0xfe, 0xc6, + 0x6e, 0x56, 0x1e, 0x26, 0xf4, 0xcc, 0x84, 0xbc, 0x14, 0x2c, + 0x64, 0x5c, 0x29, 0x11, 0x59, 0x61, 0xc9, 0xf1, 0xb9, 0x81, + 0xa6, 0x9e, 0xd6, 0xee, 0x46, 0x7e, 0x36, 0x0e, 0x7b, 0x43, + 0x0b, 0x33, 0x9b, 0xa3, 0xeb, 0xd3, 0x01, 0x39, 0x71, 0x49, + 0xe1, 0xd9, 0x91, 0xa9, 0xdc, 0xe4, 0xac, 0x94, 0x3c, 0x04, + 0x4c, 0x74, 0xf5, 0xcd, 0x85, 0xbd, 0x15, 0x2d, 0x65, 0x5d, + 0x28, 0x10, 0x58, 0x60, 0xc8, 0xf0, 0xb8, 0x80, 0x52, 0x6a, + 0x22, 0x1a, 0xb2, 0x8a, 0xc2, 0xfa, 0x8f, 0xb7, 0xff, 0xc7, + 0x6f, 0x57, 0x1f, 0x27, 0x51, 0x69, 0x21, 0x19, 0xb1, 0x89, + 0xc1, 0xf9, 0x8c, 0xb4, 0xfc, 0xc4, 0x6c, 0x54, 0x1c, 0x24, + 0xf6, 0xce, 0x86, 0xbe, 0x16, 0x2e, 0x66, 0x5e, 0x2b, 0x13, + 0x5b, 0x63, 0xcb, 0xf3, 0xbb, 0x83, 0x02, 0x3a, 0x72, 0x4a, + 0xe2, 0xda, 0x92, 0xaa, 0xdf, 0xe7, 0xaf, 0x97, 0x3f, 0x07, + 0x4f, 0x77, 0xa5, 0x9d, 0xd5, 0xed, 0x45, 0x7d, 0x35, 0x0d, + 0x78, 0x40, 0x08, 0x30, 0x98, 0xa0, 0xe8, 0xd0, 0xf7, 0xcf, + 0x87, 0xbf, 0x17, 0x2f, 0x67, 0x5f, 0x2a, 0x12, 0x5a, 0x62, + 0xca, 0xf2, 0xba, 0x82, 0x50, 0x68, 0x20, 0x18, 0xb0, 0x88, + 0xc0, 0xf8, 0x8d, 0xb5, 0xfd, 0xc5, 0x6d, 0x55, 0x1d, 0x25, + 0xa4, 0x9c, 0xd4, 0xec, 0x44, 0x7c, 0x34, 0x0c, 0x79, 0x41, + 0x09, 0x31, 0x99, 0xa1, 0xe9, 0xd1, 0x03, 0x3b, 0x73, 0x4b, + 0xe3, 0xdb, 0x93, 0xab, 0xde, 0xe6, 0xae, 0x96, 0x3e, 0x06, + 0x4e, 0x76, 0x00, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, + 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x08, 0x43, 0x7a, 0xb7, 0x8e, + 0xc5, 0xfc, 0x53, 0x6a, 0x21, 0x18, 0x62, 0x5b, 0x10, 0x29, + 0x86, 0xbf, 0xf4, 0xcd, 0x73, 0x4a, 0x01, 0x38, 0x97, 0xae, + 0xe5, 0xdc, 0xa6, 0x9f, 0xd4, 0xed, 0x42, 0x7b, 0x30, 0x09, + 0xc4, 0xfd, 0xb6, 0x8f, 0x20, 0x19, 0x52, 0x6b, 0x11, 0x28, + 0x63, 0x5a, 0xf5, 0xcc, 0x87, 0xbe, 0xe6, 0xdf, 0x94, 0xad, + 0x02, 0x3b, 0x70, 0x49, 0x33, 0x0a, 0x41, 0x78, 0xd7, 0xee, + 0xa5, 0x9c, 0x51, 0x68, 0x23, 0x1a, 0xb5, 0x8c, 0xc7, 0xfe, + 0x84, 0xbd, 0xf6, 0xcf, 0x60, 0x59, 0x12, 0x2b, 0x95, 0xac, + 0xe7, 0xde, 0x71, 0x48, 0x03, 0x3a, 0x40, 0x79, 0x32, 0x0b, + 0xa4, 0x9d, 0xd6, 0xef, 0x22, 0x1b, 0x50, 0x69, 0xc6, 0xff, + 0xb4, 0x8d, 0xf7, 0xce, 0x85, 0xbc, 0x13, 0x2a, 0x61, 0x58, + 0xd1, 0xe8, 0xa3, 0x9a, 0x35, 0x0c, 0x47, 0x7e, 0x04, 0x3d, + 0x76, 0x4f, 0xe0, 0xd9, 0x92, 0xab, 0x66, 0x5f, 0x14, 0x2d, + 0x82, 0xbb, 0xf0, 0xc9, 0xb3, 0x8a, 0xc1, 0xf8, 0x57, 0x6e, + 0x25, 0x1c, 0xa2, 0x9b, 0xd0, 0xe9, 0x46, 0x7f, 0x34, 0x0d, + 0x77, 0x4e, 0x05, 0x3c, 0x93, 0xaa, 0xe1, 0xd8, 0x15, 0x2c, + 0x67, 0x5e, 0xf1, 0xc8, 0x83, 0xba, 0xc0, 0xf9, 0xb2, 0x8b, + 0x24, 0x1d, 0x56, 0x6f, 0x37, 0x0e, 0x45, 0x7c, 0xd3, 0xea, + 0xa1, 0x98, 0xe2, 0xdb, 0x90, 0xa9, 0x06, 0x3f, 0x74, 0x4d, + 0x80, 0xb9, 0xf2, 0xcb, 0x64, 0x5d, 0x16, 0x2f, 0x55, 0x6c, + 0x27, 0x1e, 0xb1, 0x88, 0xc3, 0xfa, 0x44, 0x7d, 0x36, 0x0f, + 0xa0, 0x99, 0xd2, 0xeb, 0x91, 0xa8, 0xe3, 0xda, 0x75, 0x4c, + 0x07, 0x3e, 0xf3, 0xca, 0x81, 0xb8, 0x17, 0x2e, 0x65, 0x5c, + 0x26, 0x1f, 0x54, 0x6d, 0xc2, 0xfb, 0xb0, 0x89, 0x00, 0x3a, + 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, + 0x25, 0x1f, 0x51, 0x6b, 0x87, 0xbd, 0xf3, 0xc9, 0x6f, 0x55, + 0x1b, 0x21, 0x4a, 0x70, 0x3e, 0x04, 0xa2, 0x98, 0xd6, 0xec, + 0x13, 0x29, 0x67, 0x5d, 0xfb, 0xc1, 0x8f, 0xb5, 0xde, 0xe4, + 0xaa, 0x90, 0x36, 0x0c, 0x42, 0x78, 0x94, 0xae, 0xe0, 0xda, + 0x7c, 0x46, 0x08, 0x32, 0x59, 0x63, 
0x2d, 0x17, 0xb1, 0x8b, + 0xc5, 0xff, 0x26, 0x1c, 0x52, 0x68, 0xce, 0xf4, 0xba, 0x80, + 0xeb, 0xd1, 0x9f, 0xa5, 0x03, 0x39, 0x77, 0x4d, 0xa1, 0x9b, + 0xd5, 0xef, 0x49, 0x73, 0x3d, 0x07, 0x6c, 0x56, 0x18, 0x22, + 0x84, 0xbe, 0xf0, 0xca, 0x35, 0x0f, 0x41, 0x7b, 0xdd, 0xe7, + 0xa9, 0x93, 0xf8, 0xc2, 0x8c, 0xb6, 0x10, 0x2a, 0x64, 0x5e, + 0xb2, 0x88, 0xc6, 0xfc, 0x5a, 0x60, 0x2e, 0x14, 0x7f, 0x45, + 0x0b, 0x31, 0x97, 0xad, 0xe3, 0xd9, 0x4c, 0x76, 0x38, 0x02, + 0xa4, 0x9e, 0xd0, 0xea, 0x81, 0xbb, 0xf5, 0xcf, 0x69, 0x53, + 0x1d, 0x27, 0xcb, 0xf1, 0xbf, 0x85, 0x23, 0x19, 0x57, 0x6d, + 0x06, 0x3c, 0x72, 0x48, 0xee, 0xd4, 0x9a, 0xa0, 0x5f, 0x65, + 0x2b, 0x11, 0xb7, 0x8d, 0xc3, 0xf9, 0x92, 0xa8, 0xe6, 0xdc, + 0x7a, 0x40, 0x0e, 0x34, 0xd8, 0xe2, 0xac, 0x96, 0x30, 0x0a, + 0x44, 0x7e, 0x15, 0x2f, 0x61, 0x5b, 0xfd, 0xc7, 0x89, 0xb3, + 0x6a, 0x50, 0x1e, 0x24, 0x82, 0xb8, 0xf6, 0xcc, 0xa7, 0x9d, + 0xd3, 0xe9, 0x4f, 0x75, 0x3b, 0x01, 0xed, 0xd7, 0x99, 0xa3, + 0x05, 0x3f, 0x71, 0x4b, 0x20, 0x1a, 0x54, 0x6e, 0xc8, 0xf2, + 0xbc, 0x86, 0x79, 0x43, 0x0d, 0x37, 0x91, 0xab, 0xe5, 0xdf, + 0xb4, 0x8e, 0xc0, 0xfa, 0x5c, 0x66, 0x28, 0x12, 0xfe, 0xc4, + 0x8a, 0xb0, 0x16, 0x2c, 0x62, 0x58, 0x33, 0x09, 0x47, 0x7d, + 0xdb, 0xe1, 0xaf, 0x95, 0x00, 0x3b, 0x76, 0x4d, 0xec, 0xd7, + 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64, + 0x97, 0xac, 0xe1, 0xda, 0x7b, 0x40, 0x0d, 0x36, 0x52, 0x69, + 0x24, 0x1f, 0xbe, 0x85, 0xc8, 0xf3, 0x33, 0x08, 0x45, 0x7e, + 0xdf, 0xe4, 0xa9, 0x92, 0xf6, 0xcd, 0x80, 0xbb, 0x1a, 0x21, + 0x6c, 0x57, 0xa4, 0x9f, 0xd2, 0xe9, 0x48, 0x73, 0x3e, 0x05, + 0x61, 0x5a, 0x17, 0x2c, 0x8d, 0xb6, 0xfb, 0xc0, 0x66, 0x5d, + 0x10, 0x2b, 0x8a, 0xb1, 0xfc, 0xc7, 0xa3, 0x98, 0xd5, 0xee, + 0x4f, 0x74, 0x39, 0x02, 0xf1, 0xca, 0x87, 0xbc, 0x1d, 0x26, + 0x6b, 0x50, 0x34, 0x0f, 0x42, 0x79, 0xd8, 0xe3, 0xae, 0x95, + 0x55, 0x6e, 0x23, 0x18, 0xb9, 0x82, 0xcf, 0xf4, 0x90, 0xab, + 0xe6, 0xdd, 0x7c, 0x47, 0x0a, 0x31, 0xc2, 0xf9, 0xb4, 0x8f, + 0x2e, 0x15, 0x58, 0x63, 0x07, 0x3c, 0x71, 0x4a, 0xeb, 0xd0, + 0x9d, 0xa6, 0xcc, 0xf7, 0xba, 0x81, 0x20, 0x1b, 0x56, 0x6d, + 0x09, 0x32, 0x7f, 0x44, 0xe5, 0xde, 0x93, 0xa8, 0x5b, 0x60, + 0x2d, 0x16, 0xb7, 0x8c, 0xc1, 0xfa, 0x9e, 0xa5, 0xe8, 0xd3, + 0x72, 0x49, 0x04, 0x3f, 0xff, 0xc4, 0x89, 0xb2, 0x13, 0x28, + 0x65, 0x5e, 0x3a, 0x01, 0x4c, 0x77, 0xd6, 0xed, 0xa0, 0x9b, + 0x68, 0x53, 0x1e, 0x25, 0x84, 0xbf, 0xf2, 0xc9, 0xad, 0x96, + 0xdb, 0xe0, 0x41, 0x7a, 0x37, 0x0c, 0xaa, 0x91, 0xdc, 0xe7, + 0x46, 0x7d, 0x30, 0x0b, 0x6f, 0x54, 0x19, 0x22, 0x83, 0xb8, + 0xf5, 0xce, 0x3d, 0x06, 0x4b, 0x70, 0xd1, 0xea, 0xa7, 0x9c, + 0xf8, 0xc3, 0x8e, 0xb5, 0x14, 0x2f, 0x62, 0x59, 0x99, 0xa2, + 0xef, 0xd4, 0x75, 0x4e, 0x03, 0x38, 0x5c, 0x67, 0x2a, 0x11, + 0xb0, 0x8b, 0xc6, 0xfd, 0x0e, 0x35, 0x78, 0x43, 0xe2, 0xd9, + 0x94, 0xaf, 0xcb, 0xf0, 0xbd, 0x86, 0x27, 0x1c, 0x51, 0x6a, + 0x00, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, + 0x85, 0xb9, 0x0d, 0x31, 0x75, 0x49, 0xe7, 0xdb, 0x9f, 0xa3, + 0x17, 0x2b, 0x6f, 0x53, 0x1a, 0x26, 0x62, 0x5e, 0xea, 0xd6, + 0x92, 0xae, 0xd3, 0xef, 0xab, 0x97, 0x23, 0x1f, 0x5b, 0x67, + 0x2e, 0x12, 0x56, 0x6a, 0xde, 0xe2, 0xa6, 0x9a, 0x34, 0x08, + 0x4c, 0x70, 0xc4, 0xf8, 0xbc, 0x80, 0xc9, 0xf5, 0xb1, 0x8d, + 0x39, 0x05, 0x41, 0x7d, 0xbb, 0x87, 0xc3, 0xff, 0x4b, 0x77, + 0x33, 0x0f, 0x46, 0x7a, 0x3e, 0x02, 0xb6, 0x8a, 0xce, 0xf2, + 0x5c, 0x60, 0x24, 0x18, 0xac, 0x90, 0xd4, 0xe8, 0xa1, 0x9d, + 0xd9, 0xe5, 0x51, 0x6d, 0x29, 0x15, 0x68, 0x54, 0x10, 0x2c, + 0x98, 0xa4, 0xe0, 0xdc, 0x95, 0xa9, 0xed, 0xd1, 0x65, 0x59, + 0x1d, 0x21, 0x8f, 0xb3, 0xf7, 0xcb, 0x7f, 0x43, 0x07, 
0x3b, + 0x72, 0x4e, 0x0a, 0x36, 0x82, 0xbe, 0xfa, 0xc6, 0x6b, 0x57, + 0x13, 0x2f, 0x9b, 0xa7, 0xe3, 0xdf, 0x96, 0xaa, 0xee, 0xd2, + 0x66, 0x5a, 0x1e, 0x22, 0x8c, 0xb0, 0xf4, 0xc8, 0x7c, 0x40, + 0x04, 0x38, 0x71, 0x4d, 0x09, 0x35, 0x81, 0xbd, 0xf9, 0xc5, + 0xb8, 0x84, 0xc0, 0xfc, 0x48, 0x74, 0x30, 0x0c, 0x45, 0x79, + 0x3d, 0x01, 0xb5, 0x89, 0xcd, 0xf1, 0x5f, 0x63, 0x27, 0x1b, + 0xaf, 0x93, 0xd7, 0xeb, 0xa2, 0x9e, 0xda, 0xe6, 0x52, 0x6e, + 0x2a, 0x16, 0xd0, 0xec, 0xa8, 0x94, 0x20, 0x1c, 0x58, 0x64, + 0x2d, 0x11, 0x55, 0x69, 0xdd, 0xe1, 0xa5, 0x99, 0x37, 0x0b, + 0x4f, 0x73, 0xc7, 0xfb, 0xbf, 0x83, 0xca, 0xf6, 0xb2, 0x8e, + 0x3a, 0x06, 0x42, 0x7e, 0x03, 0x3f, 0x7b, 0x47, 0xf3, 0xcf, + 0x8b, 0xb7, 0xfe, 0xc2, 0x86, 0xba, 0x0e, 0x32, 0x76, 0x4a, + 0xe4, 0xd8, 0x9c, 0xa0, 0x14, 0x28, 0x6c, 0x50, 0x19, 0x25, + 0x61, 0x5d, 0xe9, 0xd5, 0x91, 0xad, 0x00, 0x3d, 0x7a, 0x47, + 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x01, 0x3c, + 0x7b, 0x46, 0xf7, 0xca, 0x8d, 0xb0, 0x03, 0x3e, 0x79, 0x44, + 0x02, 0x3f, 0x78, 0x45, 0xf6, 0xcb, 0x8c, 0xb1, 0xf3, 0xce, + 0x89, 0xb4, 0x07, 0x3a, 0x7d, 0x40, 0x06, 0x3b, 0x7c, 0x41, + 0xf2, 0xcf, 0x88, 0xb5, 0x04, 0x39, 0x7e, 0x43, 0xf0, 0xcd, + 0x8a, 0xb7, 0xf1, 0xcc, 0x8b, 0xb6, 0x05, 0x38, 0x7f, 0x42, + 0xfb, 0xc6, 0x81, 0xbc, 0x0f, 0x32, 0x75, 0x48, 0x0e, 0x33, + 0x74, 0x49, 0xfa, 0xc7, 0x80, 0xbd, 0x0c, 0x31, 0x76, 0x4b, + 0xf8, 0xc5, 0x82, 0xbf, 0xf9, 0xc4, 0x83, 0xbe, 0x0d, 0x30, + 0x77, 0x4a, 0x08, 0x35, 0x72, 0x4f, 0xfc, 0xc1, 0x86, 0xbb, + 0xfd, 0xc0, 0x87, 0xba, 0x09, 0x34, 0x73, 0x4e, 0xff, 0xc2, + 0x85, 0xb8, 0x0b, 0x36, 0x71, 0x4c, 0x0a, 0x37, 0x70, 0x4d, + 0xfe, 0xc3, 0x84, 0xb9, 0xeb, 0xd6, 0x91, 0xac, 0x1f, 0x22, + 0x65, 0x58, 0x1e, 0x23, 0x64, 0x59, 0xea, 0xd7, 0x90, 0xad, + 0x1c, 0x21, 0x66, 0x5b, 0xe8, 0xd5, 0x92, 0xaf, 0xe9, 0xd4, + 0x93, 0xae, 0x1d, 0x20, 0x67, 0x5a, 0x18, 0x25, 0x62, 0x5f, + 0xec, 0xd1, 0x96, 0xab, 0xed, 0xd0, 0x97, 0xaa, 0x19, 0x24, + 0x63, 0x5e, 0xef, 0xd2, 0x95, 0xa8, 0x1b, 0x26, 0x61, 0x5c, + 0x1a, 0x27, 0x60, 0x5d, 0xee, 0xd3, 0x94, 0xa9, 0x10, 0x2d, + 0x6a, 0x57, 0xe4, 0xd9, 0x9e, 0xa3, 0xe5, 0xd8, 0x9f, 0xa2, + 0x11, 0x2c, 0x6b, 0x56, 0xe7, 0xda, 0x9d, 0xa0, 0x13, 0x2e, + 0x69, 0x54, 0x12, 0x2f, 0x68, 0x55, 0xe6, 0xdb, 0x9c, 0xa1, + 0xe3, 0xde, 0x99, 0xa4, 0x17, 0x2a, 0x6d, 0x50, 0x16, 0x2b, + 0x6c, 0x51, 0xe2, 0xdf, 0x98, 0xa5, 0x14, 0x29, 0x6e, 0x53, + 0xe0, 0xdd, 0x9a, 0xa7, 0xe1, 0xdc, 0x9b, 0xa6, 0x15, 0x28, + 0x6f, 0x52, 0x00, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, + 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57, 0xc7, 0xf9, + 0xbb, 0x85, 0x3f, 0x01, 0x43, 0x7d, 0x2a, 0x14, 0x56, 0x68, + 0xd2, 0xec, 0xae, 0x90, 0x93, 0xad, 0xef, 0xd1, 0x6b, 0x55, + 0x17, 0x29, 0x7e, 0x40, 0x02, 0x3c, 0x86, 0xb8, 0xfa, 0xc4, + 0x54, 0x6a, 0x28, 0x16, 0xac, 0x92, 0xd0, 0xee, 0xb9, 0x87, + 0xc5, 0xfb, 0x41, 0x7f, 0x3d, 0x03, 0x3b, 0x05, 0x47, 0x79, + 0xc3, 0xfd, 0xbf, 0x81, 0xd6, 0xe8, 0xaa, 0x94, 0x2e, 0x10, + 0x52, 0x6c, 0xfc, 0xc2, 0x80, 0xbe, 0x04, 0x3a, 0x78, 0x46, + 0x11, 0x2f, 0x6d, 0x53, 0xe9, 0xd7, 0x95, 0xab, 0xa8, 0x96, + 0xd4, 0xea, 0x50, 0x6e, 0x2c, 0x12, 0x45, 0x7b, 0x39, 0x07, + 0xbd, 0x83, 0xc1, 0xff, 0x6f, 0x51, 0x13, 0x2d, 0x97, 0xa9, + 0xeb, 0xd5, 0x82, 0xbc, 0xfe, 0xc0, 0x7a, 0x44, 0x06, 0x38, + 0x76, 0x48, 0x0a, 0x34, 0x8e, 0xb0, 0xf2, 0xcc, 0x9b, 0xa5, + 0xe7, 0xd9, 0x63, 0x5d, 0x1f, 0x21, 0xb1, 0x8f, 0xcd, 0xf3, + 0x49, 0x77, 0x35, 0x0b, 0x5c, 0x62, 0x20, 0x1e, 0xa4, 0x9a, + 0xd8, 0xe6, 0xe5, 0xdb, 0x99, 0xa7, 0x1d, 0x23, 0x61, 0x5f, + 0x08, 0x36, 0x74, 0x4a, 0xf0, 0xce, 0x8c, 0xb2, 0x22, 0x1c, + 0x5e, 0x60, 
0xda, 0xe4, 0xa6, 0x98, 0xcf, 0xf1, 0xb3, 0x8d, + 0x37, 0x09, 0x4b, 0x75, 0x4d, 0x73, 0x31, 0x0f, 0xb5, 0x8b, + 0xc9, 0xf7, 0xa0, 0x9e, 0xdc, 0xe2, 0x58, 0x66, 0x24, 0x1a, + 0x8a, 0xb4, 0xf6, 0xc8, 0x72, 0x4c, 0x0e, 0x30, 0x67, 0x59, + 0x1b, 0x25, 0x9f, 0xa1, 0xe3, 0xdd, 0xde, 0xe0, 0xa2, 0x9c, + 0x26, 0x18, 0x5a, 0x64, 0x33, 0x0d, 0x4f, 0x71, 0xcb, 0xf5, + 0xb7, 0x89, 0x19, 0x27, 0x65, 0x5b, 0xe1, 0xdf, 0x9d, 0xa3, + 0xf4, 0xca, 0x88, 0xb6, 0x0c, 0x32, 0x70, 0x4e, 0x00, 0x3f, + 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, + 0x19, 0x26, 0x67, 0x58, 0xd7, 0xe8, 0xa9, 0x96, 0x2b, 0x14, + 0x55, 0x6a, 0x32, 0x0d, 0x4c, 0x73, 0xce, 0xf1, 0xb0, 0x8f, + 0xb3, 0x8c, 0xcd, 0xf2, 0x4f, 0x70, 0x31, 0x0e, 0x56, 0x69, + 0x28, 0x17, 0xaa, 0x95, 0xd4, 0xeb, 0x64, 0x5b, 0x1a, 0x25, + 0x98, 0xa7, 0xe6, 0xd9, 0x81, 0xbe, 0xff, 0xc0, 0x7d, 0x42, + 0x03, 0x3c, 0x7b, 0x44, 0x05, 0x3a, 0x87, 0xb8, 0xf9, 0xc6, + 0x9e, 0xa1, 0xe0, 0xdf, 0x62, 0x5d, 0x1c, 0x23, 0xac, 0x93, + 0xd2, 0xed, 0x50, 0x6f, 0x2e, 0x11, 0x49, 0x76, 0x37, 0x08, + 0xb5, 0x8a, 0xcb, 0xf4, 0xc8, 0xf7, 0xb6, 0x89, 0x34, 0x0b, + 0x4a, 0x75, 0x2d, 0x12, 0x53, 0x6c, 0xd1, 0xee, 0xaf, 0x90, + 0x1f, 0x20, 0x61, 0x5e, 0xe3, 0xdc, 0x9d, 0xa2, 0xfa, 0xc5, + 0x84, 0xbb, 0x06, 0x39, 0x78, 0x47, 0xf6, 0xc9, 0x88, 0xb7, + 0x0a, 0x35, 0x74, 0x4b, 0x13, 0x2c, 0x6d, 0x52, 0xef, 0xd0, + 0x91, 0xae, 0x21, 0x1e, 0x5f, 0x60, 0xdd, 0xe2, 0xa3, 0x9c, + 0xc4, 0xfb, 0xba, 0x85, 0x38, 0x07, 0x46, 0x79, 0x45, 0x7a, + 0x3b, 0x04, 0xb9, 0x86, 0xc7, 0xf8, 0xa0, 0x9f, 0xde, 0xe1, + 0x5c, 0x63, 0x22, 0x1d, 0x92, 0xad, 0xec, 0xd3, 0x6e, 0x51, + 0x10, 0x2f, 0x77, 0x48, 0x09, 0x36, 0x8b, 0xb4, 0xf5, 0xca, + 0x8d, 0xb2, 0xf3, 0xcc, 0x71, 0x4e, 0x0f, 0x30, 0x68, 0x57, + 0x16, 0x29, 0x94, 0xab, 0xea, 0xd5, 0x5a, 0x65, 0x24, 0x1b, + 0xa6, 0x99, 0xd8, 0xe7, 0xbf, 0x80, 0xc1, 0xfe, 0x43, 0x7c, + 0x3d, 0x02, 0x3e, 0x01, 0x40, 0x7f, 0xc2, 0xfd, 0xbc, 0x83, + 0xdb, 0xe4, 0xa5, 0x9a, 0x27, 0x18, 0x59, 0x66, 0xe9, 0xd6, + 0x97, 0xa8, 0x15, 0x2a, 0x6b, 0x54, 0x0c, 0x33, 0x72, 0x4d, + 0xf0, 0xcf, 0x8e, 0xb1, 0x00, 0x40, 0x80, 0xc0, 0x1d, 0x5d, + 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7, + 0x74, 0x34, 0xf4, 0xb4, 0x69, 0x29, 0xe9, 0xa9, 0x4e, 0x0e, + 0xce, 0x8e, 0x53, 0x13, 0xd3, 0x93, 0xe8, 0xa8, 0x68, 0x28, + 0xf5, 0xb5, 0x75, 0x35, 0xd2, 0x92, 0x52, 0x12, 0xcf, 0x8f, + 0x4f, 0x0f, 0x9c, 0xdc, 0x1c, 0x5c, 0x81, 0xc1, 0x01, 0x41, + 0xa6, 0xe6, 0x26, 0x66, 0xbb, 0xfb, 0x3b, 0x7b, 0xcd, 0x8d, + 0x4d, 0x0d, 0xd0, 0x90, 0x50, 0x10, 0xf7, 0xb7, 0x77, 0x37, + 0xea, 0xaa, 0x6a, 0x2a, 0xb9, 0xf9, 0x39, 0x79, 0xa4, 0xe4, + 0x24, 0x64, 0x83, 0xc3, 0x03, 0x43, 0x9e, 0xde, 0x1e, 0x5e, + 0x25, 0x65, 0xa5, 0xe5, 0x38, 0x78, 0xb8, 0xf8, 0x1f, 0x5f, + 0x9f, 0xdf, 0x02, 0x42, 0x82, 0xc2, 0x51, 0x11, 0xd1, 0x91, + 0x4c, 0x0c, 0xcc, 0x8c, 0x6b, 0x2b, 0xeb, 0xab, 0x76, 0x36, + 0xf6, 0xb6, 0x87, 0xc7, 0x07, 0x47, 0x9a, 0xda, 0x1a, 0x5a, + 0xbd, 0xfd, 0x3d, 0x7d, 0xa0, 0xe0, 0x20, 0x60, 0xf3, 0xb3, + 0x73, 0x33, 0xee, 0xae, 0x6e, 0x2e, 0xc9, 0x89, 0x49, 0x09, + 0xd4, 0x94, 0x54, 0x14, 0x6f, 0x2f, 0xef, 0xaf, 0x72, 0x32, + 0xf2, 0xb2, 0x55, 0x15, 0xd5, 0x95, 0x48, 0x08, 0xc8, 0x88, + 0x1b, 0x5b, 0x9b, 0xdb, 0x06, 0x46, 0x86, 0xc6, 0x21, 0x61, + 0xa1, 0xe1, 0x3c, 0x7c, 0xbc, 0xfc, 0x4a, 0x0a, 0xca, 0x8a, + 0x57, 0x17, 0xd7, 0x97, 0x70, 0x30, 0xf0, 0xb0, 0x6d, 0x2d, + 0xed, 0xad, 0x3e, 0x7e, 0xbe, 0xfe, 0x23, 0x63, 0xa3, 0xe3, + 0x04, 0x44, 0x84, 0xc4, 0x19, 0x59, 0x99, 0xd9, 0xa2, 0xe2, + 0x22, 0x62, 0xbf, 0xff, 0x3f, 0x7f, 0x98, 0xd8, 0x18, 0x58, + 0x85, 0xc5, 0x05, 0x45, 0xd6, 
0x96, 0x56, 0x16, 0xcb, 0x8b, + 0x4b, 0x0b, 0xec, 0xac, 0x6c, 0x2c, 0xf1, 0xb1, 0x71, 0x31, + 0x00, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, + 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8, 0x64, 0x25, 0xe6, 0xa7, + 0x7d, 0x3c, 0xff, 0xbe, 0x56, 0x17, 0xd4, 0x95, 0x4f, 0x0e, + 0xcd, 0x8c, 0xc8, 0x89, 0x4a, 0x0b, 0xd1, 0x90, 0x53, 0x12, + 0xfa, 0xbb, 0x78, 0x39, 0xe3, 0xa2, 0x61, 0x20, 0xac, 0xed, + 0x2e, 0x6f, 0xb5, 0xf4, 0x37, 0x76, 0x9e, 0xdf, 0x1c, 0x5d, + 0x87, 0xc6, 0x05, 0x44, 0x8d, 0xcc, 0x0f, 0x4e, 0x94, 0xd5, + 0x16, 0x57, 0xbf, 0xfe, 0x3d, 0x7c, 0xa6, 0xe7, 0x24, 0x65, + 0xe9, 0xa8, 0x6b, 0x2a, 0xf0, 0xb1, 0x72, 0x33, 0xdb, 0x9a, + 0x59, 0x18, 0xc2, 0x83, 0x40, 0x01, 0x45, 0x04, 0xc7, 0x86, + 0x5c, 0x1d, 0xde, 0x9f, 0x77, 0x36, 0xf5, 0xb4, 0x6e, 0x2f, + 0xec, 0xad, 0x21, 0x60, 0xa3, 0xe2, 0x38, 0x79, 0xba, 0xfb, + 0x13, 0x52, 0x91, 0xd0, 0x0a, 0x4b, 0x88, 0xc9, 0x07, 0x46, + 0x85, 0xc4, 0x1e, 0x5f, 0x9c, 0xdd, 0x35, 0x74, 0xb7, 0xf6, + 0x2c, 0x6d, 0xae, 0xef, 0x63, 0x22, 0xe1, 0xa0, 0x7a, 0x3b, + 0xf8, 0xb9, 0x51, 0x10, 0xd3, 0x92, 0x48, 0x09, 0xca, 0x8b, + 0xcf, 0x8e, 0x4d, 0x0c, 0xd6, 0x97, 0x54, 0x15, 0xfd, 0xbc, + 0x7f, 0x3e, 0xe4, 0xa5, 0x66, 0x27, 0xab, 0xea, 0x29, 0x68, + 0xb2, 0xf3, 0x30, 0x71, 0x99, 0xd8, 0x1b, 0x5a, 0x80, 0xc1, + 0x02, 0x43, 0x8a, 0xcb, 0x08, 0x49, 0x93, 0xd2, 0x11, 0x50, + 0xb8, 0xf9, 0x3a, 0x7b, 0xa1, 0xe0, 0x23, 0x62, 0xee, 0xaf, + 0x6c, 0x2d, 0xf7, 0xb6, 0x75, 0x34, 0xdc, 0x9d, 0x5e, 0x1f, + 0xc5, 0x84, 0x47, 0x06, 0x42, 0x03, 0xc0, 0x81, 0x5b, 0x1a, + 0xd9, 0x98, 0x70, 0x31, 0xf2, 0xb3, 0x69, 0x28, 0xeb, 0xaa, + 0x26, 0x67, 0xa4, 0xe5, 0x3f, 0x7e, 0xbd, 0xfc, 0x14, 0x55, + 0x96, 0xd7, 0x0d, 0x4c, 0x8f, 0xce, 0x00, 0x42, 0x84, 0xc6, + 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, + 0xbb, 0xf9, 0x54, 0x16, 0xd0, 0x92, 0x41, 0x03, 0xc5, 0x87, + 0x7e, 0x3c, 0xfa, 0xb8, 0x6b, 0x29, 0xef, 0xad, 0xa8, 0xea, + 0x2c, 0x6e, 0xbd, 0xff, 0x39, 0x7b, 0x82, 0xc0, 0x06, 0x44, + 0x97, 0xd5, 0x13, 0x51, 0xfc, 0xbe, 0x78, 0x3a, 0xe9, 0xab, + 0x6d, 0x2f, 0xd6, 0x94, 0x52, 0x10, 0xc3, 0x81, 0x47, 0x05, + 0x4d, 0x0f, 0xc9, 0x8b, 0x58, 0x1a, 0xdc, 0x9e, 0x67, 0x25, + 0xe3, 0xa1, 0x72, 0x30, 0xf6, 0xb4, 0x19, 0x5b, 0x9d, 0xdf, + 0x0c, 0x4e, 0x88, 0xca, 0x33, 0x71, 0xb7, 0xf5, 0x26, 0x64, + 0xa2, 0xe0, 0xe5, 0xa7, 0x61, 0x23, 0xf0, 0xb2, 0x74, 0x36, + 0xcf, 0x8d, 0x4b, 0x09, 0xda, 0x98, 0x5e, 0x1c, 0xb1, 0xf3, + 0x35, 0x77, 0xa4, 0xe6, 0x20, 0x62, 0x9b, 0xd9, 0x1f, 0x5d, + 0x8e, 0xcc, 0x0a, 0x48, 0x9a, 0xd8, 0x1e, 0x5c, 0x8f, 0xcd, + 0x0b, 0x49, 0xb0, 0xf2, 0x34, 0x76, 0xa5, 0xe7, 0x21, 0x63, + 0xce, 0x8c, 0x4a, 0x08, 0xdb, 0x99, 0x5f, 0x1d, 0xe4, 0xa6, + 0x60, 0x22, 0xf1, 0xb3, 0x75, 0x37, 0x32, 0x70, 0xb6, 0xf4, + 0x27, 0x65, 0xa3, 0xe1, 0x18, 0x5a, 0x9c, 0xde, 0x0d, 0x4f, + 0x89, 0xcb, 0x66, 0x24, 0xe2, 0xa0, 0x73, 0x31, 0xf7, 0xb5, + 0x4c, 0x0e, 0xc8, 0x8a, 0x59, 0x1b, 0xdd, 0x9f, 0xd7, 0x95, + 0x53, 0x11, 0xc2, 0x80, 0x46, 0x04, 0xfd, 0xbf, 0x79, 0x3b, + 0xe8, 0xaa, 0x6c, 0x2e, 0x83, 0xc1, 0x07, 0x45, 0x96, 0xd4, + 0x12, 0x50, 0xa9, 0xeb, 0x2d, 0x6f, 0xbc, 0xfe, 0x38, 0x7a, + 0x7f, 0x3d, 0xfb, 0xb9, 0x6a, 0x28, 0xee, 0xac, 0x55, 0x17, + 0xd1, 0x93, 0x40, 0x02, 0xc4, 0x86, 0x2b, 0x69, 0xaf, 0xed, + 0x3e, 0x7c, 0xba, 0xf8, 0x01, 0x43, 0x85, 0xc7, 0x14, 0x56, + 0x90, 0xd2, 0x00, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, + 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6, 0x44, 0x07, + 0xc2, 0x81, 0x55, 0x16, 0xd3, 0x90, 0x66, 0x25, 0xe0, 0xa3, + 0x77, 0x34, 0xf1, 0xb2, 0x88, 0xcb, 0x0e, 0x4d, 0x99, 0xda, + 0x1f, 0x5c, 0xaa, 0xe9, 0x2c, 0x6f, 0xbb, 0xf8, 
0x3d, 0x7e, + 0xcc, 0x8f, 0x4a, 0x09, 0xdd, 0x9e, 0x5b, 0x18, 0xee, 0xad, + 0x68, 0x2b, 0xff, 0xbc, 0x79, 0x3a, 0x0d, 0x4e, 0x8b, 0xc8, + 0x1c, 0x5f, 0x9a, 0xd9, 0x2f, 0x6c, 0xa9, 0xea, 0x3e, 0x7d, + 0xb8, 0xfb, 0x49, 0x0a, 0xcf, 0x8c, 0x58, 0x1b, 0xde, 0x9d, + 0x6b, 0x28, 0xed, 0xae, 0x7a, 0x39, 0xfc, 0xbf, 0x85, 0xc6, + 0x03, 0x40, 0x94, 0xd7, 0x12, 0x51, 0xa7, 0xe4, 0x21, 0x62, + 0xb6, 0xf5, 0x30, 0x73, 0xc1, 0x82, 0x47, 0x04, 0xd0, 0x93, + 0x56, 0x15, 0xe3, 0xa0, 0x65, 0x26, 0xf2, 0xb1, 0x74, 0x37, + 0x1a, 0x59, 0x9c, 0xdf, 0x0b, 0x48, 0x8d, 0xce, 0x38, 0x7b, + 0xbe, 0xfd, 0x29, 0x6a, 0xaf, 0xec, 0x5e, 0x1d, 0xd8, 0x9b, + 0x4f, 0x0c, 0xc9, 0x8a, 0x7c, 0x3f, 0xfa, 0xb9, 0x6d, 0x2e, + 0xeb, 0xa8, 0x92, 0xd1, 0x14, 0x57, 0x83, 0xc0, 0x05, 0x46, + 0xb0, 0xf3, 0x36, 0x75, 0xa1, 0xe2, 0x27, 0x64, 0xd6, 0x95, + 0x50, 0x13, 0xc7, 0x84, 0x41, 0x02, 0xf4, 0xb7, 0x72, 0x31, + 0xe5, 0xa6, 0x63, 0x20, 0x17, 0x54, 0x91, 0xd2, 0x06, 0x45, + 0x80, 0xc3, 0x35, 0x76, 0xb3, 0xf0, 0x24, 0x67, 0xa2, 0xe1, + 0x53, 0x10, 0xd5, 0x96, 0x42, 0x01, 0xc4, 0x87, 0x71, 0x32, + 0xf7, 0xb4, 0x60, 0x23, 0xe6, 0xa5, 0x9f, 0xdc, 0x19, 0x5a, + 0x8e, 0xcd, 0x08, 0x4b, 0xbd, 0xfe, 0x3b, 0x78, 0xac, 0xef, + 0x2a, 0x69, 0xdb, 0x98, 0x5d, 0x1e, 0xca, 0x89, 0x4c, 0x0f, + 0xf9, 0xba, 0x7f, 0x3c, 0xe8, 0xab, 0x6e, 0x2d, 0x00, 0x44, + 0x88, 0xcc, 0x0d, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, + 0x17, 0x53, 0x9f, 0xdb, 0x34, 0x70, 0xbc, 0xf8, 0x39, 0x7d, + 0xb1, 0xf5, 0x2e, 0x6a, 0xa6, 0xe2, 0x23, 0x67, 0xab, 0xef, + 0x68, 0x2c, 0xe0, 0xa4, 0x65, 0x21, 0xed, 0xa9, 0x72, 0x36, + 0xfa, 0xbe, 0x7f, 0x3b, 0xf7, 0xb3, 0x5c, 0x18, 0xd4, 0x90, + 0x51, 0x15, 0xd9, 0x9d, 0x46, 0x02, 0xce, 0x8a, 0x4b, 0x0f, + 0xc3, 0x87, 0xd0, 0x94, 0x58, 0x1c, 0xdd, 0x99, 0x55, 0x11, + 0xca, 0x8e, 0x42, 0x06, 0xc7, 0x83, 0x4f, 0x0b, 0xe4, 0xa0, + 0x6c, 0x28, 0xe9, 0xad, 0x61, 0x25, 0xfe, 0xba, 0x76, 0x32, + 0xf3, 0xb7, 0x7b, 0x3f, 0xb8, 0xfc, 0x30, 0x74, 0xb5, 0xf1, + 0x3d, 0x79, 0xa2, 0xe6, 0x2a, 0x6e, 0xaf, 0xeb, 0x27, 0x63, + 0x8c, 0xc8, 0x04, 0x40, 0x81, 0xc5, 0x09, 0x4d, 0x96, 0xd2, + 0x1e, 0x5a, 0x9b, 0xdf, 0x13, 0x57, 0xbd, 0xf9, 0x35, 0x71, + 0xb0, 0xf4, 0x38, 0x7c, 0xa7, 0xe3, 0x2f, 0x6b, 0xaa, 0xee, + 0x22, 0x66, 0x89, 0xcd, 0x01, 0x45, 0x84, 0xc0, 0x0c, 0x48, + 0x93, 0xd7, 0x1b, 0x5f, 0x9e, 0xda, 0x16, 0x52, 0xd5, 0x91, + 0x5d, 0x19, 0xd8, 0x9c, 0x50, 0x14, 0xcf, 0x8b, 0x47, 0x03, + 0xc2, 0x86, 0x4a, 0x0e, 0xe1, 0xa5, 0x69, 0x2d, 0xec, 0xa8, + 0x64, 0x20, 0xfb, 0xbf, 0x73, 0x37, 0xf6, 0xb2, 0x7e, 0x3a, + 0x6d, 0x29, 0xe5, 0xa1, 0x60, 0x24, 0xe8, 0xac, 0x77, 0x33, + 0xff, 0xbb, 0x7a, 0x3e, 0xf2, 0xb6, 0x59, 0x1d, 0xd1, 0x95, + 0x54, 0x10, 0xdc, 0x98, 0x43, 0x07, 0xcb, 0x8f, 0x4e, 0x0a, + 0xc6, 0x82, 0x05, 0x41, 0x8d, 0xc9, 0x08, 0x4c, 0x80, 0xc4, + 0x1f, 0x5b, 0x97, 0xd3, 0x12, 0x56, 0x9a, 0xde, 0x31, 0x75, + 0xb9, 0xfd, 0x3c, 0x78, 0xb4, 0xf0, 0x2b, 0x6f, 0xa3, 0xe7, + 0x26, 0x62, 0xae, 0xea, 0x00, 0x45, 0x8a, 0xcf, 0x09, 0x4c, + 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4, + 0x24, 0x61, 0xae, 0xeb, 0x2d, 0x68, 0xa7, 0xe2, 0x36, 0x73, + 0xbc, 0xf9, 0x3f, 0x7a, 0xb5, 0xf0, 0x48, 0x0d, 0xc2, 0x87, + 0x41, 0x04, 0xcb, 0x8e, 0x5a, 0x1f, 0xd0, 0x95, 0x53, 0x16, + 0xd9, 0x9c, 0x6c, 0x29, 0xe6, 0xa3, 0x65, 0x20, 0xef, 0xaa, + 0x7e, 0x3b, 0xf4, 0xb1, 0x77, 0x32, 0xfd, 0xb8, 0x90, 0xd5, + 0x1a, 0x5f, 0x99, 0xdc, 0x13, 0x56, 0x82, 0xc7, 0x08, 0x4d, + 0x8b, 0xce, 0x01, 0x44, 0xb4, 0xf1, 0x3e, 0x7b, 0xbd, 0xf8, + 0x37, 0x72, 0xa6, 0xe3, 0x2c, 0x69, 0xaf, 0xea, 0x25, 0x60, + 0xd8, 0x9d, 0x52, 0x17, 0xd1, 0x94, 0x5b, 0x1e, 0xca, 0x8f, + 0x40, 
0x05, 0xc3, 0x86, 0x49, 0x0c, 0xfc, 0xb9, 0x76, 0x33, + 0xf5, 0xb0, 0x7f, 0x3a, 0xee, 0xab, 0x64, 0x21, 0xe7, 0xa2, + 0x6d, 0x28, 0x3d, 0x78, 0xb7, 0xf2, 0x34, 0x71, 0xbe, 0xfb, + 0x2f, 0x6a, 0xa5, 0xe0, 0x26, 0x63, 0xac, 0xe9, 0x19, 0x5c, + 0x93, 0xd6, 0x10, 0x55, 0x9a, 0xdf, 0x0b, 0x4e, 0x81, 0xc4, + 0x02, 0x47, 0x88, 0xcd, 0x75, 0x30, 0xff, 0xba, 0x7c, 0x39, + 0xf6, 0xb3, 0x67, 0x22, 0xed, 0xa8, 0x6e, 0x2b, 0xe4, 0xa1, + 0x51, 0x14, 0xdb, 0x9e, 0x58, 0x1d, 0xd2, 0x97, 0x43, 0x06, + 0xc9, 0x8c, 0x4a, 0x0f, 0xc0, 0x85, 0xad, 0xe8, 0x27, 0x62, + 0xa4, 0xe1, 0x2e, 0x6b, 0xbf, 0xfa, 0x35, 0x70, 0xb6, 0xf3, + 0x3c, 0x79, 0x89, 0xcc, 0x03, 0x46, 0x80, 0xc5, 0x0a, 0x4f, + 0x9b, 0xde, 0x11, 0x54, 0x92, 0xd7, 0x18, 0x5d, 0xe5, 0xa0, + 0x6f, 0x2a, 0xec, 0xa9, 0x66, 0x23, 0xf7, 0xb2, 0x7d, 0x38, + 0xfe, 0xbb, 0x74, 0x31, 0xc1, 0x84, 0x4b, 0x0e, 0xc8, 0x8d, + 0x42, 0x07, 0xd3, 0x96, 0x59, 0x1c, 0xda, 0x9f, 0x50, 0x15, + 0x00, 0x46, 0x8c, 0xca, 0x05, 0x43, 0x89, 0xcf, 0x0a, 0x4c, + 0x86, 0xc0, 0x0f, 0x49, 0x83, 0xc5, 0x14, 0x52, 0x98, 0xde, + 0x11, 0x57, 0x9d, 0xdb, 0x1e, 0x58, 0x92, 0xd4, 0x1b, 0x5d, + 0x97, 0xd1, 0x28, 0x6e, 0xa4, 0xe2, 0x2d, 0x6b, 0xa1, 0xe7, + 0x22, 0x64, 0xae, 0xe8, 0x27, 0x61, 0xab, 0xed, 0x3c, 0x7a, + 0xb0, 0xf6, 0x39, 0x7f, 0xb5, 0xf3, 0x36, 0x70, 0xba, 0xfc, + 0x33, 0x75, 0xbf, 0xf9, 0x50, 0x16, 0xdc, 0x9a, 0x55, 0x13, + 0xd9, 0x9f, 0x5a, 0x1c, 0xd6, 0x90, 0x5f, 0x19, 0xd3, 0x95, + 0x44, 0x02, 0xc8, 0x8e, 0x41, 0x07, 0xcd, 0x8b, 0x4e, 0x08, + 0xc2, 0x84, 0x4b, 0x0d, 0xc7, 0x81, 0x78, 0x3e, 0xf4, 0xb2, + 0x7d, 0x3b, 0xf1, 0xb7, 0x72, 0x34, 0xfe, 0xb8, 0x77, 0x31, + 0xfb, 0xbd, 0x6c, 0x2a, 0xe0, 0xa6, 0x69, 0x2f, 0xe5, 0xa3, + 0x66, 0x20, 0xea, 0xac, 0x63, 0x25, 0xef, 0xa9, 0xa0, 0xe6, + 0x2c, 0x6a, 0xa5, 0xe3, 0x29, 0x6f, 0xaa, 0xec, 0x26, 0x60, + 0xaf, 0xe9, 0x23, 0x65, 0xb4, 0xf2, 0x38, 0x7e, 0xb1, 0xf7, + 0x3d, 0x7b, 0xbe, 0xf8, 0x32, 0x74, 0xbb, 0xfd, 0x37, 0x71, + 0x88, 0xce, 0x04, 0x42, 0x8d, 0xcb, 0x01, 0x47, 0x82, 0xc4, + 0x0e, 0x48, 0x87, 0xc1, 0x0b, 0x4d, 0x9c, 0xda, 0x10, 0x56, + 0x99, 0xdf, 0x15, 0x53, 0x96, 0xd0, 0x1a, 0x5c, 0x93, 0xd5, + 0x1f, 0x59, 0xf0, 0xb6, 0x7c, 0x3a, 0xf5, 0xb3, 0x79, 0x3f, + 0xfa, 0xbc, 0x76, 0x30, 0xff, 0xb9, 0x73, 0x35, 0xe4, 0xa2, + 0x68, 0x2e, 0xe1, 0xa7, 0x6d, 0x2b, 0xee, 0xa8, 0x62, 0x24, + 0xeb, 0xad, 0x67, 0x21, 0xd8, 0x9e, 0x54, 0x12, 0xdd, 0x9b, + 0x51, 0x17, 0xd2, 0x94, 0x5e, 0x18, 0xd7, 0x91, 0x5b, 0x1d, + 0xcc, 0x8a, 0x40, 0x06, 0xc9, 0x8f, 0x45, 0x03, 0xc6, 0x80, + 0x4a, 0x0c, 0xc3, 0x85, 0x4f, 0x09, 0x00, 0x47, 0x8e, 0xc9, + 0x01, 0x46, 0x8f, 0xc8, 0x02, 0x45, 0x8c, 0xcb, 0x03, 0x44, + 0x8d, 0xca, 0x04, 0x43, 0x8a, 0xcd, 0x05, 0x42, 0x8b, 0xcc, + 0x06, 0x41, 0x88, 0xcf, 0x07, 0x40, 0x89, 0xce, 0x08, 0x4f, + 0x86, 0xc1, 0x09, 0x4e, 0x87, 0xc0, 0x0a, 0x4d, 0x84, 0xc3, + 0x0b, 0x4c, 0x85, 0xc2, 0x0c, 0x4b, 0x82, 0xc5, 0x0d, 0x4a, + 0x83, 0xc4, 0x0e, 0x49, 0x80, 0xc7, 0x0f, 0x48, 0x81, 0xc6, + 0x10, 0x57, 0x9e, 0xd9, 0x11, 0x56, 0x9f, 0xd8, 0x12, 0x55, + 0x9c, 0xdb, 0x13, 0x54, 0x9d, 0xda, 0x14, 0x53, 0x9a, 0xdd, + 0x15, 0x52, 0x9b, 0xdc, 0x16, 0x51, 0x98, 0xdf, 0x17, 0x50, + 0x99, 0xde, 0x18, 0x5f, 0x96, 0xd1, 0x19, 0x5e, 0x97, 0xd0, + 0x1a, 0x5d, 0x94, 0xd3, 0x1b, 0x5c, 0x95, 0xd2, 0x1c, 0x5b, + 0x92, 0xd5, 0x1d, 0x5a, 0x93, 0xd4, 0x1e, 0x59, 0x90, 0xd7, + 0x1f, 0x58, 0x91, 0xd6, 0x20, 0x67, 0xae, 0xe9, 0x21, 0x66, + 0xaf, 0xe8, 0x22, 0x65, 0xac, 0xeb, 0x23, 0x64, 0xad, 0xea, + 0x24, 0x63, 0xaa, 0xed, 0x25, 0x62, 0xab, 0xec, 0x26, 0x61, + 0xa8, 0xef, 0x27, 0x60, 0xa9, 0xee, 0x28, 0x6f, 0xa6, 0xe1, + 0x29, 0x6e, 0xa7, 0xe0, 
0x2a, 0x6d, 0xa4, 0xe3, 0x2b, 0x6c, + 0xa5, 0xe2, 0x2c, 0x6b, 0xa2, 0xe5, 0x2d, 0x6a, 0xa3, 0xe4, + 0x2e, 0x69, 0xa0, 0xe7, 0x2f, 0x68, 0xa1, 0xe6, 0x30, 0x77, + 0xbe, 0xf9, 0x31, 0x76, 0xbf, 0xf8, 0x32, 0x75, 0xbc, 0xfb, + 0x33, 0x74, 0xbd, 0xfa, 0x34, 0x73, 0xba, 0xfd, 0x35, 0x72, + 0xbb, 0xfc, 0x36, 0x71, 0xb8, 0xff, 0x37, 0x70, 0xb9, 0xfe, + 0x38, 0x7f, 0xb6, 0xf1, 0x39, 0x7e, 0xb7, 0xf0, 0x3a, 0x7d, + 0xb4, 0xf3, 0x3b, 0x7c, 0xb5, 0xf2, 0x3c, 0x7b, 0xb2, 0xf5, + 0x3d, 0x7a, 0xb3, 0xf4, 0x3e, 0x79, 0xb0, 0xf7, 0x3f, 0x78, + 0xb1, 0xf6, 0x00, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, + 0x7a, 0x32, 0xea, 0xa2, 0x47, 0x0f, 0xd7, 0x9f, 0xf4, 0xbc, + 0x64, 0x2c, 0xc9, 0x81, 0x59, 0x11, 0x8e, 0xc6, 0x1e, 0x56, + 0xb3, 0xfb, 0x23, 0x6b, 0xf5, 0xbd, 0x65, 0x2d, 0xc8, 0x80, + 0x58, 0x10, 0x8f, 0xc7, 0x1f, 0x57, 0xb2, 0xfa, 0x22, 0x6a, + 0x01, 0x49, 0x91, 0xd9, 0x3c, 0x74, 0xac, 0xe4, 0x7b, 0x33, + 0xeb, 0xa3, 0x46, 0x0e, 0xd6, 0x9e, 0xf7, 0xbf, 0x67, 0x2f, + 0xca, 0x82, 0x5a, 0x12, 0x8d, 0xc5, 0x1d, 0x55, 0xb0, 0xf8, + 0x20, 0x68, 0x03, 0x4b, 0x93, 0xdb, 0x3e, 0x76, 0xae, 0xe6, + 0x79, 0x31, 0xe9, 0xa1, 0x44, 0x0c, 0xd4, 0x9c, 0x02, 0x4a, + 0x92, 0xda, 0x3f, 0x77, 0xaf, 0xe7, 0x78, 0x30, 0xe8, 0xa0, + 0x45, 0x0d, 0xd5, 0x9d, 0xf6, 0xbe, 0x66, 0x2e, 0xcb, 0x83, + 0x5b, 0x13, 0x8c, 0xc4, 0x1c, 0x54, 0xb1, 0xf9, 0x21, 0x69, + 0xf3, 0xbb, 0x63, 0x2b, 0xce, 0x86, 0x5e, 0x16, 0x89, 0xc1, + 0x19, 0x51, 0xb4, 0xfc, 0x24, 0x6c, 0x07, 0x4f, 0x97, 0xdf, + 0x3a, 0x72, 0xaa, 0xe2, 0x7d, 0x35, 0xed, 0xa5, 0x40, 0x08, + 0xd0, 0x98, 0x06, 0x4e, 0x96, 0xde, 0x3b, 0x73, 0xab, 0xe3, + 0x7c, 0x34, 0xec, 0xa4, 0x41, 0x09, 0xd1, 0x99, 0xf2, 0xba, + 0x62, 0x2a, 0xcf, 0x87, 0x5f, 0x17, 0x88, 0xc0, 0x18, 0x50, + 0xb5, 0xfd, 0x25, 0x6d, 0x04, 0x4c, 0x94, 0xdc, 0x39, 0x71, + 0xa9, 0xe1, 0x7e, 0x36, 0xee, 0xa6, 0x43, 0x0b, 0xd3, 0x9b, + 0xf0, 0xb8, 0x60, 0x28, 0xcd, 0x85, 0x5d, 0x15, 0x8a, 0xc2, + 0x1a, 0x52, 0xb7, 0xff, 0x27, 0x6f, 0xf1, 0xb9, 0x61, 0x29, + 0xcc, 0x84, 0x5c, 0x14, 0x8b, 0xc3, 0x1b, 0x53, 0xb6, 0xfe, + 0x26, 0x6e, 0x05, 0x4d, 0x95, 0xdd, 0x38, 0x70, 0xa8, 0xe0, + 0x7f, 0x37, 0xef, 0xa7, 0x42, 0x0a, 0xd2, 0x9a, 0x00, 0x49, + 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, + 0x4b, 0x02, 0xd9, 0x90, 0xe4, 0xad, 0x76, 0x3f, 0xdd, 0x94, + 0x4f, 0x06, 0x96, 0xdf, 0x04, 0x4d, 0xaf, 0xe6, 0x3d, 0x74, + 0xd5, 0x9c, 0x47, 0x0e, 0xec, 0xa5, 0x7e, 0x37, 0xa7, 0xee, + 0x35, 0x7c, 0x9e, 0xd7, 0x0c, 0x45, 0x31, 0x78, 0xa3, 0xea, + 0x08, 0x41, 0x9a, 0xd3, 0x43, 0x0a, 0xd1, 0x98, 0x7a, 0x33, + 0xe8, 0xa1, 0xb7, 0xfe, 0x25, 0x6c, 0x8e, 0xc7, 0x1c, 0x55, + 0xc5, 0x8c, 0x57, 0x1e, 0xfc, 0xb5, 0x6e, 0x27, 0x53, 0x1a, + 0xc1, 0x88, 0x6a, 0x23, 0xf8, 0xb1, 0x21, 0x68, 0xb3, 0xfa, + 0x18, 0x51, 0x8a, 0xc3, 0x62, 0x2b, 0xf0, 0xb9, 0x5b, 0x12, + 0xc9, 0x80, 0x10, 0x59, 0x82, 0xcb, 0x29, 0x60, 0xbb, 0xf2, + 0x86, 0xcf, 0x14, 0x5d, 0xbf, 0xf6, 0x2d, 0x64, 0xf4, 0xbd, + 0x66, 0x2f, 0xcd, 0x84, 0x5f, 0x16, 0x73, 0x3a, 0xe1, 0xa8, + 0x4a, 0x03, 0xd8, 0x91, 0x01, 0x48, 0x93, 0xda, 0x38, 0x71, + 0xaa, 0xe3, 0x97, 0xde, 0x05, 0x4c, 0xae, 0xe7, 0x3c, 0x75, + 0xe5, 0xac, 0x77, 0x3e, 0xdc, 0x95, 0x4e, 0x07, 0xa6, 0xef, + 0x34, 0x7d, 0x9f, 0xd6, 0x0d, 0x44, 0xd4, 0x9d, 0x46, 0x0f, + 0xed, 0xa4, 0x7f, 0x36, 0x42, 0x0b, 0xd0, 0x99, 0x7b, 0x32, + 0xe9, 0xa0, 0x30, 0x79, 0xa2, 0xeb, 0x09, 0x40, 0x9b, 0xd2, + 0xc4, 0x8d, 0x56, 0x1f, 0xfd, 0xb4, 0x6f, 0x26, 0xb6, 0xff, + 0x24, 0x6d, 0x8f, 0xc6, 0x1d, 0x54, 0x20, 0x69, 0xb2, 0xfb, + 0x19, 0x50, 0x8b, 0xc2, 0x52, 0x1b, 0xc0, 0x89, 0x6b, 0x22, + 0xf9, 0xb0, 0x11, 0x58, 0x83, 0xca, 0x28, 
0x61, 0xba, 0xf3, + 0x63, 0x2a, 0xf1, 0xb8, 0x5a, 0x13, 0xc8, 0x81, 0xf5, 0xbc, + 0x67, 0x2e, 0xcc, 0x85, 0x5e, 0x17, 0x87, 0xce, 0x15, 0x5c, + 0xbe, 0xf7, 0x2c, 0x65, 0x00, 0x4a, 0x94, 0xde, 0x35, 0x7f, + 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81, + 0xd4, 0x9e, 0x40, 0x0a, 0xe1, 0xab, 0x75, 0x3f, 0xbe, 0xf4, + 0x2a, 0x60, 0x8b, 0xc1, 0x1f, 0x55, 0xb5, 0xff, 0x21, 0x6b, + 0x80, 0xca, 0x14, 0x5e, 0xdf, 0x95, 0x4b, 0x01, 0xea, 0xa0, + 0x7e, 0x34, 0x61, 0x2b, 0xf5, 0xbf, 0x54, 0x1e, 0xc0, 0x8a, + 0x0b, 0x41, 0x9f, 0xd5, 0x3e, 0x74, 0xaa, 0xe0, 0x77, 0x3d, + 0xe3, 0xa9, 0x42, 0x08, 0xd6, 0x9c, 0x1d, 0x57, 0x89, 0xc3, + 0x28, 0x62, 0xbc, 0xf6, 0xa3, 0xe9, 0x37, 0x7d, 0x96, 0xdc, + 0x02, 0x48, 0xc9, 0x83, 0x5d, 0x17, 0xfc, 0xb6, 0x68, 0x22, + 0xc2, 0x88, 0x56, 0x1c, 0xf7, 0xbd, 0x63, 0x29, 0xa8, 0xe2, + 0x3c, 0x76, 0x9d, 0xd7, 0x09, 0x43, 0x16, 0x5c, 0x82, 0xc8, + 0x23, 0x69, 0xb7, 0xfd, 0x7c, 0x36, 0xe8, 0xa2, 0x49, 0x03, + 0xdd, 0x97, 0xee, 0xa4, 0x7a, 0x30, 0xdb, 0x91, 0x4f, 0x05, + 0x84, 0xce, 0x10, 0x5a, 0xb1, 0xfb, 0x25, 0x6f, 0x3a, 0x70, + 0xae, 0xe4, 0x0f, 0x45, 0x9b, 0xd1, 0x50, 0x1a, 0xc4, 0x8e, + 0x65, 0x2f, 0xf1, 0xbb, 0x5b, 0x11, 0xcf, 0x85, 0x6e, 0x24, + 0xfa, 0xb0, 0x31, 0x7b, 0xa5, 0xef, 0x04, 0x4e, 0x90, 0xda, + 0x8f, 0xc5, 0x1b, 0x51, 0xba, 0xf0, 0x2e, 0x64, 0xe5, 0xaf, + 0x71, 0x3b, 0xd0, 0x9a, 0x44, 0x0e, 0x99, 0xd3, 0x0d, 0x47, + 0xac, 0xe6, 0x38, 0x72, 0xf3, 0xb9, 0x67, 0x2d, 0xc6, 0x8c, + 0x52, 0x18, 0x4d, 0x07, 0xd9, 0x93, 0x78, 0x32, 0xec, 0xa6, + 0x27, 0x6d, 0xb3, 0xf9, 0x12, 0x58, 0x86, 0xcc, 0x2c, 0x66, + 0xb8, 0xf2, 0x19, 0x53, 0x8d, 0xc7, 0x46, 0x0c, 0xd2, 0x98, + 0x73, 0x39, 0xe7, 0xad, 0xf8, 0xb2, 0x6c, 0x26, 0xcd, 0x87, + 0x59, 0x13, 0x92, 0xd8, 0x06, 0x4c, 0xa7, 0xed, 0x33, 0x79, + 0x00, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, + 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e, 0xc4, 0x8f, 0x52, 0x19, + 0xf5, 0xbe, 0x63, 0x28, 0xa6, 0xed, 0x30, 0x7b, 0x97, 0xdc, + 0x01, 0x4a, 0x95, 0xde, 0x03, 0x48, 0xa4, 0xef, 0x32, 0x79, + 0xf7, 0xbc, 0x61, 0x2a, 0xc6, 0x8d, 0x50, 0x1b, 0x51, 0x1a, + 0xc7, 0x8c, 0x60, 0x2b, 0xf6, 0xbd, 0x33, 0x78, 0xa5, 0xee, + 0x02, 0x49, 0x94, 0xdf, 0x37, 0x7c, 0xa1, 0xea, 0x06, 0x4d, + 0x90, 0xdb, 0x55, 0x1e, 0xc3, 0x88, 0x64, 0x2f, 0xf2, 0xb9, + 0xf3, 0xb8, 0x65, 0x2e, 0xc2, 0x89, 0x54, 0x1f, 0x91, 0xda, + 0x07, 0x4c, 0xa0, 0xeb, 0x36, 0x7d, 0xa2, 0xe9, 0x34, 0x7f, + 0x93, 0xd8, 0x05, 0x4e, 0xc0, 0x8b, 0x56, 0x1d, 0xf1, 0xba, + 0x67, 0x2c, 0x66, 0x2d, 0xf0, 0xbb, 0x57, 0x1c, 0xc1, 0x8a, + 0x04, 0x4f, 0x92, 0xd9, 0x35, 0x7e, 0xa3, 0xe8, 0x6e, 0x25, + 0xf8, 0xb3, 0x5f, 0x14, 0xc9, 0x82, 0x0c, 0x47, 0x9a, 0xd1, + 0x3d, 0x76, 0xab, 0xe0, 0xaa, 0xe1, 0x3c, 0x77, 0x9b, 0xd0, + 0x0d, 0x46, 0xc8, 0x83, 0x5e, 0x15, 0xf9, 0xb2, 0x6f, 0x24, + 0xfb, 0xb0, 0x6d, 0x26, 0xca, 0x81, 0x5c, 0x17, 0x99, 0xd2, + 0x0f, 0x44, 0xa8, 0xe3, 0x3e, 0x75, 0x3f, 0x74, 0xa9, 0xe2, + 0x0e, 0x45, 0x98, 0xd3, 0x5d, 0x16, 0xcb, 0x80, 0x6c, 0x27, + 0xfa, 0xb1, 0x59, 0x12, 0xcf, 0x84, 0x68, 0x23, 0xfe, 0xb5, + 0x3b, 0x70, 0xad, 0xe6, 0x0a, 0x41, 0x9c, 0xd7, 0x9d, 0xd6, + 0x0b, 0x40, 0xac, 0xe7, 0x3a, 0x71, 0xff, 0xb4, 0x69, 0x22, + 0xce, 0x85, 0x58, 0x13, 0xcc, 0x87, 0x5a, 0x11, 0xfd, 0xb6, + 0x6b, 0x20, 0xae, 0xe5, 0x38, 0x73, 0x9f, 0xd4, 0x09, 0x42, + 0x08, 0x43, 0x9e, 0xd5, 0x39, 0x72, 0xaf, 0xe4, 0x6a, 0x21, + 0xfc, 0xb7, 0x5b, 0x10, 0xcd, 0x86, 0x00, 0x4c, 0x98, 0xd4, + 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, + 0xef, 0xa3, 0xb4, 0xf8, 0x2c, 0x60, 0x99, 0xd5, 0x01, 0x4d, + 0xee, 0xa2, 0x76, 0x3a, 0xc3, 0x8f, 0x5b, 0x17, 0x75, 0x39, + 
0xed, 0xa1, 0x58, 0x14, 0xc0, 0x8c, 0x2f, 0x63, 0xb7, 0xfb, + 0x02, 0x4e, 0x9a, 0xd6, 0xc1, 0x8d, 0x59, 0x15, 0xec, 0xa0, + 0x74, 0x38, 0x9b, 0xd7, 0x03, 0x4f, 0xb6, 0xfa, 0x2e, 0x62, + 0xea, 0xa6, 0x72, 0x3e, 0xc7, 0x8b, 0x5f, 0x13, 0xb0, 0xfc, + 0x28, 0x64, 0x9d, 0xd1, 0x05, 0x49, 0x5e, 0x12, 0xc6, 0x8a, + 0x73, 0x3f, 0xeb, 0xa7, 0x04, 0x48, 0x9c, 0xd0, 0x29, 0x65, + 0xb1, 0xfd, 0x9f, 0xd3, 0x07, 0x4b, 0xb2, 0xfe, 0x2a, 0x66, + 0xc5, 0x89, 0x5d, 0x11, 0xe8, 0xa4, 0x70, 0x3c, 0x2b, 0x67, + 0xb3, 0xff, 0x06, 0x4a, 0x9e, 0xd2, 0x71, 0x3d, 0xe9, 0xa5, + 0x5c, 0x10, 0xc4, 0x88, 0xc9, 0x85, 0x51, 0x1d, 0xe4, 0xa8, + 0x7c, 0x30, 0x93, 0xdf, 0x0b, 0x47, 0xbe, 0xf2, 0x26, 0x6a, + 0x7d, 0x31, 0xe5, 0xa9, 0x50, 0x1c, 0xc8, 0x84, 0x27, 0x6b, + 0xbf, 0xf3, 0x0a, 0x46, 0x92, 0xde, 0xbc, 0xf0, 0x24, 0x68, + 0x91, 0xdd, 0x09, 0x45, 0xe6, 0xaa, 0x7e, 0x32, 0xcb, 0x87, + 0x53, 0x1f, 0x08, 0x44, 0x90, 0xdc, 0x25, 0x69, 0xbd, 0xf1, + 0x52, 0x1e, 0xca, 0x86, 0x7f, 0x33, 0xe7, 0xab, 0x23, 0x6f, + 0xbb, 0xf7, 0x0e, 0x42, 0x96, 0xda, 0x79, 0x35, 0xe1, 0xad, + 0x54, 0x18, 0xcc, 0x80, 0x97, 0xdb, 0x0f, 0x43, 0xba, 0xf6, + 0x22, 0x6e, 0xcd, 0x81, 0x55, 0x19, 0xe0, 0xac, 0x78, 0x34, + 0x56, 0x1a, 0xce, 0x82, 0x7b, 0x37, 0xe3, 0xaf, 0x0c, 0x40, + 0x94, 0xd8, 0x21, 0x6d, 0xb9, 0xf5, 0xe2, 0xae, 0x7a, 0x36, + 0xcf, 0x83, 0x57, 0x1b, 0xb8, 0xf4, 0x20, 0x6c, 0x95, 0xd9, + 0x0d, 0x41, 0x00, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, + 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac, 0xa4, 0xe9, + 0x3e, 0x73, 0x8d, 0xc0, 0x17, 0x5a, 0xf6, 0xbb, 0x6c, 0x21, + 0xdf, 0x92, 0x45, 0x08, 0x55, 0x18, 0xcf, 0x82, 0x7c, 0x31, + 0xe6, 0xab, 0x07, 0x4a, 0x9d, 0xd0, 0x2e, 0x63, 0xb4, 0xf9, + 0xf1, 0xbc, 0x6b, 0x26, 0xd8, 0x95, 0x42, 0x0f, 0xa3, 0xee, + 0x39, 0x74, 0x8a, 0xc7, 0x10, 0x5d, 0xaa, 0xe7, 0x30, 0x7d, + 0x83, 0xce, 0x19, 0x54, 0xf8, 0xb5, 0x62, 0x2f, 0xd1, 0x9c, + 0x4b, 0x06, 0x0e, 0x43, 0x94, 0xd9, 0x27, 0x6a, 0xbd, 0xf0, + 0x5c, 0x11, 0xc6, 0x8b, 0x75, 0x38, 0xef, 0xa2, 0xff, 0xb2, + 0x65, 0x28, 0xd6, 0x9b, 0x4c, 0x01, 0xad, 0xe0, 0x37, 0x7a, + 0x84, 0xc9, 0x1e, 0x53, 0x5b, 0x16, 0xc1, 0x8c, 0x72, 0x3f, + 0xe8, 0xa5, 0x09, 0x44, 0x93, 0xde, 0x20, 0x6d, 0xba, 0xf7, + 0x49, 0x04, 0xd3, 0x9e, 0x60, 0x2d, 0xfa, 0xb7, 0x1b, 0x56, + 0x81, 0xcc, 0x32, 0x7f, 0xa8, 0xe5, 0xed, 0xa0, 0x77, 0x3a, + 0xc4, 0x89, 0x5e, 0x13, 0xbf, 0xf2, 0x25, 0x68, 0x96, 0xdb, + 0x0c, 0x41, 0x1c, 0x51, 0x86, 0xcb, 0x35, 0x78, 0xaf, 0xe2, + 0x4e, 0x03, 0xd4, 0x99, 0x67, 0x2a, 0xfd, 0xb0, 0xb8, 0xf5, + 0x22, 0x6f, 0x91, 0xdc, 0x0b, 0x46, 0xea, 0xa7, 0x70, 0x3d, + 0xc3, 0x8e, 0x59, 0x14, 0xe3, 0xae, 0x79, 0x34, 0xca, 0x87, + 0x50, 0x1d, 0xb1, 0xfc, 0x2b, 0x66, 0x98, 0xd5, 0x02, 0x4f, + 0x47, 0x0a, 0xdd, 0x90, 0x6e, 0x23, 0xf4, 0xb9, 0x15, 0x58, + 0x8f, 0xc2, 0x3c, 0x71, 0xa6, 0xeb, 0xb6, 0xfb, 0x2c, 0x61, + 0x9f, 0xd2, 0x05, 0x48, 0xe4, 0xa9, 0x7e, 0x33, 0xcd, 0x80, + 0x57, 0x1a, 0x12, 0x5f, 0x88, 0xc5, 0x3b, 0x76, 0xa1, 0xec, + 0x40, 0x0d, 0xda, 0x97, 0x69, 0x24, 0xf3, 0xbe, 0x00, 0x4e, + 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x04, 0xd6, 0x98, + 0x6f, 0x21, 0xf3, 0xbd, 0x94, 0xda, 0x08, 0x46, 0xb1, 0xff, + 0x2d, 0x63, 0xde, 0x90, 0x42, 0x0c, 0xfb, 0xb5, 0x67, 0x29, + 0x35, 0x7b, 0xa9, 0xe7, 0x10, 0x5e, 0x8c, 0xc2, 0x7f, 0x31, + 0xe3, 0xad, 0x5a, 0x14, 0xc6, 0x88, 0xa1, 0xef, 0x3d, 0x73, + 0x84, 0xca, 0x18, 0x56, 0xeb, 0xa5, 0x77, 0x39, 0xce, 0x80, + 0x52, 0x1c, 0x6a, 0x24, 0xf6, 0xb8, 0x4f, 0x01, 0xd3, 0x9d, + 0x20, 0x6e, 0xbc, 0xf2, 0x05, 0x4b, 0x99, 0xd7, 0xfe, 0xb0, + 0x62, 0x2c, 0xdb, 0x95, 0x47, 0x09, 0xb4, 0xfa, 0x28, 0x66, + 0x91, 0xdf, 0x0d, 
0x43, 0x5f, 0x11, 0xc3, 0x8d, 0x7a, 0x34, + 0xe6, 0xa8, 0x15, 0x5b, 0x89, 0xc7, 0x30, 0x7e, 0xac, 0xe2, + 0xcb, 0x85, 0x57, 0x19, 0xee, 0xa0, 0x72, 0x3c, 0x81, 0xcf, + 0x1d, 0x53, 0xa4, 0xea, 0x38, 0x76, 0xd4, 0x9a, 0x48, 0x06, + 0xf1, 0xbf, 0x6d, 0x23, 0x9e, 0xd0, 0x02, 0x4c, 0xbb, 0xf5, + 0x27, 0x69, 0x40, 0x0e, 0xdc, 0x92, 0x65, 0x2b, 0xf9, 0xb7, + 0x0a, 0x44, 0x96, 0xd8, 0x2f, 0x61, 0xb3, 0xfd, 0xe1, 0xaf, + 0x7d, 0x33, 0xc4, 0x8a, 0x58, 0x16, 0xab, 0xe5, 0x37, 0x79, + 0x8e, 0xc0, 0x12, 0x5c, 0x75, 0x3b, 0xe9, 0xa7, 0x50, 0x1e, + 0xcc, 0x82, 0x3f, 0x71, 0xa3, 0xed, 0x1a, 0x54, 0x86, 0xc8, + 0xbe, 0xf0, 0x22, 0x6c, 0x9b, 0xd5, 0x07, 0x49, 0xf4, 0xba, + 0x68, 0x26, 0xd1, 0x9f, 0x4d, 0x03, 0x2a, 0x64, 0xb6, 0xf8, + 0x0f, 0x41, 0x93, 0xdd, 0x60, 0x2e, 0xfc, 0xb2, 0x45, 0x0b, + 0xd9, 0x97, 0x8b, 0xc5, 0x17, 0x59, 0xae, 0xe0, 0x32, 0x7c, + 0xc1, 0x8f, 0x5d, 0x13, 0xe4, 0xaa, 0x78, 0x36, 0x1f, 0x51, + 0x83, 0xcd, 0x3a, 0x74, 0xa6, 0xe8, 0x55, 0x1b, 0xc9, 0x87, + 0x70, 0x3e, 0xec, 0xa2, 0x00, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, + 0xbf, 0xf0, 0x42, 0x0d, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2, + 0x84, 0xcb, 0x1a, 0x55, 0xa5, 0xea, 0x3b, 0x74, 0xc6, 0x89, + 0x58, 0x17, 0xe7, 0xa8, 0x79, 0x36, 0x15, 0x5a, 0x8b, 0xc4, + 0x34, 0x7b, 0xaa, 0xe5, 0x57, 0x18, 0xc9, 0x86, 0x76, 0x39, + 0xe8, 0xa7, 0x91, 0xde, 0x0f, 0x40, 0xb0, 0xff, 0x2e, 0x61, + 0xd3, 0x9c, 0x4d, 0x02, 0xf2, 0xbd, 0x6c, 0x23, 0x2a, 0x65, + 0xb4, 0xfb, 0x0b, 0x44, 0x95, 0xda, 0x68, 0x27, 0xf6, 0xb9, + 0x49, 0x06, 0xd7, 0x98, 0xae, 0xe1, 0x30, 0x7f, 0x8f, 0xc0, + 0x11, 0x5e, 0xec, 0xa3, 0x72, 0x3d, 0xcd, 0x82, 0x53, 0x1c, + 0x3f, 0x70, 0xa1, 0xee, 0x1e, 0x51, 0x80, 0xcf, 0x7d, 0x32, + 0xe3, 0xac, 0x5c, 0x13, 0xc2, 0x8d, 0xbb, 0xf4, 0x25, 0x6a, + 0x9a, 0xd5, 0x04, 0x4b, 0xf9, 0xb6, 0x67, 0x28, 0xd8, 0x97, + 0x46, 0x09, 0x54, 0x1b, 0xca, 0x85, 0x75, 0x3a, 0xeb, 0xa4, + 0x16, 0x59, 0x88, 0xc7, 0x37, 0x78, 0xa9, 0xe6, 0xd0, 0x9f, + 0x4e, 0x01, 0xf1, 0xbe, 0x6f, 0x20, 0x92, 0xdd, 0x0c, 0x43, + 0xb3, 0xfc, 0x2d, 0x62, 0x41, 0x0e, 0xdf, 0x90, 0x60, 0x2f, + 0xfe, 0xb1, 0x03, 0x4c, 0x9d, 0xd2, 0x22, 0x6d, 0xbc, 0xf3, + 0xc5, 0x8a, 0x5b, 0x14, 0xe4, 0xab, 0x7a, 0x35, 0x87, 0xc8, + 0x19, 0x56, 0xa6, 0xe9, 0x38, 0x77, 0x7e, 0x31, 0xe0, 0xaf, + 0x5f, 0x10, 0xc1, 0x8e, 0x3c, 0x73, 0xa2, 0xed, 0x1d, 0x52, + 0x83, 0xcc, 0xfa, 0xb5, 0x64, 0x2b, 0xdb, 0x94, 0x45, 0x0a, + 0xb8, 0xf7, 0x26, 0x69, 0x99, 0xd6, 0x07, 0x48, 0x6b, 0x24, + 0xf5, 0xba, 0x4a, 0x05, 0xd4, 0x9b, 0x29, 0x66, 0xb7, 0xf8, + 0x08, 0x47, 0x96, 0xd9, 0xef, 0xa0, 0x71, 0x3e, 0xce, 0x81, + 0x50, 0x1f, 0xad, 0xe2, 0x33, 0x7c, 0x8c, 0xc3, 0x12, 0x5d, + 0x00, 0x50, 0xa0, 0xf0, 0x5d, 0x0d, 0xfd, 0xad, 0xba, 0xea, + 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17, 0x69, 0x39, 0xc9, 0x99, + 0x34, 0x64, 0x94, 0xc4, 0xd3, 0x83, 0x73, 0x23, 0x8e, 0xde, + 0x2e, 0x7e, 0xd2, 0x82, 0x72, 0x22, 0x8f, 0xdf, 0x2f, 0x7f, + 0x68, 0x38, 0xc8, 0x98, 0x35, 0x65, 0x95, 0xc5, 0xbb, 0xeb, + 0x1b, 0x4b, 0xe6, 0xb6, 0x46, 0x16, 0x01, 0x51, 0xa1, 0xf1, + 0x5c, 0x0c, 0xfc, 0xac, 0xb9, 0xe9, 0x19, 0x49, 0xe4, 0xb4, + 0x44, 0x14, 0x03, 0x53, 0xa3, 0xf3, 0x5e, 0x0e, 0xfe, 0xae, + 0xd0, 0x80, 0x70, 0x20, 0x8d, 0xdd, 0x2d, 0x7d, 0x6a, 0x3a, + 0xca, 0x9a, 0x37, 0x67, 0x97, 0xc7, 0x6b, 0x3b, 0xcb, 0x9b, + 0x36, 0x66, 0x96, 0xc6, 0xd1, 0x81, 0x71, 0x21, 0x8c, 0xdc, + 0x2c, 0x7c, 0x02, 0x52, 0xa2, 0xf2, 0x5f, 0x0f, 0xff, 0xaf, + 0xb8, 0xe8, 0x18, 0x48, 0xe5, 0xb5, 0x45, 0x15, 0x6f, 0x3f, + 0xcf, 0x9f, 0x32, 0x62, 0x92, 0xc2, 0xd5, 0x85, 0x75, 0x25, + 0x88, 0xd8, 0x28, 0x78, 0x06, 0x56, 0xa6, 0xf6, 0x5b, 0x0b, + 0xfb, 0xab, 0xbc, 0xec, 0x1c, 0x4c, 
0xe1, 0xb1, 0x41, 0x11, + 0xbd, 0xed, 0x1d, 0x4d, 0xe0, 0xb0, 0x40, 0x10, 0x07, 0x57, + 0xa7, 0xf7, 0x5a, 0x0a, 0xfa, 0xaa, 0xd4, 0x84, 0x74, 0x24, + 0x89, 0xd9, 0x29, 0x79, 0x6e, 0x3e, 0xce, 0x9e, 0x33, 0x63, + 0x93, 0xc3, 0xd6, 0x86, 0x76, 0x26, 0x8b, 0xdb, 0x2b, 0x7b, + 0x6c, 0x3c, 0xcc, 0x9c, 0x31, 0x61, 0x91, 0xc1, 0xbf, 0xef, + 0x1f, 0x4f, 0xe2, 0xb2, 0x42, 0x12, 0x05, 0x55, 0xa5, 0xf5, + 0x58, 0x08, 0xf8, 0xa8, 0x04, 0x54, 0xa4, 0xf4, 0x59, 0x09, + 0xf9, 0xa9, 0xbe, 0xee, 0x1e, 0x4e, 0xe3, 0xb3, 0x43, 0x13, + 0x6d, 0x3d, 0xcd, 0x9d, 0x30, 0x60, 0x90, 0xc0, 0xd7, 0x87, + 0x77, 0x27, 0x8a, 0xda, 0x2a, 0x7a, 0x00, 0x51, 0xa2, 0xf3, + 0x59, 0x08, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, + 0x49, 0x18, 0x79, 0x28, 0xdb, 0x8a, 0x20, 0x71, 0x82, 0xd3, + 0xcb, 0x9a, 0x69, 0x38, 0x92, 0xc3, 0x30, 0x61, 0xf2, 0xa3, + 0x50, 0x01, 0xab, 0xfa, 0x09, 0x58, 0x40, 0x11, 0xe2, 0xb3, + 0x19, 0x48, 0xbb, 0xea, 0x8b, 0xda, 0x29, 0x78, 0xd2, 0x83, + 0x70, 0x21, 0x39, 0x68, 0x9b, 0xca, 0x60, 0x31, 0xc2, 0x93, + 0xf9, 0xa8, 0x5b, 0x0a, 0xa0, 0xf1, 0x02, 0x53, 0x4b, 0x1a, + 0xe9, 0xb8, 0x12, 0x43, 0xb0, 0xe1, 0x80, 0xd1, 0x22, 0x73, + 0xd9, 0x88, 0x7b, 0x2a, 0x32, 0x63, 0x90, 0xc1, 0x6b, 0x3a, + 0xc9, 0x98, 0x0b, 0x5a, 0xa9, 0xf8, 0x52, 0x03, 0xf0, 0xa1, + 0xb9, 0xe8, 0x1b, 0x4a, 0xe0, 0xb1, 0x42, 0x13, 0x72, 0x23, + 0xd0, 0x81, 0x2b, 0x7a, 0x89, 0xd8, 0xc0, 0x91, 0x62, 0x33, + 0x99, 0xc8, 0x3b, 0x6a, 0xef, 0xbe, 0x4d, 0x1c, 0xb6, 0xe7, + 0x14, 0x45, 0x5d, 0x0c, 0xff, 0xae, 0x04, 0x55, 0xa6, 0xf7, + 0x96, 0xc7, 0x34, 0x65, 0xcf, 0x9e, 0x6d, 0x3c, 0x24, 0x75, + 0x86, 0xd7, 0x7d, 0x2c, 0xdf, 0x8e, 0x1d, 0x4c, 0xbf, 0xee, + 0x44, 0x15, 0xe6, 0xb7, 0xaf, 0xfe, 0x0d, 0x5c, 0xf6, 0xa7, + 0x54, 0x05, 0x64, 0x35, 0xc6, 0x97, 0x3d, 0x6c, 0x9f, 0xce, + 0xd6, 0x87, 0x74, 0x25, 0x8f, 0xde, 0x2d, 0x7c, 0x16, 0x47, + 0xb4, 0xe5, 0x4f, 0x1e, 0xed, 0xbc, 0xa4, 0xf5, 0x06, 0x57, + 0xfd, 0xac, 0x5f, 0x0e, 0x6f, 0x3e, 0xcd, 0x9c, 0x36, 0x67, + 0x94, 0xc5, 0xdd, 0x8c, 0x7f, 0x2e, 0x84, 0xd5, 0x26, 0x77, + 0xe4, 0xb5, 0x46, 0x17, 0xbd, 0xec, 0x1f, 0x4e, 0x56, 0x07, + 0xf4, 0xa5, 0x0f, 0x5e, 0xad, 0xfc, 0x9d, 0xcc, 0x3f, 0x6e, + 0xc4, 0x95, 0x66, 0x37, 0x2f, 0x7e, 0x8d, 0xdc, 0x76, 0x27, + 0xd4, 0x85, 0x00, 0x52, 0xa4, 0xf6, 0x55, 0x07, 0xf1, 0xa3, + 0xaa, 0xf8, 0x0e, 0x5c, 0xff, 0xad, 0x5b, 0x09, 0x49, 0x1b, + 0xed, 0xbf, 0x1c, 0x4e, 0xb8, 0xea, 0xe3, 0xb1, 0x47, 0x15, + 0xb6, 0xe4, 0x12, 0x40, 0x92, 0xc0, 0x36, 0x64, 0xc7, 0x95, + 0x63, 0x31, 0x38, 0x6a, 0x9c, 0xce, 0x6d, 0x3f, 0xc9, 0x9b, + 0xdb, 0x89, 0x7f, 0x2d, 0x8e, 0xdc, 0x2a, 0x78, 0x71, 0x23, + 0xd5, 0x87, 0x24, 0x76, 0x80, 0xd2, 0x39, 0x6b, 0x9d, 0xcf, + 0x6c, 0x3e, 0xc8, 0x9a, 0x93, 0xc1, 0x37, 0x65, 0xc6, 0x94, + 0x62, 0x30, 0x70, 0x22, 0xd4, 0x86, 0x25, 0x77, 0x81, 0xd3, + 0xda, 0x88, 0x7e, 0x2c, 0x8f, 0xdd, 0x2b, 0x79, 0xab, 0xf9, + 0x0f, 0x5d, 0xfe, 0xac, 0x5a, 0x08, 0x01, 0x53, 0xa5, 0xf7, + 0x54, 0x06, 0xf0, 0xa2, 0xe2, 0xb0, 0x46, 0x14, 0xb7, 0xe5, + 0x13, 0x41, 0x48, 0x1a, 0xec, 0xbe, 0x1d, 0x4f, 0xb9, 0xeb, + 0x72, 0x20, 0xd6, 0x84, 0x27, 0x75, 0x83, 0xd1, 0xd8, 0x8a, + 0x7c, 0x2e, 0x8d, 0xdf, 0x29, 0x7b, 0x3b, 0x69, 0x9f, 0xcd, + 0x6e, 0x3c, 0xca, 0x98, 0x91, 0xc3, 0x35, 0x67, 0xc4, 0x96, + 0x60, 0x32, 0xe0, 0xb2, 0x44, 0x16, 0xb5, 0xe7, 0x11, 0x43, + 0x4a, 0x18, 0xee, 0xbc, 0x1f, 0x4d, 0xbb, 0xe9, 0xa9, 0xfb, + 0x0d, 0x5f, 0xfc, 0xae, 0x58, 0x0a, 0x03, 0x51, 0xa7, 0xf5, + 0x56, 0x04, 0xf2, 0xa0, 0x4b, 0x19, 0xef, 0xbd, 0x1e, 0x4c, + 0xba, 0xe8, 0xe1, 0xb3, 0x45, 0x17, 0xb4, 0xe6, 0x10, 0x42, + 0x02, 0x50, 0xa6, 0xf4, 0x57, 0x05, 0xf3, 0xa1, 0xa8, 
0xfa, + 0x0c, 0x5e, 0xfd, 0xaf, 0x59, 0x0b, 0xd9, 0x8b, 0x7d, 0x2f, + 0x8c, 0xde, 0x28, 0x7a, 0x73, 0x21, 0xd7, 0x85, 0x26, 0x74, + 0x82, 0xd0, 0x90, 0xc2, 0x34, 0x66, 0xc5, 0x97, 0x61, 0x33, + 0x3a, 0x68, 0x9e, 0xcc, 0x6f, 0x3d, 0xcb, 0x99, 0x00, 0x53, + 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4, 0xa2, 0xf1, 0x04, 0x57, + 0xf3, 0xa0, 0x55, 0x06, 0x59, 0x0a, 0xff, 0xac, 0x08, 0x5b, + 0xae, 0xfd, 0xfb, 0xa8, 0x5d, 0x0e, 0xaa, 0xf9, 0x0c, 0x5f, + 0xb2, 0xe1, 0x14, 0x47, 0xe3, 0xb0, 0x45, 0x16, 0x10, 0x43, + 0xb6, 0xe5, 0x41, 0x12, 0xe7, 0xb4, 0xeb, 0xb8, 0x4d, 0x1e, + 0xba, 0xe9, 0x1c, 0x4f, 0x49, 0x1a, 0xef, 0xbc, 0x18, 0x4b, + 0xbe, 0xed, 0x79, 0x2a, 0xdf, 0x8c, 0x28, 0x7b, 0x8e, 0xdd, + 0xdb, 0x88, 0x7d, 0x2e, 0x8a, 0xd9, 0x2c, 0x7f, 0x20, 0x73, + 0x86, 0xd5, 0x71, 0x22, 0xd7, 0x84, 0x82, 0xd1, 0x24, 0x77, + 0xd3, 0x80, 0x75, 0x26, 0xcb, 0x98, 0x6d, 0x3e, 0x9a, 0xc9, + 0x3c, 0x6f, 0x69, 0x3a, 0xcf, 0x9c, 0x38, 0x6b, 0x9e, 0xcd, + 0x92, 0xc1, 0x34, 0x67, 0xc3, 0x90, 0x65, 0x36, 0x30, 0x63, + 0x96, 0xc5, 0x61, 0x32, 0xc7, 0x94, 0xf2, 0xa1, 0x54, 0x07, + 0xa3, 0xf0, 0x05, 0x56, 0x50, 0x03, 0xf6, 0xa5, 0x01, 0x52, + 0xa7, 0xf4, 0xab, 0xf8, 0x0d, 0x5e, 0xfa, 0xa9, 0x5c, 0x0f, + 0x09, 0x5a, 0xaf, 0xfc, 0x58, 0x0b, 0xfe, 0xad, 0x40, 0x13, + 0xe6, 0xb5, 0x11, 0x42, 0xb7, 0xe4, 0xe2, 0xb1, 0x44, 0x17, + 0xb3, 0xe0, 0x15, 0x46, 0x19, 0x4a, 0xbf, 0xec, 0x48, 0x1b, + 0xee, 0xbd, 0xbb, 0xe8, 0x1d, 0x4e, 0xea, 0xb9, 0x4c, 0x1f, + 0x8b, 0xd8, 0x2d, 0x7e, 0xda, 0x89, 0x7c, 0x2f, 0x29, 0x7a, + 0x8f, 0xdc, 0x78, 0x2b, 0xde, 0x8d, 0xd2, 0x81, 0x74, 0x27, + 0x83, 0xd0, 0x25, 0x76, 0x70, 0x23, 0xd6, 0x85, 0x21, 0x72, + 0x87, 0xd4, 0x39, 0x6a, 0x9f, 0xcc, 0x68, 0x3b, 0xce, 0x9d, + 0x9b, 0xc8, 0x3d, 0x6e, 0xca, 0x99, 0x6c, 0x3f, 0x60, 0x33, + 0xc6, 0x95, 0x31, 0x62, 0x97, 0xc4, 0xc2, 0x91, 0x64, 0x37, + 0x93, 0xc0, 0x35, 0x66, 0x00, 0x54, 0xa8, 0xfc, 0x4d, 0x19, + 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b, + 0x29, 0x7d, 0x81, 0xd5, 0x64, 0x30, 0xcc, 0x98, 0xb3, 0xe7, + 0x1b, 0x4f, 0xfe, 0xaa, 0x56, 0x02, 0x52, 0x06, 0xfa, 0xae, + 0x1f, 0x4b, 0xb7, 0xe3, 0xc8, 0x9c, 0x60, 0x34, 0x85, 0xd1, + 0x2d, 0x79, 0x7b, 0x2f, 0xd3, 0x87, 0x36, 0x62, 0x9e, 0xca, + 0xe1, 0xb5, 0x49, 0x1d, 0xac, 0xf8, 0x04, 0x50, 0xa4, 0xf0, + 0x0c, 0x58, 0xe9, 0xbd, 0x41, 0x15, 0x3e, 0x6a, 0x96, 0xc2, + 0x73, 0x27, 0xdb, 0x8f, 0x8d, 0xd9, 0x25, 0x71, 0xc0, 0x94, + 0x68, 0x3c, 0x17, 0x43, 0xbf, 0xeb, 0x5a, 0x0e, 0xf2, 0xa6, + 0xf6, 0xa2, 0x5e, 0x0a, 0xbb, 0xef, 0x13, 0x47, 0x6c, 0x38, + 0xc4, 0x90, 0x21, 0x75, 0x89, 0xdd, 0xdf, 0x8b, 0x77, 0x23, + 0x92, 0xc6, 0x3a, 0x6e, 0x45, 0x11, 0xed, 0xb9, 0x08, 0x5c, + 0xa0, 0xf4, 0x55, 0x01, 0xfd, 0xa9, 0x18, 0x4c, 0xb0, 0xe4, + 0xcf, 0x9b, 0x67, 0x33, 0x82, 0xd6, 0x2a, 0x7e, 0x7c, 0x28, + 0xd4, 0x80, 0x31, 0x65, 0x99, 0xcd, 0xe6, 0xb2, 0x4e, 0x1a, + 0xab, 0xff, 0x03, 0x57, 0x07, 0x53, 0xaf, 0xfb, 0x4a, 0x1e, + 0xe2, 0xb6, 0x9d, 0xc9, 0x35, 0x61, 0xd0, 0x84, 0x78, 0x2c, + 0x2e, 0x7a, 0x86, 0xd2, 0x63, 0x37, 0xcb, 0x9f, 0xb4, 0xe0, + 0x1c, 0x48, 0xf9, 0xad, 0x51, 0x05, 0xf1, 0xa5, 0x59, 0x0d, + 0xbc, 0xe8, 0x14, 0x40, 0x6b, 0x3f, 0xc3, 0x97, 0x26, 0x72, + 0x8e, 0xda, 0xd8, 0x8c, 0x70, 0x24, 0x95, 0xc1, 0x3d, 0x69, + 0x42, 0x16, 0xea, 0xbe, 0x0f, 0x5b, 0xa7, 0xf3, 0xa3, 0xf7, + 0x0b, 0x5f, 0xee, 0xba, 0x46, 0x12, 0x39, 0x6d, 0x91, 0xc5, + 0x74, 0x20, 0xdc, 0x88, 0x8a, 0xde, 0x22, 0x76, 0xc7, 0x93, + 0x6f, 0x3b, 0x10, 0x44, 0xb8, 0xec, 0x5d, 0x09, 0xf5, 0xa1, + 0x00, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, + 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24, 0x39, 0x6c, 0x93, 0xc6, + 0x70, 0x25, 
0xda, 0x8f, 0xab, 0xfe, 0x01, 0x54, 0xe2, 0xb7, + 0x48, 0x1d, 0x72, 0x27, 0xd8, 0x8d, 0x3b, 0x6e, 0x91, 0xc4, + 0xe0, 0xb5, 0x4a, 0x1f, 0xa9, 0xfc, 0x03, 0x56, 0x4b, 0x1e, + 0xe1, 0xb4, 0x02, 0x57, 0xa8, 0xfd, 0xd9, 0x8c, 0x73, 0x26, + 0x90, 0xc5, 0x3a, 0x6f, 0xe4, 0xb1, 0x4e, 0x1b, 0xad, 0xf8, + 0x07, 0x52, 0x76, 0x23, 0xdc, 0x89, 0x3f, 0x6a, 0x95, 0xc0, + 0xdd, 0x88, 0x77, 0x22, 0x94, 0xc1, 0x3e, 0x6b, 0x4f, 0x1a, + 0xe5, 0xb0, 0x06, 0x53, 0xac, 0xf9, 0x96, 0xc3, 0x3c, 0x69, + 0xdf, 0x8a, 0x75, 0x20, 0x04, 0x51, 0xae, 0xfb, 0x4d, 0x18, + 0xe7, 0xb2, 0xaf, 0xfa, 0x05, 0x50, 0xe6, 0xb3, 0x4c, 0x19, + 0x3d, 0x68, 0x97, 0xc2, 0x74, 0x21, 0xde, 0x8b, 0xd5, 0x80, + 0x7f, 0x2a, 0x9c, 0xc9, 0x36, 0x63, 0x47, 0x12, 0xed, 0xb8, + 0x0e, 0x5b, 0xa4, 0xf1, 0xec, 0xb9, 0x46, 0x13, 0xa5, 0xf0, + 0x0f, 0x5a, 0x7e, 0x2b, 0xd4, 0x81, 0x37, 0x62, 0x9d, 0xc8, + 0xa7, 0xf2, 0x0d, 0x58, 0xee, 0xbb, 0x44, 0x11, 0x35, 0x60, + 0x9f, 0xca, 0x7c, 0x29, 0xd6, 0x83, 0x9e, 0xcb, 0x34, 0x61, + 0xd7, 0x82, 0x7d, 0x28, 0x0c, 0x59, 0xa6, 0xf3, 0x45, 0x10, + 0xef, 0xba, 0x31, 0x64, 0x9b, 0xce, 0x78, 0x2d, 0xd2, 0x87, + 0xa3, 0xf6, 0x09, 0x5c, 0xea, 0xbf, 0x40, 0x15, 0x08, 0x5d, + 0xa2, 0xf7, 0x41, 0x14, 0xeb, 0xbe, 0x9a, 0xcf, 0x30, 0x65, + 0xd3, 0x86, 0x79, 0x2c, 0x43, 0x16, 0xe9, 0xbc, 0x0a, 0x5f, + 0xa0, 0xf5, 0xd1, 0x84, 0x7b, 0x2e, 0x98, 0xcd, 0x32, 0x67, + 0x7a, 0x2f, 0xd0, 0x85, 0x33, 0x66, 0x99, 0xcc, 0xe8, 0xbd, + 0x42, 0x17, 0xa1, 0xf4, 0x0b, 0x5e, 0x00, 0x56, 0xac, 0xfa, + 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, + 0x63, 0x35, 0x09, 0x5f, 0xa5, 0xf3, 0x4c, 0x1a, 0xe0, 0xb6, + 0x83, 0xd5, 0x2f, 0x79, 0xc6, 0x90, 0x6a, 0x3c, 0x12, 0x44, + 0xbe, 0xe8, 0x57, 0x01, 0xfb, 0xad, 0x98, 0xce, 0x34, 0x62, + 0xdd, 0x8b, 0x71, 0x27, 0x1b, 0x4d, 0xb7, 0xe1, 0x5e, 0x08, + 0xf2, 0xa4, 0x91, 0xc7, 0x3d, 0x6b, 0xd4, 0x82, 0x78, 0x2e, + 0x24, 0x72, 0x88, 0xde, 0x61, 0x37, 0xcd, 0x9b, 0xae, 0xf8, + 0x02, 0x54, 0xeb, 0xbd, 0x47, 0x11, 0x2d, 0x7b, 0x81, 0xd7, + 0x68, 0x3e, 0xc4, 0x92, 0xa7, 0xf1, 0x0b, 0x5d, 0xe2, 0xb4, + 0x4e, 0x18, 0x36, 0x60, 0x9a, 0xcc, 0x73, 0x25, 0xdf, 0x89, + 0xbc, 0xea, 0x10, 0x46, 0xf9, 0xaf, 0x55, 0x03, 0x3f, 0x69, + 0x93, 0xc5, 0x7a, 0x2c, 0xd6, 0x80, 0xb5, 0xe3, 0x19, 0x4f, + 0xf0, 0xa6, 0x5c, 0x0a, 0x48, 0x1e, 0xe4, 0xb2, 0x0d, 0x5b, + 0xa1, 0xf7, 0xc2, 0x94, 0x6e, 0x38, 0x87, 0xd1, 0x2b, 0x7d, + 0x41, 0x17, 0xed, 0xbb, 0x04, 0x52, 0xa8, 0xfe, 0xcb, 0x9d, + 0x67, 0x31, 0x8e, 0xd8, 0x22, 0x74, 0x5a, 0x0c, 0xf6, 0xa0, + 0x1f, 0x49, 0xb3, 0xe5, 0xd0, 0x86, 0x7c, 0x2a, 0x95, 0xc3, + 0x39, 0x6f, 0x53, 0x05, 0xff, 0xa9, 0x16, 0x40, 0xba, 0xec, + 0xd9, 0x8f, 0x75, 0x23, 0x9c, 0xca, 0x30, 0x66, 0x6c, 0x3a, + 0xc0, 0x96, 0x29, 0x7f, 0x85, 0xd3, 0xe6, 0xb0, 0x4a, 0x1c, + 0xa3, 0xf5, 0x0f, 0x59, 0x65, 0x33, 0xc9, 0x9f, 0x20, 0x76, + 0x8c, 0xda, 0xef, 0xb9, 0x43, 0x15, 0xaa, 0xfc, 0x06, 0x50, + 0x7e, 0x28, 0xd2, 0x84, 0x3b, 0x6d, 0x97, 0xc1, 0xf4, 0xa2, + 0x58, 0x0e, 0xb1, 0xe7, 0x1d, 0x4b, 0x77, 0x21, 0xdb, 0x8d, + 0x32, 0x64, 0x9e, 0xc8, 0xfd, 0xab, 0x51, 0x07, 0xb8, 0xee, + 0x14, 0x42, 0x00, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, + 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a, 0x19, 0x4e, + 0xb7, 0xe0, 0x58, 0x0f, 0xf6, 0xa1, 0x9b, 0xcc, 0x35, 0x62, + 0xda, 0x8d, 0x74, 0x23, 0x32, 0x65, 0x9c, 0xcb, 0x73, 0x24, + 0xdd, 0x8a, 0xb0, 0xe7, 0x1e, 0x49, 0xf1, 0xa6, 0x5f, 0x08, + 0x2b, 0x7c, 0x85, 0xd2, 0x6a, 0x3d, 0xc4, 0x93, 0xa9, 0xfe, + 0x07, 0x50, 0xe8, 0xbf, 0x46, 0x11, 0x64, 0x33, 0xca, 0x9d, + 0x25, 0x72, 0x8b, 0xdc, 0xe6, 0xb1, 0x48, 0x1f, 0xa7, 0xf0, + 0x09, 0x5e, 0x7d, 0x2a, 0xd3, 
0x84, 0x3c, 0x6b, 0x92, 0xc5, + 0xff, 0xa8, 0x51, 0x06, 0xbe, 0xe9, 0x10, 0x47, 0x56, 0x01, + 0xf8, 0xaf, 0x17, 0x40, 0xb9, 0xee, 0xd4, 0x83, 0x7a, 0x2d, + 0x95, 0xc2, 0x3b, 0x6c, 0x4f, 0x18, 0xe1, 0xb6, 0x0e, 0x59, + 0xa0, 0xf7, 0xcd, 0x9a, 0x63, 0x34, 0x8c, 0xdb, 0x22, 0x75, + 0xc8, 0x9f, 0x66, 0x31, 0x89, 0xde, 0x27, 0x70, 0x4a, 0x1d, + 0xe4, 0xb3, 0x0b, 0x5c, 0xa5, 0xf2, 0xd1, 0x86, 0x7f, 0x28, + 0x90, 0xc7, 0x3e, 0x69, 0x53, 0x04, 0xfd, 0xaa, 0x12, 0x45, + 0xbc, 0xeb, 0xfa, 0xad, 0x54, 0x03, 0xbb, 0xec, 0x15, 0x42, + 0x78, 0x2f, 0xd6, 0x81, 0x39, 0x6e, 0x97, 0xc0, 0xe3, 0xb4, + 0x4d, 0x1a, 0xa2, 0xf5, 0x0c, 0x5b, 0x61, 0x36, 0xcf, 0x98, + 0x20, 0x77, 0x8e, 0xd9, 0xac, 0xfb, 0x02, 0x55, 0xed, 0xba, + 0x43, 0x14, 0x2e, 0x79, 0x80, 0xd7, 0x6f, 0x38, 0xc1, 0x96, + 0xb5, 0xe2, 0x1b, 0x4c, 0xf4, 0xa3, 0x5a, 0x0d, 0x37, 0x60, + 0x99, 0xce, 0x76, 0x21, 0xd8, 0x8f, 0x9e, 0xc9, 0x30, 0x67, + 0xdf, 0x88, 0x71, 0x26, 0x1c, 0x4b, 0xb2, 0xe5, 0x5d, 0x0a, + 0xf3, 0xa4, 0x87, 0xd0, 0x29, 0x7e, 0xc6, 0x91, 0x68, 0x3f, + 0x05, 0x52, 0xab, 0xfc, 0x44, 0x13, 0xea, 0xbd, 0x00, 0x58, + 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, + 0x87, 0xdf, 0x37, 0x6f, 0xe9, 0xb1, 0x59, 0x01, 0x94, 0xcc, + 0x24, 0x7c, 0x13, 0x4b, 0xa3, 0xfb, 0x6e, 0x36, 0xde, 0x86, + 0xcf, 0x97, 0x7f, 0x27, 0xb2, 0xea, 0x02, 0x5a, 0x35, 0x6d, + 0x85, 0xdd, 0x48, 0x10, 0xf8, 0xa0, 0x26, 0x7e, 0x96, 0xce, + 0x5b, 0x03, 0xeb, 0xb3, 0xdc, 0x84, 0x6c, 0x34, 0xa1, 0xf9, + 0x11, 0x49, 0x83, 0xdb, 0x33, 0x6b, 0xfe, 0xa6, 0x4e, 0x16, + 0x79, 0x21, 0xc9, 0x91, 0x04, 0x5c, 0xb4, 0xec, 0x6a, 0x32, + 0xda, 0x82, 0x17, 0x4f, 0xa7, 0xff, 0x90, 0xc8, 0x20, 0x78, + 0xed, 0xb5, 0x5d, 0x05, 0x4c, 0x14, 0xfc, 0xa4, 0x31, 0x69, + 0x81, 0xd9, 0xb6, 0xee, 0x06, 0x5e, 0xcb, 0x93, 0x7b, 0x23, + 0xa5, 0xfd, 0x15, 0x4d, 0xd8, 0x80, 0x68, 0x30, 0x5f, 0x07, + 0xef, 0xb7, 0x22, 0x7a, 0x92, 0xca, 0x1b, 0x43, 0xab, 0xf3, + 0x66, 0x3e, 0xd6, 0x8e, 0xe1, 0xb9, 0x51, 0x09, 0x9c, 0xc4, + 0x2c, 0x74, 0xf2, 0xaa, 0x42, 0x1a, 0x8f, 0xd7, 0x3f, 0x67, + 0x08, 0x50, 0xb8, 0xe0, 0x75, 0x2d, 0xc5, 0x9d, 0xd4, 0x8c, + 0x64, 0x3c, 0xa9, 0xf1, 0x19, 0x41, 0x2e, 0x76, 0x9e, 0xc6, + 0x53, 0x0b, 0xe3, 0xbb, 0x3d, 0x65, 0x8d, 0xd5, 0x40, 0x18, + 0xf0, 0xa8, 0xc7, 0x9f, 0x77, 0x2f, 0xba, 0xe2, 0x0a, 0x52, + 0x98, 0xc0, 0x28, 0x70, 0xe5, 0xbd, 0x55, 0x0d, 0x62, 0x3a, + 0xd2, 0x8a, 0x1f, 0x47, 0xaf, 0xf7, 0x71, 0x29, 0xc1, 0x99, + 0x0c, 0x54, 0xbc, 0xe4, 0x8b, 0xd3, 0x3b, 0x63, 0xf6, 0xae, + 0x46, 0x1e, 0x57, 0x0f, 0xe7, 0xbf, 0x2a, 0x72, 0x9a, 0xc2, + 0xad, 0xf5, 0x1d, 0x45, 0xd0, 0x88, 0x60, 0x38, 0xbe, 0xe6, + 0x0e, 0x56, 0xc3, 0x9b, 0x73, 0x2b, 0x44, 0x1c, 0xf4, 0xac, + 0x39, 0x61, 0x89, 0xd1, 0x00, 0x59, 0xb2, 0xeb, 0x79, 0x20, + 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60, + 0xf9, 0xa0, 0x4b, 0x12, 0x80, 0xd9, 0x32, 0x6b, 0x0b, 0x52, + 0xb9, 0xe0, 0x72, 0x2b, 0xc0, 0x99, 0xef, 0xb6, 0x5d, 0x04, + 0x96, 0xcf, 0x24, 0x7d, 0x1d, 0x44, 0xaf, 0xf6, 0x64, 0x3d, + 0xd6, 0x8f, 0x16, 0x4f, 0xa4, 0xfd, 0x6f, 0x36, 0xdd, 0x84, + 0xe4, 0xbd, 0x56, 0x0f, 0x9d, 0xc4, 0x2f, 0x76, 0xc3, 0x9a, + 0x71, 0x28, 0xba, 0xe3, 0x08, 0x51, 0x31, 0x68, 0x83, 0xda, + 0x48, 0x11, 0xfa, 0xa3, 0x3a, 0x63, 0x88, 0xd1, 0x43, 0x1a, + 0xf1, 0xa8, 0xc8, 0x91, 0x7a, 0x23, 0xb1, 0xe8, 0x03, 0x5a, + 0x2c, 0x75, 0x9e, 0xc7, 0x55, 0x0c, 0xe7, 0xbe, 0xde, 0x87, + 0x6c, 0x35, 0xa7, 0xfe, 0x15, 0x4c, 0xd5, 0x8c, 0x67, 0x3e, + 0xac, 0xf5, 0x1e, 0x47, 0x27, 0x7e, 0x95, 0xcc, 0x5e, 0x07, + 0xec, 0xb5, 0x9b, 0xc2, 0x29, 0x70, 0xe2, 0xbb, 0x50, 0x09, + 0x69, 0x30, 0xdb, 0x82, 0x10, 0x49, 0xa2, 0xfb, 
0x62, 0x3b, + 0xd0, 0x89, 0x1b, 0x42, 0xa9, 0xf0, 0x90, 0xc9, 0x22, 0x7b, + 0xe9, 0xb0, 0x5b, 0x02, 0x74, 0x2d, 0xc6, 0x9f, 0x0d, 0x54, + 0xbf, 0xe6, 0x86, 0xdf, 0x34, 0x6d, 0xff, 0xa6, 0x4d, 0x14, + 0x8d, 0xd4, 0x3f, 0x66, 0xf4, 0xad, 0x46, 0x1f, 0x7f, 0x26, + 0xcd, 0x94, 0x06, 0x5f, 0xb4, 0xed, 0x58, 0x01, 0xea, 0xb3, + 0x21, 0x78, 0x93, 0xca, 0xaa, 0xf3, 0x18, 0x41, 0xd3, 0x8a, + 0x61, 0x38, 0xa1, 0xf8, 0x13, 0x4a, 0xd8, 0x81, 0x6a, 0x33, + 0x53, 0x0a, 0xe1, 0xb8, 0x2a, 0x73, 0x98, 0xc1, 0xb7, 0xee, + 0x05, 0x5c, 0xce, 0x97, 0x7c, 0x25, 0x45, 0x1c, 0xf7, 0xae, + 0x3c, 0x65, 0x8e, 0xd7, 0x4e, 0x17, 0xfc, 0xa5, 0x37, 0x6e, + 0x85, 0xdc, 0xbc, 0xe5, 0x0e, 0x57, 0xc5, 0x9c, 0x77, 0x2e, + 0x00, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, + 0x5e, 0x04, 0x9f, 0xc5, 0x2b, 0x71, 0xc9, 0x93, 0x7d, 0x27, + 0xbc, 0xe6, 0x08, 0x52, 0x23, 0x79, 0x97, 0xcd, 0x56, 0x0c, + 0xe2, 0xb8, 0x8f, 0xd5, 0x3b, 0x61, 0xfa, 0xa0, 0x4e, 0x14, + 0x65, 0x3f, 0xd1, 0x8b, 0x10, 0x4a, 0xa4, 0xfe, 0x46, 0x1c, + 0xf2, 0xa8, 0x33, 0x69, 0x87, 0xdd, 0xac, 0xf6, 0x18, 0x42, + 0xd9, 0x83, 0x6d, 0x37, 0x03, 0x59, 0xb7, 0xed, 0x76, 0x2c, + 0xc2, 0x98, 0xe9, 0xb3, 0x5d, 0x07, 0x9c, 0xc6, 0x28, 0x72, + 0xca, 0x90, 0x7e, 0x24, 0xbf, 0xe5, 0x0b, 0x51, 0x20, 0x7a, + 0x94, 0xce, 0x55, 0x0f, 0xe1, 0xbb, 0x8c, 0xd6, 0x38, 0x62, + 0xf9, 0xa3, 0x4d, 0x17, 0x66, 0x3c, 0xd2, 0x88, 0x13, 0x49, + 0xa7, 0xfd, 0x45, 0x1f, 0xf1, 0xab, 0x30, 0x6a, 0x84, 0xde, + 0xaf, 0xf5, 0x1b, 0x41, 0xda, 0x80, 0x6e, 0x34, 0x06, 0x5c, + 0xb2, 0xe8, 0x73, 0x29, 0xc7, 0x9d, 0xec, 0xb6, 0x58, 0x02, + 0x99, 0xc3, 0x2d, 0x77, 0xcf, 0x95, 0x7b, 0x21, 0xba, 0xe0, + 0x0e, 0x54, 0x25, 0x7f, 0x91, 0xcb, 0x50, 0x0a, 0xe4, 0xbe, + 0x89, 0xd3, 0x3d, 0x67, 0xfc, 0xa6, 0x48, 0x12, 0x63, 0x39, + 0xd7, 0x8d, 0x16, 0x4c, 0xa2, 0xf8, 0x40, 0x1a, 0xf4, 0xae, + 0x35, 0x6f, 0x81, 0xdb, 0xaa, 0xf0, 0x1e, 0x44, 0xdf, 0x85, + 0x6b, 0x31, 0x05, 0x5f, 0xb1, 0xeb, 0x70, 0x2a, 0xc4, 0x9e, + 0xef, 0xb5, 0x5b, 0x01, 0x9a, 0xc0, 0x2e, 0x74, 0xcc, 0x96, + 0x78, 0x22, 0xb9, 0xe3, 0x0d, 0x57, 0x26, 0x7c, 0x92, 0xc8, + 0x53, 0x09, 0xe7, 0xbd, 0x8a, 0xd0, 0x3e, 0x64, 0xff, 0xa5, + 0x4b, 0x11, 0x60, 0x3a, 0xd4, 0x8e, 0x15, 0x4f, 0xa1, 0xfb, + 0x43, 0x19, 0xf7, 0xad, 0x36, 0x6c, 0x82, 0xd8, 0xa9, 0xf3, + 0x1d, 0x47, 0xdc, 0x86, 0x68, 0x32, 0x00, 0x5b, 0xb6, 0xed, + 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0x0f, 0x93, 0xc8, + 0x25, 0x7e, 0xd9, 0x82, 0x6f, 0x34, 0xa8, 0xf3, 0x1e, 0x45, + 0x3b, 0x60, 0x8d, 0xd6, 0x4a, 0x11, 0xfc, 0xa7, 0xaf, 0xf4, + 0x19, 0x42, 0xde, 0x85, 0x68, 0x33, 0x4d, 0x16, 0xfb, 0xa0, + 0x3c, 0x67, 0x8a, 0xd1, 0x76, 0x2d, 0xc0, 0x9b, 0x07, 0x5c, + 0xb1, 0xea, 0x94, 0xcf, 0x22, 0x79, 0xe5, 0xbe, 0x53, 0x08, + 0x43, 0x18, 0xf5, 0xae, 0x32, 0x69, 0x84, 0xdf, 0xa1, 0xfa, + 0x17, 0x4c, 0xd0, 0x8b, 0x66, 0x3d, 0x9a, 0xc1, 0x2c, 0x77, + 0xeb, 0xb0, 0x5d, 0x06, 0x78, 0x23, 0xce, 0x95, 0x09, 0x52, + 0xbf, 0xe4, 0xec, 0xb7, 0x5a, 0x01, 0x9d, 0xc6, 0x2b, 0x70, + 0x0e, 0x55, 0xb8, 0xe3, 0x7f, 0x24, 0xc9, 0x92, 0x35, 0x6e, + 0x83, 0xd8, 0x44, 0x1f, 0xf2, 0xa9, 0xd7, 0x8c, 0x61, 0x3a, + 0xa6, 0xfd, 0x10, 0x4b, 0x86, 0xdd, 0x30, 0x6b, 0xf7, 0xac, + 0x41, 0x1a, 0x64, 0x3f, 0xd2, 0x89, 0x15, 0x4e, 0xa3, 0xf8, + 0x5f, 0x04, 0xe9, 0xb2, 0x2e, 0x75, 0x98, 0xc3, 0xbd, 0xe6, + 0x0b, 0x50, 0xcc, 0x97, 0x7a, 0x21, 0x29, 0x72, 0x9f, 0xc4, + 0x58, 0x03, 0xee, 0xb5, 0xcb, 0x90, 0x7d, 0x26, 0xba, 0xe1, + 0x0c, 0x57, 0xf0, 0xab, 0x46, 0x1d, 0x81, 0xda, 0x37, 0x6c, + 0x12, 0x49, 0xa4, 0xff, 0x63, 0x38, 0xd5, 0x8e, 0xc5, 0x9e, + 0x73, 0x28, 0xb4, 0xef, 0x02, 0x59, 0x27, 0x7c, 0x91, 0xca, + 0x56, 
0x0d, 0xe0, 0xbb, 0x1c, 0x47, 0xaa, 0xf1, 0x6d, 0x36, + 0xdb, 0x80, 0xfe, 0xa5, 0x48, 0x13, 0x8f, 0xd4, 0x39, 0x62, + 0x6a, 0x31, 0xdc, 0x87, 0x1b, 0x40, 0xad, 0xf6, 0x88, 0xd3, + 0x3e, 0x65, 0xf9, 0xa2, 0x4f, 0x14, 0xb3, 0xe8, 0x05, 0x5e, + 0xc2, 0x99, 0x74, 0x2f, 0x51, 0x0a, 0xe7, 0xbc, 0x20, 0x7b, + 0x96, 0xcd, 0x00, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89, + 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0x0f, 0x53, 0xa9, 0xf5, + 0x11, 0x4d, 0xc4, 0x98, 0x7c, 0x20, 0x73, 0x2f, 0xcb, 0x97, + 0x1e, 0x42, 0xa6, 0xfa, 0x4f, 0x13, 0xf7, 0xab, 0x22, 0x7e, + 0x9a, 0xc6, 0x95, 0xc9, 0x2d, 0x71, 0xf8, 0xa4, 0x40, 0x1c, + 0xe6, 0xba, 0x5e, 0x02, 0x8b, 0xd7, 0x33, 0x6f, 0x3c, 0x60, + 0x84, 0xd8, 0x51, 0x0d, 0xe9, 0xb5, 0x9e, 0xc2, 0x26, 0x7a, + 0xf3, 0xaf, 0x4b, 0x17, 0x44, 0x18, 0xfc, 0xa0, 0x29, 0x75, + 0x91, 0xcd, 0x37, 0x6b, 0x8f, 0xd3, 0x5a, 0x06, 0xe2, 0xbe, + 0xed, 0xb1, 0x55, 0x09, 0x80, 0xdc, 0x38, 0x64, 0xd1, 0x8d, + 0x69, 0x35, 0xbc, 0xe0, 0x04, 0x58, 0x0b, 0x57, 0xb3, 0xef, + 0x66, 0x3a, 0xde, 0x82, 0x78, 0x24, 0xc0, 0x9c, 0x15, 0x49, + 0xad, 0xf1, 0xa2, 0xfe, 0x1a, 0x46, 0xcf, 0x93, 0x77, 0x2b, + 0x21, 0x7d, 0x99, 0xc5, 0x4c, 0x10, 0xf4, 0xa8, 0xfb, 0xa7, + 0x43, 0x1f, 0x96, 0xca, 0x2e, 0x72, 0x88, 0xd4, 0x30, 0x6c, + 0xe5, 0xb9, 0x5d, 0x01, 0x52, 0x0e, 0xea, 0xb6, 0x3f, 0x63, + 0x87, 0xdb, 0x6e, 0x32, 0xd6, 0x8a, 0x03, 0x5f, 0xbb, 0xe7, + 0xb4, 0xe8, 0x0c, 0x50, 0xd9, 0x85, 0x61, 0x3d, 0xc7, 0x9b, + 0x7f, 0x23, 0xaa, 0xf6, 0x12, 0x4e, 0x1d, 0x41, 0xa5, 0xf9, + 0x70, 0x2c, 0xc8, 0x94, 0xbf, 0xe3, 0x07, 0x5b, 0xd2, 0x8e, + 0x6a, 0x36, 0x65, 0x39, 0xdd, 0x81, 0x08, 0x54, 0xb0, 0xec, + 0x16, 0x4a, 0xae, 0xf2, 0x7b, 0x27, 0xc3, 0x9f, 0xcc, 0x90, + 0x74, 0x28, 0xa1, 0xfd, 0x19, 0x45, 0xf0, 0xac, 0x48, 0x14, + 0x9d, 0xc1, 0x25, 0x79, 0x2a, 0x76, 0x92, 0xce, 0x47, 0x1b, + 0xff, 0xa3, 0x59, 0x05, 0xe1, 0xbd, 0x34, 0x68, 0x8c, 0xd0, + 0x83, 0xdf, 0x3b, 0x67, 0xee, 0xb2, 0x56, 0x0a, 0x00, 0x5d, + 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e, 0xd2, 0x8f, 0x68, 0x35, + 0xbb, 0xe6, 0x01, 0x5c, 0xb9, 0xe4, 0x03, 0x5e, 0xd0, 0x8d, + 0x6a, 0x37, 0x6b, 0x36, 0xd1, 0x8c, 0x02, 0x5f, 0xb8, 0xe5, + 0x6f, 0x32, 0xd5, 0x88, 0x06, 0x5b, 0xbc, 0xe1, 0xbd, 0xe0, + 0x07, 0x5a, 0xd4, 0x89, 0x6e, 0x33, 0xd6, 0x8b, 0x6c, 0x31, + 0xbf, 0xe2, 0x05, 0x58, 0x04, 0x59, 0xbe, 0xe3, 0x6d, 0x30, + 0xd7, 0x8a, 0xde, 0x83, 0x64, 0x39, 0xb7, 0xea, 0x0d, 0x50, + 0x0c, 0x51, 0xb6, 0xeb, 0x65, 0x38, 0xdf, 0x82, 0x67, 0x3a, + 0xdd, 0x80, 0x0e, 0x53, 0xb4, 0xe9, 0xb5, 0xe8, 0x0f, 0x52, + 0xdc, 0x81, 0x66, 0x3b, 0xb1, 0xec, 0x0b, 0x56, 0xd8, 0x85, + 0x62, 0x3f, 0x63, 0x3e, 0xd9, 0x84, 0x0a, 0x57, 0xb0, 0xed, + 0x08, 0x55, 0xb2, 0xef, 0x61, 0x3c, 0xdb, 0x86, 0xda, 0x87, + 0x60, 0x3d, 0xb3, 0xee, 0x09, 0x54, 0xa1, 0xfc, 0x1b, 0x46, + 0xc8, 0x95, 0x72, 0x2f, 0x73, 0x2e, 0xc9, 0x94, 0x1a, 0x47, + 0xa0, 0xfd, 0x18, 0x45, 0xa2, 0xff, 0x71, 0x2c, 0xcb, 0x96, + 0xca, 0x97, 0x70, 0x2d, 0xa3, 0xfe, 0x19, 0x44, 0xce, 0x93, + 0x74, 0x29, 0xa7, 0xfa, 0x1d, 0x40, 0x1c, 0x41, 0xa6, 0xfb, + 0x75, 0x28, 0xcf, 0x92, 0x77, 0x2a, 0xcd, 0x90, 0x1e, 0x43, + 0xa4, 0xf9, 0xa5, 0xf8, 0x1f, 0x42, 0xcc, 0x91, 0x76, 0x2b, + 0x7f, 0x22, 0xc5, 0x98, 0x16, 0x4b, 0xac, 0xf1, 0xad, 0xf0, + 0x17, 0x4a, 0xc4, 0x99, 0x7e, 0x23, 0xc6, 0x9b, 0x7c, 0x21, + 0xaf, 0xf2, 0x15, 0x48, 0x14, 0x49, 0xae, 0xf3, 0x7d, 0x20, + 0xc7, 0x9a, 0x10, 0x4d, 0xaa, 0xf7, 0x79, 0x24, 0xc3, 0x9e, + 0xc2, 0x9f, 0x78, 0x25, 0xab, 0xf6, 0x11, 0x4c, 0xa9, 0xf4, + 0x13, 0x4e, 0xc0, 0x9d, 0x7a, 0x27, 0x7b, 0x26, 0xc1, 0x9c, + 0x12, 0x4f, 0xa8, 0xf5, 0x00, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, + 0xd9, 0x87, 0xca, 0x94, 
0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d, + 0x89, 0xd7, 0x35, 0x6b, 0xec, 0xb2, 0x50, 0x0e, 0x43, 0x1d, + 0xff, 0xa1, 0x26, 0x78, 0x9a, 0xc4, 0x0f, 0x51, 0xb3, 0xed, + 0x6a, 0x34, 0xd6, 0x88, 0xc5, 0x9b, 0x79, 0x27, 0xa0, 0xfe, + 0x1c, 0x42, 0x86, 0xd8, 0x3a, 0x64, 0xe3, 0xbd, 0x5f, 0x01, + 0x4c, 0x12, 0xf0, 0xae, 0x29, 0x77, 0x95, 0xcb, 0x1e, 0x40, + 0xa2, 0xfc, 0x7b, 0x25, 0xc7, 0x99, 0xd4, 0x8a, 0x68, 0x36, + 0xb1, 0xef, 0x0d, 0x53, 0x97, 0xc9, 0x2b, 0x75, 0xf2, 0xac, + 0x4e, 0x10, 0x5d, 0x03, 0xe1, 0xbf, 0x38, 0x66, 0x84, 0xda, + 0x11, 0x4f, 0xad, 0xf3, 0x74, 0x2a, 0xc8, 0x96, 0xdb, 0x85, + 0x67, 0x39, 0xbe, 0xe0, 0x02, 0x5c, 0x98, 0xc6, 0x24, 0x7a, + 0xfd, 0xa3, 0x41, 0x1f, 0x52, 0x0c, 0xee, 0xb0, 0x37, 0x69, + 0x8b, 0xd5, 0x3c, 0x62, 0x80, 0xde, 0x59, 0x07, 0xe5, 0xbb, + 0xf6, 0xa8, 0x4a, 0x14, 0x93, 0xcd, 0x2f, 0x71, 0xb5, 0xeb, + 0x09, 0x57, 0xd0, 0x8e, 0x6c, 0x32, 0x7f, 0x21, 0xc3, 0x9d, + 0x1a, 0x44, 0xa6, 0xf8, 0x33, 0x6d, 0x8f, 0xd1, 0x56, 0x08, + 0xea, 0xb4, 0xf9, 0xa7, 0x45, 0x1b, 0x9c, 0xc2, 0x20, 0x7e, + 0xba, 0xe4, 0x06, 0x58, 0xdf, 0x81, 0x63, 0x3d, 0x70, 0x2e, + 0xcc, 0x92, 0x15, 0x4b, 0xa9, 0xf7, 0x22, 0x7c, 0x9e, 0xc0, + 0x47, 0x19, 0xfb, 0xa5, 0xe8, 0xb6, 0x54, 0x0a, 0x8d, 0xd3, + 0x31, 0x6f, 0xab, 0xf5, 0x17, 0x49, 0xce, 0x90, 0x72, 0x2c, + 0x61, 0x3f, 0xdd, 0x83, 0x04, 0x5a, 0xb8, 0xe6, 0x2d, 0x73, + 0x91, 0xcf, 0x48, 0x16, 0xf4, 0xaa, 0xe7, 0xb9, 0x5b, 0x05, + 0x82, 0xdc, 0x3e, 0x60, 0xa4, 0xfa, 0x18, 0x46, 0xc1, 0x9f, + 0x7d, 0x23, 0x6e, 0x30, 0xd2, 0x8c, 0x0b, 0x55, 0xb7, 0xe9, + 0x00, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, + 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42, 0x99, 0xc6, 0x27, 0x78, + 0xf8, 0xa7, 0x46, 0x19, 0x5b, 0x04, 0xe5, 0xba, 0x3a, 0x65, + 0x84, 0xdb, 0x2f, 0x70, 0x91, 0xce, 0x4e, 0x11, 0xf0, 0xaf, + 0xed, 0xb2, 0x53, 0x0c, 0x8c, 0xd3, 0x32, 0x6d, 0xb6, 0xe9, + 0x08, 0x57, 0xd7, 0x88, 0x69, 0x36, 0x74, 0x2b, 0xca, 0x95, + 0x15, 0x4a, 0xab, 0xf4, 0x5e, 0x01, 0xe0, 0xbf, 0x3f, 0x60, + 0x81, 0xde, 0x9c, 0xc3, 0x22, 0x7d, 0xfd, 0xa2, 0x43, 0x1c, + 0xc7, 0x98, 0x79, 0x26, 0xa6, 0xf9, 0x18, 0x47, 0x05, 0x5a, + 0xbb, 0xe4, 0x64, 0x3b, 0xda, 0x85, 0x71, 0x2e, 0xcf, 0x90, + 0x10, 0x4f, 0xae, 0xf1, 0xb3, 0xec, 0x0d, 0x52, 0xd2, 0x8d, + 0x6c, 0x33, 0xe8, 0xb7, 0x56, 0x09, 0x89, 0xd6, 0x37, 0x68, + 0x2a, 0x75, 0x94, 0xcb, 0x4b, 0x14, 0xf5, 0xaa, 0xbc, 0xe3, + 0x02, 0x5d, 0xdd, 0x82, 0x63, 0x3c, 0x7e, 0x21, 0xc0, 0x9f, + 0x1f, 0x40, 0xa1, 0xfe, 0x25, 0x7a, 0x9b, 0xc4, 0x44, 0x1b, + 0xfa, 0xa5, 0xe7, 0xb8, 0x59, 0x06, 0x86, 0xd9, 0x38, 0x67, + 0x93, 0xcc, 0x2d, 0x72, 0xf2, 0xad, 0x4c, 0x13, 0x51, 0x0e, + 0xef, 0xb0, 0x30, 0x6f, 0x8e, 0xd1, 0x0a, 0x55, 0xb4, 0xeb, + 0x6b, 0x34, 0xd5, 0x8a, 0xc8, 0x97, 0x76, 0x29, 0xa9, 0xf6, + 0x17, 0x48, 0xe2, 0xbd, 0x5c, 0x03, 0x83, 0xdc, 0x3d, 0x62, + 0x20, 0x7f, 0x9e, 0xc1, 0x41, 0x1e, 0xff, 0xa0, 0x7b, 0x24, + 0xc5, 0x9a, 0x1a, 0x45, 0xa4, 0xfb, 0xb9, 0xe6, 0x07, 0x58, + 0xd8, 0x87, 0x66, 0x39, 0xcd, 0x92, 0x73, 0x2c, 0xac, 0xf3, + 0x12, 0x4d, 0x0f, 0x50, 0xb1, 0xee, 0x6e, 0x31, 0xd0, 0x8f, + 0x54, 0x0b, 0xea, 0xb5, 0x35, 0x6a, 0x8b, 0xd4, 0x96, 0xc9, + 0x28, 0x77, 0xf7, 0xa8, 0x49, 0x16, 0x00, 0x60, 0xc0, 0xa0, + 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, + 0x7a, 0x1a, 0x4e, 0x2e, 0x8e, 0xee, 0xd3, 0xb3, 0x13, 0x73, + 0x69, 0x09, 0xa9, 0xc9, 0xf4, 0x94, 0x34, 0x54, 0x9c, 0xfc, + 0x5c, 0x3c, 0x01, 0x61, 0xc1, 0xa1, 0xbb, 0xdb, 0x7b, 0x1b, + 0x26, 0x46, 0xe6, 0x86, 0xd2, 0xb2, 0x12, 0x72, 0x4f, 0x2f, + 0x8f, 0xef, 0xf5, 0x95, 0x35, 0x55, 0x68, 0x08, 0xa8, 0xc8, + 0x25, 0x45, 0xe5, 0x85, 0xb8, 0xd8, 0x78, 
0x18, 0x02, 0x62, + 0xc2, 0xa2, 0x9f, 0xff, 0x5f, 0x3f, 0x6b, 0x0b, 0xab, 0xcb, + 0xf6, 0x96, 0x36, 0x56, 0x4c, 0x2c, 0x8c, 0xec, 0xd1, 0xb1, + 0x11, 0x71, 0xb9, 0xd9, 0x79, 0x19, 0x24, 0x44, 0xe4, 0x84, + 0x9e, 0xfe, 0x5e, 0x3e, 0x03, 0x63, 0xc3, 0xa3, 0xf7, 0x97, + 0x37, 0x57, 0x6a, 0x0a, 0xaa, 0xca, 0xd0, 0xb0, 0x10, 0x70, + 0x4d, 0x2d, 0x8d, 0xed, 0x4a, 0x2a, 0x8a, 0xea, 0xd7, 0xb7, + 0x17, 0x77, 0x6d, 0x0d, 0xad, 0xcd, 0xf0, 0x90, 0x30, 0x50, + 0x04, 0x64, 0xc4, 0xa4, 0x99, 0xf9, 0x59, 0x39, 0x23, 0x43, + 0xe3, 0x83, 0xbe, 0xde, 0x7e, 0x1e, 0xd6, 0xb6, 0x16, 0x76, + 0x4b, 0x2b, 0x8b, 0xeb, 0xf1, 0x91, 0x31, 0x51, 0x6c, 0x0c, + 0xac, 0xcc, 0x98, 0xf8, 0x58, 0x38, 0x05, 0x65, 0xc5, 0xa5, + 0xbf, 0xdf, 0x7f, 0x1f, 0x22, 0x42, 0xe2, 0x82, 0x6f, 0x0f, + 0xaf, 0xcf, 0xf2, 0x92, 0x32, 0x52, 0x48, 0x28, 0x88, 0xe8, + 0xd5, 0xb5, 0x15, 0x75, 0x21, 0x41, 0xe1, 0x81, 0xbc, 0xdc, + 0x7c, 0x1c, 0x06, 0x66, 0xc6, 0xa6, 0x9b, 0xfb, 0x5b, 0x3b, + 0xf3, 0x93, 0x33, 0x53, 0x6e, 0x0e, 0xae, 0xce, 0xd4, 0xb4, + 0x14, 0x74, 0x49, 0x29, 0x89, 0xe9, 0xbd, 0xdd, 0x7d, 0x1d, + 0x20, 0x40, 0xe0, 0x80, 0x9a, 0xfa, 0x5a, 0x3a, 0x07, 0x67, + 0xc7, 0xa7, 0x00, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, + 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15, 0x5e, 0x3f, + 0x9c, 0xfd, 0xc7, 0xa6, 0x05, 0x64, 0x71, 0x10, 0xb3, 0xd2, + 0xe8, 0x89, 0x2a, 0x4b, 0xbc, 0xdd, 0x7e, 0x1f, 0x25, 0x44, + 0xe7, 0x86, 0x93, 0xf2, 0x51, 0x30, 0x0a, 0x6b, 0xc8, 0xa9, + 0xe2, 0x83, 0x20, 0x41, 0x7b, 0x1a, 0xb9, 0xd8, 0xcd, 0xac, + 0x0f, 0x6e, 0x54, 0x35, 0x96, 0xf7, 0x65, 0x04, 0xa7, 0xc6, + 0xfc, 0x9d, 0x3e, 0x5f, 0x4a, 0x2b, 0x88, 0xe9, 0xd3, 0xb2, + 0x11, 0x70, 0x3b, 0x5a, 0xf9, 0x98, 0xa2, 0xc3, 0x60, 0x01, + 0x14, 0x75, 0xd6, 0xb7, 0x8d, 0xec, 0x4f, 0x2e, 0xd9, 0xb8, + 0x1b, 0x7a, 0x40, 0x21, 0x82, 0xe3, 0xf6, 0x97, 0x34, 0x55, + 0x6f, 0x0e, 0xad, 0xcc, 0x87, 0xe6, 0x45, 0x24, 0x1e, 0x7f, + 0xdc, 0xbd, 0xa8, 0xc9, 0x6a, 0x0b, 0x31, 0x50, 0xf3, 0x92, + 0xca, 0xab, 0x08, 0x69, 0x53, 0x32, 0x91, 0xf0, 0xe5, 0x84, + 0x27, 0x46, 0x7c, 0x1d, 0xbe, 0xdf, 0x94, 0xf5, 0x56, 0x37, + 0x0d, 0x6c, 0xcf, 0xae, 0xbb, 0xda, 0x79, 0x18, 0x22, 0x43, + 0xe0, 0x81, 0x76, 0x17, 0xb4, 0xd5, 0xef, 0x8e, 0x2d, 0x4c, + 0x59, 0x38, 0x9b, 0xfa, 0xc0, 0xa1, 0x02, 0x63, 0x28, 0x49, + 0xea, 0x8b, 0xb1, 0xd0, 0x73, 0x12, 0x07, 0x66, 0xc5, 0xa4, + 0x9e, 0xff, 0x5c, 0x3d, 0xaf, 0xce, 0x6d, 0x0c, 0x36, 0x57, + 0xf4, 0x95, 0x80, 0xe1, 0x42, 0x23, 0x19, 0x78, 0xdb, 0xba, + 0xf1, 0x90, 0x33, 0x52, 0x68, 0x09, 0xaa, 0xcb, 0xde, 0xbf, + 0x1c, 0x7d, 0x47, 0x26, 0x85, 0xe4, 0x13, 0x72, 0xd1, 0xb0, + 0x8a, 0xeb, 0x48, 0x29, 0x3c, 0x5d, 0xfe, 0x9f, 0xa5, 0xc4, + 0x67, 0x06, 0x4d, 0x2c, 0x8f, 0xee, 0xd4, 0xb5, 0x16, 0x77, + 0x62, 0x03, 0xa0, 0xc1, 0xfb, 0x9a, 0x39, 0x58, 0x00, 0x62, + 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, + 0xa2, 0xc0, 0x66, 0x04, 0x6e, 0x0c, 0xaa, 0xc8, 0xfb, 0x99, + 0x3f, 0x5d, 0x59, 0x3b, 0x9d, 0xff, 0xcc, 0xae, 0x08, 0x6a, + 0xdc, 0xbe, 0x18, 0x7a, 0x49, 0x2b, 0x8d, 0xef, 0xeb, 0x89, + 0x2f, 0x4d, 0x7e, 0x1c, 0xba, 0xd8, 0xb2, 0xd0, 0x76, 0x14, + 0x27, 0x45, 0xe3, 0x81, 0x85, 0xe7, 0x41, 0x23, 0x10, 0x72, + 0xd4, 0xb6, 0xa5, 0xc7, 0x61, 0x03, 0x30, 0x52, 0xf4, 0x96, + 0x92, 0xf0, 0x56, 0x34, 0x07, 0x65, 0xc3, 0xa1, 0xcb, 0xa9, + 0x0f, 0x6d, 0x5e, 0x3c, 0x9a, 0xf8, 0xfc, 0x9e, 0x38, 0x5a, + 0x69, 0x0b, 0xad, 0xcf, 0x79, 0x1b, 0xbd, 0xdf, 0xec, 0x8e, + 0x28, 0x4a, 0x4e, 0x2c, 0x8a, 0xe8, 0xdb, 0xb9, 0x1f, 0x7d, + 0x17, 0x75, 0xd3, 0xb1, 0x82, 0xe0, 0x46, 0x24, 0x20, 0x42, + 0xe4, 0x86, 0xb5, 0xd7, 0x71, 0x13, 0x57, 0x35, 0x93, 0xf1, + 
0xc2, 0xa0, 0x06, 0x64, 0x60, 0x02, 0xa4, 0xc6, 0xf5, 0x97, + 0x31, 0x53, 0x39, 0x5b, 0xfd, 0x9f, 0xac, 0xce, 0x68, 0x0a, + 0x0e, 0x6c, 0xca, 0xa8, 0x9b, 0xf9, 0x5f, 0x3d, 0x8b, 0xe9, + 0x4f, 0x2d, 0x1e, 0x7c, 0xda, 0xb8, 0xbc, 0xde, 0x78, 0x1a, + 0x29, 0x4b, 0xed, 0x8f, 0xe5, 0x87, 0x21, 0x43, 0x70, 0x12, + 0xb4, 0xd6, 0xd2, 0xb0, 0x16, 0x74, 0x47, 0x25, 0x83, 0xe1, + 0xf2, 0x90, 0x36, 0x54, 0x67, 0x05, 0xa3, 0xc1, 0xc5, 0xa7, + 0x01, 0x63, 0x50, 0x32, 0x94, 0xf6, 0x9c, 0xfe, 0x58, 0x3a, + 0x09, 0x6b, 0xcd, 0xaf, 0xab, 0xc9, 0x6f, 0x0d, 0x3e, 0x5c, + 0xfa, 0x98, 0x2e, 0x4c, 0xea, 0x88, 0xbb, 0xd9, 0x7f, 0x1d, + 0x19, 0x7b, 0xdd, 0xbf, 0x8c, 0xee, 0x48, 0x2a, 0x40, 0x22, + 0x84, 0xe6, 0xd5, 0xb7, 0x11, 0x73, 0x77, 0x15, 0xb3, 0xd1, + 0xe2, 0x80, 0x26, 0x44, 0x00, 0x63, 0xc6, 0xa5, 0x91, 0xf2, + 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0x0b, + 0x7e, 0x1d, 0xb8, 0xdb, 0xef, 0x8c, 0x29, 0x4a, 0x41, 0x22, + 0x87, 0xe4, 0xd0, 0xb3, 0x16, 0x75, 0xfc, 0x9f, 0x3a, 0x59, + 0x6d, 0x0e, 0xab, 0xc8, 0xc3, 0xa0, 0x05, 0x66, 0x52, 0x31, + 0x94, 0xf7, 0x82, 0xe1, 0x44, 0x27, 0x13, 0x70, 0xd5, 0xb6, + 0xbd, 0xde, 0x7b, 0x18, 0x2c, 0x4f, 0xea, 0x89, 0xe5, 0x86, + 0x23, 0x40, 0x74, 0x17, 0xb2, 0xd1, 0xda, 0xb9, 0x1c, 0x7f, + 0x4b, 0x28, 0x8d, 0xee, 0x9b, 0xf8, 0x5d, 0x3e, 0x0a, 0x69, + 0xcc, 0xaf, 0xa4, 0xc7, 0x62, 0x01, 0x35, 0x56, 0xf3, 0x90, + 0x19, 0x7a, 0xdf, 0xbc, 0x88, 0xeb, 0x4e, 0x2d, 0x26, 0x45, + 0xe0, 0x83, 0xb7, 0xd4, 0x71, 0x12, 0x67, 0x04, 0xa1, 0xc2, + 0xf6, 0x95, 0x30, 0x53, 0x58, 0x3b, 0x9e, 0xfd, 0xc9, 0xaa, + 0x0f, 0x6c, 0xd7, 0xb4, 0x11, 0x72, 0x46, 0x25, 0x80, 0xe3, + 0xe8, 0x8b, 0x2e, 0x4d, 0x79, 0x1a, 0xbf, 0xdc, 0xa9, 0xca, + 0x6f, 0x0c, 0x38, 0x5b, 0xfe, 0x9d, 0x96, 0xf5, 0x50, 0x33, + 0x07, 0x64, 0xc1, 0xa2, 0x2b, 0x48, 0xed, 0x8e, 0xba, 0xd9, + 0x7c, 0x1f, 0x14, 0x77, 0xd2, 0xb1, 0x85, 0xe6, 0x43, 0x20, + 0x55, 0x36, 0x93, 0xf0, 0xc4, 0xa7, 0x02, 0x61, 0x6a, 0x09, + 0xac, 0xcf, 0xfb, 0x98, 0x3d, 0x5e, 0x32, 0x51, 0xf4, 0x97, + 0xa3, 0xc0, 0x65, 0x06, 0x0d, 0x6e, 0xcb, 0xa8, 0x9c, 0xff, + 0x5a, 0x39, 0x4c, 0x2f, 0x8a, 0xe9, 0xdd, 0xbe, 0x1b, 0x78, + 0x73, 0x10, 0xb5, 0xd6, 0xe2, 0x81, 0x24, 0x47, 0xce, 0xad, + 0x08, 0x6b, 0x5f, 0x3c, 0x99, 0xfa, 0xf1, 0x92, 0x37, 0x54, + 0x60, 0x03, 0xa6, 0xc5, 0xb0, 0xd3, 0x76, 0x15, 0x21, 0x42, + 0xe7, 0x84, 0x8f, 0xec, 0x49, 0x2a, 0x1e, 0x7d, 0xd8, 0xbb, + 0x00, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21, 0x07, 0x63, + 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26, 0x0e, 0x6a, 0xc6, 0xa2, + 0x83, 0xe7, 0x4b, 0x2f, 0x09, 0x6d, 0xc1, 0xa5, 0x84, 0xe0, + 0x4c, 0x28, 0x1c, 0x78, 0xd4, 0xb0, 0x91, 0xf5, 0x59, 0x3d, + 0x1b, 0x7f, 0xd3, 0xb7, 0x96, 0xf2, 0x5e, 0x3a, 0x12, 0x76, + 0xda, 0xbe, 0x9f, 0xfb, 0x57, 0x33, 0x15, 0x71, 0xdd, 0xb9, + 0x98, 0xfc, 0x50, 0x34, 0x38, 0x5c, 0xf0, 0x94, 0xb5, 0xd1, + 0x7d, 0x19, 0x3f, 0x5b, 0xf7, 0x93, 0xb2, 0xd6, 0x7a, 0x1e, + 0x36, 0x52, 0xfe, 0x9a, 0xbb, 0xdf, 0x73, 0x17, 0x31, 0x55, + 0xf9, 0x9d, 0xbc, 0xd8, 0x74, 0x10, 0x24, 0x40, 0xec, 0x88, + 0xa9, 0xcd, 0x61, 0x05, 0x23, 0x47, 0xeb, 0x8f, 0xae, 0xca, + 0x66, 0x02, 0x2a, 0x4e, 0xe2, 0x86, 0xa7, 0xc3, 0x6f, 0x0b, + 0x2d, 0x49, 0xe5, 0x81, 0xa0, 0xc4, 0x68, 0x0c, 0x70, 0x14, + 0xb8, 0xdc, 0xfd, 0x99, 0x35, 0x51, 0x77, 0x13, 0xbf, 0xdb, + 0xfa, 0x9e, 0x32, 0x56, 0x7e, 0x1a, 0xb6, 0xd2, 0xf3, 0x97, + 0x3b, 0x5f, 0x79, 0x1d, 0xb1, 0xd5, 0xf4, 0x90, 0x3c, 0x58, + 0x6c, 0x08, 0xa4, 0xc0, 0xe1, 0x85, 0x29, 0x4d, 0x6b, 0x0f, + 0xa3, 0xc7, 0xe6, 0x82, 0x2e, 0x4a, 0x62, 0x06, 0xaa, 0xce, + 0xef, 0x8b, 0x27, 0x43, 0x65, 0x01, 0xad, 0xc9, 0xe8, 0x8c, + 0x20, 0x44, 0x48, 
0x2c, 0x80, 0xe4, 0xc5, 0xa1, 0x0d, 0x69, + 0x4f, 0x2b, 0x87, 0xe3, 0xc2, 0xa6, 0x0a, 0x6e, 0x46, 0x22, + 0x8e, 0xea, 0xcb, 0xaf, 0x03, 0x67, 0x41, 0x25, 0x89, 0xed, + 0xcc, 0xa8, 0x04, 0x60, 0x54, 0x30, 0x9c, 0xf8, 0xd9, 0xbd, + 0x11, 0x75, 0x53, 0x37, 0x9b, 0xff, 0xde, 0xba, 0x16, 0x72, + 0x5a, 0x3e, 0x92, 0xf6, 0xd7, 0xb3, 0x1f, 0x7b, 0x5d, 0x39, + 0x95, 0xf1, 0xd0, 0xb4, 0x18, 0x7c, 0x00, 0x65, 0xca, 0xaf, + 0x89, 0xec, 0x43, 0x26, 0x0f, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, + 0x4c, 0x29, 0x1e, 0x7b, 0xd4, 0xb1, 0x97, 0xf2, 0x5d, 0x38, + 0x11, 0x74, 0xdb, 0xbe, 0x98, 0xfd, 0x52, 0x37, 0x3c, 0x59, + 0xf6, 0x93, 0xb5, 0xd0, 0x7f, 0x1a, 0x33, 0x56, 0xf9, 0x9c, + 0xba, 0xdf, 0x70, 0x15, 0x22, 0x47, 0xe8, 0x8d, 0xab, 0xce, + 0x61, 0x04, 0x2d, 0x48, 0xe7, 0x82, 0xa4, 0xc1, 0x6e, 0x0b, + 0x78, 0x1d, 0xb2, 0xd7, 0xf1, 0x94, 0x3b, 0x5e, 0x77, 0x12, + 0xbd, 0xd8, 0xfe, 0x9b, 0x34, 0x51, 0x66, 0x03, 0xac, 0xc9, + 0xef, 0x8a, 0x25, 0x40, 0x69, 0x0c, 0xa3, 0xc6, 0xe0, 0x85, + 0x2a, 0x4f, 0x44, 0x21, 0x8e, 0xeb, 0xcd, 0xa8, 0x07, 0x62, + 0x4b, 0x2e, 0x81, 0xe4, 0xc2, 0xa7, 0x08, 0x6d, 0x5a, 0x3f, + 0x90, 0xf5, 0xd3, 0xb6, 0x19, 0x7c, 0x55, 0x30, 0x9f, 0xfa, + 0xdc, 0xb9, 0x16, 0x73, 0xf0, 0x95, 0x3a, 0x5f, 0x79, 0x1c, + 0xb3, 0xd6, 0xff, 0x9a, 0x35, 0x50, 0x76, 0x13, 0xbc, 0xd9, + 0xee, 0x8b, 0x24, 0x41, 0x67, 0x02, 0xad, 0xc8, 0xe1, 0x84, + 0x2b, 0x4e, 0x68, 0x0d, 0xa2, 0xc7, 0xcc, 0xa9, 0x06, 0x63, + 0x45, 0x20, 0x8f, 0xea, 0xc3, 0xa6, 0x09, 0x6c, 0x4a, 0x2f, + 0x80, 0xe5, 0xd2, 0xb7, 0x18, 0x7d, 0x5b, 0x3e, 0x91, 0xf4, + 0xdd, 0xb8, 0x17, 0x72, 0x54, 0x31, 0x9e, 0xfb, 0x88, 0xed, + 0x42, 0x27, 0x01, 0x64, 0xcb, 0xae, 0x87, 0xe2, 0x4d, 0x28, + 0x0e, 0x6b, 0xc4, 0xa1, 0x96, 0xf3, 0x5c, 0x39, 0x1f, 0x7a, + 0xd5, 0xb0, 0x99, 0xfc, 0x53, 0x36, 0x10, 0x75, 0xda, 0xbf, + 0xb4, 0xd1, 0x7e, 0x1b, 0x3d, 0x58, 0xf7, 0x92, 0xbb, 0xde, + 0x71, 0x14, 0x32, 0x57, 0xf8, 0x9d, 0xaa, 0xcf, 0x60, 0x05, + 0x23, 0x46, 0xe9, 0x8c, 0xa5, 0xc0, 0x6f, 0x0a, 0x2c, 0x49, + 0xe6, 0x83, 0x00, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, + 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38, 0x2e, 0x48, + 0xe2, 0x84, 0xab, 0xcd, 0x67, 0x01, 0x39, 0x5f, 0xf5, 0x93, + 0xbc, 0xda, 0x70, 0x16, 0x5c, 0x3a, 0x90, 0xf6, 0xd9, 0xbf, + 0x15, 0x73, 0x4b, 0x2d, 0x87, 0xe1, 0xce, 0xa8, 0x02, 0x64, + 0x72, 0x14, 0xbe, 0xd8, 0xf7, 0x91, 0x3b, 0x5d, 0x65, 0x03, + 0xa9, 0xcf, 0xe0, 0x86, 0x2c, 0x4a, 0xb8, 0xde, 0x74, 0x12, + 0x3d, 0x5b, 0xf1, 0x97, 0xaf, 0xc9, 0x63, 0x05, 0x2a, 0x4c, + 0xe6, 0x80, 0x96, 0xf0, 0x5a, 0x3c, 0x13, 0x75, 0xdf, 0xb9, + 0x81, 0xe7, 0x4d, 0x2b, 0x04, 0x62, 0xc8, 0xae, 0xe4, 0x82, + 0x28, 0x4e, 0x61, 0x07, 0xad, 0xcb, 0xf3, 0x95, 0x3f, 0x59, + 0x76, 0x10, 0xba, 0xdc, 0xca, 0xac, 0x06, 0x60, 0x4f, 0x29, + 0x83, 0xe5, 0xdd, 0xbb, 0x11, 0x77, 0x58, 0x3e, 0x94, 0xf2, + 0x6d, 0x0b, 0xa1, 0xc7, 0xe8, 0x8e, 0x24, 0x42, 0x7a, 0x1c, + 0xb6, 0xd0, 0xff, 0x99, 0x33, 0x55, 0x43, 0x25, 0x8f, 0xe9, + 0xc6, 0xa0, 0x0a, 0x6c, 0x54, 0x32, 0x98, 0xfe, 0xd1, 0xb7, + 0x1d, 0x7b, 0x31, 0x57, 0xfd, 0x9b, 0xb4, 0xd2, 0x78, 0x1e, + 0x26, 0x40, 0xea, 0x8c, 0xa3, 0xc5, 0x6f, 0x09, 0x1f, 0x79, + 0xd3, 0xb5, 0x9a, 0xfc, 0x56, 0x30, 0x08, 0x6e, 0xc4, 0xa2, + 0x8d, 0xeb, 0x41, 0x27, 0xd5, 0xb3, 0x19, 0x7f, 0x50, 0x36, + 0x9c, 0xfa, 0xc2, 0xa4, 0x0e, 0x68, 0x47, 0x21, 0x8b, 0xed, + 0xfb, 0x9d, 0x37, 0x51, 0x7e, 0x18, 0xb2, 0xd4, 0xec, 0x8a, + 0x20, 0x46, 0x69, 0x0f, 0xa5, 0xc3, 0x89, 0xef, 0x45, 0x23, + 0x0c, 0x6a, 0xc0, 0xa6, 0x9e, 0xf8, 0x52, 0x34, 0x1b, 0x7d, + 0xd7, 0xb1, 0xa7, 0xc1, 0x6b, 0x0d, 0x22, 0x44, 0xee, 0x88, + 0xb0, 0xd6, 0x7c, 0x1a, 0x35, 0x53, 
0xf9, 0x9f, 0x00, 0x67, + 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, + 0x9e, 0xf9, 0x50, 0x37, 0x3e, 0x59, 0xf0, 0x97, 0xbf, 0xd8, + 0x71, 0x16, 0x21, 0x46, 0xef, 0x88, 0xa0, 0xc7, 0x6e, 0x09, + 0x7c, 0x1b, 0xb2, 0xd5, 0xfd, 0x9a, 0x33, 0x54, 0x63, 0x04, + 0xad, 0xca, 0xe2, 0x85, 0x2c, 0x4b, 0x42, 0x25, 0x8c, 0xeb, + 0xc3, 0xa4, 0x0d, 0x6a, 0x5d, 0x3a, 0x93, 0xf4, 0xdc, 0xbb, + 0x12, 0x75, 0xf8, 0x9f, 0x36, 0x51, 0x79, 0x1e, 0xb7, 0xd0, + 0xe7, 0x80, 0x29, 0x4e, 0x66, 0x01, 0xa8, 0xcf, 0xc6, 0xa1, + 0x08, 0x6f, 0x47, 0x20, 0x89, 0xee, 0xd9, 0xbe, 0x17, 0x70, + 0x58, 0x3f, 0x96, 0xf1, 0x84, 0xe3, 0x4a, 0x2d, 0x05, 0x62, + 0xcb, 0xac, 0x9b, 0xfc, 0x55, 0x32, 0x1a, 0x7d, 0xd4, 0xb3, + 0xba, 0xdd, 0x74, 0x13, 0x3b, 0x5c, 0xf5, 0x92, 0xa5, 0xc2, + 0x6b, 0x0c, 0x24, 0x43, 0xea, 0x8d, 0xed, 0x8a, 0x23, 0x44, + 0x6c, 0x0b, 0xa2, 0xc5, 0xf2, 0x95, 0x3c, 0x5b, 0x73, 0x14, + 0xbd, 0xda, 0xd3, 0xb4, 0x1d, 0x7a, 0x52, 0x35, 0x9c, 0xfb, + 0xcc, 0xab, 0x02, 0x65, 0x4d, 0x2a, 0x83, 0xe4, 0x91, 0xf6, + 0x5f, 0x38, 0x10, 0x77, 0xde, 0xb9, 0x8e, 0xe9, 0x40, 0x27, + 0x0f, 0x68, 0xc1, 0xa6, 0xaf, 0xc8, 0x61, 0x06, 0x2e, 0x49, + 0xe0, 0x87, 0xb0, 0xd7, 0x7e, 0x19, 0x31, 0x56, 0xff, 0x98, + 0x15, 0x72, 0xdb, 0xbc, 0x94, 0xf3, 0x5a, 0x3d, 0x0a, 0x6d, + 0xc4, 0xa3, 0x8b, 0xec, 0x45, 0x22, 0x2b, 0x4c, 0xe5, 0x82, + 0xaa, 0xcd, 0x64, 0x03, 0x34, 0x53, 0xfa, 0x9d, 0xb5, 0xd2, + 0x7b, 0x1c, 0x69, 0x0e, 0xa7, 0xc0, 0xe8, 0x8f, 0x26, 0x41, + 0x76, 0x11, 0xb8, 0xdf, 0xf7, 0x90, 0x39, 0x5e, 0x57, 0x30, + 0x99, 0xfe, 0xd6, 0xb1, 0x18, 0x7f, 0x48, 0x2f, 0x86, 0xe1, + 0xc9, 0xae, 0x07, 0x60, 0x00, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, + 0x6d, 0x05, 0x67, 0x0f, 0xb7, 0xdf, 0xda, 0xb2, 0x0a, 0x62, + 0xce, 0xa6, 0x1e, 0x76, 0x73, 0x1b, 0xa3, 0xcb, 0xa9, 0xc1, + 0x79, 0x11, 0x14, 0x7c, 0xc4, 0xac, 0x81, 0xe9, 0x51, 0x39, + 0x3c, 0x54, 0xec, 0x84, 0xe6, 0x8e, 0x36, 0x5e, 0x5b, 0x33, + 0x8b, 0xe3, 0x4f, 0x27, 0x9f, 0xf7, 0xf2, 0x9a, 0x22, 0x4a, + 0x28, 0x40, 0xf8, 0x90, 0x95, 0xfd, 0x45, 0x2d, 0x1f, 0x77, + 0xcf, 0xa7, 0xa2, 0xca, 0x72, 0x1a, 0x78, 0x10, 0xa8, 0xc0, + 0xc5, 0xad, 0x15, 0x7d, 0xd1, 0xb9, 0x01, 0x69, 0x6c, 0x04, + 0xbc, 0xd4, 0xb6, 0xde, 0x66, 0x0e, 0x0b, 0x63, 0xdb, 0xb3, + 0x9e, 0xf6, 0x4e, 0x26, 0x23, 0x4b, 0xf3, 0x9b, 0xf9, 0x91, + 0x29, 0x41, 0x44, 0x2c, 0x94, 0xfc, 0x50, 0x38, 0x80, 0xe8, + 0xed, 0x85, 0x3d, 0x55, 0x37, 0x5f, 0xe7, 0x8f, 0x8a, 0xe2, + 0x5a, 0x32, 0x3e, 0x56, 0xee, 0x86, 0x83, 0xeb, 0x53, 0x3b, + 0x59, 0x31, 0x89, 0xe1, 0xe4, 0x8c, 0x34, 0x5c, 0xf0, 0x98, + 0x20, 0x48, 0x4d, 0x25, 0x9d, 0xf5, 0x97, 0xff, 0x47, 0x2f, + 0x2a, 0x42, 0xfa, 0x92, 0xbf, 0xd7, 0x6f, 0x07, 0x02, 0x6a, + 0xd2, 0xba, 0xd8, 0xb0, 0x08, 0x60, 0x65, 0x0d, 0xb5, 0xdd, + 0x71, 0x19, 0xa1, 0xc9, 0xcc, 0xa4, 0x1c, 0x74, 0x16, 0x7e, + 0xc6, 0xae, 0xab, 0xc3, 0x7b, 0x13, 0x21, 0x49, 0xf1, 0x99, + 0x9c, 0xf4, 0x4c, 0x24, 0x46, 0x2e, 0x96, 0xfe, 0xfb, 0x93, + 0x2b, 0x43, 0xef, 0x87, 0x3f, 0x57, 0x52, 0x3a, 0x82, 0xea, + 0x88, 0xe0, 0x58, 0x30, 0x35, 0x5d, 0xe5, 0x8d, 0xa0, 0xc8, + 0x70, 0x18, 0x1d, 0x75, 0xcd, 0xa5, 0xc7, 0xaf, 0x17, 0x7f, + 0x7a, 0x12, 0xaa, 0xc2, 0x6e, 0x06, 0xbe, 0xd6, 0xd3, 0xbb, + 0x03, 0x6b, 0x09, 0x61, 0xd9, 0xb1, 0xb4, 0xdc, 0x64, 0x0c, + 0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02, 0x6f, 0x06, + 0xbd, 0xd4, 0xd6, 0xbf, 0x04, 0x6d, 0xde, 0xb7, 0x0c, 0x65, + 0x67, 0x0e, 0xb5, 0xdc, 0xb1, 0xd8, 0x63, 0x0a, 0x08, 0x61, + 0xda, 0xb3, 0xa1, 0xc8, 0x73, 0x1a, 0x18, 0x71, 0xca, 0xa3, + 0xce, 0xa7, 0x1c, 0x75, 0x77, 0x1e, 0xa5, 0xcc, 0x7f, 0x16, + 0xad, 0xc4, 0xc6, 0xaf, 0x14, 0x7d, 0x10, 0x79, 0xc2, 
0xab, + 0xa9, 0xc0, 0x7b, 0x12, 0x5f, 0x36, 0x8d, 0xe4, 0xe6, 0x8f, + 0x34, 0x5d, 0x30, 0x59, 0xe2, 0x8b, 0x89, 0xe0, 0x5b, 0x32, + 0x81, 0xe8, 0x53, 0x3a, 0x38, 0x51, 0xea, 0x83, 0xee, 0x87, + 0x3c, 0x55, 0x57, 0x3e, 0x85, 0xec, 0xfe, 0x97, 0x2c, 0x45, + 0x47, 0x2e, 0x95, 0xfc, 0x91, 0xf8, 0x43, 0x2a, 0x28, 0x41, + 0xfa, 0x93, 0x20, 0x49, 0xf2, 0x9b, 0x99, 0xf0, 0x4b, 0x22, + 0x4f, 0x26, 0x9d, 0xf4, 0xf6, 0x9f, 0x24, 0x4d, 0xbe, 0xd7, + 0x6c, 0x05, 0x07, 0x6e, 0xd5, 0xbc, 0xd1, 0xb8, 0x03, 0x6a, + 0x68, 0x01, 0xba, 0xd3, 0x60, 0x09, 0xb2, 0xdb, 0xd9, 0xb0, + 0x0b, 0x62, 0x0f, 0x66, 0xdd, 0xb4, 0xb6, 0xdf, 0x64, 0x0d, + 0x1f, 0x76, 0xcd, 0xa4, 0xa6, 0xcf, 0x74, 0x1d, 0x70, 0x19, + 0xa2, 0xcb, 0xc9, 0xa0, 0x1b, 0x72, 0xc1, 0xa8, 0x13, 0x7a, + 0x78, 0x11, 0xaa, 0xc3, 0xae, 0xc7, 0x7c, 0x15, 0x17, 0x7e, + 0xc5, 0xac, 0xe1, 0x88, 0x33, 0x5a, 0x58, 0x31, 0x8a, 0xe3, + 0x8e, 0xe7, 0x5c, 0x35, 0x37, 0x5e, 0xe5, 0x8c, 0x3f, 0x56, + 0xed, 0x84, 0x86, 0xef, 0x54, 0x3d, 0x50, 0x39, 0x82, 0xeb, + 0xe9, 0x80, 0x3b, 0x52, 0x40, 0x29, 0x92, 0xfb, 0xf9, 0x90, + 0x2b, 0x42, 0x2f, 0x46, 0xfd, 0x94, 0x96, 0xff, 0x44, 0x2d, + 0x9e, 0xf7, 0x4c, 0x25, 0x27, 0x4e, 0xf5, 0x9c, 0xf1, 0x98, + 0x23, 0x4a, 0x48, 0x21, 0x9a, 0xf3, 0x00, 0x6a, 0xd4, 0xbe, + 0xb5, 0xdf, 0x61, 0x0b, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, + 0x16, 0x7c, 0xee, 0x84, 0x3a, 0x50, 0x5b, 0x31, 0x8f, 0xe5, + 0x99, 0xf3, 0x4d, 0x27, 0x2c, 0x46, 0xf8, 0x92, 0xc1, 0xab, + 0x15, 0x7f, 0x74, 0x1e, 0xa0, 0xca, 0xb6, 0xdc, 0x62, 0x08, + 0x03, 0x69, 0xd7, 0xbd, 0x2f, 0x45, 0xfb, 0x91, 0x9a, 0xf0, + 0x4e, 0x24, 0x58, 0x32, 0x8c, 0xe6, 0xed, 0x87, 0x39, 0x53, + 0x9f, 0xf5, 0x4b, 0x21, 0x2a, 0x40, 0xfe, 0x94, 0xe8, 0x82, + 0x3c, 0x56, 0x5d, 0x37, 0x89, 0xe3, 0x71, 0x1b, 0xa5, 0xcf, + 0xc4, 0xae, 0x10, 0x7a, 0x06, 0x6c, 0xd2, 0xb8, 0xb3, 0xd9, + 0x67, 0x0d, 0x5e, 0x34, 0x8a, 0xe0, 0xeb, 0x81, 0x3f, 0x55, + 0x29, 0x43, 0xfd, 0x97, 0x9c, 0xf6, 0x48, 0x22, 0xb0, 0xda, + 0x64, 0x0e, 0x05, 0x6f, 0xd1, 0xbb, 0xc7, 0xad, 0x13, 0x79, + 0x72, 0x18, 0xa6, 0xcc, 0x23, 0x49, 0xf7, 0x9d, 0x96, 0xfc, + 0x42, 0x28, 0x54, 0x3e, 0x80, 0xea, 0xe1, 0x8b, 0x35, 0x5f, + 0xcd, 0xa7, 0x19, 0x73, 0x78, 0x12, 0xac, 0xc6, 0xba, 0xd0, + 0x6e, 0x04, 0x0f, 0x65, 0xdb, 0xb1, 0xe2, 0x88, 0x36, 0x5c, + 0x57, 0x3d, 0x83, 0xe9, 0x95, 0xff, 0x41, 0x2b, 0x20, 0x4a, + 0xf4, 0x9e, 0x0c, 0x66, 0xd8, 0xb2, 0xb9, 0xd3, 0x6d, 0x07, + 0x7b, 0x11, 0xaf, 0xc5, 0xce, 0xa4, 0x1a, 0x70, 0xbc, 0xd6, + 0x68, 0x02, 0x09, 0x63, 0xdd, 0xb7, 0xcb, 0xa1, 0x1f, 0x75, + 0x7e, 0x14, 0xaa, 0xc0, 0x52, 0x38, 0x86, 0xec, 0xe7, 0x8d, + 0x33, 0x59, 0x25, 0x4f, 0xf1, 0x9b, 0x90, 0xfa, 0x44, 0x2e, + 0x7d, 0x17, 0xa9, 0xc3, 0xc8, 0xa2, 0x1c, 0x76, 0x0a, 0x60, + 0xde, 0xb4, 0xbf, 0xd5, 0x6b, 0x01, 0x93, 0xf9, 0x47, 0x2d, + 0x26, 0x4c, 0xf2, 0x98, 0xe4, 0x8e, 0x30, 0x5a, 0x51, 0x3b, + 0x85, 0xef, 0x00, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0x0c, + 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73, 0xfe, 0x95, + 0x28, 0x43, 0x4f, 0x24, 0x99, 0xf2, 0x81, 0xea, 0x57, 0x3c, + 0x30, 0x5b, 0xe6, 0x8d, 0xe1, 0x8a, 0x37, 0x5c, 0x50, 0x3b, + 0x86, 0xed, 0x9e, 0xf5, 0x48, 0x23, 0x2f, 0x44, 0xf9, 0x92, + 0x1f, 0x74, 0xc9, 0xa2, 0xae, 0xc5, 0x78, 0x13, 0x60, 0x0b, + 0xb6, 0xdd, 0xd1, 0xba, 0x07, 0x6c, 0xdf, 0xb4, 0x09, 0x62, + 0x6e, 0x05, 0xb8, 0xd3, 0xa0, 0xcb, 0x76, 0x1d, 0x11, 0x7a, + 0xc7, 0xac, 0x21, 0x4a, 0xf7, 0x9c, 0x90, 0xfb, 0x46, 0x2d, + 0x5e, 0x35, 0x88, 0xe3, 0xef, 0x84, 0x39, 0x52, 0x3e, 0x55, + 0xe8, 0x83, 0x8f, 0xe4, 0x59, 0x32, 0x41, 0x2a, 0x97, 0xfc, + 0xf0, 0x9b, 0x26, 0x4d, 0xc0, 0xab, 0x16, 0x7d, 0x71, 0x1a, + 0xa7, 0xcc, 
0xbf, 0xd4, 0x69, 0x02, 0x0e, 0x65, 0xd8, 0xb3, + 0xa3, 0xc8, 0x75, 0x1e, 0x12, 0x79, 0xc4, 0xaf, 0xdc, 0xb7, + 0x0a, 0x61, 0x6d, 0x06, 0xbb, 0xd0, 0x5d, 0x36, 0x8b, 0xe0, + 0xec, 0x87, 0x3a, 0x51, 0x22, 0x49, 0xf4, 0x9f, 0x93, 0xf8, + 0x45, 0x2e, 0x42, 0x29, 0x94, 0xff, 0xf3, 0x98, 0x25, 0x4e, + 0x3d, 0x56, 0xeb, 0x80, 0x8c, 0xe7, 0x5a, 0x31, 0xbc, 0xd7, + 0x6a, 0x01, 0x0d, 0x66, 0xdb, 0xb0, 0xc3, 0xa8, 0x15, 0x7e, + 0x72, 0x19, 0xa4, 0xcf, 0x7c, 0x17, 0xaa, 0xc1, 0xcd, 0xa6, + 0x1b, 0x70, 0x03, 0x68, 0xd5, 0xbe, 0xb2, 0xd9, 0x64, 0x0f, + 0x82, 0xe9, 0x54, 0x3f, 0x33, 0x58, 0xe5, 0x8e, 0xfd, 0x96, + 0x2b, 0x40, 0x4c, 0x27, 0x9a, 0xf1, 0x9d, 0xf6, 0x4b, 0x20, + 0x2c, 0x47, 0xfa, 0x91, 0xe2, 0x89, 0x34, 0x5f, 0x53, 0x38, + 0x85, 0xee, 0x63, 0x08, 0xb5, 0xde, 0xd2, 0xb9, 0x04, 0x6f, + 0x1c, 0x77, 0xca, 0xa1, 0xad, 0xc6, 0x7b, 0x10, 0x00, 0x6c, + 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, + 0xea, 0x86, 0x32, 0x5e, 0x8e, 0xe2, 0x56, 0x3a, 0x23, 0x4f, + 0xfb, 0x97, 0xc9, 0xa5, 0x11, 0x7d, 0x64, 0x08, 0xbc, 0xd0, + 0x01, 0x6d, 0xd9, 0xb5, 0xac, 0xc0, 0x74, 0x18, 0x46, 0x2a, + 0x9e, 0xf2, 0xeb, 0x87, 0x33, 0x5f, 0x8f, 0xe3, 0x57, 0x3b, + 0x22, 0x4e, 0xfa, 0x96, 0xc8, 0xa4, 0x10, 0x7c, 0x65, 0x09, + 0xbd, 0xd1, 0x02, 0x6e, 0xda, 0xb6, 0xaf, 0xc3, 0x77, 0x1b, + 0x45, 0x29, 0x9d, 0xf1, 0xe8, 0x84, 0x30, 0x5c, 0x8c, 0xe0, + 0x54, 0x38, 0x21, 0x4d, 0xf9, 0x95, 0xcb, 0xa7, 0x13, 0x7f, + 0x66, 0x0a, 0xbe, 0xd2, 0x03, 0x6f, 0xdb, 0xb7, 0xae, 0xc2, + 0x76, 0x1a, 0x44, 0x28, 0x9c, 0xf0, 0xe9, 0x85, 0x31, 0x5d, + 0x8d, 0xe1, 0x55, 0x39, 0x20, 0x4c, 0xf8, 0x94, 0xca, 0xa6, + 0x12, 0x7e, 0x67, 0x0b, 0xbf, 0xd3, 0x04, 0x68, 0xdc, 0xb0, + 0xa9, 0xc5, 0x71, 0x1d, 0x43, 0x2f, 0x9b, 0xf7, 0xee, 0x82, + 0x36, 0x5a, 0x8a, 0xe6, 0x52, 0x3e, 0x27, 0x4b, 0xff, 0x93, + 0xcd, 0xa1, 0x15, 0x79, 0x60, 0x0c, 0xb8, 0xd4, 0x05, 0x69, + 0xdd, 0xb1, 0xa8, 0xc4, 0x70, 0x1c, 0x42, 0x2e, 0x9a, 0xf6, + 0xef, 0x83, 0x37, 0x5b, 0x8b, 0xe7, 0x53, 0x3f, 0x26, 0x4a, + 0xfe, 0x92, 0xcc, 0xa0, 0x14, 0x78, 0x61, 0x0d, 0xb9, 0xd5, + 0x06, 0x6a, 0xde, 0xb2, 0xab, 0xc7, 0x73, 0x1f, 0x41, 0x2d, + 0x99, 0xf5, 0xec, 0x80, 0x34, 0x58, 0x88, 0xe4, 0x50, 0x3c, + 0x25, 0x49, 0xfd, 0x91, 0xcf, 0xa3, 0x17, 0x7b, 0x62, 0x0e, + 0xba, 0xd6, 0x07, 0x6b, 0xdf, 0xb3, 0xaa, 0xc6, 0x72, 0x1e, + 0x40, 0x2c, 0x98, 0xf4, 0xed, 0x81, 0x35, 0x59, 0x89, 0xe5, + 0x51, 0x3d, 0x24, 0x48, 0xfc, 0x90, 0xce, 0xa2, 0x16, 0x7a, + 0x63, 0x0f, 0xbb, 0xd7, 0x00, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, + 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51, + 0x9e, 0xf3, 0x44, 0x29, 0x37, 0x5a, 0xed, 0x80, 0xd1, 0xbc, + 0x0b, 0x66, 0x78, 0x15, 0xa2, 0xcf, 0x21, 0x4c, 0xfb, 0x96, + 0x88, 0xe5, 0x52, 0x3f, 0x6e, 0x03, 0xb4, 0xd9, 0xc7, 0xaa, + 0x1d, 0x70, 0xbf, 0xd2, 0x65, 0x08, 0x16, 0x7b, 0xcc, 0xa1, + 0xf0, 0x9d, 0x2a, 0x47, 0x59, 0x34, 0x83, 0xee, 0x42, 0x2f, + 0x98, 0xf5, 0xeb, 0x86, 0x31, 0x5c, 0x0d, 0x60, 0xd7, 0xba, + 0xa4, 0xc9, 0x7e, 0x13, 0xdc, 0xb1, 0x06, 0x6b, 0x75, 0x18, + 0xaf, 0xc2, 0x93, 0xfe, 0x49, 0x24, 0x3a, 0x57, 0xe0, 0x8d, + 0x63, 0x0e, 0xb9, 0xd4, 0xca, 0xa7, 0x10, 0x7d, 0x2c, 0x41, + 0xf6, 0x9b, 0x85, 0xe8, 0x5f, 0x32, 0xfd, 0x90, 0x27, 0x4a, + 0x54, 0x39, 0x8e, 0xe3, 0xb2, 0xdf, 0x68, 0x05, 0x1b, 0x76, + 0xc1, 0xac, 0x84, 0xe9, 0x5e, 0x33, 0x2d, 0x40, 0xf7, 0x9a, + 0xcb, 0xa6, 0x11, 0x7c, 0x62, 0x0f, 0xb8, 0xd5, 0x1a, 0x77, + 0xc0, 0xad, 0xb3, 0xde, 0x69, 0x04, 0x55, 0x38, 0x8f, 0xe2, + 0xfc, 0x91, 0x26, 0x4b, 0xa5, 0xc8, 0x7f, 0x12, 0x0c, 0x61, + 0xd6, 0xbb, 0xea, 0x87, 0x30, 0x5d, 0x43, 0x2e, 0x99, 0xf4, + 0x3b, 0x56, 0xe1, 0x8c, 0x92, 
0xff, 0x48, 0x25, 0x74, 0x19, + 0xae, 0xc3, 0xdd, 0xb0, 0x07, 0x6a, 0xc6, 0xab, 0x1c, 0x71, + 0x6f, 0x02, 0xb5, 0xd8, 0x89, 0xe4, 0x53, 0x3e, 0x20, 0x4d, + 0xfa, 0x97, 0x58, 0x35, 0x82, 0xef, 0xf1, 0x9c, 0x2b, 0x46, + 0x17, 0x7a, 0xcd, 0xa0, 0xbe, 0xd3, 0x64, 0x09, 0xe7, 0x8a, + 0x3d, 0x50, 0x4e, 0x23, 0x94, 0xf9, 0xa8, 0xc5, 0x72, 0x1f, + 0x01, 0x6c, 0xdb, 0xb6, 0x79, 0x14, 0xa3, 0xce, 0xd0, 0xbd, + 0x0a, 0x67, 0x36, 0x5b, 0xec, 0x81, 0x9f, 0xf2, 0x45, 0x28, + 0x00, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 0x39, + 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40, 0xae, 0xc0, 0x72, 0x1c, + 0x0b, 0x65, 0xd7, 0xb9, 0xf9, 0x97, 0x25, 0x4b, 0x5c, 0x32, + 0x80, 0xee, 0x41, 0x2f, 0x9d, 0xf3, 0xe4, 0x8a, 0x38, 0x56, + 0x16, 0x78, 0xca, 0xa4, 0xb3, 0xdd, 0x6f, 0x01, 0xef, 0x81, + 0x33, 0x5d, 0x4a, 0x24, 0x96, 0xf8, 0xb8, 0xd6, 0x64, 0x0a, + 0x1d, 0x73, 0xc1, 0xaf, 0x82, 0xec, 0x5e, 0x30, 0x27, 0x49, + 0xfb, 0x95, 0xd5, 0xbb, 0x09, 0x67, 0x70, 0x1e, 0xac, 0xc2, + 0x2c, 0x42, 0xf0, 0x9e, 0x89, 0xe7, 0x55, 0x3b, 0x7b, 0x15, + 0xa7, 0xc9, 0xde, 0xb0, 0x02, 0x6c, 0xc3, 0xad, 0x1f, 0x71, + 0x66, 0x08, 0xba, 0xd4, 0x94, 0xfa, 0x48, 0x26, 0x31, 0x5f, + 0xed, 0x83, 0x6d, 0x03, 0xb1, 0xdf, 0xc8, 0xa6, 0x14, 0x7a, + 0x3a, 0x54, 0xe6, 0x88, 0x9f, 0xf1, 0x43, 0x2d, 0x19, 0x77, + 0xc5, 0xab, 0xbc, 0xd2, 0x60, 0x0e, 0x4e, 0x20, 0x92, 0xfc, + 0xeb, 0x85, 0x37, 0x59, 0xb7, 0xd9, 0x6b, 0x05, 0x12, 0x7c, + 0xce, 0xa0, 0xe0, 0x8e, 0x3c, 0x52, 0x45, 0x2b, 0x99, 0xf7, + 0x58, 0x36, 0x84, 0xea, 0xfd, 0x93, 0x21, 0x4f, 0x0f, 0x61, + 0xd3, 0xbd, 0xaa, 0xc4, 0x76, 0x18, 0xf6, 0x98, 0x2a, 0x44, + 0x53, 0x3d, 0x8f, 0xe1, 0xa1, 0xcf, 0x7d, 0x13, 0x04, 0x6a, + 0xd8, 0xb6, 0x9b, 0xf5, 0x47, 0x29, 0x3e, 0x50, 0xe2, 0x8c, + 0xcc, 0xa2, 0x10, 0x7e, 0x69, 0x07, 0xb5, 0xdb, 0x35, 0x5b, + 0xe9, 0x87, 0x90, 0xfe, 0x4c, 0x22, 0x62, 0x0c, 0xbe, 0xd0, + 0xc7, 0xa9, 0x1b, 0x75, 0xda, 0xb4, 0x06, 0x68, 0x7f, 0x11, + 0xa3, 0xcd, 0x8d, 0xe3, 0x51, 0x3f, 0x28, 0x46, 0xf4, 0x9a, + 0x74, 0x1a, 0xa8, 0xc6, 0xd1, 0xbf, 0x0d, 0x63, 0x23, 0x4d, + 0xff, 0x91, 0x86, 0xe8, 0x5a, 0x34, 0x00, 0x6f, 0xde, 0xb1, + 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, + 0x20, 0x4f, 0xbe, 0xd1, 0x60, 0x0f, 0x1f, 0x70, 0xc1, 0xae, + 0xe1, 0x8e, 0x3f, 0x50, 0x40, 0x2f, 0x9e, 0xf1, 0x61, 0x0e, + 0xbf, 0xd0, 0xc0, 0xaf, 0x1e, 0x71, 0x3e, 0x51, 0xe0, 0x8f, + 0x9f, 0xf0, 0x41, 0x2e, 0xdf, 0xb0, 0x01, 0x6e, 0x7e, 0x11, + 0xa0, 0xcf, 0x80, 0xef, 0x5e, 0x31, 0x21, 0x4e, 0xff, 0x90, + 0xc2, 0xad, 0x1c, 0x73, 0x63, 0x0c, 0xbd, 0xd2, 0x9d, 0xf2, + 0x43, 0x2c, 0x3c, 0x53, 0xe2, 0x8d, 0x7c, 0x13, 0xa2, 0xcd, + 0xdd, 0xb2, 0x03, 0x6c, 0x23, 0x4c, 0xfd, 0x92, 0x82, 0xed, + 0x5c, 0x33, 0xa3, 0xcc, 0x7d, 0x12, 0x02, 0x6d, 0xdc, 0xb3, + 0xfc, 0x93, 0x22, 0x4d, 0x5d, 0x32, 0x83, 0xec, 0x1d, 0x72, + 0xc3, 0xac, 0xbc, 0xd3, 0x62, 0x0d, 0x42, 0x2d, 0x9c, 0xf3, + 0xe3, 0x8c, 0x3d, 0x52, 0x99, 0xf6, 0x47, 0x28, 0x38, 0x57, + 0xe6, 0x89, 0xc6, 0xa9, 0x18, 0x77, 0x67, 0x08, 0xb9, 0xd6, + 0x27, 0x48, 0xf9, 0x96, 0x86, 0xe9, 0x58, 0x37, 0x78, 0x17, + 0xa6, 0xc9, 0xd9, 0xb6, 0x07, 0x68, 0xf8, 0x97, 0x26, 0x49, + 0x59, 0x36, 0x87, 0xe8, 0xa7, 0xc8, 0x79, 0x16, 0x06, 0x69, + 0xd8, 0xb7, 0x46, 0x29, 0x98, 0xf7, 0xe7, 0x88, 0x39, 0x56, + 0x19, 0x76, 0xc7, 0xa8, 0xb8, 0xd7, 0x66, 0x09, 0x5b, 0x34, + 0x85, 0xea, 0xfa, 0x95, 0x24, 0x4b, 0x04, 0x6b, 0xda, 0xb5, + 0xa5, 0xca, 0x7b, 0x14, 0xe5, 0x8a, 0x3b, 0x54, 0x44, 0x2b, + 0x9a, 0xf5, 0xba, 0xd5, 0x64, 0x0b, 0x1b, 0x74, 0xc5, 0xaa, + 0x3a, 0x55, 0xe4, 0x8b, 0x9b, 0xf4, 0x45, 0x2a, 0x65, 0x0a, + 0xbb, 0xd4, 0xc4, 0xab, 0x1a, 0x75, 0x84, 0xeb, 
0x5a, 0x35, + 0x25, 0x4a, 0xfb, 0x94, 0xdb, 0xb4, 0x05, 0x6a, 0x7a, 0x15, + 0xa4, 0xcb, 0x00, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, + 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0x0a, 0x9a, 0xea, 0x53, 0x23, + 0xb3, 0xc3, 0x8e, 0xfe, 0x6e, 0x1e, 0xf4, 0x84, 0x14, 0x64, + 0x29, 0x59, 0xc9, 0xb9, 0xa6, 0xd6, 0x46, 0x36, 0x7b, 0x0b, + 0x9b, 0xeb, 0x01, 0x71, 0xe1, 0x91, 0xdc, 0xac, 0x3c, 0x4c, + 0xf5, 0x85, 0x15, 0x65, 0x28, 0x58, 0xc8, 0xb8, 0x52, 0x22, + 0xb2, 0xc2, 0x8f, 0xff, 0x6f, 0x1f, 0x51, 0x21, 0xb1, 0xc1, + 0x8c, 0xfc, 0x6c, 0x1c, 0xf6, 0x86, 0x16, 0x66, 0x2b, 0x5b, + 0xcb, 0xbb, 0x02, 0x72, 0xe2, 0x92, 0xdf, 0xaf, 0x3f, 0x4f, + 0xa5, 0xd5, 0x45, 0x35, 0x78, 0x08, 0x98, 0xe8, 0xf7, 0x87, + 0x17, 0x67, 0x2a, 0x5a, 0xca, 0xba, 0x50, 0x20, 0xb0, 0xc0, + 0x8d, 0xfd, 0x6d, 0x1d, 0xa4, 0xd4, 0x44, 0x34, 0x79, 0x09, + 0x99, 0xe9, 0x03, 0x73, 0xe3, 0x93, 0xde, 0xae, 0x3e, 0x4e, + 0xa2, 0xd2, 0x42, 0x32, 0x7f, 0x0f, 0x9f, 0xef, 0x05, 0x75, + 0xe5, 0x95, 0xd8, 0xa8, 0x38, 0x48, 0xf1, 0x81, 0x11, 0x61, + 0x2c, 0x5c, 0xcc, 0xbc, 0x56, 0x26, 0xb6, 0xc6, 0x8b, 0xfb, + 0x6b, 0x1b, 0x04, 0x74, 0xe4, 0x94, 0xd9, 0xa9, 0x39, 0x49, + 0xa3, 0xd3, 0x43, 0x33, 0x7e, 0x0e, 0x9e, 0xee, 0x57, 0x27, + 0xb7, 0xc7, 0x8a, 0xfa, 0x6a, 0x1a, 0xf0, 0x80, 0x10, 0x60, + 0x2d, 0x5d, 0xcd, 0xbd, 0xf3, 0x83, 0x13, 0x63, 0x2e, 0x5e, + 0xce, 0xbe, 0x54, 0x24, 0xb4, 0xc4, 0x89, 0xf9, 0x69, 0x19, + 0xa0, 0xd0, 0x40, 0x30, 0x7d, 0x0d, 0x9d, 0xed, 0x07, 0x77, + 0xe7, 0x97, 0xda, 0xaa, 0x3a, 0x4a, 0x55, 0x25, 0xb5, 0xc5, + 0x88, 0xf8, 0x68, 0x18, 0xf2, 0x82, 0x12, 0x62, 0x2f, 0x5f, + 0xcf, 0xbf, 0x06, 0x76, 0xe6, 0x96, 0xdb, 0xab, 0x3b, 0x4b, + 0xa1, 0xd1, 0x41, 0x31, 0x7c, 0x0c, 0x9c, 0xec, 0x00, 0x71, + 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, + 0x76, 0x07, 0x94, 0xe5, 0x43, 0x32, 0xa1, 0xd0, 0x9a, 0xeb, + 0x78, 0x09, 0xec, 0x9d, 0x0e, 0x7f, 0x35, 0x44, 0xd7, 0xa6, + 0x86, 0xf7, 0x64, 0x15, 0x5f, 0x2e, 0xbd, 0xcc, 0x29, 0x58, + 0xcb, 0xba, 0xf0, 0x81, 0x12, 0x63, 0xc5, 0xb4, 0x27, 0x56, + 0x1c, 0x6d, 0xfe, 0x8f, 0x6a, 0x1b, 0x88, 0xf9, 0xb3, 0xc2, + 0x51, 0x20, 0x11, 0x60, 0xf3, 0x82, 0xc8, 0xb9, 0x2a, 0x5b, + 0xbe, 0xcf, 0x5c, 0x2d, 0x67, 0x16, 0x85, 0xf4, 0x52, 0x23, + 0xb0, 0xc1, 0x8b, 0xfa, 0x69, 0x18, 0xfd, 0x8c, 0x1f, 0x6e, + 0x24, 0x55, 0xc6, 0xb7, 0x97, 0xe6, 0x75, 0x04, 0x4e, 0x3f, + 0xac, 0xdd, 0x38, 0x49, 0xda, 0xab, 0xe1, 0x90, 0x03, 0x72, + 0xd4, 0xa5, 0x36, 0x47, 0x0d, 0x7c, 0xef, 0x9e, 0x7b, 0x0a, + 0x99, 0xe8, 0xa2, 0xd3, 0x40, 0x31, 0x22, 0x53, 0xc0, 0xb1, + 0xfb, 0x8a, 0x19, 0x68, 0x8d, 0xfc, 0x6f, 0x1e, 0x54, 0x25, + 0xb6, 0xc7, 0x61, 0x10, 0x83, 0xf2, 0xb8, 0xc9, 0x5a, 0x2b, + 0xce, 0xbf, 0x2c, 0x5d, 0x17, 0x66, 0xf5, 0x84, 0xa4, 0xd5, + 0x46, 0x37, 0x7d, 0x0c, 0x9f, 0xee, 0x0b, 0x7a, 0xe9, 0x98, + 0xd2, 0xa3, 0x30, 0x41, 0xe7, 0x96, 0x05, 0x74, 0x3e, 0x4f, + 0xdc, 0xad, 0x48, 0x39, 0xaa, 0xdb, 0x91, 0xe0, 0x73, 0x02, + 0x33, 0x42, 0xd1, 0xa0, 0xea, 0x9b, 0x08, 0x79, 0x9c, 0xed, + 0x7e, 0x0f, 0x45, 0x34, 0xa7, 0xd6, 0x70, 0x01, 0x92, 0xe3, + 0xa9, 0xd8, 0x4b, 0x3a, 0xdf, 0xae, 0x3d, 0x4c, 0x06, 0x77, + 0xe4, 0x95, 0xb5, 0xc4, 0x57, 0x26, 0x6c, 0x1d, 0x8e, 0xff, + 0x1a, 0x6b, 0xf8, 0x89, 0xc3, 0xb2, 0x21, 0x50, 0xf6, 0x87, + 0x14, 0x65, 0x2f, 0x5e, 0xcd, 0xbc, 0x59, 0x28, 0xbb, 0xca, + 0x80, 0xf1, 0x62, 0x13, 0x00, 0x72, 0xe4, 0x96, 0xd5, 0xa7, + 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4, + 0x73, 0x01, 0x97, 0xe5, 0xa6, 0xd4, 0x42, 0x30, 0xc4, 0xb6, + 0x20, 0x52, 0x11, 0x63, 0xf5, 0x87, 0xe6, 0x94, 0x02, 0x70, + 0x33, 0x41, 0xd7, 0xa5, 0x51, 0x23, 0xb5, 0xc7, 0x84, 0xf6, + 0x60, 
0x12, 0x95, 0xe7, 0x71, 0x03, 0x40, 0x32, 0xa4, 0xd6, + 0x22, 0x50, 0xc6, 0xb4, 0xf7, 0x85, 0x13, 0x61, 0xd1, 0xa3, + 0x35, 0x47, 0x04, 0x76, 0xe0, 0x92, 0x66, 0x14, 0x82, 0xf0, + 0xb3, 0xc1, 0x57, 0x25, 0xa2, 0xd0, 0x46, 0x34, 0x77, 0x05, + 0x93, 0xe1, 0x15, 0x67, 0xf1, 0x83, 0xc0, 0xb2, 0x24, 0x56, + 0x37, 0x45, 0xd3, 0xa1, 0xe2, 0x90, 0x06, 0x74, 0x80, 0xf2, + 0x64, 0x16, 0x55, 0x27, 0xb1, 0xc3, 0x44, 0x36, 0xa0, 0xd2, + 0x91, 0xe3, 0x75, 0x07, 0xf3, 0x81, 0x17, 0x65, 0x26, 0x54, + 0xc2, 0xb0, 0xbf, 0xcd, 0x5b, 0x29, 0x6a, 0x18, 0x8e, 0xfc, + 0x08, 0x7a, 0xec, 0x9e, 0xdd, 0xaf, 0x39, 0x4b, 0xcc, 0xbe, + 0x28, 0x5a, 0x19, 0x6b, 0xfd, 0x8f, 0x7b, 0x09, 0x9f, 0xed, + 0xae, 0xdc, 0x4a, 0x38, 0x59, 0x2b, 0xbd, 0xcf, 0x8c, 0xfe, + 0x68, 0x1a, 0xee, 0x9c, 0x0a, 0x78, 0x3b, 0x49, 0xdf, 0xad, + 0x2a, 0x58, 0xce, 0xbc, 0xff, 0x8d, 0x1b, 0x69, 0x9d, 0xef, + 0x79, 0x0b, 0x48, 0x3a, 0xac, 0xde, 0x6e, 0x1c, 0x8a, 0xf8, + 0xbb, 0xc9, 0x5f, 0x2d, 0xd9, 0xab, 0x3d, 0x4f, 0x0c, 0x7e, + 0xe8, 0x9a, 0x1d, 0x6f, 0xf9, 0x8b, 0xc8, 0xba, 0x2c, 0x5e, + 0xaa, 0xd8, 0x4e, 0x3c, 0x7f, 0x0d, 0x9b, 0xe9, 0x88, 0xfa, + 0x6c, 0x1e, 0x5d, 0x2f, 0xb9, 0xcb, 0x3f, 0x4d, 0xdb, 0xa9, + 0xea, 0x98, 0x0e, 0x7c, 0xfb, 0x89, 0x1f, 0x6d, 0x2e, 0x5c, + 0xca, 0xb8, 0x4c, 0x3e, 0xa8, 0xda, 0x99, 0xeb, 0x7d, 0x0f, + 0x00, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, + 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb, 0x63, 0x10, 0x85, 0xf6, + 0xb2, 0xc1, 0x54, 0x27, 0xdc, 0xaf, 0x3a, 0x49, 0x0d, 0x7e, + 0xeb, 0x98, 0xc6, 0xb5, 0x20, 0x53, 0x17, 0x64, 0xf1, 0x82, + 0x79, 0x0a, 0x9f, 0xec, 0xa8, 0xdb, 0x4e, 0x3d, 0xa5, 0xd6, + 0x43, 0x30, 0x74, 0x07, 0x92, 0xe1, 0x1a, 0x69, 0xfc, 0x8f, + 0xcb, 0xb8, 0x2d, 0x5e, 0x91, 0xe2, 0x77, 0x04, 0x40, 0x33, + 0xa6, 0xd5, 0x2e, 0x5d, 0xc8, 0xbb, 0xff, 0x8c, 0x19, 0x6a, + 0xf2, 0x81, 0x14, 0x67, 0x23, 0x50, 0xc5, 0xb6, 0x4d, 0x3e, + 0xab, 0xd8, 0x9c, 0xef, 0x7a, 0x09, 0x57, 0x24, 0xb1, 0xc2, + 0x86, 0xf5, 0x60, 0x13, 0xe8, 0x9b, 0x0e, 0x7d, 0x39, 0x4a, + 0xdf, 0xac, 0x34, 0x47, 0xd2, 0xa1, 0xe5, 0x96, 0x03, 0x70, + 0x8b, 0xf8, 0x6d, 0x1e, 0x5a, 0x29, 0xbc, 0xcf, 0x3f, 0x4c, + 0xd9, 0xaa, 0xee, 0x9d, 0x08, 0x7b, 0x80, 0xf3, 0x66, 0x15, + 0x51, 0x22, 0xb7, 0xc4, 0x5c, 0x2f, 0xba, 0xc9, 0x8d, 0xfe, + 0x6b, 0x18, 0xe3, 0x90, 0x05, 0x76, 0x32, 0x41, 0xd4, 0xa7, + 0xf9, 0x8a, 0x1f, 0x6c, 0x28, 0x5b, 0xce, 0xbd, 0x46, 0x35, + 0xa0, 0xd3, 0x97, 0xe4, 0x71, 0x02, 0x9a, 0xe9, 0x7c, 0x0f, + 0x4b, 0x38, 0xad, 0xde, 0x25, 0x56, 0xc3, 0xb0, 0xf4, 0x87, + 0x12, 0x61, 0xae, 0xdd, 0x48, 0x3b, 0x7f, 0x0c, 0x99, 0xea, + 0x11, 0x62, 0xf7, 0x84, 0xc0, 0xb3, 0x26, 0x55, 0xcd, 0xbe, + 0x2b, 0x58, 0x1c, 0x6f, 0xfa, 0x89, 0x72, 0x01, 0x94, 0xe7, + 0xa3, 0xd0, 0x45, 0x36, 0x68, 0x1b, 0x8e, 0xfd, 0xb9, 0xca, + 0x5f, 0x2c, 0xd7, 0xa4, 0x31, 0x42, 0x06, 0x75, 0xe0, 0x93, + 0x0b, 0x78, 0xed, 0x9e, 0xda, 0xa9, 0x3c, 0x4f, 0xb4, 0xc7, + 0x52, 0x21, 0x65, 0x16, 0x83, 0xf0, 0x00, 0x74, 0xe8, 0x9c, + 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, + 0xa2, 0xd6, 0x13, 0x67, 0xfb, 0x8f, 0xde, 0xaa, 0x36, 0x42, + 0x94, 0xe0, 0x7c, 0x08, 0x59, 0x2d, 0xb1, 0xc5, 0x26, 0x52, + 0xce, 0xba, 0xeb, 0x9f, 0x03, 0x77, 0xa1, 0xd5, 0x49, 0x3d, + 0x6c, 0x18, 0x84, 0xf0, 0x35, 0x41, 0xdd, 0xa9, 0xf8, 0x8c, + 0x10, 0x64, 0xb2, 0xc6, 0x5a, 0x2e, 0x7f, 0x0b, 0x97, 0xe3, + 0x4c, 0x38, 0xa4, 0xd0, 0x81, 0xf5, 0x69, 0x1d, 0xcb, 0xbf, + 0x23, 0x57, 0x06, 0x72, 0xee, 0x9a, 0x5f, 0x2b, 0xb7, 0xc3, + 0x92, 0xe6, 0x7a, 0x0e, 0xd8, 0xac, 0x30, 0x44, 0x15, 0x61, + 0xfd, 0x89, 0x6a, 0x1e, 0x82, 0xf6, 0xa7, 0xd3, 0x4f, 0x3b, + 0xed, 0x99, 0x05, 0x71, 
0x20, 0x54, 0xc8, 0xbc, 0x79, 0x0d, + 0x91, 0xe5, 0xb4, 0xc0, 0x5c, 0x28, 0xfe, 0x8a, 0x16, 0x62, + 0x33, 0x47, 0xdb, 0xaf, 0x98, 0xec, 0x70, 0x04, 0x55, 0x21, + 0xbd, 0xc9, 0x1f, 0x6b, 0xf7, 0x83, 0xd2, 0xa6, 0x3a, 0x4e, + 0x8b, 0xff, 0x63, 0x17, 0x46, 0x32, 0xae, 0xda, 0x0c, 0x78, + 0xe4, 0x90, 0xc1, 0xb5, 0x29, 0x5d, 0xbe, 0xca, 0x56, 0x22, + 0x73, 0x07, 0x9b, 0xef, 0x39, 0x4d, 0xd1, 0xa5, 0xf4, 0x80, + 0x1c, 0x68, 0xad, 0xd9, 0x45, 0x31, 0x60, 0x14, 0x88, 0xfc, + 0x2a, 0x5e, 0xc2, 0xb6, 0xe7, 0x93, 0x0f, 0x7b, 0xd4, 0xa0, + 0x3c, 0x48, 0x19, 0x6d, 0xf1, 0x85, 0x53, 0x27, 0xbb, 0xcf, + 0x9e, 0xea, 0x76, 0x02, 0xc7, 0xb3, 0x2f, 0x5b, 0x0a, 0x7e, + 0xe2, 0x96, 0x40, 0x34, 0xa8, 0xdc, 0x8d, 0xf9, 0x65, 0x11, + 0xf2, 0x86, 0x1a, 0x6e, 0x3f, 0x4b, 0xd7, 0xa3, 0x75, 0x01, + 0x9d, 0xe9, 0xb8, 0xcc, 0x50, 0x24, 0xe1, 0x95, 0x09, 0x7d, + 0x2c, 0x58, 0xc4, 0xb0, 0x66, 0x12, 0x8e, 0xfa, 0xab, 0xdf, + 0x43, 0x37, 0x00, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, + 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9, 0x03, 0x76, + 0xe9, 0x9c, 0xca, 0xbf, 0x20, 0x55, 0x8c, 0xf9, 0x66, 0x13, + 0x45, 0x30, 0xaf, 0xda, 0x06, 0x73, 0xec, 0x99, 0xcf, 0xba, + 0x25, 0x50, 0x89, 0xfc, 0x63, 0x16, 0x40, 0x35, 0xaa, 0xdf, + 0x05, 0x70, 0xef, 0x9a, 0xcc, 0xb9, 0x26, 0x53, 0x8a, 0xff, + 0x60, 0x15, 0x43, 0x36, 0xa9, 0xdc, 0x0c, 0x79, 0xe6, 0x93, + 0xc5, 0xb0, 0x2f, 0x5a, 0x83, 0xf6, 0x69, 0x1c, 0x4a, 0x3f, + 0xa0, 0xd5, 0x0f, 0x7a, 0xe5, 0x90, 0xc6, 0xb3, 0x2c, 0x59, + 0x80, 0xf5, 0x6a, 0x1f, 0x49, 0x3c, 0xa3, 0xd6, 0x0a, 0x7f, + 0xe0, 0x95, 0xc3, 0xb6, 0x29, 0x5c, 0x85, 0xf0, 0x6f, 0x1a, + 0x4c, 0x39, 0xa6, 0xd3, 0x09, 0x7c, 0xe3, 0x96, 0xc0, 0xb5, + 0x2a, 0x5f, 0x86, 0xf3, 0x6c, 0x19, 0x4f, 0x3a, 0xa5, 0xd0, + 0x18, 0x6d, 0xf2, 0x87, 0xd1, 0xa4, 0x3b, 0x4e, 0x97, 0xe2, + 0x7d, 0x08, 0x5e, 0x2b, 0xb4, 0xc1, 0x1b, 0x6e, 0xf1, 0x84, + 0xd2, 0xa7, 0x38, 0x4d, 0x94, 0xe1, 0x7e, 0x0b, 0x5d, 0x28, + 0xb7, 0xc2, 0x1e, 0x6b, 0xf4, 0x81, 0xd7, 0xa2, 0x3d, 0x48, + 0x91, 0xe4, 0x7b, 0x0e, 0x58, 0x2d, 0xb2, 0xc7, 0x1d, 0x68, + 0xf7, 0x82, 0xd4, 0xa1, 0x3e, 0x4b, 0x92, 0xe7, 0x78, 0x0d, + 0x5b, 0x2e, 0xb1, 0xc4, 0x14, 0x61, 0xfe, 0x8b, 0xdd, 0xa8, + 0x37, 0x42, 0x9b, 0xee, 0x71, 0x04, 0x52, 0x27, 0xb8, 0xcd, + 0x17, 0x62, 0xfd, 0x88, 0xde, 0xab, 0x34, 0x41, 0x98, 0xed, + 0x72, 0x07, 0x51, 0x24, 0xbb, 0xce, 0x12, 0x67, 0xf8, 0x8d, + 0xdb, 0xae, 0x31, 0x44, 0x9d, 0xe8, 0x77, 0x02, 0x54, 0x21, + 0xbe, 0xcb, 0x11, 0x64, 0xfb, 0x8e, 0xd8, 0xad, 0x32, 0x47, + 0x9e, 0xeb, 0x74, 0x01, 0x57, 0x22, 0xbd, 0xc8, 0x00, 0x76, + 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0x0d, + 0x52, 0x24, 0xbe, 0xc8, 0x33, 0x45, 0xdf, 0xa9, 0xf6, 0x80, + 0x1a, 0x6c, 0xa4, 0xd2, 0x48, 0x3e, 0x61, 0x17, 0x8d, 0xfb, + 0x66, 0x10, 0x8a, 0xfc, 0xa3, 0xd5, 0x4f, 0x39, 0xf1, 0x87, + 0x1d, 0x6b, 0x34, 0x42, 0xd8, 0xae, 0x55, 0x23, 0xb9, 0xcf, + 0x90, 0xe6, 0x7c, 0x0a, 0xc2, 0xb4, 0x2e, 0x58, 0x07, 0x71, + 0xeb, 0x9d, 0xcc, 0xba, 0x20, 0x56, 0x09, 0x7f, 0xe5, 0x93, + 0x5b, 0x2d, 0xb7, 0xc1, 0x9e, 0xe8, 0x72, 0x04, 0xff, 0x89, + 0x13, 0x65, 0x3a, 0x4c, 0xd6, 0xa0, 0x68, 0x1e, 0x84, 0xf2, + 0xad, 0xdb, 0x41, 0x37, 0xaa, 0xdc, 0x46, 0x30, 0x6f, 0x19, + 0x83, 0xf5, 0x3d, 0x4b, 0xd1, 0xa7, 0xf8, 0x8e, 0x14, 0x62, + 0x99, 0xef, 0x75, 0x03, 0x5c, 0x2a, 0xb0, 0xc6, 0x0e, 0x78, + 0xe2, 0x94, 0xcb, 0xbd, 0x27, 0x51, 0x85, 0xf3, 0x69, 0x1f, + 0x40, 0x36, 0xac, 0xda, 0x12, 0x64, 0xfe, 0x88, 0xd7, 0xa1, + 0x3b, 0x4d, 0xb6, 0xc0, 0x5a, 0x2c, 0x73, 0x05, 0x9f, 0xe9, + 0x21, 0x57, 0xcd, 0xbb, 0xe4, 0x92, 0x08, 0x7e, 0xe3, 0x95, + 0x0f, 0x79, 0x26, 0x50, 0xca, 0xbc, 0x74, 
0x02, 0x98, 0xee, + 0xb1, 0xc7, 0x5d, 0x2b, 0xd0, 0xa6, 0x3c, 0x4a, 0x15, 0x63, + 0xf9, 0x8f, 0x47, 0x31, 0xab, 0xdd, 0x82, 0xf4, 0x6e, 0x18, + 0x49, 0x3f, 0xa5, 0xd3, 0x8c, 0xfa, 0x60, 0x16, 0xde, 0xa8, + 0x32, 0x44, 0x1b, 0x6d, 0xf7, 0x81, 0x7a, 0x0c, 0x96, 0xe0, + 0xbf, 0xc9, 0x53, 0x25, 0xed, 0x9b, 0x01, 0x77, 0x28, 0x5e, + 0xc4, 0xb2, 0x2f, 0x59, 0xc3, 0xb5, 0xea, 0x9c, 0x06, 0x70, + 0xb8, 0xce, 0x54, 0x22, 0x7d, 0x0b, 0x91, 0xe7, 0x1c, 0x6a, + 0xf0, 0x86, 0xd9, 0xaf, 0x35, 0x43, 0x8b, 0xfd, 0x67, 0x11, + 0x4e, 0x38, 0xa2, 0xd4, 0x00, 0x77, 0xee, 0x99, 0xc1, 0xb6, + 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x06, 0x5e, 0x29, 0xb0, 0xc7, + 0x23, 0x54, 0xcd, 0xba, 0xe2, 0x95, 0x0c, 0x7b, 0xbc, 0xcb, + 0x52, 0x25, 0x7d, 0x0a, 0x93, 0xe4, 0x46, 0x31, 0xa8, 0xdf, + 0x87, 0xf0, 0x69, 0x1e, 0xd9, 0xae, 0x37, 0x40, 0x18, 0x6f, + 0xf6, 0x81, 0x65, 0x12, 0x8b, 0xfc, 0xa4, 0xd3, 0x4a, 0x3d, + 0xfa, 0x8d, 0x14, 0x63, 0x3b, 0x4c, 0xd5, 0xa2, 0x8c, 0xfb, + 0x62, 0x15, 0x4d, 0x3a, 0xa3, 0xd4, 0x13, 0x64, 0xfd, 0x8a, + 0xd2, 0xa5, 0x3c, 0x4b, 0xaf, 0xd8, 0x41, 0x36, 0x6e, 0x19, + 0x80, 0xf7, 0x30, 0x47, 0xde, 0xa9, 0xf1, 0x86, 0x1f, 0x68, + 0xca, 0xbd, 0x24, 0x53, 0x0b, 0x7c, 0xe5, 0x92, 0x55, 0x22, + 0xbb, 0xcc, 0x94, 0xe3, 0x7a, 0x0d, 0xe9, 0x9e, 0x07, 0x70, + 0x28, 0x5f, 0xc6, 0xb1, 0x76, 0x01, 0x98, 0xef, 0xb7, 0xc0, + 0x59, 0x2e, 0x05, 0x72, 0xeb, 0x9c, 0xc4, 0xb3, 0x2a, 0x5d, + 0x9a, 0xed, 0x74, 0x03, 0x5b, 0x2c, 0xb5, 0xc2, 0x26, 0x51, + 0xc8, 0xbf, 0xe7, 0x90, 0x09, 0x7e, 0xb9, 0xce, 0x57, 0x20, + 0x78, 0x0f, 0x96, 0xe1, 0x43, 0x34, 0xad, 0xda, 0x82, 0xf5, + 0x6c, 0x1b, 0xdc, 0xab, 0x32, 0x45, 0x1d, 0x6a, 0xf3, 0x84, + 0x60, 0x17, 0x8e, 0xf9, 0xa1, 0xd6, 0x4f, 0x38, 0xff, 0x88, + 0x11, 0x66, 0x3e, 0x49, 0xd0, 0xa7, 0x89, 0xfe, 0x67, 0x10, + 0x48, 0x3f, 0xa6, 0xd1, 0x16, 0x61, 0xf8, 0x8f, 0xd7, 0xa0, + 0x39, 0x4e, 0xaa, 0xdd, 0x44, 0x33, 0x6b, 0x1c, 0x85, 0xf2, + 0x35, 0x42, 0xdb, 0xac, 0xf4, 0x83, 0x1a, 0x6d, 0xcf, 0xb8, + 0x21, 0x56, 0x0e, 0x79, 0xe0, 0x97, 0x50, 0x27, 0xbe, 0xc9, + 0x91, 0xe6, 0x7f, 0x08, 0xec, 0x9b, 0x02, 0x75, 0x2d, 0x5a, + 0xc3, 0xb4, 0x73, 0x04, 0x9d, 0xea, 0xb2, 0xc5, 0x5c, 0x2b, + 0x00, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0x0d, 0x75, 0xe7, 0x9f, + 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92, 0xd3, 0xab, 0x23, 0x5b, + 0x2e, 0x56, 0xde, 0xa6, 0x34, 0x4c, 0xc4, 0xbc, 0xc9, 0xb1, + 0x39, 0x41, 0xbb, 0xc3, 0x4b, 0x33, 0x46, 0x3e, 0xb6, 0xce, + 0x5c, 0x24, 0xac, 0xd4, 0xa1, 0xd9, 0x51, 0x29, 0x68, 0x10, + 0x98, 0xe0, 0x95, 0xed, 0x65, 0x1d, 0x8f, 0xf7, 0x7f, 0x07, + 0x72, 0x0a, 0x82, 0xfa, 0x6b, 0x13, 0x9b, 0xe3, 0x96, 0xee, + 0x66, 0x1e, 0x8c, 0xf4, 0x7c, 0x04, 0x71, 0x09, 0x81, 0xf9, + 0xb8, 0xc0, 0x48, 0x30, 0x45, 0x3d, 0xb5, 0xcd, 0x5f, 0x27, + 0xaf, 0xd7, 0xa2, 0xda, 0x52, 0x2a, 0xd0, 0xa8, 0x20, 0x58, + 0x2d, 0x55, 0xdd, 0xa5, 0x37, 0x4f, 0xc7, 0xbf, 0xca, 0xb2, + 0x3a, 0x42, 0x03, 0x7b, 0xf3, 0x8b, 0xfe, 0x86, 0x0e, 0x76, + 0xe4, 0x9c, 0x14, 0x6c, 0x19, 0x61, 0xe9, 0x91, 0xd6, 0xae, + 0x26, 0x5e, 0x2b, 0x53, 0xdb, 0xa3, 0x31, 0x49, 0xc1, 0xb9, + 0xcc, 0xb4, 0x3c, 0x44, 0x05, 0x7d, 0xf5, 0x8d, 0xf8, 0x80, + 0x08, 0x70, 0xe2, 0x9a, 0x12, 0x6a, 0x1f, 0x67, 0xef, 0x97, + 0x6d, 0x15, 0x9d, 0xe5, 0x90, 0xe8, 0x60, 0x18, 0x8a, 0xf2, + 0x7a, 0x02, 0x77, 0x0f, 0x87, 0xff, 0xbe, 0xc6, 0x4e, 0x36, + 0x43, 0x3b, 0xb3, 0xcb, 0x59, 0x21, 0xa9, 0xd1, 0xa4, 0xdc, + 0x54, 0x2c, 0xbd, 0xc5, 0x4d, 0x35, 0x40, 0x38, 0xb0, 0xc8, + 0x5a, 0x22, 0xaa, 0xd2, 0xa7, 0xdf, 0x57, 0x2f, 0x6e, 0x16, + 0x9e, 0xe6, 0x93, 0xeb, 0x63, 0x1b, 0x89, 0xf1, 0x79, 0x01, + 0x74, 0x0c, 0x84, 0xfc, 0x06, 0x7e, 0xf6, 0x8e, 0xfb, 0x83, + 
0x0b, 0x73, 0xe1, 0x99, 0x11, 0x69, 0x1c, 0x64, 0xec, 0x94, + 0xd5, 0xad, 0x25, 0x5d, 0x28, 0x50, 0xd8, 0xa0, 0x32, 0x4a, + 0xc2, 0xba, 0xcf, 0xb7, 0x3f, 0x47, 0x00, 0x79, 0xf2, 0x8b, + 0xf9, 0x80, 0x0b, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, + 0xe4, 0x9d, 0xc3, 0xba, 0x31, 0x48, 0x3a, 0x43, 0xc8, 0xb1, + 0x2c, 0x55, 0xde, 0xa7, 0xd5, 0xac, 0x27, 0x5e, 0x9b, 0xe2, + 0x69, 0x10, 0x62, 0x1b, 0x90, 0xe9, 0x74, 0x0d, 0x86, 0xff, + 0x8d, 0xf4, 0x7f, 0x06, 0x58, 0x21, 0xaa, 0xd3, 0xa1, 0xd8, + 0x53, 0x2a, 0xb7, 0xce, 0x45, 0x3c, 0x4e, 0x37, 0xbc, 0xc5, + 0x2b, 0x52, 0xd9, 0xa0, 0xd2, 0xab, 0x20, 0x59, 0xc4, 0xbd, + 0x36, 0x4f, 0x3d, 0x44, 0xcf, 0xb6, 0xe8, 0x91, 0x1a, 0x63, + 0x11, 0x68, 0xe3, 0x9a, 0x07, 0x7e, 0xf5, 0x8c, 0xfe, 0x87, + 0x0c, 0x75, 0xb0, 0xc9, 0x42, 0x3b, 0x49, 0x30, 0xbb, 0xc2, + 0x5f, 0x26, 0xad, 0xd4, 0xa6, 0xdf, 0x54, 0x2d, 0x73, 0x0a, + 0x81, 0xf8, 0x8a, 0xf3, 0x78, 0x01, 0x9c, 0xe5, 0x6e, 0x17, + 0x65, 0x1c, 0x97, 0xee, 0x56, 0x2f, 0xa4, 0xdd, 0xaf, 0xd6, + 0x5d, 0x24, 0xb9, 0xc0, 0x4b, 0x32, 0x40, 0x39, 0xb2, 0xcb, + 0x95, 0xec, 0x67, 0x1e, 0x6c, 0x15, 0x9e, 0xe7, 0x7a, 0x03, + 0x88, 0xf1, 0x83, 0xfa, 0x71, 0x08, 0xcd, 0xb4, 0x3f, 0x46, + 0x34, 0x4d, 0xc6, 0xbf, 0x22, 0x5b, 0xd0, 0xa9, 0xdb, 0xa2, + 0x29, 0x50, 0x0e, 0x77, 0xfc, 0x85, 0xf7, 0x8e, 0x05, 0x7c, + 0xe1, 0x98, 0x13, 0x6a, 0x18, 0x61, 0xea, 0x93, 0x7d, 0x04, + 0x8f, 0xf6, 0x84, 0xfd, 0x76, 0x0f, 0x92, 0xeb, 0x60, 0x19, + 0x6b, 0x12, 0x99, 0xe0, 0xbe, 0xc7, 0x4c, 0x35, 0x47, 0x3e, + 0xb5, 0xcc, 0x51, 0x28, 0xa3, 0xda, 0xa8, 0xd1, 0x5a, 0x23, + 0xe6, 0x9f, 0x14, 0x6d, 0x1f, 0x66, 0xed, 0x94, 0x09, 0x70, + 0xfb, 0x82, 0xf0, 0x89, 0x02, 0x7b, 0x25, 0x5c, 0xd7, 0xae, + 0xdc, 0xa5, 0x2e, 0x57, 0xca, 0xb3, 0x38, 0x41, 0x33, 0x4a, + 0xc1, 0xb8, 0x00, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x01, 0x7b, + 0xf7, 0x8d, 0x03, 0x79, 0x02, 0x78, 0xf6, 0x8c, 0xf3, 0x89, + 0x07, 0x7d, 0x06, 0x7c, 0xf2, 0x88, 0x04, 0x7e, 0xf0, 0x8a, + 0xf1, 0x8b, 0x05, 0x7f, 0xfb, 0x81, 0x0f, 0x75, 0x0e, 0x74, + 0xfa, 0x80, 0x0c, 0x76, 0xf8, 0x82, 0xf9, 0x83, 0x0d, 0x77, + 0x08, 0x72, 0xfc, 0x86, 0xfd, 0x87, 0x09, 0x73, 0xff, 0x85, + 0x0b, 0x71, 0x0a, 0x70, 0xfe, 0x84, 0xeb, 0x91, 0x1f, 0x65, + 0x1e, 0x64, 0xea, 0x90, 0x1c, 0x66, 0xe8, 0x92, 0xe9, 0x93, + 0x1d, 0x67, 0x18, 0x62, 0xec, 0x96, 0xed, 0x97, 0x19, 0x63, + 0xef, 0x95, 0x1b, 0x61, 0x1a, 0x60, 0xee, 0x94, 0x10, 0x6a, + 0xe4, 0x9e, 0xe5, 0x9f, 0x11, 0x6b, 0xe7, 0x9d, 0x13, 0x69, + 0x12, 0x68, 0xe6, 0x9c, 0xe3, 0x99, 0x17, 0x6d, 0x16, 0x6c, + 0xe2, 0x98, 0x14, 0x6e, 0xe0, 0x9a, 0xe1, 0x9b, 0x15, 0x6f, + 0xcb, 0xb1, 0x3f, 0x45, 0x3e, 0x44, 0xca, 0xb0, 0x3c, 0x46, + 0xc8, 0xb2, 0xc9, 0xb3, 0x3d, 0x47, 0x38, 0x42, 0xcc, 0xb6, + 0xcd, 0xb7, 0x39, 0x43, 0xcf, 0xb5, 0x3b, 0x41, 0x3a, 0x40, + 0xce, 0xb4, 0x30, 0x4a, 0xc4, 0xbe, 0xc5, 0xbf, 0x31, 0x4b, + 0xc7, 0xbd, 0x33, 0x49, 0x32, 0x48, 0xc6, 0xbc, 0xc3, 0xb9, + 0x37, 0x4d, 0x36, 0x4c, 0xc2, 0xb8, 0x34, 0x4e, 0xc0, 0xba, + 0xc1, 0xbb, 0x35, 0x4f, 0x20, 0x5a, 0xd4, 0xae, 0xd5, 0xaf, + 0x21, 0x5b, 0xd7, 0xad, 0x23, 0x59, 0x22, 0x58, 0xd6, 0xac, + 0xd3, 0xa9, 0x27, 0x5d, 0x26, 0x5c, 0xd2, 0xa8, 0x24, 0x5e, + 0xd0, 0xaa, 0xd1, 0xab, 0x25, 0x5f, 0xdb, 0xa1, 0x2f, 0x55, + 0x2e, 0x54, 0xda, 0xa0, 0x2c, 0x56, 0xd8, 0xa2, 0xd9, 0xa3, + 0x2d, 0x57, 0x28, 0x52, 0xdc, 0xa6, 0xdd, 0xa7, 0x29, 0x53, + 0xdf, 0xa5, 0x2b, 0x51, 0x2a, 0x50, 0xde, 0xa4, 0x00, 0x7b, + 0xf6, 0x8d, 0xf1, 0x8a, 0x07, 0x7c, 0xff, 0x84, 0x09, 0x72, + 0x0e, 0x75, 0xf8, 0x83, 0xe3, 0x98, 0x15, 0x6e, 0x12, 0x69, + 0xe4, 0x9f, 0x1c, 0x67, 0xea, 0x91, 0xed, 0x96, 0x1b, 0x60, + 0xdb, 0xa0, 0x2d, 
0x56, 0x2a, 0x51, 0xdc, 0xa7, 0x24, 0x5f, + 0xd2, 0xa9, 0xd5, 0xae, 0x23, 0x58, 0x38, 0x43, 0xce, 0xb5, + 0xc9, 0xb2, 0x3f, 0x44, 0xc7, 0xbc, 0x31, 0x4a, 0x36, 0x4d, + 0xc0, 0xbb, 0xab, 0xd0, 0x5d, 0x26, 0x5a, 0x21, 0xac, 0xd7, + 0x54, 0x2f, 0xa2, 0xd9, 0xa5, 0xde, 0x53, 0x28, 0x48, 0x33, + 0xbe, 0xc5, 0xb9, 0xc2, 0x4f, 0x34, 0xb7, 0xcc, 0x41, 0x3a, + 0x46, 0x3d, 0xb0, 0xcb, 0x70, 0x0b, 0x86, 0xfd, 0x81, 0xfa, + 0x77, 0x0c, 0x8f, 0xf4, 0x79, 0x02, 0x7e, 0x05, 0x88, 0xf3, + 0x93, 0xe8, 0x65, 0x1e, 0x62, 0x19, 0x94, 0xef, 0x6c, 0x17, + 0x9a, 0xe1, 0x9d, 0xe6, 0x6b, 0x10, 0x4b, 0x30, 0xbd, 0xc6, + 0xba, 0xc1, 0x4c, 0x37, 0xb4, 0xcf, 0x42, 0x39, 0x45, 0x3e, + 0xb3, 0xc8, 0xa8, 0xd3, 0x5e, 0x25, 0x59, 0x22, 0xaf, 0xd4, + 0x57, 0x2c, 0xa1, 0xda, 0xa6, 0xdd, 0x50, 0x2b, 0x90, 0xeb, + 0x66, 0x1d, 0x61, 0x1a, 0x97, 0xec, 0x6f, 0x14, 0x99, 0xe2, + 0x9e, 0xe5, 0x68, 0x13, 0x73, 0x08, 0x85, 0xfe, 0x82, 0xf9, + 0x74, 0x0f, 0x8c, 0xf7, 0x7a, 0x01, 0x7d, 0x06, 0x8b, 0xf0, + 0xe0, 0x9b, 0x16, 0x6d, 0x11, 0x6a, 0xe7, 0x9c, 0x1f, 0x64, + 0xe9, 0x92, 0xee, 0x95, 0x18, 0x63, 0x03, 0x78, 0xf5, 0x8e, + 0xf2, 0x89, 0x04, 0x7f, 0xfc, 0x87, 0x0a, 0x71, 0x0d, 0x76, + 0xfb, 0x80, 0x3b, 0x40, 0xcd, 0xb6, 0xca, 0xb1, 0x3c, 0x47, + 0xc4, 0xbf, 0x32, 0x49, 0x35, 0x4e, 0xc3, 0xb8, 0xd8, 0xa3, + 0x2e, 0x55, 0x29, 0x52, 0xdf, 0xa4, 0x27, 0x5c, 0xd1, 0xaa, + 0xd6, 0xad, 0x20, 0x5b, 0x00, 0x7c, 0xf8, 0x84, 0xed, 0x91, + 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae, + 0x93, 0xef, 0x6b, 0x17, 0x7e, 0x02, 0x86, 0xfa, 0x54, 0x28, + 0xac, 0xd0, 0xb9, 0xc5, 0x41, 0x3d, 0x3b, 0x47, 0xc3, 0xbf, + 0xd6, 0xaa, 0x2e, 0x52, 0xfc, 0x80, 0x04, 0x78, 0x11, 0x6d, + 0xe9, 0x95, 0xa8, 0xd4, 0x50, 0x2c, 0x45, 0x39, 0xbd, 0xc1, + 0x6f, 0x13, 0x97, 0xeb, 0x82, 0xfe, 0x7a, 0x06, 0x76, 0x0a, + 0x8e, 0xf2, 0x9b, 0xe7, 0x63, 0x1f, 0xb1, 0xcd, 0x49, 0x35, + 0x5c, 0x20, 0xa4, 0xd8, 0xe5, 0x99, 0x1d, 0x61, 0x08, 0x74, + 0xf0, 0x8c, 0x22, 0x5e, 0xda, 0xa6, 0xcf, 0xb3, 0x37, 0x4b, + 0x4d, 0x31, 0xb5, 0xc9, 0xa0, 0xdc, 0x58, 0x24, 0x8a, 0xf6, + 0x72, 0x0e, 0x67, 0x1b, 0x9f, 0xe3, 0xde, 0xa2, 0x26, 0x5a, + 0x33, 0x4f, 0xcb, 0xb7, 0x19, 0x65, 0xe1, 0x9d, 0xf4, 0x88, + 0x0c, 0x70, 0xec, 0x90, 0x14, 0x68, 0x01, 0x7d, 0xf9, 0x85, + 0x2b, 0x57, 0xd3, 0xaf, 0xc6, 0xba, 0x3e, 0x42, 0x7f, 0x03, + 0x87, 0xfb, 0x92, 0xee, 0x6a, 0x16, 0xb8, 0xc4, 0x40, 0x3c, + 0x55, 0x29, 0xad, 0xd1, 0xd7, 0xab, 0x2f, 0x53, 0x3a, 0x46, + 0xc2, 0xbe, 0x10, 0x6c, 0xe8, 0x94, 0xfd, 0x81, 0x05, 0x79, + 0x44, 0x38, 0xbc, 0xc0, 0xa9, 0xd5, 0x51, 0x2d, 0x83, 0xff, + 0x7b, 0x07, 0x6e, 0x12, 0x96, 0xea, 0x9a, 0xe6, 0x62, 0x1e, + 0x77, 0x0b, 0x8f, 0xf3, 0x5d, 0x21, 0xa5, 0xd9, 0xb0, 0xcc, + 0x48, 0x34, 0x09, 0x75, 0xf1, 0x8d, 0xe4, 0x98, 0x1c, 0x60, + 0xce, 0xb2, 0x36, 0x4a, 0x23, 0x5f, 0xdb, 0xa7, 0xa1, 0xdd, + 0x59, 0x25, 0x4c, 0x30, 0xb4, 0xc8, 0x66, 0x1a, 0x9e, 0xe2, + 0x8b, 0xf7, 0x73, 0x0f, 0x32, 0x4e, 0xca, 0xb6, 0xdf, 0xa3, + 0x27, 0x5b, 0xf5, 0x89, 0x0d, 0x71, 0x18, 0x64, 0xe0, 0x9c, + 0x00, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, + 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1, 0x83, 0xfe, 0x79, 0x04, + 0x6a, 0x17, 0x90, 0xed, 0x4c, 0x31, 0xb6, 0xcb, 0xa5, 0xd8, + 0x5f, 0x22, 0x1b, 0x66, 0xe1, 0x9c, 0xf2, 0x8f, 0x08, 0x75, + 0xd4, 0xa9, 0x2e, 0x53, 0x3d, 0x40, 0xc7, 0xba, 0x98, 0xe5, + 0x62, 0x1f, 0x71, 0x0c, 0x8b, 0xf6, 0x57, 0x2a, 0xad, 0xd0, + 0xbe, 0xc3, 0x44, 0x39, 0x36, 0x4b, 0xcc, 0xb1, 0xdf, 0xa2, + 0x25, 0x58, 0xf9, 0x84, 0x03, 0x7e, 0x10, 0x6d, 0xea, 0x97, + 0xb5, 0xc8, 0x4f, 0x32, 0x5c, 0x21, 0xa6, 0xdb, 0x7a, 0x07, + 0x80, 0xfd, 0x93, 0xee, 0x69, 0x14, 
0x2d, 0x50, 0xd7, 0xaa, + 0xc4, 0xb9, 0x3e, 0x43, 0xe2, 0x9f, 0x18, 0x65, 0x0b, 0x76, + 0xf1, 0x8c, 0xae, 0xd3, 0x54, 0x29, 0x47, 0x3a, 0xbd, 0xc0, + 0x61, 0x1c, 0x9b, 0xe6, 0x88, 0xf5, 0x72, 0x0f, 0x6c, 0x11, + 0x96, 0xeb, 0x85, 0xf8, 0x7f, 0x02, 0xa3, 0xde, 0x59, 0x24, + 0x4a, 0x37, 0xb0, 0xcd, 0xef, 0x92, 0x15, 0x68, 0x06, 0x7b, + 0xfc, 0x81, 0x20, 0x5d, 0xda, 0xa7, 0xc9, 0xb4, 0x33, 0x4e, + 0x77, 0x0a, 0x8d, 0xf0, 0x9e, 0xe3, 0x64, 0x19, 0xb8, 0xc5, + 0x42, 0x3f, 0x51, 0x2c, 0xab, 0xd6, 0xf4, 0x89, 0x0e, 0x73, + 0x1d, 0x60, 0xe7, 0x9a, 0x3b, 0x46, 0xc1, 0xbc, 0xd2, 0xaf, + 0x28, 0x55, 0x5a, 0x27, 0xa0, 0xdd, 0xb3, 0xce, 0x49, 0x34, + 0x95, 0xe8, 0x6f, 0x12, 0x7c, 0x01, 0x86, 0xfb, 0xd9, 0xa4, + 0x23, 0x5e, 0x30, 0x4d, 0xca, 0xb7, 0x16, 0x6b, 0xec, 0x91, + 0xff, 0x82, 0x05, 0x78, 0x41, 0x3c, 0xbb, 0xc6, 0xa8, 0xd5, + 0x52, 0x2f, 0x8e, 0xf3, 0x74, 0x09, 0x67, 0x1a, 0x9d, 0xe0, + 0xc2, 0xbf, 0x38, 0x45, 0x2b, 0x56, 0xd1, 0xac, 0x0d, 0x70, + 0xf7, 0x8a, 0xe4, 0x99, 0x1e, 0x63, 0x00, 0x7e, 0xfc, 0x82, + 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, + 0xce, 0xb0, 0xb3, 0xcd, 0x4f, 0x31, 0x56, 0x28, 0xaa, 0xd4, + 0x64, 0x1a, 0x98, 0xe6, 0x81, 0xff, 0x7d, 0x03, 0x7b, 0x05, + 0x87, 0xf9, 0x9e, 0xe0, 0x62, 0x1c, 0xac, 0xd2, 0x50, 0x2e, + 0x49, 0x37, 0xb5, 0xcb, 0xc8, 0xb6, 0x34, 0x4a, 0x2d, 0x53, + 0xd1, 0xaf, 0x1f, 0x61, 0xe3, 0x9d, 0xfa, 0x84, 0x06, 0x78, + 0xf6, 0x88, 0x0a, 0x74, 0x13, 0x6d, 0xef, 0x91, 0x21, 0x5f, + 0xdd, 0xa3, 0xc4, 0xba, 0x38, 0x46, 0x45, 0x3b, 0xb9, 0xc7, + 0xa0, 0xde, 0x5c, 0x22, 0x92, 0xec, 0x6e, 0x10, 0x77, 0x09, + 0x8b, 0xf5, 0x8d, 0xf3, 0x71, 0x0f, 0x68, 0x16, 0x94, 0xea, + 0x5a, 0x24, 0xa6, 0xd8, 0xbf, 0xc1, 0x43, 0x3d, 0x3e, 0x40, + 0xc2, 0xbc, 0xdb, 0xa5, 0x27, 0x59, 0xe9, 0x97, 0x15, 0x6b, + 0x0c, 0x72, 0xf0, 0x8e, 0xf1, 0x8f, 0x0d, 0x73, 0x14, 0x6a, + 0xe8, 0x96, 0x26, 0x58, 0xda, 0xa4, 0xc3, 0xbd, 0x3f, 0x41, + 0x42, 0x3c, 0xbe, 0xc0, 0xa7, 0xd9, 0x5b, 0x25, 0x95, 0xeb, + 0x69, 0x17, 0x70, 0x0e, 0x8c, 0xf2, 0x8a, 0xf4, 0x76, 0x08, + 0x6f, 0x11, 0x93, 0xed, 0x5d, 0x23, 0xa1, 0xdf, 0xb8, 0xc6, + 0x44, 0x3a, 0x39, 0x47, 0xc5, 0xbb, 0xdc, 0xa2, 0x20, 0x5e, + 0xee, 0x90, 0x12, 0x6c, 0x0b, 0x75, 0xf7, 0x89, 0x07, 0x79, + 0xfb, 0x85, 0xe2, 0x9c, 0x1e, 0x60, 0xd0, 0xae, 0x2c, 0x52, + 0x35, 0x4b, 0xc9, 0xb7, 0xb4, 0xca, 0x48, 0x36, 0x51, 0x2f, + 0xad, 0xd3, 0x63, 0x1d, 0x9f, 0xe1, 0x86, 0xf8, 0x7a, 0x04, + 0x7c, 0x02, 0x80, 0xfe, 0x99, 0xe7, 0x65, 0x1b, 0xab, 0xd5, + 0x57, 0x29, 0x4e, 0x30, 0xb2, 0xcc, 0xcf, 0xb1, 0x33, 0x4d, + 0x2a, 0x54, 0xd6, 0xa8, 0x18, 0x66, 0xe4, 0x9a, 0xfd, 0x83, + 0x01, 0x7f, 0x00, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, + 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf, 0xa3, 0xdc, + 0x5d, 0x22, 0x42, 0x3d, 0xbc, 0xc3, 0x7c, 0x03, 0x82, 0xfd, + 0x9d, 0xe2, 0x63, 0x1c, 0x5b, 0x24, 0xa5, 0xda, 0xba, 0xc5, + 0x44, 0x3b, 0x84, 0xfb, 0x7a, 0x05, 0x65, 0x1a, 0x9b, 0xe4, + 0xf8, 0x87, 0x06, 0x79, 0x19, 0x66, 0xe7, 0x98, 0x27, 0x58, + 0xd9, 0xa6, 0xc6, 0xb9, 0x38, 0x47, 0xb6, 0xc9, 0x48, 0x37, + 0x57, 0x28, 0xa9, 0xd6, 0x69, 0x16, 0x97, 0xe8, 0x88, 0xf7, + 0x76, 0x09, 0x15, 0x6a, 0xeb, 0x94, 0xf4, 0x8b, 0x0a, 0x75, + 0xca, 0xb5, 0x34, 0x4b, 0x2b, 0x54, 0xd5, 0xaa, 0xed, 0x92, + 0x13, 0x6c, 0x0c, 0x73, 0xf2, 0x8d, 0x32, 0x4d, 0xcc, 0xb3, + 0xd3, 0xac, 0x2d, 0x52, 0x4e, 0x31, 0xb0, 0xcf, 0xaf, 0xd0, + 0x51, 0x2e, 0x91, 0xee, 0x6f, 0x10, 0x70, 0x0f, 0x8e, 0xf1, + 0x71, 0x0e, 0x8f, 0xf0, 0x90, 0xef, 0x6e, 0x11, 0xae, 0xd1, + 0x50, 0x2f, 0x4f, 0x30, 0xb1, 0xce, 0xd2, 0xad, 0x2c, 0x53, + 0x33, 0x4c, 0xcd, 0xb2, 0x0d, 0x72, 0xf3, 0x8c, 0xec, 
0x93, + 0x12, 0x6d, 0x2a, 0x55, 0xd4, 0xab, 0xcb, 0xb4, 0x35, 0x4a, + 0xf5, 0x8a, 0x0b, 0x74, 0x14, 0x6b, 0xea, 0x95, 0x89, 0xf6, + 0x77, 0x08, 0x68, 0x17, 0x96, 0xe9, 0x56, 0x29, 0xa8, 0xd7, + 0xb7, 0xc8, 0x49, 0x36, 0xc7, 0xb8, 0x39, 0x46, 0x26, 0x59, + 0xd8, 0xa7, 0x18, 0x67, 0xe6, 0x99, 0xf9, 0x86, 0x07, 0x78, + 0x64, 0x1b, 0x9a, 0xe5, 0x85, 0xfa, 0x7b, 0x04, 0xbb, 0xc4, + 0x45, 0x3a, 0x5a, 0x25, 0xa4, 0xdb, 0x9c, 0xe3, 0x62, 0x1d, + 0x7d, 0x02, 0x83, 0xfc, 0x43, 0x3c, 0xbd, 0xc2, 0xa2, 0xdd, + 0x5c, 0x23, 0x3f, 0x40, 0xc1, 0xbe, 0xde, 0xa1, 0x20, 0x5f, + 0xe0, 0x9f, 0x1e, 0x61, 0x01, 0x7e, 0xff, 0x80, 0x00, 0x80, + 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, + 0x4e, 0xce, 0x53, 0xd3, 0xe8, 0x68, 0xf5, 0x75, 0xd2, 0x52, + 0xcf, 0x4f, 0x9c, 0x1c, 0x81, 0x01, 0xa6, 0x26, 0xbb, 0x3b, + 0xcd, 0x4d, 0xd0, 0x50, 0xf7, 0x77, 0xea, 0x6a, 0xb9, 0x39, + 0xa4, 0x24, 0x83, 0x03, 0x9e, 0x1e, 0x25, 0xa5, 0x38, 0xb8, + 0x1f, 0x9f, 0x02, 0x82, 0x51, 0xd1, 0x4c, 0xcc, 0x6b, 0xeb, + 0x76, 0xf6, 0x87, 0x07, 0x9a, 0x1a, 0xbd, 0x3d, 0xa0, 0x20, + 0xf3, 0x73, 0xee, 0x6e, 0xc9, 0x49, 0xd4, 0x54, 0x6f, 0xef, + 0x72, 0xf2, 0x55, 0xd5, 0x48, 0xc8, 0x1b, 0x9b, 0x06, 0x86, + 0x21, 0xa1, 0x3c, 0xbc, 0x4a, 0xca, 0x57, 0xd7, 0x70, 0xf0, + 0x6d, 0xed, 0x3e, 0xbe, 0x23, 0xa3, 0x04, 0x84, 0x19, 0x99, + 0xa2, 0x22, 0xbf, 0x3f, 0x98, 0x18, 0x85, 0x05, 0xd6, 0x56, + 0xcb, 0x4b, 0xec, 0x6c, 0xf1, 0x71, 0x13, 0x93, 0x0e, 0x8e, + 0x29, 0xa9, 0x34, 0xb4, 0x67, 0xe7, 0x7a, 0xfa, 0x5d, 0xdd, + 0x40, 0xc0, 0xfb, 0x7b, 0xe6, 0x66, 0xc1, 0x41, 0xdc, 0x5c, + 0x8f, 0x0f, 0x92, 0x12, 0xb5, 0x35, 0xa8, 0x28, 0xde, 0x5e, + 0xc3, 0x43, 0xe4, 0x64, 0xf9, 0x79, 0xaa, 0x2a, 0xb7, 0x37, + 0x90, 0x10, 0x8d, 0x0d, 0x36, 0xb6, 0x2b, 0xab, 0x0c, 0x8c, + 0x11, 0x91, 0x42, 0xc2, 0x5f, 0xdf, 0x78, 0xf8, 0x65, 0xe5, + 0x94, 0x14, 0x89, 0x09, 0xae, 0x2e, 0xb3, 0x33, 0xe0, 0x60, + 0xfd, 0x7d, 0xda, 0x5a, 0xc7, 0x47, 0x7c, 0xfc, 0x61, 0xe1, + 0x46, 0xc6, 0x5b, 0xdb, 0x08, 0x88, 0x15, 0x95, 0x32, 0xb2, + 0x2f, 0xaf, 0x59, 0xd9, 0x44, 0xc4, 0x63, 0xe3, 0x7e, 0xfe, + 0x2d, 0xad, 0x30, 0xb0, 0x17, 0x97, 0x0a, 0x8a, 0xb1, 0x31, + 0xac, 0x2c, 0x8b, 0x0b, 0x96, 0x16, 0xc5, 0x45, 0xd8, 0x58, + 0xff, 0x7f, 0xe2, 0x62, 0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, + 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc, + 0xf8, 0x79, 0xe7, 0x66, 0xc6, 0x47, 0xd9, 0x58, 0x84, 0x05, + 0x9b, 0x1a, 0xba, 0x3b, 0xa5, 0x24, 0xed, 0x6c, 0xf2, 0x73, + 0xd3, 0x52, 0xcc, 0x4d, 0x91, 0x10, 0x8e, 0x0f, 0xaf, 0x2e, + 0xb0, 0x31, 0x15, 0x94, 0x0a, 0x8b, 0x2b, 0xaa, 0x34, 0xb5, + 0x69, 0xe8, 0x76, 0xf7, 0x57, 0xd6, 0x48, 0xc9, 0xc7, 0x46, + 0xd8, 0x59, 0xf9, 0x78, 0xe6, 0x67, 0xbb, 0x3a, 0xa4, 0x25, + 0x85, 0x04, 0x9a, 0x1b, 0x3f, 0xbe, 0x20, 0xa1, 0x01, 0x80, + 0x1e, 0x9f, 0x43, 0xc2, 0x5c, 0xdd, 0x7d, 0xfc, 0x62, 0xe3, + 0x2a, 0xab, 0x35, 0xb4, 0x14, 0x95, 0x0b, 0x8a, 0x56, 0xd7, + 0x49, 0xc8, 0x68, 0xe9, 0x77, 0xf6, 0xd2, 0x53, 0xcd, 0x4c, + 0xec, 0x6d, 0xf3, 0x72, 0xae, 0x2f, 0xb1, 0x30, 0x90, 0x11, + 0x8f, 0x0e, 0x93, 0x12, 0x8c, 0x0d, 0xad, 0x2c, 0xb2, 0x33, + 0xef, 0x6e, 0xf0, 0x71, 0xd1, 0x50, 0xce, 0x4f, 0x6b, 0xea, + 0x74, 0xf5, 0x55, 0xd4, 0x4a, 0xcb, 0x17, 0x96, 0x08, 0x89, + 0x29, 0xa8, 0x36, 0xb7, 0x7e, 0xff, 0x61, 0xe0, 0x40, 0xc1, + 0x5f, 0xde, 0x02, 0x83, 0x1d, 0x9c, 0x3c, 0xbd, 0x23, 0xa2, + 0x86, 0x07, 0x99, 0x18, 0xb8, 0x39, 0xa7, 0x26, 0xfa, 0x7b, + 0xe5, 0x64, 0xc4, 0x45, 0xdb, 0x5a, 0x54, 0xd5, 0x4b, 0xca, + 0x6a, 0xeb, 0x75, 0xf4, 0x28, 0xa9, 0x37, 0xb6, 0x16, 0x97, + 0x09, 0x88, 0xac, 0x2d, 0xb3, 0x32, 0x92, 0x13, 0x8d, 0x0c, + 0xd0, 0x51, 
0xcf, 0x4e, 0xee, 0x6f, 0xf1, 0x70, 0xb9, 0x38, + 0xa6, 0x27, 0x87, 0x06, 0x98, 0x19, 0xc5, 0x44, 0xda, 0x5b, + 0xfb, 0x7a, 0xe4, 0x65, 0x41, 0xc0, 0x5e, 0xdf, 0x7f, 0xfe, + 0x60, 0xe1, 0x3d, 0xbc, 0x22, 0xa3, 0x03, 0x82, 0x1c, 0x9d, + 0x00, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, + 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd, 0xc8, 0x4a, 0xd1, 0x53, + 0xfa, 0x78, 0xe3, 0x61, 0xac, 0x2e, 0xb5, 0x37, 0x9e, 0x1c, + 0x87, 0x05, 0x8d, 0x0f, 0x94, 0x16, 0xbf, 0x3d, 0xa6, 0x24, + 0xe9, 0x6b, 0xf0, 0x72, 0xdb, 0x59, 0xc2, 0x40, 0x45, 0xc7, + 0x5c, 0xde, 0x77, 0xf5, 0x6e, 0xec, 0x21, 0xa3, 0x38, 0xba, + 0x13, 0x91, 0x0a, 0x88, 0x07, 0x85, 0x1e, 0x9c, 0x35, 0xb7, + 0x2c, 0xae, 0x63, 0xe1, 0x7a, 0xf8, 0x51, 0xd3, 0x48, 0xca, + 0xcf, 0x4d, 0xd6, 0x54, 0xfd, 0x7f, 0xe4, 0x66, 0xab, 0x29, + 0xb2, 0x30, 0x99, 0x1b, 0x80, 0x02, 0x8a, 0x08, 0x93, 0x11, + 0xb8, 0x3a, 0xa1, 0x23, 0xee, 0x6c, 0xf7, 0x75, 0xdc, 0x5e, + 0xc5, 0x47, 0x42, 0xc0, 0x5b, 0xd9, 0x70, 0xf2, 0x69, 0xeb, + 0x26, 0xa4, 0x3f, 0xbd, 0x14, 0x96, 0x0d, 0x8f, 0x0e, 0x8c, + 0x17, 0x95, 0x3c, 0xbe, 0x25, 0xa7, 0x6a, 0xe8, 0x73, 0xf1, + 0x58, 0xda, 0x41, 0xc3, 0xc6, 0x44, 0xdf, 0x5d, 0xf4, 0x76, + 0xed, 0x6f, 0xa2, 0x20, 0xbb, 0x39, 0x90, 0x12, 0x89, 0x0b, + 0x83, 0x01, 0x9a, 0x18, 0xb1, 0x33, 0xa8, 0x2a, 0xe7, 0x65, + 0xfe, 0x7c, 0xd5, 0x57, 0xcc, 0x4e, 0x4b, 0xc9, 0x52, 0xd0, + 0x79, 0xfb, 0x60, 0xe2, 0x2f, 0xad, 0x36, 0xb4, 0x1d, 0x9f, + 0x04, 0x86, 0x09, 0x8b, 0x10, 0x92, 0x3b, 0xb9, 0x22, 0xa0, + 0x6d, 0xef, 0x74, 0xf6, 0x5f, 0xdd, 0x46, 0xc4, 0xc1, 0x43, + 0xd8, 0x5a, 0xf3, 0x71, 0xea, 0x68, 0xa5, 0x27, 0xbc, 0x3e, + 0x97, 0x15, 0x8e, 0x0c, 0x84, 0x06, 0x9d, 0x1f, 0xb6, 0x34, + 0xaf, 0x2d, 0xe0, 0x62, 0xf9, 0x7b, 0xd2, 0x50, 0xcb, 0x49, + 0x4c, 0xce, 0x55, 0xd7, 0x7e, 0xfc, 0x67, 0xe5, 0x28, 0xaa, + 0x31, 0xb3, 0x1a, 0x98, 0x03, 0x81, 0x00, 0x83, 0x1b, 0x98, + 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, + 0x41, 0xc2, 0xd8, 0x5b, 0xc3, 0x40, 0xee, 0x6d, 0xf5, 0x76, + 0xb4, 0x37, 0xaf, 0x2c, 0x82, 0x01, 0x99, 0x1a, 0xad, 0x2e, + 0xb6, 0x35, 0x9b, 0x18, 0x80, 0x03, 0xc1, 0x42, 0xda, 0x59, + 0xf7, 0x74, 0xec, 0x6f, 0x75, 0xf6, 0x6e, 0xed, 0x43, 0xc0, + 0x58, 0xdb, 0x19, 0x9a, 0x02, 0x81, 0x2f, 0xac, 0x34, 0xb7, + 0x47, 0xc4, 0x5c, 0xdf, 0x71, 0xf2, 0x6a, 0xe9, 0x2b, 0xa8, + 0x30, 0xb3, 0x1d, 0x9e, 0x06, 0x85, 0x9f, 0x1c, 0x84, 0x07, + 0xa9, 0x2a, 0xb2, 0x31, 0xf3, 0x70, 0xe8, 0x6b, 0xc5, 0x46, + 0xde, 0x5d, 0xea, 0x69, 0xf1, 0x72, 0xdc, 0x5f, 0xc7, 0x44, + 0x86, 0x05, 0x9d, 0x1e, 0xb0, 0x33, 0xab, 0x28, 0x32, 0xb1, + 0x29, 0xaa, 0x04, 0x87, 0x1f, 0x9c, 0x5e, 0xdd, 0x45, 0xc6, + 0x68, 0xeb, 0x73, 0xf0, 0x8e, 0x0d, 0x95, 0x16, 0xb8, 0x3b, + 0xa3, 0x20, 0xe2, 0x61, 0xf9, 0x7a, 0xd4, 0x57, 0xcf, 0x4c, + 0x56, 0xd5, 0x4d, 0xce, 0x60, 0xe3, 0x7b, 0xf8, 0x3a, 0xb9, + 0x21, 0xa2, 0x0c, 0x8f, 0x17, 0x94, 0x23, 0xa0, 0x38, 0xbb, + 0x15, 0x96, 0x0e, 0x8d, 0x4f, 0xcc, 0x54, 0xd7, 0x79, 0xfa, + 0x62, 0xe1, 0xfb, 0x78, 0xe0, 0x63, 0xcd, 0x4e, 0xd6, 0x55, + 0x97, 0x14, 0x8c, 0x0f, 0xa1, 0x22, 0xba, 0x39, 0xc9, 0x4a, + 0xd2, 0x51, 0xff, 0x7c, 0xe4, 0x67, 0xa5, 0x26, 0xbe, 0x3d, + 0x93, 0x10, 0x88, 0x0b, 0x11, 0x92, 0x0a, 0x89, 0x27, 0xa4, + 0x3c, 0xbf, 0x7d, 0xfe, 0x66, 0xe5, 0x4b, 0xc8, 0x50, 0xd3, + 0x64, 0xe7, 0x7f, 0xfc, 0x52, 0xd1, 0x49, 0xca, 0x08, 0x8b, + 0x13, 0x90, 0x3e, 0xbd, 0x25, 0xa6, 0xbc, 0x3f, 0xa7, 0x24, + 0x8a, 0x09, 0x91, 0x12, 0xd0, 0x53, 0xcb, 0x48, 0xe6, 0x65, + 0xfd, 0x7e, 0x00, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb, + 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef, 0xa8, 0x2c, + 0xbd, 0x39, 0x82, 0x06, 0x97, 
0x13, 0xfc, 0x78, 0xe9, 0x6d, + 0xd6, 0x52, 0xc3, 0x47, 0x4d, 0xc9, 0x58, 0xdc, 0x67, 0xe3, + 0x72, 0xf6, 0x19, 0x9d, 0x0c, 0x88, 0x33, 0xb7, 0x26, 0xa2, + 0xe5, 0x61, 0xf0, 0x74, 0xcf, 0x4b, 0xda, 0x5e, 0xb1, 0x35, + 0xa4, 0x20, 0x9b, 0x1f, 0x8e, 0x0a, 0x9a, 0x1e, 0x8f, 0x0b, + 0xb0, 0x34, 0xa5, 0x21, 0xce, 0x4a, 0xdb, 0x5f, 0xe4, 0x60, + 0xf1, 0x75, 0x32, 0xb6, 0x27, 0xa3, 0x18, 0x9c, 0x0d, 0x89, + 0x66, 0xe2, 0x73, 0xf7, 0x4c, 0xc8, 0x59, 0xdd, 0xd7, 0x53, + 0xc2, 0x46, 0xfd, 0x79, 0xe8, 0x6c, 0x83, 0x07, 0x96, 0x12, + 0xa9, 0x2d, 0xbc, 0x38, 0x7f, 0xfb, 0x6a, 0xee, 0x55, 0xd1, + 0x40, 0xc4, 0x2b, 0xaf, 0x3e, 0xba, 0x01, 0x85, 0x14, 0x90, + 0x29, 0xad, 0x3c, 0xb8, 0x03, 0x87, 0x16, 0x92, 0x7d, 0xf9, + 0x68, 0xec, 0x57, 0xd3, 0x42, 0xc6, 0x81, 0x05, 0x94, 0x10, + 0xab, 0x2f, 0xbe, 0x3a, 0xd5, 0x51, 0xc0, 0x44, 0xff, 0x7b, + 0xea, 0x6e, 0x64, 0xe0, 0x71, 0xf5, 0x4e, 0xca, 0x5b, 0xdf, + 0x30, 0xb4, 0x25, 0xa1, 0x1a, 0x9e, 0x0f, 0x8b, 0xcc, 0x48, + 0xd9, 0x5d, 0xe6, 0x62, 0xf3, 0x77, 0x98, 0x1c, 0x8d, 0x09, + 0xb2, 0x36, 0xa7, 0x23, 0xb3, 0x37, 0xa6, 0x22, 0x99, 0x1d, + 0x8c, 0x08, 0xe7, 0x63, 0xf2, 0x76, 0xcd, 0x49, 0xd8, 0x5c, + 0x1b, 0x9f, 0x0e, 0x8a, 0x31, 0xb5, 0x24, 0xa0, 0x4f, 0xcb, + 0x5a, 0xde, 0x65, 0xe1, 0x70, 0xf4, 0xfe, 0x7a, 0xeb, 0x6f, + 0xd4, 0x50, 0xc1, 0x45, 0xaa, 0x2e, 0xbf, 0x3b, 0x80, 0x04, + 0x95, 0x11, 0x56, 0xd2, 0x43, 0xc7, 0x7c, 0xf8, 0x69, 0xed, + 0x02, 0x86, 0x17, 0x93, 0x28, 0xac, 0x3d, 0xb9, 0x00, 0x85, + 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, + 0x72, 0xf7, 0x65, 0xe0, 0xb8, 0x3d, 0xaf, 0x2a, 0x96, 0x13, + 0x81, 0x04, 0xe4, 0x61, 0xf3, 0x76, 0xca, 0x4f, 0xdd, 0x58, + 0x6d, 0xe8, 0x7a, 0xff, 0x43, 0xc6, 0x54, 0xd1, 0x31, 0xb4, + 0x26, 0xa3, 0x1f, 0x9a, 0x08, 0x8d, 0xd5, 0x50, 0xc2, 0x47, + 0xfb, 0x7e, 0xec, 0x69, 0x89, 0x0c, 0x9e, 0x1b, 0xa7, 0x22, + 0xb0, 0x35, 0xda, 0x5f, 0xcd, 0x48, 0xf4, 0x71, 0xe3, 0x66, + 0x86, 0x03, 0x91, 0x14, 0xa8, 0x2d, 0xbf, 0x3a, 0x62, 0xe7, + 0x75, 0xf0, 0x4c, 0xc9, 0x5b, 0xde, 0x3e, 0xbb, 0x29, 0xac, + 0x10, 0x95, 0x07, 0x82, 0xb7, 0x32, 0xa0, 0x25, 0x99, 0x1c, + 0x8e, 0x0b, 0xeb, 0x6e, 0xfc, 0x79, 0xc5, 0x40, 0xd2, 0x57, + 0x0f, 0x8a, 0x18, 0x9d, 0x21, 0xa4, 0x36, 0xb3, 0x53, 0xd6, + 0x44, 0xc1, 0x7d, 0xf8, 0x6a, 0xef, 0xa9, 0x2c, 0xbe, 0x3b, + 0x87, 0x02, 0x90, 0x15, 0xf5, 0x70, 0xe2, 0x67, 0xdb, 0x5e, + 0xcc, 0x49, 0x11, 0x94, 0x06, 0x83, 0x3f, 0xba, 0x28, 0xad, + 0x4d, 0xc8, 0x5a, 0xdf, 0x63, 0xe6, 0x74, 0xf1, 0xc4, 0x41, + 0xd3, 0x56, 0xea, 0x6f, 0xfd, 0x78, 0x98, 0x1d, 0x8f, 0x0a, + 0xb6, 0x33, 0xa1, 0x24, 0x7c, 0xf9, 0x6b, 0xee, 0x52, 0xd7, + 0x45, 0xc0, 0x20, 0xa5, 0x37, 0xb2, 0x0e, 0x8b, 0x19, 0x9c, + 0x73, 0xf6, 0x64, 0xe1, 0x5d, 0xd8, 0x4a, 0xcf, 0x2f, 0xaa, + 0x38, 0xbd, 0x01, 0x84, 0x16, 0x93, 0xcb, 0x4e, 0xdc, 0x59, + 0xe5, 0x60, 0xf2, 0x77, 0x97, 0x12, 0x80, 0x05, 0xb9, 0x3c, + 0xae, 0x2b, 0x1e, 0x9b, 0x09, 0x8c, 0x30, 0xb5, 0x27, 0xa2, + 0x42, 0xc7, 0x55, 0xd0, 0x6c, 0xe9, 0x7b, 0xfe, 0xa6, 0x23, + 0xb1, 0x34, 0x88, 0x0d, 0x9f, 0x1a, 0xfa, 0x7f, 0xed, 0x68, + 0xd4, 0x51, 0xc3, 0x46, 0x00, 0x86, 0x11, 0x97, 0x22, 0xa4, + 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1, + 0x88, 0x0e, 0x99, 0x1f, 0xaa, 0x2c, 0xbb, 0x3d, 0xcc, 0x4a, + 0xdd, 0x5b, 0xee, 0x68, 0xff, 0x79, 0x0d, 0x8b, 0x1c, 0x9a, + 0x2f, 0xa9, 0x3e, 0xb8, 0x49, 0xcf, 0x58, 0xde, 0x6b, 0xed, + 0x7a, 0xfc, 0x85, 0x03, 0x94, 0x12, 0xa7, 0x21, 0xb6, 0x30, + 0xc1, 0x47, 0xd0, 0x56, 0xe3, 0x65, 0xf2, 0x74, 0x1a, 0x9c, + 0x0b, 0x8d, 0x38, 0xbe, 0x29, 0xaf, 0x5e, 0xd8, 0x4f, 0xc9, + 0x7c, 0xfa, 0x6d, 0xeb, 0x92, 0x14, 0x83, 0x05, 
0xb0, 0x36, + 0xa1, 0x27, 0xd6, 0x50, 0xc7, 0x41, 0xf4, 0x72, 0xe5, 0x63, + 0x17, 0x91, 0x06, 0x80, 0x35, 0xb3, 0x24, 0xa2, 0x53, 0xd5, + 0x42, 0xc4, 0x71, 0xf7, 0x60, 0xe6, 0x9f, 0x19, 0x8e, 0x08, + 0xbd, 0x3b, 0xac, 0x2a, 0xdb, 0x5d, 0xca, 0x4c, 0xf9, 0x7f, + 0xe8, 0x6e, 0x34, 0xb2, 0x25, 0xa3, 0x16, 0x90, 0x07, 0x81, + 0x70, 0xf6, 0x61, 0xe7, 0x52, 0xd4, 0x43, 0xc5, 0xbc, 0x3a, + 0xad, 0x2b, 0x9e, 0x18, 0x8f, 0x09, 0xf8, 0x7e, 0xe9, 0x6f, + 0xda, 0x5c, 0xcb, 0x4d, 0x39, 0xbf, 0x28, 0xae, 0x1b, 0x9d, + 0x0a, 0x8c, 0x7d, 0xfb, 0x6c, 0xea, 0x5f, 0xd9, 0x4e, 0xc8, + 0xb1, 0x37, 0xa0, 0x26, 0x93, 0x15, 0x82, 0x04, 0xf5, 0x73, + 0xe4, 0x62, 0xd7, 0x51, 0xc6, 0x40, 0x2e, 0xa8, 0x3f, 0xb9, + 0x0c, 0x8a, 0x1d, 0x9b, 0x6a, 0xec, 0x7b, 0xfd, 0x48, 0xce, + 0x59, 0xdf, 0xa6, 0x20, 0xb7, 0x31, 0x84, 0x02, 0x95, 0x13, + 0xe2, 0x64, 0xf3, 0x75, 0xc0, 0x46, 0xd1, 0x57, 0x23, 0xa5, + 0x32, 0xb4, 0x01, 0x87, 0x10, 0x96, 0x67, 0xe1, 0x76, 0xf0, + 0x45, 0xc3, 0x54, 0xd2, 0xab, 0x2d, 0xba, 0x3c, 0x89, 0x0f, + 0x98, 0x1e, 0xef, 0x69, 0xfe, 0x78, 0xcd, 0x4b, 0xdc, 0x5a, + 0x00, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, + 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe, 0x98, 0x1f, 0x8b, 0x0c, + 0xbe, 0x39, 0xad, 0x2a, 0xd4, 0x53, 0xc7, 0x40, 0xf2, 0x75, + 0xe1, 0x66, 0x2d, 0xaa, 0x3e, 0xb9, 0x0b, 0x8c, 0x18, 0x9f, + 0x61, 0xe6, 0x72, 0xf5, 0x47, 0xc0, 0x54, 0xd3, 0xb5, 0x32, + 0xa6, 0x21, 0x93, 0x14, 0x80, 0x07, 0xf9, 0x7e, 0xea, 0x6d, + 0xdf, 0x58, 0xcc, 0x4b, 0x5a, 0xdd, 0x49, 0xce, 0x7c, 0xfb, + 0x6f, 0xe8, 0x16, 0x91, 0x05, 0x82, 0x30, 0xb7, 0x23, 0xa4, + 0xc2, 0x45, 0xd1, 0x56, 0xe4, 0x63, 0xf7, 0x70, 0x8e, 0x09, + 0x9d, 0x1a, 0xa8, 0x2f, 0xbb, 0x3c, 0x77, 0xf0, 0x64, 0xe3, + 0x51, 0xd6, 0x42, 0xc5, 0x3b, 0xbc, 0x28, 0xaf, 0x1d, 0x9a, + 0x0e, 0x89, 0xef, 0x68, 0xfc, 0x7b, 0xc9, 0x4e, 0xda, 0x5d, + 0xa3, 0x24, 0xb0, 0x37, 0x85, 0x02, 0x96, 0x11, 0xb4, 0x33, + 0xa7, 0x20, 0x92, 0x15, 0x81, 0x06, 0xf8, 0x7f, 0xeb, 0x6c, + 0xde, 0x59, 0xcd, 0x4a, 0x2c, 0xab, 0x3f, 0xb8, 0x0a, 0x8d, + 0x19, 0x9e, 0x60, 0xe7, 0x73, 0xf4, 0x46, 0xc1, 0x55, 0xd2, + 0x99, 0x1e, 0x8a, 0x0d, 0xbf, 0x38, 0xac, 0x2b, 0xd5, 0x52, + 0xc6, 0x41, 0xf3, 0x74, 0xe0, 0x67, 0x01, 0x86, 0x12, 0x95, + 0x27, 0xa0, 0x34, 0xb3, 0x4d, 0xca, 0x5e, 0xd9, 0x6b, 0xec, + 0x78, 0xff, 0xee, 0x69, 0xfd, 0x7a, 0xc8, 0x4f, 0xdb, 0x5c, + 0xa2, 0x25, 0xb1, 0x36, 0x84, 0x03, 0x97, 0x10, 0x76, 0xf1, + 0x65, 0xe2, 0x50, 0xd7, 0x43, 0xc4, 0x3a, 0xbd, 0x29, 0xae, + 0x1c, 0x9b, 0x0f, 0x88, 0xc3, 0x44, 0xd0, 0x57, 0xe5, 0x62, + 0xf6, 0x71, 0x8f, 0x08, 0x9c, 0x1b, 0xa9, 0x2e, 0xba, 0x3d, + 0x5b, 0xdc, 0x48, 0xcf, 0x7d, 0xfa, 0x6e, 0xe9, 0x17, 0x90, + 0x04, 0x83, 0x31, 0xb6, 0x22, 0xa5, 0x00, 0x88, 0x0d, 0x85, + 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, + 0x23, 0xab, 0x68, 0xe0, 0x65, 0xed, 0x72, 0xfa, 0x7f, 0xf7, + 0x5c, 0xd4, 0x51, 0xd9, 0x46, 0xce, 0x4b, 0xc3, 0xd0, 0x58, + 0xdd, 0x55, 0xca, 0x42, 0xc7, 0x4f, 0xe4, 0x6c, 0xe9, 0x61, + 0xfe, 0x76, 0xf3, 0x7b, 0xb8, 0x30, 0xb5, 0x3d, 0xa2, 0x2a, + 0xaf, 0x27, 0x8c, 0x04, 0x81, 0x09, 0x96, 0x1e, 0x9b, 0x13, + 0xbd, 0x35, 0xb0, 0x38, 0xa7, 0x2f, 0xaa, 0x22, 0x89, 0x01, + 0x84, 0x0c, 0x93, 0x1b, 0x9e, 0x16, 0xd5, 0x5d, 0xd8, 0x50, + 0xcf, 0x47, 0xc2, 0x4a, 0xe1, 0x69, 0xec, 0x64, 0xfb, 0x73, + 0xf6, 0x7e, 0x6d, 0xe5, 0x60, 0xe8, 0x77, 0xff, 0x7a, 0xf2, + 0x59, 0xd1, 0x54, 0xdc, 0x43, 0xcb, 0x4e, 0xc6, 0x05, 0x8d, + 0x08, 0x80, 0x1f, 0x97, 0x12, 0x9a, 0x31, 0xb9, 0x3c, 0xb4, + 0x2b, 0xa3, 0x26, 0xae, 0x67, 0xef, 0x6a, 0xe2, 0x7d, 0xf5, + 0x70, 0xf8, 0x53, 0xdb, 0x5e, 0xd6, 0x49, 0xc1, 0x44, 0xcc, + 0x0f, 
0x87, 0x02, 0x8a, 0x15, 0x9d, 0x18, 0x90, 0x3b, 0xb3, + 0x36, 0xbe, 0x21, 0xa9, 0x2c, 0xa4, 0xb7, 0x3f, 0xba, 0x32, + 0xad, 0x25, 0xa0, 0x28, 0x83, 0x0b, 0x8e, 0x06, 0x99, 0x11, + 0x94, 0x1c, 0xdf, 0x57, 0xd2, 0x5a, 0xc5, 0x4d, 0xc8, 0x40, + 0xeb, 0x63, 0xe6, 0x6e, 0xf1, 0x79, 0xfc, 0x74, 0xda, 0x52, + 0xd7, 0x5f, 0xc0, 0x48, 0xcd, 0x45, 0xee, 0x66, 0xe3, 0x6b, + 0xf4, 0x7c, 0xf9, 0x71, 0xb2, 0x3a, 0xbf, 0x37, 0xa8, 0x20, + 0xa5, 0x2d, 0x86, 0x0e, 0x8b, 0x03, 0x9c, 0x14, 0x91, 0x19, + 0x0a, 0x82, 0x07, 0x8f, 0x10, 0x98, 0x1d, 0x95, 0x3e, 0xb6, + 0x33, 0xbb, 0x24, 0xac, 0x29, 0xa1, 0x62, 0xea, 0x6f, 0xe7, + 0x78, 0xf0, 0x75, 0xfd, 0x56, 0xde, 0x5b, 0xd3, 0x4c, 0xc4, + 0x41, 0xc9, 0x00, 0x89, 0x0f, 0x86, 0x1e, 0x97, 0x11, 0x98, + 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4, 0x78, 0xf1, + 0x77, 0xfe, 0x66, 0xef, 0x69, 0xe0, 0x44, 0xcd, 0x4b, 0xc2, + 0x5a, 0xd3, 0x55, 0xdc, 0xf0, 0x79, 0xff, 0x76, 0xee, 0x67, + 0xe1, 0x68, 0xcc, 0x45, 0xc3, 0x4a, 0xd2, 0x5b, 0xdd, 0x54, + 0x88, 0x01, 0x87, 0x0e, 0x96, 0x1f, 0x99, 0x10, 0xb4, 0x3d, + 0xbb, 0x32, 0xaa, 0x23, 0xa5, 0x2c, 0xfd, 0x74, 0xf2, 0x7b, + 0xe3, 0x6a, 0xec, 0x65, 0xc1, 0x48, 0xce, 0x47, 0xdf, 0x56, + 0xd0, 0x59, 0x85, 0x0c, 0x8a, 0x03, 0x9b, 0x12, 0x94, 0x1d, + 0xb9, 0x30, 0xb6, 0x3f, 0xa7, 0x2e, 0xa8, 0x21, 0x0d, 0x84, + 0x02, 0x8b, 0x13, 0x9a, 0x1c, 0x95, 0x31, 0xb8, 0x3e, 0xb7, + 0x2f, 0xa6, 0x20, 0xa9, 0x75, 0xfc, 0x7a, 0xf3, 0x6b, 0xe2, + 0x64, 0xed, 0x49, 0xc0, 0x46, 0xcf, 0x57, 0xde, 0x58, 0xd1, + 0xe7, 0x6e, 0xe8, 0x61, 0xf9, 0x70, 0xf6, 0x7f, 0xdb, 0x52, + 0xd4, 0x5d, 0xc5, 0x4c, 0xca, 0x43, 0x9f, 0x16, 0x90, 0x19, + 0x81, 0x08, 0x8e, 0x07, 0xa3, 0x2a, 0xac, 0x25, 0xbd, 0x34, + 0xb2, 0x3b, 0x17, 0x9e, 0x18, 0x91, 0x09, 0x80, 0x06, 0x8f, + 0x2b, 0xa2, 0x24, 0xad, 0x35, 0xbc, 0x3a, 0xb3, 0x6f, 0xe6, + 0x60, 0xe9, 0x71, 0xf8, 0x7e, 0xf7, 0x53, 0xda, 0x5c, 0xd5, + 0x4d, 0xc4, 0x42, 0xcb, 0x1a, 0x93, 0x15, 0x9c, 0x04, 0x8d, + 0x0b, 0x82, 0x26, 0xaf, 0x29, 0xa0, 0x38, 0xb1, 0x37, 0xbe, + 0x62, 0xeb, 0x6d, 0xe4, 0x7c, 0xf5, 0x73, 0xfa, 0x5e, 0xd7, + 0x51, 0xd8, 0x40, 0xc9, 0x4f, 0xc6, 0xea, 0x63, 0xe5, 0x6c, + 0xf4, 0x7d, 0xfb, 0x72, 0xd6, 0x5f, 0xd9, 0x50, 0xc8, 0x41, + 0xc7, 0x4e, 0x92, 0x1b, 0x9d, 0x14, 0x8c, 0x05, 0x83, 0x0a, + 0xae, 0x27, 0xa1, 0x28, 0xb0, 0x39, 0xbf, 0x36, 0x00, 0x8a, + 0x09, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, + 0x36, 0xbc, 0x3f, 0xb5, 0x48, 0xc2, 0x41, 0xcb, 0x5a, 0xd0, + 0x53, 0xd9, 0x6c, 0xe6, 0x65, 0xef, 0x7e, 0xf4, 0x77, 0xfd, + 0x90, 0x1a, 0x99, 0x13, 0x82, 0x08, 0x8b, 0x01, 0xb4, 0x3e, + 0xbd, 0x37, 0xa6, 0x2c, 0xaf, 0x25, 0xd8, 0x52, 0xd1, 0x5b, + 0xca, 0x40, 0xc3, 0x49, 0xfc, 0x76, 0xf5, 0x7f, 0xee, 0x64, + 0xe7, 0x6d, 0x3d, 0xb7, 0x34, 0xbe, 0x2f, 0xa5, 0x26, 0xac, + 0x19, 0x93, 0x10, 0x9a, 0x0b, 0x81, 0x02, 0x88, 0x75, 0xff, + 0x7c, 0xf6, 0x67, 0xed, 0x6e, 0xe4, 0x51, 0xdb, 0x58, 0xd2, + 0x43, 0xc9, 0x4a, 0xc0, 0xad, 0x27, 0xa4, 0x2e, 0xbf, 0x35, + 0xb6, 0x3c, 0x89, 0x03, 0x80, 0x0a, 0x9b, 0x11, 0x92, 0x18, + 0xe5, 0x6f, 0xec, 0x66, 0xf7, 0x7d, 0xfe, 0x74, 0xc1, 0x4b, + 0xc8, 0x42, 0xd3, 0x59, 0xda, 0x50, 0x7a, 0xf0, 0x73, 0xf9, + 0x68, 0xe2, 0x61, 0xeb, 0x5e, 0xd4, 0x57, 0xdd, 0x4c, 0xc6, + 0x45, 0xcf, 0x32, 0xb8, 0x3b, 0xb1, 0x20, 0xaa, 0x29, 0xa3, + 0x16, 0x9c, 0x1f, 0x95, 0x04, 0x8e, 0x0d, 0x87, 0xea, 0x60, + 0xe3, 0x69, 0xf8, 0x72, 0xf1, 0x7b, 0xce, 0x44, 0xc7, 0x4d, + 0xdc, 0x56, 0xd5, 0x5f, 0xa2, 0x28, 0xab, 0x21, 0xb0, 0x3a, + 0xb9, 0x33, 0x86, 0x0c, 0x8f, 0x05, 0x94, 0x1e, 0x9d, 0x17, + 0x47, 0xcd, 0x4e, 0xc4, 0x55, 0xdf, 0x5c, 0xd6, 0x63, 0xe9, + 0x6a, 0xe0, 0x71, 0xfb, 
0x78, 0xf2, 0x0f, 0x85, 0x06, 0x8c, + 0x1d, 0x97, 0x14, 0x9e, 0x2b, 0xa1, 0x22, 0xa8, 0x39, 0xb3, + 0x30, 0xba, 0xd7, 0x5d, 0xde, 0x54, 0xc5, 0x4f, 0xcc, 0x46, + 0xf3, 0x79, 0xfa, 0x70, 0xe1, 0x6b, 0xe8, 0x62, 0x9f, 0x15, + 0x96, 0x1c, 0x8d, 0x07, 0x84, 0x0e, 0xbb, 0x31, 0xb2, 0x38, + 0xa9, 0x23, 0xa0, 0x2a, 0x00, 0x8b, 0x0b, 0x80, 0x16, 0x9d, + 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba, + 0x58, 0xd3, 0x53, 0xd8, 0x4e, 0xc5, 0x45, 0xce, 0x74, 0xff, + 0x7f, 0xf4, 0x62, 0xe9, 0x69, 0xe2, 0xb0, 0x3b, 0xbb, 0x30, + 0xa6, 0x2d, 0xad, 0x26, 0x9c, 0x17, 0x97, 0x1c, 0x8a, 0x01, + 0x81, 0x0a, 0xe8, 0x63, 0xe3, 0x68, 0xfe, 0x75, 0xf5, 0x7e, + 0xc4, 0x4f, 0xcf, 0x44, 0xd2, 0x59, 0xd9, 0x52, 0x7d, 0xf6, + 0x76, 0xfd, 0x6b, 0xe0, 0x60, 0xeb, 0x51, 0xda, 0x5a, 0xd1, + 0x47, 0xcc, 0x4c, 0xc7, 0x25, 0xae, 0x2e, 0xa5, 0x33, 0xb8, + 0x38, 0xb3, 0x09, 0x82, 0x02, 0x89, 0x1f, 0x94, 0x14, 0x9f, + 0xcd, 0x46, 0xc6, 0x4d, 0xdb, 0x50, 0xd0, 0x5b, 0xe1, 0x6a, + 0xea, 0x61, 0xf7, 0x7c, 0xfc, 0x77, 0x95, 0x1e, 0x9e, 0x15, + 0x83, 0x08, 0x88, 0x03, 0xb9, 0x32, 0xb2, 0x39, 0xaf, 0x24, + 0xa4, 0x2f, 0xfa, 0x71, 0xf1, 0x7a, 0xec, 0x67, 0xe7, 0x6c, + 0xd6, 0x5d, 0xdd, 0x56, 0xc0, 0x4b, 0xcb, 0x40, 0xa2, 0x29, + 0xa9, 0x22, 0xb4, 0x3f, 0xbf, 0x34, 0x8e, 0x05, 0x85, 0x0e, + 0x98, 0x13, 0x93, 0x18, 0x4a, 0xc1, 0x41, 0xca, 0x5c, 0xd7, + 0x57, 0xdc, 0x66, 0xed, 0x6d, 0xe6, 0x70, 0xfb, 0x7b, 0xf0, + 0x12, 0x99, 0x19, 0x92, 0x04, 0x8f, 0x0f, 0x84, 0x3e, 0xb5, + 0x35, 0xbe, 0x28, 0xa3, 0x23, 0xa8, 0x87, 0x0c, 0x8c, 0x07, + 0x91, 0x1a, 0x9a, 0x11, 0xab, 0x20, 0xa0, 0x2b, 0xbd, 0x36, + 0xb6, 0x3d, 0xdf, 0x54, 0xd4, 0x5f, 0xc9, 0x42, 0xc2, 0x49, + 0xf3, 0x78, 0xf8, 0x73, 0xe5, 0x6e, 0xee, 0x65, 0x37, 0xbc, + 0x3c, 0xb7, 0x21, 0xaa, 0x2a, 0xa1, 0x1b, 0x90, 0x10, 0x9b, + 0x0d, 0x86, 0x06, 0x8d, 0x6f, 0xe4, 0x64, 0xef, 0x79, 0xf2, + 0x72, 0xf9, 0x43, 0xc8, 0x48, 0xc3, 0x55, 0xde, 0x5e, 0xd5, + 0x00, 0x8c, 0x05, 0x89, 0x0a, 0x86, 0x0f, 0x83, 0x14, 0x98, + 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97, 0x28, 0xa4, 0x2d, 0xa1, + 0x22, 0xae, 0x27, 0xab, 0x3c, 0xb0, 0x39, 0xb5, 0x36, 0xba, + 0x33, 0xbf, 0x50, 0xdc, 0x55, 0xd9, 0x5a, 0xd6, 0x5f, 0xd3, + 0x44, 0xc8, 0x41, 0xcd, 0x4e, 0xc2, 0x4b, 0xc7, 0x78, 0xf4, + 0x7d, 0xf1, 0x72, 0xfe, 0x77, 0xfb, 0x6c, 0xe0, 0x69, 0xe5, + 0x66, 0xea, 0x63, 0xef, 0xa0, 0x2c, 0xa5, 0x29, 0xaa, 0x26, + 0xaf, 0x23, 0xb4, 0x38, 0xb1, 0x3d, 0xbe, 0x32, 0xbb, 0x37, + 0x88, 0x04, 0x8d, 0x01, 0x82, 0x0e, 0x87, 0x0b, 0x9c, 0x10, + 0x99, 0x15, 0x96, 0x1a, 0x93, 0x1f, 0xf0, 0x7c, 0xf5, 0x79, + 0xfa, 0x76, 0xff, 0x73, 0xe4, 0x68, 0xe1, 0x6d, 0xee, 0x62, + 0xeb, 0x67, 0xd8, 0x54, 0xdd, 0x51, 0xd2, 0x5e, 0xd7, 0x5b, + 0xcc, 0x40, 0xc9, 0x45, 0xc6, 0x4a, 0xc3, 0x4f, 0x5d, 0xd1, + 0x58, 0xd4, 0x57, 0xdb, 0x52, 0xde, 0x49, 0xc5, 0x4c, 0xc0, + 0x43, 0xcf, 0x46, 0xca, 0x75, 0xf9, 0x70, 0xfc, 0x7f, 0xf3, + 0x7a, 0xf6, 0x61, 0xed, 0x64, 0xe8, 0x6b, 0xe7, 0x6e, 0xe2, + 0x0d, 0x81, 0x08, 0x84, 0x07, 0x8b, 0x02, 0x8e, 0x19, 0x95, + 0x1c, 0x90, 0x13, 0x9f, 0x16, 0x9a, 0x25, 0xa9, 0x20, 0xac, + 0x2f, 0xa3, 0x2a, 0xa6, 0x31, 0xbd, 0x34, 0xb8, 0x3b, 0xb7, + 0x3e, 0xb2, 0xfd, 0x71, 0xf8, 0x74, 0xf7, 0x7b, 0xf2, 0x7e, + 0xe9, 0x65, 0xec, 0x60, 0xe3, 0x6f, 0xe6, 0x6a, 0xd5, 0x59, + 0xd0, 0x5c, 0xdf, 0x53, 0xda, 0x56, 0xc1, 0x4d, 0xc4, 0x48, + 0xcb, 0x47, 0xce, 0x42, 0xad, 0x21, 0xa8, 0x24, 0xa7, 0x2b, + 0xa2, 0x2e, 0xb9, 0x35, 0xbc, 0x30, 0xb3, 0x3f, 0xb6, 0x3a, + 0x85, 0x09, 0x80, 0x0c, 0x8f, 0x03, 0x8a, 0x06, 0x91, 0x1d, + 0x94, 0x18, 0x9b, 0x17, 0x9e, 0x12, 0x00, 0x8d, 0x07, 0x8a, + 0x0e, 0x83, 0x09, 0x84, 0x1c, 0x91, 0x1b, 
0x96, 0x12, 0x9f, + 0x15, 0x98, 0x38, 0xb5, 0x3f, 0xb2, 0x36, 0xbb, 0x31, 0xbc, + 0x24, 0xa9, 0x23, 0xae, 0x2a, 0xa7, 0x2d, 0xa0, 0x70, 0xfd, + 0x77, 0xfa, 0x7e, 0xf3, 0x79, 0xf4, 0x6c, 0xe1, 0x6b, 0xe6, + 0x62, 0xef, 0x65, 0xe8, 0x48, 0xc5, 0x4f, 0xc2, 0x46, 0xcb, + 0x41, 0xcc, 0x54, 0xd9, 0x53, 0xde, 0x5a, 0xd7, 0x5d, 0xd0, + 0xe0, 0x6d, 0xe7, 0x6a, 0xee, 0x63, 0xe9, 0x64, 0xfc, 0x71, + 0xfb, 0x76, 0xf2, 0x7f, 0xf5, 0x78, 0xd8, 0x55, 0xdf, 0x52, + 0xd6, 0x5b, 0xd1, 0x5c, 0xc4, 0x49, 0xc3, 0x4e, 0xca, 0x47, + 0xcd, 0x40, 0x90, 0x1d, 0x97, 0x1a, 0x9e, 0x13, 0x99, 0x14, + 0x8c, 0x01, 0x8b, 0x06, 0x82, 0x0f, 0x85, 0x08, 0xa8, 0x25, + 0xaf, 0x22, 0xa6, 0x2b, 0xa1, 0x2c, 0xb4, 0x39, 0xb3, 0x3e, + 0xba, 0x37, 0xbd, 0x30, 0xdd, 0x50, 0xda, 0x57, 0xd3, 0x5e, + 0xd4, 0x59, 0xc1, 0x4c, 0xc6, 0x4b, 0xcf, 0x42, 0xc8, 0x45, + 0xe5, 0x68, 0xe2, 0x6f, 0xeb, 0x66, 0xec, 0x61, 0xf9, 0x74, + 0xfe, 0x73, 0xf7, 0x7a, 0xf0, 0x7d, 0xad, 0x20, 0xaa, 0x27, + 0xa3, 0x2e, 0xa4, 0x29, 0xb1, 0x3c, 0xb6, 0x3b, 0xbf, 0x32, + 0xb8, 0x35, 0x95, 0x18, 0x92, 0x1f, 0x9b, 0x16, 0x9c, 0x11, + 0x89, 0x04, 0x8e, 0x03, 0x87, 0x0a, 0x80, 0x0d, 0x3d, 0xb0, + 0x3a, 0xb7, 0x33, 0xbe, 0x34, 0xb9, 0x21, 0xac, 0x26, 0xab, + 0x2f, 0xa2, 0x28, 0xa5, 0x05, 0x88, 0x02, 0x8f, 0x0b, 0x86, + 0x0c, 0x81, 0x19, 0x94, 0x1e, 0x93, 0x17, 0x9a, 0x10, 0x9d, + 0x4d, 0xc0, 0x4a, 0xc7, 0x43, 0xce, 0x44, 0xc9, 0x51, 0xdc, + 0x56, 0xdb, 0x5f, 0xd2, 0x58, 0xd5, 0x75, 0xf8, 0x72, 0xff, + 0x7b, 0xf6, 0x7c, 0xf1, 0x69, 0xe4, 0x6e, 0xe3, 0x67, 0xea, + 0x60, 0xed, 0x00, 0x8e, 0x01, 0x8f, 0x02, 0x8c, 0x03, 0x8d, + 0x04, 0x8a, 0x05, 0x8b, 0x06, 0x88, 0x07, 0x89, 0x08, 0x86, + 0x09, 0x87, 0x0a, 0x84, 0x0b, 0x85, 0x0c, 0x82, 0x0d, 0x83, + 0x0e, 0x80, 0x0f, 0x81, 0x10, 0x9e, 0x11, 0x9f, 0x12, 0x9c, + 0x13, 0x9d, 0x14, 0x9a, 0x15, 0x9b, 0x16, 0x98, 0x17, 0x99, + 0x18, 0x96, 0x19, 0x97, 0x1a, 0x94, 0x1b, 0x95, 0x1c, 0x92, + 0x1d, 0x93, 0x1e, 0x90, 0x1f, 0x91, 0x20, 0xae, 0x21, 0xaf, + 0x22, 0xac, 0x23, 0xad, 0x24, 0xaa, 0x25, 0xab, 0x26, 0xa8, + 0x27, 0xa9, 0x28, 0xa6, 0x29, 0xa7, 0x2a, 0xa4, 0x2b, 0xa5, + 0x2c, 0xa2, 0x2d, 0xa3, 0x2e, 0xa0, 0x2f, 0xa1, 0x30, 0xbe, + 0x31, 0xbf, 0x32, 0xbc, 0x33, 0xbd, 0x34, 0xba, 0x35, 0xbb, + 0x36, 0xb8, 0x37, 0xb9, 0x38, 0xb6, 0x39, 0xb7, 0x3a, 0xb4, + 0x3b, 0xb5, 0x3c, 0xb2, 0x3d, 0xb3, 0x3e, 0xb0, 0x3f, 0xb1, + 0x40, 0xce, 0x41, 0xcf, 0x42, 0xcc, 0x43, 0xcd, 0x44, 0xca, + 0x45, 0xcb, 0x46, 0xc8, 0x47, 0xc9, 0x48, 0xc6, 0x49, 0xc7, + 0x4a, 0xc4, 0x4b, 0xc5, 0x4c, 0xc2, 0x4d, 0xc3, 0x4e, 0xc0, + 0x4f, 0xc1, 0x50, 0xde, 0x51, 0xdf, 0x52, 0xdc, 0x53, 0xdd, + 0x54, 0xda, 0x55, 0xdb, 0x56, 0xd8, 0x57, 0xd9, 0x58, 0xd6, + 0x59, 0xd7, 0x5a, 0xd4, 0x5b, 0xd5, 0x5c, 0xd2, 0x5d, 0xd3, + 0x5e, 0xd0, 0x5f, 0xd1, 0x60, 0xee, 0x61, 0xef, 0x62, 0xec, + 0x63, 0xed, 0x64, 0xea, 0x65, 0xeb, 0x66, 0xe8, 0x67, 0xe9, + 0x68, 0xe6, 0x69, 0xe7, 0x6a, 0xe4, 0x6b, 0xe5, 0x6c, 0xe2, + 0x6d, 0xe3, 0x6e, 0xe0, 0x6f, 0xe1, 0x70, 0xfe, 0x71, 0xff, + 0x72, 0xfc, 0x73, 0xfd, 0x74, 0xfa, 0x75, 0xfb, 0x76, 0xf8, + 0x77, 0xf9, 0x78, 0xf6, 0x79, 0xf7, 0x7a, 0xf4, 0x7b, 0xf5, + 0x7c, 0xf2, 0x7d, 0xf3, 0x7e, 0xf0, 0x7f, 0xf1, 0x00, 0x8f, + 0x03, 0x8c, 0x06, 0x89, 0x05, 0x8a, 0x0c, 0x83, 0x0f, 0x80, + 0x0a, 0x85, 0x09, 0x86, 0x18, 0x97, 0x1b, 0x94, 0x1e, 0x91, + 0x1d, 0x92, 0x14, 0x9b, 0x17, 0x98, 0x12, 0x9d, 0x11, 0x9e, + 0x30, 0xbf, 0x33, 0xbc, 0x36, 0xb9, 0x35, 0xba, 0x3c, 0xb3, + 0x3f, 0xb0, 0x3a, 0xb5, 0x39, 0xb6, 0x28, 0xa7, 0x2b, 0xa4, + 0x2e, 0xa1, 0x2d, 0xa2, 0x24, 0xab, 0x27, 0xa8, 0x22, 0xad, + 0x21, 0xae, 0x60, 0xef, 0x63, 0xec, 0x66, 0xe9, 0x65, 0xea, + 
0x6c, 0xe3, 0x6f, 0xe0, 0x6a, 0xe5, 0x69, 0xe6, 0x78, 0xf7, + 0x7b, 0xf4, 0x7e, 0xf1, 0x7d, 0xf2, 0x74, 0xfb, 0x77, 0xf8, + 0x72, 0xfd, 0x71, 0xfe, 0x50, 0xdf, 0x53, 0xdc, 0x56, 0xd9, + 0x55, 0xda, 0x5c, 0xd3, 0x5f, 0xd0, 0x5a, 0xd5, 0x59, 0xd6, + 0x48, 0xc7, 0x4b, 0xc4, 0x4e, 0xc1, 0x4d, 0xc2, 0x44, 0xcb, + 0x47, 0xc8, 0x42, 0xcd, 0x41, 0xce, 0xc0, 0x4f, 0xc3, 0x4c, + 0xc6, 0x49, 0xc5, 0x4a, 0xcc, 0x43, 0xcf, 0x40, 0xca, 0x45, + 0xc9, 0x46, 0xd8, 0x57, 0xdb, 0x54, 0xde, 0x51, 0xdd, 0x52, + 0xd4, 0x5b, 0xd7, 0x58, 0xd2, 0x5d, 0xd1, 0x5e, 0xf0, 0x7f, + 0xf3, 0x7c, 0xf6, 0x79, 0xf5, 0x7a, 0xfc, 0x73, 0xff, 0x70, + 0xfa, 0x75, 0xf9, 0x76, 0xe8, 0x67, 0xeb, 0x64, 0xee, 0x61, + 0xed, 0x62, 0xe4, 0x6b, 0xe7, 0x68, 0xe2, 0x6d, 0xe1, 0x6e, + 0xa0, 0x2f, 0xa3, 0x2c, 0xa6, 0x29, 0xa5, 0x2a, 0xac, 0x23, + 0xaf, 0x20, 0xaa, 0x25, 0xa9, 0x26, 0xb8, 0x37, 0xbb, 0x34, + 0xbe, 0x31, 0xbd, 0x32, 0xb4, 0x3b, 0xb7, 0x38, 0xb2, 0x3d, + 0xb1, 0x3e, 0x90, 0x1f, 0x93, 0x1c, 0x96, 0x19, 0x95, 0x1a, + 0x9c, 0x13, 0x9f, 0x10, 0x9a, 0x15, 0x99, 0x16, 0x88, 0x07, + 0x8b, 0x04, 0x8e, 0x01, 0x8d, 0x02, 0x84, 0x0b, 0x87, 0x08, + 0x82, 0x0d, 0x81, 0x0e, 0x00, 0x90, 0x3d, 0xad, 0x7a, 0xea, + 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23, + 0xf5, 0x65, 0xc8, 0x58, 0x8f, 0x1f, 0xb2, 0x22, 0x01, 0x91, + 0x3c, 0xac, 0x7b, 0xeb, 0x46, 0xd6, 0xf7, 0x67, 0xca, 0x5a, + 0x8d, 0x1d, 0xb0, 0x20, 0x03, 0x93, 0x3e, 0xae, 0x79, 0xe9, + 0x44, 0xd4, 0x02, 0x92, 0x3f, 0xaf, 0x78, 0xe8, 0x45, 0xd5, + 0xf6, 0x66, 0xcb, 0x5b, 0x8c, 0x1c, 0xb1, 0x21, 0xf3, 0x63, + 0xce, 0x5e, 0x89, 0x19, 0xb4, 0x24, 0x07, 0x97, 0x3a, 0xaa, + 0x7d, 0xed, 0x40, 0xd0, 0x06, 0x96, 0x3b, 0xab, 0x7c, 0xec, + 0x41, 0xd1, 0xf2, 0x62, 0xcf, 0x5f, 0x88, 0x18, 0xb5, 0x25, + 0x04, 0x94, 0x39, 0xa9, 0x7e, 0xee, 0x43, 0xd3, 0xf0, 0x60, + 0xcd, 0x5d, 0x8a, 0x1a, 0xb7, 0x27, 0xf1, 0x61, 0xcc, 0x5c, + 0x8b, 0x1b, 0xb6, 0x26, 0x05, 0x95, 0x38, 0xa8, 0x7f, 0xef, + 0x42, 0xd2, 0xfb, 0x6b, 0xc6, 0x56, 0x81, 0x11, 0xbc, 0x2c, + 0x0f, 0x9f, 0x32, 0xa2, 0x75, 0xe5, 0x48, 0xd8, 0x0e, 0x9e, + 0x33, 0xa3, 0x74, 0xe4, 0x49, 0xd9, 0xfa, 0x6a, 0xc7, 0x57, + 0x80, 0x10, 0xbd, 0x2d, 0x0c, 0x9c, 0x31, 0xa1, 0x76, 0xe6, + 0x4b, 0xdb, 0xf8, 0x68, 0xc5, 0x55, 0x82, 0x12, 0xbf, 0x2f, + 0xf9, 0x69, 0xc4, 0x54, 0x83, 0x13, 0xbe, 0x2e, 0x0d, 0x9d, + 0x30, 0xa0, 0x77, 0xe7, 0x4a, 0xda, 0x08, 0x98, 0x35, 0xa5, + 0x72, 0xe2, 0x4f, 0xdf, 0xfc, 0x6c, 0xc1, 0x51, 0x86, 0x16, + 0xbb, 0x2b, 0xfd, 0x6d, 0xc0, 0x50, 0x87, 0x17, 0xba, 0x2a, + 0x09, 0x99, 0x34, 0xa4, 0x73, 0xe3, 0x4e, 0xde, 0xff, 0x6f, + 0xc2, 0x52, 0x85, 0x15, 0xb8, 0x28, 0x0b, 0x9b, 0x36, 0xa6, + 0x71, 0xe1, 0x4c, 0xdc, 0x0a, 0x9a, 0x37, 0xa7, 0x70, 0xe0, + 0x4d, 0xdd, 0xfe, 0x6e, 0xc3, 0x53, 0x84, 0x14, 0xb9, 0x29, + 0x00, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, + 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c, 0xe5, 0x74, 0xda, 0x4b, + 0x9b, 0x0a, 0xa4, 0x35, 0x19, 0x88, 0x26, 0xb7, 0x67, 0xf6, + 0x58, 0xc9, 0xd7, 0x46, 0xe8, 0x79, 0xa9, 0x38, 0x96, 0x07, + 0x2b, 0xba, 0x14, 0x85, 0x55, 0xc4, 0x6a, 0xfb, 0x32, 0xa3, + 0x0d, 0x9c, 0x4c, 0xdd, 0x73, 0xe2, 0xce, 0x5f, 0xf1, 0x60, + 0xb0, 0x21, 0x8f, 0x1e, 0xb3, 0x22, 0x8c, 0x1d, 0xcd, 0x5c, + 0xf2, 0x63, 0x4f, 0xde, 0x70, 0xe1, 0x31, 0xa0, 0x0e, 0x9f, + 0x56, 0xc7, 0x69, 0xf8, 0x28, 0xb9, 0x17, 0x86, 0xaa, 0x3b, + 0x95, 0x04, 0xd4, 0x45, 0xeb, 0x7a, 0x64, 0xf5, 0x5b, 0xca, + 0x1a, 0x8b, 0x25, 0xb4, 0x98, 0x09, 0xa7, 0x36, 0xe6, 0x77, + 0xd9, 0x48, 0x81, 0x10, 0xbe, 0x2f, 0xff, 0x6e, 0xc0, 0x51, + 0x7d, 0xec, 0x42, 0xd3, 0x03, 0x92, 0x3c, 0xad, 0x7b, 0xea, + 0x44, 0xd5, 0x05, 
0x94, 0x3a, 0xab, 0x87, 0x16, 0xb8, 0x29, + 0xf9, 0x68, 0xc6, 0x57, 0x9e, 0x0f, 0xa1, 0x30, 0xe0, 0x71, + 0xdf, 0x4e, 0x62, 0xf3, 0x5d, 0xcc, 0x1c, 0x8d, 0x23, 0xb2, + 0xac, 0x3d, 0x93, 0x02, 0xd2, 0x43, 0xed, 0x7c, 0x50, 0xc1, + 0x6f, 0xfe, 0x2e, 0xbf, 0x11, 0x80, 0x49, 0xd8, 0x76, 0xe7, + 0x37, 0xa6, 0x08, 0x99, 0xb5, 0x24, 0x8a, 0x1b, 0xcb, 0x5a, + 0xf4, 0x65, 0xc8, 0x59, 0xf7, 0x66, 0xb6, 0x27, 0x89, 0x18, + 0x34, 0xa5, 0x0b, 0x9a, 0x4a, 0xdb, 0x75, 0xe4, 0x2d, 0xbc, + 0x12, 0x83, 0x53, 0xc2, 0x6c, 0xfd, 0xd1, 0x40, 0xee, 0x7f, + 0xaf, 0x3e, 0x90, 0x01, 0x1f, 0x8e, 0x20, 0xb1, 0x61, 0xf0, + 0x5e, 0xcf, 0xe3, 0x72, 0xdc, 0x4d, 0x9d, 0x0c, 0xa2, 0x33, + 0xfa, 0x6b, 0xc5, 0x54, 0x84, 0x15, 0xbb, 0x2a, 0x06, 0x97, + 0x39, 0xa8, 0x78, 0xe9, 0x47, 0xd6, 0x00, 0x92, 0x39, 0xab, + 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x04, + 0xaf, 0x3d, 0xd5, 0x47, 0xec, 0x7e, 0xa7, 0x35, 0x9e, 0x0c, + 0x31, 0xa3, 0x08, 0x9a, 0x43, 0xd1, 0x7a, 0xe8, 0xb7, 0x25, + 0x8e, 0x1c, 0xc5, 0x57, 0xfc, 0x6e, 0x53, 0xc1, 0x6a, 0xf8, + 0x21, 0xb3, 0x18, 0x8a, 0x62, 0xf0, 0x5b, 0xc9, 0x10, 0x82, + 0x29, 0xbb, 0x86, 0x14, 0xbf, 0x2d, 0xf4, 0x66, 0xcd, 0x5f, + 0x73, 0xe1, 0x4a, 0xd8, 0x01, 0x93, 0x38, 0xaa, 0x97, 0x05, + 0xae, 0x3c, 0xe5, 0x77, 0xdc, 0x4e, 0xa6, 0x34, 0x9f, 0x0d, + 0xd4, 0x46, 0xed, 0x7f, 0x42, 0xd0, 0x7b, 0xe9, 0x30, 0xa2, + 0x09, 0x9b, 0xc4, 0x56, 0xfd, 0x6f, 0xb6, 0x24, 0x8f, 0x1d, + 0x20, 0xb2, 0x19, 0x8b, 0x52, 0xc0, 0x6b, 0xf9, 0x11, 0x83, + 0x28, 0xba, 0x63, 0xf1, 0x5a, 0xc8, 0xf5, 0x67, 0xcc, 0x5e, + 0x87, 0x15, 0xbe, 0x2c, 0xe6, 0x74, 0xdf, 0x4d, 0x94, 0x06, + 0xad, 0x3f, 0x02, 0x90, 0x3b, 0xa9, 0x70, 0xe2, 0x49, 0xdb, + 0x33, 0xa1, 0x0a, 0x98, 0x41, 0xd3, 0x78, 0xea, 0xd7, 0x45, + 0xee, 0x7c, 0xa5, 0x37, 0x9c, 0x0e, 0x51, 0xc3, 0x68, 0xfa, + 0x23, 0xb1, 0x1a, 0x88, 0xb5, 0x27, 0x8c, 0x1e, 0xc7, 0x55, + 0xfe, 0x6c, 0x84, 0x16, 0xbd, 0x2f, 0xf6, 0x64, 0xcf, 0x5d, + 0x60, 0xf2, 0x59, 0xcb, 0x12, 0x80, 0x2b, 0xb9, 0x95, 0x07, + 0xac, 0x3e, 0xe7, 0x75, 0xde, 0x4c, 0x71, 0xe3, 0x48, 0xda, + 0x03, 0x91, 0x3a, 0xa8, 0x40, 0xd2, 0x79, 0xeb, 0x32, 0xa0, + 0x0b, 0x99, 0xa4, 0x36, 0x9d, 0x0f, 0xd6, 0x44, 0xef, 0x7d, + 0x22, 0xb0, 0x1b, 0x89, 0x50, 0xc2, 0x69, 0xfb, 0xc6, 0x54, + 0xff, 0x6d, 0xb4, 0x26, 0x8d, 0x1f, 0xf7, 0x65, 0xce, 0x5c, + 0x85, 0x17, 0xbc, 0x2e, 0x13, 0x81, 0x2a, 0xb8, 0x61, 0xf3, + 0x58, 0xca, 0x00, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, + 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x09, 0xa1, 0x32, 0xc5, 0x56, + 0xfe, 0x6d, 0xb3, 0x20, 0x88, 0x1b, 0x29, 0xba, 0x12, 0x81, + 0x5f, 0xcc, 0x64, 0xf7, 0x97, 0x04, 0xac, 0x3f, 0xe1, 0x72, + 0xda, 0x49, 0x7b, 0xe8, 0x40, 0xd3, 0x0d, 0x9e, 0x36, 0xa5, + 0x52, 0xc1, 0x69, 0xfa, 0x24, 0xb7, 0x1f, 0x8c, 0xbe, 0x2d, + 0x85, 0x16, 0xc8, 0x5b, 0xf3, 0x60, 0x33, 0xa0, 0x08, 0x9b, + 0x45, 0xd6, 0x7e, 0xed, 0xdf, 0x4c, 0xe4, 0x77, 0xa9, 0x3a, + 0x92, 0x01, 0xf6, 0x65, 0xcd, 0x5e, 0x80, 0x13, 0xbb, 0x28, + 0x1a, 0x89, 0x21, 0xb2, 0x6c, 0xff, 0x57, 0xc4, 0xa4, 0x37, + 0x9f, 0x0c, 0xd2, 0x41, 0xe9, 0x7a, 0x48, 0xdb, 0x73, 0xe0, + 0x3e, 0xad, 0x05, 0x96, 0x61, 0xf2, 0x5a, 0xc9, 0x17, 0x84, + 0x2c, 0xbf, 0x8d, 0x1e, 0xb6, 0x25, 0xfb, 0x68, 0xc0, 0x53, + 0x66, 0xf5, 0x5d, 0xce, 0x10, 0x83, 0x2b, 0xb8, 0x8a, 0x19, + 0xb1, 0x22, 0xfc, 0x6f, 0xc7, 0x54, 0xa3, 0x30, 0x98, 0x0b, + 0xd5, 0x46, 0xee, 0x7d, 0x4f, 0xdc, 0x74, 0xe7, 0x39, 0xaa, + 0x02, 0x91, 0xf1, 0x62, 0xca, 0x59, 0x87, 0x14, 0xbc, 0x2f, + 0x1d, 0x8e, 0x26, 0xb5, 0x6b, 0xf8, 0x50, 0xc3, 0x34, 0xa7, + 0x0f, 0x9c, 0x42, 0xd1, 0x79, 0xea, 0xd8, 0x4b, 0xe3, 0x70, + 0xae, 0x3d, 0x95, 0x06, 0x55, 0xc6, 
0x6e, 0xfd, 0x23, 0xb0, + 0x18, 0x8b, 0xb9, 0x2a, 0x82, 0x11, 0xcf, 0x5c, 0xf4, 0x67, + 0x90, 0x03, 0xab, 0x38, 0xe6, 0x75, 0xdd, 0x4e, 0x7c, 0xef, + 0x47, 0xd4, 0x0a, 0x99, 0x31, 0xa2, 0xc2, 0x51, 0xf9, 0x6a, + 0xb4, 0x27, 0x8f, 0x1c, 0x2e, 0xbd, 0x15, 0x86, 0x58, 0xcb, + 0x63, 0xf0, 0x07, 0x94, 0x3c, 0xaf, 0x71, 0xe2, 0x4a, 0xd9, + 0xeb, 0x78, 0xd0, 0x43, 0x9d, 0x0e, 0xa6, 0x35, 0x00, 0x94, + 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, + 0xbe, 0x2a, 0x8b, 0x1f, 0xb5, 0x21, 0x80, 0x14, 0xdf, 0x4b, + 0xea, 0x7e, 0x61, 0xf5, 0x54, 0xc0, 0x0b, 0x9f, 0x3e, 0xaa, + 0x77, 0xe3, 0x42, 0xd6, 0x1d, 0x89, 0x28, 0xbc, 0xa3, 0x37, + 0x96, 0x02, 0xc9, 0x5d, 0xfc, 0x68, 0xc2, 0x56, 0xf7, 0x63, + 0xa8, 0x3c, 0x9d, 0x09, 0x16, 0x82, 0x23, 0xb7, 0x7c, 0xe8, + 0x49, 0xdd, 0xee, 0x7a, 0xdb, 0x4f, 0x84, 0x10, 0xb1, 0x25, + 0x3a, 0xae, 0x0f, 0x9b, 0x50, 0xc4, 0x65, 0xf1, 0x5b, 0xcf, + 0x6e, 0xfa, 0x31, 0xa5, 0x04, 0x90, 0x8f, 0x1b, 0xba, 0x2e, + 0xe5, 0x71, 0xd0, 0x44, 0x99, 0x0d, 0xac, 0x38, 0xf3, 0x67, + 0xc6, 0x52, 0x4d, 0xd9, 0x78, 0xec, 0x27, 0xb3, 0x12, 0x86, + 0x2c, 0xb8, 0x19, 0x8d, 0x46, 0xd2, 0x73, 0xe7, 0xf8, 0x6c, + 0xcd, 0x59, 0x92, 0x06, 0xa7, 0x33, 0xc1, 0x55, 0xf4, 0x60, + 0xab, 0x3f, 0x9e, 0x0a, 0x15, 0x81, 0x20, 0xb4, 0x7f, 0xeb, + 0x4a, 0xde, 0x74, 0xe0, 0x41, 0xd5, 0x1e, 0x8a, 0x2b, 0xbf, + 0xa0, 0x34, 0x95, 0x01, 0xca, 0x5e, 0xff, 0x6b, 0xb6, 0x22, + 0x83, 0x17, 0xdc, 0x48, 0xe9, 0x7d, 0x62, 0xf6, 0x57, 0xc3, + 0x08, 0x9c, 0x3d, 0xa9, 0x03, 0x97, 0x36, 0xa2, 0x69, 0xfd, + 0x5c, 0xc8, 0xd7, 0x43, 0xe2, 0x76, 0xbd, 0x29, 0x88, 0x1c, + 0x2f, 0xbb, 0x1a, 0x8e, 0x45, 0xd1, 0x70, 0xe4, 0xfb, 0x6f, + 0xce, 0x5a, 0x91, 0x05, 0xa4, 0x30, 0x9a, 0x0e, 0xaf, 0x3b, + 0xf0, 0x64, 0xc5, 0x51, 0x4e, 0xda, 0x7b, 0xef, 0x24, 0xb0, + 0x11, 0x85, 0x58, 0xcc, 0x6d, 0xf9, 0x32, 0xa6, 0x07, 0x93, + 0x8c, 0x18, 0xb9, 0x2d, 0xe6, 0x72, 0xd3, 0x47, 0xed, 0x79, + 0xd8, 0x4c, 0x87, 0x13, 0xb2, 0x26, 0x39, 0xad, 0x0c, 0x98, + 0x53, 0xc7, 0x66, 0xf2, 0x00, 0x95, 0x37, 0xa2, 0x6e, 0xfb, + 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10, + 0xa5, 0x30, 0x92, 0x07, 0xcb, 0x5e, 0xfc, 0x69, 0x79, 0xec, + 0x4e, 0xdb, 0x17, 0x82, 0x20, 0xb5, 0x57, 0xc2, 0x60, 0xf5, + 0x39, 0xac, 0x0e, 0x9b, 0x8b, 0x1e, 0xbc, 0x29, 0xe5, 0x70, + 0xd2, 0x47, 0xf2, 0x67, 0xc5, 0x50, 0x9c, 0x09, 0xab, 0x3e, + 0x2e, 0xbb, 0x19, 0x8c, 0x40, 0xd5, 0x77, 0xe2, 0xae, 0x3b, + 0x99, 0x0c, 0xc0, 0x55, 0xf7, 0x62, 0x72, 0xe7, 0x45, 0xd0, + 0x1c, 0x89, 0x2b, 0xbe, 0x0b, 0x9e, 0x3c, 0xa9, 0x65, 0xf0, + 0x52, 0xc7, 0xd7, 0x42, 0xe0, 0x75, 0xb9, 0x2c, 0x8e, 0x1b, + 0xf9, 0x6c, 0xce, 0x5b, 0x97, 0x02, 0xa0, 0x35, 0x25, 0xb0, + 0x12, 0x87, 0x4b, 0xde, 0x7c, 0xe9, 0x5c, 0xc9, 0x6b, 0xfe, + 0x32, 0xa7, 0x05, 0x90, 0x80, 0x15, 0xb7, 0x22, 0xee, 0x7b, + 0xd9, 0x4c, 0x41, 0xd4, 0x76, 0xe3, 0x2f, 0xba, 0x18, 0x8d, + 0x9d, 0x08, 0xaa, 0x3f, 0xf3, 0x66, 0xc4, 0x51, 0xe4, 0x71, + 0xd3, 0x46, 0x8a, 0x1f, 0xbd, 0x28, 0x38, 0xad, 0x0f, 0x9a, + 0x56, 0xc3, 0x61, 0xf4, 0x16, 0x83, 0x21, 0xb4, 0x78, 0xed, + 0x4f, 0xda, 0xca, 0x5f, 0xfd, 0x68, 0xa4, 0x31, 0x93, 0x06, + 0xb3, 0x26, 0x84, 0x11, 0xdd, 0x48, 0xea, 0x7f, 0x6f, 0xfa, + 0x58, 0xcd, 0x01, 0x94, 0x36, 0xa3, 0xef, 0x7a, 0xd8, 0x4d, + 0x81, 0x14, 0xb6, 0x23, 0x33, 0xa6, 0x04, 0x91, 0x5d, 0xc8, + 0x6a, 0xff, 0x4a, 0xdf, 0x7d, 0xe8, 0x24, 0xb1, 0x13, 0x86, + 0x96, 0x03, 0xa1, 0x34, 0xf8, 0x6d, 0xcf, 0x5a, 0xb8, 0x2d, + 0x8f, 0x1a, 0xd6, 0x43, 0xe1, 0x74, 0x64, 0xf1, 0x53, 0xc6, + 0x0a, 0x9f, 0x3d, 0xa8, 0x1d, 0x88, 0x2a, 0xbf, 0x73, 0xe6, + 0x44, 0xd1, 0xc1, 0x54, 0xf6, 0x63, 0xaf, 0x3a, 0x98, 
0x0d, + 0x00, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, + 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x01, 0x95, 0x03, 0xa4, 0x32, + 0xf7, 0x61, 0xc6, 0x50, 0x51, 0xc7, 0x60, 0xf6, 0x33, 0xa5, + 0x02, 0x94, 0x37, 0xa1, 0x06, 0x90, 0x55, 0xc3, 0x64, 0xf2, + 0xf3, 0x65, 0xc2, 0x54, 0x91, 0x07, 0xa0, 0x36, 0xa2, 0x34, + 0x93, 0x05, 0xc0, 0x56, 0xf1, 0x67, 0x66, 0xf0, 0x57, 0xc1, + 0x04, 0x92, 0x35, 0xa3, 0x6e, 0xf8, 0x5f, 0xc9, 0x0c, 0x9a, + 0x3d, 0xab, 0xaa, 0x3c, 0x9b, 0x0d, 0xc8, 0x5e, 0xf9, 0x6f, + 0xfb, 0x6d, 0xca, 0x5c, 0x99, 0x0f, 0xa8, 0x3e, 0x3f, 0xa9, + 0x0e, 0x98, 0x5d, 0xcb, 0x6c, 0xfa, 0x59, 0xcf, 0x68, 0xfe, + 0x3b, 0xad, 0x0a, 0x9c, 0x9d, 0x0b, 0xac, 0x3a, 0xff, 0x69, + 0xce, 0x58, 0xcc, 0x5a, 0xfd, 0x6b, 0xae, 0x38, 0x9f, 0x09, + 0x08, 0x9e, 0x39, 0xaf, 0x6a, 0xfc, 0x5b, 0xcd, 0xdc, 0x4a, + 0xed, 0x7b, 0xbe, 0x28, 0x8f, 0x19, 0x18, 0x8e, 0x29, 0xbf, + 0x7a, 0xec, 0x4b, 0xdd, 0x49, 0xdf, 0x78, 0xee, 0x2b, 0xbd, + 0x1a, 0x8c, 0x8d, 0x1b, 0xbc, 0x2a, 0xef, 0x79, 0xde, 0x48, + 0xeb, 0x7d, 0xda, 0x4c, 0x89, 0x1f, 0xb8, 0x2e, 0x2f, 0xb9, + 0x1e, 0x88, 0x4d, 0xdb, 0x7c, 0xea, 0x7e, 0xe8, 0x4f, 0xd9, + 0x1c, 0x8a, 0x2d, 0xbb, 0xba, 0x2c, 0x8b, 0x1d, 0xd8, 0x4e, + 0xe9, 0x7f, 0xb2, 0x24, 0x83, 0x15, 0xd0, 0x46, 0xe1, 0x77, + 0x76, 0xe0, 0x47, 0xd1, 0x14, 0x82, 0x25, 0xb3, 0x27, 0xb1, + 0x16, 0x80, 0x45, 0xd3, 0x74, 0xe2, 0xe3, 0x75, 0xd2, 0x44, + 0x81, 0x17, 0xb0, 0x26, 0x85, 0x13, 0xb4, 0x22, 0xe7, 0x71, + 0xd6, 0x40, 0x41, 0xd7, 0x70, 0xe6, 0x23, 0xb5, 0x12, 0x84, + 0x10, 0x86, 0x21, 0xb7, 0x72, 0xe4, 0x43, 0xd5, 0xd4, 0x42, + 0xe5, 0x73, 0xb6, 0x20, 0x87, 0x11, 0x00, 0x97, 0x33, 0xa4, + 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, + 0x99, 0x0e, 0x85, 0x12, 0xb6, 0x21, 0xe3, 0x74, 0xd0, 0x47, + 0x49, 0xde, 0x7a, 0xed, 0x2f, 0xb8, 0x1c, 0x8b, 0x17, 0x80, + 0x24, 0xb3, 0x71, 0xe6, 0x42, 0xd5, 0xdb, 0x4c, 0xe8, 0x7f, + 0xbd, 0x2a, 0x8e, 0x19, 0x92, 0x05, 0xa1, 0x36, 0xf4, 0x63, + 0xc7, 0x50, 0x5e, 0xc9, 0x6d, 0xfa, 0x38, 0xaf, 0x0b, 0x9c, + 0x2e, 0xb9, 0x1d, 0x8a, 0x48, 0xdf, 0x7b, 0xec, 0xe2, 0x75, + 0xd1, 0x46, 0x84, 0x13, 0xb7, 0x20, 0xab, 0x3c, 0x98, 0x0f, + 0xcd, 0x5a, 0xfe, 0x69, 0x67, 0xf0, 0x54, 0xc3, 0x01, 0x96, + 0x32, 0xa5, 0x39, 0xae, 0x0a, 0x9d, 0x5f, 0xc8, 0x6c, 0xfb, + 0xf5, 0x62, 0xc6, 0x51, 0x93, 0x04, 0xa0, 0x37, 0xbc, 0x2b, + 0x8f, 0x18, 0xda, 0x4d, 0xe9, 0x7e, 0x70, 0xe7, 0x43, 0xd4, + 0x16, 0x81, 0x25, 0xb2, 0x5c, 0xcb, 0x6f, 0xf8, 0x3a, 0xad, + 0x09, 0x9e, 0x90, 0x07, 0xa3, 0x34, 0xf6, 0x61, 0xc5, 0x52, + 0xd9, 0x4e, 0xea, 0x7d, 0xbf, 0x28, 0x8c, 0x1b, 0x15, 0x82, + 0x26, 0xb1, 0x73, 0xe4, 0x40, 0xd7, 0x4b, 0xdc, 0x78, 0xef, + 0x2d, 0xba, 0x1e, 0x89, 0x87, 0x10, 0xb4, 0x23, 0xe1, 0x76, + 0xd2, 0x45, 0xce, 0x59, 0xfd, 0x6a, 0xa8, 0x3f, 0x9b, 0x0c, + 0x02, 0x95, 0x31, 0xa6, 0x64, 0xf3, 0x57, 0xc0, 0x72, 0xe5, + 0x41, 0xd6, 0x14, 0x83, 0x27, 0xb0, 0xbe, 0x29, 0x8d, 0x1a, + 0xd8, 0x4f, 0xeb, 0x7c, 0xf7, 0x60, 0xc4, 0x53, 0x91, 0x06, + 0xa2, 0x35, 0x3b, 0xac, 0x08, 0x9f, 0x5d, 0xca, 0x6e, 0xf9, + 0x65, 0xf2, 0x56, 0xc1, 0x03, 0x94, 0x30, 0xa7, 0xa9, 0x3e, + 0x9a, 0x0d, 0xcf, 0x58, 0xfc, 0x6b, 0xe0, 0x77, 0xd3, 0x44, + 0x86, 0x11, 0xb5, 0x22, 0x2c, 0xbb, 0x1f, 0x88, 0x4a, 0xdd, + 0x79, 0xee, 0x00, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, + 0xb4, 0x2c, 0x99, 0x01, 0xee, 0x76, 0xc3, 0x5b, 0x75, 0xed, + 0x58, 0xc0, 0x2f, 0xb7, 0x02, 0x9a, 0xc1, 0x59, 0xec, 0x74, + 0x9b, 0x03, 0xb6, 0x2e, 0xea, 0x72, 0xc7, 0x5f, 0xb0, 0x28, + 0x9d, 0x05, 0x5e, 0xc6, 0x73, 0xeb, 0x04, 0x9c, 0x29, 0xb1, + 0x9f, 0x07, 0xb2, 0x2a, 0xc5, 0x5d, 0xe8, 0x70, 0x2b, 0xb3, + 0x06, 0x9e, 
0x71, 0xe9, 0x5c, 0xc4, 0xc9, 0x51, 0xe4, 0x7c, + 0x93, 0x0b, 0xbe, 0x26, 0x7d, 0xe5, 0x50, 0xc8, 0x27, 0xbf, + 0x0a, 0x92, 0xbc, 0x24, 0x91, 0x09, 0xe6, 0x7e, 0xcb, 0x53, + 0x08, 0x90, 0x25, 0xbd, 0x52, 0xca, 0x7f, 0xe7, 0x23, 0xbb, + 0x0e, 0x96, 0x79, 0xe1, 0x54, 0xcc, 0x97, 0x0f, 0xba, 0x22, + 0xcd, 0x55, 0xe0, 0x78, 0x56, 0xce, 0x7b, 0xe3, 0x0c, 0x94, + 0x21, 0xb9, 0xe2, 0x7a, 0xcf, 0x57, 0xb8, 0x20, 0x95, 0x0d, + 0x8f, 0x17, 0xa2, 0x3a, 0xd5, 0x4d, 0xf8, 0x60, 0x3b, 0xa3, + 0x16, 0x8e, 0x61, 0xf9, 0x4c, 0xd4, 0xfa, 0x62, 0xd7, 0x4f, + 0xa0, 0x38, 0x8d, 0x15, 0x4e, 0xd6, 0x63, 0xfb, 0x14, 0x8c, + 0x39, 0xa1, 0x65, 0xfd, 0x48, 0xd0, 0x3f, 0xa7, 0x12, 0x8a, + 0xd1, 0x49, 0xfc, 0x64, 0x8b, 0x13, 0xa6, 0x3e, 0x10, 0x88, + 0x3d, 0xa5, 0x4a, 0xd2, 0x67, 0xff, 0xa4, 0x3c, 0x89, 0x11, + 0xfe, 0x66, 0xd3, 0x4b, 0x46, 0xde, 0x6b, 0xf3, 0x1c, 0x84, + 0x31, 0xa9, 0xf2, 0x6a, 0xdf, 0x47, 0xa8, 0x30, 0x85, 0x1d, + 0x33, 0xab, 0x1e, 0x86, 0x69, 0xf1, 0x44, 0xdc, 0x87, 0x1f, + 0xaa, 0x32, 0xdd, 0x45, 0xf0, 0x68, 0xac, 0x34, 0x81, 0x19, + 0xf6, 0x6e, 0xdb, 0x43, 0x18, 0x80, 0x35, 0xad, 0x42, 0xda, + 0x6f, 0xf7, 0xd9, 0x41, 0xf4, 0x6c, 0x83, 0x1b, 0xae, 0x36, + 0x6d, 0xf5, 0x40, 0xd8, 0x37, 0xaf, 0x1a, 0x82, 0x00, 0x99, + 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0x0a, + 0xe2, 0x7b, 0xcd, 0x54, 0x65, 0xfc, 0x4a, 0xd3, 0x3b, 0xa2, + 0x14, 0x8d, 0xd9, 0x40, 0xf6, 0x6f, 0x87, 0x1e, 0xa8, 0x31, + 0xca, 0x53, 0xe5, 0x7c, 0x94, 0x0d, 0xbb, 0x22, 0x76, 0xef, + 0x59, 0xc0, 0x28, 0xb1, 0x07, 0x9e, 0xaf, 0x36, 0x80, 0x19, + 0xf1, 0x68, 0xde, 0x47, 0x13, 0x8a, 0x3c, 0xa5, 0x4d, 0xd4, + 0x62, 0xfb, 0x89, 0x10, 0xa6, 0x3f, 0xd7, 0x4e, 0xf8, 0x61, + 0x35, 0xac, 0x1a, 0x83, 0x6b, 0xf2, 0x44, 0xdd, 0xec, 0x75, + 0xc3, 0x5a, 0xb2, 0x2b, 0x9d, 0x04, 0x50, 0xc9, 0x7f, 0xe6, + 0x0e, 0x97, 0x21, 0xb8, 0x43, 0xda, 0x6c, 0xf5, 0x1d, 0x84, + 0x32, 0xab, 0xff, 0x66, 0xd0, 0x49, 0xa1, 0x38, 0x8e, 0x17, + 0x26, 0xbf, 0x09, 0x90, 0x78, 0xe1, 0x57, 0xce, 0x9a, 0x03, + 0xb5, 0x2c, 0xc4, 0x5d, 0xeb, 0x72, 0x0f, 0x96, 0x20, 0xb9, + 0x51, 0xc8, 0x7e, 0xe7, 0xb3, 0x2a, 0x9c, 0x05, 0xed, 0x74, + 0xc2, 0x5b, 0x6a, 0xf3, 0x45, 0xdc, 0x34, 0xad, 0x1b, 0x82, + 0xd6, 0x4f, 0xf9, 0x60, 0x88, 0x11, 0xa7, 0x3e, 0xc5, 0x5c, + 0xea, 0x73, 0x9b, 0x02, 0xb4, 0x2d, 0x79, 0xe0, 0x56, 0xcf, + 0x27, 0xbe, 0x08, 0x91, 0xa0, 0x39, 0x8f, 0x16, 0xfe, 0x67, + 0xd1, 0x48, 0x1c, 0x85, 0x33, 0xaa, 0x42, 0xdb, 0x6d, 0xf4, + 0x86, 0x1f, 0xa9, 0x30, 0xd8, 0x41, 0xf7, 0x6e, 0x3a, 0xa3, + 0x15, 0x8c, 0x64, 0xfd, 0x4b, 0xd2, 0xe3, 0x7a, 0xcc, 0x55, + 0xbd, 0x24, 0x92, 0x0b, 0x5f, 0xc6, 0x70, 0xe9, 0x01, 0x98, + 0x2e, 0xb7, 0x4c, 0xd5, 0x63, 0xfa, 0x12, 0x8b, 0x3d, 0xa4, + 0xf0, 0x69, 0xdf, 0x46, 0xae, 0x37, 0x81, 0x18, 0x29, 0xb0, + 0x06, 0x9f, 0x77, 0xee, 0x58, 0xc1, 0x95, 0x0c, 0xba, 0x23, + 0xcb, 0x52, 0xe4, 0x7d, 0x00, 0x9a, 0x29, 0xb3, 0x52, 0xc8, + 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45, + 0x55, 0xcf, 0x7c, 0xe6, 0x07, 0x9d, 0x2e, 0xb4, 0xf1, 0x6b, + 0xd8, 0x42, 0xa3, 0x39, 0x8a, 0x10, 0xaa, 0x30, 0x83, 0x19, + 0xf8, 0x62, 0xd1, 0x4b, 0x0e, 0x94, 0x27, 0xbd, 0x5c, 0xc6, + 0x75, 0xef, 0xff, 0x65, 0xd6, 0x4c, 0xad, 0x37, 0x84, 0x1e, + 0x5b, 0xc1, 0x72, 0xe8, 0x09, 0x93, 0x20, 0xba, 0x49, 0xd3, + 0x60, 0xfa, 0x1b, 0x81, 0x32, 0xa8, 0xed, 0x77, 0xc4, 0x5e, + 0xbf, 0x25, 0x96, 0x0c, 0x1c, 0x86, 0x35, 0xaf, 0x4e, 0xd4, + 0x67, 0xfd, 0xb8, 0x22, 0x91, 0x0b, 0xea, 0x70, 0xc3, 0x59, + 0xe3, 0x79, 0xca, 0x50, 0xb1, 0x2b, 0x98, 0x02, 0x47, 0xdd, + 0x6e, 0xf4, 0x15, 0x8f, 0x3c, 0xa6, 0xb6, 0x2c, 0x9f, 0x05, + 0xe4, 0x7e, 0xcd, 0x57, 0x12, 
0x88, 0x3b, 0xa1, 0x40, 0xda, + 0x69, 0xf3, 0x92, 0x08, 0xbb, 0x21, 0xc0, 0x5a, 0xe9, 0x73, + 0x36, 0xac, 0x1f, 0x85, 0x64, 0xfe, 0x4d, 0xd7, 0xc7, 0x5d, + 0xee, 0x74, 0x95, 0x0f, 0xbc, 0x26, 0x63, 0xf9, 0x4a, 0xd0, + 0x31, 0xab, 0x18, 0x82, 0x38, 0xa2, 0x11, 0x8b, 0x6a, 0xf0, + 0x43, 0xd9, 0x9c, 0x06, 0xb5, 0x2f, 0xce, 0x54, 0xe7, 0x7d, + 0x6d, 0xf7, 0x44, 0xde, 0x3f, 0xa5, 0x16, 0x8c, 0xc9, 0x53, + 0xe0, 0x7a, 0x9b, 0x01, 0xb2, 0x28, 0xdb, 0x41, 0xf2, 0x68, + 0x89, 0x13, 0xa0, 0x3a, 0x7f, 0xe5, 0x56, 0xcc, 0x2d, 0xb7, + 0x04, 0x9e, 0x8e, 0x14, 0xa7, 0x3d, 0xdc, 0x46, 0xf5, 0x6f, + 0x2a, 0xb0, 0x03, 0x99, 0x78, 0xe2, 0x51, 0xcb, 0x71, 0xeb, + 0x58, 0xc2, 0x23, 0xb9, 0x0a, 0x90, 0xd5, 0x4f, 0xfc, 0x66, + 0x87, 0x1d, 0xae, 0x34, 0x24, 0xbe, 0x0d, 0x97, 0x76, 0xec, + 0x5f, 0xc5, 0x80, 0x1a, 0xa9, 0x33, 0xd2, 0x48, 0xfb, 0x61, + 0x00, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, + 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a, 0x45, 0xde, 0x6e, 0xf5, + 0x13, 0x88, 0x38, 0xa3, 0xe9, 0x72, 0xc2, 0x59, 0xbf, 0x24, + 0x94, 0x0f, 0x8a, 0x11, 0xa1, 0x3a, 0xdc, 0x47, 0xf7, 0x6c, + 0x26, 0xbd, 0x0d, 0x96, 0x70, 0xeb, 0x5b, 0xc0, 0xcf, 0x54, + 0xe4, 0x7f, 0x99, 0x02, 0xb2, 0x29, 0x63, 0xf8, 0x48, 0xd3, + 0x35, 0xae, 0x1e, 0x85, 0x09, 0x92, 0x22, 0xb9, 0x5f, 0xc4, + 0x74, 0xef, 0xa5, 0x3e, 0x8e, 0x15, 0xf3, 0x68, 0xd8, 0x43, + 0x4c, 0xd7, 0x67, 0xfc, 0x1a, 0x81, 0x31, 0xaa, 0xe0, 0x7b, + 0xcb, 0x50, 0xb6, 0x2d, 0x9d, 0x06, 0x83, 0x18, 0xa8, 0x33, + 0xd5, 0x4e, 0xfe, 0x65, 0x2f, 0xb4, 0x04, 0x9f, 0x79, 0xe2, + 0x52, 0xc9, 0xc6, 0x5d, 0xed, 0x76, 0x90, 0x0b, 0xbb, 0x20, + 0x6a, 0xf1, 0x41, 0xda, 0x3c, 0xa7, 0x17, 0x8c, 0x12, 0x89, + 0x39, 0xa2, 0x44, 0xdf, 0x6f, 0xf4, 0xbe, 0x25, 0x95, 0x0e, + 0xe8, 0x73, 0xc3, 0x58, 0x57, 0xcc, 0x7c, 0xe7, 0x01, 0x9a, + 0x2a, 0xb1, 0xfb, 0x60, 0xd0, 0x4b, 0xad, 0x36, 0x86, 0x1d, + 0x98, 0x03, 0xb3, 0x28, 0xce, 0x55, 0xe5, 0x7e, 0x34, 0xaf, + 0x1f, 0x84, 0x62, 0xf9, 0x49, 0xd2, 0xdd, 0x46, 0xf6, 0x6d, + 0x8b, 0x10, 0xa0, 0x3b, 0x71, 0xea, 0x5a, 0xc1, 0x27, 0xbc, + 0x0c, 0x97, 0x1b, 0x80, 0x30, 0xab, 0x4d, 0xd6, 0x66, 0xfd, + 0xb7, 0x2c, 0x9c, 0x07, 0xe1, 0x7a, 0xca, 0x51, 0x5e, 0xc5, + 0x75, 0xee, 0x08, 0x93, 0x23, 0xb8, 0xf2, 0x69, 0xd9, 0x42, + 0xa4, 0x3f, 0x8f, 0x14, 0x91, 0x0a, 0xba, 0x21, 0xc7, 0x5c, + 0xec, 0x77, 0x3d, 0xa6, 0x16, 0x8d, 0x6b, 0xf0, 0x40, 0xdb, + 0xd4, 0x4f, 0xff, 0x64, 0x82, 0x19, 0xa9, 0x32, 0x78, 0xe3, + 0x53, 0xc8, 0x2e, 0xb5, 0x05, 0x9e, 0x00, 0x9c, 0x25, 0xb9, + 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x08, 0xb1, 0x2d, 0xde, 0x42, + 0xfb, 0x67, 0x35, 0xa9, 0x10, 0x8c, 0x7f, 0xe3, 0x5a, 0xc6, + 0xa1, 0x3d, 0x84, 0x18, 0xeb, 0x77, 0xce, 0x52, 0x6a, 0xf6, + 0x4f, 0xd3, 0x20, 0xbc, 0x05, 0x99, 0xfe, 0x62, 0xdb, 0x47, + 0xb4, 0x28, 0x91, 0x0d, 0x5f, 0xc3, 0x7a, 0xe6, 0x15, 0x89, + 0x30, 0xac, 0xcb, 0x57, 0xee, 0x72, 0x81, 0x1d, 0xa4, 0x38, + 0xd4, 0x48, 0xf1, 0x6d, 0x9e, 0x02, 0xbb, 0x27, 0x40, 0xdc, + 0x65, 0xf9, 0x0a, 0x96, 0x2f, 0xb3, 0xe1, 0x7d, 0xc4, 0x58, + 0xab, 0x37, 0x8e, 0x12, 0x75, 0xe9, 0x50, 0xcc, 0x3f, 0xa3, + 0x1a, 0x86, 0xbe, 0x22, 0x9b, 0x07, 0xf4, 0x68, 0xd1, 0x4d, + 0x2a, 0xb6, 0x0f, 0x93, 0x60, 0xfc, 0x45, 0xd9, 0x8b, 0x17, + 0xae, 0x32, 0xc1, 0x5d, 0xe4, 0x78, 0x1f, 0x83, 0x3a, 0xa6, + 0x55, 0xc9, 0x70, 0xec, 0xb5, 0x29, 0x90, 0x0c, 0xff, 0x63, + 0xda, 0x46, 0x21, 0xbd, 0x04, 0x98, 0x6b, 0xf7, 0x4e, 0xd2, + 0x80, 0x1c, 0xa5, 0x39, 0xca, 0x56, 0xef, 0x73, 0x14, 0x88, + 0x31, 0xad, 0x5e, 0xc2, 0x7b, 0xe7, 0xdf, 0x43, 0xfa, 0x66, + 0x95, 0x09, 0xb0, 0x2c, 0x4b, 0xd7, 0x6e, 0xf2, 0x01, 0x9d, + 0x24, 0xb8, 0xea, 0x76, 0xcf, 0x53, 0xa0, 0x3c, 
0x85, 0x19, + 0x7e, 0xe2, 0x5b, 0xc7, 0x34, 0xa8, 0x11, 0x8d, 0x61, 0xfd, + 0x44, 0xd8, 0x2b, 0xb7, 0x0e, 0x92, 0xf5, 0x69, 0xd0, 0x4c, + 0xbf, 0x23, 0x9a, 0x06, 0x54, 0xc8, 0x71, 0xed, 0x1e, 0x82, + 0x3b, 0xa7, 0xc0, 0x5c, 0xe5, 0x79, 0x8a, 0x16, 0xaf, 0x33, + 0x0b, 0x97, 0x2e, 0xb2, 0x41, 0xdd, 0x64, 0xf8, 0x9f, 0x03, + 0xba, 0x26, 0xd5, 0x49, 0xf0, 0x6c, 0x3e, 0xa2, 0x1b, 0x87, + 0x74, 0xe8, 0x51, 0xcd, 0xaa, 0x36, 0x8f, 0x13, 0xe0, 0x7c, + 0xc5, 0x59, 0x00, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, + 0x9c, 0x01, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68, 0x25, 0xb8, + 0x02, 0x9f, 0x6b, 0xf6, 0x4c, 0xd1, 0xb9, 0x24, 0x9e, 0x03, + 0xf7, 0x6a, 0xd0, 0x4d, 0x4a, 0xd7, 0x6d, 0xf0, 0x04, 0x99, + 0x23, 0xbe, 0xd6, 0x4b, 0xf1, 0x6c, 0x98, 0x05, 0xbf, 0x22, + 0x6f, 0xf2, 0x48, 0xd5, 0x21, 0xbc, 0x06, 0x9b, 0xf3, 0x6e, + 0xd4, 0x49, 0xbd, 0x20, 0x9a, 0x07, 0x94, 0x09, 0xb3, 0x2e, + 0xda, 0x47, 0xfd, 0x60, 0x08, 0x95, 0x2f, 0xb2, 0x46, 0xdb, + 0x61, 0xfc, 0xb1, 0x2c, 0x96, 0x0b, 0xff, 0x62, 0xd8, 0x45, + 0x2d, 0xb0, 0x0a, 0x97, 0x63, 0xfe, 0x44, 0xd9, 0xde, 0x43, + 0xf9, 0x64, 0x90, 0x0d, 0xb7, 0x2a, 0x42, 0xdf, 0x65, 0xf8, + 0x0c, 0x91, 0x2b, 0xb6, 0xfb, 0x66, 0xdc, 0x41, 0xb5, 0x28, + 0x92, 0x0f, 0x67, 0xfa, 0x40, 0xdd, 0x29, 0xb4, 0x0e, 0x93, + 0x35, 0xa8, 0x12, 0x8f, 0x7b, 0xe6, 0x5c, 0xc1, 0xa9, 0x34, + 0x8e, 0x13, 0xe7, 0x7a, 0xc0, 0x5d, 0x10, 0x8d, 0x37, 0xaa, + 0x5e, 0xc3, 0x79, 0xe4, 0x8c, 0x11, 0xab, 0x36, 0xc2, 0x5f, + 0xe5, 0x78, 0x7f, 0xe2, 0x58, 0xc5, 0x31, 0xac, 0x16, 0x8b, + 0xe3, 0x7e, 0xc4, 0x59, 0xad, 0x30, 0x8a, 0x17, 0x5a, 0xc7, + 0x7d, 0xe0, 0x14, 0x89, 0x33, 0xae, 0xc6, 0x5b, 0xe1, 0x7c, + 0x88, 0x15, 0xaf, 0x32, 0xa1, 0x3c, 0x86, 0x1b, 0xef, 0x72, + 0xc8, 0x55, 0x3d, 0xa0, 0x1a, 0x87, 0x73, 0xee, 0x54, 0xc9, + 0x84, 0x19, 0xa3, 0x3e, 0xca, 0x57, 0xed, 0x70, 0x18, 0x85, + 0x3f, 0xa2, 0x56, 0xcb, 0x71, 0xec, 0xeb, 0x76, 0xcc, 0x51, + 0xa5, 0x38, 0x82, 0x1f, 0x77, 0xea, 0x50, 0xcd, 0x39, 0xa4, + 0x1e, 0x83, 0xce, 0x53, 0xe9, 0x74, 0x80, 0x1d, 0xa7, 0x3a, + 0x52, 0xcf, 0x75, 0xe8, 0x1c, 0x81, 0x3b, 0xa6, 0x00, 0x9e, + 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, + 0xc6, 0x58, 0xe7, 0x79, 0x15, 0x8b, 0x34, 0xaa, 0x57, 0xc9, + 0x76, 0xe8, 0x91, 0x0f, 0xb0, 0x2e, 0xd3, 0x4d, 0xf2, 0x6c, + 0x2a, 0xb4, 0x0b, 0x95, 0x68, 0xf6, 0x49, 0xd7, 0xae, 0x30, + 0x8f, 0x11, 0xec, 0x72, 0xcd, 0x53, 0x3f, 0xa1, 0x1e, 0x80, + 0x7d, 0xe3, 0x5c, 0xc2, 0xbb, 0x25, 0x9a, 0x04, 0xf9, 0x67, + 0xd8, 0x46, 0x54, 0xca, 0x75, 0xeb, 0x16, 0x88, 0x37, 0xa9, + 0xd0, 0x4e, 0xf1, 0x6f, 0x92, 0x0c, 0xb3, 0x2d, 0x41, 0xdf, + 0x60, 0xfe, 0x03, 0x9d, 0x22, 0xbc, 0xc5, 0x5b, 0xe4, 0x7a, + 0x87, 0x19, 0xa6, 0x38, 0x7e, 0xe0, 0x5f, 0xc1, 0x3c, 0xa2, + 0x1d, 0x83, 0xfa, 0x64, 0xdb, 0x45, 0xb8, 0x26, 0x99, 0x07, + 0x6b, 0xf5, 0x4a, 0xd4, 0x29, 0xb7, 0x08, 0x96, 0xef, 0x71, + 0xce, 0x50, 0xad, 0x33, 0x8c, 0x12, 0xa8, 0x36, 0x89, 0x17, + 0xea, 0x74, 0xcb, 0x55, 0x2c, 0xb2, 0x0d, 0x93, 0x6e, 0xf0, + 0x4f, 0xd1, 0xbd, 0x23, 0x9c, 0x02, 0xff, 0x61, 0xde, 0x40, + 0x39, 0xa7, 0x18, 0x86, 0x7b, 0xe5, 0x5a, 0xc4, 0x82, 0x1c, + 0xa3, 0x3d, 0xc0, 0x5e, 0xe1, 0x7f, 0x06, 0x98, 0x27, 0xb9, + 0x44, 0xda, 0x65, 0xfb, 0x97, 0x09, 0xb6, 0x28, 0xd5, 0x4b, + 0xf4, 0x6a, 0x13, 0x8d, 0x32, 0xac, 0x51, 0xcf, 0x70, 0xee, + 0xfc, 0x62, 0xdd, 0x43, 0xbe, 0x20, 0x9f, 0x01, 0x78, 0xe6, + 0x59, 0xc7, 0x3a, 0xa4, 0x1b, 0x85, 0xe9, 0x77, 0xc8, 0x56, + 0xab, 0x35, 0x8a, 0x14, 0x6d, 0xf3, 0x4c, 0xd2, 0x2f, 0xb1, + 0x0e, 0x90, 0xd6, 0x48, 0xf7, 0x69, 0x94, 0x0a, 0xb5, 0x2b, + 0x52, 0xcc, 0x73, 0xed, 0x10, 0x8e, 0x31, 0xaf, 0xc3, 0x5d, + 0xe2, 
0x7c, 0x81, 0x1f, 0xa0, 0x3e, 0x47, 0xd9, 0x66, 0xf8, + 0x05, 0x9b, 0x24, 0xba, 0x00, 0x9f, 0x23, 0xbc, 0x46, 0xd9, + 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76, + 0x05, 0x9a, 0x26, 0xb9, 0x43, 0xdc, 0x60, 0xff, 0x89, 0x16, + 0xaa, 0x35, 0xcf, 0x50, 0xec, 0x73, 0x0a, 0x95, 0x29, 0xb6, + 0x4c, 0xd3, 0x6f, 0xf0, 0x86, 0x19, 0xa5, 0x3a, 0xc0, 0x5f, + 0xe3, 0x7c, 0x0f, 0x90, 0x2c, 0xb3, 0x49, 0xd6, 0x6a, 0xf5, + 0x83, 0x1c, 0xa0, 0x3f, 0xc5, 0x5a, 0xe6, 0x79, 0x14, 0x8b, + 0x37, 0xa8, 0x52, 0xcd, 0x71, 0xee, 0x98, 0x07, 0xbb, 0x24, + 0xde, 0x41, 0xfd, 0x62, 0x11, 0x8e, 0x32, 0xad, 0x57, 0xc8, + 0x74, 0xeb, 0x9d, 0x02, 0xbe, 0x21, 0xdb, 0x44, 0xf8, 0x67, + 0x1e, 0x81, 0x3d, 0xa2, 0x58, 0xc7, 0x7b, 0xe4, 0x92, 0x0d, + 0xb1, 0x2e, 0xd4, 0x4b, 0xf7, 0x68, 0x1b, 0x84, 0x38, 0xa7, + 0x5d, 0xc2, 0x7e, 0xe1, 0x97, 0x08, 0xb4, 0x2b, 0xd1, 0x4e, + 0xf2, 0x6d, 0x28, 0xb7, 0x0b, 0x94, 0x6e, 0xf1, 0x4d, 0xd2, + 0xa4, 0x3b, 0x87, 0x18, 0xe2, 0x7d, 0xc1, 0x5e, 0x2d, 0xb2, + 0x0e, 0x91, 0x6b, 0xf4, 0x48, 0xd7, 0xa1, 0x3e, 0x82, 0x1d, + 0xe7, 0x78, 0xc4, 0x5b, 0x22, 0xbd, 0x01, 0x9e, 0x64, 0xfb, + 0x47, 0xd8, 0xae, 0x31, 0x8d, 0x12, 0xe8, 0x77, 0xcb, 0x54, + 0x27, 0xb8, 0x04, 0x9b, 0x61, 0xfe, 0x42, 0xdd, 0xab, 0x34, + 0x88, 0x17, 0xed, 0x72, 0xce, 0x51, 0x3c, 0xa3, 0x1f, 0x80, + 0x7a, 0xe5, 0x59, 0xc6, 0xb0, 0x2f, 0x93, 0x0c, 0xf6, 0x69, + 0xd5, 0x4a, 0x39, 0xa6, 0x1a, 0x85, 0x7f, 0xe0, 0x5c, 0xc3, + 0xb5, 0x2a, 0x96, 0x09, 0xf3, 0x6c, 0xd0, 0x4f, 0x36, 0xa9, + 0x15, 0x8a, 0x70, 0xef, 0x53, 0xcc, 0xba, 0x25, 0x99, 0x06, + 0xfc, 0x63, 0xdf, 0x40, 0x33, 0xac, 0x10, 0x8f, 0x75, 0xea, + 0x56, 0xc9, 0xbf, 0x20, 0x9c, 0x03, 0xf9, 0x66, 0xda, 0x45, + 0x00, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, + 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e, 0xd2, 0x72, 0x8f, 0x2f, + 0x68, 0xc8, 0x35, 0x95, 0xbb, 0x1b, 0xe6, 0x46, 0x01, 0xa1, + 0x5c, 0xfc, 0xb9, 0x19, 0xe4, 0x44, 0x03, 0xa3, 0x5e, 0xfe, + 0xd0, 0x70, 0x8d, 0x2d, 0x6a, 0xca, 0x37, 0x97, 0x6b, 0xcb, + 0x36, 0x96, 0xd1, 0x71, 0x8c, 0x2c, 0x02, 0xa2, 0x5f, 0xff, + 0xb8, 0x18, 0xe5, 0x45, 0x6f, 0xcf, 0x32, 0x92, 0xd5, 0x75, + 0x88, 0x28, 0x06, 0xa6, 0x5b, 0xfb, 0xbc, 0x1c, 0xe1, 0x41, + 0xbd, 0x1d, 0xe0, 0x40, 0x07, 0xa7, 0x5a, 0xfa, 0xd4, 0x74, + 0x89, 0x29, 0x6e, 0xce, 0x33, 0x93, 0xd6, 0x76, 0x8b, 0x2b, + 0x6c, 0xcc, 0x31, 0x91, 0xbf, 0x1f, 0xe2, 0x42, 0x05, 0xa5, + 0x58, 0xf8, 0x04, 0xa4, 0x59, 0xf9, 0xbe, 0x1e, 0xe3, 0x43, + 0x6d, 0xcd, 0x30, 0x90, 0xd7, 0x77, 0x8a, 0x2a, 0xde, 0x7e, + 0x83, 0x23, 0x64, 0xc4, 0x39, 0x99, 0xb7, 0x17, 0xea, 0x4a, + 0x0d, 0xad, 0x50, 0xf0, 0x0c, 0xac, 0x51, 0xf1, 0xb6, 0x16, + 0xeb, 0x4b, 0x65, 0xc5, 0x38, 0x98, 0xdf, 0x7f, 0x82, 0x22, + 0x67, 0xc7, 0x3a, 0x9a, 0xdd, 0x7d, 0x80, 0x20, 0x0e, 0xae, + 0x53, 0xf3, 0xb4, 0x14, 0xe9, 0x49, 0xb5, 0x15, 0xe8, 0x48, + 0x0f, 0xaf, 0x52, 0xf2, 0xdc, 0x7c, 0x81, 0x21, 0x66, 0xc6, + 0x3b, 0x9b, 0xb1, 0x11, 0xec, 0x4c, 0x0b, 0xab, 0x56, 0xf6, + 0xd8, 0x78, 0x85, 0x25, 0x62, 0xc2, 0x3f, 0x9f, 0x63, 0xc3, + 0x3e, 0x9e, 0xd9, 0x79, 0x84, 0x24, 0x0a, 0xaa, 0x57, 0xf7, + 0xb0, 0x10, 0xed, 0x4d, 0x08, 0xa8, 0x55, 0xf5, 0xb2, 0x12, + 0xef, 0x4f, 0x61, 0xc1, 0x3c, 0x9c, 0xdb, 0x7b, 0x86, 0x26, + 0xda, 0x7a, 0x87, 0x27, 0x60, 0xc0, 0x3d, 0x9d, 0xb3, 0x13, + 0xee, 0x4e, 0x09, 0xa9, 0x54, 0xf4, 0x00, 0xa1, 0x5f, 0xfe, + 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, + 0x80, 0x21, 0xc2, 0x63, 0x9d, 0x3c, 0x7c, 0xdd, 0x23, 0x82, + 0xa3, 0x02, 0xfc, 0x5d, 0x1d, 0xbc, 0x42, 0xe3, 0x99, 0x38, + 0xc6, 0x67, 0x27, 0x86, 0x78, 0xd9, 0xf8, 0x59, 0xa7, 0x06, + 0x46, 0xe7, 0x19, 0xb8, 
0x5b, 0xfa, 0x04, 0xa5, 0xe5, 0x44, + 0xba, 0x1b, 0x3a, 0x9b, 0x65, 0xc4, 0x84, 0x25, 0xdb, 0x7a, + 0x2f, 0x8e, 0x70, 0xd1, 0x91, 0x30, 0xce, 0x6f, 0x4e, 0xef, + 0x11, 0xb0, 0xf0, 0x51, 0xaf, 0x0e, 0xed, 0x4c, 0xb2, 0x13, + 0x53, 0xf2, 0x0c, 0xad, 0x8c, 0x2d, 0xd3, 0x72, 0x32, 0x93, + 0x6d, 0xcc, 0xb6, 0x17, 0xe9, 0x48, 0x08, 0xa9, 0x57, 0xf6, + 0xd7, 0x76, 0x88, 0x29, 0x69, 0xc8, 0x36, 0x97, 0x74, 0xd5, + 0x2b, 0x8a, 0xca, 0x6b, 0x95, 0x34, 0x15, 0xb4, 0x4a, 0xeb, + 0xab, 0x0a, 0xf4, 0x55, 0x5e, 0xff, 0x01, 0xa0, 0xe0, 0x41, + 0xbf, 0x1e, 0x3f, 0x9e, 0x60, 0xc1, 0x81, 0x20, 0xde, 0x7f, + 0x9c, 0x3d, 0xc3, 0x62, 0x22, 0x83, 0x7d, 0xdc, 0xfd, 0x5c, + 0xa2, 0x03, 0x43, 0xe2, 0x1c, 0xbd, 0xc7, 0x66, 0x98, 0x39, + 0x79, 0xd8, 0x26, 0x87, 0xa6, 0x07, 0xf9, 0x58, 0x18, 0xb9, + 0x47, 0xe6, 0x05, 0xa4, 0x5a, 0xfb, 0xbb, 0x1a, 0xe4, 0x45, + 0x64, 0xc5, 0x3b, 0x9a, 0xda, 0x7b, 0x85, 0x24, 0x71, 0xd0, + 0x2e, 0x8f, 0xcf, 0x6e, 0x90, 0x31, 0x10, 0xb1, 0x4f, 0xee, + 0xae, 0x0f, 0xf1, 0x50, 0xb3, 0x12, 0xec, 0x4d, 0x0d, 0xac, + 0x52, 0xf3, 0xd2, 0x73, 0x8d, 0x2c, 0x6c, 0xcd, 0x33, 0x92, + 0xe8, 0x49, 0xb7, 0x16, 0x56, 0xf7, 0x09, 0xa8, 0x89, 0x28, + 0xd6, 0x77, 0x37, 0x96, 0x68, 0xc9, 0x2a, 0x8b, 0x75, 0xd4, + 0x94, 0x35, 0xcb, 0x6a, 0x4b, 0xea, 0x14, 0xb5, 0xf5, 0x54, + 0xaa, 0x0b, 0x00, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, + 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30, 0xf2, 0x50, + 0xab, 0x09, 0x40, 0xe2, 0x19, 0xbb, 0x8b, 0x29, 0xd2, 0x70, + 0x39, 0x9b, 0x60, 0xc2, 0xf9, 0x5b, 0xa0, 0x02, 0x4b, 0xe9, + 0x12, 0xb0, 0x80, 0x22, 0xd9, 0x7b, 0x32, 0x90, 0x6b, 0xc9, + 0x0b, 0xa9, 0x52, 0xf0, 0xb9, 0x1b, 0xe0, 0x42, 0x72, 0xd0, + 0x2b, 0x89, 0xc0, 0x62, 0x99, 0x3b, 0xef, 0x4d, 0xb6, 0x14, + 0x5d, 0xff, 0x04, 0xa6, 0x96, 0x34, 0xcf, 0x6d, 0x24, 0x86, + 0x7d, 0xdf, 0x1d, 0xbf, 0x44, 0xe6, 0xaf, 0x0d, 0xf6, 0x54, + 0x64, 0xc6, 0x3d, 0x9f, 0xd6, 0x74, 0x8f, 0x2d, 0x16, 0xb4, + 0x4f, 0xed, 0xa4, 0x06, 0xfd, 0x5f, 0x6f, 0xcd, 0x36, 0x94, + 0xdd, 0x7f, 0x84, 0x26, 0xe4, 0x46, 0xbd, 0x1f, 0x56, 0xf4, + 0x0f, 0xad, 0x9d, 0x3f, 0xc4, 0x66, 0x2f, 0x8d, 0x76, 0xd4, + 0xc3, 0x61, 0x9a, 0x38, 0x71, 0xd3, 0x28, 0x8a, 0xba, 0x18, + 0xe3, 0x41, 0x08, 0xaa, 0x51, 0xf3, 0x31, 0x93, 0x68, 0xca, + 0x83, 0x21, 0xda, 0x78, 0x48, 0xea, 0x11, 0xb3, 0xfa, 0x58, + 0xa3, 0x01, 0x3a, 0x98, 0x63, 0xc1, 0x88, 0x2a, 0xd1, 0x73, + 0x43, 0xe1, 0x1a, 0xb8, 0xf1, 0x53, 0xa8, 0x0a, 0xc8, 0x6a, + 0x91, 0x33, 0x7a, 0xd8, 0x23, 0x81, 0xb1, 0x13, 0xe8, 0x4a, + 0x03, 0xa1, 0x5a, 0xf8, 0x2c, 0x8e, 0x75, 0xd7, 0x9e, 0x3c, + 0xc7, 0x65, 0x55, 0xf7, 0x0c, 0xae, 0xe7, 0x45, 0xbe, 0x1c, + 0xde, 0x7c, 0x87, 0x25, 0x6c, 0xce, 0x35, 0x97, 0xa7, 0x05, + 0xfe, 0x5c, 0x15, 0xb7, 0x4c, 0xee, 0xd5, 0x77, 0x8c, 0x2e, + 0x67, 0xc5, 0x3e, 0x9c, 0xac, 0x0e, 0xf5, 0x57, 0x1e, 0xbc, + 0x47, 0xe5, 0x27, 0x85, 0x7e, 0xdc, 0x95, 0x37, 0xcc, 0x6e, + 0x5e, 0xfc, 0x07, 0xa5, 0xec, 0x4e, 0xb5, 0x17, 0x00, 0xa3, + 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, + 0xc7, 0x64, 0x9c, 0x3f, 0xe2, 0x41, 0xb9, 0x1a, 0x54, 0xf7, + 0x0f, 0xac, 0x93, 0x30, 0xc8, 0x6b, 0x25, 0x86, 0x7e, 0xdd, + 0xd9, 0x7a, 0x82, 0x21, 0x6f, 0xcc, 0x34, 0x97, 0xa8, 0x0b, + 0xf3, 0x50, 0x1e, 0xbd, 0x45, 0xe6, 0x3b, 0x98, 0x60, 0xc3, + 0x8d, 0x2e, 0xd6, 0x75, 0x4a, 0xe9, 0x11, 0xb2, 0xfc, 0x5f, + 0xa7, 0x04, 0xaf, 0x0c, 0xf4, 0x57, 0x19, 0xba, 0x42, 0xe1, + 0xde, 0x7d, 0x85, 0x26, 0x68, 0xcb, 0x33, 0x90, 0x4d, 0xee, + 0x16, 0xb5, 0xfb, 0x58, 0xa0, 0x03, 0x3c, 0x9f, 0x67, 0xc4, + 0x8a, 0x29, 0xd1, 0x72, 0x76, 0xd5, 0x2d, 0x8e, 0xc0, 0x63, + 0x9b, 0x38, 0x07, 0xa4, 0x5c, 0xff, 0xb1, 
0x12, 0xea, 0x49, + 0x94, 0x37, 0xcf, 0x6c, 0x22, 0x81, 0x79, 0xda, 0xe5, 0x46, + 0xbe, 0x1d, 0x53, 0xf0, 0x08, 0xab, 0x43, 0xe0, 0x18, 0xbb, + 0xf5, 0x56, 0xae, 0x0d, 0x32, 0x91, 0x69, 0xca, 0x84, 0x27, + 0xdf, 0x7c, 0xa1, 0x02, 0xfa, 0x59, 0x17, 0xb4, 0x4c, 0xef, + 0xd0, 0x73, 0x8b, 0x28, 0x66, 0xc5, 0x3d, 0x9e, 0x9a, 0x39, + 0xc1, 0x62, 0x2c, 0x8f, 0x77, 0xd4, 0xeb, 0x48, 0xb0, 0x13, + 0x5d, 0xfe, 0x06, 0xa5, 0x78, 0xdb, 0x23, 0x80, 0xce, 0x6d, + 0x95, 0x36, 0x09, 0xaa, 0x52, 0xf1, 0xbf, 0x1c, 0xe4, 0x47, + 0xec, 0x4f, 0xb7, 0x14, 0x5a, 0xf9, 0x01, 0xa2, 0x9d, 0x3e, + 0xc6, 0x65, 0x2b, 0x88, 0x70, 0xd3, 0x0e, 0xad, 0x55, 0xf6, + 0xb8, 0x1b, 0xe3, 0x40, 0x7f, 0xdc, 0x24, 0x87, 0xc9, 0x6a, + 0x92, 0x31, 0x35, 0x96, 0x6e, 0xcd, 0x83, 0x20, 0xd8, 0x7b, + 0x44, 0xe7, 0x1f, 0xbc, 0xf2, 0x51, 0xa9, 0x0a, 0xd7, 0x74, + 0x8c, 0x2f, 0x61, 0xc2, 0x3a, 0x99, 0xa6, 0x05, 0xfd, 0x5e, + 0x10, 0xb3, 0x4b, 0xe8, 0x00, 0xa4, 0x55, 0xf1, 0xaa, 0x0e, + 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12, + 0x92, 0x36, 0xc7, 0x63, 0x38, 0x9c, 0x6d, 0xc9, 0xdb, 0x7f, + 0x8e, 0x2a, 0x71, 0xd5, 0x24, 0x80, 0x39, 0x9d, 0x6c, 0xc8, + 0x93, 0x37, 0xc6, 0x62, 0x70, 0xd4, 0x25, 0x81, 0xda, 0x7e, + 0x8f, 0x2b, 0xab, 0x0f, 0xfe, 0x5a, 0x01, 0xa5, 0x54, 0xf0, + 0xe2, 0x46, 0xb7, 0x13, 0x48, 0xec, 0x1d, 0xb9, 0x72, 0xd6, + 0x27, 0x83, 0xd8, 0x7c, 0x8d, 0x29, 0x3b, 0x9f, 0x6e, 0xca, + 0x91, 0x35, 0xc4, 0x60, 0xe0, 0x44, 0xb5, 0x11, 0x4a, 0xee, + 0x1f, 0xbb, 0xa9, 0x0d, 0xfc, 0x58, 0x03, 0xa7, 0x56, 0xf2, + 0x4b, 0xef, 0x1e, 0xba, 0xe1, 0x45, 0xb4, 0x10, 0x02, 0xa6, + 0x57, 0xf3, 0xa8, 0x0c, 0xfd, 0x59, 0xd9, 0x7d, 0x8c, 0x28, + 0x73, 0xd7, 0x26, 0x82, 0x90, 0x34, 0xc5, 0x61, 0x3a, 0x9e, + 0x6f, 0xcb, 0xe4, 0x40, 0xb1, 0x15, 0x4e, 0xea, 0x1b, 0xbf, + 0xad, 0x09, 0xf8, 0x5c, 0x07, 0xa3, 0x52, 0xf6, 0x76, 0xd2, + 0x23, 0x87, 0xdc, 0x78, 0x89, 0x2d, 0x3f, 0x9b, 0x6a, 0xce, + 0x95, 0x31, 0xc0, 0x64, 0xdd, 0x79, 0x88, 0x2c, 0x77, 0xd3, + 0x22, 0x86, 0x94, 0x30, 0xc1, 0x65, 0x3e, 0x9a, 0x6b, 0xcf, + 0x4f, 0xeb, 0x1a, 0xbe, 0xe5, 0x41, 0xb0, 0x14, 0x06, 0xa2, + 0x53, 0xf7, 0xac, 0x08, 0xf9, 0x5d, 0x96, 0x32, 0xc3, 0x67, + 0x3c, 0x98, 0x69, 0xcd, 0xdf, 0x7b, 0x8a, 0x2e, 0x75, 0xd1, + 0x20, 0x84, 0x04, 0xa0, 0x51, 0xf5, 0xae, 0x0a, 0xfb, 0x5f, + 0x4d, 0xe9, 0x18, 0xbc, 0xe7, 0x43, 0xb2, 0x16, 0xaf, 0x0b, + 0xfa, 0x5e, 0x05, 0xa1, 0x50, 0xf4, 0xe6, 0x42, 0xb3, 0x17, + 0x4c, 0xe8, 0x19, 0xbd, 0x3d, 0x99, 0x68, 0xcc, 0x97, 0x33, + 0xc2, 0x66, 0x74, 0xd0, 0x21, 0x85, 0xde, 0x7a, 0x8b, 0x2f, + 0x00, 0xa5, 0x57, 0xf2, 0xae, 0x0b, 0xf9, 0x5c, 0x41, 0xe4, + 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d, 0x82, 0x27, 0xd5, 0x70, + 0x2c, 0x89, 0x7b, 0xde, 0xc3, 0x66, 0x94, 0x31, 0x6d, 0xc8, + 0x3a, 0x9f, 0x19, 0xbc, 0x4e, 0xeb, 0xb7, 0x12, 0xe0, 0x45, + 0x58, 0xfd, 0x0f, 0xaa, 0xf6, 0x53, 0xa1, 0x04, 0x9b, 0x3e, + 0xcc, 0x69, 0x35, 0x90, 0x62, 0xc7, 0xda, 0x7f, 0x8d, 0x28, + 0x74, 0xd1, 0x23, 0x86, 0x32, 0x97, 0x65, 0xc0, 0x9c, 0x39, + 0xcb, 0x6e, 0x73, 0xd6, 0x24, 0x81, 0xdd, 0x78, 0x8a, 0x2f, + 0xb0, 0x15, 0xe7, 0x42, 0x1e, 0xbb, 0x49, 0xec, 0xf1, 0x54, + 0xa6, 0x03, 0x5f, 0xfa, 0x08, 0xad, 0x2b, 0x8e, 0x7c, 0xd9, + 0x85, 0x20, 0xd2, 0x77, 0x6a, 0xcf, 0x3d, 0x98, 0xc4, 0x61, + 0x93, 0x36, 0xa9, 0x0c, 0xfe, 0x5b, 0x07, 0xa2, 0x50, 0xf5, + 0xe8, 0x4d, 0xbf, 0x1a, 0x46, 0xe3, 0x11, 0xb4, 0x64, 0xc1, + 0x33, 0x96, 0xca, 0x6f, 0x9d, 0x38, 0x25, 0x80, 0x72, 0xd7, + 0x8b, 0x2e, 0xdc, 0x79, 0xe6, 0x43, 0xb1, 0x14, 0x48, 0xed, + 0x1f, 0xba, 0xa7, 0x02, 0xf0, 0x55, 0x09, 0xac, 0x5e, 0xfb, + 0x7d, 0xd8, 0x2a, 0x8f, 0xd3, 0x76, 0x84, 0x21, 0x3c, 0x99, + 
0x6b, 0xce, 0x92, 0x37, 0xc5, 0x60, 0xff, 0x5a, 0xa8, 0x0d, + 0x51, 0xf4, 0x06, 0xa3, 0xbe, 0x1b, 0xe9, 0x4c, 0x10, 0xb5, + 0x47, 0xe2, 0x56, 0xf3, 0x01, 0xa4, 0xf8, 0x5d, 0xaf, 0x0a, + 0x17, 0xb2, 0x40, 0xe5, 0xb9, 0x1c, 0xee, 0x4b, 0xd4, 0x71, + 0x83, 0x26, 0x7a, 0xdf, 0x2d, 0x88, 0x95, 0x30, 0xc2, 0x67, + 0x3b, 0x9e, 0x6c, 0xc9, 0x4f, 0xea, 0x18, 0xbd, 0xe1, 0x44, + 0xb6, 0x13, 0x0e, 0xab, 0x59, 0xfc, 0xa0, 0x05, 0xf7, 0x52, + 0xcd, 0x68, 0x9a, 0x3f, 0x63, 0xc6, 0x34, 0x91, 0x8c, 0x29, + 0xdb, 0x7e, 0x22, 0x87, 0x75, 0xd0, 0x00, 0xa6, 0x51, 0xf7, + 0xa2, 0x04, 0xf3, 0x55, 0x59, 0xff, 0x08, 0xae, 0xfb, 0x5d, + 0xaa, 0x0c, 0xb2, 0x14, 0xe3, 0x45, 0x10, 0xb6, 0x41, 0xe7, + 0xeb, 0x4d, 0xba, 0x1c, 0x49, 0xef, 0x18, 0xbe, 0x79, 0xdf, + 0x28, 0x8e, 0xdb, 0x7d, 0x8a, 0x2c, 0x20, 0x86, 0x71, 0xd7, + 0x82, 0x24, 0xd3, 0x75, 0xcb, 0x6d, 0x9a, 0x3c, 0x69, 0xcf, + 0x38, 0x9e, 0x92, 0x34, 0xc3, 0x65, 0x30, 0x96, 0x61, 0xc7, + 0xf2, 0x54, 0xa3, 0x05, 0x50, 0xf6, 0x01, 0xa7, 0xab, 0x0d, + 0xfa, 0x5c, 0x09, 0xaf, 0x58, 0xfe, 0x40, 0xe6, 0x11, 0xb7, + 0xe2, 0x44, 0xb3, 0x15, 0x19, 0xbf, 0x48, 0xee, 0xbb, 0x1d, + 0xea, 0x4c, 0x8b, 0x2d, 0xda, 0x7c, 0x29, 0x8f, 0x78, 0xde, + 0xd2, 0x74, 0x83, 0x25, 0x70, 0xd6, 0x21, 0x87, 0x39, 0x9f, + 0x68, 0xce, 0x9b, 0x3d, 0xca, 0x6c, 0x60, 0xc6, 0x31, 0x97, + 0xc2, 0x64, 0x93, 0x35, 0xf9, 0x5f, 0xa8, 0x0e, 0x5b, 0xfd, + 0x0a, 0xac, 0xa0, 0x06, 0xf1, 0x57, 0x02, 0xa4, 0x53, 0xf5, + 0x4b, 0xed, 0x1a, 0xbc, 0xe9, 0x4f, 0xb8, 0x1e, 0x12, 0xb4, + 0x43, 0xe5, 0xb0, 0x16, 0xe1, 0x47, 0x80, 0x26, 0xd1, 0x77, + 0x22, 0x84, 0x73, 0xd5, 0xd9, 0x7f, 0x88, 0x2e, 0x7b, 0xdd, + 0x2a, 0x8c, 0x32, 0x94, 0x63, 0xc5, 0x90, 0x36, 0xc1, 0x67, + 0x6b, 0xcd, 0x3a, 0x9c, 0xc9, 0x6f, 0x98, 0x3e, 0x0b, 0xad, + 0x5a, 0xfc, 0xa9, 0x0f, 0xf8, 0x5e, 0x52, 0xf4, 0x03, 0xa5, + 0xf0, 0x56, 0xa1, 0x07, 0xb9, 0x1f, 0xe8, 0x4e, 0x1b, 0xbd, + 0x4a, 0xec, 0xe0, 0x46, 0xb1, 0x17, 0x42, 0xe4, 0x13, 0xb5, + 0x72, 0xd4, 0x23, 0x85, 0xd0, 0x76, 0x81, 0x27, 0x2b, 0x8d, + 0x7a, 0xdc, 0x89, 0x2f, 0xd8, 0x7e, 0xc0, 0x66, 0x91, 0x37, + 0x62, 0xc4, 0x33, 0x95, 0x99, 0x3f, 0xc8, 0x6e, 0x3b, 0x9d, + 0x6a, 0xcc, 0x00, 0xa7, 0x53, 0xf4, 0xa6, 0x01, 0xf5, 0x52, + 0x51, 0xf6, 0x02, 0xa5, 0xf7, 0x50, 0xa4, 0x03, 0xa2, 0x05, + 0xf1, 0x56, 0x04, 0xa3, 0x57, 0xf0, 0xf3, 0x54, 0xa0, 0x07, + 0x55, 0xf2, 0x06, 0xa1, 0x59, 0xfe, 0x0a, 0xad, 0xff, 0x58, + 0xac, 0x0b, 0x08, 0xaf, 0x5b, 0xfc, 0xae, 0x09, 0xfd, 0x5a, + 0xfb, 0x5c, 0xa8, 0x0f, 0x5d, 0xfa, 0x0e, 0xa9, 0xaa, 0x0d, + 0xf9, 0x5e, 0x0c, 0xab, 0x5f, 0xf8, 0xb2, 0x15, 0xe1, 0x46, + 0x14, 0xb3, 0x47, 0xe0, 0xe3, 0x44, 0xb0, 0x17, 0x45, 0xe2, + 0x16, 0xb1, 0x10, 0xb7, 0x43, 0xe4, 0xb6, 0x11, 0xe5, 0x42, + 0x41, 0xe6, 0x12, 0xb5, 0xe7, 0x40, 0xb4, 0x13, 0xeb, 0x4c, + 0xb8, 0x1f, 0x4d, 0xea, 0x1e, 0xb9, 0xba, 0x1d, 0xe9, 0x4e, + 0x1c, 0xbb, 0x4f, 0xe8, 0x49, 0xee, 0x1a, 0xbd, 0xef, 0x48, + 0xbc, 0x1b, 0x18, 0xbf, 0x4b, 0xec, 0xbe, 0x19, 0xed, 0x4a, + 0x79, 0xde, 0x2a, 0x8d, 0xdf, 0x78, 0x8c, 0x2b, 0x28, 0x8f, + 0x7b, 0xdc, 0x8e, 0x29, 0xdd, 0x7a, 0xdb, 0x7c, 0x88, 0x2f, + 0x7d, 0xda, 0x2e, 0x89, 0x8a, 0x2d, 0xd9, 0x7e, 0x2c, 0x8b, + 0x7f, 0xd8, 0x20, 0x87, 0x73, 0xd4, 0x86, 0x21, 0xd5, 0x72, + 0x71, 0xd6, 0x22, 0x85, 0xd7, 0x70, 0x84, 0x23, 0x82, 0x25, + 0xd1, 0x76, 0x24, 0x83, 0x77, 0xd0, 0xd3, 0x74, 0x80, 0x27, + 0x75, 0xd2, 0x26, 0x81, 0xcb, 0x6c, 0x98, 0x3f, 0x6d, 0xca, + 0x3e, 0x99, 0x9a, 0x3d, 0xc9, 0x6e, 0x3c, 0x9b, 0x6f, 0xc8, + 0x69, 0xce, 0x3a, 0x9d, 0xcf, 0x68, 0x9c, 0x3b, 0x38, 0x9f, + 0x6b, 0xcc, 0x9e, 0x39, 0xcd, 0x6a, 0x92, 0x35, 0xc1, 0x66, + 0x34, 0x93, 0x67, 
0xc0, 0xc3, 0x64, 0x90, 0x37, 0x65, 0xc2, + 0x36, 0x91, 0x30, 0x97, 0x63, 0xc4, 0x96, 0x31, 0xc5, 0x62, + 0x61, 0xc6, 0x32, 0x95, 0xc7, 0x60, 0x94, 0x33, 0x00, 0xa8, + 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, + 0xb3, 0x1b, 0xfe, 0x56, 0x52, 0xfa, 0x1f, 0xb7, 0xc8, 0x60, + 0x85, 0x2d, 0x7b, 0xd3, 0x36, 0x9e, 0xe1, 0x49, 0xac, 0x04, + 0xa4, 0x0c, 0xe9, 0x41, 0x3e, 0x96, 0x73, 0xdb, 0x8d, 0x25, + 0xc0, 0x68, 0x17, 0xbf, 0x5a, 0xf2, 0xf6, 0x5e, 0xbb, 0x13, + 0x6c, 0xc4, 0x21, 0x89, 0xdf, 0x77, 0x92, 0x3a, 0x45, 0xed, + 0x08, 0xa0, 0x55, 0xfd, 0x18, 0xb0, 0xcf, 0x67, 0x82, 0x2a, + 0x7c, 0xd4, 0x31, 0x99, 0xe6, 0x4e, 0xab, 0x03, 0x07, 0xaf, + 0x4a, 0xe2, 0x9d, 0x35, 0xd0, 0x78, 0x2e, 0x86, 0x63, 0xcb, + 0xb4, 0x1c, 0xf9, 0x51, 0xf1, 0x59, 0xbc, 0x14, 0x6b, 0xc3, + 0x26, 0x8e, 0xd8, 0x70, 0x95, 0x3d, 0x42, 0xea, 0x0f, 0xa7, + 0xa3, 0x0b, 0xee, 0x46, 0x39, 0x91, 0x74, 0xdc, 0x8a, 0x22, + 0xc7, 0x6f, 0x10, 0xb8, 0x5d, 0xf5, 0xaa, 0x02, 0xe7, 0x4f, + 0x30, 0x98, 0x7d, 0xd5, 0x83, 0x2b, 0xce, 0x66, 0x19, 0xb1, + 0x54, 0xfc, 0xf8, 0x50, 0xb5, 0x1d, 0x62, 0xca, 0x2f, 0x87, + 0xd1, 0x79, 0x9c, 0x34, 0x4b, 0xe3, 0x06, 0xae, 0x0e, 0xa6, + 0x43, 0xeb, 0x94, 0x3c, 0xd9, 0x71, 0x27, 0x8f, 0x6a, 0xc2, + 0xbd, 0x15, 0xf0, 0x58, 0x5c, 0xf4, 0x11, 0xb9, 0xc6, 0x6e, + 0x8b, 0x23, 0x75, 0xdd, 0x38, 0x90, 0xef, 0x47, 0xa2, 0x0a, + 0xff, 0x57, 0xb2, 0x1a, 0x65, 0xcd, 0x28, 0x80, 0xd6, 0x7e, + 0x9b, 0x33, 0x4c, 0xe4, 0x01, 0xa9, 0xad, 0x05, 0xe0, 0x48, + 0x37, 0x9f, 0x7a, 0xd2, 0x84, 0x2c, 0xc9, 0x61, 0x1e, 0xb6, + 0x53, 0xfb, 0x5b, 0xf3, 0x16, 0xbe, 0xc1, 0x69, 0x8c, 0x24, + 0x72, 0xda, 0x3f, 0x97, 0xe8, 0x40, 0xa5, 0x0d, 0x09, 0xa1, + 0x44, 0xec, 0x93, 0x3b, 0xde, 0x76, 0x20, 0x88, 0x6d, 0xc5, + 0xba, 0x12, 0xf7, 0x5f, 0x00, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, + 0xd1, 0x78, 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59, + 0x42, 0xeb, 0x0d, 0xa4, 0xdc, 0x75, 0x93, 0x3a, 0x63, 0xca, + 0x2c, 0x85, 0xfd, 0x54, 0xb2, 0x1b, 0x84, 0x2d, 0xcb, 0x62, + 0x1a, 0xb3, 0x55, 0xfc, 0xa5, 0x0c, 0xea, 0x43, 0x3b, 0x92, + 0x74, 0xdd, 0xc6, 0x6f, 0x89, 0x20, 0x58, 0xf1, 0x17, 0xbe, + 0xe7, 0x4e, 0xa8, 0x01, 0x79, 0xd0, 0x36, 0x9f, 0x15, 0xbc, + 0x5a, 0xf3, 0x8b, 0x22, 0xc4, 0x6d, 0x34, 0x9d, 0x7b, 0xd2, + 0xaa, 0x03, 0xe5, 0x4c, 0x57, 0xfe, 0x18, 0xb1, 0xc9, 0x60, + 0x86, 0x2f, 0x76, 0xdf, 0x39, 0x90, 0xe8, 0x41, 0xa7, 0x0e, + 0x91, 0x38, 0xde, 0x77, 0x0f, 0xa6, 0x40, 0xe9, 0xb0, 0x19, + 0xff, 0x56, 0x2e, 0x87, 0x61, 0xc8, 0xd3, 0x7a, 0x9c, 0x35, + 0x4d, 0xe4, 0x02, 0xab, 0xf2, 0x5b, 0xbd, 0x14, 0x6c, 0xc5, + 0x23, 0x8a, 0x2a, 0x83, 0x65, 0xcc, 0xb4, 0x1d, 0xfb, 0x52, + 0x0b, 0xa2, 0x44, 0xed, 0x95, 0x3c, 0xda, 0x73, 0x68, 0xc1, + 0x27, 0x8e, 0xf6, 0x5f, 0xb9, 0x10, 0x49, 0xe0, 0x06, 0xaf, + 0xd7, 0x7e, 0x98, 0x31, 0xae, 0x07, 0xe1, 0x48, 0x30, 0x99, + 0x7f, 0xd6, 0x8f, 0x26, 0xc0, 0x69, 0x11, 0xb8, 0x5e, 0xf7, + 0xec, 0x45, 0xa3, 0x0a, 0x72, 0xdb, 0x3d, 0x94, 0xcd, 0x64, + 0x82, 0x2b, 0x53, 0xfa, 0x1c, 0xb5, 0x3f, 0x96, 0x70, 0xd9, + 0xa1, 0x08, 0xee, 0x47, 0x1e, 0xb7, 0x51, 0xf8, 0x80, 0x29, + 0xcf, 0x66, 0x7d, 0xd4, 0x32, 0x9b, 0xe3, 0x4a, 0xac, 0x05, + 0x5c, 0xf5, 0x13, 0xba, 0xc2, 0x6b, 0x8d, 0x24, 0xbb, 0x12, + 0xf4, 0x5d, 0x25, 0x8c, 0x6a, 0xc3, 0x9a, 0x33, 0xd5, 0x7c, + 0x04, 0xad, 0x4b, 0xe2, 0xf9, 0x50, 0xb6, 0x1f, 0x67, 0xce, + 0x28, 0x81, 0xd8, 0x71, 0x97, 0x3e, 0x46, 0xef, 0x09, 0xa0, + 0x00, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, + 0x70, 0xda, 0xab, 0x01, 0xe2, 0x48, 0x72, 0xd8, 0x3b, 0x91, + 0xe0, 0x4a, 0xa9, 0x03, 0x4b, 0xe1, 0x02, 0xa8, 0xd9, 0x73, + 0x90, 0x3a, 0xe4, 0x4e, 0xad, 0x07, 
0x76, 0xdc, 0x3f, 0x95, + 0xdd, 0x77, 0x94, 0x3e, 0x4f, 0xe5, 0x06, 0xac, 0x96, 0x3c, + 0xdf, 0x75, 0x04, 0xae, 0x4d, 0xe7, 0xaf, 0x05, 0xe6, 0x4c, + 0x3d, 0x97, 0x74, 0xde, 0xd5, 0x7f, 0x9c, 0x36, 0x47, 0xed, + 0x0e, 0xa4, 0xec, 0x46, 0xa5, 0x0f, 0x7e, 0xd4, 0x37, 0x9d, + 0xa7, 0x0d, 0xee, 0x44, 0x35, 0x9f, 0x7c, 0xd6, 0x9e, 0x34, + 0xd7, 0x7d, 0x0c, 0xa6, 0x45, 0xef, 0x31, 0x9b, 0x78, 0xd2, + 0xa3, 0x09, 0xea, 0x40, 0x08, 0xa2, 0x41, 0xeb, 0x9a, 0x30, + 0xd3, 0x79, 0x43, 0xe9, 0x0a, 0xa0, 0xd1, 0x7b, 0x98, 0x32, + 0x7a, 0xd0, 0x33, 0x99, 0xe8, 0x42, 0xa1, 0x0b, 0xb7, 0x1d, + 0xfe, 0x54, 0x25, 0x8f, 0x6c, 0xc6, 0x8e, 0x24, 0xc7, 0x6d, + 0x1c, 0xb6, 0x55, 0xff, 0xc5, 0x6f, 0x8c, 0x26, 0x57, 0xfd, + 0x1e, 0xb4, 0xfc, 0x56, 0xb5, 0x1f, 0x6e, 0xc4, 0x27, 0x8d, + 0x53, 0xf9, 0x1a, 0xb0, 0xc1, 0x6b, 0x88, 0x22, 0x6a, 0xc0, + 0x23, 0x89, 0xf8, 0x52, 0xb1, 0x1b, 0x21, 0x8b, 0x68, 0xc2, + 0xb3, 0x19, 0xfa, 0x50, 0x18, 0xb2, 0x51, 0xfb, 0x8a, 0x20, + 0xc3, 0x69, 0x62, 0xc8, 0x2b, 0x81, 0xf0, 0x5a, 0xb9, 0x13, + 0x5b, 0xf1, 0x12, 0xb8, 0xc9, 0x63, 0x80, 0x2a, 0x10, 0xba, + 0x59, 0xf3, 0x82, 0x28, 0xcb, 0x61, 0x29, 0x83, 0x60, 0xca, + 0xbb, 0x11, 0xf2, 0x58, 0x86, 0x2c, 0xcf, 0x65, 0x14, 0xbe, + 0x5d, 0xf7, 0xbf, 0x15, 0xf6, 0x5c, 0x2d, 0x87, 0x64, 0xce, + 0xf4, 0x5e, 0xbd, 0x17, 0x66, 0xcc, 0x2f, 0x85, 0xcd, 0x67, + 0x84, 0x2e, 0x5f, 0xf5, 0x16, 0xbc, 0x00, 0xab, 0x4b, 0xe0, + 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0x0c, + 0xec, 0x47, 0x62, 0xc9, 0x29, 0x82, 0xf4, 0x5f, 0xbf, 0x14, + 0x53, 0xf8, 0x18, 0xb3, 0xc5, 0x6e, 0x8e, 0x25, 0xc4, 0x6f, + 0x8f, 0x24, 0x52, 0xf9, 0x19, 0xb2, 0xf5, 0x5e, 0xbe, 0x15, + 0x63, 0xc8, 0x28, 0x83, 0xa6, 0x0d, 0xed, 0x46, 0x30, 0x9b, + 0x7b, 0xd0, 0x97, 0x3c, 0xdc, 0x77, 0x01, 0xaa, 0x4a, 0xe1, + 0x95, 0x3e, 0xde, 0x75, 0x03, 0xa8, 0x48, 0xe3, 0xa4, 0x0f, + 0xef, 0x44, 0x32, 0x99, 0x79, 0xd2, 0xf7, 0x5c, 0xbc, 0x17, + 0x61, 0xca, 0x2a, 0x81, 0xc6, 0x6d, 0x8d, 0x26, 0x50, 0xfb, + 0x1b, 0xb0, 0x51, 0xfa, 0x1a, 0xb1, 0xc7, 0x6c, 0x8c, 0x27, + 0x60, 0xcb, 0x2b, 0x80, 0xf6, 0x5d, 0xbd, 0x16, 0x33, 0x98, + 0x78, 0xd3, 0xa5, 0x0e, 0xee, 0x45, 0x02, 0xa9, 0x49, 0xe2, + 0x94, 0x3f, 0xdf, 0x74, 0x37, 0x9c, 0x7c, 0xd7, 0xa1, 0x0a, + 0xea, 0x41, 0x06, 0xad, 0x4d, 0xe6, 0x90, 0x3b, 0xdb, 0x70, + 0x55, 0xfe, 0x1e, 0xb5, 0xc3, 0x68, 0x88, 0x23, 0x64, 0xcf, + 0x2f, 0x84, 0xf2, 0x59, 0xb9, 0x12, 0xf3, 0x58, 0xb8, 0x13, + 0x65, 0xce, 0x2e, 0x85, 0xc2, 0x69, 0x89, 0x22, 0x54, 0xff, + 0x1f, 0xb4, 0x91, 0x3a, 0xda, 0x71, 0x07, 0xac, 0x4c, 0xe7, + 0xa0, 0x0b, 0xeb, 0x40, 0x36, 0x9d, 0x7d, 0xd6, 0xa2, 0x09, + 0xe9, 0x42, 0x34, 0x9f, 0x7f, 0xd4, 0x93, 0x38, 0xd8, 0x73, + 0x05, 0xae, 0x4e, 0xe5, 0xc0, 0x6b, 0x8b, 0x20, 0x56, 0xfd, + 0x1d, 0xb6, 0xf1, 0x5a, 0xba, 0x11, 0x67, 0xcc, 0x2c, 0x87, + 0x66, 0xcd, 0x2d, 0x86, 0xf0, 0x5b, 0xbb, 0x10, 0x57, 0xfc, + 0x1c, 0xb7, 0xc1, 0x6a, 0x8a, 0x21, 0x04, 0xaf, 0x4f, 0xe4, + 0x92, 0x39, 0xd9, 0x72, 0x35, 0x9e, 0x7e, 0xd5, 0xa3, 0x08, + 0xe8, 0x43, 0x00, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63, + 0x09, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a, 0x12, 0xbe, + 0x57, 0xfb, 0x98, 0x34, 0xdd, 0x71, 0x1b, 0xb7, 0x5e, 0xf2, + 0x91, 0x3d, 0xd4, 0x78, 0x24, 0x88, 0x61, 0xcd, 0xae, 0x02, + 0xeb, 0x47, 0x2d, 0x81, 0x68, 0xc4, 0xa7, 0x0b, 0xe2, 0x4e, + 0x36, 0x9a, 0x73, 0xdf, 0xbc, 0x10, 0xf9, 0x55, 0x3f, 0x93, + 0x7a, 0xd6, 0xb5, 0x19, 0xf0, 0x5c, 0x48, 0xe4, 0x0d, 0xa1, + 0xc2, 0x6e, 0x87, 0x2b, 0x41, 0xed, 0x04, 0xa8, 0xcb, 0x67, + 0x8e, 0x22, 0x5a, 0xf6, 0x1f, 0xb3, 0xd0, 0x7c, 0x95, 0x39, + 0x53, 0xff, 0x16, 0xba, 0xd9, 0x75, 0x9c, 0x30, 0x6c, 
0xc0, + 0x29, 0x85, 0xe6, 0x4a, 0xa3, 0x0f, 0x65, 0xc9, 0x20, 0x8c, + 0xef, 0x43, 0xaa, 0x06, 0x7e, 0xd2, 0x3b, 0x97, 0xf4, 0x58, + 0xb1, 0x1d, 0x77, 0xdb, 0x32, 0x9e, 0xfd, 0x51, 0xb8, 0x14, + 0x90, 0x3c, 0xd5, 0x79, 0x1a, 0xb6, 0x5f, 0xf3, 0x99, 0x35, + 0xdc, 0x70, 0x13, 0xbf, 0x56, 0xfa, 0x82, 0x2e, 0xc7, 0x6b, + 0x08, 0xa4, 0x4d, 0xe1, 0x8b, 0x27, 0xce, 0x62, 0x01, 0xad, + 0x44, 0xe8, 0xb4, 0x18, 0xf1, 0x5d, 0x3e, 0x92, 0x7b, 0xd7, + 0xbd, 0x11, 0xf8, 0x54, 0x37, 0x9b, 0x72, 0xde, 0xa6, 0x0a, + 0xe3, 0x4f, 0x2c, 0x80, 0x69, 0xc5, 0xaf, 0x03, 0xea, 0x46, + 0x25, 0x89, 0x60, 0xcc, 0xd8, 0x74, 0x9d, 0x31, 0x52, 0xfe, + 0x17, 0xbb, 0xd1, 0x7d, 0x94, 0x38, 0x5b, 0xf7, 0x1e, 0xb2, + 0xca, 0x66, 0x8f, 0x23, 0x40, 0xec, 0x05, 0xa9, 0xc3, 0x6f, + 0x86, 0x2a, 0x49, 0xe5, 0x0c, 0xa0, 0xfc, 0x50, 0xb9, 0x15, + 0x76, 0xda, 0x33, 0x9f, 0xf5, 0x59, 0xb0, 0x1c, 0x7f, 0xd3, + 0x3a, 0x96, 0xee, 0x42, 0xab, 0x07, 0x64, 0xc8, 0x21, 0x8d, + 0xe7, 0x4b, 0xa2, 0x0e, 0x6d, 0xc1, 0x28, 0x84, 0x00, 0xad, + 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x01, 0xac, 0x46, 0xeb, + 0x8f, 0x22, 0xc8, 0x65, 0x02, 0xaf, 0x45, 0xe8, 0x8c, 0x21, + 0xcb, 0x66, 0x03, 0xae, 0x44, 0xe9, 0x8d, 0x20, 0xca, 0x67, + 0x04, 0xa9, 0x43, 0xee, 0x8a, 0x27, 0xcd, 0x60, 0x05, 0xa8, + 0x42, 0xef, 0x8b, 0x26, 0xcc, 0x61, 0x06, 0xab, 0x41, 0xec, + 0x88, 0x25, 0xcf, 0x62, 0x07, 0xaa, 0x40, 0xed, 0x89, 0x24, + 0xce, 0x63, 0x08, 0xa5, 0x4f, 0xe2, 0x86, 0x2b, 0xc1, 0x6c, + 0x09, 0xa4, 0x4e, 0xe3, 0x87, 0x2a, 0xc0, 0x6d, 0x0a, 0xa7, + 0x4d, 0xe0, 0x84, 0x29, 0xc3, 0x6e, 0x0b, 0xa6, 0x4c, 0xe1, + 0x85, 0x28, 0xc2, 0x6f, 0x0c, 0xa1, 0x4b, 0xe6, 0x82, 0x2f, + 0xc5, 0x68, 0x0d, 0xa0, 0x4a, 0xe7, 0x83, 0x2e, 0xc4, 0x69, + 0x0e, 0xa3, 0x49, 0xe4, 0x80, 0x2d, 0xc7, 0x6a, 0x0f, 0xa2, + 0x48, 0xe5, 0x81, 0x2c, 0xc6, 0x6b, 0x10, 0xbd, 0x57, 0xfa, + 0x9e, 0x33, 0xd9, 0x74, 0x11, 0xbc, 0x56, 0xfb, 0x9f, 0x32, + 0xd8, 0x75, 0x12, 0xbf, 0x55, 0xf8, 0x9c, 0x31, 0xdb, 0x76, + 0x13, 0xbe, 0x54, 0xf9, 0x9d, 0x30, 0xda, 0x77, 0x14, 0xb9, + 0x53, 0xfe, 0x9a, 0x37, 0xdd, 0x70, 0x15, 0xb8, 0x52, 0xff, + 0x9b, 0x36, 0xdc, 0x71, 0x16, 0xbb, 0x51, 0xfc, 0x98, 0x35, + 0xdf, 0x72, 0x17, 0xba, 0x50, 0xfd, 0x99, 0x34, 0xde, 0x73, + 0x18, 0xb5, 0x5f, 0xf2, 0x96, 0x3b, 0xd1, 0x7c, 0x19, 0xb4, + 0x5e, 0xf3, 0x97, 0x3a, 0xd0, 0x7d, 0x1a, 0xb7, 0x5d, 0xf0, + 0x94, 0x39, 0xd3, 0x7e, 0x1b, 0xb6, 0x5c, 0xf1, 0x95, 0x38, + 0xd2, 0x7f, 0x1c, 0xb1, 0x5b, 0xf6, 0x92, 0x3f, 0xd5, 0x78, + 0x1d, 0xb0, 0x5a, 0xf7, 0x93, 0x3e, 0xd4, 0x79, 0x1e, 0xb3, + 0x59, 0xf4, 0x90, 0x3d, 0xd7, 0x7a, 0x1f, 0xb2, 0x58, 0xf5, + 0x91, 0x3c, 0xd6, 0x7b, 0x00, 0xae, 0x41, 0xef, 0x82, 0x2c, + 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74, + 0x32, 0x9c, 0x73, 0xdd, 0xb0, 0x1e, 0xf1, 0x5f, 0x2b, 0x85, + 0x6a, 0xc4, 0xa9, 0x07, 0xe8, 0x46, 0x64, 0xca, 0x25, 0x8b, + 0xe6, 0x48, 0xa7, 0x09, 0x7d, 0xd3, 0x3c, 0x92, 0xff, 0x51, + 0xbe, 0x10, 0x56, 0xf8, 0x17, 0xb9, 0xd4, 0x7a, 0x95, 0x3b, + 0x4f, 0xe1, 0x0e, 0xa0, 0xcd, 0x63, 0x8c, 0x22, 0xc8, 0x66, + 0x89, 0x27, 0x4a, 0xe4, 0x0b, 0xa5, 0xd1, 0x7f, 0x90, 0x3e, + 0x53, 0xfd, 0x12, 0xbc, 0xfa, 0x54, 0xbb, 0x15, 0x78, 0xd6, + 0x39, 0x97, 0xe3, 0x4d, 0xa2, 0x0c, 0x61, 0xcf, 0x20, 0x8e, + 0xac, 0x02, 0xed, 0x43, 0x2e, 0x80, 0x6f, 0xc1, 0xb5, 0x1b, + 0xf4, 0x5a, 0x37, 0x99, 0x76, 0xd8, 0x9e, 0x30, 0xdf, 0x71, + 0x1c, 0xb2, 0x5d, 0xf3, 0x87, 0x29, 0xc6, 0x68, 0x05, 0xab, + 0x44, 0xea, 0x8d, 0x23, 0xcc, 0x62, 0x0f, 0xa1, 0x4e, 0xe0, + 0x94, 0x3a, 0xd5, 0x7b, 0x16, 0xb8, 0x57, 0xf9, 0xbf, 0x11, + 0xfe, 0x50, 0x3d, 0x93, 0x7c, 0xd2, 0xa6, 0x08, 0xe7, 0x49, + 0x24, 0x8a, 
0x65, 0xcb, 0xe9, 0x47, 0xa8, 0x06, 0x6b, 0xc5, + 0x2a, 0x84, 0xf0, 0x5e, 0xb1, 0x1f, 0x72, 0xdc, 0x33, 0x9d, + 0xdb, 0x75, 0x9a, 0x34, 0x59, 0xf7, 0x18, 0xb6, 0xc2, 0x6c, + 0x83, 0x2d, 0x40, 0xee, 0x01, 0xaf, 0x45, 0xeb, 0x04, 0xaa, + 0xc7, 0x69, 0x86, 0x28, 0x5c, 0xf2, 0x1d, 0xb3, 0xde, 0x70, + 0x9f, 0x31, 0x77, 0xd9, 0x36, 0x98, 0xf5, 0x5b, 0xb4, 0x1a, + 0x6e, 0xc0, 0x2f, 0x81, 0xec, 0x42, 0xad, 0x03, 0x21, 0x8f, + 0x60, 0xce, 0xa3, 0x0d, 0xe2, 0x4c, 0x38, 0x96, 0x79, 0xd7, + 0xba, 0x14, 0xfb, 0x55, 0x13, 0xbd, 0x52, 0xfc, 0x91, 0x3f, + 0xd0, 0x7e, 0x0a, 0xa4, 0x4b, 0xe5, 0x88, 0x26, 0xc9, 0x67, + 0x00, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, + 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b, 0x22, 0x8d, 0x61, 0xce, + 0xa4, 0x0b, 0xe7, 0x48, 0x33, 0x9c, 0x70, 0xdf, 0xb5, 0x1a, + 0xf6, 0x59, 0x44, 0xeb, 0x07, 0xa8, 0xc2, 0x6d, 0x81, 0x2e, + 0x55, 0xfa, 0x16, 0xb9, 0xd3, 0x7c, 0x90, 0x3f, 0x66, 0xc9, + 0x25, 0x8a, 0xe0, 0x4f, 0xa3, 0x0c, 0x77, 0xd8, 0x34, 0x9b, + 0xf1, 0x5e, 0xb2, 0x1d, 0x88, 0x27, 0xcb, 0x64, 0x0e, 0xa1, + 0x4d, 0xe2, 0x99, 0x36, 0xda, 0x75, 0x1f, 0xb0, 0x5c, 0xf3, + 0xaa, 0x05, 0xe9, 0x46, 0x2c, 0x83, 0x6f, 0xc0, 0xbb, 0x14, + 0xf8, 0x57, 0x3d, 0x92, 0x7e, 0xd1, 0xcc, 0x63, 0x8f, 0x20, + 0x4a, 0xe5, 0x09, 0xa6, 0xdd, 0x72, 0x9e, 0x31, 0x5b, 0xf4, + 0x18, 0xb7, 0xee, 0x41, 0xad, 0x02, 0x68, 0xc7, 0x2b, 0x84, + 0xff, 0x50, 0xbc, 0x13, 0x79, 0xd6, 0x3a, 0x95, 0x0d, 0xa2, + 0x4e, 0xe1, 0x8b, 0x24, 0xc8, 0x67, 0x1c, 0xb3, 0x5f, 0xf0, + 0x9a, 0x35, 0xd9, 0x76, 0x2f, 0x80, 0x6c, 0xc3, 0xa9, 0x06, + 0xea, 0x45, 0x3e, 0x91, 0x7d, 0xd2, 0xb8, 0x17, 0xfb, 0x54, + 0x49, 0xe6, 0x0a, 0xa5, 0xcf, 0x60, 0x8c, 0x23, 0x58, 0xf7, + 0x1b, 0xb4, 0xde, 0x71, 0x9d, 0x32, 0x6b, 0xc4, 0x28, 0x87, + 0xed, 0x42, 0xae, 0x01, 0x7a, 0xd5, 0x39, 0x96, 0xfc, 0x53, + 0xbf, 0x10, 0x85, 0x2a, 0xc6, 0x69, 0x03, 0xac, 0x40, 0xef, + 0x94, 0x3b, 0xd7, 0x78, 0x12, 0xbd, 0x51, 0xfe, 0xa7, 0x08, + 0xe4, 0x4b, 0x21, 0x8e, 0x62, 0xcd, 0xb6, 0x19, 0xf5, 0x5a, + 0x30, 0x9f, 0x73, 0xdc, 0xc1, 0x6e, 0x82, 0x2d, 0x47, 0xe8, + 0x04, 0xab, 0xd0, 0x7f, 0x93, 0x3c, 0x56, 0xf9, 0x15, 0xba, + 0xe3, 0x4c, 0xa0, 0x0f, 0x65, 0xca, 0x26, 0x89, 0xf2, 0x5d, + 0xb1, 0x1e, 0x74, 0xdb, 0x37, 0x98, 0x00, 0xb0, 0x7d, 0xcd, + 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, + 0x6e, 0xde, 0xcf, 0x7f, 0xb2, 0x02, 0x35, 0x85, 0x48, 0xf8, + 0x26, 0x96, 0x5b, 0xeb, 0xdc, 0x6c, 0xa1, 0x11, 0x83, 0x33, + 0xfe, 0x4e, 0x79, 0xc9, 0x04, 0xb4, 0x6a, 0xda, 0x17, 0xa7, + 0x90, 0x20, 0xed, 0x5d, 0x4c, 0xfc, 0x31, 0x81, 0xb6, 0x06, + 0xcb, 0x7b, 0xa5, 0x15, 0xd8, 0x68, 0x5f, 0xef, 0x22, 0x92, + 0x1b, 0xab, 0x66, 0xd6, 0xe1, 0x51, 0x9c, 0x2c, 0xf2, 0x42, + 0x8f, 0x3f, 0x08, 0xb8, 0x75, 0xc5, 0xd4, 0x64, 0xa9, 0x19, + 0x2e, 0x9e, 0x53, 0xe3, 0x3d, 0x8d, 0x40, 0xf0, 0xc7, 0x77, + 0xba, 0x0a, 0x98, 0x28, 0xe5, 0x55, 0x62, 0xd2, 0x1f, 0xaf, + 0x71, 0xc1, 0x0c, 0xbc, 0x8b, 0x3b, 0xf6, 0x46, 0x57, 0xe7, + 0x2a, 0x9a, 0xad, 0x1d, 0xd0, 0x60, 0xbe, 0x0e, 0xc3, 0x73, + 0x44, 0xf4, 0x39, 0x89, 0x36, 0x86, 0x4b, 0xfb, 0xcc, 0x7c, + 0xb1, 0x01, 0xdf, 0x6f, 0xa2, 0x12, 0x25, 0x95, 0x58, 0xe8, + 0xf9, 0x49, 0x84, 0x34, 0x03, 0xb3, 0x7e, 0xce, 0x10, 0xa0, + 0x6d, 0xdd, 0xea, 0x5a, 0x97, 0x27, 0xb5, 0x05, 0xc8, 0x78, + 0x4f, 0xff, 0x32, 0x82, 0x5c, 0xec, 0x21, 0x91, 0xa6, 0x16, + 0xdb, 0x6b, 0x7a, 0xca, 0x07, 0xb7, 0x80, 0x30, 0xfd, 0x4d, + 0x93, 0x23, 0xee, 0x5e, 0x69, 0xd9, 0x14, 0xa4, 0x2d, 0x9d, + 0x50, 0xe0, 0xd7, 0x67, 0xaa, 0x1a, 0xc4, 0x74, 0xb9, 0x09, + 0x3e, 0x8e, 0x43, 0xf3, 0xe2, 0x52, 0x9f, 0x2f, 0x18, 0xa8, + 0x65, 0xd5, 0x0b, 0xbb, 0x76, 
0xc6, 0xf1, 0x41, 0x8c, 0x3c, + 0xae, 0x1e, 0xd3, 0x63, 0x54, 0xe4, 0x29, 0x99, 0x47, 0xf7, + 0x3a, 0x8a, 0xbd, 0x0d, 0xc0, 0x70, 0x61, 0xd1, 0x1c, 0xac, + 0x9b, 0x2b, 0xe6, 0x56, 0x88, 0x38, 0xf5, 0x45, 0x72, 0xc2, + 0x0f, 0xbf, 0x00, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, + 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1, 0xdf, 0x6e, + 0xa0, 0x11, 0x21, 0x90, 0x5e, 0xef, 0x3e, 0x8f, 0x41, 0xf0, + 0xc0, 0x71, 0xbf, 0x0e, 0xa3, 0x12, 0xdc, 0x6d, 0x5d, 0xec, + 0x22, 0x93, 0x42, 0xf3, 0x3d, 0x8c, 0xbc, 0x0d, 0xc3, 0x72, + 0x7c, 0xcd, 0x03, 0xb2, 0x82, 0x33, 0xfd, 0x4c, 0x9d, 0x2c, + 0xe2, 0x53, 0x63, 0xd2, 0x1c, 0xad, 0x5b, 0xea, 0x24, 0x95, + 0xa5, 0x14, 0xda, 0x6b, 0xba, 0x0b, 0xc5, 0x74, 0x44, 0xf5, + 0x3b, 0x8a, 0x84, 0x35, 0xfb, 0x4a, 0x7a, 0xcb, 0x05, 0xb4, + 0x65, 0xd4, 0x1a, 0xab, 0x9b, 0x2a, 0xe4, 0x55, 0xf8, 0x49, + 0x87, 0x36, 0x06, 0xb7, 0x79, 0xc8, 0x19, 0xa8, 0x66, 0xd7, + 0xe7, 0x56, 0x98, 0x29, 0x27, 0x96, 0x58, 0xe9, 0xd9, 0x68, + 0xa6, 0x17, 0xc6, 0x77, 0xb9, 0x08, 0x38, 0x89, 0x47, 0xf6, + 0xb6, 0x07, 0xc9, 0x78, 0x48, 0xf9, 0x37, 0x86, 0x57, 0xe6, + 0x28, 0x99, 0xa9, 0x18, 0xd6, 0x67, 0x69, 0xd8, 0x16, 0xa7, + 0x97, 0x26, 0xe8, 0x59, 0x88, 0x39, 0xf7, 0x46, 0x76, 0xc7, + 0x09, 0xb8, 0x15, 0xa4, 0x6a, 0xdb, 0xeb, 0x5a, 0x94, 0x25, + 0xf4, 0x45, 0x8b, 0x3a, 0x0a, 0xbb, 0x75, 0xc4, 0xca, 0x7b, + 0xb5, 0x04, 0x34, 0x85, 0x4b, 0xfa, 0x2b, 0x9a, 0x54, 0xe5, + 0xd5, 0x64, 0xaa, 0x1b, 0xed, 0x5c, 0x92, 0x23, 0x13, 0xa2, + 0x6c, 0xdd, 0x0c, 0xbd, 0x73, 0xc2, 0xf2, 0x43, 0x8d, 0x3c, + 0x32, 0x83, 0x4d, 0xfc, 0xcc, 0x7d, 0xb3, 0x02, 0xd3, 0x62, + 0xac, 0x1d, 0x2d, 0x9c, 0x52, 0xe3, 0x4e, 0xff, 0x31, 0x80, + 0xb0, 0x01, 0xcf, 0x7e, 0xaf, 0x1e, 0xd0, 0x61, 0x51, 0xe0, + 0x2e, 0x9f, 0x91, 0x20, 0xee, 0x5f, 0x6f, 0xde, 0x10, 0xa1, + 0x70, 0xc1, 0x0f, 0xbe, 0x8e, 0x3f, 0xf1, 0x40, 0x00, 0xb2, + 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, + 0x0b, 0xb9, 0x72, 0xc0, 0xef, 0x5d, 0x96, 0x24, 0x1d, 0xaf, + 0x64, 0xd6, 0x16, 0xa4, 0x6f, 0xdd, 0xe4, 0x56, 0x9d, 0x2f, + 0xc3, 0x71, 0xba, 0x08, 0x31, 0x83, 0x48, 0xfa, 0x3a, 0x88, + 0x43, 0xf1, 0xc8, 0x7a, 0xb1, 0x03, 0x2c, 0x9e, 0x55, 0xe7, + 0xde, 0x6c, 0xa7, 0x15, 0xd5, 0x67, 0xac, 0x1e, 0x27, 0x95, + 0x5e, 0xec, 0x9b, 0x29, 0xe2, 0x50, 0x69, 0xdb, 0x10, 0xa2, + 0x62, 0xd0, 0x1b, 0xa9, 0x90, 0x22, 0xe9, 0x5b, 0x74, 0xc6, + 0x0d, 0xbf, 0x86, 0x34, 0xff, 0x4d, 0x8d, 0x3f, 0xf4, 0x46, + 0x7f, 0xcd, 0x06, 0xb4, 0x58, 0xea, 0x21, 0x93, 0xaa, 0x18, + 0xd3, 0x61, 0xa1, 0x13, 0xd8, 0x6a, 0x53, 0xe1, 0x2a, 0x98, + 0xb7, 0x05, 0xce, 0x7c, 0x45, 0xf7, 0x3c, 0x8e, 0x4e, 0xfc, + 0x37, 0x85, 0xbc, 0x0e, 0xc5, 0x77, 0x2b, 0x99, 0x52, 0xe0, + 0xd9, 0x6b, 0xa0, 0x12, 0xd2, 0x60, 0xab, 0x19, 0x20, 0x92, + 0x59, 0xeb, 0xc4, 0x76, 0xbd, 0x0f, 0x36, 0x84, 0x4f, 0xfd, + 0x3d, 0x8f, 0x44, 0xf6, 0xcf, 0x7d, 0xb6, 0x04, 0xe8, 0x5a, + 0x91, 0x23, 0x1a, 0xa8, 0x63, 0xd1, 0x11, 0xa3, 0x68, 0xda, + 0xe3, 0x51, 0x9a, 0x28, 0x07, 0xb5, 0x7e, 0xcc, 0xf5, 0x47, + 0x8c, 0x3e, 0xfe, 0x4c, 0x87, 0x35, 0x0c, 0xbe, 0x75, 0xc7, + 0xb0, 0x02, 0xc9, 0x7b, 0x42, 0xf0, 0x3b, 0x89, 0x49, 0xfb, + 0x30, 0x82, 0xbb, 0x09, 0xc2, 0x70, 0x5f, 0xed, 0x26, 0x94, + 0xad, 0x1f, 0xd4, 0x66, 0xa6, 0x14, 0xdf, 0x6d, 0x54, 0xe6, + 0x2d, 0x9f, 0x73, 0xc1, 0x0a, 0xb8, 0x81, 0x33, 0xf8, 0x4a, + 0x8a, 0x38, 0xf3, 0x41, 0x78, 0xca, 0x01, 0xb3, 0x9c, 0x2e, + 0xe5, 0x57, 0x6e, 0xdc, 0x17, 0xa5, 0x65, 0xd7, 0x1c, 0xae, + 0x97, 0x25, 0xee, 0x5c, 0x00, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, + 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x07, 0xb4, 0x7c, 0xcf, + 0xff, 0x4c, 0x84, 0x37, 0x09, 0xba, 0x72, 0xc1, 
0x0e, 0xbd, + 0x75, 0xc6, 0xf8, 0x4b, 0x83, 0x30, 0xe3, 0x50, 0x98, 0x2b, + 0x15, 0xa6, 0x6e, 0xdd, 0x12, 0xa1, 0x69, 0xda, 0xe4, 0x57, + 0x9f, 0x2c, 0x1c, 0xaf, 0x67, 0xd4, 0xea, 0x59, 0x91, 0x22, + 0xed, 0x5e, 0x96, 0x25, 0x1b, 0xa8, 0x60, 0xd3, 0xdb, 0x68, + 0xa0, 0x13, 0x2d, 0x9e, 0x56, 0xe5, 0x2a, 0x99, 0x51, 0xe2, + 0xdc, 0x6f, 0xa7, 0x14, 0x24, 0x97, 0x5f, 0xec, 0xd2, 0x61, + 0xa9, 0x1a, 0xd5, 0x66, 0xae, 0x1d, 0x23, 0x90, 0x58, 0xeb, + 0x38, 0x8b, 0x43, 0xf0, 0xce, 0x7d, 0xb5, 0x06, 0xc9, 0x7a, + 0xb2, 0x01, 0x3f, 0x8c, 0x44, 0xf7, 0xc7, 0x74, 0xbc, 0x0f, + 0x31, 0x82, 0x4a, 0xf9, 0x36, 0x85, 0x4d, 0xfe, 0xc0, 0x73, + 0xbb, 0x08, 0xab, 0x18, 0xd0, 0x63, 0x5d, 0xee, 0x26, 0x95, + 0x5a, 0xe9, 0x21, 0x92, 0xac, 0x1f, 0xd7, 0x64, 0x54, 0xe7, + 0x2f, 0x9c, 0xa2, 0x11, 0xd9, 0x6a, 0xa5, 0x16, 0xde, 0x6d, + 0x53, 0xe0, 0x28, 0x9b, 0x48, 0xfb, 0x33, 0x80, 0xbe, 0x0d, + 0xc5, 0x76, 0xb9, 0x0a, 0xc2, 0x71, 0x4f, 0xfc, 0x34, 0x87, + 0xb7, 0x04, 0xcc, 0x7f, 0x41, 0xf2, 0x3a, 0x89, 0x46, 0xf5, + 0x3d, 0x8e, 0xb0, 0x03, 0xcb, 0x78, 0x70, 0xc3, 0x0b, 0xb8, + 0x86, 0x35, 0xfd, 0x4e, 0x81, 0x32, 0xfa, 0x49, 0x77, 0xc4, + 0x0c, 0xbf, 0x8f, 0x3c, 0xf4, 0x47, 0x79, 0xca, 0x02, 0xb1, + 0x7e, 0xcd, 0x05, 0xb6, 0x88, 0x3b, 0xf3, 0x40, 0x93, 0x20, + 0xe8, 0x5b, 0x65, 0xd6, 0x1e, 0xad, 0x62, 0xd1, 0x19, 0xaa, + 0x94, 0x27, 0xef, 0x5c, 0x6c, 0xdf, 0x17, 0xa4, 0x9a, 0x29, + 0xe1, 0x52, 0x9d, 0x2e, 0xe6, 0x55, 0x6b, 0xd8, 0x10, 0xa3, + 0x00, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, + 0xbc, 0x08, 0x23, 0x97, 0x56, 0xe2, 0x8f, 0x3b, 0xfa, 0x4e, + 0x65, 0xd1, 0x10, 0xa4, 0x46, 0xf2, 0x33, 0x87, 0xac, 0x18, + 0xd9, 0x6d, 0x03, 0xb7, 0x76, 0xc2, 0xe9, 0x5d, 0x9c, 0x28, + 0xca, 0x7e, 0xbf, 0x0b, 0x20, 0x94, 0x55, 0xe1, 0x8c, 0x38, + 0xf9, 0x4d, 0x66, 0xd2, 0x13, 0xa7, 0x45, 0xf1, 0x30, 0x84, + 0xaf, 0x1b, 0xda, 0x6e, 0x06, 0xb2, 0x73, 0xc7, 0xec, 0x58, + 0x99, 0x2d, 0xcf, 0x7b, 0xba, 0x0e, 0x25, 0x91, 0x50, 0xe4, + 0x89, 0x3d, 0xfc, 0x48, 0x63, 0xd7, 0x16, 0xa2, 0x40, 0xf4, + 0x35, 0x81, 0xaa, 0x1e, 0xdf, 0x6b, 0x05, 0xb1, 0x70, 0xc4, + 0xef, 0x5b, 0x9a, 0x2e, 0xcc, 0x78, 0xb9, 0x0d, 0x26, 0x92, + 0x53, 0xe7, 0x8a, 0x3e, 0xff, 0x4b, 0x60, 0xd4, 0x15, 0xa1, + 0x43, 0xf7, 0x36, 0x82, 0xa9, 0x1d, 0xdc, 0x68, 0x0c, 0xb8, + 0x79, 0xcd, 0xe6, 0x52, 0x93, 0x27, 0xc5, 0x71, 0xb0, 0x04, + 0x2f, 0x9b, 0x5a, 0xee, 0x83, 0x37, 0xf6, 0x42, 0x69, 0xdd, + 0x1c, 0xa8, 0x4a, 0xfe, 0x3f, 0x8b, 0xa0, 0x14, 0xd5, 0x61, + 0x0f, 0xbb, 0x7a, 0xce, 0xe5, 0x51, 0x90, 0x24, 0xc6, 0x72, + 0xb3, 0x07, 0x2c, 0x98, 0x59, 0xed, 0x80, 0x34, 0xf5, 0x41, + 0x6a, 0xde, 0x1f, 0xab, 0x49, 0xfd, 0x3c, 0x88, 0xa3, 0x17, + 0xd6, 0x62, 0x0a, 0xbe, 0x7f, 0xcb, 0xe0, 0x54, 0x95, 0x21, + 0xc3, 0x77, 0xb6, 0x02, 0x29, 0x9d, 0x5c, 0xe8, 0x85, 0x31, + 0xf0, 0x44, 0x6f, 0xdb, 0x1a, 0xae, 0x4c, 0xf8, 0x39, 0x8d, + 0xa6, 0x12, 0xd3, 0x67, 0x09, 0xbd, 0x7c, 0xc8, 0xe3, 0x57, + 0x96, 0x22, 0xc0, 0x74, 0xb5, 0x01, 0x2a, 0x9e, 0x5f, 0xeb, + 0x86, 0x32, 0xf3, 0x47, 0x6c, 0xd8, 0x19, 0xad, 0x4f, 0xfb, + 0x3a, 0x8e, 0xa5, 0x11, 0xd0, 0x64, 0x00, 0xb5, 0x77, 0xc2, + 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x03, 0x2f, 0x9a, + 0x58, 0xed, 0x9f, 0x2a, 0xe8, 0x5d, 0x71, 0xc4, 0x06, 0xb3, + 0x5e, 0xeb, 0x29, 0x9c, 0xb0, 0x05, 0xc7, 0x72, 0x23, 0x96, + 0x54, 0xe1, 0xcd, 0x78, 0xba, 0x0f, 0xe2, 0x57, 0x95, 0x20, + 0x0c, 0xb9, 0x7b, 0xce, 0xbc, 0x09, 0xcb, 0x7e, 0x52, 0xe7, + 0x25, 0x90, 0x7d, 0xc8, 0x0a, 0xbf, 0x93, 0x26, 0xe4, 0x51, + 0x46, 0xf3, 0x31, 0x84, 0xa8, 0x1d, 0xdf, 0x6a, 0x87, 0x32, + 0xf0, 0x45, 0x69, 0xdc, 0x1e, 0xab, 0xd9, 0x6c, 0xae, 0x1b, + 0x37, 
0x82, 0x40, 0xf5, 0x18, 0xad, 0x6f, 0xda, 0xf6, 0x43, + 0x81, 0x34, 0x65, 0xd0, 0x12, 0xa7, 0x8b, 0x3e, 0xfc, 0x49, + 0xa4, 0x11, 0xd3, 0x66, 0x4a, 0xff, 0x3d, 0x88, 0xfa, 0x4f, + 0x8d, 0x38, 0x14, 0xa1, 0x63, 0xd6, 0x3b, 0x8e, 0x4c, 0xf9, + 0xd5, 0x60, 0xa2, 0x17, 0x8c, 0x39, 0xfb, 0x4e, 0x62, 0xd7, + 0x15, 0xa0, 0x4d, 0xf8, 0x3a, 0x8f, 0xa3, 0x16, 0xd4, 0x61, + 0x13, 0xa6, 0x64, 0xd1, 0xfd, 0x48, 0x8a, 0x3f, 0xd2, 0x67, + 0xa5, 0x10, 0x3c, 0x89, 0x4b, 0xfe, 0xaf, 0x1a, 0xd8, 0x6d, + 0x41, 0xf4, 0x36, 0x83, 0x6e, 0xdb, 0x19, 0xac, 0x80, 0x35, + 0xf7, 0x42, 0x30, 0x85, 0x47, 0xf2, 0xde, 0x6b, 0xa9, 0x1c, + 0xf1, 0x44, 0x86, 0x33, 0x1f, 0xaa, 0x68, 0xdd, 0xca, 0x7f, + 0xbd, 0x08, 0x24, 0x91, 0x53, 0xe6, 0x0b, 0xbe, 0x7c, 0xc9, + 0xe5, 0x50, 0x92, 0x27, 0x55, 0xe0, 0x22, 0x97, 0xbb, 0x0e, + 0xcc, 0x79, 0x94, 0x21, 0xe3, 0x56, 0x7a, 0xcf, 0x0d, 0xb8, + 0xe9, 0x5c, 0x9e, 0x2b, 0x07, 0xb2, 0x70, 0xc5, 0x28, 0x9d, + 0x5f, 0xea, 0xc6, 0x73, 0xb1, 0x04, 0x76, 0xc3, 0x01, 0xb4, + 0x98, 0x2d, 0xef, 0x5a, 0xb7, 0x02, 0xc0, 0x75, 0x59, 0xec, + 0x2e, 0x9b, 0x00, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, + 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc, 0xaf, 0x19, + 0xde, 0x68, 0x4d, 0xfb, 0x3c, 0x8a, 0x76, 0xc0, 0x07, 0xb1, + 0x94, 0x22, 0xe5, 0x53, 0x43, 0xf5, 0x32, 0x84, 0xa1, 0x17, + 0xd0, 0x66, 0x9a, 0x2c, 0xeb, 0x5d, 0x78, 0xce, 0x09, 0xbf, + 0xec, 0x5a, 0x9d, 0x2b, 0x0e, 0xb8, 0x7f, 0xc9, 0x35, 0x83, + 0x44, 0xf2, 0xd7, 0x61, 0xa6, 0x10, 0x86, 0x30, 0xf7, 0x41, + 0x64, 0xd2, 0x15, 0xa3, 0x5f, 0xe9, 0x2e, 0x98, 0xbd, 0x0b, + 0xcc, 0x7a, 0x29, 0x9f, 0x58, 0xee, 0xcb, 0x7d, 0xba, 0x0c, + 0xf0, 0x46, 0x81, 0x37, 0x12, 0xa4, 0x63, 0xd5, 0xc5, 0x73, + 0xb4, 0x02, 0x27, 0x91, 0x56, 0xe0, 0x1c, 0xaa, 0x6d, 0xdb, + 0xfe, 0x48, 0x8f, 0x39, 0x6a, 0xdc, 0x1b, 0xad, 0x88, 0x3e, + 0xf9, 0x4f, 0xb3, 0x05, 0xc2, 0x74, 0x51, 0xe7, 0x20, 0x96, + 0x11, 0xa7, 0x60, 0xd6, 0xf3, 0x45, 0x82, 0x34, 0xc8, 0x7e, + 0xb9, 0x0f, 0x2a, 0x9c, 0x5b, 0xed, 0xbe, 0x08, 0xcf, 0x79, + 0x5c, 0xea, 0x2d, 0x9b, 0x67, 0xd1, 0x16, 0xa0, 0x85, 0x33, + 0xf4, 0x42, 0x52, 0xe4, 0x23, 0x95, 0xb0, 0x06, 0xc1, 0x77, + 0x8b, 0x3d, 0xfa, 0x4c, 0x69, 0xdf, 0x18, 0xae, 0xfd, 0x4b, + 0x8c, 0x3a, 0x1f, 0xa9, 0x6e, 0xd8, 0x24, 0x92, 0x55, 0xe3, + 0xc6, 0x70, 0xb7, 0x01, 0x97, 0x21, 0xe6, 0x50, 0x75, 0xc3, + 0x04, 0xb2, 0x4e, 0xf8, 0x3f, 0x89, 0xac, 0x1a, 0xdd, 0x6b, + 0x38, 0x8e, 0x49, 0xff, 0xda, 0x6c, 0xab, 0x1d, 0xe1, 0x57, + 0x90, 0x26, 0x03, 0xb5, 0x72, 0xc4, 0xd4, 0x62, 0xa5, 0x13, + 0x36, 0x80, 0x47, 0xf1, 0x0d, 0xbb, 0x7c, 0xca, 0xef, 0x59, + 0x9e, 0x28, 0x7b, 0xcd, 0x0a, 0xbc, 0x99, 0x2f, 0xe8, 0x5e, + 0xa2, 0x14, 0xd3, 0x65, 0x40, 0xf6, 0x31, 0x87, 0x00, 0xb7, + 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, + 0x37, 0x80, 0x44, 0xf3, 0xbf, 0x08, 0xcc, 0x7b, 0x59, 0xee, + 0x2a, 0x9d, 0x6e, 0xd9, 0x1d, 0xaa, 0x88, 0x3f, 0xfb, 0x4c, + 0x63, 0xd4, 0x10, 0xa7, 0x85, 0x32, 0xf6, 0x41, 0xb2, 0x05, + 0xc1, 0x76, 0x54, 0xe3, 0x27, 0x90, 0xdc, 0x6b, 0xaf, 0x18, + 0x3a, 0x8d, 0x49, 0xfe, 0x0d, 0xba, 0x7e, 0xc9, 0xeb, 0x5c, + 0x98, 0x2f, 0xc6, 0x71, 0xb5, 0x02, 0x20, 0x97, 0x53, 0xe4, + 0x17, 0xa0, 0x64, 0xd3, 0xf1, 0x46, 0x82, 0x35, 0x79, 0xce, + 0x0a, 0xbd, 0x9f, 0x28, 0xec, 0x5b, 0xa8, 0x1f, 0xdb, 0x6c, + 0x4e, 0xf9, 0x3d, 0x8a, 0xa5, 0x12, 0xd6, 0x61, 0x43, 0xf4, + 0x30, 0x87, 0x74, 0xc3, 0x07, 0xb0, 0x92, 0x25, 0xe1, 0x56, + 0x1a, 0xad, 0x69, 0xde, 0xfc, 0x4b, 0x8f, 0x38, 0xcb, 0x7c, + 0xb8, 0x0f, 0x2d, 0x9a, 0x5e, 0xe9, 0x91, 0x26, 0xe2, 0x55, + 0x77, 0xc0, 0x04, 0xb3, 0x40, 0xf7, 0x33, 0x84, 0xa6, 0x11, + 0xd5, 0x62, 0x2e, 0x99, 
0x5d, 0xea, 0xc8, 0x7f, 0xbb, 0x0c, + 0xff, 0x48, 0x8c, 0x3b, 0x19, 0xae, 0x6a, 0xdd, 0xf2, 0x45, + 0x81, 0x36, 0x14, 0xa3, 0x67, 0xd0, 0x23, 0x94, 0x50, 0xe7, + 0xc5, 0x72, 0xb6, 0x01, 0x4d, 0xfa, 0x3e, 0x89, 0xab, 0x1c, + 0xd8, 0x6f, 0x9c, 0x2b, 0xef, 0x58, 0x7a, 0xcd, 0x09, 0xbe, + 0x57, 0xe0, 0x24, 0x93, 0xb1, 0x06, 0xc2, 0x75, 0x86, 0x31, + 0xf5, 0x42, 0x60, 0xd7, 0x13, 0xa4, 0xe8, 0x5f, 0x9b, 0x2c, + 0x0e, 0xb9, 0x7d, 0xca, 0x39, 0x8e, 0x4a, 0xfd, 0xdf, 0x68, + 0xac, 0x1b, 0x34, 0x83, 0x47, 0xf0, 0xd2, 0x65, 0xa1, 0x16, + 0xe5, 0x52, 0x96, 0x21, 0x03, 0xb4, 0x70, 0xc7, 0x8b, 0x3c, + 0xf8, 0x4f, 0x6d, 0xda, 0x1e, 0xa9, 0x5a, 0xed, 0x29, 0x9e, + 0xbc, 0x0b, 0xcf, 0x78, 0x00, 0xb8, 0x6d, 0xd5, 0xda, 0x62, + 0xb7, 0x0f, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6, + 0x4f, 0xf7, 0x22, 0x9a, 0x95, 0x2d, 0xf8, 0x40, 0xe6, 0x5e, + 0x8b, 0x33, 0x3c, 0x84, 0x51, 0xe9, 0x9e, 0x26, 0xf3, 0x4b, + 0x44, 0xfc, 0x29, 0x91, 0x37, 0x8f, 0x5a, 0xe2, 0xed, 0x55, + 0x80, 0x38, 0xd1, 0x69, 0xbc, 0x04, 0x0b, 0xb3, 0x66, 0xde, + 0x78, 0xc0, 0x15, 0xad, 0xa2, 0x1a, 0xcf, 0x77, 0x21, 0x99, + 0x4c, 0xf4, 0xfb, 0x43, 0x96, 0x2e, 0x88, 0x30, 0xe5, 0x5d, + 0x52, 0xea, 0x3f, 0x87, 0x6e, 0xd6, 0x03, 0xbb, 0xb4, 0x0c, + 0xd9, 0x61, 0xc7, 0x7f, 0xaa, 0x12, 0x1d, 0xa5, 0x70, 0xc8, + 0xbf, 0x07, 0xd2, 0x6a, 0x65, 0xdd, 0x08, 0xb0, 0x16, 0xae, + 0x7b, 0xc3, 0xcc, 0x74, 0xa1, 0x19, 0xf0, 0x48, 0x9d, 0x25, + 0x2a, 0x92, 0x47, 0xff, 0x59, 0xe1, 0x34, 0x8c, 0x83, 0x3b, + 0xee, 0x56, 0x42, 0xfa, 0x2f, 0x97, 0x98, 0x20, 0xf5, 0x4d, + 0xeb, 0x53, 0x86, 0x3e, 0x31, 0x89, 0x5c, 0xe4, 0x0d, 0xb5, + 0x60, 0xd8, 0xd7, 0x6f, 0xba, 0x02, 0xa4, 0x1c, 0xc9, 0x71, + 0x7e, 0xc6, 0x13, 0xab, 0xdc, 0x64, 0xb1, 0x09, 0x06, 0xbe, + 0x6b, 0xd3, 0x75, 0xcd, 0x18, 0xa0, 0xaf, 0x17, 0xc2, 0x7a, + 0x93, 0x2b, 0xfe, 0x46, 0x49, 0xf1, 0x24, 0x9c, 0x3a, 0x82, + 0x57, 0xef, 0xe0, 0x58, 0x8d, 0x35, 0x63, 0xdb, 0x0e, 0xb6, + 0xb9, 0x01, 0xd4, 0x6c, 0xca, 0x72, 0xa7, 0x1f, 0x10, 0xa8, + 0x7d, 0xc5, 0x2c, 0x94, 0x41, 0xf9, 0xf6, 0x4e, 0x9b, 0x23, + 0x85, 0x3d, 0xe8, 0x50, 0x5f, 0xe7, 0x32, 0x8a, 0xfd, 0x45, + 0x90, 0x28, 0x27, 0x9f, 0x4a, 0xf2, 0x54, 0xec, 0x39, 0x81, + 0x8e, 0x36, 0xe3, 0x5b, 0xb2, 0x0a, 0xdf, 0x67, 0x68, 0xd0, + 0x05, 0xbd, 0x1b, 0xa3, 0x76, 0xce, 0xc1, 0x79, 0xac, 0x14, + 0x00, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x08, 0xa1, 0x18, + 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9, 0x5f, 0xe6, 0x30, 0x89, + 0x81, 0x38, 0xee, 0x57, 0xfe, 0x47, 0x91, 0x28, 0x20, 0x99, + 0x4f, 0xf6, 0xbe, 0x07, 0xd1, 0x68, 0x60, 0xd9, 0x0f, 0xb6, + 0x1f, 0xa6, 0x70, 0xc9, 0xc1, 0x78, 0xae, 0x17, 0xe1, 0x58, + 0x8e, 0x37, 0x3f, 0x86, 0x50, 0xe9, 0x40, 0xf9, 0x2f, 0x96, + 0x9e, 0x27, 0xf1, 0x48, 0x61, 0xd8, 0x0e, 0xb7, 0xbf, 0x06, + 0xd0, 0x69, 0xc0, 0x79, 0xaf, 0x16, 0x1e, 0xa7, 0x71, 0xc8, + 0x3e, 0x87, 0x51, 0xe8, 0xe0, 0x59, 0x8f, 0x36, 0x9f, 0x26, + 0xf0, 0x49, 0x41, 0xf8, 0x2e, 0x97, 0xdf, 0x66, 0xb0, 0x09, + 0x01, 0xb8, 0x6e, 0xd7, 0x7e, 0xc7, 0x11, 0xa8, 0xa0, 0x19, + 0xcf, 0x76, 0x80, 0x39, 0xef, 0x56, 0x5e, 0xe7, 0x31, 0x88, + 0x21, 0x98, 0x4e, 0xf7, 0xff, 0x46, 0x90, 0x29, 0xc2, 0x7b, + 0xad, 0x14, 0x1c, 0xa5, 0x73, 0xca, 0x63, 0xda, 0x0c, 0xb5, + 0xbd, 0x04, 0xd2, 0x6b, 0x9d, 0x24, 0xf2, 0x4b, 0x43, 0xfa, + 0x2c, 0x95, 0x3c, 0x85, 0x53, 0xea, 0xe2, 0x5b, 0x8d, 0x34, + 0x7c, 0xc5, 0x13, 0xaa, 0xa2, 0x1b, 0xcd, 0x74, 0xdd, 0x64, + 0xb2, 0x0b, 0x03, 0xba, 0x6c, 0xd5, 0x23, 0x9a, 0x4c, 0xf5, + 0xfd, 0x44, 0x92, 0x2b, 0x82, 0x3b, 0xed, 0x54, 0x5c, 0xe5, + 0x33, 0x8a, 0xa3, 0x1a, 0xcc, 0x75, 0x7d, 0xc4, 0x12, 0xab, + 0x02, 0xbb, 0x6d, 0xd4, 0xdc, 0x65, 0xb3, 
0x0a, 0xfc, 0x45, + 0x93, 0x2a, 0x22, 0x9b, 0x4d, 0xf4, 0x5d, 0xe4, 0x32, 0x8b, + 0x83, 0x3a, 0xec, 0x55, 0x1d, 0xa4, 0x72, 0xcb, 0xc3, 0x7a, + 0xac, 0x15, 0xbc, 0x05, 0xd3, 0x6a, 0x62, 0xdb, 0x0d, 0xb4, + 0x42, 0xfb, 0x2d, 0x94, 0x9c, 0x25, 0xf3, 0x4a, 0xe3, 0x5a, + 0x8c, 0x35, 0x3d, 0x84, 0x52, 0xeb, 0x00, 0xba, 0x69, 0xd3, + 0xd2, 0x68, 0xbb, 0x01, 0xb9, 0x03, 0xd0, 0x6a, 0x6b, 0xd1, + 0x02, 0xb8, 0x6f, 0xd5, 0x06, 0xbc, 0xbd, 0x07, 0xd4, 0x6e, + 0xd6, 0x6c, 0xbf, 0x05, 0x04, 0xbe, 0x6d, 0xd7, 0xde, 0x64, + 0xb7, 0x0d, 0x0c, 0xb6, 0x65, 0xdf, 0x67, 0xdd, 0x0e, 0xb4, + 0xb5, 0x0f, 0xdc, 0x66, 0xb1, 0x0b, 0xd8, 0x62, 0x63, 0xd9, + 0x0a, 0xb0, 0x08, 0xb2, 0x61, 0xdb, 0xda, 0x60, 0xb3, 0x09, + 0xa1, 0x1b, 0xc8, 0x72, 0x73, 0xc9, 0x1a, 0xa0, 0x18, 0xa2, + 0x71, 0xcb, 0xca, 0x70, 0xa3, 0x19, 0xce, 0x74, 0xa7, 0x1d, + 0x1c, 0xa6, 0x75, 0xcf, 0x77, 0xcd, 0x1e, 0xa4, 0xa5, 0x1f, + 0xcc, 0x76, 0x7f, 0xc5, 0x16, 0xac, 0xad, 0x17, 0xc4, 0x7e, + 0xc6, 0x7c, 0xaf, 0x15, 0x14, 0xae, 0x7d, 0xc7, 0x10, 0xaa, + 0x79, 0xc3, 0xc2, 0x78, 0xab, 0x11, 0xa9, 0x13, 0xc0, 0x7a, + 0x7b, 0xc1, 0x12, 0xa8, 0x5f, 0xe5, 0x36, 0x8c, 0x8d, 0x37, + 0xe4, 0x5e, 0xe6, 0x5c, 0x8f, 0x35, 0x34, 0x8e, 0x5d, 0xe7, + 0x30, 0x8a, 0x59, 0xe3, 0xe2, 0x58, 0x8b, 0x31, 0x89, 0x33, + 0xe0, 0x5a, 0x5b, 0xe1, 0x32, 0x88, 0x81, 0x3b, 0xe8, 0x52, + 0x53, 0xe9, 0x3a, 0x80, 0x38, 0x82, 0x51, 0xeb, 0xea, 0x50, + 0x83, 0x39, 0xee, 0x54, 0x87, 0x3d, 0x3c, 0x86, 0x55, 0xef, + 0x57, 0xed, 0x3e, 0x84, 0x85, 0x3f, 0xec, 0x56, 0xfe, 0x44, + 0x97, 0x2d, 0x2c, 0x96, 0x45, 0xff, 0x47, 0xfd, 0x2e, 0x94, + 0x95, 0x2f, 0xfc, 0x46, 0x91, 0x2b, 0xf8, 0x42, 0x43, 0xf9, + 0x2a, 0x90, 0x28, 0x92, 0x41, 0xfb, 0xfa, 0x40, 0x93, 0x29, + 0x20, 0x9a, 0x49, 0xf3, 0xf2, 0x48, 0x9b, 0x21, 0x99, 0x23, + 0xf0, 0x4a, 0x4b, 0xf1, 0x22, 0x98, 0x4f, 0xf5, 0x26, 0x9c, + 0x9d, 0x27, 0xf4, 0x4e, 0xf6, 0x4c, 0x9f, 0x25, 0x24, 0x9e, + 0x4d, 0xf7, 0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06, + 0xb1, 0x0a, 0xda, 0x61, 0x67, 0xdc, 0x0c, 0xb7, 0x7f, 0xc4, + 0x14, 0xaf, 0xa9, 0x12, 0xc2, 0x79, 0xce, 0x75, 0xa5, 0x1e, + 0x18, 0xa3, 0x73, 0xc8, 0xfe, 0x45, 0x95, 0x2e, 0x28, 0x93, + 0x43, 0xf8, 0x4f, 0xf4, 0x24, 0x9f, 0x99, 0x22, 0xf2, 0x49, + 0x81, 0x3a, 0xea, 0x51, 0x57, 0xec, 0x3c, 0x87, 0x30, 0x8b, + 0x5b, 0xe0, 0xe6, 0x5d, 0x8d, 0x36, 0xe1, 0x5a, 0x8a, 0x31, + 0x37, 0x8c, 0x5c, 0xe7, 0x50, 0xeb, 0x3b, 0x80, 0x86, 0x3d, + 0xed, 0x56, 0x9e, 0x25, 0xf5, 0x4e, 0x48, 0xf3, 0x23, 0x98, + 0x2f, 0x94, 0x44, 0xff, 0xf9, 0x42, 0x92, 0x29, 0x1f, 0xa4, + 0x74, 0xcf, 0xc9, 0x72, 0xa2, 0x19, 0xae, 0x15, 0xc5, 0x7e, + 0x78, 0xc3, 0x13, 0xa8, 0x60, 0xdb, 0x0b, 0xb0, 0xb6, 0x0d, + 0xdd, 0x66, 0xd1, 0x6a, 0xba, 0x01, 0x07, 0xbc, 0x6c, 0xd7, + 0xdf, 0x64, 0xb4, 0x0f, 0x09, 0xb2, 0x62, 0xd9, 0x6e, 0xd5, + 0x05, 0xbe, 0xb8, 0x03, 0xd3, 0x68, 0xa0, 0x1b, 0xcb, 0x70, + 0x76, 0xcd, 0x1d, 0xa6, 0x11, 0xaa, 0x7a, 0xc1, 0xc7, 0x7c, + 0xac, 0x17, 0x21, 0x9a, 0x4a, 0xf1, 0xf7, 0x4c, 0x9c, 0x27, + 0x90, 0x2b, 0xfb, 0x40, 0x46, 0xfd, 0x2d, 0x96, 0x5e, 0xe5, + 0x35, 0x8e, 0x88, 0x33, 0xe3, 0x58, 0xef, 0x54, 0x84, 0x3f, + 0x39, 0x82, 0x52, 0xe9, 0x3e, 0x85, 0x55, 0xee, 0xe8, 0x53, + 0x83, 0x38, 0x8f, 0x34, 0xe4, 0x5f, 0x59, 0xe2, 0x32, 0x89, + 0x41, 0xfa, 0x2a, 0x91, 0x97, 0x2c, 0xfc, 0x47, 0xf0, 0x4b, + 0x9b, 0x20, 0x26, 0x9d, 0x4d, 0xf6, 0xc0, 0x7b, 0xab, 0x10, + 0x16, 0xad, 0x7d, 0xc6, 0x71, 0xca, 0x1a, 0xa1, 0xa7, 0x1c, + 0xcc, 0x77, 0xbf, 0x04, 0xd4, 0x6f, 0x69, 0xd2, 0x02, 0xb9, + 0x0e, 0xb5, 0x65, 0xde, 0xd8, 0x63, 0xb3, 0x08, 0x00, 0xbc, + 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, + 
0x43, 0xff, 0x26, 0x9a, 0x0f, 0xb3, 0x6a, 0xd6, 0xc5, 0x79, + 0xa0, 0x1c, 0x86, 0x3a, 0xe3, 0x5f, 0x4c, 0xf0, 0x29, 0x95, + 0x1e, 0xa2, 0x7b, 0xc7, 0xd4, 0x68, 0xb1, 0x0d, 0x97, 0x2b, + 0xf2, 0x4e, 0x5d, 0xe1, 0x38, 0x84, 0x11, 0xad, 0x74, 0xc8, + 0xdb, 0x67, 0xbe, 0x02, 0x98, 0x24, 0xfd, 0x41, 0x52, 0xee, + 0x37, 0x8b, 0x3c, 0x80, 0x59, 0xe5, 0xf6, 0x4a, 0x93, 0x2f, + 0xb5, 0x09, 0xd0, 0x6c, 0x7f, 0xc3, 0x1a, 0xa6, 0x33, 0x8f, + 0x56, 0xea, 0xf9, 0x45, 0x9c, 0x20, 0xba, 0x06, 0xdf, 0x63, + 0x70, 0xcc, 0x15, 0xa9, 0x22, 0x9e, 0x47, 0xfb, 0xe8, 0x54, + 0x8d, 0x31, 0xab, 0x17, 0xce, 0x72, 0x61, 0xdd, 0x04, 0xb8, + 0x2d, 0x91, 0x48, 0xf4, 0xe7, 0x5b, 0x82, 0x3e, 0xa4, 0x18, + 0xc1, 0x7d, 0x6e, 0xd2, 0x0b, 0xb7, 0x78, 0xc4, 0x1d, 0xa1, + 0xb2, 0x0e, 0xd7, 0x6b, 0xf1, 0x4d, 0x94, 0x28, 0x3b, 0x87, + 0x5e, 0xe2, 0x77, 0xcb, 0x12, 0xae, 0xbd, 0x01, 0xd8, 0x64, + 0xfe, 0x42, 0x9b, 0x27, 0x34, 0x88, 0x51, 0xed, 0x66, 0xda, + 0x03, 0xbf, 0xac, 0x10, 0xc9, 0x75, 0xef, 0x53, 0x8a, 0x36, + 0x25, 0x99, 0x40, 0xfc, 0x69, 0xd5, 0x0c, 0xb0, 0xa3, 0x1f, + 0xc6, 0x7a, 0xe0, 0x5c, 0x85, 0x39, 0x2a, 0x96, 0x4f, 0xf3, + 0x44, 0xf8, 0x21, 0x9d, 0x8e, 0x32, 0xeb, 0x57, 0xcd, 0x71, + 0xa8, 0x14, 0x07, 0xbb, 0x62, 0xde, 0x4b, 0xf7, 0x2e, 0x92, + 0x81, 0x3d, 0xe4, 0x58, 0xc2, 0x7e, 0xa7, 0x1b, 0x08, 0xb4, + 0x6d, 0xd1, 0x5a, 0xe6, 0x3f, 0x83, 0x90, 0x2c, 0xf5, 0x49, + 0xd3, 0x6f, 0xb6, 0x0a, 0x19, 0xa5, 0x7c, 0xc0, 0x55, 0xe9, + 0x30, 0x8c, 0x9f, 0x23, 0xfa, 0x46, 0xdc, 0x60, 0xb9, 0x05, + 0x16, 0xaa, 0x73, 0xcf, 0x00, 0xbd, 0x67, 0xda, 0xce, 0x73, + 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95, + 0x1f, 0xa2, 0x78, 0xc5, 0xd1, 0x6c, 0xb6, 0x0b, 0x9e, 0x23, + 0xf9, 0x44, 0x50, 0xed, 0x37, 0x8a, 0x3e, 0x83, 0x59, 0xe4, + 0xf0, 0x4d, 0x97, 0x2a, 0xbf, 0x02, 0xd8, 0x65, 0x71, 0xcc, + 0x16, 0xab, 0x21, 0x9c, 0x46, 0xfb, 0xef, 0x52, 0x88, 0x35, + 0xa0, 0x1d, 0xc7, 0x7a, 0x6e, 0xd3, 0x09, 0xb4, 0x7c, 0xc1, + 0x1b, 0xa6, 0xb2, 0x0f, 0xd5, 0x68, 0xfd, 0x40, 0x9a, 0x27, + 0x33, 0x8e, 0x54, 0xe9, 0x63, 0xde, 0x04, 0xb9, 0xad, 0x10, + 0xca, 0x77, 0xe2, 0x5f, 0x85, 0x38, 0x2c, 0x91, 0x4b, 0xf6, + 0x42, 0xff, 0x25, 0x98, 0x8c, 0x31, 0xeb, 0x56, 0xc3, 0x7e, + 0xa4, 0x19, 0x0d, 0xb0, 0x6a, 0xd7, 0x5d, 0xe0, 0x3a, 0x87, + 0x93, 0x2e, 0xf4, 0x49, 0xdc, 0x61, 0xbb, 0x06, 0x12, 0xaf, + 0x75, 0xc8, 0xf8, 0x45, 0x9f, 0x22, 0x36, 0x8b, 0x51, 0xec, + 0x79, 0xc4, 0x1e, 0xa3, 0xb7, 0x0a, 0xd0, 0x6d, 0xe7, 0x5a, + 0x80, 0x3d, 0x29, 0x94, 0x4e, 0xf3, 0x66, 0xdb, 0x01, 0xbc, + 0xa8, 0x15, 0xcf, 0x72, 0xc6, 0x7b, 0xa1, 0x1c, 0x08, 0xb5, + 0x6f, 0xd2, 0x47, 0xfa, 0x20, 0x9d, 0x89, 0x34, 0xee, 0x53, + 0xd9, 0x64, 0xbe, 0x03, 0x17, 0xaa, 0x70, 0xcd, 0x58, 0xe5, + 0x3f, 0x82, 0x96, 0x2b, 0xf1, 0x4c, 0x84, 0x39, 0xe3, 0x5e, + 0x4a, 0xf7, 0x2d, 0x90, 0x05, 0xb8, 0x62, 0xdf, 0xcb, 0x76, + 0xac, 0x11, 0x9b, 0x26, 0xfc, 0x41, 0x55, 0xe8, 0x32, 0x8f, + 0x1a, 0xa7, 0x7d, 0xc0, 0xd4, 0x69, 0xb3, 0x0e, 0xba, 0x07, + 0xdd, 0x60, 0x74, 0xc9, 0x13, 0xae, 0x3b, 0x86, 0x5c, 0xe1, + 0xf5, 0x48, 0x92, 0x2f, 0xa5, 0x18, 0xc2, 0x7f, 0x6b, 0xd6, + 0x0c, 0xb1, 0x24, 0x99, 0x43, 0xfe, 0xea, 0x57, 0x8d, 0x30, + 0x00, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, + 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84, 0x2f, 0x91, 0x4e, 0xf0, + 0xed, 0x53, 0x8c, 0x32, 0xb6, 0x08, 0xd7, 0x69, 0x74, 0xca, + 0x15, 0xab, 0x5e, 0xe0, 0x3f, 0x81, 0x9c, 0x22, 0xfd, 0x43, + 0xc7, 0x79, 0xa6, 0x18, 0x05, 0xbb, 0x64, 0xda, 0x71, 0xcf, + 0x10, 0xae, 0xb3, 0x0d, 0xd2, 0x6c, 0xe8, 0x56, 0x89, 0x37, + 0x2a, 0x94, 0x4b, 0xf5, 0xbc, 0x02, 0xdd, 0x63, 0x7e, 0xc0, + 0x1f, 0xa1, 0x25, 
0x9b, 0x44, 0xfa, 0xe7, 0x59, 0x86, 0x38, + 0x93, 0x2d, 0xf2, 0x4c, 0x51, 0xef, 0x30, 0x8e, 0x0a, 0xb4, + 0x6b, 0xd5, 0xc8, 0x76, 0xa9, 0x17, 0xe2, 0x5c, 0x83, 0x3d, + 0x20, 0x9e, 0x41, 0xff, 0x7b, 0xc5, 0x1a, 0xa4, 0xb9, 0x07, + 0xd8, 0x66, 0xcd, 0x73, 0xac, 0x12, 0x0f, 0xb1, 0x6e, 0xd0, + 0x54, 0xea, 0x35, 0x8b, 0x96, 0x28, 0xf7, 0x49, 0x65, 0xdb, + 0x04, 0xba, 0xa7, 0x19, 0xc6, 0x78, 0xfc, 0x42, 0x9d, 0x23, + 0x3e, 0x80, 0x5f, 0xe1, 0x4a, 0xf4, 0x2b, 0x95, 0x88, 0x36, + 0xe9, 0x57, 0xd3, 0x6d, 0xb2, 0x0c, 0x11, 0xaf, 0x70, 0xce, + 0x3b, 0x85, 0x5a, 0xe4, 0xf9, 0x47, 0x98, 0x26, 0xa2, 0x1c, + 0xc3, 0x7d, 0x60, 0xde, 0x01, 0xbf, 0x14, 0xaa, 0x75, 0xcb, + 0xd6, 0x68, 0xb7, 0x09, 0x8d, 0x33, 0xec, 0x52, 0x4f, 0xf1, + 0x2e, 0x90, 0xd9, 0x67, 0xb8, 0x06, 0x1b, 0xa5, 0x7a, 0xc4, + 0x40, 0xfe, 0x21, 0x9f, 0x82, 0x3c, 0xe3, 0x5d, 0xf6, 0x48, + 0x97, 0x29, 0x34, 0x8a, 0x55, 0xeb, 0x6f, 0xd1, 0x0e, 0xb0, + 0xad, 0x13, 0xcc, 0x72, 0x87, 0x39, 0xe6, 0x58, 0x45, 0xfb, + 0x24, 0x9a, 0x1e, 0xa0, 0x7f, 0xc1, 0xdc, 0x62, 0xbd, 0x03, + 0xa8, 0x16, 0xc9, 0x77, 0x6a, 0xd4, 0x0b, 0xb5, 0x31, 0x8f, + 0x50, 0xee, 0xf3, 0x4d, 0x92, 0x2c, 0x00, 0xbf, 0x63, 0xdc, + 0xc6, 0x79, 0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, + 0x34, 0x8b, 0x3f, 0x80, 0x5c, 0xe3, 0xf9, 0x46, 0x9a, 0x25, + 0xae, 0x11, 0xcd, 0x72, 0x68, 0xd7, 0x0b, 0xb4, 0x7e, 0xc1, + 0x1d, 0xa2, 0xb8, 0x07, 0xdb, 0x64, 0xef, 0x50, 0x8c, 0x33, + 0x29, 0x96, 0x4a, 0xf5, 0x41, 0xfe, 0x22, 0x9d, 0x87, 0x38, + 0xe4, 0x5b, 0xd0, 0x6f, 0xb3, 0x0c, 0x16, 0xa9, 0x75, 0xca, + 0xfc, 0x43, 0x9f, 0x20, 0x3a, 0x85, 0x59, 0xe6, 0x6d, 0xd2, + 0x0e, 0xb1, 0xab, 0x14, 0xc8, 0x77, 0xc3, 0x7c, 0xa0, 0x1f, + 0x05, 0xba, 0x66, 0xd9, 0x52, 0xed, 0x31, 0x8e, 0x94, 0x2b, + 0xf7, 0x48, 0x82, 0x3d, 0xe1, 0x5e, 0x44, 0xfb, 0x27, 0x98, + 0x13, 0xac, 0x70, 0xcf, 0xd5, 0x6a, 0xb6, 0x09, 0xbd, 0x02, + 0xde, 0x61, 0x7b, 0xc4, 0x18, 0xa7, 0x2c, 0x93, 0x4f, 0xf0, + 0xea, 0x55, 0x89, 0x36, 0xe5, 0x5a, 0x86, 0x39, 0x23, 0x9c, + 0x40, 0xff, 0x74, 0xcb, 0x17, 0xa8, 0xb2, 0x0d, 0xd1, 0x6e, + 0xda, 0x65, 0xb9, 0x06, 0x1c, 0xa3, 0x7f, 0xc0, 0x4b, 0xf4, + 0x28, 0x97, 0x8d, 0x32, 0xee, 0x51, 0x9b, 0x24, 0xf8, 0x47, + 0x5d, 0xe2, 0x3e, 0x81, 0x0a, 0xb5, 0x69, 0xd6, 0xcc, 0x73, + 0xaf, 0x10, 0xa4, 0x1b, 0xc7, 0x78, 0x62, 0xdd, 0x01, 0xbe, + 0x35, 0x8a, 0x56, 0xe9, 0xf3, 0x4c, 0x90, 0x2f, 0x19, 0xa6, + 0x7a, 0xc5, 0xdf, 0x60, 0xbc, 0x03, 0x88, 0x37, 0xeb, 0x54, + 0x4e, 0xf1, 0x2d, 0x92, 0x26, 0x99, 0x45, 0xfa, 0xe0, 0x5f, + 0x83, 0x3c, 0xb7, 0x08, 0xd4, 0x6b, 0x71, 0xce, 0x12, 0xad, + 0x67, 0xd8, 0x04, 0xbb, 0xa1, 0x1e, 0xc2, 0x7d, 0xf6, 0x49, + 0x95, 0x2a, 0x30, 0x8f, 0x53, 0xec, 0x58, 0xe7, 0x3b, 0x84, + 0x9e, 0x21, 0xfd, 0x42, 0xc9, 0x76, 0xaa, 0x15, 0x0f, 0xb0, + 0x6c, 0xd3, 0x00, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, + 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34, 0x9c, 0x5c, + 0x01, 0xc1, 0xbb, 0x7b, 0x26, 0xe6, 0xd2, 0x12, 0x4f, 0x8f, + 0xf5, 0x35, 0x68, 0xa8, 0x25, 0xe5, 0xb8, 0x78, 0x02, 0xc2, + 0x9f, 0x5f, 0x6b, 0xab, 0xf6, 0x36, 0x4c, 0x8c, 0xd1, 0x11, + 0xb9, 0x79, 0x24, 0xe4, 0x9e, 0x5e, 0x03, 0xc3, 0xf7, 0x37, + 0x6a, 0xaa, 0xd0, 0x10, 0x4d, 0x8d, 0x4a, 0x8a, 0xd7, 0x17, + 0x6d, 0xad, 0xf0, 0x30, 0x04, 0xc4, 0x99, 0x59, 0x23, 0xe3, + 0xbe, 0x7e, 0xd6, 0x16, 0x4b, 0x8b, 0xf1, 0x31, 0x6c, 0xac, + 0x98, 0x58, 0x05, 0xc5, 0xbf, 0x7f, 0x22, 0xe2, 0x6f, 0xaf, + 0xf2, 0x32, 0x48, 0x88, 0xd5, 0x15, 0x21, 0xe1, 0xbc, 0x7c, + 0x06, 0xc6, 0x9b, 0x5b, 0xf3, 0x33, 0x6e, 0xae, 0xd4, 0x14, + 0x49, 0x89, 0xbd, 0x7d, 0x20, 0xe0, 0x9a, 0x5a, 0x07, 0xc7, + 0x94, 0x54, 0x09, 0xc9, 0xb3, 0x73, 
0x2e, 0xee, 0xda, 0x1a, + 0x47, 0x87, 0xfd, 0x3d, 0x60, 0xa0, 0x08, 0xc8, 0x95, 0x55, + 0x2f, 0xef, 0xb2, 0x72, 0x46, 0x86, 0xdb, 0x1b, 0x61, 0xa1, + 0xfc, 0x3c, 0xb1, 0x71, 0x2c, 0xec, 0x96, 0x56, 0x0b, 0xcb, + 0xff, 0x3f, 0x62, 0xa2, 0xd8, 0x18, 0x45, 0x85, 0x2d, 0xed, + 0xb0, 0x70, 0x0a, 0xca, 0x97, 0x57, 0x63, 0xa3, 0xfe, 0x3e, + 0x44, 0x84, 0xd9, 0x19, 0xde, 0x1e, 0x43, 0x83, 0xf9, 0x39, + 0x64, 0xa4, 0x90, 0x50, 0x0d, 0xcd, 0xb7, 0x77, 0x2a, 0xea, + 0x42, 0x82, 0xdf, 0x1f, 0x65, 0xa5, 0xf8, 0x38, 0x0c, 0xcc, + 0x91, 0x51, 0x2b, 0xeb, 0xb6, 0x76, 0xfb, 0x3b, 0x66, 0xa6, + 0xdc, 0x1c, 0x41, 0x81, 0xb5, 0x75, 0x28, 0xe8, 0x92, 0x52, + 0x0f, 0xcf, 0x67, 0xa7, 0xfa, 0x3a, 0x40, 0x80, 0xdd, 0x1d, + 0x29, 0xe9, 0xb4, 0x74, 0x0e, 0xce, 0x93, 0x53, 0x00, 0xc1, + 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, + 0x65, 0xa4, 0xfa, 0x3b, 0x8c, 0x4d, 0x13, 0xd2, 0xaf, 0x6e, + 0x30, 0xf1, 0xca, 0x0b, 0x55, 0x94, 0xe9, 0x28, 0x76, 0xb7, + 0x05, 0xc4, 0x9a, 0x5b, 0x26, 0xe7, 0xb9, 0x78, 0x43, 0x82, + 0xdc, 0x1d, 0x60, 0xa1, 0xff, 0x3e, 0x89, 0x48, 0x16, 0xd7, + 0xaa, 0x6b, 0x35, 0xf4, 0xcf, 0x0e, 0x50, 0x91, 0xec, 0x2d, + 0x73, 0xb2, 0x0a, 0xcb, 0x95, 0x54, 0x29, 0xe8, 0xb6, 0x77, + 0x4c, 0x8d, 0xd3, 0x12, 0x6f, 0xae, 0xf0, 0x31, 0x86, 0x47, + 0x19, 0xd8, 0xa5, 0x64, 0x3a, 0xfb, 0xc0, 0x01, 0x5f, 0x9e, + 0xe3, 0x22, 0x7c, 0xbd, 0x0f, 0xce, 0x90, 0x51, 0x2c, 0xed, + 0xb3, 0x72, 0x49, 0x88, 0xd6, 0x17, 0x6a, 0xab, 0xf5, 0x34, + 0x83, 0x42, 0x1c, 0xdd, 0xa0, 0x61, 0x3f, 0xfe, 0xc5, 0x04, + 0x5a, 0x9b, 0xe6, 0x27, 0x79, 0xb8, 0x14, 0xd5, 0x8b, 0x4a, + 0x37, 0xf6, 0xa8, 0x69, 0x52, 0x93, 0xcd, 0x0c, 0x71, 0xb0, + 0xee, 0x2f, 0x98, 0x59, 0x07, 0xc6, 0xbb, 0x7a, 0x24, 0xe5, + 0xde, 0x1f, 0x41, 0x80, 0xfd, 0x3c, 0x62, 0xa3, 0x11, 0xd0, + 0x8e, 0x4f, 0x32, 0xf3, 0xad, 0x6c, 0x57, 0x96, 0xc8, 0x09, + 0x74, 0xb5, 0xeb, 0x2a, 0x9d, 0x5c, 0x02, 0xc3, 0xbe, 0x7f, + 0x21, 0xe0, 0xdb, 0x1a, 0x44, 0x85, 0xf8, 0x39, 0x67, 0xa6, + 0x1e, 0xdf, 0x81, 0x40, 0x3d, 0xfc, 0xa2, 0x63, 0x58, 0x99, + 0xc7, 0x06, 0x7b, 0xba, 0xe4, 0x25, 0x92, 0x53, 0x0d, 0xcc, + 0xb1, 0x70, 0x2e, 0xef, 0xd4, 0x15, 0x4b, 0x8a, 0xf7, 0x36, + 0x68, 0xa9, 0x1b, 0xda, 0x84, 0x45, 0x38, 0xf9, 0xa7, 0x66, + 0x5d, 0x9c, 0xc2, 0x03, 0x7e, 0xbf, 0xe1, 0x20, 0x97, 0x56, + 0x08, 0xc9, 0xb4, 0x75, 0x2b, 0xea, 0xd1, 0x10, 0x4e, 0x8f, + 0xf2, 0x33, 0x6d, 0xac, 0x00, 0xc2, 0x99, 0x5b, 0x2f, 0xed, + 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x05, 0x71, 0xb3, 0xe8, 0x2a, + 0xbc, 0x7e, 0x25, 0xe7, 0x93, 0x51, 0x0a, 0xc8, 0xe2, 0x20, + 0x7b, 0xb9, 0xcd, 0x0f, 0x54, 0x96, 0x65, 0xa7, 0xfc, 0x3e, + 0x4a, 0x88, 0xd3, 0x11, 0x3b, 0xf9, 0xa2, 0x60, 0x14, 0xd6, + 0x8d, 0x4f, 0xd9, 0x1b, 0x40, 0x82, 0xf6, 0x34, 0x6f, 0xad, + 0x87, 0x45, 0x1e, 0xdc, 0xa8, 0x6a, 0x31, 0xf3, 0xca, 0x08, + 0x53, 0x91, 0xe5, 0x27, 0x7c, 0xbe, 0x94, 0x56, 0x0d, 0xcf, + 0xbb, 0x79, 0x22, 0xe0, 0x76, 0xb4, 0xef, 0x2d, 0x59, 0x9b, + 0xc0, 0x02, 0x28, 0xea, 0xb1, 0x73, 0x07, 0xc5, 0x9e, 0x5c, + 0xaf, 0x6d, 0x36, 0xf4, 0x80, 0x42, 0x19, 0xdb, 0xf1, 0x33, + 0x68, 0xaa, 0xde, 0x1c, 0x47, 0x85, 0x13, 0xd1, 0x8a, 0x48, + 0x3c, 0xfe, 0xa5, 0x67, 0x4d, 0x8f, 0xd4, 0x16, 0x62, 0xa0, + 0xfb, 0x39, 0x89, 0x4b, 0x10, 0xd2, 0xa6, 0x64, 0x3f, 0xfd, + 0xd7, 0x15, 0x4e, 0x8c, 0xf8, 0x3a, 0x61, 0xa3, 0x35, 0xf7, + 0xac, 0x6e, 0x1a, 0xd8, 0x83, 0x41, 0x6b, 0xa9, 0xf2, 0x30, + 0x44, 0x86, 0xdd, 0x1f, 0xec, 0x2e, 0x75, 0xb7, 0xc3, 0x01, + 0x5a, 0x98, 0xb2, 0x70, 0x2b, 0xe9, 0x9d, 0x5f, 0x04, 0xc6, + 0x50, 0x92, 0xc9, 0x0b, 0x7f, 0xbd, 0xe6, 0x24, 0x0e, 0xcc, + 0x97, 0x55, 0x21, 0xe3, 0xb8, 0x7a, 0x43, 0x81, 0xda, 
0x18, + 0x6c, 0xae, 0xf5, 0x37, 0x1d, 0xdf, 0x84, 0x46, 0x32, 0xf0, + 0xab, 0x69, 0xff, 0x3d, 0x66, 0xa4, 0xd0, 0x12, 0x49, 0x8b, + 0xa1, 0x63, 0x38, 0xfa, 0x8e, 0x4c, 0x17, 0xd5, 0x26, 0xe4, + 0xbf, 0x7d, 0x09, 0xcb, 0x90, 0x52, 0x78, 0xba, 0xe1, 0x23, + 0x57, 0x95, 0xce, 0x0c, 0x9a, 0x58, 0x03, 0xc1, 0xb5, 0x77, + 0x2c, 0xee, 0xc4, 0x06, 0x5d, 0x9f, 0xeb, 0x29, 0x72, 0xb0, + 0x00, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, + 0xcd, 0x0e, 0x7d, 0xbe, 0xe6, 0x25, 0xac, 0x6f, 0x37, 0xf4, + 0x87, 0x44, 0x1c, 0xdf, 0xfa, 0x39, 0x61, 0xa2, 0xd1, 0x12, + 0x4a, 0x89, 0x45, 0x86, 0xde, 0x1d, 0x6e, 0xad, 0xf5, 0x36, + 0x13, 0xd0, 0x88, 0x4b, 0x38, 0xfb, 0xa3, 0x60, 0xe9, 0x2a, + 0x72, 0xb1, 0xc2, 0x01, 0x59, 0x9a, 0xbf, 0x7c, 0x24, 0xe7, + 0x94, 0x57, 0x0f, 0xcc, 0x8a, 0x49, 0x11, 0xd2, 0xa1, 0x62, + 0x3a, 0xf9, 0xdc, 0x1f, 0x47, 0x84, 0xf7, 0x34, 0x6c, 0xaf, + 0x26, 0xe5, 0xbd, 0x7e, 0x0d, 0xce, 0x96, 0x55, 0x70, 0xb3, + 0xeb, 0x28, 0x5b, 0x98, 0xc0, 0x03, 0xcf, 0x0c, 0x54, 0x97, + 0xe4, 0x27, 0x7f, 0xbc, 0x99, 0x5a, 0x02, 0xc1, 0xb2, 0x71, + 0x29, 0xea, 0x63, 0xa0, 0xf8, 0x3b, 0x48, 0x8b, 0xd3, 0x10, + 0x35, 0xf6, 0xae, 0x6d, 0x1e, 0xdd, 0x85, 0x46, 0x09, 0xca, + 0x92, 0x51, 0x22, 0xe1, 0xb9, 0x7a, 0x5f, 0x9c, 0xc4, 0x07, + 0x74, 0xb7, 0xef, 0x2c, 0xa5, 0x66, 0x3e, 0xfd, 0x8e, 0x4d, + 0x15, 0xd6, 0xf3, 0x30, 0x68, 0xab, 0xd8, 0x1b, 0x43, 0x80, + 0x4c, 0x8f, 0xd7, 0x14, 0x67, 0xa4, 0xfc, 0x3f, 0x1a, 0xd9, + 0x81, 0x42, 0x31, 0xf2, 0xaa, 0x69, 0xe0, 0x23, 0x7b, 0xb8, + 0xcb, 0x08, 0x50, 0x93, 0xb6, 0x75, 0x2d, 0xee, 0x9d, 0x5e, + 0x06, 0xc5, 0x83, 0x40, 0x18, 0xdb, 0xa8, 0x6b, 0x33, 0xf0, + 0xd5, 0x16, 0x4e, 0x8d, 0xfe, 0x3d, 0x65, 0xa6, 0x2f, 0xec, + 0xb4, 0x77, 0x04, 0xc7, 0x9f, 0x5c, 0x79, 0xba, 0xe2, 0x21, + 0x52, 0x91, 0xc9, 0x0a, 0xc6, 0x05, 0x5d, 0x9e, 0xed, 0x2e, + 0x76, 0xb5, 0x90, 0x53, 0x0b, 0xc8, 0xbb, 0x78, 0x20, 0xe3, + 0x6a, 0xa9, 0xf1, 0x32, 0x41, 0x82, 0xda, 0x19, 0x3c, 0xff, + 0xa7, 0x64, 0x17, 0xd4, 0x8c, 0x4f, 0x00, 0xc4, 0x95, 0x51, + 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, + 0xcc, 0x08, 0xdc, 0x18, 0x49, 0x8d, 0xeb, 0x2f, 0x7e, 0xba, + 0xb2, 0x76, 0x27, 0xe3, 0x85, 0x41, 0x10, 0xd4, 0xa5, 0x61, + 0x30, 0xf4, 0x92, 0x56, 0x07, 0xc3, 0xcb, 0x0f, 0x5e, 0x9a, + 0xfc, 0x38, 0x69, 0xad, 0x79, 0xbd, 0xec, 0x28, 0x4e, 0x8a, + 0xdb, 0x1f, 0x17, 0xd3, 0x82, 0x46, 0x20, 0xe4, 0xb5, 0x71, + 0x57, 0x93, 0xc2, 0x06, 0x60, 0xa4, 0xf5, 0x31, 0x39, 0xfd, + 0xac, 0x68, 0x0e, 0xca, 0x9b, 0x5f, 0x8b, 0x4f, 0x1e, 0xda, + 0xbc, 0x78, 0x29, 0xed, 0xe5, 0x21, 0x70, 0xb4, 0xd2, 0x16, + 0x47, 0x83, 0xf2, 0x36, 0x67, 0xa3, 0xc5, 0x01, 0x50, 0x94, + 0x9c, 0x58, 0x09, 0xcd, 0xab, 0x6f, 0x3e, 0xfa, 0x2e, 0xea, + 0xbb, 0x7f, 0x19, 0xdd, 0x8c, 0x48, 0x40, 0x84, 0xd5, 0x11, + 0x77, 0xb3, 0xe2, 0x26, 0xae, 0x6a, 0x3b, 0xff, 0x99, 0x5d, + 0x0c, 0xc8, 0xc0, 0x04, 0x55, 0x91, 0xf7, 0x33, 0x62, 0xa6, + 0x72, 0xb6, 0xe7, 0x23, 0x45, 0x81, 0xd0, 0x14, 0x1c, 0xd8, + 0x89, 0x4d, 0x2b, 0xef, 0xbe, 0x7a, 0x0b, 0xcf, 0x9e, 0x5a, + 0x3c, 0xf8, 0xa9, 0x6d, 0x65, 0xa1, 0xf0, 0x34, 0x52, 0x96, + 0xc7, 0x03, 0xd7, 0x13, 0x42, 0x86, 0xe0, 0x24, 0x75, 0xb1, + 0xb9, 0x7d, 0x2c, 0xe8, 0x8e, 0x4a, 0x1b, 0xdf, 0xf9, 0x3d, + 0x6c, 0xa8, 0xce, 0x0a, 0x5b, 0x9f, 0x97, 0x53, 0x02, 0xc6, + 0xa0, 0x64, 0x35, 0xf1, 0x25, 0xe1, 0xb0, 0x74, 0x12, 0xd6, + 0x87, 0x43, 0x4b, 0x8f, 0xde, 0x1a, 0x7c, 0xb8, 0xe9, 0x2d, + 0x5c, 0x98, 0xc9, 0x0d, 0x6b, 0xaf, 0xfe, 0x3a, 0x32, 0xf6, + 0xa7, 0x63, 0x05, 0xc1, 0x90, 0x54, 0x80, 0x44, 0x15, 0xd1, + 0xb7, 0x73, 0x22, 0xe6, 0xee, 0x2a, 0x7b, 0xbf, 0xd9, 0x1d, + 0x4c, 0x88, 
0x00, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, + 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x07, 0xcc, 0x09, + 0x5b, 0x9e, 0xff, 0x3a, 0x68, 0xad, 0xaa, 0x6f, 0x3d, 0xf8, + 0x99, 0x5c, 0x0e, 0xcb, 0x85, 0x40, 0x12, 0xd7, 0xb6, 0x73, + 0x21, 0xe4, 0xe3, 0x26, 0x74, 0xb1, 0xd0, 0x15, 0x47, 0x82, + 0x49, 0x8c, 0xde, 0x1b, 0x7a, 0xbf, 0xed, 0x28, 0x2f, 0xea, + 0xb8, 0x7d, 0x1c, 0xd9, 0x8b, 0x4e, 0x17, 0xd2, 0x80, 0x45, + 0x24, 0xe1, 0xb3, 0x76, 0x71, 0xb4, 0xe6, 0x23, 0x42, 0x87, + 0xd5, 0x10, 0xdb, 0x1e, 0x4c, 0x89, 0xe8, 0x2d, 0x7f, 0xba, + 0xbd, 0x78, 0x2a, 0xef, 0x8e, 0x4b, 0x19, 0xdc, 0x92, 0x57, + 0x05, 0xc0, 0xa1, 0x64, 0x36, 0xf3, 0xf4, 0x31, 0x63, 0xa6, + 0xc7, 0x02, 0x50, 0x95, 0x5e, 0x9b, 0xc9, 0x0c, 0x6d, 0xa8, + 0xfa, 0x3f, 0x38, 0xfd, 0xaf, 0x6a, 0x0b, 0xce, 0x9c, 0x59, + 0x2e, 0xeb, 0xb9, 0x7c, 0x1d, 0xd8, 0x8a, 0x4f, 0x48, 0x8d, + 0xdf, 0x1a, 0x7b, 0xbe, 0xec, 0x29, 0xe2, 0x27, 0x75, 0xb0, + 0xd1, 0x14, 0x46, 0x83, 0x84, 0x41, 0x13, 0xd6, 0xb7, 0x72, + 0x20, 0xe5, 0xab, 0x6e, 0x3c, 0xf9, 0x98, 0x5d, 0x0f, 0xca, + 0xcd, 0x08, 0x5a, 0x9f, 0xfe, 0x3b, 0x69, 0xac, 0x67, 0xa2, + 0xf0, 0x35, 0x54, 0x91, 0xc3, 0x06, 0x01, 0xc4, 0x96, 0x53, + 0x32, 0xf7, 0xa5, 0x60, 0x39, 0xfc, 0xae, 0x6b, 0x0a, 0xcf, + 0x9d, 0x58, 0x5f, 0x9a, 0xc8, 0x0d, 0x6c, 0xa9, 0xfb, 0x3e, + 0xf5, 0x30, 0x62, 0xa7, 0xc6, 0x03, 0x51, 0x94, 0x93, 0x56, + 0x04, 0xc1, 0xa0, 0x65, 0x37, 0xf2, 0xbc, 0x79, 0x2b, 0xee, + 0x8f, 0x4a, 0x18, 0xdd, 0xda, 0x1f, 0x4d, 0x88, 0xe9, 0x2c, + 0x7e, 0xbb, 0x70, 0xb5, 0xe7, 0x22, 0x43, 0x86, 0xd4, 0x11, + 0x16, 0xd3, 0x81, 0x44, 0x25, 0xe0, 0xb2, 0x77, 0x00, 0xc6, + 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, + 0x41, 0x87, 0xd0, 0x16, 0xfc, 0x3a, 0x6d, 0xab, 0xc3, 0x05, + 0x52, 0x94, 0x82, 0x44, 0x13, 0xd5, 0xbd, 0x7b, 0x2c, 0xea, + 0xe5, 0x23, 0x74, 0xb2, 0xda, 0x1c, 0x4b, 0x8d, 0x9b, 0x5d, + 0x0a, 0xcc, 0xa4, 0x62, 0x35, 0xf3, 0x19, 0xdf, 0x88, 0x4e, + 0x26, 0xe0, 0xb7, 0x71, 0x67, 0xa1, 0xf6, 0x30, 0x58, 0x9e, + 0xc9, 0x0f, 0xd7, 0x11, 0x46, 0x80, 0xe8, 0x2e, 0x79, 0xbf, + 0xa9, 0x6f, 0x38, 0xfe, 0x96, 0x50, 0x07, 0xc1, 0x2b, 0xed, + 0xba, 0x7c, 0x14, 0xd2, 0x85, 0x43, 0x55, 0x93, 0xc4, 0x02, + 0x6a, 0xac, 0xfb, 0x3d, 0x32, 0xf4, 0xa3, 0x65, 0x0d, 0xcb, + 0x9c, 0x5a, 0x4c, 0x8a, 0xdd, 0x1b, 0x73, 0xb5, 0xe2, 0x24, + 0xce, 0x08, 0x5f, 0x99, 0xf1, 0x37, 0x60, 0xa6, 0xb0, 0x76, + 0x21, 0xe7, 0x8f, 0x49, 0x1e, 0xd8, 0xb3, 0x75, 0x22, 0xe4, + 0x8c, 0x4a, 0x1d, 0xdb, 0xcd, 0x0b, 0x5c, 0x9a, 0xf2, 0x34, + 0x63, 0xa5, 0x4f, 0x89, 0xde, 0x18, 0x70, 0xb6, 0xe1, 0x27, + 0x31, 0xf7, 0xa0, 0x66, 0x0e, 0xc8, 0x9f, 0x59, 0x56, 0x90, + 0xc7, 0x01, 0x69, 0xaf, 0xf8, 0x3e, 0x28, 0xee, 0xb9, 0x7f, + 0x17, 0xd1, 0x86, 0x40, 0xaa, 0x6c, 0x3b, 0xfd, 0x95, 0x53, + 0x04, 0xc2, 0xd4, 0x12, 0x45, 0x83, 0xeb, 0x2d, 0x7a, 0xbc, + 0x64, 0xa2, 0xf5, 0x33, 0x5b, 0x9d, 0xca, 0x0c, 0x1a, 0xdc, + 0x8b, 0x4d, 0x25, 0xe3, 0xb4, 0x72, 0x98, 0x5e, 0x09, 0xcf, + 0xa7, 0x61, 0x36, 0xf0, 0xe6, 0x20, 0x77, 0xb1, 0xd9, 0x1f, + 0x48, 0x8e, 0x81, 0x47, 0x10, 0xd6, 0xbe, 0x78, 0x2f, 0xe9, + 0xff, 0x39, 0x6e, 0xa8, 0xc0, 0x06, 0x51, 0x97, 0x7d, 0xbb, + 0xec, 0x2a, 0x42, 0x84, 0xd3, 0x15, 0x03, 0xc5, 0x92, 0x54, + 0x3c, 0xfa, 0xad, 0x6b, 0x00, 0xc7, 0x93, 0x54, 0x3b, 0xfc, + 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19, + 0xec, 0x2b, 0x7f, 0xb8, 0xd7, 0x10, 0x44, 0x83, 0x9a, 0x5d, + 0x09, 0xce, 0xa1, 0x66, 0x32, 0xf5, 0xc5, 0x02, 0x56, 0x91, + 0xfe, 0x39, 0x6d, 0xaa, 0xb3, 0x74, 0x20, 0xe7, 0x88, 0x4f, + 0x1b, 0xdc, 0x29, 0xee, 0xba, 0x7d, 0x12, 0xd5, 0x81, 0x46, + 0x5f, 0x98, 0xcc, 0x0b, 0x64, 
0xa3, 0xf7, 0x30, 0x97, 0x50, + 0x04, 0xc3, 0xac, 0x6b, 0x3f, 0xf8, 0xe1, 0x26, 0x72, 0xb5, + 0xda, 0x1d, 0x49, 0x8e, 0x7b, 0xbc, 0xe8, 0x2f, 0x40, 0x87, + 0xd3, 0x14, 0x0d, 0xca, 0x9e, 0x59, 0x36, 0xf1, 0xa5, 0x62, + 0x52, 0x95, 0xc1, 0x06, 0x69, 0xae, 0xfa, 0x3d, 0x24, 0xe3, + 0xb7, 0x70, 0x1f, 0xd8, 0x8c, 0x4b, 0xbe, 0x79, 0x2d, 0xea, + 0x85, 0x42, 0x16, 0xd1, 0xc8, 0x0f, 0x5b, 0x9c, 0xf3, 0x34, + 0x60, 0xa7, 0x33, 0xf4, 0xa0, 0x67, 0x08, 0xcf, 0x9b, 0x5c, + 0x45, 0x82, 0xd6, 0x11, 0x7e, 0xb9, 0xed, 0x2a, 0xdf, 0x18, + 0x4c, 0x8b, 0xe4, 0x23, 0x77, 0xb0, 0xa9, 0x6e, 0x3a, 0xfd, + 0x92, 0x55, 0x01, 0xc6, 0xf6, 0x31, 0x65, 0xa2, 0xcd, 0x0a, + 0x5e, 0x99, 0x80, 0x47, 0x13, 0xd4, 0xbb, 0x7c, 0x28, 0xef, + 0x1a, 0xdd, 0x89, 0x4e, 0x21, 0xe6, 0xb2, 0x75, 0x6c, 0xab, + 0xff, 0x38, 0x57, 0x90, 0xc4, 0x03, 0xa4, 0x63, 0x37, 0xf0, + 0x9f, 0x58, 0x0c, 0xcb, 0xd2, 0x15, 0x41, 0x86, 0xe9, 0x2e, + 0x7a, 0xbd, 0x48, 0x8f, 0xdb, 0x1c, 0x73, 0xb4, 0xe0, 0x27, + 0x3e, 0xf9, 0xad, 0x6a, 0x05, 0xc2, 0x96, 0x51, 0x61, 0xa6, + 0xf2, 0x35, 0x5a, 0x9d, 0xc9, 0x0e, 0x17, 0xd0, 0x84, 0x43, + 0x2c, 0xeb, 0xbf, 0x78, 0x8d, 0x4a, 0x1e, 0xd9, 0xb6, 0x71, + 0x25, 0xe2, 0xfb, 0x3c, 0x68, 0xaf, 0xc0, 0x07, 0x53, 0x94, + 0x00, 0xc8, 0x8d, 0x45, 0x07, 0xcf, 0x8a, 0x42, 0x0e, 0xc6, + 0x83, 0x4b, 0x09, 0xc1, 0x84, 0x4c, 0x1c, 0xd4, 0x91, 0x59, + 0x1b, 0xd3, 0x96, 0x5e, 0x12, 0xda, 0x9f, 0x57, 0x15, 0xdd, + 0x98, 0x50, 0x38, 0xf0, 0xb5, 0x7d, 0x3f, 0xf7, 0xb2, 0x7a, + 0x36, 0xfe, 0xbb, 0x73, 0x31, 0xf9, 0xbc, 0x74, 0x24, 0xec, + 0xa9, 0x61, 0x23, 0xeb, 0xae, 0x66, 0x2a, 0xe2, 0xa7, 0x6f, + 0x2d, 0xe5, 0xa0, 0x68, 0x70, 0xb8, 0xfd, 0x35, 0x77, 0xbf, + 0xfa, 0x32, 0x7e, 0xb6, 0xf3, 0x3b, 0x79, 0xb1, 0xf4, 0x3c, + 0x6c, 0xa4, 0xe1, 0x29, 0x6b, 0xa3, 0xe6, 0x2e, 0x62, 0xaa, + 0xef, 0x27, 0x65, 0xad, 0xe8, 0x20, 0x48, 0x80, 0xc5, 0x0d, + 0x4f, 0x87, 0xc2, 0x0a, 0x46, 0x8e, 0xcb, 0x03, 0x41, 0x89, + 0xcc, 0x04, 0x54, 0x9c, 0xd9, 0x11, 0x53, 0x9b, 0xde, 0x16, + 0x5a, 0x92, 0xd7, 0x1f, 0x5d, 0x95, 0xd0, 0x18, 0xe0, 0x28, + 0x6d, 0xa5, 0xe7, 0x2f, 0x6a, 0xa2, 0xee, 0x26, 0x63, 0xab, + 0xe9, 0x21, 0x64, 0xac, 0xfc, 0x34, 0x71, 0xb9, 0xfb, 0x33, + 0x76, 0xbe, 0xf2, 0x3a, 0x7f, 0xb7, 0xf5, 0x3d, 0x78, 0xb0, + 0xd8, 0x10, 0x55, 0x9d, 0xdf, 0x17, 0x52, 0x9a, 0xd6, 0x1e, + 0x5b, 0x93, 0xd1, 0x19, 0x5c, 0x94, 0xc4, 0x0c, 0x49, 0x81, + 0xc3, 0x0b, 0x4e, 0x86, 0xca, 0x02, 0x47, 0x8f, 0xcd, 0x05, + 0x40, 0x88, 0x90, 0x58, 0x1d, 0xd5, 0x97, 0x5f, 0x1a, 0xd2, + 0x9e, 0x56, 0x13, 0xdb, 0x99, 0x51, 0x14, 0xdc, 0x8c, 0x44, + 0x01, 0xc9, 0x8b, 0x43, 0x06, 0xce, 0x82, 0x4a, 0x0f, 0xc7, + 0x85, 0x4d, 0x08, 0xc0, 0xa8, 0x60, 0x25, 0xed, 0xaf, 0x67, + 0x22, 0xea, 0xa6, 0x6e, 0x2b, 0xe3, 0xa1, 0x69, 0x2c, 0xe4, + 0xb4, 0x7c, 0x39, 0xf1, 0xb3, 0x7b, 0x3e, 0xf6, 0xba, 0x72, + 0x37, 0xff, 0xbd, 0x75, 0x30, 0xf8, 0x00, 0xc9, 0x8f, 0x46, + 0x03, 0xca, 0x8c, 0x45, 0x06, 0xcf, 0x89, 0x40, 0x05, 0xcc, + 0x8a, 0x43, 0x0c, 0xc5, 0x83, 0x4a, 0x0f, 0xc6, 0x80, 0x49, + 0x0a, 0xc3, 0x85, 0x4c, 0x09, 0xc0, 0x86, 0x4f, 0x18, 0xd1, + 0x97, 0x5e, 0x1b, 0xd2, 0x94, 0x5d, 0x1e, 0xd7, 0x91, 0x58, + 0x1d, 0xd4, 0x92, 0x5b, 0x14, 0xdd, 0x9b, 0x52, 0x17, 0xde, + 0x98, 0x51, 0x12, 0xdb, 0x9d, 0x54, 0x11, 0xd8, 0x9e, 0x57, + 0x30, 0xf9, 0xbf, 0x76, 0x33, 0xfa, 0xbc, 0x75, 0x36, 0xff, + 0xb9, 0x70, 0x35, 0xfc, 0xba, 0x73, 0x3c, 0xf5, 0xb3, 0x7a, + 0x3f, 0xf6, 0xb0, 0x79, 0x3a, 0xf3, 0xb5, 0x7c, 0x39, 0xf0, + 0xb6, 0x7f, 0x28, 0xe1, 0xa7, 0x6e, 0x2b, 0xe2, 0xa4, 0x6d, + 0x2e, 0xe7, 0xa1, 0x68, 0x2d, 0xe4, 0xa2, 0x6b, 0x24, 0xed, + 0xab, 0x62, 0x27, 0xee, 0xa8, 0x61, 0x22, 0xeb, 
0xad, 0x64, + 0x21, 0xe8, 0xae, 0x67, 0x60, 0xa9, 0xef, 0x26, 0x63, 0xaa, + 0xec, 0x25, 0x66, 0xaf, 0xe9, 0x20, 0x65, 0xac, 0xea, 0x23, + 0x6c, 0xa5, 0xe3, 0x2a, 0x6f, 0xa6, 0xe0, 0x29, 0x6a, 0xa3, + 0xe5, 0x2c, 0x69, 0xa0, 0xe6, 0x2f, 0x78, 0xb1, 0xf7, 0x3e, + 0x7b, 0xb2, 0xf4, 0x3d, 0x7e, 0xb7, 0xf1, 0x38, 0x7d, 0xb4, + 0xf2, 0x3b, 0x74, 0xbd, 0xfb, 0x32, 0x77, 0xbe, 0xf8, 0x31, + 0x72, 0xbb, 0xfd, 0x34, 0x71, 0xb8, 0xfe, 0x37, 0x50, 0x99, + 0xdf, 0x16, 0x53, 0x9a, 0xdc, 0x15, 0x56, 0x9f, 0xd9, 0x10, + 0x55, 0x9c, 0xda, 0x13, 0x5c, 0x95, 0xd3, 0x1a, 0x5f, 0x96, + 0xd0, 0x19, 0x5a, 0x93, 0xd5, 0x1c, 0x59, 0x90, 0xd6, 0x1f, + 0x48, 0x81, 0xc7, 0x0e, 0x4b, 0x82, 0xc4, 0x0d, 0x4e, 0x87, + 0xc1, 0x08, 0x4d, 0x84, 0xc2, 0x0b, 0x44, 0x8d, 0xcb, 0x02, + 0x47, 0x8e, 0xc8, 0x01, 0x42, 0x8b, 0xcd, 0x04, 0x41, 0x88, + 0xce, 0x07, 0x00, 0xca, 0x89, 0x43, 0x0f, 0xc5, 0x86, 0x4c, + 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52, 0x3c, 0xf6, + 0xb5, 0x7f, 0x33, 0xf9, 0xba, 0x70, 0x22, 0xe8, 0xab, 0x61, + 0x2d, 0xe7, 0xa4, 0x6e, 0x78, 0xb2, 0xf1, 0x3b, 0x77, 0xbd, + 0xfe, 0x34, 0x66, 0xac, 0xef, 0x25, 0x69, 0xa3, 0xe0, 0x2a, + 0x44, 0x8e, 0xcd, 0x07, 0x4b, 0x81, 0xc2, 0x08, 0x5a, 0x90, + 0xd3, 0x19, 0x55, 0x9f, 0xdc, 0x16, 0xf0, 0x3a, 0x79, 0xb3, + 0xff, 0x35, 0x76, 0xbc, 0xee, 0x24, 0x67, 0xad, 0xe1, 0x2b, + 0x68, 0xa2, 0xcc, 0x06, 0x45, 0x8f, 0xc3, 0x09, 0x4a, 0x80, + 0xd2, 0x18, 0x5b, 0x91, 0xdd, 0x17, 0x54, 0x9e, 0x88, 0x42, + 0x01, 0xcb, 0x87, 0x4d, 0x0e, 0xc4, 0x96, 0x5c, 0x1f, 0xd5, + 0x99, 0x53, 0x10, 0xda, 0xb4, 0x7e, 0x3d, 0xf7, 0xbb, 0x71, + 0x32, 0xf8, 0xaa, 0x60, 0x23, 0xe9, 0xa5, 0x6f, 0x2c, 0xe6, + 0xfd, 0x37, 0x74, 0xbe, 0xf2, 0x38, 0x7b, 0xb1, 0xe3, 0x29, + 0x6a, 0xa0, 0xec, 0x26, 0x65, 0xaf, 0xc1, 0x0b, 0x48, 0x82, + 0xce, 0x04, 0x47, 0x8d, 0xdf, 0x15, 0x56, 0x9c, 0xd0, 0x1a, + 0x59, 0x93, 0x85, 0x4f, 0x0c, 0xc6, 0x8a, 0x40, 0x03, 0xc9, + 0x9b, 0x51, 0x12, 0xd8, 0x94, 0x5e, 0x1d, 0xd7, 0xb9, 0x73, + 0x30, 0xfa, 0xb6, 0x7c, 0x3f, 0xf5, 0xa7, 0x6d, 0x2e, 0xe4, + 0xa8, 0x62, 0x21, 0xeb, 0x0d, 0xc7, 0x84, 0x4e, 0x02, 0xc8, + 0x8b, 0x41, 0x13, 0xd9, 0x9a, 0x50, 0x1c, 0xd6, 0x95, 0x5f, + 0x31, 0xfb, 0xb8, 0x72, 0x3e, 0xf4, 0xb7, 0x7d, 0x2f, 0xe5, + 0xa6, 0x6c, 0x20, 0xea, 0xa9, 0x63, 0x75, 0xbf, 0xfc, 0x36, + 0x7a, 0xb0, 0xf3, 0x39, 0x6b, 0xa1, 0xe2, 0x28, 0x64, 0xae, + 0xed, 0x27, 0x49, 0x83, 0xc0, 0x0a, 0x46, 0x8c, 0xcf, 0x05, + 0x57, 0x9d, 0xde, 0x14, 0x58, 0x92, 0xd1, 0x1b, 0x00, 0xcb, + 0x8b, 0x40, 0x0b, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, + 0x1d, 0xd6, 0x96, 0x5d, 0x2c, 0xe7, 0xa7, 0x6c, 0x27, 0xec, + 0xac, 0x67, 0x3a, 0xf1, 0xb1, 0x7a, 0x31, 0xfa, 0xba, 0x71, + 0x58, 0x93, 0xd3, 0x18, 0x53, 0x98, 0xd8, 0x13, 0x4e, 0x85, + 0xc5, 0x0e, 0x45, 0x8e, 0xce, 0x05, 0x74, 0xbf, 0xff, 0x34, + 0x7f, 0xb4, 0xf4, 0x3f, 0x62, 0xa9, 0xe9, 0x22, 0x69, 0xa2, + 0xe2, 0x29, 0xb0, 0x7b, 0x3b, 0xf0, 0xbb, 0x70, 0x30, 0xfb, + 0xa6, 0x6d, 0x2d, 0xe6, 0xad, 0x66, 0x26, 0xed, 0x9c, 0x57, + 0x17, 0xdc, 0x97, 0x5c, 0x1c, 0xd7, 0x8a, 0x41, 0x01, 0xca, + 0x81, 0x4a, 0x0a, 0xc1, 0xe8, 0x23, 0x63, 0xa8, 0xe3, 0x28, + 0x68, 0xa3, 0xfe, 0x35, 0x75, 0xbe, 0xf5, 0x3e, 0x7e, 0xb5, + 0xc4, 0x0f, 0x4f, 0x84, 0xcf, 0x04, 0x44, 0x8f, 0xd2, 0x19, + 0x59, 0x92, 0xd9, 0x12, 0x52, 0x99, 0x7d, 0xb6, 0xf6, 0x3d, + 0x76, 0xbd, 0xfd, 0x36, 0x6b, 0xa0, 0xe0, 0x2b, 0x60, 0xab, + 0xeb, 0x20, 0x51, 0x9a, 0xda, 0x11, 0x5a, 0x91, 0xd1, 0x1a, + 0x47, 0x8c, 0xcc, 0x07, 0x4c, 0x87, 0xc7, 0x0c, 0x25, 0xee, + 0xae, 0x65, 0x2e, 0xe5, 0xa5, 0x6e, 0x33, 0xf8, 0xb8, 0x73, + 0x38, 0xf3, 0xb3, 0x78, 0x09, 0xc2, 0x82, 0x49, 0x02, 0xc9, + 0x89, 
0x42, 0x1f, 0xd4, 0x94, 0x5f, 0x14, 0xdf, 0x9f, 0x54, + 0xcd, 0x06, 0x46, 0x8d, 0xc6, 0x0d, 0x4d, 0x86, 0xdb, 0x10, + 0x50, 0x9b, 0xd0, 0x1b, 0x5b, 0x90, 0xe1, 0x2a, 0x6a, 0xa1, + 0xea, 0x21, 0x61, 0xaa, 0xf7, 0x3c, 0x7c, 0xb7, 0xfc, 0x37, + 0x77, 0xbc, 0x95, 0x5e, 0x1e, 0xd5, 0x9e, 0x55, 0x15, 0xde, + 0x83, 0x48, 0x08, 0xc3, 0x88, 0x43, 0x03, 0xc8, 0xb9, 0x72, + 0x32, 0xf9, 0xb2, 0x79, 0x39, 0xf2, 0xaf, 0x64, 0x24, 0xef, + 0xa4, 0x6f, 0x2f, 0xe4, 0x00, 0xcc, 0x85, 0x49, 0x17, 0xdb, + 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70, + 0x5c, 0x90, 0xd9, 0x15, 0x4b, 0x87, 0xce, 0x02, 0x72, 0xbe, + 0xf7, 0x3b, 0x65, 0xa9, 0xe0, 0x2c, 0xb8, 0x74, 0x3d, 0xf1, + 0xaf, 0x63, 0x2a, 0xe6, 0x96, 0x5a, 0x13, 0xdf, 0x81, 0x4d, + 0x04, 0xc8, 0xe4, 0x28, 0x61, 0xad, 0xf3, 0x3f, 0x76, 0xba, + 0xca, 0x06, 0x4f, 0x83, 0xdd, 0x11, 0x58, 0x94, 0x6d, 0xa1, + 0xe8, 0x24, 0x7a, 0xb6, 0xff, 0x33, 0x43, 0x8f, 0xc6, 0x0a, + 0x54, 0x98, 0xd1, 0x1d, 0x31, 0xfd, 0xb4, 0x78, 0x26, 0xea, + 0xa3, 0x6f, 0x1f, 0xd3, 0x9a, 0x56, 0x08, 0xc4, 0x8d, 0x41, + 0xd5, 0x19, 0x50, 0x9c, 0xc2, 0x0e, 0x47, 0x8b, 0xfb, 0x37, + 0x7e, 0xb2, 0xec, 0x20, 0x69, 0xa5, 0x89, 0x45, 0x0c, 0xc0, + 0x9e, 0x52, 0x1b, 0xd7, 0xa7, 0x6b, 0x22, 0xee, 0xb0, 0x7c, + 0x35, 0xf9, 0xda, 0x16, 0x5f, 0x93, 0xcd, 0x01, 0x48, 0x84, + 0xf4, 0x38, 0x71, 0xbd, 0xe3, 0x2f, 0x66, 0xaa, 0x86, 0x4a, + 0x03, 0xcf, 0x91, 0x5d, 0x14, 0xd8, 0xa8, 0x64, 0x2d, 0xe1, + 0xbf, 0x73, 0x3a, 0xf6, 0x62, 0xae, 0xe7, 0x2b, 0x75, 0xb9, + 0xf0, 0x3c, 0x4c, 0x80, 0xc9, 0x05, 0x5b, 0x97, 0xde, 0x12, + 0x3e, 0xf2, 0xbb, 0x77, 0x29, 0xe5, 0xac, 0x60, 0x10, 0xdc, + 0x95, 0x59, 0x07, 0xcb, 0x82, 0x4e, 0xb7, 0x7b, 0x32, 0xfe, + 0xa0, 0x6c, 0x25, 0xe9, 0x99, 0x55, 0x1c, 0xd0, 0x8e, 0x42, + 0x0b, 0xc7, 0xeb, 0x27, 0x6e, 0xa2, 0xfc, 0x30, 0x79, 0xb5, + 0xc5, 0x09, 0x40, 0x8c, 0xd2, 0x1e, 0x57, 0x9b, 0x0f, 0xc3, + 0x8a, 0x46, 0x18, 0xd4, 0x9d, 0x51, 0x21, 0xed, 0xa4, 0x68, + 0x36, 0xfa, 0xb3, 0x7f, 0x53, 0x9f, 0xd6, 0x1a, 0x44, 0x88, + 0xc1, 0x0d, 0x7d, 0xb1, 0xf8, 0x34, 0x6a, 0xa6, 0xef, 0x23, + 0x00, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, + 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f, 0x4c, 0x81, 0xcb, 0x06, + 0x5f, 0x92, 0xd8, 0x15, 0x6a, 0xa7, 0xed, 0x20, 0x79, 0xb4, + 0xfe, 0x33, 0x98, 0x55, 0x1f, 0xd2, 0x8b, 0x46, 0x0c, 0xc1, + 0xbe, 0x73, 0x39, 0xf4, 0xad, 0x60, 0x2a, 0xe7, 0xd4, 0x19, + 0x53, 0x9e, 0xc7, 0x0a, 0x40, 0x8d, 0xf2, 0x3f, 0x75, 0xb8, + 0xe1, 0x2c, 0x66, 0xab, 0x2d, 0xe0, 0xaa, 0x67, 0x3e, 0xf3, + 0xb9, 0x74, 0x0b, 0xc6, 0x8c, 0x41, 0x18, 0xd5, 0x9f, 0x52, + 0x61, 0xac, 0xe6, 0x2b, 0x72, 0xbf, 0xf5, 0x38, 0x47, 0x8a, + 0xc0, 0x0d, 0x54, 0x99, 0xd3, 0x1e, 0xb5, 0x78, 0x32, 0xff, + 0xa6, 0x6b, 0x21, 0xec, 0x93, 0x5e, 0x14, 0xd9, 0x80, 0x4d, + 0x07, 0xca, 0xf9, 0x34, 0x7e, 0xb3, 0xea, 0x27, 0x6d, 0xa0, + 0xdf, 0x12, 0x58, 0x95, 0xcc, 0x01, 0x4b, 0x86, 0x5a, 0x97, + 0xdd, 0x10, 0x49, 0x84, 0xce, 0x03, 0x7c, 0xb1, 0xfb, 0x36, + 0x6f, 0xa2, 0xe8, 0x25, 0x16, 0xdb, 0x91, 0x5c, 0x05, 0xc8, + 0x82, 0x4f, 0x30, 0xfd, 0xb7, 0x7a, 0x23, 0xee, 0xa4, 0x69, + 0xc2, 0x0f, 0x45, 0x88, 0xd1, 0x1c, 0x56, 0x9b, 0xe4, 0x29, + 0x63, 0xae, 0xf7, 0x3a, 0x70, 0xbd, 0x8e, 0x43, 0x09, 0xc4, + 0x9d, 0x50, 0x1a, 0xd7, 0xa8, 0x65, 0x2f, 0xe2, 0xbb, 0x76, + 0x3c, 0xf1, 0x77, 0xba, 0xf0, 0x3d, 0x64, 0xa9, 0xe3, 0x2e, + 0x51, 0x9c, 0xd6, 0x1b, 0x42, 0x8f, 0xc5, 0x08, 0x3b, 0xf6, + 0xbc, 0x71, 0x28, 0xe5, 0xaf, 0x62, 0x1d, 0xd0, 0x9a, 0x57, + 0x0e, 0xc3, 0x89, 0x44, 0xef, 0x22, 0x68, 0xa5, 0xfc, 0x31, + 0x7b, 0xb6, 0xc9, 0x04, 0x4e, 0x83, 0xda, 0x17, 0x5d, 0x90, + 0xa3, 0x6e, 0x24, 0xe9, 
0xb0, 0x7d, 0x37, 0xfa, 0x85, 0x48, + 0x02, 0xcf, 0x96, 0x5b, 0x11, 0xdc, 0x00, 0xce, 0x81, 0x4f, + 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, + 0xa0, 0x6e, 0x7c, 0xb2, 0xfd, 0x33, 0x63, 0xad, 0xe2, 0x2c, + 0x42, 0x8c, 0xc3, 0x0d, 0x5d, 0x93, 0xdc, 0x12, 0xf8, 0x36, + 0x79, 0xb7, 0xe7, 0x29, 0x66, 0xa8, 0xc6, 0x08, 0x47, 0x89, + 0xd9, 0x17, 0x58, 0x96, 0x84, 0x4a, 0x05, 0xcb, 0x9b, 0x55, + 0x1a, 0xd4, 0xba, 0x74, 0x3b, 0xf5, 0xa5, 0x6b, 0x24, 0xea, + 0xed, 0x23, 0x6c, 0xa2, 0xf2, 0x3c, 0x73, 0xbd, 0xd3, 0x1d, + 0x52, 0x9c, 0xcc, 0x02, 0x4d, 0x83, 0x91, 0x5f, 0x10, 0xde, + 0x8e, 0x40, 0x0f, 0xc1, 0xaf, 0x61, 0x2e, 0xe0, 0xb0, 0x7e, + 0x31, 0xff, 0x15, 0xdb, 0x94, 0x5a, 0x0a, 0xc4, 0x8b, 0x45, + 0x2b, 0xe5, 0xaa, 0x64, 0x34, 0xfa, 0xb5, 0x7b, 0x69, 0xa7, + 0xe8, 0x26, 0x76, 0xb8, 0xf7, 0x39, 0x57, 0x99, 0xd6, 0x18, + 0x48, 0x86, 0xc9, 0x07, 0xc7, 0x09, 0x46, 0x88, 0xd8, 0x16, + 0x59, 0x97, 0xf9, 0x37, 0x78, 0xb6, 0xe6, 0x28, 0x67, 0xa9, + 0xbb, 0x75, 0x3a, 0xf4, 0xa4, 0x6a, 0x25, 0xeb, 0x85, 0x4b, + 0x04, 0xca, 0x9a, 0x54, 0x1b, 0xd5, 0x3f, 0xf1, 0xbe, 0x70, + 0x20, 0xee, 0xa1, 0x6f, 0x01, 0xcf, 0x80, 0x4e, 0x1e, 0xd0, + 0x9f, 0x51, 0x43, 0x8d, 0xc2, 0x0c, 0x5c, 0x92, 0xdd, 0x13, + 0x7d, 0xb3, 0xfc, 0x32, 0x62, 0xac, 0xe3, 0x2d, 0x2a, 0xe4, + 0xab, 0x65, 0x35, 0xfb, 0xb4, 0x7a, 0x14, 0xda, 0x95, 0x5b, + 0x0b, 0xc5, 0x8a, 0x44, 0x56, 0x98, 0xd7, 0x19, 0x49, 0x87, + 0xc8, 0x06, 0x68, 0xa6, 0xe9, 0x27, 0x77, 0xb9, 0xf6, 0x38, + 0xd2, 0x1c, 0x53, 0x9d, 0xcd, 0x03, 0x4c, 0x82, 0xec, 0x22, + 0x6d, 0xa3, 0xf3, 0x3d, 0x72, 0xbc, 0xae, 0x60, 0x2f, 0xe1, + 0xb1, 0x7f, 0x30, 0xfe, 0x90, 0x5e, 0x11, 0xdf, 0x8f, 0x41, + 0x0e, 0xc0, 0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, + 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61, 0x6c, 0xa3, + 0xef, 0x20, 0x77, 0xb8, 0xf4, 0x3b, 0x5a, 0x95, 0xd9, 0x16, + 0x41, 0x8e, 0xc2, 0x0d, 0xd8, 0x17, 0x5b, 0x94, 0xc3, 0x0c, + 0x40, 0x8f, 0xee, 0x21, 0x6d, 0xa2, 0xf5, 0x3a, 0x76, 0xb9, + 0xb4, 0x7b, 0x37, 0xf8, 0xaf, 0x60, 0x2c, 0xe3, 0x82, 0x4d, + 0x01, 0xce, 0x99, 0x56, 0x1a, 0xd5, 0xad, 0x62, 0x2e, 0xe1, + 0xb6, 0x79, 0x35, 0xfa, 0x9b, 0x54, 0x18, 0xd7, 0x80, 0x4f, + 0x03, 0xcc, 0xc1, 0x0e, 0x42, 0x8d, 0xda, 0x15, 0x59, 0x96, + 0xf7, 0x38, 0x74, 0xbb, 0xec, 0x23, 0x6f, 0xa0, 0x75, 0xba, + 0xf6, 0x39, 0x6e, 0xa1, 0xed, 0x22, 0x43, 0x8c, 0xc0, 0x0f, + 0x58, 0x97, 0xdb, 0x14, 0x19, 0xd6, 0x9a, 0x55, 0x02, 0xcd, + 0x81, 0x4e, 0x2f, 0xe0, 0xac, 0x63, 0x34, 0xfb, 0xb7, 0x78, + 0x47, 0x88, 0xc4, 0x0b, 0x5c, 0x93, 0xdf, 0x10, 0x71, 0xbe, + 0xf2, 0x3d, 0x6a, 0xa5, 0xe9, 0x26, 0x2b, 0xe4, 0xa8, 0x67, + 0x30, 0xff, 0xb3, 0x7c, 0x1d, 0xd2, 0x9e, 0x51, 0x06, 0xc9, + 0x85, 0x4a, 0x9f, 0x50, 0x1c, 0xd3, 0x84, 0x4b, 0x07, 0xc8, + 0xa9, 0x66, 0x2a, 0xe5, 0xb2, 0x7d, 0x31, 0xfe, 0xf3, 0x3c, + 0x70, 0xbf, 0xe8, 0x27, 0x6b, 0xa4, 0xc5, 0x0a, 0x46, 0x89, + 0xde, 0x11, 0x5d, 0x92, 0xea, 0x25, 0x69, 0xa6, 0xf1, 0x3e, + 0x72, 0xbd, 0xdc, 0x13, 0x5f, 0x90, 0xc7, 0x08, 0x44, 0x8b, + 0x86, 0x49, 0x05, 0xca, 0x9d, 0x52, 0x1e, 0xd1, 0xb0, 0x7f, + 0x33, 0xfc, 0xab, 0x64, 0x28, 0xe7, 0x32, 0xfd, 0xb1, 0x7e, + 0x29, 0xe6, 0xaa, 0x65, 0x04, 0xcb, 0x87, 0x48, 0x1f, 0xd0, + 0x9c, 0x53, 0x5e, 0x91, 0xdd, 0x12, 0x45, 0x8a, 0xc6, 0x09, + 0x68, 0xa7, 0xeb, 0x24, 0x73, 0xbc, 0xf0, 0x3f, 0x00, 0xd0, + 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0x0a, 0xce, 0x1e, 0x73, 0xa3, + 0xa9, 0x79, 0x14, 0xc4, 0x81, 0x51, 0x3c, 0xec, 0xe6, 0x36, + 0x5b, 0x8b, 0x4f, 0x9f, 0xf2, 0x22, 0x28, 0xf8, 0x95, 0x45, + 0x1f, 0xcf, 0xa2, 0x72, 0x78, 0xa8, 0xc5, 0x15, 0xd1, 0x01, + 0x6c, 0xbc, 0xb6, 0x66, 0x0b, 0xdb, 0x9e, 
0x4e, 0x23, 0xf3, + 0xf9, 0x29, 0x44, 0x94, 0x50, 0x80, 0xed, 0x3d, 0x37, 0xe7, + 0x8a, 0x5a, 0x3e, 0xee, 0x83, 0x53, 0x59, 0x89, 0xe4, 0x34, + 0xf0, 0x20, 0x4d, 0x9d, 0x97, 0x47, 0x2a, 0xfa, 0xbf, 0x6f, + 0x02, 0xd2, 0xd8, 0x08, 0x65, 0xb5, 0x71, 0xa1, 0xcc, 0x1c, + 0x16, 0xc6, 0xab, 0x7b, 0x21, 0xf1, 0x9c, 0x4c, 0x46, 0x96, + 0xfb, 0x2b, 0xef, 0x3f, 0x52, 0x82, 0x88, 0x58, 0x35, 0xe5, + 0xa0, 0x70, 0x1d, 0xcd, 0xc7, 0x17, 0x7a, 0xaa, 0x6e, 0xbe, + 0xd3, 0x03, 0x09, 0xd9, 0xb4, 0x64, 0x7c, 0xac, 0xc1, 0x11, + 0x1b, 0xcb, 0xa6, 0x76, 0xb2, 0x62, 0x0f, 0xdf, 0xd5, 0x05, + 0x68, 0xb8, 0xfd, 0x2d, 0x40, 0x90, 0x9a, 0x4a, 0x27, 0xf7, + 0x33, 0xe3, 0x8e, 0x5e, 0x54, 0x84, 0xe9, 0x39, 0x63, 0xb3, + 0xde, 0x0e, 0x04, 0xd4, 0xb9, 0x69, 0xad, 0x7d, 0x10, 0xc0, + 0xca, 0x1a, 0x77, 0xa7, 0xe2, 0x32, 0x5f, 0x8f, 0x85, 0x55, + 0x38, 0xe8, 0x2c, 0xfc, 0x91, 0x41, 0x4b, 0x9b, 0xf6, 0x26, + 0x42, 0x92, 0xff, 0x2f, 0x25, 0xf5, 0x98, 0x48, 0x8c, 0x5c, + 0x31, 0xe1, 0xeb, 0x3b, 0x56, 0x86, 0xc3, 0x13, 0x7e, 0xae, + 0xa4, 0x74, 0x19, 0xc9, 0x0d, 0xdd, 0xb0, 0x60, 0x6a, 0xba, + 0xd7, 0x07, 0x5d, 0x8d, 0xe0, 0x30, 0x3a, 0xea, 0x87, 0x57, + 0x93, 0x43, 0x2e, 0xfe, 0xf4, 0x24, 0x49, 0x99, 0xdc, 0x0c, + 0x61, 0xb1, 0xbb, 0x6b, 0x06, 0xd6, 0x12, 0xc2, 0xaf, 0x7f, + 0x75, 0xa5, 0xc8, 0x18, 0x00, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, + 0xdc, 0x0d, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb, + 0x91, 0x40, 0x2e, 0xff, 0xf2, 0x23, 0x4d, 0x9c, 0x57, 0x86, + 0xe8, 0x39, 0x34, 0xe5, 0x8b, 0x5a, 0x3f, 0xee, 0x80, 0x51, + 0x5c, 0x8d, 0xe3, 0x32, 0xf9, 0x28, 0x46, 0x97, 0x9a, 0x4b, + 0x25, 0xf4, 0xae, 0x7f, 0x11, 0xc0, 0xcd, 0x1c, 0x72, 0xa3, + 0x68, 0xb9, 0xd7, 0x06, 0x0b, 0xda, 0xb4, 0x65, 0x7e, 0xaf, + 0xc1, 0x10, 0x1d, 0xcc, 0xa2, 0x73, 0xb8, 0x69, 0x07, 0xd6, + 0xdb, 0x0a, 0x64, 0xb5, 0xef, 0x3e, 0x50, 0x81, 0x8c, 0x5d, + 0x33, 0xe2, 0x29, 0xf8, 0x96, 0x47, 0x4a, 0x9b, 0xf5, 0x24, + 0x41, 0x90, 0xfe, 0x2f, 0x22, 0xf3, 0x9d, 0x4c, 0x87, 0x56, + 0x38, 0xe9, 0xe4, 0x35, 0x5b, 0x8a, 0xd0, 0x01, 0x6f, 0xbe, + 0xb3, 0x62, 0x0c, 0xdd, 0x16, 0xc7, 0xa9, 0x78, 0x75, 0xa4, + 0xca, 0x1b, 0xfc, 0x2d, 0x43, 0x92, 0x9f, 0x4e, 0x20, 0xf1, + 0x3a, 0xeb, 0x85, 0x54, 0x59, 0x88, 0xe6, 0x37, 0x6d, 0xbc, + 0xd2, 0x03, 0x0e, 0xdf, 0xb1, 0x60, 0xab, 0x7a, 0x14, 0xc5, + 0xc8, 0x19, 0x77, 0xa6, 0xc3, 0x12, 0x7c, 0xad, 0xa0, 0x71, + 0x1f, 0xce, 0x05, 0xd4, 0xba, 0x6b, 0x66, 0xb7, 0xd9, 0x08, + 0x52, 0x83, 0xed, 0x3c, 0x31, 0xe0, 0x8e, 0x5f, 0x94, 0x45, + 0x2b, 0xfa, 0xf7, 0x26, 0x48, 0x99, 0x82, 0x53, 0x3d, 0xec, + 0xe1, 0x30, 0x5e, 0x8f, 0x44, 0x95, 0xfb, 0x2a, 0x27, 0xf6, + 0x98, 0x49, 0x13, 0xc2, 0xac, 0x7d, 0x70, 0xa1, 0xcf, 0x1e, + 0xd5, 0x04, 0x6a, 0xbb, 0xb6, 0x67, 0x09, 0xd8, 0xbd, 0x6c, + 0x02, 0xd3, 0xde, 0x0f, 0x61, 0xb0, 0x7b, 0xaa, 0xc4, 0x15, + 0x18, 0xc9, 0xa7, 0x76, 0x2c, 0xfd, 0x93, 0x42, 0x4f, 0x9e, + 0xf0, 0x21, 0xea, 0x3b, 0x55, 0x84, 0x89, 0x58, 0x36, 0xe7, + 0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04, 0xde, 0x0c, + 0x67, 0xb5, 0xb1, 0x63, 0x08, 0xda, 0xa1, 0x73, 0x18, 0xca, + 0xce, 0x1c, 0x77, 0xa5, 0x7f, 0xad, 0xc6, 0x14, 0x10, 0xc2, + 0xa9, 0x7b, 0x5f, 0x8d, 0xe6, 0x34, 0x30, 0xe2, 0x89, 0x5b, + 0x81, 0x53, 0x38, 0xea, 0xee, 0x3c, 0x57, 0x85, 0xfe, 0x2c, + 0x47, 0x95, 0x91, 0x43, 0x28, 0xfa, 0x20, 0xf2, 0x99, 0x4b, + 0x4f, 0x9d, 0xf6, 0x24, 0xbe, 0x6c, 0x07, 0xd5, 0xd1, 0x03, + 0x68, 0xba, 0x60, 0xb2, 0xd9, 0x0b, 0x0f, 0xdd, 0xb6, 0x64, + 0x1f, 0xcd, 0xa6, 0x74, 0x70, 0xa2, 0xc9, 0x1b, 0xc1, 0x13, + 0x78, 0xaa, 0xae, 0x7c, 0x17, 0xc5, 0xe1, 0x33, 0x58, 0x8a, + 0x8e, 0x5c, 0x37, 0xe5, 0x3f, 0xed, 0x86, 0x54, 0x50, 0x82, + 
0xe9, 0x3b, 0x40, 0x92, 0xf9, 0x2b, 0x2f, 0xfd, 0x96, 0x44, + 0x9e, 0x4c, 0x27, 0xf5, 0xf1, 0x23, 0x48, 0x9a, 0x61, 0xb3, + 0xd8, 0x0a, 0x0e, 0xdc, 0xb7, 0x65, 0xbf, 0x6d, 0x06, 0xd4, + 0xd0, 0x02, 0x69, 0xbb, 0xc0, 0x12, 0x79, 0xab, 0xaf, 0x7d, + 0x16, 0xc4, 0x1e, 0xcc, 0xa7, 0x75, 0x71, 0xa3, 0xc8, 0x1a, + 0x3e, 0xec, 0x87, 0x55, 0x51, 0x83, 0xe8, 0x3a, 0xe0, 0x32, + 0x59, 0x8b, 0x8f, 0x5d, 0x36, 0xe4, 0x9f, 0x4d, 0x26, 0xf4, + 0xf0, 0x22, 0x49, 0x9b, 0x41, 0x93, 0xf8, 0x2a, 0x2e, 0xfc, + 0x97, 0x45, 0xdf, 0x0d, 0x66, 0xb4, 0xb0, 0x62, 0x09, 0xdb, + 0x01, 0xd3, 0xb8, 0x6a, 0x6e, 0xbc, 0xd7, 0x05, 0x7e, 0xac, + 0xc7, 0x15, 0x11, 0xc3, 0xa8, 0x7a, 0xa0, 0x72, 0x19, 0xcb, + 0xcf, 0x1d, 0x76, 0xa4, 0x80, 0x52, 0x39, 0xeb, 0xef, 0x3d, + 0x56, 0x84, 0x5e, 0x8c, 0xe7, 0x35, 0x31, 0xe3, 0x88, 0x5a, + 0x21, 0xf3, 0x98, 0x4a, 0x4e, 0x9c, 0xf7, 0x25, 0xff, 0x2d, + 0x46, 0x94, 0x90, 0x42, 0x29, 0xfb, 0x00, 0xd3, 0xbb, 0x68, + 0x6b, 0xb8, 0xd0, 0x03, 0xd6, 0x05, 0x6d, 0xbe, 0xbd, 0x6e, + 0x06, 0xd5, 0xb1, 0x62, 0x0a, 0xd9, 0xda, 0x09, 0x61, 0xb2, + 0x67, 0xb4, 0xdc, 0x0f, 0x0c, 0xdf, 0xb7, 0x64, 0x7f, 0xac, + 0xc4, 0x17, 0x14, 0xc7, 0xaf, 0x7c, 0xa9, 0x7a, 0x12, 0xc1, + 0xc2, 0x11, 0x79, 0xaa, 0xce, 0x1d, 0x75, 0xa6, 0xa5, 0x76, + 0x1e, 0xcd, 0x18, 0xcb, 0xa3, 0x70, 0x73, 0xa0, 0xc8, 0x1b, + 0xfe, 0x2d, 0x45, 0x96, 0x95, 0x46, 0x2e, 0xfd, 0x28, 0xfb, + 0x93, 0x40, 0x43, 0x90, 0xf8, 0x2b, 0x4f, 0x9c, 0xf4, 0x27, + 0x24, 0xf7, 0x9f, 0x4c, 0x99, 0x4a, 0x22, 0xf1, 0xf2, 0x21, + 0x49, 0x9a, 0x81, 0x52, 0x3a, 0xe9, 0xea, 0x39, 0x51, 0x82, + 0x57, 0x84, 0xec, 0x3f, 0x3c, 0xef, 0x87, 0x54, 0x30, 0xe3, + 0x8b, 0x58, 0x5b, 0x88, 0xe0, 0x33, 0xe6, 0x35, 0x5d, 0x8e, + 0x8d, 0x5e, 0x36, 0xe5, 0xe1, 0x32, 0x5a, 0x89, 0x8a, 0x59, + 0x31, 0xe2, 0x37, 0xe4, 0x8c, 0x5f, 0x5c, 0x8f, 0xe7, 0x34, + 0x50, 0x83, 0xeb, 0x38, 0x3b, 0xe8, 0x80, 0x53, 0x86, 0x55, + 0x3d, 0xee, 0xed, 0x3e, 0x56, 0x85, 0x9e, 0x4d, 0x25, 0xf6, + 0xf5, 0x26, 0x4e, 0x9d, 0x48, 0x9b, 0xf3, 0x20, 0x23, 0xf0, + 0x98, 0x4b, 0x2f, 0xfc, 0x94, 0x47, 0x44, 0x97, 0xff, 0x2c, + 0xf9, 0x2a, 0x42, 0x91, 0x92, 0x41, 0x29, 0xfa, 0x1f, 0xcc, + 0xa4, 0x77, 0x74, 0xa7, 0xcf, 0x1c, 0xc9, 0x1a, 0x72, 0xa1, + 0xa2, 0x71, 0x19, 0xca, 0xae, 0x7d, 0x15, 0xc6, 0xc5, 0x16, + 0x7e, 0xad, 0x78, 0xab, 0xc3, 0x10, 0x13, 0xc0, 0xa8, 0x7b, + 0x60, 0xb3, 0xdb, 0x08, 0x0b, 0xd8, 0xb0, 0x63, 0xb6, 0x65, + 0x0d, 0xde, 0xdd, 0x0e, 0x66, 0xb5, 0xd1, 0x02, 0x6a, 0xb9, + 0xba, 0x69, 0x01, 0xd2, 0x07, 0xd4, 0xbc, 0x6f, 0x6c, 0xbf, + 0xd7, 0x04, 0x00, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16, + 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8, 0xc1, 0x15, + 0x74, 0xa0, 0xb6, 0x62, 0x03, 0xd7, 0x2f, 0xfb, 0x9a, 0x4e, + 0x58, 0x8c, 0xed, 0x39, 0x9f, 0x4b, 0x2a, 0xfe, 0xe8, 0x3c, + 0x5d, 0x89, 0x71, 0xa5, 0xc4, 0x10, 0x06, 0xd2, 0xb3, 0x67, + 0x5e, 0x8a, 0xeb, 0x3f, 0x29, 0xfd, 0x9c, 0x48, 0xb0, 0x64, + 0x05, 0xd1, 0xc7, 0x13, 0x72, 0xa6, 0x23, 0xf7, 0x96, 0x42, + 0x54, 0x80, 0xe1, 0x35, 0xcd, 0x19, 0x78, 0xac, 0xba, 0x6e, + 0x0f, 0xdb, 0xe2, 0x36, 0x57, 0x83, 0x95, 0x41, 0x20, 0xf4, + 0x0c, 0xd8, 0xb9, 0x6d, 0x7b, 0xaf, 0xce, 0x1a, 0xbc, 0x68, + 0x09, 0xdd, 0xcb, 0x1f, 0x7e, 0xaa, 0x52, 0x86, 0xe7, 0x33, + 0x25, 0xf1, 0x90, 0x44, 0x7d, 0xa9, 0xc8, 0x1c, 0x0a, 0xde, + 0xbf, 0x6b, 0x93, 0x47, 0x26, 0xf2, 0xe4, 0x30, 0x51, 0x85, + 0x46, 0x92, 0xf3, 0x27, 0x31, 0xe5, 0x84, 0x50, 0xa8, 0x7c, + 0x1d, 0xc9, 0xdf, 0x0b, 0x6a, 0xbe, 0x87, 0x53, 0x32, 0xe6, + 0xf0, 0x24, 0x45, 0x91, 0x69, 0xbd, 0xdc, 0x08, 0x1e, 0xca, + 0xab, 0x7f, 0xd9, 0x0d, 0x6c, 0xb8, 0xae, 0x7a, 0x1b, 0xcf, + 0x37, 0xe3, 0x82, 
0x56, 0x40, 0x94, 0xf5, 0x21, 0x18, 0xcc, + 0xad, 0x79, 0x6f, 0xbb, 0xda, 0x0e, 0xf6, 0x22, 0x43, 0x97, + 0x81, 0x55, 0x34, 0xe0, 0x65, 0xb1, 0xd0, 0x04, 0x12, 0xc6, + 0xa7, 0x73, 0x8b, 0x5f, 0x3e, 0xea, 0xfc, 0x28, 0x49, 0x9d, + 0xa4, 0x70, 0x11, 0xc5, 0xd3, 0x07, 0x66, 0xb2, 0x4a, 0x9e, + 0xff, 0x2b, 0x3d, 0xe9, 0x88, 0x5c, 0xfa, 0x2e, 0x4f, 0x9b, + 0x8d, 0x59, 0x38, 0xec, 0x14, 0xc0, 0xa1, 0x75, 0x63, 0xb7, + 0xd6, 0x02, 0x3b, 0xef, 0x8e, 0x5a, 0x4c, 0x98, 0xf9, 0x2d, + 0xd5, 0x01, 0x60, 0xb4, 0xa2, 0x76, 0x17, 0xc3, 0x00, 0xd5, + 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, + 0x95, 0x40, 0x22, 0xf7, 0xd1, 0x04, 0x66, 0xb3, 0xa2, 0x77, + 0x15, 0xc0, 0x37, 0xe2, 0x80, 0x55, 0x44, 0x91, 0xf3, 0x26, + 0xbf, 0x6a, 0x08, 0xdd, 0xcc, 0x19, 0x7b, 0xae, 0x59, 0x8c, + 0xee, 0x3b, 0x2a, 0xff, 0x9d, 0x48, 0x6e, 0xbb, 0xd9, 0x0c, + 0x1d, 0xc8, 0xaa, 0x7f, 0x88, 0x5d, 0x3f, 0xea, 0xfb, 0x2e, + 0x4c, 0x99, 0x63, 0xb6, 0xd4, 0x01, 0x10, 0xc5, 0xa7, 0x72, + 0x85, 0x50, 0x32, 0xe7, 0xf6, 0x23, 0x41, 0x94, 0xb2, 0x67, + 0x05, 0xd0, 0xc1, 0x14, 0x76, 0xa3, 0x54, 0x81, 0xe3, 0x36, + 0x27, 0xf2, 0x90, 0x45, 0xdc, 0x09, 0x6b, 0xbe, 0xaf, 0x7a, + 0x18, 0xcd, 0x3a, 0xef, 0x8d, 0x58, 0x49, 0x9c, 0xfe, 0x2b, + 0x0d, 0xd8, 0xba, 0x6f, 0x7e, 0xab, 0xc9, 0x1c, 0xeb, 0x3e, + 0x5c, 0x89, 0x98, 0x4d, 0x2f, 0xfa, 0xc6, 0x13, 0x71, 0xa4, + 0xb5, 0x60, 0x02, 0xd7, 0x20, 0xf5, 0x97, 0x42, 0x53, 0x86, + 0xe4, 0x31, 0x17, 0xc2, 0xa0, 0x75, 0x64, 0xb1, 0xd3, 0x06, + 0xf1, 0x24, 0x46, 0x93, 0x82, 0x57, 0x35, 0xe0, 0x79, 0xac, + 0xce, 0x1b, 0x0a, 0xdf, 0xbd, 0x68, 0x9f, 0x4a, 0x28, 0xfd, + 0xec, 0x39, 0x5b, 0x8e, 0xa8, 0x7d, 0x1f, 0xca, 0xdb, 0x0e, + 0x6c, 0xb9, 0x4e, 0x9b, 0xf9, 0x2c, 0x3d, 0xe8, 0x8a, 0x5f, + 0xa5, 0x70, 0x12, 0xc7, 0xd6, 0x03, 0x61, 0xb4, 0x43, 0x96, + 0xf4, 0x21, 0x30, 0xe5, 0x87, 0x52, 0x74, 0xa1, 0xc3, 0x16, + 0x07, 0xd2, 0xb0, 0x65, 0x92, 0x47, 0x25, 0xf0, 0xe1, 0x34, + 0x56, 0x83, 0x1a, 0xcf, 0xad, 0x78, 0x69, 0xbc, 0xde, 0x0b, + 0xfc, 0x29, 0x4b, 0x9e, 0x8f, 0x5a, 0x38, 0xed, 0xcb, 0x1e, + 0x7c, 0xa9, 0xb8, 0x6d, 0x0f, 0xda, 0x2d, 0xf8, 0x9a, 0x4f, + 0x5e, 0x8b, 0xe9, 0x3c, 0x00, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, + 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6, + 0xe1, 0x37, 0x50, 0x86, 0x9e, 0x48, 0x2f, 0xf9, 0x1f, 0xc9, + 0xae, 0x78, 0x60, 0xb6, 0xd1, 0x07, 0xdf, 0x09, 0x6e, 0xb8, + 0xa0, 0x76, 0x11, 0xc7, 0x21, 0xf7, 0x90, 0x46, 0x5e, 0x88, + 0xef, 0x39, 0x3e, 0xe8, 0x8f, 0x59, 0x41, 0x97, 0xf0, 0x26, + 0xc0, 0x16, 0x71, 0xa7, 0xbf, 0x69, 0x0e, 0xd8, 0xa3, 0x75, + 0x12, 0xc4, 0xdc, 0x0a, 0x6d, 0xbb, 0x5d, 0x8b, 0xec, 0x3a, + 0x22, 0xf4, 0x93, 0x45, 0x42, 0x94, 0xf3, 0x25, 0x3d, 0xeb, + 0x8c, 0x5a, 0xbc, 0x6a, 0x0d, 0xdb, 0xc3, 0x15, 0x72, 0xa4, + 0x7c, 0xaa, 0xcd, 0x1b, 0x03, 0xd5, 0xb2, 0x64, 0x82, 0x54, + 0x33, 0xe5, 0xfd, 0x2b, 0x4c, 0x9a, 0x9d, 0x4b, 0x2c, 0xfa, + 0xe2, 0x34, 0x53, 0x85, 0x63, 0xb5, 0xd2, 0x04, 0x1c, 0xca, + 0xad, 0x7b, 0x5b, 0x8d, 0xea, 0x3c, 0x24, 0xf2, 0x95, 0x43, + 0xa5, 0x73, 0x14, 0xc2, 0xda, 0x0c, 0x6b, 0xbd, 0xba, 0x6c, + 0x0b, 0xdd, 0xc5, 0x13, 0x74, 0xa2, 0x44, 0x92, 0xf5, 0x23, + 0x3b, 0xed, 0x8a, 0x5c, 0x84, 0x52, 0x35, 0xe3, 0xfb, 0x2d, + 0x4a, 0x9c, 0x7a, 0xac, 0xcb, 0x1d, 0x05, 0xd3, 0xb4, 0x62, + 0x65, 0xb3, 0xd4, 0x02, 0x1a, 0xcc, 0xab, 0x7d, 0x9b, 0x4d, + 0x2a, 0xfc, 0xe4, 0x32, 0x55, 0x83, 0xf8, 0x2e, 0x49, 0x9f, + 0x87, 0x51, 0x36, 0xe0, 0x06, 0xd0, 0xb7, 0x61, 0x79, 0xaf, + 0xc8, 0x1e, 0x19, 0xcf, 0xa8, 0x7e, 0x66, 0xb0, 0xd7, 0x01, + 0xe7, 0x31, 0x56, 0x80, 0x98, 0x4e, 0x29, 0xff, 0x27, 0xf1, + 0x96, 0x40, 0x58, 0x8e, 0xe9, 0x3f, 
0xd9, 0x0f, 0x68, 0xbe, + 0xa6, 0x70, 0x17, 0xc1, 0xc6, 0x10, 0x77, 0xa1, 0xb9, 0x6f, + 0x08, 0xde, 0x38, 0xee, 0x89, 0x5f, 0x47, 0x91, 0xf6, 0x20, + 0x00, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, + 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9, 0xf1, 0x26, 0x42, 0x95, + 0x8a, 0x5d, 0x39, 0xee, 0x07, 0xd0, 0xb4, 0x63, 0x7c, 0xab, + 0xcf, 0x18, 0xff, 0x28, 0x4c, 0x9b, 0x84, 0x53, 0x37, 0xe0, + 0x09, 0xde, 0xba, 0x6d, 0x72, 0xa5, 0xc1, 0x16, 0x0e, 0xd9, + 0xbd, 0x6a, 0x75, 0xa2, 0xc6, 0x11, 0xf8, 0x2f, 0x4b, 0x9c, + 0x83, 0x54, 0x30, 0xe7, 0xe3, 0x34, 0x50, 0x87, 0x98, 0x4f, + 0x2b, 0xfc, 0x15, 0xc2, 0xa6, 0x71, 0x6e, 0xb9, 0xdd, 0x0a, + 0x12, 0xc5, 0xa1, 0x76, 0x69, 0xbe, 0xda, 0x0d, 0xe4, 0x33, + 0x57, 0x80, 0x9f, 0x48, 0x2c, 0xfb, 0x1c, 0xcb, 0xaf, 0x78, + 0x67, 0xb0, 0xd4, 0x03, 0xea, 0x3d, 0x59, 0x8e, 0x91, 0x46, + 0x22, 0xf5, 0xed, 0x3a, 0x5e, 0x89, 0x96, 0x41, 0x25, 0xf2, + 0x1b, 0xcc, 0xa8, 0x7f, 0x60, 0xb7, 0xd3, 0x04, 0xdb, 0x0c, + 0x68, 0xbf, 0xa0, 0x77, 0x13, 0xc4, 0x2d, 0xfa, 0x9e, 0x49, + 0x56, 0x81, 0xe5, 0x32, 0x2a, 0xfd, 0x99, 0x4e, 0x51, 0x86, + 0xe2, 0x35, 0xdc, 0x0b, 0x6f, 0xb8, 0xa7, 0x70, 0x14, 0xc3, + 0x24, 0xf3, 0x97, 0x40, 0x5f, 0x88, 0xec, 0x3b, 0xd2, 0x05, + 0x61, 0xb6, 0xa9, 0x7e, 0x1a, 0xcd, 0xd5, 0x02, 0x66, 0xb1, + 0xae, 0x79, 0x1d, 0xca, 0x23, 0xf4, 0x90, 0x47, 0x58, 0x8f, + 0xeb, 0x3c, 0x38, 0xef, 0x8b, 0x5c, 0x43, 0x94, 0xf0, 0x27, + 0xce, 0x19, 0x7d, 0xaa, 0xb5, 0x62, 0x06, 0xd1, 0xc9, 0x1e, + 0x7a, 0xad, 0xb2, 0x65, 0x01, 0xd6, 0x3f, 0xe8, 0x8c, 0x5b, + 0x44, 0x93, 0xf7, 0x20, 0xc7, 0x10, 0x74, 0xa3, 0xbc, 0x6b, + 0x0f, 0xd8, 0x31, 0xe6, 0x82, 0x55, 0x4a, 0x9d, 0xf9, 0x2e, + 0x36, 0xe1, 0x85, 0x52, 0x4d, 0x9a, 0xfe, 0x29, 0xc0, 0x17, + 0x73, 0xa4, 0xbb, 0x6c, 0x08, 0xdf, 0x00, 0xd8, 0xad, 0x75, + 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, + 0x64, 0xbc, 0x01, 0xd9, 0xac, 0x74, 0x46, 0x9e, 0xeb, 0x33, + 0x8f, 0x57, 0x22, 0xfa, 0xc8, 0x10, 0x65, 0xbd, 0x02, 0xda, + 0xaf, 0x77, 0x45, 0x9d, 0xe8, 0x30, 0x8c, 0x54, 0x21, 0xf9, + 0xcb, 0x13, 0x66, 0xbe, 0x03, 0xdb, 0xae, 0x76, 0x44, 0x9c, + 0xe9, 0x31, 0x8d, 0x55, 0x20, 0xf8, 0xca, 0x12, 0x67, 0xbf, + 0x04, 0xdc, 0xa9, 0x71, 0x43, 0x9b, 0xee, 0x36, 0x8a, 0x52, + 0x27, 0xff, 0xcd, 0x15, 0x60, 0xb8, 0x05, 0xdd, 0xa8, 0x70, + 0x42, 0x9a, 0xef, 0x37, 0x8b, 0x53, 0x26, 0xfe, 0xcc, 0x14, + 0x61, 0xb9, 0x06, 0xde, 0xab, 0x73, 0x41, 0x99, 0xec, 0x34, + 0x88, 0x50, 0x25, 0xfd, 0xcf, 0x17, 0x62, 0xba, 0x07, 0xdf, + 0xaa, 0x72, 0x40, 0x98, 0xed, 0x35, 0x89, 0x51, 0x24, 0xfc, + 0xce, 0x16, 0x63, 0xbb, 0x08, 0xd0, 0xa5, 0x7d, 0x4f, 0x97, + 0xe2, 0x3a, 0x86, 0x5e, 0x2b, 0xf3, 0xc1, 0x19, 0x6c, 0xb4, + 0x09, 0xd1, 0xa4, 0x7c, 0x4e, 0x96, 0xe3, 0x3b, 0x87, 0x5f, + 0x2a, 0xf2, 0xc0, 0x18, 0x6d, 0xb5, 0x0a, 0xd2, 0xa7, 0x7f, + 0x4d, 0x95, 0xe0, 0x38, 0x84, 0x5c, 0x29, 0xf1, 0xc3, 0x1b, + 0x6e, 0xb6, 0x0b, 0xd3, 0xa6, 0x7e, 0x4c, 0x94, 0xe1, 0x39, + 0x85, 0x5d, 0x28, 0xf0, 0xc2, 0x1a, 0x6f, 0xb7, 0x0c, 0xd4, + 0xa1, 0x79, 0x4b, 0x93, 0xe6, 0x3e, 0x82, 0x5a, 0x2f, 0xf7, + 0xc5, 0x1d, 0x68, 0xb0, 0x0d, 0xd5, 0xa0, 0x78, 0x4a, 0x92, + 0xe7, 0x3f, 0x83, 0x5b, 0x2e, 0xf6, 0xc4, 0x1c, 0x69, 0xb1, + 0x0e, 0xd6, 0xa3, 0x7b, 0x49, 0x91, 0xe4, 0x3c, 0x80, 0x58, + 0x2d, 0xf5, 0xc7, 0x1f, 0x6a, 0xb2, 0x0f, 0xd7, 0xa2, 0x7a, + 0x48, 0x90, 0xe5, 0x3d, 0x81, 0x59, 0x2c, 0xf4, 0xc6, 0x1e, + 0x6b, 0xb3, 0x00, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35, + 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3, 0x11, 0xc8, + 0xbe, 0x67, 0x52, 0x8b, 0xfd, 0x24, 0x97, 0x4e, 0x38, 0xe1, + 0xd4, 0x0d, 0x7b, 0xa2, 0x22, 0xfb, 0x8d, 0x54, 0x61, 
0xb8, + 0xce, 0x17, 0xa4, 0x7d, 0x0b, 0xd2, 0xe7, 0x3e, 0x48, 0x91, + 0x33, 0xea, 0x9c, 0x45, 0x70, 0xa9, 0xdf, 0x06, 0xb5, 0x6c, + 0x1a, 0xc3, 0xf6, 0x2f, 0x59, 0x80, 0x44, 0x9d, 0xeb, 0x32, + 0x07, 0xde, 0xa8, 0x71, 0xc2, 0x1b, 0x6d, 0xb4, 0x81, 0x58, + 0x2e, 0xf7, 0x55, 0x8c, 0xfa, 0x23, 0x16, 0xcf, 0xb9, 0x60, + 0xd3, 0x0a, 0x7c, 0xa5, 0x90, 0x49, 0x3f, 0xe6, 0x66, 0xbf, + 0xc9, 0x10, 0x25, 0xfc, 0x8a, 0x53, 0xe0, 0x39, 0x4f, 0x96, + 0xa3, 0x7a, 0x0c, 0xd5, 0x77, 0xae, 0xd8, 0x01, 0x34, 0xed, + 0x9b, 0x42, 0xf1, 0x28, 0x5e, 0x87, 0xb2, 0x6b, 0x1d, 0xc4, + 0x88, 0x51, 0x27, 0xfe, 0xcb, 0x12, 0x64, 0xbd, 0x0e, 0xd7, + 0xa1, 0x78, 0x4d, 0x94, 0xe2, 0x3b, 0x99, 0x40, 0x36, 0xef, + 0xda, 0x03, 0x75, 0xac, 0x1f, 0xc6, 0xb0, 0x69, 0x5c, 0x85, + 0xf3, 0x2a, 0xaa, 0x73, 0x05, 0xdc, 0xe9, 0x30, 0x46, 0x9f, + 0x2c, 0xf5, 0x83, 0x5a, 0x6f, 0xb6, 0xc0, 0x19, 0xbb, 0x62, + 0x14, 0xcd, 0xf8, 0x21, 0x57, 0x8e, 0x3d, 0xe4, 0x92, 0x4b, + 0x7e, 0xa7, 0xd1, 0x08, 0xcc, 0x15, 0x63, 0xba, 0x8f, 0x56, + 0x20, 0xf9, 0x4a, 0x93, 0xe5, 0x3c, 0x09, 0xd0, 0xa6, 0x7f, + 0xdd, 0x04, 0x72, 0xab, 0x9e, 0x47, 0x31, 0xe8, 0x5b, 0x82, + 0xf4, 0x2d, 0x18, 0xc1, 0xb7, 0x6e, 0xee, 0x37, 0x41, 0x98, + 0xad, 0x74, 0x02, 0xdb, 0x68, 0xb1, 0xc7, 0x1e, 0x2b, 0xf2, + 0x84, 0x5d, 0xff, 0x26, 0x50, 0x89, 0xbc, 0x65, 0x13, 0xca, + 0x79, 0xa0, 0xd6, 0x0f, 0x3a, 0xe3, 0x95, 0x4c, 0x00, 0xda, + 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, + 0xd1, 0x0b, 0x78, 0xa2, 0x21, 0xfb, 0x88, 0x52, 0x6e, 0xb4, + 0xc7, 0x1d, 0xbf, 0x65, 0x16, 0xcc, 0xf0, 0x2a, 0x59, 0x83, + 0x42, 0x98, 0xeb, 0x31, 0x0d, 0xd7, 0xa4, 0x7e, 0xdc, 0x06, + 0x75, 0xaf, 0x93, 0x49, 0x3a, 0xe0, 0x63, 0xb9, 0xca, 0x10, + 0x2c, 0xf6, 0x85, 0x5f, 0xfd, 0x27, 0x54, 0x8e, 0xb2, 0x68, + 0x1b, 0xc1, 0x84, 0x5e, 0x2d, 0xf7, 0xcb, 0x11, 0x62, 0xb8, + 0x1a, 0xc0, 0xb3, 0x69, 0x55, 0x8f, 0xfc, 0x26, 0xa5, 0x7f, + 0x0c, 0xd6, 0xea, 0x30, 0x43, 0x99, 0x3b, 0xe1, 0x92, 0x48, + 0x74, 0xae, 0xdd, 0x07, 0xc6, 0x1c, 0x6f, 0xb5, 0x89, 0x53, + 0x20, 0xfa, 0x58, 0x82, 0xf1, 0x2b, 0x17, 0xcd, 0xbe, 0x64, + 0xe7, 0x3d, 0x4e, 0x94, 0xa8, 0x72, 0x01, 0xdb, 0x79, 0xa3, + 0xd0, 0x0a, 0x36, 0xec, 0x9f, 0x45, 0x15, 0xcf, 0xbc, 0x66, + 0x5a, 0x80, 0xf3, 0x29, 0x8b, 0x51, 0x22, 0xf8, 0xc4, 0x1e, + 0x6d, 0xb7, 0x34, 0xee, 0x9d, 0x47, 0x7b, 0xa1, 0xd2, 0x08, + 0xaa, 0x70, 0x03, 0xd9, 0xe5, 0x3f, 0x4c, 0x96, 0x57, 0x8d, + 0xfe, 0x24, 0x18, 0xc2, 0xb1, 0x6b, 0xc9, 0x13, 0x60, 0xba, + 0x86, 0x5c, 0x2f, 0xf5, 0x76, 0xac, 0xdf, 0x05, 0x39, 0xe3, + 0x90, 0x4a, 0xe8, 0x32, 0x41, 0x9b, 0xa7, 0x7d, 0x0e, 0xd4, + 0x91, 0x4b, 0x38, 0xe2, 0xde, 0x04, 0x77, 0xad, 0x0f, 0xd5, + 0xa6, 0x7c, 0x40, 0x9a, 0xe9, 0x33, 0xb0, 0x6a, 0x19, 0xc3, + 0xff, 0x25, 0x56, 0x8c, 0x2e, 0xf4, 0x87, 0x5d, 0x61, 0xbb, + 0xc8, 0x12, 0xd3, 0x09, 0x7a, 0xa0, 0x9c, 0x46, 0x35, 0xef, + 0x4d, 0x97, 0xe4, 0x3e, 0x02, 0xd8, 0xab, 0x71, 0xf2, 0x28, + 0x5b, 0x81, 0xbd, 0x67, 0x14, 0xce, 0x6c, 0xb6, 0xc5, 0x1f, + 0x23, 0xf9, 0x8a, 0x50, 0x00, 0xdb, 0xab, 0x70, 0x4b, 0x90, + 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x06, 0x76, 0xad, + 0x31, 0xea, 0x9a, 0x41, 0x7a, 0xa1, 0xd1, 0x0a, 0xa7, 0x7c, + 0x0c, 0xd7, 0xec, 0x37, 0x47, 0x9c, 0x62, 0xb9, 0xc9, 0x12, + 0x29, 0xf2, 0x82, 0x59, 0xf4, 0x2f, 0x5f, 0x84, 0xbf, 0x64, + 0x14, 0xcf, 0x53, 0x88, 0xf8, 0x23, 0x18, 0xc3, 0xb3, 0x68, + 0xc5, 0x1e, 0x6e, 0xb5, 0x8e, 0x55, 0x25, 0xfe, 0xc4, 0x1f, + 0x6f, 0xb4, 0x8f, 0x54, 0x24, 0xff, 0x52, 0x89, 0xf9, 0x22, + 0x19, 0xc2, 0xb2, 0x69, 0xf5, 0x2e, 0x5e, 0x85, 0xbe, 0x65, + 0x15, 0xce, 0x63, 0xb8, 0xc8, 0x13, 0x28, 0xf3, 0x83, 0x58, + 0xa6, 0x7d, 
0x0d, 0xd6, 0xed, 0x36, 0x46, 0x9d, 0x30, 0xeb, + 0x9b, 0x40, 0x7b, 0xa0, 0xd0, 0x0b, 0x97, 0x4c, 0x3c, 0xe7, + 0xdc, 0x07, 0x77, 0xac, 0x01, 0xda, 0xaa, 0x71, 0x4a, 0x91, + 0xe1, 0x3a, 0x95, 0x4e, 0x3e, 0xe5, 0xde, 0x05, 0x75, 0xae, + 0x03, 0xd8, 0xa8, 0x73, 0x48, 0x93, 0xe3, 0x38, 0xa4, 0x7f, + 0x0f, 0xd4, 0xef, 0x34, 0x44, 0x9f, 0x32, 0xe9, 0x99, 0x42, + 0x79, 0xa2, 0xd2, 0x09, 0xf7, 0x2c, 0x5c, 0x87, 0xbc, 0x67, + 0x17, 0xcc, 0x61, 0xba, 0xca, 0x11, 0x2a, 0xf1, 0x81, 0x5a, + 0xc6, 0x1d, 0x6d, 0xb6, 0x8d, 0x56, 0x26, 0xfd, 0x50, 0x8b, + 0xfb, 0x20, 0x1b, 0xc0, 0xb0, 0x6b, 0x51, 0x8a, 0xfa, 0x21, + 0x1a, 0xc1, 0xb1, 0x6a, 0xc7, 0x1c, 0x6c, 0xb7, 0x8c, 0x57, + 0x27, 0xfc, 0x60, 0xbb, 0xcb, 0x10, 0x2b, 0xf0, 0x80, 0x5b, + 0xf6, 0x2d, 0x5d, 0x86, 0xbd, 0x66, 0x16, 0xcd, 0x33, 0xe8, + 0x98, 0x43, 0x78, 0xa3, 0xd3, 0x08, 0xa5, 0x7e, 0x0e, 0xd5, + 0xee, 0x35, 0x45, 0x9e, 0x02, 0xd9, 0xa9, 0x72, 0x49, 0x92, + 0xe2, 0x39, 0x94, 0x4f, 0x3f, 0xe4, 0xdf, 0x04, 0x74, 0xaf, + 0x00, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, + 0x0b, 0xd7, 0xf9, 0x25, 0x5c, 0x80, 0x41, 0x9d, 0xe4, 0x38, + 0x16, 0xca, 0xb3, 0x6f, 0xef, 0x33, 0x4a, 0x96, 0xb8, 0x64, + 0x1d, 0xc1, 0x82, 0x5e, 0x27, 0xfb, 0xd5, 0x09, 0x70, 0xac, + 0x2c, 0xf0, 0x89, 0x55, 0x7b, 0xa7, 0xde, 0x02, 0xc3, 0x1f, + 0x66, 0xba, 0x94, 0x48, 0x31, 0xed, 0x6d, 0xb1, 0xc8, 0x14, + 0x3a, 0xe6, 0x9f, 0x43, 0x19, 0xc5, 0xbc, 0x60, 0x4e, 0x92, + 0xeb, 0x37, 0xb7, 0x6b, 0x12, 0xce, 0xe0, 0x3c, 0x45, 0x99, + 0x58, 0x84, 0xfd, 0x21, 0x0f, 0xd3, 0xaa, 0x76, 0xf6, 0x2a, + 0x53, 0x8f, 0xa1, 0x7d, 0x04, 0xd8, 0x9b, 0x47, 0x3e, 0xe2, + 0xcc, 0x10, 0x69, 0xb5, 0x35, 0xe9, 0x90, 0x4c, 0x62, 0xbe, + 0xc7, 0x1b, 0xda, 0x06, 0x7f, 0xa3, 0x8d, 0x51, 0x28, 0xf4, + 0x74, 0xa8, 0xd1, 0x0d, 0x23, 0xff, 0x86, 0x5a, 0x32, 0xee, + 0x97, 0x4b, 0x65, 0xb9, 0xc0, 0x1c, 0x9c, 0x40, 0x39, 0xe5, + 0xcb, 0x17, 0x6e, 0xb2, 0x73, 0xaf, 0xd6, 0x0a, 0x24, 0xf8, + 0x81, 0x5d, 0xdd, 0x01, 0x78, 0xa4, 0x8a, 0x56, 0x2f, 0xf3, + 0xb0, 0x6c, 0x15, 0xc9, 0xe7, 0x3b, 0x42, 0x9e, 0x1e, 0xc2, + 0xbb, 0x67, 0x49, 0x95, 0xec, 0x30, 0xf1, 0x2d, 0x54, 0x88, + 0xa6, 0x7a, 0x03, 0xdf, 0x5f, 0x83, 0xfa, 0x26, 0x08, 0xd4, + 0xad, 0x71, 0x2b, 0xf7, 0x8e, 0x52, 0x7c, 0xa0, 0xd9, 0x05, + 0x85, 0x59, 0x20, 0xfc, 0xd2, 0x0e, 0x77, 0xab, 0x6a, 0xb6, + 0xcf, 0x13, 0x3d, 0xe1, 0x98, 0x44, 0xc4, 0x18, 0x61, 0xbd, + 0x93, 0x4f, 0x36, 0xea, 0xa9, 0x75, 0x0c, 0xd0, 0xfe, 0x22, + 0x5b, 0x87, 0x07, 0xdb, 0xa2, 0x7e, 0x50, 0x8c, 0xf5, 0x29, + 0xe8, 0x34, 0x4d, 0x91, 0xbf, 0x63, 0x1a, 0xc6, 0x46, 0x9a, + 0xe3, 0x3f, 0x11, 0xcd, 0xb4, 0x68, 0x00, 0xdd, 0xa7, 0x7a, + 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x01, 0xdc, 0xf5, 0x28, + 0x52, 0x8f, 0x51, 0x8c, 0xf6, 0x2b, 0x02, 0xdf, 0xa5, 0x78, + 0xf7, 0x2a, 0x50, 0x8d, 0xa4, 0x79, 0x03, 0xde, 0xa2, 0x7f, + 0x05, 0xd8, 0xf1, 0x2c, 0x56, 0x8b, 0x04, 0xd9, 0xa3, 0x7e, + 0x57, 0x8a, 0xf0, 0x2d, 0xf3, 0x2e, 0x54, 0x89, 0xa0, 0x7d, + 0x07, 0xda, 0x55, 0x88, 0xf2, 0x2f, 0x06, 0xdb, 0xa1, 0x7c, + 0x59, 0x84, 0xfe, 0x23, 0x0a, 0xd7, 0xad, 0x70, 0xff, 0x22, + 0x58, 0x85, 0xac, 0x71, 0x0b, 0xd6, 0x08, 0xd5, 0xaf, 0x72, + 0x5b, 0x86, 0xfc, 0x21, 0xae, 0x73, 0x09, 0xd4, 0xfd, 0x20, + 0x5a, 0x87, 0xfb, 0x26, 0x5c, 0x81, 0xa8, 0x75, 0x0f, 0xd2, + 0x5d, 0x80, 0xfa, 0x27, 0x0e, 0xd3, 0xa9, 0x74, 0xaa, 0x77, + 0x0d, 0xd0, 0xf9, 0x24, 0x5e, 0x83, 0x0c, 0xd1, 0xab, 0x76, + 0x5f, 0x82, 0xf8, 0x25, 0xb2, 0x6f, 0x15, 0xc8, 0xe1, 0x3c, + 0x46, 0x9b, 0x14, 0xc9, 0xb3, 0x6e, 0x47, 0x9a, 0xe0, 0x3d, + 0xe3, 0x3e, 0x44, 0x99, 0xb0, 0x6d, 0x17, 0xca, 0x45, 0x98, + 0xe2, 0x3f, 0x16, 0xcb, 0xb1, 
0x6c, 0x10, 0xcd, 0xb7, 0x6a, + 0x43, 0x9e, 0xe4, 0x39, 0xb6, 0x6b, 0x11, 0xcc, 0xe5, 0x38, + 0x42, 0x9f, 0x41, 0x9c, 0xe6, 0x3b, 0x12, 0xcf, 0xb5, 0x68, + 0xe7, 0x3a, 0x40, 0x9d, 0xb4, 0x69, 0x13, 0xce, 0xeb, 0x36, + 0x4c, 0x91, 0xb8, 0x65, 0x1f, 0xc2, 0x4d, 0x90, 0xea, 0x37, + 0x1e, 0xc3, 0xb9, 0x64, 0xba, 0x67, 0x1d, 0xc0, 0xe9, 0x34, + 0x4e, 0x93, 0x1c, 0xc1, 0xbb, 0x66, 0x4f, 0x92, 0xe8, 0x35, + 0x49, 0x94, 0xee, 0x33, 0x1a, 0xc7, 0xbd, 0x60, 0xef, 0x32, + 0x48, 0x95, 0xbc, 0x61, 0x1b, 0xc6, 0x18, 0xc5, 0xbf, 0x62, + 0x4b, 0x96, 0xec, 0x31, 0xbe, 0x63, 0x19, 0xc4, 0xed, 0x30, + 0x4a, 0x97, 0x00, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, + 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e, 0x61, 0xbf, + 0xc0, 0x1e, 0x3e, 0xe0, 0x9f, 0x41, 0xdf, 0x01, 0x7e, 0xa0, + 0x80, 0x5e, 0x21, 0xff, 0xc2, 0x1c, 0x63, 0xbd, 0x9d, 0x43, + 0x3c, 0xe2, 0x7c, 0xa2, 0xdd, 0x03, 0x23, 0xfd, 0x82, 0x5c, + 0xa3, 0x7d, 0x02, 0xdc, 0xfc, 0x22, 0x5d, 0x83, 0x1d, 0xc3, + 0xbc, 0x62, 0x42, 0x9c, 0xe3, 0x3d, 0x99, 0x47, 0x38, 0xe6, + 0xc6, 0x18, 0x67, 0xb9, 0x27, 0xf9, 0x86, 0x58, 0x78, 0xa6, + 0xd9, 0x07, 0xf8, 0x26, 0x59, 0x87, 0xa7, 0x79, 0x06, 0xd8, + 0x46, 0x98, 0xe7, 0x39, 0x19, 0xc7, 0xb8, 0x66, 0x5b, 0x85, + 0xfa, 0x24, 0x04, 0xda, 0xa5, 0x7b, 0xe5, 0x3b, 0x44, 0x9a, + 0xba, 0x64, 0x1b, 0xc5, 0x3a, 0xe4, 0x9b, 0x45, 0x65, 0xbb, + 0xc4, 0x1a, 0x84, 0x5a, 0x25, 0xfb, 0xdb, 0x05, 0x7a, 0xa4, + 0x2f, 0xf1, 0x8e, 0x50, 0x70, 0xae, 0xd1, 0x0f, 0x91, 0x4f, + 0x30, 0xee, 0xce, 0x10, 0x6f, 0xb1, 0x4e, 0x90, 0xef, 0x31, + 0x11, 0xcf, 0xb0, 0x6e, 0xf0, 0x2e, 0x51, 0x8f, 0xaf, 0x71, + 0x0e, 0xd0, 0xed, 0x33, 0x4c, 0x92, 0xb2, 0x6c, 0x13, 0xcd, + 0x53, 0x8d, 0xf2, 0x2c, 0x0c, 0xd2, 0xad, 0x73, 0x8c, 0x52, + 0x2d, 0xf3, 0xd3, 0x0d, 0x72, 0xac, 0x32, 0xec, 0x93, 0x4d, + 0x6d, 0xb3, 0xcc, 0x12, 0xb6, 0x68, 0x17, 0xc9, 0xe9, 0x37, + 0x48, 0x96, 0x08, 0xd6, 0xa9, 0x77, 0x57, 0x89, 0xf6, 0x28, + 0xd7, 0x09, 0x76, 0xa8, 0x88, 0x56, 0x29, 0xf7, 0x69, 0xb7, + 0xc8, 0x16, 0x36, 0xe8, 0x97, 0x49, 0x74, 0xaa, 0xd5, 0x0b, + 0x2b, 0xf5, 0x8a, 0x54, 0xca, 0x14, 0x6b, 0xb5, 0x95, 0x4b, + 0x34, 0xea, 0x15, 0xcb, 0xb4, 0x6a, 0x4a, 0x94, 0xeb, 0x35, + 0xab, 0x75, 0x0a, 0xd4, 0xf4, 0x2a, 0x55, 0x8b, 0x00, 0xdf, + 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, + 0xed, 0x32, 0x4e, 0x91, 0x71, 0xae, 0xd2, 0x0d, 0x2a, 0xf5, + 0x89, 0x56, 0xc7, 0x18, 0x64, 0xbb, 0x9c, 0x43, 0x3f, 0xe0, + 0xe2, 0x3d, 0x41, 0x9e, 0xb9, 0x66, 0x1a, 0xc5, 0x54, 0x8b, + 0xf7, 0x28, 0x0f, 0xd0, 0xac, 0x73, 0x93, 0x4c, 0x30, 0xef, + 0xc8, 0x17, 0x6b, 0xb4, 0x25, 0xfa, 0x86, 0x59, 0x7e, 0xa1, + 0xdd, 0x02, 0xd9, 0x06, 0x7a, 0xa5, 0x82, 0x5d, 0x21, 0xfe, + 0x6f, 0xb0, 0xcc, 0x13, 0x34, 0xeb, 0x97, 0x48, 0xa8, 0x77, + 0x0b, 0xd4, 0xf3, 0x2c, 0x50, 0x8f, 0x1e, 0xc1, 0xbd, 0x62, + 0x45, 0x9a, 0xe6, 0x39, 0x3b, 0xe4, 0x98, 0x47, 0x60, 0xbf, + 0xc3, 0x1c, 0x8d, 0x52, 0x2e, 0xf1, 0xd6, 0x09, 0x75, 0xaa, + 0x4a, 0x95, 0xe9, 0x36, 0x11, 0xce, 0xb2, 0x6d, 0xfc, 0x23, + 0x5f, 0x80, 0xa7, 0x78, 0x04, 0xdb, 0xaf, 0x70, 0x0c, 0xd3, + 0xf4, 0x2b, 0x57, 0x88, 0x19, 0xc6, 0xba, 0x65, 0x42, 0x9d, + 0xe1, 0x3e, 0xde, 0x01, 0x7d, 0xa2, 0x85, 0x5a, 0x26, 0xf9, + 0x68, 0xb7, 0xcb, 0x14, 0x33, 0xec, 0x90, 0x4f, 0x4d, 0x92, + 0xee, 0x31, 0x16, 0xc9, 0xb5, 0x6a, 0xfb, 0x24, 0x58, 0x87, + 0xa0, 0x7f, 0x03, 0xdc, 0x3c, 0xe3, 0x9f, 0x40, 0x67, 0xb8, + 0xc4, 0x1b, 0x8a, 0x55, 0x29, 0xf6, 0xd1, 0x0e, 0x72, 0xad, + 0x76, 0xa9, 0xd5, 0x0a, 0x2d, 0xf2, 0x8e, 0x51, 0xc0, 0x1f, + 0x63, 0xbc, 0x9b, 0x44, 0x38, 0xe7, 0x07, 0xd8, 0xa4, 0x7b, + 0x5c, 0x83, 0xff, 0x20, 0xb1, 0x6e, 0x12, 0xcd, 
0xea, 0x35, + 0x49, 0x96, 0x94, 0x4b, 0x37, 0xe8, 0xcf, 0x10, 0x6c, 0xb3, + 0x22, 0xfd, 0x81, 0x5e, 0x79, 0xa6, 0xda, 0x05, 0xe5, 0x3a, + 0x46, 0x99, 0xbe, 0x61, 0x1d, 0xc2, 0x53, 0x8c, 0xf0, 0x2f, + 0x08, 0xd7, 0xab, 0x74, 0x00, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, + 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9, + 0xa6, 0x46, 0x7b, 0x9b, 0x01, 0xe1, 0xdc, 0x3c, 0xf5, 0x15, + 0x28, 0xc8, 0x52, 0xb2, 0x8f, 0x6f, 0x51, 0xb1, 0x8c, 0x6c, + 0xf6, 0x16, 0x2b, 0xcb, 0x02, 0xe2, 0xdf, 0x3f, 0xa5, 0x45, + 0x78, 0x98, 0xf7, 0x17, 0x2a, 0xca, 0x50, 0xb0, 0x8d, 0x6d, + 0xa4, 0x44, 0x79, 0x99, 0x03, 0xe3, 0xde, 0x3e, 0xa2, 0x42, + 0x7f, 0x9f, 0x05, 0xe5, 0xd8, 0x38, 0xf1, 0x11, 0x2c, 0xcc, + 0x56, 0xb6, 0x8b, 0x6b, 0x04, 0xe4, 0xd9, 0x39, 0xa3, 0x43, + 0x7e, 0x9e, 0x57, 0xb7, 0x8a, 0x6a, 0xf0, 0x10, 0x2d, 0xcd, + 0xf3, 0x13, 0x2e, 0xce, 0x54, 0xb4, 0x89, 0x69, 0xa0, 0x40, + 0x7d, 0x9d, 0x07, 0xe7, 0xda, 0x3a, 0x55, 0xb5, 0x88, 0x68, + 0xf2, 0x12, 0x2f, 0xcf, 0x06, 0xe6, 0xdb, 0x3b, 0xa1, 0x41, + 0x7c, 0x9c, 0x59, 0xb9, 0x84, 0x64, 0xfe, 0x1e, 0x23, 0xc3, + 0x0a, 0xea, 0xd7, 0x37, 0xad, 0x4d, 0x70, 0x90, 0xff, 0x1f, + 0x22, 0xc2, 0x58, 0xb8, 0x85, 0x65, 0xac, 0x4c, 0x71, 0x91, + 0x0b, 0xeb, 0xd6, 0x36, 0x08, 0xe8, 0xd5, 0x35, 0xaf, 0x4f, + 0x72, 0x92, 0x5b, 0xbb, 0x86, 0x66, 0xfc, 0x1c, 0x21, 0xc1, + 0xae, 0x4e, 0x73, 0x93, 0x09, 0xe9, 0xd4, 0x34, 0xfd, 0x1d, + 0x20, 0xc0, 0x5a, 0xba, 0x87, 0x67, 0xfb, 0x1b, 0x26, 0xc6, + 0x5c, 0xbc, 0x81, 0x61, 0xa8, 0x48, 0x75, 0x95, 0x0f, 0xef, + 0xd2, 0x32, 0x5d, 0xbd, 0x80, 0x60, 0xfa, 0x1a, 0x27, 0xc7, + 0x0e, 0xee, 0xd3, 0x33, 0xa9, 0x49, 0x74, 0x94, 0xaa, 0x4a, + 0x77, 0x97, 0x0d, 0xed, 0xd0, 0x30, 0xf9, 0x19, 0x24, 0xc4, + 0x5e, 0xbe, 0x83, 0x63, 0x0c, 0xec, 0xd1, 0x31, 0xab, 0x4b, + 0x76, 0x96, 0x5f, 0xbf, 0x82, 0x62, 0xf8, 0x18, 0x25, 0xc5, + 0x00, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, + 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6, 0xb6, 0x57, 0x69, 0x88, + 0x15, 0xf4, 0xca, 0x2b, 0xed, 0x0c, 0x32, 0xd3, 0x4e, 0xaf, + 0x91, 0x70, 0x71, 0x90, 0xae, 0x4f, 0xd2, 0x33, 0x0d, 0xec, + 0x2a, 0xcb, 0xf5, 0x14, 0x89, 0x68, 0x56, 0xb7, 0xc7, 0x26, + 0x18, 0xf9, 0x64, 0x85, 0xbb, 0x5a, 0x9c, 0x7d, 0x43, 0xa2, + 0x3f, 0xde, 0xe0, 0x01, 0xe2, 0x03, 0x3d, 0xdc, 0x41, 0xa0, + 0x9e, 0x7f, 0xb9, 0x58, 0x66, 0x87, 0x1a, 0xfb, 0xc5, 0x24, + 0x54, 0xb5, 0x8b, 0x6a, 0xf7, 0x16, 0x28, 0xc9, 0x0f, 0xee, + 0xd0, 0x31, 0xac, 0x4d, 0x73, 0x92, 0x93, 0x72, 0x4c, 0xad, + 0x30, 0xd1, 0xef, 0x0e, 0xc8, 0x29, 0x17, 0xf6, 0x6b, 0x8a, + 0xb4, 0x55, 0x25, 0xc4, 0xfa, 0x1b, 0x86, 0x67, 0x59, 0xb8, + 0x7e, 0x9f, 0xa1, 0x40, 0xdd, 0x3c, 0x02, 0xe3, 0xd9, 0x38, + 0x06, 0xe7, 0x7a, 0x9b, 0xa5, 0x44, 0x82, 0x63, 0x5d, 0xbc, + 0x21, 0xc0, 0xfe, 0x1f, 0x6f, 0x8e, 0xb0, 0x51, 0xcc, 0x2d, + 0x13, 0xf2, 0x34, 0xd5, 0xeb, 0x0a, 0x97, 0x76, 0x48, 0xa9, + 0xa8, 0x49, 0x77, 0x96, 0x0b, 0xea, 0xd4, 0x35, 0xf3, 0x12, + 0x2c, 0xcd, 0x50, 0xb1, 0x8f, 0x6e, 0x1e, 0xff, 0xc1, 0x20, + 0xbd, 0x5c, 0x62, 0x83, 0x45, 0xa4, 0x9a, 0x7b, 0xe6, 0x07, + 0x39, 0xd8, 0x3b, 0xda, 0xe4, 0x05, 0x98, 0x79, 0x47, 0xa6, + 0x60, 0x81, 0xbf, 0x5e, 0xc3, 0x22, 0x1c, 0xfd, 0x8d, 0x6c, + 0x52, 0xb3, 0x2e, 0xcf, 0xf1, 0x10, 0xd6, 0x37, 0x09, 0xe8, + 0x75, 0x94, 0xaa, 0x4b, 0x4a, 0xab, 0x95, 0x74, 0xe9, 0x08, + 0x36, 0xd7, 0x11, 0xf0, 0xce, 0x2f, 0xb2, 0x53, 0x6d, 0x8c, + 0xfc, 0x1d, 0x23, 0xc2, 0x5f, 0xbe, 0x80, 0x61, 0xa7, 0x46, + 0x78, 0x99, 0x04, 0xe5, 0xdb, 0x3a, 0x00, 0xe2, 0xd9, 0x3b, + 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0x0e, + 0x35, 0xd7, 0x86, 0x64, 0x5f, 0xbd, 0x29, 0xcb, 0xf0, 0x12, + 0xc5, 
0x27, 0x1c, 0xfe, 0x6a, 0x88, 0xb3, 0x51, 0x11, 0xf3, + 0xc8, 0x2a, 0xbe, 0x5c, 0x67, 0x85, 0x52, 0xb0, 0x8b, 0x69, + 0xfd, 0x1f, 0x24, 0xc6, 0x97, 0x75, 0x4e, 0xac, 0x38, 0xda, + 0xe1, 0x03, 0xd4, 0x36, 0x0d, 0xef, 0x7b, 0x99, 0xa2, 0x40, + 0x22, 0xc0, 0xfb, 0x19, 0x8d, 0x6f, 0x54, 0xb6, 0x61, 0x83, + 0xb8, 0x5a, 0xce, 0x2c, 0x17, 0xf5, 0xa4, 0x46, 0x7d, 0x9f, + 0x0b, 0xe9, 0xd2, 0x30, 0xe7, 0x05, 0x3e, 0xdc, 0x48, 0xaa, + 0x91, 0x73, 0x33, 0xd1, 0xea, 0x08, 0x9c, 0x7e, 0x45, 0xa7, + 0x70, 0x92, 0xa9, 0x4b, 0xdf, 0x3d, 0x06, 0xe4, 0xb5, 0x57, + 0x6c, 0x8e, 0x1a, 0xf8, 0xc3, 0x21, 0xf6, 0x14, 0x2f, 0xcd, + 0x59, 0xbb, 0x80, 0x62, 0x44, 0xa6, 0x9d, 0x7f, 0xeb, 0x09, + 0x32, 0xd0, 0x07, 0xe5, 0xde, 0x3c, 0xa8, 0x4a, 0x71, 0x93, + 0xc2, 0x20, 0x1b, 0xf9, 0x6d, 0x8f, 0xb4, 0x56, 0x81, 0x63, + 0x58, 0xba, 0x2e, 0xcc, 0xf7, 0x15, 0x55, 0xb7, 0x8c, 0x6e, + 0xfa, 0x18, 0x23, 0xc1, 0x16, 0xf4, 0xcf, 0x2d, 0xb9, 0x5b, + 0x60, 0x82, 0xd3, 0x31, 0x0a, 0xe8, 0x7c, 0x9e, 0xa5, 0x47, + 0x90, 0x72, 0x49, 0xab, 0x3f, 0xdd, 0xe6, 0x04, 0x66, 0x84, + 0xbf, 0x5d, 0xc9, 0x2b, 0x10, 0xf2, 0x25, 0xc7, 0xfc, 0x1e, + 0x8a, 0x68, 0x53, 0xb1, 0xe0, 0x02, 0x39, 0xdb, 0x4f, 0xad, + 0x96, 0x74, 0xa3, 0x41, 0x7a, 0x98, 0x0c, 0xee, 0xd5, 0x37, + 0x77, 0x95, 0xae, 0x4c, 0xd8, 0x3a, 0x01, 0xe3, 0x34, 0xd6, + 0xed, 0x0f, 0x9b, 0x79, 0x42, 0xa0, 0xf1, 0x13, 0x28, 0xca, + 0x5e, 0xbc, 0x87, 0x65, 0xb2, 0x50, 0x6b, 0x89, 0x1d, 0xff, + 0xc4, 0x26, 0x00, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93, + 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x03, 0x3b, 0xd8, 0x96, 0x75, + 0x4d, 0xae, 0x3d, 0xde, 0xe6, 0x05, 0xdd, 0x3e, 0x06, 0xe5, + 0x76, 0x95, 0xad, 0x4e, 0x31, 0xd2, 0xea, 0x09, 0x9a, 0x79, + 0x41, 0xa2, 0x7a, 0x99, 0xa1, 0x42, 0xd1, 0x32, 0x0a, 0xe9, + 0xa7, 0x44, 0x7c, 0x9f, 0x0c, 0xef, 0xd7, 0x34, 0xec, 0x0f, + 0x37, 0xd4, 0x47, 0xa4, 0x9c, 0x7f, 0x62, 0x81, 0xb9, 0x5a, + 0xc9, 0x2a, 0x12, 0xf1, 0x29, 0xca, 0xf2, 0x11, 0x82, 0x61, + 0x59, 0xba, 0xf4, 0x17, 0x2f, 0xcc, 0x5f, 0xbc, 0x84, 0x67, + 0xbf, 0x5c, 0x64, 0x87, 0x14, 0xf7, 0xcf, 0x2c, 0x53, 0xb0, + 0x88, 0x6b, 0xf8, 0x1b, 0x23, 0xc0, 0x18, 0xfb, 0xc3, 0x20, + 0xb3, 0x50, 0x68, 0x8b, 0xc5, 0x26, 0x1e, 0xfd, 0x6e, 0x8d, + 0xb5, 0x56, 0x8e, 0x6d, 0x55, 0xb6, 0x25, 0xc6, 0xfe, 0x1d, + 0xc4, 0x27, 0x1f, 0xfc, 0x6f, 0x8c, 0xb4, 0x57, 0x8f, 0x6c, + 0x54, 0xb7, 0x24, 0xc7, 0xff, 0x1c, 0x52, 0xb1, 0x89, 0x6a, + 0xf9, 0x1a, 0x22, 0xc1, 0x19, 0xfa, 0xc2, 0x21, 0xb2, 0x51, + 0x69, 0x8a, 0xf5, 0x16, 0x2e, 0xcd, 0x5e, 0xbd, 0x85, 0x66, + 0xbe, 0x5d, 0x65, 0x86, 0x15, 0xf6, 0xce, 0x2d, 0x63, 0x80, + 0xb8, 0x5b, 0xc8, 0x2b, 0x13, 0xf0, 0x28, 0xcb, 0xf3, 0x10, + 0x83, 0x60, 0x58, 0xbb, 0xa6, 0x45, 0x7d, 0x9e, 0x0d, 0xee, + 0xd6, 0x35, 0xed, 0x0e, 0x36, 0xd5, 0x46, 0xa5, 0x9d, 0x7e, + 0x30, 0xd3, 0xeb, 0x08, 0x9b, 0x78, 0x40, 0xa3, 0x7b, 0x98, + 0xa0, 0x43, 0xd0, 0x33, 0x0b, 0xe8, 0x97, 0x74, 0x4c, 0xaf, + 0x3c, 0xdf, 0xe7, 0x04, 0xdc, 0x3f, 0x07, 0xe4, 0x77, 0x94, + 0xac, 0x4f, 0x01, 0xe2, 0xda, 0x39, 0xaa, 0x49, 0x71, 0x92, + 0x4a, 0xa9, 0x91, 0x72, 0xe1, 0x02, 0x3a, 0xd9, 0x00, 0xe4, + 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, + 0xc4, 0x20, 0x11, 0xf5, 0xe6, 0x02, 0x33, 0xd7, 0x51, 0xb5, + 0x84, 0x60, 0x95, 0x71, 0x40, 0xa4, 0x22, 0xc6, 0xf7, 0x13, + 0xd1, 0x35, 0x04, 0xe0, 0x66, 0x82, 0xb3, 0x57, 0xa2, 0x46, + 0x77, 0x93, 0x15, 0xf1, 0xc0, 0x24, 0x37, 0xd3, 0xe2, 0x06, + 0x80, 0x64, 0x55, 0xb1, 0x44, 0xa0, 0x91, 0x75, 0xf3, 0x17, + 0x26, 0xc2, 0xbf, 0x5b, 0x6a, 0x8e, 0x08, 0xec, 0xdd, 0x39, + 0xcc, 0x28, 0x19, 0xfd, 0x7b, 0x9f, 0xae, 0x4a, 0x59, 0xbd, + 0x8c, 0x68, 0xee, 0x0a, 
0x3b, 0xdf, 0x2a, 0xce, 0xff, 0x1b, + 0x9d, 0x79, 0x48, 0xac, 0x6e, 0x8a, 0xbb, 0x5f, 0xd9, 0x3d, + 0x0c, 0xe8, 0x1d, 0xf9, 0xc8, 0x2c, 0xaa, 0x4e, 0x7f, 0x9b, + 0x88, 0x6c, 0x5d, 0xb9, 0x3f, 0xdb, 0xea, 0x0e, 0xfb, 0x1f, + 0x2e, 0xca, 0x4c, 0xa8, 0x99, 0x7d, 0x63, 0x87, 0xb6, 0x52, + 0xd4, 0x30, 0x01, 0xe5, 0x10, 0xf4, 0xc5, 0x21, 0xa7, 0x43, + 0x72, 0x96, 0x85, 0x61, 0x50, 0xb4, 0x32, 0xd6, 0xe7, 0x03, + 0xf6, 0x12, 0x23, 0xc7, 0x41, 0xa5, 0x94, 0x70, 0xb2, 0x56, + 0x67, 0x83, 0x05, 0xe1, 0xd0, 0x34, 0xc1, 0x25, 0x14, 0xf0, + 0x76, 0x92, 0xa3, 0x47, 0x54, 0xb0, 0x81, 0x65, 0xe3, 0x07, + 0x36, 0xd2, 0x27, 0xc3, 0xf2, 0x16, 0x90, 0x74, 0x45, 0xa1, + 0xdc, 0x38, 0x09, 0xed, 0x6b, 0x8f, 0xbe, 0x5a, 0xaf, 0x4b, + 0x7a, 0x9e, 0x18, 0xfc, 0xcd, 0x29, 0x3a, 0xde, 0xef, 0x0b, + 0x8d, 0x69, 0x58, 0xbc, 0x49, 0xad, 0x9c, 0x78, 0xfe, 0x1a, + 0x2b, 0xcf, 0x0d, 0xe9, 0xd8, 0x3c, 0xba, 0x5e, 0x6f, 0x8b, + 0x7e, 0x9a, 0xab, 0x4f, 0xc9, 0x2d, 0x1c, 0xf8, 0xeb, 0x0f, + 0x3e, 0xda, 0x5c, 0xb8, 0x89, 0x6d, 0x98, 0x7c, 0x4d, 0xa9, + 0x2f, 0xcb, 0xfa, 0x1e, 0x00, 0xe5, 0xd7, 0x32, 0xb3, 0x56, + 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa, + 0xf6, 0x13, 0x21, 0xc4, 0x45, 0xa0, 0x92, 0x77, 0x8d, 0x68, + 0x5a, 0xbf, 0x3e, 0xdb, 0xe9, 0x0c, 0xf1, 0x14, 0x26, 0xc3, + 0x42, 0xa7, 0x95, 0x70, 0x8a, 0x6f, 0x5d, 0xb8, 0x39, 0xdc, + 0xee, 0x0b, 0x07, 0xe2, 0xd0, 0x35, 0xb4, 0x51, 0x63, 0x86, + 0x7c, 0x99, 0xab, 0x4e, 0xcf, 0x2a, 0x18, 0xfd, 0xff, 0x1a, + 0x28, 0xcd, 0x4c, 0xa9, 0x9b, 0x7e, 0x84, 0x61, 0x53, 0xb6, + 0x37, 0xd2, 0xe0, 0x05, 0x09, 0xec, 0xde, 0x3b, 0xba, 0x5f, + 0x6d, 0x88, 0x72, 0x97, 0xa5, 0x40, 0xc1, 0x24, 0x16, 0xf3, + 0x0e, 0xeb, 0xd9, 0x3c, 0xbd, 0x58, 0x6a, 0x8f, 0x75, 0x90, + 0xa2, 0x47, 0xc6, 0x23, 0x11, 0xf4, 0xf8, 0x1d, 0x2f, 0xca, + 0x4b, 0xae, 0x9c, 0x79, 0x83, 0x66, 0x54, 0xb1, 0x30, 0xd5, + 0xe7, 0x02, 0xe3, 0x06, 0x34, 0xd1, 0x50, 0xb5, 0x87, 0x62, + 0x98, 0x7d, 0x4f, 0xaa, 0x2b, 0xce, 0xfc, 0x19, 0x15, 0xf0, + 0xc2, 0x27, 0xa6, 0x43, 0x71, 0x94, 0x6e, 0x8b, 0xb9, 0x5c, + 0xdd, 0x38, 0x0a, 0xef, 0x12, 0xf7, 0xc5, 0x20, 0xa1, 0x44, + 0x76, 0x93, 0x69, 0x8c, 0xbe, 0x5b, 0xda, 0x3f, 0x0d, 0xe8, + 0xe4, 0x01, 0x33, 0xd6, 0x57, 0xb2, 0x80, 0x65, 0x9f, 0x7a, + 0x48, 0xad, 0x2c, 0xc9, 0xfb, 0x1e, 0x1c, 0xf9, 0xcb, 0x2e, + 0xaf, 0x4a, 0x78, 0x9d, 0x67, 0x82, 0xb0, 0x55, 0xd4, 0x31, + 0x03, 0xe6, 0xea, 0x0f, 0x3d, 0xd8, 0x59, 0xbc, 0x8e, 0x6b, + 0x91, 0x74, 0x46, 0xa3, 0x22, 0xc7, 0xf5, 0x10, 0xed, 0x08, + 0x3a, 0xdf, 0x5e, 0xbb, 0x89, 0x6c, 0x96, 0x73, 0x41, 0xa4, + 0x25, 0xc0, 0xf2, 0x17, 0x1b, 0xfe, 0xcc, 0x29, 0xa8, 0x4d, + 0x7f, 0x9a, 0x60, 0x85, 0xb7, 0x52, 0xd3, 0x36, 0x04, 0xe1, + 0x00, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85, + 0xb2, 0x54, 0xdc, 0x3a, 0x0d, 0xeb, 0xc6, 0x20, 0x17, 0xf1, + 0x79, 0x9f, 0xa8, 0x4e, 0xa5, 0x43, 0x74, 0x92, 0x1a, 0xfc, + 0xcb, 0x2d, 0x91, 0x77, 0x40, 0xa6, 0x2e, 0xc8, 0xff, 0x19, + 0xf2, 0x14, 0x23, 0xc5, 0x4d, 0xab, 0x9c, 0x7a, 0x57, 0xb1, + 0x86, 0x60, 0xe8, 0x0e, 0x39, 0xdf, 0x34, 0xd2, 0xe5, 0x03, + 0x8b, 0x6d, 0x5a, 0xbc, 0x3f, 0xd9, 0xee, 0x08, 0x80, 0x66, + 0x51, 0xb7, 0x5c, 0xba, 0x8d, 0x6b, 0xe3, 0x05, 0x32, 0xd4, + 0xf9, 0x1f, 0x28, 0xce, 0x46, 0xa0, 0x97, 0x71, 0x9a, 0x7c, + 0x4b, 0xad, 0x25, 0xc3, 0xf4, 0x12, 0xae, 0x48, 0x7f, 0x99, + 0x11, 0xf7, 0xc0, 0x26, 0xcd, 0x2b, 0x1c, 0xfa, 0x72, 0x94, + 0xa3, 0x45, 0x68, 0x8e, 0xb9, 0x5f, 0xd7, 0x31, 0x06, 0xe0, + 0x0b, 0xed, 0xda, 0x3c, 0xb4, 0x52, 0x65, 0x83, 0x7e, 0x98, + 0xaf, 0x49, 0xc1, 0x27, 0x10, 0xf6, 0x1d, 0xfb, 0xcc, 0x2a, + 0xa2, 0x44, 0x73, 0x95, 0xb8, 0x5e, 0x69, 
0x8f, 0x07, 0xe1, + 0xd6, 0x30, 0xdb, 0x3d, 0x0a, 0xec, 0x64, 0x82, 0xb5, 0x53, + 0xef, 0x09, 0x3e, 0xd8, 0x50, 0xb6, 0x81, 0x67, 0x8c, 0x6a, + 0x5d, 0xbb, 0x33, 0xd5, 0xe2, 0x04, 0x29, 0xcf, 0xf8, 0x1e, + 0x96, 0x70, 0x47, 0xa1, 0x4a, 0xac, 0x9b, 0x7d, 0xf5, 0x13, + 0x24, 0xc2, 0x41, 0xa7, 0x90, 0x76, 0xfe, 0x18, 0x2f, 0xc9, + 0x22, 0xc4, 0xf3, 0x15, 0x9d, 0x7b, 0x4c, 0xaa, 0x87, 0x61, + 0x56, 0xb0, 0x38, 0xde, 0xe9, 0x0f, 0xe4, 0x02, 0x35, 0xd3, + 0x5b, 0xbd, 0x8a, 0x6c, 0xd0, 0x36, 0x01, 0xe7, 0x6f, 0x89, + 0xbe, 0x58, 0xb3, 0x55, 0x62, 0x84, 0x0c, 0xea, 0xdd, 0x3b, + 0x16, 0xf0, 0xc7, 0x21, 0xa9, 0x4f, 0x78, 0x9e, 0x75, 0x93, + 0xa4, 0x42, 0xca, 0x2c, 0x1b, 0xfd, 0x00, 0xe7, 0xd3, 0x34, + 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, + 0x03, 0xe4, 0xd6, 0x31, 0x05, 0xe2, 0x6d, 0x8a, 0xbe, 0x59, + 0xbd, 0x5a, 0x6e, 0x89, 0x06, 0xe1, 0xd5, 0x32, 0xb1, 0x56, + 0x62, 0x85, 0x0a, 0xed, 0xd9, 0x3e, 0xda, 0x3d, 0x09, 0xee, + 0x61, 0x86, 0xb2, 0x55, 0x67, 0x80, 0xb4, 0x53, 0xdc, 0x3b, + 0x0f, 0xe8, 0x0c, 0xeb, 0xdf, 0x38, 0xb7, 0x50, 0x64, 0x83, + 0x7f, 0x98, 0xac, 0x4b, 0xc4, 0x23, 0x17, 0xf0, 0x14, 0xf3, + 0xc7, 0x20, 0xaf, 0x48, 0x7c, 0x9b, 0xa9, 0x4e, 0x7a, 0x9d, + 0x12, 0xf5, 0xc1, 0x26, 0xc2, 0x25, 0x11, 0xf6, 0x79, 0x9e, + 0xaa, 0x4d, 0xce, 0x29, 0x1d, 0xfa, 0x75, 0x92, 0xa6, 0x41, + 0xa5, 0x42, 0x76, 0x91, 0x1e, 0xf9, 0xcd, 0x2a, 0x18, 0xff, + 0xcb, 0x2c, 0xa3, 0x44, 0x70, 0x97, 0x73, 0x94, 0xa0, 0x47, + 0xc8, 0x2f, 0x1b, 0xfc, 0xfe, 0x19, 0x2d, 0xca, 0x45, 0xa2, + 0x96, 0x71, 0x95, 0x72, 0x46, 0xa1, 0x2e, 0xc9, 0xfd, 0x1a, + 0x28, 0xcf, 0xfb, 0x1c, 0x93, 0x74, 0x40, 0xa7, 0x43, 0xa4, + 0x90, 0x77, 0xf8, 0x1f, 0x2b, 0xcc, 0x4f, 0xa8, 0x9c, 0x7b, + 0xf4, 0x13, 0x27, 0xc0, 0x24, 0xc3, 0xf7, 0x10, 0x9f, 0x78, + 0x4c, 0xab, 0x99, 0x7e, 0x4a, 0xad, 0x22, 0xc5, 0xf1, 0x16, + 0xf2, 0x15, 0x21, 0xc6, 0x49, 0xae, 0x9a, 0x7d, 0x81, 0x66, + 0x52, 0xb5, 0x3a, 0xdd, 0xe9, 0x0e, 0xea, 0x0d, 0x39, 0xde, + 0x51, 0xb6, 0x82, 0x65, 0x57, 0xb0, 0x84, 0x63, 0xec, 0x0b, + 0x3f, 0xd8, 0x3c, 0xdb, 0xef, 0x08, 0x87, 0x60, 0x54, 0xb3, + 0x30, 0xd7, 0xe3, 0x04, 0x8b, 0x6c, 0x58, 0xbf, 0x5b, 0xbc, + 0x88, 0x6f, 0xe0, 0x07, 0x33, 0xd4, 0xe6, 0x01, 0x35, 0xd2, + 0x5d, 0xba, 0x8e, 0x69, 0x8d, 0x6a, 0x5e, 0xb9, 0x36, 0xd1, + 0xe5, 0x02, 0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, + 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1, 0x26, 0xce, + 0xeb, 0x03, 0xa1, 0x49, 0x6c, 0x84, 0x35, 0xdd, 0xf8, 0x10, + 0xb2, 0x5a, 0x7f, 0x97, 0x4c, 0xa4, 0x81, 0x69, 0xcb, 0x23, + 0x06, 0xee, 0x5f, 0xb7, 0x92, 0x7a, 0xd8, 0x30, 0x15, 0xfd, + 0x6a, 0x82, 0xa7, 0x4f, 0xed, 0x05, 0x20, 0xc8, 0x79, 0x91, + 0xb4, 0x5c, 0xfe, 0x16, 0x33, 0xdb, 0x98, 0x70, 0x55, 0xbd, + 0x1f, 0xf7, 0xd2, 0x3a, 0x8b, 0x63, 0x46, 0xae, 0x0c, 0xe4, + 0xc1, 0x29, 0xbe, 0x56, 0x73, 0x9b, 0x39, 0xd1, 0xf4, 0x1c, + 0xad, 0x45, 0x60, 0x88, 0x2a, 0xc2, 0xe7, 0x0f, 0xd4, 0x3c, + 0x19, 0xf1, 0x53, 0xbb, 0x9e, 0x76, 0xc7, 0x2f, 0x0a, 0xe2, + 0x40, 0xa8, 0x8d, 0x65, 0xf2, 0x1a, 0x3f, 0xd7, 0x75, 0x9d, + 0xb8, 0x50, 0xe1, 0x09, 0x2c, 0xc4, 0x66, 0x8e, 0xab, 0x43, + 0x2d, 0xc5, 0xe0, 0x08, 0xaa, 0x42, 0x67, 0x8f, 0x3e, 0xd6, + 0xf3, 0x1b, 0xb9, 0x51, 0x74, 0x9c, 0x0b, 0xe3, 0xc6, 0x2e, + 0x8c, 0x64, 0x41, 0xa9, 0x18, 0xf0, 0xd5, 0x3d, 0x9f, 0x77, + 0x52, 0xba, 0x61, 0x89, 0xac, 0x44, 0xe6, 0x0e, 0x2b, 0xc3, + 0x72, 0x9a, 0xbf, 0x57, 0xf5, 0x1d, 0x38, 0xd0, 0x47, 0xaf, + 0x8a, 0x62, 0xc0, 0x28, 0x0d, 0xe5, 0x54, 0xbc, 0x99, 0x71, + 0xd3, 0x3b, 0x1e, 0xf6, 0xb5, 0x5d, 0x78, 0x90, 0x32, 0xda, + 0xff, 0x17, 0xa6, 0x4e, 0x6b, 0x83, 0x21, 0xc9, 0xec, 0x04, + 
0x93, 0x7b, 0x5e, 0xb6, 0x14, 0xfc, 0xd9, 0x31, 0x80, 0x68, + 0x4d, 0xa5, 0x07, 0xef, 0xca, 0x22, 0xf9, 0x11, 0x34, 0xdc, + 0x7e, 0x96, 0xb3, 0x5b, 0xea, 0x02, 0x27, 0xcf, 0x6d, 0x85, + 0xa0, 0x48, 0xdf, 0x37, 0x12, 0xfa, 0x58, 0xb0, 0x95, 0x7d, + 0xcc, 0x24, 0x01, 0xe9, 0x4b, 0xa3, 0x86, 0x6e, 0x00, 0xe9, + 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, + 0x98, 0x71, 0x57, 0xbe, 0x36, 0xdf, 0xf9, 0x10, 0xb5, 0x5c, + 0x7a, 0x93, 0x2d, 0xc4, 0xe2, 0x0b, 0xae, 0x47, 0x61, 0x88, + 0x6c, 0x85, 0xa3, 0x4a, 0xef, 0x06, 0x20, 0xc9, 0x77, 0x9e, + 0xb8, 0x51, 0xf4, 0x1d, 0x3b, 0xd2, 0x5a, 0xb3, 0x95, 0x7c, + 0xd9, 0x30, 0x16, 0xff, 0x41, 0xa8, 0x8e, 0x67, 0xc2, 0x2b, + 0x0d, 0xe4, 0xd8, 0x31, 0x17, 0xfe, 0x5b, 0xb2, 0x94, 0x7d, + 0xc3, 0x2a, 0x0c, 0xe5, 0x40, 0xa9, 0x8f, 0x66, 0xee, 0x07, + 0x21, 0xc8, 0x6d, 0x84, 0xa2, 0x4b, 0xf5, 0x1c, 0x3a, 0xd3, + 0x76, 0x9f, 0xb9, 0x50, 0xb4, 0x5d, 0x7b, 0x92, 0x37, 0xde, + 0xf8, 0x11, 0xaf, 0x46, 0x60, 0x89, 0x2c, 0xc5, 0xe3, 0x0a, + 0x82, 0x6b, 0x4d, 0xa4, 0x01, 0xe8, 0xce, 0x27, 0x99, 0x70, + 0x56, 0xbf, 0x1a, 0xf3, 0xd5, 0x3c, 0xad, 0x44, 0x62, 0x8b, + 0x2e, 0xc7, 0xe1, 0x08, 0xb6, 0x5f, 0x79, 0x90, 0x35, 0xdc, + 0xfa, 0x13, 0x9b, 0x72, 0x54, 0xbd, 0x18, 0xf1, 0xd7, 0x3e, + 0x80, 0x69, 0x4f, 0xa6, 0x03, 0xea, 0xcc, 0x25, 0xc1, 0x28, + 0x0e, 0xe7, 0x42, 0xab, 0x8d, 0x64, 0xda, 0x33, 0x15, 0xfc, + 0x59, 0xb0, 0x96, 0x7f, 0xf7, 0x1e, 0x38, 0xd1, 0x74, 0x9d, + 0xbb, 0x52, 0xec, 0x05, 0x23, 0xca, 0x6f, 0x86, 0xa0, 0x49, + 0x75, 0x9c, 0xba, 0x53, 0xf6, 0x1f, 0x39, 0xd0, 0x6e, 0x87, + 0xa1, 0x48, 0xed, 0x04, 0x22, 0xcb, 0x43, 0xaa, 0x8c, 0x65, + 0xc0, 0x29, 0x0f, 0xe6, 0x58, 0xb1, 0x97, 0x7e, 0xdb, 0x32, + 0x14, 0xfd, 0x19, 0xf0, 0xd6, 0x3f, 0x9a, 0x73, 0x55, 0xbc, + 0x02, 0xeb, 0xcd, 0x24, 0x81, 0x68, 0x4e, 0xa7, 0x2f, 0xc6, + 0xe0, 0x09, 0xac, 0x45, 0x63, 0x8a, 0x34, 0xdd, 0xfb, 0x12, + 0xb7, 0x5e, 0x78, 0x91, 0x00, 0xea, 0xc9, 0x23, 0x8f, 0x65, + 0x46, 0xac, 0x03, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf, + 0x06, 0xec, 0xcf, 0x25, 0x89, 0x63, 0x40, 0xaa, 0x05, 0xef, + 0xcc, 0x26, 0x8a, 0x60, 0x43, 0xa9, 0x0c, 0xe6, 0xc5, 0x2f, + 0x83, 0x69, 0x4a, 0xa0, 0x0f, 0xe5, 0xc6, 0x2c, 0x80, 0x6a, + 0x49, 0xa3, 0x0a, 0xe0, 0xc3, 0x29, 0x85, 0x6f, 0x4c, 0xa6, + 0x09, 0xe3, 0xc0, 0x2a, 0x86, 0x6c, 0x4f, 0xa5, 0x18, 0xf2, + 0xd1, 0x3b, 0x97, 0x7d, 0x5e, 0xb4, 0x1b, 0xf1, 0xd2, 0x38, + 0x94, 0x7e, 0x5d, 0xb7, 0x1e, 0xf4, 0xd7, 0x3d, 0x91, 0x7b, + 0x58, 0xb2, 0x1d, 0xf7, 0xd4, 0x3e, 0x92, 0x78, 0x5b, 0xb1, + 0x14, 0xfe, 0xdd, 0x37, 0x9b, 0x71, 0x52, 0xb8, 0x17, 0xfd, + 0xde, 0x34, 0x98, 0x72, 0x51, 0xbb, 0x12, 0xf8, 0xdb, 0x31, + 0x9d, 0x77, 0x54, 0xbe, 0x11, 0xfb, 0xd8, 0x32, 0x9e, 0x74, + 0x57, 0xbd, 0x30, 0xda, 0xf9, 0x13, 0xbf, 0x55, 0x76, 0x9c, + 0x33, 0xd9, 0xfa, 0x10, 0xbc, 0x56, 0x75, 0x9f, 0x36, 0xdc, + 0xff, 0x15, 0xb9, 0x53, 0x70, 0x9a, 0x35, 0xdf, 0xfc, 0x16, + 0xba, 0x50, 0x73, 0x99, 0x3c, 0xd6, 0xf5, 0x1f, 0xb3, 0x59, + 0x7a, 0x90, 0x3f, 0xd5, 0xf6, 0x1c, 0xb0, 0x5a, 0x79, 0x93, + 0x3a, 0xd0, 0xf3, 0x19, 0xb5, 0x5f, 0x7c, 0x96, 0x39, 0xd3, + 0xf0, 0x1a, 0xb6, 0x5c, 0x7f, 0x95, 0x28, 0xc2, 0xe1, 0x0b, + 0xa7, 0x4d, 0x6e, 0x84, 0x2b, 0xc1, 0xe2, 0x08, 0xa4, 0x4e, + 0x6d, 0x87, 0x2e, 0xc4, 0xe7, 0x0d, 0xa1, 0x4b, 0x68, 0x82, + 0x2d, 0xc7, 0xe4, 0x0e, 0xa2, 0x48, 0x6b, 0x81, 0x24, 0xce, + 0xed, 0x07, 0xab, 0x41, 0x62, 0x88, 0x27, 0xcd, 0xee, 0x04, + 0xa8, 0x42, 0x61, 0x8b, 0x22, 0xc8, 0xeb, 0x01, 0xad, 0x47, + 0x64, 0x8e, 0x21, 0xcb, 0xe8, 0x02, 0xae, 0x44, 0x67, 0x8d, + 0x00, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0x0b, 0xe0, + 0xc0, 0x2b, 0x80, 
0x6b, 0x4b, 0xa0, 0x16, 0xfd, 0xdd, 0x36, + 0x9d, 0x76, 0x56, 0xbd, 0x1d, 0xf6, 0xd6, 0x3d, 0x96, 0x7d, + 0x5d, 0xb6, 0x2c, 0xc7, 0xe7, 0x0c, 0xa7, 0x4c, 0x6c, 0x87, + 0x27, 0xcc, 0xec, 0x07, 0xac, 0x47, 0x67, 0x8c, 0x3a, 0xd1, + 0xf1, 0x1a, 0xb1, 0x5a, 0x7a, 0x91, 0x31, 0xda, 0xfa, 0x11, + 0xba, 0x51, 0x71, 0x9a, 0x58, 0xb3, 0x93, 0x78, 0xd3, 0x38, + 0x18, 0xf3, 0x53, 0xb8, 0x98, 0x73, 0xd8, 0x33, 0x13, 0xf8, + 0x4e, 0xa5, 0x85, 0x6e, 0xc5, 0x2e, 0x0e, 0xe5, 0x45, 0xae, + 0x8e, 0x65, 0xce, 0x25, 0x05, 0xee, 0x74, 0x9f, 0xbf, 0x54, + 0xff, 0x14, 0x34, 0xdf, 0x7f, 0x94, 0xb4, 0x5f, 0xf4, 0x1f, + 0x3f, 0xd4, 0x62, 0x89, 0xa9, 0x42, 0xe9, 0x02, 0x22, 0xc9, + 0x69, 0x82, 0xa2, 0x49, 0xe2, 0x09, 0x29, 0xc2, 0xb0, 0x5b, + 0x7b, 0x90, 0x3b, 0xd0, 0xf0, 0x1b, 0xbb, 0x50, 0x70, 0x9b, + 0x30, 0xdb, 0xfb, 0x10, 0xa6, 0x4d, 0x6d, 0x86, 0x2d, 0xc6, + 0xe6, 0x0d, 0xad, 0x46, 0x66, 0x8d, 0x26, 0xcd, 0xed, 0x06, + 0x9c, 0x77, 0x57, 0xbc, 0x17, 0xfc, 0xdc, 0x37, 0x97, 0x7c, + 0x5c, 0xb7, 0x1c, 0xf7, 0xd7, 0x3c, 0x8a, 0x61, 0x41, 0xaa, + 0x01, 0xea, 0xca, 0x21, 0x81, 0x6a, 0x4a, 0xa1, 0x0a, 0xe1, + 0xc1, 0x2a, 0xe8, 0x03, 0x23, 0xc8, 0x63, 0x88, 0xa8, 0x43, + 0xe3, 0x08, 0x28, 0xc3, 0x68, 0x83, 0xa3, 0x48, 0xfe, 0x15, + 0x35, 0xde, 0x75, 0x9e, 0xbe, 0x55, 0xf5, 0x1e, 0x3e, 0xd5, + 0x7e, 0x95, 0xb5, 0x5e, 0xc4, 0x2f, 0x0f, 0xe4, 0x4f, 0xa4, + 0x84, 0x6f, 0xcf, 0x24, 0x04, 0xef, 0x44, 0xaf, 0x8f, 0x64, + 0xd2, 0x39, 0x19, 0xf2, 0x59, 0xb2, 0x92, 0x79, 0xd9, 0x32, + 0x12, 0xf9, 0x52, 0xb9, 0x99, 0x72, 0x00, 0xec, 0xc5, 0x29, + 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, + 0x61, 0x8d, 0x66, 0x8a, 0xa3, 0x4f, 0xf1, 0x1d, 0x34, 0xd8, + 0x55, 0xb9, 0x90, 0x7c, 0xc2, 0x2e, 0x07, 0xeb, 0xcc, 0x20, + 0x09, 0xe5, 0x5b, 0xb7, 0x9e, 0x72, 0xff, 0x13, 0x3a, 0xd6, + 0x68, 0x84, 0xad, 0x41, 0xaa, 0x46, 0x6f, 0x83, 0x3d, 0xd1, + 0xf8, 0x14, 0x99, 0x75, 0x5c, 0xb0, 0x0e, 0xe2, 0xcb, 0x27, + 0x85, 0x69, 0x40, 0xac, 0x12, 0xfe, 0xd7, 0x3b, 0xb6, 0x5a, + 0x73, 0x9f, 0x21, 0xcd, 0xe4, 0x08, 0xe3, 0x0f, 0x26, 0xca, + 0x74, 0x98, 0xb1, 0x5d, 0xd0, 0x3c, 0x15, 0xf9, 0x47, 0xab, + 0x82, 0x6e, 0x49, 0xa5, 0x8c, 0x60, 0xde, 0x32, 0x1b, 0xf7, + 0x7a, 0x96, 0xbf, 0x53, 0xed, 0x01, 0x28, 0xc4, 0x2f, 0xc3, + 0xea, 0x06, 0xb8, 0x54, 0x7d, 0x91, 0x1c, 0xf0, 0xd9, 0x35, + 0x8b, 0x67, 0x4e, 0xa2, 0x17, 0xfb, 0xd2, 0x3e, 0x80, 0x6c, + 0x45, 0xa9, 0x24, 0xc8, 0xe1, 0x0d, 0xb3, 0x5f, 0x76, 0x9a, + 0x71, 0x9d, 0xb4, 0x58, 0xe6, 0x0a, 0x23, 0xcf, 0x42, 0xae, + 0x87, 0x6b, 0xd5, 0x39, 0x10, 0xfc, 0xdb, 0x37, 0x1e, 0xf2, + 0x4c, 0xa0, 0x89, 0x65, 0xe8, 0x04, 0x2d, 0xc1, 0x7f, 0x93, + 0xba, 0x56, 0xbd, 0x51, 0x78, 0x94, 0x2a, 0xc6, 0xef, 0x03, + 0x8e, 0x62, 0x4b, 0xa7, 0x19, 0xf5, 0xdc, 0x30, 0x92, 0x7e, + 0x57, 0xbb, 0x05, 0xe9, 0xc0, 0x2c, 0xa1, 0x4d, 0x64, 0x88, + 0x36, 0xda, 0xf3, 0x1f, 0xf4, 0x18, 0x31, 0xdd, 0x63, 0x8f, + 0xa6, 0x4a, 0xc7, 0x2b, 0x02, 0xee, 0x50, 0xbc, 0x95, 0x79, + 0x5e, 0xb2, 0x9b, 0x77, 0xc9, 0x25, 0x0c, 0xe0, 0x6d, 0x81, + 0xa8, 0x44, 0xfa, 0x16, 0x3f, 0xd3, 0x38, 0xd4, 0xfd, 0x11, + 0xaf, 0x43, 0x6a, 0x86, 0x0b, 0xe7, 0xce, 0x22, 0x9c, 0x70, + 0x59, 0xb5, 0x00, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, + 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82, 0x76, 0x9b, + 0xb1, 0x5c, 0xe5, 0x08, 0x22, 0xcf, 0x4d, 0xa0, 0x8a, 0x67, + 0xde, 0x33, 0x19, 0xf4, 0xec, 0x01, 0x2b, 0xc6, 0x7f, 0x92, + 0xb8, 0x55, 0xd7, 0x3a, 0x10, 0xfd, 0x44, 0xa9, 0x83, 0x6e, + 0x9a, 0x77, 0x5d, 0xb0, 0x09, 0xe4, 0xce, 0x23, 0xa1, 0x4c, + 0x66, 0x8b, 0x32, 0xdf, 0xf5, 0x18, 0xc5, 0x28, 0x02, 0xef, + 0x56, 0xbb, 0x91, 0x7c, 0xfe, 0x13, 
0x39, 0xd4, 0x6d, 0x80, + 0xaa, 0x47, 0xb3, 0x5e, 0x74, 0x99, 0x20, 0xcd, 0xe7, 0x0a, + 0x88, 0x65, 0x4f, 0xa2, 0x1b, 0xf6, 0xdc, 0x31, 0x29, 0xc4, + 0xee, 0x03, 0xba, 0x57, 0x7d, 0x90, 0x12, 0xff, 0xd5, 0x38, + 0x81, 0x6c, 0x46, 0xab, 0x5f, 0xb2, 0x98, 0x75, 0xcc, 0x21, + 0x0b, 0xe6, 0x64, 0x89, 0xa3, 0x4e, 0xf7, 0x1a, 0x30, 0xdd, + 0x97, 0x7a, 0x50, 0xbd, 0x04, 0xe9, 0xc3, 0x2e, 0xac, 0x41, + 0x6b, 0x86, 0x3f, 0xd2, 0xf8, 0x15, 0xe1, 0x0c, 0x26, 0xcb, + 0x72, 0x9f, 0xb5, 0x58, 0xda, 0x37, 0x1d, 0xf0, 0x49, 0xa4, + 0x8e, 0x63, 0x7b, 0x96, 0xbc, 0x51, 0xe8, 0x05, 0x2f, 0xc2, + 0x40, 0xad, 0x87, 0x6a, 0xd3, 0x3e, 0x14, 0xf9, 0x0d, 0xe0, + 0xca, 0x27, 0x9e, 0x73, 0x59, 0xb4, 0x36, 0xdb, 0xf1, 0x1c, + 0xa5, 0x48, 0x62, 0x8f, 0x52, 0xbf, 0x95, 0x78, 0xc1, 0x2c, + 0x06, 0xeb, 0x69, 0x84, 0xae, 0x43, 0xfa, 0x17, 0x3d, 0xd0, + 0x24, 0xc9, 0xe3, 0x0e, 0xb7, 0x5a, 0x70, 0x9d, 0x1f, 0xf2, + 0xd8, 0x35, 0x8c, 0x61, 0x4b, 0xa6, 0xbe, 0x53, 0x79, 0x94, + 0x2d, 0xc0, 0xea, 0x07, 0x85, 0x68, 0x42, 0xaf, 0x16, 0xfb, + 0xd1, 0x3c, 0xc8, 0x25, 0x0f, 0xe2, 0x5b, 0xb6, 0x9c, 0x71, + 0xf3, 0x1e, 0x34, 0xd9, 0x60, 0x8d, 0xa7, 0x4a, 0x00, 0xee, + 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0x0c, + 0xbc, 0x52, 0x7d, 0x93, 0x46, 0xa8, 0x87, 0x69, 0xd9, 0x37, + 0x18, 0xf6, 0x65, 0x8b, 0xa4, 0x4a, 0xfa, 0x14, 0x3b, 0xd5, + 0x8c, 0x62, 0x4d, 0xa3, 0x13, 0xfd, 0xd2, 0x3c, 0xaf, 0x41, + 0x6e, 0x80, 0x30, 0xde, 0xf1, 0x1f, 0xca, 0x24, 0x0b, 0xe5, + 0x55, 0xbb, 0x94, 0x7a, 0xe9, 0x07, 0x28, 0xc6, 0x76, 0x98, + 0xb7, 0x59, 0x05, 0xeb, 0xc4, 0x2a, 0x9a, 0x74, 0x5b, 0xb5, + 0x26, 0xc8, 0xe7, 0x09, 0xb9, 0x57, 0x78, 0x96, 0x43, 0xad, + 0x82, 0x6c, 0xdc, 0x32, 0x1d, 0xf3, 0x60, 0x8e, 0xa1, 0x4f, + 0xff, 0x11, 0x3e, 0xd0, 0x89, 0x67, 0x48, 0xa6, 0x16, 0xf8, + 0xd7, 0x39, 0xaa, 0x44, 0x6b, 0x85, 0x35, 0xdb, 0xf4, 0x1a, + 0xcf, 0x21, 0x0e, 0xe0, 0x50, 0xbe, 0x91, 0x7f, 0xec, 0x02, + 0x2d, 0xc3, 0x73, 0x9d, 0xb2, 0x5c, 0x0a, 0xe4, 0xcb, 0x25, + 0x95, 0x7b, 0x54, 0xba, 0x29, 0xc7, 0xe8, 0x06, 0xb6, 0x58, + 0x77, 0x99, 0x4c, 0xa2, 0x8d, 0x63, 0xd3, 0x3d, 0x12, 0xfc, + 0x6f, 0x81, 0xae, 0x40, 0xf0, 0x1e, 0x31, 0xdf, 0x86, 0x68, + 0x47, 0xa9, 0x19, 0xf7, 0xd8, 0x36, 0xa5, 0x4b, 0x64, 0x8a, + 0x3a, 0xd4, 0xfb, 0x15, 0xc0, 0x2e, 0x01, 0xef, 0x5f, 0xb1, + 0x9e, 0x70, 0xe3, 0x0d, 0x22, 0xcc, 0x7c, 0x92, 0xbd, 0x53, + 0x0f, 0xe1, 0xce, 0x20, 0x90, 0x7e, 0x51, 0xbf, 0x2c, 0xc2, + 0xed, 0x03, 0xb3, 0x5d, 0x72, 0x9c, 0x49, 0xa7, 0x88, 0x66, + 0xd6, 0x38, 0x17, 0xf9, 0x6a, 0x84, 0xab, 0x45, 0xf5, 0x1b, + 0x34, 0xda, 0x83, 0x6d, 0x42, 0xac, 0x1c, 0xf2, 0xdd, 0x33, + 0xa0, 0x4e, 0x61, 0x8f, 0x3f, 0xd1, 0xfe, 0x10, 0xc5, 0x2b, + 0x04, 0xea, 0x5a, 0xb4, 0x9b, 0x75, 0xe6, 0x08, 0x27, 0xc9, + 0x79, 0x97, 0xb8, 0x56, 0x00, 0xef, 0xc3, 0x2c, 0x9b, 0x74, + 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x07, 0xb0, 0x5f, 0x73, 0x9c, + 0x56, 0xb9, 0x95, 0x7a, 0xcd, 0x22, 0x0e, 0xe1, 0x7d, 0x92, + 0xbe, 0x51, 0xe6, 0x09, 0x25, 0xca, 0xac, 0x43, 0x6f, 0x80, + 0x37, 0xd8, 0xf4, 0x1b, 0x87, 0x68, 0x44, 0xab, 0x1c, 0xf3, + 0xdf, 0x30, 0xfa, 0x15, 0x39, 0xd6, 0x61, 0x8e, 0xa2, 0x4d, + 0xd1, 0x3e, 0x12, 0xfd, 0x4a, 0xa5, 0x89, 0x66, 0x45, 0xaa, + 0x86, 0x69, 0xde, 0x31, 0x1d, 0xf2, 0x6e, 0x81, 0xad, 0x42, + 0xf5, 0x1a, 0x36, 0xd9, 0x13, 0xfc, 0xd0, 0x3f, 0x88, 0x67, + 0x4b, 0xa4, 0x38, 0xd7, 0xfb, 0x14, 0xa3, 0x4c, 0x60, 0x8f, + 0xe9, 0x06, 0x2a, 0xc5, 0x72, 0x9d, 0xb1, 0x5e, 0xc2, 0x2d, + 0x01, 0xee, 0x59, 0xb6, 0x9a, 0x75, 0xbf, 0x50, 0x7c, 0x93, + 0x24, 0xcb, 0xe7, 0x08, 0x94, 0x7b, 0x57, 0xb8, 0x0f, 0xe0, + 0xcc, 0x23, 0x8a, 0x65, 0x49, 0xa6, 0x11, 0xfe, 0xd2, 
0x3d, + 0xa1, 0x4e, 0x62, 0x8d, 0x3a, 0xd5, 0xf9, 0x16, 0xdc, 0x33, + 0x1f, 0xf0, 0x47, 0xa8, 0x84, 0x6b, 0xf7, 0x18, 0x34, 0xdb, + 0x6c, 0x83, 0xaf, 0x40, 0x26, 0xc9, 0xe5, 0x0a, 0xbd, 0x52, + 0x7e, 0x91, 0x0d, 0xe2, 0xce, 0x21, 0x96, 0x79, 0x55, 0xba, + 0x70, 0x9f, 0xb3, 0x5c, 0xeb, 0x04, 0x28, 0xc7, 0x5b, 0xb4, + 0x98, 0x77, 0xc0, 0x2f, 0x03, 0xec, 0xcf, 0x20, 0x0c, 0xe3, + 0x54, 0xbb, 0x97, 0x78, 0xe4, 0x0b, 0x27, 0xc8, 0x7f, 0x90, + 0xbc, 0x53, 0x99, 0x76, 0x5a, 0xb5, 0x02, 0xed, 0xc1, 0x2e, + 0xb2, 0x5d, 0x71, 0x9e, 0x29, 0xc6, 0xea, 0x05, 0x63, 0x8c, + 0xa0, 0x4f, 0xf8, 0x17, 0x3b, 0xd4, 0x48, 0xa7, 0x8b, 0x64, + 0xd3, 0x3c, 0x10, 0xff, 0x35, 0xda, 0xf6, 0x19, 0xae, 0x41, + 0x6d, 0x82, 0x1e, 0xf1, 0xdd, 0x32, 0x85, 0x6a, 0x46, 0xa9, + 0x00, 0xf0, 0xfd, 0x0d, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, + 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39, 0xbb, 0x4b, 0x46, 0xb6, + 0x5c, 0xac, 0xa1, 0x51, 0x68, 0x98, 0x95, 0x65, 0x8f, 0x7f, + 0x72, 0x82, 0x6b, 0x9b, 0x96, 0x66, 0x8c, 0x7c, 0x71, 0x81, + 0xb8, 0x48, 0x45, 0xb5, 0x5f, 0xaf, 0xa2, 0x52, 0xd0, 0x20, + 0x2d, 0xdd, 0x37, 0xc7, 0xca, 0x3a, 0x03, 0xf3, 0xfe, 0x0e, + 0xe4, 0x14, 0x19, 0xe9, 0xd6, 0x26, 0x2b, 0xdb, 0x31, 0xc1, + 0xcc, 0x3c, 0x05, 0xf5, 0xf8, 0x08, 0xe2, 0x12, 0x1f, 0xef, + 0x6d, 0x9d, 0x90, 0x60, 0x8a, 0x7a, 0x77, 0x87, 0xbe, 0x4e, + 0x43, 0xb3, 0x59, 0xa9, 0xa4, 0x54, 0xbd, 0x4d, 0x40, 0xb0, + 0x5a, 0xaa, 0xa7, 0x57, 0x6e, 0x9e, 0x93, 0x63, 0x89, 0x79, + 0x74, 0x84, 0x06, 0xf6, 0xfb, 0x0b, 0xe1, 0x11, 0x1c, 0xec, + 0xd5, 0x25, 0x28, 0xd8, 0x32, 0xc2, 0xcf, 0x3f, 0xb1, 0x41, + 0x4c, 0xbc, 0x56, 0xa6, 0xab, 0x5b, 0x62, 0x92, 0x9f, 0x6f, + 0x85, 0x75, 0x78, 0x88, 0x0a, 0xfa, 0xf7, 0x07, 0xed, 0x1d, + 0x10, 0xe0, 0xd9, 0x29, 0x24, 0xd4, 0x3e, 0xce, 0xc3, 0x33, + 0xda, 0x2a, 0x27, 0xd7, 0x3d, 0xcd, 0xc0, 0x30, 0x09, 0xf9, + 0xf4, 0x04, 0xee, 0x1e, 0x13, 0xe3, 0x61, 0x91, 0x9c, 0x6c, + 0x86, 0x76, 0x7b, 0x8b, 0xb2, 0x42, 0x4f, 0xbf, 0x55, 0xa5, + 0xa8, 0x58, 0x67, 0x97, 0x9a, 0x6a, 0x80, 0x70, 0x7d, 0x8d, + 0xb4, 0x44, 0x49, 0xb9, 0x53, 0xa3, 0xae, 0x5e, 0xdc, 0x2c, + 0x21, 0xd1, 0x3b, 0xcb, 0xc6, 0x36, 0x0f, 0xff, 0xf2, 0x02, + 0xe8, 0x18, 0x15, 0xe5, 0x0c, 0xfc, 0xf1, 0x01, 0xeb, 0x1b, + 0x16, 0xe6, 0xdf, 0x2f, 0x22, 0xd2, 0x38, 0xc8, 0xc5, 0x35, + 0xb7, 0x47, 0x4a, 0xba, 0x50, 0xa0, 0xad, 0x5d, 0x64, 0x94, + 0x99, 0x69, 0x83, 0x73, 0x7e, 0x8e, 0x00, 0xf1, 0xff, 0x0e, + 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, + 0xc7, 0x36, 0xab, 0x5a, 0x54, 0xa5, 0x48, 0xb9, 0xb7, 0x46, + 0x70, 0x81, 0x8f, 0x7e, 0x93, 0x62, 0x6c, 0x9d, 0x4b, 0xba, + 0xb4, 0x45, 0xa8, 0x59, 0x57, 0xa6, 0x90, 0x61, 0x6f, 0x9e, + 0x73, 0x82, 0x8c, 0x7d, 0xe0, 0x11, 0x1f, 0xee, 0x03, 0xf2, + 0xfc, 0x0d, 0x3b, 0xca, 0xc4, 0x35, 0xd8, 0x29, 0x27, 0xd6, + 0x96, 0x67, 0x69, 0x98, 0x75, 0x84, 0x8a, 0x7b, 0x4d, 0xbc, + 0xb2, 0x43, 0xae, 0x5f, 0x51, 0xa0, 0x3d, 0xcc, 0xc2, 0x33, + 0xde, 0x2f, 0x21, 0xd0, 0xe6, 0x17, 0x19, 0xe8, 0x05, 0xf4, + 0xfa, 0x0b, 0xdd, 0x2c, 0x22, 0xd3, 0x3e, 0xcf, 0xc1, 0x30, + 0x06, 0xf7, 0xf9, 0x08, 0xe5, 0x14, 0x1a, 0xeb, 0x76, 0x87, + 0x89, 0x78, 0x95, 0x64, 0x6a, 0x9b, 0xad, 0x5c, 0x52, 0xa3, + 0x4e, 0xbf, 0xb1, 0x40, 0x31, 0xc0, 0xce, 0x3f, 0xd2, 0x23, + 0x2d, 0xdc, 0xea, 0x1b, 0x15, 0xe4, 0x09, 0xf8, 0xf6, 0x07, + 0x9a, 0x6b, 0x65, 0x94, 0x79, 0x88, 0x86, 0x77, 0x41, 0xb0, + 0xbe, 0x4f, 0xa2, 0x53, 0x5d, 0xac, 0x7a, 0x8b, 0x85, 0x74, + 0x99, 0x68, 0x66, 0x97, 0xa1, 0x50, 0x5e, 0xaf, 0x42, 0xb3, + 0xbd, 0x4c, 0xd1, 0x20, 0x2e, 0xdf, 0x32, 0xc3, 0xcd, 0x3c, + 0x0a, 0xfb, 0xf5, 0x04, 0xe9, 0x18, 0x16, 0xe7, 0xa7, 0x56, + 0x58, 0xa9, 
0x44, 0xb5, 0xbb, 0x4a, 0x7c, 0x8d, 0x83, 0x72, + 0x9f, 0x6e, 0x60, 0x91, 0x0c, 0xfd, 0xf3, 0x02, 0xef, 0x1e, + 0x10, 0xe1, 0xd7, 0x26, 0x28, 0xd9, 0x34, 0xc5, 0xcb, 0x3a, + 0xec, 0x1d, 0x13, 0xe2, 0x0f, 0xfe, 0xf0, 0x01, 0x37, 0xc6, + 0xc8, 0x39, 0xd4, 0x25, 0x2b, 0xda, 0x47, 0xb6, 0xb8, 0x49, + 0xa4, 0x55, 0x5b, 0xaa, 0x9c, 0x6d, 0x63, 0x92, 0x7f, 0x8e, + 0x80, 0x71, 0x00, 0xf2, 0xf9, 0x0b, 0xef, 0x1d, 0x16, 0xe4, + 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27, 0x9b, 0x69, + 0x62, 0x90, 0x74, 0x86, 0x8d, 0x7f, 0x58, 0xaa, 0xa1, 0x53, + 0xb7, 0x45, 0x4e, 0xbc, 0x2b, 0xd9, 0xd2, 0x20, 0xc4, 0x36, + 0x3d, 0xcf, 0xe8, 0x1a, 0x11, 0xe3, 0x07, 0xf5, 0xfe, 0x0c, + 0xb0, 0x42, 0x49, 0xbb, 0x5f, 0xad, 0xa6, 0x54, 0x73, 0x81, + 0x8a, 0x78, 0x9c, 0x6e, 0x65, 0x97, 0x56, 0xa4, 0xaf, 0x5d, + 0xb9, 0x4b, 0x40, 0xb2, 0x95, 0x67, 0x6c, 0x9e, 0x7a, 0x88, + 0x83, 0x71, 0xcd, 0x3f, 0x34, 0xc6, 0x22, 0xd0, 0xdb, 0x29, + 0x0e, 0xfc, 0xf7, 0x05, 0xe1, 0x13, 0x18, 0xea, 0x7d, 0x8f, + 0x84, 0x76, 0x92, 0x60, 0x6b, 0x99, 0xbe, 0x4c, 0x47, 0xb5, + 0x51, 0xa3, 0xa8, 0x5a, 0xe6, 0x14, 0x1f, 0xed, 0x09, 0xfb, + 0xf0, 0x02, 0x25, 0xd7, 0xdc, 0x2e, 0xca, 0x38, 0x33, 0xc1, + 0xac, 0x5e, 0x55, 0xa7, 0x43, 0xb1, 0xba, 0x48, 0x6f, 0x9d, + 0x96, 0x64, 0x80, 0x72, 0x79, 0x8b, 0x37, 0xc5, 0xce, 0x3c, + 0xd8, 0x2a, 0x21, 0xd3, 0xf4, 0x06, 0x0d, 0xff, 0x1b, 0xe9, + 0xe2, 0x10, 0x87, 0x75, 0x7e, 0x8c, 0x68, 0x9a, 0x91, 0x63, + 0x44, 0xb6, 0xbd, 0x4f, 0xab, 0x59, 0x52, 0xa0, 0x1c, 0xee, + 0xe5, 0x17, 0xf3, 0x01, 0x0a, 0xf8, 0xdf, 0x2d, 0x26, 0xd4, + 0x30, 0xc2, 0xc9, 0x3b, 0xfa, 0x08, 0x03, 0xf1, 0x15, 0xe7, + 0xec, 0x1e, 0x39, 0xcb, 0xc0, 0x32, 0xd6, 0x24, 0x2f, 0xdd, + 0x61, 0x93, 0x98, 0x6a, 0x8e, 0x7c, 0x77, 0x85, 0xa2, 0x50, + 0x5b, 0xa9, 0x4d, 0xbf, 0xb4, 0x46, 0xd1, 0x23, 0x28, 0xda, + 0x3e, 0xcc, 0xc7, 0x35, 0x12, 0xe0, 0xeb, 0x19, 0xfd, 0x0f, + 0x04, 0xf6, 0x4a, 0xb8, 0xb3, 0x41, 0xa5, 0x57, 0x5c, 0xae, + 0x89, 0x7b, 0x70, 0x82, 0x66, 0x94, 0x9f, 0x6d, 0x00, 0xf3, + 0xfb, 0x08, 0xeb, 0x18, 0x10, 0xe3, 0xcb, 0x38, 0x30, 0xc3, + 0x20, 0xd3, 0xdb, 0x28, 0x8b, 0x78, 0x70, 0x83, 0x60, 0x93, + 0x9b, 0x68, 0x40, 0xb3, 0xbb, 0x48, 0xab, 0x58, 0x50, 0xa3, + 0x0b, 0xf8, 0xf0, 0x03, 0xe0, 0x13, 0x1b, 0xe8, 0xc0, 0x33, + 0x3b, 0xc8, 0x2b, 0xd8, 0xd0, 0x23, 0x80, 0x73, 0x7b, 0x88, + 0x6b, 0x98, 0x90, 0x63, 0x4b, 0xb8, 0xb0, 0x43, 0xa0, 0x53, + 0x5b, 0xa8, 0x16, 0xe5, 0xed, 0x1e, 0xfd, 0x0e, 0x06, 0xf5, + 0xdd, 0x2e, 0x26, 0xd5, 0x36, 0xc5, 0xcd, 0x3e, 0x9d, 0x6e, + 0x66, 0x95, 0x76, 0x85, 0x8d, 0x7e, 0x56, 0xa5, 0xad, 0x5e, + 0xbd, 0x4e, 0x46, 0xb5, 0x1d, 0xee, 0xe6, 0x15, 0xf6, 0x05, + 0x0d, 0xfe, 0xd6, 0x25, 0x2d, 0xde, 0x3d, 0xce, 0xc6, 0x35, + 0x96, 0x65, 0x6d, 0x9e, 0x7d, 0x8e, 0x86, 0x75, 0x5d, 0xae, + 0xa6, 0x55, 0xb6, 0x45, 0x4d, 0xbe, 0x2c, 0xdf, 0xd7, 0x24, + 0xc7, 0x34, 0x3c, 0xcf, 0xe7, 0x14, 0x1c, 0xef, 0x0c, 0xff, + 0xf7, 0x04, 0xa7, 0x54, 0x5c, 0xaf, 0x4c, 0xbf, 0xb7, 0x44, + 0x6c, 0x9f, 0x97, 0x64, 0x87, 0x74, 0x7c, 0x8f, 0x27, 0xd4, + 0xdc, 0x2f, 0xcc, 0x3f, 0x37, 0xc4, 0xec, 0x1f, 0x17, 0xe4, + 0x07, 0xf4, 0xfc, 0x0f, 0xac, 0x5f, 0x57, 0xa4, 0x47, 0xb4, + 0xbc, 0x4f, 0x67, 0x94, 0x9c, 0x6f, 0x8c, 0x7f, 0x77, 0x84, + 0x3a, 0xc9, 0xc1, 0x32, 0xd1, 0x22, 0x2a, 0xd9, 0xf1, 0x02, + 0x0a, 0xf9, 0x1a, 0xe9, 0xe1, 0x12, 0xb1, 0x42, 0x4a, 0xb9, + 0x5a, 0xa9, 0xa1, 0x52, 0x7a, 0x89, 0x81, 0x72, 0x91, 0x62, + 0x6a, 0x99, 0x31, 0xc2, 0xca, 0x39, 0xda, 0x29, 0x21, 0xd2, + 0xfa, 0x09, 0x01, 0xf2, 0x11, 0xe2, 0xea, 0x19, 0xba, 0x49, + 0x41, 0xb2, 0x51, 0xa2, 0xaa, 0x59, 0x71, 0x82, 0x8a, 0x79, + 0x9a, 0x69, 0x61, 0x92, 0x00, 
0xf4, 0xf5, 0x01, 0xf7, 0x03, + 0x02, 0xf6, 0xf3, 0x07, 0x06, 0xf2, 0x04, 0xf0, 0xf1, 0x05, + 0xfb, 0x0f, 0x0e, 0xfa, 0x0c, 0xf8, 0xf9, 0x0d, 0x08, 0xfc, + 0xfd, 0x09, 0xff, 0x0b, 0x0a, 0xfe, 0xeb, 0x1f, 0x1e, 0xea, + 0x1c, 0xe8, 0xe9, 0x1d, 0x18, 0xec, 0xed, 0x19, 0xef, 0x1b, + 0x1a, 0xee, 0x10, 0xe4, 0xe5, 0x11, 0xe7, 0x13, 0x12, 0xe6, + 0xe3, 0x17, 0x16, 0xe2, 0x14, 0xe0, 0xe1, 0x15, 0xcb, 0x3f, + 0x3e, 0xca, 0x3c, 0xc8, 0xc9, 0x3d, 0x38, 0xcc, 0xcd, 0x39, + 0xcf, 0x3b, 0x3a, 0xce, 0x30, 0xc4, 0xc5, 0x31, 0xc7, 0x33, + 0x32, 0xc6, 0xc3, 0x37, 0x36, 0xc2, 0x34, 0xc0, 0xc1, 0x35, + 0x20, 0xd4, 0xd5, 0x21, 0xd7, 0x23, 0x22, 0xd6, 0xd3, 0x27, + 0x26, 0xd2, 0x24, 0xd0, 0xd1, 0x25, 0xdb, 0x2f, 0x2e, 0xda, + 0x2c, 0xd8, 0xd9, 0x2d, 0x28, 0xdc, 0xdd, 0x29, 0xdf, 0x2b, + 0x2a, 0xde, 0x8b, 0x7f, 0x7e, 0x8a, 0x7c, 0x88, 0x89, 0x7d, + 0x78, 0x8c, 0x8d, 0x79, 0x8f, 0x7b, 0x7a, 0x8e, 0x70, 0x84, + 0x85, 0x71, 0x87, 0x73, 0x72, 0x86, 0x83, 0x77, 0x76, 0x82, + 0x74, 0x80, 0x81, 0x75, 0x60, 0x94, 0x95, 0x61, 0x97, 0x63, + 0x62, 0x96, 0x93, 0x67, 0x66, 0x92, 0x64, 0x90, 0x91, 0x65, + 0x9b, 0x6f, 0x6e, 0x9a, 0x6c, 0x98, 0x99, 0x6d, 0x68, 0x9c, + 0x9d, 0x69, 0x9f, 0x6b, 0x6a, 0x9e, 0x40, 0xb4, 0xb5, 0x41, + 0xb7, 0x43, 0x42, 0xb6, 0xb3, 0x47, 0x46, 0xb2, 0x44, 0xb0, + 0xb1, 0x45, 0xbb, 0x4f, 0x4e, 0xba, 0x4c, 0xb8, 0xb9, 0x4d, + 0x48, 0xbc, 0xbd, 0x49, 0xbf, 0x4b, 0x4a, 0xbe, 0xab, 0x5f, + 0x5e, 0xaa, 0x5c, 0xa8, 0xa9, 0x5d, 0x58, 0xac, 0xad, 0x59, + 0xaf, 0x5b, 0x5a, 0xae, 0x50, 0xa4, 0xa5, 0x51, 0xa7, 0x53, + 0x52, 0xa6, 0xa3, 0x57, 0x56, 0xa2, 0x54, 0xa0, 0xa1, 0x55, + 0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1, 0xfb, 0x0e, + 0x0c, 0xf9, 0x08, 0xfd, 0xff, 0x0a, 0xeb, 0x1e, 0x1c, 0xe9, + 0x18, 0xed, 0xef, 0x1a, 0x10, 0xe5, 0xe7, 0x12, 0xe3, 0x16, + 0x14, 0xe1, 0xcb, 0x3e, 0x3c, 0xc9, 0x38, 0xcd, 0xcf, 0x3a, + 0x30, 0xc5, 0xc7, 0x32, 0xc3, 0x36, 0x34, 0xc1, 0x20, 0xd5, + 0xd7, 0x22, 0xd3, 0x26, 0x24, 0xd1, 0xdb, 0x2e, 0x2c, 0xd9, + 0x28, 0xdd, 0xdf, 0x2a, 0x8b, 0x7e, 0x7c, 0x89, 0x78, 0x8d, + 0x8f, 0x7a, 0x70, 0x85, 0x87, 0x72, 0x83, 0x76, 0x74, 0x81, + 0x60, 0x95, 0x97, 0x62, 0x93, 0x66, 0x64, 0x91, 0x9b, 0x6e, + 0x6c, 0x99, 0x68, 0x9d, 0x9f, 0x6a, 0x40, 0xb5, 0xb7, 0x42, + 0xb3, 0x46, 0x44, 0xb1, 0xbb, 0x4e, 0x4c, 0xb9, 0x48, 0xbd, + 0xbf, 0x4a, 0xab, 0x5e, 0x5c, 0xa9, 0x58, 0xad, 0xaf, 0x5a, + 0x50, 0xa5, 0xa7, 0x52, 0xa3, 0x56, 0x54, 0xa1, 0x0b, 0xfe, + 0xfc, 0x09, 0xf8, 0x0d, 0x0f, 0xfa, 0xf0, 0x05, 0x07, 0xf2, + 0x03, 0xf6, 0xf4, 0x01, 0xe0, 0x15, 0x17, 0xe2, 0x13, 0xe6, + 0xe4, 0x11, 0x1b, 0xee, 0xec, 0x19, 0xe8, 0x1d, 0x1f, 0xea, + 0xc0, 0x35, 0x37, 0xc2, 0x33, 0xc6, 0xc4, 0x31, 0x3b, 0xce, + 0xcc, 0x39, 0xc8, 0x3d, 0x3f, 0xca, 0x2b, 0xde, 0xdc, 0x29, + 0xd8, 0x2d, 0x2f, 0xda, 0xd0, 0x25, 0x27, 0xd2, 0x23, 0xd6, + 0xd4, 0x21, 0x80, 0x75, 0x77, 0x82, 0x73, 0x86, 0x84, 0x71, + 0x7b, 0x8e, 0x8c, 0x79, 0x88, 0x7d, 0x7f, 0x8a, 0x6b, 0x9e, + 0x9c, 0x69, 0x98, 0x6d, 0x6f, 0x9a, 0x90, 0x65, 0x67, 0x92, + 0x63, 0x96, 0x94, 0x61, 0x4b, 0xbe, 0xbc, 0x49, 0xb8, 0x4d, + 0x4f, 0xba, 0xb0, 0x45, 0x47, 0xb2, 0x43, 0xb6, 0xb4, 0x41, + 0xa0, 0x55, 0x57, 0xa2, 0x53, 0xa6, 0xa4, 0x51, 0x5b, 0xae, + 0xac, 0x59, 0xa8, 0x5d, 0x5f, 0xaa, 0x00, 0xf6, 0xf1, 0x07, + 0xff, 0x09, 0x0e, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, + 0xed, 0x1b, 0xdb, 0x2d, 0x2a, 0xdc, 0x24, 0xd2, 0xd5, 0x23, + 0x38, 0xce, 0xc9, 0x3f, 0xc7, 0x31, 0x36, 0xc0, 0xab, 0x5d, + 0x5a, 0xac, 0x54, 0xa2, 0xa5, 0x53, 0x48, 0xbe, 0xb9, 0x4f, + 0xb7, 0x41, 0x46, 0xb0, 0x70, 0x86, 0x81, 0x77, 0x8f, 0x79, + 0x7e, 0x88, 0x93, 0x65, 0x62, 0x94, 0x6c, 0x9a, 
0x9d, 0x6b, + 0x4b, 0xbd, 0xba, 0x4c, 0xb4, 0x42, 0x45, 0xb3, 0xa8, 0x5e, + 0x59, 0xaf, 0x57, 0xa1, 0xa6, 0x50, 0x90, 0x66, 0x61, 0x97, + 0x6f, 0x99, 0x9e, 0x68, 0x73, 0x85, 0x82, 0x74, 0x8c, 0x7a, + 0x7d, 0x8b, 0xe0, 0x16, 0x11, 0xe7, 0x1f, 0xe9, 0xee, 0x18, + 0x03, 0xf5, 0xf2, 0x04, 0xfc, 0x0a, 0x0d, 0xfb, 0x3b, 0xcd, + 0xca, 0x3c, 0xc4, 0x32, 0x35, 0xc3, 0xd8, 0x2e, 0x29, 0xdf, + 0x27, 0xd1, 0xd6, 0x20, 0x96, 0x60, 0x67, 0x91, 0x69, 0x9f, + 0x98, 0x6e, 0x75, 0x83, 0x84, 0x72, 0x8a, 0x7c, 0x7b, 0x8d, + 0x4d, 0xbb, 0xbc, 0x4a, 0xb2, 0x44, 0x43, 0xb5, 0xae, 0x58, + 0x5f, 0xa9, 0x51, 0xa7, 0xa0, 0x56, 0x3d, 0xcb, 0xcc, 0x3a, + 0xc2, 0x34, 0x33, 0xc5, 0xde, 0x28, 0x2f, 0xd9, 0x21, 0xd7, + 0xd0, 0x26, 0xe6, 0x10, 0x17, 0xe1, 0x19, 0xef, 0xe8, 0x1e, + 0x05, 0xf3, 0xf4, 0x02, 0xfa, 0x0c, 0x0b, 0xfd, 0xdd, 0x2b, + 0x2c, 0xda, 0x22, 0xd4, 0xd3, 0x25, 0x3e, 0xc8, 0xcf, 0x39, + 0xc1, 0x37, 0x30, 0xc6, 0x06, 0xf0, 0xf7, 0x01, 0xf9, 0x0f, + 0x08, 0xfe, 0xe5, 0x13, 0x14, 0xe2, 0x1a, 0xec, 0xeb, 0x1d, + 0x76, 0x80, 0x87, 0x71, 0x89, 0x7f, 0x78, 0x8e, 0x95, 0x63, + 0x64, 0x92, 0x6a, 0x9c, 0x9b, 0x6d, 0xad, 0x5b, 0x5c, 0xaa, + 0x52, 0xa4, 0xa3, 0x55, 0x4e, 0xb8, 0xbf, 0x49, 0xb1, 0x47, + 0x40, 0xb6, 0x00, 0xf7, 0xf3, 0x04, 0xfb, 0x0c, 0x08, 0xff, + 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14, 0xcb, 0x3c, + 0x38, 0xcf, 0x30, 0xc7, 0xc3, 0x34, 0x20, 0xd7, 0xd3, 0x24, + 0xdb, 0x2c, 0x28, 0xdf, 0x8b, 0x7c, 0x78, 0x8f, 0x70, 0x87, + 0x83, 0x74, 0x60, 0x97, 0x93, 0x64, 0x9b, 0x6c, 0x68, 0x9f, + 0x40, 0xb7, 0xb3, 0x44, 0xbb, 0x4c, 0x48, 0xbf, 0xab, 0x5c, + 0x58, 0xaf, 0x50, 0xa7, 0xa3, 0x54, 0x0b, 0xfc, 0xf8, 0x0f, + 0xf0, 0x07, 0x03, 0xf4, 0xe0, 0x17, 0x13, 0xe4, 0x1b, 0xec, + 0xe8, 0x1f, 0xc0, 0x37, 0x33, 0xc4, 0x3b, 0xcc, 0xc8, 0x3f, + 0x2b, 0xdc, 0xd8, 0x2f, 0xd0, 0x27, 0x23, 0xd4, 0x80, 0x77, + 0x73, 0x84, 0x7b, 0x8c, 0x88, 0x7f, 0x6b, 0x9c, 0x98, 0x6f, + 0x90, 0x67, 0x63, 0x94, 0x4b, 0xbc, 0xb8, 0x4f, 0xb0, 0x47, + 0x43, 0xb4, 0xa0, 0x57, 0x53, 0xa4, 0x5b, 0xac, 0xa8, 0x5f, + 0x16, 0xe1, 0xe5, 0x12, 0xed, 0x1a, 0x1e, 0xe9, 0xfd, 0x0a, + 0x0e, 0xf9, 0x06, 0xf1, 0xf5, 0x02, 0xdd, 0x2a, 0x2e, 0xd9, + 0x26, 0xd1, 0xd5, 0x22, 0x36, 0xc1, 0xc5, 0x32, 0xcd, 0x3a, + 0x3e, 0xc9, 0x9d, 0x6a, 0x6e, 0x99, 0x66, 0x91, 0x95, 0x62, + 0x76, 0x81, 0x85, 0x72, 0x8d, 0x7a, 0x7e, 0x89, 0x56, 0xa1, + 0xa5, 0x52, 0xad, 0x5a, 0x5e, 0xa9, 0xbd, 0x4a, 0x4e, 0xb9, + 0x46, 0xb1, 0xb5, 0x42, 0x1d, 0xea, 0xee, 0x19, 0xe6, 0x11, + 0x15, 0xe2, 0xf6, 0x01, 0x05, 0xf2, 0x0d, 0xfa, 0xfe, 0x09, + 0xd6, 0x21, 0x25, 0xd2, 0x2d, 0xda, 0xde, 0x29, 0x3d, 0xca, + 0xce, 0x39, 0xc6, 0x31, 0x35, 0xc2, 0x96, 0x61, 0x65, 0x92, + 0x6d, 0x9a, 0x9e, 0x69, 0x7d, 0x8a, 0x8e, 0x79, 0x86, 0x71, + 0x75, 0x82, 0x5d, 0xaa, 0xae, 0x59, 0xa6, 0x51, 0x55, 0xa2, + 0xb6, 0x41, 0x45, 0xb2, 0x4d, 0xba, 0xbe, 0x49, 0x00, 0xf8, + 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, + 0x54, 0xac, 0xb9, 0x41, 0x3b, 0xc3, 0xd6, 0x2e, 0xfc, 0x04, + 0x11, 0xe9, 0xa8, 0x50, 0x45, 0xbd, 0x6f, 0x97, 0x82, 0x7a, + 0x76, 0x8e, 0x9b, 0x63, 0xb1, 0x49, 0x5c, 0xa4, 0xe5, 0x1d, + 0x08, 0xf0, 0x22, 0xda, 0xcf, 0x37, 0x4d, 0xb5, 0xa0, 0x58, + 0x8a, 0x72, 0x67, 0x9f, 0xde, 0x26, 0x33, 0xcb, 0x19, 0xe1, + 0xf4, 0x0c, 0xec, 0x14, 0x01, 0xf9, 0x2b, 0xd3, 0xc6, 0x3e, + 0x7f, 0x87, 0x92, 0x6a, 0xb8, 0x40, 0x55, 0xad, 0xd7, 0x2f, + 0x3a, 0xc2, 0x10, 0xe8, 0xfd, 0x05, 0x44, 0xbc, 0xa9, 0x51, + 0x83, 0x7b, 0x6e, 0x96, 0x9a, 0x62, 0x77, 0x8f, 0x5d, 0xa5, + 0xb0, 0x48, 0x09, 0xf1, 0xe4, 0x1c, 0xce, 0x36, 0x23, 0xdb, + 0xa1, 0x59, 0x4c, 0xb4, 0x66, 0x9e, 0x8b, 0x73, 0x32, 0xca, + 0xdf, 
0x27, 0xf5, 0x0d, 0x18, 0xe0, 0xc5, 0x3d, 0x28, 0xd0, + 0x02, 0xfa, 0xef, 0x17, 0x56, 0xae, 0xbb, 0x43, 0x91, 0x69, + 0x7c, 0x84, 0xfe, 0x06, 0x13, 0xeb, 0x39, 0xc1, 0xd4, 0x2c, + 0x6d, 0x95, 0x80, 0x78, 0xaa, 0x52, 0x47, 0xbf, 0xb3, 0x4b, + 0x5e, 0xa6, 0x74, 0x8c, 0x99, 0x61, 0x20, 0xd8, 0xcd, 0x35, + 0xe7, 0x1f, 0x0a, 0xf2, 0x88, 0x70, 0x65, 0x9d, 0x4f, 0xb7, + 0xa2, 0x5a, 0x1b, 0xe3, 0xf6, 0x0e, 0xdc, 0x24, 0x31, 0xc9, + 0x29, 0xd1, 0xc4, 0x3c, 0xee, 0x16, 0x03, 0xfb, 0xba, 0x42, + 0x57, 0xaf, 0x7d, 0x85, 0x90, 0x68, 0x12, 0xea, 0xff, 0x07, + 0xd5, 0x2d, 0x38, 0xc0, 0x81, 0x79, 0x6c, 0x94, 0x46, 0xbe, + 0xab, 0x53, 0x5f, 0xa7, 0xb2, 0x4a, 0x98, 0x60, 0x75, 0x8d, + 0xcc, 0x34, 0x21, 0xd9, 0x0b, 0xf3, 0xe6, 0x1e, 0x64, 0x9c, + 0x89, 0x71, 0xa3, 0x5b, 0x4e, 0xb6, 0xf7, 0x0f, 0x1a, 0xe2, + 0x30, 0xc8, 0xdd, 0x25, 0x00, 0xf9, 0xef, 0x16, 0xc3, 0x3a, + 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e, + 0x2b, 0xd2, 0xc4, 0x3d, 0xe8, 0x11, 0x07, 0xfe, 0xb0, 0x49, + 0x5f, 0xa6, 0x73, 0x8a, 0x9c, 0x65, 0x56, 0xaf, 0xb9, 0x40, + 0x95, 0x6c, 0x7a, 0x83, 0xcd, 0x34, 0x22, 0xdb, 0x0e, 0xf7, + 0xe1, 0x18, 0x7d, 0x84, 0x92, 0x6b, 0xbe, 0x47, 0x51, 0xa8, + 0xe6, 0x1f, 0x09, 0xf0, 0x25, 0xdc, 0xca, 0x33, 0xac, 0x55, + 0x43, 0xba, 0x6f, 0x96, 0x80, 0x79, 0x37, 0xce, 0xd8, 0x21, + 0xf4, 0x0d, 0x1b, 0xe2, 0x87, 0x7e, 0x68, 0x91, 0x44, 0xbd, + 0xab, 0x52, 0x1c, 0xe5, 0xf3, 0x0a, 0xdf, 0x26, 0x30, 0xc9, + 0xfa, 0x03, 0x15, 0xec, 0x39, 0xc0, 0xd6, 0x2f, 0x61, 0x98, + 0x8e, 0x77, 0xa2, 0x5b, 0x4d, 0xb4, 0xd1, 0x28, 0x3e, 0xc7, + 0x12, 0xeb, 0xfd, 0x04, 0x4a, 0xb3, 0xa5, 0x5c, 0x89, 0x70, + 0x66, 0x9f, 0x45, 0xbc, 0xaa, 0x53, 0x86, 0x7f, 0x69, 0x90, + 0xde, 0x27, 0x31, 0xc8, 0x1d, 0xe4, 0xf2, 0x0b, 0x6e, 0x97, + 0x81, 0x78, 0xad, 0x54, 0x42, 0xbb, 0xf5, 0x0c, 0x1a, 0xe3, + 0x36, 0xcf, 0xd9, 0x20, 0x13, 0xea, 0xfc, 0x05, 0xd0, 0x29, + 0x3f, 0xc6, 0x88, 0x71, 0x67, 0x9e, 0x4b, 0xb2, 0xa4, 0x5d, + 0x38, 0xc1, 0xd7, 0x2e, 0xfb, 0x02, 0x14, 0xed, 0xa3, 0x5a, + 0x4c, 0xb5, 0x60, 0x99, 0x8f, 0x76, 0xe9, 0x10, 0x06, 0xff, + 0x2a, 0xd3, 0xc5, 0x3c, 0x72, 0x8b, 0x9d, 0x64, 0xb1, 0x48, + 0x5e, 0xa7, 0xc2, 0x3b, 0x2d, 0xd4, 0x01, 0xf8, 0xee, 0x17, + 0x59, 0xa0, 0xb6, 0x4f, 0x9a, 0x63, 0x75, 0x8c, 0xbf, 0x46, + 0x50, 0xa9, 0x7c, 0x85, 0x93, 0x6a, 0x24, 0xdd, 0xcb, 0x32, + 0xe7, 0x1e, 0x08, 0xf1, 0x94, 0x6d, 0x7b, 0x82, 0x57, 0xae, + 0xb8, 0x41, 0x0f, 0xf6, 0xe0, 0x19, 0xcc, 0x35, 0x23, 0xda, + 0x00, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79, + 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f, 0x1b, 0xe1, 0xf2, 0x08, + 0xd4, 0x2e, 0x3d, 0xc7, 0x98, 0x62, 0x71, 0x8b, 0x57, 0xad, + 0xbe, 0x44, 0x36, 0xcc, 0xdf, 0x25, 0xf9, 0x03, 0x10, 0xea, + 0xb5, 0x4f, 0x5c, 0xa6, 0x7a, 0x80, 0x93, 0x69, 0x2d, 0xd7, + 0xc4, 0x3e, 0xe2, 0x18, 0x0b, 0xf1, 0xae, 0x54, 0x47, 0xbd, + 0x61, 0x9b, 0x88, 0x72, 0x6c, 0x96, 0x85, 0x7f, 0xa3, 0x59, + 0x4a, 0xb0, 0xef, 0x15, 0x06, 0xfc, 0x20, 0xda, 0xc9, 0x33, + 0x77, 0x8d, 0x9e, 0x64, 0xb8, 0x42, 0x51, 0xab, 0xf4, 0x0e, + 0x1d, 0xe7, 0x3b, 0xc1, 0xd2, 0x28, 0x5a, 0xa0, 0xb3, 0x49, + 0x95, 0x6f, 0x7c, 0x86, 0xd9, 0x23, 0x30, 0xca, 0x16, 0xec, + 0xff, 0x05, 0x41, 0xbb, 0xa8, 0x52, 0x8e, 0x74, 0x67, 0x9d, + 0xc2, 0x38, 0x2b, 0xd1, 0x0d, 0xf7, 0xe4, 0x1e, 0xd8, 0x22, + 0x31, 0xcb, 0x17, 0xed, 0xfe, 0x04, 0x5b, 0xa1, 0xb2, 0x48, + 0x94, 0x6e, 0x7d, 0x87, 0xc3, 0x39, 0x2a, 0xd0, 0x0c, 0xf6, + 0xe5, 0x1f, 0x40, 0xba, 0xa9, 0x53, 0x8f, 0x75, 0x66, 0x9c, + 0xee, 0x14, 0x07, 0xfd, 0x21, 0xdb, 0xc8, 0x32, 0x6d, 0x97, + 0x84, 0x7e, 0xa2, 0x58, 0x4b, 0xb1, 0xf5, 0x0f, 0x1c, 0xe6, + 0x3a, 0xc0, 0xd3, 0x29, 
0x76, 0x8c, 0x9f, 0x65, 0xb9, 0x43, + 0x50, 0xaa, 0xb4, 0x4e, 0x5d, 0xa7, 0x7b, 0x81, 0x92, 0x68, + 0x37, 0xcd, 0xde, 0x24, 0xf8, 0x02, 0x11, 0xeb, 0xaf, 0x55, + 0x46, 0xbc, 0x60, 0x9a, 0x89, 0x73, 0x2c, 0xd6, 0xc5, 0x3f, + 0xe3, 0x19, 0x0a, 0xf0, 0x82, 0x78, 0x6b, 0x91, 0x4d, 0xb7, + 0xa4, 0x5e, 0x01, 0xfb, 0xe8, 0x12, 0xce, 0x34, 0x27, 0xdd, + 0x99, 0x63, 0x70, 0x8a, 0x56, 0xac, 0xbf, 0x45, 0x1a, 0xe0, + 0xf3, 0x09, 0xd5, 0x2f, 0x3c, 0xc6, 0x00, 0xfb, 0xeb, 0x10, + 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, + 0xab, 0x50, 0x0b, 0xf0, 0xe0, 0x1b, 0xc0, 0x3b, 0x2b, 0xd0, + 0x80, 0x7b, 0x6b, 0x90, 0x4b, 0xb0, 0xa0, 0x5b, 0x16, 0xed, + 0xfd, 0x06, 0xdd, 0x26, 0x36, 0xcd, 0x9d, 0x66, 0x76, 0x8d, + 0x56, 0xad, 0xbd, 0x46, 0x1d, 0xe6, 0xf6, 0x0d, 0xd6, 0x2d, + 0x3d, 0xc6, 0x96, 0x6d, 0x7d, 0x86, 0x5d, 0xa6, 0xb6, 0x4d, + 0x2c, 0xd7, 0xc7, 0x3c, 0xe7, 0x1c, 0x0c, 0xf7, 0xa7, 0x5c, + 0x4c, 0xb7, 0x6c, 0x97, 0x87, 0x7c, 0x27, 0xdc, 0xcc, 0x37, + 0xec, 0x17, 0x07, 0xfc, 0xac, 0x57, 0x47, 0xbc, 0x67, 0x9c, + 0x8c, 0x77, 0x3a, 0xc1, 0xd1, 0x2a, 0xf1, 0x0a, 0x1a, 0xe1, + 0xb1, 0x4a, 0x5a, 0xa1, 0x7a, 0x81, 0x91, 0x6a, 0x31, 0xca, + 0xda, 0x21, 0xfa, 0x01, 0x11, 0xea, 0xba, 0x41, 0x51, 0xaa, + 0x71, 0x8a, 0x9a, 0x61, 0x58, 0xa3, 0xb3, 0x48, 0x93, 0x68, + 0x78, 0x83, 0xd3, 0x28, 0x38, 0xc3, 0x18, 0xe3, 0xf3, 0x08, + 0x53, 0xa8, 0xb8, 0x43, 0x98, 0x63, 0x73, 0x88, 0xd8, 0x23, + 0x33, 0xc8, 0x13, 0xe8, 0xf8, 0x03, 0x4e, 0xb5, 0xa5, 0x5e, + 0x85, 0x7e, 0x6e, 0x95, 0xc5, 0x3e, 0x2e, 0xd5, 0x0e, 0xf5, + 0xe5, 0x1e, 0x45, 0xbe, 0xae, 0x55, 0x8e, 0x75, 0x65, 0x9e, + 0xce, 0x35, 0x25, 0xde, 0x05, 0xfe, 0xee, 0x15, 0x74, 0x8f, + 0x9f, 0x64, 0xbf, 0x44, 0x54, 0xaf, 0xff, 0x04, 0x14, 0xef, + 0x34, 0xcf, 0xdf, 0x24, 0x7f, 0x84, 0x94, 0x6f, 0xb4, 0x4f, + 0x5f, 0xa4, 0xf4, 0x0f, 0x1f, 0xe4, 0x3f, 0xc4, 0xd4, 0x2f, + 0x62, 0x99, 0x89, 0x72, 0xa9, 0x52, 0x42, 0xb9, 0xe9, 0x12, + 0x02, 0xf9, 0x22, 0xd9, 0xc9, 0x32, 0x69, 0x92, 0x82, 0x79, + 0xa2, 0x59, 0x49, 0xb2, 0xe2, 0x19, 0x09, 0xf2, 0x29, 0xd2, + 0xc2, 0x39, 0x00, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, + 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d, 0x7b, 0x87, + 0x9e, 0x62, 0xac, 0x50, 0x49, 0xb5, 0xc8, 0x34, 0x2d, 0xd1, + 0x1f, 0xe3, 0xfa, 0x06, 0xf6, 0x0a, 0x13, 0xef, 0x21, 0xdd, + 0xc4, 0x38, 0x45, 0xb9, 0xa0, 0x5c, 0x92, 0x6e, 0x77, 0x8b, + 0x8d, 0x71, 0x68, 0x94, 0x5a, 0xa6, 0xbf, 0x43, 0x3e, 0xc2, + 0xdb, 0x27, 0xe9, 0x15, 0x0c, 0xf0, 0xf1, 0x0d, 0x14, 0xe8, + 0x26, 0xda, 0xc3, 0x3f, 0x42, 0xbe, 0xa7, 0x5b, 0x95, 0x69, + 0x70, 0x8c, 0x8a, 0x76, 0x6f, 0x93, 0x5d, 0xa1, 0xb8, 0x44, + 0x39, 0xc5, 0xdc, 0x20, 0xee, 0x12, 0x0b, 0xf7, 0x07, 0xfb, + 0xe2, 0x1e, 0xd0, 0x2c, 0x35, 0xc9, 0xb4, 0x48, 0x51, 0xad, + 0x63, 0x9f, 0x86, 0x7a, 0x7c, 0x80, 0x99, 0x65, 0xab, 0x57, + 0x4e, 0xb2, 0xcf, 0x33, 0x2a, 0xd6, 0x18, 0xe4, 0xfd, 0x01, + 0xff, 0x03, 0x1a, 0xe6, 0x28, 0xd4, 0xcd, 0x31, 0x4c, 0xb0, + 0xa9, 0x55, 0x9b, 0x67, 0x7e, 0x82, 0x84, 0x78, 0x61, 0x9d, + 0x53, 0xaf, 0xb6, 0x4a, 0x37, 0xcb, 0xd2, 0x2e, 0xe0, 0x1c, + 0x05, 0xf9, 0x09, 0xf5, 0xec, 0x10, 0xde, 0x22, 0x3b, 0xc7, + 0xba, 0x46, 0x5f, 0xa3, 0x6d, 0x91, 0x88, 0x74, 0x72, 0x8e, + 0x97, 0x6b, 0xa5, 0x59, 0x40, 0xbc, 0xc1, 0x3d, 0x24, 0xd8, + 0x16, 0xea, 0xf3, 0x0f, 0x0e, 0xf2, 0xeb, 0x17, 0xd9, 0x25, + 0x3c, 0xc0, 0xbd, 0x41, 0x58, 0xa4, 0x6a, 0x96, 0x8f, 0x73, + 0x75, 0x89, 0x90, 0x6c, 0xa2, 0x5e, 0x47, 0xbb, 0xc6, 0x3a, + 0x23, 0xdf, 0x11, 0xed, 0xf4, 0x08, 0xf8, 0x04, 0x1d, 0xe1, + 0x2f, 0xd3, 0xca, 0x36, 0x4b, 0xb7, 0xae, 0x52, 0x9c, 0x60, + 0x79, 0x85, 0x83, 0x7f, 0x66, 0x9a, 0x54, 
0xa8, 0xb1, 0x4d, + 0x30, 0xcc, 0xd5, 0x29, 0xe7, 0x1b, 0x02, 0xfe, 0x00, 0xfd, + 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, + 0x68, 0x95, 0x8f, 0x72, 0x6b, 0x96, 0x8c, 0x71, 0xb8, 0x45, + 0x5f, 0xa2, 0xd0, 0x2d, 0x37, 0xca, 0x03, 0xfe, 0xe4, 0x19, + 0xd6, 0x2b, 0x31, 0xcc, 0x05, 0xf8, 0xe2, 0x1f, 0x6d, 0x90, + 0x8a, 0x77, 0xbe, 0x43, 0x59, 0xa4, 0xbd, 0x40, 0x5a, 0xa7, + 0x6e, 0x93, 0x89, 0x74, 0x06, 0xfb, 0xe1, 0x1c, 0xd5, 0x28, + 0x32, 0xcf, 0xb1, 0x4c, 0x56, 0xab, 0x62, 0x9f, 0x85, 0x78, + 0x0a, 0xf7, 0xed, 0x10, 0xd9, 0x24, 0x3e, 0xc3, 0xda, 0x27, + 0x3d, 0xc0, 0x09, 0xf4, 0xee, 0x13, 0x61, 0x9c, 0x86, 0x7b, + 0xb2, 0x4f, 0x55, 0xa8, 0x67, 0x9a, 0x80, 0x7d, 0xb4, 0x49, + 0x53, 0xae, 0xdc, 0x21, 0x3b, 0xc6, 0x0f, 0xf2, 0xe8, 0x15, + 0x0c, 0xf1, 0xeb, 0x16, 0xdf, 0x22, 0x38, 0xc5, 0xb7, 0x4a, + 0x50, 0xad, 0x64, 0x99, 0x83, 0x7e, 0x7f, 0x82, 0x98, 0x65, + 0xac, 0x51, 0x4b, 0xb6, 0xc4, 0x39, 0x23, 0xde, 0x17, 0xea, + 0xf0, 0x0d, 0x14, 0xe9, 0xf3, 0x0e, 0xc7, 0x3a, 0x20, 0xdd, + 0xaf, 0x52, 0x48, 0xb5, 0x7c, 0x81, 0x9b, 0x66, 0xa9, 0x54, + 0x4e, 0xb3, 0x7a, 0x87, 0x9d, 0x60, 0x12, 0xef, 0xf5, 0x08, + 0xc1, 0x3c, 0x26, 0xdb, 0xc2, 0x3f, 0x25, 0xd8, 0x11, 0xec, + 0xf6, 0x0b, 0x79, 0x84, 0x9e, 0x63, 0xaa, 0x57, 0x4d, 0xb0, + 0xce, 0x33, 0x29, 0xd4, 0x1d, 0xe0, 0xfa, 0x07, 0x75, 0x88, + 0x92, 0x6f, 0xa6, 0x5b, 0x41, 0xbc, 0xa5, 0x58, 0x42, 0xbf, + 0x76, 0x8b, 0x91, 0x6c, 0x1e, 0xe3, 0xf9, 0x04, 0xcd, 0x30, + 0x2a, 0xd7, 0x18, 0xe5, 0xff, 0x02, 0xcb, 0x36, 0x2c, 0xd1, + 0xa3, 0x5e, 0x44, 0xb9, 0x70, 0x8d, 0x97, 0x6a, 0x73, 0x8e, + 0x94, 0x69, 0xa0, 0x5d, 0x47, 0xba, 0xc8, 0x35, 0x2f, 0xd2, + 0x1b, 0xe6, 0xfc, 0x01, 0x00, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, + 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63, + 0x5b, 0xa5, 0xba, 0x44, 0x84, 0x7a, 0x65, 0x9b, 0xf8, 0x06, + 0x19, 0xe7, 0x27, 0xd9, 0xc6, 0x38, 0xb6, 0x48, 0x57, 0xa9, + 0x69, 0x97, 0x88, 0x76, 0x15, 0xeb, 0xf4, 0x0a, 0xca, 0x34, + 0x2b, 0xd5, 0xed, 0x13, 0x0c, 0xf2, 0x32, 0xcc, 0xd3, 0x2d, + 0x4e, 0xb0, 0xaf, 0x51, 0x91, 0x6f, 0x70, 0x8e, 0x71, 0x8f, + 0x90, 0x6e, 0xae, 0x50, 0x4f, 0xb1, 0xd2, 0x2c, 0x33, 0xcd, + 0x0d, 0xf3, 0xec, 0x12, 0x2a, 0xd4, 0xcb, 0x35, 0xf5, 0x0b, + 0x14, 0xea, 0x89, 0x77, 0x68, 0x96, 0x56, 0xa8, 0xb7, 0x49, + 0xc7, 0x39, 0x26, 0xd8, 0x18, 0xe6, 0xf9, 0x07, 0x64, 0x9a, + 0x85, 0x7b, 0xbb, 0x45, 0x5a, 0xa4, 0x9c, 0x62, 0x7d, 0x83, + 0x43, 0xbd, 0xa2, 0x5c, 0x3f, 0xc1, 0xde, 0x20, 0xe0, 0x1e, + 0x01, 0xff, 0xe2, 0x1c, 0x03, 0xfd, 0x3d, 0xc3, 0xdc, 0x22, + 0x41, 0xbf, 0xa0, 0x5e, 0x9e, 0x60, 0x7f, 0x81, 0xb9, 0x47, + 0x58, 0xa6, 0x66, 0x98, 0x87, 0x79, 0x1a, 0xe4, 0xfb, 0x05, + 0xc5, 0x3b, 0x24, 0xda, 0x54, 0xaa, 0xb5, 0x4b, 0x8b, 0x75, + 0x6a, 0x94, 0xf7, 0x09, 0x16, 0xe8, 0x28, 0xd6, 0xc9, 0x37, + 0x0f, 0xf1, 0xee, 0x10, 0xd0, 0x2e, 0x31, 0xcf, 0xac, 0x52, + 0x4d, 0xb3, 0x73, 0x8d, 0x92, 0x6c, 0x93, 0x6d, 0x72, 0x8c, + 0x4c, 0xb2, 0xad, 0x53, 0x30, 0xce, 0xd1, 0x2f, 0xef, 0x11, + 0x0e, 0xf0, 0xc8, 0x36, 0x29, 0xd7, 0x17, 0xe9, 0xf6, 0x08, + 0x6b, 0x95, 0x8a, 0x74, 0xb4, 0x4a, 0x55, 0xab, 0x25, 0xdb, + 0xc4, 0x3a, 0xfa, 0x04, 0x1b, 0xe5, 0x86, 0x78, 0x67, 0x99, + 0x59, 0xa7, 0xb8, 0x46, 0x7e, 0x80, 0x9f, 0x61, 0xa1, 0x5f, + 0x40, 0xbe, 0xdd, 0x23, 0x3c, 0xc2, 0x02, 0xfc, 0xe3, 0x1d, + 0x00, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, + 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c, 0x4b, 0xb4, 0xa8, 0x57, + 0x90, 0x6f, 0x73, 0x8c, 0xe0, 0x1f, 0x03, 0xfc, 0x3b, 0xc4, + 0xd8, 0x27, 0x96, 0x69, 0x75, 0x8a, 0x4d, 0xb2, 0xae, 0x51, + 0x3d, 0xc2, 0xde, 0x21, 0xe6, 0x19, 0x05, 0xfa, 0xdd, 0x22, + 
0x3e, 0xc1, 0x06, 0xf9, 0xe5, 0x1a, 0x76, 0x89, 0x95, 0x6a, + 0xad, 0x52, 0x4e, 0xb1, 0x31, 0xce, 0xd2, 0x2d, 0xea, 0x15, + 0x09, 0xf6, 0x9a, 0x65, 0x79, 0x86, 0x41, 0xbe, 0xa2, 0x5d, + 0x7a, 0x85, 0x99, 0x66, 0xa1, 0x5e, 0x42, 0xbd, 0xd1, 0x2e, + 0x32, 0xcd, 0x0a, 0xf5, 0xe9, 0x16, 0xa7, 0x58, 0x44, 0xbb, + 0x7c, 0x83, 0x9f, 0x60, 0x0c, 0xf3, 0xef, 0x10, 0xd7, 0x28, + 0x34, 0xcb, 0xec, 0x13, 0x0f, 0xf0, 0x37, 0xc8, 0xd4, 0x2b, + 0x47, 0xb8, 0xa4, 0x5b, 0x9c, 0x63, 0x7f, 0x80, 0x62, 0x9d, + 0x81, 0x7e, 0xb9, 0x46, 0x5a, 0xa5, 0xc9, 0x36, 0x2a, 0xd5, + 0x12, 0xed, 0xf1, 0x0e, 0x29, 0xd6, 0xca, 0x35, 0xf2, 0x0d, + 0x11, 0xee, 0x82, 0x7d, 0x61, 0x9e, 0x59, 0xa6, 0xba, 0x45, + 0xf4, 0x0b, 0x17, 0xe8, 0x2f, 0xd0, 0xcc, 0x33, 0x5f, 0xa0, + 0xbc, 0x43, 0x84, 0x7b, 0x67, 0x98, 0xbf, 0x40, 0x5c, 0xa3, + 0x64, 0x9b, 0x87, 0x78, 0x14, 0xeb, 0xf7, 0x08, 0xcf, 0x30, + 0x2c, 0xd3, 0x53, 0xac, 0xb0, 0x4f, 0x88, 0x77, 0x6b, 0x94, + 0xf8, 0x07, 0x1b, 0xe4, 0x23, 0xdc, 0xc0, 0x3f, 0x18, 0xe7, + 0xfb, 0x04, 0xc3, 0x3c, 0x20, 0xdf, 0xb3, 0x4c, 0x50, 0xaf, + 0x68, 0x97, 0x8b, 0x74, 0xc5, 0x3a, 0x26, 0xd9, 0x1e, 0xe1, + 0xfd, 0x02, 0x6e, 0x91, 0x8d, 0x72, 0xb5, 0x4a, 0x56, 0xa9, + 0x8e, 0x71, 0x6d, 0x92, 0x55, 0xaa, 0xb6, 0x49, 0x25, 0xda, + 0xc6, 0x39, 0xfe, 0x01, 0x1d, 0xe2 +}; + +static const unsigned char gf_inv_table_base[] = { + 0x00, 0x01, 0x8e, 0xf4, 0x47, 0xa7, 0x7a, 0xba, 0xad, 0x9d, + 0xdd, 0x98, 0x3d, 0xaa, 0x5d, 0x96, 0xd8, 0x72, 0xc0, 0x58, + 0xe0, 0x3e, 0x4c, 0x66, 0x90, 0xde, 0x55, 0x80, 0xa0, 0x83, + 0x4b, 0x2a, 0x6c, 0xed, 0x39, 0x51, 0x60, 0x56, 0x2c, 0x8a, + 0x70, 0xd0, 0x1f, 0x4a, 0x26, 0x8b, 0x33, 0x6e, 0x48, 0x89, + 0x6f, 0x2e, 0xa4, 0xc3, 0x40, 0x5e, 0x50, 0x22, 0xcf, 0xa9, + 0xab, 0x0c, 0x15, 0xe1, 0x36, 0x5f, 0xf8, 0xd5, 0x92, 0x4e, + 0xa6, 0x04, 0x30, 0x88, 0x2b, 0x1e, 0x16, 0x67, 0x45, 0x93, + 0x38, 0x23, 0x68, 0x8c, 0x81, 0x1a, 0x25, 0x61, 0x13, 0xc1, + 0xcb, 0x63, 0x97, 0x0e, 0x37, 0x41, 0x24, 0x57, 0xca, 0x5b, + 0xb9, 0xc4, 0x17, 0x4d, 0x52, 0x8d, 0xef, 0xb3, 0x20, 0xec, + 0x2f, 0x32, 0x28, 0xd1, 0x11, 0xd9, 0xe9, 0xfb, 0xda, 0x79, + 0xdb, 0x77, 0x06, 0xbb, 0x84, 0xcd, 0xfe, 0xfc, 0x1b, 0x54, + 0xa1, 0x1d, 0x7c, 0xcc, 0xe4, 0xb0, 0x49, 0x31, 0x27, 0x2d, + 0x53, 0x69, 0x02, 0xf5, 0x18, 0xdf, 0x44, 0x4f, 0x9b, 0xbc, + 0x0f, 0x5c, 0x0b, 0xdc, 0xbd, 0x94, 0xac, 0x09, 0xc7, 0xa2, + 0x1c, 0x82, 0x9f, 0xc6, 0x34, 0xc2, 0x46, 0x05, 0xce, 0x3b, + 0x0d, 0x3c, 0x9c, 0x08, 0xbe, 0xb7, 0x87, 0xe5, 0xee, 0x6b, + 0xeb, 0xf2, 0xbf, 0xaf, 0xc5, 0x64, 0x07, 0x7b, 0x95, 0x9a, + 0xae, 0xb6, 0x12, 0x59, 0xa5, 0x35, 0x65, 0xb8, 0xa3, 0x9e, + 0xd2, 0xf7, 0x62, 0x5a, 0x85, 0x7d, 0xa8, 0x3a, 0x29, 0x71, + 0xc8, 0xf6, 0xf9, 0x43, 0xd7, 0xd6, 0x10, 0x73, 0x76, 0x78, + 0x99, 0x0a, 0x19, 0x91, 0x14, 0x3f, 0xe6, 0xf0, 0x86, 0xb1, + 0xe2, 0xf1, 0xfa, 0x74, 0xf3, 0xb4, 0x6d, 0x21, 0xb2, 0x6a, + 0xe3, 0xe7, 0xb5, 0xea, 0x03, 0x8f, 0xd3, 0xc9, 0x42, 0xd4, + 0xe8, 0x75, 0x7f, 0xff, 0x7e, 0xfd +}; +#endif // GF_LARGE_TABLES + +#endif //_EC_BASE_H_ diff --git a/src/isa-l/erasure_code/ec_base_aliases.c b/src/isa-l/erasure_code/ec_base_aliases.c new file mode 100644 index 000000000..d046ff61a --- /dev/null +++ b/src/isa-l/erasure_code/ec_base_aliases.c @@ -0,0 +1,61 @@ +/********************************************************************** + Copyright(c) 2011-2017 Intel Corporation All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include "erasure_code.h" + +void gf_vect_dot_prod(int len, int vlen, unsigned char *v, + unsigned char **src, unsigned char *dest) +{ + gf_vect_dot_prod_base(len, vlen, v, src, dest); +} + +void gf_vect_mad(int len, int vec, int vec_i, + unsigned char *v, unsigned char *src, unsigned char *dest) +{ + gf_vect_mad_base(len, vec, vec_i, v, src, dest); + +} + +void ec_encode_data(int len, int srcs, int dests, unsigned char *v, + unsigned char **src, unsigned char **dest) +{ + ec_encode_data_base(len, srcs, dests, v, src, dest); +} + +void ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *v, + unsigned char *data, unsigned char **dest) +{ + ec_encode_data_update_base(len, k, rows, vec_i, v, data, dest); +} + +int gf_vect_mul(int len, unsigned char *a, void *src, void *dest) +{ + gf_vect_mul_base(len, a, (unsigned char *)src, (unsigned char *)dest); + return 0; +} diff --git a/src/isa-l/erasure_code/ec_highlevel_func.c b/src/isa-l/erasure_code/ec_highlevel_func.c new file mode 100644 index 000000000..a9fe6abb5 --- /dev/null +++ b/src/isa-l/erasure_code/ec_highlevel_func.c @@ -0,0 +1,374 @@ +/********************************************************************** + Copyright(c) 2011-2019 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ +#include +#include "erasure_code.h" + +#if __x86_64__ || __i386__ || _M_X64 || _M_IX86 +void ec_encode_data_sse(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data, + unsigned char **coding) +{ + + if (len < 16) { + ec_encode_data_base(len, k, rows, g_tbls, data, coding); + return; + } + + while (rows >= 6) { + gf_6vect_dot_prod_sse(len, k, g_tbls, data, coding); + g_tbls += 6 * k * 32; + coding += 6; + rows -= 6; + } + switch (rows) { + case 5: + gf_5vect_dot_prod_sse(len, k, g_tbls, data, coding); + break; + case 4: + gf_4vect_dot_prod_sse(len, k, g_tbls, data, coding); + break; + case 3: + gf_3vect_dot_prod_sse(len, k, g_tbls, data, coding); + break; + case 2: + gf_2vect_dot_prod_sse(len, k, g_tbls, data, coding); + break; + case 1: + gf_vect_dot_prod_sse(len, k, g_tbls, data, *coding); + break; + case 0: + break; + } + +} + +void ec_encode_data_avx(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data, + unsigned char **coding) +{ + if (len < 16) { + ec_encode_data_base(len, k, rows, g_tbls, data, coding); + return; + } + + while (rows >= 6) { + gf_6vect_dot_prod_avx(len, k, g_tbls, data, coding); + g_tbls += 6 * k * 32; + coding += 6; + rows -= 6; + } + switch (rows) { + case 5: + gf_5vect_dot_prod_avx(len, k, g_tbls, data, coding); + break; + case 4: + gf_4vect_dot_prod_avx(len, k, g_tbls, data, coding); + break; + case 3: + gf_3vect_dot_prod_avx(len, k, g_tbls, data, coding); + break; + case 2: + gf_2vect_dot_prod_avx(len, k, g_tbls, data, coding); + break; + case 1: + gf_vect_dot_prod_avx(len, k, g_tbls, data, *coding); + break; + case 0: + break; + } + +} + +void ec_encode_data_avx2(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data, + unsigned char **coding) +{ + + if (len < 32) { + ec_encode_data_base(len, k, rows, g_tbls, data, coding); + return; + } + + while (rows >= 6) { + gf_6vect_dot_prod_avx2(len, k, g_tbls, data, coding); + g_tbls += 6 * k * 32; + coding += 6; + rows -= 6; + } + switch (rows) { + case 5: + gf_5vect_dot_prod_avx2(len, k, g_tbls, data, coding); + break; + case 4: + gf_4vect_dot_prod_avx2(len, k, g_tbls, data, coding); + break; + case 3: + gf_3vect_dot_prod_avx2(len, k, g_tbls, data, coding); + break; + case 2: + gf_2vect_dot_prod_avx2(len, k, g_tbls, data, coding); + break; + case 1: + gf_vect_dot_prod_avx2(len, k, g_tbls, data, *coding); + break; + case 0: + break; + } + +} + +#ifdef HAVE_AS_KNOWS_AVX512 + +extern int gf_vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char **data, + unsigned char *dest); +extern int gf_2vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, + unsigned char **data, unsigned char **coding); +extern int 
gf_3vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, + unsigned char **data, unsigned char **coding); +extern int gf_4vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, + unsigned char **data, unsigned char **coding); +extern int gf_5vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, + unsigned char **data, unsigned char **coding); +extern int gf_6vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, + unsigned char **data, unsigned char **coding); +extern void gf_vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, + unsigned char *src, unsigned char *dest); +extern void gf_2vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, + unsigned char *src, unsigned char **dest); +extern void gf_3vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, + unsigned char *src, unsigned char **dest); +extern void gf_4vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, + unsigned char *src, unsigned char **dest); +extern void gf_5vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, + unsigned char *src, unsigned char **dest); +extern void gf_6vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, + unsigned char *src, unsigned char **dest); + +void ec_encode_data_avx512(int len, int k, int rows, unsigned char *g_tbls, + unsigned char **data, unsigned char **coding) +{ + + if (len < 64) { + ec_encode_data_base(len, k, rows, g_tbls, data, coding); + return; + } + + while (rows >= 6) { + gf_6vect_dot_prod_avx512(len, k, g_tbls, data, coding); + g_tbls += 6 * k * 32; + coding += 6; + rows -= 6; + } + switch (rows) { + case 5: + gf_5vect_dot_prod_avx512(len, k, g_tbls, data, coding); + break; + case 4: + gf_4vect_dot_prod_avx512(len, k, g_tbls, data, coding); + break; + case 3: + gf_3vect_dot_prod_avx512(len, k, g_tbls, data, coding); + break; + case 2: + gf_2vect_dot_prod_avx512(len, k, g_tbls, data, coding); + break; + case 1: + gf_vect_dot_prod_avx512(len, k, g_tbls, data, *coding); + break; + case 0: + break; + } +} + +void ec_encode_data_update_avx512(int len, int k, int rows, int vec_i, unsigned char *g_tbls, + unsigned char *data, unsigned char **coding) +{ + if (len < 64) { + ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding); + return; + } + + while (rows >= 6) { + gf_6vect_mad_avx512(len, k, vec_i, g_tbls, data, coding); + g_tbls += 6 * k * 32; + coding += 6; + rows -= 6; + } + switch (rows) { + case 5: + gf_5vect_mad_avx512(len, k, vec_i, g_tbls, data, coding); + break; + case 4: + gf_4vect_mad_avx512(len, k, vec_i, g_tbls, data, coding); + break; + case 3: + gf_3vect_mad_avx512(len, k, vec_i, g_tbls, data, coding); + break; + case 2: + gf_2vect_mad_avx512(len, k, vec_i, g_tbls, data, coding); + break; + case 1: + gf_vect_mad_avx512(len, k, vec_i, g_tbls, data, *coding); + break; + case 0: + break; + } +} + +#endif // HAVE_AS_KNOWS_AVX512 + +#if __WORDSIZE == 64 || _WIN64 || __x86_64__ + +void ec_encode_data_update_sse(int len, int k, int rows, int vec_i, unsigned char *g_tbls, + unsigned char *data, unsigned char **coding) +{ + if (len < 16) { + ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding); + return; + } + + while (rows > 6) { + gf_6vect_mad_sse(len, k, vec_i, g_tbls, data, coding); + g_tbls += 6 * k * 32; + coding += 6; + rows -= 6; + } + switch (rows) { + case 6: + gf_6vect_mad_sse(len, k, vec_i, g_tbls, data, coding); + break; + case 5: + gf_5vect_mad_sse(len, k, vec_i, g_tbls, data, coding); + break; + case 4: + 
gf_4vect_mad_sse(len, k, vec_i, g_tbls, data, coding); + break; + case 3: + gf_3vect_mad_sse(len, k, vec_i, g_tbls, data, coding); + break; + case 2: + gf_2vect_mad_sse(len, k, vec_i, g_tbls, data, coding); + break; + case 1: + gf_vect_mad_sse(len, k, vec_i, g_tbls, data, *coding); + break; + case 0: + break; + } + +} + +void ec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned char *g_tbls, + unsigned char *data, unsigned char **coding) +{ + if (len < 16) { + ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding); + return; + } + while (rows > 6) { + gf_6vect_mad_avx(len, k, vec_i, g_tbls, data, coding); + g_tbls += 6 * k * 32; + coding += 6; + rows -= 6; + } + switch (rows) { + case 6: + gf_6vect_mad_avx(len, k, vec_i, g_tbls, data, coding); + break; + case 5: + gf_5vect_mad_avx(len, k, vec_i, g_tbls, data, coding); + break; + case 4: + gf_4vect_mad_avx(len, k, vec_i, g_tbls, data, coding); + break; + case 3: + gf_3vect_mad_avx(len, k, vec_i, g_tbls, data, coding); + break; + case 2: + gf_2vect_mad_avx(len, k, vec_i, g_tbls, data, coding); + break; + case 1: + gf_vect_mad_avx(len, k, vec_i, g_tbls, data, *coding); + break; + case 0: + break; + } + +} + +void ec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned char *g_tbls, + unsigned char *data, unsigned char **coding) +{ + if (len < 32) { + ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding); + return; + } + while (rows > 6) { + gf_6vect_mad_avx2(len, k, vec_i, g_tbls, data, coding); + g_tbls += 6 * k * 32; + coding += 6; + rows -= 6; + } + switch (rows) { + case 6: + gf_6vect_mad_avx2(len, k, vec_i, g_tbls, data, coding); + break; + case 5: + gf_5vect_mad_avx2(len, k, vec_i, g_tbls, data, coding); + break; + case 4: + gf_4vect_mad_avx2(len, k, vec_i, g_tbls, data, coding); + break; + case 3: + gf_3vect_mad_avx2(len, k, vec_i, g_tbls, data, coding); + break; + case 2: + gf_2vect_mad_avx2(len, k, vec_i, g_tbls, data, coding); + break; + case 1: + gf_vect_mad_avx2(len, k, vec_i, g_tbls, data, *coding); + break; + case 0: + break; + } + +} + +#endif //__WORDSIZE == 64 || _WIN64 || __x86_64__ +#endif //__x86_64__ || __i386__ || _M_X64 || _M_IX86 + +struct slver { + unsigned short snum; + unsigned char ver; + unsigned char core; +}; + +// Version info +struct slver ec_init_tables_slver_00010068; +struct slver ec_init_tables_slver = { 0x0068, 0x01, 0x00 }; + +struct slver ec_encode_data_sse_slver_00020069; +struct slver ec_encode_data_sse_slver = { 0x0069, 0x02, 0x00 }; diff --git a/src/isa-l/erasure_code/ec_multibinary.asm b/src/isa-l/erasure_code/ec_multibinary.asm new file mode 100644 index 000000000..a07f45d6f --- /dev/null +++ b/src/isa-l/erasure_code/ec_multibinary.asm @@ -0,0 +1,95 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. 
+; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%include "reg_sizes.asm" +%include "multibinary.asm" + +%ifidn __OUTPUT_FORMAT__, elf32 + [bits 32] +%else + default rel + [bits 64] + + extern ec_encode_data_update_sse + extern ec_encode_data_update_avx + extern ec_encode_data_update_avx2 +%ifdef HAVE_AS_KNOWS_AVX512 + extern ec_encode_data_avx512 + extern gf_vect_dot_prod_avx512 + extern ec_encode_data_update_avx512 + extern gf_vect_mad_avx512 +%endif + extern gf_vect_mul_sse + extern gf_vect_mul_avx + + extern gf_vect_mad_sse + extern gf_vect_mad_avx + extern gf_vect_mad_avx2 +%endif + +extern gf_vect_mul_base +extern ec_encode_data_base +extern ec_encode_data_update_base +extern gf_vect_dot_prod_base +extern gf_vect_mad_base + +extern gf_vect_dot_prod_sse +extern gf_vect_dot_prod_avx +extern gf_vect_dot_prod_avx2 +extern ec_encode_data_sse +extern ec_encode_data_avx +extern ec_encode_data_avx2 + +mbin_interface ec_encode_data +mbin_interface gf_vect_dot_prod +mbin_interface gf_vect_mul +mbin_interface ec_encode_data_update +mbin_interface gf_vect_mad + +%ifidn __OUTPUT_FORMAT__, elf32 + mbin_dispatch_init5 ec_encode_data, ec_encode_data_base, ec_encode_data_sse, ec_encode_data_avx, ec_encode_data_avx2 + mbin_dispatch_init5 gf_vect_dot_prod, gf_vect_dot_prod_base, gf_vect_dot_prod_sse, gf_vect_dot_prod_avx, gf_vect_dot_prod_avx2 + mbin_dispatch_init2 gf_vect_mul, gf_vect_mul_base + mbin_dispatch_init2 ec_encode_data_update, ec_encode_data_update_base + mbin_dispatch_init2 gf_vect_mad, gf_vect_mad_base +%else + + mbin_dispatch_init5 gf_vect_mul, gf_vect_mul_base, gf_vect_mul_sse, gf_vect_mul_avx, gf_vect_mul_avx + mbin_dispatch_init6 ec_encode_data, ec_encode_data_base, ec_encode_data_sse, ec_encode_data_avx, ec_encode_data_avx2, ec_encode_data_avx512 + mbin_dispatch_init6 ec_encode_data_update, ec_encode_data_update_base, ec_encode_data_update_sse, ec_encode_data_update_avx, ec_encode_data_update_avx2, ec_encode_data_update_avx512 + mbin_dispatch_init6 gf_vect_mad, gf_vect_mad_base, gf_vect_mad_sse, gf_vect_mad_avx, gf_vect_mad_avx2, gf_vect_mad_avx512 + mbin_dispatch_init6 gf_vect_dot_prod, gf_vect_dot_prod_base, gf_vect_dot_prod_sse, gf_vect_dot_prod_avx, gf_vect_dot_prod_avx2, gf_vect_dot_prod_avx512 +%endif + +;;; func core, ver, snum +slversion ec_encode_data, 00, 06, 0133 +slversion gf_vect_mul, 00, 05, 0134 +slversion ec_encode_data_update, 00, 05, 0212 +slversion gf_vect_dot_prod, 00, 05, 0138 +slversion gf_vect_mad, 00, 04, 0213 diff --git 
a/src/isa-l/erasure_code/erasure_code_base_perf.c b/src/isa-l/erasure_code/erasure_code_base_perf.c new file mode 100644 index 000000000..9587788d8 --- /dev/null +++ b/src/isa-l/erasure_code/erasure_code_base_perf.c @@ -0,0 +1,176 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include +#include +#include // for memset, memcmp +#include "erasure_code.h" +#include "test.h" + +//#define CACHED_TEST +#ifdef CACHED_TEST +// Cached test, loop many times over small dataset +# define TEST_SOURCES 32 +# define TEST_LEN(m) ((128*1024 / m) & ~(64-1)) +# define TEST_TYPE_STR "_warm" +#else +# ifndef TEST_CUSTOM +// Uncached test. Pull from large mem base. 
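A note on the sizing below: the cold variant divides a budget larger than the last-level cache across the m buffers, then masks the length down to a whole number of 64-byte cache lines, so each benchmark pass streams from memory rather than from cache. For example, with m = 14, (32 MiB / 14) & ~63 = 2396736 bytes per buffer, about 32 MiB across all buffers. A minimal sketch of the same arithmetic (cold_test_len_sketch is a hypothetical name):

/* Per-buffer length that keeps m buffers collectively larger than LLC. */
static inline int cold_test_len_sketch(int m)
{
        const int gt_l3_cache = 32 * 1024 * 1024;  /* > last-level cache */

        return (gt_l3_cache / m) & ~(64 - 1);      /* trim to cache lines */
}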
+# define TEST_SOURCES 32 +# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */ +# define TEST_LEN(m) ((GT_L3_CACHE / m) & ~(64-1)) +# define TEST_TYPE_STR "_cold" +# else +# define TEST_TYPE_STR "_cus" +# endif +#endif + +#define MMAX TEST_SOURCES +#define KMAX TEST_SOURCES + +#define BAD_MATRIX -1 + +typedef unsigned char u8; + +void ec_encode_perf(int m, int k, u8 * a, u8 * g_tbls, u8 ** buffs) +{ + ec_init_tables(k, m - k, &a[k * k], g_tbls); + ec_encode_data_base(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]); +} + +int ec_decode_perf(int m, int k, u8 * a, u8 * g_tbls, u8 ** buffs, u8 * src_in_err, + u8 * src_err_list, int nerrs, u8 ** temp_buffs) +{ + int i, j, r; + u8 b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX]; + u8 *recov[TEST_SOURCES]; + + // Construct b by removing error rows + for (i = 0, r = 0; i < k; i++, r++) { + while (src_in_err[r]) + r++; + recov[i] = buffs[r]; + for (j = 0; j < k; j++) + b[k * i + j] = a[k * r + j]; + } + + if (gf_invert_matrix(b, d, k) < 0) + return BAD_MATRIX; + + for (i = 0; i < nerrs; i++) + for (j = 0; j < k; j++) + c[k * i + j] = d[k * src_err_list[i] + j]; + + // Recover data + ec_init_tables(k, nerrs, c, g_tbls); + ec_encode_data_base(TEST_LEN(m), k, nerrs, g_tbls, recov, temp_buffs); + + return 0; +} + +int main(int argc, char *argv[]) +{ + int i, j, m, k, nerrs, check; + void *buf; + u8 *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES]; + u8 a[MMAX * KMAX]; + u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES]; + u8 src_err_list[TEST_SOURCES]; + struct perf start; + + // Pick test parameters + m = 14; + k = 10; + nerrs = 4; + const u8 err_list[] = { 2, 4, 5, 7 }; + + printf("erasure_code_base_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs); + + if (m > MMAX || k > KMAX || nerrs > (m - k)) { + printf(" Input test parameter error\n"); + return -1; + } + + memcpy(src_err_list, err_list, nerrs); + memset(src_in_err, 0, TEST_SOURCES); + for (i = 0; i < nerrs; i++) + src_in_err[src_err_list[i]] = 1; + + // Allocate the arrays + for (i = 0; i < m; i++) { + if (posix_memalign(&buf, 64, TEST_LEN(m))) { + printf("alloc error: Fail\n"); + return -1; + } + buffs[i] = buf; + } + + for (i = 0; i < (m - k); i++) { + if (posix_memalign(&buf, 64, TEST_LEN(m))) { + printf("alloc error: Fail\n"); + return -1; + } + temp_buffs[i] = buf; + } + + // Make random data + for (i = 0; i < k; i++) + for (j = 0; j < TEST_LEN(m); j++) + buffs[i][j] = rand(); + + gf_gen_rs_matrix(a, m, k); + + // Start encode test + BENCHMARK(&start, BENCHMARK_TIME, ec_encode_perf(m, k, a, g_tbls, buffs)); + printf("erasure_code_base_encode" TEST_TYPE_STR ": "); + perf_print(start, (long long)(TEST_LEN(m)) * (m)); + + // Start decode test + BENCHMARK(&start, BENCHMARK_TIME, check = + ec_decode_perf(m, k, a, g_tbls, buffs, src_in_err, src_err_list, nerrs, + temp_buffs)); + + if (check == BAD_MATRIX) { + printf("BAD MATRIX\n"); + return check; + } + + for (i = 0; i < nerrs; i++) { + if (0 != memcmp(temp_buffs[i], buffs[src_err_list[i]], TEST_LEN(m))) { + printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); + return -1; + } + } + + printf("erasure_code_base_decode" TEST_TYPE_STR ": "); + perf_print(start, (long long)(TEST_LEN(m)) * (k + nerrs)); + + printf("done all: Pass\n"); + return 0; +} diff --git a/src/isa-l/erasure_code/erasure_code_base_test.c b/src/isa-l/erasure_code/erasure_code_base_test.c new file mode 100644 index 000000000..81e1b5778 --- /dev/null +++ b/src/isa-l/erasure_code/erasure_code_base_test.c @@ -0,0 +1,764 @@ 
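The decode half of the perf test above hinges on one piece of algebra: collect the k surviving rows of the encode matrix, invert that square submatrix, and the inverse's rows indexed by the erased sources form the decode matrix. The sketch below restates ec_decode_perf()'s construction using the library's own gf_invert_matrix(); the 32-source bound mirrors MMAX/KMAX in the perf test, and build_decode_rows_sketch is a hypothetical name.

/* Build decode rows for erased data sources (sketch; assumes k <= 32). */
static int build_decode_rows_sketch(unsigned char *a, unsigned char *c,
                                    unsigned char *src_in_err,
                                    unsigned char *src_err_list,
                                    int nerrs, int k)
{
        unsigned char b[32 * 32], d[32 * 32];
        int i, j, r;

        for (i = 0, r = 0; i < k; i++, r++) {
                while (src_in_err[r])   /* skip erased rows */
                        r++;
                for (j = 0; j < k; j++)
                        b[k * i + j] = a[k * r + j];
        }
        if (gf_invert_matrix(b, d, k) < 0)
                return -1;              /* singular survivor submatrix */
        for (i = 0; i < nerrs; i++)     /* inverse rows of erased sources */
                for (j = 0; j < k; j++)
                        c[k * i + j] = d[k * src_err_list[i] + j];
        return 0;
}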
+/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include +#include +#include // for memset, memcmp +#include "erasure_code.h" +#include "types.h" + +#define TEST_LEN 8192 +#define TEST_SIZE (TEST_LEN/2) + +#ifndef TEST_SOURCES +# define TEST_SOURCES 127 +#endif +#ifndef RANDOMS +# define RANDOMS 50 +#endif + +#define MMAX TEST_SOURCES +#define KMAX TEST_SOURCES + +#define EFENCE_TEST_MIN_SIZE 16 + +#ifdef EC_ALIGNED_ADDR +// Define power of 2 range to check ptr, len alignment +# define PTR_ALIGN_CHK_B 0 +# define LEN_ALIGN_CHK_B 0 // 0 for aligned only +#else +// Define power of 2 range to check ptr, len alignment +# define PTR_ALIGN_CHK_B 32 +# define LEN_ALIGN_CHK_B 32 // 0 for aligned only +#endif + +#ifndef TEST_SEED +#define TEST_SEED 11 +#endif + +typedef unsigned char u8; + +void dump(unsigned char *buf, int len) +{ + int i; + for (i = 0; i < len;) { + printf(" %2x", 0xff & buf[i++]); + if (i % 32 == 0) + printf("\n"); + } + printf("\n"); +} + +void dump_matrix(unsigned char **s, int k, int m) +{ + int i, j; + for (i = 0; i < k; i++) { + for (j = 0; j < m; j++) { + printf(" %2x", s[i][j]); + } + printf("\n"); + } + printf("\n"); +} + +void dump_u8xu8(unsigned char *s, int k, int m) +{ + int i, j; + for (i = 0; i < k; i++) { + for (j = 0; j < m; j++) { + printf(" %2x", 0xff & s[j + (i * m)]); + } + printf("\n"); + } + printf("\n"); +} + +// Generate Random errors +static void gen_err_list(unsigned char *src_err_list, + unsigned char *src_in_err, int *pnerrs, int *pnsrcerrs, int k, int m) +{ + int i, err; + int nerrs = 0, nsrcerrs = 0; + + for (i = 0, nerrs = 0, nsrcerrs = 0; i < m && nerrs < m - k; i++) { + err = 1 & rand(); + src_in_err[i] = err; + if (err) { + src_err_list[nerrs++] = i; + if (i < k) { + nsrcerrs++; + } + } + } + if (nerrs == 0) { // should have at least one error + while ((err = (rand() % KMAX)) >= m) ; + src_err_list[nerrs++] = err; + src_in_err[err] = 1; + if 
(err < k) + nsrcerrs = 1; + } + *pnerrs = nerrs; + *pnsrcerrs = nsrcerrs; + return; +} + +#define NO_INVERT_MATRIX -2 +// Generate decode matrix from encode matrix +static int gf_gen_decode_matrix(unsigned char *encode_matrix, + unsigned char *decode_matrix, + unsigned char *invert_matrix, + unsigned int *decode_index, + unsigned char *src_err_list, + unsigned char *src_in_err, + int nerrs, int nsrcerrs, int k, int m) +{ + int i, j, p; + int r; + unsigned char *backup, *b, s; + int incr = 0; + + b = malloc(MMAX * KMAX); + backup = malloc(MMAX * KMAX); + + if (b == NULL || backup == NULL) { + printf("Test failure! Error with malloc\n"); + free(b); + free(backup); + return -1; + } + // Construct matrix b by removing error rows + for (i = 0, r = 0; i < k; i++, r++) { + while (src_in_err[r]) + r++; + for (j = 0; j < k; j++) { + b[k * i + j] = encode_matrix[k * r + j]; + backup[k * i + j] = encode_matrix[k * r + j]; + } + decode_index[i] = r; + } + incr = 0; + while (gf_invert_matrix(b, invert_matrix, k) < 0) { + if (nerrs == (m - k)) { + free(b); + free(backup); + printf("BAD MATRIX\n"); + return NO_INVERT_MATRIX; + } + incr++; + memcpy(b, backup, MMAX * KMAX); + for (i = nsrcerrs; i < nerrs - nsrcerrs; i++) { + if (src_err_list[i] == (decode_index[k - 1] + incr)) { + // skip the erased parity line + incr++; + continue; + } + } + if (decode_index[k - 1] + incr >= m) { + free(b); + free(backup); + printf("BAD MATRIX\n"); + return NO_INVERT_MATRIX; + } + decode_index[k - 1] += incr; + for (j = 0; j < k; j++) + b[k * (k - 1) + j] = encode_matrix[k * decode_index[k - 1] + j]; + + }; + + for (i = 0; i < nsrcerrs; i++) { + for (j = 0; j < k; j++) { + decode_matrix[k * i + j] = invert_matrix[k * src_err_list[i] + j]; + } + } + /* src_err_list from encode_matrix * invert of b for parity decoding */ + for (p = nsrcerrs; p < nerrs; p++) { + for (i = 0; i < k; i++) { + s = 0; + for (j = 0; j < k; j++) + s ^= gf_mul(invert_matrix[j * k + i], + encode_matrix[k * src_err_list[p] + j]); + + decode_matrix[k * p + i] = s; + } + } + free(b); + free(backup); + return 0; +} + +int main(int argc, char *argv[]) +{ + int re = 0; + int i, j, p, rtest, m, k; + int nerrs, nsrcerrs; + void *buf; + unsigned int decode_index[MMAX]; + unsigned char *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES]; + unsigned char *encode_matrix, *decode_matrix, *invert_matrix, *g_tbls; + unsigned char src_in_err[TEST_SOURCES], src_err_list[TEST_SOURCES]; + unsigned char *recov[TEST_SOURCES]; + + int rows, align, size; + unsigned char *efence_buffs[TEST_SOURCES]; + unsigned int offset; + u8 *ubuffs[TEST_SOURCES]; + u8 *temp_ubuffs[TEST_SOURCES]; + + printf("erasure_code_base_test: %dx%d ", TEST_SOURCES, TEST_LEN); + srand(TEST_SEED); + + // Allocate the arrays + for (i = 0; i < TEST_SOURCES; i++) { + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + buffs[i] = buf; + } + + for (i = 0; i < TEST_SOURCES; i++) { + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + temp_buffs[i] = buf; + } + + // Test erasure code by encode and recovery + + encode_matrix = malloc(MMAX * KMAX); + decode_matrix = malloc(MMAX * KMAX); + invert_matrix = malloc(MMAX * KMAX); + g_tbls = malloc(KMAX * TEST_SOURCES * 32); + if (encode_matrix == NULL || decode_matrix == NULL + || invert_matrix == NULL || g_tbls == NULL) { + printf("Test failure! 
Error with malloc\n"); + return -1; + } + // Pick a first test + m = 9; + k = 5; + if (m > MMAX || k > KMAX) + return -1; + + // Make random data + for (i = 0; i < k; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + // Generate encode matrix encode_matrix + // The matrix generated by gf_gen_rs_matrix + // is not always invertable. + gf_gen_rs_matrix(encode_matrix, m, k); + + // Generate g_tbls from encode matrix encode_matrix + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix encode_matrix + ec_encode_data_base(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]); + + // Choose random buffers to be in erasure + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, src_in_err, + nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = buffs[decode_index[i]]; + } + + // Recover data + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + ec_encode_data_base(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]); + for (i = 0; i < nerrs; i++) { + + if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) { + printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs); + printf(" - erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((u8 *) encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((u8 *) invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((u8 *) decode_matrix, m, k); + printf("recov %d:", src_err_list[i]); + dump(temp_buffs[k + i], 25); + printf("orig :"); + dump(buffs[src_err_list[i]], 25); + return -1; + } + } + + // Pick a first test + m = 9; + k = 5; + if (m > MMAX || k > KMAX) + return -1; + + // Make random data + for (i = 0; i < k; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + // The matrix generated by gf_gen_cauchy1_matrix + // is always invertable. 
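Why the comment above can promise invertibility: a systematic Cauchy construction keeps every k x k submatrix nonsingular, so gf_gen_decode_matrix() never has to fall back to its row-substitution retry loop on this path. One common form, shown here as an illustration rather than as the exact upstream generator, puts the identity in the top k rows and element 1 / (i ^ j) over GF(2^8) in parity rows i = k..m-1 (i >= k > j guarantees i ^ j != 0); gen_cauchy_sketch is a hypothetical name and indexes the gf_inv_table_base shown earlier in this patch.

/* Systematic Cauchy-style matrix (illustrative construction). */
static void gen_cauchy_sketch(unsigned char *a, int m, int k)
{
        int i, j;

        memset(a, 0, m * k);
        for (i = 0; i < k; i++)
                a[k * i + i] = 1;       /* data rows pass through */
        for (i = k; i < m; i++)
                for (j = 0; j < k; j++)
                        a[k * i + j] = gf_inv_table_base[i ^ j];
}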
+ gf_gen_cauchy1_matrix(encode_matrix, m, k); + + // Generate g_tbls from encode matrix encode_matrix + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix encode_matrix + ec_encode_data_base(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]); + + // Choose random buffers to be in erasure + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, src_in_err, + nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = buffs[decode_index[i]]; + } + + // Recover data + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + ec_encode_data_base(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]); + for (i = 0; i < nerrs; i++) { + + if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) { + printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs); + printf(" - erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((u8 *) encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((u8 *) invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((u8 *) decode_matrix, m, k); + printf("recov %d:", src_err_list[i]); + dump(temp_buffs[k + i], 25); + printf("orig :"); + dump(buffs[src_err_list[i]], 25); + return -1; + } + } + + // Do more random tests + for (rtest = 0; rtest < RANDOMS; rtest++) { + while ((m = (rand() % MMAX)) < 2) ; + while ((k = (rand() % KMAX)) >= m || k < 1) ; + + if (m > MMAX || k > KMAX) + continue; + + // Make random data + for (i = 0; i < k; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + // The matrix generated by gf_gen_cauchy1_matrix + // is always invertable. 
+ gf_gen_cauchy1_matrix(encode_matrix, m, k); + + // Make parity vects + // Generate g_tbls from encode matrix a + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix a + ec_encode_data_base(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]); + + // Random errors + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, + src_in_err, nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = buffs[decode_index[i]]; + } + + // Recover data + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + ec_encode_data_base(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]); + + for (i = 0; i < nerrs; i++) { + + if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) { + printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); + printf(" - erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((u8 *) encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((u8 *) invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((u8 *) decode_matrix, m, k); + printf("orig data:\n"); + dump_matrix(buffs, m, 25); + printf("orig :"); + dump(buffs[src_err_list[i]], 25); + printf("recov %d:", src_err_list[i]); + dump(temp_buffs[k + i], 25); + return -1; + } + } + putchar('.'); + } + + // Run tests at end of buffer for Electric Fence + k = 16; + align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16; + if (k > KMAX) + return -1; + + for (rows = 1; rows <= 16; rows++) { + m = k + rows; + if (m > MMAX) + return -1; + + // Make random data + for (i = 0; i < k; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (size = EFENCE_TEST_MIN_SIZE; size <= TEST_SIZE; size += align) { + for (i = 0; i < m; i++) { // Line up TEST_SIZE from end + efence_buffs[i] = buffs[i] + TEST_LEN - size; + } + + // The matrix generated by gf_gen_cauchy1_matrix + // is always invertable. 
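The pointer arithmetic a few lines up is what makes this loop an "Electric Fence" test: each region under test is slid so that it ends exactly at the end of its allocation. Under a guard-page allocator, any kernel that reads or writes even one byte past size bytes then faults immediately instead of silently passing. A one-line sketch of that alignment (tail_align_sketch is a hypothetical name):

/* Place the size-byte test region flush against the end of the buffer. */
static unsigned char *tail_align_sketch(unsigned char *base, int total, int size)
{
        return base + total - size;     /* base[total] is out of bounds */
}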
+ gf_gen_cauchy1_matrix(encode_matrix, m, k); + + // Make parity vects + // Generate g_tbls from encode matrix a + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix a + ec_encode_data_base(size, k, m - k, g_tbls, efence_buffs, + &efence_buffs[k]); + + // Random errors + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, + src_in_err, nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = efence_buffs[decode_index[i]]; + } + + // Recover data + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + ec_encode_data_base(size, k, nerrs, g_tbls, recov, &temp_buffs[k]); + + for (i = 0; i < nerrs; i++) { + + if (0 != + memcmp(temp_buffs[k + i], efence_buffs[src_err_list[i]], + size)) { + printf("Efence: Fail error recovery (%d, %d, %d)\n", m, + k, nerrs); + + printf("size = %d\n", size); + + printf("Test erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((u8 *) encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((u8 *) invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((u8 *) decode_matrix, m, k); + + printf("recov %d:", src_err_list[i]); + dump(temp_buffs[k + i], align); + printf("orig :"); + dump(efence_buffs[src_err_list[i]], align); + return -1; + } + } + } + + } + + // Test rand ptr alignment if available + + for (rtest = 0; rtest < RANDOMS; rtest++) { + while ((m = (rand() % MMAX)) < 2) ; + while ((k = (rand() % KMAX)) >= m || k < 1) ; + + if (m > MMAX || k > KMAX) + continue; + + size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~15; + + offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B; + // Add random offsets + for (i = 0; i < m; i++) { + memset(buffs[i], 0, TEST_LEN); // zero pad to check write-over + memset(temp_buffs[i], 0, TEST_LEN); // zero pad to check write-over + ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); + temp_ubuffs[i] = temp_buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); + } + + for (i = 0; i < k; i++) + for (j = 0; j < size; j++) + ubuffs[i][j] = rand(); + + // The matrix generated by gf_gen_cauchy1_matrix + // is always invertable. 
+ gf_gen_cauchy1_matrix(encode_matrix, m, k); + + // Make parity vects + // Generate g_tbls from encode matrix a + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix a + ec_encode_data_base(size, k, m - k, g_tbls, ubuffs, &ubuffs[k]); + + // Random errors + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, + src_in_err, nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = ubuffs[decode_index[i]]; + } + + // Recover data + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + ec_encode_data_base(size, k, nerrs, g_tbls, recov, &temp_ubuffs[k]); + + for (i = 0; i < nerrs; i++) { + + if (0 != memcmp(temp_ubuffs[k + i], ubuffs[src_err_list[i]], size)) { + printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); + printf(" - erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((unsigned char *)encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((unsigned char *)invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((unsigned char *)decode_matrix, m, k); + printf("orig data:\n"); + dump_matrix(ubuffs, m, 25); + printf("orig :"); + dump(ubuffs[src_err_list[i]], 25); + printf("recov %d:", src_err_list[i]); + dump(temp_ubuffs[k + i], 25); + return -1; + } + } + + // Confirm that padding around dests is unchanged + memset(temp_buffs[0], 0, PTR_ALIGN_CHK_B); // Make reference zero buff + + for (i = 0; i < m; i++) { + + offset = ubuffs[i] - buffs[i]; + + if (memcmp(buffs[i], temp_buffs[0], offset)) { + printf("Fail rand ualign encode pad start\n"); + return -1; + } + if (memcmp + (buffs[i] + offset + size, temp_buffs[0], + PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign encode pad end\n"); + return -1; + } + } + + for (i = 0; i < nerrs; i++) { + + offset = temp_ubuffs[k + i] - temp_buffs[k + i]; + if (memcmp(temp_buffs[k + i], temp_buffs[0], offset)) { + printf("Fail rand ualign decode pad start\n"); + return -1; + } + if (memcmp + (temp_buffs[k + i] + offset + size, temp_buffs[0], + PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign decode pad end\n"); + return -1; + } + } + + putchar('.'); + } + + // Test size alignment + + align = (LEN_ALIGN_CHK_B != 0) ? 13 : 16; + + for (size = TEST_LEN; size > 0; size -= align) { + while ((m = (rand() % MMAX)) < 2) ; + while ((k = (rand() % KMAX)) >= m || k < 1) ; + + if (m > MMAX || k > KMAX) + continue; + + for (i = 0; i < k; i++) + for (j = 0; j < size; j++) + buffs[i][j] = rand(); + + // The matrix generated by gf_gen_cauchy1_matrix + // is always invertable. 
+ gf_gen_cauchy1_matrix(encode_matrix, m, k); + + // Make parity vects + // Generate g_tbls from encode matrix a + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix a + ec_encode_data_base(size, k, m - k, g_tbls, buffs, &buffs[k]); + + // Random errors + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, + src_in_err, nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = buffs[decode_index[i]]; + } + + // Recover data + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + ec_encode_data_base(size, k, nerrs, g_tbls, recov, &temp_buffs[k]); + + for (i = 0; i < nerrs; i++) { + + if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], size)) { + printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); + printf(" - erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((unsigned char *)encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((unsigned char *)invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((unsigned char *)decode_matrix, m, k); + printf("orig data:\n"); + dump_matrix(buffs, m, 25); + printf("orig :"); + dump(buffs[src_err_list[i]], 25); + printf("recov %d:", src_err_list[i]); + dump(temp_buffs[k + i], 25); + return -1; + } + } + } + + printf("done EC tests: Pass\n"); + return 0; +} diff --git a/src/isa-l/erasure_code/erasure_code_perf.c b/src/isa-l/erasure_code/erasure_code_perf.c new file mode 100644 index 000000000..da81387b5 --- /dev/null +++ b/src/isa-l/erasure_code/erasure_code_perf.c @@ -0,0 +1,177 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include +#include +#include // for memset, memcmp +#include "erasure_code.h" +#include "test.h" + +//#define CACHED_TEST +#ifdef CACHED_TEST +// Cached test, loop many times over small dataset +# define TEST_SOURCES 32 +# define TEST_LEN(m) ((128*1024 / m) & ~(64-1)) +# define TEST_TYPE_STR "_warm" +#else +# ifndef TEST_CUSTOM +// Uncached test. Pull from large mem base. +# define TEST_SOURCES 32 +# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */ +# define TEST_LEN(m) ((GT_L3_CACHE / m) & ~(64-1)) +# define TEST_TYPE_STR "_cold" +# else +# define TEST_TYPE_STR "_cus" +# endif +#endif + +#define MMAX TEST_SOURCES +#define KMAX TEST_SOURCES + +#define BAD_MATRIX -1 + +typedef unsigned char u8; + +void ec_encode_perf(int m, int k, u8 * a, u8 * g_tbls, u8 ** buffs, struct perf *start) +{ + ec_init_tables(k, m - k, &a[k * k], g_tbls); + BENCHMARK(start, BENCHMARK_TIME, + ec_encode_data(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k])); +} + +int ec_decode_perf(int m, int k, u8 * a, u8 * g_tbls, u8 ** buffs, u8 * src_in_err, + u8 * src_err_list, int nerrs, u8 ** temp_buffs, struct perf *start) +{ + int i, j, r; + u8 b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX]; + u8 *recov[TEST_SOURCES]; + + // Construct b by removing error rows + for (i = 0, r = 0; i < k; i++, r++) { + while (src_in_err[r]) + r++; + recov[i] = buffs[r]; + for (j = 0; j < k; j++) + b[k * i + j] = a[k * r + j]; + } + + if (gf_invert_matrix(b, d, k) < 0) + return BAD_MATRIX; + + for (i = 0; i < nerrs; i++) + for (j = 0; j < k; j++) + c[k * i + j] = d[k * src_err_list[i] + j]; + + // Recover data + ec_init_tables(k, nerrs, c, g_tbls); + BENCHMARK(start, BENCHMARK_TIME, + ec_encode_data(TEST_LEN(m), k, nerrs, g_tbls, recov, temp_buffs)); + + return 0; +} + +int main(int argc, char *argv[]) +{ + int i, j, m, k, nerrs, check; + void *buf; + u8 *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES]; + u8 a[MMAX * KMAX]; + u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES]; + u8 src_err_list[TEST_SOURCES]; + struct perf start; + + // Pick test parameters + m = 14; + k = 10; + nerrs = 4; + const u8 err_list[] = { 2, 4, 5, 7 }; + + printf("erasure_code_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs); + + if (m > MMAX || k > KMAX || nerrs > (m - k)) { + printf(" Input test parameter error\n"); + return -1; + } + + memcpy(src_err_list, err_list, nerrs); + memset(src_in_err, 0, TEST_SOURCES); + for (i = 0; i < nerrs; i++) + src_in_err[src_err_list[i]] = 1; + + // Allocate the arrays + for (i = 0; i < m; i++) { + if (posix_memalign(&buf, 64, TEST_LEN(m))) { + printf("alloc error: Fail\n"); + return -1; + } + buffs[i] = buf; + } + + for (i = 0; i < (m - k); i++) { + if (posix_memalign(&buf, 64, TEST_LEN(m))) { + printf("alloc error: Fail\n"); + return -1; + } + temp_buffs[i] = buf; + } + + // Make random data + for (i = 0; i < k; i++) + for (j = 0; j < TEST_LEN(m); j++) + buffs[i][j] = rand(); + + gf_gen_rs_matrix(a, m, k); + + 
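gf_gen_rs_matrix(), used above, builds a systematic Vandermonde-style matrix: identity in the top k rows, then each parity row filled with successive powers of a generator. The sketch below shows one plausible construction (illustration only; the shipped generator is authoritative, and gen_rs_sketch is a hypothetical name). Unlike a Cauchy matrix, identity-over-Vandermonde does not guarantee that every k x k submatrix is invertible, which is exactly why ec_decode_perf() checks for BAD_MATRIX.

/* Systematic Vandermonde-style RS matrix (illustrative construction). */
static void gen_rs_sketch(unsigned char *a, int m, int k)
{
        int i, j;
        unsigned char p, gen = 1;

        memset(a, 0, m * k);
        for (i = 0; i < k; i++)
                a[k * i + i] = 1;
        for (i = k; i < m; i++) {
                p = 1;
                for (j = 0; j < k; j++) {
                        a[k * i + j] = p;
                        p = gf_mul(p, gen);     /* gen^j across the row */
                }
                gen = gf_mul(gen, 2);           /* new generator per row */
        }
}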
// Start encode test + ec_encode_perf(m, k, a, g_tbls, buffs, &start); + printf("erasure_code_encode" TEST_TYPE_STR ": "); + perf_print(start, (long long)(TEST_LEN(m)) * (m)); + + // Start decode test + check = ec_decode_perf(m, k, a, g_tbls, buffs, src_in_err, src_err_list, nerrs, + temp_buffs, &start); + + if (check == BAD_MATRIX) { + printf("BAD MATRIX\n"); + return check; + } + + for (i = 0; i < nerrs; i++) { + if (0 != memcmp(temp_buffs[i], buffs[src_err_list[i]], TEST_LEN(m))) { + printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); + return -1; + } + } + + printf("erasure_code_decode" TEST_TYPE_STR ": "); + perf_print(start, (long long)(TEST_LEN(m)) * (k + nerrs)); + + printf("done all: Pass\n"); + return 0; +} diff --git a/src/isa-l/erasure_code/erasure_code_test.c b/src/isa-l/erasure_code/erasure_code_test.c new file mode 100644 index 000000000..a1736afd5 --- /dev/null +++ b/src/isa-l/erasure_code/erasure_code_test.c @@ -0,0 +1,764 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include +#include +#include // for memset, memcmp +#include "erasure_code.h" +#include "types.h" + +#define TEST_LEN 8192 +#define TEST_SIZE (TEST_LEN/2) + +#ifndef TEST_SOURCES +# define TEST_SOURCES 127 +#endif +#ifndef RANDOMS +# define RANDOMS 200 +#endif + +#define MMAX TEST_SOURCES +#define KMAX TEST_SOURCES + +#define EFENCE_TEST_MIN_SIZE 16 +#define EFENCE_TEST_MAX_SIZE EFENCE_TEST_MIN_SIZE + 0x100 + +#ifdef EC_ALIGNED_ADDR +// Define power of 2 range to check ptr, len alignment +# define PTR_ALIGN_CHK_B 0 +# define LEN_ALIGN_CHK_B 0 // 0 for aligned only +#else +// Define power of 2 range to check ptr, len alignment +# define PTR_ALIGN_CHK_B 32 +# define LEN_ALIGN_CHK_B 32 // 0 for aligned only +#endif + +#ifndef TEST_SEED +#define TEST_SEED 11 +#endif + +typedef unsigned char u8; + +void dump(unsigned char *buf, int len) +{ + int i; + for (i = 0; i < len;) { + printf(" %2x", 0xff & buf[i++]); + if (i % 32 == 0) + printf("\n"); + } + printf("\n"); +} + +void dump_matrix(unsigned char **s, int k, int m) +{ + int i, j; + for (i = 0; i < k; i++) { + for (j = 0; j < m; j++) { + printf(" %2x", s[i][j]); + } + printf("\n"); + } + printf("\n"); +} + +void dump_u8xu8(unsigned char *s, int k, int m) +{ + int i, j; + for (i = 0; i < k; i++) { + for (j = 0; j < m; j++) { + printf(" %2x", 0xff & s[j + (i * m)]); + } + printf("\n"); + } + printf("\n"); +} + +// Generate Random errors +static void gen_err_list(unsigned char *src_err_list, + unsigned char *src_in_err, int *pnerrs, int *pnsrcerrs, int k, int m) +{ + int i, err; + int nerrs = 0, nsrcerrs = 0; + + for (i = 0, nerrs = 0, nsrcerrs = 0; i < m && nerrs < m - k; i++) { + err = 1 & rand(); + src_in_err[i] = err; + if (err) { + src_err_list[nerrs++] = i; + if (i < k) { + nsrcerrs++; + } + } + } + if (nerrs == 0) { // should have at least one error + while ((err = (rand() % KMAX)) >= m) ; + src_err_list[nerrs++] = err; + src_in_err[err] = 1; + if (err < k) + nsrcerrs = 1; + } + *pnerrs = nerrs; + *pnsrcerrs = nsrcerrs; + return; +} + +#define NO_INVERT_MATRIX -2 +// Generate decode matrix from encode matrix +static int gf_gen_decode_matrix(unsigned char *encode_matrix, + unsigned char *decode_matrix, + unsigned char *invert_matrix, + unsigned int *decode_index, + unsigned char *src_err_list, + unsigned char *src_in_err, + int nerrs, int nsrcerrs, int k, int m) +{ + int i, j, p; + int r; + unsigned char *backup, *b, s; + int incr = 0; + + b = malloc(MMAX * KMAX); + backup = malloc(MMAX * KMAX); + + if (b == NULL || backup == NULL) { + printf("Test failure! 
Error with malloc\n"); + free(b); + free(backup); + return -1; + } + // Construct matrix b by removing error rows + for (i = 0, r = 0; i < k; i++, r++) { + while (src_in_err[r]) + r++; + for (j = 0; j < k; j++) { + b[k * i + j] = encode_matrix[k * r + j]; + backup[k * i + j] = encode_matrix[k * r + j]; + } + decode_index[i] = r; + } + incr = 0; + while (gf_invert_matrix(b, invert_matrix, k) < 0) { + if (nerrs == (m - k)) { + free(b); + free(backup); + printf("BAD MATRIX\n"); + return NO_INVERT_MATRIX; + } + incr++; + memcpy(b, backup, MMAX * KMAX); + for (i = nsrcerrs; i < nerrs - nsrcerrs; i++) { + if (src_err_list[i] == (decode_index[k - 1] + incr)) { + // skip the erased parity line + incr++; + continue; + } + } + if (decode_index[k - 1] + incr >= m) { + free(b); + free(backup); + printf("BAD MATRIX\n"); + return NO_INVERT_MATRIX; + } + decode_index[k - 1] += incr; + for (j = 0; j < k; j++) + b[k * (k - 1) + j] = encode_matrix[k * decode_index[k - 1] + j]; + + }; + + for (i = 0; i < nsrcerrs; i++) { + for (j = 0; j < k; j++) { + decode_matrix[k * i + j] = invert_matrix[k * src_err_list[i] + j]; + } + } + /* src_err_list from encode_matrix * invert of b for parity decoding */ + for (p = nsrcerrs; p < nerrs; p++) { + for (i = 0; i < k; i++) { + s = 0; + for (j = 0; j < k; j++) + s ^= gf_mul(invert_matrix[j * k + i], + encode_matrix[k * src_err_list[p] + j]); + + decode_matrix[k * p + i] = s; + } + } + free(b); + free(backup); + return 0; +} + +int main(int argc, char *argv[]) +{ + int re = 0; + int i, j, p, rtest, m, k; + int nerrs, nsrcerrs; + void *buf; + unsigned int decode_index[MMAX]; + unsigned char *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES]; + unsigned char *encode_matrix, *decode_matrix, *invert_matrix, *g_tbls; + unsigned char src_in_err[TEST_SOURCES], src_err_list[TEST_SOURCES]; + unsigned char *recov[TEST_SOURCES]; + + int rows, align, size; + unsigned char *efence_buffs[TEST_SOURCES]; + unsigned int offset; + u8 *ubuffs[TEST_SOURCES]; + u8 *temp_ubuffs[TEST_SOURCES]; + + printf("erasure_code_test: %dx%d ", TEST_SOURCES, TEST_LEN); + srand(TEST_SEED); + + // Allocate the arrays + for (i = 0; i < TEST_SOURCES; i++) { + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + buffs[i] = buf; + } + + for (i = 0; i < TEST_SOURCES; i++) { + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + temp_buffs[i] = buf; + } + + // Test erasure code by encode and recovery + + encode_matrix = malloc(MMAX * KMAX); + decode_matrix = malloc(MMAX * KMAX); + invert_matrix = malloc(MMAX * KMAX); + g_tbls = malloc(KMAX * TEST_SOURCES * 32); + if (encode_matrix == NULL || decode_matrix == NULL + || invert_matrix == NULL || g_tbls == NULL) { + printf("Test failure! Error with malloc\n"); + return -1; + } + // Pick a first test + m = 9; + k = 5; + if (m > MMAX || k > KMAX) + return -1; + + // Make random data + for (i = 0; i < k; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + // Generate encode matrix encode_matrix + // The matrix generated by gf_gen_rs_matrix + // is not always invertable. 
+ gf_gen_rs_matrix(encode_matrix, m, k); + + // Generate g_tbls from encode matrix encode_matrix + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix encode_matrix + ec_encode_data(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]); + + // Choose random buffers to be in erasure + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, src_in_err, + nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = buffs[decode_index[i]]; + } + + // Recover data + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + ec_encode_data(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]); + for (i = 0; i < nerrs; i++) { + + if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) { + printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs); + printf(" - erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((u8 *) encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((u8 *) invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((u8 *) decode_matrix, m, k); + printf("recov %d:", src_err_list[i]); + dump(temp_buffs[k + i], 25); + printf("orig :"); + dump(buffs[src_err_list[i]], 25); + return -1; + } + } + + // Pick a first test + m = 9; + k = 5; + if (m > MMAX || k > KMAX) + return -1; + + // Make random data + for (i = 0; i < k; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + // The matrix generated by gf_gen_cauchy1_matrix + // is always invertable. 
+ gf_gen_cauchy1_matrix(encode_matrix, m, k); + + // Generate g_tbls from encode matrix encode_matrix + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix encode_matrix + ec_encode_data(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]); + + // Choose random buffers to be in erasure + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, src_in_err, + nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = buffs[decode_index[i]]; + } + + // Recover data + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + ec_encode_data(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]); + for (i = 0; i < nerrs; i++) { + + if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) { + printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs); + printf(" - erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((u8 *) encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((u8 *) invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((u8 *) decode_matrix, m, k); + printf("recov %d:", src_err_list[i]); + dump(temp_buffs[k + i], 25); + printf("orig :"); + dump(buffs[src_err_list[i]], 25); + return -1; + } + } + + // Do more random tests + for (rtest = 0; rtest < RANDOMS; rtest++) { + while ((m = (rand() % MMAX)) < 2) ; + while ((k = (rand() % KMAX)) >= m || k < 1) ; + + if (m > MMAX || k > KMAX) + continue; + + // Make random data + for (i = 0; i < k; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + // The matrix generated by gf_gen_cauchy1_matrix + // is always invertable. 
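One detail of gf_gen_decode_matrix() above that is easy to miss: erased data rows are recovered straight from rows of the inverted survivor matrix, but an erased parity row p must first be re-expressed through the survivors, i.e. decode[p][i] = XOR over j of gf_mul(invert_matrix[j][i], encode_matrix[err_p][j]) in GF(2^8). That is the s-accumulating loop in the function, restated here as a sketch (parity_decode_row_sketch is a hypothetical name):

/* Decode row for one erased parity line (restates the loop above). */
static void parity_decode_row_sketch(unsigned char *invert_matrix,
                                     unsigned char *encode_matrix,
                                     unsigned char *decode_row,
                                     int err_row, int k)
{
        int i, j;
        unsigned char s;

        for (i = 0; i < k; i++) {
                s = 0;
                for (j = 0; j < k; j++)
                        s ^= gf_mul(invert_matrix[j * k + i],
                                    encode_matrix[k * err_row + j]);
                decode_row[i] = s;      /* column i of inverse, row err_row */
        }
}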
+ gf_gen_cauchy1_matrix(encode_matrix, m, k); + + // Make parity vects + // Generate g_tbls from encode matrix a + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix a + ec_encode_data(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]); + + // Random errors + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, + src_in_err, nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = buffs[decode_index[i]]; + } + + // Recover data + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + ec_encode_data(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]); + + for (i = 0; i < nerrs; i++) { + + if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) { + printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); + printf(" - erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((u8 *) encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((u8 *) invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((u8 *) decode_matrix, m, k); + printf("orig data:\n"); + dump_matrix(buffs, m, 25); + printf("orig :"); + dump(buffs[src_err_list[i]], 25); + printf("recov %d:", src_err_list[i]); + dump(temp_buffs[k + i], 25); + return -1; + } + } + putchar('.'); + } + + // Run tests at end of buffer for Electric Fence + k = 16; + align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16; + if (k > KMAX) + return -1; + + for (rows = 1; rows <= 16; rows++) { + m = k + rows; + if (m > MMAX) + return -1; + + // Make random data + for (i = 0; i < k; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (size = EFENCE_TEST_MIN_SIZE; size <= EFENCE_TEST_MAX_SIZE; size += align) { + for (i = 0; i < m; i++) { // Line up TEST_SIZE from end + efence_buffs[i] = buffs[i] + TEST_LEN - size; + } + + // The matrix generated by gf_gen_cauchy1_matrix + // is always invertable. 
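The Electric Fence pass above lines every working region up against the very end of its allocation (efence_buffs[i] = buffs[i] + TEST_LEN - size), so a single byte of encoder overrun lands outside the heap block, where Electric Fence or a similar tool can trap it. A hedged sketch of that layout with an illustrative size (the Cauchy matrix call for this pass follows):

#include <stdlib.h>

#define TEST_LEN 8192

int main(void)
{
    unsigned char *base = malloc(TEST_LEN);
    size_t size = 96;   /* illustrative; the test sweeps this by `align` */

    if (base == NULL)
        return -1;
    /* The last `size` bytes are handed to the encoder; any access at or
     * past base + TEST_LEN is out of bounds and immediately trappable. */
    unsigned char *region = base + TEST_LEN - size;
    (void)region;
    free(base);
    return 0;
}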
+ gf_gen_cauchy1_matrix(encode_matrix, m, k); + + // Make parity vects + // Generate g_tbls from encode matrix a + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix a + ec_encode_data(size, k, m - k, g_tbls, efence_buffs, &efence_buffs[k]); + + // Random errors + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, + src_in_err, nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = efence_buffs[decode_index[i]]; + } + + // Recover data + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + ec_encode_data(size, k, nerrs, g_tbls, recov, &temp_buffs[k]); + + for (i = 0; i < nerrs; i++) { + + if (0 != + memcmp(temp_buffs[k + i], efence_buffs[src_err_list[i]], + size)) { + printf("Efence: Fail error recovery (%d, %d, %d)\n", m, + k, nerrs); + + printf("size = %d\n", size); + + printf("Test erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((u8 *) encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((u8 *) invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((u8 *) decode_matrix, m, k); + + printf("recov %d:", src_err_list[i]); + dump(temp_buffs[k + i], align); + printf("orig :"); + dump(efence_buffs[src_err_list[i]], align); + return -1; + } + } + } + + } + + // Test rand ptr alignment if available + + for (rtest = 0; rtest < RANDOMS; rtest++) { + while ((m = (rand() % MMAX)) < 2) ; + while ((k = (rand() % KMAX)) >= m || k < 1) ; + + if (m > MMAX || k > KMAX) + continue; + + size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~15; + + offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B; + // Add random offsets + for (i = 0; i < m; i++) { + memset(buffs[i], 0, TEST_LEN); // zero pad to check write-over + memset(temp_buffs[i], 0, TEST_LEN); // zero pad to check write-over + ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); + temp_ubuffs[i] = temp_buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); + } + + for (i = 0; i < k; i++) + for (j = 0; j < size; j++) + ubuffs[i][j] = rand(); + + // The matrix generated by gf_gen_cauchy1_matrix + // is always invertable. 
+ gf_gen_cauchy1_matrix(encode_matrix, m, k); + + // Make parity vects + // Generate g_tbls from encode matrix a + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix a + ec_encode_data(size, k, m - k, g_tbls, ubuffs, &ubuffs[k]); + + // Random errors + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, + src_in_err, nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = ubuffs[decode_index[i]]; + } + + // Recover data + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + ec_encode_data(size, k, nerrs, g_tbls, recov, &temp_ubuffs[k]); + + for (i = 0; i < nerrs; i++) { + + if (0 != memcmp(temp_ubuffs[k + i], ubuffs[src_err_list[i]], size)) { + printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); + printf(" - erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((unsigned char *)encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((unsigned char *)invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((unsigned char *)decode_matrix, m, k); + printf("orig data:\n"); + dump_matrix(ubuffs, m, 25); + printf("orig :"); + dump(ubuffs[src_err_list[i]], 25); + printf("recov %d:", src_err_list[i]); + dump(temp_ubuffs[k + i], 25); + return -1; + } + } + + // Confirm that padding around dests is unchanged + memset(temp_buffs[0], 0, PTR_ALIGN_CHK_B); // Make reference zero buff + + for (i = 0; i < m; i++) { + + offset = ubuffs[i] - buffs[i]; + + if (memcmp(buffs[i], temp_buffs[0], offset)) { + printf("Fail rand ualign encode pad start\n"); + return -1; + } + if (memcmp + (buffs[i] + offset + size, temp_buffs[0], + PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign encode pad end\n"); + return -1; + } + } + + for (i = 0; i < nerrs; i++) { + + offset = temp_ubuffs[k + i] - temp_buffs[k + i]; + if (memcmp(temp_buffs[k + i], temp_buffs[0], offset)) { + printf("Fail rand ualign decode pad start\n"); + return -1; + } + if (memcmp + (temp_buffs[k + i] + offset + size, temp_buffs[0], + PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign decode pad end\n"); + return -1; + } + } + + putchar('.'); + } + + // Test size alignment + + align = (LEN_ALIGN_CHK_B != 0) ? 13 : 16; + + for (size = TEST_LEN; size > 0; size -= align) { + while ((m = (rand() % MMAX)) < 2) ; + while ((k = (rand() % KMAX)) >= m || k < 1) ; + + if (m > MMAX || k > KMAX) + continue; + + for (i = 0; i < k; i++) + for (j = 0; j < size; j++) + buffs[i][j] = rand(); + + // The matrix generated by gf_gen_cauchy1_matrix + // is always invertable. 
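The unaligned-pointer pass above plants each destination at a random offset inside a zeroed allocation, then proves the kernel wrote only [offset, offset + size) by comparing both pads against a zero reference buffer. A compact restatement of that check, assuming the same PTR_ALIGN_CHK_B bound of 16 used here:

#include <string.h>

/* Returns 1 if only [dest, dest + size) was written inside the zeroed
 * allocation starting at buf; chk mirrors PTR_ALIGN_CHK_B. */
static int pad_intact(unsigned char *buf, unsigned char *dest,
                      size_t size, size_t chk)
{
    static unsigned char zero[16];  /* all-zero reference, as in the test */
    size_t off = dest - buf;

    if (memcmp(buf, zero, off))
        return 0;       /* write before the destination */
    if (memcmp(dest + size, zero, chk - off))
        return 0;       /* write past the end */
    return 1;
}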
+ gf_gen_cauchy1_matrix(encode_matrix, m, k); + + // Make parity vects + // Generate g_tbls from encode matrix a + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix a + ec_encode_data(size, k, m - k, g_tbls, buffs, &buffs[k]); + + // Random errors + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, + src_in_err, nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = buffs[decode_index[i]]; + } + + // Recover data + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + ec_encode_data(size, k, nerrs, g_tbls, recov, &temp_buffs[k]); + + for (i = 0; i < nerrs; i++) { + + if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], size)) { + printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); + printf(" - erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((unsigned char *)encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((unsigned char *)invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((unsigned char *)decode_matrix, m, k); + printf("orig data:\n"); + dump_matrix(buffs, m, 25); + printf("orig :"); + dump(buffs[src_err_list[i]], 25); + printf("recov %d:", src_err_list[i]); + dump(temp_buffs[k + i], 25); + return -1; + } + } + } + + printf("done EC tests: Pass\n"); + return 0; +} diff --git a/src/isa-l/erasure_code/erasure_code_update_perf.c b/src/isa-l/erasure_code/erasure_code_update_perf.c new file mode 100644 index 000000000..909e89414 --- /dev/null +++ b/src/isa-l/erasure_code/erasure_code_update_perf.c @@ -0,0 +1,281 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
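One detail of the size-alignment sweep that just closed erasure_code_test.c: when unaligned lengths are allowed, the length steps down by 13 rather than 16, and since gcd(13, 16) = 1 the sweep visits every length residue mod 16, forcing every SIMD tail path. A quick standalone check of that claim:

#include <stdio.h>

int main(void)
{
    int seen[16] = { 0 }, size, n = 0;

    for (size = 8192; size > 0; size -= 13)
        if (!seen[size % 16]) {
            seen[size % 16] = 1;
            n++;
        }
    printf("residues mod 16 covered: %d/16\n", n);  /* prints 16/16 */
    return 0;
}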
IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>	// for memset, memcmp
+#include "erasure_code.h"
+#include "types.h"
+#include "test.h"
+
+//By default, test multibinary version
+#ifndef FUNCTION_UNDER_TEST
+# define FUNCTION_UNDER_TEST ec_encode_data_update
+# define REF_FUNCTION ec_encode_data
+#endif
+
+//By default, test EC(8+4)
+#if (!defined(VECT))
+# define VECT 4
+#endif
+
+#define str(s) #s
+#define xstr(s) str(s)
+
+//#define CACHED_TEST
+#ifdef CACHED_TEST
+// Cached test, loop many times over small dataset
+# define TEST_SOURCES 32
+# define TEST_LEN(m) ((128*1024 / m) & ~(64-1))
+# define TEST_TYPE_STR "_warm"
+#else
+# ifndef TEST_CUSTOM
+// Uncached test. Pull from large mem base.
+# define TEST_SOURCES 32
+# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
+# define TEST_LEN(m) ((GT_L3_CACHE / m) & ~(64-1))
+# define TEST_TYPE_STR "_cold"
+# else
+# define TEST_TYPE_STR "_cus"
+# endif
+#endif
+
+#define MMAX TEST_SOURCES
+#define KMAX TEST_SOURCES
+
+typedef unsigned char u8;
+
+void dump(unsigned char *buf, int len)
+{
+    int i;
+    for (i = 0; i < len;) {
+        printf(" %2x", 0xff & buf[i++]);
+        if (i % 32 == 0)
+            printf("\n");
+    }
+    printf("\n");
+}
+
+void encode_update_test_ref(int m, int k, u8 * g_tbls, u8 ** buffs, u8 * a)
+{
+    ec_init_tables(k, m - k, &a[k * k], g_tbls);
+    REF_FUNCTION(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
+}
+
+void encode_update_test(int m, int k, u8 * g_tbls, u8 ** perf_update_buffs, u8 * a)
+{
+    int i;
+
+    // Make parity vects
+    ec_init_tables(k, m - k, &a[k * k], g_tbls);
+    for (i = 0; i < k; i++) {
+        FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, i, g_tbls,
+                            perf_update_buffs[i], &perf_update_buffs[k]);
+    }
+}
+
+int decode_test(int m, int k, u8 ** update_buffs, u8 ** recov, u8 * a, u8 * src_in_err,
+                u8 * src_err_list, int nerrs, u8 * g_tbls, u8 ** perf_update_buffs)
+{
+    int i, j, r;
+    u8 b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX];
+    // Construct b by removing error rows
+    for (i = 0, r = 0; i < k; i++, r++) {
+        while (src_in_err[r])
+            r++;
+        recov[i] = update_buffs[r];
+        for (j = 0; j < k; j++)
+            b[k * i + j] = a[k * r + j];
+    }
+
+    if (gf_invert_matrix(b, d, k) < 0) {
+        printf("BAD MATRIX\n");
+        return -1;
+    }
+
+    for (i = 0; i < nerrs; i++)
+        for (j = 0; j < k; j++)
+            c[k * i + j] = d[k * src_err_list[i] + j];
+
+    // Recover data
+    ec_init_tables(k, nerrs, c, g_tbls);
+    for (i = 0; i < k; i++) {
+        FUNCTION_UNDER_TEST(TEST_LEN(m), k, nerrs, i, g_tbls, recov[i],
+                            perf_update_buffs);
+    }
+    return 0;
+}
+
+int main(int argc, char *argv[])
+{
+    int i, j, check, m, k, nerrs;
+    void *buf;
+    u8 *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES];
+    u8 *update_buffs[TEST_SOURCES];
+    u8 *perf_update_buffs[TEST_SOURCES];
+    u8 a[MMAX * KMAX];
+    u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
+    u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
+    struct perf start;
+
+    // Pick test parameters
+    k = 10;
+    m = k + VECT;
+    nerrs = VECT;
+
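The two helpers defined earlier in this file state the update API's contract: encode_update_test_ref produces parity with one ec_encode_data call, encode_update_test with one ec_encode_data_update call per source, and main() memcmp's the results. Distilled into a hedged sketch (g_tbls already initialized via ec_init_tables; update destinations must start zeroed, as the memsets below note; names are mine):

/* Illustrative only; API as used in this file. */
void encode_both_ways(int len, int k, int p, unsigned char *g_tbls,
                      unsigned char **src, unsigned char **par_once,
                      unsigned char **par_incr)
{
    int i;

    ec_encode_data(len, k, p, g_tbls, src, par_once);   /* whole stripe */
    for (i = 0; i < k; i++)     /* fold in one source at a time */
        ec_encode_data_update(len, k, p, i, g_tbls, src[i], par_incr);
    /* afterwards par_once[j] == par_incr[j] for j = 0..p-1 */
}

This is what makes the update form useful in practice: parity can be accumulated as source fragments arrive, without buffering the whole stripe.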
const u8 err_list[] = { 0, 2, 4, 5, 7, 8 }; + + printf(xstr(FUNCTION_UNDER_TEST) "_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs); + + if (m > MMAX || k > KMAX || nerrs > (m - k)) { + printf(" Input test parameter error\n"); + return -1; + } + + memcpy(src_err_list, err_list, nerrs); + memset(src_in_err, 0, TEST_SOURCES); + for (i = 0; i < nerrs; i++) + src_in_err[src_err_list[i]] = 1; + + // Allocate the arrays + for (i = 0; i < m; i++) { + if (posix_memalign(&buf, 64, TEST_LEN(m))) { + printf("alloc error: Fail\n"); + return -1; + } + buffs[i] = buf; + } + + for (i = 0; i < (m - k); i++) { + if (posix_memalign(&buf, 64, TEST_LEN(m))) { + printf("alloc error: Fail\n"); + return -1; + } + temp_buffs[i] = buf; + memset(temp_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer to be zero for update function + } + + for (i = 0; i < TEST_SOURCES; i++) { + if (posix_memalign(&buf, 64, TEST_LEN(m))) { + printf("alloc error: Fail"); + return -1; + } + update_buffs[i] = buf; + memset(update_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer to be zero for update function + } + for (i = 0; i < TEST_SOURCES; i++) { + if (posix_memalign(&buf, 64, TEST_LEN(m))) { + printf("alloc error: Fail"); + return -1; + } + perf_update_buffs[i] = buf; + memset(perf_update_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer to be zero for update function + } + + // Make random data + for (i = 0; i < k; i++) + for (j = 0; j < TEST_LEN(m); j++) { + buffs[i][j] = rand(); + update_buffs[i][j] = buffs[i][j]; + } + + gf_gen_rs_matrix(a, m, k); + + encode_update_test_ref(m, k, g_tbls, buffs, a); + encode_update_test(m, k, g_tbls, update_buffs, a); + for (i = 0; i < m - k; i++) { + if (0 != memcmp(update_buffs[k + i], buffs[k + i], TEST_LEN(m))) { + printf("\nupdate_buffs%d :", i); + dump(update_buffs[k + i], 25); + printf("buffs%d :", i); + dump(buffs[k + i], 25); + return -1; + } + } + +#ifdef DO_REF_PERF + // Start encode test + BENCHMARK(&start, BENCHMARK_TIME, encode_update_test_ref(m, k, g_tbls, buffs, a)); + printf(xstr(REF_FUNCTION) TEST_TYPE_STR ": "); + perf_print(start, (long long)(TEST_LEN(m)) * (m)); +#endif + + // Start encode test + BENCHMARK(&start, BENCHMARK_TIME, + encode_update_test(m, k, g_tbls, perf_update_buffs, a)); + printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": "); + perf_print(start, (long long)(TEST_LEN(m)) * (m)); + + // Start encode test + BENCHMARK(&start, BENCHMARK_TIME, + // Make parity vects + ec_init_tables(k, m - k, &a[k * k], g_tbls); + FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, 0, g_tbls, perf_update_buffs[0], + &perf_update_buffs[k])); + printf(xstr(FUNCTION_UNDER_TEST) "_single_src" TEST_TYPE_STR ": "); + perf_print(start, (long long)(TEST_LEN(m)) * (m - k + 1)); + + // Start encode test + BENCHMARK(&start, BENCHMARK_TIME, + // Make parity vects + FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, 0, g_tbls, perf_update_buffs[0], + &perf_update_buffs[k])); + printf(xstr(FUNCTION_UNDER_TEST) "_single_src_simple" TEST_TYPE_STR ": "); + perf_print(start, (long long)(TEST_LEN(m)) * (m - k + 1)); + + for (i = k; i < m; i++) { + memset(update_buffs[i], 0, TEST_LEN(m)); // initialize the destination buffer to be zero for update function + } + for (i = 0; i < k; i++) { + FUNCTION_UNDER_TEST(TEST_LEN(m), k, m - k, i, g_tbls, update_buffs[i], + &update_buffs[k]); + } + + decode_test(m, k, update_buffs, recov, a, src_in_err, src_err_list, + nerrs, g_tbls, temp_buffs); + BENCHMARK(&start, BENCHMARK_TIME, check = + decode_test(m, k, update_buffs, recov, a, src_in_err, 
src_err_list, + nerrs, g_tbls, perf_update_buffs)); + if (check) { + printf("BAD_MATRIX\n"); + return -1; + } + + for (i = 0; i < nerrs; i++) { + if (0 != memcmp(temp_buffs[i], update_buffs[src_err_list[i]], TEST_LEN(m))) { + printf("Fail error recovery (%d, %d, %d) - \n", m, k, nerrs); + return -1; + } + } + + printf(xstr(FUNCTION_UNDER_TEST) "_decode" TEST_TYPE_STR ": "); + perf_print(start, (long long)(TEST_LEN(m)) * (k + nerrs)); + + printf("done all: Pass\n"); + return 0; +} diff --git a/src/isa-l/erasure_code/erasure_code_update_test.c b/src/isa-l/erasure_code/erasure_code_update_test.c new file mode 100644 index 000000000..f30a6a29b --- /dev/null +++ b/src/isa-l/erasure_code/erasure_code_update_test.c @@ -0,0 +1,959 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>	// for memset, memcmp
+#include "erasure_code.h"
+#include "types.h"
+
+#ifndef ALIGN_SIZE
+# define ALIGN_SIZE 16
+#endif
+
+//By default, test multibinary version
+#ifndef FUNCTION_UNDER_TEST
+# define FUNCTION_UNDER_TEST ec_encode_data_update
+# define REF_FUNCTION ec_encode_data
+#endif
+
+#define TEST_LEN 8192
+#define TEST_SIZE (TEST_LEN/2)
+
+#ifndef TEST_SOURCES
+# define TEST_SOURCES 127
+#endif
+#ifndef RANDOMS
+# define RANDOMS 200
+#endif
+
+#define MMAX TEST_SOURCES
+#define KMAX TEST_SOURCES
+
+#define EFENCE_TEST_MAX_SIZE 0x100
+
+#ifdef EC_ALIGNED_ADDR
+// Define power of 2 range to check ptr, len alignment
+# define PTR_ALIGN_CHK_B 0
+# define LEN_ALIGN_CHK_B 0	// 0 for aligned only
+#else
+// Define power of 2 range to check ptr, len alignment
+# define PTR_ALIGN_CHK_B ALIGN_SIZE
+# define LEN_ALIGN_CHK_B ALIGN_SIZE	// 0 for aligned only
+#endif
+
+#ifndef TEST_SEED
+#define TEST_SEED 11
+#endif
+
+#define str(s) #s
+#define xstr(s) str(s)
+
+typedef unsigned char u8;
+
+void dump(unsigned char *buf, int len)
+{
+    int i;
+    for (i = 0; i < len;) {
+        printf(" %2x", 0xff & buf[i++]);
+        if (i % 32 == 0)
+            printf("\n");
+    }
+    printf("\n");
+}
+
+void dump_matrix(unsigned char **s, int k, int m)
+{
+    int i, j;
+    for (i = 0; i < k; i++) {
+        for (j = 0; j < m; j++) {
+            printf(" %2x", s[i][j]);
+        }
+        printf("\n");
+    }
+    printf("\n");
+}
+
+void dump_u8xu8(unsigned char *s, int k, int m)
+{
+    int i, j;
+    for (i = 0; i < k; i++) {
+        for (j = 0; j < m; j++) {
+            printf(" %2x", 0xff & s[j + (i * m)]);
+        }
+        printf("\n");
+    }
+    printf("\n");
+}
+
+// Generate Random errors
+static void gen_err_list(unsigned char *src_err_list,
+                         unsigned char *src_in_err, int *pnerrs, int *pnsrcerrs, int k, int m)
+{
+    int i, err;
+    int nerrs = 0, nsrcerrs = 0;
+
+    for (i = 0, nerrs = 0, nsrcerrs = 0; i < m && nerrs < m - k; i++) {
+        err = 1 & rand();
+        src_in_err[i] = err;
+        if (err) {
+            src_err_list[nerrs++] = i;
+            if (i < k) {
+                nsrcerrs++;
+            }
+        }
+    }
+    if (nerrs == 0) {	// should have at least one error
+        while ((err = (rand() % KMAX)) >= m) ;
+        src_err_list[nerrs++] = err;
+        src_in_err[err] = 1;
+        if (err < k)
+            nsrcerrs = 1;
+    }
+    *pnerrs = nerrs;
+    *pnsrcerrs = nsrcerrs;
+    return;
+}
+
+#define NO_INVERT_MATRIX -2
+// Generate decode matrix from encode matrix
+static int gf_gen_decode_matrix(unsigned char *encode_matrix,
+                                unsigned char *decode_matrix,
+                                unsigned char *invert_matrix,
+                                unsigned int *decode_index,
+                                unsigned char *src_err_list,
+                                unsigned char *src_in_err,
+                                int nerrs, int nsrcerrs, int k, int m)
+{
+    int i, j, p;
+    int r;
+    unsigned char *backup, *b, s;
+    int incr = 0;
+
+    b = malloc(MMAX * KMAX);
+    backup = malloc(MMAX * KMAX);
+
+    if (b == NULL || backup == NULL) {
+        printf("Test failure! 
Error with malloc\n"); + free(b); + free(backup); + return -1; + } + // Construct matrix b by removing error rows + for (i = 0, r = 0; i < k; i++, r++) { + while (src_in_err[r]) + r++; + for (j = 0; j < k; j++) { + b[k * i + j] = encode_matrix[k * r + j]; + backup[k * i + j] = encode_matrix[k * r + j]; + } + decode_index[i] = r; + } + incr = 0; + while (gf_invert_matrix(b, invert_matrix, k) < 0) { + if (nerrs == (m - k)) { + free(b); + free(backup); + printf("BAD MATRIX\n"); + return NO_INVERT_MATRIX; + } + incr++; + memcpy(b, backup, MMAX * KMAX); + for (i = nsrcerrs; i < nerrs - nsrcerrs; i++) { + if (src_err_list[i] == (decode_index[k - 1] + incr)) { + // skip the erased parity line + incr++; + continue; + } + } + if (decode_index[k - 1] + incr >= m) { + free(b); + free(backup); + printf("BAD MATRIX\n"); + return NO_INVERT_MATRIX; + } + decode_index[k - 1] += incr; + for (j = 0; j < k; j++) + b[k * (k - 1) + j] = encode_matrix[k * decode_index[k - 1] + j]; + + }; + + for (i = 0; i < nsrcerrs; i++) { + for (j = 0; j < k; j++) { + decode_matrix[k * i + j] = invert_matrix[k * src_err_list[i] + j]; + } + } + /* src_err_list from encode_matrix * invert of b for parity decoding */ + for (p = nsrcerrs; p < nerrs; p++) { + for (i = 0; i < k; i++) { + s = 0; + for (j = 0; j < k; j++) + s ^= gf_mul(invert_matrix[j * k + i], + encode_matrix[k * src_err_list[p] + j]); + + decode_matrix[k * p + i] = s; + } + } + free(b); + free(backup); + return 0; +} + +int main(int argc, char *argv[]) +{ + int re = 0; + int i, j, p, rtest, m, k; + int nerrs, nsrcerrs; + void *buf; + unsigned int decode_index[MMAX]; + unsigned char *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES]; + unsigned char *update_buffs[TEST_SOURCES]; + unsigned char *encode_matrix, *decode_matrix, *invert_matrix, *g_tbls; + unsigned char src_in_err[TEST_SOURCES], src_err_list[TEST_SOURCES]; + unsigned char *recov[TEST_SOURCES]; + + int rows, align, size; + unsigned char *efence_buffs[TEST_SOURCES]; + unsigned char *efence_update_buffs[TEST_SOURCES]; + unsigned int offset; + u8 *ubuffs[TEST_SOURCES]; + u8 *update_ubuffs[TEST_SOURCES]; + u8 *temp_ubuffs[TEST_SOURCES]; + + printf("test " xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN); + srand(TEST_SEED); + + // Allocate the arrays + for (i = 0; i < TEST_SOURCES; i++) { + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + buffs[i] = buf; + } + + for (i = 0; i < TEST_SOURCES; i++) { + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + temp_buffs[i] = buf; + memset(temp_buffs[i], 0, TEST_LEN); // initialize the destination buffer to be zero for update function + } + + for (i = 0; i < TEST_SOURCES; i++) { + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + update_buffs[i] = buf; + memset(update_buffs[i], 0, TEST_LEN); // initialize the destination buffer to be zero for update function + } + // Test erasure code by encode and recovery + + encode_matrix = malloc(MMAX * KMAX); + decode_matrix = malloc(MMAX * KMAX); + invert_matrix = malloc(MMAX * KMAX); + g_tbls = malloc(KMAX * TEST_SOURCES * 32); + if (encode_matrix == NULL || decode_matrix == NULL + || invert_matrix == NULL || g_tbls == NULL) { + printf("Test failure! 
Error with malloc\n"); + return -1; + } + // Pick a first test + m = 14; + k = 10; + if (m > MMAX || k > KMAX) + return -1; + + // Make random data + for (i = 0; i < k; i++) { + for (j = 0; j < TEST_LEN; j++) { + buffs[i][j] = rand(); + update_buffs[i][j] = buffs[i][j]; + } + } + + // Generate encode matrix encode_matrix + // The matrix generated by gf_gen_rs_matrix + // is not always invertable. + gf_gen_rs_matrix(encode_matrix, m, k); + + // Generate g_tbls from encode matrix encode_matrix + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix encode_matrix + REF_FUNCTION(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]); + for (i = 0; i < k; i++) { + FUNCTION_UNDER_TEST(TEST_LEN, k, m - k, i, g_tbls, update_buffs[i], + &update_buffs[k]); + } + for (i = 0; i < m - k; i++) { + if (0 != memcmp(update_buffs[k + i], buffs[k + i], TEST_LEN)) { + printf("\nupdate_buffs%d :", i); + dump(update_buffs[k + i], 25); + printf("buffs%d :", i); + dump(buffs[k + i], 25); + return -1; + } + } + + // Choose random buffers to be in erasure + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, src_in_err, + nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = update_buffs[decode_index[i]]; + } + + // Recover data + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + REF_FUNCTION(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]); + for (i = 0; i < nerrs; i++) { + + if (0 != memcmp(temp_buffs[k + i], update_buffs[src_err_list[i]], TEST_LEN)) { + printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs); + printf(" - erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((u8 *) encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((u8 *) invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((u8 *) decode_matrix, m, k); + printf("recov %d:", src_err_list[i]); + dump(temp_buffs[k + i], 25); + printf("orig :"); + dump(update_buffs[src_err_list[i]], 25); + return -1; + } + } + putchar('.'); + + // Pick a first test + m = 7; + k = 5; + if (m > MMAX || k > KMAX) + return -1; + + // Zero the destination buffer for update function + for (i = k; i < TEST_SOURCES; i++) { + memset(buffs[i], 0, TEST_LEN); + memset(update_buffs[i], 0, TEST_LEN); + } + // Make random data + for (i = 0; i < k; i++) { + for (j = 0; j < TEST_LEN; j++) { + buffs[i][j] = rand(); + update_buffs[i][j] = buffs[i][j]; + } + } + + // The matrix generated by gf_gen_cauchy1_matrix + // is always invertable. 
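gf_gen_decode_matrix above reduces to: keep the first k surviving rows of the encode matrix, invert that k x k block, and take decode rows directly from the inverse (erased sources) or as inverse-times-encode-row dot products over gf_mul (erased parity). A sketch of the sources-only case under those assumptions:

#include <string.h>
#include "erasure_code.h"

/* Hedged sketch: assumes every erasure is a source row (index < k) and
 * the survivor block inverts on the first try; the parity-row
 * substitution loop above covers the cases this skips. */
static int simple_decode_matrix(unsigned char *a, unsigned char *d_mat,
                                unsigned char *inv, unsigned int *idx,
                                unsigned char *in_err, unsigned char *errs,
                                int nerrs, int k)
{
    unsigned char b[128 * 128];	/* assumes k <= 128 */
    int i, r;

    for (i = 0, r = 0; i < k; i++, r++) {	/* first k surviving rows */
        while (in_err[r])
            r++;
        idx[i] = r;
        memcpy(&b[k * i], &a[k * r], k);
    }
    if (gf_invert_matrix(b, inv, k) < 0)
        return -1;
    for (i = 0; i < nerrs; i++)	/* decode row = inverse row of the erasure */
        memcpy(&d_mat[k * i], &inv[k * errs[i]], k);
    return 0;
}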
+ gf_gen_cauchy1_matrix(encode_matrix, m, k); + + // Generate g_tbls from encode matrix encode_matrix + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix encode_matrix + REF_FUNCTION(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]); + for (i = 0; i < k; i++) { + FUNCTION_UNDER_TEST(TEST_LEN, k, m - k, i, g_tbls, update_buffs[i], + &update_buffs[k]); + } + for (i = 0; i < m - k; i++) { + if (0 != memcmp(update_buffs[k + i], buffs[k + i], TEST_LEN)) { + printf("\nupdate_buffs%d :", i); + dump(update_buffs[k + i], 25); + printf("buffs%d :", i); + dump(buffs[k + i], 25); + return -1; + } + } + + // Choose random buffers to be in erasure + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, src_in_err, + nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = update_buffs[decode_index[i]]; + } + + // Recover data + for (i = 0; i < TEST_SOURCES; i++) { + memset(temp_buffs[i], 0, TEST_LEN); + } + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + for (i = 0; i < k; i++) { + FUNCTION_UNDER_TEST(TEST_LEN, k, nerrs, i, g_tbls, recov[i], &temp_buffs[k]); + } + for (i = 0; i < nerrs; i++) { + + if (0 != memcmp(temp_buffs[k + i], update_buffs[src_err_list[i]], TEST_LEN)) { + printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs); + printf(" - erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((u8 *) encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((u8 *) invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((u8 *) decode_matrix, m, k); + printf("recov %d:", src_err_list[i]); + dump(temp_buffs[k + i], 25); + printf("orig :"); + dump(update_buffs[src_err_list[i]], 25); + return -1; + } + } + putchar('.'); + + // Do more random tests + for (rtest = 0; rtest < RANDOMS; rtest++) { + while ((m = (rand() % MMAX)) < 2) ; + while ((k = (rand() % KMAX)) >= m || k < 1) ; + + if (m > MMAX || k > KMAX) + continue; + + // Zero the destination buffer for update function + for (i = k; i < TEST_SOURCES; i++) { + memset(buffs[i], 0, TEST_LEN); + memset(update_buffs[i], 0, TEST_LEN); + } + // Make random data + for (i = 0; i < k; i++) { + for (j = 0; j < TEST_LEN; j++) { + buffs[i][j] = rand(); + update_buffs[i][j] = buffs[i][j]; + } + } + + // The matrix generated by gf_gen_cauchy1_matrix + // is always invertable. 
+ gf_gen_cauchy1_matrix(encode_matrix, m, k); + + // Make parity vects + // Generate g_tbls from encode matrix a + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix a + REF_FUNCTION(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]); + for (i = 0; i < k; i++) { + FUNCTION_UNDER_TEST(TEST_LEN, k, m - k, i, g_tbls, update_buffs[i], + &update_buffs[k]); + } + for (i = 0; i < m - k; i++) { + if (0 != memcmp(update_buffs[k + i], buffs[k + i], TEST_LEN)) { + printf("\nupdate_buffs%d :", i); + dump(update_buffs[k + i], 25); + printf("buffs%d :", i); + dump(buffs[k + i], 25); + return -1; + } + } + + // Random errors + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, + src_in_err, nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = update_buffs[decode_index[i]]; + } + + // Recover data + for (i = 0; i < TEST_SOURCES; i++) { + memset(temp_buffs[i], 0, TEST_LEN); + } + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + for (i = 0; i < k; i++) { + FUNCTION_UNDER_TEST(TEST_LEN, k, nerrs, i, g_tbls, recov[i], + &temp_buffs[k]); + } + + for (i = 0; i < nerrs; i++) { + + if (0 != + memcmp(temp_buffs[k + i], update_buffs[src_err_list[i]], + TEST_LEN)) { + printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); + printf(" - erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((u8 *) encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((u8 *) invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((u8 *) decode_matrix, m, k); + printf("orig data:\n"); + dump_matrix(update_buffs, m, 25); + printf("orig :"); + dump(update_buffs[src_err_list[i]], 25); + printf("recov %d:", src_err_list[i]); + dump(temp_buffs[k + i], 25); + return -1; + } + } + putchar('.'); + } + + // Run tests at end of buffer for Electric Fence + k = 16; + align = (LEN_ALIGN_CHK_B != 0) ? 1 : ALIGN_SIZE; + if (k > KMAX) + return -1; + + for (rows = 1; rows <= 16; rows++) { + m = k + rows; + if (m > MMAX) + return -1; + + for (i = k; i < TEST_SOURCES; i++) { + memset(buffs[i], 0, TEST_LEN); + memset(update_buffs[i], 0, TEST_LEN); + } + // Make random data + for (i = 0; i < k; i++) { + for (j = 0; j < TEST_LEN; j++) { + buffs[i][j] = rand(); + update_buffs[i][j] = buffs[i][j]; + } + } + + for (size = 0; size <= EFENCE_TEST_MAX_SIZE; size += align) { + for (i = 0; i < m; i++) { // Line up TEST_SIZE from end + efence_buffs[i] = buffs[i] + TEST_LEN - size; + efence_update_buffs[i] = update_buffs[i] + TEST_LEN - size; + } + // Zero the destination buffer for update function + for (i = k; i < m; i++) { + memset(efence_buffs[i], 0, size); + memset(efence_update_buffs[i], 0, size); + } + + // The matrix generated by gf_gen_cauchy1_matrix + // is always invertable. 
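Decoding with the update entry point mirrors encoding: initialize g_tbls from the decode matrix, then fold in each surviving buffer by its position, exactly as the recovery loops above do with FUNCTION_UNDER_TEST. Distilled under the same zeroed-destination assumption (wrapper name is mine):

/* Recover nerrs erasures from k survivors with the incremental API. */
void recover_incremental(int len, int k, int nerrs,
                         unsigned char *decode_matrix, unsigned char *g_tbls,
                         unsigned char **recov, unsigned char **out)
{
    int i;

    ec_init_tables(k, nerrs, decode_matrix, g_tbls);
    for (i = 0; i < k; i++)	/* out[] must be zeroed beforehand */
        ec_encode_data_update(len, k, nerrs, i, g_tbls, recov[i], out);
}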
+ gf_gen_cauchy1_matrix(encode_matrix, m, k); + + // Make parity vects + // Generate g_tbls from encode matrix a + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix a + REF_FUNCTION(size, k, m - k, g_tbls, efence_buffs, &efence_buffs[k]); + for (i = 0; i < k; i++) { + FUNCTION_UNDER_TEST(size, k, m - k, i, g_tbls, + efence_update_buffs[i], + &efence_update_buffs[k]); + } + for (i = 0; i < m - k; i++) { + if (0 != + memcmp(efence_update_buffs[k + i], efence_buffs[k + i], + size)) { + printf("\nefence_update_buffs%d :", i); + dump(efence_update_buffs[k + i], 25); + printf("efence_buffs%d :", i); + dump(efence_buffs[k + i], 25); + return -1; + } + } + + // Random errors + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, + src_in_err, nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = efence_update_buffs[decode_index[i]]; + } + + // Recover data + for (i = 0; i < TEST_SOURCES; i++) { + memset(temp_buffs[i], 0, TEST_LEN); + } + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + for (i = 0; i < k; i++) { + FUNCTION_UNDER_TEST(size, k, nerrs, i, g_tbls, recov[i], + &temp_buffs[k]); + } + + for (i = 0; i < nerrs; i++) { + + if (0 != + memcmp(temp_buffs[k + i], + efence_update_buffs[src_err_list[i]], size)) { + printf("Efence: Fail error recovery (%d, %d, %d)\n", m, + k, nerrs); + + printf("size = %d\n", size); + + printf("Test erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((u8 *) encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((u8 *) invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((u8 *) decode_matrix, m, k); + + printf("recov %d:", src_err_list[i]); + dump(temp_buffs[k + i], align); + printf("orig :"); + dump(efence_update_buffs[src_err_list[i]], align); + return -1; + } + } + } + putchar('.'); + + } + + // Test rand ptr alignment if available + + for (rtest = 0; rtest < RANDOMS; rtest++) { + while ((m = (rand() % MMAX)) < 2) ; + while ((k = (rand() % KMAX)) >= m || k < 1) ; + + if (m > MMAX || k > KMAX) + continue; + + size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~15; + + offset = (PTR_ALIGN_CHK_B != 0) ? 
1 : PTR_ALIGN_CHK_B; + // Add random offsets + for (i = 0; i < m; i++) { + memset(buffs[i], 0, TEST_LEN); // zero pad to check write-over + memset(update_buffs[i], 0, TEST_LEN); // zero pad to check write-over + memset(temp_buffs[i], 0, TEST_LEN); // zero pad to check write-over + ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); + update_ubuffs[i] = + update_buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); + temp_ubuffs[i] = temp_buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); + } + + // Zero the destination buffer for update function + for (i = k; i < m; i++) { + memset(ubuffs[i], 0, size); + memset(update_ubuffs[i], 0, size); + } + // Make random data + for (i = 0; i < k; i++) { + for (j = 0; j < size; j++) { + ubuffs[i][j] = rand(); + update_ubuffs[i][j] = ubuffs[i][j]; + } + } + + // The matrix generated by gf_gen_cauchy1_matrix + // is always invertable. + gf_gen_cauchy1_matrix(encode_matrix, m, k); + + // Make parity vects + // Generate g_tbls from encode matrix a + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix a + REF_FUNCTION(size, k, m - k, g_tbls, ubuffs, &ubuffs[k]); + for (i = 0; i < k; i++) { + FUNCTION_UNDER_TEST(size, k, m - k, i, g_tbls, update_ubuffs[i], + &update_ubuffs[k]); + } + for (i = 0; i < m - k; i++) { + if (0 != memcmp(update_ubuffs[k + i], ubuffs[k + i], size)) { + printf("\nupdate_ubuffs%d :", i); + dump(update_ubuffs[k + i], 25); + printf("ubuffs%d :", i); + dump(ubuffs[k + i], 25); + return -1; + } + } + + // Random errors + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, + src_in_err, nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = update_ubuffs[decode_index[i]]; + } + + // Recover data + for (i = 0; i < m; i++) { + memset(temp_ubuffs[i], 0, size); + } + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + for (i = 0; i < k; i++) { + FUNCTION_UNDER_TEST(size, k, nerrs, i, g_tbls, recov[i], + &temp_ubuffs[k]); + } + + for (i = 0; i < nerrs; i++) { + + if (0 != + memcmp(temp_ubuffs[k + i], update_ubuffs[src_err_list[i]], size)) { + printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); + printf(" - erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((unsigned char *)encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((unsigned char *)invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((unsigned char *)decode_matrix, m, k); + printf("orig data:\n"); + dump_matrix(update_ubuffs, m, 25); + printf("orig :"); + dump(update_ubuffs[src_err_list[i]], 25); + printf("recov %d:", src_err_list[i]); + dump(temp_ubuffs[k + i], 25); + return -1; + } + } + + // Confirm that padding around dests is unchanged + memset(temp_buffs[0], 0, PTR_ALIGN_CHK_B); // Make reference zero buff + + for (i = 0; i < m; i++) { + + offset = update_ubuffs[i] - update_buffs[i]; + + if (memcmp(update_buffs[i], temp_buffs[0], offset)) { + printf("Fail rand ualign encode pad start\n"); + return 
-1; + } + if (memcmp + (update_buffs[i] + offset + size, temp_buffs[0], + PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign encode pad end\n"); + return -1; + } + } + + for (i = 0; i < nerrs; i++) { + + offset = temp_ubuffs[k + i] - temp_buffs[k + i]; + if (memcmp(temp_buffs[k + i], temp_buffs[0], offset)) { + printf("Fail rand ualign decode pad start\n"); + return -1; + } + if (memcmp + (temp_buffs[k + i] + offset + size, temp_buffs[0], + PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign decode pad end\n"); + return -1; + } + } + + putchar('.'); + } + + // Test size alignment + + align = (LEN_ALIGN_CHK_B != 0) ? 13 : ALIGN_SIZE; + + for (size = TEST_LEN; size >= 0; size -= align) { + while ((m = (rand() % MMAX)) < 2) ; + while ((k = (rand() % KMAX)) >= m || k < 1) ; + + if (m > MMAX || k > KMAX) + continue; + + // Zero the destination buffer for update function + for (i = k; i < TEST_SOURCES; i++) { + memset(buffs[i], 0, size); + memset(update_buffs[i], 0, size); + } + // Make random data + for (i = 0; i < k; i++) { + for (j = 0; j < size; j++) { + buffs[i][j] = rand(); + update_buffs[i][j] = buffs[i][j]; + } + } + + // The matrix generated by gf_gen_cauchy1_matrix + // is always invertable. + gf_gen_cauchy1_matrix(encode_matrix, m, k); + + // Make parity vects + // Generate g_tbls from encode matrix a + ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); + // Perform matrix dot_prod for EC encoding + // using g_tbls from encode matrix a + REF_FUNCTION(size, k, m - k, g_tbls, buffs, &buffs[k]); + for (i = 0; i < k; i++) { + FUNCTION_UNDER_TEST(size, k, m - k, i, g_tbls, update_buffs[i], + &update_buffs[k]); + } + for (i = 0; i < m - k; i++) { + if (0 != memcmp(update_buffs[k + i], buffs[k + i], size)) { + printf("\nupdate_buffs%d (size=%d) :", i, size); + dump(update_buffs[k + i], 25); + printf("buffs%d (size=%d) :", i, size); + dump(buffs[k + i], 25); + return -1; + } + } + + // Random errors + memset(src_in_err, 0, TEST_SOURCES); + gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); + // Generate decode matrix + re = gf_gen_decode_matrix(encode_matrix, decode_matrix, + invert_matrix, decode_index, src_err_list, + src_in_err, nerrs, nsrcerrs, k, m); + if (re != 0) { + printf("Fail to gf_gen_decode_matrix\n"); + return -1; + } + // Pack recovery array as list of valid sources + // Its order must be the same as the order + // to generate matrix b in gf_gen_decode_matrix + for (i = 0; i < k; i++) { + recov[i] = update_buffs[decode_index[i]]; + } + + // Recover data + for (i = 0; i < TEST_SOURCES; i++) { + memset(temp_buffs[i], 0, TEST_LEN); + } + ec_init_tables(k, nerrs, decode_matrix, g_tbls); + for (i = 0; i < k; i++) { + FUNCTION_UNDER_TEST(size, k, nerrs, i, g_tbls, recov[i], + &temp_buffs[k]); + } + + for (i = 0; i < nerrs; i++) { + + if (0 != + memcmp(temp_buffs[k + i], update_buffs[src_err_list[i]], size)) { + printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); + printf(" - erase list = "); + for (j = 0; j < nerrs; j++) + printf(" %d", src_err_list[j]); + printf(" - Index = "); + for (p = 0; p < k; p++) + printf(" %d", decode_index[p]); + printf("\nencode_matrix:\n"); + dump_u8xu8((unsigned char *)encode_matrix, m, k); + printf("inv b:\n"); + dump_u8xu8((unsigned char *)invert_matrix, k, k); + printf("\ndecode_matrix:\n"); + dump_u8xu8((unsigned char *)decode_matrix, m, k); + printf("orig data:\n"); + dump_matrix(update_buffs, m, 25); + printf("orig :"); + dump(update_buffs[src_err_list[i]], 25); + printf("recov %d:", src_err_list[i]); + 
dump(temp_buffs[k + i], 25);
+				return -1;
+			}
+		}
+		putchar('.');
+	}
+
+	printf("done EC tests: Pass\n");
+	return 0;
+}
diff --git a/src/isa-l/erasure_code/gen_rs_matrix_limits.c b/src/isa-l/erasure_code/gen_rs_matrix_limits.c
new file mode 100644
index 000000000..85061484b
--- /dev/null
+++ b/src/isa-l/erasure_code/gen_rs_matrix_limits.c
@@ -0,0 +1,115 @@
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include "erasure_code.h"
+
+#define MAX_CHECK 63	/* Size is limited by using uint64_t to represent subsets */
+#define M_MAX 0x20
+#define K_MAX 0x10
+#define ROWS M_MAX
+#define COLS K_MAX
+
+static inline int min(int a, int b)
+{
+    if (a <= b)
+        return a;
+    else
+        return b;
+}
+
+void gen_sub_matrix(unsigned char *out_matrix, int dim, unsigned char *in_matrix, int rows,
+                    int cols, uint64_t row_indicator, uint64_t col_indicator)
+{
+    int i, j, r, s;
+
+    for (i = 0, r = 0; i < rows; i++) {
+        if (!(row_indicator & ((uint64_t) 1 << i)))
+            continue;
+
+        for (j = 0, s = 0; j < cols; j++) {
+            if (!(col_indicator & ((uint64_t) 1 << j)))
+                continue;
+            out_matrix[dim * r + s] = in_matrix[cols * i + j];
+            s++;
+        }
+        r++;
+    }
+}
+
+/* Gosper's Hack */
+uint64_t next_subset(uint64_t * subset, uint64_t element_count, uint64_t subsize)
+{
+    uint64_t tmp1 = *subset & -*subset;
+    uint64_t tmp2 = *subset + tmp1;
+    *subset = (((*subset ^ tmp2) >> 2) / tmp1) | tmp2;
+    if (*subset & (((uint64_t) 1 << element_count))) {
+        /* Overflow on last subset */
+        *subset = ((uint64_t) 1 << subsize) - 1;
+        return 1;
+    }
+
+    return 0;
+}
+
+int are_submatrices_singular(unsigned char *vmatrix, int rows, int cols)
+{
+    unsigned char matrix[COLS * COLS];
+    unsigned char invert_matrix[COLS * COLS];
+    uint64_t row_indicator, col_indicator, subset_init, subsize;
+
+    /* Check all square subsize x subsize submatrices of the rows x cols
+     * vmatrix for singularity */
+    for (subsize = 1; subsize <= min(rows, cols); subsize++) {
+        subset_init = (1 << subsize) - 1;
+        col_indicator = subset_init;
+        do {
+            row_indicator = subset_init;
+            do {
+                gen_sub_matrix(matrix, subsize, vmatrix, rows,
+                               cols, row_indicator, col_indicator);
+                if (gf_invert_matrix(matrix, invert_matrix, subsize))
+                    return 1;
+
+            } while (next_subset(&row_indicator, rows, subsize) == 0);
+        } while (next_subset(&col_indicator, cols, subsize) == 0);
+    }
+
+    return 0;
+}
+
+int main(int argc, char **argv)
+{
+    unsigned char vmatrix[(ROWS + COLS) * COLS];
+    int rows, cols;
+
+    if (K_MAX > MAX_CHECK) {
+        printf("K_MAX too large for this test\n");
+        return 0;
+    }
+    if (M_MAX > MAX_CHECK) {
+        printf("M_MAX too large for this test\n");
+        return 0;
+    }
+    if (M_MAX < K_MAX) {
+        printf("M_MAX must not be smaller than K_MAX\n");
+        return 0;
+    }
+
+    printf("Checking gen_rs_matrix for k <= %d and m <= %d.\n", K_MAX, M_MAX);
+    printf("gen_rs_matrix creates erasure codes for:\n");
+
+    for (cols = 1; cols <= K_MAX; cols++) {
+        for (rows = 1; rows <= M_MAX - cols; rows++) {
+            gf_gen_rs_matrix(vmatrix, rows + cols, cols);
+
+            /* Verify the Vandermonde portion of vmatrix contains no
+             * singular submatrix */
+            if (are_submatrices_singular(&vmatrix[cols * cols], rows, cols))
+                break;
+
+        }
+        printf("  k = %2d, m <= %2d \n", cols, rows + cols - 1);
+
+    }
+    return 0;
+}
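next_subset() in the file above advances a bitmask to the next same-size subset using Gosper's hack: isolate the lowest set bit, add it so the carry clears the lowest run of ones, then divide the changed bits back down to reseed the low end. A standalone demo that walks the C(5,3) = 10 three-element subsets the same way:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint64_t s = (1u << 3) - 1;	/* smallest 3-bit subset: 0b00111 */

    do {
        printf("0x%02llx\n", (unsigned long long)s);
        /* Gosper's hack, as in next_subset(): */
        uint64_t lo = s & -s;		/* lowest set bit */
        uint64_t up = s + lo;		/* carry clears the low run of ones */
        s = (((s ^ up) >> 2) / lo) | up;	/* reseed the low bits */
    } while (!(s & ((uint64_t) 1 << 5)));	/* stop once bit 5 overflows */

    return 0;
}

diff --git a/src/isa-l/erasure_code/gf_2vect_dot_prod_avx.asm b/src/isa-l/erasure_code/gf_2vect_dot_prod_avx.asm
new file mode 100644
index 000000000..6b68d93f5
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_2vect_dot_prod_avx.asm
@@ -0,0 +1,337 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c)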
2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_2vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests); +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp2 r10 + %define tmp3 r9 + %define tmp4 r12 ; must be saved and restored + %define return rax + %macro SLDR 2 + %endmacro + %define SSTR SLDR + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + %endmacro + %macro FUNC_RESTORE 0 + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r14 ; must be saved and restored + %define return rax + %macro SLDR 2 + %endmacro + %define SSTR SLDR + %define PS 8 + %define LOG_PS 3 + %define stack_size 3*16 + 3*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + save_xmm128 xmm6, 0*16 + save_xmm128 xmm7, 1*16 + save_xmm128 xmm8, 2*16 + save_reg r12, 3*16 + 0*8 + save_reg r13, 3*16 + 1*8 + save_reg r14, 3*16 + 2*8 + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp + 0*16] + vmovdqa xmm7, [rsp + 1*16] + vmovdqa xmm8, [rsp + 2*16] + mov r12, [rsp + 3*16 + 0*8] + mov r13, [rsp + 3*16 + 1*8] + mov r14, [rsp + 3*16 + 2*8] + add rsp, stack_size + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, elf32 + +;;;================== High Address; +;;; arg4 +;;; arg3 +;;; arg2 +;;; arg1 +;;; arg0 +;;; return +;;;<================= esp of caller +;;; ebp +;;;<================= ebp = esp +;;; var0 +;;; esi +;;; edi +;;; ebx +;;;<================= esp of callee +;;; +;;;================== Low 
Address; + + %define PS 4 + %define LOG_PS 2 + %define func(x) x: + %define arg(x) [ebp + PS*2 + PS*x] + %define var(x) [ebp - PS - PS*x] + + %define trans ecx + %define trans2 esi + %define arg0 trans ;trans and trans2 are for the variables in stack + %define arg0_m arg(0) + %define arg1 ebx + %define arg2 arg2_m + %define arg2_m arg(2) + %define arg3 trans + %define arg3_m arg(3) + %define arg4 trans + %define arg4_m arg(4) + %define tmp edx + %define tmp2 edi + %define tmp3 trans2 + %define tmp4 trans2 + %define tmp4_m var(0) + %define return eax + %macro SLDR 2 ;; stack load/restore + mov %1, %2 + %endmacro + %define SSTR SLDR + + %macro FUNC_SAVE 0 + push ebp + mov ebp, esp + sub esp, PS*1 ;1 local variable + push esi + push edi + push ebx + mov arg1, arg(1) + %endmacro + + %macro FUNC_RESTORE 0 + pop ebx + pop edi + pop esi + add esp, PS*1 ;1 local variable + pop ebp + %endmacro + +%endif ; output formats + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest1 arg4 + +%define vec_i tmp2 +%define ptr tmp3 +%define dest2 tmp4 +%define pos return + + %ifidn PS,4 ;32-bit code + %define len_m arg0_m + %define src_m arg3_m + %define dest1_m arg4_m + %define dest2_m tmp4_m + %endif + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + +%ifidn PS,8 ; 64-bit code + default rel + [bits 64] +%endif + +section .text + +%ifidn PS,8 ;64-bit code + %define xmask0f xmm8 + %define xgft1_lo xmm7 + %define xgft1_hi xmm6 + %define xgft2_lo xmm5 + %define xgft2_hi xmm4 + + %define x0 xmm0 + %define xtmpa xmm1 + %define xp1 xmm2 + %define xp2 xmm3 +%else ;32-bit code + %define xmask0f xmm4 + %define xgft1_lo xmm7 + %define xgft1_hi xmm6 + %define xgft2_lo xgft1_lo + %define xgft2_hi xgft1_hi + + %define x0 xmm0 + %define xtmpa xmm1 + %define xp1 xmm2 + %define xp2 xmm3 +%endif + +align 16 +global gf_2vect_dot_prod_avx:ISAL_SYM_TYPE_FUNCTION + +func(gf_2vect_dot_prod_avx) + FUNC_SAVE + SLDR len, len_m + sub len, 16 + SSTR len_m, len + jl .return_fail + xor pos, pos + vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + sal vec, LOG_PS ;vec *= PS. 
Make vec_i count by PS + SLDR dest1, dest1_m + mov dest2, [dest1+PS] + SSTR dest2_m, dest2 + mov dest1, [dest1] + SSTR dest1_m, dest1 + +.loop16: + vpxor xp1, xp1 + vpxor xp2, xp2 + mov tmp, mul_array + xor vec_i, vec_i + +.next_vect: + SLDR src, src_m + mov ptr, [src+vec_i] + + vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0} + %ifidn PS,8 ; 64-bit code + vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + vmovdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0} + add tmp, 32 + add vec_i, PS + %endif + XLDR x0, [ptr+pos] ;Get next source vector + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft1_hi, xgft1_lo ;GF add high and low partials + vpxor xp1, xgft1_hi ;xp1 += partial + + %ifidn PS,4 ; 32-bit code + vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + vmovdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0} + add tmp, 32 + add vec_i, PS + %endif + vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft2_hi, xgft2_lo ;GF add high and low partials + vpxor xp2, xgft2_hi ;xp2 += partial + + cmp vec_i, vec + jl .next_vect + + SLDR dest1, dest1_m + SLDR dest2, dest2_m + XSTR [dest1+pos], xp1 + XSTR [dest2+pos], xp2 + + SLDR len, len_m + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-16 + jmp .loop16 ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data + +align 16 +mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f + +;;; func core, ver, snum +slversion gf_2vect_dot_prod_avx, 02, 05, 0191 diff --git a/src/isa-l/erasure_code/gf_2vect_dot_prod_avx2.asm b/src/isa-l/erasure_code/gf_2vect_dot_prod_avx2.asm new file mode 100644 index 000000000..db37b0e2e --- /dev/null +++ b/src/isa-l/erasure_code/gf_2vect_dot_prod_avx2.asm @@ -0,0 +1,356 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. 
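The .loop16 body of gf_2vect_dot_prod_avx above is a 4-bit table-lookup GF(2^8) multiplier: for each field coefficient, g_tbls carries one 16-byte table of products for the low nibble and one for the high nibble; vpand/vpsraw split every source byte, vpshufb performs sixteen lookups at once, and vpxor is addition in GF(2^8). The trailing overlap trick handles lengths that are not multiples of 16 by redoing a final, partially overlapping 16-byte block. A hedged scalar model of one coefficient's worth of work (table layout per the asm comments; names are mine):

#include <stdint.h>
#include <stddef.h>

/* dest[i] ^= coeff * src[i] over GF(2^8), one byte at a time. */
static void gf_vect_mul_scalar(size_t len, const uint8_t tbl[32],
                               const uint8_t *src, uint8_t *dest)
{
    size_t i;

    for (i = 0; i < len; i++) {
        uint8_t lo = src[i] & 0x0f;         /* vpand xtmpa, x0, xmask0f */
        uint8_t hi = src[i] >> 4;           /* vpsraw then vpand        */
        dest[i] ^= tbl[lo] ^ tbl[16 + hi];  /* vpshufb x2, then vpxor   */
    }
}

The SIMD versions do exactly this for 16 (SSE/AVX) or 32 (AVX2) bytes per iteration, with the two destinations of the 2vect variant sharing a single pass over the source buffers.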
IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_2vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests); +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 r9 + %define tmp4 r12 ; must be saved and restored + %define return rax + %macro SLDR 2 + %endmacro + %define SSTR SLDR + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + %endmacro + %macro FUNC_RESTORE 0 + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r14 ; must be saved and restored + %define return rax + %macro SLDR 2 + %endmacro + %define SSTR SLDR + %define PS 8 + %define LOG_PS 3 + %define stack_size 3*16 + 3*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + vmovdqa [rsp + 0*16], xmm6 + vmovdqa [rsp + 1*16], xmm7 + vmovdqa [rsp + 2*16], xmm8 + save_reg r12, 3*16 + 0*8 + save_reg r13, 3*16 + 1*8 + save_reg r14, 3*16 + 2*8 + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp + 0*16] + vmovdqa xmm7, [rsp + 1*16] + vmovdqa xmm8, [rsp + 2*16] + mov r12, [rsp + 3*16 + 0*8] + mov r13, [rsp + 3*16 + 1*8] + mov r14, [rsp + 3*16 + 2*8] + add rsp, stack_size + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, elf32 + +;;;================== High Address; +;;; arg4 +;;; arg3 +;;; arg2 +;;; arg1 +;;; arg0 +;;; return +;;;<================= esp of caller +;;; ebp +;;;<================= ebp = esp +;;; var0 +;;; esi +;;; edi +;;; ebx +;;;<================= esp of callee +;;; +;;;================== Low Address; + + %define PS 4 + %define LOG_PS 2 + %define func(x) x: + %define arg(x) [ebp + PS*2 + PS*x] + %define var(x) [ebp - PS - PS*x] + + %define trans ecx + %define trans2 esi + %define arg0 trans ;trans and trans2 are for the variables in stack + %define arg0_m arg(0) + %define arg1 ebx + %define arg2 arg2_m + %define arg2_m arg(2) + %define arg3 trans + %define arg3_m arg(3) + %define arg4 trans + %define arg4_m arg(4) + %define tmp edx + %define tmp.w edx + %define tmp.b dl + %define tmp2 edi + %define tmp3 trans2 + %define tmp4 trans2 + %define tmp4_m var(0) + %define return eax + %macro SLDR 2 ;stack load/restore + mov %1, %2 + %endmacro + %define SSTR SLDR + + %macro FUNC_SAVE 0 + push ebp + mov ebp, esp + sub esp, PS*1 ;1 local variable + push esi + push edi + push ebx + mov arg1, arg(1) + %endmacro + + %macro FUNC_RESTORE 0 + pop ebx + pop edi + pop esi + add esp, PS*1 ;1 local 
variable + pop ebp + %endmacro + +%endif ; output formats + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest1 arg4 + +%define vec_i tmp2 +%define ptr tmp3 +%define dest2 tmp4 +%define pos return + +%ifidn PS,4 ;32-bit code + %define len_m arg0_m + %define src_m arg3_m + %define dest1_m arg4_m + %define dest2_m tmp4_m +%endif + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else + +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + +%ifidn PS,8 ;64-bit code + default rel + [bits 64] +%endif + +section .text + +%ifidn PS,8 ;64-bit code + %define xmask0f ymm8 + %define xmask0fx xmm8 + %define xgft1_lo ymm7 + %define xgft1_hi ymm6 + %define xgft2_lo ymm5 + %define xgft2_hi ymm4 + + %define x0 ymm0 + %define xtmpa ymm1 + %define xp1 ymm2 + %define xp2 ymm3 +%else ;32-bit code + %define xmask0f ymm7 + %define xmask0fx xmm7 + %define xgft1_lo ymm5 + %define xgft1_hi ymm4 + %define xgft2_lo xgft1_lo + %define xgft2_hi xgft1_hi + + %define x0 ymm0 + %define xtmpa ymm1 + %define xp1 ymm2 + %define xp2 ymm3 + +%endif + +align 16 +global gf_2vect_dot_prod_avx2:ISAL_SYM_TYPE_FUNCTION + +func(gf_2vect_dot_prod_avx2) + FUNC_SAVE + SLDR len, len_m + sub len, 32 + SSTR len_m, len + jl .return_fail + xor pos, pos + mov tmp.b, 0x0f + vpinsrb xmask0fx, xmask0fx, tmp.w, 0 + vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f... + + sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS + SLDR dest1, dest1_m + mov dest2, [dest1+PS] + SSTR dest2_m, dest2 + mov dest1, [dest1] + SSTR dest1_m, dest1 + +.loop32: + vpxor xp1, xp1 + vpxor xp2, xp2 + mov tmp, mul_array + xor vec_i, vec_i + +.next_vect: + SLDR src, src_m + mov ptr, [src+vec_i] + + vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + ; " Ax{00}, Ax{10}, ..., Ax{f0} + vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x11 ; swapped to hi | hi + vperm2i128 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 ; swapped to lo | lo + %ifidn PS,8 ; 64-bit code + vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + ; " Bx{00}, Bx{10}, ..., Bx{f0} + vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x11 ; swapped to hi | hi + vperm2i128 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 ; swapped to lo | lo + + XLDR x0, [ptr+pos] ;Get next source vector + add tmp, 32 + add vec_i, PS + %else + XLDR x0, [ptr+pos] ;Get next source vector + %endif + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft1_hi, xgft1_lo ;GF add high and low partials + vpxor xp1, xgft1_hi ;xp1 += partial + + %ifidn PS,4 ; 32-bit code + vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + ; " Bx{00}, Bx{10}, ..., Bx{f0} + vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x11 ; swapped to hi | hi + vperm2i128 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 ; swapped to lo | lo + add tmp, 32 + add vec_i, PS + %endif + vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft2_hi, xgft2_lo ;GF add high and low partials + vpxor xp2, xgft2_hi ;xp2 += partial + + cmp vec_i, vec + jl .next_vect + + SLDR dest1, dest1_m + SLDR dest2, dest2_m + XSTR [dest1+pos], xp1 + XSTR 
[dest2+pos], xp2 + + SLDR len, len_m + add pos, 32 ;Loop on 32 bytes at a time + cmp pos, len + jle .loop32 + + lea tmp, [len + 32] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-32 + jmp .loop32 ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data + +;;; func core, ver, snum +slversion gf_2vect_dot_prod_avx2, 04, 05, 0196 diff --git a/src/isa-l/erasure_code/gf_2vect_dot_prod_avx512.asm b/src/isa-l/erasure_code/gf_2vect_dot_prod_avx512.asm new file mode 100644 index 000000000..470051d69 --- /dev/null +++ b/src/isa-l/erasure_code/gf_2vect_dot_prod_avx512.asm @@ -0,0 +1,245 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
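The SSE, AVX and AVX2 kernels above and the AVX-512 variant that follows all evaluate the same operation: for each of the two outputs, every result byte is the XOR (GF addition) of table-lookup products of the corresponding source bytes. As a reading aid, here is a scalar C model of that contract; the function name is illustrative rather than part of ISA-L's API, but the 32-bytes-per-coefficient layout (16 low-nibble products followed by 16 high-nibble products) matches what gf_vect_mul_init() writes into g_tbls.

```c
#include <stddef.h>

typedef unsigned char u8;

/* Scalar model of gf_2vect_dot_prod_*(len, vec, g_tbls, src, dest).
 * Coefficient j for output 0 lives at g_tbls[32*j]; the coefficients
 * for output 1 start at g_tbls[32*vec]. Each 32-byte table holds the
 * 16 low-nibble products, then the 16 high-nibble products. */
static void gf_2vect_dot_prod_model(size_t len, int vec, const u8 *g_tbls,
				    u8 **src, u8 **dest)
{
	for (size_t pos = 0; pos < len; pos++) {
		u8 p1 = 0, p2 = 0;
		for (int j = 0; j < vec; j++) {
			u8 b = src[j][pos];
			const u8 *ta = &g_tbls[32 * j];         /* tables for dest 1 */
			const u8 *tb = &g_tbls[32 * (vec + j)]; /* tables for dest 2 */
			p1 ^= ta[b & 0x0f] ^ ta[16 + (b >> 4)];
			p2 ^= tb[b & 0x0f] ^ tb[16 + (b >> 4)];
		}
		dest[0][pos] = p1;
		dest[1][pos] = p2;
	}
}
```

The assembly vectorizes the two inner lookups with pshufb/vpshufb over a full vector of source bytes at a time, keeping xp1/xp2 as the running XOR accumulators.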
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_2vect_dot_prod_avx512(len, vec, *g_tbls, **buffs, **dests); +;;; + +%include "reg_sizes.asm" + +%ifdef HAVE_AS_KNOWS_AVX512 + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp2 r10 + %define tmp3 r12 ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + %endmacro + %macro FUNC_RESTORE 0 + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + %define arg5 r15 ; must be saved and restored + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + %define stack_size 9*16 + 5*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + vmovdqa [rsp + 0*16], xmm6 + vmovdqa [rsp + 1*16], xmm7 + vmovdqa [rsp + 2*16], xmm8 + vmovdqa [rsp + 3*16], xmm9 + vmovdqa [rsp + 4*16], xmm10 + vmovdqa [rsp + 5*16], xmm11 + vmovdqa [rsp + 6*16], xmm12 + vmovdqa [rsp + 7*16], xmm13 + vmovdqa [rsp + 8*16], xmm14 + save_reg r12, 9*16 + 0*8 + save_reg r13, 9*16 + 1*8 + save_reg r14, 9*16 + 2*8 + save_reg r15, 9*16 + 3*8 + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp + 0*16] + vmovdqa xmm7, [rsp + 1*16] + vmovdqa xmm8, [rsp + 2*16] + vmovdqa xmm9, [rsp + 3*16] + vmovdqa xmm10, [rsp + 4*16] + vmovdqa xmm11, [rsp + 5*16] + vmovdqa xmm12, [rsp + 6*16] + vmovdqa xmm13, [rsp + 7*16] + vmovdqa xmm14, [rsp + 8*16] + mov r12, [rsp + 9*16 + 0*8] + mov r13, [rsp + 9*16 + 1*8] + mov r14, [rsp + 9*16 + 2*8] + mov r15, [rsp + 9*16 + 3*8] + add rsp, stack_size + %endmacro +%endif + + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest1 arg4 +%define ptr arg5 +%define vec_i tmp2 +%define dest2 tmp3 +%define pos return + + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu8 + %define XSTR vmovdqu8 +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + +%define xmask0f zmm8 +%define xgft1_lo zmm7 +%define xgft1_loy ymm7 +%define xgft1_hi zmm6 +%define xgft2_lo zmm5 +%define xgft2_loy ymm5 +%define xgft2_hi zmm4 + +%define x0 zmm0 +%define xtmpa zmm1 +%define xp1 zmm2 +%define xp2 zmm3 + +default rel +[bits 64] + +section .text + +align 16 +global gf_2vect_dot_prod_avx512:ISAL_SYM_TYPE_FUNCTION +func(gf_2vect_dot_prod_avx512) + FUNC_SAVE + sub len, 64 + jl .return_fail + + xor pos, pos + mov tmp, 0x0f + vpbroadcastb xmask0f, tmp ;Construct mask 0x0f0f0f... + sal vec, LOG_PS ;vec *= PS. 
Make vec_i count by PS + mov dest2, [dest1+PS] + mov dest1, [dest1] + +.loop64: + vpxorq xp1, xp1, xp1 + vpxorq xp2, xp2, xp2 + mov tmp, mul_array + xor vec_i, vec_i + +.next_vect: + mov ptr, [src+vec_i] + XLDR x0, [ptr+pos] ;Get next source vector + add vec_i, PS + + vpandq xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpandq x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + vmovdqu8 xgft1_loy, [tmp] ;Load array Ax{00}..{0f}, Ax{00}..{f0} + vmovdqu8 xgft2_loy, [tmp+vec*(32/PS)] ;Load array Bx{00}..{0f}, Bx{00}..{f0} + add tmp, 32 + + vshufi64x2 xgft1_hi, xgft1_lo, xgft1_lo, 0x55 + vshufi64x2 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 + vshufi64x2 xgft2_hi, xgft2_lo, xgft2_lo, 0x55 + vshufi64x2 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 + + vpshufb xgft1_hi, xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xgft1_hi, xgft1_hi, xgft1_lo ;GF add high and low partials + vpxorq xp1, xp1, xgft1_hi ;xp1 += partial + + vpshufb xgft2_hi, xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xgft2_hi, xgft2_hi, xgft2_lo ;GF add high and low partials + vpxorq xp2, xp2, xgft2_hi ;xp2 += partial + + cmp vec_i, vec + jl .next_vect + + XSTR [dest1+pos], xp1 + XSTR [dest2+pos], xp2 + + add pos, 64 ;Loop on 64 bytes at a time + cmp pos, len + jle .loop64 + + lea tmp, [len + 64] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-64 + jmp .loop64 ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +%else +%ifidn __OUTPUT_FORMAT__, win64 +global no_gf_2vect_dot_prod_avx512 +no_gf_2vect_dot_prod_avx512: +%endif +%endif ; ifdef HAVE_AS_KNOWS_AVX512 diff --git a/src/isa-l/erasure_code/gf_2vect_dot_prod_sse.asm b/src/isa-l/erasure_code/gf_2vect_dot_prod_sse.asm new file mode 100644 index 000000000..05a0c28a4 --- /dev/null +++ b/src/isa-l/erasure_code/gf_2vect_dot_prod_sse.asm @@ -0,0 +1,339 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_2vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests); +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp2 r10 + %define tmp3 r9 + %define tmp4 r12 ; must be saved and restored + %define return rax + %macro SLDR 2 + %endmacro + %define SSTR SLDR + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + %endmacro + %macro FUNC_RESTORE 0 + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r14 ; must be saved and restored + %define return rax + %macro SLDR 2 + %endmacro + %define SSTR SLDR + %define PS 8 + %define LOG_PS 3 + %define stack_size 3*16 + 3*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + save_xmm128 xmm6, 0*16 + save_xmm128 xmm7, 1*16 + save_xmm128 xmm8, 2*16 + save_reg r12, 3*16 + 0*8 + save_reg r13, 3*16 + 1*8 + save_reg r14, 3*16 + 2*8 + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + movdqa xmm6, [rsp + 0*16] + movdqa xmm7, [rsp + 1*16] + movdqa xmm8, [rsp + 2*16] + mov r12, [rsp + 3*16 + 0*8] + mov r13, [rsp + 3*16 + 1*8] + mov r14, [rsp + 3*16 + 2*8] + add rsp, stack_size + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, elf32 + +;;;================== High Address; +;;; arg4 +;;; arg3 +;;; arg2 +;;; arg1 +;;; arg0 +;;; return +;;;<================= esp of caller +;;; ebp +;;;<================= ebp = esp +;;; var0 +;;; esi +;;; edi +;;; ebx +;;;<================= esp of callee +;;; +;;;================== Low Address; + + %define PS 4 + %define LOG_PS 2 + %define func(x) x: + %define arg(x) [ebp + PS*2 + PS*x] + %define var(x) [ebp - PS - PS*x] + + %define trans ecx + %define trans2 esi + %define arg0 trans ;trans and trans2 are for the variables in stack + %define arg0_m arg(0) + %define arg1 ebx + %define arg2 arg2_m + %define arg2_m arg(2) + %define arg3 trans + %define arg3_m arg(3) + %define arg4 trans + %define arg4_m arg(4) + %define tmp edx + %define tmp2 edi + %define tmp3 trans2 + %define tmp4 trans2 + %define tmp4_m var(0) + %define return eax + %macro SLDR 2 ;; stack load/restore + mov %1, %2 + %endmacro + %define SSTR SLDR + + %macro FUNC_SAVE 0 + push ebp + mov ebp, esp + sub esp, PS*1 ;1 local variable + push esi + push edi + push ebx + mov arg1, arg(1) + %endmacro + + %macro FUNC_RESTORE 0 + pop ebx + pop edi + pop esi + add esp, PS*1 ;1 local variable + pop ebp + %endmacro + +%endif ; output formats + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 
+%define dest1 arg4 + +%define vec_i tmp2 +%define ptr tmp3 +%define dest2 tmp4 +%define pos return + + %ifidn PS,4 ;32-bit code + %define len_m arg0_m + %define src_m arg3_m + %define dest1_m arg4_m + %define dest2_m tmp4_m + %endif + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR movdqu + %define XSTR movdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR movdqa + %define XSTR movdqa + %else + %define XLDR movntdqa + %define XSTR movntdq + %endif +%endif + +%ifidn PS,8 ;64-bit code + default rel + [bits 64] +%endif + +section .text + +%ifidn PS,8 ;64-bit code + %define xmask0f xmm8 + %define xgft1_lo xmm7 + %define xgft1_hi xmm6 + %define xgft2_lo xmm5 + %define xgft2_hi xmm4 + + %define x0 xmm0 + %define xtmpa xmm1 + %define xp1 xmm2 + %define xp2 xmm3 +%else ;32-bit code + %define xmask0f xmm4 + %define xgft1_lo xmm7 + %define xgft1_hi xmm6 + %define xgft2_lo xgft1_lo + %define xgft2_hi xgft1_hi + + %define x0 xmm0 + %define xtmpa xmm1 + %define xp1 xmm2 + %define xp2 xmm3 +%endif + +align 16 +global gf_2vect_dot_prod_sse:ISAL_SYM_TYPE_FUNCTION + +func(gf_2vect_dot_prod_sse) + FUNC_SAVE + SLDR len, len_m + sub len, 16 + SSTR len_m, len + jl .return_fail + xor pos, pos + movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS + SLDR dest1, dest1_m + mov dest2, [dest1+PS] + SSTR dest2_m, dest2 + mov dest1, [dest1] + SSTR dest1_m, dest1 + +.loop16: + pxor xp1, xp1 + pxor xp2, xp2 + mov tmp, mul_array + xor vec_i, vec_i + +.next_vect: + SLDR src, src_m + mov ptr, [src+vec_i] + + movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0} + %ifidn PS,8 ;64-bit code + movdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + movdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0} + add tmp, 32 + add vec_i, PS + %endif + XLDR x0, [ptr+pos] ;Get next source vector + + movdqa xtmpa, x0 ;Keep unshifted copy of src + psraw x0, 4 ;Shift to put high nibble into bits 4-0 + pand x0, xmask0f ;Mask high src nibble in bits 4-0 + pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0 + + pshufb xgft1_hi, x0 ;Lookup mul table of high nibble + pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft1_hi, xgft1_lo ;GF add high and low partials + pxor xp1, xgft1_hi ;xp1 += partial + + %ifidn PS,4 ;32-bit code + movdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + movdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0} + + add tmp, 32 + add vec_i, PS + %endif + pshufb xgft2_hi, x0 ;Lookup mul table of high nibble + pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft2_hi, xgft2_lo ;GF add high and low partials + pxor xp2, xgft2_hi ;xp2 += partial + + cmp vec_i, vec + jl .next_vect + + SLDR dest1, dest1_m + SLDR dest2, dest2_m + XSTR [dest1+pos], xp1 + XSTR [dest2+pos], xp2 + + SLDR len, len_m + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-16 + jmp .loop16 ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data + +align 16 +mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f + +;;; func core, ver, snum +slversion gf_2vect_dot_prod_sse, 00, 04, 0062 diff --git 
a/src/isa-l/erasure_code/gf_2vect_dot_prod_sse_test.c b/src/isa-l/erasure_code/gf_2vect_dot_prod_sse_test.c new file mode 100644 index 000000000..f4fd9d0ef --- /dev/null +++ b/src/isa-l/erasure_code/gf_2vect_dot_prod_sse_test.c @@ -0,0 +1,480 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
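One convention shared by every kernel in this patch is worth spelling out before the test body: a length shorter than the vector width W is rejected, full W-byte blocks are processed in the main loop, and any remainder is finished by re-running the last full block at offset len - W. A minimal control-flow sketch, assuming a hypothetical process_block() standing in for one vector iteration of the real kernels:

```c
#include <stddef.h>

extern void process_block(size_t pos);	/* hypothetical per-vector body */

/* Model of the ".loop16/.loop32/.loop64" pattern; W is 16 for SSE/AVX,
 * 32 for AVX2 and 64 for AVX-512. The overlapping tail pass recomputes
 * some already-written bytes, which is harmless here because the
 * dot-product kernels treat dest as pure output. */
static int ec_kernel_model(size_t len, size_t W)
{
	if (len < W)
		return 1;			/* .return_fail */
	size_t pos;
	for (pos = 0; pos + W <= len; pos += W)
		process_block(pos);		/* main loop */
	if (pos != len)
		process_block(len - W);		/* overlapped tail pass */
	return 0;				/* .return_pass */
}
```

This is also why the size sweeps in the test below start at TEST_MIN_SIZE: anything shorter takes the .return_fail path.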
+**********************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> // for memset, memcmp +#include "erasure_code.h" +#include "types.h" + +#ifndef FUNCTION_UNDER_TEST +# define FUNCTION_UNDER_TEST gf_2vect_dot_prod_sse +#endif +#ifndef TEST_MIN_SIZE +# define TEST_MIN_SIZE 16 +#endif + +#define str(s) #s +#define xstr(s) str(s) + +#define TEST_LEN 8192 +#define TEST_SIZE (TEST_LEN/2) +#define TEST_MEM TEST_SIZE +#define TEST_LOOPS 10000 +#define TEST_TYPE_STR "" + +#ifndef TEST_SOURCES +# define TEST_SOURCES 16 +#endif +#ifndef RANDOMS +# define RANDOMS 20 +#endif + +#ifdef EC_ALIGNED_ADDR +// Define power of 2 range to check ptr, len alignment +# define PTR_ALIGN_CHK_B 0 +# define LEN_ALIGN_CHK_B 0 // 0 for aligned only +#else +// Define power of 2 range to check ptr, len alignment +# define PTR_ALIGN_CHK_B 32 +# define LEN_ALIGN_CHK_B 32 // 0 for aligned only +#endif + +typedef unsigned char u8; + +extern void FUNCTION_UNDER_TEST(int len, int vlen, unsigned char *gftbls, + unsigned char **src, unsigned char **dest); + +void dump(unsigned char *buf, int len) +{ + int i; + for (i = 0; i < len;) { + printf(" %2x", 0xff & buf[i++]); + if (i % 32 == 0) + printf("\n"); + } + printf("\n"); +} + +void dump_matrix(unsigned char **s, int k, int m) +{ + int i, j; + for (i = 0; i < k; i++) { + for (j = 0; j < m; j++) { + printf(" %2x", s[i][j]); + } + printf("\n"); + } + printf("\n"); +} + +void dump_u8xu8(unsigned char *s, int k, int m) +{ + int i, j; + for (i = 0; i < k; i++) { + for (j = 0; j < m; j++) { + printf(" %2x", 0xff & s[j + (i * m)]); + } + printf("\n"); + } + printf("\n"); +} + +int main(int argc, char *argv[]) +{ + int i, j, rtest, srcs; + void *buf; + u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g_tbls[2 * TEST_SOURCES * 32]; + u8 *dest1, *dest2, *dest_ref1, *dest_ref2, *dest_ptrs[2]; + u8 *buffs[TEST_SOURCES]; + + int align, size; + unsigned char *efence_buffs[TEST_SOURCES]; + unsigned int offset; + u8 *ubuffs[TEST_SOURCES]; + u8 *udest_ptrs[2]; + + printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN); + + // Allocate the arrays + for (i = 0; i < TEST_SOURCES; i++) { + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + buffs[i] = buf; + } + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest1 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest2 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest_ref1 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest_ref2 = buf; + + dest_ptrs[0] = dest1; + dest_ptrs[1] = dest2; + + // Test of all zeros + for (i = 0; i < TEST_SOURCES; i++) + memset(buffs[i], 0, TEST_LEN); + + memset(dest1, 0, TEST_LEN); + memset(dest2, 0, TEST_LEN); + memset(dest_ref1, 0, TEST_LEN); + memset(dest_ref2, 0, TEST_LEN); + memset(g1, 2, TEST_SOURCES); + memset(g2, 1, TEST_SOURCES); + + for (i = 0; i < TEST_SOURCES; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]); + } + + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs, + dest_ref2); + + FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { + printf("Fail zero "
xstr(FUNCTION_UNDER_TEST) " test1\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest1, 25); + return -1; + } + if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest2, 25); + return -1; + } + + putchar('.'); + + // Rand data test + + for (rtest = 0; rtest < RANDOMS; rtest++) { + for (i = 0; i < TEST_SOURCES; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < TEST_SOURCES; i++) { + g1[i] = rand(); + g2[i] = rand(); + } + + for (i = 0; i < TEST_SOURCES; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); + } + + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], + buffs, dest_ref2); + + FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest1, 25); + return -1; + } + if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest2, 25); + return -1; + } + + putchar('.'); + } + + // Rand data test with varied parameters + for (rtest = 0; rtest < RANDOMS; rtest++) { + for (srcs = TEST_SOURCES; srcs > 0; srcs--) { + for (i = 0; i < srcs; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < srcs; i++) { + g1[i] = rand(); + g2[i] = rand(); + } + + for (i = 0; i < srcs; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); + } + + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs, + dest_ref2); + + FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test1 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest1, 25); + return -1; + } + if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test2 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest2, 25); + return -1; + } + + putchar('.'); + } + } + + // Run tests at end of buffer for Electric Fence + align = (LEN_ALIGN_CHK_B != 0) ? 
1 : 16; + for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) { + for (i = 0; i < TEST_SOURCES; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end + efence_buffs[i] = buffs[i] + TEST_LEN - size; + + for (i = 0; i < TEST_SOURCES; i++) { + g1[i] = rand(); + g2[i] = rand(); + } + + for (i = 0; i < TEST_SOURCES; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); + } + + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1); + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], + efence_buffs, dest_ref2); + + FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, align); + printf("dprod_dut:"); + dump(dest1, align); + return -1; + } + + if (0 != memcmp(dest_ref2, dest2, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, align); + printf("dprod_dut:"); + dump(dest2, align); + return -1; + } + + putchar('.'); + } + + // Test rand ptr alignment if available + + for (rtest = 0; rtest < RANDOMS; rtest++) { + size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1); + srcs = rand() % TEST_SOURCES; + if (srcs == 0) + continue; + + offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B; + // Add random offsets + for (i = 0; i < srcs; i++) + ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); + + udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset)); + udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset)); + + memset(dest1, 0, TEST_LEN); // zero pad to check write-over + memset(dest2, 0, TEST_LEN); + + for (i = 0; i < srcs; i++) + for (j = 0; j < size; j++) + ubuffs[i][j] = rand(); + + for (i = 0; i < srcs; i++) { + g1[i] = rand(); + g2[i] = rand(); + } + + for (i = 0; i < srcs; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); + } + + gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1); + gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2); + + FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs); + + if (memcmp(dest_ref1, udest_ptrs[0], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(udest_ptrs[0], 25); + return -1; + } + if (memcmp(dest_ref2, udest_ptrs[1], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(udest_ptrs[1], 25); + return -1; + } + // Confirm that padding around dests is unchanged + memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff + offset = udest_ptrs[0] - dest1; + + if (memcmp(dest1, dest_ref1, offset)) { + printf("Fail rand ualign pad1 start\n"); + return -1; + } + if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad1 end\n"); + return -1; + } + + offset = udest_ptrs[1] - dest2; + if (memcmp(dest2, dest_ref1, offset)) { + printf("Fail 
rand ualign pad2 start\n"); + return -1; + } + if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad2 end\n"); + return -1; + } + + putchar('.'); + } + + // Test all size alignment + align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16; + + for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) { + srcs = TEST_SOURCES; + + for (i = 0; i < srcs; i++) + for (j = 0; j < size; j++) + buffs[i][j] = rand(); + + for (i = 0; i < srcs; i++) { + g1[i] = rand(); + g2[i] = rand(); + } + + for (i = 0; i < srcs; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); + } + + gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2); + + FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs); + + if (memcmp(dest_ref1, dest_ptrs[0], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest_ptrs[0], 25); + return -1; + } + if (memcmp(dest_ref2, dest_ptrs[1], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest_ptrs[1], 25); + return -1; + } + } + + printf("Pass\n"); + return 0; + +} diff --git a/src/isa-l/erasure_code/gf_2vect_mad_avx.asm b/src/isa-l/erasure_code/gf_2vect_mad_avx.asm new file mode 100644 index 000000000..fcf3a7545 --- /dev/null +++ b/src/isa-l/erasure_code/gf_2vect_mad_avx.asm @@ -0,0 +1,236 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
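Where the dot-product kernels above compute two parity buffers from scratch, the gf_2vect_mad_* family that begins here folds a single new source into two existing partial results in place (the multiply-and-add step used by incremental encode and update paths). A scalar C model under the same illustrative naming as before; vec_i selects the 32-byte table pair of the source being folded in:

```c
#include <stddef.h>

typedef unsigned char u8;

/* Scalar model of gf_2vect_mad_*(len, vec, vec_i, g_tbls, src, dest):
 * dest[0] ^= A[vec_i] * src and dest[1] ^= B[vec_i] * src in GF(2^8). */
static void gf_2vect_mad_model(size_t len, int vec, int vec_i,
			       const u8 *g_tbls, const u8 *src, u8 **dest)
{
	const u8 *ta = &g_tbls[32 * vec_i];		/* tables for dest1 */
	const u8 *tb = &g_tbls[32 * (vec + vec_i)];	/* tables for dest2 */
	for (size_t i = 0; i < len; i++) {
		u8 b = src[i];
		dest[0][i] ^= ta[b & 0x0f] ^ ta[16 + (b >> 4)];
		dest[1][i] ^= tb[b & 0x0f] ^ tb[16 + (b >> 4)];
	}
}
```

Because dest is read-modify-write here, the overlapped tail trick needs one extra step: the kernels snapshot the last vector's worth of each dest up front (xtmpd1/xtmpd2) and restore it before the overlap pass, so bytes covered by both the main loop and the tail pass are only updated once.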
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_2vect_mad_avx(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%define PS 8 + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg0.w ecx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + %define tmp r11 + %define tmp2 r10 + %define return rax + %define return.w eax + %define stack_size 16*9 + 3*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + +%macro FUNC_SAVE 0 + sub rsp, stack_size + movdqa [rsp+16*0],xmm6 + movdqa [rsp+16*1],xmm7 + movdqa [rsp+16*2],xmm8 + movdqa [rsp+16*3],xmm9 + movdqa [rsp+16*4],xmm10 + movdqa [rsp+16*5],xmm11 + movdqa [rsp+16*6],xmm12 + movdqa [rsp+16*7],xmm13 + movdqa [rsp+16*8],xmm14 + save_reg r12, 9*16 + 0*8 + save_reg r15, 9*16 + 1*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) +%endmacro + +%macro FUNC_RESTORE 0 + movdqa xmm6, [rsp+16*0] + movdqa xmm7, [rsp+16*1] + movdqa xmm8, [rsp+16*2] + movdqa xmm9, [rsp+16*3] + movdqa xmm10, [rsp+16*4] + movdqa xmm11, [rsp+16*5] + movdqa xmm12, [rsp+16*6] + movdqa xmm13, [rsp+16*7] + movdqa xmm14, [rsp+16*8] + mov r12, [rsp + 9*16 + 0*8] + mov r15, [rsp + 9*16 + 1*8] + add rsp, stack_size +%endmacro + +%elifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg0.w edi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define tmp2 r10 + %define return rax + %define return.w eax + + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + +;;; gf_2vect_mad_avx(len, vec, vec_i, mul_array, src, dest) +%define len arg0 +%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest1 arg5 +%define pos return +%define pos.w return.w + +%define dest2 tmp2 + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + + +default rel + +[bits 64] +section .text + +%define xmask0f xmm14 +%define xgft1_lo xmm13 +%define xgft1_hi xmm12 +%define xgft2_lo xmm11 +%define xgft2_hi xmm10 + +%define x0 xmm0 +%define xtmpa xmm1 +%define xtmph1 xmm2 +%define xtmpl1 xmm3 +%define xtmph2 xmm4 +%define xtmpl2 xmm5 +%define xd1 xmm6 +%define xd2 xmm7 +%define xtmpd1 xmm8 +%define xtmpd2 xmm9 + + +align 16 +global gf_2vect_mad_avx:ISAL_SYM_TYPE_FUNCTION + +func(gf_2vect_mad_avx) + FUNC_SAVE + sub len, 16 + jl .return_fail + + xor pos, pos + vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + sal vec_i, 5 ;Multiply by 32 + sal vec, 5 + lea tmp, [mul_array + vec_i] + vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, Ax{02}, ... + vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0} + vmovdqu xgft2_lo, [tmp+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ... + vmovdqu xgft2_hi, [tmp+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... 
, Bx{f0} + + mov dest2, [dest1+PS] + mov dest1, [dest1] + + XLDR xtmpd1, [dest1+len] ;backup the last 16 bytes in dest + XLDR xtmpd2, [dest2+len] ;backup the last 16 bytes in dest + +.loop16: + XLDR xd1, [dest1+pos] ;Get next dest vector + XLDR xd2, [dest2+pos] ;Get next dest vector +.loop16_overlap: + XLDR x0, [src+pos] ;Get next source vector + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + vpshufb xtmph1, xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl1, xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials + vpxor xd1, xd1, xtmph1 ;xd1 += partial + + vpshufb xtmph2, xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl2, xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph2, xtmph2, xtmpl2 ;GF add high and low partials + vpxor xd2, xd2, xtmph2 ;xd2 += partial + + XSTR [dest1+pos], xd1 + XSTR [dest2+pos], xd2 + + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-16 + vmovdqa xd1, xtmpd1 ;Restore xd1 + vmovdqa xd2, xtmpd2 ;Restore xd2 + jmp .loop16_overlap ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data + +align 16 +mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f + +;;; func core, ver, snum +slversion gf_2vect_mad_avx, 02, 01, 0204 diff --git a/src/isa-l/erasure_code/gf_2vect_mad_avx2.asm b/src/isa-l/erasure_code/gf_2vect_mad_avx2.asm new file mode 100644 index 000000000..0e77ebef4 --- /dev/null +++ b/src/isa-l/erasure_code/gf_2vect_mad_avx2.asm @@ -0,0 +1,247 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
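Two AVX2-specific details in the file below deserve a note. vpshufb shuffles only within each 128-bit lane, so every 16-byte lookup table is duplicated into both lanes with vperm2i128, and the 0x0f mask is synthesized in registers (vpinsrb plus vpbroadcastb) rather than loaded from .data. A rough intrinsics sketch of the per-coefficient multiply, with illustrative names; a logical word shift plus mask is equivalent to the assembly's vpsraw/vpand pair, since the mask discards whatever bits the shift brings in:

```c
#include <immintrin.h>

/* Illustrative AVX2 sketch: multiply 32 bytes of x by one GF(2^8)
 * coefficient whose 32-byte low|high table starts at g_tbl. */
static __m256i gf_mul32(const unsigned char *g_tbl, __m256i x)
{
	const __m256i mask0f = _mm256_set1_epi8(0x0f);
	__m256i t  = _mm256_loadu_si256((const __m256i *)g_tbl);
	__m256i lo = _mm256_permute2x128_si256(t, t, 0x00);	/* lo | lo */
	__m256i hi = _mm256_permute2x128_si256(t, t, 0x11);	/* hi | hi */
	__m256i ln = _mm256_and_si256(x, mask0f);		/* low nibbles  */
	__m256i hn = _mm256_and_si256(_mm256_srli_epi16(x, 4), mask0f);
	return _mm256_xor_si256(_mm256_shuffle_epi8(lo, ln),
				_mm256_shuffle_epi8(hi, hn));
}
```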
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_2vect_mad_avx2(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%define PS 8 + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg0.w ecx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define return rax + %define return.w eax + %define stack_size 16*9 + 3*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + sub rsp, stack_size + vmovdqa [rsp+16*0],xmm6 + vmovdqa [rsp+16*1],xmm7 + vmovdqa [rsp+16*2],xmm8 + vmovdqa [rsp+16*3],xmm9 + vmovdqa [rsp+16*4],xmm10 + vmovdqa [rsp+16*5],xmm11 + vmovdqa [rsp+16*6],xmm12 + vmovdqa [rsp+16*7],xmm13 + vmovdqa [rsp+16*8],xmm14 + save_reg r12, 9*16 + 0*8 + save_reg r15, 9*16 + 1*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp+16*0] + vmovdqa xmm7, [rsp+16*1] + vmovdqa xmm8, [rsp+16*2] + vmovdqa xmm9, [rsp+16*3] + vmovdqa xmm10, [rsp+16*4] + vmovdqa xmm11, [rsp+16*5] + vmovdqa xmm12, [rsp+16*6] + vmovdqa xmm13, [rsp+16*7] + vmovdqa xmm14, [rsp+16*8] + mov r12, [rsp + 9*16 + 0*8] + mov r15, [rsp + 9*16 + 1*8] + add rsp, stack_size + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg0.w edi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define return rax + %define return.w eax + + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + +;;; gf_2vect_mad_avx2(len, vec, vec_i, mul_array, src, dest) +%define len arg0 +%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest1 arg5 +%define pos return +%define pos.w return.w + +%define dest2 tmp2 + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else + +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + + +default rel + +[bits 64] +section .text + +%define xmask0f ymm14 +%define xmask0fx xmm14 +%define xgft1_lo ymm13 +%define xgft1_hi ymm12 +%define xgft2_lo ymm11 +%define xgft2_hi ymm10 + +%define x0 ymm0 +%define xtmpa ymm1 +%define xtmph1 ymm2 +%define xtmpl1 ymm3 +%define xtmph2 ymm4 +%define xtmpl2 ymm5 +%define xd1 ymm6 +%define xd2 ymm7 +%define xtmpd1 ymm8 +%define xtmpd2 ymm9 + +align 16 +global gf_2vect_mad_avx2:ISAL_SYM_TYPE_FUNCTION + +func(gf_2vect_mad_avx2) + FUNC_SAVE + sub len, 32 + jl .return_fail + xor pos, pos + mov tmp.b, 0x0f + vpinsrb xmask0fx, xmask0fx, tmp.w, 0 + vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f... 
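;; Note: the vpinsrb/vpbroadcastb pair above splats 0x0f into all 32 bytes
;; of ymm14 without a memory constant (the SSE/AVX versions load mask0f from
;; .data instead). The vperm2i128 pairs below duplicate each 16-byte lookup
;; table into both 128-bit lanes, since vpshufb only indexes within a lane.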
+ + sal vec_i, 5 ;Multiply by 32 + sal vec, 5 + lea tmp, [mul_array + vec_i] + vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + ; " Ax{00}, Ax{10}, ..., Ax{f0} + vmovdqu xgft2_lo, [tmp+vec] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + ; " Bx{00}, Bx{10}, ..., Bx{f0} + + vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x11 ; swapped to hi | hi + vperm2i128 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 ; swapped to lo | lo + vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x11 ; swapped to hi | hi + vperm2i128 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 ; swapped to lo | lo + mov dest2, [dest1+PS] ; reuse mul_array + mov dest1, [dest1] + + XLDR xtmpd1, [dest1+len] ;backup the last 32 bytes in dest + XLDR xtmpd2, [dest2+len] ;backup the last 32 bytes in dest + +.loop32: + XLDR xd1, [dest1+pos] ;Get next dest vector + XLDR xd2, [dest2+pos] ;Get next dest vector +.loop32_overlap: + XLDR x0, [src+pos] ;Get next source vector + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + vpshufb xtmph1, xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl1, xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials + vpxor xd1, xd1, xtmph1 ;xd1 += partial + + vpshufb xtmph2, xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl2, xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph2, xtmph2, xtmpl2 ;GF add high and low partials + vpxor xd2, xd2, xtmph2 ;xd2 += partial + + XSTR [dest1+pos], xd1 + XSTR [dest2+pos], xd2 + + add pos, 32 ;Loop on 32 bytes at a time + cmp pos, len + jle .loop32 + + lea tmp, [len + 32] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-32 + vmovdqa xd1, xtmpd1 ;Restore xd1 + vmovdqa xd2, xtmpd2 ;Restore xd2 + jmp .loop32_overlap ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data + +;;; func core, ver, snum +slversion gf_2vect_mad_avx2, 04, 01, 0205 diff --git a/src/isa-l/erasure_code/gf_2vect_mad_avx512.asm b/src/isa-l/erasure_code/gf_2vect_mad_avx512.asm new file mode 100644 index 000000000..6d972bba5 --- /dev/null +++ b/src/isa-l/erasure_code/gf_2vect_mad_avx512.asm @@ -0,0 +1,230 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_2vect_mad_avx512(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%ifdef HAVE_AS_KNOWS_AVX512 + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define tmp2 r10 + %define return rax + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + %define tmp r11 + %define tmp2 r10 + %define return rax + %define stack_size 16*9 + 3*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + sub rsp, stack_size + vmovdqa [rsp+16*0],xmm6 + vmovdqa [rsp+16*1],xmm7 + vmovdqa [rsp+16*2],xmm8 + vmovdqa [rsp+16*3],xmm9 + vmovdqa [rsp+16*4],xmm10 + vmovdqa [rsp+16*5],xmm11 + vmovdqa [rsp+16*6],xmm12 + vmovdqa [rsp+16*7],xmm13 + vmovdqa [rsp+16*8],xmm14 + save_reg r12, 9*16 + 0*8 + save_reg r15, 9*16 + 1*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp+16*0] + vmovdqa xmm7, [rsp+16*1] + vmovdqa xmm8, [rsp+16*2] + vmovdqa xmm9, [rsp+16*3] + vmovdqa xmm10, [rsp+16*4] + vmovdqa xmm11, [rsp+16*5] + vmovdqa xmm12, [rsp+16*6] + vmovdqa xmm13, [rsp+16*7] + vmovdqa xmm14, [rsp+16*8] + mov r12, [rsp + 9*16 + 0*8] + mov r15, [rsp + 9*16 + 1*8] + add rsp, stack_size + %endmacro +%endif + + +%define PS 8 +%define len arg0 +%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest1 arg5 +%define pos return +%define pos.w return.w +%define dest2 tmp2 + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu8 + %define XSTR vmovdqu8 +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + +default rel +[bits 64] +section .text + +%define x0 zmm0 +%define xtmpa zmm1 +%define xtmph1 zmm2 +%define xtmpl1 zmm3 +%define xtmph2 zmm4 +%define xtmpl2 zmm5 +%define xd1 zmm6 +%define xd2 zmm7 +%define xtmpd1 zmm8 +%define xtmpd2 zmm9 +%define xgft1_hi zmm10 +%define xgft1_lo zmm11 +%define xgft1_loy ymm11 +%define xgft2_hi zmm12 +%define xgft2_lo zmm13 +%define xgft2_loy ymm13 +%define xmask0f zmm14 + +align 16 +global gf_2vect_mad_avx512:ISAL_SYM_TYPE_FUNCTION +func(gf_2vect_mad_avx512) + FUNC_SAVE + sub len, 64 + jl .return_fail + xor pos, pos + mov tmp, 0x0f + vpbroadcastb xmask0f, tmp ;Construct mask 0x0f0f0f... 
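;; Note: with AVX512BW, vpbroadcastb above splats 0x0f directly from a
;; general-purpose register. The kmovq of an all-ones value into k1 below
;; keeps all 64 bytes active in the main loop; the tail code rebuilds k1 by
;; arithmetic-shifting 1<<63 so only the not-yet-processed high bytes of the
;; final overlapping vector are looked up, and the {z} zero-masking makes
;; every other lane contribute nothing to the XOR update.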
+ sal vec_i, 5 ;Multiply by 32 + sal vec, 5 + lea tmp, [mul_array + vec_i] + vmovdqu xgft1_loy, [tmp] ;Load array Ax{00}..{0f}, Ax{00}..{f0} + vmovdqu xgft2_loy, [tmp+vec] ;Load array Bx{00}..{0f}, Bx{00}..{f0} + vshufi64x2 xgft1_hi, xgft1_lo, xgft1_lo, 0x55 + vshufi64x2 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 + vshufi64x2 xgft2_hi, xgft2_lo, xgft2_lo, 0x55 + vshufi64x2 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 + mov dest2, [dest1+PS] ; reuse mul_array + mov dest1, [dest1] + mov tmp, -1 + kmovq k1, tmp + +.loop64: + XLDR xd1, [dest1+pos] ;Get next dest vector + XLDR xd2, [dest2+pos] ;Get next dest vector + XLDR x0, [src+pos] ;Get next source vector + + vpandq xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpandq x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + vpshufb xtmph1 {k1}{z}, xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl1 {k1}{z}, xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xtmph1, xtmph1, xtmpl1 ;GF add high and low partials + vpxorq xd1, xd1, xtmph1 ;xd1 += partial + + vpshufb xtmph2 {k1}{z}, xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl2 {k1}{z}, xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xtmph2, xtmph2, xtmpl2 ;GF add high and low partials + vpxorq xd2, xd2, xtmph2 ;xd2 += partial + + XSTR [dest1+pos], xd1 + XSTR [dest2+pos], xd2 + + add pos, 64 ;Loop on 64 bytes at a time + cmp pos, len + jle .loop64 + + lea tmp, [len + 64] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, (1 << 63) + lea tmp, [len + 64 - 1] + and tmp, 63 + sarx pos, pos, tmp + kmovq k1, pos + mov pos, len ;Overlapped offset length-64 + jmp .loop64 ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +%else +%ifidn __OUTPUT_FORMAT__, win64 +global no_gf_2vect_mad_avx512 +no_gf_2vect_mad_avx512: +%endif +%endif ; ifdef HAVE_AS_KNOWS_AVX512 diff --git a/src/isa-l/erasure_code/gf_2vect_mad_sse.asm b/src/isa-l/erasure_code/gf_2vect_mad_sse.asm new file mode 100644 index 000000000..7ee1b249a --- /dev/null +++ b/src/isa-l/erasure_code/gf_2vect_mad_sse.asm @@ -0,0 +1,239 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_2vect_mad_sse(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%define PS 8 + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg0.w ecx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + %define tmp r11 + %define tmp2 r10 + %define return rax + %define return.w eax + %define stack_size 16*9 + 3*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + +%macro FUNC_SAVE 0 + sub rsp, stack_size + movdqa [rsp+16*0],xmm6 + movdqa [rsp+16*1],xmm7 + movdqa [rsp+16*2],xmm8 + movdqa [rsp+16*3],xmm9 + movdqa [rsp+16*4],xmm10 + movdqa [rsp+16*5],xmm11 + movdqa [rsp+16*6],xmm12 + movdqa [rsp+16*7],xmm13 + movdqa [rsp+16*8],xmm14 + save_reg r12, 9*16 + 0*8 + save_reg r15, 9*16 + 1*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) +%endmacro + +%macro FUNC_RESTORE 0 + movdqa xmm6, [rsp+16*0] + movdqa xmm7, [rsp+16*1] + movdqa xmm8, [rsp+16*2] + movdqa xmm9, [rsp+16*3] + movdqa xmm10, [rsp+16*4] + movdqa xmm11, [rsp+16*5] + movdqa xmm12, [rsp+16*6] + movdqa xmm13, [rsp+16*7] + movdqa xmm14, [rsp+16*8] + mov r12, [rsp + 9*16 + 0*8] + mov r15, [rsp + 9*16 + 1*8] + add rsp, stack_size +%endmacro + +%elifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg0.w edi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define tmp2 r10 + %define return rax + %define return.w eax + + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + +;;; gf_2vect_mad_sse(len, vec, vec_i, mul_array, src, dest) +%define len arg0 +%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest1 arg5 +%define pos return +%define pos.w return.w + +%define dest2 tmp2 + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR movdqu + %define XSTR movdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR movdqa + %define XSTR movdqa + %else + %define XLDR movntdqa + %define XSTR movntdq + %endif +%endif + +default rel + +[bits 64] +section .text + +%define xmask0f xmm14 +%define xgft1_lo xmm13 +%define xgft1_hi xmm12 +%define xgft2_lo xmm11 +%define xgft2_hi xmm10 + +%define x0 xmm0 +%define xtmpa xmm1 +%define xtmph1 xmm2 +%define xtmpl1 xmm3 +%define xtmph2 xmm4 +%define xtmpl2 xmm5 +%define xd1 xmm6 +%define xd2 xmm7 +%define xtmpd1 xmm8 +%define xtmpd2 xmm9 + + +align 16 +global gf_2vect_mad_sse:ISAL_SYM_TYPE_FUNCTION +func(gf_2vect_mad_sse) + FUNC_SAVE + sub len, 16 + jl .return_fail + + xor pos, pos + movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + sal vec_i, 5 ;Multiply by 32 + sal vec, 5 + lea tmp, [mul_array + vec_i] + movdqu xgft1_lo,[tmp] ;Load array Ax{00}, Ax{01}, Ax{02}, ... + movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, Ax{20}, ... 
, Ax{f0} + movdqu xgft2_lo, [tmp+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ... + movdqu xgft2_hi, [tmp+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0} + mov dest2, [dest1+PS] + mov dest1, [dest1] + + XLDR xtmpd1, [dest1+len] ;backup the last 16 bytes in dest + XLDR xtmpd2, [dest2+len] ;backup the last 16 bytes in dest + +.loop16: + XLDR xd1, [dest1+pos] ;Get next dest vector + XLDR xd2, [dest2+pos] ;Get next dest vector +.loop16_overlap: + XLDR x0, [src+pos] ;Get next source vector + movdqa xtmph1, xgft1_hi ;Reload const array registers + movdqa xtmpl1, xgft1_lo + movdqa xtmph2, xgft2_hi ;Reload const array registers + movdqa xtmpl2, xgft2_lo + movdqa xtmpa, x0 ;Keep unshifted copy of src + psraw x0, 4 ;Shift to put high nibble into bits 4-0 + pand x0, xmask0f ;Mask high src nibble in bits 4-0 + pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0 + + pshufb xtmph1, x0 ;Lookup mul table of high nibble + pshufb xtmpl1, xtmpa ;Lookup mul table of low nibble + pxor xtmph1, xtmpl1 ;GF add high and low partials + pxor xd1, xtmph1 + + pshufb xtmph2, x0 ;Lookup mul table of high nibble + pshufb xtmpl2, xtmpa ;Lookup mul table of low nibble + pxor xtmph2, xtmpl2 ;GF add high and low partials + pxor xd2, xtmph2 + + XSTR [dest1+pos], xd1 ;Store result + XSTR [dest2+pos], xd2 ;Store result + + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-16 + movdqa xd1, xtmpd1 ;Restore xd1 + movdqa xd2, xtmpd2 ;Restore xd2 + jmp .loop16_overlap ;Do one more overlap pass + +.return_pass: + FUNC_RESTORE + mov return, 0 + ret + +.return_fail: + FUNC_RESTORE + mov return, 1 + ret + +endproc_frame + +section .data + +align 16 + +mask0f: + dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f + +;;; func core, ver, snum +slversion gf_2vect_mad_sse, 00, 01, 0203 diff --git a/src/isa-l/erasure_code/gf_3vect_dot_prod_avx.asm b/src/isa-l/erasure_code/gf_3vect_dot_prod_avx.asm new file mode 100644 index 000000000..b006cf13a --- /dev/null +++ b/src/isa-l/erasure_code/gf_3vect_dot_prod_avx.asm @@ -0,0 +1,377 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_3vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests); +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r12 ; must be saved and restored + %define return rax + %macro SLDR 2 + %endmacro + %define SSTR SLDR + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + %endmacro + %macro FUNC_RESTORE 0 + pop r13 + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + %define arg5 r15 ; must be saved and restored + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r14 ; must be saved and restored + %define return rax + %macro SLDR 2 + %endmacro + %define SSTR SLDR + %define PS 8 + %define LOG_PS 3 + %define stack_size 6*16 + 5*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + save_xmm128 xmm6, 0*16 + save_xmm128 xmm7, 1*16 + save_xmm128 xmm8, 2*16 + save_xmm128 xmm9, 3*16 + save_xmm128 xmm10, 4*16 + save_xmm128 xmm11, 5*16 + save_reg r12, 6*16 + 0*8 + save_reg r13, 6*16 + 1*8 + save_reg r14, 6*16 + 2*8 + save_reg r15, 6*16 + 3*8 + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp + 0*16] + vmovdqa xmm7, [rsp + 1*16] + vmovdqa xmm8, [rsp + 2*16] + vmovdqa xmm9, [rsp + 3*16] + vmovdqa xmm10, [rsp + 4*16] + vmovdqa xmm11, [rsp + 5*16] + mov r12, [rsp + 6*16 + 0*8] + mov r13, [rsp + 6*16 + 1*8] + mov r14, [rsp + 6*16 + 2*8] + mov r15, [rsp + 6*16 + 3*8] + add rsp, stack_size + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, elf32 + +;;;================== High Address; +;;; arg4 +;;; arg3 +;;; arg2 +;;; arg1 +;;; arg0 +;;; return +;;;<================= esp of caller +;;; ebp +;;;<================= ebp = esp +;;; var0 +;;; var1 +;;; esi +;;; edi +;;; ebx +;;;<================= esp of callee +;;; +;;;================== Low Address; + + %define PS 4 + %define LOG_PS 2 + %define func(x) x: + %define arg(x) [ebp + PS*2 + PS*x] + %define var(x) [ebp - PS - PS*x] + + %define trans ecx + %define trans2 esi + %define arg0 trans ;trans and trans2 are for the variables in stack + %define arg0_m arg(0) + %define arg1 ebx + %define arg2 arg2_m + %define arg2_m arg(2) + %define arg3 trans + %define arg3_m arg(3) + %define arg4 trans + %define arg4_m arg(4) + %define arg5 trans2 + %define tmp edx + %define tmp2 edi + %define tmp3 trans2 + %define tmp3_m var(0) + %define tmp4 trans2 + %define tmp4_m var(1) + %define return eax + %macro SLDR 2 ;; stack load/restore + mov %1, %2 + %endmacro + 
%define SSTR SLDR + + %macro FUNC_SAVE 0 + push ebp + mov ebp, esp + sub esp, PS*2 ;2 local variables + push esi + push edi + push ebx + mov arg1, arg(1) + %endmacro + + %macro FUNC_RESTORE 0 + pop ebx + pop edi + pop esi + add esp, PS*2 ;2 local variables + pop ebp + %endmacro + +%endif ; output formats + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest1 arg4 +%define ptr arg5 + +%define vec_i tmp2 +%define dest2 tmp3 +%define dest3 tmp4 +%define pos return + + %ifidn PS,4 ;32-bit code + %define len_m arg0_m + %define src_m arg3_m + %define dest1_m arg4_m + %define dest2_m tmp3_m + %define dest3_m tmp4_m + %endif + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + +%ifidn PS,8 ; 64-bit code + default rel + [bits 64] +%endif + + +section .text + +%ifidn PS,8 ;64-bit code + %define xmask0f xmm11 + %define xgft1_lo xmm10 + %define xgft1_hi xmm9 + %define xgft2_lo xmm8 + %define xgft2_hi xmm7 + %define xgft3_lo xmm6 + %define xgft3_hi xmm5 + + %define x0 xmm0 + %define xtmpa xmm1 + %define xp1 xmm2 + %define xp2 xmm3 + %define xp3 xmm4 +%else + %define xmask0f xmm7 + %define xgft1_lo xmm6 + %define xgft1_hi xmm5 + %define xgft2_lo xgft1_lo + %define xgft2_hi xgft1_hi + %define xgft3_lo xgft1_lo + %define xgft3_hi xgft1_hi + + %define x0 xmm0 + %define xtmpa xmm1 + %define xp1 xmm2 + %define xp2 xmm3 + %define xp3 xmm4 +%endif + +align 16 +global gf_3vect_dot_prod_avx:ISAL_SYM_TYPE_FUNCTION +func(gf_3vect_dot_prod_avx) + FUNC_SAVE + SLDR len, len_m + sub len, 16 + SSTR len_m, len + jl .return_fail + xor pos, pos + vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + sal vec, LOG_PS ;vec *= PS. 
Make vec_i count by PS + SLDR dest1, dest1_m + mov dest2, [dest1+PS] + SSTR dest2_m, dest2 + mov dest3, [dest1+2*PS] + SSTR dest3_m, dest3 + mov dest1, [dest1] + SSTR dest1_m, dest1 + +.loop16: + vpxor xp1, xp1 + vpxor xp2, xp2 + vpxor xp3, xp3 + mov tmp, mul_array + xor vec_i, vec_i + +.next_vect: + SLDR src, src_m + mov ptr, [src+vec_i] + + vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0} + %ifidn PS,8 ; 64-bit code + vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + vmovdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0} + vmovdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + vmovdqu xgft3_hi, [tmp+vec*(64/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0} + add tmp, 32 + add vec_i, PS + %endif + XLDR x0, [ptr+pos] ;Get next source vector + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft1_hi, xgft1_lo ;GF add high and low partials + vpxor xp1, xgft1_hi ;xp1 += partial + + %ifidn PS,4 ; 32-bit code + vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + vmovdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0} + %endif + vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft2_hi, xgft2_lo ;GF add high and low partials + vpxor xp2, xgft2_hi ;xp2 += partial + + %ifidn PS,4 ; 32-bit code + sal vec, 1 + vmovdqu xgft3_lo, [tmp+vec*(32/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + vmovdqu xgft3_hi, [tmp+vec*(32/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0} + sar vec, 1 + add tmp, 32 + add vec_i, PS + %endif + vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft3_hi, xgft3_lo ;GF add high and low partials + vpxor xp3, xgft3_hi ;xp3 += partial + + cmp vec_i, vec + jl .next_vect + + SLDR dest1, dest1_m + SLDR dest2, dest2_m + XSTR [dest1+pos], xp1 + XSTR [dest2+pos], xp2 + SLDR dest3, dest3_m + XSTR [dest3+pos], xp3 + + SLDR len, len_m + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-16 + jmp .loop16 ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data + +align 16 +mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f + +;;; func core, ver, snum +slversion gf_3vect_dot_prod_avx, 02, 05, 0192 diff --git a/src/isa-l/erasure_code/gf_3vect_dot_prod_avx2.asm b/src/isa-l/erasure_code/gf_3vect_dot_prod_avx2.asm new file mode 100644 index 000000000..38dddcf21 --- /dev/null +++ b/src/isa-l/erasure_code/gf_3vect_dot_prod_avx2.asm @@ -0,0 +1,397 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. 
+; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_3vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests); +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r12 ; must be saved and restored + %define return rax + %macro SLDR 2 + %endmacro + %define SSTR SLDR + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + %endmacro + %macro FUNC_RESTORE 0 + pop r13 + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + %define arg5 r15 ; must be saved and restored + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r14 ; must be saved and restored + %define return rax + %macro SLDR 2 + %endmacro + %define SSTR SLDR + %define PS 8 + %define LOG_PS 3 + %define stack_size 6*16 + 5*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + vmovdqa [rsp + 0*16], xmm6 + vmovdqa [rsp + 1*16], xmm7 + vmovdqa [rsp + 2*16], xmm8 + vmovdqa [rsp + 3*16], xmm9 + vmovdqa [rsp + 4*16], xmm10 + vmovdqa [rsp + 5*16], xmm11 + save_reg r12, 6*16 + 0*8 + save_reg r13, 6*16 + 1*8 + save_reg r14, 6*16 + 2*8 + save_reg r15, 6*16 + 3*8 + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp + 0*16] + vmovdqa xmm7, [rsp + 1*16] + vmovdqa xmm8, [rsp + 2*16] + vmovdqa xmm9, [rsp + 3*16] + vmovdqa xmm10, [rsp + 4*16] + vmovdqa xmm11, [rsp + 5*16] + mov r12, [rsp + 6*16 + 0*8] + mov r13, [rsp + 6*16 + 1*8] + mov r14, [rsp + 6*16 + 2*8] + mov r15, [rsp + 6*16 + 3*8] + add rsp, stack_size + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, elf32 + +;;;================== High Address; +;;; arg4 +;;; arg3 +;;; arg2 +;;; arg1 +;;; arg0 +;;; return +;;;<================= esp of caller +;;; ebp +;;;<================= ebp = esp +;;; var0 
+;;; var1 +;;; esi +;;; edi +;;; ebx +;;;<================= esp of callee +;;; +;;;================== Low Address; + + %define PS 4 + %define LOG_PS 2 + %define func(x) x: + %define arg(x) [ebp + PS*2 + PS*x] + %define var(x) [ebp - PS - PS*x] + + %define trans ecx + %define trans2 esi + %define arg0 trans ;trans and trans2 are for the variables in stack + %define arg0_m arg(0) + %define arg1 ebx + %define arg2 arg2_m + %define arg2_m arg(2) + %define arg3 trans + %define arg3_m arg(3) + %define arg4 trans + %define arg4_m arg(4) + %define arg5 trans2 + %define tmp edx + %define tmp.w edx + %define tmp.b dl + %define tmp2 edi + %define tmp3 trans2 + %define tmp3_m var(0) + %define tmp4 trans2 + %define tmp4_m var(1) + %define return eax + %macro SLDR 2 ;stack load/restore + mov %1, %2 + %endmacro + %define SSTR SLDR + + %macro FUNC_SAVE 0 + push ebp + mov ebp, esp + sub esp, PS*2 ;2 local variables + push esi + push edi + push ebx + mov arg1, arg(1) + %endmacro + + %macro FUNC_RESTORE 0 + pop ebx + pop edi + pop esi + add esp, PS*2 ;2 local variables + pop ebp + %endmacro + +%endif ; output formats + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest1 arg4 +%define ptr arg5 + +%define vec_i tmp2 +%define dest2 tmp3 +%define dest3 tmp4 +%define pos return + +%ifidn PS,4 ;32-bit code + %define len_m arg0_m + %define src_m arg3_m + %define dest1_m arg4_m + %define dest2_m tmp3_m + %define dest3_m tmp4_m +%endif + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + +%ifidn PS,8 ;64-bit code + default rel + [bits 64] +%endif + +section .text + +%ifidn PS,8 ;64-bit code + %define xmask0f ymm11 + %define xmask0fx xmm11 + %define xgft1_lo ymm10 + %define xgft1_hi ymm9 + %define xgft2_lo ymm8 + %define xgft2_hi ymm7 + %define xgft3_lo ymm6 + %define xgft3_hi ymm5 + + %define x0 ymm0 + %define xtmpa ymm1 + %define xp1 ymm2 + %define xp2 ymm3 + %define xp3 ymm4 +%else + %define xmask0f ymm7 + %define xmask0fx xmm7 + %define xgft1_lo ymm6 + %define xgft1_hi ymm5 + %define xgft2_lo xgft1_lo + %define xgft2_hi xgft1_hi + %define xgft3_lo xgft1_lo + %define xgft3_hi xgft1_hi + + %define x0 ymm0 + %define xtmpa ymm1 + %define xp1 ymm2 + %define xp2 ymm3 + %define xp3 ymm4 + +%endif + +align 16 +global gf_3vect_dot_prod_avx2:ISAL_SYM_TYPE_FUNCTION +func(gf_3vect_dot_prod_avx2) + FUNC_SAVE + SLDR len, len_m + sub len, 32 + SSTR len_m, len + jl .return_fail + xor pos, pos + mov tmp.b, 0x0f + vpinsrb xmask0fx, xmask0fx, tmp.w, 0 + vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f... + + sal vec, LOG_PS ;vec *= PS. 
Make vec_i count by PS + SLDR dest1, dest1_m + mov dest2, [dest1+PS] + SSTR dest2_m, dest2 + mov dest3, [dest1+2*PS] + SSTR dest3_m, dest3 + mov dest1, [dest1] + SSTR dest1_m, dest1 + +.loop32: + vpxor xp1, xp1 + vpxor xp2, xp2 + vpxor xp3, xp3 + mov tmp, mul_array + xor vec_i, vec_i + +.next_vect: + SLDR src, src_m + mov ptr, [src+vec_i] + + vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + ; " Ax{00}, Ax{10}, ..., Ax{f0} + vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x11 ; swapped to hi | hi + vperm2i128 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 ; swapped to lo | lo + %ifidn PS,8 ; 64-bit code + vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + ; " Bx{00}, Bx{10}, ..., Bx{f0} + vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x11 ; swapped to hi | hi + vperm2i128 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 ; swapped to lo | lo + + vmovdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + ; " Cx{00}, Cx{10}, ..., Cx{f0} + vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x11 ; swapped to hi | hi + vperm2i128 xgft3_lo, xgft3_lo, xgft3_lo, 0x00 ; swapped to lo | lo + + add tmp, 32 + add vec_i, PS + %endif + XLDR x0, [ptr+pos] ;Get next source vector + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft1_hi, xgft1_lo ;GF add high and low partials + vpxor xp1, xgft1_hi ;xp1 += partial + + %ifidn PS,4 ; 32-bit code + vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + ; " Bx{00}, Bx{10}, ..., Bx{f0} + vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x11 ; swapped to hi | hi + vperm2i128 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 ; swapped to lo | lo + %endif + vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft2_hi, xgft2_lo ;GF add high and low partials + vpxor xp2, xgft2_hi ;xp2 += partial + + %ifidn PS,4 ; 32-bit code + sal vec, 1 + vmovdqu xgft3_lo, [tmp+vec*(32/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + ; " Cx{00}, Cx{10}, ..., Cx{f0} + vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x11 ; swapped to hi | hi + vperm2i128 xgft3_lo, xgft3_lo, xgft3_lo, 0x00 ; swapped to lo | lo + sar vec, 1 + add tmp, 32 + add vec_i, PS + %endif + vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft3_hi, xgft3_lo ;GF add high and low partials + vpxor xp3, xgft3_hi ;xp3 += partial + + cmp vec_i, vec + jl .next_vect + + SLDR dest1, dest1_m + SLDR dest2, dest2_m + XSTR [dest1+pos], xp1 + XSTR [dest2+pos], xp2 + SLDR dest3, dest3_m + XSTR [dest3+pos], xp3 + + SLDR len, len_m + add pos, 32 ;Loop on 32 bytes at a time + cmp pos, len + jle .loop32 + + lea tmp, [len + 32] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-16 + jmp .loop32 ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data + +;;; func core, ver, snum +slversion gf_3vect_dot_prod_avx2, 04, 05, 0197 diff --git a/src/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm b/src/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm new file mode 100644 index 000000000..057cd3730 --- /dev/null +++ b/src/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm @@ -0,0 +1,270 @@ 
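+; A minimal C-style sketch of what the gf_3vect_dot_prod kernels compute,
+; assuming the g_tbls layout set up by the tests in this series: 32 bytes of
+; table per (output, source) pair, a low-nibble table followed by a
+; high-nibble table.  Identifiers below are illustrative, not ISA-L API:
+;
+;	for (p = 0; p < 3; p++)				/* three output rows */
+;		for (i = 0; i < len; i++) {
+;			u8 s = 0;
+;			for (v = 0; v < vlen; v++) {	/* XOR-accumulate over sources */
+;				const u8 *t = &gftbls[32 * (p * vlen + v)];
+;				u8 b = src[v][i];
+;				s ^= t[b & 0x0f] ^ t[16 + (b >> 4)];	/* GF(2^8) multiply */
+;			}
+;			dest[p][i] = s;
+;		}
+;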
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_3vect_dot_prod_avx512(len, vec, *g_tbls, **buffs, **dests); +;;; + +%include "reg_sizes.asm" + +%ifdef HAVE_AS_KNOWS_AVX512 + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r12 ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + %endmacro + %macro FUNC_RESTORE 0 + pop r13 + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + %define arg5 r15 ; must be saved and restored + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r14 ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + %define stack_size 9*16 + 5*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + vmovdqa [rsp + 0*16], xmm6 + vmovdqa [rsp + 1*16], xmm7 + vmovdqa [rsp + 2*16], xmm8 + vmovdqa [rsp + 3*16], xmm9 + vmovdqa [rsp + 4*16], xmm10 + vmovdqa [rsp + 5*16], xmm11 + vmovdqa [rsp + 6*16], xmm12 + vmovdqa [rsp + 7*16], xmm13 + vmovdqa [rsp + 8*16], xmm14 + save_reg r12, 9*16 + 0*8 + save_reg r13, 9*16 + 1*8 + save_reg r14, 9*16 + 2*8 + save_reg r15, 9*16 + 3*8 + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp + 0*16] + vmovdqa xmm7, [rsp + 1*16] + vmovdqa xmm8, [rsp + 2*16] + vmovdqa xmm9, [rsp + 
3*16] + vmovdqa xmm10, [rsp + 4*16] + vmovdqa xmm11, [rsp + 5*16] + vmovdqa xmm12, [rsp + 6*16] + vmovdqa xmm13, [rsp + 7*16] + vmovdqa xmm14, [rsp + 8*16] + mov r12, [rsp + 9*16 + 0*8] + mov r13, [rsp + 9*16 + 1*8] + mov r14, [rsp + 9*16 + 2*8] + mov r15, [rsp + 9*16 + 3*8] + add rsp, stack_size + %endmacro +%endif + + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest1 arg4 +%define ptr arg5 +%define vec_i tmp2 +%define dest2 tmp3 +%define dest3 tmp4 +%define pos return + + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu8 + %define XSTR vmovdqu8 +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + +%define xmask0f zmm11 +%define xgft1_lo zmm10 +%define xgft1_loy ymm10 +%define xgft1_hi zmm9 +%define xgft2_lo zmm8 +%define xgft2_loy ymm8 +%define xgft2_hi zmm7 +%define xgft3_lo zmm6 +%define xgft3_loy ymm6 +%define xgft3_hi zmm5 + +%define x0 zmm0 +%define xtmpa zmm1 +%define xp1 zmm2 +%define xp2 zmm3 +%define xp3 zmm4 + +default rel +[bits 64] + +section .text + +align 16 +global gf_3vect_dot_prod_avx512:ISAL_SYM_TYPE_FUNCTION +func(gf_3vect_dot_prod_avx512) + FUNC_SAVE + sub len, 64 + jl .return_fail + + xor pos, pos + mov tmp, 0x0f + vpbroadcastb xmask0f, tmp ;Construct mask 0x0f0f0f... + sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS + mov dest2, [dest1+PS] + mov dest3, [dest1+2*PS] + mov dest1, [dest1] + +.loop64: + vpxorq xp1, xp1, xp1 + vpxorq xp2, xp2, xp2 + vpxorq xp3, xp3, xp3 + mov tmp, mul_array + xor vec_i, vec_i + +.next_vect: + mov ptr, [src+vec_i] + XLDR x0, [ptr+pos] ;Get next source vector + add vec_i, PS + + vpandq xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpandq x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + vmovdqu8 xgft1_loy, [tmp] ;Load array Ax{00}..{0f}, Ax{00}..{f0} + vmovdqu8 xgft2_loy, [tmp+vec*(32/PS)] ;Load array Bx{00}..{0f}, Bx{00}..{f0} + vmovdqu8 xgft3_loy, [tmp+vec*(64/PS)] ;Load array Cx{00}..{0f}, Cx{00}..{f0} + add tmp, 32 + + vshufi64x2 xgft1_hi, xgft1_lo, xgft1_lo, 0x55 + vshufi64x2 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 + vshufi64x2 xgft2_hi, xgft2_lo, xgft2_lo, 0x55 + vshufi64x2 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 + + vpshufb xgft1_hi, xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xgft1_hi, xgft1_hi, xgft1_lo ;GF add high and low partials + vpxorq xp1, xp1, xgft1_hi ;xp1 += partial + + vpshufb xgft2_hi, xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xgft2_hi, xgft2_hi, xgft2_lo ;GF add high and low partials + vpxorq xp2, xp2, xgft2_hi ;xp2 += partial + + vshufi64x2 xgft3_hi, xgft3_lo, xgft3_lo, 0x55 + vshufi64x2 xgft3_lo, xgft3_lo, xgft3_lo, 0x00 + + vpshufb xgft3_hi, xgft3_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft3_lo, xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xgft3_hi, xgft3_hi, xgft3_lo ;GF add high and low partials + vpxorq xp3, xp3, xgft3_hi ;xp3 += partial + + cmp vec_i, vec + jl .next_vect + + XSTR [dest1+pos], xp1 + XSTR [dest2+pos], xp2 + XSTR [dest3+pos], xp3 + + add pos, 64 ;Loop on 64 bytes at a time + cmp pos, len + jle .loop64 + + lea tmp, [len + 64] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-64 + jmp .loop64 ;Do one more overlap pass + 
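+; Note on the tail handling above: when the remaining length is not a
+; multiple of 64, pos is rewound to len (total length minus 64) and the last
+; full 64-byte vector is processed once more.  The overlapped bytes are
+; simply recomputed; because the dot product writes a pure function of the
+; sources, they are rewritten with identical values, so no scalar byte loop
+; is needed.
+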
+.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +%else +%ifidn __OUTPUT_FORMAT__, win64 +global no_gf_3vect_dot_prod_avx512 +no_gf_3vect_dot_prod_avx512: +%endif +%endif ; ifdef HAVE_AS_KNOWS_AVX512 diff --git a/src/isa-l/erasure_code/gf_3vect_dot_prod_sse.asm b/src/isa-l/erasure_code/gf_3vect_dot_prod_sse.asm new file mode 100644 index 000000000..da0bdf920 --- /dev/null +++ b/src/isa-l/erasure_code/gf_3vect_dot_prod_sse.asm @@ -0,0 +1,378 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_3vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests); +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r12 ; must be saved and restored + %define return rax + %macro SLDR 2 + %endmacro + %define SSTR SLDR + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + %endmacro + %macro FUNC_RESTORE 0 + pop r13 + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + %define arg5 r15 ; must be saved and restored + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r14 ; must be saved and restored + %define return rax + %macro SLDR 2 + %endmacro + %define SSTR SLDR + %define PS 8 + %define LOG_PS 3 + %define stack_size 6*16 + 5*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + save_xmm128 xmm6, 0*16 + save_xmm128 xmm7, 1*16 + save_xmm128 xmm8, 2*16 + save_xmm128 xmm9, 3*16 + save_xmm128 xmm10, 4*16 + save_xmm128 xmm11, 5*16 + save_reg r12, 6*16 + 0*8 + save_reg r13, 6*16 + 1*8 + save_reg r14, 6*16 + 2*8 + save_reg r15, 6*16 + 3*8 + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + movdqa xmm6, [rsp + 0*16] + movdqa xmm7, [rsp + 1*16] + movdqa xmm8, [rsp + 2*16] + movdqa xmm9, [rsp + 3*16] + movdqa xmm10, [rsp + 4*16] + movdqa xmm11, [rsp + 5*16] + mov r12, [rsp + 6*16 + 0*8] + mov r13, [rsp + 6*16 + 1*8] + mov r14, [rsp + 6*16 + 2*8] + mov r15, [rsp + 6*16 + 3*8] + add rsp, stack_size + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, elf32 + +;;;================== High Address; +;;; arg4 +;;; arg3 +;;; arg2 +;;; arg1 +;;; arg0 +;;; return +;;;<================= esp of caller +;;; ebp +;;;<================= ebp = esp +;;; var0 +;;; var1 +;;; esi +;;; edi +;;; ebx +;;;<================= esp of callee +;;; +;;;================== Low Address; + + %define PS 4 + %define LOG_PS 2 + %define func(x) x: + %define arg(x) [ebp + PS*2 + PS*x] + %define var(x) [ebp - PS - PS*x] + + %define trans ecx + %define trans2 esi + %define arg0 trans ;trans and trans2 are for the variables in stack + %define arg0_m arg(0) + %define arg1 ebx + %define arg2 arg2_m + %define arg2_m arg(2) + %define arg3 trans + %define arg3_m arg(3) + %define arg4 trans + %define arg4_m arg(4) + %define arg5 trans2 + %define tmp edx + %define tmp2 edi + %define tmp3 trans2 + %define tmp3_m var(0) + %define tmp4 trans2 + %define tmp4_m var(1) + %define return eax + %macro SLDR 2 ;; stack load/restore + mov %1, %2 + %endmacro + %define SSTR SLDR + + %macro FUNC_SAVE 0 + push ebp + mov ebp, esp + sub esp, PS*2 ;2 local variables + push esi + push edi + push ebx + mov arg1, arg(1) + %endmacro + + %macro FUNC_RESTORE 0 + pop ebx + pop edi + pop esi + add esp, PS*2 ;2 local variables + pop ebp + %endmacro + +%endif ; output formats + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest1 arg4 +%define ptr arg5 + +%define vec_i tmp2 +%define dest2 tmp3 +%define dest3 tmp4 +%define pos return + + %ifidn PS,4 ;32-bit code + %define 
len_m arg0_m + %define src_m arg3_m + %define dest1_m arg4_m + %define dest2_m tmp3_m + %define dest3_m tmp4_m + %endif + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR movdqu + %define XSTR movdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR movdqa + %define XSTR movdqa + %else + %define XLDR movntdqa + %define XSTR movntdq + %endif +%endif + +%ifidn PS,8 ; 64-bit code + default rel + [bits 64] +%endif + + +section .text + +%ifidn PS,8 ;64-bit code + %define xmask0f xmm11 + %define xgft1_lo xmm2 + %define xgft1_hi xmm3 + %define xgft2_lo xmm4 + %define xgft2_hi xmm7 + %define xgft3_lo xmm6 + %define xgft3_hi xmm5 + + %define x0 xmm0 + %define xtmpa xmm1 + %define xp1 xmm10 + %define xp2 xmm9 + %define xp3 xmm8 +%else + %define xmask0f xmm7 + %define xgft1_lo xmm6 + %define xgft1_hi xmm5 + %define xgft2_lo xgft1_lo + %define xgft2_hi xgft1_hi + %define xgft3_lo xgft1_lo + %define xgft3_hi xgft1_hi + + %define x0 xmm0 + %define xtmpa xmm1 + %define xp1 xmm2 + %define xp2 xmm3 + %define xp3 xmm4 +%endif + +align 16 +global gf_3vect_dot_prod_sse:ISAL_SYM_TYPE_FUNCTION +func(gf_3vect_dot_prod_sse) + FUNC_SAVE + SLDR len, len_m + sub len, 16 + SSTR len_m, len + jl .return_fail + xor pos, pos + movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS + SLDR dest1, dest1_m + mov dest2, [dest1+PS] + SSTR dest2_m, dest2 + mov dest3, [dest1+2*PS] + SSTR dest3_m, dest3 + mov dest1, [dest1] + SSTR dest1_m, dest1 + +.loop16: + pxor xp1, xp1 + pxor xp2, xp2 + pxor xp3, xp3 + mov tmp, mul_array + xor vec_i, vec_i + +.next_vect: + SLDR src, src_m + mov ptr, [src+vec_i] + + movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0} + %ifidn PS,8 ;64-bit code + movdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + movdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0} + movdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + movdqu xgft3_hi, [tmp+vec*(64/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0} + add tmp, 32 + add vec_i, PS + %endif + XLDR x0, [ptr+pos] ;Get next source vector + + movdqa xtmpa, x0 ;Keep unshifted copy of src + psraw x0, 4 ;Shift to put high nibble into bits 4-0 + pand x0, xmask0f ;Mask high src nibble in bits 4-0 + pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0 + + pshufb xgft1_hi, x0 ;Lookup mul table of high nibble + pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft1_hi, xgft1_lo ;GF add high and low partials + pxor xp1, xgft1_hi ;xp1 += partial + + %ifidn PS,4 ;32-bit code + movdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + movdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0} + %endif + pshufb xgft2_hi, x0 ;Lookup mul table of high nibble + pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft2_hi, xgft2_lo ;GF add high and low partials + pxor xp2, xgft2_hi ;xp2 += partial + + %ifidn PS,4 ;32-bit code + sal vec, 1 + movdqu xgft3_lo, [tmp+vec*(32/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + movdqu xgft3_hi, [tmp+vec*(32/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0} + sar vec, 1 + add tmp, 32 + add vec_i, PS + %endif + pshufb xgft3_hi, x0 ;Lookup mul table of high nibble + pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft3_hi, xgft3_lo ;GF add high and low partials + pxor xp3, xgft3_hi ;xp3 += partial + + cmp vec_i, vec + jl .next_vect + + SLDR dest1, dest1_m + SLDR 
dest2, dest2_m + XSTR [dest1+pos], xp1 + XSTR [dest2+pos], xp2 + SLDR dest3, dest3_m + XSTR [dest3+pos], xp3 + + SLDR len, len_m + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-16 + jmp .loop16 ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data + +align 16 +mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f + +;;; func core, ver, snum +slversion gf_3vect_dot_prod_sse, 00, 06, 0063 diff --git a/src/isa-l/erasure_code/gf_3vect_dot_prod_sse_test.c b/src/isa-l/erasure_code/gf_3vect_dot_prod_sse_test.c new file mode 100644 index 000000000..32609c757 --- /dev/null +++ b/src/isa-l/erasure_code/gf_3vect_dot_prod_sse_test.c @@ -0,0 +1,586 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>		// for memset, memcmp
+#include "erasure_code.h"
+#include "types.h"
+
+#ifndef FUNCTION_UNDER_TEST
+# define FUNCTION_UNDER_TEST gf_3vect_dot_prod_sse
+#endif
+#ifndef TEST_MIN_SIZE
+# define TEST_MIN_SIZE 16
+#endif
+
+#define str(s) #s
+#define xstr(s) str(s)
+
+#define TEST_LEN 8192
+#define TEST_SIZE (TEST_LEN/2)
+#define TEST_MEM TEST_SIZE
+#define TEST_LOOPS 10000
+#define TEST_TYPE_STR ""
+
+#ifndef TEST_SOURCES
+# define TEST_SOURCES 16
+#endif
+#ifndef RANDOMS
+# define RANDOMS 20
+#endif
+
+#ifdef EC_ALIGNED_ADDR
+// Define power of 2 range to check ptr, len alignment
+# define PTR_ALIGN_CHK_B 0
+# define LEN_ALIGN_CHK_B 0	// 0 for aligned only
+#else
+// Define power of 2 range to check ptr, len alignment
+# define PTR_ALIGN_CHK_B 32
+# define LEN_ALIGN_CHK_B 32	// 0 for aligned only
+#endif
+
+typedef unsigned char u8;
+
+extern void FUNCTION_UNDER_TEST(int len, int vlen, unsigned char *gftbls,
+				unsigned char **src, unsigned char **dest);
+
+void dump(unsigned char *buf, int len)
+{
+	int i;
+	for (i = 0; i < len;) {
+		printf(" %2x", 0xff & buf[i++]);
+		if (i % 32 == 0)
+			printf("\n");
+	}
+	printf("\n");
+}
+
+void dump_matrix(unsigned char **s, int k, int m)
+{
+	int i, j;
+	for (i = 0; i < k; i++) {
+		for (j = 0; j < m; j++) {
+			printf(" %2x", s[i][j]);
+		}
+		printf("\n");
+	}
+	printf("\n");
+}
+
+void dump_u8xu8(unsigned char *s, int k, int m)
+{
+	int i, j;
+	for (i = 0; i < k; i++) {
+		for (j = 0; j < m; j++) {
+			printf(" %2x", 0xff & s[j + (i * m)]);
+		}
+		printf("\n");
+	}
+	printf("\n");
+}
+
+int main(int argc, char *argv[])
+{
+	int i, j, rtest, srcs;
+	void *buf;
+	u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES];
+	u8 g_tbls[3 * TEST_SOURCES * 32], *dest_ptrs[3], *buffs[TEST_SOURCES];
+	u8 *dest1, *dest2, *dest3, *dest_ref1, *dest_ref2, *dest_ref3;
+
+	int align, size;
+	unsigned char *efence_buffs[TEST_SOURCES];
+	unsigned int offset;
+	u8 *ubuffs[TEST_SOURCES];
+	u8 *udest_ptrs[3];
+	printf(xstr(FUNCTION_UNDER_TEST) "_test: %dx%d ", TEST_SOURCES, TEST_LEN);
+
+	// Allocate the arrays
+	for (i = 0; i < TEST_SOURCES; i++) {
+		if (posix_memalign(&buf, 64, TEST_LEN)) {
+			printf("alloc error: Fail");
+			return -1;
+		}
+		buffs[i] = buf;
+	}
+
+	if (posix_memalign(&buf, 64, TEST_LEN)) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	dest1 = buf;
+
+	if (posix_memalign(&buf, 64, TEST_LEN)) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	dest2 = buf;
+
+	if (posix_memalign(&buf, 64, TEST_LEN)) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	dest3 = buf;
+
+	if (posix_memalign(&buf, 64, TEST_LEN)) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	dest_ref1 = buf;
+
+	if (posix_memalign(&buf, 64, TEST_LEN)) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	dest_ref2 = buf;
+
+	if (posix_memalign(&buf, 64, TEST_LEN)) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	dest_ref3 = buf;
+
+	dest_ptrs[0] = dest1;
+	dest_ptrs[1] = dest2;
+	dest_ptrs[2] = dest3;
+
+	// Test of all zeros
+	for (i = 0; i < TEST_SOURCES; i++)
+		memset(buffs[i], 0, TEST_LEN);
+
+	memset(dest1, 0, TEST_LEN);
+	memset(dest2, 0, TEST_LEN);
+	memset(dest3, 0, TEST_LEN);
+	memset(dest_ref1, 0, TEST_LEN);
+	memset(dest_ref2, 0, TEST_LEN);
+	memset(dest_ref3, 0, TEST_LEN);
+	memset(g1, 2, TEST_SOURCES);
+	memset(g2, 1, TEST_SOURCES);
+	memset(g3, 7, TEST_SOURCES);
+
+	for (i = 0; i < TEST_SOURCES; i++) {
+		gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
+		gf_vect_mul_init(g2[i],
&g_tbls[32 * TEST_SOURCES + i * 32]); + gf_vect_mul_init(g3[i], &g_tbls[64 * TEST_SOURCES + i * 32]); + } + + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs, + dest_ref2); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs, + dest_ref3); + + FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { + printf("Fail zero" xstr(FUNCTION_UNDER_TEST) " test1\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest1, 25); + return -1; + } + if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest2, 25); + return -1; + } + if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test3\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(dest3, 25); + return -1; + } + + putchar('.'); + + // Rand data test + + for (rtest = 0; rtest < RANDOMS; rtest++) { + for (i = 0; i < TEST_SOURCES; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < TEST_SOURCES; i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + } + + for (i = 0; i < TEST_SOURCES; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]); + } + + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], + buffs, dest_ref2); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], + buffs, dest_ref3); + + FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest1, 25); + return -1; + } + if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest2, 25); + return -1; + } + if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(dest3, 25); + return -1; + } + + putchar('.'); + } + + // Rand data test with varied parameters + for (rtest = 0; rtest < RANDOMS; rtest++) { + for (srcs = TEST_SOURCES; srcs > 0; srcs--) { + for (i = 0; i < srcs; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < srcs; i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + } + + for (i = 0; i < srcs; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); + } + + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * 
srcs], buffs, + dest_ref2); + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[64 * srcs], buffs, + dest_ref3); + + FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test1 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest1, 25); + return -1; + } + if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test2 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest2, 25); + return -1; + } + if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test3 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(dest3, 25); + return -1; + } + + putchar('.'); + } + } + + // Run tests at end of buffer for Electric Fence + align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16; + for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) { + for (i = 0; i < TEST_SOURCES; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end + efence_buffs[i] = buffs[i] + TEST_LEN - size; + + for (i = 0; i < TEST_SOURCES; i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + } + + for (i = 0; i < TEST_SOURCES; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]); + } + + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1); + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], + efence_buffs, dest_ref2); + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], + efence_buffs, dest_ref3); + + FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, align); + printf("dprod_dut:"); + dump(dest1, align); + return -1; + } + + if (0 != memcmp(dest_ref2, dest2, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, align); + printf("dprod_dut:"); + dump(dest2, align); + return -1; + } + + if (0 != memcmp(dest_ref3, dest3, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, align); + printf("dprod_dut:"); + dump(dest3, align); + return -1; + } + + putchar('.'); + } + + // Test rand ptr alignment if available + + for (rtest = 0; rtest < RANDOMS; rtest++) { + size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1); + srcs = rand() % TEST_SOURCES; + if (srcs == 0) + continue; + + offset = (PTR_ALIGN_CHK_B != 0) ? 
1 : PTR_ALIGN_CHK_B; + // Add random offsets + for (i = 0; i < srcs; i++) + ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); + + udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset)); + udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset)); + udest_ptrs[2] = dest3 + (rand() & (PTR_ALIGN_CHK_B - offset)); + + memset(dest1, 0, TEST_LEN); // zero pad to check write-over + memset(dest2, 0, TEST_LEN); + memset(dest3, 0, TEST_LEN); + + for (i = 0; i < srcs; i++) + for (j = 0; j < size; j++) + ubuffs[i][j] = rand(); + + for (i = 0; i < srcs; i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + } + + for (i = 0; i < srcs; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); + } + + gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1); + gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2); + gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], ubuffs, dest_ref3); + + FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs); + + if (memcmp(dest_ref1, udest_ptrs[0], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(udest_ptrs[0], 25); + return -1; + } + if (memcmp(dest_ref2, udest_ptrs[1], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(udest_ptrs[1], 25); + return -1; + } + if (memcmp(dest_ref3, udest_ptrs[2], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(udest_ptrs[2], 25); + return -1; + } + // Confirm that padding around dests is unchanged + memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff + offset = udest_ptrs[0] - dest1; + + if (memcmp(dest1, dest_ref1, offset)) { + printf("Fail rand ualign pad1 start\n"); + return -1; + } + if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad1 end\n"); + return -1; + } + + offset = udest_ptrs[1] - dest2; + if (memcmp(dest2, dest_ref1, offset)) { + printf("Fail rand ualign pad2 start\n"); + return -1; + } + if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad2 end\n"); + return -1; + } + + offset = udest_ptrs[2] - dest3; + if (memcmp(dest3, dest_ref1, offset)) { + printf("Fail rand ualign pad3 start\n"); + return -1; + } + if (memcmp(dest3 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad3 end\n");; + return -1; + } + + putchar('.'); + } + + // Test all size alignment + align = (LEN_ALIGN_CHK_B != 0) ? 
1 : 16; + + for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) { + srcs = TEST_SOURCES; + + for (i = 0; i < srcs; i++) + for (j = 0; j < size; j++) + buffs[i][j] = rand(); + + for (i = 0; i < srcs; i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + } + + for (i = 0; i < srcs; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); + } + + gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2); + gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], buffs, dest_ref3); + + FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs); + + if (memcmp(dest_ref1, dest_ptrs[0], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest_ptrs[0], 25); + return -1; + } + if (memcmp(dest_ref2, dest_ptrs[1], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest_ptrs[1], 25); + return -1; + } + if (memcmp(dest_ref3, dest_ptrs[2], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(dest_ptrs[2], 25); + return -1; + } + } + + printf("Pass\n"); + return 0; + +} diff --git a/src/isa-l/erasure_code/gf_3vect_mad_avx.asm b/src/isa-l/erasure_code/gf_3vect_mad_avx.asm new file mode 100644 index 000000000..1f40eb780 --- /dev/null +++ b/src/isa-l/erasure_code/gf_3vect_mad_avx.asm @@ -0,0 +1,288 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_3vect_mad_avx(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%define PS 8 + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg0.w ecx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + %define tmp r11 + %define return rax + %define return.w eax + %define stack_size 16*10 + 3*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + +%macro FUNC_SAVE 0 + sub rsp, stack_size + vmovdqa [rsp+16*0],xmm6 + vmovdqa [rsp+16*1],xmm7 + vmovdqa [rsp+16*2],xmm8 + vmovdqa [rsp+16*3],xmm9 + vmovdqa [rsp+16*4],xmm10 + vmovdqa [rsp+16*5],xmm11 + vmovdqa [rsp+16*6],xmm12 + vmovdqa [rsp+16*7],xmm13 + vmovdqa [rsp+16*8],xmm14 + vmovdqa [rsp+16*9],xmm15 + save_reg r12, 10*16 + 0*8 + save_reg r15, 10*16 + 1*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) +%endmacro + +%macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp+16*0] + vmovdqa xmm7, [rsp+16*1] + vmovdqa xmm8, [rsp+16*2] + vmovdqa xmm9, [rsp+16*3] + vmovdqa xmm10, [rsp+16*4] + vmovdqa xmm11, [rsp+16*5] + vmovdqa xmm12, [rsp+16*6] + vmovdqa xmm13, [rsp+16*7] + vmovdqa xmm14, [rsp+16*8] + vmovdqa xmm15, [rsp+16*9] + mov r12, [rsp + 10*16 + 0*8] + mov r15, [rsp + 10*16 + 1*8] + add rsp, stack_size +%endmacro + +%elifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg0.w edi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define return rax + %define return.w eax + + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + +;;; gf_3vect_mad_avx(len, vec, vec_i, mul_array, src, dest) +%define len arg0 +%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest1 arg5 +%define pos return +%define pos.w return.w + +%define dest2 mul_array +%define dest3 vec_i + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + + +default rel + +[bits 64] +section .text + +%define xmask0f xmm15 +%define xgft1_lo xmm14 +%define xgft1_hi xmm13 +%define xgft2_lo xmm12 +%define xgft2_hi xmm11 +%define xgft3_lo xmm10 +%define xgft3_hi xmm9 + +%define x0 xmm0 +%define xtmpa xmm1 +%define xtmph1 xmm2 +%define xtmpl1 xmm3 +%define xtmph2 xmm4 +%define xtmpl2 xmm5 +%define xtmph3 xmm6 +%define xtmpl3 xmm7 +%define xd1 xmm8 +%define xd2 xtmpl1 +%define xd3 xtmph1 + +align 16 +global gf_3vect_mad_avx:ISAL_SYM_TYPE_FUNCTION +func(gf_3vect_mad_avx) + FUNC_SAVE + sub len, 16 + jl .return_fail + xor pos, pos + vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + + sal vec_i, 5 ;Multiply by 32 + sal vec, 5 + lea tmp, [mul_array + vec_i] + vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, Ax{02}, ... + vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0} + vmovdqu xgft2_lo, [tmp+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ... + vmovdqu xgft2_hi, [tmp+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0} + vmovdqu xgft3_lo, [tmp+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ... + vmovdqu xgft3_hi, [tmp+2*vec+16]; " Cx{00}, Cx{10}, Cx{20}, ... 
, Cx{f0} + mov dest2, [dest1+PS] ; reuse mul_array + mov dest3, [dest1+2*PS] ; reuse vec_i + mov dest1, [dest1] + +.loop16: + XLDR x0, [src+pos] ;Get next source vector + XLDR xd1, [dest1+pos] ;Get next dest vector + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + ; dest1 + vpshufb xtmph1, xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl1, xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials + vpxor xd1, xd1, xtmph1 ;xd1 += partial + + XLDR xd2, [dest2+pos] ;reuse xtmpl1. Get next dest vector + XLDR xd3, [dest3+pos] ;reuse xtmph1. Get next dest vector + + ; dest2 + vpshufb xtmph2, xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl2, xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph2, xtmph2, xtmpl2 ;GF add high and low partials + vpxor xd2, xd2, xtmph2 ;xd2 += partial + + ; dest3 + vpshufb xtmph3, xgft3_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl3, xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph3, xtmph3, xtmpl3 ;GF add high and low partials + vpxor xd3, xd3, xtmph3 ;xd3 += partial + + XSTR [dest1+pos], xd1 + XSTR [dest2+pos], xd2 + XSTR [dest3+pos], xd3 + + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + +.lessthan16: + ;; Tail len + ;; Do one more overlap pass + mov tmp, len ;Overlapped offset length-16 + XLDR x0, [src+tmp] ;Get next source vector + XLDR xd1, [dest1+tmp] ;Get next dest vector + XLDR xd2, [dest2+tmp] ;reuse xtmpl1. Get next dest vector + XLDR xd3, [dest3+tmp] ;reuse xtmph1. Get next dest vector + + sub len, pos + + movdqa xtmph3, [constip16] ;Load const of i + 16 + vpinsrb xtmpl3, xtmpl3, len.w, 15 + vpshufb xtmpl3, xtmpl3, xmask0f ;Broadcast len to all bytes + vpcmpgtb xtmpl3, xtmpl3, xtmph3 + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + ; dest1 + vpshufb xgft1_hi, xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft1_hi, xgft1_hi, xgft1_lo ;GF add high and low partials + vpand xgft1_hi, xgft1_hi, xtmpl3 + vpxor xd1, xd1, xgft1_hi + + ; dest2 + vpshufb xgft2_hi, xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft2_hi, xgft2_hi, xgft2_lo ;GF add high and low partials + vpand xgft2_hi, xgft2_hi, xtmpl3 + vpxor xd2, xd2, xgft2_hi + + ; dest3 + vpshufb xgft3_hi, xgft3_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft3_lo, xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft3_hi, xgft3_hi, xgft3_lo ;GF add high and low partials + vpand xgft3_hi, xgft3_hi, xtmpl3 + vpxor xd3, xd3, xgft3_hi + + XSTR [dest1+tmp], xd1 + XSTR [dest2+tmp], xd2 + XSTR [dest3+tmp], xd3 + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data + +align 16 +mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f +constip16: + dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7 + +;;; func core, ver, snum +slversion gf_3vect_mad_avx, 02, 01, 0207 diff --git a/src/isa-l/erasure_code/gf_3vect_mad_avx2.asm b/src/isa-l/erasure_code/gf_3vect_mad_avx2.asm new file mode 100644 index 000000000..0b3666171 --- /dev/null +++ 
b/src/isa-l/erasure_code/gf_3vect_mad_avx2.asm @@ -0,0 +1,317 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_3vect_mad_avx2(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%define PS 8 + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg0.w ecx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 ; must be saved, loaded and restored + %define arg5 r15 ; must be saved and restored + + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define return rax + %define return.w eax + %define stack_size 16*10 + 3*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + + %macro FUNC_SAVE 0 + sub rsp, stack_size + vmovdqa [rsp+16*0],xmm6 + vmovdqa [rsp+16*1],xmm7 + vmovdqa [rsp+16*2],xmm8 + vmovdqa [rsp+16*3],xmm9 + vmovdqa [rsp+16*4],xmm10 + vmovdqa [rsp+16*5],xmm11 + vmovdqa [rsp+16*6],xmm12 + vmovdqa [rsp+16*7],xmm13 + vmovdqa [rsp+16*8],xmm14 + vmovdqa [rsp+16*9],xmm15 + save_reg r12, 10*16 + 0*8 + save_reg r15, 10*16 + 1*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp+16*0] + vmovdqa xmm7, [rsp+16*1] + vmovdqa xmm8, [rsp+16*2] + vmovdqa xmm9, [rsp+16*3] + vmovdqa xmm10, [rsp+16*4] + vmovdqa xmm11, [rsp+16*5] + vmovdqa xmm12, [rsp+16*6] + vmovdqa xmm13, [rsp+16*7] + vmovdqa xmm14, [rsp+16*8] + vmovdqa xmm15, [rsp+16*9] + mov r12, [rsp + 10*16 + 0*8] + mov r15, [rsp + 10*16 + 1*8] + add rsp, stack_size + %endmacro + +%elifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg0.w edi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define return rax + %define return.w eax + + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + +;;; gf_3vect_mad_avx2(len, vec, 
vec_i, mul_array, src, dest) +%define len arg0 +%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest1 arg5 +%define pos return +%define pos.w return.w + +%define dest2 mul_array +%define dest3 vec_i + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + + +default rel + +[bits 64] +section .text + +%define xmask0f ymm15 +%define xmask0fx xmm15 +%define xgft1_lo ymm14 +%define xgft1_hi ymm13 +%define xgft2_lo ymm12 +%define xgft3_lo ymm11 + +%define x0 ymm0 +%define xtmpa ymm1 +%define xtmph1 ymm2 +%define xtmpl1 ymm3 +%define xtmph2 ymm4 +%define xtmpl2 ymm5 +%define xtmpl2x xmm5 +%define xtmph3 ymm6 +%define xtmpl3 ymm7 +%define xtmpl3x xmm7 +%define xd1 ymm8 +%define xd2 ymm9 +%define xd3 ymm10 + +align 16 +global gf_3vect_mad_avx2:ISAL_SYM_TYPE_FUNCTION +func(gf_3vect_mad_avx2) + FUNC_SAVE + sub len, 32 + jl .return_fail + xor pos, pos + mov tmp.b, 0x0f + vpinsrb xmask0fx, xmask0fx, tmp.w, 0 + vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f... + + sal vec_i, 5 ;Multiply by 32 + sal vec, 5 + lea tmp, [mul_array + vec_i] + + vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + ; " Ax{00}, Ax{10}, ..., Ax{f0} + vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x11 ; swapped to hi | hi + vperm2i128 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 ; swapped to lo | lo + + vmovdqu xgft2_lo, [tmp+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ... + ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0} + vmovdqu xgft3_lo, [tmp+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ... + ; " Cx{00}, Cx{10}, Cx{20}, ... 
, Cx{f0} + mov dest2, [dest1+PS] ; reuse mul_array + mov dest3, [dest1+2*PS] ; reuse vec_i + mov dest1, [dest1] + +.loop32: + XLDR x0, [src+pos] ;Get next source vector + XLDR xd1, [dest1+pos] ;Get next dest vector + XLDR xd2, [dest2+pos] ;Get next dest vector + XLDR xd3, [dest3+pos] ;Get next dest vector + vperm2i128 xtmph2, xgft2_lo, xgft2_lo, 0x11 ; swapped to hi | hi + vperm2i128 xtmpl2, xgft2_lo, xgft2_lo, 0x00 ; swapped to lo | lo + + vperm2i128 xtmph3, xgft3_lo, xgft3_lo, 0x11 ; swapped to hi | hi + vperm2i128 xtmpl3, xgft3_lo, xgft3_lo, 0x00 ; swapped to lo | lo + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + ; dest1 + vpshufb xtmph1, xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl1, xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials + vpxor xd1, xd1, xtmph1 ;xd1 += partial + + ; dest2 + vpshufb xtmph2, x0 ;Lookup mul table of high nibble + vpshufb xtmpl2, xtmpa ;Lookup mul table of low nibble + vpxor xtmph2, xtmpl2 ;GF add high and low partials + vpxor xd2, xtmph2 ;xd2 += partial + + ; dest3 + vpshufb xtmph3, x0 ;Lookup mul table of high nibble + vpshufb xtmpl3, xtmpa ;Lookup mul table of low nibble + vpxor xtmph3, xtmpl3 ;GF add high and low partials + vpxor xd3, xtmph3 ;xd3 += partial + + XSTR [dest1+pos], xd1 + XSTR [dest2+pos], xd2 + XSTR [dest3+pos], xd3 + + add pos, 32 ;Loop on 32 bytes at a time + cmp pos, len + jle .loop32 + + lea tmp, [len + 32] + cmp pos, tmp + je .return_pass + +.lessthan32: + ;; Tail len + ;; Do one more overlap pass + mov tmp.b, 0x1f + vpinsrb xtmpl2x, xtmpl2x, tmp.w, 0 + vpbroadcastb xtmpl2, xtmpl2x ;Construct mask 0x1f1f1f... + + mov tmp, len ;Overlapped offset length-32 + + XLDR x0, [src+tmp] ;Get next source vector + XLDR xd1, [dest1+tmp] ;Get next dest vector + XLDR xd2, [dest2+tmp] ;Get next dest vector + XLDR xd3, [dest3+tmp] ;Get next dest vector + + sub len, pos + + vmovdqa xtmph3, [constip32] ;Load const of i + 32 + vpinsrb xtmpl3x, xtmpl3x, len.w, 15 + vinserti128 xtmpl3, xtmpl3, xtmpl3x, 1 ;swapped to xtmpl3x | xtmpl3x + vpshufb xtmpl3, xtmpl3, xtmpl2 ;Broadcast len to all bytes. xtmpl2=0x1f1f1f... 
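+	;; xtmpl3 now holds (len - pos), i.e. tail_len - 32, replicated in
+	;; every byte. constip32 holds the descending signed bytes -1..-32,
+	;; so the signed compare below sets a mask byte to 0xff only for the
+	;; positions of the final 32-byte block that were not already written
+	;; by the last full pass; the vpand of each partial product with this
+	;; mask keeps the overlap pass from applying those bytes twice.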
+ vpcmpgtb xtmpl3, xtmpl3, xtmph3 + + vperm2i128 xtmph2, xgft2_lo, xgft2_lo, 0x11 ; swapped to hi | hi + vperm2i128 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 ; swapped to lo | lo + + vperm2i128 xtmph3, xgft3_lo, xgft3_lo, 0x11 ; swapped to hi | hi + vperm2i128 xgft3_lo, xgft3_lo, xgft3_lo, 0x00 ; swapped to lo | lo + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + ; dest1 + vpshufb xtmph1, xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl1, xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials + vpand xtmph1, xtmph1, xtmpl3 + vpxor xd1, xd1, xtmph1 ;xd1 += partial + + ; dest2 + vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph2, xtmph2, xgft2_lo ;GF add high and low partials + vpand xtmph2, xtmph2, xtmpl3 + vpxor xd2, xd2, xtmph2 ;xd2 += partial + + ; dest3 + vpshufb xtmph3, xtmph3, x0 ;Lookup mul table of high nibble + vpshufb xgft3_lo, xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph3, xtmph3, xgft3_lo ;GF add high and low partials + vpand xtmph3, xtmph3, xtmpl3 + vpxor xd3, xd3, xtmph3 ;xd3 += partial + + XSTR [dest1+tmp], xd1 + XSTR [dest2+tmp], xd2 + XSTR [dest3+tmp], xd3 + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data + +align 32 +constip32: + dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7 + dq 0xe8e9eaebecedeeef, 0xe0e1e2e3e4e5e6e7 + +;;; func core, ver, snum +slversion gf_3vect_mad_avx2, 04, 01, 0208 diff --git a/src/isa-l/erasure_code/gf_3vect_mad_avx512.asm b/src/isa-l/erasure_code/gf_3vect_mad_avx512.asm new file mode 100644 index 000000000..dcafbc791 --- /dev/null +++ b/src/isa-l/erasure_code/gf_3vect_mad_avx512.asm @@ -0,0 +1,247 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_3vect_mad_avx512(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%ifdef HAVE_AS_KNOWS_AVX512 + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define return rax + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 ; must be saved, loaded and restored + %define arg5 r15 ; must be saved and restored + %define tmp r11 + %define return rax + %define return.w eax + %define stack_size 16*10 + 3*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + + %macro FUNC_SAVE 0 + sub rsp, stack_size + vmovdqa [rsp+16*0],xmm6 + vmovdqa [rsp+16*1],xmm7 + vmovdqa [rsp+16*2],xmm8 + vmovdqa [rsp+16*3],xmm9 + vmovdqa [rsp+16*4],xmm10 + vmovdqa [rsp+16*5],xmm11 + vmovdqa [rsp+16*6],xmm12 + vmovdqa [rsp+16*7],xmm13 + vmovdqa [rsp+16*8],xmm14 + vmovdqa [rsp+16*9],xmm15 + save_reg r12, 10*16 + 0*8 + save_reg r15, 10*16 + 1*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp+16*0] + vmovdqa xmm7, [rsp+16*1] + vmovdqa xmm8, [rsp+16*2] + vmovdqa xmm9, [rsp+16*3] + vmovdqa xmm10, [rsp+16*4] + vmovdqa xmm11, [rsp+16*5] + vmovdqa xmm12, [rsp+16*6] + vmovdqa xmm13, [rsp+16*7] + vmovdqa xmm14, [rsp+16*8] + vmovdqa xmm15, [rsp+16*9] + mov r12, [rsp + 10*16 + 0*8] + mov r15, [rsp + 10*16 + 1*8] + add rsp, stack_size + %endmacro +%endif + +%define PS 8 +%define len arg0 +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest1 arg5 +%define pos return +%define dest2 mul_array +%define dest3 vec_i + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu8 + %define XSTR vmovdqu8 +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + +default rel +[bits 64] +section .text + +%define x0 zmm0 +%define xtmpa zmm1 +%define xtmph1 zmm2 +%define xtmpl1 zmm3 +%define xtmph2 zmm4 +%define xtmpl2 zmm5 +%define xtmph3 zmm6 +%define xtmpl3 zmm7 +%define xgft1_hi zmm8 +%define xgft1_lo zmm9 +%define xgft1_loy ymm9 +%define xgft2_hi zmm10 +%define xgft2_lo zmm11 +%define xgft2_loy ymm11 +%define xgft3_hi zmm12 +%define xgft3_lo zmm13 +%define xgft3_loy ymm13 +%define xd1 zmm14 +%define xd2 zmm15 +%define xd3 zmm16 +%define xmask0f zmm17 + +align 16 +global gf_3vect_mad_avx512:ISAL_SYM_TYPE_FUNCTION +func(gf_3vect_mad_avx512) + FUNC_SAVE + sub len, 64 + jl .return_fail + xor pos, pos + mov tmp, 0x0f + vpbroadcastb xmask0f, tmp ;Construct mask 0x0f0f0f... 
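+	;; Each 32-byte entry of mul_array holds two 16-byte lookup tables
+	;; for one GF(2^8) constant: its products against the 16 low-nibble
+	;; values, then against the 16 high-nibble values. Broadcasting each
+	;; half across a 512-bit register below lets vpshufb do 64 parallel
+	;; byte lookups, and xoring the low- and high-nibble partials yields
+	;; the full GF(2^8) multiply.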
+ sal vec_i, 5 ;Multiply by 32 + sal vec, 5 + lea tmp, [mul_array + vec_i] + vmovdqu xgft1_loy, [tmp] ;Load array Ax{00}..{0f}, Ax{00}..{f0} + vmovdqu xgft2_loy, [tmp+vec] ;Load array Bx{00}..{0f}, Bx{00}..{f0} + vmovdqu xgft3_loy, [tmp+2*vec] ;Load array Cx{00}..{0f}, Cx{00}..{f0} + vshufi64x2 xgft1_hi, xgft1_lo, xgft1_lo, 0x55 + vshufi64x2 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 + vshufi64x2 xgft2_hi, xgft2_lo, xgft2_lo, 0x55 + vshufi64x2 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 + vshufi64x2 xgft3_hi, xgft3_lo, xgft3_lo, 0x55 + vshufi64x2 xgft3_lo, xgft3_lo, xgft3_lo, 0x00 + mov dest2, [dest1+PS] ; reuse mul_array + mov dest3, [dest1+2*PS] ; reuse vec_i + mov dest1, [dest1] + mov tmp, -1 + kmovq k1, tmp + +.loop64: + XLDR x0, [src+pos] ;Get next source vector + XLDR xd1, [dest1+pos] ;Get next dest vector + XLDR xd2, [dest2+pos] ;Get next dest vector + XLDR xd3, [dest3+pos] ;Get next dest vector + + vpandq xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpandq x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + ; dest1 + vpshufb xtmph1 {k1}{z}, xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl1 {k1}{z}, xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xtmph1, xtmph1, xtmpl1 ;GF add high and low partials + vpxorq xd1, xd1, xtmph1 ;xd1 += partial + + ; dest2 + vpshufb xtmph2 {k1}{z}, xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl2 {k1}{z}, xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xtmph2, xtmph2, xtmpl2 ;GF add high and low partials + vpxorq xd2, xd2, xtmph2 ;xd2 += partial + + ; dest3 + vpshufb xtmph3 {k1}{z}, xgft3_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl3 {k1}{z}, xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xtmph3, xtmph3, xtmpl3 ;GF add high and low partials + vpxorq xd3, xd3, xtmph3 ;xd3 += partial + + XSTR [dest1+pos], xd1 + XSTR [dest2+pos], xd2 + XSTR [dest3+pos], xd3 + + add pos, 64 ;Loop on 64 bytes at a time + cmp pos, len + jle .loop64 + + lea tmp, [len + 64] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, (1 << 63) + lea tmp, [len + 64 - 1] + and tmp, 63 + sarx pos, pos, tmp + kmovq k1, pos + mov pos, len ;Overlapped offset length-64 + jmp .loop64 ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +%else +%ifidn __OUTPUT_FORMAT__, win64 +global no_gf_3vect_mad_avx512 +no_gf_3vect_mad_avx512: +%endif +%endif ; ifdef HAVE_AS_KNOWS_AVX512 diff --git a/src/isa-l/erasure_code/gf_3vect_mad_sse.asm b/src/isa-l/erasure_code/gf_3vect_mad_sse.asm new file mode 100644 index 000000000..0d9028bc8 --- /dev/null +++ b/src/isa-l/erasure_code/gf_3vect_mad_sse.asm @@ -0,0 +1,298 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution.
+; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_3vect_mad_sse(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%define PS 8 +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg0.w ecx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + %define tmp r11 + %define return rax + %define return.w eax + %define stack_size 16*10 + 3*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + +%macro FUNC_SAVE 0 + sub rsp, stack_size + movdqa [rsp+16*0],xmm6 + movdqa [rsp+16*1],xmm7 + movdqa [rsp+16*2],xmm8 + movdqa [rsp+16*3],xmm9 + movdqa [rsp+16*4],xmm10 + movdqa [rsp+16*5],xmm11 + movdqa [rsp+16*6],xmm12 + movdqa [rsp+16*7],xmm13 + movdqa [rsp+16*8],xmm14 + movdqa [rsp+16*9],xmm15 + save_reg r12, 10*16 + 0*8 + save_reg r15, 10*16 + 1*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) +%endmacro + +%macro FUNC_RESTORE 0 + movdqa xmm6, [rsp+16*0] + movdqa xmm7, [rsp+16*1] + movdqa xmm8, [rsp+16*2] + movdqa xmm9, [rsp+16*3] + movdqa xmm10, [rsp+16*4] + movdqa xmm11, [rsp+16*5] + movdqa xmm12, [rsp+16*6] + movdqa xmm13, [rsp+16*7] + movdqa xmm14, [rsp+16*8] + movdqa xmm15, [rsp+16*9] + mov r12, [rsp + 10*16 + 0*8] + mov r15, [rsp + 10*16 + 1*8] + add rsp, stack_size +%endmacro + +%elifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg0.w edi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define return rax + %define return.w eax + + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + +;;; gf_3vect_mad_sse(len, vec, vec_i, mul_array, src, dest) +%define len arg0 +%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest1 arg5 +%define pos return +%define pos.w return.w + +%define dest2 mul_array +%define dest3 vec_i + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR movdqu + %define XSTR movdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR movdqa + %define XSTR movdqa + %else + %define XLDR movntdqa + %define XSTR movntdq + %endif +%endif + +default rel + +[bits 64] +section .text + +%define xmask0f xmm15 +%define xgft1_lo xmm14 +%define xgft1_hi xmm13 +%define xgft2_lo xmm12 +%define xgft2_hi xmm11 +%define xgft3_lo xmm10 +%define xgft3_hi xmm9 + +%define x0 xmm0 +%define xtmpa xmm1 +%define xtmph1 xmm2 +%define xtmpl1 xmm3 +%define xtmph2 xmm4 +%define xtmpl2 xmm5 +%define xtmph3 
xmm6 +%define xtmpl3 xmm7 +%define xd1 xmm8 +%define xd2 xtmpl1 +%define xd3 xtmph1 + +align 16 +global gf_3vect_mad_sse:ISAL_SYM_TYPE_FUNCTION +func(gf_3vect_mad_sse) + FUNC_SAVE + sub len, 16 + jl .return_fail + xor pos, pos + movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + sal vec_i, 5 ;Multiply by 32 + sal vec, 5 + lea tmp, [mul_array + vec_i] + + movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, Ax{02}, ... + movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0} + movdqu xgft2_lo, [tmp+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ... + movdqu xgft2_hi, [tmp+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0} + movdqu xgft3_lo, [tmp+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ... + movdqu xgft3_hi, [tmp+2*vec+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0} + mov dest2, [dest1+PS] ; reuse mul_array + mov dest3, [dest1+2*PS] ; reuse vec_i + mov dest1, [dest1] + +.loop16: + XLDR x0, [src+pos] ;Get next source vector + movdqa xtmph1, xgft1_hi ;Reload const array registers + movdqa xtmpl1, xgft1_lo + movdqa xtmph2, xgft2_hi ;Reload const array registers + movdqa xtmpl2, xgft2_lo + movdqa xtmph3, xgft3_hi ;Reload const array registers + movdqa xtmpl3, xgft3_lo + + XLDR xd1, [dest1+pos] ;Get next dest vector + + movdqa xtmpa, x0 ;Keep unshifted copy of src + psraw x0, 4 ;Shift to put high nibble into bits 4-0 + pand x0, xmask0f ;Mask high src nibble in bits 4-0 + pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0 + + ; dest1 + pshufb xtmph1, x0 ;Lookup mul table of high nibble + pshufb xtmpl1, xtmpa ;Lookup mul table of low nibble + pxor xtmph1, xtmpl1 ;GF add high and low partials + pxor xd1, xtmph1 + + XLDR xd2, [dest2+pos] ;reuse xtmpl1. Get next dest vector + XLDR xd3, [dest3+pos] ;reuse xtmph1. Get next dest vector + + ; dest2 + pshufb xtmph2, x0 ;Lookup mul table of high nibble + pshufb xtmpl2, xtmpa ;Lookup mul table of low nibble + pxor xtmph2, xtmpl2 ;GF add high and low partials + pxor xd2, xtmph2 + + ; dest3 + pshufb xtmph3, x0 ;Lookup mul table of high nibble + pshufb xtmpl3, xtmpa ;Lookup mul table of low nibble + pxor xtmph3, xtmpl3 ;GF add high and low partials + pxor xd3, xtmph3 + + XSTR [dest1+pos], xd1 ;Store result + XSTR [dest2+pos], xd2 ;Store result + XSTR [dest3+pos], xd3 ;Store result + + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + +.lessthan16: + ;; Tail len + ;; Do one more overlap pass + mov tmp, len ;Overlapped offset length-16 + + XLDR x0, [src+tmp] ;Get next source vector + XLDR xd1, [dest1+tmp] ;Get next dest vector + XLDR xd2, [dest2+tmp] ;reuse xtmpl1. Get next dest vector + XLDR xd3, [dest3+tmp] ;reuse xtmph1. 
Get next dest vector + + sub len, pos + + movdqa xtmph3, [constip16] ;Load const of i + 16 + pinsrb xtmpl3, len.w, 15 + pshufb xtmpl3, xmask0f ;Broadcast len to all bytes + pcmpgtb xtmpl3, xtmph3 + + movdqa xtmpa, x0 ;Keep unshifted copy of src + psraw x0, 4 ;Shift to put high nibble into bits 4-0 + pand x0, xmask0f ;Mask high src nibble in bits 4-0 + pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0 + + ; dest1 + pshufb xgft1_hi, x0 ;Lookup mul table of high nibble + pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft1_hi, xgft1_lo ;GF add high and low partials + pand xgft1_hi, xtmpl3 + pxor xd1, xgft1_hi + + ; dest2 + pshufb xgft2_hi, x0 ;Lookup mul table of high nibble + pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft2_hi, xgft2_lo ;GF add high and low partials + pand xgft2_hi, xtmpl3 + pxor xd2, xgft2_hi + + ; dest3 + pshufb xgft3_hi, x0 ;Lookup mul table of high nibble + pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft3_hi, xgft3_lo ;GF add high and low partials + pand xgft3_hi, xtmpl3 + pxor xd3, xgft3_hi + + XSTR [dest1+tmp], xd1 ;Store result + XSTR [dest2+tmp], xd2 ;Store result + XSTR [dest3+tmp], xd3 ;Store result + +.return_pass: + FUNC_RESTORE + mov return, 0 + ret + +.return_fail: + FUNC_RESTORE + mov return, 1 + ret + +endproc_frame + +section .data + +align 16 + +mask0f: + dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f +constip16: + dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7 + +;;; func core, ver, snum +slversion gf_3vect_mad_sse, 00, 01, 0206 diff --git a/src/isa-l/erasure_code/gf_4vect_dot_prod_avx.asm b/src/isa-l/erasure_code/gf_4vect_dot_prod_avx.asm new file mode 100644 index 000000000..ccfc8ce53 --- /dev/null +++ b/src/isa-l/erasure_code/gf_4vect_dot_prod_avx.asm @@ -0,0 +1,441 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_4vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests); +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r12 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define return rax + %macro SLDR 2 + %endmacro + %define SSTR SLDR + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + %endmacro + %macro FUNC_RESTORE 0 + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + %define arg5 r15 ; must be saved and restored + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r14 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define return rax + %macro SLDR 2 + %endmacro + %define SSTR SLDR + %define PS 8 + %define LOG_PS 3 + %define stack_size 9*16 + 7*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + save_xmm128 xmm6, 0*16 + save_xmm128 xmm7, 1*16 + save_xmm128 xmm8, 2*16 + save_xmm128 xmm9, 3*16 + save_xmm128 xmm10, 4*16 + save_xmm128 xmm11, 5*16 + save_xmm128 xmm12, 6*16 + save_xmm128 xmm13, 7*16 + save_xmm128 xmm14, 8*16 + save_reg r12, 9*16 + 0*8 + save_reg r13, 9*16 + 1*8 + save_reg r14, 9*16 + 2*8 + save_reg r15, 9*16 + 3*8 + save_reg rdi, 9*16 + 4*8 + save_reg rsi, 9*16 + 5*8 + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp + 0*16] + vmovdqa xmm7, [rsp + 1*16] + vmovdqa xmm8, [rsp + 2*16] + vmovdqa xmm9, [rsp + 3*16] + vmovdqa xmm10, [rsp + 4*16] + vmovdqa xmm11, [rsp + 5*16] + vmovdqa xmm12, [rsp + 6*16] + vmovdqa xmm13, [rsp + 7*16] + vmovdqa xmm14, [rsp + 8*16] + mov r12, [rsp + 9*16 + 0*8] + mov r13, [rsp + 9*16 + 1*8] + mov r14, [rsp + 9*16 + 2*8] + mov r15, [rsp + 9*16 + 3*8] + mov rdi, [rsp + 9*16 + 4*8] + mov rsi, [rsp + 9*16 + 5*8] + add rsp, stack_size + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, elf32 + +;;;================== High Address; +;;; arg4 +;;; arg3 +;;; arg2 +;;; arg1 +;;; arg0 +;;; return +;;;<================= esp of caller +;;; ebp +;;;<================= ebp = esp +;;; var0 +;;; var1 +;;; var2 +;;; var3 +;;; esi +;;; edi +;;; ebx +;;;<================= esp of callee +;;; +;;;================== Low Address; + + %define PS 4 + %define LOG_PS 2 + %define func(x) x: + %define arg(x) [ebp + PS*2 + PS*x] + %define var(x) [ebp - PS - PS*x] + + %define trans ecx + %define trans2 esi + %define arg0 trans ;trans and trans2 are for the variables in stack + %define arg0_m arg(0) + %define arg1 ebx + %define arg2 arg2_m + %define arg2_m arg(2) + %define arg3 trans + %define arg3_m arg(3) + %define arg4 trans + %define arg4_m arg(4) + %define arg5 trans2 + %define tmp edx + %define tmp2 edi + %define tmp3 trans2 + %define tmp3_m var(0) + %define tmp4 trans2 + %define tmp4_m var(1) + %define tmp5 trans2 + %define tmp5_m var(2) + %define tmp6 trans2 + %define tmp6_m var(3) 
+ %define return eax + %macro SLDR 2 ;stack load/restore + mov %1, %2 + %endmacro + %define SSTR SLDR + + %macro FUNC_SAVE 0 + push ebp + mov ebp, esp + sub esp, PS*4 ;4 local variables + push esi + push edi + push ebx + mov arg1, arg(1) + %endmacro + + %macro FUNC_RESTORE 0 + pop ebx + pop edi + pop esi + add esp, PS*4 ;4 local variables + pop ebp + %endmacro + +%endif ; output formats + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest1 arg4 +%define ptr arg5 +%define vec_i tmp2 +%define dest2 tmp3 +%define dest3 tmp4 +%define dest4 tmp5 +%define vskip3 tmp6 +%define pos return + + %ifidn PS,4 ;32-bit code + %define len_m arg0_m + %define src_m arg3_m + %define dest1_m arg4_m + %define dest2_m tmp3_m + %define dest3_m tmp4_m + %define dest4_m tmp5_m + %define vskip3_m tmp6_m + %endif + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + +%ifidn PS,8 ; 64-bit code + default rel + [bits 64] +%endif + + +section .text + +%ifidn PS,8 ;64-bit code + %define xmask0f xmm14 + %define xgft1_lo xmm13 + %define xgft1_hi xmm12 + %define xgft2_lo xmm11 + %define xgft2_hi xmm10 + %define xgft3_lo xmm9 + %define xgft3_hi xmm8 + %define xgft4_lo xmm7 + %define xgft4_hi xmm6 + + %define x0 xmm0 + %define xtmpa xmm1 + %define xp1 xmm2 + %define xp2 xmm3 + %define xp3 xmm4 + %define xp4 xmm5 +%else + %define xmm_trans xmm7 ;reuse xmask0f and xgft1_lo + %define xmask0f xmm_trans + %define xgft1_lo xmm_trans + %define xgft1_hi xmm6 + %define xgft2_lo xgft1_lo + %define xgft2_hi xgft1_hi + %define xgft3_lo xgft1_lo + %define xgft3_hi xgft1_hi + %define xgft4_lo xgft1_lo + %define xgft4_hi xgft1_hi + + %define x0 xmm0 + %define xtmpa xmm1 + %define xp1 xmm2 + %define xp2 xmm3 + %define xp3 xmm4 + %define xp4 xmm5 +%endif +align 16 +global gf_4vect_dot_prod_avx:ISAL_SYM_TYPE_FUNCTION +func(gf_4vect_dot_prod_avx) + FUNC_SAVE + SLDR len, len_m + sub len, 16 + SSTR len_m, len + jl .return_fail + xor pos, pos + vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + mov vskip3, vec + imul vskip3, 96 + SSTR vskip3_m, vskip3 + sal vec, LOG_PS ;vec *= PS. 
Make vec_i count by PS + SLDR dest1, dest1_m + mov dest2, [dest1+PS] + SSTR dest2_m, dest2 + mov dest3, [dest1+2*PS] + SSTR dest3_m, dest3 + mov dest4, [dest1+3*PS] + SSTR dest4_m, dest4 + mov dest1, [dest1] + SSTR dest1_m, dest1 + +.loop16: + vpxor xp1, xp1 + vpxor xp2, xp2 + vpxor xp3, xp3 + vpxor xp4, xp4 + mov tmp, mul_array + xor vec_i, vec_i + +.next_vect: + SLDR src, src_m + mov ptr, [src+vec_i] + + %ifidn PS,8 ;64-bit code + vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0} + vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + vmovdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0} + vmovdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + vmovdqu xgft3_hi, [tmp+vec*(64/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0} + vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f} + vmovdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0} + + XLDR x0, [ptr+pos] ;Get next source vector + add tmp, 32 + add vec_i, PS + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + %else ;32-bit code + XLDR x0, [ptr+pos] ;Get next source vector + vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0} + %endif + + vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft1_hi, xgft1_lo ;GF add high and low partials + vpxor xp1, xgft1_hi ;xp1 += partial + + %ifidn PS,4 ;32-bit code + vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + vmovdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0} + %endif + vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft2_hi, xgft2_lo ;GF add high and low partials + vpxor xp2, xgft2_hi ;xp2 += partial + + %ifidn PS,4 ;32-bit code + sal vec, 1 + vmovdqu xgft3_lo, [tmp+vec*(32/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + vmovdqu xgft3_hi, [tmp+vec*(32/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0} + sar vec, 1 + %endif + vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft3_hi, xgft3_lo ;GF add high and low partials + vpxor xp3, xgft3_hi ;xp3 += partial + + %ifidn PS,4 ;32-bit code + SLDR vskip3, vskip3_m + vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f} + vmovdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0} + add tmp, 32 + add vec_i, PS + %endif + vpshufb xgft4_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft4_hi, xgft4_lo ;GF add high and low partials + vpxor xp4, xgft4_hi ;xp4 += partial + + cmp vec_i, vec + jl .next_vect + + SLDR dest1, dest1_m + SLDR dest2, dest2_m + XSTR [dest1+pos], xp1 + XSTR [dest2+pos], xp2 + SLDR dest3, dest3_m + XSTR [dest3+pos], xp3 + SLDR dest4, dest4_m + XSTR [dest4+pos], xp4 + + SLDR len, len_m + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je 
.return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-16 + jmp .loop16 ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data + +align 16 +mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f + +;;; func core, ver, snum +slversion gf_4vect_dot_prod_avx, 02, 05, 0193 diff --git a/src/isa-l/erasure_code/gf_4vect_dot_prod_avx2.asm b/src/isa-l/erasure_code/gf_4vect_dot_prod_avx2.asm new file mode 100644 index 000000000..181a18d9d --- /dev/null +++ b/src/isa-l/erasure_code/gf_4vect_dot_prod_avx2.asm @@ -0,0 +1,460 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_4vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests); +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r12 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define return rax + %macro SLDR 2 + %endmacro + %define SSTR SLDR + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + %endmacro + %macro FUNC_RESTORE 0 + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + %define arg5 r15 ; must be saved and restored + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r14 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define return rax + %macro SLDR 2 + %endmacro + %define SSTR SLDR + %define PS 8 + %define LOG_PS 3 + %define stack_size 9*16 + 7*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + vmovdqa [rsp + 0*16], xmm6 + vmovdqa [rsp + 1*16], xmm7 + vmovdqa [rsp + 2*16], xmm8 + vmovdqa [rsp + 3*16], xmm9 + vmovdqa [rsp + 4*16], xmm10 + vmovdqa [rsp + 5*16], xmm11 + vmovdqa [rsp + 6*16], xmm12 + vmovdqa [rsp + 7*16], xmm13 + vmovdqa [rsp + 8*16], xmm14 + save_reg r12, 9*16 + 0*8 + save_reg r13, 9*16 + 1*8 + save_reg r14, 9*16 + 2*8 + save_reg r15, 9*16 + 3*8 + save_reg rdi, 9*16 + 4*8 + save_reg rsi, 9*16 + 5*8 + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp + 0*16] + vmovdqa xmm7, [rsp + 1*16] + vmovdqa xmm8, [rsp + 2*16] + vmovdqa xmm9, [rsp + 3*16] + vmovdqa xmm10, [rsp + 4*16] + vmovdqa xmm11, [rsp + 5*16] + vmovdqa xmm12, [rsp + 6*16] + vmovdqa xmm13, [rsp + 7*16] + vmovdqa xmm14, [rsp + 8*16] + mov r12, [rsp + 9*16 + 0*8] + mov r13, [rsp + 9*16 + 1*8] + mov r14, [rsp + 9*16 + 2*8] + mov r15, [rsp + 9*16 + 3*8] + mov rdi, [rsp + 9*16 + 4*8] + mov rsi, [rsp + 9*16 + 5*8] + add rsp, stack_size + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, elf32 + +;;;================== High Address; +;;; arg4 +;;; arg3 +;;; arg2 +;;; arg1 +;;; arg0 +;;; return +;;;<================= esp of caller +;;; ebp +;;;<================= ebp = esp +;;; var0 +;;; var1 +;;; var2 +;;; var3 +;;; esi +;;; edi +;;; ebx +;;;<================= esp of callee +;;; +;;;================== Low Address; + + %define PS 4 + %define LOG_PS 2 + %define func(x) x: + %define arg(x) [ebp + PS*2 + PS*x] + %define var(x) [ebp - PS - PS*x] + + %define trans ecx + %define trans2 esi + %define arg0 trans ;trans and trans2 are for the variables in stack + %define arg0_m arg(0) + %define arg1 ebx + %define arg2 arg2_m + %define arg2_m arg(2) + %define arg3 trans + %define arg3_m arg(3) + %define arg4 trans + %define arg4_m arg(4) + %define arg5 trans2 + %define tmp edx + %define tmp.w edx + %define tmp.b dl + %define tmp2 edi + %define tmp3 trans2 + 
%define tmp3_m var(0) + %define tmp4 trans2 + %define tmp4_m var(1) + %define tmp5 trans2 + %define tmp5_m var(2) + %define tmp6 trans2 + %define tmp6_m var(3) + %define return eax + %macro SLDR 2 ;stack load/restore + mov %1, %2 + %endmacro + %define SSTR SLDR + + %macro FUNC_SAVE 0 + push ebp + mov ebp, esp + sub esp, PS*4 ;4 local variables + push esi + push edi + push ebx + mov arg1, arg(1) + %endmacro + + %macro FUNC_RESTORE 0 + pop ebx + pop edi + pop esi + add esp, PS*4 ;4 local variables + pop ebp + %endmacro + +%endif ; output formats + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest1 arg4 +%define ptr arg5 +%define vec_i tmp2 +%define dest2 tmp3 +%define dest3 tmp4 +%define dest4 tmp5 +%define vskip3 tmp6 +%define pos return + + %ifidn PS,4 ;32-bit code + %define len_m arg0_m + %define src_m arg3_m + %define dest1_m arg4_m + %define dest2_m tmp3_m + %define dest3_m tmp4_m + %define dest4_m tmp5_m + %define vskip3_m tmp6_m + %endif + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + +%ifidn PS,8 ;64-bit code + default rel + [bits 64] +%endif + + +section .text + +%ifidn PS,8 ;64-bit code + %define xmask0f ymm14 + %define xmask0fx xmm14 + %define xgft1_lo ymm13 + %define xgft1_hi ymm12 + %define xgft2_lo ymm11 + %define xgft2_hi ymm10 + %define xgft3_lo ymm9 + %define xgft3_hi ymm8 + %define xgft4_lo ymm7 + %define xgft4_hi ymm6 + + %define x0 ymm0 + %define xtmpa ymm1 + %define xp1 ymm2 + %define xp2 ymm3 + %define xp3 ymm4 + %define xp4 ymm5 +%else + %define ymm_trans ymm7 ;reuse xmask0f and xgft1_hi + %define xmask0f ymm_trans + %define xmask0fx xmm7 + %define xgft1_lo ymm6 + %define xgft1_hi ymm_trans + %define xgft2_lo xgft1_lo + %define xgft2_hi xgft1_hi + %define xgft3_lo xgft1_lo + %define xgft3_hi xgft1_hi + %define xgft4_lo xgft1_lo + %define xgft4_hi xgft1_hi + + %define x0 ymm0 + %define xtmpa ymm1 + %define xp1 ymm2 + %define xp2 ymm3 + %define xp3 ymm4 + %define xp4 ymm5 +%endif +align 16 +global gf_4vect_dot_prod_avx2:ISAL_SYM_TYPE_FUNCTION +func(gf_4vect_dot_prod_avx2) + FUNC_SAVE + SLDR len, len_m + sub len, 32 + SSTR len_m, len + jl .return_fail + xor pos, pos + mov tmp.b, 0x0f + vpinsrb xmask0fx, xmask0fx, tmp.w, 0 + vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f... + mov vskip3, vec + imul vskip3, 96 + SSTR vskip3_m, vskip3 + sal vec, LOG_PS ;vec *= PS. 
Make vec_i count by PS + SLDR dest1, dest1_m + mov dest2, [dest1+PS] + SSTR dest2_m, dest2 + mov dest3, [dest1+2*PS] + SSTR dest3_m, dest3 + mov dest4, [dest1+3*PS] + SSTR dest4_m, dest4 + mov dest1, [dest1] + SSTR dest1_m, dest1 + +.loop32: + vpxor xp1, xp1 + vpxor xp2, xp2 + vpxor xp3, xp3 + vpxor xp4, xp4 + mov tmp, mul_array + xor vec_i, vec_i + +.next_vect: + SLDR src, src_m + mov ptr, [src+vec_i] + XLDR x0, [ptr+pos] ;Get next source vector + + add vec_i, PS + %ifidn PS,8 ;64-bit code + vpand xgft4_lo, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + vperm2i128 xtmpa, xgft4_lo, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi + vperm2i128 x0, xgft4_lo, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo + + vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + ; " Ax{00}, Ax{10}, ..., Ax{f0} + vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + ; " Bx{00}, Bx{10}, ..., Bx{f0} + vmovdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + ; " Cx{00}, Cx{10}, ..., Cx{f0} + vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f} + ; " Dx{00}, Dx{10}, ..., Dx{f0} + + vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo + vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo + vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo + vperm2i128 xgft4_hi, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo + add tmp, 32 + %else ;32-bit code + mov cl, 0x0f ;use ecx as a temp variable + vpinsrb xmask0fx, xmask0fx, ecx, 0 + vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f... + + vpand xgft4_lo, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + vperm2i128 xtmpa, xgft4_lo, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi + vperm2i128 x0, xgft4_lo, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo + + vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + ; " Ax{00}, Ax{10}, ..., Ax{f0} + vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo + %endif + + vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft1_hi, xgft1_lo ;GF add high and low partials + vpxor xp1, xgft1_hi ;xp1 += partial + + %ifidn PS,4 ; 32-bit code + vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + ; " Bx{00}, Bx{10}, ..., Bx{f0} + vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo + %endif + vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft2_hi, xgft2_lo ;GF add high and low partials + vpxor xp2, xgft2_hi ;xp2 += partial + + %ifidn PS,4 ; 32-bit code + sal vec, 1 + vmovdqu xgft3_lo, [tmp+vec*(32/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + ; " Cx{00}, Cx{10}, ..., Cx{f0} + vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo + sar vec, 1 + %endif + vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft3_hi, xgft3_lo ;GF add high and low partials + vpxor xp3, xgft3_hi ;xp3 += partial + + %ifidn PS,4 ; 32-bit code + SLDR vskip3, vskip3_m + vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f} + ; " DX{00}, Dx{10}, ..., Dx{f0} + vperm2i128 xgft4_hi, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo + add tmp, 32 + 
%endif + vpshufb xgft4_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft4_hi, xgft4_lo ;GF add high and low partials + vpxor xp4, xgft4_hi ;xp4 += partial + + cmp vec_i, vec + jl .next_vect + + SLDR dest1, dest1_m + SLDR dest2, dest2_m + XSTR [dest1+pos], xp1 + XSTR [dest2+pos], xp2 + SLDR dest3, dest3_m + XSTR [dest3+pos], xp3 + SLDR dest4, dest4_m + XSTR [dest4+pos], xp4 + + SLDR len, len_m + add pos, 32 ;Loop on 32 bytes at a time + cmp pos, len + jle .loop32 + + lea tmp, [len + 32] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-32 + jmp .loop32 ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data + +;;; func core, ver, snum +slversion gf_4vect_dot_prod_avx2, 04, 05, 0198 diff --git a/src/isa-l/erasure_code/gf_4vect_dot_prod_avx512.asm b/src/isa-l/erasure_code/gf_4vect_dot_prod_avx512.asm new file mode 100644 index 000000000..92886782d --- /dev/null +++ b/src/isa-l/erasure_code/gf_4vect_dot_prod_avx512.asm @@ -0,0 +1,301 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
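On the tail handling of gf_4vect_dot_prod_avx2, which ends above: the kernel rounds len down by one 32-byte block up front, loops over whole blocks, and covers any remainder by re-running a full block at offset len-32. Because the dot-product kernels overwrite their destinations rather than accumulate into them, recomputing the overlapping bytes is harmless. A minimal C sketch of that control flow, with illustrative names (BLOCK and process_block are stand-ins, not ISA-L symbols):

	#define BLOCK 32	/* the AVX2 kernels step 32 bytes per pass */

	static void process_block(int pos)
	{
		/* stand-in for one SIMD iteration: overwrites (does not
		 * accumulate into) dest bytes [pos, pos + BLOCK) */
		(void)pos;
	}

	static void dot_prod_blocks(int len)
	{
		int pos;

		if ((len -= BLOCK) < 0)
			return;			/* mirrors the early .return_fail */
		for (pos = 0; pos <= len; pos += BLOCK)
			process_block(pos);
		if (pos != len + BLOCK)
			process_block(len);	/* overlapped tail pass */
	}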
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_4vect_dot_prod_avx512(len, vec, *g_tbls, **buffs, **dests); +;;; + +%include "reg_sizes.asm" + +%ifdef HAVE_AS_KNOWS_AVX512 + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r12 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + %endmacro + %macro FUNC_RESTORE 0 + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + %define arg5 r15 ; must be saved and restored + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r14 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + %define stack_size 9*16 + 7*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + vmovdqa [rsp + 0*16], xmm6 + vmovdqa [rsp + 1*16], xmm7 + vmovdqa [rsp + 2*16], xmm8 + vmovdqa [rsp + 3*16], xmm9 + vmovdqa [rsp + 4*16], xmm10 + vmovdqa [rsp + 5*16], xmm11 + vmovdqa [rsp + 6*16], xmm12 + vmovdqa [rsp + 7*16], xmm13 + vmovdqa [rsp + 8*16], xmm14 + save_reg r12, 9*16 + 0*8 + save_reg r13, 9*16 + 1*8 + save_reg r14, 9*16 + 2*8 + save_reg r15, 9*16 + 3*8 + save_reg rdi, 9*16 + 4*8 + save_reg rsi, 9*16 + 5*8 + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp + 0*16] + vmovdqa xmm7, [rsp + 1*16] + vmovdqa xmm8, [rsp + 2*16] + vmovdqa xmm9, [rsp + 3*16] + vmovdqa xmm10, [rsp + 4*16] + vmovdqa xmm11, [rsp + 5*16] + vmovdqa xmm12, [rsp + 6*16] + vmovdqa xmm13, [rsp + 7*16] + vmovdqa xmm14, [rsp + 8*16] + mov r12, [rsp + 9*16 + 0*8] + mov r13, [rsp + 9*16 + 1*8] + mov r14, [rsp + 9*16 + 2*8] + mov r15, [rsp + 9*16 + 3*8] + mov rdi, [rsp + 9*16 + 4*8] + mov rsi, [rsp + 9*16 + 5*8] + add rsp, stack_size + %endmacro +%endif + + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest1 arg4 +%define ptr arg5 +%define vec_i tmp2 +%define dest2 tmp3 +%define dest3 tmp4 +%define dest4 tmp5 +%define vskip3 tmp6 +%define pos return + + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu8 + %define XSTR vmovdqu8 +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + +%define xmask0f zmm14 +%define xgft1_lo zmm13 +%define xgft1_loy ymm13 +%define xgft1_hi zmm12 +%define xgft2_lo zmm11 +%define xgft2_loy ymm11 +%define xgft2_hi zmm10 +%define xgft3_lo zmm9 +%define xgft3_loy ymm9 +%define xgft3_hi zmm8 +%define xgft4_lo zmm7 +%define xgft4_loy ymm7 +%define xgft4_hi zmm6 + +%define x0 zmm0 +%define xtmpa zmm1 +%define xp1 zmm2 +%define xp2 zmm3 +%define xp3 zmm4 +%define xp4 zmm5 + +default rel 
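The table addressing set up above is worth spelling out: g_tbls is laid out row-major, with 32 bytes of lookup table per (output row, source) pair, which is why for k sources the second, third and fourth rows are fetched at byte offsets 32*k, 64*k and vskip3 = 96*k from the walking table pointer. An illustrative helper assuming that layout (gf_tbl is not an ISA-L API):

	/* 32-byte GF(2^8) table for source i of output row p, k sources */
	static inline unsigned char *gf_tbl(unsigned char *g_tbls, int k,
					    int p, int i)
	{
		return g_tbls + 32 * (k * p + i);
	}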
+[bits 64] + +section .text + +align 16 +global gf_4vect_dot_prod_avx512:ISAL_SYM_TYPE_FUNCTION +func(gf_4vect_dot_prod_avx512) + FUNC_SAVE + sub len, 64 + jl .return_fail + + xor pos, pos + mov tmp, 0x0f + vpbroadcastb xmask0f, tmp ;Construct mask 0x0f0f0f... + mov vskip3, vec + imul vskip3, 96 + sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS + mov dest2, [dest1+PS] + mov dest3, [dest1+2*PS] + mov dest4, [dest1+3*PS] + mov dest1, [dest1] + +.loop64: + vpxorq xp1, xp1, xp1 + vpxorq xp2, xp2, xp2 + vpxorq xp3, xp3, xp3 + vpxorq xp4, xp4, xp4 + mov tmp, mul_array + xor vec_i, vec_i + +.next_vect: + mov ptr, [src+vec_i] + XLDR x0, [ptr+pos] ;Get next source vector + add vec_i, PS + + vpandq xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpandq x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + vmovdqu8 xgft1_loy, [tmp] ;Load array Ax{00}..{0f}, Ax{00}..{f0} + vmovdqu8 xgft2_loy, [tmp+vec*(32/PS)] ;Load array Bx{00}..{0f}, Bx{00}..{f0} + vmovdqu8 xgft3_loy, [tmp+vec*(64/PS)] ;Load array Cx{00}..{0f}, Cx{00}..{f0} + vmovdqu8 xgft4_loy, [tmp+vskip3] ;Load array Dx{00}..{0f}, Dx{00}..{f0} + add tmp, 32 + + vshufi64x2 xgft1_hi, xgft1_lo, xgft1_lo, 0x55 + vshufi64x2 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 + vshufi64x2 xgft2_hi, xgft2_lo, xgft2_lo, 0x55 + vshufi64x2 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 + + vpshufb xgft1_hi, xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xgft1_hi, xgft1_hi, xgft1_lo ;GF add high and low partials + vpxorq xp1, xp1, xgft1_hi ;xp1 += partial + + vpshufb xgft2_hi, xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xgft2_hi, xgft2_hi, xgft2_lo ;GF add high and low partials + vpxorq xp2, xp2, xgft2_hi ;xp2 += partial + + vshufi64x2 xgft3_hi, xgft3_lo, xgft3_lo, 0x55 + vshufi64x2 xgft3_lo, xgft3_lo, xgft3_lo, 0x00 + vshufi64x2 xgft4_hi, xgft4_lo, xgft4_lo, 0x55 + vshufi64x2 xgft4_lo, xgft4_lo, xgft4_lo, 0x00 + + vpshufb xgft3_hi, xgft3_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft3_lo, xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xgft3_hi, xgft3_hi, xgft3_lo ;GF add high and low partials + vpxorq xp3, xp3, xgft3_hi ;xp3 += partial + + vpshufb xgft4_hi, xgft4_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xgft4_hi, xgft4_hi, xgft4_lo ;GF add high and low partials + vpxorq xp4, xp4, xgft4_hi ;xp4 += partial + + cmp vec_i, vec + jl .next_vect + + XSTR [dest1+pos], xp1 + XSTR [dest2+pos], xp2 + XSTR [dest3+pos], xp3 + XSTR [dest4+pos], xp4 + + add pos, 64 ;Loop on 64 bytes at a time + cmp pos, len + jle .loop64 + + lea tmp, [len + 64] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-64 + jmp .loop64 ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +%else +%ifidn __OUTPUT_FORMAT__, win64 +global no_gf_4vect_dot_prod_avx512 +no_gf_4vect_dot_prod_avx512: +%endif +%endif ; ifdef HAVE_AS_KNOWS_AVX512 diff --git a/src/isa-l/erasure_code/gf_4vect_dot_prod_sse.asm b/src/isa-l/erasure_code/gf_4vect_dot_prod_sse.asm new file mode 100644 index 000000000..b32962490 --- /dev/null +++ b/src/isa-l/erasure_code/gf_4vect_dot_prod_sse.asm @@ -0,0 +1,443 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 
Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_4vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests); +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r12 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define return rax + %macro SLDR 2 + %endmacro + %define SSTR SLDR + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + %endmacro + %macro FUNC_RESTORE 0 + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + %define arg5 r15 ; must be saved and restored + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r14 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define return rax + %macro SLDR 2 + %endmacro + %define SSTR SLDR + %define PS 8 + %define LOG_PS 3 + %define stack_size 9*16 + 7*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + save_xmm128 xmm6, 0*16 + save_xmm128 xmm7, 1*16 + save_xmm128 xmm8, 2*16 + save_xmm128 xmm9, 3*16 + save_xmm128 xmm10, 4*16 + save_xmm128 xmm11, 5*16 + save_xmm128 xmm12, 6*16 + save_xmm128 xmm13, 7*16 + save_xmm128 xmm14, 8*16 + save_reg r12, 9*16 + 0*8 + save_reg r13, 9*16 + 1*8 + save_reg r14, 9*16 + 2*8 + save_reg r15, 9*16 + 3*8 + save_reg rdi, 9*16 + 4*8 + save_reg rsi, 9*16 + 5*8 + end_prolog + mov arg4, arg(4) + %endmacro + + 
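The empty SLDR/SSTR macros defined above are the 64-bit half of a spill scheme: x86-64 has enough general-purpose registers that every operand stays resident, so the macros expand to nothing, while the elf32 configuration further down redefines them as real stack loads and stores. The idea, transliterated to the C preprocessor (illustrative only; the real macros are NASM, and BUILD_64BIT is a hypothetical feature test):

	#ifdef BUILD_64BIT
	# define SLDR(reg, mem)			/* no-op: operand lives in a register */
	# define SSTR(mem, reg)			/* no-op */
	#else
	# define SLDR(reg, mem)	((reg) = (mem))	/* reload spilled operand */
	# define SSTR(mem, reg)	((mem) = (reg))	/* spill operand to stack */
	#endif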
%macro FUNC_RESTORE 0 + movdqa xmm6, [rsp + 0*16] + movdqa xmm7, [rsp + 1*16] + movdqa xmm8, [rsp + 2*16] + movdqa xmm9, [rsp + 3*16] + movdqa xmm10, [rsp + 4*16] + movdqa xmm11, [rsp + 5*16] + movdqa xmm12, [rsp + 6*16] + movdqa xmm13, [rsp + 7*16] + movdqa xmm14, [rsp + 8*16] + mov r12, [rsp + 9*16 + 0*8] + mov r13, [rsp + 9*16 + 1*8] + mov r14, [rsp + 9*16 + 2*8] + mov r15, [rsp + 9*16 + 3*8] + mov rdi, [rsp + 9*16 + 4*8] + mov rsi, [rsp + 9*16 + 5*8] + add rsp, stack_size + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, elf32 + +;;;================== High Address; +;;; arg4 +;;; arg3 +;;; arg2 +;;; arg1 +;;; arg0 +;;; return +;;;<================= esp of caller +;;; ebp +;;;<================= ebp = esp +;;; var0 +;;; var1 +;;; var2 +;;; var3 +;;; esi +;;; edi +;;; ebx +;;;<================= esp of callee +;;; +;;;================== Low Address; + + %define PS 4 + %define LOG_PS 2 + %define func(x) x: + %define arg(x) [ebp + PS*2 + PS*x] + %define var(x) [ebp - PS - PS*x] + + %define trans ecx + %define trans2 esi + %define arg0 trans ;trans and trans2 are for the variables in stack + %define arg0_m arg(0) + %define arg1 ebx + %define arg2 arg2_m + %define arg2_m arg(2) + %define arg3 trans + %define arg3_m arg(3) + %define arg4 trans + %define arg4_m arg(4) + %define arg5 trans2 + %define tmp edx + %define tmp2 edi + %define tmp3 trans2 + %define tmp3_m var(0) + %define tmp4 trans2 + %define tmp4_m var(1) + %define tmp5 trans2 + %define tmp5_m var(2) + %define tmp6 trans2 + %define tmp6_m var(3) + %define return eax + %macro SLDR 2 ;stack load/restore + mov %1, %2 + %endmacro + %define SSTR SLDR + + %macro FUNC_SAVE 0 + push ebp + mov ebp, esp + sub esp, PS*4 ;4 local variables + push esi + push edi + push ebx + mov arg1, arg(1) + %endmacro + + %macro FUNC_RESTORE 0 + pop ebx + pop edi + pop esi + add esp, PS*4 ;4 local variables + pop ebp + %endmacro + +%endif ; output formats + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest1 arg4 +%define ptr arg5 +%define vec_i tmp2 +%define dest2 tmp3 +%define dest3 tmp4 +%define dest4 tmp5 +%define vskip3 tmp6 +%define pos return + + %ifidn PS,4 ;32-bit code + %define len_m arg0_m + %define src_m arg3_m + %define dest1_m arg4_m + %define dest2_m tmp3_m + %define dest3_m tmp4_m + %define dest4_m tmp5_m + %define vskip3_m tmp6_m + %endif + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR movdqu + %define XSTR movdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR movdqa + %define XSTR movdqa + %else + %define XLDR movntdqa + %define XSTR movntdq + %endif +%endif + +%ifidn PS,8 ; 64-bit code + default rel + [bits 64] +%endif + + +section .text + +%ifidn PS,8 ;64-bit code + %define xmask0f xmm14 + %define xgft1_lo xmm2 + %define xgft1_hi xmm3 + %define xgft2_lo xmm11 + %define xgft2_hi xmm4 + %define xgft3_lo xmm9 + %define xgft3_hi xmm5 + %define xgft4_lo xmm7 + %define xgft4_hi xmm6 + + %define x0 xmm0 + %define xtmpa xmm1 + %define xp1 xmm8 + %define xp2 xmm10 + %define xp3 xmm12 + %define xp4 xmm13 +%else + %define xmm_trans xmm7 ;reuse xmask0f and xgft1_lo + %define xmask0f xmm_trans + %define xgft1_lo xmm_trans + %define xgft1_hi xmm6 + %define xgft2_lo xgft1_lo + %define xgft2_hi xgft1_hi + %define xgft3_lo xgft1_lo + %define xgft3_hi xgft1_hi + %define xgft4_lo xgft1_lo + %define xgft4_hi xgft1_hi + + %define x0 xmm0 + %define xtmpa xmm1 + %define xp1 xmm2 + %define xp2 xmm3 + %define xp3 xmm4 + %define xp4 xmm5 +%endif +align 16 +global 
gf_4vect_dot_prod_sse:ISAL_SYM_TYPE_FUNCTION +func(gf_4vect_dot_prod_sse) + FUNC_SAVE + SLDR len, len_m + sub len, 16 + SSTR len_m, len + jl .return_fail + xor pos, pos + movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + mov vskip3, vec + imul vskip3, 96 + SSTR vskip3_m, vskip3 + sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS + SLDR dest1, dest1_m + mov dest2, [dest1+PS] + SSTR dest2_m, dest2 + mov dest3, [dest1+2*PS] + SSTR dest3_m, dest3 + mov dest4, [dest1+3*PS] + SSTR dest4_m, dest4 + mov dest1, [dest1] + SSTR dest1_m, dest1 + +.loop16: + pxor xp1, xp1 + pxor xp2, xp2 + pxor xp3, xp3 + pxor xp4, xp4 + mov tmp, mul_array + xor vec_i, vec_i + +.next_vect: + SLDR src, src_m + mov ptr, [src+vec_i] + + %ifidn PS,8 ;64-bit code + movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0} + movdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + movdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0} + movdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + movdqu xgft3_hi, [tmp+vec*(64/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0} + movdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f} + movdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0} + + XLDR x0, [ptr+pos] ;Get next source vector + add tmp, 32 + add vec_i, PS + + movdqa xtmpa, x0 ;Keep unshifted copy of src + psraw x0, 4 ;Shift to put high nibble into bits 4-0 + pand x0, xmask0f ;Mask high src nibble in bits 4-0 + pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0 + %else ;32-bit code + XLDR x0, [ptr+pos] ;Get next source vector + movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + + movdqa xtmpa, x0 ;Keep unshifted copy of src + psraw x0, 4 ;Shift to put high nibble into bits 4-0 + pand x0, xmask0f ;Mask high src nibble in bits 4-0 + pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0 + + movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0} + %endif + + pshufb xgft1_hi, x0 ;Lookup mul table of high nibble + pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft1_hi, xgft1_lo ;GF add high and low partials + pxor xp1, xgft1_hi ;xp1 += partial + + %ifidn PS,4 ;32-bit code + movdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + movdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0} + %endif + pshufb xgft2_hi, x0 ;Lookup mul table of high nibble + pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft2_hi, xgft2_lo ;GF add high and low partials + pxor xp2, xgft2_hi ;xp2 += partial + + %ifidn PS,4 ;32-bit code + sal vec, 1 + movdqu xgft3_lo, [tmp+vec*(32/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + movdqu xgft3_hi, [tmp+vec*(32/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0} + sar vec, 1 + %endif + pshufb xgft3_hi, x0 ;Lookup mul table of high nibble + pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft3_hi, xgft3_lo ;GF add high and low partials + pxor xp3, xgft3_hi ;xp3 += partial + + %ifidn PS,4 ;32-bit code + SLDR vskip3, vskip3_m + movdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f} + movdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0} + add tmp, 32 + add vec_i, PS + %endif + pshufb xgft4_hi, x0 ;Lookup mul table of high nibble + pshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft4_hi, xgft4_lo ;GF add high and low partials + pxor xp4, xgft4_hi ;xp4 += partial + + cmp vec_i, 
vec + jl .next_vect + + SLDR dest1, dest1_m + SLDR dest2, dest2_m + XSTR [dest1+pos], xp1 + XSTR [dest2+pos], xp2 + SLDR dest3, dest3_m + XSTR [dest3+pos], xp3 + SLDR dest4, dest4_m + XSTR [dest4+pos], xp4 + + SLDR len, len_m + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-16 + jmp .loop16 ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data + +align 16 +mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f + +;;; func core, ver, snum +slversion gf_4vect_dot_prod_sse, 00, 06, 0064 diff --git a/src/isa-l/erasure_code/gf_4vect_dot_prod_sse_test.c b/src/isa-l/erasure_code/gf_4vect_dot_prod_sse_test.c new file mode 100644 index 000000000..0352eefa8 --- /dev/null +++ b/src/isa-l/erasure_code/gf_4vect_dot_prod_sse_test.c @@ -0,0 +1,695 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>		// for memset, memcmp
+#include "erasure_code.h"
+#include "types.h"
+
+#ifndef FUNCTION_UNDER_TEST
+# define FUNCTION_UNDER_TEST gf_4vect_dot_prod_sse
+#endif
+#ifndef TEST_MIN_SIZE
+# define TEST_MIN_SIZE 16
+#endif
+
+#define str(s) #s
+#define xstr(s) str(s)
+
+#define TEST_LEN 8192
+#define TEST_SIZE (TEST_LEN/2)
+#define TEST_MEM TEST_SIZE
+#define TEST_LOOPS 10000
+#define TEST_TYPE_STR ""
+
+#ifndef TEST_SOURCES
+# define TEST_SOURCES 16
+#endif
+#ifndef RANDOMS
+# define RANDOMS 20
+#endif
+
+#ifdef EC_ALIGNED_ADDR
+// Define power of 2 range to check ptr, len alignment
+# define PTR_ALIGN_CHK_B 0
+# define LEN_ALIGN_CHK_B 0	// 0 for aligned only
+#else
+// Define power of 2 range to check ptr, len alignment
+# define PTR_ALIGN_CHK_B 32
+# define LEN_ALIGN_CHK_B 32	// 0 for aligned only
+#endif
+
+typedef unsigned char u8;
+
+extern void FUNCTION_UNDER_TEST(int len, int vlen, unsigned char *gftbls,
+				unsigned char **src, unsigned char **dest);
+
+void dump(unsigned char *buf, int len)
+{
+	int i;
+	for (i = 0; i < len;) {
+		printf(" %2x", 0xff & buf[i++]);
+		if (i % 32 == 0)
+			printf("\n");
+	}
+	printf("\n");
+}
+
+void dump_matrix(unsigned char **s, int k, int m)
+{
+	int i, j;
+	for (i = 0; i < k; i++) {
+		for (j = 0; j < m; j++) {
+			printf(" %2x", s[i][j]);
+		}
+		printf("\n");
+	}
+	printf("\n");
+}
+
+void dump_u8xu8(unsigned char *s, int k, int m)
+{
+	int i, j;
+	for (i = 0; i < k; i++) {
+		for (j = 0; j < m; j++) {
+			printf(" %2x", 0xff & s[j + (i * m)]);
+		}
+		printf("\n");
+	}
+	printf("\n");
+}
+
+int main(int argc, char *argv[])
+{
+	int i, j, rtest, srcs;
+	void *buf;
+	u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES];
+	u8 g4[TEST_SOURCES], g_tbls[4 * TEST_SOURCES * 32], *buffs[TEST_SOURCES];
+	u8 *dest1, *dest2, *dest3, *dest4, *dest_ref1, *dest_ref2, *dest_ref3;
+	u8 *dest_ref4, *dest_ptrs[4];
+
+	int align, size;
+	unsigned char *efence_buffs[TEST_SOURCES];
+	unsigned int offset;
+	u8 *ubuffs[TEST_SOURCES];
+	u8 *udest_ptrs[4];
+	printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
+
+	// Allocate the arrays
+	for (i = 0; i < TEST_SOURCES; i++) {
+		if (posix_memalign(&buf, 64, TEST_LEN)) {
+			printf("alloc error: Fail");
+			return -1;
+		}
+		buffs[i] = buf;
+	}
+
+	if (posix_memalign(&buf, 64, TEST_LEN)) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	dest1 = buf;
+
+	if (posix_memalign(&buf, 64, TEST_LEN)) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	dest2 = buf;
+
+	if (posix_memalign(&buf, 64, TEST_LEN)) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	dest3 = buf;
+
+	if (posix_memalign(&buf, 64, TEST_LEN)) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	dest4 = buf;
+
+	if (posix_memalign(&buf, 64, TEST_LEN)) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	dest_ref1 = buf;
+
+	if (posix_memalign(&buf, 64, TEST_LEN)) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	dest_ref2 = buf;
+
+	if (posix_memalign(&buf, 64, TEST_LEN)) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	dest_ref3 = buf;
+
+	if (posix_memalign(&buf, 64, TEST_LEN)) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	dest_ref4 = buf;
+
+	dest_ptrs[0] = dest1;
+	dest_ptrs[1] = dest2;
+	dest_ptrs[2] = dest3;
+	dest_ptrs[3] = dest4;
+
+	// Test of all zeros
+	for (i = 0; i < TEST_SOURCES; i++)
+		memset(buffs[i], 0, TEST_LEN);
+
+	memset(dest1, 0, TEST_LEN);
+	memset(dest2, 0, TEST_LEN);
+	memset(dest3, 0, TEST_LEN);
+	memset(dest4, 0, TEST_LEN);
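The reference computation that follows (gf_vect_mul_init to build the tables, then one gf_vect_dot_prod_base call per output row, each pointed at its own 32*TEST_SOURCES-byte slice of g_tbls) is the golden model for every check in this file. Per output byte, that base computation is a GF(2^8) sum of two table lookups per source, since the 32-byte table built by gf_vect_mul_init holds products for the low nibble in bytes 0-15 and for the high nibble in bytes 16-31. A compact sketch (gf_mul_tbl and dot_prod_ref are illustrative names, not the library's):

	static unsigned char gf_mul_tbl(const unsigned char *tbl, unsigned char b)
	{
		return tbl[b & 0x0f] ^ tbl[16 + (b >> 4)];
	}

	static void dot_prod_ref(int len, int k, const unsigned char *g_tbls,
				 unsigned char **src, unsigned char *dest)
	{
		int i, j;

		for (j = 0; j < len; j++) {
			unsigned char s = 0;

			for (i = 0; i < k; i++)
				s ^= gf_mul_tbl(&g_tbls[32 * i], src[i][j]);
			dest[j] = s;
		}
	}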
+ memset(dest_ref1, 0, TEST_LEN); + memset(dest_ref2, 0, TEST_LEN); + memset(dest_ref3, 0, TEST_LEN); + memset(dest_ref4, 0, TEST_LEN); + memset(g1, 2, TEST_SOURCES); + memset(g2, 1, TEST_SOURCES); + memset(g3, 7, TEST_SOURCES); + memset(g4, 3, TEST_SOURCES); + + for (i = 0; i < TEST_SOURCES; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]); + gf_vect_mul_init(g3[i], &g_tbls[64 * TEST_SOURCES + i * 32]); + gf_vect_mul_init(g4[i], &g_tbls[96 * TEST_SOURCES + i * 32]); + } + + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs, + dest_ref2); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs, + dest_ref3); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], buffs, + dest_ref4); + + FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test1\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest1, 25); + return -1; + } + if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest2, 25); + return -1; + } + if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test3\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(dest3, 25); + return -1; + } + if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test4\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, 25); + printf("dprod_dut:"); + dump(dest4, 25); + return -1; + } + + putchar('.'); + + // Rand data test + + for (rtest = 0; rtest < RANDOMS; rtest++) { + for (i = 0; i < TEST_SOURCES; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < TEST_SOURCES; i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + g4[i] = rand(); + } + + for (i = 0; i < TEST_SOURCES; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]); + } + + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], + buffs, dest_ref2); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], + buffs, dest_ref3); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], + buffs, dest_ref4); + + FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest1, 25); + return -1; + } + if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + 
printf("dprod_dut:"); + dump(dest2, 25); + return -1; + } + if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(dest3, 25); + return -1; + } + if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, 25); + printf("dprod_dut:"); + dump(dest4, 25); + return -1; + } + + putchar('.'); + } + + // Rand data test with varied parameters + for (rtest = 0; rtest < RANDOMS; rtest++) { + for (srcs = TEST_SOURCES; srcs > 0; srcs--) { + for (i = 0; i < srcs; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < srcs; i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + g4[i] = rand(); + } + + for (i = 0; i < srcs; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); + gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); + } + + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs, + dest_ref2); + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[64 * srcs], buffs, + dest_ref3); + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[96 * srcs], buffs, + dest_ref4); + + FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test1 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest1, 25); + return -1; + } + if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test2 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest2, 25); + return -1; + } + if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test3 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(dest3, 25); + return -1; + } + if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test4 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, 25); + printf("dprod_dut:"); + dump(dest4, 25); + return -1; + } + + putchar('.'); + } + } + + // Run tests at end of buffer for Electric Fence + align = (LEN_ALIGN_CHK_B != 0) ? 
1 : 32; + for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) { + for (i = 0; i < TEST_SOURCES; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end + efence_buffs[i] = buffs[i] + TEST_LEN - size; + + for (i = 0; i < TEST_SOURCES; i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + g4[i] = rand(); + } + + for (i = 0; i < TEST_SOURCES; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]); + } + + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1); + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], + efence_buffs, dest_ref2); + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], + efence_buffs, dest_ref3); + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], + efence_buffs, dest_ref4); + + FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, align); + printf("dprod_dut:"); + dump(dest1, align); + return -1; + } + + if (0 != memcmp(dest_ref2, dest2, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, align); + printf("dprod_dut:"); + dump(dest2, align); + return -1; + } + + if (0 != memcmp(dest_ref3, dest3, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, align); + printf("dprod_dut:"); + dump(dest3, align); + return -1; + } + + if (0 != memcmp(dest_ref4, dest4, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, align); + printf("dprod_dut:"); + dump(dest4, align); + return -1; + } + + putchar('.'); + } + + // Test rand ptr alignment if available + + for (rtest = 0; rtest < RANDOMS; rtest++) { + size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1); + srcs = rand() % TEST_SOURCES; + if (srcs == 0) + continue; + + offset = (PTR_ALIGN_CHK_B != 0) ? 
1 : PTR_ALIGN_CHK_B; + // Add random offsets + for (i = 0; i < srcs; i++) + ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); + + udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset)); + udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset)); + udest_ptrs[2] = dest3 + (rand() & (PTR_ALIGN_CHK_B - offset)); + udest_ptrs[3] = dest4 + (rand() & (PTR_ALIGN_CHK_B - offset)); + + memset(dest1, 0, TEST_LEN); // zero pad to check write-over + memset(dest2, 0, TEST_LEN); + memset(dest3, 0, TEST_LEN); + memset(dest4, 0, TEST_LEN); + + for (i = 0; i < srcs; i++) + for (j = 0; j < size; j++) + ubuffs[i][j] = rand(); + + for (i = 0; i < srcs; i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + g4[i] = rand(); + } + + for (i = 0; i < srcs; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); + gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); + } + + gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1); + gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2); + gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], ubuffs, dest_ref3); + gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], ubuffs, dest_ref4); + + FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs); + + if (memcmp(dest_ref1, udest_ptrs[0], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(udest_ptrs[0], 25); + return -1; + } + if (memcmp(dest_ref2, udest_ptrs[1], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(udest_ptrs[1], 25); + return -1; + } + if (memcmp(dest_ref3, udest_ptrs[2], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(udest_ptrs[2], 25); + return -1; + } + if (memcmp(dest_ref4, udest_ptrs[3], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, 25); + printf("dprod_dut:"); + dump(udest_ptrs[3], 25); + return -1; + } + // Confirm that padding around dests is unchanged + memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff + offset = udest_ptrs[0] - dest1; + + if (memcmp(dest1, dest_ref1, offset)) { + printf("Fail rand ualign pad1 start\n"); + return -1; + } + if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad1 end\n"); + printf("size=%d offset=%d srcs=%d\n", size, offset, srcs); + return -1; + } + + offset = udest_ptrs[1] - dest2; + if (memcmp(dest2, dest_ref1, offset)) { + printf("Fail rand ualign pad2 start\n"); + return -1; + } + if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad2 end\n"); + return -1; + } + + offset = udest_ptrs[2] - dest3; + if (memcmp(dest3, dest_ref1, offset)) { + printf("Fail rand ualign pad3 start\n"); + return -1; + } + if (memcmp(dest3 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad3 end\n"); + return -1; + } + + offset = udest_ptrs[3] - 
dest4; + if (memcmp(dest4, dest_ref1, offset)) { + printf("Fail rand ualign pad4 start\n"); + return -1; + } + if (memcmp(dest4 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad4 end\n"); + return -1; + } + + putchar('.'); + } + + // Test all size alignment + align = (LEN_ALIGN_CHK_B != 0) ? 1 : 32; + + for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) { + srcs = TEST_SOURCES; + + for (i = 0; i < srcs; i++) + for (j = 0; j < size; j++) + buffs[i][j] = rand(); + + for (i = 0; i < srcs; i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + g4[i] = rand(); + } + + for (i = 0; i < srcs; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); + gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); + } + + gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2); + gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], buffs, dest_ref3); + gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], buffs, dest_ref4); + + FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs); + + if (memcmp(dest_ref1, dest_ptrs[0], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest_ptrs[0], 25); + return -1; + } + if (memcmp(dest_ref2, dest_ptrs[1], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest_ptrs[1], 25); + return -1; + } + if (memcmp(dest_ref3, dest_ptrs[2], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(dest_ptrs[2], 25); + return -1; + } + if (memcmp(dest_ref4, dest_ptrs[3], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, 25); + printf("dprod_dut:"); + dump(dest_ptrs[3], 25); + return -1; + } + } + + printf("Pass\n"); + return 0; + +} diff --git a/src/isa-l/erasure_code/gf_4vect_mad_avx.asm b/src/isa-l/erasure_code/gf_4vect_mad_avx.asm new file mode 100644 index 000000000..62441c192 --- /dev/null +++ b/src/isa-l/erasure_code/gf_4vect_mad_avx.asm @@ -0,0 +1,336 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. 
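The file that begins here is the first of the multiply-accumulate (mad) variants: where gf_Nvect_dot_prod rebuilds each parity row from all k sources, gf_Nvect_mad folds a single updated source (index vec_i) into existing parity rows, reusing the same 32-byte tables. A one-row sketch under that reading (gf_vect_mad_ref is an illustrative name, not the library's entry point):

	static void gf_vect_mad_ref(int len, int k, int vec_i,
				    const unsigned char *g_tbls,
				    const unsigned char *src,
				    unsigned char *dest)
	{
		/* row 0's table; row p would add 32 * k * p */
		const unsigned char *tbl = &g_tbls[32 * vec_i];
		int j;

		for (j = 0; j < len; j++)
			dest[j] ^= tbl[src[j] & 0x0f] ^ tbl[16 + (src[j] >> 4)];
	}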
+; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_4vect_mad_avx(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%define PS 8 + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg0.w ecx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 + %define return rax + %define return.w eax + %define stack_size 16*10 + 3*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + +%macro FUNC_SAVE 0 + sub rsp, stack_size + movdqa [rsp+16*0],xmm6 + movdqa [rsp+16*1],xmm7 + movdqa [rsp+16*2],xmm8 + movdqa [rsp+16*3],xmm9 + movdqa [rsp+16*4],xmm10 + movdqa [rsp+16*5],xmm11 + movdqa [rsp+16*6],xmm12 + movdqa [rsp+16*7],xmm13 + movdqa [rsp+16*8],xmm14 + movdqa [rsp+16*9],xmm15 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r15, 10*16 + 2*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) +%endmacro + +%macro FUNC_RESTORE 0 + movdqa xmm6, [rsp+16*0] + movdqa xmm7, [rsp+16*1] + movdqa xmm8, [rsp+16*2] + movdqa xmm9, [rsp+16*3] + movdqa xmm10, [rsp+16*4] + movdqa xmm11, [rsp+16*5] + movdqa xmm12, [rsp+16*6] + movdqa xmm13, [rsp+16*7] + movdqa xmm14, [rsp+16*8] + movdqa xmm15, [rsp+16*9] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r15, [rsp + 10*16 + 2*8] + add rsp, stack_size +%endmacro + +%elifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg0.w edi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define tmp2 r10 + %define tmp3 r12 + %define return rax + %define return.w eax + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + %endmacro + %macro FUNC_RESTORE 0 + pop r12 + %endmacro +%endif + +;;; gf_4vect_mad_avx(len, vec, vec_i, mul_array, src, dest) +%define len arg0 +%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest1 arg5 +%define pos return +%define pos.w return.w + +%define dest2 mul_array +%define dest3 tmp2 +%define dest4 vec_i + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + + +default rel + +[bits 64] +section .text + +%define xmask0f xmm15 +%define xgft3_hi xmm14 +%define xgft4_hi xmm13 +%define xgft4_lo xmm12 + +%define x0 xmm0 +%define xtmpa xmm1 +%define xtmph1 xmm2 +%define xtmpl1 xmm3 +%define xtmph2 xmm4 +%define xtmpl2 xmm5 +%define xtmph3 xmm6 +%define xtmpl3 xmm7 +%define xtmph4 xmm8 +%define 
xtmpl4 xmm9 +%define xd1 xmm10 +%define xd2 xmm11 +%define xd3 xtmph1 +%define xd4 xtmpl1 + +align 16 +global gf_4vect_mad_avx:ISAL_SYM_TYPE_FUNCTION +func(gf_4vect_mad_avx) + FUNC_SAVE + sub len, 16 + jl .return_fail + xor pos, pos + vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + + mov tmp, vec + + sal vec_i, 5 ;Multiply by 32 + lea tmp3, [mul_array + vec_i] + + sal tmp, 6 ;Multiply by 64 + vmovdqu xgft3_hi, [tmp3+tmp+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0} + sal vec, 5 ;Multiply by 32 + add tmp, vec + vmovdqu xgft4_lo, [tmp3+tmp] ;Load array Dx{00}, Dx{01}, Dx{02}, ... + vmovdqu xgft4_hi, [tmp3+tmp+16] ; " Dx{00}, Dx{10}, Dx{20}, ... , Dx{f0} + + mov dest2, [dest1+PS] ; reuse mul_array + mov dest3, [dest1+2*PS] + mov dest4, [dest1+3*PS] ; reuse vec_i + mov dest1, [dest1] + +.loop16: + XLDR x0, [src+pos] ;Get next source vector + vmovdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0} + vmovdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ... + vmovdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0} + vmovdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ... + vmovdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ... + + XLDR xd1, [dest1+pos] ;Get next dest vector + XLDR xd2, [dest2+pos] ;Get next dest vector + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + ; dest1 + vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble + vpshufb xtmpl1, xtmpl1, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials + vpxor xd1, xd1, xtmph1 + + XLDR xd3, [dest3+pos] ;Reuse xtmph1, Get next dest vector + XLDR xd4, [dest4+pos] ;Reuse xtmpl1, Get next dest vector + + ; dest2 + vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble + vpshufb xtmpl2, xtmpl2, xtmpa ;Lookup mul table of low nibble + vpxor xtmph2, xtmph2, xtmpl2 ;GF add high and low partials + vpxor xd2, xd2, xtmph2 + + ; dest3 + vpshufb xtmph3, xgft3_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl3, xtmpl3, xtmpa ;Lookup mul table of low nibble + vpxor xtmph3, xtmph3, xtmpl3 ;GF add high and low partials + vpxor xd3, xd3, xtmph3 + + ; dest4 + vpshufb xtmph4, xgft4_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl4, xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph4, xtmph4, xtmpl4 ;GF add high and low partials + vpxor xd4, xd4, xtmph4 + + XSTR [dest1+pos], xd1 ;Store result + XSTR [dest2+pos], xd2 ;Store result + XSTR [dest3+pos], xd3 ;Store result + XSTR [dest4+pos], xd4 ;Store result + + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + +.lessthan16: + ;; Tail len + ;; Do one more overlap pass + + mov tmp, len ;Overlapped offset length-16 + + XLDR x0, [src+tmp] ;Get next source vector + + vmovdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0} + vmovdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ... + vmovdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0} + vmovdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ... + vmovdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ... 
+ + XLDR xd1, [dest1+tmp] ;Get next dest vector + XLDR xd2, [dest2+tmp] ;Get next dest vector + XLDR xtmph4, [dest3+tmp] ;Get next dest vector + + sub len, pos + + vmovdqa xtmpl4, [constip16] ;Load const of i + 16 + vpinsrb xtmph3, xtmph3, len.w, 15 + vpshufb xtmph3, xtmph3, xmask0f ;Broadcast len to all bytes + vpcmpgtb xtmph3, xtmph3, xtmpl4 + + XLDR xtmpl4, [dest4+tmp] ;Get next dest vector + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + ; dest1 + vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble + vpshufb xtmpl1, xtmpl1, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials + vpand xtmph1, xtmph1, xtmph3 + vpxor xd1, xd1, xtmph1 + + ; dest2 + vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble + vpshufb xtmpl2, xtmpl2, xtmpa ;Lookup mul table of low nibble + vpxor xtmph2, xtmph2, xtmpl2 ;GF add high and low partials + vpand xtmph2, xtmph2, xtmph3 + vpxor xd2, xd2, xtmph2 + + ; dest3 + vpshufb xgft3_hi, xgft3_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl3, xtmpl3, xtmpa ;Lookup mul table of low nibble + vpxor xgft3_hi, xgft3_hi, xtmpl3 ;GF add high and low partials + vpand xgft3_hi, xgft3_hi, xtmph3 + vpxor xtmph4, xtmph4, xgft3_hi + + ; dest4 + vpshufb xgft4_hi, xgft4_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft4_hi, xgft4_hi, xgft4_lo ;GF add high and low partials + vpand xgft4_hi, xgft4_hi, xtmph3 + vpxor xtmpl4, xtmpl4, xgft4_hi + + XSTR [dest1+tmp], xd1 ;Store result + XSTR [dest2+tmp], xd2 ;Store result + XSTR [dest3+tmp], xtmph4 ;Store result + XSTR [dest4+tmp], xtmpl4 ;Store result + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data + +align 16 +mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f +constip16: + dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7 + +;;; func core, ver, snum +slversion gf_4vect_mad_avx, 02, 01, 020a diff --git a/src/isa-l/erasure_code/gf_4vect_mad_avx2.asm b/src/isa-l/erasure_code/gf_4vect_mad_avx2.asm new file mode 100644 index 000000000..9a7b7d94b --- /dev/null +++ b/src/isa-l/erasure_code/gf_4vect_mad_avx2.asm @@ -0,0 +1,342 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_4vect_mad_avx2(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%define PS 8 + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg0.w ecx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define return rax + %define return.w eax + %define stack_size 16*10 + 3*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + +%macro FUNC_SAVE 0 + sub rsp, stack_size + movdqa [rsp+16*0],xmm6 + movdqa [rsp+16*1],xmm7 + movdqa [rsp+16*2],xmm8 + movdqa [rsp+16*3],xmm9 + movdqa [rsp+16*4],xmm10 + movdqa [rsp+16*5],xmm11 + movdqa [rsp+16*6],xmm12 + movdqa [rsp+16*7],xmm13 + movdqa [rsp+16*8],xmm14 + movdqa [rsp+16*9],xmm15 + save_reg r12, 10*16 + 0*8 + save_reg r15, 10*16 + 1*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) +%endmacro + +%macro FUNC_RESTORE 0 + movdqa xmm6, [rsp+16*0] + movdqa xmm7, [rsp+16*1] + movdqa xmm8, [rsp+16*2] + movdqa xmm9, [rsp+16*3] + movdqa xmm10, [rsp+16*4] + movdqa xmm11, [rsp+16*5] + movdqa xmm12, [rsp+16*6] + movdqa xmm13, [rsp+16*7] + movdqa xmm14, [rsp+16*8] + movdqa xmm15, [rsp+16*9] + mov r12, [rsp + 10*16 + 0*8] + mov r15, [rsp + 10*16 + 1*8] + add rsp, stack_size +%endmacro + +%elifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg0.w edi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define return rax + %define return.w eax + + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + + +;;; gf_4vect_mad_avx2(len, vec, vec_i, mul_array, src, dest) +%define len arg0 +%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest1 arg5 +%define pos return +%define pos.w return.w + +%define dest2 mul_array +%define dest3 vec +%define dest4 vec_i + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + + +default rel + +[bits 64] +section .text + +%define xmask0f ymm15 +%define xmask0fx xmm15 +%define xgft1_lo ymm14 +%define xgft2_lo ymm13 +%define xgft3_lo ymm12 +%define xgft4_lo ymm11 + +%define x0 ymm0 +%define xtmpa ymm1 +%define xtmpl ymm2 +%define xtmplx xmm2 +%define xtmph1 ymm3 +%define xtmph1x xmm3 +%define xtmph2 ymm4 +%define xtmph3 ymm5 +%define xtmph4 ymm6 +%define xd1 ymm7 +%define xd2 ymm8 +%define xd3 ymm9 +%define xd4 ymm10 + +align 16 +global gf_4vect_mad_avx2:ISAL_SYM_TYPE_FUNCTION +func(gf_4vect_mad_avx2) + FUNC_SAVE + sub len, 32 + jl .return_fail + xor pos, pos + mov tmp.b, 0x0f + vpinsrb xmask0fx, xmask0fx, tmp.w, 0 + vpbroadcastb xmask0f, xmask0fx 
;Construct mask 0x0f0f0f... + + sal vec_i, 5 ;Multiply by 32 + sal vec, 5 ;Multiply by 32 + lea tmp, [mul_array + vec_i] + + vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, Ax{02}, ... + ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0} + vmovdqu xgft2_lo, [tmp+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ... + ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0} + vmovdqu xgft3_lo, [tmp+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ... + ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0} + add tmp, vec + vmovdqu xgft4_lo, [tmp+2*vec] ;Load array Dx{00}, Dx{01}, Dx{02}, ... + ; " Dx{00}, Dx{10}, Dx{20}, ... , Dx{f0} + + mov dest2, [dest1+PS] ; reuse mul_array + mov dest3, [dest1+2*PS] ; reuse vec + mov dest4, [dest1+3*PS] ; reuse vec_i + mov dest1, [dest1] + +.loop32: + XLDR x0, [src+pos] ;Get next source vector + + XLDR xd1, [dest1+pos] ;Get next dest vector + XLDR xd2, [dest2+pos] ;Get next dest vector + XLDR xd3, [dest3+pos] ;Get next dest vector + XLDR xd4, [dest4+pos] ;reuse xtmpl1. Get next dest vector + + vpand xtmpl, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + vperm2i128 xtmpa, xtmpl, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi + vperm2i128 x0, xtmpl, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo + + vperm2i128 xtmph1, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo + vperm2i128 xtmph2, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo + vperm2i128 xtmph3, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo + vperm2i128 xtmph4, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo + + ; dest1 + vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble + vpshufb xtmpl, xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmph1, xtmpl ;GF add high and low partials + vpxor xd1, xd1, xtmph1 ;xd1 += partial + + ; dest2 + vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble + vpshufb xtmpl, xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph2, xtmph2, xtmpl ;GF add high and low partials + vpxor xd2, xd2, xtmph2 ;xd2 += partial + + ; dest3 + vpshufb xtmph3, xtmph3, x0 ;Lookup mul table of high nibble + vpshufb xtmpl, xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph3, xtmph3, xtmpl ;GF add high and low partials + vpxor xd3, xd3, xtmph3 ;xd3 += partial + + ; dest4 + vpshufb xtmph4, xtmph4, x0 ;Lookup mul table of high nibble + vpshufb xtmpl, xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph4, xtmph4, xtmpl ;GF add high and low partials + vpxor xd4, xd4, xtmph4 ;xd4 += partial + + XSTR [dest1+pos], xd1 + XSTR [dest2+pos], xd2 + XSTR [dest3+pos], xd3 + XSTR [dest4+pos], xd4 + + add pos, 32 ;Loop on 32 bytes at a time + cmp pos, len + jle .loop32 + + lea tmp, [len + 32] + cmp pos, tmp + je .return_pass + +.lessthan32: + ;; Tail len + ;; Do one more overlap pass + mov tmp.b, 0x1f + vpinsrb xtmph1x, xtmph1x, tmp.w, 0 + vpbroadcastb xtmph1, xtmph1x ;Construct mask 0x1f1f1f... + + mov tmp, len ;Overlapped offset length-32 + + XLDR x0, [src+tmp] ;Get next source vector + + XLDR xd1, [dest1+tmp] ;Get next dest vector + XLDR xd2, [dest2+tmp] ;Get next dest vector + XLDR xd3, [dest3+tmp] ;Get next dest vector + XLDR xd4, [dest4+tmp] ;Get next dest vector + + sub len, pos + + vmovdqa xtmph2, [constip32] ;Load const of i + 32 + vpinsrb xtmplx, xtmplx, len.w, 15 + vinserti128 xtmpl, xtmpl, xtmplx, 1 ;swapped to xtmplx | xtmplx + vpshufb xtmpl, xtmpl, xtmph1 ;Broadcast len to all bytes. xtmph1=0x1f1f1f... 
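+	; Note: after the "sub len, pos" above, len holds a negative
+	; overlap count; broadcast into every byte of xtmpl and compared
+	; signed (vpcmpgtb, next instruction) against constip32's
+	; descending -1..-32 constants, it yields 0xff exactly at the
+	; positions of this final 32-byte window that the main loop has
+	; not already covered, so the vpand before each accumulate keeps
+	; the overlap pass from XORing any byte twice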
+ vpcmpgtb xtmpl, xtmpl, xtmph2 + + vpand xtmph1, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + vperm2i128 xtmpa, xtmph1, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi + vperm2i128 x0, xtmph1, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo + + vperm2i128 xtmph1, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo + vperm2i128 xtmph2, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo + vperm2i128 xtmph3, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo + vperm2i128 xtmph4, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo + + ; dest1 + vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmph1, xgft1_lo ;GF add high and low partials + vpand xtmph1, xtmph1, xtmpl + vpxor xd1, xd1, xtmph1 ;xd1 += partial + + ; dest2 + vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph2, xtmph2, xgft2_lo ;GF add high and low partials + vpand xtmph2, xtmph2, xtmpl + vpxor xd2, xd2, xtmph2 ;xd2 += partial + + ; dest3 + vpshufb xtmph3, xtmph3, x0 ;Lookup mul table of high nibble + vpshufb xgft3_lo, xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph3, xtmph3, xgft3_lo ;GF add high and low partials + vpand xtmph3, xtmph3, xtmpl + vpxor xd3, xd3, xtmph3 ;xd3 += partial + + ; dest4 + vpshufb xtmph4, xtmph4, x0 ;Lookup mul table of high nibble + vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph4, xtmph4, xgft4_lo ;GF add high and low partials + vpand xtmph4, xtmph4, xtmpl + vpxor xd4, xd4, xtmph4 ;xd4 += partial + + XSTR [dest1+tmp], xd1 + XSTR [dest2+tmp], xd2 + XSTR [dest3+tmp], xd3 + XSTR [dest4+tmp], xd4 + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data +align 32 +constip32: + dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7 + dq 0xe8e9eaebecedeeef, 0xe0e1e2e3e4e5e6e7 + +;;; func core, ver, snum +slversion gf_4vect_mad_avx2, 04, 01, 020b diff --git a/src/isa-l/erasure_code/gf_4vect_mad_avx512.asm b/src/isa-l/erasure_code/gf_4vect_mad_avx512.asm new file mode 100644 index 000000000..bc836af6b --- /dev/null +++ b/src/isa-l/erasure_code/gf_4vect_mad_avx512.asm @@ -0,0 +1,267 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_4vect_mad_avx512(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%ifdef HAVE_AS_KNOWS_AVX512 + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define return rax + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + %define tmp r11 + %define return rax + %define stack_size 16*10 + 3*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + +%macro FUNC_SAVE 0 + sub rsp, stack_size + movdqa [rsp+16*0],xmm6 + movdqa [rsp+16*1],xmm7 + movdqa [rsp+16*2],xmm8 + movdqa [rsp+16*3],xmm9 + movdqa [rsp+16*4],xmm10 + movdqa [rsp+16*5],xmm11 + movdqa [rsp+16*6],xmm12 + movdqa [rsp+16*7],xmm13 + movdqa [rsp+16*8],xmm14 + movdqa [rsp+16*9],xmm15 + save_reg r12, 10*16 + 0*8 + save_reg r15, 10*16 + 1*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) +%endmacro + +%macro FUNC_RESTORE 0 + movdqa xmm6, [rsp+16*0] + movdqa xmm7, [rsp+16*1] + movdqa xmm8, [rsp+16*2] + movdqa xmm9, [rsp+16*3] + movdqa xmm10, [rsp+16*4] + movdqa xmm11, [rsp+16*5] + movdqa xmm12, [rsp+16*6] + movdqa xmm13, [rsp+16*7] + movdqa xmm14, [rsp+16*8] + movdqa xmm15, [rsp+16*9] + mov r12, [rsp + 10*16 + 0*8] + mov r15, [rsp + 10*16 + 1*8] + add rsp, stack_size +%endmacro +%endif + +%define PS 8 +%define len arg0 +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest1 arg5 +%define pos return +%define dest2 mul_array +%define dest3 vec +%define dest4 vec_i + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu8 + %define XSTR vmovdqu8 +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + +default rel +[bits 64] +section .text + +%define x0 zmm0 +%define xtmpa zmm1 +%define xtmpl1 zmm2 +%define xtmph1 zmm3 +%define xtmph2 zmm4 +%define xtmph3 zmm5 +%define xtmph4 zmm6 +%define xgft1_hi zmm7 +%define xgft1_lo zmm8 +%define xgft1_loy ymm8 +%define xgft2_hi zmm9 +%define xgft2_lo zmm10 +%define xgft2_loy ymm10 +%define xgft3_hi zmm11 +%define xgft3_lo zmm12 +%define xgft3_loy ymm12 +%define xgft4_hi zmm13 +%define xgft4_lo zmm14 +%define xgft4_loy ymm14 +%define xd1 zmm15 +%define xd2 zmm16 +%define xd3 zmm17 +%define xd4 zmm18 +%define xmask0f zmm19 +%define xtmpl2 zmm20 +%define xtmpl3 zmm21 +%define xtmpl4 zmm22 +%define xtmpl5 zmm23 + +align 16 +global gf_4vect_mad_avx512:ISAL_SYM_TYPE_FUNCTION +func(gf_4vect_mad_avx512) + FUNC_SAVE + sub len, 64 + jl .return_fail + xor pos, pos + mov tmp, 0x0f + vpbroadcastb xmask0f, tmp ;Construct mask 0x0f0f0f... 
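The 0x0f mask built above drives the split-nibble table lookup that every kernel in this patch uses: multiplying a byte by a constant in GF(2^8) is done as two 16-entry table lookups, one per nibble, combined with XOR. A minimal scalar sketch of the idea in C (the helper and table names are illustrative, not ISA-L API; in ISA-L the 32-byte low/high table pair per coefficient is produced by gf_vect_mul_init()):

	/* Scalar model of the vpshufb nibble lookups used below: lo[16]
	 * holds c*{0x00..0x0f} and hi[16] holds c*{0x00,0x10,...,0xf0};
	 * XOR is addition in GF(2^8), so the two halves combine with ^. */
	static inline unsigned char gf_mul_split(unsigned char x,
						 const unsigned char lo[16],
						 const unsigned char hi[16])
	{
		return lo[x & 0x0f] ^ hi[x >> 4];
	}

vpshufb performs this lookup for every byte of the vector at once, which is why the source bytes are first split into low and high nibbles and why the 16-byte tables are replicated across the wider lanes before the lookups that follow.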
+	sal	vec_i, 5		;Multiply by 32
+	sal	vec, 5			;Multiply by 32
+	lea	tmp, [mul_array + vec_i]
+	vmovdqu	xgft1_loy, [tmp]	;Load array Ax{00}..{0f}, Ax{00}..{f0}
+	vmovdqu	xgft2_loy, [tmp+vec]	;Load array Bx{00}..{0f}, Bx{00}..{f0}
+	vmovdqu	xgft3_loy, [tmp+2*vec]	;Load array Cx{00}..{0f}, Cx{00}..{f0}
+	add	tmp, vec
+	vmovdqu	xgft4_loy, [tmp+2*vec]	;Load array Dx{00}..{0f}, Dx{00}..{f0}
+	vshufi64x2 xgft1_hi, xgft1_lo, xgft1_lo, 0x55
+	vshufi64x2 xgft1_lo, xgft1_lo, xgft1_lo, 0x00
+	vshufi64x2 xgft2_hi, xgft2_lo, xgft2_lo, 0x55
+	vshufi64x2 xgft2_lo, xgft2_lo, xgft2_lo, 0x00
+	vshufi64x2 xgft3_hi, xgft3_lo, xgft3_lo, 0x55
+	vshufi64x2 xgft3_lo, xgft3_lo, xgft3_lo, 0x00
+	vshufi64x2 xgft4_hi, xgft4_lo, xgft4_lo, 0x55
+	vshufi64x2 xgft4_lo, xgft4_lo, xgft4_lo, 0x00
+	mov	dest2, [dest1+PS]	; reuse mul_array
+	mov	dest3, [dest1+2*PS]	; reuse vec
+	mov	dest4, [dest1+3*PS]	; reuse vec_i
+	mov	dest1, [dest1]
+	mov	tmp, -1
+	kmovq	k1, tmp
+
+.loop64:
+	XLDR	x0, [src+pos]		;Get next source vector
+	XLDR	xd1, [dest1+pos]	;Get next dest vector
+	XLDR	xd2, [dest2+pos]	;Get next dest vector
+	XLDR	xd3, [dest3+pos]	;Get next dest vector
+	XLDR	xd4, [dest4+pos]	;Get next dest vector
+
+	vpandq	xtmpa, x0, xmask0f	;Mask low src nibble in bits 4-0
+	vpsraw	x0, x0, 4		;Shift to put high nibble into bits 4-0
+	vpandq	x0, x0, xmask0f		;Mask high src nibble in bits 4-0
+
+	; dest1
+	vpshufb	xtmph1 {k1}{z}, xgft1_hi, x0	;Lookup mul table of high nibble
+	vpshufb	xtmpl1 {k1}{z}, xgft1_lo, xtmpa	;Lookup mul table of low nibble
+	vpxorq	xtmph1, xtmph1, xtmpl1	;GF add high and low partials
+	vpxorq	xd1, xd1, xtmph1	;xd1 += partial
+
+	; dest2
+	vpshufb	xtmph2 {k1}{z}, xgft2_hi, x0	;Lookup mul table of high nibble
+	vpshufb	xtmpl2 {k1}{z}, xgft2_lo, xtmpa	;Lookup mul table of low nibble
+	vpxorq	xtmph2, xtmph2, xtmpl2	;GF add high and low partials
+	vpxorq	xd2, xd2, xtmph2	;xd2 += partial
+
+	; dest3
+	vpshufb	xtmph3 {k1}{z}, xgft3_hi, x0	;Lookup mul table of high nibble
+	vpshufb	xtmpl3 {k1}{z}, xgft3_lo, xtmpa	;Lookup mul table of low nibble
+	vpxorq	xtmph3, xtmph3, xtmpl3	;GF add high and low partials
+	vpxorq	xd3, xd3, xtmph3	;xd3 += partial
+
+	; dest4
+	vpshufb	xtmph4 {k1}{z}, xgft4_hi, x0	;Lookup mul table of high nibble
+	vpshufb	xtmpl4 {k1}{z}, xgft4_lo, xtmpa	;Lookup mul table of low nibble
+	vpxorq	xtmph4, xtmph4, xtmpl4	;GF add high and low partials
+	vpxorq	xd4, xd4, xtmph4	;xd4 += partial
+
+	XSTR	[dest1+pos], xd1
+	XSTR	[dest2+pos], xd2
+	XSTR	[dest3+pos], xd3
+	XSTR	[dest4+pos], xd4
+
+	add	pos, 64			;Loop on 64 bytes at a time
+	cmp	pos, len
+	jle	.loop64
+
+	lea	tmp, [len + 64]
+	cmp	pos, tmp
+	je	.return_pass
+
+	;; Tail len
+	;; Set k1 so that only the top (len mod 64) byte lanes are live:
+	;; the masked lookups above then produce zero partials for bytes
+	;; the main loop already updated, so the overlap pass leaves them
+	;; unchanged.
+	mov	pos, (1 << 63)
+	lea	tmp, [len + 64 - 1]
+	and	tmp, 63
+	sarx	pos, pos, tmp
+	kmovq	k1, pos
+	mov	pos, len	;Overlapped offset length-64
+	jmp	.loop64		;Do one more overlap pass
+
+.return_pass:
+	mov	return, 0
+	FUNC_RESTORE
+	ret
+
+.return_fail:
+	mov	return, 1
+	FUNC_RESTORE
+	ret
+
+endproc_frame
+
+%else
+%ifidn __OUTPUT_FORMAT__, win64
+global no_gf_4vect_mad_avx512
+no_gf_4vect_mad_avx512:
+%endif
+%endif  ; ifdef HAVE_AS_KNOWS_AVX512
diff --git a/src/isa-l/erasure_code/gf_4vect_mad_sse.asm b/src/isa-l/erasure_code/gf_4vect_mad_sse.asm
new file mode 100644
index 000000000..c3d4c5d77
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_4vect_mad_sse.asm
@@ -0,0 +1,342 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;  Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_4vect_mad_sse(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%define PS 8 + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg0.w ecx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 + %define return rax + %define return.w eax + %define stack_size 16*10 + 3*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + +%macro FUNC_SAVE 0 + sub rsp, stack_size + movdqa [rsp+16*0],xmm6 + movdqa [rsp+16*1],xmm7 + movdqa [rsp+16*2],xmm8 + movdqa [rsp+16*3],xmm9 + movdqa [rsp+16*4],xmm10 + movdqa [rsp+16*5],xmm11 + movdqa [rsp+16*6],xmm12 + movdqa [rsp+16*7],xmm13 + movdqa [rsp+16*8],xmm14 + movdqa [rsp+16*9],xmm15 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r15, 10*16 + 2*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) +%endmacro + +%macro FUNC_RESTORE 0 + movdqa xmm6, [rsp+16*0] + movdqa xmm7, [rsp+16*1] + movdqa xmm8, [rsp+16*2] + movdqa xmm9, [rsp+16*3] + movdqa xmm10, [rsp+16*4] + movdqa xmm11, [rsp+16*5] + movdqa xmm12, [rsp+16*6] + movdqa xmm13, [rsp+16*7] + movdqa xmm14, [rsp+16*8] + movdqa xmm15, [rsp+16*9] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r15, [rsp + 10*16 + 2*8] + add rsp, stack_size +%endmacro + +%elifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg0.w edi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define tmp2 r10 + %define tmp3 r12 + %define return rax + %define return.w eax + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + %endmacro + %macro FUNC_RESTORE 0 + pop r12 + %endmacro +%endif + +;;; gf_4vect_mad_sse(len, vec, vec_i, mul_array, src, dest) +%define len arg0 +%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest1 arg5 +%define pos return +%define 
pos.w return.w + +%define dest2 mul_array +%define dest3 tmp2 +%define dest4 vec_i + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR movdqu + %define XSTR movdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR movdqa + %define XSTR movdqa + %else + %define XLDR movntdqa + %define XSTR movntdq + %endif +%endif + +default rel + +[bits 64] +section .text + +%define xmask0f xmm15 +%define xgft3_hi xmm14 +%define xgft4_hi xmm13 +%define xgft4_lo xmm12 + +%define x0 xmm0 +%define xtmpa xmm1 +%define xtmph1 xmm2 +%define xtmpl1 xmm3 +%define xtmph2 xmm4 +%define xtmpl2 xmm5 +%define xtmph3 xmm6 +%define xtmpl3 xmm7 +%define xtmph4 xmm8 +%define xtmpl4 xmm9 +%define xd1 xmm10 +%define xd2 xmm11 +%define xd3 xtmph1 +%define xd4 xtmpl1 + +align 16 +global gf_4vect_mad_sse:ISAL_SYM_TYPE_FUNCTION +func(gf_4vect_mad_sse) + FUNC_SAVE + sub len, 16 + jl .return_fail + xor pos, pos + movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + mov tmp, vec + + sal vec_i, 5 ;Multiply by 32 + lea tmp3, [mul_array + vec_i] + + sal tmp, 6 ;Multiply by 64 + + movdqu xgft3_hi, [tmp3+tmp+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0} + sal vec, 5 ;Multiply by 32 + add tmp, vec + movdqu xgft4_lo, [tmp3+tmp] ;Load array Dx{00}, Dx{01}, Dx{02}, ... + movdqu xgft4_hi, [tmp3+tmp+16] ; " Dx{00}, Dx{10}, Dx{20}, ... , Dx{f0} + + mov dest2, [dest1+PS] ; reuse mul_array + mov dest3, [dest1+2*PS] + mov dest4, [dest1+3*PS] ; reuse vec_i + mov dest1, [dest1] + +.loop16: + XLDR x0, [src+pos] ;Get next source vector + movdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0} + movdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ... + movdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0} + movdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ... + movdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ... 
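+	;; Only 16 xmm registers are available and xd3/xd4 alias xtmph1/
+	;; xtmpl1, so the A, B and low-C tables are reloaded on every
+	;; iteration; only the high-C and both D tables stay resident in
+	;; xmm12-xmm14.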
+ + movdqa xtmph3, xgft3_hi + movdqa xtmpl4, xgft4_lo + movdqa xtmph4, xgft4_hi + + XLDR xd1, [dest1+pos] ;Get next dest vector + XLDR xd2, [dest2+pos] ;Get next dest vector + + movdqa xtmpa, x0 ;Keep unshifted copy of src + psraw x0, 4 ;Shift to put high nibble into bits 4-0 + pand x0, xmask0f ;Mask high src nibble in bits 4-0 + pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0 + + ; dest1 + pshufb xtmph1, x0 ;Lookup mul table of high nibble + pshufb xtmpl1, xtmpa ;Lookup mul table of low nibble + pxor xtmph1, xtmpl1 ;GF add high and low partials + pxor xd1, xtmph1 + + XLDR xd3, [dest3+pos] ;Reuse xtmph1, Get next dest vector + XLDR xd4, [dest4+pos] ;Reuse xtmpl1, Get next dest vector + + ; dest2 + pshufb xtmph2, x0 ;Lookup mul table of high nibble + pshufb xtmpl2, xtmpa ;Lookup mul table of low nibble + pxor xtmph2, xtmpl2 ;GF add high and low partials + pxor xd2, xtmph2 + + ; dest3 + pshufb xtmph3, x0 ;Lookup mul table of high nibble + pshufb xtmpl3, xtmpa ;Lookup mul table of low nibble + pxor xtmph3, xtmpl3 ;GF add high and low partials + pxor xd3, xtmph3 + + ; dest4 + pshufb xtmph4, x0 ;Lookup mul table of high nibble + pshufb xtmpl4, xtmpa ;Lookup mul table of low nibble + pxor xtmph4, xtmpl4 ;GF add high and low partials + pxor xd4, xtmph4 + + XSTR [dest1+pos], xd1 ;Store result + XSTR [dest2+pos], xd2 ;Store result + XSTR [dest3+pos], xd3 ;Store result + XSTR [dest4+pos], xd4 ;Store result + + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + +.lessthan16: + ;; Tail len + ;; Do one more overlap pass + mov tmp, len ;Overlapped offset length-16 + + XLDR x0, [src+tmp] ;Get next source vector + + movdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0} + movdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ... + movdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0} + movdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ... + movdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ... + + XLDR xd1, [dest1+tmp] ;Get next dest vector + XLDR xd2, [dest2+tmp] ;Get next dest vector + XLDR xtmph4, [dest3+tmp] ;Reuse xtmph1. 
Get next dest vector + + sub len, pos + + movdqa xtmpl4, [constip16] ;Load const of i + 16 + pinsrb xtmph3, len.w, 15 + pshufb xtmph3, xmask0f ;Broadcast len to all bytes + pcmpgtb xtmph3, xtmpl4 + + XLDR xtmpl4, [dest4+tmp] ;Get next dest vector + + movdqa xtmpa, x0 ;Keep unshifted copy of src + psraw x0, 4 ;Shift to put high nibble into bits 4-0 + pand x0, xmask0f ;Mask high src nibble in bits 4-0 + pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0 + + ; dest1 + pshufb xtmph1, x0 ;Lookup mul table of high nibble + pshufb xtmpl1, xtmpa ;Lookup mul table of low nibble + pxor xtmph1, xtmpl1 ;GF add high and low partials + pand xtmph1, xtmph3 + pxor xd1, xtmph1 + + ; dest2 + pshufb xtmph2, x0 ;Lookup mul table of high nibble + pshufb xtmpl2, xtmpa ;Lookup mul table of low nibble + pxor xtmph2, xtmpl2 ;GF add high and low partials + pand xtmph2, xtmph3 + pxor xd2, xtmph2 + + ; dest3 + pshufb xgft3_hi, x0 ;Lookup mul table of high nibble + pshufb xtmpl3, xtmpa ;Lookup mul table of low nibble + pxor xgft3_hi, xtmpl3 ;GF add high and low partials + pand xgft3_hi, xtmph3 + pxor xtmph4, xgft3_hi + + ; dest4 + pshufb xgft4_hi, x0 ;Lookup mul table of high nibble + pshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft4_hi, xgft4_lo ;GF add high and low partials + pand xgft4_hi, xtmph3 + pxor xtmpl4, xgft4_hi + + XSTR [dest1+tmp], xd1 ;Store result + XSTR [dest2+tmp], xd2 ;Store result + XSTR [dest3+tmp], xtmph4 ;Store result + XSTR [dest4+tmp], xtmpl4 ;Store result + +.return_pass: + FUNC_RESTORE + mov return, 0 + ret + +.return_fail: + FUNC_RESTORE + mov return, 1 + ret + +endproc_frame + +section .data + +align 16 + +mask0f: + dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f +constip16: + dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7 + +;;; func core, ver, snum +slversion gf_4vect_mad_sse, 00, 01, 0209 diff --git a/src/isa-l/erasure_code/gf_5vect_dot_prod_avx.asm b/src/isa-l/erasure_code/gf_5vect_dot_prod_avx.asm new file mode 100644 index 000000000..eb1c15ed4 --- /dev/null +++ b/src/isa-l/erasure_code/gf_5vect_dot_prod_avx.asm @@ -0,0 +1,303 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_5vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests); +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r12 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + %endmacro + %macro FUNC_RESTORE 0 + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + %define arg5 r15 ; must be saved and restored + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r14 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + save_xmm128 xmm6, 0*16 + save_xmm128 xmm7, 1*16 + save_xmm128 xmm8, 2*16 + save_xmm128 xmm9, 3*16 + save_xmm128 xmm10, 4*16 + save_xmm128 xmm11, 5*16 + save_xmm128 xmm12, 6*16 + save_xmm128 xmm13, 7*16 + save_xmm128 xmm14, 8*16 + save_xmm128 xmm15, 9*16 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 + save_reg r15, 10*16 + 3*8 + save_reg rdi, 10*16 + 4*8 + save_reg rsi, 10*16 + 5*8 + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp + 0*16] + vmovdqa xmm7, [rsp + 1*16] + vmovdqa xmm8, [rsp + 2*16] + vmovdqa xmm9, [rsp + 3*16] + vmovdqa xmm10, [rsp + 4*16] + vmovdqa xmm11, [rsp + 5*16] + vmovdqa xmm12, [rsp + 6*16] + vmovdqa xmm13, [rsp + 7*16] + vmovdqa xmm14, [rsp + 8*16] + vmovdqa xmm15, [rsp + 9*16] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + mov rdi, [rsp + 10*16 + 4*8] + mov rsi, [rsp + 10*16 + 5*8] + add rsp, stack_size + %endmacro +%endif + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest arg4 +%define ptr arg5 +%define vec_i tmp2 +%define dest1 tmp3 +%define dest2 tmp4 +%define vskip1 tmp5 +%define vskip3 tmp6 +%define pos return + + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif 
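+	;; Build-time selection used throughout this patch: without
+	;; EC_ALIGNED_ADDR the kernels use unaligned moves; with it they
+	;; use non-temporal (streaming) moves, unless NO_NT_LDST drops
+	;; back to plain aligned moves.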
+%endif + +default rel + +[bits 64] +section .text + +%define xmask0f xmm15 +%define xgft1_lo xmm14 +%define xgft1_hi xmm13 +%define xgft2_lo xmm12 +%define xgft2_hi xmm11 +%define xgft3_lo xmm10 +%define xgft3_hi xmm9 +%define xgft4_lo xmm8 +%define xgft4_hi xmm7 + + +%define x0 xmm0 +%define xtmpa xmm1 +%define xp1 xmm2 +%define xp2 xmm3 +%define xp3 xmm4 +%define xp4 xmm5 +%define xp5 xmm6 + +align 16 +global gf_5vect_dot_prod_avx:ISAL_SYM_TYPE_FUNCTION +func(gf_5vect_dot_prod_avx) + FUNC_SAVE + sub len, 16 + jl .return_fail + xor pos, pos + vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + mov vskip1, vec + imul vskip1, 32 + mov vskip3, vec + imul vskip3, 96 + sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS + mov dest1, [dest] + mov dest2, [dest+PS] + + +.loop16: + mov tmp, mul_array + xor vec_i, vec_i + vpxor xp1, xp1 + vpxor xp2, xp2 + vpxor xp3, xp3 + vpxor xp4, xp4 + vpxor xp5, xp5 + + +.next_vect: + mov ptr, [src+vec_i] + add vec_i, PS + XLDR x0, [ptr+pos] ;Get next source vector + + vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0} + vmovdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + vmovdqu xgft2_hi, [tmp+vskip1*1+16] ; " Bx{00}, Bx{10}, ..., Bx{f0} + vmovdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + vmovdqu xgft3_hi, [tmp+vskip1*2+16] ; " Cx{00}, Cx{10}, ..., Cx{f0} + vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f} + vmovdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0} + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft1_hi, xgft1_lo ;GF add high and low partials + vpxor xp1, xgft1_hi ;xp1 += partial + + vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft2_hi, xgft2_lo ;GF add high and low partials + vpxor xp2, xgft2_hi ;xp2 += partial + + vmovdqu xgft1_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f} + vmovdqu xgft1_hi, [tmp+vskip1*4+16] ; " Ex{00}, Ex{10}, ..., Ex{f0} + add tmp, 32 + + vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft3_hi, xgft3_lo ;GF add high and low partials + vpxor xp3, xgft3_hi ;xp3 += partial + + vpshufb xgft4_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft4_hi, xgft4_lo ;GF add high and low partials + vpxor xp4, xgft4_hi ;xp4 += partial + + vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft1_hi, xgft1_lo ;GF add high and low partials + vpxor xp5, xgft1_hi ;xp5 += partial + + cmp vec_i, vec + jl .next_vect + + mov tmp, [dest+2*PS] + mov ptr, [dest+3*PS] + mov vec_i, [dest+4*PS] + + XSTR [dest1+pos], xp1 + XSTR [dest2+pos], xp2 + XSTR [tmp+pos], xp3 + XSTR [ptr+pos], xp4 + XSTR [vec_i+pos], xp5 + + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-16 + jmp .loop16 ;Do one more overlap pass + +.return_pass: + FUNC_RESTORE + mov return, 0 + ret + +.return_fail: + FUNC_RESTORE + mov return, 1 + ret + 
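For reference, here is what all of the gf_5vect_dot_prod_* variants compute, as a scalar C model rather than ISA-L code (gf_5vect_dot_prod_model is an illustrative name; gf_mul_split is the nibble-lookup sketch given earlier, and the 32-bytes-per-coefficient layout matches vskip1 = vec*32 above):

	/* dest[m][i] = XOR over all sources j of coeff(m,j) * src[j][i],
	 * with each coefficient's 32-byte table at g_tbls + (m*vec + j)*32. */
	static void gf_5vect_dot_prod_model(int len, int vec,
					    const unsigned char *g_tbls,
					    unsigned char **src,
					    unsigned char **dest)
	{
		int m, i, j;
		for (m = 0; m < 5; m++) {
			for (i = 0; i < len; i++) {
				unsigned char p = 0;
				for (j = 0; j < vec; j++) {
					const unsigned char *t =
					    g_tbls + (m * vec + j) * 32;
					p ^= gf_mul_split(src[j][i], t, t + 16);
				}
				dest[m][i] = p;
			}
		}
	}

The assembly walks this in 16-byte strips with the source loop innermost and all five accumulators live; the tail is handled by simply recomputing the last full-width strip at offset (length - 16), which is safe here, unlike in the *_mad kernels, because a dot product overwrites its outputs instead of accumulating into them.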
+endproc_frame + +section .data + +align 16 +mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f + +;;; func core, ver, snum +slversion gf_5vect_dot_prod_avx, 02, 04, 0194 diff --git a/src/isa-l/erasure_code/gf_5vect_dot_prod_avx2.asm b/src/isa-l/erasure_code/gf_5vect_dot_prod_avx2.asm new file mode 100644 index 000000000..dfafd8ab5 --- /dev/null +++ b/src/isa-l/erasure_code/gf_5vect_dot_prod_avx2.asm @@ -0,0 +1,315 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_5vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests); +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r12 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + %endmacro + %macro FUNC_RESTORE 0 + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + %define arg5 r15 ; must be saved and restored + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r14 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + vmovdqa [rsp + 0*16], xmm6 + vmovdqa [rsp + 1*16], xmm7 + vmovdqa [rsp + 2*16], xmm8 + vmovdqa [rsp + 3*16], xmm9 + vmovdqa [rsp + 4*16], xmm10 + vmovdqa [rsp + 5*16], xmm11 + vmovdqa [rsp + 6*16], xmm12 + vmovdqa [rsp + 7*16], xmm13 + vmovdqa [rsp + 8*16], xmm14 + vmovdqa [rsp + 9*16], xmm15 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 + save_reg r15, 10*16 + 3*8 + save_reg rdi, 10*16 + 4*8 + save_reg rsi, 10*16 + 5*8 + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp + 0*16] + vmovdqa xmm7, [rsp + 1*16] + vmovdqa xmm8, [rsp + 2*16] + vmovdqa xmm9, [rsp + 3*16] + vmovdqa xmm10, [rsp + 4*16] + vmovdqa xmm11, [rsp + 5*16] + vmovdqa xmm12, [rsp + 6*16] + vmovdqa xmm13, [rsp + 7*16] + vmovdqa xmm14, [rsp + 8*16] + vmovdqa xmm15, [rsp + 9*16] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + mov rdi, [rsp + 10*16 + 4*8] + mov rsi, [rsp + 10*16 + 5*8] + add rsp, stack_size + %endmacro +%endif + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest arg4 +%define ptr arg5 +%define vec_i tmp2 +%define dest1 tmp3 +%define dest2 tmp4 +%define vskip1 tmp5 +%define vskip3 tmp6 +%define pos return + + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + +default rel + +[bits 64] +section .text + +%define xmask0f ymm15 +%define xmask0fx xmm15 +%define xgft1_lo ymm14 +%define xgft1_hi ymm13 +%define xgft2_lo ymm12 +%define xgft2_hi ymm11 +%define xgft3_lo ymm10 +%define xgft3_hi ymm9 +%define xgft4_lo ymm8 +%define xgft4_hi ymm7 + + +%define x0 ymm0 +%define xtmpa ymm1 +%define xp1 ymm2 +%define xp2 ymm3 +%define xp3 ymm4 +%define xp4 ymm5 
+%define xp5	ymm6
+
+align 16
+global gf_5vect_dot_prod_avx2:ISAL_SYM_TYPE_FUNCTION
+func(gf_5vect_dot_prod_avx2)
+	FUNC_SAVE
+	sub	len, 32
+	jl	.return_fail
+	xor	pos, pos
+	mov	tmp.b, 0x0f
+	vpinsrb	xmask0fx, xmask0fx, tmp.w, 0
+	vpbroadcastb xmask0f, xmask0fx	;Construct mask 0x0f0f0f...
+	mov	vskip1, vec
+	imul	vskip1, 32
+	mov	vskip3, vec
+	imul	vskip3, 96
+	sal	vec, LOG_PS		;vec *= PS. Make vec_i count by PS
+	mov	dest1, [dest]
+	mov	dest2, [dest+PS]
+
+
+.loop32:
+	mov	tmp, mul_array
+	xor	vec_i, vec_i
+	vpxor	xp1, xp1
+	vpxor	xp2, xp2
+	vpxor	xp3, xp3
+	vpxor	xp4, xp4
+	vpxor	xp5, xp5
+
+
+.next_vect:
+	mov	ptr, [src+vec_i]
+	XLDR	x0, [ptr+pos]		;Get next source vector
+	add	vec_i, PS
+
+	vpand	xgft4_lo, x0, xmask0f	;Mask low src nibble in bits 4-0
+	vpsraw	x0, x0, 4		;Shift to put high nibble into bits 4-0
+	vpand	x0, x0, xmask0f		;Mask high src nibble in bits 4-0
+	vperm2i128 xtmpa, xgft4_lo, x0, 0x30	;swap xtmpa from 1lo|2lo to 1lo|2hi
+	vperm2i128 x0, xgft4_lo, x0, 0x12	;swap x0 from 1hi|2hi to 1hi|2lo
+
+	vmovdqu	xgft1_lo, [tmp]			;Load array Ax{00}, Ax{01}, ..., Ax{0f}
+						; " Ax{00}, Ax{10}, ..., Ax{f0}
+	vmovdqu	xgft2_lo, [tmp+vskip1*1]	;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+						; " Bx{00}, Bx{10}, ..., Bx{f0}
+	vmovdqu	xgft3_lo, [tmp+vskip1*2]	;Load array Cx{00}, Cx{01}, ..., Cx{0f}
+						; " Cx{00}, Cx{10}, ..., Cx{f0}
+	vmovdqu	xgft4_lo, [tmp+vskip3]		;Load array Dx{00}, Dx{01}, ..., Dx{0f}
+						; " Dx{00}, Dx{10}, ..., Dx{f0}
+
+	vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
+	vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
+	vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
+	vperm2i128 xgft4_hi, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo
+
+	vpshufb	xgft1_hi, x0		;Lookup mul table of high nibble
+	vpshufb	xgft1_lo, xtmpa		;Lookup mul table of low nibble
+	vpxor	xgft1_hi, xgft1_lo	;GF add high and low partials
+	vpxor	xp1, xgft1_hi		;xp1 += partial
+
+	vpshufb	xgft2_hi, x0		;Lookup mul table of high nibble
+	vpshufb	xgft2_lo, xtmpa		;Lookup mul table of low nibble
+	vpxor	xgft2_hi, xgft2_lo	;GF add high and low partials
+	vpxor	xp2, xgft2_hi		;xp2 += partial
+
+	vmovdqu	xgft1_lo, [tmp+vskip1*4]	;Load array Ex{00}, Ex{01}, ..., Ex{0f}
+						; " Ex{00}, Ex{10}, ..., Ex{f0}
+	vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
+	add	tmp, 32
+
+	vpshufb	xgft3_hi, x0		;Lookup mul table of high nibble
+	vpshufb	xgft3_lo, xtmpa		;Lookup mul table of low nibble
+	vpxor	xgft3_hi, xgft3_lo	;GF add high and low partials
+	vpxor	xp3, xgft3_hi		;xp3 += partial
+
+	vpshufb	xgft4_hi, x0		;Lookup mul table of high nibble
+	vpshufb	xgft4_lo, xtmpa		;Lookup mul table of low nibble
+	vpxor	xgft4_hi, xgft4_lo	;GF add high and low partials
+	vpxor	xp4, xgft4_hi		;xp4 += partial
+
+	vpshufb	xgft1_hi, x0		;Lookup mul table of high nibble
+	vpshufb	xgft1_lo, xtmpa		;Lookup mul table of low nibble
+	vpxor	xgft1_hi, xgft1_lo	;GF add high and low partials
+	vpxor	xp5, xgft1_hi		;xp5 += partial
+
+	cmp	vec_i, vec
+	jl	.next_vect
+
+	mov	tmp, [dest+2*PS]
+	mov	ptr, [dest+3*PS]
+	mov	vec_i, [dest+4*PS]
+
+	XSTR	[dest1+pos], xp1
+	XSTR	[dest2+pos], xp2
+	XSTR	[tmp+pos], xp3
+	XSTR	[ptr+pos], xp4
+	XSTR	[vec_i+pos], xp5
+
+	add	pos, 32			;Loop on 32 bytes at a time
+	cmp	pos, len
+	jle	.loop32
+
+	lea	tmp, [len + 32]
+	cmp	pos, tmp
+	je	.return_pass
+
+	;; Tail len
+	mov	pos, len	;Overlapped offset length-32
+	jmp	.loop32		;Do one more overlap pass
+
+.return_pass:
+	FUNC_RESTORE
+	mov	return, 0
+	ret
+
+.return_fail:
+	FUNC_RESTORE
+	mov	return, 1
+	ret
+
+endproc_frame + +section .data + +;;; func core, ver, snum +slversion gf_5vect_dot_prod_avx2, 04, 04, 0199 diff --git a/src/isa-l/erasure_code/gf_5vect_dot_prod_avx512.asm b/src/isa-l/erasure_code/gf_5vect_dot_prod_avx512.asm new file mode 100644 index 000000000..8b80d2b92 --- /dev/null +++ b/src/isa-l/erasure_code/gf_5vect_dot_prod_avx512.asm @@ -0,0 +1,335 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2019 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;
+;;; gf_5vect_dot_prod_avx512(len, vec, *g_tbls, **buffs, **dests);
+;;;
+
+%include "reg_sizes.asm"
+
+%ifdef HAVE_AS_KNOWS_AVX512
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0	rdi
+ %define arg1	rsi
+ %define arg2	rdx
+ %define arg3	rcx
+ %define arg4	r8
+ %define arg5	r9
+
+ %define tmp	r11
+ %define tmp.w	r11d
+ %define tmp.b	r11b
+ %define tmp2	r10
+ %define tmp3	r13	; must be saved and restored
+ %define tmp4	r12	; must be saved and restored
+ %define tmp5	r14	; must be saved and restored
+ %define tmp6	r15	; must be saved and restored
+ %define tmp7	rbp	; must be saved and restored
+ %define tmp8	rbx	; must be saved and restored
+ %define return	rax
+ %define PS	8
+ %define LOG_PS	3
+
+ %define func(x) x:
+ %macro FUNC_SAVE 0
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	push	rbp
+	push	rbx
+ %endmacro
+ %macro FUNC_RESTORE 0
+	pop	rbx
+	pop	rbp
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+ %endmacro
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0	rcx
+ %define arg1	rdx
+ %define arg2	r8
+ %define arg3	r9
+
+ %define arg4	r12	; must be saved, loaded and restored
+ %define arg5	r15	; must be saved and restored
+ %define tmp	r11
+ %define tmp.w	r11d
+ %define tmp.b	r11b
+ %define tmp2	r10
+ %define tmp3	r13	; must be saved and restored
+ %define tmp4	r14	; must be saved and restored
+ %define tmp5	rdi	; must be saved and restored
+ %define tmp6	rsi	; must be saved and restored
+ %define tmp7	rbp	; must be saved and restored
+ %define tmp8	rbx	; must be saved and restored
+ %define return	rax
+ %define PS	8
+ %define LOG_PS	3
+ %define stack_size	10*16 + 9*8	; must be an odd multiple of 8
+ %define arg(x)		[rsp + stack_size + PS + PS*x]
+
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+	alloc_stack	stack_size
+	vmovdqa	[rsp + 0*16], xmm6
+	vmovdqa	[rsp + 1*16], xmm7
+	vmovdqa	[rsp + 2*16], xmm8
+	vmovdqa	[rsp + 3*16], xmm9
+	vmovdqa	[rsp + 4*16], xmm10
+	vmovdqa	[rsp + 5*16], xmm11
+	vmovdqa	[rsp + 6*16], xmm12
+	vmovdqa	[rsp + 7*16], xmm13
+	vmovdqa	[rsp + 8*16], xmm14
+	vmovdqa	[rsp + 9*16], xmm15
+	; register saves start at 10*16 so they do not overlap the
+	; xmm15 save slot at [rsp + 9*16]
+	save_reg	r12,  10*16 + 0*8
+	save_reg	r13,  10*16 + 1*8
+	save_reg	r14,  10*16 + 2*8
+	save_reg	r15,  10*16 + 3*8
+	save_reg	rdi,  10*16 + 4*8
+	save_reg	rsi,  10*16 + 5*8
+	save_reg	rbp,  10*16 + 6*8
+	save_reg	rbx,  10*16 + 7*8
+	end_prolog
+	mov	arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+	vmovdqa	xmm6, [rsp + 0*16]
+	vmovdqa	xmm7, [rsp + 1*16]
+	vmovdqa	xmm8, [rsp + 2*16]
+	vmovdqa	xmm9, [rsp + 3*16]
+	vmovdqa	xmm10, [rsp + 4*16]
+	vmovdqa	xmm11, [rsp + 5*16]
+	vmovdqa	xmm12, [rsp + 6*16]
+	vmovdqa	xmm13, [rsp + 7*16]
+	vmovdqa	xmm14, [rsp + 8*16]
+	vmovdqa	xmm15, [rsp + 9*16]
+	mov	r12,  [rsp + 10*16 + 0*8]
+	mov	r13,  [rsp + 10*16 + 1*8]
+	mov	r14,  [rsp + 10*16 + 2*8]
+	mov	r15,  [rsp + 10*16 + 3*8]
+	mov	rdi,  [rsp + 10*16 + 4*8]
+	mov	rsi,  [rsp + 10*16 + 5*8]
+	mov	rbp,  [rsp + 10*16 + 6*8]
+	mov	rbx,  [rsp + 10*16 + 7*8]
+	add	rsp, stack_size
+ %endmacro
+%endif
+
+
+%define len	arg0
+%define vec	arg1
+%define mul_array arg2
+%define src	arg3
+%define dest1	arg4
+%define ptr	arg5
+%define vec_i	tmp2
+%define dest2	tmp3
+%define dest3	tmp4
+%define dest4	tmp5
+%define vskip3	tmp6
+%define dest5	tmp7
+%define vskip1	tmp8
+%define pos	return
+
+
+%ifndef EC_ALIGNED_ADDR
+;;; Use Un-aligned load/store
+ %define XLDR	vmovdqu8
+ %define XSTR	vmovdqu8
+%else
+;;; Use Non-temporal load/store
+ %ifdef NO_NT_LDST
+  %define XLDR	vmovdqa
+  %define XSTR	vmovdqa
+ %else
+  %define XLDR	vmovntdqa
+  %define XSTR	vmovntdq
+
%endif +%endif + +%define xmask0f zmm17 +%define xgft1_lo zmm16 +%define xgft1_loy ymm16 +%define xgft1_hi zmm15 +%define xgft2_lo zmm14 +%define xgft2_loy ymm14 +%define xgft2_hi zmm13 +%define xgft3_lo zmm12 +%define xgft3_loy ymm12 +%define xgft3_hi zmm11 +%define xgft4_lo zmm10 +%define xgft4_loy ymm10 +%define xgft4_hi zmm9 +%define xgft5_lo zmm8 +%define xgft5_loy ymm8 +%define xgft5_hi zmm7 + +%define x0 zmm0 +%define xtmpa zmm1 +%define xp1 zmm2 +%define xp2 zmm3 +%define xp3 zmm4 +%define xp4 zmm5 +%define xp5 zmm6 + +default rel +[bits 64] + +section .text + +align 16 +global gf_5vect_dot_prod_avx512:ISAL_SYM_TYPE_FUNCTION +func(gf_5vect_dot_prod_avx512) + FUNC_SAVE + sub len, 64 + jl .return_fail + + xor pos, pos + mov tmp, 0x0f + vpbroadcastb xmask0f, tmp ;Construct mask 0x0f0f0f... + mov vskip1, vec + imul vskip1, 32 + mov vskip3, vec + imul vskip3, 96 + sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS + mov dest2, [dest1+PS] + mov dest3, [dest1+2*PS] + mov dest4, [dest1+3*PS] + mov dest5, [dest1+4*PS] + mov dest1, [dest1] + +.loop64: + vpxorq xp1, xp1, xp1 + vpxorq xp2, xp2, xp2 + vpxorq xp3, xp3, xp3 + vpxorq xp4, xp4, xp4 + vpxorq xp5, xp5, xp5 + mov tmp, mul_array + xor vec_i, vec_i + +.next_vect: + mov ptr, [src+vec_i] + XLDR x0, [ptr+pos] ;Get next source vector + add vec_i, PS + + vpandq xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpandq x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + vmovdqu8 xgft1_loy, [tmp] ;Load array Ax{00}..{0f}, Ax{00}..{f0} + vmovdqu8 xgft2_loy, [tmp+vec*(32/PS)] ;Load array Bx{00}..{0f}, Bx{00}..{f0} + vmovdqu8 xgft3_loy, [tmp+vec*(64/PS)] ;Load array Cx{00}..{0f}, Cx{00}..{f0} + vmovdqu8 xgft4_loy, [tmp+vskip3] ;Load array Dx{00}..{0f}, Dx{00}..{f0} + vmovdqu8 xgft5_loy, [tmp+vskip1*4] ;Load array Ex{00}..{0f}, Ex{00}..{f0} + add tmp, 32 + + vshufi64x2 xgft1_hi, xgft1_lo, xgft1_lo, 0x55 + vshufi64x2 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 + vshufi64x2 xgft2_hi, xgft2_lo, xgft2_lo, 0x55 + vshufi64x2 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 + + vpshufb xgft1_hi, xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xgft1_hi, xgft1_hi, xgft1_lo ;GF add high and low partials + vpxorq xp1, xp1, xgft1_hi ;xp1 += partial + + vpshufb xgft2_hi, xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xgft2_hi, xgft2_hi, xgft2_lo ;GF add high and low partials + vpxorq xp2, xp2, xgft2_hi ;xp2 += partial + + vshufi64x2 xgft3_hi, xgft3_lo, xgft3_lo, 0x55 + vshufi64x2 xgft3_lo, xgft3_lo, xgft3_lo, 0x00 + vshufi64x2 xgft4_hi, xgft4_lo, xgft4_lo, 0x55 + vshufi64x2 xgft4_lo, xgft4_lo, xgft4_lo, 0x00 + + vpshufb xgft3_hi, xgft3_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft3_lo, xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xgft3_hi, xgft3_hi, xgft3_lo ;GF add high and low partials + vpxorq xp3, xp3, xgft3_hi ;xp3 += partial + + vpshufb xgft4_hi, xgft4_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xgft4_hi, xgft4_hi, xgft4_lo ;GF add high and low partials + vpxorq xp4, xp4, xgft4_hi ;xp4 += partial + + vshufi64x2 xgft5_hi, xgft5_lo, xgft5_lo, 0x55 + vshufi64x2 xgft5_lo, xgft5_lo, xgft5_lo, 0x00 + + vpshufb xgft5_hi, xgft5_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft5_lo, xgft5_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xgft5_hi, xgft5_hi, xgft5_lo ;GF add 
high and low partials + vpxorq xp5, xp5, xgft5_hi ;xp5 += partial + + cmp vec_i, vec + jl .next_vect + + XSTR [dest1+pos], xp1 + XSTR [dest2+pos], xp2 + XSTR [dest3+pos], xp3 + XSTR [dest4+pos], xp4 + XSTR [dest5+pos], xp5 + + add pos, 64 ;Loop on 64 bytes at a time + cmp pos, len + jle .loop64 + + lea tmp, [len + 64] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-64 + jmp .loop64 ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +%else +%ifidn __OUTPUT_FORMAT__, win64 +global no_gf_5vect_dot_prod_avx512 +no_gf_5vect_dot_prod_avx512: +%endif +%endif ; ifdef HAVE_AS_KNOWS_AVX512 diff --git a/src/isa-l/erasure_code/gf_5vect_dot_prod_sse.asm b/src/isa-l/erasure_code/gf_5vect_dot_prod_sse.asm new file mode 100644 index 000000000..59b0ac2bc --- /dev/null +++ b/src/isa-l/erasure_code/gf_5vect_dot_prod_sse.asm @@ -0,0 +1,304 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_5vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests); +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r12 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + %endmacro + %macro FUNC_RESTORE 0 + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + %define arg5 r15 ; must be saved and restored + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r14 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + save_xmm128 xmm6, 0*16 + save_xmm128 xmm7, 1*16 + save_xmm128 xmm8, 2*16 + save_xmm128 xmm9, 3*16 + save_xmm128 xmm10, 4*16 + save_xmm128 xmm11, 5*16 + save_xmm128 xmm12, 6*16 + save_xmm128 xmm13, 7*16 + save_xmm128 xmm14, 8*16 + save_xmm128 xmm15, 9*16 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 + save_reg r15, 10*16 + 3*8 + save_reg rdi, 10*16 + 4*8 + save_reg rsi, 10*16 + 5*8 + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + movdqa xmm6, [rsp + 0*16] + movdqa xmm7, [rsp + 1*16] + movdqa xmm8, [rsp + 2*16] + movdqa xmm9, [rsp + 3*16] + movdqa xmm10, [rsp + 4*16] + movdqa xmm11, [rsp + 5*16] + movdqa xmm12, [rsp + 6*16] + movdqa xmm13, [rsp + 7*16] + movdqa xmm14, [rsp + 8*16] + movdqa xmm15, [rsp + 9*16] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + mov rdi, [rsp + 10*16 + 4*8] + mov rsi, [rsp + 10*16 + 5*8] + add rsp, stack_size + %endmacro +%endif + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest arg4 +%define ptr arg5 +%define vec_i tmp2 +%define dest1 tmp3 +%define dest2 tmp4 +%define vskip1 tmp5 +%define vskip3 tmp6 +%define pos return + + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR movdqu + %define XSTR movdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR movdqa + %define XSTR movdqa + %else + %define XLDR movntdqa + %define XSTR movntdq + %endif +%endif + +default rel + +[bits 64] +section .text + +%define xmask0f xmm15 +%define xgft1_lo xmm2 +%define xgft1_hi xmm3 +%define xgft2_lo xmm4 +%define xgft2_hi xmm5 +%define xgft3_lo xmm10 +%define xgft3_hi xmm6 +%define xgft4_lo xmm8 +%define xgft4_hi xmm7 + + +%define x0 xmm0 +%define xtmpa xmm1 +%define xp1 xmm9 +%define xp2 xmm11 +%define xp3 xmm12 +%define xp4 xmm13 +%define xp5 xmm14 + +align 16 +global gf_5vect_dot_prod_sse:ISAL_SYM_TYPE_FUNCTION +func(gf_5vect_dot_prod_sse) + FUNC_SAVE + sub len, 16 + jl .return_fail + xor pos, pos + 
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + mov vskip1, vec + imul vskip1, 32 + mov vskip3, vec + imul vskip3, 96 + sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS + mov dest1, [dest] + mov dest2, [dest+PS] + + +.loop16: + mov tmp, mul_array + xor vec_i, vec_i + pxor xp1, xp1 + pxor xp2, xp2 + pxor xp3, xp3 + pxor xp4, xp4 + pxor xp5, xp5 + + +.next_vect: + mov ptr, [src+vec_i] + add vec_i, PS + XLDR x0, [ptr+pos] ;Get next source vector + + movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0} + movdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + movdqu xgft2_hi, [tmp+vskip1*1+16] ; " Bx{00}, Bx{10}, ..., Bx{f0} + movdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + movdqu xgft3_hi, [tmp+vskip1*2+16] ; " Cx{00}, Cx{10}, ..., Cx{f0} + movdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f} + movdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0} + + movdqa xtmpa, x0 ;Keep unshifted copy of src + psraw x0, 4 ;Shift to put high nibble into bits 4-0 + pand x0, xmask0f ;Mask high src nibble in bits 4-0 + pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0 + + pshufb xgft1_hi, x0 ;Lookup mul table of high nibble + pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft1_hi, xgft1_lo ;GF add high and low partials + pxor xp1, xgft1_hi ;xp1 += partial + + pshufb xgft2_hi, x0 ;Lookup mul table of high nibble + pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft2_hi, xgft2_lo ;GF add high and low partials + pxor xp2, xgft2_hi ;xp2 += partial + + movdqu xgft1_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f} + movdqu xgft1_hi, [tmp+vskip1*4+16] ; " Ex{00}, Ex{10}, ..., Ex{f0} + add tmp, 32 + + pshufb xgft3_hi, x0 ;Lookup mul table of high nibble + pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft3_hi, xgft3_lo ;GF add high and low partials + pxor xp3, xgft3_hi ;xp3 += partial + + pshufb xgft4_hi, x0 ;Lookup mul table of high nibble + pshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft4_hi, xgft4_lo ;GF add high and low partials + pxor xp4, xgft4_hi ;xp4 += partial + + pshufb xgft1_hi, x0 ;Lookup mul table of high nibble + pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft1_hi, xgft1_lo ;GF add high and low partials + pxor xp5, xgft1_hi ;xp5 += partial + + cmp vec_i, vec + jl .next_vect + + mov tmp, [dest+2*PS] + mov ptr, [dest+3*PS] + mov vec_i, [dest+4*PS] + + XSTR [dest1+pos], xp1 + XSTR [dest2+pos], xp2 + XSTR [tmp+pos], xp3 + XSTR [ptr+pos], xp4 + XSTR [vec_i+pos], xp5 + + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-16 + jmp .loop16 ;Do one more overlap pass + +.return_pass: + FUNC_RESTORE + mov return, 0 + ret + +.return_fail: + FUNC_RESTORE + mov return, 1 + ret + +endproc_frame + +section .data + +align 16 +mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f + +;;; func core, ver, snum +slversion gf_5vect_dot_prod_sse, 00, 05, 0065 diff --git a/src/isa-l/erasure_code/gf_5vect_dot_prod_sse_test.c b/src/isa-l/erasure_code/gf_5vect_dot_prod_sse_test.c new file mode 100644 index 000000000..977054cbe --- /dev/null +++ b/src/isa-l/erasure_code/gf_5vect_dot_prod_sse_test.c @@ -0,0 +1,805 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. 
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>		// for memset, memcmp
+#include "erasure_code.h"
+#include "types.h"
+
+#ifndef FUNCTION_UNDER_TEST
+# define FUNCTION_UNDER_TEST gf_5vect_dot_prod_sse
+#endif
+#ifndef TEST_MIN_SIZE
+# define TEST_MIN_SIZE 16
+#endif
+
+#define str(s) #s
+#define xstr(s) str(s)
+
+#define TEST_LEN 8192
+#define TEST_SIZE (TEST_LEN/2)
+#define TEST_MEM TEST_SIZE
+#define TEST_LOOPS 20000
+#define TEST_TYPE_STR ""
+
+#ifndef TEST_SOURCES
+# define TEST_SOURCES 16
+#endif
+#ifndef RANDOMS
+# define RANDOMS 20
+#endif
+
+#ifdef EC_ALIGNED_ADDR
+// Define power of 2 range to check ptr, len alignment
+# define PTR_ALIGN_CHK_B 0
+# define LEN_ALIGN_CHK_B 0	// 0 for aligned only
+#else
+// Define power of 2 range to check ptr, len alignment
+# define PTR_ALIGN_CHK_B 32
+# define LEN_ALIGN_CHK_B 32	// 0 for aligned only
+#endif
+
+typedef unsigned char u8;
+
+void dump(unsigned char *buf, int len)
+{
+	int i;
+	for (i = 0; i < len;) {
+		printf(" %2x", 0xff & buf[i++]);
+		if (i % 32 == 0)
+			printf("\n");
+	}
+	printf("\n");
+}
+
+void dump_matrix(unsigned char **s, int k, int m)
+{
+	int i, j;
+	for (i = 0; i < k; i++) {
+		for (j = 0; j < m; j++) {
+			printf(" %2x", s[i][j]);
+		}
+		printf("\n");
+	}
+	printf("\n");
+}
+
+void dump_u8xu8(unsigned char *s, int k, int m)
+{
+	int i, j;
+	for (i = 0; i < k; i++) {
+		for (j = 0; j < m; j++) {
+			printf(" %2x", 0xff & s[j + (i * m)]);
+		}
+		printf("\n");
+	}
+	printf("\n");
+}
+
+int main(int argc, char *argv[])
+{
+	int i, j, rtest, srcs;
+	void *buf;
+	u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES];
+	u8 g4[TEST_SOURCES], g5[TEST_SOURCES], *g_tbls;
+	u8 *dest1, *dest2, *dest3, *dest4, *dest5, *buffs[TEST_SOURCES];
+	u8 *dest_ref1, *dest_ref2, *dest_ref3, *dest_ref4, *dest_ref5;
+	u8 *dest_ptrs[5];
+
+	int align, size;
+	unsigned char *efence_buffs[TEST_SOURCES];
+	unsigned int offset;
+	u8 *ubuffs[TEST_SOURCES];
+	u8 *udest_ptrs[5];
+	printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
+
+	// Allocate
the arrays + for (i = 0; i < TEST_SOURCES; i++) { + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + buffs[i] = buf; + } + + if (posix_memalign(&buf, 16, 2 * (6 * TEST_SOURCES * 32))) { + printf("alloc error: Fail"); + return -1; + } + g_tbls = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest1 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest2 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest3 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest4 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest5 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest_ref1 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest_ref2 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest_ref3 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest_ref4 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest_ref5 = buf; + + dest_ptrs[0] = dest1; + dest_ptrs[1] = dest2; + dest_ptrs[2] = dest3; + dest_ptrs[3] = dest4; + dest_ptrs[4] = dest5; + + // Test of all zeros + for (i = 0; i < TEST_SOURCES; i++) + memset(buffs[i], 0, TEST_LEN); + + memset(dest1, 0, TEST_LEN); + memset(dest2, 0, TEST_LEN); + memset(dest3, 0, TEST_LEN); + memset(dest4, 0, TEST_LEN); + memset(dest5, 0, TEST_LEN); + memset(dest_ref1, 0, TEST_LEN); + memset(dest_ref2, 0, TEST_LEN); + memset(dest_ref3, 0, TEST_LEN); + memset(dest_ref4, 0, TEST_LEN); + memset(dest_ref5, 0, TEST_LEN); + memset(g1, 2, TEST_SOURCES); + memset(g2, 1, TEST_SOURCES); + memset(g3, 7, TEST_SOURCES); + memset(g4, 9, TEST_SOURCES); + memset(g5, 4, TEST_SOURCES); + + for (i = 0; i < TEST_SOURCES; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]); + gf_vect_mul_init(g3[i], &g_tbls[64 * TEST_SOURCES + i * 32]); + gf_vect_mul_init(g4[i], &g_tbls[96 * TEST_SOURCES + i * 32]); + gf_vect_mul_init(g5[i], &g_tbls[128 * TEST_SOURCES + i * 32]); + } + + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs, + dest_ref2); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs, + dest_ref3); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], buffs, + dest_ref4); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], buffs, + dest_ref5); + + FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test1\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest1, 25); + return -1; + } + if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest2, 25); + return -1; + } + if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { + 
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test3\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(dest3, 25); + return -1; + } + if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test4\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, 25); + printf("dprod_dut:"); + dump(dest4, 25); + return -1; + } + if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test5\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref5, 25); + printf("dprod_dut:"); + dump(dest5, 25); + return -1; + } + putchar('.'); + + // Rand data test + + for (rtest = 0; rtest < RANDOMS; rtest++) { + for (i = 0; i < TEST_SOURCES; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < TEST_SOURCES; i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + g4[i] = rand(); + g5[i] = rand(); + } + + for (i = 0; i < TEST_SOURCES; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g5[i], &g_tbls[(128 * TEST_SOURCES) + (i * 32)]); + } + + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], + buffs, dest_ref2); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], + buffs, dest_ref3); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], + buffs, dest_ref4); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], + buffs, dest_ref5); + + FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest1, 25); + return -1; + } + if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest2, 25); + return -1; + } + if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(dest3, 25); + return -1; + } + if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, 25); + printf("dprod_dut:"); + dump(dest4, 25); + return -1; + } + if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test5 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref5, 25); + printf("dprod_dut:"); + dump(dest5, 25); + return -1; + } + + putchar('.'); + } + + // Rand data test with varied parameters + for (rtest = 0; rtest < RANDOMS; rtest++) { + for (srcs = TEST_SOURCES; srcs > 0; srcs--) { + for (i = 0; i < srcs; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < srcs; 
i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + g4[i] = rand(); + g5[i] = rand(); + } + + for (i = 0; i < srcs; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); + gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); + gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]); + } + + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs, + dest_ref2); + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[64 * srcs], buffs, + dest_ref3); + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[96 * srcs], buffs, + dest_ref4); + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[128 * srcs], buffs, + dest_ref5); + + FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test1 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest1, 25); + return -1; + } + if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test2 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest2, 25); + return -1; + } + if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test3 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(dest3, 25); + return -1; + } + if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test4 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, 25); + printf("dprod_dut:"); + dump(dest4, 25); + return -1; + } + if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test5 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref5, 25); + printf("dprod_dut:"); + dump(dest5, 25); + return -1; + } + + putchar('.'); + } + } + + // Run tests at end of buffer for Electric Fence + align = (LEN_ALIGN_CHK_B != 0) ? 
1 : 16; + for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) { + for (i = 0; i < TEST_SOURCES; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end + efence_buffs[i] = buffs[i] + TEST_LEN - size; + + for (i = 0; i < TEST_SOURCES; i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + g4[i] = rand(); + g5[i] = rand(); + } + + for (i = 0; i < TEST_SOURCES; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g5[i], &g_tbls[(128 * TEST_SOURCES) + (i * 32)]); + } + + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1); + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], + efence_buffs, dest_ref2); + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], + efence_buffs, dest_ref3); + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], + efence_buffs, dest_ref4); + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], + efence_buffs, dest_ref5); + + FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, align); + printf("dprod_dut:"); + dump(dest1, align); + return -1; + } + + if (0 != memcmp(dest_ref2, dest2, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, align); + printf("dprod_dut:"); + dump(dest2, align); + return -1; + } + + if (0 != memcmp(dest_ref3, dest3, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, align); + printf("dprod_dut:"); + dump(dest3, align); + return -1; + } + + if (0 != memcmp(dest_ref4, dest4, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, align); + printf("dprod_dut:"); + dump(dest4, align); + return -1; + } + + if (0 != memcmp(dest_ref5, dest5, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test5 %d\n", rtest); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref5, align); + printf("dprod_dut:"); + dump(dest5, align); + return -1; + } + + putchar('.'); + } + + // Test rand ptr alignment if available + + for (rtest = 0; rtest < RANDOMS; rtest++) { + size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1); + srcs = rand() % TEST_SOURCES; + if (srcs == 0) + continue; + + offset = (PTR_ALIGN_CHK_B != 0) ? 
1 : PTR_ALIGN_CHK_B; + // Add random offsets + for (i = 0; i < srcs; i++) + ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); + + udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset)); + udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset)); + udest_ptrs[2] = dest3 + (rand() & (PTR_ALIGN_CHK_B - offset)); + udest_ptrs[3] = dest4 + (rand() & (PTR_ALIGN_CHK_B - offset)); + udest_ptrs[4] = dest5 + (rand() & (PTR_ALIGN_CHK_B - offset)); + + memset(dest1, 0, TEST_LEN); // zero pad to check write-over + memset(dest2, 0, TEST_LEN); + memset(dest3, 0, TEST_LEN); + memset(dest4, 0, TEST_LEN); + memset(dest5, 0, TEST_LEN); + + for (i = 0; i < srcs; i++) + for (j = 0; j < size; j++) + ubuffs[i][j] = rand(); + + for (i = 0; i < srcs; i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + g4[i] = rand(); + g5[i] = rand(); + } + + for (i = 0; i < srcs; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); + gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); + gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]); + } + + gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1); + gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2); + gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], ubuffs, dest_ref3); + gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], ubuffs, dest_ref4); + gf_vect_dot_prod_base(size, srcs, &g_tbls[128 * srcs], ubuffs, dest_ref5); + + FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs); + + if (memcmp(dest_ref1, udest_ptrs[0], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(udest_ptrs[0], 25); + return -1; + } + if (memcmp(dest_ref2, udest_ptrs[1], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(udest_ptrs[1], 25); + return -1; + } + if (memcmp(dest_ref3, udest_ptrs[2], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(udest_ptrs[2], 25); + return -1; + } + if (memcmp(dest_ref4, udest_ptrs[3], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, 25); + printf("dprod_dut:"); + dump(udest_ptrs[3], 25); + return -1; + } + if (memcmp(dest_ref5, udest_ptrs[4], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref5, 25); + printf("dprod_dut:"); + dump(udest_ptrs[4], 25); + return -1; + } + // Confirm that padding around dests is unchanged + memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff + offset = udest_ptrs[0] - dest1; + + if (memcmp(dest1, dest_ref1, offset)) { + printf("Fail rand ualign pad1 start\n"); + return -1; + } + if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad1 end\n"); + return -1; + } + + offset = udest_ptrs[1] - dest2; + if (memcmp(dest2, dest_ref1, offset)) { + printf("Fail rand 
ualign pad2 start\n"); + return -1; + } + if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad2 end\n"); + return -1; + } + + offset = udest_ptrs[2] - dest3; + if (memcmp(dest3, dest_ref1, offset)) { + printf("Fail rand ualign pad3 start\n"); + return -1; + } + if (memcmp(dest3 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad3 end\n"); + return -1; + } + + offset = udest_ptrs[3] - dest4; + if (memcmp(dest4, dest_ref1, offset)) { + printf("Fail rand ualign pad4 start\n"); + return -1; + } + if (memcmp(dest4 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad4 end\n"); + return -1; + } + + offset = udest_ptrs[4] - dest5; + if (memcmp(dest5, dest_ref1, offset)) { + printf("Fail rand ualign pad5 start\n"); + return -1; + } + if (memcmp(dest5 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad5 end\n"); + return -1; + } + + putchar('.'); + } + + // Test all size alignment + align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16; + + for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) { + srcs = TEST_SOURCES; + + for (i = 0; i < srcs; i++) + for (j = 0; j < size; j++) + buffs[i][j] = rand(); + + for (i = 0; i < srcs; i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + g4[i] = rand(); + g5[i] = rand(); + } + + for (i = 0; i < srcs; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); + gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); + gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]); + } + + gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2); + gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], buffs, dest_ref3); + gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], buffs, dest_ref4); + gf_vect_dot_prod_base(size, srcs, &g_tbls[128 * srcs], buffs, dest_ref5); + + FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs); + + if (memcmp(dest_ref1, dest_ptrs[0], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest_ptrs[0], 25); + + return -1; + } + if (memcmp(dest_ref2, dest_ptrs[1], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest_ptrs[1], 25); + return -1; + } + if (memcmp(dest_ref3, dest_ptrs[2], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(dest_ptrs[2], 25); + return -1; + } + if (memcmp(dest_ref4, dest_ptrs[3], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, 25); + printf("dprod_dut:"); + dump(dest_ptrs[3], 25); + return -1; + } + if (memcmp(dest_ref5, dest_ptrs[4], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref5, 25); + printf("dprod_dut:"); + dump(dest_ptrs[4], 25); + return 
-1; + } + } + + printf("Pass\n"); + return 0; + +} diff --git a/src/isa-l/erasure_code/gf_5vect_mad_avx.asm b/src/isa-l/erasure_code/gf_5vect_mad_avx.asm new file mode 100644 index 000000000..696b6a0dc --- /dev/null +++ b/src/isa-l/erasure_code/gf_5vect_mad_avx.asm @@ -0,0 +1,365 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
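+;
+; (editor's note, not in the upstream file) Calling convention shared by
+; the gf_5vect_mad_* variants below: len = bytes per vector, vec = number
+; of source vectors, vec_i = index of the source being updated,
+; mul_array = the expanded 32-byte nibble tables (one per dest/source
+; pair), src = the updated source buffer, dest = array of five
+; destination pointers. Returns 0 on success, 1 if len is shorter than
+; one vector width.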
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_5vect_mad_avx(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%define PS 8 + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg0.w ecx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 + %define tmp4 r14 + %define return rax + %define return.w eax + %define stack_size 16*10 + 5*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + +%macro FUNC_SAVE 0 + sub rsp, stack_size + movdqa [rsp+16*0],xmm6 + movdqa [rsp+16*1],xmm7 + movdqa [rsp+16*2],xmm8 + movdqa [rsp+16*3],xmm9 + movdqa [rsp+16*4],xmm10 + movdqa [rsp+16*5],xmm11 + movdqa [rsp+16*6],xmm12 + movdqa [rsp+16*7],xmm13 + movdqa [rsp+16*8],xmm14 + movdqa [rsp+16*9],xmm15 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 + save_reg r15, 10*16 + 3*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) +%endmacro + +%macro FUNC_RESTORE 0 + movdqa xmm6, [rsp+16*0] + movdqa xmm7, [rsp+16*1] + movdqa xmm8, [rsp+16*2] + movdqa xmm9, [rsp+16*3] + movdqa xmm10, [rsp+16*4] + movdqa xmm11, [rsp+16*5] + movdqa xmm12, [rsp+16*6] + movdqa xmm13, [rsp+16*7] + movdqa xmm14, [rsp+16*8] + movdqa xmm15, [rsp+16*9] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + add rsp, stack_size +%endmacro + +%elifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg0.w edi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define tmp2 r10 + %define tmp3 r12 + %define tmp4 r13 + %define return rax + %define return.w eax + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + %endmacro + %macro FUNC_RESTORE 0 + pop r13 + pop r12 + %endmacro +%endif + +;;; gf_5vect_mad_avx(len, vec, vec_i, mul_array, src, dest) +%define len arg0 +%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest1 arg5 +%define pos return +%define pos.w return.w + +%define dest2 tmp4 +%define dest3 mul_array +%define dest4 tmp2 +%define dest5 vec_i + + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + +default rel + +[bits 64] +section .text + +%define xmask0f xmm15 +%define xgft5_hi xmm14 +%define xgft4_lo xmm13 +%define xgft4_hi xmm12 + +%define x0 xmm0 +%define xtmpa xmm1 +%define xtmph1 xmm2 +%define xtmpl1 xmm3 +%define xtmph2 xmm4 +%define xtmpl2 xmm5 +%define xtmph3 xmm6 +%define xtmpl3 xmm7 +%define xtmph5 xmm8 +%define xtmpl5 xmm9 +%define xd1 xmm10 +%define xd2 xmm11 +%define xd3 xtmpl1 +%define xd4 xtmph1 +%define xd5 xtmpl2 + + +align 16 +global gf_5vect_mad_avx:ISAL_SYM_TYPE_FUNCTION +func(gf_5vect_mad_avx) + FUNC_SAVE + sub len, 16 + jl .return_fail + xor pos, pos + vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + mov tmp, vec + sal vec_i, 5 ;Multiply by 32 + lea tmp3, [mul_array + vec_i] + sal tmp, 6 ;Multiply by 64 + vmovdqu xgft5_hi, [tmp3+2*tmp+16] ; " Ex{00}, Ex{10}, ..., Ex{f0} + sal vec, 5 ;Multiply by 32 + add tmp, vec + vmovdqu xgft4_hi, [tmp3+tmp+16] ; " Dx{00}, Dx{10}, Dx{20}, ... 
, Dx{f0} + vmovdqu xgft4_lo, [tmp3+tmp] ;Load array Dx{00}, Dx{01}, Dx{02}, ... + + mov dest3, [dest1+2*PS] ; reuse mul_array + mov dest4, [dest1+3*PS] + mov dest5, [dest1+4*PS] ; reuse vec_i + mov dest2, [dest1+PS] + mov dest1, [dest1] + +.loop16: + XLDR x0, [src+pos] ;Get next source vector + + vmovdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0} + vmovdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ... + vmovdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0} + vmovdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ... + vmovdqu xtmph3, [tmp3+2*vec+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0} + vmovdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ... + vmovdqu xtmpl5, [tmp3+4*vec] ;Load array Ex{00}, Ex{01}, ..., Ex{0f} + + XLDR xd1, [dest1+pos] ;Get next dest vector + XLDR xd2, [dest2+pos] ;Get next dest vector + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + ; dest1 + vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble + vpshufb xtmpl1, xtmpl1, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials + vpxor xd1, xd1, xtmph1 + + XLDR xd3, [dest3+pos] ;Reuse xtmpl1, Get next dest vector + XLDR xd4, [dest4+pos] ;Reuse xtmph1, Get next dest vector + + ; dest2 + vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble + vpshufb xtmpl2, xtmpl2, xtmpa ;Lookup mul table of low nibble + vpxor xtmph2, xtmph2, xtmpl2 ;GF add high and low partials + vpxor xd2, xd2, xtmph2 + + XLDR xd5, [dest5+pos] ;Reuse xtmpl2. Get next dest vector + + ; dest3 + vpshufb xtmph3, xtmph3, x0 ;Lookup mul table of high nibble + vpshufb xtmpl3, xtmpl3, xtmpa ;Lookup mul table of low nibble + vpxor xtmph3, xtmph3, xtmpl3 ;GF add high and low partials + vpxor xd3, xd3, xtmph3 + + ; dest4 + vpshufb xtmph2, xgft4_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl3, xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph2, xtmph2, xtmpl3 ;GF add high and low partials + vpxor xd4, xd4, xtmph2 + + ; dest5 + vpshufb xtmph5, xgft5_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl5, xtmpl5, xtmpa ;Lookup mul table of low nibble + vpxor xtmph5, xtmph5, xtmpl5 ;GF add high and low partials + vpxor xd5, xd5, xtmph5 + + XSTR [dest1+pos], xd1 ;Store result into dest1 + XSTR [dest2+pos], xd2 ;Store result into dest2 + XSTR [dest3+pos], xd3 ;Store result into dest3 + XSTR [dest4+pos], xd4 ;Store result into dest4 + XSTR [dest5+pos], xd5 ;Store result into dest5 + + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + +.lessthan16: + ;; Tail len + ;; Do one more overlap pass + mov tmp, len ;Overlapped offset length-16 + XLDR x0, [src+tmp] ;Get next source vector + + sub len, pos + + vmovdqa xtmph1, [constip16] ;Load const of i + 16 + vpinsrb xtmph5, len.w, 15 + vpshufb xtmph5, xmask0f ;Broadcast len to all bytes + vpcmpgtb xtmph5, xtmph5, xtmph1 + + vmovdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0} + vmovdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ... + vmovdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0} + vmovdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ... + vmovdqu xtmph3, [tmp3+2*vec+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0} + vmovdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ... 
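+	;; (editor's note, not in the upstream file) xtmph5 was built above
+	;; from len%16 and constip16: 0xff in the lanes this overlapped
+	;; window still has to produce, 0x00 in the lanes the last full
+	;; pass already stored. Each partial below is ANDed with it before
+	;; the XOR, so the re-written bytes come back unchanged.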
+ vmovdqu xtmpl5, [tmp3+4*vec] ;Load array Ex{00}, Ex{01}, ..., Ex{0f} + + XLDR xd1, [dest1+tmp] ;Get next dest vector + XLDR xd2, [dest2+tmp] ;Get next dest vector + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + ; dest1 + vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble + vpshufb xtmpl1, xtmpl1, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials + vpand xtmph1, xtmph1, xtmph5 + vpxor xd1, xd1, xtmph1 + + XLDR xd3, [dest3+tmp] ;Reuse xtmpl1, Get next dest vector + XLDR xd4, [dest4+tmp] ;Reuse xtmph1, Get next dest vector + + ; dest2 + vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble + vpshufb xtmpl2, xtmpl2, xtmpa ;Lookup mul table of low nibble + vpxor xtmph2, xtmph2, xtmpl2 ;GF add high and low partials + vpand xtmph2, xtmph2, xtmph5 + vpxor xd2, xd2, xtmph2 + + XLDR xd5, [dest5+tmp] ;Reuse xtmpl2. Get next dest vector + + ; dest3 + vpshufb xtmph3, xtmph3, x0 ;Lookup mul table of high nibble + vpshufb xtmpl3, xtmpl3, xtmpa ;Lookup mul table of low nibble + vpxor xtmph3, xtmph3, xtmpl3 ;GF add high and low partials + vpand xtmph3, xtmph3, xtmph5 + vpxor xd3, xd3, xtmph3 + + ; dest4 + vpshufb xgft4_hi, xgft4_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft4_hi, xgft4_hi, xgft4_lo ;GF add high and low partials + vpand xgft4_hi, xgft4_hi, xtmph5 + vpxor xd4, xd4, xgft4_hi + + ; dest5 + vpshufb xgft5_hi, xgft5_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl5, xtmpl5, xtmpa ;Lookup mul table of low nibble + vpxor xgft5_hi, xgft5_hi, xtmpl5 ;GF add high and low partials + vpand xgft5_hi, xgft5_hi, xtmph5 + vpxor xd5, xd5, xgft5_hi + + XSTR [dest1+tmp], xd1 ;Store result into dest1 + XSTR [dest2+tmp], xd2 ;Store result into dest2 + XSTR [dest3+tmp], xd3 ;Store result into dest3 + XSTR [dest4+tmp], xd4 ;Store result into dest4 + XSTR [dest5+tmp], xd5 ;Store result into dest5 + +.return_pass: + FUNC_RESTORE + mov return, 0 + ret + +.return_fail: + FUNC_RESTORE + mov return, 1 + ret + +endproc_frame + +section .data + +align 16 +mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f +constip16: + dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7 + +;;; func core, ver, snum +slversion gf_5vect_mad_avx, 02, 01, 020d diff --git a/src/isa-l/erasure_code/gf_5vect_mad_avx2.asm b/src/isa-l/erasure_code/gf_5vect_mad_avx2.asm new file mode 100644 index 000000000..3c65c05ea --- /dev/null +++ b/src/isa-l/erasure_code/gf_5vect_mad_avx2.asm @@ -0,0 +1,363 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. 
+; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_5vect_mad_avx2(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%define PS 8 + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg0.w ecx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define return rax + %define return.w eax + %define stack_size 16*10 + 3*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + +%macro FUNC_SAVE 0 + sub rsp, stack_size + movdqa [rsp+16*0],xmm6 + movdqa [rsp+16*1],xmm7 + movdqa [rsp+16*2],xmm8 + movdqa [rsp+16*3],xmm9 + movdqa [rsp+16*4],xmm10 + movdqa [rsp+16*5],xmm11 + movdqa [rsp+16*6],xmm12 + movdqa [rsp+16*7],xmm13 + movdqa [rsp+16*8],xmm14 + movdqa [rsp+16*9],xmm15 + save_reg r12, 10*16 + 0*8 + save_reg r15, 10*16 + 1*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) +%endmacro + +%macro FUNC_RESTORE 0 + movdqa xmm6, [rsp+16*0] + movdqa xmm7, [rsp+16*1] + movdqa xmm8, [rsp+16*2] + movdqa xmm9, [rsp+16*3] + movdqa xmm10, [rsp+16*4] + movdqa xmm11, [rsp+16*5] + movdqa xmm12, [rsp+16*6] + movdqa xmm13, [rsp+16*7] + movdqa xmm14, [rsp+16*8] + movdqa xmm15, [rsp+16*9] + mov r12, [rsp + 10*16 + 0*8] + mov r15, [rsp + 10*16 + 1*8] + add rsp, stack_size +%endmacro + +%elifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg0.w edi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define return rax + %define return.w eax + + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + +;;; gf_5vect_mad_avx2(len, vec, vec_i, mul_array, src, dest) +%define len arg0 +%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest1 arg5 +%define pos return +%define pos.w return.w + +%define dest2 tmp2 +%define dest3 mul_array +%define dest4 vec +%define dest5 vec_i + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + +default rel + +[bits 64] +section .text + +%define xmask0f ymm15 +%define xmask0fx xmm15 +%define xgft1_lo ymm14 +%define xgft2_lo ymm13 +%define xgft3_lo ymm12 +%define xgft4_lo ymm11 +%define xgft5_lo ymm10 + +%define x0 ymm0 +%define xtmpa ymm1 +%define xtmpl ymm2 +%define xtmplx xmm2 +%define xtmph1 ymm3 +%define xtmph1x xmm3 +%define xtmph2 ymm4 +%define xd1 ymm5 
+%define xd2 ymm6 +%define xd3 ymm7 +%define xd4 ymm8 +%define xd5 ymm9 + +align 16 +global gf_5vect_mad_avx2:ISAL_SYM_TYPE_FUNCTION +func(gf_5vect_mad_avx2) + FUNC_SAVE + sub len, 32 + jl .return_fail + xor pos, pos + mov tmp.b, 0x0f + vpinsrb xmask0fx, xmask0fx, tmp.w, 0 + vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f... + + sal vec_i, 5 ;Multiply by 32 + sal vec, 5 ;Multiply by 32 + lea tmp, [mul_array + vec_i] + + vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + ; " Ax{00}, Ax{10}, ..., Ax{f0} + vmovdqu xgft2_lo, [tmp+vec] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + ; " Bx{00}, Bx{10}, ..., Bx{f0} + vmovdqu xgft3_lo, [tmp+2*vec] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + ; " Cx{00}, Cx{10}, ..., Cx{f0} + vmovdqu xgft5_lo, [tmp+4*vec] ;Load array Ex{00}, Ex{01}, ..., Ex{0f} + ; " Ex{00}, Ex{10}, ..., Ex{f0} + add tmp, vec + vmovdqu xgft4_lo, [tmp+2*vec] ;Load array Dx{00}, Dx{01}, ..., Dx{0f} + ; " Dx{00}, Dx{10}, ..., Dx{f0} + + mov dest3, [dest1+2*PS] ; reuse mul_array + mov dest4, [dest1+3*PS] ; reuse vec + mov dest5, [dest1+4*PS] ; reuse vec_i + mov dest2, [dest1+PS] + mov dest1, [dest1] + +.loop32: + XLDR x0, [src+pos] ;Get next source vector + + XLDR xd1, [dest1+pos] ;Get next dest vector + XLDR xd2, [dest2+pos] ;Get next dest vector + XLDR xd3, [dest3+pos] ;Get next dest vector + XLDR xd4, [dest4+pos] ;Get next dest vector + XLDR xd5, [dest5+pos] ;Get next dest vector + + vpand xtmpl, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + vperm2i128 xtmpa, xtmpl, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi + vperm2i128 x0, xtmpl, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo + + vperm2i128 xtmph1, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo + vperm2i128 xtmph2, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo + + ; dest1 + vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble + vpshufb xtmpl, xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmph1, xtmpl ;GF add high and low partials + vpxor xd1, xd1, xtmph1 ;xd1 += partial + + vperm2i128 xtmph1, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo + ; dest2 + vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble + vpshufb xtmpl, xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph2, xtmph2, xtmpl ;GF add high and low partials + vpxor xd2, xd2, xtmph2 ;xd2 += partial + + vperm2i128 xtmph2, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo + ; dest3 + vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble + vpshufb xtmpl, xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmph1, xtmpl ;GF add high and low partials + vpxor xd3, xd3, xtmph1 ;xd3 += partial + + vperm2i128 xtmph1, xgft5_lo, xgft5_lo, 0x01 ; swapped to hi | lo + ; dest4 + vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble + vpshufb xtmpl, xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph2, xtmph2, xtmpl ;GF add high and low partials + vpxor xd4, xd4, xtmph2 ;xd4 += partial + + ; dest5 + vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble + vpshufb xtmpl, xgft5_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmph1, xtmpl ;GF add high and low partials + vpxor xd5, xd5, xtmph1 ;xd5 += partial + + XSTR [dest1+pos], xd1 + XSTR [dest2+pos], xd2 + XSTR [dest3+pos], xd3 + XSTR [dest4+pos], xd4 + XSTR [dest5+pos], xd5 + + add pos, 32 ;Loop on 32 bytes at a time + cmp pos, len + jle .loop32 + + lea tmp, [len + 32] + cmp pos, tmp + je .return_pass + +.lessthan32: + ;; 
Tail len + ;; Do one more overlap pass + mov tmp.b, 0x1f + vpinsrb xtmph1x, xtmph1x, tmp.w, 0 + vpbroadcastb xtmph1, xtmph1x ;Construct mask 0x1f1f1f... + + mov tmp, len ;Overlapped offset length-32 + + XLDR x0, [src+tmp] ;Get next source vector + + XLDR xd1, [dest1+tmp] ;Get next dest vector + XLDR xd2, [dest2+tmp] ;Get next dest vector + XLDR xd3, [dest3+tmp] ;Get next dest vector + XLDR xd4, [dest4+tmp] ;Get next dest vector + XLDR xd5, [dest5+tmp] ;Get next dest vector + + sub len, pos + + vmovdqa xtmph2, [constip32] ;Load const of i + 32 + vpinsrb xtmplx, xtmplx, len.w, 15 + vinserti128 xtmpl, xtmpl, xtmplx, 1 ;swapped to xtmplx | xtmplx + vpshufb xtmpl, xtmpl, xtmph1 ;Broadcast len to all bytes. xtmph1=0x1f1f1f... + vpcmpgtb xtmpl, xtmpl, xtmph2 + + vpand xtmph1, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + vperm2i128 xtmpa, xtmph1, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi + vperm2i128 x0, xtmph1, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo + + vperm2i128 xtmph1, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo + vperm2i128 xtmph2, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo + + ; dest1 + vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmph1, xgft1_lo ;GF add high and low partials + vpand xtmph1, xtmph1, xtmpl + vpxor xd1, xd1, xtmph1 ;xd1 += partial + + vperm2i128 xtmph1, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo + ; dest2 + vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph2, xtmph2, xgft2_lo ;GF add high and low partials + vpand xtmph2, xtmph2, xtmpl + vpxor xd2, xd2, xtmph2 ;xd2 += partial + + vperm2i128 xtmph2, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo + ; dest3 + vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble + vpshufb xgft3_lo, xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmph1, xgft3_lo ;GF add high and low partials + vpand xtmph1, xtmph1, xtmpl + vpxor xd3, xd3, xtmph1 ;xd3 += partial + + vperm2i128 xtmph1, xgft5_lo, xgft5_lo, 0x01 ; swapped to hi | lo + ; dest4 + vpshufb xtmph2, xtmph2, x0 ;Lookup mul table of high nibble + vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph2, xtmph2, xgft4_lo ;GF add high and low partials + vpand xtmph2, xtmph2, xtmpl + vpxor xd4, xd4, xtmph2 ;xd4 += partial + + ; dest5 + vpshufb xtmph1, xtmph1, x0 ;Lookup mul table of high nibble + vpshufb xgft5_lo, xgft5_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmph1, xgft5_lo ;GF add high and low partials + vpand xtmph1, xtmph1, xtmpl + vpxor xd5, xd5, xtmph1 ;xd5 += partial + + XSTR [dest1+tmp], xd1 + XSTR [dest2+tmp], xd2 + XSTR [dest3+tmp], xd3 + XSTR [dest4+tmp], xd4 + XSTR [dest5+tmp], xd5 + +.return_pass: + FUNC_RESTORE + mov return, 0 + ret + +.return_fail: + FUNC_RESTORE + mov return, 1 + ret + +endproc_frame + +section .data +align 32 +constip32: + dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7 + dq 0xe8e9eaebecedeeef, 0xe0e1e2e3e4e5e6e7 + +;;; func core, ver, snum +slversion gf_5vect_mad_avx2, 04, 01, 020e diff --git a/src/isa-l/erasure_code/gf_5vect_mad_avx512.asm b/src/isa-l/erasure_code/gf_5vect_mad_avx512.asm new file mode 100644 index 000000000..96b498c9d --- /dev/null +++ b/src/isa-l/erasure_code/gf_5vect_mad_avx512.asm @@ -0,0 +1,287 @@ 
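[Editor's aside: not part of the patch. The AVX-512 file that follows drops the SSE/AVX byte-compare tail guard and uses an opmask instead: the final, overlapping 64-byte pass runs with a k-register that keeps only the not-yet-written bytes, and the {k1}{z} table lookups zero every other partial. Below is a minimal C sketch of that idea, assuming AVX512BW intrinsics; the helper names gf_mad_block and tail_mask are illustrative only, and _mm512_maskz_shuffle_epi8 is the intrinsic form of the masked vpshufb used in the assembly.

#include <immintrin.h>
#include <stddef.h>

/* One GF(2^8) multiply-accumulate step on a 64-byte block:
 * dest ^= c * src, with c expanded into the usual 32-byte nibble
 * tables and each 16-byte half broadcast to all four 128-bit lanes
 * of a zmm beforehand (the vshufi64x2 0x00/0x55 pairs in the asm).
 * 'k' selects the bytes that take a new partial; maskz_shuffle
 * zeroes the rest, so XORing them into dest is a no-op. */
static void gf_mad_block(__mmask64 k, __m512i tbl_lo, __m512i tbl_hi,
			 const unsigned char *src, unsigned char *dest)
{
	const __m512i mask0f = _mm512_set1_epi8(0x0f);
	__m512i x = _mm512_loadu_si512((const void *)src);
	__m512i lo = _mm512_and_si512(x, mask0f);
	__m512i hi = _mm512_and_si512(_mm512_srli_epi16(x, 4), mask0f);
	__m512i p = _mm512_xor_si512(_mm512_maskz_shuffle_epi8(k, tbl_lo, lo),
				     _mm512_maskz_shuffle_epi8(k, tbl_hi, hi));
	__m512i d = _mm512_loadu_si512((const void *)dest);
	_mm512_storeu_si512((void *)dest, _mm512_xor_si512(d, p));
}

/* Tail handling as in the assembly: after the full 64-byte passes,
 * redo the last (overlapping) 64 bytes with only the top
 * (len % 64) mask bits set. */
static __mmask64 tail_mask(size_t len)
{
	size_t rem = len & 63;			/* bytes not yet processed */
	return rem ? (~0ULL << (64 - rem))	/* top rem bits set        */
		   : ~0ULL;			/* no partial tail         */
}

With k = ~0ULL this is one full-width step; for the final pass the caller would use tail_mask(total_len) on the window starting at total_len - 64.]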
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2019 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_5vect_mad_avx512(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%ifdef HAVE_AS_KNOWS_AVX512 + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define tmp2 r10 + %define return rax + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + %define tmp r11 + %define tmp2 r10 + %define return rax + %define stack_size 16*10 + 3*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + +%macro FUNC_SAVE 0 + sub rsp, stack_size + movdqa [rsp+16*0],xmm6 + movdqa [rsp+16*1],xmm7 + movdqa [rsp+16*2],xmm8 + movdqa [rsp+16*3],xmm9 + movdqa [rsp+16*4],xmm10 + movdqa [rsp+16*5],xmm11 + movdqa [rsp+16*6],xmm12 + movdqa [rsp+16*7],xmm13 + movdqa [rsp+16*8],xmm14 + movdqa [rsp+16*9],xmm15 + save_reg r12, 10*16 + 0*8 + save_reg r15, 10*16 + 1*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) +%endmacro + +%macro FUNC_RESTORE 0 + movdqa xmm6, [rsp+16*0] + movdqa xmm7, [rsp+16*1] + movdqa xmm8, [rsp+16*2] + movdqa xmm9, [rsp+16*3] + movdqa xmm10, [rsp+16*4] + movdqa xmm11, [rsp+16*5] + movdqa xmm12, [rsp+16*6] + movdqa xmm13, [rsp+16*7] + movdqa xmm14, [rsp+16*8] + movdqa xmm15, [rsp+16*9] + mov r12, [rsp + 10*16 + 0*8] + mov r15, [rsp + 10*16 + 1*8] + add rsp, stack_size +%endmacro +%endif + +%define PS 8 +%define len arg0 +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest1 arg5 +%define pos return +%define dest2 tmp2 +%define dest3 mul_array +%define dest4 vec +%define dest5 vec_i + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define 
XLDR vmovdqu8 + %define XSTR vmovdqu8 +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + +default rel +[bits 64] +section .text + +%define x0 zmm0 +%define xtmpa zmm1 +%define xtmpl1 zmm2 +%define xtmph1 zmm3 +%define xtmph2 zmm4 +%define xtmph3 zmm5 +%define xgft1_hi zmm6 +%define xgft1_lo zmm7 +%define xgft1_loy ymm7 +%define xgft2_hi zmm8 +%define xgft2_lo zmm9 +%define xgft2_loy ymm9 +%define xgft3_hi zmm10 +%define xgft3_lo zmm11 +%define xgft3_loy ymm11 +%define xgft4_hi zmm12 +%define xgft4_lo zmm13 +%define xgft4_loy ymm13 +%define xgft5_hi zmm14 +%define xgft5_lo zmm15 +%define xgft5_loy ymm15 +%define xd1 zmm16 +%define xd2 zmm17 +%define xd3 zmm18 +%define xd4 zmm19 +%define xd5 zmm20 +%define xmask0f zmm21 +%define xtmpl2 zmm22 +%define xtmpl3 zmm23 +%define xtmpl4 zmm24 +%define xtmpl5 zmm25 +%define xtmph4 zmm26 +%define xtmph5 zmm27 + +align 16 +global gf_5vect_mad_avx512:ISAL_SYM_TYPE_FUNCTION +func(gf_5vect_mad_avx512) + FUNC_SAVE + sub len, 64 + jl .return_fail + xor pos, pos + mov tmp, 0x0f + vpbroadcastb xmask0f, tmp ;Construct mask 0x0f0f0f... + sal vec_i, 5 ;Multiply by 32 + sal vec, 5 ;Multiply by 32 + lea tmp, [mul_array + vec_i] + vmovdqu xgft1_loy, [tmp] ;Load array Ax{00}..{0f}, Ax{00}..{f0} + vmovdqu xgft2_loy, [tmp+vec] ;Load array Bx{00}..{0f}, Bx{00}..{f0} + vmovdqu xgft3_loy, [tmp+2*vec] ;Load array Cx{00}..{0f}, Cx{00}..{f0} + vmovdqu xgft5_loy, [tmp+4*vec] ;Load array Ex{00}..{0f}, Ex{00}..{f0} + add tmp, vec + vmovdqu xgft4_loy, [tmp+2*vec] ;Load array Dx{00}..{0f}, Dx{00}..{f0} + vshufi64x2 xgft1_hi, xgft1_lo, xgft1_lo, 0x55 + vshufi64x2 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 + vshufi64x2 xgft2_hi, xgft2_lo, xgft2_lo, 0x55 + vshufi64x2 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 + vshufi64x2 xgft3_hi, xgft3_lo, xgft3_lo, 0x55 + vshufi64x2 xgft3_lo, xgft3_lo, xgft3_lo, 0x00 + vshufi64x2 xgft4_hi, xgft4_lo, xgft4_lo, 0x55 + vshufi64x2 xgft4_lo, xgft4_lo, xgft4_lo, 0x00 + vshufi64x2 xgft5_hi, xgft5_lo, xgft5_lo, 0x55 + vshufi64x2 xgft5_lo, xgft5_lo, xgft5_lo, 0x00 + mov dest2, [dest1+PS] + mov dest3, [dest1+2*PS] ; reuse mul_array + mov dest4, [dest1+3*PS] ; reuse vec + mov dest5, [dest1+4*PS] ; reuse vec_i + mov dest1, [dest1] + mov tmp, -1 + kmovq k1, tmp + +.loop64: + XLDR x0, [src+pos] ;Get next source vector + XLDR xd1, [dest1+pos] ;Get next dest vector + XLDR xd2, [dest2+pos] ;Get next dest vector + XLDR xd3, [dest3+pos] ;Get next dest vector + XLDR xd4, [dest4+pos] ;reuse xtmpl1. 
Get next dest vector
+	XLDR	xd5, [dest5+pos]	;Get next dest vector
+
+	vpandq	xtmpa, x0, xmask0f	;Mask low src nibble in bits 4-0
+	vpsraw	x0, x0, 4		;Shift to put high nibble into bits 4-0
+	vpandq	x0, x0, xmask0f		;Mask high src nibble in bits 4-0
+
+	; dest1
+	vpshufb	xtmph1 {k1}{z}, xgft1_hi, x0	;Lookup mul table of high nibble
+	vpshufb	xtmpl1 {k1}{z}, xgft1_lo, xtmpa	;Lookup mul table of low nibble
+	vpxorq	xtmph1, xtmph1, xtmpl1	;GF add high and low partials
+	vpxorq	xd1, xd1, xtmph1	;xd1 += partial
+
+	; dest2
+	vpshufb	xtmph2 {k1}{z}, xgft2_hi, x0	;Lookup mul table of high nibble
+	vpshufb	xtmpl2 {k1}{z}, xgft2_lo, xtmpa	;Lookup mul table of low nibble
+	vpxorq	xtmph2, xtmph2, xtmpl2	;GF add high and low partials
+	vpxorq	xd2, xd2, xtmph2	;xd2 += partial
+
+	; dest3
+	vpshufb	xtmph3 {k1}{z}, xgft3_hi, x0	;Lookup mul table of high nibble
+	vpshufb	xtmpl3 {k1}{z}, xgft3_lo, xtmpa	;Lookup mul table of low nibble
+	vpxorq	xtmph3, xtmph3, xtmpl3	;GF add high and low partials
+	vpxorq	xd3, xd3, xtmph3	;xd3 += partial
+
+	; dest4
+	vpshufb	xtmph4 {k1}{z}, xgft4_hi, x0	;Lookup mul table of high nibble
+	vpshufb	xtmpl4 {k1}{z}, xgft4_lo, xtmpa	;Lookup mul table of low nibble
+	vpxorq	xtmph4, xtmph4, xtmpl4	;GF add high and low partials
+	vpxorq	xd4, xd4, xtmph4	;xd4 += partial
+
+	; dest5
+	vpshufb	xtmph5 {k1}{z}, xgft5_hi, x0	;Lookup mul table of high nibble
+	vpshufb	xtmpl5 {k1}{z}, xgft5_lo, xtmpa	;Lookup mul table of low nibble
+	vpxorq	xtmph5, xtmph5, xtmpl5	;GF add high and low partials
+	vpxorq	xd5, xd5, xtmph5	;xd5 += partial
+
+	XSTR	[dest1+pos], xd1
+	XSTR	[dest2+pos], xd2
+	XSTR	[dest3+pos], xd3
+	XSTR	[dest4+pos], xd4
+	XSTR	[dest5+pos], xd5
+
+	add	pos, 64			;Loop on 64 bytes at a time
+	cmp	pos, len
+	jle	.loop64
+
+	lea	tmp, [len + 64]
+	cmp	pos, tmp
+	je	.return_pass
+
+	;; Tail len
+	;; Build a mask with only the top (len mod 64) bits set, i.e. the
+	;; bytes of the final overlapped 64-byte window not yet stored;
+	;; the {k1}{z} lookups above then zero the partials for the
+	;; already-written bytes, so rewriting them is a no-op.
+	mov	pos, (1 << 63)
+	lea	tmp, [len + 64 - 1]
+	and	tmp, 63
+	sarx	pos, pos, tmp
+	kmovq	k1, pos
+	mov	pos, len		;Overlapped offset length-64
+	jmp	.loop64			;Do one more overlap pass
+
+.return_pass:
+	mov	return, 0
+	FUNC_RESTORE
+	ret
+
+.return_fail:
+	mov	return, 1
+	FUNC_RESTORE
+	ret
+
+endproc_frame
+
+%else
+%ifidn __OUTPUT_FORMAT__, win64
+global no_gf_5vect_mad_avx512
+no_gf_5vect_mad_avx512:
+%endif
+%endif  ; ifdef HAVE_AS_KNOWS_AVX512
diff --git a/src/isa-l/erasure_code/gf_5vect_mad_sse.asm b/src/isa-l/erasure_code/gf_5vect_mad_sse.asm
new file mode 100644
index 000000000..b16f4058c
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_5vect_mad_sse.asm
@@ -0,0 +1,373 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;  Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+;  Redistribution and use in source and binary forms, with or without
+;  modification, are permitted provided that the following conditions
+;  are met:
+;    * Redistributions of source code must retain the above copyright
+;      notice, this list of conditions and the following disclaimer.
+;    * Redistributions in binary form must reproduce the above copyright
+;      notice, this list of conditions and the following disclaimer in
+;      the documentation and/or other materials provided with the
+;      distribution.
+;    * Neither the name of Intel Corporation nor the names of its
+;      contributors may be used to endorse or promote products derived
+;      from this software without specific prior written permission.
+; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_5vect_mad_sse(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%define PS 8 + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg0.w ecx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 + %define tmp4 r14 + %define return rax + %define return.w eax + %define stack_size 16*10 + 5*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + +%macro FUNC_SAVE 0 + sub rsp, stack_size + movdqa [rsp+16*0],xmm6 + movdqa [rsp+16*1],xmm7 + movdqa [rsp+16*2],xmm8 + movdqa [rsp+16*3],xmm9 + movdqa [rsp+16*4],xmm10 + movdqa [rsp+16*5],xmm11 + movdqa [rsp+16*6],xmm12 + movdqa [rsp+16*7],xmm13 + movdqa [rsp+16*8],xmm14 + movdqa [rsp+16*9],xmm15 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 + save_reg r15, 10*16 + 3*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) +%endmacro + +%macro FUNC_RESTORE 0 + movdqa xmm6, [rsp+16*0] + movdqa xmm7, [rsp+16*1] + movdqa xmm8, [rsp+16*2] + movdqa xmm9, [rsp+16*3] + movdqa xmm10, [rsp+16*4] + movdqa xmm11, [rsp+16*5] + movdqa xmm12, [rsp+16*6] + movdqa xmm13, [rsp+16*7] + movdqa xmm14, [rsp+16*8] + movdqa xmm15, [rsp+16*9] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + add rsp, stack_size +%endmacro + +%elifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg0.w edi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define tmp2 r10 + %define tmp3 r12 + %define tmp4 r13 + %define return rax + %define return.w eax + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + %endmacro + %macro FUNC_RESTORE 0 + pop r13 + pop r12 + %endmacro +%endif + +;;; gf_5vect_mad_sse(len, vec, vec_i, mul_array, src, dest) +%define len arg0 +%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest1 arg5 +%define pos return +%define pos.w return.w + +%define dest2 tmp4 +%define dest3 mul_array +%define dest4 tmp2 +%define dest5 vec_i + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR movdqu + %define XSTR movdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR movdqa + %define XSTR movdqa + %else + %define XLDR movntdqa + %define XSTR movntdq + %endif +%endif + +default rel + +[bits 64] +section .text + +%define xmask0f xmm15 +%define xgft5_hi xmm14 +%define xgft4_lo xmm13 +%define xgft4_hi xmm12 + +%define x0 xmm0 +%define xtmpa xmm1 +%define xtmph1 xmm2 
+%define xtmpl1 xmm3 +%define xtmph2 xmm4 +%define xtmpl2 xmm5 +%define xtmph3 xmm6 +%define xtmpl3 xmm7 +%define xtmph5 xmm8 +%define xtmpl5 xmm9 +%define xd1 xmm10 +%define xd2 xmm11 +%define xd3 xtmpl1 +%define xd4 xtmph1 +%define xd5 xtmpl2 + + +align 16 +global gf_5vect_mad_sse:ISAL_SYM_TYPE_FUNCTION +func(gf_5vect_mad_sse) + FUNC_SAVE + sub len, 16 + jl .return_fail + xor pos, pos + + movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + mov tmp, vec + sal vec_i, 5 ;Multiply by 32 + lea tmp3, [mul_array + vec_i] + sal tmp, 6 ;Multiply by 64 + movdqu xgft5_hi, [tmp3+2*tmp+16] ; " Ex{00}, Ex{10}, ..., Ex{f0} + sal vec, 5 ;Multiply by 32 + add tmp, vec + movdqu xgft4_hi, [tmp3+tmp+16] ; " Dx{00}, Dx{10}, Dx{20}, ... , Dx{f0} + movdqu xgft4_lo, [tmp3+tmp] ;Load array Dx{00}, Dx{01}, Dx{02}, ... + + mov dest3, [dest1+2*PS] ; reuse mul_array + mov dest4, [dest1+3*PS] + mov dest5, [dest1+4*PS] ; reuse vec_i + mov dest2, [dest1+PS] + mov dest1, [dest1] + +.loop16: + XLDR x0, [src+pos] ;Get next source vector + + movdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0} + movdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ... + movdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0} + movdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ... + movdqu xtmph3, [tmp3+2*vec+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0} + movdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ... + movdqu xtmpl5, [tmp3+4*vec] ;Load array Ex{00}, Ex{01}, ..., Ex{0f} + movdqa xtmph5, xgft5_hi ;Reload const array registers + + XLDR xd1, [dest1+pos] ;Get next dest vector + XLDR xd2, [dest2+pos] ;Get next dest vector + + movdqa xtmpa, x0 ;Keep unshifted copy of src + psraw x0, 4 ;Shift to put high nibble into bits 4-0 + pand x0, xmask0f ;Mask high src nibble in bits 4-0 + pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0 + + ; dest1 + pshufb xtmph1, x0 ;Lookup mul table of high nibble + pshufb xtmpl1, xtmpa ;Lookup mul table of low nibble + pxor xtmph1, xtmpl1 ;GF add high and low partials + pxor xd1, xtmph1 + + XLDR xd3, [dest3+pos] ;Reuse xtmpl1, Get next dest vector + XLDR xd4, [dest4+pos] ;Reuse xtmph1. Get next dest vector + + ; dest2 + pshufb xtmph2, x0 ;Lookup mul table of high nibble + pshufb xtmpl2, xtmpa ;Lookup mul table of low nibble + pxor xtmph2, xtmpl2 ;GF add high and low partials + pxor xd2, xtmph2 + + XLDR xd5, [dest5+pos] ;Reuse xtmpl2. 
Get next dest vector + + ; dest3 + pshufb xtmph3, x0 ;Lookup mul table of high nibble + pshufb xtmpl3, xtmpa ;Lookup mul table of low nibble + pxor xtmph3, xtmpl3 ;GF add high and low partials + pxor xd3, xtmph3 + + movdqa xtmph2, xgft4_hi ;Reload const array registers + movdqa xtmpl3, xgft4_lo ;Reload const array registers + + ; dest5 + pshufb xtmph5, x0 ;Lookup mul table of high nibble + pshufb xtmpl5, xtmpa ;Lookup mul table of low nibble + pxor xtmph5, xtmpl5 ;GF add high and low partials + pxor xd5, xtmph5 + + ; dest4 + pshufb xtmph2, x0 ;Lookup mul table of high nibble + pshufb xtmpl3, xtmpa ;Lookup mul table of low nibble + pxor xtmph2, xtmpl3 ;GF add high and low partials + pxor xd4, xtmph2 + + XSTR [dest1+pos], xd1 ;Store result into dest1 + XSTR [dest2+pos], xd2 ;Store result into dest2 + XSTR [dest3+pos], xd3 ;Store result into dest3 + XSTR [dest4+pos], xd4 ;Store result into dest4 + XSTR [dest5+pos], xd5 ;Store result into dest5 + + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + +.lessthan16: + ;; Tail len + ;; Do one more overlap pass + mov tmp, len ;Overlapped offset length-16 + XLDR x0, [src+tmp] ;Get next source vector + + sub len, pos + + movdqa xtmpl1, [constip16] ;Load const of i + 16 + pinsrb xtmph5, len.w, 15 + pshufb xtmph5, xmask0f ;Broadcast len to all bytes + pcmpgtb xtmph5, xtmpl1 + + movdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0} + movdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ... + movdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0} + movdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ... + movdqu xtmph3, [tmp3+2*vec+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0} + movdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ... + movdqu xtmpl5, [tmp3+4*vec] ;Load array Ex{00}, Ex{01}, ..., Ex{0f} + + XLDR xd1, [dest1+tmp] ;Get next dest vector + XLDR xd2, [dest2+tmp] ;Get next dest vector + + movdqa xtmpa, x0 ;Keep unshifted copy of src + psraw x0, 4 ;Shift to put high nibble into bits 4-0 + pand x0, xmask0f ;Mask high src nibble in bits 4-0 + pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0 + + ; dest1 + pshufb xtmph1, x0 ;Lookup mul table of high nibble + pshufb xtmpl1, xtmpa ;Lookup mul table of low nibble + pxor xtmph1, xtmpl1 ;GF add high and low partials + pand xtmph1, xtmph5 + pxor xd1, xtmph1 + + XLDR xd3, [dest3+tmp] ;Reuse xtmpl1, Get next dest vector + XLDR xd4, [dest4+tmp] ;Reuse xtmph1. Get next dest vector + + ; dest2 + pshufb xtmph2, x0 ;Lookup mul table of high nibble + pshufb xtmpl2, xtmpa ;Lookup mul table of low nibble + pxor xtmph2, xtmpl2 ;GF add high and low partials + pand xtmph2, xtmph5 + pxor xd2, xtmph2 + + XLDR xd5, [dest5+tmp] ;Reuse xtmpl2. 
Get next dest vector + + ; dest3 + pshufb xtmph3, x0 ;Lookup mul table of high nibble + pshufb xtmpl3, xtmpa ;Lookup mul table of low nibble + pxor xtmph3, xtmpl3 ;GF add high and low partials + pand xtmph3, xtmph5 + pxor xd3, xtmph3 + + ; dest4 + pshufb xgft4_hi, x0 ;Lookup mul table of high nibble + pshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft4_hi, xgft4_lo ;GF add high and low partials + pand xgft4_hi, xtmph5 + pxor xd4, xgft4_hi + + ; dest5 + pshufb xgft5_hi, x0 ;Lookup mul table of high nibble + pshufb xtmpl5, xtmpa ;Lookup mul table of low nibble + pxor xgft5_hi, xtmpl5 ;GF add high and low partials + pand xgft5_hi, xtmph5 + pxor xd5, xgft5_hi + + XSTR [dest1+tmp], xd1 ;Store result into dest1 + XSTR [dest2+tmp], xd2 ;Store result into dest2 + XSTR [dest3+tmp], xd3 ;Store result into dest3 + XSTR [dest4+tmp], xd4 ;Store result into dest4 + XSTR [dest5+tmp], xd5 ;Store result into dest5 + +.return_pass: + FUNC_RESTORE + mov return, 0 + ret + +.return_fail: + FUNC_RESTORE + mov return, 1 + ret + +endproc_frame + +section .data + +align 16 + +mask0f: + dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f +constip16: + dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7 + +;;; func core, ver, snum +slversion gf_5vect_mad_sse, 00, 01, 020c diff --git a/src/isa-l/erasure_code/gf_6vect_dot_prod_avx.asm b/src/isa-l/erasure_code/gf_6vect_dot_prod_avx.asm new file mode 100644 index 000000000..a519d5224 --- /dev/null +++ b/src/isa-l/erasure_code/gf_6vect_dot_prod_avx.asm @@ -0,0 +1,315 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
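Before the dot-product bodies, it may help to spell out the one trick every gf_*vect kernel in this patch shares: each coefficient c is expanded (by gf_vect_mul_init() elsewhere in the library) into a 32-byte table, sixteen products of c with the low-nibble values followed by sixteen products with the high-nibble values, and pshufb/vpshufb then performs sixteen parallel lookups per nibble, combined with xor. A minimal scalar sketch of that layout, assuming the field's usual reduction polynomial 0x11d; the helper names here (gf_mul8, gf_tbl32, gf_mul_tbl32) are illustrative, not part of the library API:

#include <stdint.h>

/* Bitwise multiply in GF(2^8) with reduction polynomial
 * x^8 + x^4 + x^3 + x^2 + 1 (0x11d), i.e. xor in 0x1d on overflow. */
static uint8_t gf_mul8(uint8_t a, uint8_t b)
{
	uint8_t p = 0;
	for (int i = 0; i < 8; i++) {
		if (b & 1)
			p ^= a;
		b >>= 1;
		a = (uint8_t)((a << 1) ^ ((a & 0x80) ? 0x1d : 0x00));
	}
	return p;
}

/* Build the 32-byte split table for constant c, in the layout the
 * kernels load: low-nibble products first, then high-nibble products. */
static void gf_tbl32(uint8_t c, uint8_t tbl[32])
{
	for (int i = 0; i < 16; i++) {
		tbl[i] = gf_mul8(c, (uint8_t)i);	   /* c * 0x00..0x0f */
		tbl[16 + i] = gf_mul8(c, (uint8_t)(i << 4)); /* c * 0x00,0x10..0xf0 */
	}
}

/* One byte of the pshufb data flow: two table lookups and an xor. */
static uint8_t gf_mul_tbl32(const uint8_t tbl[32], uint8_t x)
{
	return (uint8_t)(tbl[x & 0x0f] ^ tbl[16 + (x >> 4)]);
}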
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_6vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests); +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r12 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + %endmacro + %macro FUNC_RESTORE 0 + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + %define arg5 r15 ; must be saved and restored + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r14 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + save_xmm128 xmm6, 0*16 + save_xmm128 xmm7, 1*16 + save_xmm128 xmm8, 2*16 + save_xmm128 xmm9, 3*16 + save_xmm128 xmm10, 4*16 + save_xmm128 xmm11, 5*16 + save_xmm128 xmm12, 6*16 + save_xmm128 xmm13, 7*16 + save_xmm128 xmm14, 8*16 + save_xmm128 xmm15, 9*16 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 + save_reg r15, 10*16 + 3*8 + save_reg rdi, 10*16 + 4*8 + save_reg rsi, 10*16 + 5*8 + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp + 0*16] + vmovdqa xmm7, [rsp + 1*16] + vmovdqa xmm8, [rsp + 2*16] + vmovdqa xmm9, [rsp + 3*16] + vmovdqa xmm10, [rsp + 4*16] + vmovdqa xmm11, [rsp + 5*16] + vmovdqa xmm12, [rsp + 6*16] + vmovdqa xmm13, [rsp + 7*16] + vmovdqa xmm14, [rsp + 8*16] + vmovdqa xmm15, [rsp + 9*16] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + mov rdi, [rsp + 10*16 + 4*8] + mov rsi, [rsp + 10*16 + 5*8] + add rsp, stack_size + %endmacro +%endif + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest arg4 +%define ptr arg5 +%define vec_i tmp2 +%define dest1 tmp3 +%define dest2 tmp4 +%define vskip1 tmp5 +%define vskip3 tmp6 +%define pos return + + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + + +default rel + +[bits 64] +section .text + +%define xmask0f xmm15 +%define xgft1_lo xmm14 +%define xgft1_hi xmm13 +%define xgft2_lo xmm12 +%define xgft2_hi xmm11 +%define xgft3_lo xmm10 +%define xgft3_hi xmm9 +%define x0 xmm0 +%define xtmpa xmm1 +%define xp1 xmm2 +%define xp2 xmm3 +%define xp3 xmm4 +%define xp4 xmm5 +%define xp5 xmm6 +%define xp6 xmm7 + +align 16 +global gf_6vect_dot_prod_avx:ISAL_SYM_TYPE_FUNCTION +func(gf_6vect_dot_prod_avx) + FUNC_SAVE + sub len, 16 + jl .return_fail + xor pos, pos + vmovdqa 
xmask0f, [mask0f] ;Load mask of lower nibble in each byte + mov vskip1, vec + imul vskip1, 32 + mov vskip3, vec + imul vskip3, 96 + sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS + mov dest1, [dest] + mov dest2, [dest+PS] + + +.loop16: + mov tmp, mul_array + xor vec_i, vec_i + vpxor xp1, xp1 + vpxor xp2, xp2 + vpxor xp3, xp3 + vpxor xp4, xp4 + vpxor xp5, xp5 + vpxor xp6, xp6 + +.next_vect: + mov ptr, [src+vec_i] + add vec_i, PS + XLDR x0, [ptr+pos] ;Get next source vector + + vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0} + vmovdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + vmovdqu xgft2_hi, [tmp+vskip1*1+16] ; " Bx{00}, Bx{10}, ..., Bx{f0} + vmovdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + vmovdqu xgft3_hi, [tmp+vskip1*2+16] ; " Cx{00}, Cx{10}, ..., Cx{f0} + lea ptr, [vskip1 + vskip1*4] ;ptr = vskip5 + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + + vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft1_hi, xgft1_lo ;GF add high and low partials + vpxor xp1, xgft1_hi ;xp1 += partial + + vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft2_hi, xgft2_lo ;GF add high and low partials + vpxor xp2, xgft2_hi ;xp2 += partial + + vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft3_hi, xgft3_lo ;GF add high and low partials + vpxor xp3, xgft3_hi ;xp3 += partial + + + vmovdqu xgft1_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f} + vmovdqu xgft1_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0} + vmovdqu xgft2_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f} + vmovdqu xgft2_hi, [tmp+vskip1*4+16] ; " Ex{00}, Ex{10}, ..., Ex{f0} + vmovdqu xgft3_lo, [tmp+ptr] ;Load array Fx{00}, Fx{01}, ..., Fx{0f} + vmovdqu xgft3_hi, [tmp+ptr+16] ; " Fx{00}, Fx{10}, ..., Fx{f0} + add tmp, 32 + + + vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft1_hi, xgft1_lo ;GF add high and low partials + vpxor xp4, xgft1_hi ;xp4 += partial + + vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft2_hi, xgft2_lo ;GF add high and low partials + vpxor xp5, xgft2_hi ;xp5 += partial + + vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft3_hi, xgft3_lo ;GF add high and low partials + vpxor xp6, xgft3_hi ;xp6 += partial + + cmp vec_i, vec + jl .next_vect + + + mov tmp, [dest+2*PS] + mov ptr, [dest+3*PS] + mov vec_i, [dest+4*PS] + + XSTR [dest1+pos], xp1 + XSTR [dest2+pos], xp2 + XSTR [tmp+pos], xp3 + mov tmp, [dest+5*PS] + XSTR [ptr+pos], xp4 + XSTR [vec_i+pos], xp5 + XSTR [tmp+pos], xp6 + + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-16 + jmp .loop16 ;Do one more overlap pass + +.return_pass: + FUNC_RESTORE + mov return, 0 + ret + +.return_fail: + FUNC_RESTORE + mov return, 1 + ret + +endproc_frame + +section .data + +align 16 +mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f + +;;; func 
core, ver, snum +slversion gf_6vect_dot_prod_avx, 02, 04, 0195 diff --git a/src/isa-l/erasure_code/gf_6vect_dot_prod_avx2.asm b/src/isa-l/erasure_code/gf_6vect_dot_prod_avx2.asm new file mode 100644 index 000000000..a57c52a00 --- /dev/null +++ b/src/isa-l/erasure_code/gf_6vect_dot_prod_avx2.asm @@ -0,0 +1,326 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
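In scalar terms, the six-destination dot product that the AVX2 kernel below (and the AVX kernel above) vectorizes computes, for each output row j and byte position p, the GF(2^8) sum over all vec sources of the row's coefficient applied to buffs[i][p]. A hedged C reference under the same g_tbls layout, with the table for (row j, source i) at byte offset 32*(j*vec + i), which is exactly the vskip1 = 32*vec row stride used in the assembly:

#include <stddef.h>
#include <stdint.h>

/* Scalar reference for the gf_6vect_dot_prod_* kernels.  Not the
 * library's own code; a sketch of the semantics the SIMD bodies
 * implement. */
static void gf_6vect_dot_prod_ref(size_t len, int vec, const uint8_t *g_tbls,
				  uint8_t * const *buffs, uint8_t * const *dests)
{
	for (int j = 0; j < 6; j++) {
		for (size_t p = 0; p < len; p++) {
			uint8_t s = 0;
			for (int i = 0; i < vec; i++) {
				const uint8_t *t = &g_tbls[32 * ((size_t)j * vec + i)];
				uint8_t x = buffs[i][p];
				/* two nibble lookups + xor, as pshufb does */
				s ^= (uint8_t)(t[x & 0x0f] ^ t[16 + (x >> 4)]);
			}
			dests[j][p] = s;
		}
	}
}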
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_6vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests); +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r12 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + %endmacro + %macro FUNC_RESTORE 0 + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + %define arg5 r15 ; must be saved and restored + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r14 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + vmovdqa [rsp + 0*16], xmm6 + vmovdqa [rsp + 1*16], xmm7 + vmovdqa [rsp + 2*16], xmm8 + vmovdqa [rsp + 3*16], xmm9 + vmovdqa [rsp + 4*16], xmm10 + vmovdqa [rsp + 5*16], xmm11 + vmovdqa [rsp + 6*16], xmm12 + vmovdqa [rsp + 7*16], xmm13 + vmovdqa [rsp + 8*16], xmm14 + vmovdqa [rsp + 9*16], xmm15 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 + save_reg r15, 10*16 + 3*8 + save_reg rdi, 10*16 + 4*8 + save_reg rsi, 10*16 + 5*8 + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp + 0*16] + vmovdqa xmm7, [rsp + 1*16] + vmovdqa xmm8, [rsp + 2*16] + vmovdqa xmm9, [rsp + 3*16] + vmovdqa xmm10, [rsp + 4*16] + vmovdqa xmm11, [rsp + 5*16] + vmovdqa xmm12, [rsp + 6*16] + vmovdqa xmm13, [rsp + 7*16] + vmovdqa xmm14, [rsp + 8*16] + vmovdqa xmm15, [rsp + 9*16] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + mov rdi, [rsp + 10*16 + 4*8] + mov rsi, [rsp + 10*16 + 5*8] + add rsp, stack_size + %endmacro +%endif + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest arg4 +%define ptr arg5 +%define vec_i tmp2 +%define dest1 tmp3 +%define dest2 tmp4 +%define vskip1 tmp5 +%define vskip3 tmp6 +%define pos return + + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + + +default rel + +[bits 64] +section .text + +%define xmask0f ymm15 +%define xmask0fx xmm15 +%define xgft1_lo ymm14 +%define xgft1_hi ymm13 +%define xgft2_lo ymm12 +%define xgft2_hi ymm11 +%define xgft3_lo ymm10 +%define xgft3_hi ymm9 +%define x0 ymm0 +%define xtmpa ymm1 +%define xp1 ymm2 +%define xp2 ymm3 +%define xp3 ymm4 +%define xp4 ymm5 +%define xp5 ymm6 +%define xp6 ymm7 + +align 16 +global 
gf_6vect_dot_prod_avx2:ISAL_SYM_TYPE_FUNCTION +func(gf_6vect_dot_prod_avx2) + FUNC_SAVE + sub len, 32 + jl .return_fail + xor pos, pos + mov tmp.b, 0x0f + vpinsrb xmask0fx, xmask0fx, tmp.w, 0 + vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f... + mov vskip1, vec + imul vskip1, 32 + mov vskip3, vec + imul vskip3, 96 + sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS + mov dest1, [dest] + mov dest2, [dest+PS] + + +.loop32: + mov tmp, mul_array + xor vec_i, vec_i + vpxor xp1, xp1 + vpxor xp2, xp2 + vpxor xp3, xp3 + vpxor xp4, xp4 + vpxor xp5, xp5 + vpxor xp6, xp6 + +.next_vect: + mov ptr, [src+vec_i] + XLDR x0, [ptr+pos] ;Get next source vector + add vec_i, PS + + vpand xgft3_lo, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + vperm2i128 xtmpa, xgft3_lo, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi + vperm2i128 x0, xgft3_lo, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo + + vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + ; " Ax{00}, Ax{10}, ..., Ax{f0} + vmovdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + ; " Bx{00}, Bx{10}, ..., Bx{f0} + vmovdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + ; " Cx{00}, Cx{10}, ..., Cx{f0} + lea ptr, [vskip1 + vskip1*4] ;ptr = vskip5 + + vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo + vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo + vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo + + vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft1_hi, xgft1_lo ;GF add high and low partials + vpxor xp1, xgft1_hi ;xp1 += partial + + vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft2_hi, xgft2_lo ;GF add high and low partials + vpxor xp2, xgft2_hi ;xp2 += partial + + vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft3_hi, xgft3_lo ;GF add high and low partials + vpxor xp3, xgft3_hi ;xp3 += partial + + + vmovdqu xgft1_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f} + ; " Dx{00}, Dx{10}, ..., Dx{f0} + vmovdqu xgft2_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f} + ; " Ex{00}, Ex{10}, ..., Ex{f0} + vmovdqu xgft3_lo, [tmp+ptr] ;Load array Fx{00}, Fx{01}, ..., Fx{0f} + ; " Fx{00}, Fx{10}, ..., Fx{f0} + add tmp, 32 + vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo + vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo + vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo + + vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft1_hi, xgft1_lo ;GF add high and low partials + vpxor xp4, xgft1_hi ;xp4 += partial + + vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft2_hi, xgft2_lo ;GF add high and low partials + vpxor xp5, xgft2_hi ;xp5 += partial + + vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft3_hi, xgft3_lo ;GF add high and low partials + vpxor xp6, xgft3_hi ;xp6 += partial + + cmp vec_i, vec + jl .next_vect + + + mov tmp, [dest+2*PS] + mov ptr, [dest+3*PS] + mov vec_i, [dest+4*PS] + + XSTR [dest1+pos], xp1 + XSTR [dest2+pos], xp2 + XSTR 
[tmp+pos], xp3 + mov tmp, [dest+5*PS] + XSTR [ptr+pos], xp4 + XSTR [vec_i+pos], xp5 + XSTR [tmp+pos], xp6 + + add pos, 32 ;Loop on 32 bytes at a time + cmp pos, len + jle .loop32 + + lea tmp, [len + 32] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-16 + jmp .loop32 ;Do one more overlap pass + +.return_pass: + FUNC_RESTORE + mov return, 0 + ret + +.return_fail: + FUNC_RESTORE + mov return, 1 + ret + +endproc_frame + +section .data + +;;; func core, ver, snum +slversion gf_6vect_dot_prod_avx2, 04, 04, 019a diff --git a/src/isa-l/erasure_code/gf_6vect_dot_prod_avx512.asm b/src/isa-l/erasure_code/gf_6vect_dot_prod_avx512.asm new file mode 100644 index 000000000..bfcfacb8b --- /dev/null +++ b/src/isa-l/erasure_code/gf_6vect_dot_prod_avx512.asm @@ -0,0 +1,354 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2019 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
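One structural point worth noting before the AVX512 variant: none of these dot-product kernels carries a masked or partial tail path. len is reduced by the vector width up front (shorter buffers return fail), and if len is not a multiple of the width the code jumps back and re-runs one final full-width pass at offset len-width, harmlessly recomputing bytes already written, which is safe because every output byte depends only on the inputs at the same offset. A control-flow sketch, where process() stands in for one full-width pass over all sources and destinations:

#include <stddef.h>

/* Shape of the shared tail handling; W is the SIMD width in bytes.
 * Mirrors "sub len, W / jl .return_fail", the "cmp pos, len / jle"
 * main loop, and the single overlapped pass at len - W. */
static int overlapped_tail(size_t len, size_t W, void (*process)(size_t pos))
{
	if (len < W)
		return 1;			/* .return_fail */
	size_t last = len - W;			/* sub len, W */
	size_t pos;
	for (pos = 0; pos <= last; pos += W)	/* main full-width loop */
		process(pos);
	if (pos != last + W)			/* lea tmp,[len+W]; cmp pos,tmp */
		process(last);			/* one overlapping tail pass */
	return 0;				/* .return_pass */
}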
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_6vect_dot_prod_avx512(len, vec, *g_tbls, **buffs, **dests); +;;; + +%include "reg_sizes.asm" + +%ifdef HAVE_AS_KNOWS_AVX512 + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r12 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define tmp7 rbp ; must be saved and restored + %define tmp8 rbx ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + push rbp + push rbx + %endmacro + %macro FUNC_RESTORE 0 + pop rbx + pop rbp + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + %define arg5 r15 ; must be saved and restored + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r14 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define tmp7 rbp ; must be saved and restored + %define tmp8 rbx ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + %define stack_size 10*16 + 9*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + vmovdqa [rsp + 0*16], xmm6 + vmovdqa [rsp + 1*16], xmm7 + vmovdqa [rsp + 2*16], xmm8 + vmovdqa [rsp + 3*16], xmm9 + vmovdqa [rsp + 4*16], xmm10 + vmovdqa [rsp + 5*16], xmm11 + vmovdqa [rsp + 6*16], xmm12 + vmovdqa [rsp + 7*16], xmm13 + vmovdqa [rsp + 8*16], xmm14 + vmovdqa [rsp + 9*16], xmm15 + save_reg r12, 9*16 + 0*8 + save_reg r13, 9*16 + 1*8 + save_reg r14, 9*16 + 2*8 + save_reg r15, 9*16 + 3*8 + save_reg rdi, 9*16 + 4*8 + save_reg rsi, 9*16 + 5*8 + save_reg rbp, 9*16 + 6*8 + save_reg rbx, 9*16 + 7*8 + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp + 0*16] + vmovdqa xmm7, [rsp + 1*16] + vmovdqa xmm8, [rsp + 2*16] + vmovdqa xmm9, [rsp + 3*16] + vmovdqa xmm10, [rsp + 4*16] + vmovdqa xmm11, [rsp + 5*16] + vmovdqa xmm12, [rsp + 6*16] + vmovdqa xmm13, [rsp + 7*16] + vmovdqa xmm14, [rsp + 8*16] + vmovdqa xmm15, [rsp + 9*16] + mov r12, [rsp + 9*16 + 0*8] + mov r13, [rsp + 9*16 + 1*8] + mov r14, [rsp + 9*16 + 2*8] + mov r15, [rsp + 9*16 + 3*8] + mov rdi, [rsp + 9*16 + 4*8] + mov rsi, [rsp + 9*16 + 5*8] + mov rbp, [rsp + 9*16 + 6*8] + mov rbx, [rsp + 9*16 + 7*8] + add rsp, stack_size + %endmacro +%endif + + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest1 arg4 +%define ptr arg5 +%define vec_i tmp2 +%define dest2 tmp3 +%define dest3 tmp4 +%define dest4 tmp5 +%define vskip3 tmp6 +%define dest5 tmp7 +%define vskip1 tmp8 +%define pos return + + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu8 + %define XSTR vmovdqu8 +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + 
%endif +%endif + +%define xmask0f zmm20 +%define xgft1_lo zmm19 +%define xgft1_loy ymm19 +%define xgft1_hi zmm18 +%define xgft2_lo zmm17 +%define xgft2_loy ymm17 +%define xgft2_hi zmm16 +%define xgft3_lo zmm15 +%define xgft3_loy ymm15 +%define xgft3_hi zmm14 +%define xgft4_lo zmm13 +%define xgft4_loy ymm13 +%define xgft4_hi zmm12 +%define xgft5_lo zmm11 +%define xgft5_loy ymm11 +%define xgft5_hi zmm10 +%define xgft6_lo zmm9 +%define xgft6_loy ymm9 +%define xgft6_hi zmm8 + +%define x0 zmm0 +%define xtmpa zmm1 +%define xp1 zmm2 +%define xp2 zmm3 +%define xp3 zmm4 +%define xp4 zmm5 +%define xp5 zmm6 +%define xp6 zmm7 + +default rel +[bits 64] + +section .text + +align 16 +global gf_6vect_dot_prod_avx512:ISAL_SYM_TYPE_FUNCTION +func(gf_6vect_dot_prod_avx512) + FUNC_SAVE + sub len, 64 + jl .return_fail + + xor pos, pos + mov tmp, 0x0f + vpbroadcastb xmask0f, tmp ;Construct mask 0x0f0f0f... + mov vskip1, vec + imul vskip1, 32 + mov vskip3, vec + imul vskip3, 96 + sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS + mov dest2, [dest1+PS] + mov dest3, [dest1+2*PS] + mov dest4, [dest1+3*PS] + mov dest5, [dest1+4*PS] + +.loop64: + vpxorq xp1, xp1, xp1 + vpxorq xp2, xp2, xp2 + vpxorq xp3, xp3, xp3 + vpxorq xp4, xp4, xp4 + vpxorq xp5, xp5, xp5 + vpxorq xp6, xp6, xp6 + mov tmp, mul_array + xor vec_i, vec_i + +.next_vect: + mov ptr, [src+vec_i] + XLDR x0, [ptr+pos] ;Get next source vector + add vec_i, PS + + vpandq xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpandq x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + vmovdqu8 xgft1_loy, [tmp] ;Load array Ax{00}..{0f}, Ax{00}..{f0} + vmovdqu8 xgft2_loy, [tmp+vec*(32/PS)] ;Load array Bx{00}..{0f}, Bx{00}..{f0} + vmovdqu8 xgft3_loy, [tmp+vec*(64/PS)] ;Load array Cx{00}..{0f}, Cx{00}..{f0} + vmovdqu8 xgft4_loy, [tmp+vskip3] ;Load array Dx{00}..{0f}, Dx{00}..{f0} + vmovdqu8 xgft5_loy, [tmp+vskip1*4] ;Load array Ex{00}..{0f}, Ex{00}..{f0} + lea ptr, [vskip1 + vskip1*4] ;ptr = vskip5 + vmovdqu8 xgft6_loy, [tmp+ptr] ;Load array Fx{00}..{0f}, Fx{00}..{f0} + add tmp, 32 + + vshufi64x2 xgft1_hi, xgft1_lo, xgft1_lo, 0x55 + vshufi64x2 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 + vshufi64x2 xgft2_hi, xgft2_lo, xgft2_lo, 0x55 + vshufi64x2 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 + + vpshufb xgft1_hi, xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xgft1_hi, xgft1_hi, xgft1_lo ;GF add high and low partials + vpxorq xp1, xp1, xgft1_hi ;xp1 += partial + + vpshufb xgft2_hi, xgft2_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xgft2_hi, xgft2_hi, xgft2_lo ;GF add high and low partials + vpxorq xp2, xp2, xgft2_hi ;xp2 += partial + + vshufi64x2 xgft3_hi, xgft3_lo, xgft3_lo, 0x55 + vshufi64x2 xgft3_lo, xgft3_lo, xgft3_lo, 0x00 + vshufi64x2 xgft4_hi, xgft4_lo, xgft4_lo, 0x55 + vshufi64x2 xgft4_lo, xgft4_lo, xgft4_lo, 0x00 + + vpshufb xgft3_hi, xgft3_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft3_lo, xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xgft3_hi, xgft3_hi, xgft3_lo ;GF add high and low partials + vpxorq xp3, xp3, xgft3_hi ;xp3 += partial + + vpshufb xgft4_hi, xgft4_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xgft4_hi, xgft4_hi, xgft4_lo ;GF add high and low partials + vpxorq xp4, xp4, xgft4_hi ;xp4 += partial + + vshufi64x2 xgft5_hi, xgft5_lo, xgft5_lo, 0x55 + vshufi64x2 xgft5_lo, 
xgft5_lo, xgft5_lo, 0x00 + + vpshufb xgft5_hi, xgft5_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft5_lo, xgft5_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xgft5_hi, xgft5_hi, xgft5_lo ;GF add high and low partials + vpxorq xp5, xp5, xgft5_hi ;xp5 += partial + + vshufi64x2 xgft6_hi, xgft6_lo, xgft6_lo, 0x55 + vshufi64x2 xgft6_lo, xgft6_lo, xgft6_lo, 0x00 + + vpshufb xgft6_hi, xgft6_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft6_lo, xgft6_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xgft6_hi, xgft6_hi, xgft6_lo ;GF add high and low partials + vpxorq xp6, xp6, xgft6_hi ;x6 += partial + + cmp vec_i, vec + jl .next_vect + + mov ptr, [dest1] ;reuse ptr + mov tmp, [dest1+5*PS] ;reuse tmp + + XSTR [dest2+pos], xp2 + XSTR [dest3+pos], xp3 + XSTR [dest4+pos], xp4 + XSTR [dest5+pos], xp5 + + XSTR [ptr+pos], xp1 + XSTR [tmp+pos], xp6 + + add pos, 64 ;Loop on 64 bytes at a time + cmp pos, len + jle .loop64 + + lea tmp, [len + 64] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-64 + jmp .loop64 ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +%else +%ifidn __OUTPUT_FORMAT__, win64 +global no_gf_6vect_dot_prod_avx512 +no_gf_6vect_dot_prod_avx512: +%endif +%endif ; ifdef HAVE_AS_KNOWS_AVX512 diff --git a/src/isa-l/erasure_code/gf_6vect_dot_prod_sse.asm b/src/isa-l/erasure_code/gf_6vect_dot_prod_sse.asm new file mode 100644 index 000000000..b62881136 --- /dev/null +++ b/src/isa-l/erasure_code/gf_6vect_dot_prod_sse.asm @@ -0,0 +1,315 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
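The vskip bookkeeping in the SSE body below (and in the wider variants above) is plain row addressing over g_tbls: row j of the coefficient matrix owns vec consecutive 32-byte tables, rows 0..5 start at 0, vskip1, vskip1*2, vskip3 (3*vskip1, hence the imul by 96) and vskip1*4, and 5*vskip1 is formed on the fly with lea ptr, [vskip1 + vskip1*4]. A hypothetical helper condensing the arithmetic:

#include <stddef.h>

/* Byte offset in g_tbls of the 32-byte table for (row, src_i); the
 * "add tmp, 32" in the inner loops is the +32 per source here. */
static size_t tbl_off(int row, int src_i, int vec)
{
	size_t vskip1 = 32 * (size_t)vec;	/* stride between rows */
	return (size_t)row * vskip1 + 32 * (size_t)src_i;
}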
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_6vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests); +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r12 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + %endmacro + %macro FUNC_RESTORE 0 + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + %define arg5 r15 ; must be saved and restored + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 ; must be saved and restored + %define tmp4 r14 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define return rax + %define PS 8 + %define LOG_PS 3 + %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + save_xmm128 xmm6, 0*16 + save_xmm128 xmm7, 1*16 + save_xmm128 xmm8, 2*16 + save_xmm128 xmm9, 3*16 + save_xmm128 xmm10, 4*16 + save_xmm128 xmm11, 5*16 + save_xmm128 xmm12, 6*16 + save_xmm128 xmm13, 7*16 + save_xmm128 xmm14, 8*16 + save_xmm128 xmm15, 9*16 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 + save_reg r15, 10*16 + 3*8 + save_reg rdi, 10*16 + 4*8 + save_reg rsi, 10*16 + 5*8 + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + movdqa xmm6, [rsp + 0*16] + movdqa xmm7, [rsp + 1*16] + movdqa xmm8, [rsp + 2*16] + movdqa xmm9, [rsp + 3*16] + movdqa xmm10, [rsp + 4*16] + movdqa xmm11, [rsp + 5*16] + movdqa xmm12, [rsp + 6*16] + movdqa xmm13, [rsp + 7*16] + movdqa xmm14, [rsp + 8*16] + movdqa xmm15, [rsp + 9*16] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + mov rdi, [rsp + 10*16 + 4*8] + mov rsi, [rsp + 10*16 + 5*8] + add rsp, stack_size + %endmacro +%endif + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest arg4 +%define ptr arg5 +%define vec_i tmp2 +%define dest1 tmp3 +%define dest2 tmp4 +%define vskip1 tmp5 +%define vskip3 tmp6 +%define pos return + + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR movdqu + %define XSTR movdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR movdqa + %define XSTR movdqa + %else + %define XLDR movntdqa + %define XSTR movntdq + %endif +%endif + + +default rel + +[bits 64] +section .text + +%define xmask0f xmm15 +%define xgft1_lo xmm2 +%define xgft1_hi xmm3 +%define xgft2_lo xmm4 +%define xgft2_hi xmm5 +%define xgft3_lo xmm6 +%define xgft3_hi xmm7 +%define x0 xmm0 +%define xtmpa xmm1 +%define xp1 xmm8 +%define xp2 xmm9 +%define xp3 xmm10 +%define xp4 xmm11 +%define xp5 xmm12 +%define xp6 xmm13 + +align 16 +global gf_6vect_dot_prod_sse:ISAL_SYM_TYPE_FUNCTION +func(gf_6vect_dot_prod_sse) + FUNC_SAVE + sub len, 16 + jl .return_fail + xor pos, pos + movdqa xmask0f, [mask0f] ;Load 
mask of lower nibble in each byte + mov vskip1, vec + imul vskip1, 32 + mov vskip3, vec + imul vskip3, 96 + sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS + mov dest1, [dest] + mov dest2, [dest+PS] + + +.loop16: + mov tmp, mul_array + xor vec_i, vec_i + pxor xp1, xp1 + pxor xp2, xp2 + pxor xp3, xp3 + pxor xp4, xp4 + pxor xp5, xp5 + pxor xp6, xp6 + +.next_vect: + mov ptr, [src+vec_i] + add vec_i, PS + XLDR x0, [ptr+pos] ;Get next source vector + + movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f} + movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0} + movdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f} + movdqu xgft2_hi, [tmp+vskip1*1+16] ; " Bx{00}, Bx{10}, ..., Bx{f0} + movdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + movdqu xgft3_hi, [tmp+vskip1*2+16] ; " Cx{00}, Cx{10}, ..., Cx{f0} + lea ptr, [vskip1 + vskip1*4] ;ptr = vskip5 + + movdqa xtmpa, x0 ;Keep unshifted copy of src + psraw x0, 4 ;Shift to put high nibble into bits 4-0 + pand x0, xmask0f ;Mask high src nibble in bits 4-0 + pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0 + + pshufb xgft1_hi, x0 ;Lookup mul table of high nibble + pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft1_hi, xgft1_lo ;GF add high and low partials + pxor xp1, xgft1_hi ;xp1 += partial + + pshufb xgft2_hi, x0 ;Lookup mul table of high nibble + pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft2_hi, xgft2_lo ;GF add high and low partials + pxor xp2, xgft2_hi ;xp2 += partial + + pshufb xgft3_hi, x0 ;Lookup mul table of high nibble + pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft3_hi, xgft3_lo ;GF add high and low partials + pxor xp3, xgft3_hi ;xp3 += partial + + + movdqu xgft1_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f} + movdqu xgft1_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0} + movdqu xgft2_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f} + movdqu xgft2_hi, [tmp+vskip1*4+16] ; " Ex{00}, Ex{10}, ..., Ex{f0} + movdqu xgft3_lo, [tmp+ptr] ;Load array Fx{00}, Fx{01}, ..., Fx{0f} + movdqu xgft3_hi, [tmp+ptr+16] ; " Fx{00}, Fx{10}, ..., Fx{f0} + add tmp, 32 + + + pshufb xgft1_hi, x0 ;Lookup mul table of high nibble + pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft1_hi, xgft1_lo ;GF add high and low partials + pxor xp4, xgft1_hi ;xp4 += partial + + pshufb xgft2_hi, x0 ;Lookup mul table of high nibble + pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft2_hi, xgft2_lo ;GF add high and low partials + pxor xp5, xgft2_hi ;xp5 += partial + + pshufb xgft3_hi, x0 ;Lookup mul table of high nibble + pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft3_hi, xgft3_lo ;GF add high and low partials + pxor xp6, xgft3_hi ;xp6 += partial + + cmp vec_i, vec + jl .next_vect + + + mov tmp, [dest+2*PS] + mov ptr, [dest+3*PS] + mov vec_i, [dest+4*PS] + + XSTR [dest1+pos], xp1 + XSTR [dest2+pos], xp2 + XSTR [tmp+pos], xp3 + mov tmp, [dest+5*PS] + XSTR [ptr+pos], xp4 + XSTR [vec_i+pos], xp5 + XSTR [tmp+pos], xp6 + + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-16 + jmp .loop16 ;Do one more overlap pass + +.return_pass: + FUNC_RESTORE + mov return, 0 + ret + +.return_fail: + FUNC_RESTORE + mov return, 1 + ret + +endproc_frame + +section .data + +align 16 +mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f + +;;; func core, ver, snum +slversion 
gf_6vect_dot_prod_sse, 00, 05, 0066 diff --git a/src/isa-l/erasure_code/gf_6vect_dot_prod_sse_test.c b/src/isa-l/erasure_code/gf_6vect_dot_prod_sse_test.c new file mode 100644 index 000000000..96f67f19a --- /dev/null +++ b/src/isa-l/erasure_code/gf_6vect_dot_prod_sse_test.c @@ -0,0 +1,911 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
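The test source that follows validates FUNCTION_UNDER_TEST against the scalar gf_vect_dot_prod_base() reference, one destination row at a time, across zero-data, random, varied-source-count, end-of-buffer (Electric Fence style) and unaligned-pointer cases. Its many compare blocks all follow one pattern; check_row() below is a hypothetical condensation of that pattern, not a function in the file:

#include <stdio.h>
#include <string.h>

/* Compare one destination row against the scalar reference; the real
 * test additionally dumps the source matrix and both buffers on
 * mismatch before returning -1 from main(). */
static int check_row(const unsigned char *ref, const unsigned char *dut,
		     size_t len, int row)
{
	if (memcmp(ref, dut, len) != 0) {
		printf("Fail test%d\n", row);
		return -1;
	}
	return 0;
}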
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>		// for memset, memcmp
+#include "erasure_code.h"
+#include "types.h"
+
+#ifndef FUNCTION_UNDER_TEST
+# define FUNCTION_UNDER_TEST gf_6vect_dot_prod_sse
+#endif
+#ifndef TEST_MIN_SIZE
+# define TEST_MIN_SIZE 16
+#endif
+
+#define str(s) #s
+#define xstr(s) str(s)
+
+#define TEST_LEN 8192
+#define TEST_SIZE (TEST_LEN/2)
+#define TEST_MEM TEST_SIZE
+#define TEST_LOOPS 20000
+#define TEST_TYPE_STR ""
+
+#ifndef TEST_SOURCES
+# define TEST_SOURCES 16
+#endif
+#ifndef RANDOMS
+# define RANDOMS 20
+#endif
+
+#ifdef EC_ALIGNED_ADDR
+// Define power of 2 range to check ptr, len alignment
+# define PTR_ALIGN_CHK_B 0
+# define LEN_ALIGN_CHK_B 0	// 0 for aligned only
+#else
+// Define power of 2 range to check ptr, len alignment
+# define PTR_ALIGN_CHK_B 32
+# define LEN_ALIGN_CHK_B 32	// 0 for aligned only
+#endif
+
+typedef unsigned char u8;
+
+void dump(unsigned char *buf, int len)
+{
+	int i;
+	for (i = 0; i < len;) {
+		printf(" %2x", 0xff & buf[i++]);
+		if (i % 32 == 0)
+			printf("\n");
+	}
+	printf("\n");
+}
+
+void dump_matrix(unsigned char **s, int k, int m)
+{
+	int i, j;
+	for (i = 0; i < k; i++) {
+		for (j = 0; j < m; j++) {
+			printf(" %2x", s[i][j]);
+		}
+		printf("\n");
+	}
+	printf("\n");
+}
+
+void dump_u8xu8(unsigned char *s, int k, int m)
+{
+	int i, j;
+	for (i = 0; i < k; i++) {
+		for (j = 0; j < m; j++) {
+			printf(" %2x", 0xff & s[j + (i * m)]);
+		}
+		printf("\n");
+	}
+	printf("\n");
+}
+
+int main(int argc, char *argv[])
+{
+	int i, j, rtest, srcs;
+	void *buf;
+	u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES];
+	u8 g4[TEST_SOURCES], g5[TEST_SOURCES], g6[TEST_SOURCES], *g_tbls;
+	u8 *dest1, *dest2, *dest3, *dest4, *dest5, *dest6, *dest_ref1;
+	u8 *dest_ref2, *dest_ref3, *dest_ref4, *dest_ref5, *dest_ref6;
+	u8 *dest_ptrs[6], *buffs[TEST_SOURCES];
+
+	int align, size;
+	unsigned char *efence_buffs[TEST_SOURCES];
+	unsigned int offset;
+	u8 *ubuffs[TEST_SOURCES];
+	u8 *udest_ptrs[6];
+	printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
+
+	// Allocate the arrays
+	for (i = 0; i < TEST_SOURCES; i++) {
+		if (posix_memalign(&buf, 64, TEST_LEN)) {
+			printf("alloc error: Fail");
+			return -1;
+		}
+		buffs[i] = buf;
+	}
+
+	if (posix_memalign(&buf, 16, 2 * (6 * TEST_SOURCES * 32))) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	g_tbls = buf;
+
+	if (posix_memalign(&buf, 64, TEST_LEN)) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	dest1 = buf;
+
+	if (posix_memalign(&buf, 64, TEST_LEN)) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	dest2 = buf;
+
+	if (posix_memalign(&buf, 64, TEST_LEN)) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	dest3 = buf;
+
+	if (posix_memalign(&buf, 64, TEST_LEN)) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	dest4 = buf;
+
+	if (posix_memalign(&buf, 64, TEST_LEN)) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	dest5 = buf;
+
+	if (posix_memalign(&buf, 64, TEST_LEN)) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	dest6 = buf;
+
+	if (posix_memalign(&buf, 64, TEST_LEN)) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	dest_ref1 = buf;
+
+	if (posix_memalign(&buf, 64, TEST_LEN)) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	dest_ref2 = buf;
+
+	if (posix_memalign(&buf, 64, TEST_LEN)) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	dest_ref3 = buf;
+
+	if (posix_memalign(&buf, 64, TEST_LEN)) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	dest_ref4 = buf;
+
+	if (posix_memalign(&buf,
64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest_ref5 = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest_ref6 = buf; + + dest_ptrs[0] = dest1; + dest_ptrs[1] = dest2; + dest_ptrs[2] = dest3; + dest_ptrs[3] = dest4; + dest_ptrs[4] = dest5; + dest_ptrs[5] = dest6; + + // Test of all zeros + for (i = 0; i < TEST_SOURCES; i++) + memset(buffs[i], 0, TEST_LEN); + + memset(dest1, 0, TEST_LEN); + memset(dest2, 0, TEST_LEN); + memset(dest3, 0, TEST_LEN); + memset(dest4, 0, TEST_LEN); + memset(dest5, 0, TEST_LEN); + memset(dest6, 0, TEST_LEN); + memset(dest_ref1, 0, TEST_LEN); + memset(dest_ref2, 0, TEST_LEN); + memset(dest_ref3, 0, TEST_LEN); + memset(dest_ref4, 0, TEST_LEN); + memset(dest_ref5, 0, TEST_LEN); + memset(dest_ref6, 0, TEST_LEN); + memset(g1, 2, TEST_SOURCES); + memset(g2, 1, TEST_SOURCES); + memset(g3, 7, TEST_SOURCES); + memset(g4, 9, TEST_SOURCES); + memset(g5, 4, TEST_SOURCES); + memset(g6, 0xe6, TEST_SOURCES); + + for (i = 0; i < TEST_SOURCES; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]); + gf_vect_mul_init(g3[i], &g_tbls[64 * TEST_SOURCES + i * 32]); + gf_vect_mul_init(g4[i], &g_tbls[96 * TEST_SOURCES + i * 32]); + gf_vect_mul_init(g5[i], &g_tbls[128 * TEST_SOURCES + i * 32]); + gf_vect_mul_init(g6[i], &g_tbls[160 * TEST_SOURCES + i * 32]); + } + + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs, + dest_ref2); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs, + dest_ref3); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], buffs, + dest_ref4); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], buffs, + dest_ref5); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[160 * TEST_SOURCES], buffs, + dest_ref6); + + FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test1\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest1, 25); + return -1; + } + if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest2, 25); + return -1; + } + if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test3\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(dest3, 25); + return -1; + } + if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test4\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, 25); + printf("dprod_dut:"); + dump(dest4, 25); + return -1; + } + if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test5\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref5, 25); + printf("dprod_dut:"); + dump(dest5, 25); + return -1; + } + if (0 != memcmp(dest_ref6, dest6, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test6\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref6, 25); + 
printf("dprod_dut:"); + dump(dest6, 25); + return -1; + } + putchar('.'); + + // Rand data test + + for (rtest = 0; rtest < RANDOMS; rtest++) { + for (i = 0; i < TEST_SOURCES; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < TEST_SOURCES; i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + g4[i] = rand(); + g5[i] = rand(); + g6[i] = rand(); + } + + for (i = 0; i < TEST_SOURCES; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g5[i], &g_tbls[(128 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g6[i], &g_tbls[(160 * TEST_SOURCES) + (i * 32)]); + } + + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], + buffs, dest_ref2); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], + buffs, dest_ref3); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], + buffs, dest_ref4); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], + buffs, dest_ref5); + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[160 * TEST_SOURCES], + buffs, dest_ref6); + + FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest1, 25); + return -1; + } + if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest2, 25); + return -1; + } + if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(dest3, 25); + return -1; + } + if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, 25); + printf("dprod_dut:"); + dump(dest4, 25); + return -1; + } + if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test5 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref5, 25); + printf("dprod_dut:"); + dump(dest5, 25); + return -1; + } + if (0 != memcmp(dest_ref6, dest6, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test6 %d\n", rtest); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref6, 25); + printf("dprod_dut:"); + dump(dest6, 25); + return -1; + } + + putchar('.'); + } + + // Rand data test with varied parameters + for (rtest = 0; rtest < RANDOMS; rtest++) { + for (srcs = TEST_SOURCES; srcs > 0; srcs--) { + for (i = 0; i < srcs; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < srcs; i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + g4[i] = rand(); + g5[i] = rand(); + g6[i] = rand(); + } + + for (i = 0; i < srcs; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + 
gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); + gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); + gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]); + gf_vect_mul_init(g6[i], &g_tbls[(160 * srcs) + (i * 32)]); + } + + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs, + dest_ref2); + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[64 * srcs], buffs, + dest_ref3); + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[96 * srcs], buffs, + dest_ref4); + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[128 * srcs], buffs, + dest_ref5); + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[160 * srcs], buffs, + dest_ref6); + + FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test1 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest1, 25); + return -1; + } + if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test2 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest2, 25); + return -1; + } + if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test3 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(dest3, 25); + return -1; + } + if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test4 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, 25); + printf("dprod_dut:"); + dump(dest4, 25); + return -1; + } + if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test5 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref5, 25); + printf("dprod_dut:"); + dump(dest5, 25); + return -1; + } + if (0 != memcmp(dest_ref6, dest6, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test6 srcs=%d\n", srcs); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref6, 25); + printf("dprod_dut:"); + dump(dest6, 25); + return -1; + } + + putchar('.'); + } + } + + // Run tests at end of buffer for Electric Fence + align = (LEN_ALIGN_CHK_B != 0) ? 
1 : 16; + for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) { + for (i = 0; i < TEST_SOURCES; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end + efence_buffs[i] = buffs[i] + TEST_LEN - size; + + for (i = 0; i < TEST_SOURCES; i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + g4[i] = rand(); + g5[i] = rand(); + g6[i] = rand(); + } + + for (i = 0; i < TEST_SOURCES; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g5[i], &g_tbls[(128 * TEST_SOURCES) + (i * 32)]); + gf_vect_mul_init(g6[i], &g_tbls[(160 * TEST_SOURCES) + (i * 32)]); + } + + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1); + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], + efence_buffs, dest_ref2); + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], + efence_buffs, dest_ref3); + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], + efence_buffs, dest_ref4); + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], + efence_buffs, dest_ref5); + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[160 * TEST_SOURCES], + efence_buffs, dest_ref6); + + FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs); + + if (0 != memcmp(dest_ref1, dest1, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, align); + printf("dprod_dut:"); + dump(dest1, align); + return -1; + } + + if (0 != memcmp(dest_ref2, dest2, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, align); + printf("dprod_dut:"); + dump(dest2, align); + return -1; + } + + if (0 != memcmp(dest_ref3, dest3, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, align); + printf("dprod_dut:"); + dump(dest3, align); + return -1; + } + + if (0 != memcmp(dest_ref4, dest4, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, align); + printf("dprod_dut:"); + dump(dest4, align); + return -1; + } + + if (0 != memcmp(dest_ref5, dest5, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test5 %d\n", rtest); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref5, align); + printf("dprod_dut:"); + dump(dest5, align); + return -1; + } + + if (0 != memcmp(dest_ref6, dest6, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test6 %d\n", rtest); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref6, align); + printf("dprod_dut:"); + dump(dest6, align); + return -1; + } + + putchar('.'); + } + + // Test rand ptr alignment if available + + for (rtest = 0; rtest < RANDOMS; rtest++) { + size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1); + srcs = rand() % TEST_SOURCES; + if (srcs == 0) + continue; + + offset = (PTR_ALIGN_CHK_B != 0) ? 
1 : PTR_ALIGN_CHK_B; + // Add random offsets + for (i = 0; i < srcs; i++) + ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); + + udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset)); + udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset)); + udest_ptrs[2] = dest3 + (rand() & (PTR_ALIGN_CHK_B - offset)); + udest_ptrs[3] = dest4 + (rand() & (PTR_ALIGN_CHK_B - offset)); + udest_ptrs[4] = dest5 + (rand() & (PTR_ALIGN_CHK_B - offset)); + udest_ptrs[5] = dest6 + (rand() & (PTR_ALIGN_CHK_B - offset)); + + memset(dest1, 0, TEST_LEN); // zero pad to check write-over + memset(dest2, 0, TEST_LEN); + memset(dest3, 0, TEST_LEN); + memset(dest4, 0, TEST_LEN); + memset(dest5, 0, TEST_LEN); + memset(dest6, 0, TEST_LEN); + + for (i = 0; i < srcs; i++) + for (j = 0; j < size; j++) + ubuffs[i][j] = rand(); + + for (i = 0; i < srcs; i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + g4[i] = rand(); + g5[i] = rand(); + g6[i] = rand(); + } + + for (i = 0; i < srcs; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); + gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); + gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]); + gf_vect_mul_init(g6[i], &g_tbls[(160 * srcs) + (i * 32)]); + } + + gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1); + gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2); + gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], ubuffs, dest_ref3); + gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], ubuffs, dest_ref4); + gf_vect_dot_prod_base(size, srcs, &g_tbls[128 * srcs], ubuffs, dest_ref5); + gf_vect_dot_prod_base(size, srcs, &g_tbls[160 * srcs], ubuffs, dest_ref6); + + FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs); + + if (memcmp(dest_ref1, udest_ptrs[0], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(udest_ptrs[0], 25); + return -1; + } + if (memcmp(dest_ref2, udest_ptrs[1], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(udest_ptrs[1], 25); + return -1; + } + if (memcmp(dest_ref3, udest_ptrs[2], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(udest_ptrs[2], 25); + return -1; + } + if (memcmp(dest_ref4, udest_ptrs[3], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, 25); + printf("dprod_dut:"); + dump(udest_ptrs[3], 25); + return -1; + } + if (memcmp(dest_ref5, udest_ptrs[4], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref5, 25); + printf("dprod_dut:"); + dump(udest_ptrs[4], 25); + return -1; + } + if (memcmp(dest_ref6, udest_ptrs[5], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref6, 25); + printf("dprod_dut:"); 
+ dump(udest_ptrs[5], 25); + return -1; + } + // Confirm that padding around dests is unchanged + memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff + offset = udest_ptrs[0] - dest1; + + if (memcmp(dest1, dest_ref1, offset)) { + printf("Fail rand ualign pad1 start\n"); + return -1; + } + if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad1 end\n"); + return -1; + } + + offset = udest_ptrs[1] - dest2; + if (memcmp(dest2, dest_ref1, offset)) { + printf("Fail rand ualign pad2 start\n"); + return -1; + } + if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad2 end\n"); + return -1; + } + + offset = udest_ptrs[2] - dest3; + if (memcmp(dest3, dest_ref1, offset)) { + printf("Fail rand ualign pad3 start\n"); + return -1; + } + if (memcmp(dest3 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad3 end\n"); + return -1; + } + + offset = udest_ptrs[3] - dest4; + if (memcmp(dest4, dest_ref1, offset)) { + printf("Fail rand ualign pad4 start\n"); + return -1; + } + if (memcmp(dest4 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad4 end\n"); + return -1; + } + + offset = udest_ptrs[4] - dest5; + if (memcmp(dest5, dest_ref1, offset)) { + printf("Fail rand ualign pad5 start\n"); + return -1; + } + if (memcmp(dest5 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad5 end\n"); + return -1; + } + + offset = udest_ptrs[5] - dest6; + if (memcmp(dest6, dest_ref1, offset)) { + printf("Fail rand ualign pad6 start\n"); + return -1; + } + if (memcmp(dest6 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad6 end\n"); + return -1; + } + + putchar('.'); + } + + // Test all size alignment + align = (LEN_ALIGN_CHK_B != 0) ? 
1 : 16; + + for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) { + srcs = TEST_SOURCES; + + for (i = 0; i < srcs; i++) + for (j = 0; j < size; j++) + buffs[i][j] = rand(); + + for (i = 0; i < srcs; i++) { + g1[i] = rand(); + g2[i] = rand(); + g3[i] = rand(); + g4[i] = rand(); + g5[i] = rand(); + g6[i] = rand(); + } + + for (i = 0; i < srcs; i++) { + gf_vect_mul_init(g1[i], &g_tbls[i * 32]); + gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]); + gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]); + gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]); + gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]); + gf_vect_mul_init(g6[i], &g_tbls[(160 * srcs) + (i * 32)]); + } + + gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1); + gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2); + gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], buffs, dest_ref3); + gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], buffs, dest_ref4); + gf_vect_dot_prod_base(size, srcs, &g_tbls[128 * srcs], buffs, dest_ref5); + gf_vect_dot_prod_base(size, srcs, &g_tbls[160 * srcs], buffs, dest_ref6); + + FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs); + + if (memcmp(dest_ref1, dest_ptrs[0], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref1, 25); + printf("dprod_dut:"); + dump(dest_ptrs[0], 25); + return -1; + } + if (memcmp(dest_ref2, dest_ptrs[1], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref2, 25); + printf("dprod_dut:"); + dump(dest_ptrs[1], 25); + return -1; + } + if (memcmp(dest_ref3, dest_ptrs[2], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref3, 25); + printf("dprod_dut:"); + dump(dest_ptrs[2], 25); + return -1; + } + if (memcmp(dest_ref4, dest_ptrs[3], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref4, 25); + printf("dprod_dut:"); + dump(dest_ptrs[3], 25); + return -1; + } + if (memcmp(dest_ref5, dest_ptrs[4], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref5, 25); + printf("dprod_dut:"); + dump(dest_ptrs[4], 25); + return -1; + } + if (memcmp(dest_ref6, dest_ptrs[5], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref6, 25); + printf("dprod_dut:"); + dump(dest_ptrs[5], 25); + return -1; + } + } + + printf("Pass\n"); + return 0; + +} diff --git a/src/isa-l/erasure_code/gf_6vect_mad_avx.asm b/src/isa-l/erasure_code/gf_6vect_mad_avx.asm new file mode 100644 index 000000000..f2e04cd70 --- /dev/null +++ b/src/isa-l/erasure_code/gf_6vect_mad_avx.asm @@ -0,0 +1,394 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. 
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_6vect_mad_avx(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%define PS 8 + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg0.w ecx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 + %define tmp4 r14 + %define tmp5 rdi + %define return rax + %define return.w eax + %define stack_size 16*10 + 5*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + +%macro FUNC_SAVE 0 + sub rsp, stack_size + movdqa [rsp+16*0],xmm6 + movdqa [rsp+16*1],xmm7 + movdqa [rsp+16*2],xmm8 + movdqa [rsp+16*3],xmm9 + movdqa [rsp+16*4],xmm10 + movdqa [rsp+16*5],xmm11 + movdqa [rsp+16*6],xmm12 + movdqa [rsp+16*7],xmm13 + movdqa [rsp+16*8],xmm14 + movdqa [rsp+16*9],xmm15 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 + save_reg r15, 10*16 + 3*8 + save_reg rdi, 10*16 + 4*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) +%endmacro + +%macro FUNC_RESTORE 0 + movdqa xmm6, [rsp+16*0] + movdqa xmm7, [rsp+16*1] + movdqa xmm8, [rsp+16*2] + movdqa xmm9, [rsp+16*3] + movdqa xmm10, [rsp+16*4] + movdqa xmm11, [rsp+16*5] + movdqa xmm12, [rsp+16*6] + movdqa xmm13, [rsp+16*7] + movdqa xmm14, [rsp+16*8] + movdqa xmm15, [rsp+16*9] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + mov rdi, [rsp + 10*16 + 4*8] + add rsp, stack_size +%endmacro + +%elifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg0.w edi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define tmp2 r10 + %define tmp3 r12 + %define tmp4 r13 + %define tmp5 r14 + %define return rax + %define return.w eax + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + %endmacro + %macro FUNC_RESTORE 0 + pop r14 + pop r13 + pop r12 + %endmacro +%endif 
+ +;;; gf_6vect_mad_avx(len, vec, vec_i, mul_array, src, dest) +%define len arg0 +%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest1 arg5 +%define pos return +%define pos.w return.w + +%define dest2 tmp4 +%define dest3 tmp2 +%define dest4 mul_array +%define dest5 tmp5 +%define dest6 vec_i + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + + +default rel + +[bits 64] +section .text + +%define xmask0f xmm15 +%define xgft4_lo xmm14 +%define xgft4_hi xmm13 +%define xgft5_lo xmm12 +%define xgft5_hi xmm11 +%define xgft6_lo xmm10 +%define xgft6_hi xmm9 + +%define x0 xmm0 +%define xtmpa xmm1 +%define xtmph1 xmm2 +%define xtmpl1 xmm3 +%define xtmph2 xmm4 +%define xtmpl2 xmm5 +%define xtmph3 xmm6 +%define xtmpl3 xmm7 +%define xd1 xmm8 +%define xd2 xtmpl1 +%define xd3 xtmph1 + + +align 16 +global gf_6vect_mad_avx:ISAL_SYM_TYPE_FUNCTION +func(gf_6vect_mad_avx) + FUNC_SAVE + sub len, 16 + jl .return_fail + xor pos, pos + vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + mov tmp, vec + sal vec_i, 5 ;Multiply by 32 + lea tmp3, [mul_array + vec_i] + sal tmp, 6 ;Multiply by 64 + + sal vec, 5 ;Multiply by 32 + lea vec_i, [tmp + vec] ;vec_i = vec*96 + lea mul_array, [tmp + vec_i] ;mul_array = vec*160 + + vmovdqu xgft5_lo, [tmp3+2*tmp] ;Load array Ex{00}, Ex{01}, ..., Ex{0f} + vmovdqu xgft5_hi, [tmp3+2*tmp+16] ; " Ex{00}, Ex{10}, ..., Ex{f0} + vmovdqu xgft4_lo, [tmp3+vec_i] ;Load array Dx{00}, Dx{01}, Dx{02}, ... + vmovdqu xgft4_hi, [tmp3+vec_i+16] ; " Dx{00}, Dx{10}, Dx{20}, ... , Dx{f0} + vmovdqu xgft6_lo, [tmp3+mul_array] ;Load array Fx{00}, Fx{01}, ..., Fx{0f} + vmovdqu xgft6_hi, [tmp3+mul_array+16] ; " Fx{00}, Fx{10}, ..., Fx{f0} + + mov dest2, [dest1+PS] + mov dest3, [dest1+2*PS] + mov dest4, [dest1+3*PS] ; reuse mul_array + mov dest5, [dest1+4*PS] + mov dest6, [dest1+5*PS] ; reuse vec_i + mov dest1, [dest1] + +.loop16: + XLDR x0, [src+pos] ;Get next source vector + + vmovdqu xtmpl1, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ... + vmovdqu xtmph1, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0} + vmovdqu xtmpl2, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ... + vmovdqu xtmph2, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0} + vmovdqu xtmpl3, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ... + vmovdqu xtmph3, [tmp3+2*vec+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0} + XLDR xd1, [dest1+pos] ;Get next dest vector + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + + ;dest1 + vpshufb xtmph1, x0 ;Lookup mul table of high nibble + vpshufb xtmpl1, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmpl1 ;GF add high and low partials + vpxor xd1, xtmph1 + + XLDR xd2, [dest2+pos] ;reuse xtmpl1. Get next dest vector + XLDR xd3, [dest3+pos] ;reuse xtmph1. 
Get next dest vector + + ;dest2 + vpshufb xtmph2, x0 ;Lookup mul table of high nibble + vpshufb xtmpl2, xtmpa ;Lookup mul table of low nibble + vpxor xtmph2, xtmpl2 ;GF add high and low partials + vpxor xd2, xtmph2 + + ;dest3 + vpshufb xtmph3, x0 ;Lookup mul table of high nibble + vpshufb xtmpl3, xtmpa ;Lookup mul table of low nibble + vpxor xtmph3, xtmpl3 ;GF add high and low partials + vpxor xd3, xtmph3 + + XSTR [dest1+pos], xd1 ;Store result into dest1 + XSTR [dest2+pos], xd2 ;Store result into dest2 + XSTR [dest3+pos], xd3 ;Store result into dest3 + + ;dest4 + XLDR xd1, [dest4+pos] ;Get next dest vector + vpshufb xtmph1, xgft4_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl1, xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph1, xtmph1, xtmpl1 ;GF add high and low partials + vpxor xd1, xd1, xtmph1 + + XLDR xd2, [dest5+pos] ;reuse xtmpl1. Get next dest vector + XLDR xd3, [dest6+pos] ;reuse xtmph1. Get next dest vector + + ;dest5 + vpshufb xtmph2, xgft5_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl2, xgft5_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph2, xtmph2, xtmpl2 ;GF add high and low partials + vpxor xd2, xd2, xtmph2 + + ;dest6 + vpshufb xtmph3, xgft6_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl3, xgft6_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph3, xtmph3, xtmpl3 ;GF add high and low partials + vpxor xd3, xd3, xtmph3 + + XSTR [dest4+pos], xd1 ;Store result into dest4 + XSTR [dest5+pos], xd2 ;Store result into dest5 + XSTR [dest6+pos], xd3 ;Store result into dest6 + + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + +.lessthan16: + ;; Tail len + ;; Do one more overlap pass + ;; Overlapped offset length-16 + mov tmp, len ;Backup len as len=rdi + + XLDR x0, [src+tmp] ;Get next source vector + XLDR xd1, [dest4+tmp] ;Get next dest vector + XLDR xd2, [dest5+tmp] ;reuse xtmpl1. Get next dest vector + XLDR xd3, [dest6+tmp] ;reuse xtmph1. Get next dest vector + + sub len, pos + + vmovdqa xtmph3, [constip16] ;Load const of i + 16 + vpinsrb xtmpl3, len.w, 15 + vpshufb xtmpl3, xmask0f ;Broadcast len to all bytes + vpcmpgtb xtmpl3, xtmpl3, xtmph3 + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + ;dest4 + vpshufb xgft4_hi, xgft4_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft4_hi, xgft4_hi, xgft4_lo ;GF add high and low partials + vpand xgft4_hi, xgft4_hi, xtmpl3 + vpxor xd1, xd1, xgft4_hi + + ;dest5 + vpshufb xgft5_hi, xgft5_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft5_lo, xgft5_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft5_hi, xgft5_hi, xgft5_lo ;GF add high and low partials + vpand xgft5_hi, xgft5_hi, xtmpl3 + vpxor xd2, xd2, xgft5_hi + + ;dest6 + vpshufb xgft6_hi, xgft6_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft6_lo, xgft6_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft6_hi, xgft6_hi, xgft6_lo ;GF add high and low partials + vpand xgft6_hi, xgft6_hi, xtmpl3 + vpxor xd3, xd3, xgft6_hi + + XSTR [dest4+tmp], xd1 ;Store result into dest4 + XSTR [dest5+tmp], xd2 ;Store result into dest5 + XSTR [dest6+tmp], xd3 ;Store result into dest6 + + vmovdqu xgft4_lo, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ... + vmovdqu xgft4_hi, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... 
, Ax{f0} + vmovdqu xgft5_lo, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ... + vmovdqu xgft5_hi, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0} + vmovdqu xgft6_lo, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ... + vmovdqu xgft6_hi, [tmp3+2*vec+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0} + XLDR xd1, [dest1+tmp] ;Get next dest vector + XLDR xd2, [dest2+tmp] ;reuse xtmpl1. Get next dest vector + XLDR xd3, [dest3+tmp] ;reuse xtmph1. Get next dest3 vector + + ;dest1 + vpshufb xgft4_hi, xgft4_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft4_hi, xgft4_hi, xgft4_lo ;GF add high and low partials + vpand xgft4_hi, xgft4_hi, xtmpl3 + vpxor xd1, xd1, xgft4_hi + + ;dest2 + vpshufb xgft5_hi, xgft5_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft5_lo, xgft5_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft5_hi, xgft5_hi, xgft5_lo ;GF add high and low partials + vpand xgft5_hi, xgft5_hi, xtmpl3 + vpxor xd2, xd2, xgft5_hi + + ;dest3 + vpshufb xgft6_hi, xgft6_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft6_lo, xgft6_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft6_hi, xgft6_hi, xgft6_lo ;GF add high and low partials + vpand xgft6_hi, xgft6_hi, xtmpl3 + vpxor xd3, xd3, xgft6_hi + + XSTR [dest1+tmp], xd1 ;Store result into dest1 + XSTR [dest2+tmp], xd2 ;Store result into dest2 + XSTR [dest3+tmp], xd3 ;Store result into dest3 + +.return_pass: + FUNC_RESTORE + mov return, 0 + ret + +.return_fail: + FUNC_RESTORE + mov return, 1 + ret + +endproc_frame + +section .data + +align 16 +mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f +constip16: + dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7 + +;;; func core, ver, snum +slversion gf_6vect_mad_avx, 02, 01, 0210 diff --git a/src/isa-l/erasure_code/gf_6vect_mad_avx2.asm b/src/isa-l/erasure_code/gf_6vect_mad_avx2.asm new file mode 100644 index 000000000..b344532fe --- /dev/null +++ b/src/isa-l/erasure_code/gf_6vect_mad_avx2.asm @@ -0,0 +1,400 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_6vect_mad_avx2(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%define PS 8 + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg0.w ecx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 r13 + %define return rax + %define return.w eax + %define stack_size 16*10 + 3*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + +%macro FUNC_SAVE 0 + sub rsp, stack_size + movdqa [rsp+16*0],xmm6 + movdqa [rsp+16*1],xmm7 + movdqa [rsp+16*2],xmm8 + movdqa [rsp+16*3],xmm9 + movdqa [rsp+16*4],xmm10 + movdqa [rsp+16*5],xmm11 + movdqa [rsp+16*6],xmm12 + movdqa [rsp+16*7],xmm13 + movdqa [rsp+16*8],xmm14 + movdqa [rsp+16*9],xmm15 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r15, 10*16 + 2*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) +%endmacro + +%macro FUNC_RESTORE 0 + movdqa xmm6, [rsp+16*0] + movdqa xmm7, [rsp+16*1] + movdqa xmm8, [rsp+16*2] + movdqa xmm9, [rsp+16*3] + movdqa xmm10, [rsp+16*4] + movdqa xmm11, [rsp+16*5] + movdqa xmm12, [rsp+16*6] + movdqa xmm13, [rsp+16*7] + movdqa xmm14, [rsp+16*8] + movdqa xmm15, [rsp+16*9] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r15, [rsp + 10*16 + 2*8] + add rsp, stack_size +%endmacro + +%elifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg0.w edi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 r12 + %define return rax + %define return.w eax + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + %endmacro + %macro FUNC_RESTORE 0 + pop r12 + %endmacro +%endif + +;;; gf_6vect_mad_avx2(len, vec, vec_i, mul_array, src, dest) +%define len arg0 +%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest1 arg5 +%define pos return +%define pos.w return.w + +%define dest2 tmp3 +%define dest3 tmp2 +%define dest4 mul_array +%define dest5 vec +%define dest6 vec_i + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + + +default rel + +[bits 64] +section .text + +%define xmask0f ymm15 +%define xmask0fx xmm15 +%define xgft1_lo ymm14 +%define xgft2_lo ymm13 +%define xgft3_lo ymm12 +%define xgft4_lo ymm11 +%define xgft5_lo ymm10 +%define xgft6_lo ymm9 + +%define x0 ymm0 +%define xtmpa ymm1 +%define xtmpl ymm2 +%define xtmplx xmm2 +%define xtmph ymm3 +%define xtmphx xmm3 +%define xd1 ymm4 +%define xd2 ymm5 +%define xd3 ymm6 +%define xd4 ymm7 +%define xd5 ymm8 +%define xd6 xd1 + 
+align 16
+global gf_6vect_mad_avx2:ISAL_SYM_TYPE_FUNCTION
+func(gf_6vect_mad_avx2)
+	FUNC_SAVE
+	sub	len, 32
+	jl	.return_fail
+	xor	pos, pos
+	mov	tmp.b, 0x0f
+	vpinsrb	xmask0fx, xmask0fx, tmp.w, 0
+	vpbroadcastb xmask0f, xmask0fx	;Construct mask 0x0f0f0f...
+
+	sal	vec_i, 5		;Multiply by 32
+	sal	vec, 5			;Multiply by 32
+	lea	tmp, [mul_array + vec_i]
+	mov	vec_i, vec
+	mov	mul_array, vec
+	sal	vec_i, 1
+	sal	mul_array, 1
+	add	vec_i, vec		;vec_i=vec*96
+	add	mul_array, vec_i	;mul_array=vec*160
+
+	vmovdqu	xgft1_lo, [tmp]		;Load array Ax{00}, Ax{01}, ..., Ax{0f}
+					; " Ax{00}, Ax{10}, ..., Ax{f0}
+	vmovdqu	xgft2_lo, [tmp+vec]	;Load array Bx{00}, Bx{01}, ..., Bx{0f}
+					; " Bx{00}, Bx{10}, ..., Bx{f0}
+	vmovdqu	xgft3_lo, [tmp+2*vec]	;Load array Cx{00}, Cx{01}, ..., Cx{0f}
+					; " Cx{00}, Cx{10}, ..., Cx{f0}
+	vmovdqu	xgft4_lo, [tmp+vec_i]	;Load array Dx{00}, Dx{01}, ..., Dx{0f}
+					; " Dx{00}, Dx{10}, ..., Dx{f0}
+	vmovdqu	xgft5_lo, [tmp+4*vec]	;Load array Ex{00}, Ex{01}, ..., Ex{0f}
+					; " Ex{00}, Ex{10}, ..., Ex{f0}
+	vmovdqu	xgft6_lo, [tmp+mul_array]	;Load array Fx{00}, Fx{01}, ..., Fx{0f}
+					; " Fx{00}, Fx{10}, ..., Fx{f0}
+
+	mov	dest2, [dest1+PS]	; reuse tmp3
+	mov	dest3, [dest1+2*PS]	; reuse tmp2
+	mov	dest4, [dest1+3*PS]	; reuse mul_array
+	mov	dest5, [dest1+4*PS]	; reuse vec
+	mov	dest6, [dest1+5*PS]	; reuse vec_i
+	mov	dest1, [dest1]
+
+.loop32:
+	XLDR	x0, [src+pos]		;Get next source vector
+	XLDR	xd1, [dest1+pos]	;Get next dest vector
+	XLDR	xd2, [dest2+pos]	;Get next dest vector
+	XLDR	xd3, [dest3+pos]	;Get next dest vector
+	XLDR	xd4, [dest4+pos]	;Get next dest vector
+	XLDR	xd5, [dest5+pos]	;Get next dest vector
+
+	vpand	xtmpl, x0, xmask0f	;Mask low src nibble in bits 4-0
+	vpsraw	x0, x0, 4		;Shift to put high nibble into bits 4-0
+	vpand	x0, x0, xmask0f		;Mask high src nibble in bits 4-0
+	vperm2i128 xtmpa, xtmpl, x0, 0x30	;swap xtmpa from 1lo|2lo to 1lo|2hi
+	vperm2i128 x0, xtmpl, x0, 0x12	;swap x0 from 1hi|2hi to 1hi|2lo
+
+	;dest1
+	vperm2i128 xtmph, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
+	vpshufb	xtmph, xtmph, x0	;Lookup mul table of high nibble
+	vpshufb	xtmpl, xgft1_lo, xtmpa	;Lookup mul table of low nibble
+	vpxor	xtmph, xtmph, xtmpl	;GF add high and low partials
+	vpxor	xd1, xd1, xtmph		;xd1 += partial
+
+	XSTR	[dest1+pos], xd1	;Store result into dest1
+
+	;dest2
+	vperm2i128 xtmph, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
+	vpshufb	xtmph, xtmph, x0	;Lookup mul table of high nibble
+	vpshufb	xtmpl, xgft2_lo, xtmpa	;Lookup mul table of low nibble
+	vpxor	xtmph, xtmph, xtmpl	;GF add high and low partials
+	vpxor	xd2, xd2, xtmph		;xd2 += partial
+
+	;dest3
+	vperm2i128 xtmph, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
+	vpshufb	xtmph, xtmph, x0	;Lookup mul table of high nibble
+	vpshufb	xtmpl, xgft3_lo, xtmpa	;Lookup mul table of low nibble
+	vpxor	xtmph, xtmph, xtmpl	;GF add high and low partials
+	vpxor	xd3, xd3, xtmph		;xd3 += partial
+
+	XLDR	xd6, [dest6+pos]	;reuse xd1.
Get next dest vector + + ;dest4 + vperm2i128 xtmph, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo + vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble + vpshufb xtmpl, xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph, xtmph, xtmpl ;GF add high and low partials + vpxor xd4, xd4, xtmph ;xd4 += partial + + ;dest5 + vperm2i128 xtmph, xgft5_lo, xgft5_lo, 0x01 ; swapped to hi | lo + vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble + vpshufb xtmpl, xgft5_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph, xtmph, xtmpl ;GF add high and low partials + vpxor xd5, xd5, xtmph ;xd5 += partial + + ;dest6 + vperm2i128 xtmph, xgft6_lo, xgft6_lo, 0x01 ; swapped to hi | lo + vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble + vpshufb xtmpl, xgft6_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph, xtmph, xtmpl ;GF add high and low partials + vpxor xd6, xd6, xtmph ;xd6 += partial + + XSTR [dest2+pos], xd2 ;Store result into dest2 + XSTR [dest3+pos], xd3 ;Store result into dest3 + XSTR [dest4+pos], xd4 ;Store result into dest4 + XSTR [dest5+pos], xd5 ;Store result into dest5 + XSTR [dest6+pos], xd6 ;Store result into dest6 + + add pos, 32 ;Loop on 32 bytes at a time + cmp pos, len + jle .loop32 + + lea tmp, [len + 32] + cmp pos, tmp + je .return_pass + +.lessthan32: + ;; Tail len + ;; Do one more overlap pass + mov tmp.b, 0x1f + vpinsrb xtmphx, xtmphx, tmp.w, 0 + vpbroadcastb xtmph, xtmphx ;Construct mask 0x1f1f1f... + + mov tmp, len ;Overlapped offset length-32 + + XLDR x0, [src+tmp] ;Get next source vector + XLDR xd1, [dest1+tmp] ;Get next dest vector + XLDR xd2, [dest2+tmp] ;Get next dest vector + XLDR xd3, [dest3+tmp] ;Get next dest vector + XLDR xd4, [dest4+tmp] ;Get next dest vector + XLDR xd5, [dest5+tmp] ;Get next dest vector + + sub len, pos + + vpinsrb xtmplx, xtmplx, len.w, 15 + vinserti128 xtmpl, xtmpl, xtmplx, 1 ;swapped to xtmplx | xtmplx + vpshufb xtmpl, xtmpl, xtmph ;Broadcast len to all bytes. xtmph=0x1f1f1f... + vpcmpgtb xtmpl, xtmpl, [constip32] + + vpand xtmph, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + vperm2i128 xtmpa, xtmph, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi + vperm2i128 x0, xtmph, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo + + ;dest1 + vperm2i128 xtmph, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo + vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph, xtmph, xgft1_lo ;GF add high and low partials + vpand xtmph, xtmph, xtmpl + vpxor xd1, xd1, xtmph ;xd1 += partial + + XSTR [dest1+tmp], xd1 ;Store result into dest1 + + ;dest2 + vperm2i128 xtmph, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo + vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble + vpshufb xgft2_lo, xgft2_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph, xtmph, xgft2_lo ;GF add high and low partials + vpand xtmph, xtmph, xtmpl + vpxor xd2, xd2, xtmph ;xd2 += partial + + ;dest3 + vperm2i128 xtmph, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo + vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble + vpshufb xgft3_lo, xgft3_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph, xtmph, xgft3_lo ;GF add high and low partials + vpand xtmph, xtmph, xtmpl + vpxor xd3, xd3, xtmph ;xd3 += partial + + XLDR xd6, [dest6+tmp] ;reuse xd1. 
Get next dest vector + + ;dest4 + vperm2i128 xtmph, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo + vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble + vpshufb xgft4_lo, xgft4_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph, xtmph, xgft4_lo ;GF add high and low partials + vpand xtmph, xtmph, xtmpl + vpxor xd4, xd4, xtmph ;xd4 += partial + + ;dest5 + vperm2i128 xtmph, xgft5_lo, xgft5_lo, 0x01 ; swapped to hi | lo + vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble + vpshufb xgft5_lo, xgft5_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph, xtmph, xgft5_lo ;GF add high and low partials + vpand xtmph, xtmph, xtmpl + vpxor xd5, xd5, xtmph ;xd5 += partial + + ;dest6 + vperm2i128 xtmph, xgft6_lo, xgft6_lo, 0x01 ; swapped to hi | lo + vpshufb xtmph, xtmph, x0 ;Lookup mul table of high nibble + vpshufb xgft6_lo, xgft6_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph, xtmph, xgft6_lo ;GF add high and low partials + vpand xtmph, xtmph, xtmpl + vpxor xd6, xd6, xtmph ;xd6 += partial + + XSTR [dest2+tmp], xd2 ;Store result into dest2 + XSTR [dest3+tmp], xd3 ;Store result into dest3 + XSTR [dest4+tmp], xd4 ;Store result into dest4 + XSTR [dest5+tmp], xd5 ;Store result into dest5 + XSTR [dest6+tmp], xd6 ;Store result into dest6 + +.return_pass: + FUNC_RESTORE + mov return, 0 + ret + +.return_fail: + FUNC_RESTORE + mov return, 1 + ret + +endproc_frame + +section .data +align 32 +constip32: + dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7 + dq 0xe8e9eaebecedeeef, 0xe0e1e2e3e4e5e6e7 + +;;; func core, ver, snum +slversion gf_6vect_mad_avx2, 04, 01, 0211 diff --git a/src/isa-l/erasure_code/gf_6vect_mad_avx512.asm b/src/isa-l/erasure_code/gf_6vect_mad_avx512.asm new file mode 100644 index 000000000..5f31bf1da --- /dev/null +++ b/src/isa-l/erasure_code/gf_6vect_mad_avx512.asm @@ -0,0 +1,321 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2019 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_6vect_mad_avx512(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%ifdef HAVE_AS_KNOWS_AVX512 + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define tmp2 r10 + %define tmp3 r12 ;must be saved and restored + %define return rax + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + %endmacro + %macro FUNC_RESTORE 0 + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + %define tmp r11 + %define tmp2 r10 + %define tmp3 r13 + %define return rax + %define stack_size 16*10 + 3*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + +%macro FUNC_SAVE 0 + sub rsp, stack_size + movdqa [rsp+16*0],xmm6 + movdqa [rsp+16*1],xmm7 + movdqa [rsp+16*2],xmm8 + movdqa [rsp+16*3],xmm9 + movdqa [rsp+16*4],xmm10 + movdqa [rsp+16*5],xmm11 + movdqa [rsp+16*6],xmm12 + movdqa [rsp+16*7],xmm13 + movdqa [rsp+16*8],xmm14 + movdqa [rsp+16*9],xmm15 + save_reg r12, 10*16 + 0*8 + save_reg r15, 10*16 + 1*8 + save_reg r13, 10*16 + 2*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) +%endmacro + +%macro FUNC_RESTORE 0 + movdqa xmm6, [rsp+16*0] + movdqa xmm7, [rsp+16*1] + movdqa xmm8, [rsp+16*2] + movdqa xmm9, [rsp+16*3] + movdqa xmm10, [rsp+16*4] + movdqa xmm11, [rsp+16*5] + movdqa xmm12, [rsp+16*6] + movdqa xmm13, [rsp+16*7] + movdqa xmm14, [rsp+16*8] + movdqa xmm15, [rsp+16*9] + mov r12, [rsp + 10*16 + 0*8] + mov r15, [rsp + 10*16 + 1*8] + mov r13, [rsp + 10*16 + 2*8] + add rsp, stack_size +%endmacro +%endif + +%define PS 8 +%define len arg0 +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest1 arg5 +%define pos return +%define dest2 tmp3 +%define dest3 tmp2 +%define dest4 mul_array +%define dest5 vec +%define dest6 vec_i + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu8 + %define XSTR vmovdqu8 +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + +default rel +[bits 64] +section .text + +%define x0 zmm0 +%define xtmpa zmm1 +%define xtmpl1 zmm2 +%define xtmph1 zmm3 +%define xgft1_hi zmm4 +%define xgft1_lo zmm5 +%define xgft1_loy ymm5 +%define xgft2_hi zmm6 +%define xgft2_lo zmm7 +%define xgft2_loy ymm7 +%define xgft3_hi zmm8 +%define xgft3_lo zmm9 +%define xgft3_loy ymm9 +%define xgft4_hi zmm10 +%define xgft4_lo zmm11 +%define xgft4_loy ymm11 +%define xgft5_hi zmm12 +%define xgft5_lo zmm13 +%define xgft5_loy ymm13 +%define xgft6_hi zmm14 +%define xgft6_lo zmm15 +%define xgft6_loy ymm15 +%define xd1 zmm16 +%define xd2 zmm17 +%define xd3 zmm18 +%define xd4 zmm19 +%define xd5 zmm20 +%define xd6 zmm21 +%define xmask0f zmm22 +%define xtmpl2 zmm23 +%define xtmpl3 zmm24 +%define xtmpl4 zmm25 +%define xtmpl5 zmm26 +%define xtmph2 zmm27 +%define xtmph3 zmm28 +%define xtmph4 zmm29 +%define xtmph5 zmm30 +%define xtmph6 zmm31 + +align 16 +global gf_6vect_mad_avx512:ISAL_SYM_TYPE_FUNCTION +func(gf_6vect_mad_avx512) + FUNC_SAVE + sub len, 64 + jl .return_fail + xor pos, pos + mov tmp, 0x0f + vpbroadcastb xmask0f, tmp ;Construct mask 0x0f0f0f... 
+	sal	vec_i, 5		;Multiply by 32
+	sal	vec, 5			;Multiply by 32
+	lea	tmp, [mul_array + vec_i]
+	mov	vec_i, vec
+	mov	mul_array, vec
+	sal	vec_i, 1		;vec_i=vec*64
+	sal	mul_array, 1		;mul_array=vec*64
+	add	vec_i, vec		;vec_i=vec*96
+	add	mul_array, vec_i	;mul_array=vec*160
+
+	vmovdqu	xgft1_loy, [tmp]	;Load array Ax{00}..{0f}, Ax{00}..{f0}
+	vmovdqu	xgft2_loy, [tmp+vec]	;Load array Bx{00}..{0f}, Bx{00}..{f0}
+	vmovdqu	xgft3_loy, [tmp+2*vec]	;Load array Cx{00}..{0f}, Cx{00}..{f0}
+	vmovdqu	xgft4_loy, [tmp+vec_i]	;Load array Dx{00}..{0f}, Dx{00}..{f0}
+	vmovdqu	xgft5_loy, [tmp+4*vec]	;Load array Ex{00}..{0f}, Ex{00}..{f0}
+	vmovdqu	xgft6_loy, [tmp+mul_array]	;Load array Fx{00}..{0f}, Fx{00}..{f0}
+
+	vshufi64x2 xgft1_hi, xgft1_lo, xgft1_lo, 0x55
+	vshufi64x2 xgft1_lo, xgft1_lo, xgft1_lo, 0x00
+	vshufi64x2 xgft2_hi, xgft2_lo, xgft2_lo, 0x55
+	vshufi64x2 xgft2_lo, xgft2_lo, xgft2_lo, 0x00
+	vshufi64x2 xgft3_hi, xgft3_lo, xgft3_lo, 0x55
+	vshufi64x2 xgft3_lo, xgft3_lo, xgft3_lo, 0x00
+	vshufi64x2 xgft4_hi, xgft4_lo, xgft4_lo, 0x55
+	vshufi64x2 xgft4_lo, xgft4_lo, xgft4_lo, 0x00
+	vshufi64x2 xgft5_hi, xgft5_lo, xgft5_lo, 0x55
+	vshufi64x2 xgft5_lo, xgft5_lo, xgft5_lo, 0x00
+	vshufi64x2 xgft6_hi, xgft6_lo, xgft6_lo, 0x55
+	vshufi64x2 xgft6_lo, xgft6_lo, xgft6_lo, 0x00
+
+	mov	dest2, [dest1+PS]
+	mov	dest3, [dest1+2*PS]
+	mov	dest4, [dest1+3*PS]	; reuse mul_array
+	mov	dest5, [dest1+4*PS]	; reuse vec
+	mov	dest6, [dest1+5*PS]	; reuse vec_i
+	mov	dest1, [dest1]
+	mov	tmp, -1
+	kmovq	k1, tmp
+
+.loop64:
+	XLDR	x0, [src+pos]		;Get next source vector
+	XLDR	xd1, [dest1+pos]	;Get next dest vector
+	XLDR	xd2, [dest2+pos]	;Get next dest vector
+	XLDR	xd3, [dest3+pos]	;Get next dest vector
+	XLDR	xd4, [dest4+pos]	;Get next dest vector
+	XLDR	xd5, [dest5+pos]	;Get next dest vector
+	XLDR	xd6, [dest6+pos]	;Get next dest vector
+
+	vpandq	xtmpa, x0, xmask0f	;Mask low src nibble in bits 4-0
+	vpsraw	x0, x0, 4		;Shift to put high nibble into bits 4-0
+	vpandq	x0, x0, xmask0f		;Mask high src nibble in bits 4-0
+
+	; dest1
+	vpshufb	xtmph1 {k1}{z}, xgft1_hi, x0	;Lookup mul table of high nibble
+	vpshufb	xtmpl1 {k1}{z}, xgft1_lo, xtmpa	;Lookup mul table of low nibble
+	vpxorq	xtmph1, xtmph1, xtmpl1	;GF add high and low partials
+	vpxorq	xd1, xd1, xtmph1	;xd1 += partial
+
+	; dest2
+	vpshufb	xtmph2 {k1}{z}, xgft2_hi, x0	;Lookup mul table of high nibble
+	vpshufb	xtmpl2 {k1}{z}, xgft2_lo, xtmpa	;Lookup mul table of low nibble
+	vpxorq	xtmph2, xtmph2, xtmpl2	;GF add high and low partials
+	vpxorq	xd2, xd2, xtmph2	;xd2 += partial
+
+	; dest3
+	vpshufb	xtmph3 {k1}{z}, xgft3_hi, x0	;Lookup mul table of high nibble
+	vpshufb	xtmpl3 {k1}{z}, xgft3_lo, xtmpa	;Lookup mul table of low nibble
+	vpxorq	xtmph3, xtmph3, xtmpl3	;GF add high and low partials
+	vpxorq	xd3, xd3, xtmph3	;xd3 += partial
+
+	; dest4
+	vpshufb	xtmph4 {k1}{z}, xgft4_hi, x0	;Lookup mul table of high nibble
+	vpshufb	xtmpl4 {k1}{z}, xgft4_lo, xtmpa	;Lookup mul table of low nibble
+	vpxorq	xtmph4, xtmph4, xtmpl4	;GF add high and low partials
+	vpxorq	xd4, xd4, xtmph4	;xd4 += partial
+
+	; dest5
+	vpshufb	xtmph5 {k1}{z}, xgft5_hi, x0	;Lookup mul table of high nibble
+	vpshufb	xtmpl5 {k1}{z}, xgft5_lo, xtmpa	;Lookup mul table of low nibble
+	vpxorq	xtmph5, xtmph5, xtmpl5	;GF add high and low partials
+	vpxorq	xd5, xd5, xtmph5	;xd5 += partial
+
+	; dest6
+	vpshufb	xtmph6 {k1}{z}, xgft6_hi, x0	;Lookup mul table of high nibble
+	vpshufb	xtmpl5 {k1}{z}, xgft6_lo, xtmpa	;Lookup mul table of low nibble.
Reuse xtmpl5 + vpxorq xtmph6, xtmph6, xtmpl5 ;GF add high and low partials. + vpxorq xd6, xd6, xtmph6 ;xd6 += partial + + XSTR [dest1+pos], xd1 + XSTR [dest2+pos], xd2 + XSTR [dest3+pos], xd3 + XSTR [dest4+pos], xd4 + XSTR [dest5+pos], xd5 + XSTR [dest6+pos], xd6 + + add pos, 64 ;Loop on 64 bytes at a time + cmp pos, len + jle .loop64 + + lea tmp, [len + 64] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, (1 << 63) + lea tmp, [len + 64 - 1] + and tmp, 63 + sarx pos, pos, tmp + kmovq k1, pos + mov pos, len ;Overlapped offset length-64 + jmp .loop64 ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +%else +%ifidn __OUTPUT_FORMAT__, win64 +global no_gf_6vect_mad_avx512 +no_gf_6vect_mad_avx512: +%endif +%endif ; ifdef HAVE_AS_KNOWS_AVX512 diff --git a/src/isa-l/erasure_code/gf_6vect_mad_sse.asm b/src/isa-l/erasure_code/gf_6vect_mad_sse.asm new file mode 100644 index 000000000..4fed2aad9 --- /dev/null +++ b/src/isa-l/erasure_code/gf_6vect_mad_sse.asm @@ -0,0 +1,406 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_6vect_mad_sse(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%define PS 8 + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg0.w ecx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + %define tmp r11 + %define tmp.w r11d + %define tmp2 r10 + %define tmp3 r13 + %define tmp4 r14 + %define tmp5 rdi + %define return rax + %define return.w eax + %define stack_size 16*10 + 5*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + +%macro FUNC_SAVE 0 + sub rsp, stack_size + movdqa [rsp+16*0],xmm6 + movdqa [rsp+16*1],xmm7 + movdqa [rsp+16*2],xmm8 + movdqa [rsp+16*3],xmm9 + movdqa [rsp+16*4],xmm10 + movdqa [rsp+16*5],xmm11 + movdqa [rsp+16*6],xmm12 + movdqa [rsp+16*7],xmm13 + movdqa [rsp+16*8],xmm14 + movdqa [rsp+16*9],xmm15 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 + save_reg r15, 10*16 + 3*8 + save_reg rdi, 10*16 + 4*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) +%endmacro + +%macro FUNC_RESTORE 0 + movdqa xmm6, [rsp+16*0] + movdqa xmm7, [rsp+16*1] + movdqa xmm8, [rsp+16*2] + movdqa xmm9, [rsp+16*3] + movdqa xmm10, [rsp+16*4] + movdqa xmm11, [rsp+16*5] + movdqa xmm12, [rsp+16*6] + movdqa xmm13, [rsp+16*7] + movdqa xmm14, [rsp+16*8] + movdqa xmm15, [rsp+16*9] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + mov rdi, [rsp + 10*16 + 4*8] + add rsp, stack_size +%endmacro + +%elifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg0.w edi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define tmp.w r11d + %define tmp2 r10 + %define tmp3 r12 + %define tmp4 r13 + %define tmp5 r14 + %define return rax + %define return.w eax + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + %endmacro + %macro FUNC_RESTORE 0 + pop r14 + pop r13 + pop r12 + %endmacro +%endif + +;;; gf_6vect_mad_sse(len, vec, vec_i, mul_array, src, dest) +%define len arg0 +%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest1 arg5 +%define pos return +%define pos.w return.w + +%define dest2 mul_array +%define dest3 tmp2 +%define dest4 tmp4 +%define dest5 tmp5 +%define dest6 vec_i + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR movdqu + %define XSTR movdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR movdqa + %define XSTR movdqa + %else + %define XLDR movntdqa + %define XSTR movntdq + %endif +%endif + +default rel + +[bits 64] +section .text + +%define xmask0f xmm15 +%define xgft4_lo xmm14 +%define xgft4_hi xmm13 +%define xgft5_lo xmm12 +%define xgft5_hi xmm11 +%define xgft6_lo xmm10 +%define xgft6_hi xmm9 + +%define x0 xmm0 +%define xtmpa xmm1 +%define xtmph1 xmm2 +%define xtmpl1 xmm3 +%define xtmph2 xmm4 +%define xtmpl2 xmm5 +%define xtmph3 xmm6 +%define xtmpl3 xmm7 +%define xd1 xmm8 +%define xd2 xtmpl1 +%define xd3 xtmph1 + + +align 16 +global gf_6vect_mad_sse:ISAL_SYM_TYPE_FUNCTION +func(gf_6vect_mad_sse) + FUNC_SAVE + sub len, 16 + jl .return_fail + + xor pos, pos + movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + + mov tmp, vec + sal vec_i, 5 ;Multiply by 32 + lea tmp3, [mul_array + vec_i] + sal tmp, 6 ;Multiply by 64 + + sal vec, 5 ;Multiply by 32 + lea vec_i, [tmp + vec] 
;vec_i = vec*96
+	lea	mul_array, [tmp + vec_i]	;mul_array = vec*160
+
+	movdqu	xgft5_lo, [tmp3+2*tmp]	;Load array Ex{00}, Ex{01}, ..., Ex{0f}
+	movdqu	xgft5_hi, [tmp3+2*tmp+16]	; " Ex{00}, Ex{10}, ..., Ex{f0}
+	movdqu	xgft4_lo, [tmp3+vec_i]	;Load array Dx{00}, Dx{01}, Dx{02}, ...
+	movdqu	xgft4_hi, [tmp3+vec_i+16]	; " Dx{00}, Dx{10}, Dx{20}, ... , Dx{f0}
+	movdqu	xgft6_lo, [tmp3+mul_array]	;Load array Fx{00}, Fx{01}, ..., Fx{0f}
+	movdqu	xgft6_hi, [tmp3+mul_array+16]	; " Fx{00}, Fx{10}, ..., Fx{f0}
+
+	mov	dest2, [dest1+PS]
+	mov	dest3, [dest1+2*PS]
+	mov	dest4, [dest1+3*PS]	; reuse mul_array
+	mov	dest5, [dest1+4*PS]
+	mov	dest6, [dest1+5*PS]	; reuse vec_i
+	mov	dest1, [dest1]
+
+.loop16:
+	XLDR	x0, [src+pos]		;Get next source vector
+
+	movdqu	xtmpl1, [tmp3]		;Load array Ax{00}, Ax{01}, Ax{02}, ...
+	movdqu	xtmph1, [tmp3+16]	; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0}
+	movdqu	xtmpl2, [tmp3+vec]	;Load array Bx{00}, Bx{01}, Bx{02}, ...
+	movdqu	xtmph2, [tmp3+vec+16]	; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0}
+	movdqu	xtmpl3, [tmp3+2*vec]	;Load array Cx{00}, Cx{01}, Cx{02}, ...
+	movdqu	xtmph3, [tmp3+2*vec+16]	; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
+	XLDR	xd1, [dest1+pos]	;Get next dest vector
+
+	movdqa	xtmpa, x0		;Keep unshifted copy of src
+	psraw	x0, 4			;Shift to put high nibble into bits 4-0
+	pand	x0, xmask0f		;Mask high src nibble in bits 4-0
+	pand	xtmpa, xmask0f		;Mask low src nibble in bits 4-0
+
+	;dest1
+	pshufb	xtmph1, x0		;Lookup mul table of high nibble
+	pshufb	xtmpl1, xtmpa		;Lookup mul table of low nibble
+	pxor	xtmph1, xtmpl1		;GF add high and low partials
+	pxor	xd1, xtmph1
+
+	XLDR	xd2, [dest2+pos]	;reuse xtmpl1. Get next dest vector
+	XLDR	xd3, [dest3+pos]	;reuse xtmph1. Get next dest3 vector
+
+	;dest2
+	pshufb	xtmph2, x0		;Lookup mul table of high nibble
+	pshufb	xtmpl2, xtmpa		;Lookup mul table of low nibble
+	pxor	xtmph2, xtmpl2		;GF add high and low partials
+	pxor	xd2, xtmph2
+
+	;dest3
+	pshufb	xtmph3, x0		;Lookup mul table of high nibble
+	pshufb	xtmpl3, xtmpa		;Lookup mul table of low nibble
+	pxor	xtmph3, xtmpl3		;GF add high and low partials
+	pxor	xd3, xtmph3
+
+	XSTR	[dest1+pos], xd1	;Store result into dest1
+	XSTR	[dest2+pos], xd2	;Store result into dest2
+	XSTR	[dest3+pos], xd3	;Store result into dest3
+
+	movdqa	xtmph1, xgft4_hi	;Reload const array registers
+	movdqa	xtmpl1, xgft4_lo	;Reload const array registers
+	movdqa	xtmph2, xgft5_hi	;Reload const array registers
+	movdqa	xtmpl2, xgft5_lo	;Reload const array registers
+	movdqa	xtmph3, xgft6_hi	;Reload const array registers
+	movdqa	xtmpl3, xgft6_lo	;Reload const array registers
+
+	;dest4
+	XLDR	xd1, [dest4+pos]	;Get next dest vector
+	pshufb	xtmph1, x0		;Lookup mul table of high nibble
+	pshufb	xtmpl1, xtmpa		;Lookup mul table of low nibble
+	pxor	xtmph1, xtmpl1		;GF add high and low partials
+	pxor	xd1, xtmph1
+
+	XLDR	xd2, [dest5+pos]	;reuse xtmpl1. Get next dest vector
+	XLDR	xd3, [dest6+pos]	;reuse xtmph1.
Get next dest vector + + ;dest5 + pshufb xtmph2, x0 ;Lookup mul table of high nibble + pshufb xtmpl2, xtmpa ;Lookup mul table of low nibble + pxor xtmph2, xtmpl2 ;GF add high and low partials + pxor xd2, xtmph2 + + ;dest6 + pshufb xtmph3, x0 ;Lookup mul table of high nibble + pshufb xtmpl3, xtmpa ;Lookup mul table of low nibble + pxor xtmph3, xtmpl3 ;GF add high and low partials + pxor xd3, xtmph3 + + XSTR [dest4+pos], xd1 ;Store result into dest4 + XSTR [dest5+pos], xd2 ;Store result into dest5 + XSTR [dest6+pos], xd3 ;Store result into dest6 + + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + +.lessthan16: + ;; Tail len + ;; Do one more overlap pass + ;; Overlapped offset length-16 + mov tmp, len ;Backup len as len=rdi + + XLDR x0, [src+tmp] ;Get next source vector + XLDR xd1, [dest4+tmp] ;Get next dest vector + XLDR xd2, [dest5+tmp] ;reuse xtmpl1. Get next dest vector + XLDR xd3, [dest6+tmp] ;reuse xtmph1. Get next dest vector + + sub len, pos + + movdqa xtmph3, [constip16] ;Load const of i + 16 + pinsrb xtmpl3, len.w, 15 + pshufb xtmpl3, xmask0f ;Broadcast len to all bytes + pcmpgtb xtmpl3, xtmph3 + + movdqa xtmpa, x0 ;Keep unshifted copy of src + psraw x0, 4 ;Shift to put high nibble into bits 4-0 + pand x0, xmask0f ;Mask high src nibble in bits 4-0 + pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0 + + ;dest4 + pshufb xgft4_hi, x0 ;Lookup mul table of high nibble + pshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft4_hi, xgft4_lo ;GF add high and low partials + pand xgft4_hi, xtmpl3 + pxor xd1, xgft4_hi + + ;dest5 + pshufb xgft5_hi, x0 ;Lookup mul table of high nibble + pshufb xgft5_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft5_hi, xgft5_lo ;GF add high and low partials + pand xgft5_hi, xtmpl3 + pxor xd2, xgft5_hi + + ;dest6 + pshufb xgft6_hi, x0 ;Lookup mul table of high nibble + pshufb xgft6_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft6_hi, xgft6_lo ;GF add high and low partials + pand xgft6_hi, xtmpl3 + pxor xd3, xgft6_hi + + XSTR [dest4+tmp], xd1 ;Store result into dest4 + XSTR [dest5+tmp], xd2 ;Store result into dest5 + XSTR [dest6+tmp], xd3 ;Store result into dest6 + + movdqu xgft4_lo, [tmp3] ;Load array Ax{00}, Ax{01}, Ax{02}, ... + movdqu xgft4_hi, [tmp3+16] ; " Ax{00}, Ax{10}, Ax{20}, ... , Ax{f0} + movdqu xgft5_lo, [tmp3+vec] ;Load array Bx{00}, Bx{01}, Bx{02}, ... + movdqu xgft5_hi, [tmp3+vec+16] ; " Bx{00}, Bx{10}, Bx{20}, ... , Bx{f0} + movdqu xgft6_lo, [tmp3+2*vec] ;Load array Cx{00}, Cx{01}, Cx{02}, ... + movdqu xgft6_hi, [tmp3+2*vec+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0} + XLDR xd1, [dest1+tmp] ;Get next dest vector + XLDR xd2, [dest2+tmp] ;reuse xtmpl1. Get next dest vector + XLDR xd3, [dest3+tmp] ;reuse xtmph1. 
Get next dest3 vector + + ;dest1 + pshufb xgft4_hi, x0 ;Lookup mul table of high nibble + pshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft4_hi, xgft4_lo ;GF add high and low partials + pand xgft4_hi, xtmpl3 + pxor xd1, xgft4_hi + + ;dest2 + pshufb xgft5_hi, x0 ;Lookup mul table of high nibble + pshufb xgft5_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft5_hi, xgft5_lo ;GF add high and low partials + pand xgft5_hi, xtmpl3 + pxor xd2, xgft5_hi + + ;dest3 + pshufb xgft6_hi, x0 ;Lookup mul table of high nibble + pshufb xgft6_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft6_hi, xgft6_lo ;GF add high and low partials + pand xgft6_hi, xtmpl3 + pxor xd3, xgft6_hi + + XSTR [dest1+tmp], xd1 ;Store result into dest1 + XSTR [dest2+tmp], xd2 ;Store result into dest2 + XSTR [dest3+tmp], xd3 ;Store result into dest3 + +.return_pass: + FUNC_RESTORE + mov return, 0 + ret + +.return_fail: + FUNC_RESTORE + mov return, 1 + ret + +endproc_frame + +section .data + +align 16 + +mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f +constip16: + dq 0xf8f9fafbfcfdfeff, 0xf0f1f2f3f4f5f6f7 + +;;; func core, ver, snum +slversion gf_6vect_mad_sse, 00, 01, 020f diff --git a/src/isa-l/erasure_code/gf_inverse_test.c b/src/isa-l/erasure_code/gf_inverse_test.c new file mode 100644 index 000000000..418d7c195 --- /dev/null +++ b/src/isa-l/erasure_code/gf_inverse_test.c @@ -0,0 +1,225 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>	// for memset, memcmp
+#include <assert.h>
+
+#include "erasure_code.h"
+
+#define TEST_LEN 8192
+
+#ifndef TEST_SOURCES
+# define TEST_SOURCES 128
+#endif
+#ifndef RANDOMS
+# define RANDOMS 200
+#endif
+
+#define KMAX TEST_SOURCES
+
+typedef unsigned char u8;
+
+void matrix_mult(u8 * a, u8 * b, u8 * c, int n)
+{
+        int i, j, k;
+        u8 d;
+
+        for (i = 0; i < n; i++) {
+                for (j = 0; j < n; j++) {
+                        d = 0;
+                        for (k = 0; k < n; k++) {
+                                d ^= gf_mul(a[n * i + k], b[n * k + j]);
+                        }
+                        c[i * n + j] = d;
+                }
+        }
+}
+
+void print_matrix(u8 * a, int n)
+{
+        int i, j;
+
+        for (i = 0; i < n; i++) {
+                for (j = 0; j < n; j++) {
+                        printf(" %2x", a[i * n + j]);
+                }
+                printf("\n");
+        }
+        printf("\n");
+}
+
+int is_ident(u8 * a, const int n)
+{
+        int i, j;
+        u8 c;
+        for (i = 0; i < n; i++) {
+                for (j = 0; j < n; j++) {
+                        c = *a++;
+                        if (i == j)
+                                c--;
+                        if (c != 0)
+                                return -1;
+                }
+        }
+        return 0;
+}
+
+int inv_test(u8 * in, u8 * inv, u8 * sav, int n)
+{
+        memcpy(sav, in, n * n);
+
+        if (gf_invert_matrix(in, inv, n)) {
+                printf("Given singular matrix\n");
+                print_matrix(sav, n);
+                return -1;
+        }
+
+        matrix_mult(inv, sav, in, n);
+
+        if (is_ident(in, n)) {
+                printf("fail\n");
+                print_matrix(sav, n);
+                print_matrix(inv, n);
+                print_matrix(in, n);
+                return -1;
+        }
+        putchar('.');
+
+        return 0;
+}
+
+int main(int argc, char *argv[])
+{
+        int i, k, t;
+        u8 *test_mat, *save_mat, *invr_mat;
+
+        u8 test1[] = { 1, 1, 6,
+                1, 1, 1,
+                7, 1, 9
+        };
+
+        u8 test2[] = { 0, 1, 6,
+                1, 0, 1,
+                0, 1, 9
+        };
+
+        u8 test3[] = { 0, 0, 1,
+                1, 0, 0,
+                0, 1, 1
+        };
+
+        u8 test4[] = { 0, 1, 6, 7,
+                1, 1, 0, 0,
+                0, 1, 2, 3,
+                3, 2, 2, 3
+        };	// = row3+3*row2
+
+        printf("gf_inverse_test: max=%d ", KMAX);
+
+        test_mat = malloc(KMAX * KMAX);
+        save_mat = malloc(KMAX * KMAX);
+        invr_mat = malloc(KMAX * KMAX);
+
+        if (NULL == test_mat || NULL == save_mat || NULL == invr_mat)
+                return -1;
+
+        // Test with lots of leading 1's
+        k = 3;
+        memcpy(test_mat, test1, k * k);
+        if (inv_test(test_mat, invr_mat, save_mat, k))
+                return -1;
+
+        // Test with leading zeros
+        k = 3;
+        memcpy(test_mat, test2, k * k);
+        if (inv_test(test_mat, invr_mat, save_mat, k))
+                return -1;
+
+        // Test 3
+        k = 3;
+        memcpy(test_mat, test3, k * k);
+        if (inv_test(test_mat, invr_mat, save_mat, k))
+                return -1;
+
+        // Test 4 - try a singular matrix
+        k = 4;
+        memcpy(test_mat, test4, k * k);
+        if (!gf_invert_matrix(test_mat, invr_mat, k)) {
+                printf("Fail: didn't catch singular matrix\n");
+                print_matrix(test4, 4);
+                return -1;
+        }
+        // Do random test of size KMAX
+        k = KMAX;
+
+        for (i = 0; i < k * k; i++)
+                test_mat[i] = save_mat[i] = rand();
+
+        if (gf_invert_matrix(test_mat, invr_mat, k)) {
+                printf("rand picked a singular matrix, try again\n");
+                return -1;
+        }
+
+        matrix_mult(invr_mat, save_mat, test_mat, k);
+
+        if (is_ident(test_mat, k)) {
+                printf("fail\n");
+                print_matrix(save_mat, k);
+                print_matrix(invr_mat, k);
+                print_matrix(test_mat, k);
+                return -1;
+        }
+        // Do Randoms. Random size and coefficients
+        for (t = 0; t < RANDOMS; t++) {
+                k = rand() % KMAX;
+
+                for (i = 0; i < k * k; i++)
+                        test_mat[i] = save_mat[i] = rand();
+
+                if (gf_invert_matrix(test_mat, invr_mat, k))
+                        continue;
+
+                matrix_mult(invr_mat, save_mat, test_mat, k);
+
+                if (is_ident(test_mat, k)) {
+                        printf("fail rand k=%d\n", k);
+                        print_matrix(save_mat, k);
+                        print_matrix(invr_mat, k);
+                        print_matrix(test_mat, k);
+                        return -1;
+                }
+                if (0 == (t % 8))
+                        putchar('.');
+        }
+
+        printf(" Pass\n");
+        return 0;
+}
diff --git a/src/isa-l/erasure_code/gf_vect_dot_prod_1tbl.c b/src/isa-l/erasure_code/gf_vect_dot_prod_1tbl.c
new file mode 100644
index 000000000..1d23eb67f
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_vect_dot_prod_1tbl.c
@@ -0,0 +1,152 @@
+/**********************************************************************
+  Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>	// for memset, memcmp
+#include "test.h"
+#include "erasure_code.h"
+
+//#define CACHED_TEST
+#ifdef CACHED_TEST
+// Cached test, loop many times over small dataset
+# define TEST_SOURCES 10
+# define TEST_LEN 8*1024
+# define TEST_TYPE_STR "_warm"
+#else
+# ifndef TEST_CUSTOM
+// Uncached test. Pull from large mem base.
+# define TEST_SOURCES 10
+# define GT_L3_CACHE 32*1024*1024	/* some number > last level cache */
+# define TEST_LEN GT_L3_CACHE / TEST_SOURCES
+# define TEST_TYPE_STR "_cold"
+# else
+# define TEST_TYPE_STR "_cus"
+# endif
+#endif
+
+typedef unsigned char u8;
+
+// Global GF(256) tables
+u8 gff[256];
+u8 gflog[256];
+u8 gf_mul_table[256 * 256];
+
+void mk_gf_field(void)
+{
+        int i;
+        u8 s = 1;
+        gflog[0] = 0;
+
+        for (i = 0; i < 256; i++) {
+                gff[i] = s;
+                gflog[s] = i;
+                s = (s << 1) ^ ((s & 0x80) ? 0x1d : 0);	// mult by GF{2}
+        }
+}
+
+void mk_gf_mul_table(u8 * table)
+{
+        // Populate a single table with all multiply combinations for a fast,
+        // single-table lookup of GF(2^8) multiply at the expense of memory.
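+        // (Added note: the resulting layout is table[a * 256 + b] == a*b in
+        // GF(2^8), so one product costs a single load from this 64 KB table,
+        // e.g. gf_mul_table[0x02 * 256 + 0x8e], as used by
+        // gf_vect_dot_prod_mult() below.)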
+ int i, j; + for (i = 0; i < 256; i++) + for (j = 0; j < 256; j++) + table[i * 256 + j] = gf_mul(i, j); +} + +void gf_vect_dot_prod_ref(int len, int vlen, u8 * v, u8 ** src, u8 * dest) +{ + int i, j; + u8 s; + for (i = 0; i < len; i++) { + s = 0; + for (j = 0; j < vlen; j++) + s ^= gf_mul(src[j][i], v[j]); + + dest[i] = s; + } +} + +void gf_vect_dot_prod_mult(int len, int vlen, u8 * v, u8 ** src, u8 * dest) +{ + int i, j; + u8 s; + for (i = 0; i < len; i++) { + s = 0; + for (j = 0; j < vlen; j++) { + s ^= gf_mul_table[v[j] * 256 + src[j][i]]; + } + dest[i] = s; + } + +} + +int main(void) +{ + int i, j; + u8 vec[TEST_SOURCES], dest1[TEST_LEN], dest2[TEST_LEN]; + u8 *matrix[TEST_SOURCES]; + struct perf start; + + mk_gf_field(); + mk_gf_mul_table(gf_mul_table); + + //generate random vector and matrix/data + for (i = 0; i < TEST_SOURCES; i++) { + vec[i] = rand(); + + if (!(matrix[i] = malloc(TEST_LEN))) { + fprintf(stderr, "Error failure\n\n"); + return -1; + } + for (j = 0; j < TEST_LEN; j++) + matrix[i][j] = rand(); + + } + + BENCHMARK(&start, BENCHMARK_TIME, + gf_vect_dot_prod_ref(TEST_LEN, TEST_SOURCES, vec, matrix, dest1)); + printf("gf_vect_dot_prod_2tbl" TEST_TYPE_STR ": "); + perf_print(start, (long long)TEST_LEN * (TEST_SOURCES + 1)); + + BENCHMARK(&start, BENCHMARK_TIME, + gf_vect_dot_prod_mult(TEST_LEN, TEST_SOURCES, vec, matrix, dest2)); + printf("gf_vect_dot_prod_1tbl" TEST_TYPE_STR ": "); + perf_print(start, (long long)TEST_LEN * (TEST_SOURCES + 1)); + + // Compare with reference function + if (0 != memcmp(dest1, dest2, TEST_LEN)) { + printf("Error, different results!\n\n"); + return -1; + } + + printf("Pass functional test\n"); + return 0; +} diff --git a/src/isa-l/erasure_code/gf_vect_dot_prod_avx.asm b/src/isa-l/erasure_code/gf_vect_dot_prod_avx.asm new file mode 100644 index 000000000..c123a3d90 --- /dev/null +++ b/src/isa-l/erasure_code/gf_vect_dot_prod_avx.asm @@ -0,0 +1,271 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_vect_dot_prod_avx(len, vec, *g_tbls, **buffs, *dest); +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + + %define tmp r11 + %define tmp2 r10 + %define tmp3 r9 + %define return rax + %macro SLDR 2 + %endmacro + %define SSTR SLDR + %define PS 8 + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved and loaded + %define tmp r11 + %define tmp2 r10 + %define tmp3 rdi ; must be saved and loaded + %define return rax + %macro SLDR 2 + %endmacro + %define SSTR SLDR + %define PS 8 + %define frame_size 2*8 + %define arg(x) [rsp + frame_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + rex_push_reg r12 + push_reg rdi + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + pop rdi + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, elf32 + +;;;================== High Address; +;;; arg4 +;;; arg3 +;;; arg2 +;;; arg1 +;;; arg0 +;;; return +;;;<================= esp of caller +;;; ebp +;;;<================= ebp = esp +;;; esi +;;; edi +;;; ebx +;;;<================= esp of callee +;;; +;;;================== Low Address; + + %define PS 4 + %define LOG_PS 2 + %define func(x) x: + %define arg(x) [ebp + PS*2 + PS*x] + + %define trans ecx ;trans is for the variables in stack + %define arg0 trans + %define arg0_m arg(0) + %define arg1 trans + %define arg1_m arg(1) + %define arg2 arg2_m + %define arg2_m arg(2) + %define arg3 ebx + %define arg4 trans + %define arg4_m arg(4) + %define tmp edx + %define tmp2 edi + %define tmp3 esi + %define return eax + %macro SLDR 2 ;; stack load/restore + mov %1, %2 + %endmacro + %define SSTR SLDR + + %macro FUNC_SAVE 0 + push ebp + mov ebp, esp + push esi + push edi + push ebx + mov arg3, arg(3) + %endmacro + + %macro FUNC_RESTORE 0 + pop ebx + pop edi + pop esi + mov esp, ebp + pop ebp + %endmacro + +%endif ; output formats + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest arg4 + +%define vec_i tmp2 +%define ptr tmp3 +%define pos return + + %ifidn PS,4 ;32-bit code + %define vec_m arg1_m + %define len_m arg0_m + %define dest_m arg4_m + %endif + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + +%ifidn PS,8 ; 64-bit code + default rel + [bits 64] +%endif + +section .text + +%define xmask0f xmm5 +%define xgft_lo xmm4 +%define xgft_hi xmm3 + +%define x0 xmm0 +%define xtmpa xmm1 +%define xp xmm2 + +align 16 +global gf_vect_dot_prod_avx:ISAL_SYM_TYPE_FUNCTION 
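+;; Added note -- a scalar sketch of what the vector loop below computes
+;; (reference semantics as in gf_vect_dot_prod_base; the variable names in
+;; this sketch are illustrative only):
+;;
+;;	for (pos = 0; pos < len; pos++) {
+;;		u8 p = 0;
+;;		for (j = 0; j < vec; j++)
+;;			p ^= gf_mul(coef[j], src[j][pos]);  /* coef[j] expanded into g_tbls */
+;;		dest[pos] = p;
+;;	}
+;;
+;; len must be at least 16; a length that is not a multiple of 16 is
+;; finished with one extra pass overlapped onto the final 16 bytes.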
+func(gf_vect_dot_prod_avx) + FUNC_SAVE + SLDR len, len_m + sub len, 16 + SSTR len_m, len + jl .return_fail + xor pos, pos + vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + +.loop16: + vpxor xp, xp + mov tmp, mul_array + xor vec_i, vec_i + +.next_vect: + + mov ptr, [src+vec_i*PS] + vmovdqu xgft_lo, [tmp] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + vmovdqu xgft_hi, [tmp+16] ; " Cx{00}, Cx{10}, ..., Cx{f0} + XLDR x0, [ptr+pos] ;Get next source vector + + add tmp, 32 + add vec_i, 1 + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + vpshufb xgft_hi, xgft_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft_lo, xgft_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft_hi, xgft_hi, xgft_lo ;GF add high and low partials + vpxor xp, xp, xgft_hi ;xp += partial + + SLDR vec, vec_m + cmp vec_i, vec + jl .next_vect + + SLDR dest, dest_m + XSTR [dest+pos], xp + + add pos, 16 ;Loop on 16 bytes at a time + SLDR len, len_m + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-16 + jmp .loop16 ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data + +align 16 + +mask0f: +dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f + +;;; func core, ver, snum +slversion gf_vect_dot_prod_avx, 02, 05, 0061 diff --git a/src/isa-l/erasure_code/gf_vect_dot_prod_avx2.asm b/src/isa-l/erasure_code/gf_vect_dot_prod_avx2.asm new file mode 100644 index 000000000..f84dd4756 --- /dev/null +++ b/src/isa-l/erasure_code/gf_vect_dot_prod_avx2.asm @@ -0,0 +1,280 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, *dest); +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 r9 + %define return rax + %macro SLDR 2 + %endmacro + %define SSTR SLDR + %define PS 8 + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved and loaded + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define tmp2 r10 + %define tmp3 rdi ; must be saved and loaded + %define return rax + %macro SLDR 2 + %endmacro + %define SSTR SLDR + %define PS 8 + %define frame_size 2*8 + %define arg(x) [rsp + frame_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + rex_push_reg r12 + push_reg rdi + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + pop rdi + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, elf32 + +;;;================== High Address; +;;; arg4 +;;; arg3 +;;; arg2 +;;; arg1 +;;; arg0 +;;; return +;;;<================= esp of caller +;;; ebp +;;;<================= ebp = esp +;;; esi +;;; edi +;;; ebx +;;;<================= esp of callee +;;; +;;;================== Low Address; + + %define PS 4 + %define LOG_PS 2 + %define func(x) x: + %define arg(x) [ebp + PS*2 + PS*x] + + %define trans ecx ;trans is for the variables in stack + %define arg0 trans + %define arg0_m arg(0) + %define arg1 trans + %define arg1_m arg(1) + %define arg2 arg2_m + %define arg2_m arg(2) + %define arg3 ebx + %define arg4 trans + %define arg4_m arg(4) + %define tmp edx + %define tmp.w edx + %define tmp.b dl + %define tmp2 edi + %define tmp3 esi + %define return eax + %macro SLDR 2 ;stack load/restore + mov %1, %2 + %endmacro + %define SSTR SLDR + + %macro FUNC_SAVE 0 + push ebp + mov ebp, esp + push esi + push edi + push ebx + mov arg3, arg(3) + %endmacro + + %macro FUNC_RESTORE 0 + pop ebx + pop edi + pop esi + mov esp, ebp + pop ebp + %endmacro + +%endif ; output formats + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest arg4 + +%define vec_i tmp2 +%define ptr tmp3 +%define pos return + +%ifidn PS,4 ;32-bit code + %define vec_m arg1_m + %define len_m arg0_m + %define dest_m arg4_m +%endif + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + +%ifidn PS,8 ;64-bit code + default rel + [bits 64] +%endif + +section .text + +%define xmask0f ymm3 +%define xmask0fx xmm3 +%define xgft_lo ymm4 +%define xgft_hi ymm5 + +%define x0 ymm0 +%define xtmpa ymm1 +%define xp ymm2 + +align 16 +global gf_vect_dot_prod_avx2:ISAL_SYM_TYPE_FUNCTION +func(gf_vect_dot_prod_avx2) + FUNC_SAVE + SLDR len, len_m + sub len, 32 + SSTR len_m, len + jl .return_fail + xor pos, pos + mov tmp.b, 0x0f + vpinsrb xmask0fx, xmask0fx, tmp.w, 0 + vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f... 
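+
+;; Added note: each 32-byte block of g_tbls holds two 16-entry lookup
+;; tables for one coefficient c -- the products of c with the 16 low-nibble
+;; values and with the 16 high-nibble values. vpshufb performs 32 parallel
+;; byte lookups, so for every source byte s the loop below computes
+;;	lo_tbl[s & 0x0f] ^ hi_tbl[s >> 4] == gf_mul(c, s)
+;; since addition in GF(2^8) is XOR of the two partial products.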
+ +.loop32: + vpxor xp, xp + mov tmp, mul_array + xor vec_i, vec_i + +.next_vect: + + mov ptr, [src+vec_i*PS] + + vmovdqu xgft_lo, [tmp] ;Load array Cx{00}, Cx{01}, Cx{02}, ... + ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0} + vperm2i128 xgft_hi, xgft_lo, xgft_lo, 0x11 ; swapped to hi | hi + vperm2i128 xgft_lo, xgft_lo, xgft_lo, 0x00 ; swapped to lo | lo + + XLDR x0, [ptr+pos] ;Get next source vector + + add tmp, 32 + add vec_i, 1 + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + vpshufb xgft_hi, xgft_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft_lo, xgft_lo, xtmpa ;Lookup mul table of low nibble + vpxor xgft_hi, xgft_hi, xgft_lo ;GF add high and low partials + vpxor xp, xp, xgft_hi ;xp += partial + + SLDR vec, vec_m + cmp vec_i, vec + jl .next_vect + + SLDR dest, dest_m + XSTR [dest+pos], xp + + add pos, 32 ;Loop on 32 bytes at a time + SLDR len, len_m + cmp pos, len + jle .loop32 + + lea tmp, [len + 32] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-32 + jmp .loop32 ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data + +;;; func core, ver, snum +slversion gf_vect_dot_prod_avx2, 04, 05, 0190 diff --git a/src/isa-l/erasure_code/gf_vect_dot_prod_avx512.asm b/src/isa-l/erasure_code/gf_vect_dot_prod_avx512.asm new file mode 100644 index 000000000..ad01fcf89 --- /dev/null +++ b/src/isa-l/erasure_code/gf_vect_dot_prod_avx512.asm @@ -0,0 +1,240 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_vect_dot_prod_avx512(len, vec, *g_tbls, **buffs, *dest); +;;; + +%include "reg_sizes.asm" + +%ifdef HAVE_AS_KNOWS_AVX512 + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp2 r10 + %define return rax + %define PS 8 + %define LOG_PS 3 + + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved, loaded and restored + %define arg5 r15 ; must be saved and restored + %define tmp r11 + %define tmp2 r10 + %define return rax + %define PS 8 + %define LOG_PS 3 + %define stack_size 0*16 + 3*8 ; must be an odd multiple of 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + save_reg r12, 9*16 + 0*8 + save_reg r15, 9*16 + 3*8 + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + mov r12, [rsp + 9*16 + 0*8] + mov r15, [rsp + 9*16 + 3*8] + add rsp, stack_size + %endmacro +%endif + + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest1 arg4 +%define ptr arg5 +%define vec_i tmp2 +%define pos return + + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu8 + %define XSTR vmovdqu8 +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + +%define xmask0f zmm5 +%define xgft1_lo zmm4 +%define xgft1_loy ymm4 +%define xgft1_hi zmm3 +%define x0 zmm0 +%define xgft1_loy ymm4 +%define x0y ymm0 +%define xtmpa zmm1 +%define xp1 zmm2 +%define xp1y ymm2 + +default rel +[bits 64] +section .text + +align 16 +global gf_vect_dot_prod_avx512:ISAL_SYM_TYPE_FUNCTION +func(gf_vect_dot_prod_avx512) + FUNC_SAVE + xor pos, pos + mov tmp, 0x0f + vpbroadcastb xmask0f, tmp ;Construct mask 0x0f0f0f... + sal vec, LOG_PS ;vec *= PS. 
Make vec_i count by PS + sub len, 64 + jl .len_lt_64 + +.loop64: + vpxorq xp1, xp1, xp1 + mov tmp, mul_array + xor vec_i, vec_i + +.next_vect: + mov ptr, [src+vec_i] + XLDR x0, [ptr+pos] ;Get next source vector + add vec_i, PS + + vpandq xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpandq x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + vmovdqu8 xgft1_loy, [tmp] ;Load array Ax{00}..{0f}, Ax{00}..{f0} + add tmp, 32 + + vshufi64x2 xgft1_hi, xgft1_lo, xgft1_lo, 0x55 + vshufi64x2 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 + + vpshufb xgft1_hi, xgft1_hi, x0 ;Lookup mul table of high nibble + vpshufb xgft1_lo, xgft1_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xgft1_hi, xgft1_hi, xgft1_lo ;GF add high and low partials + vpxorq xp1, xp1, xgft1_hi ;xp1 += partial + + cmp vec_i, vec + jl .next_vect + + XSTR [dest1+pos], xp1 + + add pos, 64 ;Loop on 64 bytes at a time + cmp pos, len + jle .loop64 + + lea tmp, [len + 64] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-64 + jmp .loop64 ;Do one more overlap pass + + +.len_lt_64: ; 32-byte version + add len, 32 + jl .return_fail + +.loop32: + vpxorq xp1, xp1, xp1 + mov tmp, mul_array + xor vec_i, vec_i + +.next_vect2: + mov ptr, [src+vec_i] + XLDR x0y, [ptr+pos] ;Get next source vector 32B + add vec_i, PS + vpsraw xtmpa, x0, 4 ;Shift to put high nibble into bits 4-0 + vshufi64x2 x0, x0, xtmpa, 0x44 ;put x0 = xl:xh + vpandq x0, x0, xmask0f ;Mask bits 4-0 + vmovdqu8 xgft1_loy, [tmp] ;Load array Ax{00}..{0f}, Ax{00}..{f0} + add tmp, 32 + vshufi64x2 xgft1_lo, xgft1_lo, xgft1_lo, 0x50 ;=AlAh:AlAh + vpshufb xgft1_lo, xgft1_lo, x0 ;Lookup mul table + vshufi64x2 xgft1_hi, xgft1_lo, xgft1_lo, 0x0e ;=xh: + vpxorq xgft1_hi, xgft1_hi, xgft1_lo ;GF add high and low partials + vpxorq xp1, xp1, xgft1_hi ;xp1 += partial + cmp vec_i, vec + jl .next_vect2 + + XSTR [dest1+pos], xp1y + add pos, 32 ;Loop on 32 bytes at a time + cmp pos, len + jle .loop32 + + lea tmp, [len + 32] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-32 + jmp .loop32 ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +%else +%ifidn __OUTPUT_FORMAT__, win64 +global no_gf_vect_dot_prod_avx512 +no_gf_vect_dot_prod_avx512: +%endif +%endif ; ifdef HAVE_AS_KNOWS_AVX512 diff --git a/src/isa-l/erasure_code/gf_vect_dot_prod_base_test.c b/src/isa-l/erasure_code/gf_vect_dot_prod_base_test.c new file mode 100644 index 000000000..2b4dfbbe0 --- /dev/null +++ b/src/isa-l/erasure_code/gf_vect_dot_prod_base_test.c @@ -0,0 +1,290 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>	// for memset, memcmp
+#include "erasure_code.h"
+#include "types.h"
+
+#define TEST_LEN 8192
+#define TEST_SIZE (TEST_LEN/2)
+
+#ifndef TEST_SOURCES
+# define TEST_SOURCES 250
+#endif
+#ifndef RANDOMS
+# define RANDOMS 20
+#endif
+
+#define MMAX TEST_SOURCES
+#define KMAX TEST_SOURCES
+
+typedef unsigned char u8;
+
+void dump(unsigned char *buf, int len)
+{
+        int i;
+        for (i = 0; i < len;) {
+                printf(" %2x", 0xff & buf[i++]);
+                if (i % 32 == 0)
+                        printf("\n");
+        }
+        printf("\n");
+}
+
+void dump_matrix(unsigned char **s, int k, int m)
+{
+        int i, j;
+        for (i = 0; i < k; i++) {
+                for (j = 0; j < m; j++) {
+                        printf(" %2x", s[i][j]);
+                }
+                printf("\n");
+        }
+        printf("\n");
+}
+
+void dump_u8xu8(unsigned char *s, int k, int m)
+{
+        int i, j;
+        for (i = 0; i < k; i++) {
+                for (j = 0; j < m; j++) {
+                        printf(" %2x", 0xff & s[j + (i * m)]);
+                }
+                printf("\n");
+        }
+        printf("\n");
+}
+
+int main(int argc, char *argv[])
+{
+        int i, j, rtest, m, k, nerrs, r, err;
+        void *buf;
+        u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
+        u8 *dest, *dest_ref, *temp_buff, *buffs[TEST_SOURCES];
+        u8 a[MMAX * KMAX], b[MMAX * KMAX], d[MMAX * KMAX];
+        u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
+
+        printf("gf_vect_dot_prod_base: %dx%d ", TEST_SOURCES, TEST_LEN);
+
+        // Allocate the arrays
+        for (i = 0; i < TEST_SOURCES; i++) {
+                if (posix_memalign(&buf, 64, TEST_LEN)) {
+                        printf("alloc error: Fail");
+                        return -1;
+                }
+                buffs[i] = buf;
+        }
+
+        if (posix_memalign(&buf, 64, TEST_LEN)) {
+                printf("alloc error: Fail");
+                return -1;
+        }
+        dest = buf;
+
+        if (posix_memalign(&buf, 64, TEST_LEN)) {
+                printf("alloc error: Fail");
+                return -1;
+        }
+        dest_ref = buf;
+
+        if (posix_memalign(&buf, 64, TEST_LEN)) {
+                printf("alloc error: Fail");
+                return -1;
+        }
+        temp_buff = buf;
+
+        // Init
+        for (i = 0; i < TEST_SOURCES; i++)
+                memset(buffs[i], 0, TEST_LEN);
+
+        memset(dest, 0, TEST_LEN);
+        memset(temp_buff, 0, TEST_LEN);
+        memset(dest_ref, 0, TEST_LEN);
+        memset(g, 0, TEST_SOURCES);
+
+        // Test erasure code using gf_vect_dot_prod
+        // Pick a first test
+        m = 9;
+        k = 5;
+        if (m > MMAX || k > KMAX)
+                return -1;
+
+        gf_gen_cauchy1_matrix(a, m, k);
+
+        // Make random data
+        for (i = 0; i < k; i++)
+                for (j = 0; j < TEST_LEN; j++)
+                        buffs[i][j] = rand();
+
+        // Make parity vects
+        for (i = k; i < m; i++) {
+                for (j = 0; j < k; j++)
+                        gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
+
+                gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, buffs, buffs[i]);
+        }
+
+        // Random buffers in erasure
+        memset(src_in_err, 0, TEST_SOURCES);
+        for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
+                err = 1 & rand();
+                src_in_err[i] = err;
+                if (err)
src_err_list[nerrs++] = i; + } + + // construct b by removing error rows + for (i = 0, r = 0; i < k; i++, r++) { + while (src_in_err[r]) { + r++; + continue; + } + for (j = 0; j < k; j++) + b[k * i + j] = a[k * r + j]; + } + + if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0) + printf("BAD MATRIX\n"); + + for (i = 0, r = 0; i < k; i++, r++) { + while (src_in_err[r]) { + r++; + continue; + } + recov[i] = buffs[r]; + } + + // Recover data + for (i = 0; i < nerrs; i++) { + for (j = 0; j < k; j++) + gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]); + + gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, recov, temp_buff); + + if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) { + printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs); + printf("recov %d:", src_err_list[i]); + dump(temp_buff, 25); + printf("orig :"); + dump(buffs[src_err_list[i]], 25); + return -1; + } + } + + // Do more random tests + + for (rtest = 0; rtest < RANDOMS; rtest++) { + while ((m = (rand() % MMAX)) < 2) ; + while ((k = (rand() % KMAX)) >= m || k < 1) ; + + if (m > MMAX || k > KMAX) + continue; + + gf_gen_cauchy1_matrix(a, m, k); + + // Make random data + for (i = 0; i < k; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + // Make parity vects + for (i = k; i < m; i++) { + for (j = 0; j < k; j++) + gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]); + + gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, buffs, buffs[i]); + } + + // Random errors + memset(src_in_err, 0, TEST_SOURCES); + for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) { + err = 1 & rand(); + src_in_err[i] = err; + if (err) + src_err_list[nerrs++] = i; + } + if (nerrs == 0) { // should have at least one error + while ((err = (rand() % KMAX)) >= k) ; + src_err_list[nerrs++] = err; + src_in_err[err] = 1; + } + // construct b by removing error rows + for (i = 0, r = 0; i < k; i++, r++) { + while (src_in_err[r]) { + r++; + continue; + } + for (j = 0; j < k; j++) + b[k * i + j] = a[k * r + j]; + } + + if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0) + printf("BAD MATRIX\n"); + + for (i = 0, r = 0; i < k; i++, r++) { + while (src_in_err[r]) { + r++; + continue; + } + recov[i] = buffs[r]; + } + + // Recover data + for (i = 0; i < nerrs; i++) { + for (j = 0; j < k; j++) + gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]); + + gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, recov, temp_buff); + + if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) { + printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); + printf(" - erase list = "); + for (i = 0; i < nerrs; i++) + printf(" %d", src_err_list[i]); + printf("\na:\n"); + dump_u8xu8((u8 *) a, m, k); + printf("inv b:\n"); + dump_u8xu8((u8 *) d, k, k); + printf("orig data:\n"); + dump_matrix(buffs, m, 25); + printf("orig :"); + dump(buffs[src_err_list[i]], 25); + printf("recov %d:", src_err_list[i]); + dump(temp_buff, 25); + return -1; + } + } + putchar('.'); + } + + printf("done all: Pass\n"); + return 0; +} diff --git a/src/isa-l/erasure_code/gf_vect_dot_prod_perf.c b/src/isa-l/erasure_code/gf_vect_dot_prod_perf.c new file mode 100644 index 000000000..bd2b555b0 --- /dev/null +++ b/src/isa-l/erasure_code/gf_vect_dot_prod_perf.c @@ -0,0 +1,174 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. 
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>	// for memset, memcmp
+#include "erasure_code.h"
+#include "test.h"
+
+#ifndef FUNCTION_UNDER_TEST
+# define FUNCTION_UNDER_TEST gf_vect_dot_prod
+#endif
+
+#define str(s) #s
+#define xstr(s) str(s)
+
+//#define CACHED_TEST
+#ifdef CACHED_TEST
+// Cached test, loop many times over small dataset
+# define TEST_SOURCES 10
+# define TEST_LEN 8*1024
+# define TEST_TYPE_STR "_warm"
+#else
+# ifndef TEST_CUSTOM
+// Uncached test. Pull from large mem base.
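+// (Added note: with the sizes defined just below, the TEST_SOURCES source
+// buffers of TEST_LEN bytes together span GT_L3_CACHE, chosen larger than
+// the last-level cache so each benchmarked pass reads cold memory.)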
+# define TEST_SOURCES 10 +# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */ +# define TEST_LEN ((GT_L3_CACHE / TEST_SOURCES) & ~(64-1)) +# define TEST_TYPE_STR "_cold" +# else +# define TEST_TYPE_STR "_cus" +# endif +#endif + +typedef unsigned char u8; + +void dump(unsigned char *buf, int len) +{ + int i; + for (i = 0; i < len;) { + printf(" %2x", 0xff & buf[i++]); + if (i % 32 == 0) + printf("\n"); + } + printf("\n"); +} + +void dump_matrix(unsigned char **s, int k, int m) +{ + int i, j; + for (i = 0; i < k; i++) { + for (j = 0; j < m; j++) { + printf(" %2x", s[i][j]); + } + printf("\n"); + } + printf("\n"); +} + +void vect_dot_prod_perf(void (*fun_ptr) + (int, int, unsigned char *, unsigned char **, unsigned char *), + u8 * g, u8 * g_tbls, u8 ** buffs, u8 * dest_ref) +{ + int j; + for (j = 0; j < TEST_SOURCES; j++) + gf_vect_mul_init(g[j], &g_tbls[j * 32]); + + (*fun_ptr) (TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref); +} + +int main(int argc, char *argv[]) +{ + int i, j; + void *buf; + u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], *dest, *dest_ref; + u8 *temp_buff, *buffs[TEST_SOURCES]; + struct perf start; + + printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN); + + // Allocate the arrays + for (i = 0; i < TEST_SOURCES; i++) { + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + buffs[i] = buf; + } + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + dest_ref = buf; + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + temp_buff = buf; + + // Performance test + for (i = 0; i < TEST_SOURCES; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + memset(dest, 0, TEST_LEN); + memset(temp_buff, 0, TEST_LEN); + memset(dest_ref, 0, TEST_LEN); + memset(g, 0, TEST_SOURCES); + + for (i = 0; i < TEST_SOURCES; i++) + g[i] = rand(); + +#ifdef DO_REF_PERF + BENCHMARK(&start, BENCHMARK_TIME, + vect_dot_prod_perf(&gf_vect_dot_prod_base, g, g_tbls, buffs, dest_ref) + ); + printf("gf_vect_dot_prod_base" TEST_TYPE_STR ": "); + perf_print(start, (long long)TEST_LEN * (TEST_SOURCES + 1)); +#else + vect_dot_prod_perf(&gf_vect_dot_prod_base, g, g_tbls, buffs, dest_ref); +#endif + + BENCHMARK(&start, BENCHMARK_TIME, + vect_dot_prod_perf(&FUNCTION_UNDER_TEST, g, g_tbls, buffs, dest)); + printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": "); + perf_print(start, (long long)TEST_LEN * (TEST_SOURCES + 1)); + + if (0 != memcmp(dest_ref, dest, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref, 25); + printf("dprod:"); + dump(dest, 25); + return -1; + } + + printf("pass perf check\n"); + return 0; +} diff --git a/src/isa-l/erasure_code/gf_vect_dot_prod_sse.asm b/src/isa-l/erasure_code/gf_vect_dot_prod_sse.asm new file mode 100644 index 000000000..108fa36a4 --- /dev/null +++ b/src/isa-l/erasure_code/gf_vect_dot_prod_sse.asm @@ -0,0 +1,271 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. 
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_vect_dot_prod_sse(len, vec, *g_tbls, **buffs, *dest); +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + + %define tmp r11 + %define tmp2 r10 + %define tmp3 r9 + %define return rax + %macro SLDR 2 + %endmacro + %define SSTR SLDR + %define PS 8 + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r12 ; must be saved and loaded + %define tmp r11 + %define tmp2 r10 + %define tmp3 rdi ; must be saved and loaded + %define return rax + %macro SLDR 2 + %endmacro + %define SSTR SLDR + %define PS 8 + %define frame_size 2*8 + %define arg(x) [rsp + frame_size + PS + PS*x] + + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + rex_push_reg r12 + push_reg rdi + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + pop rdi + pop r12 + %endmacro +%endif + +%ifidn __OUTPUT_FORMAT__, elf32 + +;;;================== High Address; +;;; arg4 +;;; arg3 +;;; arg2 +;;; arg1 +;;; arg0 +;;; return +;;;<================= esp of caller +;;; ebp +;;;<================= ebp = esp +;;; esi +;;; edi +;;; ebx +;;;<================= esp of callee +;;; +;;;================== Low Address; + + %define PS 4 + %define LOG_PS 2 + %define func(x) x: + %define arg(x) [ebp + PS*2 + PS*x] + + %define trans ecx ;trans is for the variables in stack + %define arg0 trans + %define arg0_m arg(0) + %define arg1 trans + %define arg1_m arg(1) + %define arg2 arg2_m + %define arg2_m arg(2) + %define arg3 ebx + %define arg4 trans + %define arg4_m arg(4) + %define tmp edx + %define tmp2 edi + %define tmp3 esi + %define return eax + %macro SLDR 2 ;; stack load/restore + mov %1, %2 + %endmacro + %define SSTR SLDR + + %macro FUNC_SAVE 0 + push ebp + mov ebp, esp + push esi + push edi + push ebx + mov arg3, arg(3) + %endmacro + + 
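+ ;; Added note: under elf32 too few registers are free, so len, vec and
+ ;; dest live in the caller's stack frame (arg0_m, arg1_m, arg4_m above)
+ ;; and are pulled into the shared scratch register ecx ("trans") by the
+ ;; SLDR macro right before each use; on the 64-bit targets SLDR is a no-op.
+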
%macro FUNC_RESTORE 0 + pop ebx + pop edi + pop esi + mov esp, ebp + pop ebp + %endmacro + +%endif ; output formats + +%define len arg0 +%define vec arg1 +%define mul_array arg2 +%define src arg3 +%define dest arg4 + +%define vec_i tmp2 +%define ptr tmp3 +%define pos return + + %ifidn PS,4 ;32-bit code + %define vec_m arg1_m + %define len_m arg0_m + %define dest_m arg4_m + %endif + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR movdqu + %define XSTR movdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR movdqa + %define XSTR movdqa + %else + %define XLDR movntdqa + %define XSTR movntdq + %endif +%endif + +%ifidn PS,8 ;64-bit code + default rel + [bits 64] +%endif + +section .text + +%define xmask0f xmm5 +%define xgft_lo xmm4 +%define xgft_hi xmm3 + +%define x0 xmm0 +%define xtmpa xmm1 +%define xp xmm2 + +align 16 +global gf_vect_dot_prod_sse:ISAL_SYM_TYPE_FUNCTION +func(gf_vect_dot_prod_sse) + FUNC_SAVE + SLDR len, len_m + sub len, 16 + SSTR len_m, len + jl .return_fail + xor pos, pos + movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + +.loop16: + pxor xp, xp + mov tmp, mul_array + xor vec_i, vec_i + +.next_vect: + + mov ptr, [src+vec_i*PS] + movdqu xgft_lo, [tmp] ;Load array Cx{00}, Cx{01}, ..., Cx{0f} + movdqu xgft_hi, [tmp+16] ; " Cx{00}, Cx{10}, ..., Cx{f0} + XLDR x0, [ptr+pos] ;Get next source vector + + add tmp, 32 + add vec_i, 1 + + movdqa xtmpa, x0 ;Keep unshifted copy of src + psraw x0, 4 ;Shift to put high nibble into bits 4-0 + pand x0, xmask0f ;Mask high src nibble in bits 4-0 + pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0 + + pshufb xgft_hi, x0 ;Lookup mul table of high nibble + pshufb xgft_lo, xtmpa ;Lookup mul table of low nibble + pxor xgft_hi, xgft_lo ;GF add high and low partials + pxor xp, xgft_hi ;xp += partial + + SLDR vec, vec_m + cmp vec_i, vec + jl .next_vect + + SLDR dest, dest_m + XSTR [dest+pos], xp + + add pos, 16 ;Loop on 16 bytes at a time + SLDR len, len_m + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-16 + jmp .loop16 ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data + +align 16 + +mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f + +;;; func core, ver, snum +slversion gf_vect_dot_prod_sse, 00, 05, 0060 diff --git a/src/isa-l/erasure_code/gf_vect_dot_prod_test.c b/src/isa-l/erasure_code/gf_vect_dot_prod_test.c new file mode 100644 index 000000000..1c0232cca --- /dev/null +++ b/src/isa-l/erasure_code/gf_vect_dot_prod_test.c @@ -0,0 +1,525 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>	// for memset, memcmp
+#include "erasure_code.h"
+#include "types.h"
+
+#ifndef FUNCTION_UNDER_TEST
+# define FUNCTION_UNDER_TEST gf_vect_dot_prod
+#endif
+#ifndef TEST_MIN_SIZE
+# define TEST_MIN_SIZE 32
+#endif
+
+#define str(s) #s
+#define xstr(s) str(s)
+
+#define TEST_LEN 8192
+#define TEST_SIZE (TEST_LEN/2)
+
+#ifndef TEST_SOURCES
+# define TEST_SOURCES 16
+#endif
+#ifndef RANDOMS
+# define RANDOMS 20
+#endif
+
+#define MMAX TEST_SOURCES
+#define KMAX TEST_SOURCES
+
+#ifdef EC_ALIGNED_ADDR
+// Define power of 2 range to check ptr, len alignment
+# define PTR_ALIGN_CHK_B 0
+# define LEN_ALIGN_CHK_B 0	// 0 for aligned only
+#else
+// Define power of 2 range to check ptr, len alignment
+# define PTR_ALIGN_CHK_B 32
+# define LEN_ALIGN_CHK_B 32	// 0 for aligned only
+#endif
+
+typedef unsigned char u8;
+
+void dump(unsigned char *buf, int len)
+{
+        int i;
+        for (i = 0; i < len;) {
+                printf(" %2x", 0xff & buf[i++]);
+                if (i % 32 == 0)
+                        printf("\n");
+        }
+        printf("\n");
+}
+
+void dump_matrix(unsigned char **s, int k, int m)
+{
+        int i, j;
+        for (i = 0; i < k; i++) {
+                for (j = 0; j < m; j++) {
+                        printf(" %2x", s[i][j]);
+                }
+                printf("\n");
+        }
+        printf("\n");
+}
+
+void dump_u8xu8(unsigned char *s, int k, int m)
+{
+        int i, j;
+        for (i = 0; i < k; i++) {
+                for (j = 0; j < m; j++) {
+                        printf(" %2x", 0xff & s[j + (i * m)]);
+                }
+                printf("\n");
+        }
+        printf("\n");
+}
+
+int main(int argc, char *argv[])
+{
+        int i, j, rtest, srcs, m, k, nerrs, r, err;
+        void *buf;
+        u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
+        u8 *dest, *dest_ref, *temp_buff, *buffs[TEST_SOURCES];
+        u8 a[MMAX * KMAX], b[MMAX * KMAX], d[MMAX * KMAX];
+        u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
+
+        int align, size;
+        unsigned char *efence_buffs[TEST_SOURCES];
+        unsigned int offset;
+        u8 *ubuffs[TEST_SOURCES];
+        u8 *udest_ptr;
+
+        printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
+
+        // Allocate the arrays
+        for (i = 0; i < TEST_SOURCES; i++) {
+                if (posix_memalign(&buf, 64, TEST_LEN)) {
+                        printf("alloc error: Fail");
+                        return -1;
+                }
+                buffs[i] = buf;
+        }
+
+        if (posix_memalign(&buf, 64, TEST_LEN)) {
+                printf("alloc error: Fail");
+                return -1;
+        }
+        dest = buf;
+
+        if (posix_memalign(&buf, 64, TEST_LEN)) {
+                printf("alloc error: Fail");
+                return -1;
+        }
+        dest_ref = buf;
+
+        if (posix_memalign(&buf, 64, TEST_LEN)) {
+                printf("alloc error: Fail");
+                return -1;
+        }
+        temp_buff = buf;
+
+        // Test of all zeros
+        for (i = 0; i < TEST_SOURCES; i++)
+                memset(buffs[i], 0, TEST_LEN);
+
+        memset(dest, 0, TEST_LEN);
+        memset(temp_buff, 0, TEST_LEN);
+        memset(dest_ref, 0, TEST_LEN);
+        memset(g, 0, TEST_SOURCES);
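+
+        // (Added note: g is still all zeros here, so every table expanded by
+        // gf_vect_mul_init() is zero and both routines must leave dest all
+        // zero; the memcmp below checks this degenerate case first.)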
+ + for (i = 0; i < TEST_SOURCES; i++) + gf_vect_mul_init(g[i], &g_tbls[i * 32]); + + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref); + + FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest); + + if (0 != memcmp(dest_ref, dest, TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " \n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref, 25); + printf("dprod:"); + dump(dest, 25); + return -1; + } else + putchar('.'); + + // Rand data test + for (rtest = 0; rtest < RANDOMS; rtest++) { + for (i = 0; i < TEST_SOURCES; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < TEST_SOURCES; i++) + g[i] = rand(); + + for (i = 0; i < TEST_SOURCES; i++) + gf_vect_mul_init(g[i], &g_tbls[i * 32]); + + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref); + FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest); + + if (0 != memcmp(dest_ref, dest, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " 1\n"); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref, 25); + printf("dprod:"); + dump(dest, 25); + return -1; + } + + putchar('.'); + } + + // Rand data test with varied parameters + for (rtest = 0; rtest < RANDOMS; rtest++) { + for (srcs = TEST_SOURCES; srcs > 0; srcs--) { + for (i = 0; i < srcs; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < srcs; i++) + g[i] = rand(); + + for (i = 0; i < srcs; i++) + gf_vect_mul_init(g[i], &g_tbls[i * 32]); + + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref); + FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest); + + if (0 != memcmp(dest_ref, dest, TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 2\n"); + dump_matrix(buffs, 5, srcs); + printf("dprod_base:"); + dump(dest_ref, 5); + printf("dprod:"); + dump(dest, 5); + return -1; + } + + putchar('.'); + } + } + + // Test erasure code using gf_vect_dot_prod + + // Pick a first test + m = 9; + k = 5; + if (m > MMAX || k > KMAX) + return -1; + + gf_gen_rs_matrix(a, m, k); + + // Make random data + for (i = 0; i < k; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + // Make parity vects + for (i = k; i < m; i++) { + for (j = 0; j < k; j++) + gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]); +#ifndef USEREF + FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]); +#else + gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]); +#endif + } + + // Random buffers in erasure + memset(src_in_err, 0, TEST_SOURCES); + for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) { + err = 1 & rand(); + src_in_err[i] = err; + if (err) + src_err_list[nerrs++] = i; + } + + // construct b by removing error rows + for (i = 0, r = 0; i < k; i++, r++) { + while (src_in_err[r]) { + r++; + continue; + } + for (j = 0; j < k; j++) + b[k * i + j] = a[k * r + j]; + } + + if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0) + printf("BAD MATRIX\n"); + + for (i = 0, r = 0; i < k; i++, r++) { + while (src_in_err[r]) { + r++; + continue; + } + recov[i] = buffs[r]; + } + + // Recover data + for (i = 0; i < nerrs; i++) { + for (j = 0; j < k; j++) + gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]); +#ifndef USEREF + FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff); +#else + gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff); +#endif + + if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) { + printf("Fail error recovery (%d, %d, %d)\n", m, k, 
nerrs);
+			printf("recov %d:", src_err_list[i]);
+			dump(temp_buff, 25);
+			printf("orig :");
+			dump(buffs[src_err_list[i]], 25);
+			return -1;
+		}
+	}
+
+	// Do more random tests
+
+	for (rtest = 0; rtest < RANDOMS; rtest++) {
+		while ((m = (rand() % MMAX)) < 2) ;
+		while ((k = (rand() % KMAX)) >= m || k < 1) ;
+
+		if (m > MMAX || k > KMAX)
+			continue;
+
+		gf_gen_rs_matrix(a, m, k);
+
+		// Make random data
+		for (i = 0; i < k; i++)
+			for (j = 0; j < TEST_LEN; j++)
+				buffs[i][j] = rand();
+
+		// Make parity vects
+		for (i = k; i < m; i++) {
+			for (j = 0; j < k; j++)
+				gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
+#ifndef USEREF
+			FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]);
+#else
+			gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]);
+#endif
+		}
+
+		// Random errors
+		memset(src_in_err, 0, TEST_SOURCES);
+		for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
+			err = 1 & rand();
+			src_in_err[i] = err;
+			if (err)
+				src_err_list[nerrs++] = i;
+		}
+		if (nerrs == 0) {	// should have at least one error
+			while ((err = (rand() % KMAX)) >= k) ;
+			src_err_list[nerrs++] = err;
+			src_in_err[err] = 1;
+		}
+		// construct b by removing error rows
+		for (i = 0, r = 0; i < k; i++, r++) {
+			while (src_in_err[r]) {
+				r++;
+				continue;
+			}
+			for (j = 0; j < k; j++)
+				b[k * i + j] = a[k * r + j];
+		}
+
+		if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
+			printf("BAD MATRIX\n");
+
+		for (i = 0, r = 0; i < k; i++, r++) {
+			while (src_in_err[r]) {
+				r++;
+				continue;
+			}
+			recov[i] = buffs[r];
+		}
+
+		// Recover data
+		for (i = 0; i < nerrs; i++) {
+			for (j = 0; j < k; j++)
+				gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
+#ifndef USEREF
+			FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff);
+#else
+			gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff);
+#endif
+			if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
+				printf("Fail error recovery (%d, %d, %d)", m, k, nerrs);
+				printf(" - erase list = ");
+				for (j = 0; j < nerrs; j++)
+					printf(" %d", src_err_list[j]);
+				printf("\na:\n");
+				dump_u8xu8((u8 *) a, m, k);
+				printf("inv b:\n");
+				dump_u8xu8((u8 *) d, k, k);
+				printf("orig data:\n");
+				dump_matrix(buffs, m, 25);
+				printf("orig :");
+				dump(buffs[src_err_list[i]], 25);
+				printf("recov %d:", src_err_list[i]);
+				dump(temp_buff, 25);
+				return -1;
+			}
+		}
+		putchar('.');
+	}
+
+	// Run tests at end of buffer for Electric Fence
+	align = (LEN_ALIGN_CHK_B != 0) ?
1 : 16; + for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) { + for (i = 0; i < TEST_SOURCES; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end + efence_buffs[i] = buffs[i] + TEST_LEN - size; + + for (i = 0; i < TEST_SOURCES; i++) + g[i] = rand(); + + for (i = 0; i < TEST_SOURCES; i++) + gf_vect_mul_init(g[i], &g_tbls[i * 32]); + + gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref); + FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest); + + if (0 != memcmp(dest_ref, dest, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 3\n"); + dump_matrix(efence_buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref, align); + printf("dprod:"); + dump(dest, align); + return -1; + } + + putchar('.'); + } + + // Test rand ptr alignment if available + + for (rtest = 0; rtest < RANDOMS; rtest++) { + size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1); + srcs = rand() % TEST_SOURCES; + if (srcs == 0) + continue; + + offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B; + // Add random offsets + for (i = 0; i < srcs; i++) + ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); + + udest_ptr = dest + (rand() & (PTR_ALIGN_CHK_B - offset)); + + memset(dest, 0, TEST_LEN); // zero pad to check write-over + + for (i = 0; i < srcs; i++) + for (j = 0; j < size; j++) + ubuffs[i][j] = rand(); + + for (i = 0; i < srcs; i++) + g[i] = rand(); + + for (i = 0; i < srcs; i++) + gf_vect_mul_init(g[i], &g_tbls[i * 32]); + + gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref); + + FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptr); + + if (memcmp(dest_ref, udest_ptr, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign srcs=%d\n", + srcs); + dump_matrix(ubuffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref, 25); + printf("dprod:"); + dump(udest_ptr, 25); + return -1; + } + // Confirm that padding around dests is unchanged + memset(dest_ref, 0, PTR_ALIGN_CHK_B); // Make reference zero buff + offset = udest_ptr - dest; + + if (memcmp(dest, dest_ref, offset)) { + printf("Fail rand ualign pad start\n"); + return -1; + } + if (memcmp(dest + offset + size, dest_ref, PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad end\n"); + return -1; + } + + putchar('.'); + } + + // Test all size alignment + align = (LEN_ALIGN_CHK_B != 0) ? 
1 : 16; + + for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) { + srcs = TEST_SOURCES; + + for (i = 0; i < srcs; i++) + for (j = 0; j < size; j++) + buffs[i][j] = rand(); + + for (i = 0; i < srcs; i++) + g[i] = rand(); + + for (i = 0; i < srcs; i++) + gf_vect_mul_init(g[i], &g_tbls[i * 32]); + + gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref); + + FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest); + + if (memcmp(dest_ref, dest, size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign len=%d\n", + size); + dump_matrix(buffs, 5, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref, 25); + printf("dprod:"); + dump(dest, 25); + return -1; + } + } + + printf("done all: Pass\n"); + return 0; +} diff --git a/src/isa-l/erasure_code/gf_vect_mad_avx.asm b/src/isa-l/erasure_code/gf_vect_mad_avx.asm new file mode 100644 index 000000000..f444d113b --- /dev/null +++ b/src/isa-l/erasure_code/gf_vect_mad_avx.asm @@ -0,0 +1,196 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
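+;
+; What this kernel computes, in a rough C sketch (pointer names here are
+; illustrative, not upstream API): for the single constant C selected by
+; vec_i, dest is updated with the GF(2^8) multiply-accumulate
+; dest[i] ^= C * src[i].  The 32-byte table at mul_array + 32*vec_i holds
+; C*0x00..C*0x0f followed by C*0x00,C*0x10..C*0xf0, so one vpshufb per
+; nibble replaces a 256-entry lookup:
+;
+;	const unsigned char *tbl_lo = mul_array + 32 * vec_i;	/* C * low nibbles */
+;	const unsigned char *tbl_hi = tbl_lo + 16;		/* C * high nibbles */
+;	for (int i = 0; i < len; i++)
+;		dest[i] ^= tbl_lo[src[i] & 0x0f] ^ tbl_hi[src[i] >> 4];
+;
+; Lengths below 16 fail early (sub len, 16 / jl .return_fail); a length that
+; is not a multiple of 16 is finished by re-running the last, overlapping
+; 16-byte block from the dest bytes saved in xtmpd, so no byte is
+; accumulated twice.
+;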
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_vect_mad_avx(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg0.w ecx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + %define tmp r11 + %define return rax + %define return.w eax + %define PS 8 + %define stack_size 16*3 + 3*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + +%macro FUNC_SAVE 0 + sub rsp, stack_size + vmovdqa [rsp+16*0],xmm6 + vmovdqa [rsp+16*1],xmm7 + vmovdqa [rsp+16*2],xmm8 + save_reg r12, 3*16 + 0*8 + save_reg r15, 3*16 + 1*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) +%endmacro + +%macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp+16*0] + vmovdqa xmm7, [rsp+16*1] + vmovdqa xmm8, [rsp+16*2] + mov r12, [rsp + 3*16 + 0*8] + mov r15, [rsp + 3*16 + 1*8] + add rsp, stack_size +%endmacro + +%elifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg0.w edi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define return rax + %define return.w eax + + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + +;;; gf_vect_mad_avx(len, vec, vec_i, mul_array, src, dest) +%define len arg0 +%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest arg5 +%define pos return +%define pos.w return.w + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + + +default rel + +[bits 64] +section .text + +%define xmask0f xmm8 +%define xgft_lo xmm7 +%define xgft_hi xmm6 + +%define x0 xmm0 +%define xtmpa xmm1 +%define xtmph xmm2 +%define xtmpl xmm3 +%define xd xmm4 +%define xtmpd xmm5 + +align 16 +global gf_vect_mad_avx:ISAL_SYM_TYPE_FUNCTION +func(gf_vect_mad_avx) + FUNC_SAVE + sub len, 16 + jl .return_fail + + xor pos, pos + vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + + sal vec_i, 5 ;Multiply by 32 + vmovdqu xgft_lo, [vec_i+mul_array] ;Load array Cx{00}, Cx{01}, Cx{02}, ... + vmovdqu xgft_hi, [vec_i+mul_array+16] ; " Cx{00}, Cx{10}, Cx{20}, ... 
, Cx{f0} + + XLDR xtmpd, [dest+len] ;backup the last 16 bytes in dest + +.loop16: + XLDR xd, [dest+pos] ;Get next dest vector +.loop16_overlap: + XLDR x0, [src+pos] ;Get next source vector + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + vpshufb xtmph, xgft_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl, xgft_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph, xtmph, xtmpl ;GF add high and low partials + vpxor xd, xd, xtmph ;xd += partial + + XSTR [dest+pos], xd + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-16 + vmovdqa xd, xtmpd ;Restore xd + jmp .loop16_overlap ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data + +align 16 + +mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f + +;;; func core, ver, snum +slversion gf_vect_mad_avx, 02, 01, 0201 diff --git a/src/isa-l/erasure_code/gf_vect_mad_avx2.asm b/src/isa-l/erasure_code/gf_vect_mad_avx2.asm new file mode 100644 index 000000000..b65d0aae3 --- /dev/null +++ b/src/isa-l/erasure_code/gf_vect_mad_avx2.asm @@ -0,0 +1,203 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
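+;
+; Implementation note for the AVX2 version below, as read from the code: the
+; 0x0f nibble mask is built in-register (vpinsrb + vpbroadcastb) instead of
+; being loaded from .data, so this file carries no mask0f constant; and the
+; 32-byte table is loaded once into a ymm, after which vperm2i128 replicates
+; the low/high 16-byte halves into lo|lo and hi|hi registers, because
+; vpshufb only shuffles within each 128-bit lane.  The tail scheme is the
+; same overlapped-block trick as the SSE/AVX versions, at a 32-byte stride.
+;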
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_vect_mad_avx2(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg0.w ecx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 ; must be saved and loaded + %define arg5 r15 + + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define return rax + %define return.w eax + %define PS 8 + %define stack_size 16*3 + 3*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + + %macro FUNC_SAVE 0 + sub rsp, stack_size + vmovdqa [rsp+16*0],xmm6 + vmovdqa [rsp+16*1],xmm7 + vmovdqa [rsp+16*2],xmm8 + save_reg r12, 3*16 + 0*8 + save_reg r15, 3*16 + 1*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp+16*0] + vmovdqa xmm7, [rsp+16*1] + vmovdqa xmm8, [rsp+16*2] + mov r12, [rsp + 3*16 + 0*8] + mov r15, [rsp + 3*16 + 1*8] + add rsp, stack_size + %endmacro + +%elifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg0.w edi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + + %define tmp r11 + %define tmp.w r11d + %define tmp.b r11b + %define return rax + %define return.w eax + + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + + +;;; gf_vect_mad_avx2(len, vec, vec_i, mul_array, src, dest) +%define len arg0 +%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest arg5 +%define pos return +%define pos.w return.w + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu + %define XSTR vmovdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + + +default rel + +[bits 64] +section .text + +%define xmask0f ymm8 +%define xmask0fx xmm8 +%define xgft_lo ymm7 +%define xgft_hi ymm6 + +%define x0 ymm0 +%define xtmpa ymm1 +%define xtmph ymm2 +%define xtmpl ymm3 +%define xd ymm4 +%define xtmpd ymm5 + +align 16 +global gf_vect_mad_avx2:ISAL_SYM_TYPE_FUNCTION +func(gf_vect_mad_avx2) + FUNC_SAVE + sub len, 32 + jl .return_fail + xor pos, pos + mov tmp.b, 0x0f + vpinsrb xmask0fx, xmask0fx, tmp.w, 0 + vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f... + + sal vec_i, 5 ;Multiply by 32 + vmovdqu xgft_lo, [vec_i+mul_array] ;Load array Cx{00}, Cx{01}, Cx{02}, ... + ; " Cx{00}, Cx{10}, Cx{20}, ... 
, Cx{f0} + vperm2i128 xgft_hi, xgft_lo, xgft_lo, 0x11 ; swapped to hi | hi + vperm2i128 xgft_lo, xgft_lo, xgft_lo, 0x00 ; swapped to lo | lo + + XLDR xtmpd, [dest+len] ;backup the last 32 bytes in dest + +.loop32: + XLDR xd, [dest+pos] ;Get next dest vector +.loop32_overlap: + XLDR x0, [src+pos] ;Get next source vector + + vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpand x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + vpshufb xtmph, xgft_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl, xgft_lo, xtmpa ;Lookup mul table of low nibble + vpxor xtmph, xtmph, xtmpl ;GF add high and low partials + vpxor xd, xd, xtmph ;xd += partial + + XSTR [dest+pos], xd + add pos, 32 ;Loop on 32 bytes at a time + cmp pos, len + jle .loop32 + + lea tmp, [len + 32] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-32 + vmovdqa xd, xtmpd ;Restore xd + jmp .loop32_overlap ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data + +;;; func core, ver, snum +slversion gf_vect_mad_avx2, 04, 01, 0202 diff --git a/src/isa-l/erasure_code/gf_vect_mad_avx512.asm b/src/isa-l/erasure_code/gf_vect_mad_avx512.asm new file mode 100644 index 000000000..44fb653f5 --- /dev/null +++ b/src/isa-l/erasure_code/gf_vect_mad_avx512.asm @@ -0,0 +1,193 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_vect_mad_avx512(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%ifdef HAVE_AS_KNOWS_AVX512 + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define return rax + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 ; must be saved and loaded + %define arg5 r15 + %define tmp r11 + %define return rax + %define PS 8 + %define stack_size 16*3 + 3*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + + %macro FUNC_SAVE 0 + sub rsp, stack_size + vmovdqa [rsp+16*0],xmm6 + vmovdqa [rsp+16*1],xmm7 + vmovdqa [rsp+16*2],xmm8 + save_reg r12, 3*16 + 0*8 + save_reg r15, 3*16 + 1*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp+16*0] + vmovdqa xmm7, [rsp+16*1] + vmovdqa xmm8, [rsp+16*2] + mov r12, [rsp + 3*16 + 0*8] + mov r15, [rsp + 3*16 + 1*8] + add rsp, stack_size + %endmacro +%endif + +;;; gf_vect_mad_avx512(len, vec, vec_i, mul_array, src, dest) +%define len arg0 +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest arg5 +%define pos return + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR vmovdqu8 + %define XSTR vmovdqu8 +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa + %else + %define XLDR vmovntdqa + %define XSTR vmovntdq + %endif +%endif + + +default rel + +[bits 64] +section .text + +%define x0 zmm0 +%define xtmpa zmm1 +%define xtmph zmm2 +%define xtmpl zmm3 +%define xd zmm4 +%define xtmpd zmm5 +%define xgft_hi zmm6 +%define xgft_lo zmm7 +%define xgft_loy ymm7 +%define xmask0f zmm8 + +align 16 +global gf_vect_mad_avx512:ISAL_SYM_TYPE_FUNCTION +func(gf_vect_mad_avx512) + FUNC_SAVE + sub len, 64 + jl .return_fail + xor pos, pos + mov tmp, 0x0f + vpbroadcastb xmask0f, tmp ;Construct mask 0x0f0f0f... 
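+	;; The 32-byte table (C times all low nibbles, then C times all high
+	;; nibbles) is loaded as a ymm below; vshufi64x2 then broadcasts each
+	;; 16-byte half to all four 128-bit lanes of a zmm, since vpshufb
+	;; shuffles within 128-bit lanes only.  k1 starts as all-ones, so
+	;; full 64-byte blocks run unmasked; for a trailing partial block the
+	;; code rebuilds k1 (the sarx below smears a sign bit) to cover only
+	;; the final len % 64 bytes, which keeps bytes already handled by the
+	;; previous iteration from being accumulated twice.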
+ sal vec_i, 5 ;Multiply by 32 + vmovdqu8 xgft_loy, [vec_i+mul_array] ;Load array Cx{00}..{0f}, Cx{00}..{f0} + vshufi64x2 xgft_hi, xgft_lo, xgft_lo, 0x55 + vshufi64x2 xgft_lo, xgft_lo, xgft_lo, 0x00 + mov tmp, -1 + kmovq k1, tmp + +.loop64: + XLDR xd, [dest+pos] ;Get next dest vector + XLDR x0, [src+pos] ;Get next source vector + + vpandq xtmpa, x0, xmask0f ;Mask low src nibble in bits 4-0 + vpsraw x0, x0, 4 ;Shift to put high nibble into bits 4-0 + vpandq x0, x0, xmask0f ;Mask high src nibble in bits 4-0 + + vpshufb xtmph {k1}{z}, xgft_hi, x0 ;Lookup mul table of high nibble + vpshufb xtmpl {k1}{z}, xgft_lo, xtmpa ;Lookup mul table of low nibble + vpxorq xtmph, xtmph, xtmpl ;GF add high and low partials + vpxorq xd, xd, xtmph ;xd += partial + + XSTR [dest+pos], xd + add pos, 64 ;Loop on 64 bytes at a time + cmp pos, len + jle .loop64 + + lea tmp, [len + 64] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, (1 << 63) + lea tmp, [len + 64 - 1] + and tmp, 63 + sarx pos, pos, tmp + kmovq k1, pos + mov pos, len ;Overlapped offset length-64 + jmp .loop64 ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +%else +%ifidn __OUTPUT_FORMAT__, win64 +global no_gf_vect_mad_avx512 +no_gf_vect_mad_avx512: +%endif +%endif ; ifdef HAVE_AS_KNOWS_AVX512 diff --git a/src/isa-l/erasure_code/gf_vect_mad_sse.asm b/src/isa-l/erasure_code/gf_vect_mad_sse.asm new file mode 100644 index 000000000..8d7e5eecd --- /dev/null +++ b/src/isa-l/erasure_code/gf_vect_mad_sse.asm @@ -0,0 +1,197 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
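+;
+; Note on the SSE form below: pshufb overwrites its destination operand, so
+; the loop copies xgft_hi/xgft_lo into scratch registers on every iteration
+; ("Reload const array registers") before shuffling; the AVX versions avoid
+; those copies with three-operand VEX encodings.
+;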
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_vect_mad_sse(len, vec, vec_i, mul_array, src, dest); +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg0.w ecx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define arg4 r12 + %define arg5 r15 + %define tmp r11 + %define return rax + %define return.w eax + %define PS 8 + %define stack_size 16*3 + 3*8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + +%macro FUNC_SAVE 0 + sub rsp, stack_size + movdqa [rsp+16*0],xmm6 + movdqa [rsp+16*1],xmm7 + movdqa [rsp+16*2],xmm8 + save_reg r12, 3*16 + 0*8 + save_reg r15, 3*16 + 1*8 + end_prolog + mov arg4, arg(4) + mov arg5, arg(5) +%endmacro + +%macro FUNC_RESTORE 0 + movdqa xmm6, [rsp+16*0] + movdqa xmm7, [rsp+16*1] + movdqa xmm8, [rsp+16*2] + mov r12, [rsp + 3*16 + 0*8] + mov r15, [rsp + 3*16 + 1*8] + add rsp, stack_size +%endmacro + +%elifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg0.w edi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define return rax + %define return.w eax + + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + +;;; gf_vect_mad_sse(len, vec, vec_i, mul_array, src, dest) +%define len arg0 +%define len.w arg0.w +%define vec arg1 +%define vec_i arg2 +%define mul_array arg3 +%define src arg4 +%define dest arg5 +%define pos return +%define pos.w return.w + +%ifndef EC_ALIGNED_ADDR +;;; Use Un-aligned load/store + %define XLDR movdqu + %define XSTR movdqu +%else +;;; Use Non-temporal load/stor + %ifdef NO_NT_LDST + %define XLDR movdqa + %define XSTR movdqa + %else + %define XLDR movntdqa + %define XSTR movntdq + %endif +%endif + +default rel + +[bits 64] +section .text + +%define xmask0f xmm8 +%define xgft_lo xmm7 +%define xgft_hi xmm6 + +%define x0 xmm0 +%define xtmpa xmm1 +%define xtmph xmm2 +%define xtmpl xmm3 +%define xd xmm4 +%define xtmpd xmm5 + + +align 16 +global gf_vect_mad_sse:ISAL_SYM_TYPE_FUNCTION +func(gf_vect_mad_sse) + FUNC_SAVE + sub len, 16 + jl .return_fail + + xor pos, pos + movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + sal vec_i, 5 ;Multiply by 32 + movdqu xgft_lo, [vec_i+mul_array] ;Load array Cx{00}, Cx{01}, Cx{02}, ... + movdqu xgft_hi, [vec_i+mul_array+16] ; " Cx{00}, Cx{10}, Cx{20}, ... 
, Cx{f0} + + XLDR xtmpd, [dest+len] ;backup the last 16 bytes in dest + +.loop16: + XLDR xd, [dest+pos] ;Get next dest vector +.loop16_overlap: + XLDR x0, [src+pos] ;Get next source vector + movdqa xtmph, xgft_hi ;Reload const array registers + movdqa xtmpl, xgft_lo + movdqa xtmpa, x0 ;Keep unshifted copy of src + psraw x0, 4 ;Shift to put high nibble into bits 4-0 + pand x0, xmask0f ;Mask high src nibble in bits 4-0 + pand xtmpa, xmask0f ;Mask low src nibble in bits 4-0 + pshufb xtmph, x0 ;Lookup mul table of high nibble + pshufb xtmpl, xtmpa ;Lookup mul table of low nibble + pxor xtmph, xtmpl ;GF add high and low partials + + pxor xd, xtmph + XSTR [dest+pos], xd ;Store result + + add pos, 16 ;Loop on 16 bytes at a time + cmp pos, len + jle .loop16 + + lea tmp, [len + 16] + cmp pos, tmp + je .return_pass + + ;; Tail len + mov pos, len ;Overlapped offset length-16 + movdqa xd, xtmpd ;Restore xd + jmp .loop16_overlap ;Do one more overlap pass + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame + +section .data + +align 16 + +mask0f: dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f + +;;; func core, ver, snum +slversion gf_vect_mad_sse, 00, 01, 0200 diff --git a/src/isa-l/erasure_code/gf_vect_mad_test.c b/src/isa-l/erasure_code/gf_vect_mad_test.c new file mode 100644 index 000000000..cecbc1669 --- /dev/null +++ b/src/isa-l/erasure_code/gf_vect_mad_test.c @@ -0,0 +1,519 @@ +/********************************************************************** + Copyright(c) 2011-2015 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>	// for memset, memcmp
+#include "erasure_code.h"
+#include "types.h"
+
+#ifndef ALIGN_SIZE
+# define ALIGN_SIZE 32
+#endif
+
+#ifndef FUNCTION_UNDER_TEST
+//By default, test multi-binary version
+# define FUNCTION_UNDER_TEST gf_vect_mad
+# define REF_FUNCTION gf_vect_dot_prod
+# define VECT 1
+#endif
+
+#ifndef TEST_MIN_SIZE
+# define TEST_MIN_SIZE 64
+#endif
+
+#define str(s) #s
+#define xstr(s) str(s)
+
+#define TEST_LEN 8192
+#define TEST_SIZE (TEST_LEN/2)
+#define TEST_MEM TEST_SIZE
+#define TEST_LOOPS 20000
+#define TEST_TYPE_STR ""
+
+#ifndef TEST_SOURCES
+# define TEST_SOURCES 16
+#endif
+#ifndef RANDOMS
+# define RANDOMS 20
+#endif
+
+#ifdef EC_ALIGNED_ADDR
+// Define power of 2 range to check ptr, len alignment
+# define PTR_ALIGN_CHK_B 0
+# define LEN_ALIGN_CHK_B 0	// 0 for aligned only
+#else
+// Define power of 2 range to check ptr, len alignment
+# define PTR_ALIGN_CHK_B ALIGN_SIZE
+# define LEN_ALIGN_CHK_B ALIGN_SIZE	// 0 for aligned only
+#endif
+
+#define str(s) #s
+#define xstr(s) str(s)
+
+typedef unsigned char u8;
+
+#if (VECT == 1)
+# define LAST_ARG *dest
+#else
+# define LAST_ARG **dest
+#endif
+
+extern void FUNCTION_UNDER_TEST(int len, int vec, int vec_i, unsigned char *gftbls,
+				unsigned char *src, unsigned char LAST_ARG);
+extern void REF_FUNCTION(int len, int vlen, unsigned char *gftbls, unsigned char **src,
+			 unsigned char LAST_ARG);
+
+void dump(unsigned char *buf, int len)
+{
+	int i;
+	for (i = 0; i < len;) {
+		printf(" %2x", 0xff & buf[i++]);
+		if (i % 32 == 0)
+			printf("\n");
+	}
+	printf("\n");
+}
+
+void dump_matrix(unsigned char **s, int k, int m)
+{
+	int i, j;
+	for (i = 0; i < k; i++) {
+		for (j = 0; j < m; j++) {
+			printf(" %2x", s[i][j]);
+		}
+		printf("\n");
+	}
+	printf("\n");
+}
+
+void dump_u8xu8(unsigned char *s, int k, int m)
+{
+	int i, j;
+	for (i = 0; i < k; i++) {
+		for (j = 0; j < m; j++) {
+			printf(" %2x", 0xff & s[j + (i * m)]);
+		}
+		printf("\n");
+	}
+	printf("\n");
+}
+
+int main(int argc, char *argv[])
+{
+	int i, j, rtest, srcs;
+	void *buf;
+	u8 gf[6][TEST_SOURCES];
+	u8 *g_tbls;
+	u8 *dest_ref[VECT];
+	u8 *dest_ptrs[VECT], *buffs[TEST_SOURCES];
+	int vector = VECT;
+
+	int align, size;
+	unsigned char *efence_buffs[TEST_SOURCES];
+	unsigned int offset;
+	u8 *ubuffs[TEST_SOURCES];
+	u8 *udest_ptrs[VECT];
+	printf("test " xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
+
+	// Allocate the arrays
+	for (i = 0; i < TEST_SOURCES; i++) {
+		if (posix_memalign(&buf, 64, TEST_LEN)) {
+			printf("alloc error: Fail");
+			return -1;
+		}
+		buffs[i] = buf;
+	}
+
+	if (posix_memalign(&buf, 16, 2 * (vector * TEST_SOURCES * 32))) {
+		printf("alloc error: Fail");
+		return -1;
+	}
+	g_tbls = buf;
+
+	for (i = 0; i < vector; i++) {
+		if (posix_memalign(&buf, 64, TEST_LEN)) {
+			printf("alloc error: Fail");
+			return -1;
+		}
+		dest_ptrs[i] = buf;
+		memset(dest_ptrs[i], 0, TEST_LEN);
+	}
+
+	for (i = 0; i < vector; i++) {
+		if (posix_memalign(&buf, 64, TEST_LEN)) {
+			printf("alloc error: Fail");
+			return -1;
+		}
+		dest_ref[i] = buf;
+		memset(dest_ref[i], 0, TEST_LEN);
+	}
+
+	// Test of all zeros
+	for (i = 0; i < TEST_SOURCES; i++)
+		memset(buffs[i], 0, TEST_LEN);
+
+	switch (vector) {	// cases fall through, setting gf[vector-1]..gf[0]
+	case 6:
+		memset(gf[5], 0xe6, TEST_SOURCES);
+	case 5:
+		memset(gf[4], 4, TEST_SOURCES);
+	case 4:
+		memset(gf[3], 9, TEST_SOURCES);
+	case 3:
+		memset(gf[2], 7, TEST_SOURCES);
+	case 2:
+		memset(gf[1], 1, TEST_SOURCES);
+	case 1:
+		memset(gf[0], 2,
TEST_SOURCES); + break; + default: + return -1; + } + + for (i = 0; i < TEST_SOURCES; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < vector; i++) + for (j = 0; j < TEST_SOURCES; j++) { + gf[i][j] = rand(); + gf_vect_mul_init(gf[i][j], &g_tbls[i * (32 * TEST_SOURCES) + j * 32]); + } + + for (i = 0; i < vector; i++) + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[i * 32 * TEST_SOURCES], + buffs, dest_ref[i]); + + for (i = 0; i < vector; i++) + memset(dest_ptrs[i], 0, TEST_LEN); + for (i = 0; i < TEST_SOURCES; i++) { +#if (VECT == 1) + FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i], *dest_ptrs); +#else + FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i], dest_ptrs); +#endif + } + for (i = 0; i < vector; i++) { + if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test%d\n", i); + dump_matrix(buffs, vector, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref[i], 25); + printf("dprod_dut:"); + dump(dest_ptrs[i], 25); + return -1; + } + } + +#if (VECT == 1) + REF_FUNCTION(TEST_LEN, TEST_SOURCES, g_tbls, buffs, *dest_ref); +#else + REF_FUNCTION(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ref); +#endif + for (i = 0; i < vector; i++) { + if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) { + printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test%d\n", i); + dump_matrix(buffs, vector, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref[i], 25); + printf("dprod_dut:"); + dump(dest_ptrs[i], 25); + return -1; + } + } + + putchar('.'); + + // Rand data test + + for (rtest = 0; rtest < RANDOMS; rtest++) { + for (i = 0; i < TEST_SOURCES; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < vector; i++) + for (j = 0; j < TEST_SOURCES; j++) { + gf[i][j] = rand(); + gf_vect_mul_init(gf[i][j], + &g_tbls[i * (32 * TEST_SOURCES) + j * 32]); + } + + for (i = 0; i < vector; i++) + gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, + &g_tbls[i * 32 * TEST_SOURCES], buffs, + dest_ref[i]); + + for (i = 0; i < vector; i++) + memset(dest_ptrs[i], 0, TEST_LEN); + for (i = 0; i < TEST_SOURCES; i++) { +#if (VECT == 1) + FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i], + *dest_ptrs); +#else + FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, i, g_tbls, buffs[i], + dest_ptrs); +#endif + } + for (i = 0; i < vector; i++) { + if (0 != memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test%d %d\n", + i, rtest); + dump_matrix(buffs, vector, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref[i], 25); + printf("dprod_dut:"); + dump(dest_ptrs[i], 25); + return -1; + } + } + + putchar('.'); + } + + // Rand data test with varied parameters + for (rtest = 0; rtest < RANDOMS; rtest++) { + for (srcs = TEST_SOURCES; srcs > 0; srcs--) { + for (i = 0; i < srcs; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < vector; i++) + for (j = 0; j < srcs; j++) { + gf[i][j] = rand(); + gf_vect_mul_init(gf[i][j], + &g_tbls[i * (32 * srcs) + j * 32]); + } + + for (i = 0; i < vector; i++) + gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[i * 32 * srcs], + buffs, dest_ref[i]); + + for (i = 0; i < vector; i++) + memset(dest_ptrs[i], 0, TEST_LEN); + for (i = 0; i < srcs; i++) { +#if (VECT == 1) + FUNCTION_UNDER_TEST(TEST_LEN, srcs, i, g_tbls, buffs[i], + *dest_ptrs); +#else + FUNCTION_UNDER_TEST(TEST_LEN, srcs, i, g_tbls, buffs[i], + dest_ptrs); +#endif + + } + for (i = 0; i < vector; i++) { + if (0 != 
memcmp(dest_ref[i], dest_ptrs[i], TEST_LEN)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test%d srcs=%d\n", i, srcs); + dump_matrix(buffs, vector, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref[i], 25); + printf("dprod_dut:"); + dump(dest_ptrs[i], 25); + return -1; + } + } + + putchar('.'); + } + } + + // Run tests at end of buffer for Electric Fence + align = (LEN_ALIGN_CHK_B != 0) ? 1 : ALIGN_SIZE; + for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) { + for (i = 0; i < TEST_SOURCES; i++) + for (j = 0; j < TEST_LEN; j++) + buffs[i][j] = rand(); + + for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end + efence_buffs[i] = buffs[i] + TEST_LEN - size; + + for (i = 0; i < vector; i++) + for (j = 0; j < TEST_SOURCES; j++) { + gf[i][j] = rand(); + gf_vect_mul_init(gf[i][j], + &g_tbls[i * (32 * TEST_SOURCES) + j * 32]); + } + + for (i = 0; i < vector; i++) + gf_vect_dot_prod_base(size, TEST_SOURCES, + &g_tbls[i * 32 * TEST_SOURCES], efence_buffs, + dest_ref[i]); + + for (i = 0; i < vector; i++) + memset(dest_ptrs[i], 0, size); + for (i = 0; i < TEST_SOURCES; i++) { +#if (VECT == 1) + FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, efence_buffs[i], + *dest_ptrs); +#else + FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, efence_buffs[i], + dest_ptrs); +#endif + } + for (i = 0; i < vector; i++) { + if (0 != memcmp(dest_ref[i], dest_ptrs[i], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test%d size=%d\n", i, size); + dump_matrix(buffs, vector, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref[i], TEST_MIN_SIZE + align); + printf("dprod_dut:"); + dump(dest_ptrs[i], TEST_MIN_SIZE + align); + return -1; + } + } + + putchar('.'); + } + + // Test rand ptr alignment if available + + for (rtest = 0; rtest < RANDOMS; rtest++) { + size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1); + srcs = rand() % TEST_SOURCES; + if (srcs == 0) + continue; + + offset = (PTR_ALIGN_CHK_B != 0) ? 
1 : PTR_ALIGN_CHK_B; + // Add random offsets + for (i = 0; i < srcs; i++) + ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); + + for (i = 0; i < vector; i++) { + udest_ptrs[i] = dest_ptrs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); + memset(dest_ptrs[i], 0, TEST_LEN); // zero pad to check write-over + } + + for (i = 0; i < srcs; i++) + for (j = 0; j < size; j++) + ubuffs[i][j] = rand(); + + for (i = 0; i < vector; i++) + for (j = 0; j < srcs; j++) { + gf[i][j] = rand(); + gf_vect_mul_init(gf[i][j], &g_tbls[i * (32 * srcs) + j * 32]); + } + + for (i = 0; i < vector; i++) + gf_vect_dot_prod_base(size, srcs, &g_tbls[i * 32 * srcs], ubuffs, + dest_ref[i]); + + for (i = 0; i < srcs; i++) { +#if (VECT == 1) + FUNCTION_UNDER_TEST(size, srcs, i, g_tbls, ubuffs[i], *udest_ptrs); +#else + FUNCTION_UNDER_TEST(size, srcs, i, g_tbls, ubuffs[i], udest_ptrs); +#endif + } + for (i = 0; i < vector; i++) { + if (0 != memcmp(dest_ref[i], udest_ptrs[i], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test%d ualign srcs=%d\n", i, srcs); + dump_matrix(buffs, vector, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref[i], 25); + printf("dprod_dut:"); + dump(udest_ptrs[i], 25); + return -1; + } + } + + // Confirm that padding around dests is unchanged + memset(dest_ref[0], 0, PTR_ALIGN_CHK_B); // Make reference zero buff + + for (i = 0; i < vector; i++) { + offset = udest_ptrs[i] - dest_ptrs[i]; + if (memcmp(dest_ptrs[i], dest_ref[0], offset)) { + printf("Fail rand ualign pad1 start\n"); + return -1; + } + if (memcmp + (dest_ptrs[i] + offset + size, dest_ref[0], + PTR_ALIGN_CHK_B - offset)) { + printf("Fail rand ualign pad1 end\n"); + return -1; + } + } + + putchar('.'); + } + + // Test all size alignment + align = (LEN_ALIGN_CHK_B != 0) ? 1 : ALIGN_SIZE; + + for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) { + for (i = 0; i < TEST_SOURCES; i++) + for (j = 0; j < size; j++) + buffs[i][j] = rand(); + + for (i = 0; i < vector; i++) { + for (j = 0; j < TEST_SOURCES; j++) { + gf[i][j] = rand(); + gf_vect_mul_init(gf[i][j], + &g_tbls[i * (32 * TEST_SOURCES) + j * 32]); + } + memset(dest_ptrs[i], 0, TEST_LEN); // zero pad to check write-over + } + + for (i = 0; i < vector; i++) + gf_vect_dot_prod_base(size, TEST_SOURCES, + &g_tbls[i * 32 * TEST_SOURCES], buffs, + dest_ref[i]); + + for (i = 0; i < TEST_SOURCES; i++) { +#if (VECT == 1) + FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, buffs[i], + *dest_ptrs); +#else + FUNCTION_UNDER_TEST(size, TEST_SOURCES, i, g_tbls, buffs[i], + dest_ptrs); +#endif + } + for (i = 0; i < vector; i++) { + if (0 != memcmp(dest_ref[i], dest_ptrs[i], size)) { + printf("Fail rand " xstr(FUNCTION_UNDER_TEST) + " test%d ualign len=%d\n", i, size); + dump_matrix(buffs, vector, TEST_SOURCES); + printf("dprod_base:"); + dump(dest_ref[i], 25); + printf("dprod_dut:"); + dump(dest_ptrs[i], 25); + return -1; + } + } + + putchar('.'); + + } + + printf("Pass\n"); + return 0; + +} diff --git a/src/isa-l/erasure_code/gf_vect_mul_avx.asm b/src/isa-l/erasure_code/gf_vect_mul_avx.asm new file mode 100644 index 000000000..c1a9b9799 --- /dev/null +++ b/src/isa-l/erasure_code/gf_vect_mul_avx.asm @@ -0,0 +1,164 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2015 Intel Corporation All rights reserved. 
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_vect_mul_avx(len, mul_array, src, dest) +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define return rax + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE + +%elifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define return rax + %define stack_size 5*16 + 8 ; must be an odd multiple of 8 + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + save_xmm128 xmm6, 0*16 + save_xmm128 xmm7, 1*16 + save_xmm128 xmm13, 2*16 + save_xmm128 xmm14, 3*16 + save_xmm128 xmm15, 4*16 + end_prolog + %endmacro + + %macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp + 0*16] + vmovdqa xmm7, [rsp + 1*16] + vmovdqa xmm13, [rsp + 2*16] + vmovdqa xmm14, [rsp + 3*16] + vmovdqa xmm15, [rsp + 4*16] + add rsp, stack_size + %endmacro + +%endif + + +%define len arg0 +%define mul_array arg1 +%define src arg2 +%define dest arg3 +%define pos return + + +;;; Use Non-temporal load/stor +%ifdef NO_NT_LDST + %define XLDR vmovdqa + %define XSTR vmovdqa +%else + %define XLDR vmovntdqa + %define XSTR vmovntdq +%endif + +default rel + +[bits 64] +section .text + +%define xmask0f xmm15 +%define xgft_lo xmm14 +%define xgft_hi xmm13 + +%define x0 xmm0 +%define xtmp1a xmm1 +%define xtmp1b xmm2 +%define xtmp1c xmm3 +%define x1 xmm4 +%define xtmp2a xmm5 +%define xtmp2b xmm6 +%define xtmp2c xmm7 + +align 16 +global gf_vect_mul_avx:ISAL_SYM_TYPE_FUNCTION +func(gf_vect_mul_avx) + FUNC_SAVE + mov pos, 0 + vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + vmovdqu xgft_lo, [mul_array] ;Load array Cx{00}, Cx{01}, Cx{02}, ... + vmovdqu xgft_hi, [mul_array+16] ; " Cx{00}, Cx{10}, Cx{20}, ... 
, Cx{f0}
+
+loop32:
+	XLDR x0, [src+pos]	;Get next source vector
+	XLDR x1, [src+pos+16]	;Get next source vector + 16B ahead
+	add pos, 32		;Loop on 32 bytes at a time
+	cmp pos, len
+	vpand xtmp1a, x0, xmask0f	;Mask low src nibble in bits 4-0
+	vpand xtmp2a, x1, xmask0f
+	vpsraw x0, x0, 4	;Shift to put high nibble into bits 4-0
+	vpsraw x1, x1, 4
+	vpand x0, x0, xmask0f	;Mask high src nibble in bits 4-0
+	vpand x1, x1, xmask0f
+	vpshufb xtmp1b, xgft_hi, x0	;Lookup mul table of high nibble
+	vpshufb xtmp1c, xgft_lo, xtmp1a	;Lookup mul table of low nibble
+	vpshufb xtmp2b, xgft_hi, x1	;Lookup mul table of high nibble
+	vpshufb xtmp2c, xgft_lo, xtmp2a	;Lookup mul table of low nibble
+	vpxor xtmp1b, xtmp1b, xtmp1c	;GF add high and low partials
+	vpxor xtmp2b, xtmp2b, xtmp2c
+	XSTR [dest+pos-32], xtmp1b	;Store result
+	XSTR [dest+pos-16], xtmp2b	;Store +16B result
+	jl loop32
+
+
+return_pass:
+	FUNC_RESTORE
+	sub pos, len
+	ret
+
+return_fail:
+	FUNC_RESTORE
+	mov return, 1
+	ret
+
+endproc_frame
+
+section .data
+
+align 16
+
+mask0f:
+dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+
+;;; func core, ver, snum
+slversion gf_vect_mul_avx, 01, 03, 0036
diff --git a/src/isa-l/erasure_code/gf_vect_mul_base_test.c b/src/isa-l/erasure_code/gf_vect_mul_base_test.c
new file mode 100644
index 000000000..c47d2365d
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_vect_mul_base_test.c
@@ -0,0 +1,129 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>	// for memset
+#include "erasure_code.h"
+
+#define TEST_SIZE 8192
+#define TEST_MEM TEST_SIZE
+#define TEST_LOOPS 100000
+#define TEST_TYPE_STR ""
+
+typedef unsigned char u8;
+
+int main(int argc, char *argv[])
+{
+	int i;
+	u8 *buff1, *buff2, *buff3, gf_const_tbl[64], a = 2;
+	int align, size;
+	unsigned char *efence_buff1;
+	unsigned char *efence_buff2;
+
+	printf("gf_vect_mul_base_test:\n");
+
+	gf_vect_mul_init(a, gf_const_tbl);
+
+	buff1 = (u8 *) malloc(TEST_SIZE);
+	buff2 = (u8 *) malloc(TEST_SIZE);
+	buff3 = (u8 *) malloc(TEST_SIZE);
+
+	if (NULL == buff1 || NULL == buff2 || NULL == buff3) {
+		printf("buffer alloc error\n");
+		return -1;
+	}
+	// Fill with rand data
+	for (i = 0; i < TEST_SIZE; i++)
+		buff1[i] = rand();
+
+	gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff2);
+
+	for (i = 0; i < TEST_SIZE; i++)
+		if (gf_mul(a, buff1[i]) != buff2[i]) {
+			printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, buff1[i], buff2[i],
+			       gf_mul(2, buff1[i]));
+			return 1;
+		}
+
+	gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff3);
+
+	// Check reference function
+	for (i = 0; i < TEST_SIZE; i++)
+		if (buff2[i] != buff3[i]) {
+			printf("fail at %d, 0x%x x 0x%x = 0x%x (0x%x)\n",
+			       i, a, buff1[i], buff2[i], gf_mul(a, buff1[i]));
+			return 1;
+		}
+
+	for (i = 0; i < TEST_SIZE; i++)
+		buff1[i] = rand();
+
+	// Check each possible constant
+	printf("Random tests ");
+	for (a = 0; a != 255; a++) {
+		gf_vect_mul_init(a, gf_const_tbl);
+		gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff2);
+
+		for (i = 0; i < TEST_SIZE; i++)
+			if (gf_mul(a, buff1[i]) != buff2[i]) {
+				printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n",
+				       i, a, buff1[i], buff2[i], gf_mul(a, buff1[i]));
+				return 1;
+			}
+		putchar('.');
+	}
+
+	// Run tests at end of buffer for Electric Fence
+	align = 32;
+	a = 2;
+
+	gf_vect_mul_init(a, gf_const_tbl);
+	for (size = 0; size < TEST_SIZE; size += align) {
+		// Line up TEST_SIZE from end
+		efence_buff1 = buff1 + size;
+		efence_buff2 = buff2 + size;
+
+		gf_vect_mul_base(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff2);
+
+		for (i = 0; i < TEST_SIZE - size; i++)
+			if (gf_mul(a, efence_buff1[i]) != efence_buff2[i]) {
+				printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n",
+				       i, efence_buff1[i], efence_buff2[i],
+				       gf_mul(2, efence_buff1[i]));
+				return 1;
+			}
+
+		putchar('.');
+	}
+
+	printf(" done: Pass\n");
+	return 0;
+}
diff --git a/src/isa-l/erasure_code/gf_vect_mul_perf.c b/src/isa-l/erasure_code/gf_vect_mul_perf.c
new file mode 100644
index 000000000..58194cceb
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_vect_mul_perf.c
@@ -0,0 +1,90 @@
+/**********************************************************************
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>	// for memset
+#include "erasure_code.h"
+#include "test.h"
+
+//#define CACHED_TEST
+#ifdef CACHED_TEST
+// Cached test, loop many times over small dataset
+# define TEST_LEN 8*1024
+# define TEST_TYPE_STR "_warm"
+#else
+# ifndef TEST_CUSTOM
+// Uncached test. Pull from large mem base.
+# define TEST_SOURCES 10
+# define GT_L3_CACHE 32*1024*1024	/* some number > last level cache */
+# define TEST_LEN GT_L3_CACHE / 2
+# define TEST_TYPE_STR "_cold"
+# else
+# define TEST_TYPE_STR "_cus"
+# endif
+#endif
+
+#define TEST_MEM (2 * TEST_LEN)
+
+typedef unsigned char u8;
+
+void gf_vect_mul_perf(u8 a, u8 * gf_const_tbl, u8 * buff1, u8 * buff2)
+{
+	gf_vect_mul_init(a, gf_const_tbl);
+	gf_vect_mul(TEST_LEN, gf_const_tbl, buff1, buff2);
+}
+
+int main(int argc, char *argv[])
+{
+	u8 *buff1, *buff2, gf_const_tbl[64], a = 2;
+	struct perf start;
+
+	printf("gf_vect_mul_perf:\n");
+
+	// Allocate large mem region
+	buff1 = (u8 *) malloc(TEST_LEN);
+	buff2 = (u8 *) malloc(TEST_LEN);
+	if (NULL == buff1 || NULL == buff2) {
+		printf("Failed to allocate %dB\n", TEST_LEN);
+		return 1;
+	}
+
+	memset(buff1, 0, TEST_LEN);
+	memset(buff2, 0, TEST_LEN);
+
+	printf("Start timed tests\n");
+	fflush(0);
+
+	BENCHMARK(&start, BENCHMARK_TIME, gf_vect_mul_perf(a, gf_const_tbl, buff1, buff2));
+
+	printf("gf_vect_mul" TEST_TYPE_STR ": ");
+	perf_print(start, (long long)TEST_LEN);
+
+	return 0;
+}
diff --git a/src/isa-l/erasure_code/gf_vect_mul_sse.asm b/src/isa-l/erasure_code/gf_vect_mul_sse.asm
new file mode 100644
index 000000000..36323d639
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_vect_mul_sse.asm
@@ -0,0 +1,170 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; +;;; gf_vect_mul_sse(len, mul_array, src, dest) +;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define return rax + %define func(x) x: + %define FUNC_SAVE + %define FUNC_RESTORE + +%elifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define return rax + %define stack_size 5*16 + 8 ; must be an odd multiple of 8 + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + save_xmm128 xmm6, 0*16 + save_xmm128 xmm7, 1*16 + save_xmm128 xmm13, 2*16 + save_xmm128 xmm14, 3*16 + save_xmm128 xmm15, 4*16 + end_prolog + %endmacro + + %macro FUNC_RESTORE 0 + movdqa xmm6, [rsp + 0*16] + movdqa xmm7, [rsp + 1*16] + movdqa xmm13, [rsp + 2*16] + movdqa xmm14, [rsp + 3*16] + movdqa xmm15, [rsp + 4*16] + add rsp, stack_size + %endmacro + +%endif + + +%define len arg0 +%define mul_array arg1 +%define src arg2 +%define dest arg3 +%define pos return + + +;;; Use Non-temporal load/stor +%ifdef NO_NT_LDST + %define XLDR movdqa + %define XSTR movdqa +%else + %define XLDR movntdqa + %define XSTR movntdq +%endif + +default rel + +[bits 64] +section .text + +%define xmask0f xmm15 +%define xgft_lo xmm14 +%define xgft_hi xmm13 + +%define x0 xmm0 +%define xtmp1a xmm1 +%define xtmp1b xmm2 +%define xtmp1c xmm3 +%define x1 xmm4 +%define xtmp2a xmm5 +%define xtmp2b xmm6 +%define xtmp2c xmm7 + + +align 16 +global gf_vect_mul_sse:ISAL_SYM_TYPE_FUNCTION +func(gf_vect_mul_sse) + FUNC_SAVE + mov pos, 0 + movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte + movdqu xgft_lo, [mul_array] ;Load array Cx{00}, Cx{01}, Cx{02}, ... + movdqu xgft_hi, [mul_array+16] ; " Cx{00}, Cx{10}, Cx{20}, ... 
+
+loop32:
+	XLDR	x0, [src+pos]		;Get next source vector
+	XLDR	x1, [src+pos+16]	;Get next source vector + 16B ahead
+	movdqa	xtmp1b, xgft_hi		;Reload const array registers
+	movdqa	xtmp1c, xgft_lo
+	movdqa	xtmp2b, xgft_hi
+	movdqa	xtmp2c, xgft_lo
+	movdqa	xtmp1a, x0		;Keep unshifted copy of src
+	movdqa	xtmp2a, x1
+	psraw	x0, 4			;Shift to put high nibble into bits 4-0
+	psraw	x1, 4
+	pand	xtmp1a, xmask0f		;Mask low src nibble in bits 4-0
+	pand	xtmp2a, xmask0f
+	pand	x0, xmask0f		;Mask high src nibble in bits 4-0
+	pand	x1, xmask0f
+	pshufb	xtmp1b, x0		;Lookup mul table of high nibble
+	pshufb	xtmp1c, xtmp1a		;Lookup mul table of low nibble
+	pshufb	xtmp2b, x1
+	pshufb	xtmp2c, xtmp2a
+	pxor	xtmp1b, xtmp1c		;GF add high and low partials
+	pxor	xtmp2b, xtmp2c
+	XSTR	[dest+pos], xtmp1b	;Store result
+	XSTR	[dest+pos+16], xtmp2b	;Store +16B result
+	add	pos, 32			;Loop on 32 bytes at a time
+	cmp	pos, len
+	jl	loop32
+
+
+return_pass:
+	sub	pos, len
+	FUNC_RESTORE
+	ret
+
+return_fail:
+	mov	return, 1
+	FUNC_RESTORE
+	ret
+
+endproc_frame
+
+section .data
+
+align 16
+mask0f:
+dq 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+
+;;;       func             core, ver, snum
+slversion gf_vect_mul_sse, 00,   03,  0034
diff --git a/src/isa-l/erasure_code/gf_vect_mul_test.c b/src/isa-l/erasure_code/gf_vect_mul_test.c
new file mode 100644
index 000000000..b1a406624
--- /dev/null
+++ b/src/isa-l/erasure_code/gf_vect_mul_test.c
@@ -0,0 +1,158 @@
+/**********************************************************************
+  Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "erasure_code.h"
+
+#define TEST_SIZE (128*1024)
+
+typedef unsigned char u8;
+
+int main(int argc, char *argv[])
+{
+	int i;
+	u8 *buff1, *buff2, *buff3, gf_const_tbl[64], a = 2;
+	int tsize;
+	int align, size;
+	unsigned char *efence_buff1;
+	unsigned char *efence_buff2;
+	unsigned char *efence_buff3;
+
+	printf("gf_vect_mul_test: ");
+
+	gf_vect_mul_init(a, gf_const_tbl);
+
+	buff1 = (u8 *) malloc(TEST_SIZE);
+	buff2 = (u8 *) malloc(TEST_SIZE);
+	buff3 = (u8 *) malloc(TEST_SIZE);
+
+	if (NULL == buff1 || NULL == buff2 || NULL == buff3) {
+		printf("buffer alloc error\n");
+		return -1;
+	}
+	// Fill with random data
+	for (i = 0; i < TEST_SIZE; i++)
+		buff1[i] = rand();
+
+	gf_vect_mul(TEST_SIZE, gf_const_tbl, buff1, buff2);
+
+	for (i = 0; i < TEST_SIZE; i++) {
+		if (gf_mul(a, buff1[i]) != buff2[i]) {
+			printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i,
+			       buff1[i], buff2[i], gf_mul(2, buff1[i]));
+			return -1;
+		}
+	}
+
+	gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff3);
+
+	// Check reference function
+	for (i = 0; i < TEST_SIZE; i++) {
+		if (buff2[i] != buff3[i]) {
+			printf("fail at %d, 0x%x x 0x%x = 0x%x (0x%x)\n",
+			       i, a, buff1[i], buff2[i], gf_mul(a, buff1[i]));
+			return -1;
+		}
+	}
+
+	for (i = 0; i < TEST_SIZE; i++)
+		buff1[i] = rand();
+
+	// Check each possible constant
+	for (a = 0; a != 255; a++) {
+		gf_vect_mul_init(a, gf_const_tbl);
+		gf_vect_mul(TEST_SIZE, gf_const_tbl, buff1, buff2);
+
+		for (i = 0; i < TEST_SIZE; i++)
+			if (gf_mul(a, buff1[i]) != buff2[i]) {
+				printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n",
+				       i, a, buff1[i], buff2[i], gf_mul(2, buff1[i]));
+				return -1;
+			}
+		putchar('.');
+	}
+
+	// Check buffer len
+	for (tsize = TEST_SIZE; tsize > 0; tsize -= 32) {
+		a = rand();
+		gf_vect_mul_init(a, gf_const_tbl);
+		gf_vect_mul(tsize, gf_const_tbl, buff1, buff2);
+
+		for (i = 0; i < tsize; i++)
+			if (gf_mul(a, buff1[i]) != buff2[i]) {
+				printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n",
+				       i, a, buff1[i], buff2[i], gf_mul(2, buff1[i]));
+				return -1;
+			}
+		if (0 == tsize % (32 * 8)) {
+			putchar('.');
+			fflush(0);
+		}
+	}
+
+	// Run tests at end of buffer for Electric Fence
+	align = 32;
+	a = 2;
+
+	gf_vect_mul_init(a, gf_const_tbl);
+	for (size = 0; size < TEST_SIZE; size += align) {
+		// Line up TEST_SIZE from end
+		efence_buff1 = buff1 + size;
+		efence_buff2 = buff2 + size;
+		efence_buff3 = buff3 + size;
+
+		gf_vect_mul(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff2);
+
+		for (i = 0; i < TEST_SIZE - size; i++)
+			if (gf_mul(a, efence_buff1[i]) != efence_buff2[i]) {
+				printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n",
+				       i, efence_buff1[i], efence_buff2[i],
+				       gf_mul(2, efence_buff1[i]));
+				return 1;
+			}
+
+		gf_vect_mul_base(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff3);
+
+		// Check reference function
+		for (i = 0; i < TEST_SIZE - size; i++)
+			if (efence_buff2[i] != efence_buff3[i]) {
+				printf("fail at %d, 0x%x x 0x%x = 0x%x (0x%x)\n",
+				       i, a, efence_buff2[i], efence_buff3[i],
+				       gf_mul(2, efence_buff1[i]));
+				return 1;
+			}
+
+		putchar('.');
+	}
+
+	printf(" done: Pass\n");
+	fflush(0);
+	return 0;
+}
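One caveat the SSE loop above makes visible: gf_vect_mul() consumes its buffers 32 bytes per iteration with no scalar tail, which is why the length sweep in this test only shrinks the buffer in 32-byte steps. A hedged sketch of an arbitrary-length wrapper, reusing the scalar gf_mul() reference from the test; the wrapper name and the split are illustrative, not part of isa-l:

/* Illustrative only: run the SIMD multiply on the 32-byte-aligned body and
 * finish any remainder one byte at a time with the scalar reference. */
static void gf_vect_mul_any_len(int len, unsigned char a, unsigned char *gf_const_tbl,
				unsigned char *src, unsigned char *dest)
{
	int body = len & ~31;	/* largest multiple of 32 not exceeding len */
	int i;

	if (body > 0)
		gf_vect_mul(body, gf_const_tbl, src, dest);
	for (i = body; i < len; i++)
		dest[i] = gf_mul(a, src[i]);
}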
diff --git a/src/isa-l/erasure_code/ppc64le/Makefile.am b/src/isa-l/erasure_code/ppc64le/Makefile.am
new file mode 100644
index 000000000..9d263ac22
--- /dev/null
+++ b/src/isa-l/erasure_code/ppc64le/Makefile.am
@@ -0,0 +1,15 @@
+lsrc_ppc64le += erasure_code/ppc64le/ec_base_vsx.c \
+		erasure_code/ppc64le/gf_vect_mul_vsx.c \
+		erasure_code/ppc64le/gf_vect_dot_prod_vsx.c \
+		erasure_code/ppc64le/gf_vect_mad_vsx.c \
+		erasure_code/ppc64le/gf_2vect_dot_prod_vsx.c \
+		erasure_code/ppc64le/gf_2vect_mad_vsx.c \
+		erasure_code/ppc64le/gf_3vect_dot_prod_vsx.c \
+		erasure_code/ppc64le/gf_3vect_mad_vsx.c \
+		erasure_code/ppc64le/gf_4vect_dot_prod_vsx.c \
+		erasure_code/ppc64le/gf_4vect_mad_vsx.c \
+		erasure_code/ppc64le/gf_5vect_dot_prod_vsx.c \
+		erasure_code/ppc64le/gf_5vect_mad_vsx.c \
+		erasure_code/ppc64le/gf_6vect_dot_prod_vsx.c \
+		erasure_code/ppc64le/gf_6vect_mad_vsx.c
+
diff --git a/src/isa-l/erasure_code/ppc64le/ec_base_vsx.c b/src/isa-l/erasure_code/ppc64le/ec_base_vsx.c
new file mode 100644
index 000000000..05624f1b6
--- /dev/null
+++ b/src/isa-l/erasure_code/ppc64le/ec_base_vsx.c
@@ -0,0 +1,97 @@
+#include "erasure_code.h"
+#include "ec_base_vsx.h"
+
+void gf_vect_dot_prod(int len, int vlen, unsigned char *v,
+		      unsigned char **src, unsigned char *dest)
+{
+	gf_vect_dot_prod_vsx(len, vlen, v, src, dest);
+}
+
+void gf_vect_mad(int len, int vec, int vec_i, unsigned char *v,
+		 unsigned char *src, unsigned char *dest)
+{
+	gf_vect_mad_vsx(len, vec, vec_i, v, src, dest);
+}
+
+void ec_encode_data(int len, int srcs, int dests, unsigned char *v,
+		    unsigned char **src, unsigned char **dest)
+{
+	if (len < 64) {
+		ec_encode_data_base(len, srcs, dests, v, src, dest);
+		return;
+	}
+
+	while (dests >= 6) {
+		gf_6vect_dot_prod_vsx(len, srcs, v, src, dest);
+		v += 6 * srcs * 32;
+		dest += 6;
+		dests -= 6;
+	}
+	switch (dests) {
+	case 6:
+		gf_6vect_dot_prod_vsx(len, srcs, v, src, dest);
+		break;
+	case 5:
+		gf_5vect_dot_prod_vsx(len, srcs, v, src, dest);
+		break;
+	case 4:
+		gf_4vect_dot_prod_vsx(len, srcs, v, src, dest);
+		break;
+	case 3:
+		gf_3vect_dot_prod_vsx(len, srcs, v, src, dest);
+		break;
+	case 2:
+		gf_2vect_dot_prod_vsx(len, srcs, v, src, dest);
+		break;
+	case 1:
+		gf_vect_dot_prod_vsx(len, srcs, v, src, *dest);
+		break;
+	case 0:
+		break;
+	}
+}
+
+void ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *v,
+			   unsigned char *data, unsigned char **dest)
+{
+	if (len < 64) {
+		ec_encode_data_update_base(len, k, rows, vec_i, v, data, dest);
+		return;
+	}
+
+	while (rows >= 6) {
+		gf_6vect_mad_vsx(len, k, vec_i, v, data, dest);
+		v += 6 * k * 32;
+		dest += 6;
+		rows -= 6;
+	}
+	switch (rows) {
+	case 6:
+		gf_6vect_mad_vsx(len, k, vec_i, v, data, dest);
+		break;
+	case 5:
+		gf_5vect_mad_vsx(len, k, vec_i, v, data, dest);
+		break;
+	case 4:
+		gf_4vect_mad_vsx(len, k, vec_i, v, data, dest);
+		break;
+	case 3:
+		gf_3vect_mad_vsx(len, k, vec_i, v, data, dest);
+		break;
+	case 2:
+		gf_2vect_mad_vsx(len, k, vec_i, v, data, dest);
+		break;
+	case 1:
+		gf_vect_mad_vsx(len, k, vec_i, v, data, *dest);
+		break;
+	case 0:
+		break;
+	}
+}
+
+int gf_vect_mul(int len, unsigned char *a, void *src, void *dest)
+{
+	gf_vect_mul_vsx(len, a, (unsigned char *)src, (unsigned char *)dest);
+	return 0;
+}
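ec_base_vsx.c above is the ppc64le binding of the generic isa-l entry points, so a caller goes through the ordinary API sequence. A minimal hedged sketch of a systematic 4 data + 2 parity encode; the geometry, fragment length, and the choice of gf_gen_rs_matrix() are illustrative, and the fragment buffers are assumed allocated by the caller:

/* Illustrative 4+2 encode through the API dispatched above. */
#include "erasure_code.h"

#define K   4		/* data fragments */
#define P   2		/* parity fragments */
#define LEN 4096	/* bytes per fragment */

static void encode_4p2(unsigned char *data[K], unsigned char *parity[P])
{
	unsigned char a[(K + P) * K];		/* generator matrix, identity rows on top */
	unsigned char g_tbls[K * P * 32];	/* expanded tables, 32 B per coefficient */

	gf_gen_rs_matrix(a, K + P, K);		/* systematic Reed-Solomon matrix */
	ec_init_tables(K, P, &a[K * K], g_tbls);	/* expand the P coding rows */
	ec_encode_data(LEN, K, P, g_tbls, data, parity);
}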
diff --git a/src/isa-l/erasure_code/ppc64le/ec_base_vsx.h b/src/isa-l/erasure_code/ppc64le/ec_base_vsx.h
new file mode 100644
index 000000000..c808629a9
--- /dev/null
+++ b/src/isa-l/erasure_code/ppc64le/ec_base_vsx.h
@@ -0,0 +1,338 @@
+#ifndef _ERASURE_CODE_PPC64LE_H_
+#define _ERASURE_CODE_PPC64LE_H_
+
+#include "erasure_code.h"
+#include <altivec.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(__ibmxl__)
+#define EC_vec_xl(a, b) vec_xl_be(a, b)
+#define EC_vec_permxor(va, vb, vc) __vpermxor(va, vb, vc)
+#elif defined __GNUC__ && __GNUC__ >= 8
+#define EC_vec_xl(a, b) vec_xl_be(a, b)
+#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vc)
+#elif defined __GNUC__ && __GNUC__ >= 7
+#if defined _ARCH_PWR9
+#define EC_vec_xl(a, b) vec_vsx_ld(a, b)
+#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vec_nor(vc, vc))
+#else
+inline vector unsigned char EC_vec_xl(int off, unsigned char *ptr) {
+	vector unsigned char vc;
+	__asm__ __volatile__("lxvd2x %x0, %1, %2; xxswapd %x0, %x0" : "=wa" (vc) : "r" (off), "r" (ptr));
+	return vc;
+}
+#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vec_nor(vc, vc))
+#endif
+#else
+#if defined _ARCH_PWR8
+inline vector unsigned char EC_vec_xl(int off, unsigned char *ptr) {
+	vector unsigned char vc;
+	__asm__ __volatile__("lxvd2x %x0, %1, %2; xxswapd %x0, %x0" : "=wa" (vc) : "r" (off), "r" (ptr));
+	return vc;
+}
+#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vec_nor(vc, vc))
+#else
+#error "This code is only supported on ppc64le."
+#endif
+#endif
+
+/**
+ * @brief GF(2^8) vector multiply. VSX version.
+ *
+ * Does a GF(2^8) multiply across each byte of input source with expanded
+ * constant and save to destination array. Can be used for erasure coding encode
+ * and decode update when only one source is available at a time. Function
+ * requires pre-calculation of a 32 byte constant array based on the input
+ * coefficients.
+ * @requires VSX
+ *
+ * @param len    Length of each vector in bytes.
+ * @param gftbls Pointer to array of input tables generated from coding
+ *               coefficients in ec_init_tables(). Must be of size 32.
+ * @param src    Pointer to source input array.
+ * @param dest   Pointer to destination data array.
+ * @returns none
+ */
+
+void gf_vect_mul_vsx(int len, unsigned char *gftbls, unsigned char *src, unsigned char *dest);
+
+/**
+ * @brief GF(2^8) vector dot product. VSX version.
+ *
+ * Does a GF(2^8) dot product across each byte of the input array and a constant
+ * set of coefficients to produce each byte of the output. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 32*vlen byte constant array based on the input coefficients.
+ * @requires VSX
+ *
+ * @param len    Length of each vector in bytes.
+ * @param vlen   Number of vector sources.
+ * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
+ *               on the array of input coefficients.
+ * @param src    Array of pointers to source inputs.
+ * @param dest   Pointer to destination data array.
+ * @returns none
+ */
+
+void gf_vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
+			  unsigned char **src, unsigned char *dest);
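Read scalar-wise, the dot product above computes each output byte as the GF(2^8) sum (XOR) of one product per source. A reference sketch over raw coefficients rather than the expanded gftbls; the function name is illustrative, and gf_mul() is the byte-wise field multiply used throughout the isa-l tests:

/* Illustrative reference: dest[i] = XOR over j of coef[j] (x) src[j][i]. */
static void gf_vect_dot_prod_ref(int len, int vlen, const unsigned char *coef,
				 unsigned char **src, unsigned char *dest)
{
	int i, j;

	for (i = 0; i < len; i++) {
		unsigned char s = 0;
		for (j = 0; j < vlen; j++)
			s ^= gf_mul(coef[j], src[j][i]);	/* XOR is GF(2^8) addition */
		dest[i] = s;
	}
}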
+
+/**
+ * @brief GF(2^8) vector dot product with two outputs. VSX version.
+ *
+ * Vector dot product optimized to calculate two outputs at a time. Does two
+ * GF(2^8) dot products across each byte of the input array and two constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 2*32*vlen byte constant array based on the two sets of input coefficients.
+ * @requires VSX
+ *
+ * @param len    Length of each vector in bytes.
+ * @param vlen   Number of vector sources.
+ * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
+ *               based on the array of input coefficients.
+ * @param src    Array of pointers to source inputs.
+ * @param dest   Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_2vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
+			   unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with three outputs. VSX version.
+ *
+ * Vector dot product optimized to calculate three outputs at a time. Does three
+ * GF(2^8) dot products across each byte of the input array and three constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 3*32*vlen byte constant array based on the three sets of input coefficients.
+ * @requires VSX
+ *
+ * @param len    Length of each vector in bytes.
+ * @param vlen   Number of vector sources.
+ * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
+ *               based on the array of input coefficients.
+ * @param src    Array of pointers to source inputs.
+ * @param dest   Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_3vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
+			   unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with four outputs. VSX version.
+ *
+ * Vector dot product optimized to calculate four outputs at a time. Does four
+ * GF(2^8) dot products across each byte of the input array and four constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 4*32*vlen byte constant array based on the four sets of input coefficients.
+ * @requires VSX
+ *
+ * @param len    Length of each vector in bytes.
+ * @param vlen   Number of vector sources.
+ * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
+ *               based on the array of input coefficients.
+ * @param src    Array of pointers to source inputs.
+ * @param dest   Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_4vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
+			   unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector dot product with five outputs. VSX version.
+ *
+ * Vector dot product optimized to calculate five outputs at a time. Does five
+ * GF(2^8) dot products across each byte of the input array and five constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 5*32*vlen byte constant array based on the five sets of input coefficients.
+ * @requires VSX
+ *
+ * @param len    Length of each vector in bytes. Must be >= 16.
+ * @param vlen   Number of vector sources.
+ * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
+ *               based on the array of input coefficients.
+ * @param src    Array of pointers to source inputs.
+ * @param dest   Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_5vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
+			   unsigned char **src, unsigned char **dest);
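All multi-output variants share one gftbls layout: the tables for output row m occupy 32*vlen bytes starting at offset m*32*vlen, and within a row the 32-byte table for source j starts at offset 32*j, split into a 16-byte low-nibble half and a 16-byte high-nibble half. A sketch of that indexing, mirroring the &gftbls[m * 32 * vlen] and (vec_i << 5) expressions in the VSX sources later in this patch; the helper names are illustrative:

/* Illustrative: 16-byte nibble-table halves for output row m, source j. */
static inline unsigned char *gf_tbl_lo(unsigned char *gftbls, int vlen, int m, int j)
{
	return gftbls + 32 * (m * vlen + j);		/* low-nibble products */
}

static inline unsigned char *gf_tbl_hi(unsigned char *gftbls, int vlen, int m, int j)
{
	return gftbls + 32 * (m * vlen + j) + 16;	/* high-nibble products */
}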
+
+/**
+ * @brief GF(2^8) vector dot product with six outputs. VSX version.
+ *
+ * Vector dot product optimized to calculate six outputs at a time. Does six
+ * GF(2^8) dot products across each byte of the input array and six constant
+ * sets of coefficients to produce each byte of the outputs. Can be used for
+ * erasure coding encode and decode. Function requires pre-calculation of a
+ * 6*32*vlen byte constant array based on the six sets of input coefficients.
+ * @requires VSX
+ *
+ * @param len    Length of each vector in bytes.
+ * @param vlen   Number of vector sources.
+ * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
+ *               based on the array of input coefficients.
+ * @param src    Array of pointers to source inputs.
+ * @param dest   Array of pointers to destination data buffers.
+ * @returns none
+ */
+
+void gf_6vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
+			   unsigned char **src, unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector multiply accumulate. VSX version.
+ *
+ * Does a GF(2^8) multiply across each byte of input source with expanded
+ * constant and add to destination array. Can be used for erasure coding encode
+ * and decode update when only one source is available at a time. Function
+ * requires pre-calculation of a 32*vec byte constant array based on the input
+ * coefficients.
+ * @requires VSX
+ *
+ * @param len    Length of each vector in bytes.
+ * @param vec    The number of vector sources or rows in the generator matrix
+ *               for coding.
+ * @param vec_i  The vector index corresponding to the single input source.
+ * @param gftbls Pointer to array of input tables generated from coding
+ *               coefficients in ec_init_tables(). Must be of size 32*vec.
+ * @param src    Pointer to source input array.
+ * @param dest   Pointer to destination data array.
+ * @returns none
+ */
+
+void gf_vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+		     unsigned char *dest);
+/**
+ * @brief GF(2^8) vector multiply with 2 accumulate. VSX version.
+ *
+ * Does a GF(2^8) multiply across each byte of input source with expanded
+ * constants and add to destination arrays. Can be used for erasure coding
+ * encode and decode update when only one source is available at a
+ * time. Function requires pre-calculation of a 32*vec byte constant array based
+ * on the input coefficients.
+ * @requires VSX
+ *
+ * @param len    Length of each vector in bytes.
+ * @param vec    The number of vector sources or rows in the generator matrix
+ *               for coding.
+ * @param vec_i  The vector index corresponding to the single input source.
+ * @param gftbls Pointer to array of input tables generated from coding
+ *               coefficients in ec_init_tables(). Must be of size 32*vec.
+ * @param src    Pointer to source input array.
+ * @param dest   Array of pointers to destination input/outputs.
+ * @returns none
+ */
+
+void gf_2vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+		      unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector multiply with 3 accumulate. VSX version.
+ *
+ * Does a GF(2^8) multiply across each byte of input source with expanded
+ * constants and add to destination arrays. Can be used for erasure coding
+ * encode and decode update when only one source is available at a
+ * time. Function requires pre-calculation of a 32*vec byte constant array based
+ * on the input coefficients.
+ * @requires VSX
+ *
+ * @param len    Length of each vector in bytes.
+ * @param vec    The number of vector sources or rows in the generator matrix
+ *               for coding.
+ * @param vec_i  The vector index corresponding to the single input source.
+ * @param gftbls Pointer to array of input tables generated from coding
+ *               coefficients in ec_init_tables(). Must be of size 32*vec.
+ * @param src    Pointer to source input array.
+ * @param dest   Array of pointers to destination input/outputs.
+ * @returns none + */ + +void gf_3vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); + +/** + * @brief GF(2^8) vector multiply with 4 accumulate. VSX version. + * + * Does a GF(2^8) multiply across each byte of input source with expanded + * constants and add to destination arrays. Can be used for erasure coding + * encode and decode update when only one source is available at a + * time. Function requires pre-calculation of a 32*vec byte constant array based + * on the input coefficients. + * @requires VSX + * + * @param len Length of each vector in bytes. + * @param vec The number of vector sources or rows in the generator matrix + * for coding. + * @param vec_i The vector index corresponding to the single input source. + * @param gftbls Pointer to array of input tables generated from coding + * coefficients in ec_init_tables(). Must be of size 32*vec. + * @param src Pointer to source input array. + * @param dest Array of pointers to destination input/outputs. + * @returns none + */ + +void gf_4vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); + +/** + * @brief GF(2^8) vector multiply with 5 accumulate. VSX version. + * + * Does a GF(2^8) multiply across each byte of input source with expanded + * constants and add to destination arrays. Can be used for erasure coding + * encode and decode update when only one source is available at a + * time. Function requires pre-calculation of a 32*vec byte constant array based + * on the input coefficients. + * @requires VSX + * + * @param len Length of each vector in bytes. + * @param vec The number of vector sources or rows in the generator matrix + * for coding. + * @param vec_i The vector index corresponding to the single input source. + * @param gftbls Pointer to array of input tables generated from coding + * coefficients in ec_init_tables(). Must be of size 32*vec. + * @param src Pointer to source input array. + * @param dest Array of pointers to destination input/outputs. + * @returns none + */ +void gf_5vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); + +/** + * @brief GF(2^8) vector multiply with 6 accumulate. VSX version. + * + * Does a GF(2^8) multiply across each byte of input source with expanded + * constants and add to destination arrays. Can be used for erasure coding + * encode and decode update when only one source is available at a + * time. Function requires pre-calculation of a 32*vec byte constant array based + * on the input coefficients. + * @requires VSX + * + * @param len Length of each vector in bytes. + * @param vec The number of vector sources or rows in the generator matrix + * for coding. + * @param vec_i The vector index corresponding to the single input source. + * @param gftbls Pointer to array of input tables generated from coding + * coefficients in ec_init_tables(). Must be of size 32*vec. + * @param src Pointer to source input array. + * @param dest Array of pointers to destination input/outputs. 
+ * @returns none + */ +void gf_6vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); + +#ifdef __cplusplus +} +#endif + +#endif //_ERASURE_CODE_PPC64LE_H_ diff --git a/src/isa-l/erasure_code/ppc64le/gf_2vect_dot_prod_vsx.c b/src/isa-l/erasure_code/ppc64le/gf_2vect_dot_prod_vsx.c new file mode 100644 index 000000000..3cb269cce --- /dev/null +++ b/src/isa-l/erasure_code/ppc64le/gf_2vect_dot_prod_vsx.c @@ -0,0 +1,83 @@ +#include "ec_base_vsx.h" + +void gf_2vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, + unsigned char **src, unsigned char **dest) +{ + unsigned char *s, *t0, *t1; + vector unsigned char vX1, vX2, vX3, vX4; + vector unsigned char vY1, vY2, vY3, vY4; + vector unsigned char vYD, vYE, vYF, vYG; + vector unsigned char vhi0, vlo0, vhi1, vlo1; + int i, j, head; + + if (vlen < 128) { + gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]); + gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]); + + for (j = 1; j < vlen; j++) { + gf_2vect_mad_vsx(len, vlen, j, gftbls, src[j], dest); + } + return; + } + + t0 = (unsigned char *)dest[0]; + t1 = (unsigned char *)dest[1]; + + head = len % 64; + if (head != 0) { + gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0); + gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1); + } + + for (i = head; i < len - 63; i += 64) { + vY1 = vY1 ^ vY1; + vY2 = vY2 ^ vY2; + vY3 = vY3 ^ vY3; + vY4 = vY4 ^ vY4; + + vYD = vYD ^ vYD; + vYE = vYE ^ vYE; + vYF = vYF ^ vYF; + vYG = vYG ^ vYG; + + unsigned char *g0 = &gftbls[0 * 32 * vlen]; + unsigned char *g1 = &gftbls[1 * 32 * vlen]; + + for (j = 0; j < vlen; j++) { + s = (unsigned char *)src[j]; + vX1 = vec_xl(0, s + i); + vX2 = vec_xl(16, s + i); + vX3 = vec_xl(32, s + i); + vX4 = vec_xl(48, s + i); + + vlo0 = EC_vec_xl(0, g0); + vhi0 = EC_vec_xl(16, g0); + vlo1 = EC_vec_xl(0, g1); + vhi1 = EC_vec_xl(16, g1); + + vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1); + vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2); + vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3); + vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4); + + vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1); + vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2); + vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3); + vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4); + + g0 += 32; + g1 += 32; + } + + vec_xst(vY1, 0, t0 + i); + vec_xst(vY2, 16, t0 + i); + vec_xst(vY3, 0, t1 + i); + vec_xst(vY4, 16, t1 + i); + + vec_xst(vYD, 32, t0 + i); + vec_xst(vYE, 48, t0 + i); + vec_xst(vYF, 32, t1 + i); + vec_xst(vYG, 48, t1 + i); + } + return; +} diff --git a/src/isa-l/erasure_code/ppc64le/gf_2vect_mad_vsx.c b/src/isa-l/erasure_code/ppc64le/gf_2vect_mad_vsx.c new file mode 100644 index 000000000..621684a5f --- /dev/null +++ b/src/isa-l/erasure_code/ppc64le/gf_2vect_mad_vsx.c @@ -0,0 +1,65 @@ +#include "ec_base_vsx.h" + +void gf_2vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, + unsigned char *src, unsigned char **dest) +{ + unsigned char *s, *t0, *t1; + vector unsigned char vX1, vX2, vX3, vX4; + vector unsigned char vY1, vY2, vY3, vY4; + vector unsigned char vYD, vYE, vYF, vYG; + vector unsigned char vhi0, vlo0, vhi1, vlo1; + int i, head; + + s = (unsigned char *)src; + t0 = (unsigned char *)dest[0]; + t1 = (unsigned char *)dest[1]; + + head = len % 64; + if (head != 0) { + gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0); + gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1); + } + + vlo0 = EC_vec_xl(0, gftbls + 
(((0 * vec) << 5) + (vec_i << 5))); + vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5))); + vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5))); + vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5))); + + for (i = head; i < len - 63; i += 64) { + vX1 = vec_xl(0, s + i); + vX2 = vec_xl(16, s + i); + vX3 = vec_xl(32, s + i); + vX4 = vec_xl(48, s + i); + + vY1 = vec_xl(0, t0 + i); + vY2 = vec_xl(16, t0 + i); + vYD = vec_xl(32, t0 + i); + vYE = vec_xl(48, t0 + i); + + vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1); + vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2); + vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3); + vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4); + + vY3 = vec_xl(0, t1 + i); + vY4 = vec_xl(16, t1 + i); + vYF = vec_xl(32, t1 + i); + vYG = vec_xl(48, t1 + i); + + vec_xst(vY1, 0, t0 + i); + vec_xst(vY2, 16, t0 + i); + vec_xst(vYD, 32, t0 + i); + vec_xst(vYE, 48, t0 + i); + + vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1); + vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2); + vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3); + vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4); + + vec_xst(vY3, 0, t1 + i); + vec_xst(vY4, 16, t1 + i); + vec_xst(vYF, 32, t1 + i); + vec_xst(vYG, 48, t1 + i); + } + return; +} diff --git a/src/isa-l/erasure_code/ppc64le/gf_3vect_dot_prod_vsx.c b/src/isa-l/erasure_code/ppc64le/gf_3vect_dot_prod_vsx.c new file mode 100644 index 000000000..23b72dc4b --- /dev/null +++ b/src/isa-l/erasure_code/ppc64le/gf_3vect_dot_prod_vsx.c @@ -0,0 +1,104 @@ +#include "ec_base_vsx.h" + +void gf_3vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, + unsigned char **src, unsigned char **dest) +{ + unsigned char *s, *t0, *t1, *t2; + vector unsigned char vX1, vX2, vX3, vX4; + vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6; + vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI; + vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2; + int i, j, head; + + if (vlen < 128) { + gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]); + gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]); + gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *)dest[2]); + + for (j = 1; j < vlen; j++) { + gf_3vect_mad_vsx(len, vlen, j, gftbls, src[j], dest); + } + return; + } + + t0 = (unsigned char *)dest[0]; + t1 = (unsigned char *)dest[1]; + t2 = (unsigned char *)dest[2]; + + head = len % 64; + if (head != 0) { + gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0); + gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1); + gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2); + } + + for (i = head; i < len - 63; i += 64) { + vY1 = vY1 ^ vY1; + vY2 = vY2 ^ vY2; + vY3 = vY3 ^ vY3; + vY4 = vY4 ^ vY4; + vY5 = vY5 ^ vY5; + vY6 = vY6 ^ vY6; + + vYD = vYD ^ vYD; + vYE = vYE ^ vYE; + vYF = vYF ^ vYF; + vYG = vYG ^ vYG; + vYH = vYH ^ vYH; + vYI = vYI ^ vYI; + + unsigned char *g0 = &gftbls[0 * 32 * vlen]; + unsigned char *g1 = &gftbls[1 * 32 * vlen]; + unsigned char *g2 = &gftbls[2 * 32 * vlen]; + + for (j = 0; j < vlen; j++) { + s = (unsigned char *)src[j]; + vX1 = vec_xl(0, s + i); + vX2 = vec_xl(16, s + i); + vX3 = vec_xl(32, s + i); + vX4 = vec_xl(48, s + i); + + vlo0 = EC_vec_xl(0, g0); + vhi0 = EC_vec_xl(16, g0); + vlo1 = EC_vec_xl(0, g1); + vhi1 = EC_vec_xl(16, g1); + + vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1); + vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2); + vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3); + vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4); + + vlo2 = vec_xl(0, g2); 
+ vhi2 = vec_xl(16, g2); + + vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1); + vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2); + vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3); + vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4); + + vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1); + vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2); + vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3); + vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4); + + g0 += 32; + g1 += 32; + g2 += 32; + } + + vec_xst(vY1, 0, t0 + i); + vec_xst(vY2, 16, t0 + i); + vec_xst(vY3, 0, t1 + i); + vec_xst(vY4, 16, t1 + i); + vec_xst(vY5, 0, t2 + i); + vec_xst(vY6, 16, t2 + i); + + vec_xst(vYD, 32, t0 + i); + vec_xst(vYE, 48, t0 + i); + vec_xst(vYF, 32, t1 + i); + vec_xst(vYG, 48, t1 + i); + vec_xst(vYH, 32, t2 + i); + vec_xst(vYI, 48, t2 + i); + } + return; +} diff --git a/src/isa-l/erasure_code/ppc64le/gf_3vect_mad_vsx.c b/src/isa-l/erasure_code/ppc64le/gf_3vect_mad_vsx.c new file mode 100644 index 000000000..ba90c1fdb --- /dev/null +++ b/src/isa-l/erasure_code/ppc64le/gf_3vect_mad_vsx.c @@ -0,0 +1,84 @@ +#include "ec_base_vsx.h" + +void gf_3vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, + unsigned char *src, unsigned char **dest) +{ + unsigned char *s, *t0, *t1, *t2; + vector unsigned char vX1, vX2, vX3, vX4; + vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6; + vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI; + vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2; + int i, head; + + s = (unsigned char *)src; + t0 = (unsigned char *)dest[0]; + t1 = (unsigned char *)dest[1]; + t2 = (unsigned char *)dest[2]; + + head = len % 64; + if (head != 0) { + gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0); + gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1); + gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2); + } + + vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5))); + vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5))); + vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5))); + vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5))); + vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5))); + vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5))); + + for (i = head; i < len - 63; i += 64) { + vX1 = vec_xl(0, s + i); + vX2 = vec_xl(16, s + i); + vX3 = vec_xl(32, s + i); + vX4 = vec_xl(48, s + i); + + vY1 = vec_xl(0, t0 + i); + vY2 = vec_xl(16, t0 + i); + vYD = vec_xl(32, t0 + i); + vYE = vec_xl(48, t0 + i); + + vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1); + vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2); + vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3); + vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4); + + vY3 = vec_xl(0, t1 + i); + vY4 = vec_xl(16, t1 + i); + vYF = vec_xl(32, t1 + i); + vYG = vec_xl(48, t1 + i); + + vec_xst(vY1, 0, t0 + i); + vec_xst(vY2, 16, t0 + i); + vec_xst(vYD, 32, t0 + i); + vec_xst(vYE, 48, t0 + i); + + vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1); + vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2); + vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3); + vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4); + + vY5 = vec_xl(0, t2 + i); + vY6 = vec_xl(16, t2 + i); + vYH = vec_xl(32, t2 + i); + vYI = vec_xl(48, t2 + i); + + vec_xst(vY3, 0, t1 + i); + vec_xst(vY4, 16, t1 + i); + vec_xst(vYF, 32, t1 + i); + vec_xst(vYG, 48, t1 + i); + + vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1); + vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2); + vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3); + vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4); + + vec_xst(vY5, 0, 
t2 + i); + vec_xst(vY6, 16, t2 + i); + vec_xst(vYH, 32, t2 + i); + vec_xst(vYI, 48, t2 + i); + } + return; +} diff --git a/src/isa-l/erasure_code/ppc64le/gf_4vect_dot_prod_vsx.c b/src/isa-l/erasure_code/ppc64le/gf_4vect_dot_prod_vsx.c new file mode 100644 index 000000000..e65654453 --- /dev/null +++ b/src/isa-l/erasure_code/ppc64le/gf_4vect_dot_prod_vsx.c @@ -0,0 +1,124 @@ +#include "ec_base_vsx.h" + +void gf_4vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, + unsigned char **src, unsigned char **dest) +{ + unsigned char *s, *t0, *t1, *t2, *t3; + vector unsigned char vX1, vX2, vX3, vX4; + vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8; + vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK; + vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3; + int i, j, head; + + if (vlen < 128) { + gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]); + gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]); + gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *)dest[2]); + gf_vect_mul_vsx(len, &gftbls[3 * 32 * vlen], src[0], (unsigned char *)dest[3]); + + for (j = 1; j < vlen; j++) { + gf_4vect_mad_vsx(len, vlen, j, gftbls, src[j], dest); + } + return; + } + + t0 = (unsigned char *)dest[0]; + t1 = (unsigned char *)dest[1]; + t2 = (unsigned char *)dest[2]; + t3 = (unsigned char *)dest[3]; + + head = len % 64; + if (head != 0) { + gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0); + gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1); + gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2); + gf_vect_dot_prod_base(head, vlen, &gftbls[3 * 32 * vlen], src, t3); + } + + for (i = head; i < len - 63; i += 64) { + vY1 = vY1 ^ vY1; + vY2 = vY2 ^ vY2; + vY3 = vY3 ^ vY3; + vY4 = vY4 ^ vY4; + vY5 = vY5 ^ vY5; + vY6 = vY6 ^ vY6; + vY7 = vY7 ^ vY7; + vY8 = vY8 ^ vY8; + + vYD = vYD ^ vYD; + vYE = vYE ^ vYE; + vYF = vYF ^ vYF; + vYG = vYG ^ vYG; + vYH = vYH ^ vYH; + vYI = vYI ^ vYI; + vYJ = vYJ ^ vYJ; + vYK = vYK ^ vYK; + + unsigned char *g0 = &gftbls[0 * 32 * vlen]; + unsigned char *g1 = &gftbls[1 * 32 * vlen]; + unsigned char *g2 = &gftbls[2 * 32 * vlen]; + unsigned char *g3 = &gftbls[3 * 32 * vlen]; + + for (j = 0; j < vlen; j++) { + s = (unsigned char *)src[j]; + vX1 = vec_xl(0, s + i); + vX2 = vec_xl(16, s + i); + vX3 = vec_xl(32, s + i); + vX4 = vec_xl(48, s + i); + + vlo0 = EC_vec_xl(0, g0); + vhi0 = EC_vec_xl(16, g0); + vlo1 = EC_vec_xl(0, g1); + vhi1 = EC_vec_xl(16, g1); + + vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1); + vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2); + vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3); + vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4); + + vlo2 = vec_xl(0, g2); + vhi2 = vec_xl(16, g2); + vlo3 = vec_xl(0, g3); + vhi3 = vec_xl(16, g3); + + vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1); + vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2); + vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3); + vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4); + + vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1); + vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2); + vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3); + vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4); + + vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1); + vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2); + vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3); + vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4); + + g0 += 32; + g1 += 32; + g2 += 32; + g3 += 32; + } + + vec_xst(vY1, 0, t0 + i); + vec_xst(vY2, 16, t0 + i); + vec_xst(vY3, 0, t1 + i); + 
vec_xst(vY4, 16, t1 + i); + vec_xst(vY5, 0, t2 + i); + vec_xst(vY6, 16, t2 + i); + vec_xst(vY7, 0, t3 + i); + vec_xst(vY8, 16, t3 + i); + + vec_xst(vYD, 32, t0 + i); + vec_xst(vYE, 48, t0 + i); + vec_xst(vYF, 32, t1 + i); + vec_xst(vYG, 48, t1 + i); + vec_xst(vYH, 32, t2 + i); + vec_xst(vYI, 48, t2 + i); + vec_xst(vYJ, 32, t3 + i); + vec_xst(vYK, 48, t3 + i); + } + return; +} diff --git a/src/isa-l/erasure_code/ppc64le/gf_4vect_mad_vsx.c b/src/isa-l/erasure_code/ppc64le/gf_4vect_mad_vsx.c new file mode 100644 index 000000000..7b236b6f8 --- /dev/null +++ b/src/isa-l/erasure_code/ppc64le/gf_4vect_mad_vsx.c @@ -0,0 +1,103 @@ +#include "ec_base_vsx.h" + +void gf_4vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, + unsigned char *src, unsigned char **dest) +{ + unsigned char *s, *t0, *t1, *t2, *t3; + vector unsigned char vX1, vX2, vX3, vX4; + vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8; + vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK; + vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3; + int i, head; + + s = (unsigned char *)src; + t0 = (unsigned char *)dest[0]; + t1 = (unsigned char *)dest[1]; + t2 = (unsigned char *)dest[2]; + t3 = (unsigned char *)dest[3]; + + head = len % 64; + if (head != 0) { + gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0); + gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1); + gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2); + gf_vect_mad_base(head, vec, vec_i, &gftbls[3 * 32 * vec], src, t3); + } + + vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5))); + vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5))); + vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5))); + vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5))); + vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5))); + vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5))); + vlo3 = EC_vec_xl(0, gftbls + (((3 * vec) << 5) + (vec_i << 5))); + vhi3 = EC_vec_xl(16, gftbls + (((3 * vec) << 5) + (vec_i << 5))); + + for (i = head; i < len - 63; i += 64) { + vX1 = vec_xl(0, s + i); + vX2 = vec_xl(16, s + i); + vX3 = vec_xl(32, s + i); + vX4 = vec_xl(48, s + i); + + vY1 = vec_xl(0, t0 + i); + vY2 = vec_xl(16, t0 + i); + vYD = vec_xl(32, t0 + i); + vYE = vec_xl(48, t0 + i); + + vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1); + vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2); + vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3); + vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4); + + vY3 = vec_xl(0, t1 + i); + vY4 = vec_xl(16, t1 + i); + vYF = vec_xl(32, t1 + i); + vYG = vec_xl(48, t1 + i); + + vec_xst(vY1, 0, t0 + i); + vec_xst(vY2, 16, t0 + i); + vec_xst(vYD, 32, t0 + i); + vec_xst(vYE, 48, t0 + i); + + vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1); + vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2); + vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3); + vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4); + + vY5 = vec_xl(0, t2 + i); + vY6 = vec_xl(16, t2 + i); + vYH = vec_xl(32, t2 + i); + vYI = vec_xl(48, t2 + i); + + vec_xst(vY3, 0, t1 + i); + vec_xst(vY4, 16, t1 + i); + vec_xst(vYF, 32, t1 + i); + vec_xst(vYG, 48, t1 + i); + + vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1); + vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2); + vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3); + vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4); + + vY7 = vec_xl(0, t3 + i); + vY8 = vec_xl(16, t3 + i); + vYJ = vec_xl(32, t3 + i); + vYK = vec_xl(48, t3 + i); + + vec_xst(vY5, 0, t2 + i); + vec_xst(vY6, 16, t2 + 
i); + vec_xst(vYH, 32, t2 + i); + vec_xst(vYI, 48, t2 + i); + + vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1); + vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2); + vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3); + vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4); + + vec_xst(vY7, 0, t3 + i); + vec_xst(vY8, 16, t3 + i); + vec_xst(vYJ, 32, t3 + i); + vec_xst(vYK, 48, t3 + i); + } + return; +} diff --git a/src/isa-l/erasure_code/ppc64le/gf_5vect_dot_prod_vsx.c b/src/isa-l/erasure_code/ppc64le/gf_5vect_dot_prod_vsx.c new file mode 100644 index 000000000..e9eef0e63 --- /dev/null +++ b/src/isa-l/erasure_code/ppc64le/gf_5vect_dot_prod_vsx.c @@ -0,0 +1,145 @@ +#include "ec_base_vsx.h" + +void gf_5vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, + unsigned char **src, unsigned char **dest) +{ + unsigned char *s, *t0, *t1, *t2, *t3, *t4; + vector unsigned char vX1, vX2, vX3, vX4; + vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA; + vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM; + vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3, vhi4, vlo4; + int i, j, head; + + if (vlen < 128) { + gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]); + gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]); + gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *)dest[2]); + gf_vect_mul_vsx(len, &gftbls[3 * 32 * vlen], src[0], (unsigned char *)dest[3]); + gf_vect_mul_vsx(len, &gftbls[4 * 32 * vlen], src[0], (unsigned char *)dest[4]); + + for (j = 1; j < vlen; j++) { + gf_5vect_mad_vsx(len, vlen, j, gftbls, src[j], dest); + } + return; + } + + t0 = (unsigned char *)dest[0]; + t1 = (unsigned char *)dest[1]; + t2 = (unsigned char *)dest[2]; + t3 = (unsigned char *)dest[3]; + t4 = (unsigned char *)dest[4]; + + head = len % 64; + if (head != 0) { + gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0); + gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1); + gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2); + gf_vect_dot_prod_base(head, vlen, &gftbls[3 * 32 * vlen], src, t3); + gf_vect_dot_prod_base(head, vlen, &gftbls[4 * 32 * vlen], src, t4); + } + + for (i = head; i < len - 63; i += 64) { + vY1 = vY1 ^ vY1; + vY2 = vY2 ^ vY2; + vY3 = vY3 ^ vY3; + vY4 = vY4 ^ vY4; + vY5 = vY5 ^ vY5; + vY6 = vY6 ^ vY6; + vY7 = vY7 ^ vY7; + vY8 = vY8 ^ vY8; + vY9 = vY9 ^ vY9; + vYA = vYA ^ vYA; + + vYD = vYD ^ vYD; + vYE = vYE ^ vYE; + vYF = vYF ^ vYF; + vYG = vYG ^ vYG; + vYH = vYH ^ vYH; + vYI = vYI ^ vYI; + vYJ = vYJ ^ vYJ; + vYK = vYK ^ vYK; + vYL = vYL ^ vYL; + vYM = vYM ^ vYM; + + unsigned char *g0 = &gftbls[0 * 32 * vlen]; + unsigned char *g1 = &gftbls[1 * 32 * vlen]; + unsigned char *g2 = &gftbls[2 * 32 * vlen]; + unsigned char *g3 = &gftbls[3 * 32 * vlen]; + unsigned char *g4 = &gftbls[4 * 32 * vlen]; + + for (j = 0; j < vlen; j++) { + s = (unsigned char *)src[j]; + vX1 = vec_xl(0, s + i); + vX2 = vec_xl(16, s + i); + vX3 = vec_xl(32, s + i); + vX4 = vec_xl(48, s + i); + + vlo0 = EC_vec_xl(0, g0); + vhi0 = EC_vec_xl(16, g0); + vlo1 = EC_vec_xl(0, g1); + vhi1 = EC_vec_xl(16, g1); + + vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1); + vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2); + vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3); + vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4); + + vlo2 = vec_xl(0, g2); + vhi2 = vec_xl(16, g2); + vlo3 = vec_xl(0, g3); + vhi3 = vec_xl(16, g3); + + vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1); + vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2); 
+ vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3); + vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4); + + vlo4 = vec_xl(0, g4); + vhi4 = vec_xl(16, g4); + + vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1); + vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2); + vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3); + vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4); + + vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1); + vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2); + vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3); + vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4); + + vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1); + vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2); + vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3); + vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4); + + g0 += 32; + g1 += 32; + g2 += 32; + g3 += 32; + g4 += 32; + } + + vec_xst(vY1, 0, t0 + i); + vec_xst(vY2, 16, t0 + i); + vec_xst(vY3, 0, t1 + i); + vec_xst(vY4, 16, t1 + i); + vec_xst(vY5, 0, t2 + i); + vec_xst(vY6, 16, t2 + i); + vec_xst(vY7, 0, t3 + i); + vec_xst(vY8, 16, t3 + i); + vec_xst(vY9, 0, t4 + i); + vec_xst(vYA, 16, t4 + i); + + vec_xst(vYD, 32, t0 + i); + vec_xst(vYE, 48, t0 + i); + vec_xst(vYF, 32, t1 + i); + vec_xst(vYG, 48, t1 + i); + vec_xst(vYH, 32, t2 + i); + vec_xst(vYI, 48, t2 + i); + vec_xst(vYJ, 32, t3 + i); + vec_xst(vYK, 48, t3 + i); + vec_xst(vYL, 32, t4 + i); + vec_xst(vYM, 48, t4 + i); + } + return; +} diff --git a/src/isa-l/erasure_code/ppc64le/gf_5vect_mad_vsx.c b/src/isa-l/erasure_code/ppc64le/gf_5vect_mad_vsx.c new file mode 100644 index 000000000..7bb7bb211 --- /dev/null +++ b/src/isa-l/erasure_code/ppc64le/gf_5vect_mad_vsx.c @@ -0,0 +1,122 @@ +#include "ec_base_vsx.h" + +void gf_5vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, + unsigned char *src, unsigned char **dest) +{ + unsigned char *s, *t0, *t1, *t2, *t3, *t4; + vector unsigned char vX1, vX2, vX3, vX4; + vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA; + vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM; + vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2, vhi3, vlo3, vhi4, vlo4; + int i, head; + + s = (unsigned char *)src; + t0 = (unsigned char *)dest[0]; + t1 = (unsigned char *)dest[1]; + t2 = (unsigned char *)dest[2]; + t3 = (unsigned char *)dest[3]; + t4 = (unsigned char *)dest[4]; + + head = len % 64; + if (head != 0) { + gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0); + gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1); + gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2); + gf_vect_mad_base(head, vec, vec_i, &gftbls[3 * 32 * vec], src, t3); + gf_vect_mad_base(head, vec, vec_i, &gftbls[4 * 32 * vec], src, t4); + } + + vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5))); + vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5))); + vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5))); + vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5))); + vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5))); + vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5))); + vlo3 = EC_vec_xl(0, gftbls + (((3 * vec) << 5) + (vec_i << 5))); + vhi3 = EC_vec_xl(16, gftbls + (((3 * vec) << 5) + (vec_i << 5))); + vlo4 = EC_vec_xl(0, gftbls + (((4 * vec) << 5) + (vec_i << 5))); + vhi4 = EC_vec_xl(16, gftbls + (((4 * vec) << 5) + (vec_i << 5))); + + for (i = head; i < len - 63; i += 64) { + vX1 = vec_xl(0, s + i); + vX2 = vec_xl(16, s + i); + vX3 = vec_xl(32, s + i); + vX4 = vec_xl(48, s + i); + + vY1 = vec_xl(0, t0 + i); + vY2 = vec_xl(16, 
t0 + i); + vYD = vec_xl(32, t0 + i); + vYE = vec_xl(48, t0 + i); + + vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1); + vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2); + vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3); + vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4); + + vY3 = vec_xl(0, t1 + i); + vY4 = vec_xl(16, t1 + i); + vYF = vec_xl(32, t1 + i); + vYG = vec_xl(48, t1 + i); + + vec_xst(vY1, 0, t0 + i); + vec_xst(vY2, 16, t0 + i); + vec_xst(vYD, 32, t0 + i); + vec_xst(vYE, 48, t0 + i); + + vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1); + vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2); + vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3); + vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4); + + vY5 = vec_xl(0, t2 + i); + vY6 = vec_xl(16, t2 + i); + vYH = vec_xl(32, t2 + i); + vYI = vec_xl(48, t2 + i); + + vec_xst(vY3, 0, t1 + i); + vec_xst(vY4, 16, t1 + i); + vec_xst(vYF, 32, t1 + i); + vec_xst(vYG, 48, t1 + i); + + vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1); + vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2); + vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3); + vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4); + + vY7 = vec_xl(0, t3 + i); + vY8 = vec_xl(16, t3 + i); + vYJ = vec_xl(32, t3 + i); + vYK = vec_xl(48, t3 + i); + + vec_xst(vY5, 0, t2 + i); + vec_xst(vY6, 16, t2 + i); + vec_xst(vYH, 32, t2 + i); + vec_xst(vYI, 48, t2 + i); + + vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1); + vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2); + vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3); + vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4); + + vY9 = vec_xl(0, t4 + i); + vYA = vec_xl(16, t4 + i); + vYL = vec_xl(32, t4 + i); + vYM = vec_xl(48, t4 + i); + + vec_xst(vY7, 0, t3 + i); + vec_xst(vY8, 16, t3 + i); + vec_xst(vYJ, 32, t3 + i); + vec_xst(vYK, 48, t3 + i); + + vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1); + vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2); + vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3); + vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4); + + vec_xst(vY9, 0, t4 + i); + vec_xst(vYA, 16, t4 + i); + vec_xst(vYL, 32, t4 + i); + vec_xst(vYM, 48, t4 + i); + } + return; +} diff --git a/src/isa-l/erasure_code/ppc64le/gf_6vect_dot_prod_vsx.c b/src/isa-l/erasure_code/ppc64le/gf_6vect_dot_prod_vsx.c new file mode 100644 index 000000000..ac918bd49 --- /dev/null +++ b/src/isa-l/erasure_code/ppc64le/gf_6vect_dot_prod_vsx.c @@ -0,0 +1,166 @@ +#include "ec_base_vsx.h" + +void gf_6vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls, + unsigned char **src, unsigned char **dest) +{ + unsigned char *s, *t0, *t1, *t2, *t3, *t4, *t5; + vector unsigned char vX1, vX2, vX3, vX4; + vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA, vYB, vYC; + vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM, vYN, vYO; + vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2; + vector unsigned char vhi3, vlo3, vhi4, vlo4, vhi5, vlo5; + int i, j, head; + + if (vlen < 128) { + gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest[0]); + gf_vect_mul_vsx(len, &gftbls[1 * 32 * vlen], src[0], (unsigned char *)dest[1]); + gf_vect_mul_vsx(len, &gftbls[2 * 32 * vlen], src[0], (unsigned char *)dest[2]); + gf_vect_mul_vsx(len, &gftbls[3 * 32 * vlen], src[0], (unsigned char *)dest[3]); + gf_vect_mul_vsx(len, &gftbls[4 * 32 * vlen], src[0], (unsigned char *)dest[4]); + gf_vect_mul_vsx(len, &gftbls[5 * 32 * vlen], src[0], (unsigned char *)dest[5]); + + for (j = 1; j < vlen; j++) { + gf_6vect_mad_vsx(len, vlen, j, gftbls, src[j], dest); + } + return; + } + + t0 = (unsigned char *)dest[0]; + t1 = (unsigned char *)dest[1]; + t2 = 
(unsigned char *)dest[2]; + t3 = (unsigned char *)dest[3]; + t4 = (unsigned char *)dest[4]; + t5 = (unsigned char *)dest[5]; + + head = len % 64; + if (head != 0) { + gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0); + gf_vect_dot_prod_base(head, vlen, &gftbls[1 * 32 * vlen], src, t1); + gf_vect_dot_prod_base(head, vlen, &gftbls[2 * 32 * vlen], src, t2); + gf_vect_dot_prod_base(head, vlen, &gftbls[3 * 32 * vlen], src, t3); + gf_vect_dot_prod_base(head, vlen, &gftbls[4 * 32 * vlen], src, t4); + gf_vect_dot_prod_base(head, vlen, &gftbls[5 * 32 * vlen], src, t5); + } + + for (i = head; i < len - 63; i += 64) { + vY1 = vY1 ^ vY1; + vY2 = vY2 ^ vY2; + vY3 = vY3 ^ vY3; + vY4 = vY4 ^ vY4; + vY5 = vY5 ^ vY5; + vY6 = vY6 ^ vY6; + vY7 = vY7 ^ vY7; + vY8 = vY8 ^ vY8; + vY9 = vY9 ^ vY9; + vYA = vYA ^ vYA; + vYB = vYB ^ vYB; + vYC = vYC ^ vYC; + + vYD = vYD ^ vYD; + vYE = vYE ^ vYE; + vYF = vYF ^ vYF; + vYG = vYG ^ vYG; + vYH = vYH ^ vYH; + vYI = vYI ^ vYI; + vYJ = vYJ ^ vYJ; + vYK = vYK ^ vYK; + vYL = vYL ^ vYL; + vYM = vYM ^ vYM; + vYN = vYN ^ vYN; + vYO = vYO ^ vYO; + + unsigned char *g0 = &gftbls[0 * 32 * vlen]; + unsigned char *g1 = &gftbls[1 * 32 * vlen]; + unsigned char *g2 = &gftbls[2 * 32 * vlen]; + unsigned char *g3 = &gftbls[3 * 32 * vlen]; + unsigned char *g4 = &gftbls[4 * 32 * vlen]; + unsigned char *g5 = &gftbls[5 * 32 * vlen]; + + for (j = 0; j < vlen; j++) { + s = (unsigned char *)src[j]; + vX1 = vec_xl(0, s + i); + vX2 = vec_xl(16, s + i); + vX3 = vec_xl(32, s + i); + vX4 = vec_xl(48, s + i); + + vlo0 = EC_vec_xl(0, g0); + vhi0 = EC_vec_xl(16, g0); + vlo1 = EC_vec_xl(0, g1); + vhi1 = EC_vec_xl(16, g1); + + vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1); + vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2); + vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3); + vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4); + + vlo2 = EC_vec_xl(0, g2); + vhi2 = EC_vec_xl(16, g2); + vlo3 = EC_vec_xl(0, g3); + vhi3 = EC_vec_xl(16, g3); + + vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1); + vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2); + vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3); + vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4); + + vlo4 = EC_vec_xl(0, g4); + vhi4 = EC_vec_xl(16, g4); + vlo5 = EC_vec_xl(0, g5); + vhi5 = EC_vec_xl(16, g5); + + vY5 = vY5 ^ EC_vec_permxor(vhi2, vlo2, vX1); + vY6 = vY6 ^ EC_vec_permxor(vhi2, vlo2, vX2); + vYH = vYH ^ EC_vec_permxor(vhi2, vlo2, vX3); + vYI = vYI ^ EC_vec_permxor(vhi2, vlo2, vX4); + + vY7 = vY7 ^ EC_vec_permxor(vhi3, vlo3, vX1); + vY8 = vY8 ^ EC_vec_permxor(vhi3, vlo3, vX2); + vYJ = vYJ ^ EC_vec_permxor(vhi3, vlo3, vX3); + vYK = vYK ^ EC_vec_permxor(vhi3, vlo3, vX4); + + vY9 = vY9 ^ EC_vec_permxor(vhi4, vlo4, vX1); + vYA = vYA ^ EC_vec_permxor(vhi4, vlo4, vX2); + vYL = vYL ^ EC_vec_permxor(vhi4, vlo4, vX3); + vYM = vYM ^ EC_vec_permxor(vhi4, vlo4, vX4); + + vYB = vYB ^ EC_vec_permxor(vhi5, vlo5, vX1); + vYC = vYC ^ EC_vec_permxor(vhi5, vlo5, vX2); + vYN = vYN ^ EC_vec_permxor(vhi5, vlo5, vX3); + vYO = vYO ^ EC_vec_permxor(vhi5, vlo5, vX4); + + g0 += 32; + g1 += 32; + g2 += 32; + g3 += 32; + g4 += 32; + g5 += 32; + } + + vec_xst(vY1, 0, t0 + i); + vec_xst(vY2, 16, t0 + i); + vec_xst(vY3, 0, t1 + i); + vec_xst(vY4, 16, t1 + i); + vec_xst(vY5, 0, t2 + i); + vec_xst(vY6, 16, t2 + i); + vec_xst(vY7, 0, t3 + i); + vec_xst(vY8, 16, t3 + i); + vec_xst(vY9, 0, t4 + i); + vec_xst(vYA, 16, t4 + i); + vec_xst(vYB, 0, t5 + i); + vec_xst(vYC, 16, t5 + i); + + vec_xst(vYD, 32, t0 + i); + vec_xst(vYE, 48, t0 + i); + vec_xst(vYF, 32, t1 + i); + vec_xst(vYG, 48, t1 + i); + vec_xst(vYH, 
32, t2 + i); + vec_xst(vYI, 48, t2 + i); + vec_xst(vYJ, 32, t3 + i); + vec_xst(vYK, 48, t3 + i); + vec_xst(vYL, 32, t4 + i); + vec_xst(vYM, 48, t4 + i); + vec_xst(vYN, 32, t5 + i); + vec_xst(vYO, 48, t5 + i); + } + return; +} diff --git a/src/isa-l/erasure_code/ppc64le/gf_6vect_mad_vsx.c b/src/isa-l/erasure_code/ppc64le/gf_6vect_mad_vsx.c new file mode 100644 index 000000000..43ea6c696 --- /dev/null +++ b/src/isa-l/erasure_code/ppc64le/gf_6vect_mad_vsx.c @@ -0,0 +1,142 @@ +#include "ec_base_vsx.h" + +void gf_6vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, + unsigned char *src, unsigned char **dest) +{ + unsigned char *s, *t0, *t1, *t2, *t3, *t4, *t5; + vector unsigned char vX1, vX2, vX3, vX4; + vector unsigned char vY1, vY2, vY3, vY4, vY5, vY6, vY7, vY8, vY9, vYA, vYB, vYC; + vector unsigned char vYD, vYE, vYF, vYG, vYH, vYI, vYJ, vYK, vYL, vYM, vYN, vYO; + vector unsigned char vhi0, vlo0, vhi1, vlo1, vhi2, vlo2; + vector unsigned char vhi3, vlo3, vhi4, vlo4, vhi5, vlo5; + int i, head; + + s = (unsigned char *)src; + t0 = (unsigned char *)dest[0]; + t1 = (unsigned char *)dest[1]; + t2 = (unsigned char *)dest[2]; + t3 = (unsigned char *)dest[3]; + t4 = (unsigned char *)dest[4]; + t5 = (unsigned char *)dest[5]; + + head = len % 64; + if (head != 0) { + gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, t0); + gf_vect_mad_base(head, vec, vec_i, &gftbls[1 * 32 * vec], src, t1); + gf_vect_mad_base(head, vec, vec_i, &gftbls[2 * 32 * vec], src, t2); + gf_vect_mad_base(head, vec, vec_i, &gftbls[3 * 32 * vec], src, t3); + gf_vect_mad_base(head, vec, vec_i, &gftbls[4 * 32 * vec], src, t4); + gf_vect_mad_base(head, vec, vec_i, &gftbls[5 * 32 * vec], src, t5); + } + + vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5))); + vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5))); + vlo1 = EC_vec_xl(0, gftbls + (((1 * vec) << 5) + (vec_i << 5))); + vhi1 = EC_vec_xl(16, gftbls + (((1 * vec) << 5) + (vec_i << 5))); + vlo2 = EC_vec_xl(0, gftbls + (((2 * vec) << 5) + (vec_i << 5))); + vhi2 = EC_vec_xl(16, gftbls + (((2 * vec) << 5) + (vec_i << 5))); + vlo3 = EC_vec_xl(0, gftbls + (((3 * vec) << 5) + (vec_i << 5))); + vhi3 = EC_vec_xl(16, gftbls + (((3 * vec) << 5) + (vec_i << 5))); + vlo4 = EC_vec_xl(0, gftbls + (((4 * vec) << 5) + (vec_i << 5))); + vhi4 = EC_vec_xl(16, gftbls + (((4 * vec) << 5) + (vec_i << 5))); + vlo5 = EC_vec_xl(0, gftbls + (((5 * vec) << 5) + (vec_i << 5))); + vhi5 = EC_vec_xl(16, gftbls + (((5 * vec) << 5) + (vec_i << 5))); + + for (i = head; i < len - 63; i += 64) { + vX1 = vec_xl(0, s + i); + vX2 = vec_xl(16, s + i); + vX3 = vec_xl(32, s + i); + vX4 = vec_xl(48, s + i); + + vY1 = vec_xl(0, t0 + i); + vY2 = vec_xl(16, t0 + i); + vYD = vec_xl(32, t0 + i); + vYE = vec_xl(48, t0 + i); + + vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1); + vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2); + vYD = vYD ^ EC_vec_permxor(vhi0, vlo0, vX3); + vYE = vYE ^ EC_vec_permxor(vhi0, vlo0, vX4); + + vec_xst(vY1, 0, t0 + i); + vec_xst(vY2, 16, t0 + i); + vec_xst(vYD, 32, t0 + i); + vec_xst(vYE, 48, t0 + i); + + vY3 = vec_xl(0, t1 + i); + vY4 = vec_xl(16, t1 + i); + vYF = vec_xl(32, t1 + i); + vYG = vec_xl(48, t1 + i); + + vY3 = vY3 ^ EC_vec_permxor(vhi1, vlo1, vX1); + vY4 = vY4 ^ EC_vec_permxor(vhi1, vlo1, vX2); + vYF = vYF ^ EC_vec_permxor(vhi1, vlo1, vX3); + vYG = vYG ^ EC_vec_permxor(vhi1, vlo1, vX4); + + vec_xst(vY3, 0, t1 + i); + vec_xst(vY4, 16, t1 + i); + vec_xst(vYF, 32, t1 + i); + vec_xst(vYG, 48, t1 + i); + + vY5 = vec_xl(0, t2 + i); + vY6 = vec_xl(16, 
diff --git a/src/isa-l/erasure_code/ppc64le/gf_vect_dot_prod_vsx.c b/src/isa-l/erasure_code/ppc64le/gf_vect_dot_prod_vsx.c
new file mode 100644
index 000000000..2f97e3421
--- /dev/null
+++ b/src/isa-l/erasure_code/ppc64le/gf_vect_dot_prod_vsx.c
@@ -0,0 +1,85 @@
+#include "ec_base_vsx.h"
+
+void gf_vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
+			  unsigned char **src, unsigned char *dest)
+{
+	unsigned char *s, *t0;
+	vector unsigned char vX1, vY1;
+	vector unsigned char vX2, vY2;
+	vector unsigned char vX3, vY3;
+	vector unsigned char vX4, vY4;
+	vector unsigned char vX5, vY5;
+	vector unsigned char vX6, vY6;
+	vector unsigned char vX7, vY7;
+	vector unsigned char vX8, vY8;
+	vector unsigned char vhi0, vlo0;
+	int i, j, head;
+
+	if (vlen < 128) {
+		gf_vect_mul_vsx(len, &gftbls[0 * 32 * vlen], src[0], (unsigned char *)dest);
+
+		for (j = 1; j < vlen; j++) {
+			gf_vect_mad_vsx(len, vlen, j, gftbls, src[j], dest);
+		}
+		return;
+	}
+
+	t0 = (unsigned char *)dest;
+
+	head = len % 128;
+	if (head != 0) {
+		gf_vect_dot_prod_base(head, vlen, &gftbls[0 * 32 * vlen], src, t0);
+	}
+
+	for (i = head; i < len - 127; i += 128) {
+		vY1 = vY1 ^ vY1;	/* self-XOR zeroes the accumulators */
+		vY2 = vY2 ^ vY2;
+		vY3 = vY3 ^ vY3;
+		vY4 = vY4 ^ vY4;
+
+		vY5 = vY5 ^ vY5;
+		vY6 = vY6 ^ vY6;
+		vY7 = vY7 ^ vY7;
+		vY8 = vY8 ^ vY8;
+
+		unsigned char *g0 = &gftbls[0 * 32 * vlen];
+
+		for (j = 0; j < vlen; j++) {
+			s = (unsigned char *)src[j];
+			vX1 = vec_xl(0, s + i);
+			vX2 = vec_xl(16, s + i);
+			vX3 = vec_xl(32, s + i);
+			vX4 = vec_xl(48, s + i);
+
+			vlo0 = EC_vec_xl(0, g0);
+			vhi0 = EC_vec_xl(16, g0);
+
+			vX5 = vec_xl(64, s + i);
+			vX6 = vec_xl(80, s + i);
+			vX7 = vec_xl(96, s + i);
+			vX8 = vec_xl(112, s + i);
+
+			vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
+			vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
+			vY3 = vY3 ^ EC_vec_permxor(vhi0, vlo0, vX3);
+			vY4 = vY4 ^ EC_vec_permxor(vhi0, vlo0, vX4);
+
+			vY5 = vY5 ^ EC_vec_permxor(vhi0, vlo0, vX5);
+			vY6 = vY6 ^ EC_vec_permxor(vhi0, vlo0, vX6);
+			vY7 = vY7 ^ EC_vec_permxor(vhi0, vlo0, vX7);
+			vY8 = vY8 ^ EC_vec_permxor(vhi0, vlo0, vX8);
+
+			g0 += 32;	/* advance to the next source's 32-byte table */
+		}
+		vec_xst(vY1, 0, t0 + i);
+		vec_xst(vY2, 16, t0 + i);
+		vec_xst(vY3, 32, t0 + i);
+		vec_xst(vY4, 48, t0 + i);
+
+		vec_xst(vY5, 64, t0 + i);
+		vec_xst(vY6, 80, t0 + i);
+		vec_xst(vY7, 96, t0 + i);
+		vec_xst(vY8, 112, t0 + i);
+	}
+	return;
+}
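The dot-product kernel computes dest as the GF(2^8) sum of every source scaled by its own coefficient: accumulators are zeroed by self-XOR, the inner loop walks one 32-byte table per source (g0 += 32), and each outer iteration covers 128 bytes. Note that for fewer than 128 sources -- the common case -- it appears to fall back to one gf_vect_mul_vsx plus a chain of gf_vect_mad_vsx calls instead. A scalar model of the semantics, not part of the patch, reusing the hypothetical gf_mul_split_tbl helper from the sketch above:

/* Editorial sketch, not part of the patch: dest[i] is the GF(2^8) dot
 * product of column i across all vlen sources, each source j using its
 * own 32-byte table at gftbls + j * 32. */
static void gf_vect_dot_prod_scalar(int len, int vlen, const unsigned char *gftbls,
				    unsigned char **src, unsigned char *dest)
{
	int i, j;
	for (i = 0; i < len; i++) {
		unsigned char acc = 0;	/* mirrors the vY self-XOR zeroing */
		for (j = 0; j < vlen; j++)
			acc ^= gf_mul_split_tbl(&gftbls[j * 32], src[j][i]);
		dest[i] = acc;
	}
}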
diff --git a/src/isa-l/erasure_code/ppc64le/gf_vect_mad_vsx.c b/src/isa-l/erasure_code/ppc64le/gf_vect_mad_vsx.c
new file mode 100644
index 000000000..a4810b96d
--- /dev/null
+++ b/src/isa-l/erasure_code/ppc64le/gf_vect_mad_vsx.c
@@ -0,0 +1,48 @@
+#include "ec_base_vsx.h"
+
+void gf_vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls,
+		     unsigned char *src, unsigned char *dest)
+{
+	unsigned char *s, *t0;
+	vector unsigned char vX1, vY1;
+	vector unsigned char vX2, vY2;
+	vector unsigned char vX3, vY3;
+	vector unsigned char vX4, vY4;
+	vector unsigned char vhi0, vlo0;
+	int i, head;
+
+	s = (unsigned char *)src;
+	t0 = (unsigned char *)dest;
+
+	head = len % 64;
+	if (head != 0) {
+		gf_vect_mad_base(head, vec, vec_i, &gftbls[0 * 32 * vec], src, dest);
+	}
+
+	vlo0 = EC_vec_xl(0, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
+	vhi0 = EC_vec_xl(16, gftbls + (((0 * vec) << 5) + (vec_i << 5)));
+
+	for (i = head; i < len - 63; i += 64) {
+		vX1 = vec_xl(0, s + i);
+		vX2 = vec_xl(16, s + i);
+		vX3 = vec_xl(32, s + i);
+		vX4 = vec_xl(48, s + i);
+
+		vY1 = vec_xl(0, t0 + i);
+		vY2 = vec_xl(16, t0 + i);
+		vY3 = vec_xl(32, t0 + i);
+		vY4 = vec_xl(48, t0 + i);
+
+		vY1 = vY1 ^ EC_vec_permxor(vhi0, vlo0, vX1);
+		vY2 = vY2 ^ EC_vec_permxor(vhi0, vlo0, vX2);
+		vY3 = vY3 ^ EC_vec_permxor(vhi0, vlo0, vX3);
+		vY4 = vY4 ^ EC_vec_permxor(vhi0, vlo0, vX4);
+
+		vec_xst(vY1, 0, t0 + i);
+		vec_xst(vY2, 16, t0 + i);
+		vec_xst(vY3, 32, t0 + i);
+		vec_xst(vY4, 48, t0 + i);
+	}
+
+	return;
+}
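One detail worth convincing yourself of in these kernels: the head bytes handled by the base routine plus the 64-byte vector loop cover [0, len) exactly once. Since head = len % 64, len - head is a multiple of 64, the loop starts at i = head, and its last iteration begins at len - 64. A throwaway check of that arithmetic -- editorial, not part of the patch, assuming nothing beyond the loop bounds used above:

/* Editorial sketch, not part of the patch: verifies the head/vector-loop
 * split covers every byte exactly once, e.g. len = 200 gives head = 8,
 * base handles bytes 0..7, the loop runs i = 8, 72, 136. */
#include <assert.h>

static void check_split(int len)
{
	int head = len % 64;	/* bytes given to the base routine */
	int covered = head;
	int i;
	for (i = head; i < len - 63; i += 64)	/* same bounds as the kernels */
		covered += 64;
	assert(covered == len);
}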
diff --git a/src/isa-l/erasure_code/ppc64le/gf_vect_mul_vsx.c b/src/isa-l/erasure_code/ppc64le/gf_vect_mul_vsx.c
new file mode 100644
index 000000000..3e610a104
--- /dev/null
+++ b/src/isa-l/erasure_code/ppc64le/gf_vect_mul_vsx.c
@@ -0,0 +1,61 @@
+#include "ec_base_vsx.h"
+
+void gf_vect_mul_vsx(int len, unsigned char *gftbl, unsigned char *src, unsigned char *dest)
+{
+	unsigned char *s, *t0;
+	vector unsigned char vX1, vY1;
+	vector unsigned char vX2, vY2;
+	vector unsigned char vX3, vY3;
+	vector unsigned char vX4, vY4;
+	vector unsigned char vX5, vY5;
+	vector unsigned char vX6, vY6;
+	vector unsigned char vX7, vY7;
+	vector unsigned char vX8, vY8;
+	vector unsigned char vhi0, vlo0;
+	int i, head;
+
+	s = (unsigned char *)src;
+	t0 = (unsigned char *)dest;
+
+	head = len % 128;
+	if (head != 0) {
+		gf_vect_mul_base(head, gftbl, src, dest);
+	}
+
+	vlo0 = EC_vec_xl(0, gftbl);
+	vhi0 = EC_vec_xl(16, gftbl);
+
+	for (i = head; i < len - 127; i += 128) {
+		vX1 = vec_xl(0, s + i);
+		vX2 = vec_xl(16, s + i);
+		vX3 = vec_xl(32, s + i);
+		vX4 = vec_xl(48, s + i);
+
+		vX5 = vec_xl(64, s + i);
+		vX6 = vec_xl(80, s + i);
+		vX7 = vec_xl(96, s + i);
+		vX8 = vec_xl(112, s + i);
+
+		vY1 = EC_vec_permxor(vhi0, vlo0, vX1);
+		vY2 = EC_vec_permxor(vhi0, vlo0, vX2);
+		vY3 = EC_vec_permxor(vhi0, vlo0, vX3);
+		vY4 = EC_vec_permxor(vhi0, vlo0, vX4);
+
+		vY5 = EC_vec_permxor(vhi0, vlo0, vX5);
+		vY6 = EC_vec_permxor(vhi0, vlo0, vX6);
+		vY7 = EC_vec_permxor(vhi0, vlo0, vX7);
+		vY8 = EC_vec_permxor(vhi0, vlo0, vX8);
+
+		vec_xst(vY1, 0, t0 + i);
+		vec_xst(vY2, 16, t0 + i);
+		vec_xst(vY3, 32, t0 + i);
+		vec_xst(vY4, 48, t0 + i);
+
+		vec_xst(vY5, 64, t0 + i);
+		vec_xst(vY6, 80, t0 + i);
+		vec_xst(vY7, 96, t0 + i);
+		vec_xst(vY8, 112, t0 + i);
+	}
+
+	return;
+}
--
cgit v1.2.3
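Finally, gf_vect_mul_vsx is the same split-table multiply with no accumulation: it overwrites dest rather than XORing into it, which is why its loop never loads from t0. A scalar model of the semantics, editorial and not part of the patch, again reusing the hypothetical gf_mul_split_tbl helper from the first sketch:

/* Editorial sketch, not part of the patch: dest[i] = coef * src[i] in
 * GF(2^8), using one 32-byte table as produced for a single coefficient. */
static void gf_vect_mul_scalar(int len, const unsigned char gftbl[32],
			       const unsigned char *src, unsigned char *dest)
{
	int i;
	for (i = 0; i < len; i++)
		dest[i] = gf_mul_split_tbl(gftbl, src[i]);
}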