diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/isa-l/mem | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2. [upstream/18.2.2]
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/isa-l/mem')
-rw-r--r-- | src/isa-l/mem/Makefile.am | 48 |
-rw-r--r-- | src/isa-l/mem/aarch64/Makefile.am | 33 |
-rw-r--r-- | src/isa-l/mem/aarch64/mem_aarch64_dispatcher.c | 39 |
-rw-r--r-- | src/isa-l/mem/aarch64/mem_multibinary_arm.S | 33 |
-rw-r--r-- | src/isa-l/mem/aarch64/mem_zero_detect_neon.S | 243 |
-rw-r--r-- | src/isa-l/mem/mem_multibinary.asm | 42 |
-rw-r--r-- | src/isa-l/mem/mem_zero_detect_avx.asm | 189 |
-rw-r--r-- | src/isa-l/mem/mem_zero_detect_base.c | 69 |
-rw-r--r-- | src/isa-l/mem/mem_zero_detect_base_aliases.c | 38 |
-rw-r--r-- | src/isa-l/mem/mem_zero_detect_perf.c | 60 |
-rw-r--r-- | src/isa-l/mem/mem_zero_detect_sse.asm | 176 |
-rw-r--r-- | src/isa-l/mem/mem_zero_detect_test.c | 226 |
12 files changed, 1196 insertions, 0 deletions
diff --git a/src/isa-l/mem/Makefile.am b/src/isa-l/mem/Makefile.am new file mode 100644 index 000000000..c864f66fd --- /dev/null +++ b/src/isa-l/mem/Makefile.am @@ -0,0 +1,48 @@ +######################################################################## +# Copyright(c) 2011-2018 Intel Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+######################################################################## + +include mem/aarch64/Makefile.am + +lsrc += mem/mem_zero_detect_base.c + +lsrc_base_aliases += mem/mem_zero_detect_base_aliases.c +lsrc_ppc64le += mem/mem_zero_detect_base_aliases.c + +lsrc_x86_64 += mem/mem_zero_detect_avx.asm \ + mem/mem_zero_detect_sse.asm \ + mem/mem_multibinary.asm + +extern_hdrs += include/mem_routines.h + +other_src += include/test.h \ + include/types.h + +check_tests += mem/mem_zero_detect_test + +perf_tests += mem/mem_zero_detect_perf diff --git a/src/isa-l/mem/aarch64/Makefile.am b/src/isa-l/mem/aarch64/Makefile.am new file mode 100644 index 000000000..c18659872 --- /dev/null +++ b/src/isa-l/mem/aarch64/Makefile.am @@ -0,0 +1,33 @@ +######################################################################## +# Copyright(c) 2019 Arm Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################### + +lsrc_aarch64 += \ + mem/aarch64/mem_zero_detect_neon.S \ + mem/aarch64/mem_multibinary_arm.S \ + mem/aarch64/mem_aarch64_dispatcher.c diff --git a/src/isa-l/mem/aarch64/mem_aarch64_dispatcher.c b/src/isa-l/mem/aarch64/mem_aarch64_dispatcher.c new file mode 100644 index 000000000..0dfe3a3ae --- /dev/null +++ b/src/isa-l/mem/aarch64/mem_aarch64_dispatcher.c @@ -0,0 +1,39 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ +#include <aarch64_multibinary.h> + +DEFINE_INTERFACE_DISPATCHER(isal_zero_detect) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_ASIMD) + return PROVIDER_INFO(mem_zero_detect_neon); + + return PROVIDER_BASIC(mem_zero_detect); + +} diff --git a/src/isa-l/mem/aarch64/mem_multibinary_arm.S b/src/isa-l/mem/aarch64/mem_multibinary_arm.S new file mode 100644 index 000000000..baac3ca38 --- /dev/null +++ b/src/isa-l/mem/aarch64/mem_multibinary_arm.S @@ -0,0 +1,33 @@ +######################################################################## +# Copyright(c) 2019 Arm Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. 
+# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################### + +#include <aarch64_multibinary.h> + +mbin_interface isal_zero_detect + diff --git a/src/isa-l/mem/aarch64/mem_zero_detect_neon.S b/src/isa-l/mem/aarch64/mem_zero_detect_neon.S new file mode 100644 index 000000000..6f93ff612 --- /dev/null +++ b/src/isa-l/mem/aarch64/mem_zero_detect_neon.S @@ -0,0 +1,243 @@ +######################################################################## +# Copyright(c) 2019 Arm Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+######################################################################### + +.text +.arch armv8-a + +/*int mem_zero_detect_neon(void *buf, size_t n)*/ + +// input: buf -> x0 +// input: n -> x1 +// output: -> x0 (true or false) + +.global mem_zero_detect_neon +.type mem_zero_detect_neon, %function + +mem_zero_detect_neon: + cmp x1, #(16*24-1) + b.ls .loop_16x24_end + +.loop_16x24: // 16x24 block loop + + ldr q0, [x0] + ldr q1, [x0, #16] + ldr q2, [x0, #(16*2)] + ldr q3, [x0, #(16*3)] + ldr q4, [x0, #(16*4)] + ldr q5, [x0, #(16*5)] + ldr q6, [x0, #(16*6)] + ldr q7, [x0, #(16*7)] + ldr q16, [x0, #(16*8)] + ldr q17, [x0, #(16*9)] + ldr q18, [x0, #(16*10)] + ldr q19, [x0, #(16*11)] + ldr q20, [x0, #(16*12)] + ldr q21, [x0, #(16*13)] + ldr q22, [x0, #(16*14)] + ldr q23, [x0, #(16*15)] + ldr q24, [x0, #(16*16)] + ldr q25, [x0, #(16*17)] + ldr q26, [x0, #(16*18)] + ldr q27, [x0, #(16*19)] + ldr q28, [x0, #(16*20)] + ldr q29, [x0, #(16*21)] + ldr q30, [x0, #(16*22)] + ldr q31, [x0, #(16*23)] + + add x0, x0, #(16*24) + + orr v0.16b, v0.16b, v1.16b + orr v2.16b, v2.16b, v3.16b + orr v4.16b, v4.16b, v5.16b + orr v6.16b, v6.16b, v7.16b + orr v16.16b, v16.16b, v17.16b + orr v18.16b, v18.16b, v19.16b + orr v20.16b, v20.16b, v21.16b + orr v22.16b, v22.16b, v23.16b + orr v24.16b, v24.16b, v25.16b + orr v26.16b, v26.16b, v27.16b + orr v28.16b, v28.16b, v29.16b + orr v30.16b, v30.16b, v31.16b + + orr v0.16b, v0.16b, v2.16b + orr v4.16b, v4.16b, v6.16b + orr v16.16b, v16.16b, v18.16b + orr v20.16b, v20.16b, v22.16b + orr v24.16b, v24.16b, v26.16b + orr v28.16b, v28.16b, v30.16b + + orr v0.16b, v0.16b, v4.16b + orr v16.16b, v16.16b, v20.16b + orr v24.16b, v24.16b, v28.16b + + orr v0.16b, v0.16b, v16.16b + orr v0.16b, v0.16b, v24.16b + + mov x3, v0.d[0] + mov x2, v0.d[1] + orr x2, x3, x2 + cbnz x2, .fail_exit + + // loop condition check + sub x1, x1, #(16*24) + cmp x1, #(16*24-1) + b.hi .loop_16x24 + +.loop_16x24_end: + cmp x1, #(16*8-1) + b.ls .loop_16x8_end + +.loop_16x8: // 16x8 
block loop + ldr q0, [x0] + ldr q1, [x0, #16] + ldr q2, [x0, #(16*2)] + ldr q3, [x0, #(16*3)] + ldr q4, [x0, #(16*4)] + ldr q5, [x0, #(16*5)] + ldr q6, [x0, #(16*6)] + ldr q7, [x0, #(16*7)] + + add x0, x0, #(16*8) + + orr v0.16b, v0.16b, v1.16b + orr v2.16b, v2.16b, v3.16b + orr v4.16b, v4.16b, v5.16b + orr v6.16b, v6.16b, v7.16b + + orr v0.16b, v0.16b, v2.16b + orr v4.16b, v4.16b, v6.16b + orr v0.16b, v0.16b, v4.16b + + mov x3, v0.d[0] + mov x2, v0.d[1] + orr x2, x3, x2 + cbnz x2, .fail_exit + + sub x1, x1, #(16*8) + cmp x1, #(16*8-1) + b.hi .loop_16x8 + +.loop_16x8_end: + cmp x1, #(8*8-1) + b.ls .loop_8x8_end + +.loop_8x8: // 8x8 block loop + ldp x2, x3, [x0] + ldp x4, x5, [x0, #16] + ldp x6, x7, [x0, #32] + ldp x8, x9, [x0, #48] + + add x0, x0, #(8*8) + + orr x2, x2, x3 + orr x4, x4, x5 + orr x6, x6, x7 + orr x8, x8, x9 + orr x2, x2, x4 + orr x6, x6, x8 + orr x2, x2, x6 + + cbnz x2, .fail_exit + + sub x1, x1, #(8*8) + cmp x1, #(8*8-1) + b.hi .loop_8x8 + +.loop_8x8_end: + cmp x1, #(8-1) + b.ls .handle_remainder + +.loop_8: // loop per 8bytes + ldr x2, [x0] + add x0, x0, #8 + cbnz x2, .fail_exit + + sub x1, x1, #8 + cmp x1, #7 + b.hi .loop_8 + +.loop_8_end: + + // check remaining bytes +.handle_remainder: + mov w2, #0 + + cmp x1, #0 + beq .handle_reminder_end + cmp x1, #1 + beq .case1 + cmp x1, #2 + beq .case2 + cmp x1, #3 + beq .case3 + cmp x1, #4 + beq .case4 + cmp x1, #5 + beq .case5 + cmp x1, #6 + beq .case6 + +.case7: // case7 drop here directly + ldrb w3, [x0] + add x0, x0, #1 + orr w2, w2, w3 +.case6: + ldrb w3, [x0] + add x0, x0, #1 + orr w2, w2, w3 +.case5: + ldrb w3, [x0] + add x0, x0, #1 + orr w2, w2, w3 +.case4: + ldr w3, [x0] + orr w2, w2, w3 + b .handle_reminder_end +.case3: + ldrb w3, [x0] + add x0, x0, #1 + orr w2, w2, w3 +.case2: + ldrh w3, [x0] + orr w2, w2, w3 + b .handle_reminder_end +.case1: + ldrb w3, [x0] + orr w2, w2, w3 + +.handle_reminder_end: + cbz w2, .pass_exit + +.fail_exit: + mov w0, #0xffffffff + ret + +.pass_exit: + mov w0, #0x0 + 
ret diff --git a/src/isa-l/mem/mem_multibinary.asm b/src/isa-l/mem/mem_multibinary.asm new file mode 100644 index 000000000..38f63e22b --- /dev/null +++ b/src/isa-l/mem/mem_multibinary.asm @@ -0,0 +1,42 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2018 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%include "reg_sizes.asm" +%include "multibinary.asm" + +default rel +[bits 64] + +extern mem_zero_detect_avx +extern mem_zero_detect_sse +extern mem_zero_detect_base + +mbin_interface isal_zero_detect + +mbin_dispatch_init5 isal_zero_detect, mem_zero_detect_base, mem_zero_detect_sse, mem_zero_detect_avx, mem_zero_detect_avx diff --git a/src/isa-l/mem/mem_zero_detect_avx.asm b/src/isa-l/mem/mem_zero_detect_avx.asm new file mode 100644 index 000000000..1b5de8415 --- /dev/null +++ b/src/isa-l/mem/mem_zero_detect_avx.asm @@ -0,0 +1,189 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2018 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%include "reg_sizes.asm" + +%ifidn __OUTPUT_FORMAT__, elf64 + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + %define arg4 r8 + %define arg5 r9 + %define tmp r11 + %define tmpb r11b + %define tmp3 arg4 + %define return rax + %define func(x) x: endbranch + %define FUNC_SAVE + %define FUNC_RESTORE +%endif + +%ifidn __OUTPUT_FORMAT__, win64 + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + %define tmp r11 + %define tmpb r11b + %define tmp3 r10 + %define return rax + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + end_prolog + %endmacro + %macro FUNC_RESTORE 0 + %endmacro +%endif + +%define src arg0 +%define len arg1 +%define ptr arg2 +%define pos return + +default rel + +[bits 64] +section .text + +align 16 +mk_global mem_zero_detect_avx, function +func(mem_zero_detect_avx) + FUNC_SAVE + mov pos, 0 + sub len, 4*32 + jle .mem_z_small_block + +.mem_z_loop: + vmovdqu ymm0, [src+pos] + vmovdqu ymm1, [src+pos+1*32] + vmovdqu ymm2, [src+pos+2*32] + vmovdqu ymm3, [src+pos+3*32] + vptest ymm0, ymm0 + jnz .return_fail + vptest ymm1, ymm1 + jnz .return_fail + vptest ymm2, ymm2 + jnz .return_fail + vptest ymm3, ymm3 + jnz .return_fail + add pos, 4*32 + cmp pos, len + jl .mem_z_loop + +.mem_z_last_block: + vmovdqu ymm0, [src+len] + vmovdqu ymm1, [src+len+1*32] + vmovdqu ymm2, [src+len+2*32] + vmovdqu ymm3, [src+len+3*32] + 
vptest ymm0, ymm0 + jnz .return_fail + vptest ymm1, ymm1 + jnz .return_fail + vptest ymm2, ymm2 + jnz .return_fail + vptest ymm3, ymm3 + jnz .return_fail + +.return_pass: + mov return, 0 + FUNC_RESTORE + ret + + +.mem_z_small_block: + add len, 4*32 + cmp len, 2*32 + jl .mem_z_lt64 + vmovdqu ymm0, [src] + vmovdqu ymm1, [src+32] + vmovdqu ymm2, [src+len-2*32] + vmovdqu ymm3, [src+len-1*32] + vptest ymm0, ymm0 + jnz .return_fail + vptest ymm1, ymm1 + jnz .return_fail + vptest ymm2, ymm2 + jnz .return_fail + vptest ymm3, ymm3 + jnz .return_fail + jmp .return_pass + +.mem_z_lt64: + cmp len, 32 + jl .mem_z_lt32 + vmovdqu ymm0, [src] + vmovdqu ymm1, [src+len-32] + vptest ymm0, ymm0 + jnz .return_fail + vptest ymm1, ymm1 + jnz .return_fail + jmp .return_pass + + +.mem_z_lt32: + cmp len, 16 + jl .mem_z_lt16 + vmovdqu xmm0, [src] + vmovdqu xmm1, [src+len-16] + vptest xmm0, xmm0 + jnz .return_fail + vptest xmm1, xmm1 + jnz .return_fail + jmp .return_pass + + +.mem_z_lt16: + cmp len, 8 + jl .mem_z_lt8 + mov tmp, [src] + mov tmp3,[src+len-8] + or tmp, tmp3 + test tmp, tmp + jnz .return_fail + jmp .return_pass + +.mem_z_lt8: + cmp len, 0 + je .return_pass +.mem_z_1byte_loop: + mov tmpb, [src+pos] + cmp tmpb, 0 + jnz .return_fail + add pos, 1 + cmp pos, len + jl .mem_z_1byte_loop + jmp .return_pass + +.return_fail: + mov return, 1 + FUNC_RESTORE + ret + +endproc_frame diff --git a/src/isa-l/mem/mem_zero_detect_base.c b/src/isa-l/mem/mem_zero_detect_base.c new file mode 100644 index 000000000..235301658 --- /dev/null +++ b/src/isa-l/mem/mem_zero_detect_base.c @@ -0,0 +1,69 @@ +/********************************************************************** + Copyright(c) 2011-2018 Intel Corporation All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include <stdint.h> +#include <stddef.h> +#include "unaligned.h" + +int mem_zero_detect_base(void *buf, size_t n) +{ + uint8_t *c = buf; + uintmax_t a = 0; + + // Check buffer in native machine width comparisons + while (n >= sizeof(uintmax_t)) { + n -= sizeof(uintmax_t); + if (load_umax(c) != 0) + return -1; + c += sizeof(uintmax_t); + } + + // Check remaining bytes + switch (n) { + case 7: + a |= *c++; // fall through to case 6,5,4 + case 6: + a |= *c++; // fall through to case 5,4 + case 5: + a |= *c++; // fall through to case 4 + case 4: + a |= load_u32(c); + break; + case 3: + a |= *c++; // fall through to case 2 + case 2: + a |= load_u16(c); + break; + case 1: + a |= *c; + break; + } + + return (a == 0) ? 0 : -1; +} diff --git a/src/isa-l/mem/mem_zero_detect_base_aliases.c b/src/isa-l/mem/mem_zero_detect_base_aliases.c new file mode 100644 index 000000000..8c75b06be --- /dev/null +++ b/src/isa-l/mem/mem_zero_detect_base_aliases.c @@ -0,0 +1,38 @@ +/********************************************************************** + Copyright(c) 2011-2018 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include <stdint.h> +#include "mem_routines.h" + +int mem_zero_detect_base(void *buf, size_t n); + +int isal_zero_detect(void *mem, size_t len) +{ + return mem_zero_detect_base(mem, len); +} diff --git a/src/isa-l/mem/mem_zero_detect_perf.c b/src/isa-l/mem/mem_zero_detect_perf.c new file mode 100644 index 000000000..90a308862 --- /dev/null +++ b/src/isa-l/mem/mem_zero_detect_perf.c @@ -0,0 +1,60 @@ +/********************************************************************** + Copyright(c) 2011-2018 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. 
+ * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "mem_routines.h" +#include "test.h" +#include "types.h" + +#define TEST_LEN 8*1024 +#define TEST_TYPE_STR "_warm" + +int main(int argc, char *argv[]) +{ + int val = 0; + void *buf; + struct perf start; + + printf("Test mem_zero_detect_perf %d bytes\n", TEST_LEN); + + if (posix_memalign(&buf, 64, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + + memset(buf, 0, TEST_LEN); + BENCHMARK(&start, BENCHMARK_TIME, val |= isal_zero_detect(buf, TEST_LEN)); + + printf("mem_zero_detect_perf" TEST_TYPE_STR ": "); + perf_print(start, (long long)TEST_LEN); + + return 0; +} diff --git a/src/isa-l/mem/mem_zero_detect_sse.asm b/src/isa-l/mem/mem_zero_detect_sse.asm new file mode 100644 index 000000000..c84f0f034 --- /dev/null +++ b/src/isa-l/mem/mem_zero_detect_sse.asm @@ -0,0 +1,176 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; 
Copyright(c) 2011-2018 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "reg_sizes.asm"
+
+; Register aliases for ELF64 output: arg0..arg5 are rdi, rsi, rdx, rcx, r8, r9.
+; tmp3 aliases arg4 (r8). FUNC_SAVE/FUNC_RESTORE expand to nothing here.
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define tmpb r11b
+ %define tmp3 arg4
+ %define return rax
+ %define func(x) x: endbranch
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+; Register aliases for Win64 output: arg0..arg3 are rcx, rdx, r8, r9 and
+; tmp3 is r10. FUNC_SAVE only emits end_prolog; FUNC_RESTORE is empty.
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define tmp r11
+ %define tmpb r11b
+ %define tmp3 r10
+ %define return rax
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ end_prolog
+ %endmacro
+ %macro FUNC_RESTORE 0
+ %endmacro
+%endif
+
+; src = buffer address, len = byte count. pos shares rax with the return
+; value, so it is only a running offset until a return label overwrites it.
+; ptr is defined but not referenced by the code below.
+%define src arg0
+%define len arg1
+%define ptr arg2
+%define pos return
+
+default rel
+
+[bits 64]
+section .text
+
+;
+; mem_zero_detect_sse(src, len)
+;
+; Checks whether the len bytes starting at src are all zero.
+; Returns 0 in rax when every byte is zero, 1 as soon as a non-zero
+; byte (or 8/16-byte chunk containing one) is found.
+;
+; Uses xmm0-xmm3 and tmp/tmp3 as scratch. All loads are unaligned (movdqu)
+; and stay inside [src, src+len).
+; NOTE(review): length comparisons use jl/jle, i.e. len is compared as a
+; signed 64-bit value.
+;
+align 16
+mk_global mem_zero_detect_sse, function
+func(mem_zero_detect_sse)
+	FUNC_SAVE
+	mov	pos, 0
+	; len becomes (len - 64): the offset of the final 64-byte window.
+	; Anything of 64 bytes or less is handled by the small-block path.
+	sub	len, 4*16
+	jle	.mem_z_small_block
+
+	; Main loop: test 64 bytes per iteration while pos < (len - 64).
+	; ptest x, x sets ZF only when x is all zero, so jnz means a set bit.
+.mem_z_loop:
+	movdqu	xmm0, [src+pos]
+	movdqu	xmm1, [src+pos+1*16]
+	movdqu	xmm2, [src+pos+2*16]
+	movdqu	xmm3, [src+pos+3*16]
+	ptest	xmm0, xmm0
+	jnz	.return_fail
+	ptest	xmm1, xmm1
+	jnz	.return_fail
+	ptest	xmm2, xmm2
+	jnz	.return_fail
+	ptest	xmm3, xmm3
+	jnz	.return_fail
+	add	pos, 4*16
+	cmp	pos, len
+	jl	.mem_z_loop
+
+	; Tail: test the last 64 bytes of the buffer (len still holds the
+	; original length minus 64). This window may overlap bytes the loop
+	; already checked, which avoids a separate remainder path.
+.mem_z_last_block:
+	movdqu	xmm0, [src+len]
+	movdqu	xmm1, [src+len+1*16]
+	movdqu	xmm2, [src+len+2*16]
+	movdqu	xmm3, [src+len+3*16]
+	ptest	xmm0, xmm0
+	jnz	.return_fail
+	ptest	xmm1, xmm1
+	jnz	.return_fail
+	ptest	xmm2, xmm2
+	jnz	.return_fail
+	ptest	xmm3, xmm3
+	jnz	.return_fail
+
+.return_pass:
+	mov	return, 0
+	FUNC_RESTORE
+	ret
+
+
+	; Buffers of at most 64 bytes. Restore the original len first.
+	; For 32 <= len <= 64: test the first 32 and the last 32 bytes; the two
+	; halves overlap when len < 64.
+.mem_z_small_block:
+	add	len, 4*16
+	cmp	len, 2*16
+	jl	.mem_z_lt32
+	movdqu	xmm0, [src]
+	movdqu	xmm1, [src+16]
+	movdqu	xmm2, [src+len-2*16]
+	movdqu	xmm3, [src+len-1*16]
+	ptest	xmm0, xmm0
+	jnz	.return_fail
+	ptest	xmm1, xmm1
+	jnz	.return_fail
+	ptest	xmm2, xmm2
+	jnz	.return_fail
+	ptest	xmm3, xmm3
+	jnz	.return_fail
+	jmp	.return_pass
+
+	; 16 <= len < 32: test the first 16 and the last 16 bytes.
+.mem_z_lt32:
+	cmp	len, 16
+	jl	.mem_z_lt16
+	movdqu	xmm0, [src]
+	movdqu	xmm1, [src+len-16]
+	ptest	xmm0, xmm0
+	jnz	.return_fail
+	ptest	xmm1, xmm1
+	jnz	.return_fail
+	jmp	.return_pass
+
+	; 8 <= len < 16: OR the first and last 8 bytes and test the result.
+.mem_z_lt16:
+	cmp	len, 8
+	jl	.mem_z_lt8
+	mov	tmp, [src]
+	mov	tmp3,[src+len-8]
+	or	tmp, tmp3
+	test	tmp, tmp
+	jnz	.return_fail
+	jmp	.return_pass
+
+	; len < 8: an empty buffer passes; otherwise test one byte at a time.
+	; pos is still 0 here (set at entry, untouched on the small-block path).
+.mem_z_lt8:
+	cmp	len, 0
+	je	.return_pass
+.mem_z_1byte_loop:
+	mov	tmpb, [src+pos]
+	cmp	tmpb, 0
+	jnz	.return_fail
+	add	pos, 1
+	cmp	pos, len
+	jl	.mem_z_1byte_loop
+	jmp	.return_pass
+
+.return_fail:
+	mov	return, 1
+	FUNC_RESTORE
+	ret
+
+endproc_frame
diff --git a/src/isa-l/mem/mem_zero_detect_test.c b/src/isa-l/mem/mem_zero_detect_test.c
new file mode 100644
index 000000000..12d5f4bdb
--- /dev/null
+++ b/src/isa-l/mem/mem_zero_detect_test.c
@@ -0,0 +1,226 @@
+/**********************************************************************
+  Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <limits.h> +#include "mem_routines.h" +#include "types.h" + +#define TEST_MEM 10*1024*1024 +#define TEST_LEN 8*1024 +#define RAND_ALIGN 32 +#define BORDER_BYTES (5*RAND_ALIGN + 7) + +#ifndef RANDOMS +# define RANDOMS 2000 +#endif +#ifndef TEST_SEED +# define TEST_SEED 0x1234 +#endif + +int main(int argc, char *argv[]) +{ + int i, j, sign; + long long r, l; + void *buf; + unsigned char *a; + int failures = 0, ret_neg = 1; + + printf("mem_zero_detect_test %d bytes, %d randoms, seed=0x%x ", TEST_MEM, RANDOMS, + TEST_SEED); + if (posix_memalign(&buf, 64, TEST_MEM)) { + printf("alloc error: Fail"); + return -1; + } + + srand(TEST_SEED); + + // Test full zero buffer + memset(buf, 0, TEST_MEM); + failures = isal_zero_detect(buf, TEST_MEM); + + if (failures) { + printf("Fail large buf test\n"); + return failures; + } + putchar('.'); + + // Test small buffers + for (i = 0; i < TEST_LEN; i++) { + failures |= isal_zero_detect(buf, i); + if (failures) { + printf("Fail len=%d\n", i); + return failures; + } + } + putchar('.'); + + // Test small buffers near end of alloc region + a = buf; + for (i = 0; i < TEST_LEN; i++) + failures |= isal_zero_detect(&a[TEST_LEN - i], i); + + if (failures) { + printf("Fail:\n"); + return failures; + } + putchar('.'); + + // Test for detect non zero + a[TEST_MEM / 2] = 1; + ret_neg = 
isal_zero_detect(a, TEST_MEM); + if (ret_neg == 0) { + printf("Fail on not detect\n"); + return -1; + } + a[TEST_MEM / 2] = 0; + putchar('.'); + + // Test various non-zero offsets + for (i = 0; i < BORDER_BYTES; i++) { + for (j = 0; j < CHAR_BIT; j++) { + a[i] = 1 << j; + ret_neg = isal_zero_detect(a, TEST_MEM); + if (ret_neg == 0) { + printf("Fail on not detect offsets %d, %d\n", i, j); + return -1; + } + a[i] = 0; + } + } + putchar('.'); + fflush(0); + + // Test random non-zero offsets + for (i = 0; i < RANDOMS; i++) { + r = rand(); + r = (r % TEST_LEN) ^ (r & (RAND_ALIGN - 1)); + if (r >= TEST_LEN) + continue; + + a[r] = 1 << (r & (CHAR_BIT - 1)); + ret_neg = isal_zero_detect(a, TEST_MEM); + if (ret_neg == 0) { + printf("Fail on not detect rand %d, e=%lld\n", i, r); + return -1; + } + a[r] = 0; + } + putchar('.'); + fflush(0); + + // Test putting non-zero byte at end of buffer + for (i = 1; i < BORDER_BYTES; i++) { + for (j = 0; j < CHAR_BIT; j++) { + a[TEST_MEM - i] = 1 << j; + ret_neg = isal_zero_detect(a, TEST_MEM); + if (ret_neg == 0) { + printf("Fail on not detect rand offset=%d, idx=%d\n", i, j); + return -1; + } + a[TEST_MEM - i] = 0; + } + } + putchar('.'); + + // Test various size buffers and non-zero offsets + for (l = 1; l < TEST_LEN; l++) { + for (i = 0; i < l + BORDER_BYTES; i++) { + failures = isal_zero_detect(a, l); + + if (failures) { + printf("Fail on detect non-zero with l=%lld\n", l); + return -1; + } + + a[i] = 1; + ret_neg = isal_zero_detect(a, l); + + if ((i < l) && (ret_neg == 0)) { + printf("Fail on non-zero buffer l=%lld err=%d\n", l, i); + return -1; + } + if ((i >= l) && (ret_neg != 0)) { + printf("Fail on bad pass detect l=%lld err=%d\n", l, i); + return -1; + } + a[i] = 0; + } + } + putchar('.'); + + // Test random test size and non-zero error offsets + for (i = 0; i < RANDOMS; i++) { + r = rand(); + r = (r % TEST_LEN) ^ (r & (RAND_ALIGN - 1)); + l = r + 1 + (rand() & (CHAR_BIT - 1)); + a[r] = 1 << (r & (CHAR_BIT - 1)); + ret_neg = 
isal_zero_detect(a, l); + if (ret_neg == 0) { + printf("Fail on not detect rand %d, l=%lld, e=%lld\n", i, l, r); + return -1; + } + a[r] = 0; + } + putchar('.'); + fflush(0); + + // Test combinations of zero and non-zero buffers + for (i = 0; i < RANDOMS; i++) { + r = rand(); + r = (r % TEST_LEN) ^ (r & (RAND_ALIGN - 1)); + sign = rand() & 1 ? 1 : -1; + l = r + sign * (rand() & (2 * RAND_ALIGN - 1)); + + if ((l >= TEST_LEN) || (l < 0) || (r >= TEST_LEN)) + continue; + + a[r] = 1 << (r & (CHAR_BIT - 1)); + ret_neg = isal_zero_detect(a, l); + + if ((r < l) && (ret_neg == 0)) { + printf("Fail on non-zero rand buffer %d, l=%lld, e=%lld\n", i, l, r); + return -1; + } + if ((r >= l) && (ret_neg != 0)) { + printf("Fail on bad pass zero detect rand %d, l=%lld, e=%lld\n", i, l, + r); + return -1; + } + + a[r] = 0; + } + putchar('.'); + fflush(0); + + printf(failures == 0 ? " Pass\n" : " Fail\n"); + return failures; +} |