summaryrefslogtreecommitdiffstats
path: root/src/isa-l/mem
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:45:59 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:45:59 +0000
commit19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch)
tree42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/isa-l/mem
parentInitial commit. (diff)
downloadceph-upstream/16.2.11+ds.tar.xz
ceph-upstream/16.2.11+ds.zip
Adding upstream version 16.2.11+ds.upstream/16.2.11+dsupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--src/isa-l/mem/Makefile.am48
-rw-r--r--src/isa-l/mem/aarch64/Makefile.am33
-rw-r--r--src/isa-l/mem/aarch64/mem_aarch64_dispatcher.c39
-rw-r--r--src/isa-l/mem/aarch64/mem_multibinary_arm.S33
-rw-r--r--src/isa-l/mem/aarch64/mem_zero_detect_neon.S243
-rw-r--r--src/isa-l/mem/mem_multibinary.asm42
-rw-r--r--src/isa-l/mem/mem_zero_detect_avx.asm189
-rw-r--r--src/isa-l/mem/mem_zero_detect_base.c69
-rw-r--r--src/isa-l/mem/mem_zero_detect_base_aliases.c38
-rw-r--r--src/isa-l/mem/mem_zero_detect_perf.c60
-rw-r--r--src/isa-l/mem/mem_zero_detect_sse.asm176
-rw-r--r--src/isa-l/mem/mem_zero_detect_test.c226
12 files changed, 1196 insertions, 0 deletions
diff --git a/src/isa-l/mem/Makefile.am b/src/isa-l/mem/Makefile.am
new file mode 100644
index 000000000..c864f66fd
--- /dev/null
+++ b/src/isa-l/mem/Makefile.am
@@ -0,0 +1,48 @@
+########################################################################
+# Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+########################################################################
+
+# Pull in the aarch64-specific source list for this directory.
+include mem/aarch64/Makefile.am
+
+# Portable C implementation, appended to the common source list.
+lsrc += mem/mem_zero_detect_base.c
+
+# The aliases file defines isal_zero_detect() as a direct call to the base
+# routine; it is listed for the base-aliases build and for ppc64le.
+# NOTE(review): presumably these are the builds without a runtime dispatcher - confirm.
+lsrc_base_aliases += mem/mem_zero_detect_base_aliases.c
+lsrc_ppc64le += mem/mem_zero_detect_base_aliases.c
+
+# x86_64: AVX and SSE assembly variants plus the multibinary dispatch stub.
+lsrc_x86_64 += mem/mem_zero_detect_avx.asm \
+ mem/mem_zero_detect_sse.asm \
+ mem/mem_multibinary.asm
+
+# Header listed under extern_hdrs (presumably the installed public API header).
+extern_hdrs += include/mem_routines.h
+
+# Additional headers included by the test and perf programs in this directory.
+other_src += include/test.h \
+ include/types.h
+
+# Unit test program for this directory.
+check_tests += mem/mem_zero_detect_test
+
+# Performance test program for this directory.
+perf_tests += mem/mem_zero_detect_perf
diff --git a/src/isa-l/mem/aarch64/Makefile.am b/src/isa-l/mem/aarch64/Makefile.am
new file mode 100644
index 000000000..c18659872
--- /dev/null
+++ b/src/isa-l/mem/aarch64/Makefile.am
@@ -0,0 +1,33 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+# aarch64 sources: the NEON implementation, the multibinary interface stub,
+# and the C dispatcher that chooses between NEON and the basic provider.
+lsrc_aarch64 += \
+ mem/aarch64/mem_zero_detect_neon.S \
+ mem/aarch64/mem_multibinary_arm.S \
+ mem/aarch64/mem_aarch64_dispatcher.c
diff --git a/src/isa-l/mem/aarch64/mem_aarch64_dispatcher.c b/src/isa-l/mem/aarch64/mem_aarch64_dispatcher.c
new file mode 100644
index 000000000..0dfe3a3ae
--- /dev/null
+++ b/src/isa-l/mem/aarch64/mem_aarch64_dispatcher.c
@@ -0,0 +1,39 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#include <aarch64_multibinary.h>
+
+/*
+ * Dispatcher for isal_zero_detect on aarch64.
+ *
+ * Picks the NEON implementation when the HWCAP_ASIMD bit is set in the
+ * AT_HWCAP auxiliary value; otherwise falls back to the basic provider
+ * for mem_zero_detect.
+ */
+DEFINE_INTERFACE_DISPATCHER(isal_zero_detect)
+{
+	if ((getauxval(AT_HWCAP) & HWCAP_ASIMD) != 0)
+		return PROVIDER_INFO(mem_zero_detect_neon);
+
+	return PROVIDER_BASIC(mem_zero_detect);
+}
diff --git a/src/isa-l/mem/aarch64/mem_multibinary_arm.S b/src/isa-l/mem/aarch64/mem_multibinary_arm.S
new file mode 100644
index 000000000..baac3ca38
--- /dev/null
+++ b/src/isa-l/mem/aarch64/mem_multibinary_arm.S
@@ -0,0 +1,33 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+#include <aarch64_multibinary.h>
+
+/* Instantiate the multibinary entry point for isal_zero_detect.
+ * NOTE(review): mbin_interface comes from aarch64_multibinary.h; presumably it
+ * emits the public symbol that resolves through the C dispatcher - confirm. */
+mbin_interface isal_zero_detect
+
diff --git a/src/isa-l/mem/aarch64/mem_zero_detect_neon.S b/src/isa-l/mem/aarch64/mem_zero_detect_neon.S
new file mode 100644
index 000000000..6f93ff612
--- /dev/null
+++ b/src/isa-l/mem/aarch64/mem_zero_detect_neon.S
@@ -0,0 +1,243 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+.text
+.arch armv8-a
+
+/*int mem_zero_detect_neon(void *buf, size_t n)*/
+
+// input: buf -> x0
+// input: n -> x1
+// output: -> x0 (true or false)
+
+.global mem_zero_detect_neon
+.type mem_zero_detect_neon, %function
+
+// mem_zero_detect_neon: x0 = buf, x1 = n (bytes still to check).
+// Returns w0 = 0 when every byte is zero, 0xffffffff otherwise.
+// The buffer is consumed in progressively smaller strides:
+// 384-byte blocks, 128-byte blocks, 64-byte blocks, 8-byte words, then 0..7 tail bytes.
+mem_zero_detect_neon:
+ // fewer than 16*24 (384) bytes: skip the widest loop
+ cmp x1, #(16*24-1)
+ b.ls .loop_16x24_end
+
+.loop_16x24: // 16x24 block loop
+
+ // Load 24 x 16 bytes. Registers q8-q15 are not used
+ // (presumably because v8-v15 are callee-saved under AAPCS64 - confirm).
+ ldr q0, [x0]
+ ldr q1, [x0, #16]
+ ldr q2, [x0, #(16*2)]
+ ldr q3, [x0, #(16*3)]
+ ldr q4, [x0, #(16*4)]
+ ldr q5, [x0, #(16*5)]
+ ldr q6, [x0, #(16*6)]
+ ldr q7, [x0, #(16*7)]
+ ldr q16, [x0, #(16*8)]
+ ldr q17, [x0, #(16*9)]
+ ldr q18, [x0, #(16*10)]
+ ldr q19, [x0, #(16*11)]
+ ldr q20, [x0, #(16*12)]
+ ldr q21, [x0, #(16*13)]
+ ldr q22, [x0, #(16*14)]
+ ldr q23, [x0, #(16*15)]
+ ldr q24, [x0, #(16*16)]
+ ldr q25, [x0, #(16*17)]
+ ldr q26, [x0, #(16*18)]
+ ldr q27, [x0, #(16*19)]
+ ldr q28, [x0, #(16*20)]
+ ldr q29, [x0, #(16*21)]
+ ldr q30, [x0, #(16*22)]
+ ldr q31, [x0, #(16*23)]
+
+ // advance the buffer pointer past this block
+ add x0, x0, #(16*24)
+
+ // Pairwise OR reduction of the 24 vectors down to v0:
+ // 24 -> 12
+ orr v0.16b, v0.16b, v1.16b
+ orr v2.16b, v2.16b, v3.16b
+ orr v4.16b, v4.16b, v5.16b
+ orr v6.16b, v6.16b, v7.16b
+ orr v16.16b, v16.16b, v17.16b
+ orr v18.16b, v18.16b, v19.16b
+ orr v20.16b, v20.16b, v21.16b
+ orr v22.16b, v22.16b, v23.16b
+ orr v24.16b, v24.16b, v25.16b
+ orr v26.16b, v26.16b, v27.16b
+ orr v28.16b, v28.16b, v29.16b
+ orr v30.16b, v30.16b, v31.16b
+
+ // 12 -> 6
+ orr v0.16b, v0.16b, v2.16b
+ orr v4.16b, v4.16b, v6.16b
+ orr v16.16b, v16.16b, v18.16b
+ orr v20.16b, v20.16b, v22.16b
+ orr v24.16b, v24.16b, v26.16b
+ orr v28.16b, v28.16b, v30.16b
+
+ // 6 -> 3
+ orr v0.16b, v0.16b, v4.16b
+ orr v16.16b, v16.16b, v20.16b
+ orr v24.16b, v24.16b, v28.16b
+
+ // 3 -> 1 (result in v0)
+ orr v0.16b, v0.16b, v16.16b
+ orr v0.16b, v0.16b, v24.16b
+
+ // fold the two 64-bit halves of v0 into x2; any set bit means non-zero data
+ mov x3, v0.d[0]
+ mov x2, v0.d[1]
+ orr x2, x3, x2
+ cbnz x2, .fail_exit
+
+ // loop condition check
+ // repeat while at least 384 bytes remain
+ sub x1, x1, #(16*24)
+ cmp x1, #(16*24-1)
+ b.hi .loop_16x24
+
+.loop_16x24_end:
+ // fewer than 16*8 (128) bytes left: skip the 128-byte loop
+ cmp x1, #(16*8-1)
+ b.ls .loop_16x8_end
+
+.loop_16x8: // 16x8 block loop
+ ldr q0, [x0]
+ ldr q1, [x0, #16]
+ ldr q2, [x0, #(16*2)]
+ ldr q3, [x0, #(16*3)]
+ ldr q4, [x0, #(16*4)]
+ ldr q5, [x0, #(16*5)]
+ ldr q6, [x0, #(16*6)]
+ ldr q7, [x0, #(16*7)]
+
+ add x0, x0, #(16*8)
+
+ // OR reduction 8 -> 4 -> 2 -> 1 into v0
+ orr v0.16b, v0.16b, v1.16b
+ orr v2.16b, v2.16b, v3.16b
+ orr v4.16b, v4.16b, v5.16b
+ orr v6.16b, v6.16b, v7.16b
+
+ orr v0.16b, v0.16b, v2.16b
+ orr v4.16b, v4.16b, v6.16b
+ orr v0.16b, v0.16b, v4.16b
+
+ // fold both halves of v0 and fail on any set bit
+ mov x3, v0.d[0]
+ mov x2, v0.d[1]
+ orr x2, x3, x2
+ cbnz x2, .fail_exit
+
+ // repeat while at least 128 bytes remain
+ sub x1, x1, #(16*8)
+ cmp x1, #(16*8-1)
+ b.hi .loop_16x8
+
+.loop_16x8_end:
+ // fewer than 8*8 (64) bytes left: skip the 64-byte loop
+ cmp x1, #(8*8-1)
+ b.ls .loop_8x8_end
+
+.loop_8x8: // 8x8 block loop
+ // eight 64-bit words via general-purpose register pairs
+ ldp x2, x3, [x0]
+ ldp x4, x5, [x0, #16]
+ ldp x6, x7, [x0, #32]
+ ldp x8, x9, [x0, #48]
+
+ add x0, x0, #(8*8)
+
+ // OR all eight words into x2
+ orr x2, x2, x3
+ orr x4, x4, x5
+ orr x6, x6, x7
+ orr x8, x8, x9
+ orr x2, x2, x4
+ orr x6, x6, x8
+ orr x2, x2, x6
+
+ cbnz x2, .fail_exit
+
+ // repeat while at least 64 bytes remain
+ sub x1, x1, #(8*8)
+ cmp x1, #(8*8-1)
+ b.hi .loop_8x8
+
+.loop_8x8_end:
+ // fewer than 8 bytes left: go straight to the byte-wise tail
+ cmp x1, #(8-1)
+ b.ls .handle_remainder
+
+.loop_8: // loop per 8bytes
+ ldr x2, [x0]
+ add x0, x0, #8
+ cbnz x2, .fail_exit
+
+ // repeat while at least 8 bytes remain
+ sub x1, x1, #8
+ cmp x1, #7
+ b.hi .loop_8
+
+.loop_8_end:
+
+ // check remaining bytes
+ // x1 is now 0..7; w2 accumulates the OR of the tail bytes
+.handle_remainder:
+ mov w2, #0
+
+ // dispatch on the exact remainder; 7 falls into .case7 below
+ cmp x1, #0
+ beq .handle_reminder_end
+ cmp x1, #1
+ beq .case1
+ cmp x1, #2
+ beq .case2
+ cmp x1, #3
+ beq .case3
+ cmp x1, #4
+ beq .case4
+ cmp x1, #5
+ beq .case5
+ cmp x1, #6
+ beq .case6
+
+.case7: // remainder 7 falls through to here; cases 7,6,5 each consume one byte, then fall into case4
+ ldrb w3, [x0]
+ add x0, x0, #1
+ orr w2, w2, w3
+.case6:
+ ldrb w3, [x0]
+ add x0, x0, #1
+ orr w2, w2, w3
+.case5:
+ ldrb w3, [x0]
+ add x0, x0, #1
+ orr w2, w2, w3
+.case4:
+ // last four bytes with a single 32-bit load
+ ldr w3, [x0]
+ orr w2, w2, w3
+ b .handle_reminder_end
+.case3:
+ // one byte, then fall into case2 for the last two
+ ldrb w3, [x0]
+ add x0, x0, #1
+ orr w2, w2, w3
+.case2:
+ // last two bytes with a single 16-bit load
+ ldrh w3, [x0]
+ orr w2, w2, w3
+ b .handle_reminder_end
+.case1:
+ ldrb w3, [x0]
+ orr w2, w2, w3
+
+.handle_reminder_end:
+ // tail accumulator zero -> whole buffer was zero
+ cbz w2, .pass_exit
+
+.fail_exit:
+ // non-zero data found: return 0xffffffff
+ mov w0, #0xffffffff
+ ret
+
+.pass_exit:
+ // all bytes zero: return 0
+ mov w0, #0x0
+ ret
diff --git a/src/isa-l/mem/mem_multibinary.asm b/src/isa-l/mem/mem_multibinary.asm
new file mode 100644
index 000000000..38f63e22b
--- /dev/null
+++ b/src/isa-l/mem/mem_multibinary.asm
@@ -0,0 +1,42 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "reg_sizes.asm"
+%include "multibinary.asm"
+
+default rel
+[bits 64]
+
+; Implementations the dispatcher can choose from; each is defined in its own
+; source file (AVX and SSE in assembly, base in C).
+extern mem_zero_detect_avx
+extern mem_zero_detect_sse
+extern mem_zero_detect_base
+
+; Declare the public multibinary entry point isal_zero_detect.
+mbin_interface isal_zero_detect
+
+; Bind the entry point to its candidate implementations: base, SSE, then AVX
+; for both of the last two slots (no separate variant is supplied for them).
+; NOTE(review): the CPU-feature level each slot corresponds to is defined by
+; mbin_dispatch_init5 in multibinary.asm - confirm there.
+mbin_dispatch_init5 isal_zero_detect, mem_zero_detect_base, mem_zero_detect_sse, mem_zero_detect_avx, mem_zero_detect_avx
diff --git a/src/isa-l/mem/mem_zero_detect_avx.asm b/src/isa-l/mem/mem_zero_detect_avx.asm
new file mode 100644
index 000000000..871b65261
--- /dev/null
+++ b/src/isa-l/mem/mem_zero_detect_avx.asm
@@ -0,0 +1,189 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "reg_sizes.asm"
+
+; ELF64 output: arguments arrive in rdi, rsi, rdx, rcx, r8, r9
+; (the System V AMD64 register order). No prologue/epilogue is needed,
+; so FUNC_SAVE / FUNC_RESTORE expand to nothing.
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11
+ %define tmpb r11b
+ %define tmp3 arg4
+ %define return rax
+ %define func(x) x:
+ %define FUNC_SAVE
+ %define FUNC_RESTORE
+%endif
+
+; Win64 output: arguments arrive in rcx, rdx, r8, r9 (Microsoft x64 order).
+; The function is wrapped in proc_frame; FUNC_SAVE only closes the prologue
+; and FUNC_RESTORE is empty because no registers are saved.
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define tmp r11
+ %define tmpb r11b
+ %define tmp3 r10
+ %define return rax
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ end_prolog
+ %endmacro
+ %macro FUNC_RESTORE 0
+ %endmacro
+%endif
+
+; Symbolic names used by the routine: src = buffer pointer, len = byte count,
+; pos = running offset (shares rax with the return value).
+%define src arg0
+%define len arg1
+%define ptr arg2
+%define pos return
+
+default rel
+
+[bits 64]
+section .text
+
+align 16
+; mem_zero_detect_avx(src, len)
+; Returns 0 in rax when all len bytes at src are zero, 1 otherwise.
+; Buffers longer than 128 bytes are scanned in 128-byte steps and finished
+; with one (possibly overlapping) 128-byte check of the final bytes.
+; Shorter buffers are covered by an overlapping head load and tail load.
+global mem_zero_detect_avx:ISAL_SYM_TYPE_FUNCTION
+func(mem_zero_detect_avx)
+ FUNC_SAVE
+ mov pos, 0
+ ; len becomes (n - 128): the offset of the last 128-byte block
+ sub len, 4*32
+ ; n <= 128 (signed compare): use the small-buffer paths
+ jle .mem_z_small_block
+
+.mem_z_loop:
+ ; test 4 x 32 bytes starting at src+pos
+ vmovdqu ymm0, [src+pos]
+ vmovdqu ymm1, [src+pos+1*32]
+ vmovdqu ymm2, [src+pos+2*32]
+ vmovdqu ymm3, [src+pos+3*32]
+ ; vptest r,r sets ZF only when the register is all zero
+ vptest ymm0, ymm0
+ jnz .return_fail
+ vptest ymm1, ymm1
+ jnz .return_fail
+ vptest ymm2, ymm2
+ jnz .return_fail
+ vptest ymm3, ymm3
+ jnz .return_fail
+ add pos, 4*32
+ ; keep going while pos < n - 128
+ cmp pos, len
+ jl .mem_z_loop
+
+.mem_z_last_block:
+ ; final 128 bytes, addressed from the end (src + n - 128); may re-test
+ ; bytes the loop already covered, which is harmless
+ vmovdqu ymm0, [src+len]
+ vmovdqu ymm1, [src+len+1*32]
+ vmovdqu ymm2, [src+len+2*32]
+ vmovdqu ymm3, [src+len+3*32]
+ vptest ymm0, ymm0
+ jnz .return_fail
+ vptest ymm1, ymm1
+ jnz .return_fail
+ vptest ymm2, ymm2
+ jnz .return_fail
+ vptest ymm3, ymm3
+ jnz .return_fail
+
+.return_pass:
+ ; every tested byte was zero
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+
+.mem_z_small_block:
+ ; undo the earlier subtraction so len is the real byte count again
+ add len, 4*32
+ cmp len, 2*32
+ jl .mem_z_lt64
+ ; 64 <= n <= 128: first 64 bytes plus last 64 bytes (overlapping) cover all
+ vmovdqu ymm0, [src]
+ vmovdqu ymm1, [src+32]
+ vmovdqu ymm2, [src+len-2*32]
+ vmovdqu ymm3, [src+len-1*32]
+ vptest ymm0, ymm0
+ jnz .return_fail
+ vptest ymm1, ymm1
+ jnz .return_fail
+ vptest ymm2, ymm2
+ jnz .return_fail
+ vptest ymm3, ymm3
+ jnz .return_fail
+ jmp .return_pass
+
+.mem_z_lt64:
+ cmp len, 32
+ jl .mem_z_lt32
+ ; 32 <= n < 64: first 32 bytes plus last 32 bytes
+ vmovdqu ymm0, [src]
+ vmovdqu ymm1, [src+len-32]
+ vptest ymm0, ymm0
+ jnz .return_fail
+ vptest ymm1, ymm1
+ jnz .return_fail
+ jmp .return_pass
+
+
+.mem_z_lt32:
+ cmp len, 16
+ jl .mem_z_lt16
+ ; 16 <= n < 32: first 16 bytes plus last 16 bytes
+ vmovdqu xmm0, [src]
+ vmovdqu xmm1, [src+len-16]
+ vptest xmm0, xmm0
+ jnz .return_fail
+ vptest xmm1, xmm1
+ jnz .return_fail
+ jmp .return_pass
+
+
+.mem_z_lt16:
+ cmp len, 8
+ jl .mem_z_lt8
+ ; 8 <= n < 16: first 8 bytes plus last 8 bytes in general-purpose registers
+ mov tmp, [src]
+ mov tmp3,[src+len-8]
+ or tmp, tmp3
+ test tmp, tmp
+ jnz .return_fail
+ jmp .return_pass
+
+.mem_z_lt8:
+ ; n == 0 is reported as all-zero
+ cmp len, 0
+ je .return_pass
+.mem_z_1byte_loop:
+ ; 1 <= n < 8: check one byte at a time (pos is still 0 on entry)
+ mov tmpb, [src+pos]
+ cmp tmpb, 0
+ jnz .return_fail
+ add pos, 1
+ cmp pos, len
+ jl .mem_z_1byte_loop
+ jmp .return_pass
+
+.return_fail:
+ ; a non-zero byte was found
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+; closes the proc_frame opened by func() on win64
+; (presumably defined as a no-op for ELF in reg_sizes.asm - confirm)
+endproc_frame
diff --git a/src/isa-l/mem/mem_zero_detect_base.c b/src/isa-l/mem/mem_zero_detect_base.c
new file mode 100644
index 000000000..235301658
--- /dev/null
+++ b/src/isa-l/mem/mem_zero_detect_base.c
@@ -0,0 +1,69 @@
+/**********************************************************************
+ Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdint.h>
+#include <stddef.h>
+#include "unaligned.h"
+
+/**
+ * Report whether a memory region consists entirely of zero bytes.
+ *
+ * @param buf start of the region to inspect
+ * @param n   number of bytes to inspect
+ * @return 0 if all n bytes are zero (also for n == 0), -1 otherwise
+ */
+int mem_zero_detect_base(void *buf, size_t n)
+{
+	uint8_t *c = buf;
+	uintmax_t a = 0;
+
+	// Bulk of the buffer: one uintmax_t-sized load via load_umax() per
+	// step, returning as soon as a non-zero word is seen.
+	while (n >= sizeof(uintmax_t)) {
+		n -= sizeof(uintmax_t);
+		if (load_umax(c) != 0)
+			return -1;
+		c += sizeof(uintmax_t);
+	}
+
+	// Tail: fewer than sizeof(uintmax_t) bytes remain. OR them in one at
+	// a time so the check is correct for any width of uintmax_t, not only
+	// the 8-byte case (a fixed 1..7 switch would skip remainders >= 8).
+	while (n > 0) {
+		a |= *c++;
+		n--;
+	}
+
+	return (a == 0) ? 0 : -1;
+}
diff --git a/src/isa-l/mem/mem_zero_detect_base_aliases.c b/src/isa-l/mem/mem_zero_detect_base_aliases.c
new file mode 100644
index 000000000..8c75b06be
--- /dev/null
+++ b/src/isa-l/mem/mem_zero_detect_base_aliases.c
@@ -0,0 +1,38 @@
+/**********************************************************************
+ Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdint.h>
+#include "mem_routines.h"
+
+int mem_zero_detect_base(void *buf, size_t n);
+
+/*
+ * isal_zero_detect() for builds that use this aliases file: forwards
+ * directly to the portable C implementation and returns its result
+ * unchanged.
+ */
+int isal_zero_detect(void *mem, size_t len)
+{
+ return mem_zero_detect_base(mem, len);
+}
diff --git a/src/isa-l/mem/mem_zero_detect_perf.c b/src/isa-l/mem/mem_zero_detect_perf.c
new file mode 100644
index 000000000..90a308862
--- /dev/null
+++ b/src/isa-l/mem/mem_zero_detect_perf.c
@@ -0,0 +1,60 @@
+/**********************************************************************
+ Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "mem_routines.h"
+#include "test.h"
+#include "types.h"
+
+/* Size of the benchmark buffer in bytes; parenthesized so the macro is safe
+ * inside any surrounding expression. */
+#define TEST_LEN (8*1024)
+#define TEST_TYPE_STR "_warm"
+
+/*
+ * Benchmark isal_zero_detect() on a zero-filled, 64-byte-aligned buffer of
+ * TEST_LEN bytes and print the measured performance.
+ *
+ * Returns 0 on success, -1 if the buffer cannot be allocated.
+ */
+int main(int argc, char *argv[])
+{
+	int val = 0;
+	void *buf;
+	struct perf start;
+
+	printf("Test mem_zero_detect_perf %d bytes\n", TEST_LEN);
+
+	if (posix_memalign(&buf, 64, TEST_LEN)) {
+		printf("alloc error: Fail\n");
+		return -1;
+	}
+
+	memset(buf, 0, TEST_LEN);
+	// Results are OR-ed into val so each call's return value is consumed.
+	BENCHMARK(&start, BENCHMARK_TIME, val |= isal_zero_detect(buf, TEST_LEN));
+
+	printf("mem_zero_detect_perf" TEST_TYPE_STR ": ");
+	perf_print(start, (long long)TEST_LEN);
+
+	// Memory from posix_memalign() is released with free().
+	free(buf);
+	return 0;
+}
diff --git a/src/isa-l/mem/mem_zero_detect_sse.asm b/src/isa-l/mem/mem_zero_detect_sse.asm
new file mode 100644
index 000000000..63dad4fc9
--- /dev/null
+++ b/src/isa-l/mem/mem_zero_detect_sse.asm
@@ -0,0 +1,176 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "reg_sizes.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf64
+ %define arg0 rdi                ; elf64: arg0-arg5 follow the System V AMD64 argument-register order
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+ %define arg4 r8
+ %define arg5 r9
+ %define tmp r11                 ; 64-bit scratch register
+ %define tmpb r11b               ; low byte of tmp
+ %define tmp3 arg4               ; second scratch register; aliases arg4 (r8) in this mapping
+ %define return rax
+ %define func(x) x:              ; function entry is a plain label
+ %define FUNC_SAVE               ; expands to nothing on elf64
+ %define FUNC_RESTORE            ; expands to nothing on elf64
+%endif
+
+%ifidn __OUTPUT_FORMAT__, win64
+ %define arg0 rcx                ; win64: arg0-arg3 map to rcx, rdx, r8, r9
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+ %define tmp r11                 ; 64-bit scratch register
+ %define tmpb r11b               ; low byte of tmp
+ %define tmp3 r10                ; second scratch register; r8 is arg2 in this mapping
+ %define return rax
+ %define func(x) proc_frame x    ; entry opens a proc_frame, closed by endproc_frame after the routine
+ %macro FUNC_SAVE 0
+ end_prolog
+ %endmacro
+ %macro FUNC_RESTORE 0
+ %endmacro
+%endif
+
+%define src arg0                 ; base address of the buffer being scanned
+%define len arg1                 ; byte count; adjusted in place by the routine
+%define ptr arg2                 ; not referenced by the code below
+%define pos return               ; running byte offset; shares rax with the return value
+
+default rel
+
+[bits 64]
+section .text
+
+align 16                                 ; mem_zero_detect_sse(src, len): rax = 0 if all len bytes at src are zero, else 1
+global mem_zero_detect_sse:ISAL_SYM_TYPE_FUNCTION
+func(mem_zero_detect_sse)
+ FUNC_SAVE
+ mov pos, 0                              ; pos = current byte offset into the buffer
+ sub len, 4*16                           ; len -= 64: now the offset of the final 64-byte window
+ jle .mem_z_small_block                  ; length <= 64 (signed compare): use the short-buffer paths
+
+.mem_z_loop:                             ; check 64 bytes per iteration with four unaligned 16-byte loads
+ movdqu xmm0, [src+pos]
+ movdqu xmm1, [src+pos+1*16]
+ movdqu xmm2, [src+pos+2*16]
+ movdqu xmm3, [src+pos+3*16]
+ ptest xmm0, xmm0                        ; ptest x, x sets ZF only when x is all zero (SSE4.1 instruction)
+ jnz .return_fail
+ ptest xmm1, xmm1
+ jnz .return_fail
+ ptest xmm2, xmm2
+ jnz .return_fail
+ ptest xmm3, xmm3
+ jnz .return_fail
+ add pos, 4*16
+ cmp pos, len
+ jl .mem_z_loop                          ; continue while pos is below the final-window offset
+
+.mem_z_last_block:                       ; final 64 bytes; may overlap bytes the loop already checked
+ movdqu xmm0, [src+len]
+ movdqu xmm1, [src+len+1*16]
+ movdqu xmm2, [src+len+2*16]
+ movdqu xmm3, [src+len+3*16]
+ ptest xmm0, xmm0
+ jnz .return_fail
+ ptest xmm1, xmm1
+ jnz .return_fail
+ ptest xmm2, xmm2
+ jnz .return_fail
+ ptest xmm3, xmm3
+ jnz .return_fail
+
+.return_pass:                            ; every examined byte was zero
+ mov return, 0
+ FUNC_RESTORE
+ ret
+
+
+.mem_z_small_block:
+ add len, 4*16                           ; undo the earlier subtraction: len is the byte count again
+ cmp len, 2*16
+ jl .mem_z_lt32
+ movdqu xmm0, [src]                      ; 32 <= len <= 64: first 32 bytes ...
+ movdqu xmm1, [src+16]
+ movdqu xmm2, [src+len-2*16]             ; ... and last 32 bytes (the two ranges may overlap)
+ movdqu xmm3, [src+len-1*16]
+ ptest xmm0, xmm0
+ jnz .return_fail
+ ptest xmm1, xmm1
+ jnz .return_fail
+ ptest xmm2, xmm2
+ jnz .return_fail
+ ptest xmm3, xmm3
+ jnz .return_fail
+ jmp .return_pass
+
+.mem_z_lt32:
+ cmp len, 16
+ jl .mem_z_lt16
+ movdqu xmm0, [src]                      ; 16 <= len < 32: first 16 bytes ...
+ movdqu xmm1, [src+len-16]               ; ... and last 16 bytes (may overlap)
+ ptest xmm0, xmm0
+ jnz .return_fail
+ ptest xmm1, xmm1
+ jnz .return_fail
+ jmp .return_pass
+
+.mem_z_lt16:
+ cmp len, 8
+ jl .mem_z_lt8
+ mov tmp, [src]                          ; 8 <= len < 16: first 8 bytes ...
+ mov tmp3,[src+len-8]                    ; ... and last 8 bytes (may overlap)
+ or tmp, tmp3                            ; result is non-zero if either qword has a bit set
+ test tmp, tmp
+ jnz .return_fail
+ jmp .return_pass
+
+.mem_z_lt8:
+ cmp len, 0
+ je .return_pass                         ; an empty buffer is reported as all zero
+.mem_z_1byte_loop:                       ; 1 <= len < 8: check one byte at a time (pos is still 0 on entry)
+ mov tmpb, [src+pos]
+ cmp tmpb, 0
+ jnz .return_fail
+ add pos, 1
+ cmp pos, len
+ jl .mem_z_1byte_loop
+ jmp .return_pass
+
+.return_fail:                            ; a non-zero byte was found
+ mov return, 1
+ FUNC_RESTORE
+ ret
+
+endproc_frame
diff --git a/src/isa-l/mem/mem_zero_detect_test.c b/src/isa-l/mem/mem_zero_detect_test.c
new file mode 100644
index 000000000..12d5f4bdb
--- /dev/null
+++ b/src/isa-l/mem/mem_zero_detect_test.c
@@ -0,0 +1,226 @@
+/**********************************************************************
+ Copyright(c) 2011-2018 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+#include "mem_routines.h"
+#include "types.h"
+
+/*
+ * Test sizes. The replacement lists are parenthesized so the macros behave as
+ * single values inside larger expressions: without the parentheses,
+ * "r % TEST_LEN" would parse as "(r % 8) * 1024".
+ */
+#define TEST_MEM (10*1024*1024)	/* total size of the test allocation, in bytes */
+#define TEST_LEN (8*1024)	/* upper bound for the small-buffer and random-offset tests */
+#define RAND_ALIGN 32
+#define BORDER_BYTES (5*RAND_ALIGN + 7)
+
+/* Number of random iterations and the srand() seed; both may be overridden at compile time. */
+#ifndef RANDOMS
+# define RANDOMS 2000
+#endif
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+
+/*
+ * Draw one pseudo-random byte offset below TEST_LEN.
+ *
+ * TEST_LEN is wrapped in parentheses on purpose: if the macro body is an
+ * unparenthesized product such as 8*1024, a bare "r % TEST_LEN" parses as
+ * "(r % 8) * 1024" and only a handful of distinct base offsets are produced.
+ * Consumes exactly one rand() value.
+ */
+static long long random_offset(void)
+{
+    long long r = rand();
+
+    return (r % (TEST_LEN)) ^ (r & (RAND_ALIGN - 1));
+}
+
+/*
+ * Functional test for isal_zero_detect().
+ *
+ * Allocates a TEST_MEM-byte buffer, checks that all-zero buffers of many
+ * lengths are reported as zero (return value 0), then plants a single
+ * non-zero byte at fixed, boundary and pseudo-random offsets and checks that
+ * it is reported exactly when it lies inside the tested length.
+ *
+ * Returns 0 when every check passes and a non-zero value at the first failure.
+ */
+int main(int argc, char *argv[])
+{
+    int i, j, sign;
+    long long r, l;
+    void *buf;
+    unsigned char *a;
+    int failures = 0, ret_neg = 1;
+
+    printf("mem_zero_detect_test %d bytes, %d randoms, seed=0x%x ", TEST_MEM, RANDOMS,
+           TEST_SEED);
+    if (posix_memalign(&buf, 64, TEST_MEM)) {
+        printf("alloc error: Fail");
+        return -1;
+    }
+
+    srand(TEST_SEED);
+
+    // Test full zero buffer
+    memset(buf, 0, TEST_MEM);
+    failures = isal_zero_detect(buf, TEST_MEM);
+
+    if (failures) {
+        printf("Fail large buf test\n");
+        return failures;
+    }
+    putchar('.');
+
+    // Test small buffers
+    for (i = 0; i < TEST_LEN; i++) {
+        failures |= isal_zero_detect(buf, i);
+        if (failures) {
+            printf("Fail len=%d\n", i);
+            return failures;
+        }
+    }
+    putchar('.');
+
+    // Test small buffers that all end at offset TEST_LEN of the allocation,
+    // so the start address moves while the end address stays fixed
+    a = buf;
+    for (i = 0; i < TEST_LEN; i++)
+        failures |= isal_zero_detect(&a[TEST_LEN - i], i);
+
+    if (failures) {
+        printf("Fail:\n");
+        return failures;
+    }
+    putchar('.');
+
+    // Test for detect non zero
+    a[TEST_MEM / 2] = 1;
+    ret_neg = isal_zero_detect(a, TEST_MEM);
+    if (ret_neg == 0) {
+        printf("Fail on not detect\n");
+        return -1;
+    }
+    a[TEST_MEM / 2] = 0;
+    putchar('.');
+
+    // Test various non-zero offsets
+    for (i = 0; i < BORDER_BYTES; i++) {
+        for (j = 0; j < CHAR_BIT; j++) {
+            a[i] = 1 << j;
+            ret_neg = isal_zero_detect(a, TEST_MEM);
+            if (ret_neg == 0) {
+                printf("Fail on not detect offsets %d, %d\n", i, j);
+                return -1;
+            }
+            a[i] = 0;
+        }
+    }
+    putchar('.');
+    fflush(0);
+
+    // Test random non-zero offsets
+    for (i = 0; i < RANDOMS; i++) {
+        r = random_offset();
+        if (r >= TEST_LEN)
+            continue;
+
+        a[r] = 1 << (r & (CHAR_BIT - 1));
+        ret_neg = isal_zero_detect(a, TEST_MEM);
+        if (ret_neg == 0) {
+            printf("Fail on not detect rand %d, e=%lld\n", i, r);
+            return -1;
+        }
+        a[r] = 0;
+    }
+    putchar('.');
+    fflush(0);
+
+    // Test putting non-zero byte at end of buffer
+    for (i = 1; i < BORDER_BYTES; i++) {
+        for (j = 0; j < CHAR_BIT; j++) {
+            a[TEST_MEM - i] = 1 << j;
+            ret_neg = isal_zero_detect(a, TEST_MEM);
+            if (ret_neg == 0) {
+                printf("Fail on not detect rand offset=%d, idx=%d\n", i, j);
+                return -1;
+            }
+            a[TEST_MEM - i] = 0;
+        }
+    }
+    putchar('.');
+
+    // Test various size buffers and non-zero offsets
+    for (l = 1; l < TEST_LEN; l++) {
+        for (i = 0; i < l + BORDER_BYTES; i++) {
+            failures = isal_zero_detect(a, l);
+
+            if (failures) {
+                printf("Fail on detect non-zero with l=%lld\n", l);
+                return -1;
+            }
+
+            a[i] = 1;
+            ret_neg = isal_zero_detect(a, l);
+
+            // A byte inside [0, l) must be reported ...
+            if ((i < l) && (ret_neg == 0)) {
+                printf("Fail on non-zero buffer l=%lld err=%d\n", l, i);
+                return -1;
+            }
+            // ... and a byte at or past l must not be
+            if ((i >= l) && (ret_neg != 0)) {
+                printf("Fail on bad pass detect l=%lld err=%d\n", l, i);
+                return -1;
+            }
+            a[i] = 0;
+        }
+    }
+    putchar('.');
+
+    // Test random test size and non-zero error offsets
+    for (i = 0; i < RANDOMS; i++) {
+        r = random_offset();
+        l = r + 1 + (rand() & (CHAR_BIT - 1));
+        a[r] = 1 << (r & (CHAR_BIT - 1));
+        ret_neg = isal_zero_detect(a, l);
+        if (ret_neg == 0) {
+            printf("Fail on not detect rand %d, l=%lld, e=%lld\n", i, l, r);
+            return -1;
+        }
+        a[r] = 0;
+    }
+    putchar('.');
+    fflush(0);
+
+    // Test combinations of zero and non-zero buffers
+    for (i = 0; i < RANDOMS; i++) {
+        r = random_offset();
+        sign = rand() & 1 ? 1 : -1;
+        l = r + sign * (rand() & (2 * RAND_ALIGN - 1));
+
+        if ((l >= TEST_LEN) || (l < 0) || (r >= TEST_LEN))
+            continue;
+
+        a[r] = 1 << (r & (CHAR_BIT - 1));
+        ret_neg = isal_zero_detect(a, l);
+
+        if ((r < l) && (ret_neg == 0)) {
+            printf("Fail on non-zero rand buffer %d, l=%lld, e=%lld\n", i, l, r);
+            return -1;
+        }
+        if ((r >= l) && (ret_neg != 0)) {
+            printf("Fail on bad pass zero detect rand %d, l=%lld, e=%lld\n", i, l,
+                   r);
+            return -1;
+        }
+
+        a[r] = 0;
+    }
+    putchar('.');
+    fflush(0);
+
+    // fputs() instead of printf(): the text is selected at run time, so it
+    // should not be passed as a format string
+    fputs(failures == 0 ? " Pass\n" : " Fail\n", stdout);
+    return failures;
+}