From e6918187568dbd01842d8d1d2c808ce16a894239 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 21 Apr 2024 13:54:28 +0200 Subject: Adding upstream version 18.2.2. Signed-off-by: Daniel Baumann --- src/isa-l/mem/aarch64/Makefile.am | 33 ++++ src/isa-l/mem/aarch64/mem_aarch64_dispatcher.c | 39 ++++ src/isa-l/mem/aarch64/mem_multibinary_arm.S | 33 ++++ src/isa-l/mem/aarch64/mem_zero_detect_neon.S | 243 +++++++++++++++++++++++++ 4 files changed, 348 insertions(+) create mode 100644 src/isa-l/mem/aarch64/Makefile.am create mode 100644 src/isa-l/mem/aarch64/mem_aarch64_dispatcher.c create mode 100644 src/isa-l/mem/aarch64/mem_multibinary_arm.S create mode 100644 src/isa-l/mem/aarch64/mem_zero_detect_neon.S (limited to 'src/isa-l/mem/aarch64') diff --git a/src/isa-l/mem/aarch64/Makefile.am b/src/isa-l/mem/aarch64/Makefile.am new file mode 100644 index 000000000..c18659872 --- /dev/null +++ b/src/isa-l/mem/aarch64/Makefile.am @@ -0,0 +1,33 @@ +######################################################################## +# Copyright(c) 2019 Arm Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################### + +lsrc_aarch64 += \ + mem/aarch64/mem_zero_detect_neon.S \ + mem/aarch64/mem_multibinary_arm.S \ + mem/aarch64/mem_aarch64_dispatcher.c diff --git a/src/isa-l/mem/aarch64/mem_aarch64_dispatcher.c b/src/isa-l/mem/aarch64/mem_aarch64_dispatcher.c new file mode 100644 index 000000000..0dfe3a3ae --- /dev/null +++ b/src/isa-l/mem/aarch64/mem_aarch64_dispatcher.c @@ -0,0 +1,39 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ +#include + +DEFINE_INTERFACE_DISPATCHER(isal_zero_detect) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_ASIMD) + return PROVIDER_INFO(mem_zero_detect_neon); + + return PROVIDER_BASIC(mem_zero_detect); + +} diff --git a/src/isa-l/mem/aarch64/mem_multibinary_arm.S b/src/isa-l/mem/aarch64/mem_multibinary_arm.S new file mode 100644 index 000000000..baac3ca38 --- /dev/null +++ b/src/isa-l/mem/aarch64/mem_multibinary_arm.S @@ -0,0 +1,33 @@ +######################################################################## +# Copyright(c) 2019 Arm Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################### + +#include + +mbin_interface isal_zero_detect + diff --git a/src/isa-l/mem/aarch64/mem_zero_detect_neon.S b/src/isa-l/mem/aarch64/mem_zero_detect_neon.S new file mode 100644 index 000000000..6f93ff612 --- /dev/null +++ b/src/isa-l/mem/aarch64/mem_zero_detect_neon.S @@ -0,0 +1,243 @@ +######################################################################## +# Copyright(c) 2019 Arm Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################### + +.text +.arch armv8-a + +/*int mem_zero_detect_neon(void *buf, size_t n)*/ + +// input: buf -> x0 +// input: n -> x1 +// output: -> x0 (true or false) + +.global mem_zero_detect_neon +.type mem_zero_detect_neon, %function + +mem_zero_detect_neon: + cmp x1, #(16*24-1) + b.ls .loop_16x24_end + +.loop_16x24: // 16x24 block loop + + ldr q0, [x0] + ldr q1, [x0, #16] + ldr q2, [x0, #(16*2)] + ldr q3, [x0, #(16*3)] + ldr q4, [x0, #(16*4)] + ldr q5, [x0, #(16*5)] + ldr q6, [x0, #(16*6)] + ldr q7, [x0, #(16*7)] + ldr q16, [x0, #(16*8)] + ldr q17, [x0, #(16*9)] + ldr q18, [x0, #(16*10)] + ldr q19, [x0, #(16*11)] + ldr q20, [x0, #(16*12)] + ldr q21, [x0, #(16*13)] + ldr q22, [x0, #(16*14)] + ldr q23, [x0, #(16*15)] + ldr q24, [x0, #(16*16)] + ldr q25, [x0, #(16*17)] + ldr q26, [x0, #(16*18)] + ldr q27, [x0, #(16*19)] + ldr q28, [x0, #(16*20)] + ldr q29, [x0, #(16*21)] + ldr q30, [x0, #(16*22)] + ldr q31, [x0, #(16*23)] + + add x0, x0, #(16*24) + + orr v0.16b, v0.16b, v1.16b + orr v2.16b, v2.16b, v3.16b + orr v4.16b, v4.16b, v5.16b + orr v6.16b, v6.16b, v7.16b + orr v16.16b, v16.16b, v17.16b + orr v18.16b, v18.16b, v19.16b + orr v20.16b, v20.16b, v21.16b + orr v22.16b, v22.16b, v23.16b + orr v24.16b, v24.16b, v25.16b + orr v26.16b, v26.16b, v27.16b + orr v28.16b, v28.16b, v29.16b + orr v30.16b, v30.16b, v31.16b + + orr v0.16b, v0.16b, v2.16b + orr v4.16b, v4.16b, v6.16b + orr v16.16b, v16.16b, v18.16b + orr v20.16b, v20.16b, v22.16b + orr v24.16b, v24.16b, v26.16b + orr v28.16b, v28.16b, v30.16b + + orr v0.16b, v0.16b, v4.16b + orr v16.16b, v16.16b, v20.16b + orr v24.16b, v24.16b, v28.16b + + orr v0.16b, v0.16b, v16.16b + orr v0.16b, v0.16b, v24.16b + + mov x3, v0.d[0] + mov x2, v0.d[1] + orr x2, x3, x2 + cbnz x2, .fail_exit + + // loop condition check + sub x1, x1, #(16*24) + cmp x1, #(16*24-1) + b.hi .loop_16x24 + +.loop_16x24_end: + cmp x1, #(16*8-1) + b.ls .loop_16x8_end + +.loop_16x8: // 16x8 block loop + ldr q0, [x0] + ldr q1, [x0, #16] + ldr q2, [x0, #(16*2)] + ldr q3, [x0, #(16*3)] + ldr q4, [x0, #(16*4)] + ldr q5, [x0, #(16*5)] + ldr q6, [x0, #(16*6)] + ldr q7, [x0, #(16*7)] + + add x0, x0, #(16*8) + + orr v0.16b, v0.16b, v1.16b + orr v2.16b, v2.16b, v3.16b + orr v4.16b, v4.16b, v5.16b + orr v6.16b, v6.16b, v7.16b + + orr v0.16b, v0.16b, v2.16b + orr v4.16b, v4.16b, v6.16b + orr v0.16b, v0.16b, v4.16b + + mov x3, v0.d[0] + mov x2, v0.d[1] + orr x2, x3, x2 + cbnz x2, .fail_exit + + sub x1, x1, #(16*8) + cmp x1, #(16*8-1) + b.hi .loop_16x8 + +.loop_16x8_end: + cmp x1, #(8*8-1) + b.ls .loop_8x8_end + +.loop_8x8: // 8x8 block loop + ldp x2, x3, [x0] + ldp x4, x5, [x0, #16] + ldp x6, x7, [x0, #32] + ldp x8, x9, [x0, #48] + + add x0, x0, #(8*8) + + orr x2, x2, x3 + orr x4, x4, x5 + orr x6, x6, x7 + orr x8, x8, x9 + orr x2, x2, x4 + orr x6, x6, x8 + orr x2, x2, x6 + + cbnz x2, .fail_exit + + sub x1, x1, #(8*8) + cmp x1, #(8*8-1) + b.hi .loop_8x8 + +.loop_8x8_end: + cmp x1, #(8-1) + b.ls .handle_remainder + +.loop_8: // loop per 8bytes + ldr x2, [x0] + add x0, x0, #8 + cbnz x2, .fail_exit + + sub x1, x1, #8 + cmp x1, #7 + b.hi .loop_8 + +.loop_8_end: + + // check remaining bytes +.handle_remainder: + mov w2, #0 + + cmp x1, #0 + beq .handle_reminder_end + cmp x1, #1 + beq .case1 + cmp x1, #2 + beq .case2 + cmp x1, #3 + beq .case3 + cmp x1, #4 + beq .case4 + cmp x1, #5 + beq .case5 + cmp x1, #6 + beq .case6 + +.case7: // case7 drop here directly + ldrb w3, [x0] + add x0, x0, #1 + orr w2, w2, w3 +.case6: + ldrb w3, [x0] + add x0, x0, #1 + orr w2, w2, w3 +.case5: + ldrb w3, [x0] + add x0, x0, #1 + orr w2, w2, w3 +.case4: + ldr w3, [x0] + orr w2, w2, w3 + b .handle_reminder_end +.case3: + ldrb w3, [x0] + add x0, x0, #1 + orr w2, w2, w3 +.case2: + ldrh w3, [x0] + orr w2, w2, w3 + b .handle_reminder_end +.case1: + ldrb w3, [x0] + orr w2, w2, w3 + +.handle_reminder_end: + cbz w2, .pass_exit + +.fail_exit: + mov w0, #0xffffffff + ret + +.pass_exit: + mov w0, #0x0 + ret -- cgit v1.2.3