summary refs log tree commit diff stats
path: root/src/isa-l/mem/aarch64
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 18:45:59 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 18:45:59 +0000
commit 19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch)
tree 42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/isa-l/mem/aarch64
parent Initial commit. (diff)
download ceph-upstream/16.2.11+ds.tar.xz
ceph-upstream/16.2.11+ds.zip
Adding upstream version 16.2.11+ds. [upstream/16.2.11+ds] [upstream]
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- src/isa-l/mem/aarch64/Makefile.am 33
-rw-r--r-- src/isa-l/mem/aarch64/mem_aarch64_dispatcher.c 39
-rw-r--r-- src/isa-l/mem/aarch64/mem_multibinary_arm.S 33
-rw-r--r-- src/isa-l/mem/aarch64/mem_zero_detect_neon.S 243
4 files changed, 348 insertions, 0 deletions
diff --git a/src/isa-l/mem/aarch64/Makefile.am b/src/isa-l/mem/aarch64/Makefile.am
new file mode 100644
index 000000000..c18659872
--- /dev/null
+++ b/src/isa-l/mem/aarch64/Makefile.am
@@ -0,0 +1,33 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+# Append the mem/aarch64 sources (NEON zero-detect routine, multibinary interface file, C dispatcher) to lsrc_aarch64.
+lsrc_aarch64 += \
+ mem/aarch64/mem_zero_detect_neon.S \
+ mem/aarch64/mem_multibinary_arm.S \
+ mem/aarch64/mem_aarch64_dispatcher.c
diff --git a/src/isa-l/mem/aarch64/mem_aarch64_dispatcher.c b/src/isa-l/mem/aarch64/mem_aarch64_dispatcher.c
new file mode 100644
index 000000000..0dfe3a3ae
--- /dev/null
+++ b/src/isa-l/mem/aarch64/mem_aarch64_dispatcher.c
@@ -0,0 +1,39 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#include <aarch64_multibinary.h>
+
+DEFINE_INTERFACE_DISPATCHER(isal_zero_detect)
+{
+	/* Hardware capability bitmask reported by getauxval(AT_HWCAP). */
+	const unsigned long hwcap = getauxval(AT_HWCAP);
+
+	/* HWCAP_ASIMD bit clear: hand back the basic provider instead. */
+	if (!(hwcap & HWCAP_ASIMD))
+		return PROVIDER_BASIC(mem_zero_detect);
+	return PROVIDER_INFO(mem_zero_detect_neon);
+}
diff --git a/src/isa-l/mem/aarch64/mem_multibinary_arm.S b/src/isa-l/mem/aarch64/mem_multibinary_arm.S
new file mode 100644
index 000000000..baac3ca38
--- /dev/null
+++ b/src/isa-l/mem/aarch64/mem_multibinary_arm.S
@@ -0,0 +1,33 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+#include <aarch64_multibinary.h>
+
+// NOTE(review): mbin_interface is defined in aarch64_multibinary.h (not shown here); presumably it emits the isal_zero_detect entry point that resolves through the dispatcher in mem_aarch64_dispatcher.c - confirm.
+mbin_interface isal_zero_detect
diff --git a/src/isa-l/mem/aarch64/mem_zero_detect_neon.S b/src/isa-l/mem/aarch64/mem_zero_detect_neon.S
new file mode 100644
index 000000000..6f93ff612
--- /dev/null
+++ b/src/isa-l/mem/aarch64/mem_zero_detect_neon.S
@@ -0,0 +1,243 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+.text
+.arch armv8-a
+
+/* int mem_zero_detect_neon(void *buf, size_t n) */
+// Checks whether any of the n bytes starting at buf is non-zero.
+// input: buf -> x0 (advanced past every chunk that has been checked)
+// input: n -> x1 (bytes still to check, decremented per chunk)
+// output: -> w0 (0 if every byte is zero, 0xffffffff as soon as a non-zero chunk is seen)
+// scratch: x2-x9, v0-v7, v16-v31 and the condition flags; no stack access, no calls
+.global mem_zero_detect_neon
+.type mem_zero_detect_neon, %function
+// Chunk sizes are tried in order: 384 bytes, 128 bytes, 64 bytes, 8 bytes, then the last 0-7 bytes.
+mem_zero_detect_neon:
+ cmp x1, #(16*24-1) // compare n with 383
+ b.ls .loop_16x24_end // n <= 383 (unsigned): not enough for one 384-byte pass
+
+.loop_16x24: // 16x24 block loop: 24 vectors of 16 bytes = 384 bytes per pass
+ // v8-v15 are not used: the loads go to v0-v7 and v16-v31 only
+ ldr q0, [x0]
+ ldr q1, [x0, #16]
+ ldr q2, [x0, #(16*2)]
+ ldr q3, [x0, #(16*3)]
+ ldr q4, [x0, #(16*4)]
+ ldr q5, [x0, #(16*5)]
+ ldr q6, [x0, #(16*6)]
+ ldr q7, [x0, #(16*7)]
+ ldr q16, [x0, #(16*8)]
+ ldr q17, [x0, #(16*9)]
+ ldr q18, [x0, #(16*10)]
+ ldr q19, [x0, #(16*11)]
+ ldr q20, [x0, #(16*12)]
+ ldr q21, [x0, #(16*13)]
+ ldr q22, [x0, #(16*14)]
+ ldr q23, [x0, #(16*15)]
+ ldr q24, [x0, #(16*16)]
+ ldr q25, [x0, #(16*17)]
+ ldr q26, [x0, #(16*18)]
+ ldr q27, [x0, #(16*19)]
+ ldr q28, [x0, #(16*20)]
+ ldr q29, [x0, #(16*21)]
+ ldr q30, [x0, #(16*22)]
+ ldr q31, [x0, #(16*23)]
+
+ add x0, x0, #(16*24) // buf += 384
+ // OR-reduce the 24 vectors pairwise; the final result lands in v0
+ orr v0.16b, v0.16b, v1.16b // level 1: 24 -> 12 vectors
+ orr v2.16b, v2.16b, v3.16b
+ orr v4.16b, v4.16b, v5.16b
+ orr v6.16b, v6.16b, v7.16b
+ orr v16.16b, v16.16b, v17.16b
+ orr v18.16b, v18.16b, v19.16b
+ orr v20.16b, v20.16b, v21.16b
+ orr v22.16b, v22.16b, v23.16b
+ orr v24.16b, v24.16b, v25.16b
+ orr v26.16b, v26.16b, v27.16b
+ orr v28.16b, v28.16b, v29.16b
+ orr v30.16b, v30.16b, v31.16b
+
+ orr v0.16b, v0.16b, v2.16b // level 2: 12 -> 6 vectors
+ orr v4.16b, v4.16b, v6.16b
+ orr v16.16b, v16.16b, v18.16b
+ orr v20.16b, v20.16b, v22.16b
+ orr v24.16b, v24.16b, v26.16b
+ orr v28.16b, v28.16b, v30.16b
+
+ orr v0.16b, v0.16b, v4.16b // level 3: 6 -> 3 vectors
+ orr v16.16b, v16.16b, v20.16b
+ orr v24.16b, v24.16b, v28.16b
+
+ orr v0.16b, v0.16b, v16.16b // level 4: 3 -> 1 vector (v0)
+ orr v0.16b, v0.16b, v24.16b
+ // fold the 128-bit result into a single 64-bit value and test it
+ mov x3, v0.d[0] // low 64 bits of v0
+ mov x2, v0.d[1] // high 64 bits of v0
+ orr x2, x3, x2
+ cbnz x2, .fail_exit // any bit set: a non-zero byte was in this block
+
+ // loop condition check: run another pass while more than 383 bytes remain
+ sub x1, x1, #(16*24) // n -= 384
+ cmp x1, #(16*24-1)
+ b.hi .loop_16x24 // unsigned n > 383
+
+.loop_16x24_end:
+ cmp x1, #(16*8-1) // compare n with 127
+ b.ls .loop_16x8_end // n <= 127 (unsigned): skip the 128-byte loop
+
+.loop_16x8: // 16x8 block loop: 8 vectors of 16 bytes = 128 bytes per pass
+ ldr q0, [x0]
+ ldr q1, [x0, #16]
+ ldr q2, [x0, #(16*2)]
+ ldr q3, [x0, #(16*3)]
+ ldr q4, [x0, #(16*4)]
+ ldr q5, [x0, #(16*5)]
+ ldr q6, [x0, #(16*6)]
+ ldr q7, [x0, #(16*7)]
+
+ add x0, x0, #(16*8) // buf += 128
+ // OR-reduce the 8 vectors pairwise; the final result lands in v0
+ orr v0.16b, v0.16b, v1.16b
+ orr v2.16b, v2.16b, v3.16b
+ orr v4.16b, v4.16b, v5.16b
+ orr v6.16b, v6.16b, v7.16b
+
+ orr v0.16b, v0.16b, v2.16b
+ orr v4.16b, v4.16b, v6.16b
+ orr v0.16b, v0.16b, v4.16b
+ // fold the 128-bit result into a single 64-bit value and test it
+ mov x3, v0.d[0] // low 64 bits of v0
+ mov x2, v0.d[1] // high 64 bits of v0
+ orr x2, x3, x2
+ cbnz x2, .fail_exit // any bit set: a non-zero byte was in this block
+
+ sub x1, x1, #(16*8) // n -= 128
+ cmp x1, #(16*8-1)
+ b.hi .loop_16x8 // unsigned n > 127: another 128-byte pass
+
+.loop_16x8_end:
+ cmp x1, #(8*8-1) // compare n with 63
+ b.ls .loop_8x8_end // n <= 63 (unsigned): skip the 64-byte loop
+
+.loop_8x8: // 8x8 block loop: eight 64-bit general registers = 64 bytes per pass
+ ldp x2, x3, [x0]
+ ldp x4, x5, [x0, #16]
+ ldp x6, x7, [x0, #32]
+ ldp x8, x9, [x0, #48]
+
+ add x0, x0, #(8*8) // buf += 64
+ // OR-reduce the eight registers pairwise; the final result lands in x2
+ orr x2, x2, x3
+ orr x4, x4, x5
+ orr x6, x6, x7
+ orr x8, x8, x9
+ orr x2, x2, x4
+ orr x6, x6, x8
+ orr x2, x2, x6
+
+ cbnz x2, .fail_exit // any bit set: a non-zero byte was in this block
+
+ sub x1, x1, #(8*8) // n -= 64
+ cmp x1, #(8*8-1)
+ b.hi .loop_8x8 // unsigned n > 63: another 64-byte pass
+
+.loop_8x8_end:
+ cmp x1, #(8-1) // compare n with 7
+ b.ls .handle_remainder // n <= 7 (unsigned): only tail bytes are left
+
+.loop_8: // loop per 8bytes: one 64-bit load per pass
+ ldr x2, [x0]
+ add x0, x0, #8 // buf += 8
+ cbnz x2, .fail_exit // non-zero byte within these 8 bytes
+
+ sub x1, x1, #8 // n -= 8
+ cmp x1, #7
+ b.hi .loop_8 // unsigned n > 7: another 8-byte pass
+
+.loop_8_end: // no branch in this routine targets this label
+
+ // check remaining bytes: x1 is in the range 0..7 on every path that reaches this point
+.handle_remainder:
+ mov w2, #0 // w2 accumulates the OR of the tail bytes
+ // select the entry point matching the number of tail bytes; 7 falls through
+ cmp x1, #0
+ beq .handle_reminder_end // nothing left: w2 is still 0
+ cmp x1, #1
+ beq .case1
+ cmp x1, #2
+ beq .case2
+ cmp x1, #3
+ beq .case3
+ cmp x1, #4
+ beq .case4
+ cmp x1, #5
+ beq .case5
+ cmp x1, #6
+ beq .case6
+
+.case7: // case7 drop here directly; 7 = 1 + 1 + 1 + 4 via fall-through to case4
+ ldrb w3, [x0]
+ add x0, x0, #1
+ orr w2, w2, w3
+.case6: // 6 = 1 + 1 + 4
+ ldrb w3, [x0]
+ add x0, x0, #1
+ orr w2, w2, w3
+.case5: // 5 = 1 + 4
+ ldrb w3, [x0]
+ add x0, x0, #1
+ orr w2, w2, w3
+.case4: // one 32-bit load covers the last 4 bytes
+ ldr w3, [x0]
+ orr w2, w2, w3
+ b .handle_reminder_end
+.case3: // 3 = 1 + 2 via fall-through to case2
+ ldrb w3, [x0]
+ add x0, x0, #1
+ orr w2, w2, w3
+.case2: // one 16-bit load covers the last 2 bytes
+ ldrh w3, [x0]
+ orr w2, w2, w3
+ b .handle_reminder_end
+.case1: // single last byte
+ ldrb w3, [x0]
+ orr w2, w2, w3
+
+.handle_reminder_end: // label name is spelled reminder, unlike .handle_remainder above
+ cbz w2, .pass_exit // tail bytes all zero (or no tail): success; otherwise fall into .fail_exit
+
+.fail_exit: // a non-zero byte was found
+ mov w0, #0xffffffff // return value with all 32 bits set
+ ret
+
+.pass_exit: // every byte checked was zero
+ mov w0, #0x0 // return value 0
+ ret