path: root/third_party/dav1d/src/arm/32/util.S
Diffstat:
-rw-r--r--  third_party/dav1d/src/arm/32/util.S  184
1 file changed, 184 insertions, 0 deletions
diff --git a/third_party/dav1d/src/arm/32/util.S b/third_party/dav1d/src/arm/32/util.S
new file mode 100644
index 0000000000..c3710d3767
--- /dev/null
+++ b/third_party/dav1d/src/arm/32/util.S
@@ -0,0 +1,184 @@
+/******************************************************************************
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2015 Martin Storsjo
+ * Copyright © 2015 Janne Grunau
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+
+#ifndef DAV1D_SRC_ARM_32_UTIL_S
+#define DAV1D_SRC_ARM_32_UTIL_S
+
+#include "config.h"
+#include "src/arm/asm.S"
+
+.macro movrel_local rd, val, offset=0
+#if defined(PIC)
+ ldr \rd, 90001f
+ b 90002f
+90001:
+ .word \val + \offset - (90002f + 8 - 4 * CONFIG_THUMB)
+90002:
+ add \rd, \rd, pc
+#else
+ movw \rd, #:lower16:\val+\offset
+ movt \rd, #:upper16:\val+\offset
+#endif
+.endm
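+
+// Usage sketch (illustrative; the label name is hypothetical):
+//     movrel_local r0, some_local_table
+// Under PIC this computes the address pc-relatively, compensating for the
+// pc read-ahead (8 bytes in ARM state, 4 in Thumb); otherwise it expands
+// to a movw/movt immediate pair.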
+
+.macro movrel rd, val, offset=0
+#if defined(PIC) && defined(__APPLE__)
+ ldr \rd, 1f
+ b 2f
+1:
+ .word 3f - (2f + 8 - 4 * CONFIG_THUMB)
+2:
+ ldr \rd, [pc, \rd]
+.if \offset < 0
+ sub \rd, \rd, #-(\offset)
+.elseif \offset > 0
+ add \rd, \rd, #\offset
+.endif
+ .non_lazy_symbol_pointer
+3:
+ .indirect_symbol \val
+ .word 0
+ .text
+#else
+ movrel_local \rd, \val, \offset
+#endif
+.endm
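+
+// Usage sketch (illustrative; the symbol name is hypothetical):
+//     movrel r0, dav1d_some_table
+// On Apple platforms with PIC, the address is loaded indirectly through a
+// Mach-O non-lazy symbol pointer; everywhere else this is movrel_local.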
+
+// This macro clobbers r7 (and r12 on Windows) and stores data at the
+// bottom of the stack; on return, sp points to the start of the space
+// allocated for the caller's use.
+.macro sub_sp_align space
+#if CONFIG_THUMB
+ mov r7, sp
+ and r7, r7, #15
+#else
+ and r7, sp, #15
+#endif
+ sub sp, sp, r7
+ // Now that the stack is aligned, store the amount of adjustment back
+ // on the stack, as we don't want to waste a register as a frame
+ // pointer.
+ str r7, [sp, #-16]!
+#ifdef _WIN32
+.if \space > 8192
+ // Here, we'd need to touch two (or more) pages while decrementing
+ // the stack pointer.
+ .error "sub_sp_align doesn't support values over 8K at the moment"
+.elseif \space > 4096
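+ // Touch the intermediate page first: Windows commits stack memory one
+ // guard page at a time, so sp must not be moved past an untouched page.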
+ sub r7, sp, #4096
+ ldr r12, [r7]
+ sub r7, r7, #(\space - 4096)
+ mov sp, r7
+.else
+ sub sp, sp, #\space
+.endif
+#else
+.if \space >= 4096
+ sub sp, sp, #(\space)/4096*4096
+.endif
+.if (\space % 4096) != 0
+ sub sp, sp, #(\space)%4096
+.endif
+#endif
+.endm
+
+.macro add_sp_align space
+.if \space >= 4096
+ add sp, sp, #(\space)/4096*4096
+.endif
+.if (\space % 4096) != 0
+ add sp, sp, #(\space)%4096
+.endif
+ ldr r7, [sp], #16
+ // Add back the original stack adjustment
+ add sp, sp, r7
+.endm
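+
+// Usage sketch (illustrative): the two macros must be paired with the same
+// constant, e.g. to reserve a 16-byte aligned 1024-byte scratch area:
+//     sub_sp_align 1024    // sp now points at the usable space
+//     ...
+//     add_sp_align 1024    // restores the original sp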
+
+.macro transpose_8x8b q0, q1, q2, q3, r0, r1, r2, r3, r4, r5, r6, r7
+ vtrn.32 \q0, \q2
+ vtrn.32 \q1, \q3
+
+ vtrn.16 \r0, \r2
+ vtrn.16 \r1, \r3
+ vtrn.16 \r4, \r6
+ vtrn.16 \r5, \r7
+
+ vtrn.8 \r0, \r1
+ vtrn.8 \r2, \r3
+ vtrn.8 \r4, \r5
+ vtrn.8 \r6, \r7
+.endm
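+
+// Usage sketch (illustrative): with the rows of an 8x8 byte matrix in
+// d0-d7, the q arguments must alias those same registers (q0=d0:d1, ...,
+// q3=d6:d7):
+//     transpose_8x8b q0, q1, q2, q3, d0, d1, d2, d3, d4, d5, d6, d7
+// Afterwards, column i of the original matrix is in d<i>.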
+
+.macro transpose_8x8h r0, r1, r2, r3, r4, r5, r6, r7, d0, d1, d2, d3, d4, d5, d6, d7
+ vswp \d0, \d4
+ vswp \d1, \d5
+ vswp \d2, \d6
+ vswp \d3, \d7
+
+ vtrn.32 \r0, \r2
+ vtrn.32 \r1, \r3
+ vtrn.32 \r4, \r6
+ vtrn.32 \r5, \r7
+
+ vtrn.16 \r0, \r1
+ vtrn.16 \r2, \r3
+ vtrn.16 \r4, \r5
+ vtrn.16 \r6, \r7
+.endm
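+
+// Usage sketch (illustrative): with the rows of an 8x8 halfword matrix in
+// q0-q7, the d arguments are the high halves of q0-q3 and the low halves
+// of q4-q7, which the vswp step exchanges as a 64-bit transpose:
+//     transpose_8x8h q0, q1, q2, q3, q4, q5, q6, q7, d1, d3, d5, d7, d8, d10, d12, d14
+// Afterwards, column i of the original matrix is in q<i>.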
+
+.macro transpose_4x8b q0, q1, r0, r1, r2, r3
+ vtrn.16 \q0, \q1
+
+ vtrn.8 \r0, \r1
+ vtrn.8 \r2, \r3
+.endm
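+
+// Usage sketch (illustrative): transposes a 4x8 byte matrix with rows in
+// d0-d3 (q0=d0:d1, q1=d2:d3); each d register ends up holding two 4-byte
+// columns (d0: columns 0 and 4, d1: 1 and 5, d2: 2 and 6, d3: 3 and 7):
+//     transpose_4x8b q0, q1, d0, d1, d2, d3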
+
+.macro transpose_4x4s q0, q1, q2, q3, r0, r1, r2, r3, r4, r5, r6, r7
+ vswp \r1, \r4 // vtrn.64 \q0, \q2
+ vswp \r3, \r6 // vtrn.64 \q1, \q3
+
+ vtrn.32 \q0, \q1
+ vtrn.32 \q2, \q3
+.endm
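+
+// Usage sketch (illustrative): a 4x4 matrix of 32-bit words with rows in
+// q0-q3; the r arguments must alias them (q0=d0:d1, ..., q3=d6:d7):
+//     transpose_4x4s q0, q1, q2, q3, d0, d1, d2, d3, d4, d5, d6, d7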
+
+.macro transpose_4x4h q0, q1, r0, r1, r2, r3
+ vtrn.32 \q0, \q1
+
+ vtrn.16 \r0, \r1
+ vtrn.16 \r2, \r3
+.endm
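+
+// Usage sketch (illustrative): a 4x4 halfword matrix with rows in d0-d3
+// (q0=d0:d1, q1=d2:d3):
+//     transpose_4x4h q0, q1, d0, d1, d2, d3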
+
+.macro transpose_4x8h r0, r1, r2, r3
+ vtrn.32 \r0, \r2
+ vtrn.32 \r1, \r3
+
+ vtrn.16 \r0, \r1
+ vtrn.16 \r2, \r3
+.endm
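+
+// Usage sketch (illustrative): four rows of eight halfwords in q0-q3; each
+// q register ends up holding two 4-element columns (q0: columns 0 and 4,
+// q1: 1 and 5, q2: 2 and 6, q3: 3 and 7):
+//     transpose_4x8h q0, q1, q2, q3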
+
+#endif /* DAV1D_SRC_ARM_32_UTIL_S */