summaryrefslogtreecommitdiffstats
path: root/third_party/dav1d/src/arm/64/itx.S
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--third_party/dav1d/src/arm/64/itx.S99
1 files changed, 50 insertions, 49 deletions
diff --git a/third_party/dav1d/src/arm/64/itx.S b/third_party/dav1d/src/arm/64/itx.S
index 53490cd677..7063cbde1d 100644
--- a/third_party/dav1d/src/arm/64/itx.S
+++ b/third_party/dav1d/src/arm/64/itx.S
@@ -879,6 +879,8 @@ function inv_txfm_\variant\()add_8x8_neon
.ifc \variant, identity_
// The identity shl #1 and downshift srshr #1 cancel out
+
+ b L(itx_8x8_epilog)
.else
blr x4
@@ -890,19 +892,20 @@ function inv_txfm_\variant\()add_8x8_neon
srshr v21.8h, v21.8h, #1
srshr v22.8h, v22.8h, #1
srshr v23.8h, v23.8h, #1
-.endif
+L(itx_8x8_epilog):
transpose_8x8h v16, v17, v18, v19, v20, v21, v22, v23, v24, v25
blr x5
load_add_store_8x8 x0, x7
ret x15
+.endif
endfunc
.endm
-def_fn_8x8_base
def_fn_8x8_base identity_
+def_fn_8x8_base
.macro def_fn_8x8 txfm1, txfm2
function inv_txfm_add_\txfm1\()_\txfm2\()_8x8_8bpc_neon, export=1
@@ -1390,14 +1393,16 @@ function inv_txfm_horz\suffix\()_16x8_neon
.endif
.if \identity
identity_8x16_shift2 v0.h[0]
+ b L(horz_16x8_epilog)
.else
blr x4
-.endif
-.if \shift > 0
.irp i, v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h, v24.8h, v25.8h, v26.8h, v27.8h, v28.8h, v29.8h, v30.8h, v31.8h
srshr \i, \i, #\shift
.endr
-.endif
+.if \shift == 1
+ b L(horz_16x8_epilog)
+.else
+L(horz_16x8_epilog):
transpose_8x8h v16, v17, v18, v19, v20, v21, v22, v23, v4, v5
transpose_8x8h v24, v25, v26, v27, v28, v29, v30, v31, v4, v5
@@ -1406,12 +1411,14 @@ function inv_txfm_horz\suffix\()_16x8_neon
.endr
ret x14
+.endif
+.endif
endfunc
.endm
-def_horz_16 scale=0, identity=0, shift=2
def_horz_16 scale=1, identity=0, shift=1, suffix=_scale
def_horz_16 scale=0, identity=1, shift=0, suffix=_identity
+def_horz_16 scale=0, identity=0, shift=2
function inv_txfm_add_vert_8x16_neon
mov x14, x30
@@ -1512,6 +1519,8 @@ function inv_txfm_\variant\()add_16x4_neon
.endr
identity_8x16_shift1 v0.h[0]
+
+ b L(itx_16x4_epilog)
.else
.irp i, v16.4h, v17.4h, v18.4h, v19.4h, v20.4h, v21.4h, v22.4h, v23.4h, v24.4h, v25.4h, v26.4h, v27.4h, v28.4h, v29.4h, v30.4h, v31.4h
ld1 {\i}, [x2]
@@ -1527,33 +1536,29 @@ function inv_txfm_\variant\()add_16x4_neon
.irp i, v16.8h, v17.8h, v18.8h, v19.8h
srshr \i, \i, #1
.endr
-.endif
- transpose_4x8h v16, v17, v18, v19, v2, v3, v4, v5
- blr x5
- mov x6, x0
- load_add_store_8x4 x6, x7
-.ifc \variant, identity_
- mov v16.16b, v20.16b
- mov v17.16b, v21.16b
- mov v18.16b, v22.16b
- mov v19.16b, v23.16b
-.else
ins v24.d[1], v28.d[0]
ins v25.d[1], v29.d[0]
ins v26.d[1], v30.d[0]
ins v27.d[1], v31.d[0]
- srshr v16.8h, v24.8h, #1
- srshr v17.8h, v25.8h, #1
- srshr v18.8h, v26.8h, #1
- srshr v19.8h, v27.8h, #1
-.endif
+ srshr v20.8h, v24.8h, #1
+ srshr v21.8h, v25.8h, #1
+ srshr v22.8h, v26.8h, #1
+ srshr v23.8h, v27.8h, #1
+
+L(itx_16x4_epilog):
transpose_4x8h v16, v17, v18, v19, v2, v3, v4, v5
blr x5
+ mov x6, x0
+ load_add_store_8x4 x6, x7
+
+ transpose_4x8h_mov v20, v21, v22, v23, v2, v3, v4, v5, v16, v17, v18, v19
+ blr x5
add x6, x0, #8
load_add_store_8x4 x6, x7
ret x15
+.endif
endfunc
function inv_txfm_\variant\()add_4x16_neon
@@ -1605,12 +1610,14 @@ function inv_txfm_\variant\()add_4x16_neon
mov w16, #(5793-4096)*8
dup v0.4h, w16
identity_8x4_shift1 v16, v17, v18, v19, v0.h[0]
+
+ b L(itx_4x16_epilog)
.else
blr x4
.irp i, v16.8h, v17.8h, v18.8h, v19.8h
srshr \i, \i, #1
.endr
-.endif
+L(itx_4x16_epilog):
transpose_4x8h v16, v17, v18, v19, v4, v5, v6, v7
ins v20.d[0], v16.d[1]
ins v21.d[0], v17.d[1]
@@ -1622,11 +1629,12 @@ function inv_txfm_\variant\()add_4x16_neon
load_add_store_4x16 x0, x6
ret x15
+.endif
endfunc
.endm
-def_fn_416_base
def_fn_416_base identity_
+def_fn_416_base
.macro def_fn_416 w, h, txfm1, txfm2, eob_half
function inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_8bpc_neon, export=1
@@ -1634,11 +1642,15 @@ function inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_8bpc_neon, export=1
idct_dc \w, \h, 1
.endif
.if \w == 4
+.ifnc \txfm1, identity
adr x4, inv_\txfm1\()_8h_x\w\()_neon
+.endif
adr x5, inv_\txfm2\()_4h_x\h\()_neon
mov w13, #\eob_half
.else
+.ifnc \txfm1, identity
adr x4, inv_\txfm1\()_4h_x\w\()_neon
+.endif
adr x5, inv_\txfm2\()_8h_x\h\()_neon
.endif
.ifc \txfm1, identity
@@ -1690,13 +1702,16 @@ function inv_txfm_\variant\()add_16x8_neon
mov w16, #2*(5793-4096)*8
dup v0.4h, w16
identity_8x16_shift1 v0.h[0]
+
+ b L(itx_16x8_epilog)
.else
blr x4
-.irp i, v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h
+.irp i, v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h, v24.8h, v25.8h, v26.8h, v27.8h, v28.8h, v29.8h, v30.8h, v31.8h
srshr \i, \i, #1
.endr
-.endif
+
+L(itx_16x8_epilog):
transpose_8x8h v16, v17, v18, v19, v20, v21, v22, v23, v2, v3
blr x5
@@ -1704,27 +1719,7 @@ function inv_txfm_\variant\()add_16x8_neon
mov x6, x0
load_add_store_8x8 x6, x7
-.ifc \variant, identity_
- mov v16.16b, v24.16b
- mov v17.16b, v25.16b
- mov v18.16b, v26.16b
- mov v19.16b, v27.16b
- mov v20.16b, v28.16b
- mov v21.16b, v29.16b
- mov v22.16b, v30.16b
- mov v23.16b, v31.16b
-.else
- srshr v16.8h, v24.8h, #1
- srshr v17.8h, v25.8h, #1
- srshr v18.8h, v26.8h, #1
- srshr v19.8h, v27.8h, #1
- srshr v20.8h, v28.8h, #1
- srshr v21.8h, v29.8h, #1
- srshr v22.8h, v30.8h, #1
- srshr v23.8h, v31.8h, #1
-.endif
-
- transpose_8x8h v16, v17, v18, v19, v20, v21, v22, v23, v2, v3
+ transpose_8x8h_mov v24, v25, v26, v27, v28, v29, v30, v31, v2, v3, v16, v17, v18, v19, v20, v21, v22, v23
blr x5
@@ -1732,6 +1727,7 @@ function inv_txfm_\variant\()add_16x8_neon
load_add_store_8x8 x0, x7
ret x15
+.endif
endfunc
function inv_txfm_\variant\()add_8x16_neon
@@ -1790,14 +1786,16 @@ function inv_txfm_\variant\()add_8x16_neon
scale_input .8h, v0.h[0], v16, v17, v18, v19, v20, v21, v22, v23
.ifc \variant, identity_
// The identity shl #1 and downshift srshr #1 cancel out
+
+ b L(itx_8x16_epilog)
.else
blr x4
.irp i, v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h
srshr \i, \i, #1
.endr
-.endif
+L(itx_8x16_epilog):
transpose_8x8h v16, v17, v18, v19, v20, v21, v22, v23, v2, v3
blr x5
@@ -1805,18 +1803,21 @@ function inv_txfm_\variant\()add_8x16_neon
load_add_store_8x16 x0, x6
ret x15
+.endif
endfunc
.endm
-def_fn_816_base
def_fn_816_base identity_
+def_fn_816_base
.macro def_fn_816 w, h, txfm1, txfm2, eob_half
function inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_8bpc_neon, export=1
.ifc \txfm1\()_\txfm2, dct_dct
idct_dc \w, \h, 1
.endif
+.ifnc \txfm1, identity
adr x4, inv_\txfm1\()_8h_x\w\()_neon
+.endif
adr x5, inv_\txfm2\()_8h_x\h\()_neon
.if \w == 8
mov x13, #\eob_half