From d8bbc7858622b6d9c278469aab701ca0b609cddf Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 15 May 2024 05:35:49 +0200 Subject: Merging upstream version 126.0. Signed-off-by: Daniel Baumann --- third_party/dav1d/src/arm/64/itx.S | 99 +++++++++++++++++++------------------- 1 file changed, 50 insertions(+), 49 deletions(-) (limited to 'third_party/dav1d/src/arm/64/itx.S') diff --git a/third_party/dav1d/src/arm/64/itx.S b/third_party/dav1d/src/arm/64/itx.S index 53490cd677..7063cbde1d 100644 --- a/third_party/dav1d/src/arm/64/itx.S +++ b/third_party/dav1d/src/arm/64/itx.S @@ -879,6 +879,8 @@ function inv_txfm_\variant\()add_8x8_neon .ifc \variant, identity_ // The identity shl #1 and downshift srshr #1 cancel out + + b L(itx_8x8_epilog) .else blr x4 @@ -890,19 +892,20 @@ function inv_txfm_\variant\()add_8x8_neon srshr v21.8h, v21.8h, #1 srshr v22.8h, v22.8h, #1 srshr v23.8h, v23.8h, #1 -.endif +L(itx_8x8_epilog): transpose_8x8h v16, v17, v18, v19, v20, v21, v22, v23, v24, v25 blr x5 load_add_store_8x8 x0, x7 ret x15 +.endif endfunc .endm -def_fn_8x8_base def_fn_8x8_base identity_ +def_fn_8x8_base .macro def_fn_8x8 txfm1, txfm2 function inv_txfm_add_\txfm1\()_\txfm2\()_8x8_8bpc_neon, export=1 @@ -1390,14 +1393,16 @@ function inv_txfm_horz\suffix\()_16x8_neon .endif .if \identity identity_8x16_shift2 v0.h[0] + b L(horz_16x8_epilog) .else blr x4 -.endif -.if \shift > 0 .irp i, v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h, v24.8h, v25.8h, v26.8h, v27.8h, v28.8h, v29.8h, v30.8h, v31.8h srshr \i, \i, #\shift .endr -.endif +.if \shift == 1 + b L(horz_16x8_epilog) +.else +L(horz_16x8_epilog): transpose_8x8h v16, v17, v18, v19, v20, v21, v22, v23, v4, v5 transpose_8x8h v24, v25, v26, v27, v28, v29, v30, v31, v4, v5 @@ -1406,12 +1411,14 @@ function inv_txfm_horz\suffix\()_16x8_neon .endr ret x14 +.endif +.endif endfunc .endm -def_horz_16 scale=0, identity=0, shift=2 def_horz_16 scale=1, identity=0, shift=1, suffix=_scale def_horz_16 scale=0, identity=1, shift=0, suffix=_identity +def_horz_16 scale=0, identity=0, shift=2 function inv_txfm_add_vert_8x16_neon mov x14, x30 @@ -1512,6 +1519,8 @@ function inv_txfm_\variant\()add_16x4_neon .endr identity_8x16_shift1 v0.h[0] + + b L(itx_16x4_epilog) .else .irp i, v16.4h, v17.4h, v18.4h, v19.4h, v20.4h, v21.4h, v22.4h, v23.4h, v24.4h, v25.4h, v26.4h, v27.4h, v28.4h, v29.4h, v30.4h, v31.4h ld1 {\i}, [x2] @@ -1527,33 +1536,29 @@ function inv_txfm_\variant\()add_16x4_neon .irp i, v16.8h, v17.8h, v18.8h, v19.8h srshr \i, \i, #1 .endr -.endif - transpose_4x8h v16, v17, v18, v19, v2, v3, v4, v5 - blr x5 - mov x6, x0 - load_add_store_8x4 x6, x7 -.ifc \variant, identity_ - mov v16.16b, v20.16b - mov v17.16b, v21.16b - mov v18.16b, v22.16b - mov v19.16b, v23.16b -.else ins v24.d[1], v28.d[0] ins v25.d[1], v29.d[0] ins v26.d[1], v30.d[0] ins v27.d[1], v31.d[0] - srshr v16.8h, v24.8h, #1 - srshr v17.8h, v25.8h, #1 - srshr v18.8h, v26.8h, #1 - srshr v19.8h, v27.8h, #1 -.endif + srshr v20.8h, v24.8h, #1 + srshr v21.8h, v25.8h, #1 + srshr v22.8h, v26.8h, #1 + srshr v23.8h, v27.8h, #1 + +L(itx_16x4_epilog): transpose_4x8h v16, v17, v18, v19, v2, v3, v4, v5 blr x5 + mov x6, x0 + load_add_store_8x4 x6, x7 + + transpose_4x8h_mov v20, v21, v22, v23, v2, v3, v4, v5, v16, v17, v18, v19 + blr x5 add x6, x0, #8 load_add_store_8x4 x6, x7 ret x15 +.endif endfunc function inv_txfm_\variant\()add_4x16_neon @@ -1605,12 +1610,14 @@ function inv_txfm_\variant\()add_4x16_neon mov w16, #(5793-4096)*8 dup v0.4h, w16 identity_8x4_shift1 v16, v17, v18, v19, v0.h[0] + + b L(itx_4x16_epilog) .else blr x4 .irp i, v16.8h, v17.8h, v18.8h, v19.8h srshr \i, \i, #1 .endr -.endif +L(itx_4x16_epilog): transpose_4x8h v16, v17, v18, v19, v4, v5, v6, v7 ins v20.d[0], v16.d[1] ins v21.d[0], v17.d[1] @@ -1622,11 +1629,12 @@ function inv_txfm_\variant\()add_4x16_neon load_add_store_4x16 x0, x6 ret x15 +.endif endfunc .endm -def_fn_416_base def_fn_416_base identity_ +def_fn_416_base .macro def_fn_416 w, h, txfm1, txfm2, eob_half function inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_8bpc_neon, export=1 @@ -1634,11 +1642,15 @@ function inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_8bpc_neon, export=1 idct_dc \w, \h, 1 .endif .if \w == 4 +.ifnc \txfm1, identity adr x4, inv_\txfm1\()_8h_x\w\()_neon +.endif adr x5, inv_\txfm2\()_4h_x\h\()_neon mov w13, #\eob_half .else +.ifnc \txfm1, identity adr x4, inv_\txfm1\()_4h_x\w\()_neon +.endif adr x5, inv_\txfm2\()_8h_x\h\()_neon .endif .ifc \txfm1, identity @@ -1690,13 +1702,16 @@ function inv_txfm_\variant\()add_16x8_neon mov w16, #2*(5793-4096)*8 dup v0.4h, w16 identity_8x16_shift1 v0.h[0] + + b L(itx_16x8_epilog) .else blr x4 -.irp i, v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h +.irp i, v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h, v24.8h, v25.8h, v26.8h, v27.8h, v28.8h, v29.8h, v30.8h, v31.8h srshr \i, \i, #1 .endr -.endif + +L(itx_16x8_epilog): transpose_8x8h v16, v17, v18, v19, v20, v21, v22, v23, v2, v3 blr x5 @@ -1704,27 +1719,7 @@ function inv_txfm_\variant\()add_16x8_neon mov x6, x0 load_add_store_8x8 x6, x7 -.ifc \variant, identity_ - mov v16.16b, v24.16b - mov v17.16b, v25.16b - mov v18.16b, v26.16b - mov v19.16b, v27.16b - mov v20.16b, v28.16b - mov v21.16b, v29.16b - mov v22.16b, v30.16b - mov v23.16b, v31.16b -.else - srshr v16.8h, v24.8h, #1 - srshr v17.8h, v25.8h, #1 - srshr v18.8h, v26.8h, #1 - srshr v19.8h, v27.8h, #1 - srshr v20.8h, v28.8h, #1 - srshr v21.8h, v29.8h, #1 - srshr v22.8h, v30.8h, #1 - srshr v23.8h, v31.8h, #1 -.endif - - transpose_8x8h v16, v17, v18, v19, v20, v21, v22, v23, v2, v3 + transpose_8x8h_mov v24, v25, v26, v27, v28, v29, v30, v31, v2, v3, v16, v17, v18, v19, v20, v21, v22, v23 blr x5 @@ -1732,6 +1727,7 @@ function inv_txfm_\variant\()add_16x8_neon load_add_store_8x8 x0, x7 ret x15 +.endif endfunc function inv_txfm_\variant\()add_8x16_neon @@ -1790,14 +1786,16 @@ function inv_txfm_\variant\()add_8x16_neon scale_input .8h, v0.h[0], v16, v17, v18, v19, v20, v21, v22, v23 .ifc \variant, identity_ // The identity shl #1 and downshift srshr #1 cancel out + + b L(itx_8x16_epilog) .else blr x4 .irp i, v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h srshr \i, \i, #1 .endr -.endif +L(itx_8x16_epilog): transpose_8x8h v16, v17, v18, v19, v20, v21, v22, v23, v2, v3 blr x5 @@ -1805,18 +1803,21 @@ function inv_txfm_\variant\()add_8x16_neon load_add_store_8x16 x0, x6 ret x15 +.endif endfunc .endm -def_fn_816_base def_fn_816_base identity_ +def_fn_816_base .macro def_fn_816 w, h, txfm1, txfm2, eob_half function inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_8bpc_neon, export=1 .ifc \txfm1\()_\txfm2, dct_dct idct_dc \w, \h, 1 .endif +.ifnc \txfm1, identity adr x4, inv_\txfm1\()_8h_x\w\()_neon +.endif adr x5, inv_\txfm2\()_8h_x\h\()_neon .if \w == 8 mov x13, #\eob_half -- cgit v1.2.3