diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-15 03:35:49 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-15 03:35:49 +0000 |
commit | d8bbc7858622b6d9c278469aab701ca0b609cddf (patch) | |
tree | eff41dc61d9f714852212739e6b3738b82a2af87 /third_party/dav1d/src/arm/64/msac.S | |
parent | Releasing progress-linux version 125.0.3-1~progress7.99u1. (diff) | |
download | firefox-d8bbc7858622b6d9c278469aab701ca0b609cddf.tar.xz firefox-d8bbc7858622b6d9c278469aab701ca0b609cddf.zip |
Merging upstream version 126.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | third_party/dav1d/src/arm/64/msac.S | 167 |
1 files changed, 88 insertions, 79 deletions
diff --git a/third_party/dav1d/src/arm/64/msac.S b/third_party/dav1d/src/arm/64/msac.S index 3a6cf900a9..7bef9243fb 100644 --- a/third_party/dav1d/src/arm/64/msac.S +++ b/third_party/dav1d/src/arm/64/msac.S @@ -208,60 +208,66 @@ L(renorm): sub w4, w4, w3 // rng = u - v clz w5, w4 // clz(rng) eor w5, w5, #16 // d = clz(rng) ^ 16 - mvn x7, x7 // ~dif - add x7, x7, x3, lsl #48 // ~dif + (v << 48) + sub x7, x7, x3, lsl #48 // dif - (v << 48) L(renorm2): lsl w4, w4, w5 // rng << d subs w6, w6, w5 // cnt -= d - lsl x7, x7, x5 // (~dif + (v << 48)) << d + lsl x7, x7, x5 // (dif - (v << 48)) << d str w4, [x0, #RNG] - mvn x7, x7 // ~dif - b.hs 9f + b.hs 4f // refill ldp x3, x4, [x0] // BUF_POS, BUF_END add x5, x3, #8 - cmp x5, x4 - b.gt 2f - - ldr x3, [x3] // next_bits - add w8, w6, #23 // shift_bits = cnt + 23 - add w6, w6, #16 // cnt += 16 - rev x3, x3 // next_bits = bswap(next_bits) - sub x5, x5, x8, lsr #3 // buf_pos -= shift_bits >> 3 - and w8, w8, #24 // shift_bits &= 24 - lsr x3, x3, x8 // next_bits >>= shift_bits - sub w8, w8, w6 // shift_bits -= 16 + cnt - str x5, [x0, #BUF_POS] - lsl x3, x3, x8 // next_bits <<= shift_bits - mov w4, #48 - sub w6, w4, w8 // cnt = cnt + 64 - shift_bits - eor x7, x7, x3 // dif ^= next_bits - b 9f - -2: // refill_eob - mov w14, #40 - sub w5, w14, w6 // c = 40 - cnt -3: - cmp x3, x4 - b.ge 4f - ldrb w8, [x3], #1 - lsl x8, x8, x5 - eor x7, x7, x8 - subs w5, w5, #8 - b.ge 3b - -4: // refill_eob_end + subs x5, x5, x4 + b.hi 6f + + ldr x8, [x3] // next_bits + add w4, w6, #-48 // shift_bits = cnt + 16 (- 64) + mvn x8, x8 + neg w5, w4 + rev x8, x8 // next_bits = bswap(next_bits) + lsr w5, w5, #3 // num_bytes_read + lsr x8, x8, x4 // next_bits >>= (shift_bits & 63) + +2: // refill_end + add x3, x3, x5 + add w6, w6, w5, lsl #3 // cnt += num_bits_read str x3, [x0, #BUF_POS] - sub w6, w14, w5 // cnt = 40 - c -9: +3: // refill_end2 + orr x7, x7, x8 // dif |= next_bits + +4: // end str w6, [x0, #CNT] str x7, [x0, #DIF] mov w0, w15 add sp, sp, #48 ret + +5: // pad_with_ones + add w8, w6, #-16 + ror x8, x8, x8 + b 3b + +6: // refill_eob + cmp x3, x4 + b.hs 5b + + ldr x8, [x4, #-8] + lsl w5, w5, #3 + lsr x8, x8, x5 + add w5, w6, #-48 + mvn x8, x8 + sub w4, w4, w3 // num_bytes_left + rev x8, x8 + lsr x8, x8, x5 + neg w5, w5 + lsr w5, w5, #3 + cmp w5, w4 + csel w5, w5, w4, lo // num_bytes_read + b 2b endfunc function msac_decode_symbol_adapt8_neon, export=1 @@ -334,54 +340,37 @@ function msac_decode_hi_tok_neon, export=1 sub w4, w4, w3 // rng = u - v clz w5, w4 // clz(rng) eor w5, w5, #16 // d = clz(rng) ^ 16 - mvn x7, x7 // ~dif - add x7, x7, x3, lsl #48 // ~dif + (v << 48) + sub x7, x7, x3, lsl #48 // dif - (v << 48) lsl w4, w4, w5 // rng << d subs w6, w6, w5 // cnt -= d - lsl x7, x7, x5 // (~dif + (v << 48)) << d + lsl x7, x7, x5 // (dif - (v << 48)) << d str w4, [x0, #RNG] dup v3.4h, w4 - mvn x7, x7 // ~dif - b.hs 9f + b.hs 5f // refill ldp x3, x4, [x0] // BUF_POS, BUF_END add x5, x3, #8 - cmp x5, x4 - b.gt 2f - - ldr x3, [x3] // next_bits - add w8, w6, #23 // shift_bits = cnt + 23 - add w6, w6, #16 // cnt += 16 - rev x3, x3 // next_bits = bswap(next_bits) - sub x5, x5, x8, lsr #3 // buf_pos -= shift_bits >> 3 - and w8, w8, #24 // shift_bits &= 24 - lsr x3, x3, x8 // next_bits >>= shift_bits - sub w8, w8, w6 // shift_bits -= 16 + cnt - str x5, [x0, #BUF_POS] - lsl x3, x3, x8 // next_bits <<= shift_bits - mov w4, #48 - sub w6, w4, w8 // cnt = cnt + 64 - shift_bits - eor x7, x7, x3 // dif ^= next_bits - b 9f - -2: // refill_eob - mov w14, #40 - sub w5, w14, w6 // c = 40 - cnt -3: - cmp x3, x4 - b.ge 4f - ldrb w8, [x3], #1 - lsl x8, x8, x5 - eor x7, x7, x8 - subs w5, w5, #8 - b.ge 3b - -4: // refill_eob_end + subs x5, x5, x4 + b.hi 7f + + ldr x8, [x3] // next_bits + add w4, w6, #-48 // shift_bits = cnt + 16 (- 64) + mvn x8, x8 + neg w5, w4 + rev x8, x8 // next_bits = bswap(next_bits) + lsr w5, w5, #3 // num_bytes_read + lsr x8, x8, x4 // next_bits >>= (shift_bits & 63) + +3: // refill_end + add x3, x3, x5 + add w6, w6, w5, lsl #3 // cnt += num_bits_read str x3, [x0, #BUF_POS] - sub w6, w14, w5 // cnt = 40 - c -9: +4: // refill_end2 + orr x7, x7, x8 // dif |= next_bits + +5: // end lsl w15, w15, #1 sub w15, w15, #5 lsr x12, x7, #48 @@ -394,6 +383,29 @@ function msac_decode_hi_tok_neon, export=1 str x7, [x0, #DIF] lsr w0, w13, #1 ret + +6: // pad_with_ones + add w8, w6, #-16 + ror x8, x8, x8 + b 4b + +7: // refill_eob + cmp x3, x4 + b.hs 6b + + ldr x8, [x4, #-8] + lsl w5, w5, #3 + lsr x8, x8, x5 + add w5, w6, #-48 + mvn x8, x8 + sub w4, w4, w3 // num_bytes_left + rev x8, x8 + lsr x8, x8, x5 + neg w5, w5 + lsr w5, w5, #3 + cmp w5, w4 + csel w5, w5, w4, lo // num_bytes_read + b 3b endfunc function msac_decode_bool_equi_neon, export=1 @@ -410,7 +422,6 @@ function msac_decode_bool_equi_neon, export=1 csel x7, x8, x7, hs // if (ret) dif = dif - vw; clz w5, w4 // clz(rng) - mvn x7, x7 // ~dif eor w5, w5, #16 // d = clz(rng) ^ 16 b L(renorm2) endfunc @@ -431,7 +442,6 @@ function msac_decode_bool_neon, export=1 csel x7, x8, x7, hs // if (ret) dif = dif - vw; clz w5, w4 // clz(rng) - mvn x7, x7 // ~dif eor w5, w5, #16 // d = clz(rng) ^ 16 b L(renorm2) endfunc @@ -455,7 +465,6 @@ function msac_decode_bool_adapt_neon, export=1 ldr w10, [x0, #ALLOW_UPDATE_CDF] clz w5, w4 // clz(rng) - mvn x7, x7 // ~dif eor w5, w5, #16 // d = clz(rng) ^ 16 cbz w10, L(renorm2) |