From fbaf0bb26397aa498eb9156f06d5a6fe34dd7dd8 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 03:14:29 +0200 Subject: Merging upstream version 125.0.1. Signed-off-by: Daniel Baumann --- media/libpng/mips/filter_mmi_inline_assembly.c | 525 +++++++++++++++++++++++++ media/libpng/mips/filter_msa_intrinsics.c | 14 +- media/libpng/mips/mips_init.c | 86 +++- 3 files changed, 612 insertions(+), 13 deletions(-) create mode 100644 media/libpng/mips/filter_mmi_inline_assembly.c (limited to 'media/libpng/mips') diff --git a/media/libpng/mips/filter_mmi_inline_assembly.c b/media/libpng/mips/filter_mmi_inline_assembly.c new file mode 100644 index 0000000000..b330a46538 --- /dev/null +++ b/media/libpng/mips/filter_mmi_inline_assembly.c @@ -0,0 +1,525 @@ +/* filter_mmi_inline_assembly.c - MMI optimized filter functions + * + * Copyright (c) 2024 Cosmin Truta + * Written by zhanglixia and guxiwei, 2023 + * + * This code is released under the libpng license. + * For conditions of distribution and use, see the disclaimer + * and license in png.h + */ + +#include "../pngpriv.h" + +#ifdef PNG_READ_SUPPORTED + +#if PNG_MIPS_MMI_IMPLEMENTATION == 2 /* Inline Assembly */ + +/* Functions in this file look at no more than 3 pixels (a,b,c) to predict the 4th (d). + * They're positioned like this: + * prev: c b + * row: a d + * The Sub filter predicts d=a, Avg d=(a+b)/2, and Paeth predicts d to be + * whichever of a, b, or c is closest to p=a+b-c. 
+ */ + +void png_read_filter_row_up_mmi(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) +{ + int istop = row_info->rowbytes; /* bytes still to process */ + double rp,pp; /* 8-byte chunks of row and prev_row */ + __asm__ volatile ( /* row[i] += prev_row[i] bytewise, 8 bytes per pass */ + "1: \n\t" + "ldc1 %[rp], 0x00(%[row]) \n\t" + "ldc1 %[pp], 0x00(%[prev_row]) \n\t" + "paddb %[rp], %[rp], %[pp] \n\t" + "sdc1 %[rp], 0x00(%[row]) \n\t" + + "daddiu %[row], %[row], 0x08 \n\t" + "daddiu %[prev_row], %[prev_row], 0x08 \n\t" + "daddiu %[istop], %[istop], -0x08 \n\t" + "bgtz %[istop], 1b \n\t" + : [rp]"=&f"(rp), [pp]"=&f"(pp), + [row]"+r"(row), [prev_row]"+r"(prev_row), + [istop]"+r"(istop) + : /* no input-only operands: the loop advances row, prev_row and istop */ : "memory" + ); +} + +void png_read_filter_row_sub3_mmi(png_row_infop row_info, png_bytep row, + png_const_bytep prev) +{ + int istop = row_info->rowbytes; + double rp, pp, dest; + double eight, sixteen, twenty_four, forty_eight; + double tmp0; + double ftmp[2]; + + __asm__ volatile ( + "li %[tmp0], 0x08 \n\t" + "dmtc1 %[tmp0], %[eight] \n\t" + "li %[tmp0], 0x10 \n\t" + "dmtc1 %[tmp0], %[sixteen] \n\t" + "li %[tmp0], 0x18 \n\t" + "dmtc1 %[tmp0], %[twenty_four] \n\t" + "li %[tmp0], 0x30 \n\t" + "dmtc1 %[tmp0], %[forty_eight] \n\t" + "xor %[dest], %[dest], %[dest] \n\t" + + "1: \n\t" + "gsldrc1 %[rp], 0x00(%[row]) \n\t" + "gsldlc1 %[rp], 0x07(%[row]) \n\t" + "gsldrc1 %[pp], 0x08(%[row]) \n\t" + "gsldlc1 %[pp], 0x0f(%[row]) \n\t" + + "paddb %[ftmp0], %[dest], %[rp] \n\t" + "swc1 %[ftmp0], 0x00(%[row]) \n\t" + + "dsrl %[ftmp1], %[rp], %[twenty_four] \n\t" + "paddb %[dest], %[ftmp1], %[ftmp0] \n\t" + "gsswrc1 %[dest], 0x03(%[row]) \n\t" + "gsswlc1 %[dest], 0x06(%[row]) \n\t" + + "dsrl %[ftmp0], %[rp], %[forty_eight] \n\t" + "dsll %[ftmp1], %[pp], %[sixteen] \n\t" + "or %[ftmp0], %[ftmp0], %[ftmp1] \n\t" + "paddb %[dest], %[dest], %[ftmp0] \n\t" + "gsswrc1 %[dest], 0x06(%[row]) \n\t" + "gsswlc1 %[dest], 0x09(%[row]) \n\t" + + "dsrl %[ftmp0], %[pp], %[eight] \n\t" + "paddb %[dest], %[dest], %[ftmp0] \n\t" + "gsswrc1 %[dest], 0x09(%[row]) \n\t" + "daddiu %[row], %[row], 0x0c \n\t" + "daddiu 
%[istop], %[istop], -0x0c \n\t" + "bgtz %[istop], 1b \n\t" + : [rp]"=&f"(rp), [pp]"=&f"(pp), [dest]"=&f"(dest), + [tmp0]"=&r"(tmp0), [ftmp0]"=&f"(ftmp[0]), + [ftmp1]"=&f"(ftmp[1]), [eight]"=&f"(eight), + [sixteen]"=&f"(sixteen), [twenty_four]"=&f"(twenty_four), + [forty_eight]"=&f"(forty_eight), + [row]"+r"(row), [istop]"+r"(istop) + : /* no input-only operands: the loop advances row and istop */ : "memory" + ); + + PNG_UNUSED(prev) +} + +void png_read_filter_row_sub4_mmi(png_row_infop row_info, png_bytep row, + png_const_bytep prev) +{ + /* The Sub filter predicts each pixel as the previous pixel, a. + * There is no pixel to the left of the first pixel. It's encoded directly, + * so the loop leaves it untouched and starts by adding it to the second one. + */ + int istop = row_info->rowbytes; /* NOTE(review): rowbytes/4 passes starting at pixel 1, so the last pass touches 4 bytes past rowbytes -- confirm the row buffer is padded */ + double rp,pp; /* rp: pixel being decoded, pp: already-decoded pixel to its left */ + + __asm__ volatile ( + "1: \n\t" + "lwc1 %[pp], 0x00(%[row]) \n\t" + "lwc1 %[rp], 0x04(%[row]) \n\t" + "paddb %[rp], %[rp], %[pp] \n\t" + "swc1 %[rp], 0x04(%[row]) \n\t" + + "daddiu %[row], %[row], 0x04 \n\t" + "daddiu %[istop], %[istop], -0x04 \n\t" + "bgtz %[istop], 1b \n\t" + : [rp]"=&f"(rp), [pp]"=&f"(pp), + [row]"+r"(row), [istop]"+r"(istop) + : /* no input-only operands: the loop advances row and istop */ : "memory" + ); + + PNG_UNUSED(prev) +} + +void png_read_filter_row_avg3_mmi(png_row_infop row_info, png_bytep row, + png_const_bytep prev) +{ + int istop = row_info->rowbytes; + double rp, pp, rp1, pp1; + double tmp0; + double ftmp[3]; + double one, dest; + double eight, sixteen, twenty_four, forty_eight; + + __asm__ volatile ( + "li %[tmp0], 0x08 \n\t" + "dmtc1 %[tmp0], %[eight] \n\t" + "li %[tmp0], 0x10 \n\t" + "dmtc1 %[tmp0], %[sixteen] \n\t" + "li %[tmp0], 0x18 \n\t" + "dmtc1 %[tmp0], %[twenty_four] \n\t" + "li %[tmp0], 0x30 \n\t" + "dmtc1 %[tmp0], %[forty_eight] \n\t" + "xor %[dest], %[dest], %[dest] \n\t" + + "li %[tmp0], 0x01 \n\t" + "ins %[tmp0], %[tmp0], 8, 8 \n\t" + "dmtc1 %[tmp0], %[one] \n\t" + "pshufh %[one], %[one], %[dest] \n\t" + + "1: \n\t" + "gsldrc1 %[rp], 0x00(%[row]) \n\t" + "gsldlc1 %[rp], 0x07(%[row]) \n\t" + "gsldrc1 %[pp], 0x00(%[prev]) \n\t" + "gsldlc1 
%[pp], 0x07(%[prev]) \n\t" + "gsldrc1 %[rp1], 0x08(%[row]) \n\t" + "gsldlc1 %[rp1], 0x0f(%[row]) \n\t" + "gsldrc1 %[pp1], 0x08(%[prev]) \n\t" + "gsldlc1 %[pp1], 0x0f(%[prev]) \n\t" + + "xor %[ftmp0], %[pp], %[dest] \n\t" + "pavgb %[ftmp1], %[pp], %[dest] \n\t" + "and %[ftmp0], %[ftmp0], %[one] \n\t" + "psubb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" + "paddb %[dest], %[rp], %[ftmp1] \n\t" + "swc1 %[dest], 0x00(%[row]) \n\t" + + "dsrl %[ftmp0], %[rp], %[twenty_four] \n\t" + "dsrl %[ftmp1], %[pp], %[twenty_four] \n\t" + + "xor %[ftmp2], %[ftmp1], %[dest] \n\t" + "pavgb %[ftmp1], %[ftmp1], %[dest] \n\t" + "and %[ftmp2], %[ftmp2], %[one] \n\t" + "psubb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" + "paddb %[dest], %[ftmp0], %[ftmp1] \n\t" + "gsswrc1 %[dest], 0x03(%[row]) \n\t" + "gsswlc1 %[dest], 0x06(%[row]) \n\t" + + "dsrl %[ftmp0], %[rp], %[forty_eight] \n\t" + "dsll %[ftmp1], %[rp1], %[sixteen] \n\t" + "or %[ftmp0], %[ftmp0], %[ftmp1] \n\t" + "dsrl %[ftmp2], %[pp], %[forty_eight] \n\t" + "dsll %[ftmp1], %[pp1], %[sixteen] \n\t" + "or %[ftmp1], %[ftmp2], %[ftmp1] \n\t" + + "xor %[ftmp2], %[ftmp1], %[dest] \n\t" + "pavgb %[ftmp1], %[ftmp1], %[dest] \n\t" + "and %[ftmp2], %[ftmp2], %[one] \n\t" + "psubb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" + "paddb %[dest], %[ftmp0], %[ftmp1] \n\t" + "gsswrc1 %[dest], 0x06(%[row]) \n\t" + "gsswlc1 %[dest], 0x09(%[row]) \n\t" + + "dsrl %[ftmp0], %[rp1], %[eight] \n\t" + "dsrl %[ftmp1], %[pp1], %[eight] \n\t" + + "xor %[ftmp2], %[ftmp1], %[dest] \n\t" + "pavgb %[ftmp1], %[ftmp1], %[dest] \n\t" + "and %[ftmp2], %[ftmp2], %[one] \n\t" + "psubb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" + "paddb %[dest], %[ftmp0], %[ftmp1] \n\t" + "gsswrc1 %[dest], 0x09(%[row]) \n\t" + "daddiu %[row], %[row], 0x0c \n\t" + "daddiu %[prev], %[prev], 0x0c \n\t" + "daddiu %[istop], %[istop], -0x0c \n\t" + "bgtz %[istop], 1b \n\t" + : [rp]"=&f"(rp), [pp]"=&f"(pp), [rp1]"=&f"(rp1), + [pp1]"=&f"(pp1), [tmp0]"=&r"(tmp0), [ftmp0]"=&f"(ftmp[0]), + [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]), 
[one]"=&f"(one), + [dest]"=&f"(dest), [eight]"=&f"(eight), [sixteen]"=&f"(sixteen), + [twenty_four]"=&f"(twenty_four), [forty_eight]"=&f"(forty_eight), + [row]"+r"(row), [prev]"+r"(prev), [istop]"+r"(istop) + : /* no input-only operands: the loop advances row, prev and istop */ : "memory" + ); +} + +void png_read_filter_row_avg4_mmi(png_row_infop row_info, png_bytep row, + png_const_bytep prev) +{ + int istop = row_info->rowbytes; /* bytes still to process */ + double rp,pp; /* current 4-byte pixel of row and of prev */ + double dest; /* previously decoded pixel; zeroed before the loop */ + double ftmp[2]; /* ftmp[1] is loaded with 0x0101 in every halfword */ + double tmp; + + __asm__ volatile ( + "xor %[dest], %[dest], %[dest] \n\t" + "li %[tmp], 0x01 \n\t" + "ins %[tmp], %[tmp], 8, 8 \n\t" + "dmtc1 %[tmp], %[ftmp1] \n\t" + "pshufh %[ftmp1], %[ftmp1], %[dest] \n\t" + + "1: \n\t" + "lwc1 %[rp], 0x00(%[row]) \n\t" + "lwc1 %[pp], 0x00(%[prev]) \n\t" + "xor %[ftmp0], %[pp], %[dest] \n\t" + "pavgb %[pp], %[pp], %[dest] \n\t" + "and %[ftmp0], %[ftmp0], %[ftmp1] \n\t" + "psubb %[pp], %[pp], %[ftmp0] \n\t" + "paddb %[dest], %[rp], %[pp] \n\t" + "swc1 %[dest], 0x00(%[row]) \n\t" + "daddiu %[row], %[row], 0x04 \n\t" + "daddiu %[prev], %[prev], 0x04 \n\t" + "daddiu %[istop], %[istop], -0x04 \n\t" + "bgtz %[istop], 1b \n\t" + : [rp]"=&f"(rp), [pp]"=&f"(pp), [ftmp0]"=&f"(ftmp[0]), + [ftmp1]"=&f"(ftmp[1]), [dest]"=&f"(dest), [tmp]"=&r"(tmp), + [row]"+r"(row), [prev]"+r"(prev), [istop]"+r"(istop) + : /* no input-only operands: the loop advances row, prev and istop */ : "memory" + ); +} + +void png_read_filter_row_paeth3_mmi(png_row_infop row_info, png_bytep row, + png_const_bytep prev) +{ + /* Paeth tries to predict pixel d using the pixel to the left of it, a, + * and two pixels from the previous row, b and c: + * prev: c b + * row: a d + * The Paeth function predicts d to be whichever of a, b, or c is nearest to + * p=a+b-c. + * + * The first pixel has no left context, and so uses an Up filter, p = b. + * This works naturally with our main loop's p = a+b-c if we force a and c + * to zero. + * Here we zero a and c directly before the loop; each step then leaves its + * result in a and reloads c from the b it just used. 
+ */ + int istop = row_info->rowbytes; + double rp, pp, rp1, pp1, zero; + double a, b, c, d, pa, pb, pc; + double tmp0; + double ftmp[3]; + double eight, sixteen, twenty_four, forty_eight; + + __asm__ volatile ( + "xor %[a], %[a], %[a] \n\t" + "xor %[c], %[c], %[c] \n\t" + "xor %[zero], %[zero], %[zero] \n\t" + "li %[tmp0], 0x08 \n\t" + "dmtc1 %[tmp0], %[eight] \n\t" + "li %[tmp0], 0x10 \n\t" + "dmtc1 %[tmp0], %[sixteen] \n\t" + "li %[tmp0], 0x18 \n\t" + "dmtc1 %[tmp0], %[twenty_four] \n\t" + "li %[tmp0], 0x30 \n\t" + "dmtc1 %[tmp0], %[forty_eight] \n\t" + + "1: \n\t" + "gsldrc1 %[rp], 0x00(%[row]) \n\t" + "gsldlc1 %[rp], 0x07(%[row]) \n\t" + "gsldrc1 %[pp], 0x00(%[prev]) \n\t" + "gsldlc1 %[pp], 0x07(%[prev]) \n\t" + "gsldrc1 %[rp1], 0x08(%[row]) \n\t" + "gsldlc1 %[rp1], 0x0f(%[row]) \n\t" + "gsldrc1 %[pp1], 0x08(%[prev]) \n\t" + "gsldlc1 %[pp1], 0x0f(%[prev]) \n\t" + + "punpcklbh %[b], %[pp], %[zero] \n\t" + "punpcklbh %[d], %[rp], %[zero] \n\t" + "packushb %[ftmp0], %[c], %[c] \n\t" + "packushb %[ftmp1], %[a], %[a] \n\t" + "pasubub %[pa], %[pp], %[ftmp0] \n\t" + "pasubub %[pb], %[ftmp1], %[ftmp0] \n\t" + "psubh %[ftmp0], %[b], %[c] \n\t" + "psubh %[ftmp1], %[a], %[c] \n\t" + "paddh %[pc], %[ftmp0], %[ftmp1] \n\t" + "pcmpgth %[ftmp0], %[zero], %[pc] \n\t" + "xor %[pc], %[pc], %[ftmp0] \n\t" + "psubh %[pc], %[pc], %[ftmp0] \n\t" + "punpcklbh %[pa], %[pa], %[zero] \n\t" + "punpcklbh %[pb], %[pb], %[zero] \n\t" + "pcmpgth %[ftmp0], %[pa], %[pb] \n\t" + "and %[ftmp1], %[b], %[ftmp0] \n\t" + "pandn %[a], %[ftmp0], %[a] \n\t" + "or %[a], %[a], %[ftmp1] \n\t" + "pminsh %[pa], %[pa], %[pb] \n\t" + "pcmpgth %[ftmp0], %[pa], %[pc] \n\t" + "and %[ftmp1], %[c], %[ftmp0] \n\t" + "pandn %[a], %[ftmp0], %[a] \n\t" + "or %[a], %[a], %[ftmp1] \n\t" + "paddb %[a], %[a], %[d] \n\t" + "packushb %[d], %[a], %[a] \n\t" + "punpcklbh %[c], %[pp], %[zero] \n\t" + "swc1 %[d], 0x00(%[row]) \n\t" + + "dsrl %[ftmp0], %[rp], %[twenty_four] \n\t" + "dsrl %[ftmp2], %[pp], %[twenty_four] \n\t" + 
+ "punpcklbh %[b], %[ftmp2], %[zero] \n\t" + "punpcklbh %[d], %[ftmp0], %[zero] \n\t" + "packushb %[ftmp0], %[c], %[c] \n\t" + "packushb %[ftmp1], %[a], %[a] \n\t" + "pasubub %[pa], %[ftmp2], %[ftmp0] \n\t" + "pasubub %[pb], %[ftmp1], %[ftmp0] \n\t" + "psubh %[ftmp0], %[b], %[c] \n\t" + "psubh %[ftmp1], %[a], %[c] \n\t" + "paddh %[pc], %[ftmp0], %[ftmp1] \n\t" + "pcmpgth %[ftmp0], %[zero], %[pc] \n\t" + "xor %[pc], %[pc], %[ftmp0] \n\t" + "psubh %[pc], %[pc], %[ftmp0] \n\t" + "punpcklbh %[pa], %[pa], %[zero] \n\t" + "punpcklbh %[pb], %[pb], %[zero] \n\t" + "pcmpgth %[ftmp0], %[pa], %[pb] \n\t" + "and %[ftmp1], %[b], %[ftmp0] \n\t" + "pandn %[a], %[ftmp0], %[a] \n\t" + "or %[a], %[a], %[ftmp1] \n\t" + "pminsh %[pa], %[pa], %[pb] \n\t" + "pcmpgth %[ftmp0], %[pa], %[pc] \n\t" + "and %[ftmp1], %[c], %[ftmp0] \n\t" + "pandn %[a], %[ftmp0], %[a] \n\t" + "or %[a], %[a], %[ftmp1] \n\t" + "paddb %[a], %[a], %[d] \n\t" + "packushb %[d], %[a], %[a] \n\t" + "punpcklbh %[c], %[ftmp2], %[zero] \n\t" + "gsswrc1 %[d], 0x03(%[row]) \n\t" + "gsswlc1 %[d], 0x06(%[row]) \n\t" + + "dsrl %[ftmp0], %[rp], %[forty_eight] \n\t" + "dsll %[ftmp1], %[rp1], %[sixteen] \n\t" + "or %[ftmp0], %[ftmp0], %[ftmp1] \n\t" + "dsrl %[ftmp2], %[pp], %[forty_eight] \n\t" + "dsll %[ftmp1], %[pp1], %[sixteen] \n\t" + "or %[ftmp2], %[ftmp2], %[ftmp1] \n\t" + + "punpcklbh %[b], %[ftmp2], %[zero] \n\t" + "punpcklbh %[d], %[ftmp0], %[zero] \n\t" + "packushb %[ftmp0], %[c], %[c] \n\t" + "packushb %[ftmp1], %[a], %[a] \n\t" + "pasubub %[pa], %[ftmp2], %[ftmp0] \n\t" + "pasubub %[pb], %[ftmp1], %[ftmp0] \n\t" + "psubh %[ftmp0], %[b], %[c] \n\t" + "psubh %[ftmp1], %[a], %[c] \n\t" + "paddh %[pc], %[ftmp0], %[ftmp1] \n\t" + "pcmpgth %[ftmp0], %[zero], %[pc] \n\t" + "xor %[pc], %[pc], %[ftmp0] \n\t" + "psubh %[pc], %[pc], %[ftmp0] \n\t" + "punpcklbh %[pa], %[pa], %[zero] \n\t" + "punpcklbh %[pb], %[pb], %[zero] \n\t" + "pcmpgth %[ftmp0], %[pa], %[pb] \n\t" + "and %[ftmp1], %[b], %[ftmp0] \n\t" + "pandn %[a], 
%[ftmp0], %[a] \n\t" + "or %[a], %[a], %[ftmp1] \n\t" + "pminsh %[pa], %[pa], %[pb] \n\t" + "pcmpgth %[ftmp0], %[pa], %[pc] \n\t" + "and %[ftmp1], %[c], %[ftmp0] \n\t" + "pandn %[a], %[ftmp0], %[a] \n\t" + "or %[a], %[a], %[ftmp1] \n\t" + "paddb %[a], %[a], %[d] \n\t" + "packushb %[d], %[a], %[a] \n\t" + "punpcklbh %[c], %[ftmp2], %[zero] \n\t" + "gsswrc1 %[d], 0x06(%[row]) \n\t" + "gsswlc1 %[d], 0x09(%[row]) \n\t" + + "dsrl %[ftmp0], %[rp1], %[eight] \n\t" + "dsrl %[ftmp2], %[pp1], %[eight] \n\t" + + "punpcklbh %[b], %[ftmp2], %[zero] \n\t" + "punpcklbh %[d], %[ftmp0], %[zero] \n\t" + "packushb %[ftmp0], %[c], %[c] \n\t" + "packushb %[ftmp1], %[a], %[a] \n\t" + "pasubub %[pa], %[ftmp2], %[ftmp0] \n\t" + "pasubub %[pb], %[ftmp1], %[ftmp0] \n\t" + "psubh %[ftmp0], %[b], %[c] \n\t" + "psubh %[ftmp1], %[a], %[c] \n\t" + "paddh %[pc], %[ftmp0], %[ftmp1] \n\t" + "pcmpgth %[ftmp0], %[zero], %[pc] \n\t" + "xor %[pc], %[pc], %[ftmp0] \n\t" + "psubh %[pc], %[pc], %[ftmp0] \n\t" + "punpcklbh %[pa], %[pa], %[zero] \n\t" + "punpcklbh %[pb], %[pb], %[zero] \n\t" + "pcmpgth %[ftmp0], %[pa], %[pb] \n\t" + "and %[ftmp1], %[b], %[ftmp0] \n\t" + "pandn %[a], %[ftmp0], %[a] \n\t" + "or %[a], %[a], %[ftmp1] \n\t" + "pminsh %[pa], %[pa], %[pb] \n\t" + "pcmpgth %[ftmp0], %[pa], %[pc] \n\t" + "and %[ftmp1], %[c], %[ftmp0] \n\t" + "pandn %[a], %[ftmp0], %[a] \n\t" + "or %[a], %[a], %[ftmp1] \n\t" + "paddb %[a], %[a], %[d] \n\t" + "packushb %[d], %[a], %[a] \n\t" + "punpcklbh %[c], %[ftmp2], %[zero] \n\t" + "gsswrc1 %[d], 0x09(%[row]) \n\t" + + "daddiu %[row], %[row], 0x0c \n\t" + "daddiu %[prev], %[prev], 0x0c \n\t" + "daddiu %[istop], %[istop], -0x0c \n\t" + "bgtz %[istop], 1b \n\t" + : [rp]"=&f"(rp), [pp]"=&f"(pp), [rp1]"=&f"(rp1), [pp1]"=&f"(pp1), + [zero]"=&f"(zero), [a]"=&f"(a),[b]"=&f"(b), [c]"=&f"(c), + [d]"=&f"(d), [pa]"=&f"(pa), [pb]"=&f"(pb), [pc]"=&f"(pc), + [tmp0]"=&r"(tmp0), [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), + [ftmp2]"=&f"(ftmp[2]), [eight]"=&f"(eight), 
[sixteen]"=&f"(sixteen), + [twenty_four]"=&f"(twenty_four), [forty_eight]"=&f"(forty_eight), + [row]"+r"(row), [prev]"+r"(prev), [istop]"+r"(istop) + : /* no input-only operands: the loop advances row, prev and istop */ : "memory" + ); +} + +void png_read_filter_row_paeth4_mmi(png_row_infop row_info, png_bytep row, + png_const_bytep prev) +{ + /* Paeth tries to predict pixel d using the pixel to the left of it, a, + * and two pixels from the previous row, b and c: + * prev: c b + * row: a d + * The Paeth function predicts d to be whichever of a, b, or c is nearest to + * p=a+b-c. + * + * The first pixel has no left context, and so uses an Up filter, p = b. + * This works naturally with our main loop's p = a+b-c if we force a and c + * to zero. + * Here we zero a and c directly before the loop; each step then leaves its + * result in a and reloads c from the b it just used. + */ + int istop = row_info->rowbytes; + double rp, pp, zero; + double a, b, c, d, pa, pb, pc; + double ftmp[2]; + + __asm__ volatile ( + "xor %[a], %[a], %[a] \n\t" + "xor %[c], %[c], %[c] \n\t" + "xor %[zero], %[zero], %[zero] \n\t" + + "1: \n\t" + "lwc1 %[rp], 0x00(%[row]) \n\t" + "lwc1 %[pp], 0x00(%[prev]) \n\t" + "punpcklbh %[b], %[pp], %[zero] \n\t" + "punpcklbh %[d], %[rp], %[zero] \n\t" + + "packushb %[ftmp0], %[c], %[c] \n\t" + "packushb %[ftmp1], %[a], %[a] \n\t" + "pasubub %[pa], %[pp], %[ftmp0] \n\t" + "pasubub %[pb], %[ftmp1], %[ftmp0] \n\t" + "psubh %[ftmp0], %[b], %[c] \n\t" + "psubh %[ftmp1], %[a], %[c] \n\t" + "paddh %[pc], %[ftmp0], %[ftmp1] \n\t" + "pcmpgth %[ftmp0], %[zero], %[pc] \n\t" + "xor %[pc], %[pc], %[ftmp0] \n\t" + "psubh %[pc], %[pc], %[ftmp0] \n\t" + + "punpcklbh %[pa], %[pa], %[zero] \n\t" + "punpcklbh %[pb], %[pb], %[zero] \n\t" + + "pcmpgth %[ftmp0], %[pa], %[pb] \n\t" + "and %[ftmp1], %[b], %[ftmp0] \n\t" + "pandn %[a], %[ftmp0], %[a] \n\t" + "or %[a], %[a], %[ftmp1] \n\t" + "pminsh %[pa], %[pa], %[pb] \n\t" + + "pcmpgth %[ftmp0], %[pa], %[pc] \n\t" + "and %[ftmp1], %[c], %[ftmp0] \n\t" + "pandn %[a], %[ftmp0], %[a] \n\t" + "or %[a], %[a], %[ftmp1] \n\t" + "paddb %[a], 
%[a], %[d] \n\t" + "packushb %[d], %[a], %[a] \n\t" + "swc1 %[d], 0x00(%[row]) \n\t" + "punpcklbh %[c], %[pp], %[zero] \n\t" + "daddiu %[row], %[row], 0x04 \n\t" + "daddiu %[prev], %[prev], 0x04 \n\t" + "daddiu %[istop], %[istop], -0x04 \n\t" + "bgtz %[istop], 1b \n\t" + : [rp]"=&f"(rp), [pp]"=&f"(pp), [zero]"=&f"(zero), + [a]"=&f"(a), [b]"=&f"(b), [c]"=&f"(c), [d]"=&f"(d), + [pa]"=&f"(pa), [pb]"=&f"(pb), [pc]"=&f"(pc), + [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), + [row]"+r"(row), [prev]"+r"(prev), [istop]"+r"(istop) + : /* no input-only operands: the loop advances row, prev and istop */ : "memory" + ); +} + +#endif /* PNG_MIPS_MMI_IMPLEMENTATION == 2 */ +#endif /* READ */ diff --git a/media/libpng/mips/filter_msa_intrinsics.c b/media/libpng/mips/filter_msa_intrinsics.c index a579179421..1b734f4d9a 100644 --- a/media/libpng/mips/filter_msa_intrinsics.c +++ b/media/libpng/mips/filter_msa_intrinsics.c @@ -1,9 +1,9 @@ /* filter_msa_intrinsics.c - MSA optimised filter functions * - * Copyright (c) 2018 Cosmin Truta + * Copyright (c) 2018-2024 Cosmin Truta * Copyright (c) 2016 Glenn Randers-Pehrson - * Written by Mandar Sahastrabuddhe, August 2016. + * Written by Mandar Sahastrabuddhe, August 2016 * * This code is released under the libpng license. * For conditions of distribution and use, see the disclaimer @@ -11,7 +11,6 @@ */ #include -#include #include "../pngpriv.h" #ifdef PNG_READ_SUPPORTED @@ -20,6 +19,7 @@ #if PNG_MIPS_MSA_IMPLEMENTATION == 1 /* intrinsics code from pngpriv.h */ #include +#include /* libpng row pointers are not necessarily aligned to any particular boundary, * however this code will only work with appropriate alignment. 
mips/mips_init.c @@ -379,8 +379,8 @@ void png_read_filter_row_up_msa(png_row_infop row_info, png_bytep row, LD_UB4(pp, 16, src4, src5, src6, src7); pp += 64; - ADD4(src0, src4, src1, src5, src2, src6, src3, src7, - src0, src1, src2, src3); + ADD4(src0, src4, src1, src5, src2, src6, src3, src7, + src0, src1, src2, src3); ST_UB4(src0, src1, src2, src3, rp, 16); rp += 64; @@ -400,7 +400,7 @@ void png_read_filter_row_up_msa(png_row_infop row_info, png_bytep row, LD_UB4(pp, 16, src4, src5, src6, src7); ADD4(src0, src4, src1, src5, src2, src6, src3, src7, - src0, src1, src2, src3); + src0, src1, src2, src3); ST_UB4(src0, src1, src2, src3, rp, 16); rp += 64; @@ -425,7 +425,7 @@ void png_read_filter_row_up_msa(png_row_infop row_info, png_bytep row, LD_UB2(rp, 16, src0, src1); LD_UB2(pp, 16, src4, src5); - ADD2(src0, src4, src1, src5, src0, src1); + ADD2(src0, src4, src1, src5, src0, src1); ST_UB2(src0, src1, rp, 16); rp += 32; diff --git a/media/libpng/mips/mips_init.c b/media/libpng/mips/mips_init.c index 8dd283deef..5c6fa1dbf1 100644 --- a/media/libpng/mips/mips_init.c +++ b/media/libpng/mips/mips_init.c @@ -1,9 +1,10 @@ /* mips_init.c - MSA optimised filter functions * - * Copyright (c) 2018 Cosmin Truta + * Copyright (c) 2018-2024 Cosmin Truta * Copyright (c) 2016 Glenn Randers-Pehrson - * Written by Mandar Sahastrabuddhe, 2016. + * Written by Mandar Sahastrabuddhe, 2016 + * Updated by guxiwei, 2023 * * This code is released under the libpng license. * For conditions of distribution and use, see the disclaimer @@ -20,8 +21,9 @@ #ifdef PNG_READ_SUPPORTED -#if PNG_MIPS_MSA_OPT > 0 -#ifdef PNG_MIPS_MSA_CHECK_SUPPORTED /* Do run-time checks */ +#if PNG_MIPS_MSA_IMPLEMENTATION == 1 || PNG_MIPS_MMI_IMPLEMENTATION > 0 + +#ifdef PNG_MIPS_MSA_CHECK_SUPPORTED /* Do MIPS MSA run-time checks */ /* WARNING: it is strongly recommended that you do not build libpng with * run-time checks for CPU features if at all possible. 
In the case of the MIPS * MSA instructions there is no processor-specific way of detecting the @@ -51,13 +53,83 @@ static int png_have_msa(png_structp png_ptr); #endif /* PNG_MIPS_MSA_FILE */ #endif /* PNG_MIPS_MSA_CHECK_SUPPORTED */ +#ifdef PNG_MIPS_MMI_CHECK_SUPPORTED /* Do MIPS MMI run-time checks */ +#ifndef PNG_MIPS_MMI_FILE +# ifdef __linux__ +# define PNG_MIPS_MMI_FILE "contrib/mips-mmi/linux.c" +# endif +#endif + +#ifdef PNG_MIPS_MMI_FILE + +#include <signal.h> /* for sig_atomic_t */ +static int png_have_mmi(); +#include PNG_MIPS_MMI_FILE + +#else /* PNG_MIPS_MMI_FILE */ +# error "PNG_MIPS_MMI_FILE undefined: no support for run-time MIPS MMI checks" +#endif /* PNG_MIPS_MMI_FILE */ +#endif /* PNG_MIPS_MMI_CHECK_SUPPORTED */ + #ifndef PNG_ALIGNED_MEMORY_SUPPORTED # error "ALIGNED_MEMORY is required; set: -DPNG_ALIGNED_MEMORY_SUPPORTED" #endif +/* MIPS supports two optimizations: MMI and MSA. The appropriate + * optimization is chosen at runtime. + */ void -png_init_filter_functions_msa(png_structp pp, unsigned int bpp) +png_init_filter_functions_mips(png_structp pp, unsigned int bpp) { +#if PNG_MIPS_MMI_IMPLEMENTATION > 0 +#ifdef PNG_MIPS_MMI_API_SUPPORTED + switch ((pp->options >> PNG_MIPS_MMI) & 3) + { + case PNG_OPTION_UNSET: +#endif /* PNG_MIPS_MMI_API_SUPPORTED */ +#ifdef PNG_MIPS_MMI_CHECK_SUPPORTED + { + static volatile sig_atomic_t no_mmi = -1; /* not checked */ + + if (no_mmi < 0) + no_mmi = !png_have_mmi(); + + if (no_mmi) + goto MIPS_MSA_INIT; + } +#ifdef PNG_MIPS_MMI_API_SUPPORTED + break; +#endif +#endif /* PNG_MIPS_MMI_CHECK_SUPPORTED */ + +#ifdef PNG_MIPS_MMI_API_SUPPORTED + default: /* OFF or INVALID */ + goto MIPS_MSA_INIT; + + case PNG_OPTION_ON: + /* Option turned on */ + break; + } +#endif + pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_mmi; + if (bpp == 3) + { + pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_mmi; + pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_mmi; + 
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = + png_read_filter_row_paeth3_mmi; + } + else if (bpp == 4) + { + pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_mmi; + pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_mmi; + pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = + png_read_filter_row_paeth4_mmi; + } +#endif /* PNG_MIPS_MMI_IMPLEMENTATION > 0 */ + +MIPS_MSA_INIT: +#if PNG_MIPS_MSA_IMPLEMENTATION == 1 /* The switch statement is compiled in for MIPS_MSA_API, the call to * png_have_msa is compiled in for MIPS_MSA_CHECK. If both are defined * the check is only performed if the API has not set the MSA option on @@ -125,6 +197,8 @@ png_init_filter_functions_msa(png_structp pp, unsigned int bpp) pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_msa; pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth4_msa; } +#endif /* PNG_MIPS_MSA_IMPLEMENTATION == 1 */ + return; } -#endif /* PNG_MIPS_MSA_OPT > 0 */ +#endif /* PNG_MIPS_MSA_IMPLEMENTATION == 1 || PNG_MIPS_MMI_IMPLEMENTATION > 0 */ #endif /* READ */ -- cgit v1.2.3