summaryrefslogtreecommitdiffstats
path: root/media/libpng/mips
diff options
context:
space:
mode:
Diffstat (limited to 'media/libpng/mips')
-rw-r--r--media/libpng/mips/filter_mmi_inline_assembly.c525
-rw-r--r--media/libpng/mips/filter_msa_intrinsics.c14
-rw-r--r--media/libpng/mips/mips_init.c86
3 files changed, 612 insertions, 13 deletions
diff --git a/media/libpng/mips/filter_mmi_inline_assembly.c b/media/libpng/mips/filter_mmi_inline_assembly.c
new file mode 100644
index 0000000000..b330a46538
--- /dev/null
+++ b/media/libpng/mips/filter_mmi_inline_assembly.c
@@ -0,0 +1,525 @@
+/* filter_mmi_inline_assembly.c - MMI optimized filter functions
+ *
+ * Copyright (c) 2024 Cosmin Truta
+ * Written by zhanglixia and guxiwei, 2023
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ */
+
+#include "../pngpriv.h"
+
+#ifdef PNG_READ_SUPPORTED
+
+#if PNG_MIPS_MMI_IMPLEMENTATION == 2 /* Inline Assembly */
+
+/* Functions in this file look at most 3 pixels (a,b,c) to predict the 4th (d).
+ * They're positioned like this:
+ * prev: c b
+ * row: a d
+ * The Sub filter predicts d=a, Avg d=(a+b)/2, and Paeth predicts d to be
+ * whichever of a, b, or c is closest to p=a+b-c.
+ */
+
+/* Reverse the PNG "Up" filter on one row, in place.
+ *
+ * Each loop iteration loads 8 bytes from row and 8 bytes from prev_row, adds
+ * them byte-wise (paddb) and stores the 8 sums back to row.
+ *
+ * row_info: only rowbytes is read; it seeds the byte countdown.
+ * row:      filtered bytes on entry, reconstructed bytes on return.
+ * prev_row: bytes of the previous row.
+ *
+ * The loop body runs before the count is tested and always handles a whole
+ * 8-byte group, so bytes up to the next multiple of 8 are processed.
+ * NOTE(review): this presumably relies on both row buffers being padded and
+ * suitably aligned for ldc1/sdc1 - confirm against the caller.
+ */
+void png_read_filter_row_up_mmi(png_row_infop row_info, png_bytep row,
+    png_const_bytep prev_row)
+{
+    int istop = row_info->rowbytes;
+    double rp, pp;
+
+    /* The asm advances row, prev_row and istop, so they are declared as
+     * read-write ("+r") operands: GCC does not allow an asm statement to
+     * modify input-only operands.
+     */
+    __asm__ volatile (
+        "1: \n\t"
+        "ldc1 %[rp], 0x00(%[row]) \n\t"
+        "ldc1 %[pp], 0x00(%[prev_row]) \n\t"
+        "paddb %[rp], %[rp], %[pp] \n\t"
+        "sdc1 %[rp], 0x00(%[row]) \n\t"
+
+        "daddiu %[row], %[row], 0x08 \n\t"
+        "daddiu %[prev_row], %[prev_row], 0x08 \n\t"
+        "daddiu %[istop], %[istop], -0x08 \n\t"
+        "bgtz %[istop], 1b \n\t"
+        : [rp]"=&f"(rp), [pp]"=&f"(pp),
+          [row]"+r"(row), [prev_row]"+r"(prev_row),
+          [istop]"+r"(istop)
+        :
+        : "memory"
+    );
+}
+
+/* Reverse the PNG "Sub" filter on one row of 3-byte pixels, in place.
+ *
+ * Each loop iteration consumes 12 bytes (four pixels).  dest carries the
+ * most recently reconstructed pixel and is added byte-wise to the next
+ * filtered pixel; it starts as zero, so the first pixel is stored as is.
+ *
+ * row_info: only rowbytes is read; it seeds the byte countdown.
+ * row:      filtered bytes on entry, reconstructed bytes on return.
+ * prev:     unused.
+ *
+ * Note that the local named pp holds bytes 8..15 of the current row here,
+ * not bytes of the previous row.
+ * NOTE(review): 16 bytes are loaded per 12-byte step and the loop always
+ * handles a whole group, so the row buffer is presumably padded - confirm
+ * against the caller.
+ */
+void png_read_filter_row_sub3_mmi(png_row_infop row_info, png_bytep row,
+    png_const_bytep prev)
+{
+    int istop = row_info->rowbytes;
+    double rp, pp, dest;
+    double eight, sixteen, twenty_four, forty_eight;
+    double tmp0;
+    double ftmp[2];
+
+    /* The asm advances row and istop, so they are declared as read-write
+     * ("+r") operands: GCC does not allow an asm statement to modify
+     * input-only operands.
+     */
+    __asm__ volatile (
+        /* Shift counts (in bits) held in FP registers, and dest = 0. */
+        "li %[tmp0], 0x08 \n\t"
+        "dmtc1 %[tmp0], %[eight] \n\t"
+        "li %[tmp0], 0x10 \n\t"
+        "dmtc1 %[tmp0], %[sixteen] \n\t"
+        "li %[tmp0], 0x18 \n\t"
+        "dmtc1 %[tmp0], %[twenty_four] \n\t"
+        "li %[tmp0], 0x30 \n\t"
+        "dmtc1 %[tmp0], %[forty_eight] \n\t"
+        "xor %[dest], %[dest], %[dest] \n\t"
+
+        "1: \n\t"
+        "gsldrc1 %[rp], 0x00(%[row]) \n\t"
+        "gsldlc1 %[rp], 0x07(%[row]) \n\t"
+        "gsldrc1 %[pp], 0x08(%[row]) \n\t"
+        "gsldlc1 %[pp], 0x0f(%[row]) \n\t"
+
+        /* Pixel at byte offset 0. */
+        "paddb %[ftmp0], %[dest], %[rp] \n\t"
+        "swc1 %[ftmp0], 0x00(%[row]) \n\t"
+
+        /* Pixel at byte offset 3. */
+        "dsrl %[ftmp1], %[rp], %[twenty_four] \n\t"
+        "paddb %[dest], %[ftmp1], %[ftmp0] \n\t"
+        "gsswrc1 %[dest], 0x03(%[row]) \n\t"
+        "gsswlc1 %[dest], 0x06(%[row]) \n\t"
+
+        /* Pixel at byte offset 6: straddles the two loaded words. */
+        "dsrl %[ftmp0], %[rp], %[forty_eight] \n\t"
+        "dsll %[ftmp1], %[pp], %[sixteen] \n\t"
+        "or %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
+        "paddb %[dest], %[dest], %[ftmp0] \n\t"
+        "gsswrc1 %[dest], 0x06(%[row]) \n\t"
+        "gsswlc1 %[dest], 0x09(%[row]) \n\t"
+
+        /* Pixel at byte offset 9. */
+        "dsrl %[ftmp0], %[pp], %[eight] \n\t"
+        "paddb %[dest], %[dest], %[ftmp0] \n\t"
+        "gsswrc1 %[dest], 0x09(%[row]) \n\t"
+        "daddiu %[row], %[row], 0x0c \n\t"
+        "daddiu %[istop], %[istop], -0x0c \n\t"
+        "bgtz %[istop], 1b \n\t"
+        : [rp]"=&f"(rp), [pp]"=&f"(pp), [dest]"=&f"(dest),
+          [tmp0]"=&r"(tmp0), [ftmp0]"=&f"(ftmp[0]),
+          [ftmp1]"=&f"(ftmp[1]), [eight]"=&f"(eight),
+          [sixteen]"=&f"(sixteen), [twenty_four]"=&f"(twenty_four),
+          [forty_eight]"=&f"(forty_eight),
+          [row]"+r"(row), [istop]"+r"(istop)
+        :
+        : "memory"
+    );
+
+    PNG_UNUSED(prev)
+}
+
+/* Reverse the PNG "Sub" filter on one row of 4-byte pixels, in place.
+ *
+ * row_info: only rowbytes is read; it seeds the byte countdown.
+ * row:      filtered bytes on entry, reconstructed bytes on return.
+ * prev:     unused.
+ */
+void png_read_filter_row_sub4_mmi(png_row_infop row_info, png_bytep row,
+    png_const_bytep prev)
+{
+    /* The Sub filter predicts each pixel as the previous pixel, a.
+     * There is no pixel to the left of the first pixel, so it is encoded
+     * directly and is left untouched.  Every later pixel is reconstructed
+     * by adding the already reconstructed pixel 4 bytes to its left.
+     *
+     * Because the loop starts at the second pixel, only rowbytes - 4 bytes
+     * remain to be processed.  Counting the full rowbytes would run one
+     * extra iteration that reads and writes 4 bytes past the end of the row.
+     */
+    int istop = (int)row_info->rowbytes - 4;
+    double rp, pp;
+
+    /* The loop body runs before its count is tested, so skip it entirely
+     * when the row holds a single pixel.
+     */
+    if (istop > 0)
+    {
+        /* The asm advances row and istop, so they are declared as
+         * read-write ("+r") operands: GCC does not allow an asm statement
+         * to modify input-only operands.
+         */
+        __asm__ volatile (
+            "1: \n\t"
+            "lwc1 %[pp], 0x00(%[row]) \n\t"
+            "lwc1 %[rp], 0x04(%[row]) \n\t"
+            "paddb %[rp], %[rp], %[pp] \n\t"
+            "swc1 %[rp], 0x04(%[row]) \n\t"
+
+            "daddiu %[row], %[row], 0x04 \n\t"
+            "daddiu %[istop], %[istop], -0x04 \n\t"
+            "bgtz %[istop], 1b \n\t"
+            : [rp]"=&f"(rp), [pp]"=&f"(pp),
+              [row]"+r"(row), [istop]"+r"(istop)
+            :
+            : "memory"
+        );
+    }
+
+    PNG_UNUSED(prev)
+}
+
+/* Reverse the PNG "Avg" filter on one row of 3-byte pixels, in place.
+ *
+ * Each loop iteration consumes 12 bytes (four pixels) of row and prev.
+ * dest carries the most recently reconstructed pixel (a) and starts as
+ * zero.  For every pixel the prediction is built from pavgb(b, a) minus
+ * ((b ^ a) & 1) per byte, and is then added to the filtered pixel.
+ * NOTE(review): the subtraction presumably turns a round-up average into
+ * the floor average (a+b)/2 - confirm pavgb rounding in the Loongson manual.
+ *
+ * row_info: only rowbytes is read; it seeds the byte countdown.
+ * row:      filtered bytes on entry, reconstructed bytes on return.
+ * prev:     bytes of the previous row.
+ *
+ * NOTE(review): 16 bytes of each buffer are loaded per 12-byte step and the
+ * loop always handles a whole group, so both buffers are presumably padded
+ * - confirm against the caller.
+ */
+void png_read_filter_row_avg3_mmi(png_row_infop row_info, png_bytep row,
+    png_const_bytep prev)
+{
+    int istop = row_info->rowbytes;
+    double rp, pp, rp1, pp1;
+    double tmp0;
+    double ftmp[3];
+    double one, dest;
+    double eight, sixteen, twenty_four, forty_eight;
+
+    /* The asm advances row, prev and istop, so they are declared as
+     * read-write ("+r") operands: GCC does not allow an asm statement to
+     * modify input-only operands.
+     */
+    __asm__ volatile (
+        /* Shift counts (in bits) held in FP registers, and dest = 0. */
+        "li %[tmp0], 0x08 \n\t"
+        "dmtc1 %[tmp0], %[eight] \n\t"
+        "li %[tmp0], 0x10 \n\t"
+        "dmtc1 %[tmp0], %[sixteen] \n\t"
+        "li %[tmp0], 0x18 \n\t"
+        "dmtc1 %[tmp0], %[twenty_four] \n\t"
+        "li %[tmp0], 0x30 \n\t"
+        "dmtc1 %[tmp0], %[forty_eight] \n\t"
+        "xor %[dest], %[dest], %[dest] \n\t"
+
+        /* one = 0x0101 replicated into every halfword (a 1 in each byte). */
+        "li %[tmp0], 0x01 \n\t"
+        "ins %[tmp0], %[tmp0], 8, 8 \n\t"
+        "dmtc1 %[tmp0], %[one] \n\t"
+        "pshufh %[one], %[one], %[dest] \n\t"
+
+        "1: \n\t"
+        "gsldrc1 %[rp], 0x00(%[row]) \n\t"
+        "gsldlc1 %[rp], 0x07(%[row]) \n\t"
+        "gsldrc1 %[pp], 0x00(%[prev]) \n\t"
+        "gsldlc1 %[pp], 0x07(%[prev]) \n\t"
+        "gsldrc1 %[rp1], 0x08(%[row]) \n\t"
+        "gsldlc1 %[rp1], 0x0f(%[row]) \n\t"
+        "gsldrc1 %[pp1], 0x08(%[prev]) \n\t"
+        "gsldlc1 %[pp1], 0x0f(%[prev]) \n\t"
+
+        /* Pixel at byte offset 0. */
+        "xor %[ftmp0], %[pp], %[dest] \n\t"
+        "pavgb %[ftmp1], %[pp], %[dest] \n\t"
+        "and %[ftmp0], %[ftmp0], %[one] \n\t"
+        "psubb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
+        "paddb %[dest], %[rp], %[ftmp1] \n\t"
+        "swc1 %[dest], 0x00(%[row]) \n\t"
+
+        /* Pixel at byte offset 3. */
+        "dsrl %[ftmp0], %[rp], %[twenty_four] \n\t"
+        "dsrl %[ftmp1], %[pp], %[twenty_four] \n\t"
+
+        "xor %[ftmp2], %[ftmp1], %[dest] \n\t"
+        "pavgb %[ftmp1], %[ftmp1], %[dest] \n\t"
+        "and %[ftmp2], %[ftmp2], %[one] \n\t"
+        "psubb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
+        "paddb %[dest], %[ftmp0], %[ftmp1] \n\t"
+        "gsswrc1 %[dest], 0x03(%[row]) \n\t"
+        "gsswlc1 %[dest], 0x06(%[row]) \n\t"
+
+        /* Pixel at byte offset 6: straddles the two loaded words. */
+        "dsrl %[ftmp0], %[rp], %[forty_eight] \n\t"
+        "dsll %[ftmp1], %[rp1], %[sixteen] \n\t"
+        "or %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
+        "dsrl %[ftmp2], %[pp], %[forty_eight] \n\t"
+        "dsll %[ftmp1], %[pp1], %[sixteen] \n\t"
+        "or %[ftmp1], %[ftmp2], %[ftmp1] \n\t"
+
+        "xor %[ftmp2], %[ftmp1], %[dest] \n\t"
+        "pavgb %[ftmp1], %[ftmp1], %[dest] \n\t"
+        "and %[ftmp2], %[ftmp2], %[one] \n\t"
+        "psubb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
+        "paddb %[dest], %[ftmp0], %[ftmp1] \n\t"
+        "gsswrc1 %[dest], 0x06(%[row]) \n\t"
+        "gsswlc1 %[dest], 0x09(%[row]) \n\t"
+
+        /* Pixel at byte offset 9. */
+        "dsrl %[ftmp0], %[rp1], %[eight] \n\t"
+        "dsrl %[ftmp1], %[pp1], %[eight] \n\t"
+
+        "xor %[ftmp2], %[ftmp1], %[dest] \n\t"
+        "pavgb %[ftmp1], %[ftmp1], %[dest] \n\t"
+        "and %[ftmp2], %[ftmp2], %[one] \n\t"
+        "psubb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
+        "paddb %[dest], %[ftmp0], %[ftmp1] \n\t"
+        "gsswrc1 %[dest], 0x09(%[row]) \n\t"
+        "daddiu %[row], %[row], 0x0c \n\t"
+        "daddiu %[prev], %[prev], 0x0c \n\t"
+        "daddiu %[istop], %[istop], -0x0c \n\t"
+        "bgtz %[istop], 1b \n\t"
+        : [rp]"=&f"(rp), [pp]"=&f"(pp), [rp1]"=&f"(rp1),
+          [pp1]"=&f"(pp1), [tmp0]"=&r"(tmp0), [ftmp0]"=&f"(ftmp[0]),
+          [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]), [one]"=&f"(one),
+          [dest]"=&f"(dest), [eight]"=&f"(eight), [sixteen]"=&f"(sixteen),
+          [twenty_four]"=&f"(twenty_four), [forty_eight]"=&f"(forty_eight),
+          [row]"+r"(row), [prev]"+r"(prev), [istop]"+r"(istop)
+        :
+        : "memory"
+    );
+}
+
+/* Reverse the PNG "Avg" filter on one row of 4-byte pixels, in place.
+ *
+ * One pixel is handled per loop iteration.  dest carries the most recently
+ * reconstructed pixel (a) and starts as zero.  The prediction is built from
+ * pavgb(b, a) minus ((b ^ a) & 1) per byte, and is then added to the
+ * filtered pixel.
+ * NOTE(review): the subtraction presumably turns a round-up average into
+ * the floor average (a+b)/2 - confirm pavgb rounding in the Loongson manual.
+ *
+ * row_info: only rowbytes is read; it seeds the byte countdown.
+ * row:      filtered bytes on entry, reconstructed bytes on return.
+ * prev:     bytes of the previous row.
+ */
+void png_read_filter_row_avg4_mmi(png_row_infop row_info, png_bytep row,
+    png_const_bytep prev)
+{
+    int istop = row_info->rowbytes;
+    double rp, pp;
+    double dest;
+    double ftmp[2];
+    double tmp;
+
+    /* The asm advances row, prev and istop, so they are declared as
+     * read-write ("+r") operands: GCC does not allow an asm statement to
+     * modify input-only operands.
+     */
+    __asm__ volatile (
+        /* dest = 0; ftmp1 = 0x0101 replicated into every halfword. */
+        "xor %[dest], %[dest], %[dest] \n\t"
+        "li %[tmp], 0x01 \n\t"
+        "ins %[tmp], %[tmp], 8, 8 \n\t"
+        "dmtc1 %[tmp], %[ftmp1] \n\t"
+        "pshufh %[ftmp1], %[ftmp1], %[dest] \n\t"
+
+        "1: \n\t"
+        "lwc1 %[rp], 0x00(%[row]) \n\t"
+        "lwc1 %[pp], 0x00(%[prev]) \n\t"
+        "xor %[ftmp0], %[pp], %[dest] \n\t"
+        "pavgb %[pp], %[pp], %[dest] \n\t"
+        "and %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
+        "psubb %[pp], %[pp], %[ftmp0] \n\t"
+        "paddb %[dest], %[rp], %[pp] \n\t"
+        "swc1 %[dest], 0x00(%[row]) \n\t"
+        "daddiu %[row], %[row], 0x04 \n\t"
+        "daddiu %[prev], %[prev], 0x04 \n\t"
+        "daddiu %[istop], %[istop], -0x04 \n\t"
+        "bgtz %[istop], 1b \n\t"
+        : [rp]"=&f"(rp), [pp]"=&f"(pp), [ftmp0]"=&f"(ftmp[0]),
+          [ftmp1]"=&f"(ftmp[1]), [dest]"=&f"(dest), [tmp]"=&r"(tmp),
+          [row]"+r"(row), [prev]"+r"(prev), [istop]"+r"(istop)
+        :
+        : "memory"
+    );
+}
+
+/* Reverse the PNG "Paeth" filter on one row of 3-byte pixels, in place.
+ *
+ * Each loop iteration consumes 12 bytes (four pixels) of row and prev; the
+ * same predictor sequence is repeated once per pixel.
+ *
+ * row_info: only rowbytes is read; it seeds the byte countdown.
+ * row:      filtered bytes on entry, reconstructed bytes on return.
+ * prev:     bytes of the previous row.
+ *
+ * NOTE(review): 16 bytes of each buffer are loaded per 12-byte step and the
+ * loop always handles a whole group, so both buffers are presumably padded
+ * - confirm against the caller.
+ */
+void png_read_filter_row_paeth3_mmi(png_row_infop row_info, png_bytep row,
+    png_const_bytep prev)
+{
+    /* Paeth tries to predict pixel d using the pixel to the left of it, a,
+     * and two pixels from the previous row, b and c:
+     * prev: c b
+     * row: a d
+     * The Paeth function predicts d to be whichever of a, b, or c is nearest
+     * to p=a+b-c.
+     *
+     * The first pixel has no left context, and so uses an Up filter, p = b.
+     * This works naturally with our main loop's p = a+b-c if we force a and
+     * c to zero, which is what the first two xor instructions do.
+     *
+     * In every per-pixel step: pa and pb are the byte-wise absolute
+     * differences of b and c, and of a and c, widened to halfwords; pc is
+     * the absolute value of (b - c) + (a - c).  b replaces a where pa > pb,
+     * then c replaces that choice where min(pa, pb) > pc.  The filtered
+     * pixel d is added, the result is packed back to bytes and stored, and
+     * c takes the value of this step's b for the next step.
+     */
+    int istop = row_info->rowbytes;
+    double rp, pp, rp1, pp1, zero;
+    double a, b, c, d, pa, pb, pc;
+    double tmp0;
+    double ftmp[3];
+    double eight, sixteen, twenty_four, forty_eight;
+
+    /* The asm advances row, prev and istop, so they are declared as
+     * read-write ("+r") operands: GCC does not allow an asm statement to
+     * modify input-only operands.
+     */
+    __asm__ volatile (
+        "xor %[a], %[a], %[a] \n\t"
+        "xor %[c], %[c], %[c] \n\t"
+        "xor %[zero], %[zero], %[zero] \n\t"
+        "li %[tmp0], 0x08 \n\t"
+        "dmtc1 %[tmp0], %[eight] \n\t"
+        "li %[tmp0], 0x10 \n\t"
+        "dmtc1 %[tmp0], %[sixteen] \n\t"
+        "li %[tmp0], 0x18 \n\t"
+        "dmtc1 %[tmp0], %[twenty_four] \n\t"
+        "li %[tmp0], 0x30 \n\t"
+        "dmtc1 %[tmp0], %[forty_eight] \n\t"
+
+        "1: \n\t"
+        "gsldrc1 %[rp], 0x00(%[row]) \n\t"
+        "gsldlc1 %[rp], 0x07(%[row]) \n\t"
+        "gsldrc1 %[pp], 0x00(%[prev]) \n\t"
+        "gsldlc1 %[pp], 0x07(%[prev]) \n\t"
+        "gsldrc1 %[rp1], 0x08(%[row]) \n\t"
+        "gsldlc1 %[rp1], 0x0f(%[row]) \n\t"
+        "gsldrc1 %[pp1], 0x08(%[prev]) \n\t"
+        "gsldlc1 %[pp1], 0x0f(%[prev]) \n\t"
+
+        /* Pixel at byte offset 0. */
+        "punpcklbh %[b], %[pp], %[zero] \n\t"
+        "punpcklbh %[d], %[rp], %[zero] \n\t"
+        "packushb %[ftmp0], %[c], %[c] \n\t"
+        "packushb %[ftmp1], %[a], %[a] \n\t"
+        "pasubub %[pa], %[pp], %[ftmp0] \n\t"
+        "pasubub %[pb], %[ftmp1], %[ftmp0] \n\t"
+        "psubh %[ftmp0], %[b], %[c] \n\t"
+        "psubh %[ftmp1], %[a], %[c] \n\t"
+        "paddh %[pc], %[ftmp0], %[ftmp1] \n\t"
+        "pcmpgth %[ftmp0], %[zero], %[pc] \n\t"
+        "xor %[pc], %[pc], %[ftmp0] \n\t"
+        "psubh %[pc], %[pc], %[ftmp0] \n\t"
+        "punpcklbh %[pa], %[pa], %[zero] \n\t"
+        "punpcklbh %[pb], %[pb], %[zero] \n\t"
+        "pcmpgth %[ftmp0], %[pa], %[pb] \n\t"
+        "and %[ftmp1], %[b], %[ftmp0] \n\t"
+        "pandn %[a], %[ftmp0], %[a] \n\t"
+        "or %[a], %[a], %[ftmp1] \n\t"
+        "pminsh %[pa], %[pa], %[pb] \n\t"
+        "pcmpgth %[ftmp0], %[pa], %[pc] \n\t"
+        "and %[ftmp1], %[c], %[ftmp0] \n\t"
+        "pandn %[a], %[ftmp0], %[a] \n\t"
+        "or %[a], %[a], %[ftmp1] \n\t"
+        "paddb %[a], %[a], %[d] \n\t"
+        "packushb %[d], %[a], %[a] \n\t"
+        "punpcklbh %[c], %[pp], %[zero] \n\t"
+        "swc1 %[d], 0x00(%[row]) \n\t"
+
+        /* Pixel at byte offset 3. */
+        "dsrl %[ftmp0], %[rp], %[twenty_four] \n\t"
+        "dsrl %[ftmp2], %[pp], %[twenty_four] \n\t"
+
+        "punpcklbh %[b], %[ftmp2], %[zero] \n\t"
+        "punpcklbh %[d], %[ftmp0], %[zero] \n\t"
+        "packushb %[ftmp0], %[c], %[c] \n\t"
+        "packushb %[ftmp1], %[a], %[a] \n\t"
+        "pasubub %[pa], %[ftmp2], %[ftmp0] \n\t"
+        "pasubub %[pb], %[ftmp1], %[ftmp0] \n\t"
+        "psubh %[ftmp0], %[b], %[c] \n\t"
+        "psubh %[ftmp1], %[a], %[c] \n\t"
+        "paddh %[pc], %[ftmp0], %[ftmp1] \n\t"
+        "pcmpgth %[ftmp0], %[zero], %[pc] \n\t"
+        "xor %[pc], %[pc], %[ftmp0] \n\t"
+        "psubh %[pc], %[pc], %[ftmp0] \n\t"
+        "punpcklbh %[pa], %[pa], %[zero] \n\t"
+        "punpcklbh %[pb], %[pb], %[zero] \n\t"
+        "pcmpgth %[ftmp0], %[pa], %[pb] \n\t"
+        "and %[ftmp1], %[b], %[ftmp0] \n\t"
+        "pandn %[a], %[ftmp0], %[a] \n\t"
+        "or %[a], %[a], %[ftmp1] \n\t"
+        "pminsh %[pa], %[pa], %[pb] \n\t"
+        "pcmpgth %[ftmp0], %[pa], %[pc] \n\t"
+        "and %[ftmp1], %[c], %[ftmp0] \n\t"
+        "pandn %[a], %[ftmp0], %[a] \n\t"
+        "or %[a], %[a], %[ftmp1] \n\t"
+        "paddb %[a], %[a], %[d] \n\t"
+        "packushb %[d], %[a], %[a] \n\t"
+        "punpcklbh %[c], %[ftmp2], %[zero] \n\t"
+        "gsswrc1 %[d], 0x03(%[row]) \n\t"
+        "gsswlc1 %[d], 0x06(%[row]) \n\t"
+
+        /* Pixel at byte offset 6: straddles the two loaded words. */
+        "dsrl %[ftmp0], %[rp], %[forty_eight] \n\t"
+        "dsll %[ftmp1], %[rp1], %[sixteen] \n\t"
+        "or %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
+        "dsrl %[ftmp2], %[pp], %[forty_eight] \n\t"
+        "dsll %[ftmp1], %[pp1], %[sixteen] \n\t"
+        "or %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
+
+        "punpcklbh %[b], %[ftmp2], %[zero] \n\t"
+        "punpcklbh %[d], %[ftmp0], %[zero] \n\t"
+        "packushb %[ftmp0], %[c], %[c] \n\t"
+        "packushb %[ftmp1], %[a], %[a] \n\t"
+        "pasubub %[pa], %[ftmp2], %[ftmp0] \n\t"
+        "pasubub %[pb], %[ftmp1], %[ftmp0] \n\t"
+        "psubh %[ftmp0], %[b], %[c] \n\t"
+        "psubh %[ftmp1], %[a], %[c] \n\t"
+        "paddh %[pc], %[ftmp0], %[ftmp1] \n\t"
+        "pcmpgth %[ftmp0], %[zero], %[pc] \n\t"
+        "xor %[pc], %[pc], %[ftmp0] \n\t"
+        "psubh %[pc], %[pc], %[ftmp0] \n\t"
+        "punpcklbh %[pa], %[pa], %[zero] \n\t"
+        "punpcklbh %[pb], %[pb], %[zero] \n\t"
+        "pcmpgth %[ftmp0], %[pa], %[pb] \n\t"
+        "and %[ftmp1], %[b], %[ftmp0] \n\t"
+        "pandn %[a], %[ftmp0], %[a] \n\t"
+        "or %[a], %[a], %[ftmp1] \n\t"
+        "pminsh %[pa], %[pa], %[pb] \n\t"
+        "pcmpgth %[ftmp0], %[pa], %[pc] \n\t"
+        "and %[ftmp1], %[c], %[ftmp0] \n\t"
+        "pandn %[a], %[ftmp0], %[a] \n\t"
+        "or %[a], %[a], %[ftmp1] \n\t"
+        "paddb %[a], %[a], %[d] \n\t"
+        "packushb %[d], %[a], %[a] \n\t"
+        "punpcklbh %[c], %[ftmp2], %[zero] \n\t"
+        "gsswrc1 %[d], 0x06(%[row]) \n\t"
+        "gsswlc1 %[d], 0x09(%[row]) \n\t"
+
+        /* Pixel at byte offset 9. */
+        "dsrl %[ftmp0], %[rp1], %[eight] \n\t"
+        "dsrl %[ftmp2], %[pp1], %[eight] \n\t"
+
+        "punpcklbh %[b], %[ftmp2], %[zero] \n\t"
+        "punpcklbh %[d], %[ftmp0], %[zero] \n\t"
+        "packushb %[ftmp0], %[c], %[c] \n\t"
+        "packushb %[ftmp1], %[a], %[a] \n\t"
+        "pasubub %[pa], %[ftmp2], %[ftmp0] \n\t"
+        "pasubub %[pb], %[ftmp1], %[ftmp0] \n\t"
+        "psubh %[ftmp0], %[b], %[c] \n\t"
+        "psubh %[ftmp1], %[a], %[c] \n\t"
+        "paddh %[pc], %[ftmp0], %[ftmp1] \n\t"
+        "pcmpgth %[ftmp0], %[zero], %[pc] \n\t"
+        "xor %[pc], %[pc], %[ftmp0] \n\t"
+        "psubh %[pc], %[pc], %[ftmp0] \n\t"
+        "punpcklbh %[pa], %[pa], %[zero] \n\t"
+        "punpcklbh %[pb], %[pb], %[zero] \n\t"
+        "pcmpgth %[ftmp0], %[pa], %[pb] \n\t"
+        "and %[ftmp1], %[b], %[ftmp0] \n\t"
+        "pandn %[a], %[ftmp0], %[a] \n\t"
+        "or %[a], %[a], %[ftmp1] \n\t"
+        "pminsh %[pa], %[pa], %[pb] \n\t"
+        "pcmpgth %[ftmp0], %[pa], %[pc] \n\t"
+        "and %[ftmp1], %[c], %[ftmp0] \n\t"
+        "pandn %[a], %[ftmp0], %[a] \n\t"
+        "or %[a], %[a], %[ftmp1] \n\t"
+        "paddb %[a], %[a], %[d] \n\t"
+        "packushb %[d], %[a], %[a] \n\t"
+        "punpcklbh %[c], %[ftmp2], %[zero] \n\t"
+        "gsswrc1 %[d], 0x09(%[row]) \n\t"
+
+        "daddiu %[row], %[row], 0x0c \n\t"
+        "daddiu %[prev], %[prev], 0x0c \n\t"
+        "daddiu %[istop], %[istop], -0x0c \n\t"
+        "bgtz %[istop], 1b \n\t"
+        : [rp]"=&f"(rp), [pp]"=&f"(pp), [rp1]"=&f"(rp1), [pp1]"=&f"(pp1),
+          [zero]"=&f"(zero), [a]"=&f"(a),[b]"=&f"(b), [c]"=&f"(c),
+          [d]"=&f"(d), [pa]"=&f"(pa), [pb]"=&f"(pb), [pc]"=&f"(pc),
+          [tmp0]"=&r"(tmp0), [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
+          [ftmp2]"=&f"(ftmp[2]), [eight]"=&f"(eight), [sixteen]"=&f"(sixteen),
+          [twenty_four]"=&f"(twenty_four), [forty_eight]"=&f"(forty_eight),
+          [row]"+r"(row), [prev]"+r"(prev), [istop]"+r"(istop)
+        :
+        : "memory"
+    );
+}
+
+/* Reverse the PNG "Paeth" filter on one row of 4-byte pixels, in place.
+ *
+ * One pixel is handled per loop iteration.
+ *
+ * row_info: only rowbytes is read; it seeds the byte countdown.
+ * row:      filtered bytes on entry, reconstructed bytes on return.
+ * prev:     bytes of the previous row.
+ */
+void png_read_filter_row_paeth4_mmi(png_row_infop row_info, png_bytep row,
+    png_const_bytep prev)
+{
+    /* Paeth tries to predict pixel d using the pixel to the left of it, a,
+     * and two pixels from the previous row, b and c:
+     * prev: c b
+     * row: a d
+     * The Paeth function predicts d to be whichever of a, b, or c is nearest
+     * to p=a+b-c.
+     *
+     * The first pixel has no left context, and so uses an Up filter, p = b.
+     * This works naturally with our main loop's p = a+b-c if we force a and
+     * c to zero, which is what the first two xor instructions do.
+     */
+    int istop = row_info->rowbytes;
+    double rp, pp, zero;
+    double a, b, c, d, pa, pb, pc;
+    double ftmp[2];
+
+    /* The asm advances row, prev and istop, so they are declared as
+     * read-write ("+r") operands: GCC does not allow an asm statement to
+     * modify input-only operands.
+     */
+    __asm__ volatile (
+        "xor %[a], %[a], %[a] \n\t"
+        "xor %[c], %[c], %[c] \n\t"
+        "xor %[zero], %[zero], %[zero] \n\t"
+
+        "1: \n\t"
+        "lwc1 %[rp], 0x00(%[row]) \n\t"
+        "lwc1 %[pp], 0x00(%[prev]) \n\t"
+        "punpcklbh %[b], %[pp], %[zero] \n\t"
+        "punpcklbh %[d], %[rp], %[zero] \n\t"
+
+        /* pa and pb from b, a and c; pc = absolute value of
+         * (b - c) + (a - c).
+         */
+        "packushb %[ftmp0], %[c], %[c] \n\t"
+        "packushb %[ftmp1], %[a], %[a] \n\t"
+        "pasubub %[pa], %[pp], %[ftmp0] \n\t"
+        "pasubub %[pb], %[ftmp1], %[ftmp0] \n\t"
+        "psubh %[ftmp0], %[b], %[c] \n\t"
+        "psubh %[ftmp1], %[a], %[c] \n\t"
+        "paddh %[pc], %[ftmp0], %[ftmp1] \n\t"
+        "pcmpgth %[ftmp0], %[zero], %[pc] \n\t"
+        "xor %[pc], %[pc], %[ftmp0] \n\t"
+        "psubh %[pc], %[pc], %[ftmp0] \n\t"
+
+        "punpcklbh %[pa], %[pa], %[zero] \n\t"
+        "punpcklbh %[pb], %[pb], %[zero] \n\t"
+
+        /* Take b instead of a where pa > pb. */
+        "pcmpgth %[ftmp0], %[pa], %[pb] \n\t"
+        "and %[ftmp1], %[b], %[ftmp0] \n\t"
+        "pandn %[a], %[ftmp0], %[a] \n\t"
+        "or %[a], %[a], %[ftmp1] \n\t"
+        "pminsh %[pa], %[pa], %[pb] \n\t"
+
+        /* Take c instead where min(pa, pb) > pc, then add d and store. */
+        "pcmpgth %[ftmp0], %[pa], %[pc] \n\t"
+        "and %[ftmp1], %[c], %[ftmp0] \n\t"
+        "pandn %[a], %[ftmp0], %[a] \n\t"
+        "or %[a], %[a], %[ftmp1] \n\t"
+        "paddb %[a], %[a], %[d] \n\t"
+        "packushb %[d], %[a], %[a] \n\t"
+        "swc1 %[d], 0x00(%[row]) \n\t"
+        "punpcklbh %[c], %[pp], %[zero] \n\t"
+        "daddiu %[row], %[row], 0x04 \n\t"
+        "daddiu %[prev], %[prev], 0x04 \n\t"
+        "daddiu %[istop], %[istop], -0x04 \n\t"
+        "bgtz %[istop], 1b \n\t"
+        : [rp]"=&f"(rp), [pp]"=&f"(pp), [zero]"=&f"(zero),
+          [a]"=&f"(a), [b]"=&f"(b), [c]"=&f"(c), [d]"=&f"(d),
+          [pa]"=&f"(pa), [pb]"=&f"(pb), [pc]"=&f"(pc),
+          [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
+          [row]"+r"(row), [prev]"+r"(prev), [istop]"+r"(istop)
+        :
+        : "memory"
+    );
+}
+
+#endif /* PNG_MIPS_MMI_IMPLEMENTATION == 2 */
+#endif /* READ */
diff --git a/media/libpng/mips/filter_msa_intrinsics.c b/media/libpng/mips/filter_msa_intrinsics.c
index a579179421..1b734f4d9a 100644
--- a/media/libpng/mips/filter_msa_intrinsics.c
+++ b/media/libpng/mips/filter_msa_intrinsics.c
@@ -1,9 +1,9 @@
/* filter_msa_intrinsics.c - MSA optimised filter functions
*
- * Copyright (c) 2018 Cosmin Truta
+ * Copyright (c) 2018-2024 Cosmin Truta
* Copyright (c) 2016 Glenn Randers-Pehrson
- * Written by Mandar Sahastrabuddhe, August 2016.
+ * Written by Mandar Sahastrabuddhe, August 2016
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
@@ -11,7 +11,6 @@
*/
#include <stdio.h>
-#include <stdint.h>
#include "../pngpriv.h"
#ifdef PNG_READ_SUPPORTED
@@ -20,6 +19,7 @@
#if PNG_MIPS_MSA_IMPLEMENTATION == 1 /* intrinsics code from pngpriv.h */
#include <msa.h>
+#include <stdint.h>
/* libpng row pointers are not necessarily aligned to any particular boundary,
* however this code will only work with appropriate alignment. mips/mips_init.c
@@ -379,8 +379,8 @@ void png_read_filter_row_up_msa(png_row_infop row_info, png_bytep row,
LD_UB4(pp, 16, src4, src5, src6, src7);
pp += 64;
- ADD4(src0, src4, src1, src5, src2, src6, src3, src7,
- src0, src1, src2, src3);
+ ADD4(src0, src4, src1, src5, src2, src6, src3, src7,
+ src0, src1, src2, src3);
ST_UB4(src0, src1, src2, src3, rp, 16);
rp += 64;
@@ -400,7 +400,7 @@ void png_read_filter_row_up_msa(png_row_infop row_info, png_bytep row,
LD_UB4(pp, 16, src4, src5, src6, src7);
ADD4(src0, src4, src1, src5, src2, src6, src3, src7,
- src0, src1, src2, src3);
+ src0, src1, src2, src3);
ST_UB4(src0, src1, src2, src3, rp, 16);
rp += 64;
@@ -425,7 +425,7 @@ void png_read_filter_row_up_msa(png_row_infop row_info, png_bytep row,
LD_UB2(rp, 16, src0, src1);
LD_UB2(pp, 16, src4, src5);
- ADD2(src0, src4, src1, src5, src0, src1);
+ ADD2(src0, src4, src1, src5, src0, src1);
ST_UB2(src0, src1, rp, 16);
rp += 32;
diff --git a/media/libpng/mips/mips_init.c b/media/libpng/mips/mips_init.c
index 8dd283deef..5c6fa1dbf1 100644
--- a/media/libpng/mips/mips_init.c
+++ b/media/libpng/mips/mips_init.c
@@ -1,9 +1,10 @@
/* mips_init.c - MSA optimised filter functions
*
- * Copyright (c) 2018 Cosmin Truta
+ * Copyright (c) 2018-2024 Cosmin Truta
* Copyright (c) 2016 Glenn Randers-Pehrson
- * Written by Mandar Sahastrabuddhe, 2016.
+ * Written by Mandar Sahastrabuddhe, 2016
+ * Updated by guxiwei, 2023
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
@@ -20,8 +21,9 @@
#ifdef PNG_READ_SUPPORTED
-#if PNG_MIPS_MSA_OPT > 0
-#ifdef PNG_MIPS_MSA_CHECK_SUPPORTED /* Do run-time checks */
+#if PNG_MIPS_MSA_IMPLEMENTATION == 1 || PNG_MIPS_MMI_IMPLEMENTATION > 0
+
+#ifdef PNG_MIPS_MSA_CHECK_SUPPORTED /* Do MIPS MSA run-time checks */
/* WARNING: it is strongly recommended that you do not build libpng with
* run-time checks for CPU features if at all possible. In the case of the MIPS
* MSA instructions there is no processor-specific way of detecting the
@@ -51,13 +53,83 @@ static int png_have_msa(png_structp png_ptr);
#endif /* PNG_MIPS_MSA_FILE */
#endif /* PNG_MIPS_MSA_CHECK_SUPPORTED */
+#ifdef PNG_MIPS_MMI_CHECK_SUPPORTED /* Do MIPS MMI run-time checks */
+#ifndef PNG_MIPS_MMI_FILE
+# ifdef __linux__
+# define PNG_MIPS_MMI_FILE "contrib/mips-mmi/linux.c"
+# endif
+#endif
+
+#ifdef PNG_MIPS_MMI_FILE
+
+#include <signal.h> /* for sig_atomic_t */
+static int png_have_mmi();
+#include PNG_MIPS_MMI_FILE
+
+#else /* PNG_MIPS_MMI_FILE */
+# error "PNG_MIPS_MMI_FILE undefined: no support for run-time MIPS MMI checks"
+#endif /* PNG_MIPS_MMI_FILE */
+#endif /* PNG_MIPS_MMI_CHECK_SUPPORTED*/
+
#ifndef PNG_ALIGNED_MEMORY_SUPPORTED
# error "ALIGNED_MEMORY is required; set: -DPNG_ALIGNED_MEMORY_SUPPORTED"
#endif
+/* MIPS supports two optimizations: MMI and MSA. The appropriate
+ * optimization is chosen at runtime
+ */
void
-png_init_filter_functions_msa(png_structp pp, unsigned int bpp)
+png_init_filter_functions_mips(png_structp pp, unsigned int bpp)
{
+#if PNG_MIPS_MMI_IMPLEMENTATION > 0
+#ifdef PNG_MIPS_MMI_API_SUPPORTED
+ switch ((pp->options >> PNG_MIPS_MMI) & 3)
+ {
+ case PNG_OPTION_UNSET:
+#endif /* PNG_MIPS_MMI_API_SUPPORTED */
+#ifdef PNG_MIPS_MMI_CHECK_SUPPORTED
+ {
+ static volatile sig_atomic_t no_mmi = -1; /* not checked */
+
+ if (no_mmi < 0)
+ no_mmi = !png_have_mmi();
+
+ if (no_mmi)
+ goto MIPS_MSA_INIT;
+ }
+#ifdef PNG_MIPS_MMI_API_SUPPORTED
+ break;
+#endif
+#endif /* PNG_MIPS_MMI_CHECK_SUPPORTED */
+
+#ifdef PNG_MIPS_MMI_API_SUPPORTED
+ default: /* OFF or INVALID */
+ goto MIPS_MSA_INIT;
+
+ case PNG_OPTION_ON:
+ /* Option turned on */
+ break;
+ }
+#endif
+ pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_mmi;
+ if (bpp == 3)
+ {
+ pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_mmi;
+ pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_mmi;
+ pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
+ png_read_filter_row_paeth3_mmi;
+ }
+ else if (bpp == 4)
+ {
+ pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_mmi;
+ pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_mmi;
+ pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
+ png_read_filter_row_paeth4_mmi;
+ }
+#endif /* PNG_MIPS_MMI_IMPLEMENTATION > 0 */
+
+MIPS_MSA_INIT:
+#if PNG_MIPS_MSA_IMPLEMENTATION == 1
/* The switch statement is compiled in for MIPS_MSA_API, the call to
* png_have_msa is compiled in for MIPS_MSA_CHECK. If both are defined
* the check is only performed if the API has not set the MSA option on
@@ -125,6 +197,8 @@ png_init_filter_functions_msa(png_structp pp, unsigned int bpp)
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_msa;
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth4_msa;
}
+#endif /* PNG_MIPS_MSA_IMPLEMENTATION == 1 */
+ return;
}
-#endif /* PNG_MIPS_MSA_OPT > 0 */
+#endif /* PNG_MIPS_MSA_IMPLEMENTATION == 1 || PNG_MIPS_MMI_IMPLEMENTATION > 0 */
#endif /* READ */