diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-15 03:35:49 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-15 03:35:49 +0000 |
commit | d8bbc7858622b6d9c278469aab701ca0b609cddf (patch) | |
tree | eff41dc61d9f714852212739e6b3738b82a2af87 /third_party/aom/aom_dsp/aom_dsp_rtcd_defs.pl | |
parent | Releasing progress-linux version 125.0.3-1~progress7.99u1. (diff) | |
download | firefox-d8bbc7858622b6d9c278469aab701ca0b609cddf.tar.xz firefox-d8bbc7858622b6d9c278469aab701ca0b609cddf.zip |
Merging upstream version 126.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rwxr-xr-x | third_party/aom/aom_dsp/aom_dsp_rtcd_defs.pl | 118 |
1 files changed, 62 insertions, 56 deletions
diff --git a/third_party/aom/aom_dsp/aom_dsp_rtcd_defs.pl b/third_party/aom/aom_dsp/aom_dsp_rtcd_defs.pl index 7bb156ac59..7e746e9cb9 100755 --- a/third_party/aom/aom_dsp/aom_dsp_rtcd_defs.pl +++ b/third_party/aom/aom_dsp/aom_dsp_rtcd_defs.pl @@ -498,8 +498,8 @@ add_proto qw/void aom_convolve8_horiz/, "const uint8_t *src, ptrdiff_t add_proto qw/void aom_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; specialize qw/aom_convolve_copy neon sse2 avx2/; -specialize qw/aom_convolve8_horiz neon neon_dotprod neon_i8mm sse2 ssse3/, "$avx2_ssse3"; -specialize qw/aom_convolve8_vert neon neon_dotprod neon_i8mm sse2 ssse3/, "$avx2_ssse3"; +specialize qw/aom_convolve8_horiz neon neon_dotprod neon_i8mm ssse3/, "$avx2_ssse3"; +specialize qw/aom_convolve8_vert neon neon_dotprod neon_i8mm ssse3/, "$avx2_ssse3"; add_proto qw/void aom_scaled_2d/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h"; specialize qw/aom_scaled_2d ssse3 neon/; @@ -509,10 +509,10 @@ if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { specialize qw/aom_highbd_convolve_copy sse2 avx2 neon/; add_proto qw/void aom_highbd_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bd"; - specialize qw/aom_highbd_convolve8_horiz sse2 avx2 neon/; + specialize qw/aom_highbd_convolve8_horiz sse2 avx2 neon sve/; add_proto qw/void aom_highbd_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bd"; - specialize qw/aom_highbd_convolve8_vert sse2 avx2 neon/; + specialize qw/aom_highbd_convolve8_vert sse2 avx2 neon sve/; } # @@ -1087,7 +1087,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") { specialize qw/aom_sad_skip_16x32x4d avx2 sse2 neon neon_dotprod/; specialize qw/aom_sad_skip_16x16x4d avx2 sse2 neon neon_dotprod/; specialize qw/aom_sad_skip_16x8x4d avx2 sse2 neon neon_dotprod/; - specialize qw/aom_sad_skip_16x4x4d neon neon_dotprod/; + specialize qw/aom_sad_skip_16x4x4d avx2 neon neon_dotprod/; specialize qw/aom_sad_skip_8x32x4d sse2 neon/; specialize qw/aom_sad_skip_8x16x4d sse2 neon/; specialize qw/aom_sad_skip_8x8x4d sse2 neon/; @@ -1116,7 +1116,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") { specialize qw/aom_sad64x16x3d avx2 neon neon_dotprod/; specialize qw/aom_sad32x8x3d avx2 neon neon_dotprod/; specialize qw/aom_sad16x64x3d avx2 neon neon_dotprod/; - specialize qw/aom_sad16x4x3d neon neon_dotprod/; + specialize qw/aom_sad16x4x3d avx2 neon neon_dotprod/; specialize qw/aom_sad8x32x3d neon/; specialize qw/aom_sad4x16x3d neon/; @@ -1264,8 +1264,6 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") { add_proto qw/int aom_vector_var/, "const int16_t *ref, const int16_t *src, int bwl"; specialize qw/aom_vector_var avx2 sse4_1 neon sve/; - # TODO(kyslov@) bring back SSE2 by extending it to 128 block size - #specialize qw/aom_vector_var neon sse2/; # # hamadard transform and satd for implmenting temporal dependency model @@ -1357,6 +1355,11 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") { specialize "aom_highbd_${bd}_mse16x8", qw/neon neon_dotprod/; specialize "aom_highbd_${bd}_mse8x16", qw/neon neon_dotprod/; specialize "aom_highbd_${bd}_mse8x8", qw/sse2 neon neon_dotprod/; + } elsif ($bd eq 10) { + specialize "aom_highbd_${bd}_mse16x16", qw/avx2 sse2 neon sve/; + specialize "aom_highbd_${bd}_mse16x8", qw/neon sve/; + specialize "aom_highbd_${bd}_mse8x16", qw/neon sve/; + specialize "aom_highbd_${bd}_mse8x8", qw/sse2 neon sve/; } else { specialize "aom_highbd_${bd}_mse16x16", qw/sse2 neon sve/; specialize "aom_highbd_${bd}_mse16x8", qw/neon sve/; @@ -1406,39 +1409,39 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") { specialize qw/aom_variance4x8 sse2 neon neon_dotprod/; specialize qw/aom_variance4x4 sse2 neon neon_dotprod/; - specialize qw/aom_sub_pixel_variance128x128 avx2 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_variance128x64 avx2 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_variance64x128 avx2 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_variance64x64 avx2 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_variance64x32 avx2 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_variance32x64 avx2 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_variance32x32 avx2 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_variance32x16 avx2 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_variance16x32 avx2 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_variance16x16 avx2 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_variance16x8 avx2 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_variance8x16 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_variance8x8 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_variance8x4 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_variance4x8 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_variance4x4 neon sse2 ssse3/; - - specialize qw/aom_sub_pixel_avg_variance128x128 avx2 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_avg_variance128x64 avx2 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_avg_variance64x128 avx2 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_avg_variance64x64 avx2 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_avg_variance64x32 avx2 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_avg_variance32x64 avx2 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_avg_variance32x32 avx2 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_avg_variance32x16 avx2 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_avg_variance16x32 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_avg_variance16x16 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_avg_variance16x8 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_avg_variance8x16 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_avg_variance8x8 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_avg_variance8x4 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_avg_variance4x8 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_avg_variance4x4 neon sse2 ssse3/; + specialize qw/aom_sub_pixel_variance128x128 avx2 neon ssse3/; + specialize qw/aom_sub_pixel_variance128x64 avx2 neon ssse3/; + specialize qw/aom_sub_pixel_variance64x128 avx2 neon ssse3/; + specialize qw/aom_sub_pixel_variance64x64 avx2 neon ssse3/; + specialize qw/aom_sub_pixel_variance64x32 avx2 neon ssse3/; + specialize qw/aom_sub_pixel_variance32x64 avx2 neon ssse3/; + specialize qw/aom_sub_pixel_variance32x32 avx2 neon ssse3/; + specialize qw/aom_sub_pixel_variance32x16 avx2 neon ssse3/; + specialize qw/aom_sub_pixel_variance16x32 avx2 neon ssse3/; + specialize qw/aom_sub_pixel_variance16x16 avx2 neon ssse3/; + specialize qw/aom_sub_pixel_variance16x8 avx2 neon ssse3/; + specialize qw/aom_sub_pixel_variance8x16 neon ssse3/; + specialize qw/aom_sub_pixel_variance8x8 neon ssse3/; + specialize qw/aom_sub_pixel_variance8x4 neon ssse3/; + specialize qw/aom_sub_pixel_variance4x8 neon ssse3/; + specialize qw/aom_sub_pixel_variance4x4 neon ssse3/; + + specialize qw/aom_sub_pixel_avg_variance128x128 avx2 neon ssse3/; + specialize qw/aom_sub_pixel_avg_variance128x64 avx2 neon ssse3/; + specialize qw/aom_sub_pixel_avg_variance64x128 avx2 neon ssse3/; + specialize qw/aom_sub_pixel_avg_variance64x64 avx2 neon ssse3/; + specialize qw/aom_sub_pixel_avg_variance64x32 avx2 neon ssse3/; + specialize qw/aom_sub_pixel_avg_variance32x64 avx2 neon ssse3/; + specialize qw/aom_sub_pixel_avg_variance32x32 avx2 neon ssse3/; + specialize qw/aom_sub_pixel_avg_variance32x16 avx2 neon ssse3/; + specialize qw/aom_sub_pixel_avg_variance16x32 neon ssse3/; + specialize qw/aom_sub_pixel_avg_variance16x16 neon ssse3/; + specialize qw/aom_sub_pixel_avg_variance16x8 neon ssse3/; + specialize qw/aom_sub_pixel_avg_variance8x16 neon ssse3/; + specialize qw/aom_sub_pixel_avg_variance8x8 neon ssse3/; + specialize qw/aom_sub_pixel_avg_variance8x4 neon ssse3/; + specialize qw/aom_sub_pixel_avg_variance4x8 neon ssse3/; + specialize qw/aom_sub_pixel_avg_variance4x4 neon ssse3/; if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") { specialize qw/aom_variance4x16 neon neon_dotprod sse2/; @@ -1448,18 +1451,18 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") { specialize qw/aom_variance16x64 neon neon_dotprod sse2 avx2/; specialize qw/aom_variance64x16 neon neon_dotprod sse2 avx2/; - specialize qw/aom_sub_pixel_variance4x16 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_variance16x4 neon avx2 sse2 ssse3/; - specialize qw/aom_sub_pixel_variance8x32 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_variance32x8 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_variance16x64 neon avx2 sse2 ssse3/; - specialize qw/aom_sub_pixel_variance64x16 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_avg_variance4x16 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_avg_variance16x4 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_avg_variance8x32 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_avg_variance32x8 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_avg_variance16x64 neon sse2 ssse3/; - specialize qw/aom_sub_pixel_avg_variance64x16 neon sse2 ssse3/; + specialize qw/aom_sub_pixel_variance4x16 neon ssse3/; + specialize qw/aom_sub_pixel_variance16x4 neon avx2 ssse3/; + specialize qw/aom_sub_pixel_variance8x32 neon ssse3/; + specialize qw/aom_sub_pixel_variance32x8 neon ssse3/; + specialize qw/aom_sub_pixel_variance16x64 neon avx2 ssse3/; + specialize qw/aom_sub_pixel_variance64x16 neon ssse3/; + specialize qw/aom_sub_pixel_avg_variance4x16 neon ssse3/; + specialize qw/aom_sub_pixel_avg_variance16x4 neon ssse3/; + specialize qw/aom_sub_pixel_avg_variance8x32 neon ssse3/; + specialize qw/aom_sub_pixel_avg_variance32x8 neon ssse3/; + specialize qw/aom_sub_pixel_avg_variance16x64 neon ssse3/; + specialize qw/aom_sub_pixel_avg_variance64x16 neon ssse3/; specialize qw/aom_dist_wtd_sub_pixel_avg_variance4x16 neon ssse3/; specialize qw/aom_dist_wtd_sub_pixel_avg_variance16x4 neon ssse3/; @@ -1789,11 +1792,14 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") { # Flow estimation library if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") { - add_proto qw/double av1_compute_cross_correlation/, "const unsigned char *frame1, int stride1, int x1, int y1, const unsigned char *frame2, int stride2, int x2, int y2"; - specialize qw/av1_compute_cross_correlation sse4_1 avx2/; + add_proto qw/bool aom_compute_mean_stddev/, "const unsigned char *frame, int stride, int x, int y, double *mean, double *one_over_stddev"; + specialize qw/aom_compute_mean_stddev sse4_1 avx2/; + + add_proto qw/double aom_compute_correlation/, "const unsigned char *frame1, int stride1, int x1, int y1, double mean1, double one_over_stddev1, const unsigned char *frame2, int stride2, int x2, int y2, double mean2, double one_over_stddev2"; + specialize qw/aom_compute_correlation sse4_1 avx2/; add_proto qw/void aom_compute_flow_at_point/, "const uint8_t *src, const uint8_t *ref, int x, int y, int width, int height, int stride, double *u, double *v"; - specialize qw/aom_compute_flow_at_point sse4_1 neon/; + specialize qw/aom_compute_flow_at_point sse4_1 avx2 neon/; } } # CONFIG_AV1_ENCODER |