summary | refs | log | tree | commit | diff | stats
path: root/third_party/aom/aom_dsp/aom_dsp_rtcd_defs.pl
diff options
context:
space:
mode:
Diffstat (limited to '')
-rwxr-xr-x third_party/aom/aom_dsp/aom_dsp_rtcd_defs.pl 118
1 files changed, 62 insertions, 56 deletions
diff --git a/third_party/aom/aom_dsp/aom_dsp_rtcd_defs.pl b/third_party/aom/aom_dsp/aom_dsp_rtcd_defs.pl
index 7bb156ac59..7e746e9cb9 100755
--- a/third_party/aom/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/third_party/aom/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -498,8 +498,8 @@ add_proto qw/void aom_convolve8_horiz/, "const uint8_t *src, ptrdiff_t
add_proto qw/void aom_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/aom_convolve_copy neon sse2 avx2/;
-specialize qw/aom_convolve8_horiz neon neon_dotprod neon_i8mm sse2 ssse3/, "$avx2_ssse3";
-specialize qw/aom_convolve8_vert neon neon_dotprod neon_i8mm sse2 ssse3/, "$avx2_ssse3";
+specialize qw/aom_convolve8_horiz neon neon_dotprod neon_i8mm ssse3/, "$avx2_ssse3";
+specialize qw/aom_convolve8_vert neon neon_dotprod neon_i8mm ssse3/, "$avx2_ssse3";
add_proto qw/void aom_scaled_2d/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
specialize qw/aom_scaled_2d ssse3 neon/;
@@ -509,10 +509,10 @@ if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
specialize qw/aom_highbd_convolve_copy sse2 avx2 neon/;
add_proto qw/void aom_highbd_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bd";
- specialize qw/aom_highbd_convolve8_horiz sse2 avx2 neon/;
+ specialize qw/aom_highbd_convolve8_horiz sse2 avx2 neon sve/;
add_proto qw/void aom_highbd_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bd";
- specialize qw/aom_highbd_convolve8_vert sse2 avx2 neon/;
+ specialize qw/aom_highbd_convolve8_vert sse2 avx2 neon sve/;
}
#
@@ -1087,7 +1087,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
specialize qw/aom_sad_skip_16x32x4d avx2 sse2 neon neon_dotprod/;
specialize qw/aom_sad_skip_16x16x4d avx2 sse2 neon neon_dotprod/;
specialize qw/aom_sad_skip_16x8x4d avx2 sse2 neon neon_dotprod/;
- specialize qw/aom_sad_skip_16x4x4d neon neon_dotprod/;
+ specialize qw/aom_sad_skip_16x4x4d avx2 neon neon_dotprod/;
specialize qw/aom_sad_skip_8x32x4d sse2 neon/;
specialize qw/aom_sad_skip_8x16x4d sse2 neon/;
specialize qw/aom_sad_skip_8x8x4d sse2 neon/;
@@ -1116,7 +1116,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
specialize qw/aom_sad64x16x3d avx2 neon neon_dotprod/;
specialize qw/aom_sad32x8x3d avx2 neon neon_dotprod/;
specialize qw/aom_sad16x64x3d avx2 neon neon_dotprod/;
- specialize qw/aom_sad16x4x3d neon neon_dotprod/;
+ specialize qw/aom_sad16x4x3d avx2 neon neon_dotprod/;
specialize qw/aom_sad8x32x3d neon/;
specialize qw/aom_sad4x16x3d neon/;
@@ -1264,8 +1264,6 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
add_proto qw/int aom_vector_var/, "const int16_t *ref, const int16_t *src, int bwl";
specialize qw/aom_vector_var avx2 sse4_1 neon sve/;
- # TODO(kyslov@) bring back SSE2 by extending it to 128 block size
- #specialize qw/aom_vector_var neon sse2/;
#
# hamadard transform and satd for implmenting temporal dependency model
@@ -1357,6 +1355,11 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
specialize "aom_highbd_${bd}_mse16x8", qw/neon neon_dotprod/;
specialize "aom_highbd_${bd}_mse8x16", qw/neon neon_dotprod/;
specialize "aom_highbd_${bd}_mse8x8", qw/sse2 neon neon_dotprod/;
+ } elsif ($bd eq 10) {
+ specialize "aom_highbd_${bd}_mse16x16", qw/avx2 sse2 neon sve/;
+ specialize "aom_highbd_${bd}_mse16x8", qw/neon sve/;
+ specialize "aom_highbd_${bd}_mse8x16", qw/neon sve/;
+ specialize "aom_highbd_${bd}_mse8x8", qw/sse2 neon sve/;
} else {
specialize "aom_highbd_${bd}_mse16x16", qw/sse2 neon sve/;
specialize "aom_highbd_${bd}_mse16x8", qw/neon sve/;
@@ -1406,39 +1409,39 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
specialize qw/aom_variance4x8 sse2 neon neon_dotprod/;
specialize qw/aom_variance4x4 sse2 neon neon_dotprod/;
- specialize qw/aom_sub_pixel_variance128x128 avx2 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance128x64 avx2 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance64x128 avx2 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance64x64 avx2 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance64x32 avx2 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance32x64 avx2 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance32x32 avx2 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance32x16 avx2 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance16x32 avx2 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance16x16 avx2 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance16x8 avx2 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance8x16 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance8x8 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance8x4 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance4x8 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance4x4 neon sse2 ssse3/;
-
- specialize qw/aom_sub_pixel_avg_variance128x128 avx2 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance128x64 avx2 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance64x128 avx2 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance64x64 avx2 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance64x32 avx2 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance32x64 avx2 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance32x32 avx2 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance32x16 avx2 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance16x32 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance16x16 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance16x8 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance8x16 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance8x8 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance8x4 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance4x8 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance4x4 neon sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance128x128 avx2 neon ssse3/;
+ specialize qw/aom_sub_pixel_variance128x64 avx2 neon ssse3/;
+ specialize qw/aom_sub_pixel_variance64x128 avx2 neon ssse3/;
+ specialize qw/aom_sub_pixel_variance64x64 avx2 neon ssse3/;
+ specialize qw/aom_sub_pixel_variance64x32 avx2 neon ssse3/;
+ specialize qw/aom_sub_pixel_variance32x64 avx2 neon ssse3/;
+ specialize qw/aom_sub_pixel_variance32x32 avx2 neon ssse3/;
+ specialize qw/aom_sub_pixel_variance32x16 avx2 neon ssse3/;
+ specialize qw/aom_sub_pixel_variance16x32 avx2 neon ssse3/;
+ specialize qw/aom_sub_pixel_variance16x16 avx2 neon ssse3/;
+ specialize qw/aom_sub_pixel_variance16x8 avx2 neon ssse3/;
+ specialize qw/aom_sub_pixel_variance8x16 neon ssse3/;
+ specialize qw/aom_sub_pixel_variance8x8 neon ssse3/;
+ specialize qw/aom_sub_pixel_variance8x4 neon ssse3/;
+ specialize qw/aom_sub_pixel_variance4x8 neon ssse3/;
+ specialize qw/aom_sub_pixel_variance4x4 neon ssse3/;
+
+ specialize qw/aom_sub_pixel_avg_variance128x128 avx2 neon ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance128x64 avx2 neon ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance64x128 avx2 neon ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance64x64 avx2 neon ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance64x32 avx2 neon ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance32x64 avx2 neon ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance32x32 avx2 neon ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance32x16 avx2 neon ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance16x32 neon ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance16x16 neon ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance16x8 neon ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance8x16 neon ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance8x8 neon ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance8x4 neon ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance4x8 neon ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance4x4 neon ssse3/;
if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
specialize qw/aom_variance4x16 neon neon_dotprod sse2/;
@@ -1448,18 +1451,18 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
specialize qw/aom_variance16x64 neon neon_dotprod sse2 avx2/;
specialize qw/aom_variance64x16 neon neon_dotprod sse2 avx2/;
- specialize qw/aom_sub_pixel_variance4x16 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance16x4 neon avx2 sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance8x32 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance32x8 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance16x64 neon avx2 sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance64x16 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance4x16 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance16x4 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance8x32 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance32x8 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance16x64 neon sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance64x16 neon sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance4x16 neon ssse3/;
+ specialize qw/aom_sub_pixel_variance16x4 neon avx2 ssse3/;
+ specialize qw/aom_sub_pixel_variance8x32 neon ssse3/;
+ specialize qw/aom_sub_pixel_variance32x8 neon ssse3/;
+ specialize qw/aom_sub_pixel_variance16x64 neon avx2 ssse3/;
+ specialize qw/aom_sub_pixel_variance64x16 neon ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance4x16 neon ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance16x4 neon ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance8x32 neon ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance32x8 neon ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance16x64 neon ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance64x16 neon ssse3/;
specialize qw/aom_dist_wtd_sub_pixel_avg_variance4x16 neon ssse3/;
specialize qw/aom_dist_wtd_sub_pixel_avg_variance16x4 neon ssse3/;
@@ -1789,11 +1792,14 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
# Flow estimation library
if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
- add_proto qw/double av1_compute_cross_correlation/, "const unsigned char *frame1, int stride1, int x1, int y1, const unsigned char *frame2, int stride2, int x2, int y2";
- specialize qw/av1_compute_cross_correlation sse4_1 avx2/;
+ add_proto qw/bool aom_compute_mean_stddev/, "const unsigned char *frame, int stride, int x, int y, double *mean, double *one_over_stddev";
+ specialize qw/aom_compute_mean_stddev sse4_1 avx2/;
+
+ add_proto qw/double aom_compute_correlation/, "const unsigned char *frame1, int stride1, int x1, int y1, double mean1, double one_over_stddev1, const unsigned char *frame2, int stride2, int x2, int y2, double mean2, double one_over_stddev2";
+ specialize qw/aom_compute_correlation sse4_1 avx2/;
add_proto qw/void aom_compute_flow_at_point/, "const uint8_t *src, const uint8_t *ref, int x, int y, int width, int height, int stride, double *u, double *v";
- specialize qw/aom_compute_flow_at_point sse4_1 neon/;
+ specialize qw/aom_compute_flow_at_point sse4_1 avx2 neon/;
}
} # CONFIG_AV1_ENCODER