Diffstat:
 third_party/aom/av1/common/av1_rtcd_defs.pl | 54
 1 file changed, 32 insertions(+), 22 deletions(-)
diff --git a/third_party/aom/av1/common/av1_rtcd_defs.pl b/third_party/aom/av1/common/av1_rtcd_defs.pl
index ef999fbba2..c0831330d1 100644
--- a/third_party/aom/av1/common/av1_rtcd_defs.pl
+++ b/third_party/aom/av1/common/av1_rtcd_defs.pl
@@ -77,6 +77,16 @@ EOF
}
forward_decls qw/av1_common_forward_decls/;
+# Fallbacks for Valgrind support
+# For normal use, we require SSE4.1. However, 32-bit Valgrind does not support
+# SSE4.1, so we include fallbacks for some critical functions to improve
+# performance
+$sse2_x86 = $ssse3_x86 = '';
+if ($opts{arch} eq "x86") {
+ $sse2_x86 = 'sse2';
+ $ssse3_x86 = 'ssse3';
+}
+
# functions that are 64 bit only.
$mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 = $avx_x86_64 = $avx2_x86_64 = '';
if ($opts{arch} eq "x86_64") {
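
The new $sse2_x86/$ssse3_x86 variables mirror the 64-bit-only idiom directly below them: each expands to an ISA token on the matching architecture and to an empty string everywhere else, so a trailing ", $sse2_x86" in a specialize call is a no-op outside 32-bit x86. A minimal runnable sketch of the pattern (the toy specialize below is illustrative, and assumes, as rtcd.pl appears to, that empty tokens are dropped):

use strict;
use warnings;

my %opts = (arch => "x86");   # illustrative; rtcd.pl receives this from the build
my $sse2_x86 = $opts{arch} eq "x86" ? "sse2" : "";

# Toy stand-in for rtcd.pl's specialize(): on x86_64, $sse2_x86 is "",
# so the SSE2 fallback silently drops out of the ISA list.
sub specialize {
  my ($fn, @isas) = @_;
  @isas = grep { $_ ne "" } @isas;   # empty tokens are no-ops
  print "$fn => @isas\n";
}

specialize("av1_lowbd_fwd_txfm", qw/sse4_1 avx2 neon/, $sse2_x86);
# arch = "x86"    -> av1_lowbd_fwd_txfm => sse4_1 avx2 neon sse2
# arch = "x86_64" -> av1_lowbd_fwd_txfm => sse4_1 avx2 neon
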
@@ -345,7 +355,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
#fwd txfm
add_proto qw/void av1_lowbd_fwd_txfm/, "const int16_t *src_diff, tran_low_t *coeff, int diff_stride, TxfmParam *txfm_param";
- specialize qw/av1_lowbd_fwd_txfm sse2 sse4_1 avx2 neon/;
+ specialize qw/av1_lowbd_fwd_txfm sse4_1 avx2 neon/, $sse2_x86;
add_proto qw/void av1_fwd_txfm2d_4x8/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
specialize qw/av1_fwd_txfm2d_4x8 sse4_1 neon/;
@@ -436,9 +446,9 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
specialize qw/av1_txb_init_levels sse4_1 avx2 neon/;
add_proto qw/uint64_t av1_wedge_sse_from_residuals/, "const int16_t *r1, const int16_t *d, const uint8_t *m, int N";
- specialize qw/av1_wedge_sse_from_residuals sse2 avx2 neon/;
+ specialize qw/av1_wedge_sse_from_residuals sse2 avx2 neon sve/;
add_proto qw/int8_t av1_wedge_sign_from_residuals/, "const int16_t *ds, const uint8_t *m, int N, int64_t limit";
- specialize qw/av1_wedge_sign_from_residuals sse2 avx2 neon/;
+ specialize qw/av1_wedge_sign_from_residuals sse2 avx2 neon sve/;
add_proto qw/void av1_wedge_compute_delta_squares/, "int16_t *d, const int16_t *a, const int16_t *b, int N";
specialize qw/av1_wedge_compute_delta_squares sse2 avx2 neon/;
@@ -521,21 +531,21 @@ add_proto qw/void cdef_copy_rect8_16bit_to_16bit/, "uint16_t *dst, int dstride,
# structs as arguments, which makes the v256 type of the intrinsics
# hard to support, so optimizations for this target are disabled.
if ($opts{config} !~ /libs-x86-win32-vs.*/) {
- specialize qw/cdef_find_dir sse2 ssse3 sse4_1 avx2 neon/;
- specialize qw/cdef_find_dir_dual sse2 ssse3 sse4_1 avx2 neon/;
+ specialize qw/cdef_find_dir sse4_1 avx2 neon/, "$ssse3_x86";
+ specialize qw/cdef_find_dir_dual sse4_1 avx2 neon/, "$ssse3_x86";
- specialize qw/cdef_filter_8_0 sse2 ssse3 sse4_1 avx2 neon/;
- specialize qw/cdef_filter_8_1 sse2 ssse3 sse4_1 avx2 neon/;
- specialize qw/cdef_filter_8_2 sse2 ssse3 sse4_1 avx2 neon/;
- specialize qw/cdef_filter_8_3 sse2 ssse3 sse4_1 avx2 neon/;
+ specialize qw/cdef_filter_8_0 sse4_1 avx2 neon/, "$ssse3_x86";
+ specialize qw/cdef_filter_8_1 sse4_1 avx2 neon/, "$ssse3_x86";
+ specialize qw/cdef_filter_8_2 sse4_1 avx2 neon/, "$ssse3_x86";
+ specialize qw/cdef_filter_8_3 sse4_1 avx2 neon/, "$ssse3_x86";
- specialize qw/cdef_filter_16_0 sse2 ssse3 sse4_1 avx2 neon/;
- specialize qw/cdef_filter_16_1 sse2 ssse3 sse4_1 avx2 neon/;
- specialize qw/cdef_filter_16_2 sse2 ssse3 sse4_1 avx2 neon/;
- specialize qw/cdef_filter_16_3 sse2 ssse3 sse4_1 avx2 neon/;
+ specialize qw/cdef_filter_16_0 sse4_1 avx2 neon/, "$ssse3_x86";
+ specialize qw/cdef_filter_16_1 sse4_1 avx2 neon/, "$ssse3_x86";
+ specialize qw/cdef_filter_16_2 sse4_1 avx2 neon/, "$ssse3_x86";
+ specialize qw/cdef_filter_16_3 sse4_1 avx2 neon/, "$ssse3_x86";
- specialize qw/cdef_copy_rect8_8bit_to_16bit sse2 ssse3 sse4_1 avx2 neon/;
- specialize qw/cdef_copy_rect8_16bit_to_16bit sse2 ssse3 sse4_1 avx2 neon/;
+ specialize qw/cdef_copy_rect8_8bit_to_16bit sse4_1 avx2 neon/, "$ssse3_x86";
+ specialize qw/cdef_copy_rect8_16bit_to_16bit sse4_1 avx2 neon/, "$ssse3_x86";
}
# WARPED_MOTION / GLOBAL_MOTION functions
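
The config guard in this hunk is plain Perl: $opts{config} names the build target, and the regex skips the v256-based SIMD for 32-bit Visual Studio builds, which cannot pass the intrinsics' struct-wrapped vector types as arguments. A small sketch of the same gating (the target string is hypothetical):

use strict;
use warnings;

my %opts = (config => "libs-x86-win32-vs16");   # hypothetical target name

if ($opts{config} !~ /libs-x86-win32-vs.*/) {
  print "cdef SIMD specializations enabled\n";
} else {
  # 32-bit VS targets keep the plain C cdef implementations.
  print "cdef stays C-only on this target\n";
}
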
@@ -591,20 +601,20 @@ if(aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
specialize qw/av1_convolve_y_sr sse2 avx2 neon/;
specialize qw/av1_convolve_y_sr_intrabc neon/;
specialize qw/av1_convolve_2d_scale sse4_1/;
- specialize qw/av1_dist_wtd_convolve_2d sse2 ssse3 avx2 neon neon_dotprod neon_i8mm/;
+ specialize qw/av1_dist_wtd_convolve_2d ssse3 avx2 neon neon_dotprod neon_i8mm/;
specialize qw/av1_dist_wtd_convolve_2d_copy sse2 avx2 neon/;
specialize qw/av1_dist_wtd_convolve_x sse2 avx2 neon neon_dotprod neon_i8mm/;
specialize qw/av1_dist_wtd_convolve_y sse2 avx2 neon/;
if(aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
- specialize qw/av1_highbd_dist_wtd_convolve_2d sse4_1 avx2 neon/;
- specialize qw/av1_highbd_dist_wtd_convolve_x sse4_1 avx2 neon/;
- specialize qw/av1_highbd_dist_wtd_convolve_y sse4_1 avx2 neon/;
+ specialize qw/av1_highbd_dist_wtd_convolve_2d sse4_1 avx2 neon sve2/;
+ specialize qw/av1_highbd_dist_wtd_convolve_x sse4_1 avx2 neon sve2/;
+ specialize qw/av1_highbd_dist_wtd_convolve_y sse4_1 avx2 neon sve2/;
specialize qw/av1_highbd_dist_wtd_convolve_2d_copy sse4_1 avx2 neon/;
- specialize qw/av1_highbd_convolve_2d_sr ssse3 avx2 neon/;
+ specialize qw/av1_highbd_convolve_2d_sr ssse3 avx2 neon sve2/;
specialize qw/av1_highbd_convolve_2d_sr_intrabc neon/;
- specialize qw/av1_highbd_convolve_x_sr ssse3 avx2 neon/;
+ specialize qw/av1_highbd_convolve_x_sr ssse3 avx2 neon sve2/;
specialize qw/av1_highbd_convolve_x_sr_intrabc neon/;
- specialize qw/av1_highbd_convolve_y_sr ssse3 avx2 neon/;
+ specialize qw/av1_highbd_convolve_y_sr ssse3 avx2 neon sve2/;
specialize qw/av1_highbd_convolve_y_sr_intrabc neon/;
specialize qw/av1_highbd_convolve_2d_scale sse4_1 neon/;
}
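
The wedge and high-bitdepth convolve hunks share one additive pattern: new Arm ISA tokens (sve, sve2) are appended to existing specialize lists, and at run time the dispatcher settles on the most capable implementation the CPU actually reports. A toy model of that selection, assuming later, more capable tokens override earlier ones (an illustrative model, not the generated RTCD code):

use strict;
use warnings;

my %cpu_has = (neon => 1, sve2 => 0);   # hypothetical CPU feature flags

# Walk the ISA list in order; the last supported entry wins.
sub select_impl {
  my ($fn, @isas) = @_;
  my $impl = "${fn}_c";                 # portable C baseline
  for my $isa (@isas) {
    $impl = "${fn}_${isa}" if $cpu_has{$isa};
  }
  return $impl;
}

print select_impl("av1_highbd_convolve_x_sr", qw/neon sve2/), "\n";
# -> av1_highbd_convolve_x_sr_neon (this CPU lacks SVE2)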