#[cfg(target_arch = "arm")] use crate::core_arch::arm::*; #[cfg(target_arch = "aarch64")] use crate::core_arch::aarch64::*; use crate::core_arch::simd::*; use std::{i16, i32, i8, mem::transmute, u16, u32, u8, vec::Vec}; macro_rules! V_u8 { () => { vec![0x00u8, 0x01u8, 0x02u8, 0x0Fu8, 0x80u8, 0xF0u8, 0xFFu8] }; } macro_rules! V_u16 { () => { vec![ 0x0000u16, 0x0101u16, 0x0202u16, 0x0F0Fu16, 0x8000u16, 0xF0F0u16, 0xFFFFu16, ] }; } macro_rules! V_u32 { () => { vec![ 0x00000000u32, 0x01010101u32, 0x02020202u32, 0x0F0F0F0Fu32, 0x80000000u32, 0xF0F0F0F0u32, 0xFFFFFFFFu32, ] }; } macro_rules! V_u64 { () => { vec![ 0x0000000000000000u64, 0x0101010101010101u64, 0x0202020202020202u64, 0x0F0F0F0F0F0F0F0Fu64, 0x8080808080808080u64, 0xF0F0F0F0F0F0F0F0u64, 0xFFFFFFFFFFFFFFFFu64, ] }; } macro_rules! V_i8 { () => { vec![ 0x00i8, 0x01i8, 0x02i8, 0x0Fi8, -128i8, /* 0x80 */ -16i8, /* 0xF0 */ -1i8, /* 0xFF */ ] }; } macro_rules! V_i16 { () => { vec![ 0x0000i16, 0x0101i16, 0x0202i16, 0x0F0Fi16, -32768i16, /* 0x8000 */ -3856i16, /* 0xF0F0 */ -1i16, /* 0xFFF */ ] }; } macro_rules! V_i32 { () => { vec![ 0x00000000i32, 0x01010101i32, 0x02020202i32, 0x0F0F0F0Fi32, -2139062144i32, /* 0x80000000 */ -252645136i32, /* 0xF0F0F0F0 */ -1i32, /* 0xFFFFFFFF */ ] }; } macro_rules! V_i64 { () => { vec![ 0x0000000000000000i64, 0x0101010101010101i64, 0x0202020202020202i64, 0x0F0F0F0F0F0F0F0Fi64, -9223372036854775808i64, /* 0x8000000000000000 */ -1152921504606846976i64, /* 0xF000000000000000 */ -1i64, /* 0xFFFFFFFFFFFFFFFF */ ] }; } macro_rules! V_f32 { () => { vec![ 0.0f32, 1.0f32, -1.0f32, 1.2f32, 2.4f32, std::f32::MAX, std::f32::MIN, std::f32::INFINITY, std::f32::NEG_INFINITY, std::f32::NAN, ] }; } macro_rules! to64 { ($t : ident) => { |v: $t| -> u64 { transmute(v) } }; } macro_rules! to128 { ($t : ident) => { |v: $t| -> u128 { transmute(v) } }; } pub(crate) fn test( vals: Vec, fill1: fn(T) -> V, fill2: fn(U) -> W, cast: fn(W) -> X, test_fun: fn(V, V) -> W, verify_fun: fn(T, T) -> U, ) where T: Copy + core::fmt::Debug + std::cmp::PartialEq, U: Copy + core::fmt::Debug + std::cmp::PartialEq, V: Copy + core::fmt::Debug, W: Copy + core::fmt::Debug, X: Copy + core::fmt::Debug + std::cmp::PartialEq, { let pairs = vals.iter().zip(vals.iter()); for (i, j) in pairs { let a: V = fill1(*i); let b: V = fill1(*j); let actual_pre: W = test_fun(a, b); let expected_pre: W = fill2(verify_fun(*i, *j)); let actual: X = cast(actual_pre); let expected: X = cast(expected_pre); assert_eq!( actual, expected, "[{:?}:{:?}] :\nf({:?}, {:?}) = {:?}\ng({:?}, {:?}) = {:?}\n", *i, *j, &a, &b, actual_pre, &a, &b, expected_pre ); } } macro_rules! gen_test_fn { ($n: ident, $t: ident, $u: ident, $v: ident, $w: ident, $x: ident, $vals: expr, $fill1: expr, $fill2: expr, $cast: expr) => { pub(crate) fn $n(test_fun: fn($v, $v) -> $w, verify_fun: fn($t, $t) -> $u) { unsafe { test::<$t, $u, $v, $w, $x>($vals, $fill1, $fill2, $cast, test_fun, verify_fun) }; } }; } macro_rules! gen_fill_fn { ($id: ident, $el_width: expr, $num_els: expr, $in_t : ident, $out_t: ident, $cmp_t: ident) => { pub(crate) fn $id(val: $in_t) -> $out_t { let initial: [$in_t; $num_els] = [val; $num_els]; let result: $cmp_t = unsafe { transmute(initial) }; let result_out: $out_t = unsafe { transmute(result) }; // println!("FILL: {:016x} as {} x {}: {:016x}", val.reverse_bits(), $el_width, $num_els, (result as u64).reverse_bits()); result_out } }; } gen_fill_fn!(fill_u8, 8, 8, u8, uint8x8_t, u64); gen_fill_fn!(fill_s8, 8, 8, i8, int8x8_t, u64); gen_fill_fn!(fillq_u8, 8, 16, u8, uint8x16_t, u128); gen_fill_fn!(fillq_s8, 8, 16, i8, int8x16_t, u128); gen_fill_fn!(fill_u16, 16, 4, u16, uint16x4_t, u64); gen_fill_fn!(fill_s16, 16, 4, i16, int16x4_t, u64); gen_fill_fn!(fillq_u16, 16, 8, u16, uint16x8_t, u128); gen_fill_fn!(fillq_s16, 16, 8, i16, int16x8_t, u128); gen_fill_fn!(fill_u32, 32, 2, u32, uint32x2_t, u64); gen_fill_fn!(fill_s32, 32, 2, i32, int32x2_t, u64); gen_fill_fn!(fillq_u32, 32, 4, u32, uint32x4_t, u128); gen_fill_fn!(fillq_s32, 32, 4, i32, int32x4_t, u128); gen_fill_fn!(fill_u64, 64, 1, u64, uint64x1_t, u64); gen_fill_fn!(fill_s64, 64, 1, i64, int64x1_t, u64); gen_fill_fn!(fillq_u64, 64, 2, u64, uint64x2_t, u128); gen_fill_fn!(fillq_s64, 64, 2, i64, int64x2_t, u128); gen_fill_fn!(fill_f32, 32, 2, f32, float32x2_t, u64); gen_fill_fn!(fillq_f32, 32, 4, f32, float32x4_t, u128); gen_test_fn!( test_ari_u8, u8, u8, uint8x8_t, uint8x8_t, u64, V_u8!(), fill_u8, fill_u8, to64!(uint8x8_t) ); gen_test_fn!( test_bit_u8, u8, u8, uint8x8_t, uint8x8_t, u64, V_u8!(), fill_u8, fill_u8, to64!(uint8x8_t) ); gen_test_fn!( test_cmp_u8, u8, u8, uint8x8_t, uint8x8_t, u64, V_u8!(), fill_u8, fill_u8, to64!(uint8x8_t) ); gen_test_fn!( testq_ari_u8, u8, u8, uint8x16_t, uint8x16_t, u128, V_u8!(), fillq_u8, fillq_u8, to128!(uint8x16_t) ); gen_test_fn!( testq_bit_u8, u8, u8, uint8x16_t, uint8x16_t, u128, V_u8!(), fillq_u8, fillq_u8, to128!(uint8x16_t) ); gen_test_fn!( testq_cmp_u8, u8, u8, uint8x16_t, uint8x16_t, u128, V_u8!(), fillq_u8, fillq_u8, to128!(uint8x16_t) ); gen_test_fn!( test_ari_s8, i8, i8, int8x8_t, int8x8_t, u64, V_i8!(), fill_s8, fill_s8, to64!(int8x8_t) ); gen_test_fn!( test_bit_s8, i8, i8, int8x8_t, int8x8_t, u64, V_i8!(), fill_s8, fill_s8, to64!(int8x8_t) ); gen_test_fn!( test_cmp_s8, i8, u8, int8x8_t, uint8x8_t, u64, V_i8!(), fill_s8, fill_u8, to64!(uint8x8_t) ); gen_test_fn!( testq_ari_s8, i8, i8, int8x16_t, int8x16_t, u128, V_i8!(), fillq_s8, fillq_s8, to128!(int8x16_t) ); gen_test_fn!( testq_bit_s8, i8, i8, int8x16_t, int8x16_t, u128, V_i8!(), fillq_s8, fillq_s8, to128!(int8x16_t) ); gen_test_fn!( testq_cmp_s8, i8, u8, int8x16_t, uint8x16_t, u128, V_i8!(), fillq_s8, fillq_u8, to128!(uint8x16_t) ); gen_test_fn!( test_ari_u16, u16, u16, uint16x4_t, uint16x4_t, u64, V_u16!(), fill_u16, fill_u16, to64!(uint16x4_t) ); gen_test_fn!( test_bit_u16, u16, u16, uint16x4_t, uint16x4_t, u64, V_u16!(), fill_u16, fill_u16, to64!(uint16x4_t) ); gen_test_fn!( test_cmp_u16, u16, u16, uint16x4_t, uint16x4_t, u64, V_u16!(), fill_u16, fill_u16, to64!(uint16x4_t) ); gen_test_fn!( testq_ari_u16, u16, u16, uint16x8_t, uint16x8_t, u128, V_u16!(), fillq_u16, fillq_u16, to128!(uint16x8_t) ); gen_test_fn!( testq_bit_u16, u16, u16, uint16x8_t, uint16x8_t, u128, V_u16!(), fillq_u16, fillq_u16, to128!(uint16x8_t) ); gen_test_fn!( testq_cmp_u16, u16, u16, uint16x8_t, uint16x8_t, u128, V_u16!(), fillq_u16, fillq_u16, to128!(uint16x8_t) ); gen_test_fn!( test_ari_s16, i16, i16, int16x4_t, int16x4_t, u64, V_i16!(), fill_s16, fill_s16, to64!(int16x4_t) ); gen_test_fn!( test_bit_s16, i16, i16, int16x4_t, int16x4_t, u64, V_i16!(), fill_s16, fill_s16, to64!(int16x4_t) ); gen_test_fn!( test_cmp_s16, i16, u16, int16x4_t, uint16x4_t, u64, V_i16!(), fill_s16, fill_u16, to64!(uint16x4_t) ); gen_test_fn!( testq_ari_s16, i16, i16, int16x8_t, int16x8_t, u128, V_i16!(), fillq_s16, fillq_s16, to128!(int16x8_t) ); gen_test_fn!( testq_bit_s16, i16, i16, int16x8_t, int16x8_t, u128, V_i16!(), fillq_s16, fillq_s16, to128!(int16x8_t) ); gen_test_fn!( testq_cmp_s16, i16, u16, int16x8_t, uint16x8_t, u128, V_i16!(), fillq_s16, fillq_u16, to128!(uint16x8_t) ); gen_test_fn!( test_ari_u32, u32, u32, uint32x2_t, uint32x2_t, u64, V_u32!(), fill_u32, fill_u32, to64!(uint32x2_t) ); gen_test_fn!( test_bit_u32, u32, u32, uint32x2_t, uint32x2_t, u64, V_u32!(), fill_u32, fill_u32, to64!(uint32x2_t) ); gen_test_fn!( test_cmp_u32, u32, u32, uint32x2_t, uint32x2_t, u64, V_u32!(), fill_u32, fill_u32, to64!(uint32x2_t) ); gen_test_fn!( testq_ari_u32, u32, u32, uint32x4_t, uint32x4_t, u128, V_u32!(), fillq_u32, fillq_u32, to128!(uint32x4_t) ); gen_test_fn!( testq_bit_u32, u32, u32, uint32x4_t, uint32x4_t, u128, V_u32!(), fillq_u32, fillq_u32, to128!(uint32x4_t) ); gen_test_fn!( testq_cmp_u32, u32, u32, uint32x4_t, uint32x4_t, u128, V_u32!(), fillq_u32, fillq_u32, to128!(uint32x4_t) ); gen_test_fn!( test_ari_s32, i32, i32, int32x2_t, int32x2_t, u64, V_i32!(), fill_s32, fill_s32, to64!(int32x2_t) ); gen_test_fn!( test_bit_s32, i32, i32, int32x2_t, int32x2_t, u64, V_i32!(), fill_s32, fill_s32, to64!(int32x2_t) ); gen_test_fn!( test_cmp_s32, i32, u32, int32x2_t, uint32x2_t, u64, V_i32!(), fill_s32, fill_u32, to64!(uint32x2_t) ); gen_test_fn!( testq_ari_s32, i32, i32, int32x4_t, int32x4_t, u128, V_i32!(), fillq_s32, fillq_s32, to128!(int32x4_t) ); gen_test_fn!( testq_bit_s32, i32, i32, int32x4_t, int32x4_t, u128, V_i32!(), fillq_s32, fillq_s32, to128!(int32x4_t) ); gen_test_fn!( testq_cmp_s32, i32, u32, int32x4_t, uint32x4_t, u128, V_i32!(), fillq_s32, fillq_u32, to128!(uint32x4_t) ); gen_test_fn!( test_ari_u64, u64, u64, uint64x1_t, uint64x1_t, u64, V_u64!(), fill_u64, fill_u64, to64!(uint64x1_t) ); gen_test_fn!( test_bit_u64, u64, u64, uint64x1_t, uint64x1_t, u64, V_u64!(), fill_u64, fill_u64, to64!(uint64x1_t) ); gen_test_fn!( test_cmp_u64, u64, u64, uint64x1_t, uint64x1_t, u64, V_u64!(), fill_u64, fill_u64, to64!(uint64x1_t) ); gen_test_fn!( testq_ari_u64, u64, u64, uint64x2_t, uint64x2_t, u128, V_u64!(), fillq_u64, fillq_u64, to128!(uint64x2_t) ); gen_test_fn!( testq_bit_u64, u64, u64, uint64x2_t, uint64x2_t, u128, V_u64!(), fillq_u64, fillq_u64, to128!(uint64x2_t) ); gen_test_fn!( testq_cmp_u64, u64, u64, uint64x2_t, uint64x2_t, u128, V_u64!(), fillq_u64, fillq_u64, to128!(uint64x2_t) ); gen_test_fn!( test_ari_s64, i64, i64, int64x1_t, int64x1_t, u64, V_i64!(), fill_s64, fill_s64, to64!(int64x1_t) ); gen_test_fn!( test_bit_s64, i64, i64, int64x1_t, int64x1_t, u64, V_i64!(), fill_s64, fill_s64, to64!(int64x1_t) ); gen_test_fn!( test_cmp_s64, i64, u64, int64x1_t, uint64x1_t, u64, V_i64!(), fill_s64, fill_u64, to64!(uint64x1_t) ); gen_test_fn!( testq_ari_s64, i64, i64, int64x2_t, int64x2_t, u128, V_i64!(), fillq_s64, fillq_s64, to128!(int64x2_t) ); gen_test_fn!( testq_bit_s64, i64, i64, int64x2_t, int64x2_t, u128, V_i64!(), fillq_s64, fillq_s64, to128!(int64x2_t) ); gen_test_fn!( testq_cmp_s64, i64, u64, int64x2_t, uint64x2_t, u128, V_i64!(), fillq_s64, fillq_u64, to128!(uint64x2_t) ); gen_test_fn!( test_ari_f32, f32, f32, float32x2_t, float32x2_t, u64, V_f32!(), fill_f32, fill_f32, to64!(float32x2_t) ); gen_test_fn!( test_cmp_f32, f32, u32, float32x2_t, uint32x2_t, u64, V_f32!(), fill_f32, fill_u32, to64!(uint32x2_t) ); gen_test_fn!( testq_ari_f32, f32, f32, float32x4_t, float32x4_t, u128, V_f32!(), fillq_f32, fillq_f32, to128!(float32x4_t) ); gen_test_fn!( testq_cmp_f32, f32, u32, float32x4_t, uint32x4_t, u128, V_f32!(), fillq_f32, fillq_u32, to128!(uint32x4_t) );