diff options
Diffstat (limited to 'library/stdarch/crates/intrinsic-test')
6 files changed, 184 insertions, 109 deletions
diff --git a/library/stdarch/crates/intrinsic-test/missing_aarch64.txt b/library/stdarch/crates/intrinsic-test/missing_aarch64.txt index 56ec274b5..93fc126e5 100644 --- a/library/stdarch/crates/intrinsic-test/missing_aarch64.txt +++ b/library/stdarch/crates/intrinsic-test/missing_aarch64.txt @@ -67,20 +67,6 @@ vrnd64xq_f64 vrnd64z_f64 vrnd64zq_f64 -# Takes too long to compile tests -vcopyq_laneq_u8 -vcopyq_laneq_s8 -vcopyq_laneq_p8 -vcopyq_lane_u8 -vcopyq_lane_s8 -vcopyq_lane_p8 -vcopy_laneq_u8 -vcopy_laneq_s8 -vcopy_laneq_p8 -vcopy_lane_u8 -vcopy_lane_s8 -vcopy_lane_p8 - # QEMU 6.0 doesn't support these instructions vmmlaq_s32 vmmlaq_u32 diff --git a/library/stdarch/crates/intrinsic-test/src/argument.rs b/library/stdarch/crates/intrinsic-test/src/argument.rs index f4cb77992..798854c03 100644 --- a/library/stdarch/crates/intrinsic-test/src/argument.rs +++ b/library/stdarch/crates/intrinsic-test/src/argument.rs @@ -1,6 +1,6 @@ use std::ops::Range; -use crate::types::IntrinsicType; +use crate::types::{IntrinsicType, TypeKind}; use crate::Language; /// An argument for the intrinsic. @@ -90,49 +90,108 @@ impl ArgumentList { .join(", ") } - /// Creates a line that initializes this argument for C code. - /// e.g. `int32x2_t a = { 0x1, 0x2 };` - pub fn init_random_values_c(&self, pass: usize) -> String { + /// Creates a line for each argument that initializes an array for C from which `loads` argument + /// values can be loaded as a sliding window. + /// e.g `const int32x2_t a_vals = {0x3effffff, 0x3effffff, 0x3f7fffff}`, if loads=2. + pub fn gen_arglists_c(&self, loads: u32) -> String { self.iter() .filter_map(|arg| { (!arg.has_constraint()).then(|| { format!( - "{ty} {name} = {{ {values} }};", - ty = arg.to_c_type(), + "const {ty} {name}_vals[] = {{ {values} }};", + ty = arg.ty.c_scalar_type(), name = arg.name, - values = arg.ty.populate_random(pass, &Language::C) + values = arg.ty.populate_random(loads, &Language::C) ) }) }) .collect::<Vec<_>>() - .join("\n ") + .join("\n") } - /// Creates a line that initializes this argument for Rust code. - /// e.g. `let a = transmute([0x1, 0x2]);` - pub fn init_random_values_rust(&self, pass: usize) -> String { + /// Creates a line for each argument that initializes an array for Rust from which `loads` argument + /// values can be loaded as a sliding window, e.g `const A_VALS: [u32; 20] = [...];` + pub fn gen_arglists_rust(&self, loads: u32) -> String { self.iter() .filter_map(|arg| { (!arg.has_constraint()).then(|| { - if arg.is_simd() { - format!( - "let {name} = ::std::mem::transmute([{values}]);", - name = arg.name, - values = arg.ty.populate_random(pass, &Language::Rust), - ) - } else { - format!( - "let {name} = {value};", - name = arg.name, - value = arg.ty.populate_random(pass, &Language::Rust) - ) - } + format!( + "const {upper_name}_VALS: [{ty}; {load_size}] = unsafe{{ [{values}] }};", + upper_name = arg.name.to_uppercase(), + ty = arg.ty.rust_scalar_type(), + load_size = arg.ty.num_lanes() * arg.ty.num_vectors() + loads - 1, + values = arg.ty.populate_random(loads, &Language::Rust) + ) + }) + }) + .collect::<Vec<_>>() + .join("\n") + } + + /// Creates a line for each argument that initalizes the argument from an array [arg]_vals at + /// an offset i using a load intrinsic, in C. + /// e.g `uint8x8_t a = vld1_u8(&a_vals[i]);` + pub fn load_values_c(&self, p64_armv7_workaround: bool) -> String { + self.iter() + .filter_map(|arg| { + // The ACLE doesn't support 64-bit polynomial loads on Armv7 + // This and the cast are a workaround for this + let armv7_p64 = if let TypeKind::Poly = arg.ty.kind() { + p64_armv7_workaround + } else { + false + }; + + (!arg.has_constraint()).then(|| { + format!( + "{ty} {name} = {open_cast}{load}(&{name}_vals[i]){close_cast};", + ty = arg.to_c_type(), + name = arg.name, + load = if arg.is_simd() { + arg.ty.get_load_function(p64_armv7_workaround) + } else { + "*".to_string() + }, + open_cast = if armv7_p64 { + format!("cast<{}>(", arg.to_c_type()) + } else { + "".to_string() + }, + close_cast = if armv7_p64 { + ")".to_string() + } else { + "".to_string() + } + ) }) }) .collect::<Vec<_>>() .join("\n ") } + /// Creates a line for each argument that initalizes the argument from array [ARG]_VALS at + /// an offset i using a load intrinsic, in Rust. + /// e.g `let a = vld1_u8(A_VALS.as_ptr().offset(i));` + pub fn load_values_rust(&self) -> String { + self.iter() + .filter_map(|arg| { + (!arg.has_constraint()).then(|| { + format!( + "let {name} = {load}({upper_name}_VALS.as_ptr().offset(i));", + name = arg.name, + upper_name = arg.name.to_uppercase(), + load = if arg.is_simd() { + arg.ty.get_load_function(false) + } else { + "*".to_string() + }, + ) + }) + }) + .collect::<Vec<_>>() + .join("\n ") + } + pub fn iter(&self) -> std::slice::Iter<'_, Argument> { self.args.iter() } diff --git a/library/stdarch/crates/intrinsic-test/src/intrinsic.rs b/library/stdarch/crates/intrinsic-test/src/intrinsic.rs index 2b7130440..e0645a36b 100644 --- a/library/stdarch/crates/intrinsic-test/src/intrinsic.rs +++ b/library/stdarch/crates/intrinsic-test/src/intrinsic.rs @@ -20,8 +20,9 @@ pub struct Intrinsic { impl Intrinsic { /// Generates a std::cout for the intrinsics results that will match the - /// rust debug output format for the return type. - pub fn print_result_c(&self, index: usize, additional: &str) -> String { + /// rust debug output format for the return type. The generated line assumes + /// there is an int i in scope which is the current pass number. + pub fn print_result_c(&self, additional: &str) -> String { let lanes = if self.results.num_vectors() > 1 { (0..self.results.num_vectors()) .map(|vector| { @@ -72,7 +73,7 @@ impl Intrinsic { }; format!( - r#"std::cout << "Result {additional}-{idx}: {ty}" << std::fixed << std::setprecision(150) << {lanes} << "{close}" << std::endl;"#, + r#"std::cout << "Result {additional}-" << i+1 << ": {ty}" << std::fixed << std::setprecision(150) << {lanes} << "{close}" << std::endl;"#, ty = if self.results.is_simd() { format!("{}(", self.results.c_type()) } else { @@ -81,11 +82,31 @@ impl Intrinsic { close = if self.results.is_simd() { ")" } else { "" }, lanes = lanes, additional = additional, - idx = index, ) } - pub fn generate_pass_rust(&self, index: usize, additional: &str) -> String { + pub fn generate_loop_c( + &self, + additional: &str, + passes: u32, + p64_armv7_workaround: bool, + ) -> String { + format!( + r#" {{ + for (int i=0; i<{passes}; i++) {{ + {loaded_args} + auto __return_value = {intrinsic_call}({args}); + {print_result} + }} + }}"#, + loaded_args = self.arguments.load_values_c(p64_armv7_workaround), + intrinsic_call = self.name, + args = self.arguments.as_call_param_c(), + print_result = self.print_result_c(additional) + ) + } + + pub fn generate_loop_rust(&self, additional: &str, passes: u32) -> String { let constraints = self.arguments.as_constraint_parameters_rust(); let constraints = if !constraints.is_empty() { format!("::<{}>", constraints) @@ -94,32 +115,20 @@ impl Intrinsic { }; format!( - r#" - unsafe {{ - {initialized_args} - let res = {intrinsic_call}{const}({args}); - println!("Result {additional}-{idx}: {{:.150?}}", res); - }}"#, - initialized_args = self.arguments.init_random_values_rust(index), - intrinsic_call = self.name, - args = self.arguments.as_call_param_rust(), - additional = additional, - idx = index, - const = constraints, - ) - } - - pub fn generate_pass_c(&self, index: usize, additional: &str) -> String { - format!( r#" {{ - {initialized_args} - auto __return_value = {intrinsic_call}({args}); - {print_result} + for i in 0..{passes} {{ + unsafe {{ + {loaded_args} + let __return_value = {intrinsic_call}{const}({args}); + println!("Result {additional}-{{}}: {{:.150?}}", i+1, __return_value); + }} + }} }}"#, - initialized_args = self.arguments.init_random_values_c(index), + loaded_args = self.arguments.load_values_rust(), intrinsic_call = self.name, - args = self.arguments.as_call_param_c(), - print_result = self.print_result_c(index, additional) + const = constraints, + args = self.arguments.as_call_param_rust(), + additional = additional, ) } } diff --git a/library/stdarch/crates/intrinsic-test/src/main.rs b/library/stdarch/crates/intrinsic-test/src/main.rs index 1b58da2fd..43f2df08b 100644 --- a/library/stdarch/crates/intrinsic-test/src/main.rs +++ b/library/stdarch/crates/intrinsic-test/src/main.rs @@ -23,13 +23,21 @@ mod intrinsic; mod types; mod values; +// The number of times each intrinsic will be called. +const PASSES: u32 = 20; + #[derive(Debug, PartialEq)] pub enum Language { Rust, C, } -fn gen_code_c(intrinsic: &Intrinsic, constraints: &[&Argument], name: String) -> String { +fn gen_code_c( + intrinsic: &Intrinsic, + constraints: &[&Argument], + name: String, + p64_armv7_workaround: bool, +) -> String { if let Some((current, constraints)) = constraints.split_last() { let range = current .constraints @@ -47,19 +55,25 @@ fn gen_code_c(intrinsic: &Intrinsic, constraints: &[&Argument], name: String) -> name = current.name, ty = current.ty.c_type(), val = i, - pass = gen_code_c(intrinsic, constraints, format!("{}-{}", name, i)) + pass = gen_code_c( + intrinsic, + constraints, + format!("{}-{}", name, i), + p64_armv7_workaround + ) ) }) .collect() } else { - (1..20) - .map(|idx| intrinsic.generate_pass_c(idx, &name)) - .collect::<Vec<_>>() - .join("\n") + intrinsic.generate_loop_c(&name, PASSES, p64_armv7_workaround) } } -fn generate_c_program(header_files: &[&str], intrinsic: &Intrinsic) -> String { +fn generate_c_program( + header_files: &[&str], + intrinsic: &Intrinsic, + p64_armv7_workaround: bool, +) -> String { let constraints = intrinsic .arguments .iter() @@ -75,7 +89,7 @@ fn generate_c_program(header_files: &[&str], intrinsic: &Intrinsic) -> String { template<typename T1, typename T2> T1 cast(T2 x) {{ static_assert(sizeof(T1) == sizeof(T2), "sizeof T1 and T2 must be the same"); - T1 ret = 0; + T1 ret{{}}; memcpy(&ret, &x, sizeof(T1)); return ret; }} @@ -95,6 +109,8 @@ std::ostream& operator<<(std::ostream& os, poly128_t value) {{ }} #endif +{arglists} + int main(int argc, char **argv) {{ {passes} return 0; @@ -104,7 +120,13 @@ int main(int argc, char **argv) {{ .map(|header| format!("#include <{}>", header)) .collect::<Vec<_>>() .join("\n"), - passes = gen_code_c(intrinsic, constraints.as_slice(), Default::default()), + arglists = intrinsic.arguments.gen_arglists_c(PASSES), + passes = gen_code_c( + intrinsic, + constraints.as_slice(), + Default::default(), + p64_armv7_workaround + ), ) } @@ -131,10 +153,7 @@ fn gen_code_rust(intrinsic: &Intrinsic, constraints: &[&Argument], name: String) }) .collect() } else { - (1..20) - .map(|idx| intrinsic.generate_pass_rust(idx, &name)) - .collect::<Vec<_>>() - .join("\n") + intrinsic.generate_loop_rust(&name, PASSES) } } @@ -153,11 +172,14 @@ fn generate_rust_program(intrinsic: &Intrinsic, a32: bool) -> String { #![allow(non_upper_case_globals)] use core_arch::arch::{target_arch}::*; +{arglists} + fn main() {{ {passes} }} "#, target_arch = if a32 { "arm" } else { "aarch64" }, + arglists = intrinsic.arguments.gen_arglists_rust(PASSES), passes = gen_code_rust(intrinsic, &constraints, Default::default()) ) } @@ -203,7 +225,7 @@ fn build_c(intrinsics: &Vec<Intrinsic>, compiler: &str, a32: bool) -> bool { let c_filename = format!(r#"c_programs/{}.cpp"#, i.name); let mut file = File::create(&c_filename).unwrap(); - let c_code = generate_c_program(&["arm_neon.h", "arm_acle.h"], &i); + let c_code = generate_c_program(&["arm_neon.h", "arm_acle.h"], &i, a32); file.write_all(c_code.into_bytes().as_slice()).unwrap(); compile_c(&c_filename, &i, compiler, a32) }) @@ -259,7 +281,7 @@ path = "{intrinsic}/main.rs""#, .current_dir("rust_programs") .arg("-c") .arg(format!( - "cargo {toolchain} build --target {target}", + "cargo {toolchain} build --target {target} --release", toolchain = toolchain, target = if a32 { "armv7-unknown-linux-gnueabihf" @@ -407,7 +429,7 @@ fn compare_outputs(intrinsics: &Vec<Intrinsic>, toolchain: &str, runner: &str, a .current_dir("rust_programs") .arg("-c") .arg(format!( - "cargo {toolchain} run --target {target} --bin {intrinsic}", + "cargo {toolchain} run --target {target} --bin {intrinsic} --release", intrinsic = intrinsic.name, toolchain = toolchain, target = if a32 { diff --git a/library/stdarch/crates/intrinsic-test/src/types.rs b/library/stdarch/crates/intrinsic-test/src/types.rs index e51e61649..dd23586e7 100644 --- a/library/stdarch/crates/intrinsic-test/src/types.rs +++ b/library/stdarch/crates/intrinsic-test/src/types.rs @@ -1,7 +1,7 @@ use std::fmt; use std::str::FromStr; -use crate::values::values_for_pass; +use crate::values::value_for_array; use crate::Language; #[derive(Debug, PartialEq, Copy, Clone)] @@ -160,8 +160,7 @@ impl IntrinsicType { } } - #[allow(unused)] - fn c_scalar_type(&self) -> String { + pub fn c_scalar_type(&self) -> String { format!( "{prefix}{bits}_t", prefix = self.kind().c_prefix(), @@ -169,7 +168,7 @@ impl IntrinsicType { ) } - fn rust_scalar_type(&self) -> String { + pub fn rust_scalar_type(&self) -> String { format!( "{prefix}{bits}", prefix = self.kind().rust_prefix(), @@ -289,18 +288,19 @@ impl IntrinsicType { } } - /// Generates a comma list of values that can be used to initialize an - /// argument for the intrinsic call. + /// Generates a comma list of values that can be used to initialize the array that + /// an argument for the intrinsic call is loaded from. /// This is determistic based on the pass number. /// - /// * `pass`: The pass index, i.e. the iteration index for the call to an intrinsic + /// * `loads`: The number of values that need to be loaded from the argument array + /// * e.g for argument type uint32x2, loads=2 results in a string representing 4 32-bit values /// /// Returns a string such as /// * `0x1, 0x7F, 0xFF` if `language` is `Language::C` /// * `0x1 as _, 0x7F as _, 0xFF as _` if `language` is `Language::Rust` - pub fn populate_random(&self, pass: usize, language: &Language) -> String { + pub fn populate_random(&self, loads: u32, language: &Language) -> String { match self { - IntrinsicType::Ptr { child, .. } => child.populate_random(pass, language), + IntrinsicType::Ptr { child, .. } => child.populate_random(loads, language), IntrinsicType::Type { bit_len: Some(bit_len), kind, @@ -308,11 +308,11 @@ impl IntrinsicType { vec_len, .. } if kind == &TypeKind::Int || kind == &TypeKind::UInt || kind == &TypeKind::Poly => (0 - ..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1))) + ..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1) + loads - 1)) .map(|i| { format!( "{}{}", - values_for_pass(*bit_len, i, pass), + value_for_array(*bit_len, i), match language { &Language::Rust => format!(" as {ty} ", ty = self.rust_scalar_type()), &Language::C => String::from(""), @@ -327,15 +327,15 @@ impl IntrinsicType { simd_len, vec_len, .. - } => (0..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1))) + } => (0..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1) + loads - 1)) .map(|i| { format!( "{}({})", match language { - &Language::Rust => "f32::from_bits", + &Language::Rust => "std::mem::transmute", &Language::C => "cast<float, uint32_t>", }, - values_for_pass(32, i, pass), + value_for_array(32, i), ) }) .collect::<Vec<_>>() @@ -346,15 +346,15 @@ impl IntrinsicType { simd_len, vec_len, .. - } => (0..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1))) + } => (0..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1) + loads - 1)) .map(|i| { format!( "{}({}{})", match language { - &Language::Rust => "f64::from_bits", + &Language::Rust => "std::mem::transmute", &Language::C => "cast<double, uint64_t>", }, - values_for_pass(64, i, pass), + value_for_array(64, i), match language { &Language::Rust => " as u64", &Language::C => "", @@ -368,10 +368,9 @@ impl IntrinsicType { } /// Determines the load function for this type. - #[allow(unused)] - pub fn get_load_function(&self) -> String { + pub fn get_load_function(&self, armv7_p64_workaround: bool) -> String { match self { - IntrinsicType::Ptr { child, .. } => child.get_load_function(), + IntrinsicType::Ptr { child, .. } => child.get_load_function(armv7_p64_workaround), IntrinsicType::Type { kind: k, bit_len: Some(bl), @@ -379,7 +378,7 @@ impl IntrinsicType { vec_len, .. } => { - let quad = if (simd_len.unwrap_or(1) * bl) > 64 { + let quad = if simd_len.unwrap_or(1) * bl > 64 { "q" } else { "" @@ -390,7 +389,8 @@ impl IntrinsicType { TypeKind::UInt => "u", TypeKind::Int => "s", TypeKind::Float => "f", - TypeKind::Poly => "p", + // The ACLE doesn't support 64-bit polynomial loads on Armv7 + TypeKind::Poly => if armv7_p64_workaround && *bl == 64 {"s"} else {"p"}, x => todo!("get_load_function TypeKind: {:#?}", x), }, size = bl, diff --git a/library/stdarch/crates/intrinsic-test/src/values.rs b/library/stdarch/crates/intrinsic-test/src/values.rs index 4565edca0..64b4d9fc9 100644 --- a/library/stdarch/crates/intrinsic-test/src/values.rs +++ b/library/stdarch/crates/intrinsic-test/src/values.rs @@ -1,9 +1,8 @@ -/// Gets a hex constant value for a single lane in in a determistic way +/// Gets a hex constant value for a single value in the argument values array in a determistic way /// * `bits`: The number of bits for the type, only 8, 16, 32, 64 are valid values -/// * `simd`: The index of the simd lane we are generating for -/// * `pass`: The index of the pass we are generating the values for -pub fn values_for_pass(bits: u32, simd: u32, pass: usize) -> String { - let index = pass + (simd as usize); +/// * `index`: The position in the array we are generating for +pub fn value_for_array(bits: u32, index: u32) -> String { + let index = index as usize; if bits == 8 { format!("{:#X}", VALUES_8[index % VALUES_8.len()]) |