summaryrefslogtreecommitdiffstats
path: root/library/stdarch/crates/intrinsic-test
diff options
context:
space:
mode:
Diffstat (limited to 'library/stdarch/crates/intrinsic-test')
-rw-r--r--library/stdarch/crates/intrinsic-test/missing_aarch64.txt14
-rw-r--r--library/stdarch/crates/intrinsic-test/src/argument.rs107
-rw-r--r--library/stdarch/crates/intrinsic-test/src/intrinsic.rs65
-rw-r--r--library/stdarch/crates/intrinsic-test/src/main.rs54
-rw-r--r--library/stdarch/crates/intrinsic-test/src/types.rs44
-rw-r--r--library/stdarch/crates/intrinsic-test/src/values.rs9
6 files changed, 184 insertions, 109 deletions
diff --git a/library/stdarch/crates/intrinsic-test/missing_aarch64.txt b/library/stdarch/crates/intrinsic-test/missing_aarch64.txt
index 56ec274b5..93fc126e5 100644
--- a/library/stdarch/crates/intrinsic-test/missing_aarch64.txt
+++ b/library/stdarch/crates/intrinsic-test/missing_aarch64.txt
@@ -67,20 +67,6 @@ vrnd64xq_f64
vrnd64z_f64
vrnd64zq_f64
-# Takes too long to compile tests
-vcopyq_laneq_u8
-vcopyq_laneq_s8
-vcopyq_laneq_p8
-vcopyq_lane_u8
-vcopyq_lane_s8
-vcopyq_lane_p8
-vcopy_laneq_u8
-vcopy_laneq_s8
-vcopy_laneq_p8
-vcopy_lane_u8
-vcopy_lane_s8
-vcopy_lane_p8
-
# QEMU 6.0 doesn't support these instructions
vmmlaq_s32
vmmlaq_u32
diff --git a/library/stdarch/crates/intrinsic-test/src/argument.rs b/library/stdarch/crates/intrinsic-test/src/argument.rs
index f4cb77992..798854c03 100644
--- a/library/stdarch/crates/intrinsic-test/src/argument.rs
+++ b/library/stdarch/crates/intrinsic-test/src/argument.rs
@@ -1,6 +1,6 @@
use std::ops::Range;
-use crate::types::IntrinsicType;
+use crate::types::{IntrinsicType, TypeKind};
use crate::Language;
/// An argument for the intrinsic.
@@ -90,49 +90,108 @@ impl ArgumentList {
.join(", ")
}
- /// Creates a line that initializes this argument for C code.
- /// e.g. `int32x2_t a = { 0x1, 0x2 };`
- pub fn init_random_values_c(&self, pass: usize) -> String {
+ /// Creates a line for each argument that initializes an array for C from which `loads` argument
+ /// values can be loaded as a sliding window.
+ /// e.g `const int32x2_t a_vals = {0x3effffff, 0x3effffff, 0x3f7fffff}`, if loads=2.
+ pub fn gen_arglists_c(&self, loads: u32) -> String {
self.iter()
.filter_map(|arg| {
(!arg.has_constraint()).then(|| {
format!(
- "{ty} {name} = {{ {values} }};",
- ty = arg.to_c_type(),
+ "const {ty} {name}_vals[] = {{ {values} }};",
+ ty = arg.ty.c_scalar_type(),
name = arg.name,
- values = arg.ty.populate_random(pass, &Language::C)
+ values = arg.ty.populate_random(loads, &Language::C)
)
})
})
.collect::<Vec<_>>()
- .join("\n ")
+ .join("\n")
}
- /// Creates a line that initializes this argument for Rust code.
- /// e.g. `let a = transmute([0x1, 0x2]);`
- pub fn init_random_values_rust(&self, pass: usize) -> String {
+ /// Creates a line for each argument that initializes an array for Rust from which `loads` argument
+ /// values can be loaded as a sliding window, e.g `const A_VALS: [u32; 20] = [...];`
+ pub fn gen_arglists_rust(&self, loads: u32) -> String {
self.iter()
.filter_map(|arg| {
(!arg.has_constraint()).then(|| {
- if arg.is_simd() {
- format!(
- "let {name} = ::std::mem::transmute([{values}]);",
- name = arg.name,
- values = arg.ty.populate_random(pass, &Language::Rust),
- )
- } else {
- format!(
- "let {name} = {value};",
- name = arg.name,
- value = arg.ty.populate_random(pass, &Language::Rust)
- )
- }
+ format!(
+ "const {upper_name}_VALS: [{ty}; {load_size}] = unsafe{{ [{values}] }};",
+ upper_name = arg.name.to_uppercase(),
+ ty = arg.ty.rust_scalar_type(),
+ load_size = arg.ty.num_lanes() * arg.ty.num_vectors() + loads - 1,
+ values = arg.ty.populate_random(loads, &Language::Rust)
+ )
+ })
+ })
+ .collect::<Vec<_>>()
+ .join("\n")
+ }
+
+ /// Creates a line for each argument that initalizes the argument from an array [arg]_vals at
+ /// an offset i using a load intrinsic, in C.
+ /// e.g `uint8x8_t a = vld1_u8(&a_vals[i]);`
+ pub fn load_values_c(&self, p64_armv7_workaround: bool) -> String {
+ self.iter()
+ .filter_map(|arg| {
+ // The ACLE doesn't support 64-bit polynomial loads on Armv7
+ // This and the cast are a workaround for this
+ let armv7_p64 = if let TypeKind::Poly = arg.ty.kind() {
+ p64_armv7_workaround
+ } else {
+ false
+ };
+
+ (!arg.has_constraint()).then(|| {
+ format!(
+ "{ty} {name} = {open_cast}{load}(&{name}_vals[i]){close_cast};",
+ ty = arg.to_c_type(),
+ name = arg.name,
+ load = if arg.is_simd() {
+ arg.ty.get_load_function(p64_armv7_workaround)
+ } else {
+ "*".to_string()
+ },
+ open_cast = if armv7_p64 {
+ format!("cast<{}>(", arg.to_c_type())
+ } else {
+ "".to_string()
+ },
+ close_cast = if armv7_p64 {
+ ")".to_string()
+ } else {
+ "".to_string()
+ }
+ )
})
})
.collect::<Vec<_>>()
.join("\n ")
}
+ /// Creates a line for each argument that initalizes the argument from array [ARG]_VALS at
+ /// an offset i using a load intrinsic, in Rust.
+ /// e.g `let a = vld1_u8(A_VALS.as_ptr().offset(i));`
+ pub fn load_values_rust(&self) -> String {
+ self.iter()
+ .filter_map(|arg| {
+ (!arg.has_constraint()).then(|| {
+ format!(
+ "let {name} = {load}({upper_name}_VALS.as_ptr().offset(i));",
+ name = arg.name,
+ upper_name = arg.name.to_uppercase(),
+ load = if arg.is_simd() {
+ arg.ty.get_load_function(false)
+ } else {
+ "*".to_string()
+ },
+ )
+ })
+ })
+ .collect::<Vec<_>>()
+ .join("\n ")
+ }
+
pub fn iter(&self) -> std::slice::Iter<'_, Argument> {
self.args.iter()
}
diff --git a/library/stdarch/crates/intrinsic-test/src/intrinsic.rs b/library/stdarch/crates/intrinsic-test/src/intrinsic.rs
index 2b7130440..e0645a36b 100644
--- a/library/stdarch/crates/intrinsic-test/src/intrinsic.rs
+++ b/library/stdarch/crates/intrinsic-test/src/intrinsic.rs
@@ -20,8 +20,9 @@ pub struct Intrinsic {
impl Intrinsic {
/// Generates a std::cout for the intrinsics results that will match the
- /// rust debug output format for the return type.
- pub fn print_result_c(&self, index: usize, additional: &str) -> String {
+ /// rust debug output format for the return type. The generated line assumes
+ /// there is an int i in scope which is the current pass number.
+ pub fn print_result_c(&self, additional: &str) -> String {
let lanes = if self.results.num_vectors() > 1 {
(0..self.results.num_vectors())
.map(|vector| {
@@ -72,7 +73,7 @@ impl Intrinsic {
};
format!(
- r#"std::cout << "Result {additional}-{idx}: {ty}" << std::fixed << std::setprecision(150) << {lanes} << "{close}" << std::endl;"#,
+ r#"std::cout << "Result {additional}-" << i+1 << ": {ty}" << std::fixed << std::setprecision(150) << {lanes} << "{close}" << std::endl;"#,
ty = if self.results.is_simd() {
format!("{}(", self.results.c_type())
} else {
@@ -81,11 +82,31 @@ impl Intrinsic {
close = if self.results.is_simd() { ")" } else { "" },
lanes = lanes,
additional = additional,
- idx = index,
)
}
- pub fn generate_pass_rust(&self, index: usize, additional: &str) -> String {
+ pub fn generate_loop_c(
+ &self,
+ additional: &str,
+ passes: u32,
+ p64_armv7_workaround: bool,
+ ) -> String {
+ format!(
+ r#" {{
+ for (int i=0; i<{passes}; i++) {{
+ {loaded_args}
+ auto __return_value = {intrinsic_call}({args});
+ {print_result}
+ }}
+ }}"#,
+ loaded_args = self.arguments.load_values_c(p64_armv7_workaround),
+ intrinsic_call = self.name,
+ args = self.arguments.as_call_param_c(),
+ print_result = self.print_result_c(additional)
+ )
+ }
+
+ pub fn generate_loop_rust(&self, additional: &str, passes: u32) -> String {
let constraints = self.arguments.as_constraint_parameters_rust();
let constraints = if !constraints.is_empty() {
format!("::<{}>", constraints)
@@ -94,32 +115,20 @@ impl Intrinsic {
};
format!(
- r#"
- unsafe {{
- {initialized_args}
- let res = {intrinsic_call}{const}({args});
- println!("Result {additional}-{idx}: {{:.150?}}", res);
- }}"#,
- initialized_args = self.arguments.init_random_values_rust(index),
- intrinsic_call = self.name,
- args = self.arguments.as_call_param_rust(),
- additional = additional,
- idx = index,
- const = constraints,
- )
- }
-
- pub fn generate_pass_c(&self, index: usize, additional: &str) -> String {
- format!(
r#" {{
- {initialized_args}
- auto __return_value = {intrinsic_call}({args});
- {print_result}
+ for i in 0..{passes} {{
+ unsafe {{
+ {loaded_args}
+ let __return_value = {intrinsic_call}{const}({args});
+ println!("Result {additional}-{{}}: {{:.150?}}", i+1, __return_value);
+ }}
+ }}
}}"#,
- initialized_args = self.arguments.init_random_values_c(index),
+ loaded_args = self.arguments.load_values_rust(),
intrinsic_call = self.name,
- args = self.arguments.as_call_param_c(),
- print_result = self.print_result_c(index, additional)
+ const = constraints,
+ args = self.arguments.as_call_param_rust(),
+ additional = additional,
)
}
}
diff --git a/library/stdarch/crates/intrinsic-test/src/main.rs b/library/stdarch/crates/intrinsic-test/src/main.rs
index 1b58da2fd..43f2df08b 100644
--- a/library/stdarch/crates/intrinsic-test/src/main.rs
+++ b/library/stdarch/crates/intrinsic-test/src/main.rs
@@ -23,13 +23,21 @@ mod intrinsic;
mod types;
mod values;
+// The number of times each intrinsic will be called.
+const PASSES: u32 = 20;
+
#[derive(Debug, PartialEq)]
pub enum Language {
Rust,
C,
}
-fn gen_code_c(intrinsic: &Intrinsic, constraints: &[&Argument], name: String) -> String {
+fn gen_code_c(
+ intrinsic: &Intrinsic,
+ constraints: &[&Argument],
+ name: String,
+ p64_armv7_workaround: bool,
+) -> String {
if let Some((current, constraints)) = constraints.split_last() {
let range = current
.constraints
@@ -47,19 +55,25 @@ fn gen_code_c(intrinsic: &Intrinsic, constraints: &[&Argument], name: String) ->
name = current.name,
ty = current.ty.c_type(),
val = i,
- pass = gen_code_c(intrinsic, constraints, format!("{}-{}", name, i))
+ pass = gen_code_c(
+ intrinsic,
+ constraints,
+ format!("{}-{}", name, i),
+ p64_armv7_workaround
+ )
)
})
.collect()
} else {
- (1..20)
- .map(|idx| intrinsic.generate_pass_c(idx, &name))
- .collect::<Vec<_>>()
- .join("\n")
+ intrinsic.generate_loop_c(&name, PASSES, p64_armv7_workaround)
}
}
-fn generate_c_program(header_files: &[&str], intrinsic: &Intrinsic) -> String {
+fn generate_c_program(
+ header_files: &[&str],
+ intrinsic: &Intrinsic,
+ p64_armv7_workaround: bool,
+) -> String {
let constraints = intrinsic
.arguments
.iter()
@@ -75,7 +89,7 @@ fn generate_c_program(header_files: &[&str], intrinsic: &Intrinsic) -> String {
template<typename T1, typename T2> T1 cast(T2 x) {{
static_assert(sizeof(T1) == sizeof(T2), "sizeof T1 and T2 must be the same");
- T1 ret = 0;
+ T1 ret{{}};
memcpy(&ret, &x, sizeof(T1));
return ret;
}}
@@ -95,6 +109,8 @@ std::ostream& operator<<(std::ostream& os, poly128_t value) {{
}}
#endif
+{arglists}
+
int main(int argc, char **argv) {{
{passes}
return 0;
@@ -104,7 +120,13 @@ int main(int argc, char **argv) {{
.map(|header| format!("#include <{}>", header))
.collect::<Vec<_>>()
.join("\n"),
- passes = gen_code_c(intrinsic, constraints.as_slice(), Default::default()),
+ arglists = intrinsic.arguments.gen_arglists_c(PASSES),
+ passes = gen_code_c(
+ intrinsic,
+ constraints.as_slice(),
+ Default::default(),
+ p64_armv7_workaround
+ ),
)
}
@@ -131,10 +153,7 @@ fn gen_code_rust(intrinsic: &Intrinsic, constraints: &[&Argument], name: String)
})
.collect()
} else {
- (1..20)
- .map(|idx| intrinsic.generate_pass_rust(idx, &name))
- .collect::<Vec<_>>()
- .join("\n")
+ intrinsic.generate_loop_rust(&name, PASSES)
}
}
@@ -153,11 +172,14 @@ fn generate_rust_program(intrinsic: &Intrinsic, a32: bool) -> String {
#![allow(non_upper_case_globals)]
use core_arch::arch::{target_arch}::*;
+{arglists}
+
fn main() {{
{passes}
}}
"#,
target_arch = if a32 { "arm" } else { "aarch64" },
+ arglists = intrinsic.arguments.gen_arglists_rust(PASSES),
passes = gen_code_rust(intrinsic, &constraints, Default::default())
)
}
@@ -203,7 +225,7 @@ fn build_c(intrinsics: &Vec<Intrinsic>, compiler: &str, a32: bool) -> bool {
let c_filename = format!(r#"c_programs/{}.cpp"#, i.name);
let mut file = File::create(&c_filename).unwrap();
- let c_code = generate_c_program(&["arm_neon.h", "arm_acle.h"], &i);
+ let c_code = generate_c_program(&["arm_neon.h", "arm_acle.h"], &i, a32);
file.write_all(c_code.into_bytes().as_slice()).unwrap();
compile_c(&c_filename, &i, compiler, a32)
})
@@ -259,7 +281,7 @@ path = "{intrinsic}/main.rs""#,
.current_dir("rust_programs")
.arg("-c")
.arg(format!(
- "cargo {toolchain} build --target {target}",
+ "cargo {toolchain} build --target {target} --release",
toolchain = toolchain,
target = if a32 {
"armv7-unknown-linux-gnueabihf"
@@ -407,7 +429,7 @@ fn compare_outputs(intrinsics: &Vec<Intrinsic>, toolchain: &str, runner: &str, a
.current_dir("rust_programs")
.arg("-c")
.arg(format!(
- "cargo {toolchain} run --target {target} --bin {intrinsic}",
+ "cargo {toolchain} run --target {target} --bin {intrinsic} --release",
intrinsic = intrinsic.name,
toolchain = toolchain,
target = if a32 {
diff --git a/library/stdarch/crates/intrinsic-test/src/types.rs b/library/stdarch/crates/intrinsic-test/src/types.rs
index e51e61649..dd23586e7 100644
--- a/library/stdarch/crates/intrinsic-test/src/types.rs
+++ b/library/stdarch/crates/intrinsic-test/src/types.rs
@@ -1,7 +1,7 @@
use std::fmt;
use std::str::FromStr;
-use crate::values::values_for_pass;
+use crate::values::value_for_array;
use crate::Language;
#[derive(Debug, PartialEq, Copy, Clone)]
@@ -160,8 +160,7 @@ impl IntrinsicType {
}
}
- #[allow(unused)]
- fn c_scalar_type(&self) -> String {
+ pub fn c_scalar_type(&self) -> String {
format!(
"{prefix}{bits}_t",
prefix = self.kind().c_prefix(),
@@ -169,7 +168,7 @@ impl IntrinsicType {
)
}
- fn rust_scalar_type(&self) -> String {
+ pub fn rust_scalar_type(&self) -> String {
format!(
"{prefix}{bits}",
prefix = self.kind().rust_prefix(),
@@ -289,18 +288,19 @@ impl IntrinsicType {
}
}
- /// Generates a comma list of values that can be used to initialize an
- /// argument for the intrinsic call.
+ /// Generates a comma list of values that can be used to initialize the array that
+ /// an argument for the intrinsic call is loaded from.
/// This is determistic based on the pass number.
///
- /// * `pass`: The pass index, i.e. the iteration index for the call to an intrinsic
+ /// * `loads`: The number of values that need to be loaded from the argument array
+ /// * e.g for argument type uint32x2, loads=2 results in a string representing 4 32-bit values
///
/// Returns a string such as
/// * `0x1, 0x7F, 0xFF` if `language` is `Language::C`
/// * `0x1 as _, 0x7F as _, 0xFF as _` if `language` is `Language::Rust`
- pub fn populate_random(&self, pass: usize, language: &Language) -> String {
+ pub fn populate_random(&self, loads: u32, language: &Language) -> String {
match self {
- IntrinsicType::Ptr { child, .. } => child.populate_random(pass, language),
+ IntrinsicType::Ptr { child, .. } => child.populate_random(loads, language),
IntrinsicType::Type {
bit_len: Some(bit_len),
kind,
@@ -308,11 +308,11 @@ impl IntrinsicType {
vec_len,
..
} if kind == &TypeKind::Int || kind == &TypeKind::UInt || kind == &TypeKind::Poly => (0
- ..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1)))
+ ..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1) + loads - 1))
.map(|i| {
format!(
"{}{}",
- values_for_pass(*bit_len, i, pass),
+ value_for_array(*bit_len, i),
match language {
&Language::Rust => format!(" as {ty} ", ty = self.rust_scalar_type()),
&Language::C => String::from(""),
@@ -327,15 +327,15 @@ impl IntrinsicType {
simd_len,
vec_len,
..
- } => (0..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1)))
+ } => (0..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1) + loads - 1))
.map(|i| {
format!(
"{}({})",
match language {
- &Language::Rust => "f32::from_bits",
+ &Language::Rust => "std::mem::transmute",
&Language::C => "cast<float, uint32_t>",
},
- values_for_pass(32, i, pass),
+ value_for_array(32, i),
)
})
.collect::<Vec<_>>()
@@ -346,15 +346,15 @@ impl IntrinsicType {
simd_len,
vec_len,
..
- } => (0..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1)))
+ } => (0..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1) + loads - 1))
.map(|i| {
format!(
"{}({}{})",
match language {
- &Language::Rust => "f64::from_bits",
+ &Language::Rust => "std::mem::transmute",
&Language::C => "cast<double, uint64_t>",
},
- values_for_pass(64, i, pass),
+ value_for_array(64, i),
match language {
&Language::Rust => " as u64",
&Language::C => "",
@@ -368,10 +368,9 @@ impl IntrinsicType {
}
/// Determines the load function for this type.
- #[allow(unused)]
- pub fn get_load_function(&self) -> String {
+ pub fn get_load_function(&self, armv7_p64_workaround: bool) -> String {
match self {
- IntrinsicType::Ptr { child, .. } => child.get_load_function(),
+ IntrinsicType::Ptr { child, .. } => child.get_load_function(armv7_p64_workaround),
IntrinsicType::Type {
kind: k,
bit_len: Some(bl),
@@ -379,7 +378,7 @@ impl IntrinsicType {
vec_len,
..
} => {
- let quad = if (simd_len.unwrap_or(1) * bl) > 64 {
+ let quad = if simd_len.unwrap_or(1) * bl > 64 {
"q"
} else {
""
@@ -390,7 +389,8 @@ impl IntrinsicType {
TypeKind::UInt => "u",
TypeKind::Int => "s",
TypeKind::Float => "f",
- TypeKind::Poly => "p",
+ // The ACLE doesn't support 64-bit polynomial loads on Armv7
+ TypeKind::Poly => if armv7_p64_workaround && *bl == 64 {"s"} else {"p"},
x => todo!("get_load_function TypeKind: {:#?}", x),
},
size = bl,
diff --git a/library/stdarch/crates/intrinsic-test/src/values.rs b/library/stdarch/crates/intrinsic-test/src/values.rs
index 4565edca0..64b4d9fc9 100644
--- a/library/stdarch/crates/intrinsic-test/src/values.rs
+++ b/library/stdarch/crates/intrinsic-test/src/values.rs
@@ -1,9 +1,8 @@
-/// Gets a hex constant value for a single lane in in a determistic way
+/// Gets a hex constant value for a single value in the argument values array in a determistic way
/// * `bits`: The number of bits for the type, only 8, 16, 32, 64 are valid values
-/// * `simd`: The index of the simd lane we are generating for
-/// * `pass`: The index of the pass we are generating the values for
-pub fn values_for_pass(bits: u32, simd: u32, pass: usize) -> String {
- let index = pass + (simd as usize);
+/// * `index`: The position in the array we are generating for
+pub fn value_for_array(bits: u32, index: u32) -> String {
+ let index = index as usize;
if bits == 8 {
format!("{:#X}", VALUES_8[index % VALUES_8.len()])