//! Runtime support needed for testing the stdarch crate. //! //! This basically just disassembles the current executable and then parses the //! output once globally and then provides the `assert` function which makes //! assertions about the disassembly of a function. #![deny(rust_2018_idioms)] #![allow(clippy::missing_docs_in_private_items, clippy::print_stdout)] #[macro_use] extern crate lazy_static; #[macro_use] extern crate cfg_if; pub use assert_instr_macro::*; pub use simd_test_macro::*; use std::{cmp, collections::HashSet, env, hash, hint::black_box, str}; cfg_if! { if #[cfg(target_arch = "wasm32")] { pub mod wasm; use wasm::disassemble_myself; } else { mod disassembly; use crate::disassembly::disassemble_myself; } } lazy_static! { static ref DISASSEMBLY: HashSet = disassemble_myself(); } #[derive(Debug)] struct Function { name: String, instrs: Vec, } impl Function { fn new(n: &str) -> Self { Self { name: n.to_string(), instrs: Vec::new(), } } } impl cmp::PartialEq for Function { fn eq(&self, other: &Self) -> bool { self.name == other.name } } impl cmp::Eq for Function {} impl hash::Hash for Function { fn hash(&self, state: &mut H) { self.name.hash(state) } } /// Main entry point for this crate, called by the `#[assert_instr]` macro. /// /// This asserts that the function at `fnptr` contains the instruction /// `expected` provided. pub fn assert(shim_addr: usize, fnname: &str, expected: &str) { // Make sure that the shim is not removed black_box(shim_addr); //eprintln!("shim name: {fnname}"); let function = &DISASSEMBLY .get(&Function::new(fnname)) .unwrap_or_else(|| panic!("function \"{fnname}\" not found in the disassembly")); //eprintln!(" function: {:?}", function); let mut instrs = &function.instrs[..]; while instrs.last().map_or(false, |s| s == "nop") { instrs = &instrs[..instrs.len() - 1]; } // Look for `expected` as the first part of any instruction in this // function, e.g., tzcntl in tzcntl %rax,%rax. // // There are two cases when the expected instruction is nop: // 1. The expected intrinsic is compiled away so we can't // check for it - aka the intrinsic is not generating any code. // 2. It is a mark, indicating that the instruction will be // compiled into other instructions - mainly because of llvm // optimization. let found = expected == "nop" || instrs.iter().any(|s| s.starts_with(expected)); // Look for subroutine call instructions in the disassembly to detect whether // inlining failed: all intrinsics are `#[inline(always)]`, so calling one // intrinsic from another should not generate subroutine call instructions. let inlining_failed = if cfg!(target_arch = "x86_64") || cfg!(target_arch = "wasm32") { instrs.iter().any(|s| s.starts_with("call ")) } else if cfg!(target_arch = "x86") { instrs.windows(2).any(|s| { // On 32-bit x86 position independent code will call itself and be // immediately followed by a `pop` to learn about the current address. // Let's not take that into account when considering whether a function // failed inlining something. s[0].starts_with("call ") && s[1].starts_with("pop") // FIXME: original logic but does not match comment }) } else if cfg!(target_arch = "aarch64") { instrs.iter().any(|s| s.starts_with("bl ")) } else { // FIXME: Add detection for other archs false }; let instruction_limit = std::env::var("STDARCH_ASSERT_INSTR_LIMIT") .ok() .map_or_else( || match expected { // `cpuid` returns a pretty big aggregate structure, so exempt // it from the slightly more restrictive 22 instructions below. "cpuid" => 30, // Apparently, on Windows, LLVM generates a bunch of // saves/restores of xmm registers around these intstructions, // which exceeds the limit of 20 below. As it seems dictated by // Windows's ABI (I believe?), we probably can't do much // about it. "vzeroall" | "vzeroupper" if cfg!(windows) => 30, // Intrinsics using `cvtpi2ps` are typically "composites" and // in some cases exceed the limit. "cvtpi2ps" => 25, // core_arch/src/arm_shared/simd32 // vfmaq_n_f32_vfma : #instructions = 26 >= 22 (limit) "usad8" | "vfma" | "vfms" => 27, "qadd8" | "qsub8" | "sadd8" | "sel" | "shadd8" | "shsub8" | "usub8" | "ssub8" => 29, // core_arch/src/arm_shared/simd32 // vst1q_s64_x4_vst1 : #instructions = 22 >= 22 (limit) "vld3" => 23, // core_arch/src/arm_shared/simd32 // vld4q_lane_u32_vld4 : #instructions = 31 >= 22 (limit) "vld4" => 32, // core_arch/src/arm_shared/simd32 // vst1q_s64_x4_vst1 : #instructions = 40 >= 22 (limit) "vst1" => 41, // core_arch/src/arm_shared/simd32 // vst4q_u32_vst4 : #instructions = 26 >= 22 (limit) "vst4" => 27, // Temporary, currently the fptosi.sat and fptoui.sat LLVM // intrinsics emit unnecessary code on arm. This can be // removed once it has been addressed in LLVM. "fcvtzu" | "fcvtzs" | "vcvt" => 64, // core_arch/src/arm_shared/simd32 // vst1q_p64_x4_nop : #instructions = 33 >= 22 (limit) "nop" if fnname.contains("vst1q_p64") => 34, // Original limit was 20 instructions, but ARM DSP Intrinsics // are exactly 20 instructions long. So, bump the limit to 22 // instead of adding here a long list of exceptions. _ => 22, }, |v| v.parse().unwrap(), ); let probably_only_one_instruction = instrs.len() < instruction_limit; if found && probably_only_one_instruction && !inlining_failed { return; } // Help debug by printing out the found disassembly, and then panic as we // didn't find the instruction. println!("disassembly for {fnname}: ",); for (i, instr) in instrs.iter().enumerate() { println!("\t{i:2}: {instr}"); } if !found { panic!( "failed to find instruction `{}` in the disassembly", expected ); } else if !probably_only_one_instruction { panic!( "instruction found, but the disassembly contains too many \ instructions: #instructions = {} >= {} (limit)", instrs.len(), instruction_limit ); } else if inlining_failed { panic!( "instruction found, but the disassembly contains subroutine \ call instructions, which hint that inlining failed" ); } } pub fn assert_skip_test_ok(name: &str) { if env::var("STDARCH_TEST_EVERYTHING").is_err() { return; } panic!("skipped test `{name}` when it shouldn't be skipped"); } // See comment in `assert-instr-macro` crate for why this exists pub static mut _DONT_DEDUP: *const u8 = std::ptr::null();