1 files changed, 202 insertions, 0 deletions
diff --git a/library/stdarch/crates/stdarch-test/src/lib.rs b/library/stdarch/crates/stdarch-test/src/lib.rs
new file mode 100644
index 000000000..078736c66
--- /dev/null
+++ b/library/stdarch/crates/stdarch-test/src/lib.rs
@@ -0,0 +1,202 @@
+//! Runtime support needed for testing the stdarch crate.
+//!
+//! This basically just disassembles the current executable and then parses the
+//! output once globally and then provides the `assert` function which makes
+//! assertions about the disassembly of a function.
+#![feature(bench_black_box)] // For black_box
+#![deny(rust_2018_idioms)]
+#![allow(clippy::missing_docs_in_private_items, clippy::print_stdout)]
+
+#[macro_use]
+extern crate lazy_static;
+#[macro_use]
+extern crate cfg_if;
+
+pub use assert_instr_macro::*;
+pub use simd_test_macro::*;
+use std::{cmp, collections::HashSet, env, hash, hint::black_box, str};
+
+cfg_if! {
+    if #[cfg(target_arch = "wasm32")] {
+        pub mod wasm;
+        use wasm::disassemble_myself;
+    } else {
+        mod disassembly;
+        use crate::disassembly::disassemble_myself;
+    }
+}
+
+lazy_static! {
+    static ref DISASSEMBLY: HashSet<Function> = disassemble_myself();
+}
+
+#[derive(Debug)]
+struct Function {
+    name: String,
+    instrs: Vec<String>,
+}
+impl Function {
+    fn new(n: &str) -> Self {
+        Self {
+            name: n.to_string(),
+            instrs: Vec::new(),
+        }
+    }
+}
+
+impl cmp::PartialEq for Function {
+    fn eq(&self, other: &Self) -> bool {
+        self.name == other.name
+    }
+}
+impl cmp::Eq for Function {}
+
+impl hash::Hash for Function {
+    fn hash<H: hash::Hasher>(&self, state: &mut H) {
+        self.name.hash(state)
+    }
+}
+
+/// Main entry point for this crate, called by the `#[assert_instr]` macro.
+///
+/// This asserts that the function at `fnptr` contains the instruction
+/// `expected` provided.
+pub fn assert(shim_addr: usize, fnname: &str, expected: &str) {
+    // Make sure that the shim is not removed
+    black_box(shim_addr);
+
+    //eprintln!("shim name: {}", fnname);
+    let function = &DISASSEMBLY
+        .get(&Function::new(fnname))
+        .unwrap_or_else(|| panic!("function \"{}\" not found in the disassembly", fnname));
+    //eprintln!("  function: {:?}", function);
+
+    let mut instrs = &function.instrs[..];
+    while instrs.last().map_or(false, |s| s == "nop") {
+        instrs = &instrs[..instrs.len() - 1];
+    }
+
+    // Look for `expected` as the first part of any instruction in this
+    // function, e.g., tzcntl in tzcntl %rax,%rax.
+    //
+    // There are two cases when the expected instruction is nop:
+    // 1. The expected intrinsic is compiled away so we can't
+    // check for it - aka the intrinsic is not generating any code.
+    // 2. It is a mark, indicating that the instruction will be
+    // compiled into other instructions - mainly because of llvm
+    // optimization.
+    let found = expected == "nop" || instrs.iter().any(|s| s.starts_with(expected));
+
+    // Look for subroutine call instructions in the disassembly to detect whether
+    // inlining failed: all intrinsics are `#[inline(always)]`, so calling one
+    // intrinsic from another should not generate subroutine call instructions.
+    let inlining_failed = if cfg!(target_arch = "x86_64") || cfg!(target_arch = "wasm32") {
+        instrs.iter().any(|s| s.starts_with("call "))
+    } else if cfg!(target_arch = "x86") {
+        instrs.windows(2).any(|s| {
+            // On 32-bit x86 position independent code will call itself and be
+            // immediately followed by a `pop` to learn about the current address.
+            // Let's not take that into account when considering whether a function
+            // failed inlining something.
+            s[0].starts_with("call ") && s[1].starts_with("pop") // FIXME: original logic but does not match comment
+        })
+    } else if cfg!(target_arch = "aarch64") {
+        instrs.iter().any(|s| s.starts_with("bl "))
+    } else {
+        // FIXME: Add detection for other archs
+        false
+    };
+
+    let instruction_limit = std::env::var("STDARCH_ASSERT_INSTR_LIMIT")
+        .ok()
+        .map_or_else(
+            || match expected {
+                // `cpuid` returns a pretty big aggregate structure, so exempt
+                // it from the slightly more restrictive 22 instructions below.
+                "cpuid" => 30,
+
+                // Apparently, on Windows, LLVM generates a bunch of
+                // saves/restores of xmm registers around these intstructions,
+                // which exceeds the limit of 20 below. As it seems dictated by
+                // Windows's ABI (I believe?), we probably can't do much
+                // about it.
+                "vzeroall" | "vzeroupper" if cfg!(windows) => 30,
+
+                // Intrinsics using `cvtpi2ps` are typically "composites" and
+                // in some cases exceed the limit.
+                "cvtpi2ps" => 25,
+                // core_arch/src/arm_shared/simd32
+                // vfmaq_n_f32_vfma : #instructions = 26 >= 22 (limit)
+                "usad8" | "vfma" | "vfms" => 27,
+                "qadd8" | "qsub8" | "sadd8" | "sel" | "shadd8" | "shsub8" | "usub8" | "ssub8" => 29,
+                // core_arch/src/arm_shared/simd32
+                // vst1q_s64_x4_vst1 : #instructions = 22 >= 22 (limit)
+                "vld3" => 23,
+                // core_arch/src/arm_shared/simd32
+                // vld4q_lane_u32_vld4 : #instructions = 31 >= 22 (limit)
+                "vld4" => 32,
+                // core_arch/src/arm_shared/simd32
+                // vst1q_s64_x4_vst1 : #instructions = 40 >= 22 (limit)
+                "vst1" => 41,
+                // core_arch/src/arm_shared/simd32
+                // vst4q_u32_vst4 : #instructions = 26 >= 22 (limit)
+                "vst4" => 27,
+
+                // Temporary, currently the fptosi.sat and fptoui.sat LLVM
+                // intrinsics emit unnecessary code on arm. This can be
+                // removed once it has been addressed in LLVM.
+                "fcvtzu" | "fcvtzs" | "vcvt" => 64,
+
+                // core_arch/src/arm_shared/simd32
+                // vst1q_p64_x4_nop : #instructions = 33 >= 22 (limit)
+                "nop" if fnname.contains("vst1q_p64") => 34,
+
+                // Original limit was 20 instructions, but ARM DSP Intrinsics
+                // are exactly 20 instructions long. So, bump the limit to 22
+                // instead of adding here a long list of exceptions.
+                _ => 22,
+            },
+            |v| v.parse().unwrap(),
+        );
+    let probably_only_one_instruction = instrs.len() < instruction_limit;
+
+    if found && probably_only_one_instruction && !inlining_failed {
+        return;
+    }
+
+    // Help debug by printing out the found disassembly, and then panic as we
+    // didn't find the instruction.
+    println!("disassembly for {}: ", fnname,);
+    for (i, instr) in instrs.iter().enumerate() {
+        println!("\t{:2}: {}", i, instr);
+    }
+
+    if !found {
+        panic!(
+            "failed to find instruction `{}` in the disassembly",
+            expected
+        );
+    } else if !probably_only_one_instruction {
+        panic!(
+            "instruction found, but the disassembly contains too many \
+             instructions: #instructions = {} >= {} (limit)",
+            instrs.len(),
+            instruction_limit
+        );
+    } else if inlining_failed {
+        panic!(
+            "instruction found, but the disassembly contains subroutine \
+             call instructions, which hint that inlining failed"
+        );
+    }
+}
+
+pub fn assert_skip_test_ok(name: &str) {
+    if env::var("STDARCH_TEST_EVERYTHING").is_err() {
+        return;
+    }
+    panic!("skipped test `{}` when it shouldn't be skipped", name);
+}
+
+// See comment in `assert-instr-macro` crate for why this exists
+pub static mut _DONT_DEDUP: *const u8 = std::ptr::null();