diff options
Diffstat (limited to 'vendor/measureme')
-rw-r--r-- | vendor/measureme/.cargo-checksum.json | 1 | ||||
-rw-r--r-- | vendor/measureme/Cargo.toml | 48 | ||||
-rw-r--r-- | vendor/measureme/src/counters.rs | 999 | ||||
-rw-r--r-- | vendor/measureme/src/event_id.rs | 97 | ||||
-rw-r--r-- | vendor/measureme/src/file_header.rs | 145 | ||||
-rw-r--r-- | vendor/measureme/src/lib.rs | 55 | ||||
-rw-r--r-- | vendor/measureme/src/profiler.rs | 234 | ||||
-rw-r--r-- | vendor/measureme/src/raw_event.rs | 409 | ||||
-rw-r--r-- | vendor/measureme/src/rustc.rs | 15 | ||||
-rw-r--r-- | vendor/measureme/src/serialization.rs | 498 | ||||
-rw-r--r-- | vendor/measureme/src/stringtable.rs | 328 |
11 files changed, 2829 insertions, 0 deletions
diff --git a/vendor/measureme/.cargo-checksum.json b/vendor/measureme/.cargo-checksum.json new file mode 100644 index 000000000..e1ae53dcf --- /dev/null +++ b/vendor/measureme/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"67d6c7162b1f307c6dc161337f9db6ffac1e1224a7626daca79ffb935c2c59bb","src/counters.rs":"92baa26d73d7af342c5508adeca8c864290c0bb036c86b0de8dc039978b05767","src/event_id.rs":"0895f1ac50d05e6cc0a0cd33b098d421ee8e8e58bbcc1316415382b8aad0fc48","src/file_header.rs":"07c81f3c2a0d08a9ced0631eb04c2adf5f7294c29ad91c8d21fe29f91a4cacc3","src/lib.rs":"426736c590c588c80c06af809b21288b826d7438afd200aa82db13bbaf3e87d1","src/profiler.rs":"ee64a312a96f4560a45a92ab6d50942e041d4ef5cb35487ac17fd9ea3c208028","src/raw_event.rs":"0d5f70312582e5c38e20fbde5a5a11cbbd6be1ad293c907448e0fd996b888fbe","src/rustc.rs":"014d9a1bb61f6ebee39ff0b1d30b3ff14e34a76a71da2a53f5567398994d1eb7","src/serialization.rs":"286a750a7fc95d9611f50c8ee27e2320b96e258a0e99f54000b9c920924d455a","src/stringtable.rs":"2bbf2eb606be3499985c7772622530df8d201a8ad7352e68893b271f672eb8fb"},"package":"cbdc226fa10994e8f66a4d2f6f000148bc563a1c671b6dcd2135737018033d8a"}
\ No newline at end of file diff --git a/vendor/measureme/Cargo.toml b/vendor/measureme/Cargo.toml new file mode 100644 index 000000000..bc193fd1e --- /dev/null +++ b/vendor/measureme/Cargo.toml @@ -0,0 +1,48 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2018" +name = "measureme" +version = "10.1.0" +authors = [ + "Wesley Wiser <wwiser@gmail.com>", + "Michael Woerister <michaelwoerister@posteo>", +] +description = "Support crate for rustc's self-profiling feature" +homepage = "https://github.com/rust-lang/measureme" +documentation = "https://docs.rs/measureme" +license = "MIT OR Apache-2.0" +repository = "https://github.com/rust-lang/measureme" + +[dependencies.log] +version = "0.4" + +[dependencies.parking_lot] +version = "0.11.0" + +[dependencies.rustc-hash] +version = "1.0.1" + +[dependencies.smallvec] +version = "1.0" + +[features] +nightly = [] + +[target."cfg(all(target_arch = \"x86_64\", target_os = \"linux\"))".dependencies.memmap2] +version = "0.2.1" + +[target."cfg(all(target_arch = \"x86_64\", target_os = \"linux\"))".dependencies.perf-event-open-sys] +version = "1.0.1" + +[badges.travis-ci] +repository = "rust-lang/measureme" diff --git a/vendor/measureme/src/counters.rs b/vendor/measureme/src/counters.rs new file mode 100644 index 000000000..a72cbc16e --- /dev/null +++ b/vendor/measureme/src/counters.rs @@ -0,0 +1,999 @@ +//! Profiling counters and their implementation. +//! +//! # Available counters +//! +//! Name (for [`Counter::by_name()`]) | Counter | OSes | CPUs +//! 
--------------------------------- | ------- | ---- | ---- +//! `wall-time` | [`WallTime`] | any | any +//! `instructions:u` | [`Instructions`] | Linux | `x86_64` +//! `instructions-minus-irqs:u` | [`InstructionsMinusIrqs`] | Linux | `x86_64`<br>- AMD (since K8)<br>- Intel (since Sandy Bridge) +//! `instructions-minus-r0420:u` | [`InstructionsMinusRaw0420`] | Linux | `x86_64`<br>- AMD (Zen) +//! +//! *Note: `:u` suffixes for hardware performance counters come from the Linux `perf` +//! tool, and indicate that the counter is only active while userspace code executes +//! (i.e. it's paused while the kernel handles syscalls, interrupts, etc.).* +//! +//! # Limitations and caveats +//! +//! *Note: for more information, also see the GitHub PR which first implemented hardware +//! performance counter support ([#143](https://github.com/rust-lang/measureme/pull/143)).* +//! +//! The hardware performance counters (i.e. all counters other than `wall-time`) are limited to: +//! * Linux, for out-of-the-box performance counter reads from userspace +//! * other OSes could work through custom kernel extensions/drivers, in the future +//! * `x86_64` CPUs, mostly due to lack of other available test hardware +//! * new architectures would be easier to support (on Linux) than new OSes +//! * easiest to add would be 32-bit `x86` (aka `i686`), which would reuse +//! most of the `x86_64` CPU model detection logic +//! * specific (newer) CPU models, for certain non-standard counters +//! * e.g. `instructions-minus-irqs:u` requires a "hardware interrupts" (aka "IRQs") +//! counter, which is implemented differently between vendors / models (if at all) +//! * single-threaded programs (counters only work on the thread they were created on) +//! * for profiling `rustc`, this means only "check mode" (`--emit=metadata`), +//! is supported currently (`-Z no-llvm-threads` could also work) +//! * unclear what the best approach for handling multiple threads would be +//! * changing the API (e.g. 
to require per-thread profiler handles) could result +//! in a more efficient implementation, but would also be less ergonomic +//! * profiling data from multithreaded programs would be harder to use due to +//! noise from synchronization mechanisms, non-deterministic work-stealing, etc. +//! +//! For ergonomic reasons, the public API doesn't vary based on `features` or target. +//! Instead, attempting to create any unsupported counter will return `Err`, just +//! like it does for any issue detected at runtime (e.g. incompatible CPU model). +//! +//! When counting instructions specifically, these factors will impact the profiling quality: +//! * high-level non-determinism (e.g. user interactions, networking) +//! * the ideal use-case is a mostly-deterministic program, e.g. a compiler like `rustc` +//! * if I/O can be isolated to separate profiling events, and doesn't impact +//! execution in a more subtle way (see below), the deterministic parts of +//! the program can still be profiled with high accuracy +//! * intentional uses of randomness may change execution paths, though for +//! cryptographic operations specifically, "constant time" implementations +//! are preferred / necessary (in order to limit an external observer's +//! ability to infer secrets), so they're not as much of a problem +//! * even otherwise-deterministic machine-local communication (to e.g. system +//! services or drivers) can behave unpredictably (especially under load) +//! * while we haven't observed this in the wild yet, it's possible for +//! file reads/writes to be split up into multiple smaller chunks +//! (and therefore take more userspace instructions to fully read/write) +//! * low-level non-determinism (e.g. ASLR, randomized `HashMap`s, timers) +//! * ASLR ("Address Space Layout Randomization"), may be provided by the OS for +//! security reasons, or accidentally caused through allocations that depend on +//! random data (even as low-entropy as e.g. 
the base 10 length of a process ID) +//! * on Linux ASLR can be disabled by running the process under `setarch -R` +//! * this impacts `rustc` and LLVM, which rely on keying `HashMap`s by addresses +//! (typically of interned data) as an optimization, and while non-deterministic +//! outputs are considered bugs, the instructions executed can still vary a lot, +//! even when the externally observable behavior is perfectly repeatable +//! * `HashMap`s are involved in more than one way: +//! * both the executed instructions, and the shape of the allocations depend +//! on both the hasher state and choice of keys (as the buckets are in +//! a flat array indexed by some of the lower bits of the key hashes) +//! * so every `HashMap` with keys being/containing addresses will amplify +//! ASLR and ASLR-like effects, making the entire program more sensitive +//! * the default hasher is randomized, and while `rustc` doesn't use it, +//! proc macros can (and will), and it's harder to disable than Linux ASLR +//! * most ways of measuring time will inherently never perfectly align with +//! exact points in the program's execution, making time behave like another +//! low-entropy source of randomness - this also means timers will elapse at +//! unpredictable points (which can further impact the rest of the execution) +//! * this includes the common thread scheduler technique of preempting the +//! currently executing thread with a periodic timer interrupt, so the exact +//! interleaving of multiple threads will likely not be reproducible without +//! special OS configuration, or tools that emulate a deterministic scheduler +//! * `jemalloc` (the allocator used by `rustc`, at least in official releases) +//! has a 10 second "purge timer", which can introduce an ASLR-like effect, +//! unless disabled with `MALLOC_CONF=dirty_decay_ms:0,muzzy_decay_ms:0` +//! * hardware flaws (whether in the design or implementation) +//! 
* hardware interrupts ("IRQs") and exceptions (like page faults) cause +//! overcounting (1 instruction per interrupt, possibly the `iret` from the +//! kernel handler back to the interrupted userspace program) +//! * this is the reason why `instructions-minus-irqs:u` should be preferred +//! to `instructions:u`, where the former is available +//! * there are system-wide options (e.g. `CONFIG_NO_HZ_FULL`) for removing +//! some interrupts from the cores used for profiling, but they're not as +//! complete of a solution, nor easy to set up in the first place +//! * AMD Zen CPUs have a speculative execution feature (dubbed `SpecLockMap`), +//! which can cause non-deterministic overcounting for instructions following +//! an atomic instruction (such as found in heap allocators, or `measureme`) +//! * this is automatically detected, with a `log` message pointing the user +//! to <https://github.com/mozilla/rr/wiki/Zen> for guidance on how to +//! disable `SpecLockMap` on their system (sadly requires root access) +//! +//! Even if some of the above caveats apply for some profiling setup, as long as +//! the counters function, they can still be used, and compared with `wall-time`. +//! Chances are, they will still have less variance, as everything that impacts +//! instruction counts will also impact any time measurements. +//! +//! Also keep in mind that instruction counts do not properly reflect all kinds +//! of workloads, e.g. SIMD throughput and cache locality are unaccounted for. + +use std::error::Error; +use std::time::Instant; + +// HACK(eddyb) this is semantically `warn!` but uses `error!` because +// that's the only log level enabled by default - see also +// https://github.com/rust-lang/rust/issues/76824 +macro_rules! 
really_warn { + ($msg:literal $($rest:tt)*) => { + error!(concat!("[WARNING] ", $msg) $($rest)*) + } +} + +pub enum Counter { + WallTime(WallTime), + Instructions(Instructions), + InstructionsMinusIrqs(InstructionsMinusIrqs), + InstructionsMinusRaw0420(InstructionsMinusRaw0420), +} + +impl Counter { + pub fn by_name(name: &str) -> Result<Self, Box<dyn Error + Send + Sync>> { + Ok(match name { + WallTime::NAME => Counter::WallTime(WallTime::new()), + Instructions::NAME => Counter::Instructions(Instructions::new()?), + InstructionsMinusIrqs::NAME => { + Counter::InstructionsMinusIrqs(InstructionsMinusIrqs::new()?) + } + InstructionsMinusRaw0420::NAME => { + Counter::InstructionsMinusRaw0420(InstructionsMinusRaw0420::new()?) + } + _ => return Err(format!("{:?} is not a valid counter name", name).into()), + }) + } + + pub(super) fn describe_as_json(&self) -> String { + let (name, units) = match self { + Counter::WallTime(_) => ( + WallTime::NAME, + r#"[["ns", 1], ["μs", 1000], ["ms", 1000000], ["s", 1000000000]]"#, + ), + Counter::Instructions(_) => (Instructions::NAME, r#"[["instructions", 1]]"#), + Counter::InstructionsMinusIrqs(_) => { + (InstructionsMinusIrqs::NAME, r#"[["instructions", 1]]"#) + } + Counter::InstructionsMinusRaw0420(_) => { + (InstructionsMinusRaw0420::NAME, r#"[["instructions", 1]]"#) + } + }; + format!(r#"{{ "name": "{}", "units": {} }}"#, name, units) + } + + #[inline] + pub(super) fn since_start(&self) -> u64 { + match self { + Counter::WallTime(counter) => counter.since_start(), + Counter::Instructions(counter) => counter.since_start(), + Counter::InstructionsMinusIrqs(counter) => counter.since_start(), + Counter::InstructionsMinusRaw0420(counter) => counter.since_start(), + } + } +} + +/// "Monotonic clock" with nanosecond precision (using [`std::time::Instant`]). +/// +/// Can be obtained with `Counter::by_name("wall-time")`. 
+pub struct WallTime { + start: Instant, +} + +impl WallTime { + const NAME: &'static str = "wall-time"; + + pub fn new() -> Self { + WallTime { + start: Instant::now(), + } + } + + #[inline] + fn since_start(&self) -> u64 { + self.start.elapsed().as_nanos() as u64 + } +} + +/// "Instructions retired" hardware performance counter (userspace-only). +/// +/// Can be obtained with `Counter::by_name("instructions:u")`. +pub struct Instructions { + instructions: hw::Counter, + start: u64, +} + +impl Instructions { + const NAME: &'static str = "instructions:u"; + + pub fn new() -> Result<Self, Box<dyn Error + Send + Sync>> { + let model = hw::CpuModel::detect()?; + let instructions = hw::Counter::new(&model, HwCounterType::Instructions)?; + let start = instructions.read(); + Ok(Instructions { + instructions, + start, + }) + } + + #[inline] + fn since_start(&self) -> u64 { + self.instructions.read().wrapping_sub(self.start) + } +} + +/// More accurate [`Instructions`] (subtracting hardware interrupt counts). +/// +/// Can be obtained with `Counter::by_name("instructions-minus-irqs:u")`. 
+pub struct InstructionsMinusIrqs { + instructions: hw::Counter, + irqs: hw::Counter, + start: u64, +} + +impl InstructionsMinusIrqs { + const NAME: &'static str = "instructions-minus-irqs:u"; + + pub fn new() -> Result<Self, Box<dyn Error + Send + Sync>> { + let model = hw::CpuModel::detect()?; + let instructions = hw::Counter::new(&model, HwCounterType::Instructions)?; + let irqs = hw::Counter::new(&model, HwCounterType::Irqs)?; + let (start_instructions, start_irqs) = (&instructions, &irqs).read(); + let start = start_instructions.wrapping_sub(start_irqs); + Ok(InstructionsMinusIrqs { + instructions, + irqs, + start, + }) + } + + #[inline] + fn since_start(&self) -> u64 { + let (instructions, irqs) = (&self.instructions, &self.irqs).read(); + instructions.wrapping_sub(irqs).wrapping_sub(self.start) + } +} + +/// (Experimental) Like [`InstructionsMinusIrqs`] (but using an undocumented `r0420:u` counter). +/// +/// Can be obtained with `Counter::by_name("instructions-minus-r0420:u")`. +// +// HACK(eddyb) this is a variant of `instructions-minus-irqs:u`, where `r0420` +// is subtracted, instead of the usual "hardware interrupts" (aka IRQs). +// `r0420` is an undocumented counter on AMD Zen CPUs which appears to count +// both hardware interrupts and exceptions (such as page faults), though +// it's unclear yet what exactly it's counting (could even be `iret`s). 
+pub struct InstructionsMinusRaw0420(InstructionsMinusIrqs); + +impl InstructionsMinusRaw0420 { + const NAME: &'static str = "instructions-minus-r0420:u"; + + pub fn new() -> Result<Self, Box<dyn Error + Send + Sync>> { + let model = hw::CpuModel::detect()?; + let instructions = hw::Counter::new(&model, HwCounterType::Instructions)?; + let irqs = hw::Counter::new(&model, HwCounterType::Raw0420)?; + let (start_instructions, start_irqs) = (&instructions, &irqs).read(); + let start = start_instructions.wrapping_sub(start_irqs); + Ok(InstructionsMinusRaw0420(InstructionsMinusIrqs { + instructions, + irqs, + start, + })) + } + + #[inline] + fn since_start(&self) -> u64 { + self.0.since_start() + } +} + +trait HwCounterRead { + type Output; + fn read(&self) -> Self::Output; +} + +enum HwCounterType { + Instructions, + Irqs, + Raw0420, +} + +const BUG_REPORT_MSG: &str = + "please report this to https://github.com/rust-lang/measureme/issues/new"; + +/// Linux x86_64 implementation based on `perf_event_open` and `rdpmc`. +#[cfg(all(target_arch = "x86_64", target_os = "linux"))] +mod hw { + use memmap2::{Mmap, MmapOptions}; + use perf_event_open_sys::{bindings::*, perf_event_open}; + use std::arch::asm; + use std::convert::TryInto; + use std::error::Error; + use std::fs; + use std::mem; + use std::os::unix::io::FromRawFd; + + pub(super) struct Counter { + mmap: Mmap, + reg_idx: u32, + } + + impl Counter { + pub(super) fn new( + model: &CpuModel, + counter_type: super::HwCounterType, + ) -> Result<Self, Box<dyn Error + Send + Sync>> { + let (type_, hw_id) = match counter_type { + super::HwCounterType::Instructions => ( + perf_type_id_PERF_TYPE_HARDWARE, + perf_hw_id_PERF_COUNT_HW_INSTRUCTIONS, + ), + super::HwCounterType::Irqs => { + (perf_type_id_PERF_TYPE_RAW, model.irqs_counter_config()?) 
+ } + super::HwCounterType::Raw0420 => { + match model { + CpuModel::Amd(AmdGen::Zen) => {} + + _ => really_warn!( + "Counter::new: the undocumented `r0420` performance \ + counter has only been observed on AMD Zen CPUs" + ), + } + + (perf_type_id_PERF_TYPE_RAW, 0x04_20) + } + }; + Self::with_type_and_hw_id(type_, hw_id) + } + + fn with_type_and_hw_id( + type_: perf_type_id, + hw_id: u32, + ) -> Result<Self, Box<dyn Error + Send + Sync>> { + let mut attrs = perf_event_attr { + size: mem::size_of::<perf_event_attr>().try_into().unwrap(), + type_, + config: hw_id.into(), + ..perf_event_attr::default() + }; + + // Only record same-thread, any CPUs, and only userspace (no kernel/hypervisor). + // NOTE(eddyb) `pid = 0`, despite talking about "process id", means + // "calling process/thread", *not* "any thread in the calling process" + // (i.e. "process" is interchangeable with "main thread of the process") + // FIXME(eddyb) introduce per-thread counters and/or use `inherit` + // (and `inherit_stat`? though they might not be appropriate here) + // to be able to read the counter on more than just the initial thread. 
+ let pid = 0; + let cpu = -1; + let group_fd = -1; + attrs.set_exclude_kernel(1); + attrs.set_exclude_hv(1); + + let file = unsafe { + let fd = + perf_event_open(&mut attrs, pid, cpu, group_fd, PERF_FLAG_FD_CLOEXEC.into()); + if fd < 0 { + Err(std::io::Error::from_raw_os_error(-fd)) + } else { + Ok(fs::File::from_raw_fd(fd)) + } + }; + let file = file.map_err(|e| format!("perf_event_open failed: {:?}", e))?; + + let mmap = unsafe { + MmapOptions::new() + .len(mem::size_of::<perf_event_mmap_page>()) + .map(&file) + }; + let mmap = mmap.map_err(|e| format!("perf_event_mmap_page: mmap failed: {:?}", e))?; + + let mut counter = Counter { mmap, reg_idx: 0 }; + + let (version, compat_version, caps, index, pmc_width) = counter + .access_mmap_page_with_seqlock(|mp| { + ( + mp.version, + mp.compat_version, + unsafe { mp.__bindgen_anon_1.__bindgen_anon_1 }, + mp.index, + mp.pmc_width, + ) + }); + + info!( + "Counter::new: version={} compat_version={} index={:#x}", + version, compat_version, index, + ); + + if caps.cap_user_rdpmc() == 0 { + return Err(format!( + "perf_event_mmap_page: missing cap_user_rdpmc{}", + if caps.cap_bit0_is_deprecated() == 0 && caps.cap_bit0() == 1 { + " (ignoring legacy/broken rdpmc support)" + } else { + "" + } + ) + .into()); + } + + if index == 0 { + return Err(format!( + "perf_event_mmap_page: no allocated hardware register (ran out?)" + ) + .into()); + } + counter.reg_idx = index - 1; + + if (cfg!(not(accurate_seqlock_rdpmc)) || true) && pmc_width != 48 { + return Err(format!( + "perf_event_mmap_page: {}-bit hardware counter found, only 48-bit supported", + pmc_width + ) + .into()); + } + + Ok(counter) + } + + /// Try to access the mmap page, retrying the `attempt` closure as long + /// as the "seqlock" sequence number changes (which indicates the kernel + /// has updated one or more fields within the mmap page). 
+ #[inline] + fn access_mmap_page_with_seqlock<T>( + &self, + attempt: impl Fn(&perf_event_mmap_page) -> T, + ) -> T { + // FIXME(eddyb) it's probably UB to use regular reads, especially + // from behind `&T`, with the only synchronization being barriers. + // Probably needs atomic reads, and stronger ones at that, for the + // `lock` field, than the fields (which would be `Relaxed`?). + let mmap_page = unsafe { &*(self.mmap.as_ptr() as *const perf_event_mmap_page) }; + let barrier = || std::sync::atomic::fence(std::sync::atomic::Ordering::Acquire); + + loop { + // Grab the "seqlock" - the kernel will update this value when it + // updates any of the other fields that may be read in `attempt`. + let seq_lock = mmap_page.lock; + barrier(); + + let result = attempt(mmap_page); + + // If nothing has changed, we're done. Otherwise, keep retrying. + barrier(); + if mmap_page.lock == seq_lock { + return result; + } + } + } + } + + impl super::HwCounterRead for Counter { + type Output = u64; + + #[inline] + fn read(&self) -> u64 { + // HACK(eddyb) keep the accurate code around while not using it, + // to minimize overhead without losing the more complex implementation. + let (counter, offset, pmc_width) = if cfg!(accurate_seqlock_rdpmc) && false { + self.access_mmap_page_with_seqlock(|mp| { + let caps = unsafe { mp.__bindgen_anon_1.__bindgen_anon_1 }; + assert_ne!(caps.cap_user_rdpmc(), 0); + + ( + rdpmc(mp.index.checked_sub(1).unwrap()), + mp.offset, + mp.pmc_width, + ) + }) + } else { + (rdpmc(self.reg_idx), 0, 48) + }; + + let counter = offset + (counter as i64); + + // Sign-extend the `pmc_width`-bit value to `i64`. + (counter << (64 - pmc_width) >> (64 - pmc_width)) as u64 + } + } + + impl super::HwCounterRead for (&Counter, &Counter) { + type Output = (u64, u64); + + #[inline] + fn read(&self) -> (u64, u64) { + // HACK(eddyb) keep the accurate code around while not using it, + // to minimize overhead without losing the more complex implementation. 
+ if (cfg!(accurate_seqlock_rdpmc) || cfg!(unserialized_rdpmc)) && false { + return (self.0.read(), self.1.read()); + } + + let pmc_width = 48; + + let (a_counter, b_counter) = rdpmc_pair(self.0.reg_idx, self.1.reg_idx); + + // Sign-extend the `pmc_width`-bit values to `i64`. + ( + ((a_counter as i64) << (64 - pmc_width) >> (64 - pmc_width)) as u64, + ((b_counter as i64) << (64 - pmc_width) >> (64 - pmc_width)) as u64, + ) + } + } + + /// Read the hardware performance counter indicated by `reg_idx`. + /// + /// If the counter is signed, sign extension should be performed based on + /// the width of the register (32 to 64 bits, e.g. 48-bit seems common). + #[inline(always)] + fn rdpmc(reg_idx: u32) -> u64 { + // NOTE(eddyb) below comment is outdated (the other branch uses `cpuid`). + if cfg!(unserialized_rdpmc) && false { + // FIXME(eddyb) the Intel and AMD manuals warn about the need for + // "serializing instructions" before/after `rdpmc`, if avoiding any + // reordering is desired, but do not agree on the full set of usable + // "serializing instructions" (e.g. `mfence` isn't listed in both). + // + // The only usable, and guaranteed to work, "serializing instruction" + // appears to be `cpuid`, but it doesn't seem easy to use, especially + // due to the overlap in registers with `rdpmc` itself, and it might + // have too high of a cost, compared to serialization benefits (if any). + unserialized_rdpmc(reg_idx) + } else { + serialize_instruction_execution(); + unserialized_rdpmc(reg_idx) + } + } + + /// Read two hardware performance counters at once (see `rdpmc`). + /// + /// Should be more efficient/accurate than two `rdpmc` calls, as it + /// only requires one "serializing instruction", rather than two. + #[inline(always)] + fn rdpmc_pair(a_reg_idx: u32, b_reg_idx: u32) -> (u64, u64) { + serialize_instruction_execution(); + (unserialized_rdpmc(a_reg_idx), unserialized_rdpmc(b_reg_idx)) + } + + /// Dummy `cpuid(0)` to serialize instruction execution. 
+ #[inline(always)] + fn serialize_instruction_execution() { + unsafe { + asm!( + "xor %eax, %eax", // Intel syntax: "xor eax, eax" + // LLVM sometimes reserves `ebx` for its internal use, so we need to use + // a scratch register for it instead. + "mov %rbx, {tmp_rbx:r}", // Intel syntax: "mov {tmp_rbx:r}, rbx" + "cpuid", + "mov {tmp_rbx:r}, %rbx", // Intel syntax: "mov rbx, {tmp_rbx:r}" + tmp_rbx = lateout(reg) _, + // `cpuid` clobbers. + lateout("eax") _, + lateout("edx") _, + lateout("ecx") _, + + options(nostack), + // Older versions of LLVM do not support modifiers in + // Intel syntax inline asm; whenever Rust minimum LLVM version + // supports Intel syntax inline asm, remove and replace above + // instructions with Intel syntax version (from comments). + options(att_syntax), + ); + } + } + + /// Read the hardware performance counter indicated by `reg_idx`. + /// + /// If the counter is signed, sign extension should be performed based on + /// the width of the register (32 to 64 bits, e.g. 48-bit seems common). + #[inline(always)] + fn unserialized_rdpmc(reg_idx: u32) -> u64 { + let (lo, hi): (u32, u32); + unsafe { + asm!( + "rdpmc", + in("ecx") reg_idx, + lateout("eax") lo, + lateout("edx") hi, + options(nostack), + // Older versions of LLVM do not support modifiers in + // Intel syntax inline asm; whenever Rust minimum LLVM version + // supports Intel syntax inline asm, remove and replace above + // instructions with Intel syntax version (from comments). + options(att_syntax), + ); + } + lo as u64 | (hi as u64) << 32 + } + + /// Categorization of `x86_64` CPUs, primarily based on how they + /// support for counting "hardware interrupts" (documented or not). + pub(super) enum CpuModel { + Amd(AmdGen), + Intel(IntelGen), + } + + pub(super) enum AmdGen { + /// K8 (Hammer) to Jaguar / Puma. + PreZen, + + /// Zen / Zen+ / Zen 2. + Zen, + + /// Unknown AMD CPU, contemporary to/succeeding Zen/Zen+/Zen 2, + /// but likely similar to them. 
+ UnknownMaybeZenLike, + } + + pub(super) enum IntelGen { + /// Intel CPU predating Sandy Bridge. These are the only CPUs we + /// can't support (more) accurate instruction counting on, as they + /// don't (appear to) have any way to count "hardware interrupts". + PreBridge, + + /// Sandy Bridge / Ivy Bridge: + /// * client: Sandy Bridge (M/H) / Ivy Bridge (M/H/Gladden) + /// * server: Sandy Bridge (E/EN/EP) / Ivy Bridge (E/EN/EP/EX) + /// + /// Intel doesn't document support for counting "hardware interrupts" + /// prior to Skylake, but testing found that `HW_INTERRUPTS.RECEIVED` + /// from Skylake has existed, with the same config, as far back as + /// "Sandy Bridge" (but before that it mapped to a different event). + /// + /// These are the (pre-Skylake) *Bridge CPU models confirmed so far: + /// * Sandy Bridge (client) Family 6 Model 42 + /// Intel(R) Core(TM) i5-2520M CPU @ 2.50GHz (@alyssais) + /// * Ivy Bridge (client) Family 6 Model 58 + /// Intel(R) Core(TM) i7-3520M CPU @ 2.90GHz (@eddyb) + /// + /// We later found this paper, which on page 5 lists 12 counters, + /// for each of Nehalem/Westmere, Sandy Bridge and Ivy Bridge: + /// http://web.eece.maine.edu/~vweaver/projects/deterministic/deterministic_counters.pdf + /// It appears that both Sandy Bridge and Ivy Bridge used to have + /// `HW_INTERRUPTS.RECEIVED` documented, before Intel removed every + /// mention of the counter from newer versions of their manuals. + Bridge, + + /// Haswell / Broadwell: + /// * client: Haswell (S/ULT/GT3e) / Broadwell (U/Y/S/H/C/W) + /// * server: Haswell (E/EP/EX) / Broadwell (E/EP/EX/DE/Hewitt Lake) + /// + /// Equally as undocumented as "Sandy Bridge / Ivy Bridge" (see above). 
+ /// + /// These are the (pre-Skylake) *Well CPU models confirmed so far: + /// * Haswell (client) Family 6 Model 60 + /// Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz (@m-ou-se) + /// * Haswell (server) Family 6 Model 63 + /// Intel(R) Xeon(R) CPU E5-2697 v3 @ 2.60GHz (@cuviper) + /// * Haswell (client + GT3e) Family 6 Model 70 + /// Intel(R) Core(TM) i7-4750HQ CPU @ 2.00GHz (@nagisa) + /// Intel(R) Core(TM) i7-4770HQ CPU @ 2.20GHz (@m-ou-se) + Well, + + /// Skylake / Skylake-derived: + /// * client: Skylake (Y/U/DT/H/S) / Kaby Lake (Y/U/DT/H/S/X) / Coffee Lake (U/S/H/E) + /// * server: Skylake (SP/X/DE/W) / Cascade Lake (SP/X/W) + /// + /// Both "client" and "server" product lines have documented support + /// for counting "hardware interrupts" (`HW_INTERRUPTS.RECEIVED`). + /// + /// Intel does not make it clear that future product lines, such as + /// "Ice Lake", will continue to support this (or with what config), + /// and even "Comet Lake" (aka "10th gen") isn't explicitly listed. + Lake, + + /// Unknown Intel CPU, contemporary to/succeeding *Bridge/*Well/*Lake, + /// but likely similar to them. + UnknownMaybeLakeLike, + } + + impl CpuModel { + /// Detect the model of the current CPU using `cpuid`. + pub(super) fn detect() -> Result<Self, Box<dyn Error + Send + Sync>> { + let cpuid0 = unsafe { std::arch::x86_64::__cpuid(0) }; + let cpuid1 = unsafe { std::arch::x86_64::__cpuid(1) }; + let mut vendor = [0; 12]; + vendor[0..4].copy_from_slice(&cpuid0.ebx.to_le_bytes()); + vendor[4..8].copy_from_slice(&cpuid0.edx.to_le_bytes()); + vendor[8..12].copy_from_slice(&cpuid0.ecx.to_le_bytes()); + + let vendor = std::str::from_utf8(&vendor).map_err(|_| { + format!( + "cpuid returned non-UTF-8 vendor name: cpuid(0)={:?} cpuid(1)={:?}", + cpuid0, cpuid1 + ) + })?; + + let version = cpuid1.eax; + + let mut family = (version >> 8) & 0xf; + if family == 15 { + // Extended family. 
+ family += (version >> 20) & 0xff; + } + + let mut model = (version >> 4) & 0xf; + if family >= 15 || vendor == "GenuineIntel" && family == 6 { + // Extended model. + model += ((version >> 16) & 0xf) << 4; + } + + info!( + "CpuModel::detect: vendor={:?} family={} model={}", + vendor, family, model + ); + + match vendor { + "AuthenticAMD" => { + use self::AmdGen::*; + + let (gen, name) = match (family, model) { + (0..=14, _) | (19, _) => { + return Err(format!( + "impossible AMD64 CPU detected (Family {} Model {}); {}", + family, + model, + super::BUG_REPORT_MSG + ) + .into()); + } + + (15, _) => (PreZen, "K8 (Hammer)"), + (16, _) => (PreZen, "K10 (Barcelona/Shanghai/Istanbul)"), + (17, _) => (PreZen, "K8+K10 hybrid (Turion X2 Ultra)"), + (18, _) => (PreZen, "Fusion"), + (20, _) => (PreZen, "Bobcat"), + (21, _) => (PreZen, "Bulldozer / Piledriver / Steamroller / Excavator"), + (22, _) => (PreZen, "Jaguar / Puma"), + + (23, 1) => (Zen, "Zen (Naples/Whitehaven/Summit Ridge/Snowy Owl)"), + (23, 17) => (Zen, "Zen (Raven Ridge)"), + (23, 24) => (Zen, "Zen (Banded Kestrel/Dali) / Zen+ (Picasso)"), + (23, 8) => (Zen, "Zen+ (Pinnacle Ridge)"), + (23, 49) => (Zen, "Zen 2 (Rome/Castle Peak)"), + (23, 113) => (Zen, "Zen 2 (Matisse)"), + + (23..=0xffff_ffff, _) => { + really_warn!( + "CpuModel::detect: unknown AMD CPU (Family {} Model {}), \ + assuming Zen-like; {}", + family, + model, + super::BUG_REPORT_MSG + ); + + (UnknownMaybeZenLike, "") + } + }; + + if !name.is_empty() { + info!("CpuModel::detect: known AMD CPU: {}", name); + } + + // The `SpecLockMap` (speculative atomic aka `lock` instruction + // execution, unclear what "Map" refers to) feature in AMD Zen CPUs + // causes non-deterministic overcounting of atomic instructions, + // presumably whenever it has to roll back the speculation + // (as in, the performance counters aren't rolled back). + // Even this this may be rare when uncontended, it adds up. 
+ // + // There is an MSR bit (`MSRC001_1020[54]`) that's not officially + // documented, but which several motherboards and profiling tools + // set whenever IBS (Instruction-Based Sampling) is in use, and + // it is sometimes referred to as "disabling `SpecLockMap`" + // (hence having a name for the feature that speculates `lock`s). + // + // One way we could detect that the bit has been set would be to + // parse `uname().release` (aka `uname -r`) and look for versions + // which are known to include the patch suggested in this thread: + // https://github.com/mozilla/rr/issues/2034#issuecomment-693761247 + // + // However, one may set the bit using e.g. `wrmsr`, even on older + // kernels, so a more reliable approach is to execute some atomics + // and look at the `SpecLockMapCommit` (`r0825:u`) Zen counter, + // which only reliably remains `0` when `SpecLockMap` is disabled. + if matches!(gen, Zen | UnknownMaybeZenLike) { + if let Ok(spec_lock_map_commit) = + Counter::with_type_and_hw_id(perf_type_id_PERF_TYPE_RAW, 0x08_25) + { + use super::HwCounterRead; + + let start_spec_lock_map_commit = spec_lock_map_commit.read(); + + // Execute an atomic (`lock`) instruction, which should + // start speculative execution for following instructions + // (as long as `SpecLockMap` isn't disabled). + let mut atomic: u64 = 0; + let mut _tmp: u64 = 0; + unsafe { + asm!( + // Intel syntax: "lock xadd [{atomic}], {tmp}" + "lock xadd {tmp}, ({atomic})", + + atomic = in(reg) &mut atomic, + tmp = inout(reg) _tmp, + + // Older versions of LLVM do not support modifiers in + // Intel syntax inline asm; whenever Rust minimum LLVM + // version supports Intel syntax inline asm, remove + // and replace above instructions with Intel syntax + // version (from comments). 
+ options(att_syntax), + ); + } + + if spec_lock_map_commit.read() != start_spec_lock_map_commit { + really_warn!( + "CpuModel::detect: SpecLockMap detected, in AMD {} CPU; \ + this may add some non-deterministic noise - \ + for information on disabling SpecLockMap, see \ + https://github.com/mozilla/rr/wiki/Zen", + name + ); + } + } + } + + Ok(CpuModel::Amd(gen)) + } + + "GenuineIntel" => { + use self::IntelGen::*; + + let (gen, name) = match (family, model) { + // No need to name these, they're unsupported anyway. + (0..=5, _) => (PreBridge, ""), + (15, _) => (PreBridge, "Netburst"), + (6, 0..=41) => (PreBridge, ""), + + // Older Xeon Phi CPUs, misplaced in Family 6. + (6, 87) => (PreBridge, "Knights Landing"), + (6, 133) => (PreBridge, "Knights Mill"), + + // Older Atom CPUs, interleaved with other CPUs. + // FIXME(eddyb) figure out if these are like *Bridge/*Well. + (6, 53) | (6, 54) => (PreBridge, "Saltwell"), + (6, 55) | (6, 74) | (6, 77) | (6, 90) | (6, 93) => { + (PreBridge, "Silvermont") + } + (6, 76) => (PreBridge, "Airmont (Cherry Trail/Braswell)"), + + // Older server CPUs, numbered out of order. 
+ (6, 44) => (PreBridge, "Westmere (Gulftown/EP)"), + (6, 46) => (PreBridge, "Nehalem (EX)"), + (6, 47) => (PreBridge, "Westmere (EX)"), + + (6, 42) => (Bridge, "Sandy Bridge (M/H)"), + (6, 45) => (Bridge, "Sandy Bridge (E/EN/EP)"), + (6, 58) => (Bridge, "Ivy Bridge (M/H/Gladden)"), + (6, 62) => (Bridge, "Ivy Bridge (E/EN/EP/EX)"), + + (6, 60) => (Well, "Haswell (S)"), + (6, 61) => (Well, "Broadwell (U/Y/S)"), + (6, 63) => (Well, "Haswell (E/EP/EX)"), + (6, 69) => (Well, "Haswell (ULT)"), + (6, 70) => (Well, "Haswell (GT3e)"), + (6, 71) => (Well, "Broadwell (H/C/W)"), + (6, 79) => (Well, "Broadwell (E/EP/EX)"), + (6, 86) => (Well, "Broadwell (DE/Hewitt Lake)"), + + (6, 78) => (Lake, "Skylake (Y/U)"), + (6, 85) => (Lake, "Skylake (SP/X/DE/W) / Cascade Lake (SP/X/W)"), + (6, 94) => (Lake, "Skylake (DT/H/S)"), + (6, 142) => (Lake, "Kaby Lake (Y/U) / Coffee Lake (U)"), + (6, 158) => (Lake, "Kaby Lake (DT/H/S/X) / Coffee Lake (S/H/E)"), + + (6..=14, _) | (16..=0xffff_ffff, _) => { + really_warn!( + "CpuModel::detect: unknown Intel CPU (Family {} Model {}), \ + assuming Skylake-like; {}", + family, + model, + super::BUG_REPORT_MSG + ); + + (UnknownMaybeLakeLike, "") + } + }; + + if !name.is_empty() { + info!("CpuModel::detect: known Intel CPU: {}", name); + } + + Ok(CpuModel::Intel(gen)) + } + + _ => Err(format!( + "cpuid returned unknown CPU vendor {:?}; version={:#x}", + vendor, version + ) + .into()), + } + } + + /// Return the hardware performance counter configuration for + /// counting "hardware interrupts" (documented or not). 
+ fn irqs_counter_config(&self) -> Result<u32, Box<dyn Error + Send + Sync>> { + match self { + CpuModel::Amd(model) => match model { + AmdGen::PreZen => Ok(0x00_cf), + AmdGen::Zen | AmdGen::UnknownMaybeZenLike => Ok(0x00_2c), + }, + CpuModel::Intel(model) => match model { + IntelGen::PreBridge => Err(format!( + "counting IRQs not yet supported on Intel CPUs \ + predating Sandy Bridge; {}", + super::BUG_REPORT_MSG + ) + .into()), + IntelGen::Bridge + | IntelGen::Well + | IntelGen::Lake + | IntelGen::UnknownMaybeLakeLike => Ok(0x01_cb), + }, + } + } + } +} + +#[cfg(not(all(target_arch = "x86_64", target_os = "linux")))] +mod hw { + use std::error::Error; + + pub(super) enum Counter {} + + impl Counter { + pub(super) fn new( + model: &CpuModel, + _: super::HwCounterType, + ) -> Result<Self, Box<dyn Error + Send + Sync>> { + match *model {} + } + } + + impl super::HwCounterRead for Counter { + type Output = u64; + + #[inline] + fn read(&self) -> u64 { + match *self {} + } + } + + impl super::HwCounterRead for (&Counter, &Counter) { + type Output = (u64, u64); + + #[inline] + fn read(&self) -> (u64, u64) { + match *self.0 {} + } + } + + pub(super) enum CpuModel {} + + impl CpuModel { + pub(super) fn detect() -> Result<Self, Box<dyn Error + Send + Sync>> { + // HACK(eddyb) mark `really_warn!` (and transitively `log` macros) + // and `BUG_REPORT_MSG` as "used" to silence warnings. 
+ if false { + really_warn!("unsupported; {}", super::BUG_REPORT_MSG); + } + + let mut msg = String::new(); + let mut add_error = |s| { + if !msg.is_empty() { + msg += "; "; + } + msg += s; + }; + + if cfg!(not(target_arch = "x86_64")) { + add_error("only supported architecture is x86_64"); + } + + if cfg!(not(target_os = "linux")) { + add_error("only supported OS is Linux"); + } + + Err(msg.into()) + } + } +} diff --git a/vendor/measureme/src/event_id.rs b/vendor/measureme/src/event_id.rs new file mode 100644 index 000000000..ec4f5a4cb --- /dev/null +++ b/vendor/measureme/src/event_id.rs @@ -0,0 +1,97 @@ +use smallvec::SmallVec; + +use crate::{Profiler, StringComponent, StringId}; + +/// Event IDs are strings conforming to the following grammar: +/// +/// ```ignore +/// <event_id> = <label> {<argument>} +/// <label> = <text> +/// <argument> = '\x1E' <text> +/// <text> = regex([^[[:cntrl:]]]+) // Anything but ASCII control characters +/// ``` +/// +/// This means there's always a "label", followed by an optional list of +/// arguments. Future versions may support other optional suffixes (with a tag +/// other than '\x11' after the '\x1E' separator), such as a "category". + +/// The byte used to separate arguments from the label and each other. +pub const SEPARATOR_BYTE: &str = "\x1E"; + +/// An `EventId` is a `StringId` with the additional guarantee that the +/// corresponding string conforms to the event_id grammar. +#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)] +#[repr(C)] +pub struct EventId(StringId); + +impl EventId { + pub const INVALID: EventId = EventId(StringId::INVALID); + + #[inline] + pub fn to_string_id(self) -> StringId { + self.0 + } + + #[inline] + pub fn as_u32(self) -> u32 { + self.0.as_u32() + } + + #[inline] + pub fn from_label(label: StringId) -> EventId { + EventId(label) + } + + #[inline] + pub fn from_virtual(virtual_id: StringId) -> EventId { + EventId(virtual_id) + } + + /// Create an EventId from a raw u32 value. 
Only used internally for + /// deserialization. + #[inline] + pub fn from_u32(raw_id: u32) -> EventId { + EventId(StringId::new(raw_id)) + } +} + +pub struct EventIdBuilder<'p> { + profiler: &'p Profiler, +} + +impl<'p> EventIdBuilder<'p> { + pub fn new(profiler: &Profiler) -> EventIdBuilder<'_> { + EventIdBuilder { profiler } + } + + #[inline] + pub fn from_label(&self, label: StringId) -> EventId { + // Just forward the string ID, a single identifier is a valid event_id + EventId::from_label(label) + } + + pub fn from_label_and_arg(&self, label: StringId, arg: StringId) -> EventId { + EventId(self.profiler.alloc_string(&[ + // Label + StringComponent::Ref(label), + // Separator and start tag for arg + StringComponent::Value(SEPARATOR_BYTE), + // Arg string id + StringComponent::Ref(arg), + ])) + } + + pub fn from_label_and_args(&self, label: StringId, args: &[StringId]) -> EventId { + // Store up to 7 components on the stack: 1 label + 3 arguments + 3 argument separators + let mut parts = SmallVec::<[StringComponent<'_>; 7]>::with_capacity(1 + args.len() * 2); + + parts.push(StringComponent::Ref(label)); + + for arg in args { + parts.push(StringComponent::Value(SEPARATOR_BYTE)); + parts.push(StringComponent::Ref(*arg)); + } + + EventId(self.profiler.alloc_string(&parts[..])) + } +} diff --git a/vendor/measureme/src/file_header.rs b/vendor/measureme/src/file_header.rs new file mode 100644 index 000000000..8ad192895 --- /dev/null +++ b/vendor/measureme/src/file_header.rs @@ -0,0 +1,145 @@ +//! All binary files generated by measureme have a simple file header that +//! consists of a 4 byte file magic string and a 4 byte little-endian version +//! number. 
+use std::convert::TryInto; +use std::error::Error; +use std::path::Path; + +pub const CURRENT_FILE_FORMAT_VERSION: u32 = 8; + +pub const FILE_MAGIC_TOP_LEVEL: &[u8; 4] = b"MMPD"; +pub const FILE_MAGIC_EVENT_STREAM: &[u8; 4] = b"MMES"; +pub const FILE_MAGIC_STRINGTABLE_DATA: &[u8; 4] = b"MMSD"; +pub const FILE_MAGIC_STRINGTABLE_INDEX: &[u8; 4] = b"MMSI"; + +pub const FILE_EXTENSION: &str = "mm_profdata"; + +/// The size of the file header in bytes. Note that functions in this module +/// rely on this size to be `8`. +pub const FILE_HEADER_SIZE: usize = 8; + +pub fn write_file_header( + s: &mut dyn std::io::Write, + file_magic: &[u8; 4], +) -> Result<(), Box<dyn Error + Send + Sync>> { + // The implementation here relies on FILE_HEADER_SIZE to have the value 8. + // Let's make sure this assumption cannot be violated without being noticed. + assert_eq!(FILE_HEADER_SIZE, 8); + + s.write_all(file_magic).map_err(Box::new)?; + s.write_all(&CURRENT_FILE_FORMAT_VERSION.to_le_bytes()) + .map_err(Box::new)?; + + Ok(()) +} + +#[must_use] +pub fn verify_file_header( + bytes: &[u8], + expected_magic: &[u8; 4], + diagnostic_file_path: Option<&Path>, + stream_tag: &str, +) -> Result<(), Box<dyn Error + Send + Sync>> { + // The implementation here relies on FILE_HEADER_SIZE to have the value 8. + // Let's make sure this assumption cannot be violated without being noticed. 
+ assert_eq!(FILE_HEADER_SIZE, 8); + + let diagnostic_file_path = diagnostic_file_path.unwrap_or(Path::new("<in-memory>")); + + if bytes.len() < FILE_HEADER_SIZE { + let msg = format!( + "Error reading {} stream in file `{}`: Expected file to contain at least `{:?}` bytes but found `{:?}` bytes", + stream_tag, + diagnostic_file_path.display(), + FILE_HEADER_SIZE, + bytes.len() + ); + + return Err(From::from(msg)); + } + + let actual_magic = &bytes[0..4]; + + if actual_magic != expected_magic { + let msg = format!( + "Error reading {} stream in file `{}`: Expected file magic `{:?}` but found `{:?}`", + stream_tag, + diagnostic_file_path.display(), + expected_magic, + actual_magic + ); + + return Err(From::from(msg)); + } + + let file_format_version = u32::from_le_bytes(bytes[4..8].try_into().unwrap()); + + if file_format_version != CURRENT_FILE_FORMAT_VERSION { + let msg = format!( + "Error reading {} stream in file `{}`: Expected file format version {} but found `{}`", + stream_tag, + diagnostic_file_path.display(), + CURRENT_FILE_FORMAT_VERSION, + file_format_version + ); + + return Err(From::from(msg)); + } + + Ok(()) +} + +pub fn strip_file_header(data: &[u8]) -> &[u8] { + &data[FILE_HEADER_SIZE..] 
+} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{PageTag, SerializationSinkBuilder}; + + #[test] + fn roundtrip() { + let data_sink = SerializationSinkBuilder::new_in_memory().new_sink(PageTag::Events); + + write_file_header(&mut data_sink.as_std_write(), FILE_MAGIC_EVENT_STREAM).unwrap(); + + let data = data_sink.into_bytes(); + + verify_file_header(&data, FILE_MAGIC_EVENT_STREAM, None, "test").unwrap(); + } + + #[test] + fn invalid_magic() { + let data_sink = SerializationSinkBuilder::new_in_memory().new_sink(PageTag::Events); + write_file_header(&mut data_sink.as_std_write(), FILE_MAGIC_STRINGTABLE_DATA).unwrap(); + let mut data = data_sink.into_bytes(); + + // Invalidate the filemagic + data[2] = 0; + assert!(verify_file_header(&data, FILE_MAGIC_STRINGTABLE_DATA, None, "test").is_err()); + } + + #[test] + fn other_version() { + let data_sink = SerializationSinkBuilder::new_in_memory().new_sink(PageTag::Events); + + write_file_header(&mut data_sink.as_std_write(), FILE_MAGIC_STRINGTABLE_INDEX).unwrap(); + + let mut data = data_sink.into_bytes(); + + // Change version + data[4] = 0xFF; + data[5] = 0xFF; + data[6] = 0xFF; + data[7] = 0xFF; + assert!(verify_file_header(&data, FILE_MAGIC_STRINGTABLE_INDEX, None, "test").is_err()); + } + + #[test] + fn empty_file() { + let data: [u8; 0] = []; + + assert!(verify_file_header(&data, FILE_MAGIC_STRINGTABLE_DATA, None, "test").is_err()); + } +} diff --git a/vendor/measureme/src/lib.rs b/vendor/measureme/src/lib.rs new file mode 100644 index 000000000..f0566c4ba --- /dev/null +++ b/vendor/measureme/src/lib.rs @@ -0,0 +1,55 @@ +//! This crate provides a library for high-performance event tracing which is used by +//! the Rust compiler's unstable `-Z self-profile` feature. +//! +//! The output of a tracing session will be an `.mm_profdata` file containing a stream +//! of events and a string table that allows to decode the `StringId`s in the event stream. +//! +//! # Writing event trace files +//! +//! 
The main entry point for writing event trace files is the [`Profiler`] struct. +//! +//! To create a [`Profiler`], call the [`Profiler::new()`] function and provide a `Path` with +//! the directory and file name for the trace files. +//! Alternatively, call the [`Profiler::with_counter()`] function, to choose the [`Counter`] +//! the profiler will use for events (whereas [`Profiler::new()`] defaults to `wall-time`). +//! +//! For more information on available counters, see the [`counters`] module documentation. +//! +//! To record an event, call the [`Profiler::record_instant_event()`] method, passing a few +//! arguments: +//! - `event_kind`: a [`StringId`] which assigns an arbitrary category to the event +//! - `event_id`: a [`StringId`] which specifies the name of the event +//! - `thread_id`: a `u32` id of the thread which is recording this event +//! +//! Alternatively, events can also be recorded via the +//! [`Profiler::start_recording_interval_event()`] method. This method records a "start" event and +//! returns a `TimingGuard` object that will automatically record the corresponding "end" event +//! when it is dropped. +//! +//! To create a [`StringId`], call one of the string allocation methods: +//! - [`Profiler::alloc_string()`]: allocates a string and returns the [`StringId`] that refers +//! to it +//! +//! 
[`Counter`]: counters::Counter +#![deny(warnings)] + +#[macro_use] +extern crate log; + +pub mod counters; +pub mod event_id; +pub mod file_header; +mod profiler; +mod raw_event; +mod serialization; +pub mod stringtable; + +pub mod rustc; + +pub use crate::event_id::{EventId, EventIdBuilder}; +pub use crate::profiler::{DetachedTiming, Profiler, TimingGuard}; +pub use crate::raw_event::{RawEvent, MAX_INTERVAL_VALUE, MAX_SINGLE_VALUE}; +pub use crate::serialization::{ + split_streams, Addr, PageTag, SerializationSink, SerializationSinkBuilder, +}; +pub use crate::stringtable::{SerializableString, StringComponent, StringId, StringTableBuilder}; diff --git a/vendor/measureme/src/profiler.rs b/vendor/measureme/src/profiler.rs new file mode 100644 index 000000000..0fdf41727 --- /dev/null +++ b/vendor/measureme/src/profiler.rs @@ -0,0 +1,234 @@ +use crate::counters::Counter; +use crate::file_header::{write_file_header, FILE_MAGIC_EVENT_STREAM, FILE_MAGIC_TOP_LEVEL}; +use crate::raw_event::RawEvent; +use crate::serialization::{PageTag, SerializationSink, SerializationSinkBuilder}; +use crate::stringtable::{SerializableString, StringId, StringTableBuilder}; +use crate::{event_id::EventId, file_header::FILE_EXTENSION}; +use std::error::Error; +use std::fs; +use std::path::Path; +use std::sync::Arc; + +pub struct Profiler { + event_sink: Arc<SerializationSink>, + string_table: StringTableBuilder, + counter: Counter, +} + +impl Profiler { + pub fn new<P: AsRef<Path>>(path_stem: P) -> Result<Profiler, Box<dyn Error + Send + Sync>> { + Self::with_counter( + path_stem, + Counter::WallTime(crate::counters::WallTime::new()), + ) + } + + pub fn with_counter<P: AsRef<Path>>( + path_stem: P, + counter: Counter, + ) -> Result<Profiler, Box<dyn Error + Send + Sync>> { + let path = path_stem.as_ref().with_extension(FILE_EXTENSION); + + fs::create_dir_all(path.parent().unwrap())?; + let mut file = fs::File::create(path)?; + + // The first thing in the file must be the top-level file 
header. + write_file_header(&mut file, FILE_MAGIC_TOP_LEVEL)?; + + let sink_builder = SerializationSinkBuilder::new_from_file(file)?; + let event_sink = Arc::new(sink_builder.new_sink(PageTag::Events)); + + // The first thing in every stream we generate must be the stream header. + write_file_header(&mut event_sink.as_std_write(), FILE_MAGIC_EVENT_STREAM)?; + + let string_table = StringTableBuilder::new( + Arc::new(sink_builder.new_sink(PageTag::StringData)), + Arc::new(sink_builder.new_sink(PageTag::StringIndex)), + )?; + + let profiler = Profiler { + event_sink, + string_table, + counter, + }; + + let mut args = String::new(); + for arg in std::env::args() { + args.push_str(&arg.escape_default().to_string()); + args.push(' '); + } + + profiler.string_table.alloc_metadata(&*format!( + r#"{{ "start_time": {}, "process_id": {}, "cmd": "{}", "counter": {} }}"#, + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos(), + std::process::id(), + args, + profiler.counter.describe_as_json(), + )); + + Ok(profiler) + } + + #[inline(always)] + pub fn map_virtual_to_concrete_string(&self, virtual_id: StringId, concrete_id: StringId) { + self.string_table + .map_virtual_to_concrete_string(virtual_id, concrete_id); + } + + #[inline(always)] + pub fn bulk_map_virtual_to_single_concrete_string<I>( + &self, + virtual_ids: I, + concrete_id: StringId, + ) where + I: Iterator<Item = StringId> + ExactSizeIterator, + { + self.string_table + .bulk_map_virtual_to_single_concrete_string(virtual_ids, concrete_id); + } + + #[inline(always)] + pub fn alloc_string<STR: SerializableString + ?Sized>(&self, s: &STR) -> StringId { + self.string_table.alloc(s) + } + + /// Records an event with the given parameters. The event time is computed + /// automatically. 
+ pub fn record_instant_event(&self, event_kind: StringId, event_id: EventId, thread_id: u32) { + let raw_event = + RawEvent::new_instant(event_kind, event_id, thread_id, self.counter.since_start()); + + self.record_raw_event(&raw_event); + } + + /// Records an integer event with the given parameters. The payload is the + /// caller-supplied `value`; no counter value is read. + pub fn record_integer_event( + &self, + event_kind: StringId, + event_id: EventId, + thread_id: u32, + value: u64, + ) { + let raw_event = RawEvent::new_integer(event_kind, event_id, thread_id, value); + self.record_raw_event(&raw_event); + } + + /// Creates a "start" event and returns a `TimingGuard` that will create + /// the corresponding "end" event when it is dropped. + #[inline] + pub fn start_recording_interval_event<'a>( + &'a self, + event_kind: StringId, + event_id: EventId, + thread_id: u32, + ) -> TimingGuard<'a> { + TimingGuard { + profiler: self, + event_id, + event_kind, + thread_id, + start_count: self.counter.since_start(), + } + } + + /// Creates a "start" event and returns a `DetachedTiming`. + /// To create the corresponding "end" event, you must call + /// `finish_recording_interval_event` with the returned + /// `DetachedTiming`. + /// Since `DetachedTiming` does not capture the lifetime of `&self`, + /// this method can sometimes be more convenient than + /// `start_recording_interval_event` - e.g. it can be stored + /// in a struct without the need to add a lifetime parameter. + #[inline] + pub fn start_recording_interval_event_detached( + &self, + event_kind: StringId, + event_id: EventId, + thread_id: u32, + ) -> DetachedTiming { + DetachedTiming { + event_id, + event_kind, + thread_id, + start_count: self.counter.since_start(), + } + } + + /// Creates the corresponding "end" event for + /// the "start" event represented by `timing`. 
You + /// must have obtained `timing` from the same `Profiler` + pub fn finish_recording_interval_event(&self, timing: DetachedTiming) { + drop(TimingGuard { + profiler: self, + event_id: timing.event_id, + event_kind: timing.event_kind, + thread_id: timing.thread_id, + start_count: timing.start_count, + }); + } + + fn record_raw_event(&self, raw_event: &RawEvent) { + self.event_sink + .write_atomic(std::mem::size_of::<RawEvent>(), |bytes| { + raw_event.serialize(bytes); + }); + } +} + +/// Created by `Profiler::start_recording_interval_event_detached`. +/// Must be passed to `finish_recording_interval_event` to record an +/// "end" event. +#[must_use] +pub struct DetachedTiming { + event_id: EventId, + event_kind: StringId, + thread_id: u32, + start_count: u64, +} + +/// When dropped, this `TimingGuard` will record an "end" event in the +/// `Profiler` it was created by. +#[must_use] +pub struct TimingGuard<'a> { + profiler: &'a Profiler, + event_id: EventId, + event_kind: StringId, + thread_id: u32, + start_count: u64, +} + +impl<'a> Drop for TimingGuard<'a> { + #[inline] + fn drop(&mut self) { + let raw_event = RawEvent::new_interval( + self.event_kind, + self.event_id, + self.thread_id, + self.start_count, + self.profiler.counter.since_start(), + ); + + self.profiler.record_raw_event(&raw_event); + } +} + +impl<'a> TimingGuard<'a> { + /// This method set a new `event_id` right before actually recording the + /// event. + #[inline] + pub fn finish_with_override_event_id(mut self, event_id: EventId) { + self.event_id = event_id; + // Let's be explicit about it: Dropping the guard will record the event. 
+ drop(self) + } +} + +// Make sure that `Profiler` can be used in a multithreaded context +fn _assert_bounds() { + assert_bounds_inner(&Profiler::new("")); + fn assert_bounds_inner<S: Sized + Send + Sync + 'static>(_: &S) {} +} diff --git a/vendor/measureme/src/raw_event.rs b/vendor/measureme/src/raw_event.rs new file mode 100644 index 000000000..f181fb56f --- /dev/null +++ b/vendor/measureme/src/raw_event.rs @@ -0,0 +1,409 @@ +use crate::event_id::EventId; +use crate::stringtable::StringId; +#[cfg(target_endian = "big")] +use std::convert::TryInto; + +/// `RawEvent` is how events are stored on-disk. If you change this struct, +/// make sure that you increment `file_header::CURRENT_FILE_FORMAT_VERSION`. +#[derive(Eq, PartialEq, Debug)] +#[repr(C)] +pub struct RawEvent { + pub event_kind: StringId, + pub event_id: EventId, + pub thread_id: u32, + + // The following 96 bits store the payload values, using + // 48 bits for each. + // Interval: + // Payload 1 is start value and payload 2 is end value + // SSSSSSSSSSSSSSSSEEEEEEEEEEEEEEEESSSSSSSEEEEEEEEE + // [payload1_lower][payload2_lower][payloads_upper] + // Instant: + // Payload2 is 0xFFFF_FFFF_FFFF + // VVVVVVVVVVVVVVVV1111111111111111VVVVVVV11111111 + // [payload1_lower][payload2_lower][payloads_upper] + // Integer: + // Payload2 is 0xFFFF_FFFF_FFFE + // VVVVVVVVVVVVVVVV1111111111111111VVVVVVV11111110 + // [payload1_lower][payload2_lower][payloads_upper] + pub payload1_lower: u32, + pub payload2_lower: u32, + pub payloads_upper: u32, +} + +/// `RawEvents` that have a payload 2 value with this value are instant events. +const INSTANT_MARKER: u64 = 0xFFFF_FFFF_FFFF; +/// `RawEvents` that have a payload 2 value with this value are integer events. +const INTEGER_MARKER: u64 = INSTANT_MARKER - 1; + +/// The max value we can represent with the 48 bits available. +pub const MAX_SINGLE_VALUE: u64 = 0xFFFF_FFFF_FFFF; + +/// The max value we can represent with the 48 bits available. 
+/// The highest two values are reserved for the `INSTANT_MARKER` and `INTEGER_MARKER`. +pub const MAX_INTERVAL_VALUE: u64 = INTEGER_MARKER - 1; + +impl RawEvent { + #[inline] + pub fn new_interval( + event_kind: StringId, + event_id: EventId, + thread_id: u32, + start: u64, + end: u64, + ) -> Self { + assert!(start <= end); + assert!(end <= MAX_INTERVAL_VALUE); + + Self::pack_values(event_kind, event_id, thread_id, start, end) + } + + #[inline] + pub fn new_instant( + event_kind: StringId, + event_id: EventId, + thread_id: u32, + instant: u64, + ) -> Self { + assert!(instant <= MAX_SINGLE_VALUE); + Self::pack_values(event_kind, event_id, thread_id, instant, INSTANT_MARKER) + } + + #[inline] + pub fn new_integer( + event_kind: StringId, + event_id: EventId, + thread_id: u32, + value: u64, + ) -> Self { + assert!(value <= MAX_SINGLE_VALUE); + Self::pack_values(event_kind, event_id, thread_id, value, INTEGER_MARKER) + } + + #[inline] + fn pack_values( + event_kind: StringId, + event_id: EventId, + thread_id: u32, + value1: u64, + value2: u64, + ) -> Self { + let payload1_lower = value1 as u32; + let payload2_lower = value2 as u32; + + let value1_upper = (value1 >> 16) as u32 & 0xFFFF_0000; + let value2_upper = (value2 >> 32) as u32; + + let payloads_upper = value1_upper | value2_upper; + + Self { + event_kind, + event_id, + thread_id, + payload1_lower, + payload2_lower, + payloads_upper, + } + } + + /// The start value assuming self is an interval + #[inline] + pub fn start_value(&self) -> u64 { + self.payload1_lower as u64 | (((self.payloads_upper & 0xFFFF_0000) as u64) << 16) + } + + /// The end value assuming self is an interval + #[inline] + pub fn end_value(&self) -> u64 { + self.payload2_lower as u64 | (((self.payloads_upper & 0x0000_FFFF) as u64) << 32) + } + + /// The value assuming self is an instant or integer. 
+ #[inline] + pub fn value(&self) -> u64 { + self.payload1_lower as u64 | (((self.payloads_upper & 0xFFFF_0000) as u64) << 16) + } + + #[inline] + pub fn is_instant(&self) -> bool { + self.end_value() == INSTANT_MARKER + } + + #[inline] + pub fn is_integer(&self) -> bool { + self.end_value() == INTEGER_MARKER + } + + #[inline] + pub fn serialize(&self, bytes: &mut [u8]) { + assert!(bytes.len() == std::mem::size_of::<RawEvent>()); + + #[cfg(target_endian = "little")] + { + let raw_event_bytes: &[u8] = unsafe { + std::slice::from_raw_parts( + self as *const _ as *const u8, + std::mem::size_of::<RawEvent>(), + ) + }; + + bytes.copy_from_slice(raw_event_bytes); + } + + #[cfg(target_endian = "big")] + { + // We always emit data as little endian, which we have to do + // manually on big endian targets. + bytes[0..4].copy_from_slice(&self.event_kind.as_u32().to_le_bytes()); + bytes[4..8].copy_from_slice(&self.event_id.as_u32().to_le_bytes()); + bytes[8..12].copy_from_slice(&self.thread_id.to_le_bytes()); + bytes[12..16].copy_from_slice(&self.payload1_lower.to_le_bytes()); + bytes[16..20].copy_from_slice(&self.payload2_lower.to_le_bytes()); + bytes[20..24].copy_from_slice(&self.payloads_upper.to_le_bytes()); + } + } + + #[inline] + pub fn deserialize(bytes: &[u8]) -> RawEvent { + assert!(bytes.len() == std::mem::size_of::<RawEvent>()); + + #[cfg(target_endian = "little")] + { + let mut raw_event = RawEvent::default(); + unsafe { + let raw_event = std::slice::from_raw_parts_mut( + &mut raw_event as *mut RawEvent as *mut u8, + std::mem::size_of::<RawEvent>(), + ); + raw_event.copy_from_slice(bytes); + }; + raw_event + } + + #[cfg(target_endian = "big")] + { + RawEvent { + event_kind: StringId::new(u32::from_le_bytes(bytes[0..4].try_into().unwrap())), + event_id: EventId::from_u32(u32::from_le_bytes(bytes[4..8].try_into().unwrap())), + thread_id: u32::from_le_bytes(bytes[8..12].try_into().unwrap()), + payload1_lower: u32::from_le_bytes(bytes[12..16].try_into().unwrap()), + 
payload2_lower: u32::from_le_bytes(bytes[16..20].try_into().unwrap()), + payloads_upper: u32::from_le_bytes(bytes[20..24].try_into().unwrap()), + } + } + } +} + +impl Default for RawEvent { + fn default() -> Self { + RawEvent { + event_kind: StringId::INVALID, + event_id: EventId::INVALID, + thread_id: 0, + payload1_lower: 0, + payload2_lower: 0, + payloads_upper: 0, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn raw_event_has_expected_size() { + // A test case to prevent accidental regressions of RawEvent's size. + assert_eq!(std::mem::size_of::<RawEvent>(), 24); + } + + #[test] + fn is_instant() { + assert!(RawEvent::new_instant(StringId::INVALID, EventId::INVALID, 987, 0,).is_instant()); + + assert!( + RawEvent::new_instant(StringId::INVALID, EventId::INVALID, 987, MAX_SINGLE_VALUE,) + .is_instant() + ); + + assert!(!RawEvent::new_interval( + StringId::INVALID, + EventId::INVALID, + 987, + 0, + MAX_INTERVAL_VALUE, + ) + .is_instant()); + } + + #[test] + fn is_integer() { + let integer = RawEvent::new_integer(StringId::INVALID, EventId::INVALID, 987, 0); + assert!(integer.is_integer()); + assert_eq!(integer.value(), 0); + + let integer = RawEvent::new_integer(StringId::INVALID, EventId::INVALID, 987, 8769); + assert!(integer.is_integer()); + assert_eq!(integer.value(), 8769); + + assert!( + RawEvent::new_integer(StringId::INVALID, EventId::INVALID, 987, MAX_SINGLE_VALUE,) + .is_integer() + ); + + assert!(!RawEvent::new_interval( + StringId::INVALID, + EventId::INVALID, + 987, + 0, + MAX_INTERVAL_VALUE, + ) + .is_integer()); + } + + #[test] + #[should_panic] + fn invalid_instant_count() { + let _ = RawEvent::new_instant( + StringId::INVALID, + EventId::INVALID, + 123, + // count too large + MAX_SINGLE_VALUE + 1, + ); + } + + #[test] + #[should_panic] + fn invalid_start_count() { + let _ = RawEvent::new_interval( + StringId::INVALID, + EventId::INVALID, + 123, + // start count too large + MAX_INTERVAL_VALUE + 1, + MAX_INTERVAL_VALUE + 1, + 
); + } + + #[test] + #[should_panic] + fn invalid_end_count() { + let _ = RawEvent::new_interval( + StringId::INVALID, + EventId::INVALID, + 123, + 0, + // end count too large + MAX_INTERVAL_VALUE + 3, + ); + } + + #[test] + #[should_panic] + fn invalid_end_count2() { + let _ = RawEvent::new_interval(StringId::INVALID, EventId::INVALID, 123, 0, INTEGER_MARKER); + } + + #[test] + #[should_panic] + fn start_greater_than_end_count() { + let _ = RawEvent::new_interval( + StringId::INVALID, + EventId::INVALID, + 123, + // start count greater than end count + 1, + 0, + ); + } + + #[test] + fn start_equal_to_end_count() { + // This is allowed, make sure we don't panic + let _ = RawEvent::new_interval(StringId::INVALID, EventId::INVALID, 123, 1, 1); + } + + #[test] + fn interval_count_decoding() { + // Check the upper limits + let e = RawEvent::new_interval( + StringId::INVALID, + EventId::INVALID, + 1234, + MAX_INTERVAL_VALUE, + MAX_INTERVAL_VALUE, + ); + + assert_eq!(e.start_value(), MAX_INTERVAL_VALUE); + assert_eq!(e.end_value(), MAX_INTERVAL_VALUE); + + // Check the lower limits + let e = RawEvent::new_interval(StringId::INVALID, EventId::INVALID, 1234, 0, 0); + + assert_eq!(e.start_value(), 0); + assert_eq!(e.end_value(), 0); + + // Check that end does not bleed into start + let e = RawEvent::new_interval( + StringId::INVALID, + EventId::INVALID, + 1234, + 0, + MAX_INTERVAL_VALUE, + ); + + assert_eq!(e.start_value(), 0); + assert_eq!(e.end_value(), MAX_INTERVAL_VALUE); + + // Test some random values + let e = RawEvent::new_interval( + StringId::INVALID, + EventId::INVALID, + 1234, + 0x1234567890, + 0x1234567890A, + ); + + assert_eq!(e.start_value(), 0x1234567890); + assert_eq!(e.end_value(), 0x1234567890A); + } + + #[test] + fn instant_count_decoding() { + assert_eq!( + RawEvent::new_instant(StringId::INVALID, EventId::INVALID, 987, 0,).start_value(), + 0 + ); + + assert_eq!( + RawEvent::new_instant(StringId::INVALID, EventId::INVALID, 987, 42,).start_value(), + 42 + 
); + + assert_eq!( + RawEvent::new_instant(StringId::INVALID, EventId::INVALID, 987, MAX_SINGLE_VALUE,) + .start_value(), + MAX_SINGLE_VALUE + ); + } + + #[test] + fn integer_decoding() { + assert_eq!( + RawEvent::new_integer(StringId::INVALID, EventId::INVALID, 987, 0,).start_value(), + 0 + ); + + assert_eq!( + RawEvent::new_integer(StringId::INVALID, EventId::INVALID, 987, 42,).start_value(), + 42 + ); + + assert_eq!( + RawEvent::new_integer(StringId::INVALID, EventId::INVALID, 987, MAX_SINGLE_VALUE,) + .start_value(), + MAX_SINGLE_VALUE + ); + } +} diff --git a/vendor/measureme/src/rustc.rs b/vendor/measureme/src/rustc.rs new file mode 100644 index 000000000..11986561f --- /dev/null +++ b/vendor/measureme/src/rustc.rs @@ -0,0 +1,15 @@ +//! This module contains functionality specific to to the measureme integration with rustc + +pub const QUERY_EVENT_KIND: &str = "Query"; + +pub const GENERIC_ACTIVITY_EVENT_KIND: &str = "GenericActivity"; + +pub const INCREMENTAL_LOAD_RESULT_EVENT_KIND: &str = "IncrementalLoadResult"; + +pub const INCREMENTAL_RESULT_HASHING_EVENT_KIND: &str = "IncrementalResultHashing"; + +pub const QUERY_BLOCKED_EVENT_KIND: &str = "QueryBlocked"; + +pub const QUERY_CACHE_HIT_EVENT_KIND: &str = "QueryCacheHit"; + +pub const ARTIFACT_SIZE_EVENT_KIND: &str = "ArtifactSize"; diff --git a/vendor/measureme/src/serialization.rs b/vendor/measureme/src/serialization.rs new file mode 100644 index 000000000..6dcc51d39 --- /dev/null +++ b/vendor/measureme/src/serialization.rs @@ -0,0 +1,498 @@ +/// This module implements the "container" file format that `measureme` uses for
+/// storing things on disk. The format supports storing three independent
+/// streams of data: one for events, one for string data, and one for string
+/// index data (in theory it could support an arbitrary number of separate
+/// streams but three is all we need). The data of each stream is split into
+/// "pages", where each page has a small header designating what kind of
+/// data it is (i.e. event, string data, or string index), and the length of
+/// the page.
+///
+/// Pages of different kinds can be arbitrarily interleaved. The headers allow
+/// for reconstructing each of the streams later on. An example file might thus
+/// look like this:
+///
+/// ```ignore
+/// | file header | page (events) | page (string data) | page (events) | page (string index) |
+/// ```
+///
+/// The exact encoding of a page is:
+///
+/// | byte slice | contents |
+/// |-------------------------|-----------------------------------------|
+/// | &[0 .. 1] | page tag |
+/// | &[1 .. 5] | page size as little endian u32 |
+/// | &[5 .. (5 + page_size)] | page contents (exactly page_size bytes) |
+///
+/// A page is immediately followed by the next page, without any padding.
+use parking_lot::Mutex;
+use rustc_hash::FxHashMap;
+use std::cmp::min;
+use std::convert::TryInto;
+use std::error::Error;
+use std::fmt::Debug;
+use std::fs;
+use std::io::Write;
+use std::sync::Arc;
+
/// The largest number of content bytes a single page may hold. Writing a page
/// asserts that its contents do not exceed this size, and a sink's buffer is
/// flushed before a write would grow it past this size.
const MAX_PAGE_SIZE: usize = 256 * 1024;

/// The number of bytes we consider enough to warrant their own page when
/// deciding whether to flush a partially full buffer. Actual pages may need
/// to be smaller, e.g. when writing the tail of the data stream.
const MIN_PAGE_SIZE: usize = MAX_PAGE_SIZE / 2;
+
/// Identifies which data stream a page belongs to. The discriminant is the
/// byte that is stored as the page tag in the page header.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum PageTag {
    Events = 0,
    StringData = 1,
    StringIndex = 2,
}

impl std::convert::TryFrom<u8> for PageTag {
    type Error = String;

    /// Decodes a page tag byte. Any byte other than `0`, `1`, or `2` yields
    /// an `Err` carrying a message that names the offending byte.
    fn try_from(value: u8) -> Result<Self, Self::Error> {
        let tag = match value {
            0 => PageTag::Events,
            1 => PageTag::StringData,
            2 => PageTag::StringIndex,
            _ => return Err(format!("Could not convert byte `{}` to PageTag.", value)),
        };

        Ok(tag)
    }
}
+
/// An address within a data stream. Each data stream has its own address space,
/// i.e. the first piece of data written to the events stream will have
/// `Addr(0)` and the first piece of data written to the string data stream
/// will *also* have `Addr(0)`.
//
// TODO: Evaluate if it makes sense to add a type tag to `Addr` in order to
//       prevent accidental use of `Addr` values with the wrong address space.
#[derive(Clone, Copy, Eq, PartialEq, Debug)]
pub struct Addr(pub u32);

impl Addr {
    /// Returns the raw address widened to `usize`.
    pub fn as_usize(self) -> usize {
        let Addr(raw) = self;
        raw as usize
    }
}
+
+#[derive(Debug)]
+pub struct SerializationSink {
+ shared_state: SharedState,
+ data: Mutex<SerializationSinkInner>,
+ page_tag: PageTag,
+}
+
+pub struct SerializationSinkBuilder(SharedState);
+
+impl SerializationSinkBuilder {
+ pub fn new_from_file(file: fs::File) -> Result<Self, Box<dyn Error + Send + Sync>> {
+ Ok(Self(SharedState(Arc::new(Mutex::new(
+ BackingStorage::File(file),
+ )))))
+ }
+
+ pub fn new_in_memory() -> SerializationSinkBuilder {
+ Self(SharedState(Arc::new(Mutex::new(BackingStorage::Memory(
+ Vec::new(),
+ )))))
+ }
+
+ pub fn new_sink(&self, page_tag: PageTag) -> SerializationSink {
+ SerializationSink {
+ data: Mutex::new(SerializationSinkInner {
+ buffer: Vec::with_capacity(MAX_PAGE_SIZE),
+ addr: 0,
+ }),
+ shared_state: self.0.clone(),
+ page_tag,
+ }
+ }
+}
+
/// The `BackingStorage` is what the data gets written to. Usually that is a
/// file but for testing purposes it can also be an in-memory vec of bytes.
#[derive(Debug)]
enum BackingStorage {
    File(fs::File),
    Memory(Vec<u8>),
}

impl Write for BackingStorage {
    /// Forwards the write to the wrapped file or byte vector.
    #[inline]
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        match self {
            BackingStorage::File(file) => file.write(buf),
            BackingStorage::Memory(bytes) => bytes.write(buf),
        }
    }

    /// Flushes the wrapped file; in-memory storage has nothing to flush.
    fn flush(&mut self) -> std::io::Result<()> {
        match self {
            BackingStorage::File(file) => file.flush(),
            BackingStorage::Memory(_) => Ok(()),
        }
    }
}
+
+/// This struct allows to treat `SerializationSink` as `std::io::Write`.
+pub struct StdWriteAdapter<'a>(&'a SerializationSink);
+
+impl<'a> Write for StdWriteAdapter<'a> {
+ fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
+ self.0.write_bytes_atomic(buf);
+ Ok(buf.len())
+ }
+
+ fn flush(&mut self) -> std::io::Result<()> {
+ let mut data = self.0.data.lock();
+ let SerializationSinkInner {
+ ref mut buffer,
+ addr: _,
+ } = *data;
+
+ // First flush the local buffer.
+ self.0.flush(buffer);
+
+ // Then flush the backing store.
+ self.0.shared_state.0.lock().flush()?;
+
+ Ok(())
+ }
+}
+
/// The mutable part of a `SerializationSink`, kept behind the sink's mutex.
#[derive(Debug)]
struct SerializationSinkInner {
    /// Bytes accepted by the sink that have not been written out as a page yet.
    buffer: Vec<u8>,
    /// The address that the next write to this sink will be assigned.
    addr: u32,
}
+
+/// This state is shared between all `SerializationSink`s writing to the same
+/// backing storage (e.g. the same file).
+#[derive(Clone, Debug)]
+struct SharedState(Arc<Mutex<BackingStorage>>);
+
+impl SharedState {
+ /// Copies out the contents of all pages with the given tag and
+ /// concatenates them into a single byte vec. This method is only meant to
+ /// be used for testing and will panic if the underlying backing storage is
+ /// a file instead of in memory.
+ fn copy_bytes_with_page_tag(&self, page_tag: PageTag) -> Vec<u8> {
+ let data = self.0.lock();
+ let data = match *data {
+ BackingStorage::File(_) => panic!(),
+ BackingStorage::Memory(ref data) => data,
+ };
+
+ split_streams(data).remove(&page_tag).unwrap_or(Vec::new())
+ }
+}
+
+/// This function reconstructs the individual data streams from their paged
+/// version.
+///
+/// For example, if `E` denotes the page header of an events page, `S` denotes
+/// the header of a string data page, and lower case letters denote page
+/// contents then a paged stream could look like:
+///
+/// ```ignore
+/// s = Eabcd_Sopq_Eef_Eghi_Srst
+/// ```
+///
+/// and `split_streams` would result in the following set of streams:
+///
+/// ```ignore
+/// split_streams(s) = {
+/// events: [abcdefghi],
+/// string_data: [opqrst],
+/// }
+/// ```
+pub fn split_streams(paged_data: &[u8]) -> FxHashMap<PageTag, Vec<u8>> {
+ let mut result: FxHashMap<PageTag, Vec<u8>> = FxHashMap::default();
+
+ let mut pos = 0;
+ while pos < paged_data.len() {
+ let tag = TryInto::try_into(paged_data[pos]).unwrap();
+ let page_size =
+ u32::from_le_bytes(paged_data[pos + 1..pos + 5].try_into().unwrap()) as usize;
+
+ assert!(page_size > 0);
+
+ result
+ .entry(tag)
+ .or_default()
+ .extend_from_slice(&paged_data[pos + 5..pos + 5 + page_size]);
+
+ pos += page_size + 5;
+ }
+
+ result
+}
+
+impl SerializationSink {
+ /// Writes `bytes` as a single page to the shared backing storage. The
+ /// method will first write the page header (consisting of the page tag and
+ /// the number of bytes in the page) and then the page contents
+ /// (i.e. `bytes`).
+ fn write_page(&self, bytes: &[u8]) {
+ if bytes.len() > 0 {
+ // We explicitly don't assert `bytes.len() >= MIN_PAGE_SIZE` because
+ // `MIN_PAGE_SIZE` is just a recommendation and the last page will
+ // often be smaller than that.
+ assert!(bytes.len() <= MAX_PAGE_SIZE);
+
+ let mut file = self.shared_state.0.lock();
+
+ file.write_all(&[self.page_tag as u8]).unwrap();
+
+ let page_size: [u8; 4] = (bytes.len() as u32).to_le_bytes();
+ file.write_all(&page_size).unwrap();
+ file.write_all(&bytes[..]).unwrap();
+ }
+ }
+
+ /// Flushes `buffer` by writing its contents as a new page to the backing
+ /// storage and then clearing it.
+ fn flush(&self, buffer: &mut Vec<u8>) {
+ self.write_page(&buffer[..]);
+ buffer.clear();
+ }
+
+ /// Creates a copy of all data written so far. This method is meant to be
+ /// used for writing unit tests. It will panic if the underlying
+ /// `BackingStorage` is a file.
+ pub fn into_bytes(mut self) -> Vec<u8> {
+ // Swap out the contains of `self` with something that can safely be
+ // dropped without side effects.
+ let mut data = Mutex::new(SerializationSinkInner {
+ buffer: Vec::new(),
+ addr: 0,
+ });
+ std::mem::swap(&mut self.data, &mut data);
+
+ // Extract the data from the mutex.
+ let SerializationSinkInner {
+ ref mut buffer,
+ addr: _,
+ } = data.into_inner();
+
+ // Make sure we write the current contents of the buffer to the
+ // backing storage before proceeding.
+ self.flush(buffer);
+
+ self.shared_state.copy_bytes_with_page_tag(self.page_tag)
+ }
+
+ /// Atomically writes `num_bytes` of data to this `SerializationSink`.
+ /// Atomic means the data is guaranteed to be written as a contiguous range
+ /// of bytes.
+ ///
+ /// The buffer provided to the `write` callback is guaranteed to be of size
+ /// `num_bytes` and `write` is supposed to completely fill it with the data
+ /// to be written.
+ ///
+ /// The return value is the address of the data written and can be used to
+ /// refer to the data later on.
+ pub fn write_atomic<W>(&self, num_bytes: usize, write: W) -> Addr
+ where
+ W: FnOnce(&mut [u8]),
+ {
+ if num_bytes > MAX_PAGE_SIZE {
+ let mut bytes = vec![0u8; num_bytes];
+ write(&mut bytes[..]);
+ return self.write_bytes_atomic(&bytes[..]);
+ }
+
+ let mut data = self.data.lock();
+ let SerializationSinkInner {
+ ref mut buffer,
+ ref mut addr,
+ } = *data;
+
+ if buffer.len() + num_bytes > MAX_PAGE_SIZE {
+ self.flush(buffer);
+ assert!(buffer.is_empty());
+ }
+
+ let curr_addr = *addr;
+
+ let buf_start = buffer.len();
+ let buf_end = buf_start + num_bytes;
+ buffer.resize(buf_end, 0u8);
+ write(&mut buffer[buf_start..buf_end]);
+
+ *addr += num_bytes as u32;
+
+ Addr(curr_addr)
+ }
+
+ /// Atomically writes the data in `bytes` to this `SerializationSink`.
+ /// Atomic means the data is guaranteed to be written as a contiguous range
+ /// of bytes.
+ ///
+ /// This method may perform better than `write_atomic` because it may be
+ /// able to skip the sink's internal buffer. Use this method if the data to
+ /// be written is already available as a `&[u8]`.
+ ///
+ /// The return value is the address of the data written and can be used to
+ /// refer to the data later on.
+ pub fn write_bytes_atomic(&self, bytes: &[u8]) -> Addr {
+ // For "small" data we go to the buffered version immediately.
+ if bytes.len() <= 128 {
+ return self.write_atomic(bytes.len(), |sink| {
+ sink.copy_from_slice(bytes);
+ });
+ }
+
+ let mut data = self.data.lock();
+ let SerializationSinkInner {
+ ref mut buffer,
+ ref mut addr,
+ } = *data;
+
+ let curr_addr = Addr(*addr);
+ *addr += bytes.len() as u32;
+
+ let mut bytes_left = bytes;
+
+ // Do we have too little data in the buffer? If so, fill up the buffer
+ // to the minimum page size.
+ if buffer.len() < MIN_PAGE_SIZE {
+ let num_bytes_to_take = min(MIN_PAGE_SIZE - buffer.len(), bytes_left.len());
+ buffer.extend_from_slice(&bytes_left[..num_bytes_to_take]);
+ bytes_left = &bytes_left[num_bytes_to_take..];
+ }
+
+ if bytes_left.is_empty() {
+ return curr_addr;
+ }
+
+ // Make sure we flush the buffer before writing out any other pages.
+ self.flush(buffer);
+
+ for chunk in bytes_left.chunks(MAX_PAGE_SIZE) {
+ if chunk.len() == MAX_PAGE_SIZE {
+ // This chunk has the maximum size. It might or might not be the
+ // last one. In either case we want to write it to disk
+ // immediately because there is no reason to copy it to the
+ // buffer first.
+ self.write_page(chunk);
+ } else {
+ // This chunk is less than the chunk size that we requested, so
+ // it must be the last one. If it is big enough to warrant its
+ // own page, we write it to disk immediately. Otherwise, we copy
+ // it to the buffer.
+ if chunk.len() >= MIN_PAGE_SIZE {
+ self.write_page(chunk);
+ } else {
+ debug_assert!(buffer.is_empty());
+ buffer.extend_from_slice(chunk);
+ }
+ }
+ }
+
+ curr_addr
+ }
+
+ pub fn as_std_write<'a>(&'a self) -> impl Write + 'a {
+ StdWriteAdapter(self)
+ }
+}
+
+impl Drop for SerializationSink {
+ fn drop(&mut self) {
+ let mut data = self.data.lock();
+ let SerializationSinkInner {
+ ref mut buffer,
+ addr: _,
+ } = *data;
+
+ self.flush(buffer);
+ }
+}
+
#[cfg(test)]
mod tests {
    use super::*;

    // This function writes `chunk_count` byte-slices of size `chunk_size` to
    // three `SerializationSinks` that all map to the same underlying stream,
    // so we get interleaved pages with different tags.
    // It then extracts the data out again and asserts that it is the same as
    // has been written, both in total length and in content.
    fn test_roundtrip<W>(chunk_size: usize, chunk_count: usize, write: W)
    where
        W: Fn(&SerializationSink, &[u8]) -> Addr,
    {
        let sink_builder = SerializationSinkBuilder::new_in_memory();
        let tags = [PageTag::Events, PageTag::StringData, PageTag::StringIndex];
        let expected_chunk: Vec<u8> = (0..chunk_size).map(|x| (x % 239) as u8).collect();

        {
            let sinks: Vec<SerializationSink> =
                tags.iter().map(|&tag| sink_builder.new_sink(tag)).collect();

            for chunk_index in 0..chunk_count {
                let expected_addr = Addr((chunk_index * chunk_size) as u32);
                for sink in sinks.iter() {
                    assert_eq!(write(sink, &expected_chunk[..]), expected_addr);
                }
            }

            // The sinks are dropped at the end of this scope, which writes
            // their remaining buffered bytes to the backing storage.
        }

        for &tag in tags.iter() {
            let stream = sink_builder.0.copy_bytes_with_page_tag(tag);

            // Without this check a stream that lost data (or is empty) would
            // still pass the per-chunk comparison below.
            assert_eq!(stream.len(), chunk_size * chunk_count);

            for chunk in stream.chunks(chunk_size) {
                assert_eq!(chunk, &expected_chunk[..]);
            }
        }
    }

    fn write_closure(sink: &SerializationSink, bytes: &[u8]) -> Addr {
        sink.write_atomic(bytes.len(), |dest| dest.copy_from_slice(bytes))
    }

    fn write_slice(sink: &SerializationSink, bytes: &[u8]) -> Addr {
        sink.write_bytes_atomic(bytes)
    }

    // Creates two roundtrip tests, one using `SerializationSink::write_atomic`
    // and one using `SerializationSink::write_bytes_atomic`.
    macro_rules! mk_roundtrip_test {
        ($name:ident, $chunk_size:expr, $chunk_count:expr) => {
            mod $name {
                use super::*;

                #[test]
                fn write_atomic() {
                    test_roundtrip($chunk_size, $chunk_count, write_closure);
                }

                #[test]
                fn write_bytes_atomic() {
                    test_roundtrip($chunk_size, $chunk_count, write_slice);
                }
            }
        };
    }

    mk_roundtrip_test!(small_data, 10, (90 * MAX_PAGE_SIZE) / 100);
    mk_roundtrip_test!(huge_data, MAX_PAGE_SIZE * 10, 5);

    mk_roundtrip_test!(exactly_max_page_size, MAX_PAGE_SIZE, 10);
    mk_roundtrip_test!(max_page_size_plus_one, MAX_PAGE_SIZE + 1, 10);
    mk_roundtrip_test!(max_page_size_minus_one, MAX_PAGE_SIZE - 1, 10);

    mk_roundtrip_test!(exactly_min_page_size, MIN_PAGE_SIZE, 10);
    mk_roundtrip_test!(min_page_size_plus_one, MIN_PAGE_SIZE + 1, 10);
    mk_roundtrip_test!(min_page_size_minus_one, MIN_PAGE_SIZE - 1, 10);
}
diff --git a/vendor/measureme/src/stringtable.rs b/vendor/measureme/src/stringtable.rs new file mode 100644 index 000000000..a56bbcbfc --- /dev/null +++ b/vendor/measureme/src/stringtable.rs @@ -0,0 +1,328 @@ +//! A string table implementation with a tree-like encoding. +//! +//! Each entry in the table represents a string and is encoded as a list of +//! components where each component can either be +//! +//! 1. a string _value_ that contains actual UTF-8 string content, +//! 2. a string _ID_ that contains a reference to another entry, or +//! 3. a terminator tag which marks the end of a component list. +//! +//! The string _content_ of an entry is defined as the concatenation of the +//! content of its components. The content of a string value is its actual +//! UTF-8 bytes. The content of a string ID is the contents of the entry +//! it references. +//! +//! The byte-level encoding of component lists uses the structure of UTF-8 in +//! order to save space: +//! +//! - A valid UTF-8 codepoint never starts with the byte `0xFE`. We make use +//! of this fact by letting all string ID components start with this `0xFE` +//! prefix. Thus when we parse the contents of a value we know to stop if +//! we encounter this byte. +//! +//! - A valid UTF-8 string cannot contain the `0xFF` byte. Thus we can safely +//! use `0xFF` as our component list terminator. +//! +//! The sample composite string ["abc", ID(42), "def", TERMINATOR] would thus be +//! encoded as: +//! +//! ```ignore +//! ['a', 'b' , 'c', 254, 42, 0, 0, 0, 'd', 'e', 'f', 255] +//! ^^^^^^^^^^^^^^^^ ^^^ +//! string ID with 0xFE prefix terminator (0xFF) +//! ``` +//! +//! As you can see string IDs are encoded in little endian format. +//! +//! ---------------------------------------------------------------------------- +//! +//! Each string in the table is referred to via a `StringId`. `StringId`s may +//! be generated in two ways: +//! +//! 1. 
Calling `StringTableBuilder::alloc()` which returns the `StringId` for +//! the allocated string. +//! 2. Calling `StringId::new_virtual()` to create a "virtual" `StringId` that +//! later can be mapped to an actual string via +//! `StringTableBuilder::map_virtual_to_concrete_string()`. +//! +//! String IDs allow you to deduplicate strings by allocating a string +//! once and then referring to it by id over and over. This is a useful trick +//! for strings which are recorded many times and it can significantly reduce +//! the size of profile trace files. +//! +//! `StringId`s are partitioned according to type: +//! +//! > [0 .. MAX_VIRTUAL_STRING_ID, METADATA_STRING_ID, .. ] +//! +//! From `0` to `MAX_VIRTUAL_STRING_ID` are the allowed values for virtual strings. +//! After `MAX_VIRTUAL_STRING_ID`, there is one string id (`METADATA_STRING_ID`) +//! which is used internally by `measureme` to record additional metadata about +//! the profiling session. After `METADATA_STRING_ID` are all other `StringId` +//! values. + +use crate::file_header::{ + write_file_header, FILE_MAGIC_STRINGTABLE_DATA, FILE_MAGIC_STRINGTABLE_INDEX, +}; +use crate::serialization::Addr; +use crate::serialization::SerializationSink; +use std::{error::Error, sync::Arc}; + +/// A `StringId` is used to identify a string in the `StringTable`. It is +/// either a regular `StringId`, meaning that it contains the absolute address +/// of a string within the string table data. Or it is "virtual", which means +/// that the address it points to is resolved via the string table index data, +/// that maps virtual `StringId`s to addresses. 
+#[derive(Clone, Copy, Eq, PartialEq, Debug, Hash)] +#[repr(C)] +pub struct StringId(u32); + +impl StringId { + pub const INVALID: StringId = StringId(INVALID_STRING_ID); + + #[inline] + pub fn new(id: u32) -> StringId { + StringId(id) + } + + #[inline] + pub fn new_virtual(id: u32) -> StringId { + assert!(id <= MAX_USER_VIRTUAL_STRING_ID); + StringId(id) + } + + #[inline] + pub fn is_virtual(self) -> bool { + self.0 <= METADATA_STRING_ID + } + + #[inline] + pub fn as_u32(self) -> u32 { + self.0 + } + + #[inline] + pub fn from_addr(addr: Addr) -> StringId { + let id = addr.0.checked_add(FIRST_REGULAR_STRING_ID).unwrap(); + StringId::new(id) + } + + #[inline] + pub fn to_addr(self) -> Addr { + Addr(self.0.checked_sub(FIRST_REGULAR_STRING_ID).unwrap()) + } +} + +// See module-level documentation for more information on the encoding. +pub const TERMINATOR: u8 = 0xFF; +pub const STRING_REF_TAG: u8 = 0xFE; +pub const STRING_REF_ENCODED_SIZE: usize = 5; + +/// The maximum id value a virtual string may be. +const MAX_USER_VIRTUAL_STRING_ID: u32 = 100_000_000; + +/// The id of the profile metadata string entry. +pub const METADATA_STRING_ID: u32 = MAX_USER_VIRTUAL_STRING_ID + 1; + +/// Some random string ID that we make sure cannot be generated or assigned to. +const INVALID_STRING_ID: u32 = METADATA_STRING_ID + 1; + +pub const FIRST_REGULAR_STRING_ID: u32 = INVALID_STRING_ID + 1; + +/// Write-only version of the string table +pub struct StringTableBuilder { + data_sink: Arc<SerializationSink>, + index_sink: Arc<SerializationSink>, +} + +/// Anything that implements `SerializableString` can be written to a +/// `StringTable`. 
+pub trait SerializableString { + fn serialized_size(&self) -> usize; + fn serialize(&self, bytes: &mut [u8]); +} + +// A single string is encoded as `[UTF-8 bytes][TERMINATOR]` +impl SerializableString for str { + #[inline] + fn serialized_size(&self) -> usize { + self.len() + // actual bytes + 1 // terminator + } + + #[inline] + fn serialize(&self, bytes: &mut [u8]) { + let last_byte_index = bytes.len() - 1; + bytes[0..last_byte_index].copy_from_slice(self.as_bytes()); + bytes[last_byte_index] = TERMINATOR; + } +} + +/// A single component of a string. Used for building composite table entries. +pub enum StringComponent<'s> { + Value(&'s str), + Ref(StringId), +} + +impl<'s> StringComponent<'s> { + #[inline] + fn serialized_size(&self) -> usize { + match *self { + StringComponent::Value(s) => s.len(), + StringComponent::Ref(_) => STRING_REF_ENCODED_SIZE, + } + } + + #[inline] + fn serialize<'b>(&self, bytes: &'b mut [u8]) -> &'b mut [u8] { + match *self { + StringComponent::Value(s) => { + bytes[..s.len()].copy_from_slice(s.as_bytes()); + &mut bytes[s.len()..] + } + StringComponent::Ref(string_id) => { + // The code below assumes we use a 5-byte encoding for string + // refs, where the first byte is STRING_REF_TAG and the + // following 4 bytes are a little-endian u32 string ID value. + assert!(STRING_REF_ENCODED_SIZE == 5); + + bytes[0] = STRING_REF_TAG; + bytes[1..5].copy_from_slice(&string_id.0.to_le_bytes()); + &mut bytes[5..] + } + } + } +} + +impl<'a> SerializableString for [StringComponent<'a>] { + #[inline] + fn serialized_size(&self) -> usize { + self.iter().map(|c| c.serialized_size()).sum::<usize>() + // size of components + 1 // terminator + } + + #[inline] + fn serialize(&self, mut bytes: &mut [u8]) { + assert!(bytes.len() == self.serialized_size()); + for component in self.iter() { + bytes = component.serialize(bytes); + } + + // Assert that we used the exact number of bytes we anticipated. 
+ assert!(bytes.len() == 1); + bytes[0] = TERMINATOR; + } +} + +macro_rules! impl_serializable_string_for_fixed_size { + ($n:expr) => { + impl<'a> SerializableString for [StringComponent<'a>; $n] { + #[inline(always)] + fn serialized_size(&self) -> usize { + (&self[..]).serialized_size() + } + + #[inline(always)] + fn serialize(&self, bytes: &mut [u8]) { + (&self[..]).serialize(bytes); + } + } + }; +} + +impl_serializable_string_for_fixed_size!(0); +impl_serializable_string_for_fixed_size!(1); +impl_serializable_string_for_fixed_size!(2); +impl_serializable_string_for_fixed_size!(3); +impl_serializable_string_for_fixed_size!(4); +impl_serializable_string_for_fixed_size!(5); +impl_serializable_string_for_fixed_size!(6); +impl_serializable_string_for_fixed_size!(7); +impl_serializable_string_for_fixed_size!(8); +impl_serializable_string_for_fixed_size!(9); +impl_serializable_string_for_fixed_size!(10); +impl_serializable_string_for_fixed_size!(11); +impl_serializable_string_for_fixed_size!(12); +impl_serializable_string_for_fixed_size!(13); +impl_serializable_string_for_fixed_size!(14); +impl_serializable_string_for_fixed_size!(15); +impl_serializable_string_for_fixed_size!(16); + +fn serialize_index_entry(sink: &SerializationSink, id: StringId, addr: Addr) { + sink.write_atomic(8, |bytes| { + bytes[0..4].copy_from_slice(&id.0.to_le_bytes()); + bytes[4..8].copy_from_slice(&addr.0.to_le_bytes()); + }); +} + +impl StringTableBuilder { + pub fn new( + data_sink: Arc<SerializationSink>, + index_sink: Arc<SerializationSink>, + ) -> Result<StringTableBuilder, Box<dyn Error + Send + Sync>> { + // The first thing in every stream we generate must be the stream header. 
+ write_file_header(&mut data_sink.as_std_write(), FILE_MAGIC_STRINGTABLE_DATA)?; + write_file_header(&mut index_sink.as_std_write(), FILE_MAGIC_STRINGTABLE_INDEX)?; + + Ok(StringTableBuilder { + data_sink, + index_sink, + }) + } + + /// Creates a mapping so that `virtual_id` will resolve to the contents of + /// `concrete_id` when reading the string table. + pub fn map_virtual_to_concrete_string(&self, virtual_id: StringId, concrete_id: StringId) { + // This assertion does not use `is_virtual` on purpose because that + // would also allow to overwrite `METADATA_STRING_ID`. + assert!(virtual_id.0 <= MAX_USER_VIRTUAL_STRING_ID); + serialize_index_entry(&*self.index_sink, virtual_id, concrete_id.to_addr()); + } + + pub fn bulk_map_virtual_to_single_concrete_string<I>( + &self, + virtual_ids: I, + concrete_id: StringId, + ) where + I: Iterator<Item = StringId> + ExactSizeIterator, + { + // TODO: Index data encoding could have a special bulk mode that assigns + // multiple StringIds to the same addr, so we don't have to repeat + // the `concrete_id` over and over. 
+ + type MappingEntry = [u32; 2]; + assert!(std::mem::size_of::<MappingEntry>() == 8); + + let to_addr_le = concrete_id.to_addr().0.to_le(); + + let serialized: Vec<MappingEntry> = virtual_ids + .map(|from| { + let id = from.0; + assert!(id <= MAX_USER_VIRTUAL_STRING_ID); + [id.to_le(), to_addr_le] + }) + .collect(); + + let num_bytes = serialized.len() * std::mem::size_of::<MappingEntry>(); + let byte_ptr = serialized.as_ptr() as *const u8; + + let bytes = unsafe { std::slice::from_raw_parts(byte_ptr, num_bytes) }; + + self.index_sink.write_bytes_atomic(bytes); + } + + pub fn alloc_metadata<STR: SerializableString + ?Sized>(&self, s: &STR) { + let concrete_id = self.alloc(s); + let virtual_id = StringId(METADATA_STRING_ID); + assert!(virtual_id.is_virtual()); + serialize_index_entry(&*self.index_sink, virtual_id, concrete_id.to_addr()); + } + + pub fn alloc<STR: SerializableString + ?Sized>(&self, s: &STR) -> StringId { + let size_in_bytes = s.serialized_size(); + let addr = self.data_sink.write_atomic(size_in_bytes, |mem| { + s.serialize(mem); + }); + + StringId::from_addr(addr) + } +} |