7 files changed, 843 insertions, 0 deletions
diff --git a/src/tools/rust-analyzer/crates/profile/Cargo.toml b/src/tools/rust-analyzer/crates/profile/Cargo.toml
new file mode 100644
index 000000000..0b78a45a2
--- /dev/null
+++ b/src/tools/rust-analyzer/crates/profile/Cargo.toml
@@ -0,0 +1,31 @@
+[package]
+name = "profile"
+version = "0.0.0"
+description = "TBD"
+license = "MIT OR Apache-2.0"
+edition = "2021"
+rust-version = "1.57"
+
+[lib]
+doctest = false
+
+[dependencies]
+once_cell = "1.12.0"
+cfg-if = "1.0.0"
+libc = "0.2.126"
+la-arena = { version = "0.3.0", path = "../../lib/la-arena" }
+countme = { version = "3.0.1", features = ["enable"] }
+jemalloc-ctl = { version = "0.5.0", package = "tikv-jemalloc-ctl", optional = true }
+
+[target.'cfg(target_os = "linux")'.dependencies]
+perf-event = "0.4.7"
+
+[target.'cfg(windows)'.dependencies]
+winapi = { version = "0.3.9", features = ["processthreadsapi", "psapi"] }
+
+[features]
+cpu_profiler = []
+jemalloc = ["jemalloc-ctl"]
+
+# Uncomment to enable for the whole crate graph
+# default = [ "cpu_profiler" ]
diff --git a/src/tools/rust-analyzer/crates/profile/src/google_cpu_profiler.rs b/src/tools/rust-analyzer/crates/profile/src/google_cpu_profiler.rs
new file mode 100644
index 000000000..cae6caeaa
--- /dev/null
+++ b/src/tools/rust-analyzer/crates/profile/src/google_cpu_profiler.rs
@@ -0,0 +1,44 @@
+//! Thin FFI wrapper around the gperftools CPU profiler: <https://github.com/gperftools/gperftools>
+
+use std::{
+    ffi::CString,
+    os::raw::c_char,
+    path::Path,
+    sync::atomic::{AtomicUsize, Ordering},
+};
+
+#[link(name = "profiler")] // link against the native `profiler` library
+#[allow(non_snake_case)] // the names have to match the C symbols
+extern "C" {
+    fn ProfilerStart(fname: *const c_char) -> i32; // `start` treats a 0 return as failure
+    fn ProfilerStop();
+}
+
+const OFF: usize = 0; // profiler is stopped
+const ON: usize = 1; // profiler is running
+const PENDING: usize = 2; // a `start` or `stop` call is in progress
+
+fn transition(current: usize, new: usize) -> bool {
+    static STATE: AtomicUsize = AtomicUsize::new(OFF);
+    // Atomically swap `current` for `new`; returns whether the swap actually happened.
+    STATE.compare_exchange(current, new, Ordering::SeqCst, Ordering::SeqCst).is_ok()
+}
+
+/// Starts gperftools profiling into `path`; panics if already started or if starting fails.
+pub(crate) fn start(path: &Path) {
+    let path = CString::new(path.display().to_string()).unwrap(); // before the state change
+    assert!(transition(OFF, PENDING), "profiler already started");
+    if unsafe { ProfilerStart(path.as_ptr()) } == 0 {
+        assert!(transition(PENDING, OFF)); // roll back so the state is not stuck in PENDING
+        panic!("profiler failed to start")
+    }
+    assert!(transition(PENDING, ON));
+}
+
+pub(crate) fn stop() { // panics unless the profiler is currently ON
+    if !transition(ON, PENDING) {
+        panic!("profiler is not started")
+    }
+    unsafe { ProfilerStop() }; // state is PENDING here, so concurrent start/stop calls are rejected
+    assert!(transition(PENDING, OFF));
+}
diff --git a/src/tools/rust-analyzer/crates/profile/src/hprof.rs b/src/tools/rust-analyzer/crates/profile/src/hprof.rs
new file mode 100644
index 000000000..b562c193e
--- /dev/null
+++ b/src/tools/rust-analyzer/crates/profile/src/hprof.rs
@@ -0,0 +1,326 @@
+//! Simple hierarchical profiler
+use std::{
+    cell::RefCell,
+    collections::{BTreeMap, HashSet},
+    env, fmt,
+    io::{stderr, Write},
+    sync::{
+        atomic::{AtomicBool, Ordering},
+        RwLock,
+    },
+    time::{Duration, Instant},
+};
+
+use once_cell::sync::Lazy;
+
+use crate::tree::{Idx, Tree};
+
+/// Sets up profiling from the environment; `RA_PROFILE` filtering syntax:
+/// env RA_PROFILE=*             // dump everything
+/// env RA_PROFILE=foo|bar|baz   // enable only the selected entries
+/// env RA_PROFILE=*@3>10        // dump everything, up to depth 3, if it takes more than 10 ms
+pub fn init() {
+    countme::enable(env::var("RA_COUNT").is_ok()); // counting is on whenever `RA_COUNT` is set
+    let spec = env::var("RA_PROFILE").unwrap_or_default(); // unset behaves like an empty spec
+    init_from(&spec);
+}
+
+pub fn init_from(spec: &str) { // an empty `spec` disables profiling
+    let filter = if spec.is_empty() { Filter::disabled() } else { Filter::from_spec(spec) };
+    filter.install(); // publishes the filter globally and toggles `PROFILING_ENABLED`
+}
+
+type Label = &'static str;
+
+/// This function starts a profiling scope in the current execution stack with a given description.
+/// It returns a `ProfileSpan` that measures elapsed time between this call and the drop of that `ProfileSpan`.
+/// It supports nested profiling scopes in case when this function is invoked multiple times at the execution stack.
+/// In this case the profiling information will be nested at the output.
+/// Profiling information is printed to stderr.
+///
+/// # Example
+/// ```
+/// profile::init_from("profile1|profile2@2");
+/// profiling_function1();
+///
+/// fn profiling_function1() {
+///     let _p = profile::span("profile1");
+///     profiling_function2();
+/// }
+///
+/// fn profiling_function2() {
+///     let _p = profile::span("profile2");
+/// }
+/// ```
+/// This will print to stderr something like the following (timings are illustrative):
+/// ```text
+///  0ms - profile1
+///      0ms - profile2
+/// ```
+#[inline]
+pub fn span(label: Label) -> ProfileSpan {
+    debug_assert!(!label.is_empty());
+    // `push` returns false when the thread-local filter rejects this label or nesting depth.
+    let enabled = PROFILING_ENABLED.load(Ordering::Relaxed);
+    if enabled && with_profile_stack(|stack| stack.push(label)) {
+        ProfileSpan(Some(ProfilerImpl { label, detail: None }))
+    } else {
+        ProfileSpan(None)
+    }
+}
+
+#[inline]
+pub fn heartbeat_span() -> HeartbeatSpan {
+    let enabled = PROFILING_ENABLED.load(Ordering::Relaxed);
+    HeartbeatSpan::new(enabled) // heartbeat checking stays on until the returned span is dropped
+}
+
+#[inline]
+pub fn heartbeat() {
+    let enabled = PROFILING_ENABLED.load(Ordering::Relaxed);
+    if enabled {
+        with_profile_stack(|it| it.heartbeat(1)); // counts one beat on the innermost open frame
+    }
+}
+
+pub struct ProfileSpan(Option<ProfilerImpl>); // `None` when `span` did not open a frame
+
+struct ProfilerImpl {
+    label: Label,
+    detail: Option<String>, // optional text printed after the label as ` @ <detail>`
+}
+
+impl ProfileSpan {
+    pub fn detail(mut self, detail: impl FnOnce() -> String) -> ProfileSpan {
+        if let Some(profiler) = &mut self.0 {
+            profiler.detail = Some(detail()); // the closure only runs for active spans
+        }
+        self
+    }
+}
+
+impl Drop for ProfilerImpl {
+    #[inline]
+    fn drop(&mut self) { // closes the frame that `span` pushed for this label
+        with_profile_stack(|it| it.pop(self.label, self.detail.take()));
+    }
+}
+
+pub struct HeartbeatSpan {
+    enabled: bool, // whether `new` switched heartbeat checking on (and `drop` must switch it off)
+}
+
+impl HeartbeatSpan {
+    #[inline]
+    pub fn new(enabled: bool) -> Self {
+        if enabled {
+            with_profile_stack(|it| it.heartbeats(true)); // affects the current thread's stack only
+        }
+        Self { enabled }
+    }
+}
+
+impl Drop for HeartbeatSpan {
+    fn drop(&mut self) {
+        if self.enabled {
+            with_profile_stack(|it| it.heartbeats(false)); // always resets to off, not to a prior value
+        }
+    }
+}
+
+static PROFILING_ENABLED: AtomicBool = AtomicBool::new(false); // global on/off switch
+static FILTER: Lazy<RwLock<Filter>> = Lazy::new(Default::default); // most recently installed filter
+
+fn with_profile_stack<T>(f: impl FnOnce(&mut ProfileStack) -> T) -> T {
+    thread_local!(static STACK: RefCell<ProfileStack> = RefCell::new(ProfileStack::new()));
+    STACK.with(|it| f(&mut *it.borrow_mut())) // runs `f` on the calling thread's own stack
+}
+
+#[derive(Default, Clone, Debug)]
+struct Filter {
+    depth: usize, // nesting limit checked in `push`; 0 turns profiling off in `install`
+    allowed: HashSet<String>, // labels accepted for top-level spans; empty accepts every label
+    longer_than: Duration, // spans not exceeding this (in whole ms) are not printed on their own
+    heartbeat_longer_than: Duration, // threshold for the "Too few heartbeats" warning
+    version: usize, // incremented by `install` so threads can notice a newer filter
+}
+
+impl Filter {
+    fn disabled() -> Filter {
+        Filter::default() // default `depth` is 0, which `install` treats as "off"
+    }
+
+    fn from_spec(mut spec: &str) -> Filter {
+        let longer_than = if let Some(idx) = spec.rfind('>') { // optional `>millis` suffix
+            let longer_than = spec[idx + 1..].parse().expect("invalid profile longer_than");
+            spec = &spec[..idx];
+            Duration::from_millis(longer_than)
+        } else {
+            Duration::new(0, 0)
+        };
+        let heartbeat_longer_than = longer_than;
+
+        let depth = if let Some(idx) = spec.rfind('@') { // optional `@depth` suffix
+            let depth: usize = spec[idx + 1..].parse().expect("invalid profile depth");
+            spec = &spec[..idx];
+            depth
+        } else {
+            999 // no explicit depth given: use a large default
+        };
+        let allowed =
+            if spec == "*" { HashSet::new() } else { spec.split('|').map(String::from).collect() };
+        Filter { depth, allowed, longer_than, heartbeat_longer_than, version: 0 }
+    }
+
+    fn install(mut self) {
+        PROFILING_ENABLED.store(self.depth > 0, Ordering::SeqCst);
+        let mut old = FILTER.write().unwrap();
+        self.version = old.version + 1; // threads compare versions in `ProfileStack::push`
+        *old = self;
+    }
+}
+
+struct ProfileStack {
+    frames: Vec<Frame>, // currently open spans, innermost last
+    filter: Filter, // thread-local copy of the global `FILTER`, refreshed in `push`
+    messages: Tree<Message>, // finished and in-progress spans of the current top-level span
+    heartbeats: bool, // whether `pop` should check heartbeat frequency
+}
+
+struct Frame {
+    t: Instant, // when the span was opened
+    heartbeats: u32, // beats recorded while this frame was the innermost one
+}
+
+#[derive(Default)]
+struct Message {
+    duration: Duration,
+    label: Label,
+    detail: Option<String>,
+}
+
+impl ProfileStack {
+    fn new() -> ProfileStack {
+        ProfileStack {
+            frames: Vec::new(),
+            messages: Tree::default(),
+            filter: Default::default(),
+            heartbeats: false,
+        }
+    }
+    /// Opens a frame for `label`; returns `false` (recording nothing) if the filter rejects it.
+    fn push(&mut self, label: Label) -> bool {
+        if self.frames.is_empty() { // only pick up a newer filter between top-level spans
+            if let Ok(f) = FILTER.try_read() { // non-blocking: skip the refresh if the lock is unavailable
+                if f.version > self.filter.version {
+                    self.filter = f.clone();
+                }
+            };
+        }
+        if self.frames.len() > self.filter.depth {
+            return false;
+        }
+        let allowed = &self.filter.allowed;
+        if self.frames.is_empty() && !allowed.is_empty() && !allowed.contains(label) {
+            return false; // the label allow-list is only consulted for top-level spans
+        }
+
+        self.frames.push(Frame { t: Instant::now(), heartbeats: 0 });
+        self.messages.start();
+        true
+    }
+    /// Closes the innermost frame; once no frames remain, prints the tree (if slow enough) and resets it.
+    fn pop(&mut self, label: Label, detail: Option<String>) {
+        let frame = self.frames.pop().unwrap();
+        let duration = frame.t.elapsed();
+
+        if self.heartbeats {
+            self.heartbeat(frame.heartbeats); // carry this frame's beats over to its parent frame
+            let avg_span = duration / (frame.heartbeats + 1);
+            if avg_span > self.filter.heartbeat_longer_than {
+                eprintln!("Too few heartbeats {} ({}/{:?})?", label, frame.heartbeats, duration);
+            }
+        }
+
+        self.messages.finish(Message { duration, label, detail });
+        if self.frames.is_empty() {
+            let longer_than = self.filter.longer_than;
+            // Convert to millis for comparison to avoid problems with rounding
+            // (otherwise we could print `0ms` despite user's `>0` filter when
+            // `duration` is just a few nanos).
+            if duration.as_millis() > longer_than.as_millis() {
+                if let Some(root) = self.messages.root() {
+                    print(&self.messages, root, 0, longer_than, &mut stderr().lock());
+                }
+            }
+            self.messages.clear();
+        }
+    }
+    /// Switches heartbeat checking in `pop` on or off.
+    fn heartbeats(&mut self, yes: bool) {
+        self.heartbeats = yes;
+    }
+    fn heartbeat(&mut self, n: u32) {
+        if let Some(frame) = self.frames.last_mut() { // silently ignored when no frame is open
+            frame.heartbeats += n;
+        }
+    }
+}
+
+/// Writes the subtree rooted at `curr` to `out`, indenting by four spaces per `level`.
+fn print(
+    tree: &Tree<Message>,
+    curr: Idx<Message>,
+    level: u32,
+    longer_than: Duration,
+    out: &mut impl Write,
+) {
+    let current_indent = "    ".repeat(level as usize);
+    let detail = tree[curr].detail.as_ref().map(|it| format!(" @ {}", it)).unwrap_or_default();
+    writeln!(
+        out,
+        "{}{} - {}{}",
+        current_indent,
+        ms(tree[curr].duration),
+        tree[curr].label,
+        detail,
+    )
+    .expect("printing profiling info");
+
+    let mut accounted_for = Duration::default();
+    let mut short_children = BTreeMap::new(); // Use `BTreeMap` to get deterministic output.
+    for child in tree.children(curr) {
+        accounted_for += tree[child].duration;
+        if tree[child].duration.as_millis() > longer_than.as_millis() {
+            print(tree, child, level + 1, longer_than, out);
+        } else {
+            // Children at or below the threshold are merged into one summary line per label.
+            let (total_duration, cnt) =
+                short_children.entry(tree[child].label).or_insert((Duration::default(), 0));
+            *total_duration += tree[child].duration;
+            *cnt += 1;
+        }
+    }
+
+    for (child_msg, (duration, count)) in &short_children {
+        writeln!(out, "    {}{} - {} ({} calls)", current_indent, ms(*duration), child_msg, count)
+            .expect("printing profiling info");
+    }
+    // Never panic on underflow, and compare whole millis like the child check above.
+    let unaccounted = tree[curr].duration.saturating_sub(accounted_for);
+    if tree.children(curr).next().is_some() && unaccounted.as_millis() > longer_than.as_millis() {
+        writeln!(out, "    {}{} - ???", current_indent, ms(unaccounted))
+            .expect("printing profiling info");
+    }
+}
+
+#[allow(non_camel_case_types)] // lower-case so that call sites read like a unit: `ms(duration)`
+struct ms(Duration);
+
+impl fmt::Display for ms {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self.0.as_millis() {
+            0 => f.write_str("    0  "), // same 7-column width as `{:5}ms`, with the unit left out
+            n => write!(f, "{:5}ms", n),
+        }
+    }
+}
diff --git a/src/tools/rust-analyzer/crates/profile/src/lib.rs b/src/tools/rust-analyzer/crates/profile/src/lib.rs
new file mode 100644
index 000000000..00f7952e8
--- /dev/null
+++ b/src/tools/rust-analyzer/crates/profile/src/lib.rs
@@ -0,0 +1,130 @@
+//! A collection of tools for profiling rust-analyzer.
+
+#![warn(rust_2018_idioms, unused_lifetimes, semicolon_in_expressions_from_macros)]
+
+mod stop_watch;
+mod memory_usage;
+#[cfg(feature = "cpu_profiler")]
+mod google_cpu_profiler;
+mod hprof;
+mod tree;
+
+use std::cell::RefCell;
+
+pub use crate::{
+    hprof::{heartbeat, heartbeat_span, init, init_from, span},
+    memory_usage::{Bytes, MemoryUsage},
+    stop_watch::{StopWatch, StopWatchSpan},
+};
+
+pub use countme;
+/// Include `_c: Count<Self>` field in important structs to count them.
+///
+/// To view the counts, run with `RA_COUNT=1`. The overhead of disabled count is
+/// almost zero.
+pub use countme::Count;
+
+thread_local!(static IN_SCOPE: RefCell<bool> = RefCell::new(false));
+
+/// Allows to check if the current code is within some dynamic scope, can be
+/// useful during debugging to figure out why a function is called.
+pub struct Scope {
+    prev: bool, // value of the thread-local flag before `enter`, restored on drop
+}
+
+impl Scope {
+    #[must_use]
+    pub fn enter() -> Scope {
+        let prev = IN_SCOPE.with(|slot| std::mem::replace(&mut *slot.borrow_mut(), true));
+        Scope { prev }
+    }
+    pub fn is_active() -> bool { // true while any `Scope` is alive on the current thread
+        IN_SCOPE.with(|slot| *slot.borrow())
+    }
+}
+
+impl Drop for Scope {
+    fn drop(&mut self) {
+        IN_SCOPE.with(|slot| *slot.borrow_mut() = self.prev); // nested scopes unwind correctly
+    }
+}
+
+/// A wrapper around google_cpu_profiler.
+///
+/// Usage:
+/// 1. Install gperftools (<https://github.com/gperftools/gperftools>), probably packaged with your Linux distro.
+/// 2. Build with `cpu_profiler` feature.
+/// 3. Run the code, the *raw* output would be in the `./out.profile` file.
+/// 4. Install pprof for visualization (<https://github.com/google/pprof>).
+/// 5. Bump sampling frequency to once per ms: `export CPUPROFILE_FREQUENCY=1000`
+/// 6. Use something like `pprof -svg target/release/rust-analyzer ./out.profile` to see the results.
+///
+/// For example, here's how I run profiling on NixOS:
+///
+/// ```bash
+/// $ bat -p shell.nix
+/// with import <nixpkgs> {};
+/// mkShell {
+///   buildInputs = [ gperftools ];
+///   shellHook = ''
+///     export LD_LIBRARY_PATH="${gperftools}/lib:"
+///   '';
+/// }
+/// $ set -x CPUPROFILE_FREQUENCY 1000
+/// $ nix-shell --run 'cargo test --release --package rust-analyzer --lib -- benchmarks::benchmark_integrated_highlighting --exact --nocapture'
+/// $ pprof -svg target/release/deps/rust_analyzer-8739592dc93d63cb crates/rust-analyzer/out.profile > profile.svg
+/// ```
+///
+/// See this diff for how to profile completions:
+///
+/// <https://github.com/rust-lang/rust-analyzer/pull/5306>
+#[derive(Debug)]
+pub struct CpuSpan {
+    _private: (), // keeps construction inside this crate; use `cpu_span()`
+}
+
+#[must_use]
+pub fn cpu_span() -> CpuSpan {
+    #[cfg(feature = "cpu_profiler")]
+    {
+        google_cpu_profiler::start("./out.profile".as_ref()) // panics if a profile is already running
+    }
+    // Without the feature this only prints a hint; the returned span then does nothing on drop.
+    #[cfg(not(feature = "cpu_profiler"))]
+    {
+        eprintln!(
+            r#"cpu profiling is disabled, uncomment `default = [ "cpu_profiler" ]` in Cargo.toml to enable."#
+        );
+    }
+
+    CpuSpan { _private: () }
+}
+
+impl Drop for CpuSpan {
+    fn drop(&mut self) {
+        #[cfg(feature = "cpu_profiler")]
+        {
+            google_cpu_profiler::stop();
+            let profile_data = std::env::current_dir().unwrap().join("out.profile");
+            eprintln!("Profile data saved to:\n\n    {}\n", profile_data.display());
+            let mut cmd = std::process::Command::new("pprof"); // best-effort rendering via `pprof`
+            cmd.arg("-svg").arg(std::env::current_exe().unwrap()).arg(&profile_data);
+            let out = cmd.output();
+            // Rendering is optional: a missing or failing `pprof` is only reported.
+            match out {
+                Ok(out) if out.status.success() => {
+                    let svg = profile_data.with_extension("svg"); // `out.svg` next to `out.profile`
+                    std::fs::write(&svg, &out.stdout).unwrap();
+                    eprintln!("Profile rendered to:\n\n    {}\n", svg.display());
+                }
+                _ => {
+                    eprintln!("Failed to run:\n\n   {:?}\n", cmd);
+                }
+            }
+        }
+    }
+}
+
+pub fn memory_usage() -> MemoryUsage { // convenience alias for `MemoryUsage::now()`
+    MemoryUsage::now()
+}
diff --git a/src/tools/rust-analyzer/crates/profile/src/memory_usage.rs b/src/tools/rust-analyzer/crates/profile/src/memory_usage.rs
new file mode 100644
index 000000000..ee882b4cb
--- /dev/null
+++ b/src/tools/rust-analyzer/crates/profile/src/memory_usage.rs
@@ -0,0 +1,127 @@
+//! Like [`std::time::Instant`], but for memory.
+//!
+//! Measures the total size of all currently allocated objects.
+use std::fmt;
+
+use cfg_if::cfg_if;
+
+#[derive(Copy, Clone)]
+pub struct MemoryUsage {
+    pub allocated: Bytes, // a snapshot from `now`, or the difference of two snapshots
+}
+
+impl fmt::Display for MemoryUsage {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        self.allocated.fmt(f) // delegates to `Bytes`, so b/kb/mb scaling applies
+    }
+}
+
+impl std::ops::Sub for MemoryUsage {
+    type Output = MemoryUsage;
+    fn sub(self, rhs: MemoryUsage) -> MemoryUsage {
+        MemoryUsage { allocated: self.allocated - rhs.allocated } // may be negative (`Bytes` is signed)
+    }
+}
+
+impl MemoryUsage {
+    pub fn now() -> MemoryUsage { // picks one measurement backend at compile time
+        cfg_if! {
+            if #[cfg(all(feature = "jemalloc", not(target_env = "msvc")))] {
+                jemalloc_ctl::epoch::advance().unwrap(); // presumably refreshes cached stats; see jemalloc-ctl docs
+                MemoryUsage {
+                    allocated: Bytes(jemalloc_ctl::stats::allocated::read().unwrap() as isize),
+                }
+            } else if #[cfg(all(target_os = "linux", target_env = "gnu"))] {
+                memusage_linux()
+            } else if #[cfg(windows)] {
+                // There doesn't seem to be an API for determining heap usage, so we try to
+                // approximate that by using the Commit Charge value.
+
+                use winapi::um::processthreadsapi::*;
+                use winapi::um::psapi::*;
+                use std::mem::{MaybeUninit, size_of};
+
+                let proc = unsafe { GetCurrentProcess() };
+                let mut mem_counters = MaybeUninit::uninit();
+                let cb = size_of::<PROCESS_MEMORY_COUNTERS>();
+                let ret = unsafe { GetProcessMemoryInfo(proc, mem_counters.as_mut_ptr(), cb as u32) };
+                assert!(ret != 0); // a zero return is treated as failure
+
+                let usage = unsafe { mem_counters.assume_init().PagefileUsage };
+                MemoryUsage { allocated: Bytes(usage as isize) }
+            } else {
+                MemoryUsage { allocated: Bytes(0) } // no supported backend: always report zero
+            }
+        }
+    }
+}
+
+#[cfg(all(target_os = "linux", target_env = "gnu", not(feature = "jemalloc")))]
+fn memusage_linux() -> MemoryUsage {
+    // Linux/glibc has 2 APIs for allocator introspection that we can use: mallinfo and mallinfo2.
+    // mallinfo uses `int` fields and cannot handle memory usage exceeding 2 GB.
+    // mallinfo2 is very recent, so its presence needs to be detected at runtime.
+    // Both are abysmally slow.
+    use std::ffi::CStr;
+    use std::sync::atomic::{AtomicUsize, Ordering};
+
+    static MALLINFO2: AtomicUsize = AtomicUsize::new(1); // 1 = not looked up yet, 0 = symbol absent
+
+    let mut mallinfo2 = MALLINFO2.load(Ordering::Relaxed);
+    if mallinfo2 == 1 {
+        let cstr = CStr::from_bytes_with_nul(b"mallinfo2\0").unwrap();
+        mallinfo2 = unsafe { libc::dlsym(libc::RTLD_DEFAULT, cstr.as_ptr()) } as usize;
+        // NB: races don't matter here, since they'll always store the same value
+        MALLINFO2.store(mallinfo2, Ordering::Relaxed);
+    }
+
+    if mallinfo2 == 0 {
+        // mallinfo2 does not exist, use mallinfo.
+        let alloc = unsafe { libc::mallinfo() }.uordblks as isize;
+        MemoryUsage { allocated: Bytes(alloc) }
+    } else {
+        // The symbol is a C function, so it has to be called through an `extern "C"` pointer;
+        // calling it through a Rust-ABI `fn()` pointer would be undefined behaviour.
+        let query: extern "C" fn() -> libc::mallinfo2 = unsafe { std::mem::transmute(mallinfo2) };
+        let alloc = query().uordblks as isize;
+        MemoryUsage { allocated: Bytes(alloc) }
+    }
+}
+
+#[derive(Default, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy)]
+pub struct Bytes(isize); // signed so that differences between two measurements can be negative
+
+impl Bytes {
+    pub fn megabytes(self) -> isize {
+        self.0 / 1024 / 1024 // integer division: truncates toward zero
+    }
+}
+
+impl fmt::Display for Bytes {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let bytes = self.0;
+        let mut value = bytes;
+        let mut suffix = "b";
+        if value.abs() > 4096 { // only switch units once the magnitude exceeds 4096 of the current unit
+            value /= 1024;
+            suffix = "kb";
+            if value.abs() > 4096 {
+                value /= 1024;
+                suffix = "mb";
+            }
+        }
+        f.pad(&format!("{}{}", value, suffix)) // `pad` honours width/alignment given by the caller
+    }
+}
+
+impl std::ops::AddAssign<usize> for Bytes {
+    fn add_assign(&mut self, x: usize) {
+        self.0 += x as isize; // NOTE(review): `as` wraps for values above `isize::MAX`
+    }
+}
+
+impl std::ops::Sub for Bytes {
+    type Output = Bytes;
+    fn sub(self, rhs: Bytes) -> Bytes {
+        Bytes(self.0 - rhs.0)
+    }
+}
diff --git a/src/tools/rust-analyzer/crates/profile/src/stop_watch.rs b/src/tools/rust-analyzer/crates/profile/src/stop_watch.rs
new file mode 100644
index 000000000..625832848
--- /dev/null
+++ b/src/tools/rust-analyzer/crates/profile/src/stop_watch.rs
@@ -0,0 +1,101 @@
+//! Like `std::time::Instant`, but also measures memory & CPU cycles.
+use std::{
+    fmt,
+    time::{Duration, Instant},
+};
+
+use crate::MemoryUsage;
+
+pub struct StopWatch {
+    time: Instant, // when the watch was started
+    #[cfg(target_os = "linux")]
+    counter: Option<perf_event::Counter>, // `None` if perf is disabled or the counter could not be built
+    memory: Option<MemoryUsage>, // baseline snapshot, present only after `memory(true)`
+}
+
+pub struct StopWatchSpan {
+    pub time: Duration,
+    pub instructions: Option<u64>, // always `None` on non-Linux targets
+    pub memory: Option<MemoryUsage>, // usage now minus the baseline, when a baseline was taken
+}
+
+impl StopWatch {
+    pub fn start() -> StopWatch {
+        #[cfg(target_os = "linux")]
+        let counter = {
+            // When debugging rust-analyzer using rr, the perf-related syscalls cause it to abort.
+            // We allow disabling perf by setting the env var `RA_DISABLE_PERF`.
+
+            use once_cell::sync::Lazy;
+            static PERF_ENABLED: Lazy<bool> =
+                Lazy::new(|| std::env::var_os("RA_DISABLE_PERF").is_none());
+
+            if *PERF_ENABLED {
+                let mut counter = perf_event::Builder::new()
+                    .build()
+                    .map_err(|err| eprintln!("Failed to create perf counter: {}", err))
+                    .ok();
+                if let Some(counter) = &mut counter {
+                    if let Err(err) = counter.enable() { // only logged; the counter is still kept
+                        eprintln!("Failed to start perf counter: {}", err)
+                    }
+                }
+                counter
+            } else {
+                None
+            }
+        };
+        let time = Instant::now(); // taken last, so counter setup is not part of the measured time
+        StopWatch {
+            time,
+            #[cfg(target_os = "linux")]
+            counter,
+            memory: None, // memory tracking is opt-in via `memory(true)`
+        }
+    }
+    pub fn memory(mut self, yes: bool) -> StopWatch {
+        if yes {
+            self.memory = Some(MemoryUsage::now()); // baseline for `elapsed`
+        }
+        self
+    }
+    pub fn elapsed(&mut self) -> StopWatchSpan {
+        let time = self.time.elapsed();
+
+        #[cfg(target_os = "linux")]
+        let instructions = self.counter.as_mut().and_then(|it| {
+            it.read().map_err(|err| eprintln!("Failed to read perf counter: {}", err)).ok()
+        });
+        #[cfg(not(target_os = "linux"))]
+        let instructions = None; // perf counters are only set up on Linux
+
+        let memory = self.memory.map(|it| MemoryUsage::now() - it);
+        StopWatchSpan { time, instructions, memory }
+    }
+}
+
+/// Formats as `<time>[, <count><prefix>instr][, <memory>]`, where the optional parts need data.
+impl fmt::Display for StopWatchSpan {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{:.2?}", self.time)?;
+
+        if let Some(raw_count) = self.instructions {
+            // Divide by 1000 per step while the count still exceeds 10000,
+            // walking through the `k`, `m` and `g` prefixes in order.
+            let (mut scaled, mut unit) = (raw_count, "");
+            for next_unit in ["k", "m", "g"] {
+                if scaled <= 10000 {
+                    break;
+                }
+                scaled /= 1000;
+                unit = next_unit;
+            }
+            write!(f, ", {}{}instr", scaled, unit)?;
+        }
+
+        match self.memory {
+            Some(delta) => write!(f, ", {}", delta),
+            None => Ok(()),
+        }
+    }
+}
diff --git a/src/tools/rust-analyzer/crates/profile/src/tree.rs b/src/tools/rust-analyzer/crates/profile/src/tree.rs
new file mode 100644
index 000000000..62f0c30b5
--- /dev/null
+++ b/src/tools/rust-analyzer/crates/profile/src/tree.rs
@@ -0,0 +1,84 @@
+//! A simple tree implementation which tries to not allocate all over the place.
+use std::ops;
+
+use la_arena::Arena;
+
+#[derive(Default)]
+pub(crate) struct Tree<T> {
+    nodes: Arena<Node<T>>, // all nodes, linked through `first_child` / `next_sibling`
+    current_path: Vec<(Idx<T>, Option<Idx<T>>)>, // open nodes, each with its most recent child
+}
+
+pub(crate) type Idx<T> = la_arena::Idx<Node<T>>; // arena index of a node carrying a `T`
+
+impl<T> Tree<T> {
+    pub(crate) fn start(&mut self)
+    where
+        T: Default,
+    {
+        let me = self.nodes.alloc(Node::new(T::default())); // placeholder data until `finish`
+        if let Some((parent, last_child)) = self.current_path.last_mut() {
+            let slot = match *last_child { // append after the last sibling, or become the first child
+                Some(last_child) => &mut self.nodes[last_child].next_sibling,
+                None => &mut self.nodes[*parent].first_child,
+            };
+            let prev = slot.replace(me);
+            assert!(prev.is_none());
+            *last_child = Some(me);
+        }
+        // The new node is now the innermost open node.
+        self.current_path.push((me, None));
+    }
+    /// Closes the innermost open node and stores `data` in it; panics if no node is open.
+    pub(crate) fn finish(&mut self, data: T) {
+        let (me, _last_child) = self.current_path.pop().unwrap();
+        self.nodes[me].data = data;
+    }
+    /// Returns the first node in the arena, if there is one.
+    pub(crate) fn root(&self) -> Option<Idx<T>> {
+        self.nodes.iter().next().map(|(idx, _)| idx)
+    }
+    /// Iterates over the direct children of `idx`, in the order they were started.
+    pub(crate) fn children(&self, idx: Idx<T>) -> impl Iterator<Item = Idx<T>> + '_ {
+        NodeIter { nodes: &self.nodes, next: self.nodes[idx].first_child }
+    }
+    pub(crate) fn clear(&mut self) {
+        self.nodes.clear();
+        self.current_path.clear();
+    }
+}
+
+impl<T> ops::Index<Idx<T>> for Tree<T> {
+    type Output = T;
+    fn index(&self, index: Idx<T>) -> &T { // `tree[idx]` yields the node's data, not the node itself
+        &self.nodes[index].data
+    }
+}
+
+pub(crate) struct Node<T> {
+    data: T,
+    first_child: Option<Idx<T>>, // head of this node's list of children
+    next_sibling: Option<Idx<T>>, // next entry in the parent's list of children
+}
+
+impl<T> Node<T> {
+    fn new(data: T) -> Node<T> {
+        Node { data, first_child: None, next_sibling: None } // starts out unlinked
+    }
+}
+
+struct NodeIter<'a, T> {
+    nodes: &'a Arena<Node<T>>,
+    next: Option<Idx<T>>, // node to yield on the next call; `None` once the chain is exhausted
+}
+
+impl<'a, T> Iterator for NodeIter<'a, T> {
+    type Item = Idx<T>;
+
+    fn next(&mut self) -> Option<Idx<T>> {
+        let current = self.next?;
+        // Step to the following sibling before handing out the current node.
+        self.next = self.nodes[current].next_sibling;
+        Some(current)
+    }
+}