From 698f8c2f01ea549d77d7dc3338a12e04c11057b9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:02:58 +0200 Subject: Adding upstream version 1.64.0+dfsg1. Signed-off-by: Daniel Baumann --- vendor/rowan/src/green/node_cache.rs | 157 +++++++++++++++++++++++++++++++++++ 1 file changed, 157 insertions(+) create mode 100644 vendor/rowan/src/green/node_cache.rs (limited to 'vendor/rowan/src/green/node_cache.rs') diff --git a/vendor/rowan/src/green/node_cache.rs b/vendor/rowan/src/green/node_cache.rs new file mode 100644 index 000000000..c73f3e696 --- /dev/null +++ b/vendor/rowan/src/green/node_cache.rs @@ -0,0 +1,157 @@ +use hashbrown::hash_map::RawEntryMut; +use rustc_hash::FxHasher; +use std::hash::{BuildHasherDefault, Hash, Hasher}; + +use crate::{ + green::GreenElementRef, GreenNode, GreenNodeData, GreenToken, GreenTokenData, NodeOrToken, + SyntaxKind, +}; + +use super::element::GreenElement; + +type HashMap = hashbrown::HashMap>; + +#[derive(Debug)] +struct NoHash(T); + +/// Interner for GreenTokens and GreenNodes +// XXX: the impl is a bit tricky. As usual when writing interners, we want to +// store all values in one HashSet. +// +// However, hashing trees is fun: hash of the tree is recursively defined. We +// maintain an invariant -- if the tree is interned, then all of its children +// are interned as well. +// +// That means that computing the hash naively is wasteful -- we just *know* +// hashes of children, and we can re-use those. +// +// So here we use *raw* API of hashbrown and provide the hashes manually, +// instead of going via a `Hash` impl. Our manual `Hash` and the +// `#[derive(Hash)]` are actually different! At some point we had a fun bug, +// where we accidentally mixed the two hashes, which made the cache much less +// efficient. +// +// To fix that, we additionally wrap the data in `NoHash` wrapper, to make sure +// we don't accidentally use the wrong hash! +#[derive(Default, Debug)] +pub struct NodeCache { + nodes: HashMap, ()>, + tokens: HashMap, ()>, +} + +fn token_hash(token: &GreenTokenData) -> u64 { + let mut h = FxHasher::default(); + token.kind().hash(&mut h); + token.text().hash(&mut h); + h.finish() +} + +fn node_hash(node: &GreenNodeData) -> u64 { + let mut h = FxHasher::default(); + node.kind().hash(&mut h); + for child in node.children() { + match child { + NodeOrToken::Node(it) => node_hash(it), + NodeOrToken::Token(it) => token_hash(it), + } + .hash(&mut h) + } + h.finish() +} + +fn element_id(elem: GreenElementRef<'_>) -> *const () { + match elem { + NodeOrToken::Node(it) => it as *const GreenNodeData as *const (), + NodeOrToken::Token(it) => it as *const GreenTokenData as *const (), + } +} + +impl NodeCache { + pub(crate) fn node( + &mut self, + kind: SyntaxKind, + children: &mut Vec<(u64, GreenElement)>, + first_child: usize, + ) -> (u64, GreenNode) { + let build_node = move |children: &mut Vec<(u64, GreenElement)>| { + GreenNode::new(kind, children.drain(first_child..).map(|(_, it)| it)) + }; + + let children_ref = &children[first_child..]; + if children_ref.len() > 3 { + let node = build_node(children); + return (0, node); + } + + let hash = { + let mut h = FxHasher::default(); + kind.hash(&mut h); + for &(hash, _) in children_ref { + if hash == 0 { + let node = build_node(children); + return (0, node); + } + hash.hash(&mut h); + } + h.finish() + }; + + // Green nodes are fully immutable, so it's ok to deduplicate them. + // This is the same optimization that Roslyn does + // https://github.com/KirillOsenkov/Bliki/wiki/Roslyn-Immutable-Trees + // + // For example, all `#[inline]` in this file share the same green node! + // For `libsyntax/parse/parser.rs`, measurements show that deduping saves + // 17% of the memory for green nodes! + let entry = self.nodes.raw_entry_mut().from_hash(hash, |node| { + node.0.kind() == kind && node.0.children().len() == children_ref.len() && { + let lhs = node.0.children(); + let rhs = children_ref.iter().map(|(_, it)| it.as_deref()); + + let lhs = lhs.map(element_id); + let rhs = rhs.map(element_id); + + lhs.eq(rhs) + } + }); + + let node = match entry { + RawEntryMut::Occupied(entry) => { + drop(children.drain(first_child..)); + entry.key().0.clone() + } + RawEntryMut::Vacant(entry) => { + let node = build_node(children); + entry.insert_with_hasher(hash, NoHash(node.clone()), (), |n| node_hash(&n.0)); + node + } + }; + + (hash, node) + } + + pub(crate) fn token(&mut self, kind: SyntaxKind, text: &str) -> (u64, GreenToken) { + let hash = { + let mut h = FxHasher::default(); + kind.hash(&mut h); + text.hash(&mut h); + h.finish() + }; + + let entry = self + .tokens + .raw_entry_mut() + .from_hash(hash, |token| token.0.kind() == kind && token.0.text() == text); + + let token = match entry { + RawEntryMut::Occupied(entry) => entry.key().0.clone(), + RawEntryMut::Vacant(entry) => { + let token = GreenToken::new(kind, text); + entry.insert_with_hasher(hash, NoHash(token.clone()), (), |t| token_hash(&t.0)); + token + } + }; + + (hash, token) + } +} -- cgit v1.2.3