From 698f8c2f01ea549d77d7dc3338a12e04c11057b9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:02:58 +0200 Subject: Adding upstream version 1.64.0+dfsg1. Signed-off-by: Daniel Baumann --- .../src/graph/dominators/mod.rs | 324 ++++++++++++ .../src/graph/dominators/tests.rs | 45 ++ .../src/graph/implementation/mod.rs | 366 +++++++++++++ .../src/graph/implementation/tests.rs | 131 +++++ .../rustc_data_structures/src/graph/iterate/mod.rs | 353 +++++++++++++ .../src/graph/iterate/tests.rs | 38 ++ compiler/rustc_data_structures/src/graph/mod.rs | 81 +++ .../rustc_data_structures/src/graph/reference.rs | 39 ++ .../rustc_data_structures/src/graph/scc/mod.rs | 567 +++++++++++++++++++++ .../rustc_data_structures/src/graph/scc/tests.rs | 216 ++++++++ compiler/rustc_data_structures/src/graph/tests.rs | 73 +++ .../src/graph/vec_graph/mod.rs | 109 ++++ .../src/graph/vec_graph/tests.rs | 42 ++ 13 files changed, 2384 insertions(+) create mode 100644 compiler/rustc_data_structures/src/graph/dominators/mod.rs create mode 100644 compiler/rustc_data_structures/src/graph/dominators/tests.rs create mode 100644 compiler/rustc_data_structures/src/graph/implementation/mod.rs create mode 100644 compiler/rustc_data_structures/src/graph/implementation/tests.rs create mode 100644 compiler/rustc_data_structures/src/graph/iterate/mod.rs create mode 100644 compiler/rustc_data_structures/src/graph/iterate/tests.rs create mode 100644 compiler/rustc_data_structures/src/graph/mod.rs create mode 100644 compiler/rustc_data_structures/src/graph/reference.rs create mode 100644 compiler/rustc_data_structures/src/graph/scc/mod.rs create mode 100644 compiler/rustc_data_structures/src/graph/scc/tests.rs create mode 100644 compiler/rustc_data_structures/src/graph/tests.rs create mode 100644 compiler/rustc_data_structures/src/graph/vec_graph/mod.rs create mode 100644 compiler/rustc_data_structures/src/graph/vec_graph/tests.rs (limited to 'compiler/rustc_data_structures/src/graph') diff --git a/compiler/rustc_data_structures/src/graph/dominators/mod.rs b/compiler/rustc_data_structures/src/graph/dominators/mod.rs new file mode 100644 index 000000000..00913a483 --- /dev/null +++ b/compiler/rustc_data_structures/src/graph/dominators/mod.rs @@ -0,0 +1,324 @@ +//! Finding the dominators in a control-flow graph. +//! +//! Algorithm based on Loukas Georgiadis, +//! "Linear-Time Algorithms for Dominators and Related Problems", +//! +//! +//! Additionally useful is the original Lengauer-Tarjan paper on this subject, +//! "A Fast Algorithm for Finding Dominators in a Flowgraph" +//! Thomas Lengauer and Robert Endre Tarjan. +//! + +use super::ControlFlowGraph; +use rustc_index::vec::{Idx, IndexVec}; +use std::cmp::Ordering; + +#[cfg(test)] +mod tests; + +struct PreOrderFrame { + pre_order_idx: PreorderIndex, + iter: Iter, +} + +rustc_index::newtype_index! { + struct PreorderIndex { .. } +} + +pub fn dominators(graph: G) -> Dominators { + // compute the post order index (rank) for each node + let mut post_order_rank = IndexVec::from_elem_n(0, graph.num_nodes()); + + // We allocate capacity for the full set of nodes, because most of the time + // most of the nodes *are* reachable. + let mut parent: IndexVec = + IndexVec::with_capacity(graph.num_nodes()); + + let mut stack = vec![PreOrderFrame { + pre_order_idx: PreorderIndex::new(0), + iter: graph.successors(graph.start_node()), + }]; + let mut pre_order_to_real: IndexVec = + IndexVec::with_capacity(graph.num_nodes()); + let mut real_to_pre_order: IndexVec> = + IndexVec::from_elem_n(None, graph.num_nodes()); + pre_order_to_real.push(graph.start_node()); + parent.push(PreorderIndex::new(0)); // the parent of the root node is the root for now. + real_to_pre_order[graph.start_node()] = Some(PreorderIndex::new(0)); + let mut post_order_idx = 0; + + // Traverse the graph, collecting a number of things: + // + // * Preorder mapping (to it, and back to the actual ordering) + // * Postorder mapping (used exclusively for rank_partial_cmp on the final product) + // * Parents for each vertex in the preorder tree + // + // These are all done here rather than through one of the 'standard' + // graph traversals to help make this fast. + 'recurse: while let Some(frame) = stack.last_mut() { + while let Some(successor) = frame.iter.next() { + if real_to_pre_order[successor].is_none() { + let pre_order_idx = pre_order_to_real.push(successor); + real_to_pre_order[successor] = Some(pre_order_idx); + parent.push(frame.pre_order_idx); + stack.push(PreOrderFrame { pre_order_idx, iter: graph.successors(successor) }); + + continue 'recurse; + } + } + post_order_rank[pre_order_to_real[frame.pre_order_idx]] = post_order_idx; + post_order_idx += 1; + + stack.pop(); + } + + let reachable_vertices = pre_order_to_real.len(); + + let mut idom = IndexVec::from_elem_n(PreorderIndex::new(0), reachable_vertices); + let mut semi = IndexVec::from_fn_n(std::convert::identity, reachable_vertices); + let mut label = semi.clone(); + let mut bucket = IndexVec::from_elem_n(vec![], reachable_vertices); + let mut lastlinked = None; + + // We loop over vertices in reverse preorder. This implements the pseudocode + // of the simple Lengauer-Tarjan algorithm. A few key facts are noted here + // which are helpful for understanding the code (full proofs and such are + // found in various papers, including one cited at the top of this file). + // + // For each vertex w (which is not the root), + // * semi[w] is a proper ancestor of the vertex w (i.e., semi[w] != w) + // * idom[w] is an ancestor of semi[w] (i.e., idom[w] may equal semi[w]) + // + // An immediate dominator of w (idom[w]) is a vertex v where v dominates w + // and every other dominator of w dominates v. (Every vertex except the root has + // a unique immediate dominator.) + // + // A semidominator for a given vertex w (semi[w]) is the vertex v with minimum + // preorder number such that there exists a path from v to w in which all elements (other than w) have + // preorder numbers greater than w (i.e., this path is not the tree path to + // w). + for w in (PreorderIndex::new(1)..PreorderIndex::new(reachable_vertices)).rev() { + // Optimization: process buckets just once, at the start of the + // iteration. Do not explicitly empty the bucket (even though it will + // not be used again), to save some instructions. + // + // The bucket here contains the vertices whose semidominator is the + // vertex w, which we are guaranteed to have found: all vertices who can + // be semidominated by w must have a preorder number exceeding w, so + // they have been placed in the bucket. + // + // We compute a partial set of immediate dominators here. + let z = parent[w]; + for &v in bucket[z].iter() { + // This uses the result of Lemma 5 from section 2 from the original + // 1979 paper, to compute either the immediate or relative dominator + // for a given vertex v. + // + // eval returns a vertex y, for which semi[y] is minimum among + // vertices semi[v] +> y *> v. Note that semi[v] = z as we're in the + // z bucket. + // + // Given such a vertex y, semi[y] <= semi[v] and idom[y] = idom[v]. + // If semi[y] = semi[v], though, idom[v] = semi[v]. + // + // Using this, we can either set idom[v] to be: + // * semi[v] (i.e. z), if semi[y] is z + // * idom[y], otherwise + // + // We don't directly set to idom[y] though as it's not necessarily + // known yet. The second preorder traversal will cleanup by updating + // the idom for any that were missed in this pass. + let y = eval(&mut parent, lastlinked, &semi, &mut label, v); + idom[v] = if semi[y] < z { y } else { z }; + } + + // This loop computes the semi[w] for w. + semi[w] = w; + for v in graph.predecessors(pre_order_to_real[w]) { + let v = real_to_pre_order[v].unwrap(); + + // eval returns a vertex x from which semi[x] is minimum among + // vertices semi[v] +> x *> v. + // + // From Lemma 4 from section 2, we know that the semidominator of a + // vertex w is the minimum (by preorder number) vertex of the + // following: + // + // * direct predecessors of w with preorder number less than w + // * semidominators of u such that u > w and there exists (v, w) + // such that u *> v + // + // This loop therefore identifies such a minima. Note that any + // semidominator path to w must have all but the first vertex go + // through vertices numbered greater than w, so the reverse preorder + // traversal we are using guarantees that all of the information we + // might need is available at this point. + // + // The eval call will give us semi[x], which is either: + // + // * v itself, if v has not yet been processed + // * A possible 'best' semidominator for w. + let x = eval(&mut parent, lastlinked, &semi, &mut label, v); + semi[w] = std::cmp::min(semi[w], semi[x]); + } + // semi[w] is now semidominator(w) and won't change any more. + + // Optimization: Do not insert into buckets if parent[w] = semi[w], as + // we then immediately know the idom. + // + // If we don't yet know the idom directly, then push this vertex into + // our semidominator's bucket, where it will get processed at a later + // stage to compute its immediate dominator. + if parent[w] != semi[w] { + bucket[semi[w]].push(w); + } else { + idom[w] = parent[w]; + } + + // Optimization: We share the parent array between processed and not + // processed elements; lastlinked represents the divider. + lastlinked = Some(w); + } + + // Finalize the idoms for any that were not fully settable during initial + // traversal. + // + // If idom[w] != semi[w] then we know that we've stored vertex y from above + // into idom[w]. It is known to be our 'relative dominator', which means + // that it's one of w's ancestors and has the same immediate dominator as w, + // so use that idom. + for w in PreorderIndex::new(1)..PreorderIndex::new(reachable_vertices) { + if idom[w] != semi[w] { + idom[w] = idom[idom[w]]; + } + } + + let mut immediate_dominators = IndexVec::from_elem_n(None, graph.num_nodes()); + for (idx, node) in pre_order_to_real.iter_enumerated() { + immediate_dominators[*node] = Some(pre_order_to_real[idom[idx]]); + } + + Dominators { post_order_rank, immediate_dominators } +} + +/// Evaluate the link-eval virtual forest, providing the currently minimum semi +/// value for the passed `node` (which may be itself). +/// +/// This maintains that for every vertex v, `label[v]` is such that: +/// +/// ```text +/// semi[eval(v)] = min { semi[label[u]] | root_in_forest(v) +> u *> v } +/// ``` +/// +/// where `+>` is a proper ancestor and `*>` is just an ancestor. +#[inline] +fn eval( + ancestor: &mut IndexVec, + lastlinked: Option, + semi: &IndexVec, + label: &mut IndexVec, + node: PreorderIndex, +) -> PreorderIndex { + if is_processed(node, lastlinked) { + compress(ancestor, lastlinked, semi, label, node); + label[node] + } else { + node + } +} + +#[inline] +fn is_processed(v: PreorderIndex, lastlinked: Option) -> bool { + if let Some(ll) = lastlinked { v >= ll } else { false } +} + +#[inline] +fn compress( + ancestor: &mut IndexVec, + lastlinked: Option, + semi: &IndexVec, + label: &mut IndexVec, + v: PreorderIndex, +) { + assert!(is_processed(v, lastlinked)); + // Compute the processed list of ancestors + // + // We use a heap stack here to avoid recursing too deeply, exhausting the + // stack space. + let mut stack: smallvec::SmallVec<[_; 8]> = smallvec::smallvec![v]; + let mut u = ancestor[v]; + while is_processed(u, lastlinked) { + stack.push(u); + u = ancestor[u]; + } + + // Then in reverse order, popping the stack + for &[v, u] in stack.array_windows().rev() { + if semi[label[u]] < semi[label[v]] { + label[v] = label[u]; + } + ancestor[v] = ancestor[u]; + } +} + +#[derive(Clone, Debug)] +pub struct Dominators { + post_order_rank: IndexVec, + immediate_dominators: IndexVec>, +} + +impl Dominators { + pub fn dummy() -> Self { + Self { post_order_rank: IndexVec::new(), immediate_dominators: IndexVec::new() } + } + + pub fn is_reachable(&self, node: Node) -> bool { + self.immediate_dominators[node].is_some() + } + + pub fn immediate_dominator(&self, node: Node) -> Node { + assert!(self.is_reachable(node), "node {:?} is not reachable", node); + self.immediate_dominators[node].unwrap() + } + + pub fn dominators(&self, node: Node) -> Iter<'_, Node> { + assert!(self.is_reachable(node), "node {:?} is not reachable", node); + Iter { dominators: self, node: Some(node) } + } + + pub fn is_dominated_by(&self, node: Node, dom: Node) -> bool { + // FIXME -- could be optimized by using post-order-rank + self.dominators(node).any(|n| n == dom) + } + + /// Provide deterministic ordering of nodes such that, if any two nodes have a dominator + /// relationship, the dominator will always precede the dominated. (The relative ordering + /// of two unrelated nodes will also be consistent, but otherwise the order has no + /// meaning.) This method cannot be used to determine if either Node dominates the other. + pub fn rank_partial_cmp(&self, lhs: Node, rhs: Node) -> Option { + self.post_order_rank[lhs].partial_cmp(&self.post_order_rank[rhs]) + } +} + +pub struct Iter<'dom, Node: Idx> { + dominators: &'dom Dominators, + node: Option, +} + +impl<'dom, Node: Idx> Iterator for Iter<'dom, Node> { + type Item = Node; + + fn next(&mut self) -> Option { + if let Some(node) = self.node { + let dom = self.dominators.immediate_dominator(node); + if dom == node { + self.node = None; // reached the root + } else { + self.node = Some(dom); + } + Some(node) + } else { + None + } + } +} diff --git a/compiler/rustc_data_structures/src/graph/dominators/tests.rs b/compiler/rustc_data_structures/src/graph/dominators/tests.rs new file mode 100644 index 000000000..ff31d8f7f --- /dev/null +++ b/compiler/rustc_data_structures/src/graph/dominators/tests.rs @@ -0,0 +1,45 @@ +use super::*; + +use super::super::tests::TestGraph; + +#[test] +fn diamond() { + let graph = TestGraph::new(0, &[(0, 1), (0, 2), (1, 3), (2, 3)]); + + let dominators = dominators(&graph); + let immediate_dominators = &dominators.immediate_dominators; + assert_eq!(immediate_dominators[0], Some(0)); + assert_eq!(immediate_dominators[1], Some(0)); + assert_eq!(immediate_dominators[2], Some(0)); + assert_eq!(immediate_dominators[3], Some(0)); +} + +#[test] +fn paper() { + // example from the paper: + let graph = TestGraph::new( + 6, + &[(6, 5), (6, 4), (5, 1), (4, 2), (4, 3), (1, 2), (2, 3), (3, 2), (2, 1)], + ); + + let dominators = dominators(&graph); + let immediate_dominators = &dominators.immediate_dominators; + assert_eq!(immediate_dominators[0], None); // <-- note that 0 is not in graph + assert_eq!(immediate_dominators[1], Some(6)); + assert_eq!(immediate_dominators[2], Some(6)); + assert_eq!(immediate_dominators[3], Some(6)); + assert_eq!(immediate_dominators[4], Some(6)); + assert_eq!(immediate_dominators[5], Some(6)); + assert_eq!(immediate_dominators[6], Some(6)); +} + +#[test] +fn paper_slt() { + // example from the paper: + let graph = TestGraph::new( + 1, + &[(1, 2), (1, 3), (2, 3), (2, 7), (3, 4), (3, 6), (4, 5), (5, 4), (6, 7), (7, 8), (8, 5)], + ); + + dominators(&graph); +} diff --git a/compiler/rustc_data_structures/src/graph/implementation/mod.rs b/compiler/rustc_data_structures/src/graph/implementation/mod.rs new file mode 100644 index 000000000..1aa7ac024 --- /dev/null +++ b/compiler/rustc_data_structures/src/graph/implementation/mod.rs @@ -0,0 +1,366 @@ +//! A graph module for use in dataflow, region resolution, and elsewhere. +//! +//! # Interface details +//! +//! You customize the graph by specifying a "node data" type `N` and an +//! "edge data" type `E`. You can then later gain access (mutable or +//! immutable) to these "user-data" bits. Currently, you can only add +//! nodes or edges to the graph. You cannot remove or modify them once +//! added. This could be changed if we have a need. +//! +//! # Implementation details +//! +//! The main tricky thing about this code is the way that edges are +//! stored. The edges are stored in a central array, but they are also +//! threaded onto two linked lists for each node, one for incoming edges +//! and one for outgoing edges. Note that every edge is a member of some +//! incoming list and some outgoing list. Basically you can load the +//! first index of the linked list from the node data structures (the +//! field `first_edge`) and then, for each edge, load the next index from +//! the field `next_edge`). Each of those fields is an array that should +//! be indexed by the direction (see the type `Direction`). + +use crate::snapshot_vec::{SnapshotVec, SnapshotVecDelegate}; +use rustc_index::bit_set::BitSet; +use std::fmt::Debug; + +#[cfg(test)] +mod tests; + +pub struct Graph { + nodes: SnapshotVec>, + edges: SnapshotVec>, +} + +pub struct Node { + first_edge: [EdgeIndex; 2], // see module comment + pub data: N, +} + +#[derive(Debug)] +pub struct Edge { + next_edge: [EdgeIndex; 2], // see module comment + source: NodeIndex, + target: NodeIndex, + pub data: E, +} + +impl SnapshotVecDelegate for Node { + type Value = Node; + type Undo = (); + + fn reverse(_: &mut Vec>, _: ()) {} +} + +impl SnapshotVecDelegate for Edge { + type Value = Edge; + type Undo = (); + + fn reverse(_: &mut Vec>, _: ()) {} +} + +#[derive(Copy, Clone, PartialEq, Debug)] +pub struct NodeIndex(pub usize); + +#[derive(Copy, Clone, PartialEq, Debug)] +pub struct EdgeIndex(pub usize); + +pub const INVALID_EDGE_INDEX: EdgeIndex = EdgeIndex(usize::MAX); + +// Use a private field here to guarantee no more instances are created: +#[derive(Copy, Clone, Debug, PartialEq)] +pub struct Direction { + repr: usize, +} + +pub const OUTGOING: Direction = Direction { repr: 0 }; + +pub const INCOMING: Direction = Direction { repr: 1 }; + +impl NodeIndex { + /// Returns unique ID (unique with respect to the graph holding associated node). + pub fn node_id(self) -> usize { + self.0 + } +} + +impl Graph { + pub fn new() -> Graph { + Graph { nodes: SnapshotVec::new(), edges: SnapshotVec::new() } + } + + pub fn with_capacity(nodes: usize, edges: usize) -> Graph { + Graph { nodes: SnapshotVec::with_capacity(nodes), edges: SnapshotVec::with_capacity(edges) } + } + + // # Simple accessors + + #[inline] + pub fn all_nodes(&self) -> &[Node] { + &self.nodes + } + + #[inline] + pub fn len_nodes(&self) -> usize { + self.nodes.len() + } + + #[inline] + pub fn all_edges(&self) -> &[Edge] { + &self.edges + } + + #[inline] + pub fn len_edges(&self) -> usize { + self.edges.len() + } + + // # Node construction + + pub fn next_node_index(&self) -> NodeIndex { + NodeIndex(self.nodes.len()) + } + + pub fn add_node(&mut self, data: N) -> NodeIndex { + let idx = self.next_node_index(); + self.nodes.push(Node { first_edge: [INVALID_EDGE_INDEX, INVALID_EDGE_INDEX], data }); + idx + } + + pub fn mut_node_data(&mut self, idx: NodeIndex) -> &mut N { + &mut self.nodes[idx.0].data + } + + pub fn node_data(&self, idx: NodeIndex) -> &N { + &self.nodes[idx.0].data + } + + pub fn node(&self, idx: NodeIndex) -> &Node { + &self.nodes[idx.0] + } + + // # Edge construction and queries + + pub fn next_edge_index(&self) -> EdgeIndex { + EdgeIndex(self.edges.len()) + } + + pub fn add_edge(&mut self, source: NodeIndex, target: NodeIndex, data: E) -> EdgeIndex { + debug!("graph: add_edge({:?}, {:?}, {:?})", source, target, data); + + let idx = self.next_edge_index(); + + // read current first of the list of edges from each node + let source_first = self.nodes[source.0].first_edge[OUTGOING.repr]; + let target_first = self.nodes[target.0].first_edge[INCOMING.repr]; + + // create the new edge, with the previous firsts from each node + // as the next pointers + self.edges.push(Edge { next_edge: [source_first, target_first], source, target, data }); + + // adjust the firsts for each node target be the next object. + self.nodes[source.0].first_edge[OUTGOING.repr] = idx; + self.nodes[target.0].first_edge[INCOMING.repr] = idx; + + idx + } + + pub fn edge(&self, idx: EdgeIndex) -> &Edge { + &self.edges[idx.0] + } + + // # Iterating over nodes, edges + + pub fn enumerated_nodes(&self) -> impl Iterator)> { + self.nodes.iter().enumerate().map(|(idx, n)| (NodeIndex(idx), n)) + } + + pub fn enumerated_edges(&self) -> impl Iterator)> { + self.edges.iter().enumerate().map(|(idx, e)| (EdgeIndex(idx), e)) + } + + pub fn each_node<'a>(&'a self, mut f: impl FnMut(NodeIndex, &'a Node) -> bool) -> bool { + //! Iterates over all edges defined in the graph. + self.enumerated_nodes().all(|(node_idx, node)| f(node_idx, node)) + } + + pub fn each_edge<'a>(&'a self, mut f: impl FnMut(EdgeIndex, &'a Edge) -> bool) -> bool { + //! Iterates over all edges defined in the graph + self.enumerated_edges().all(|(edge_idx, edge)| f(edge_idx, edge)) + } + + pub fn outgoing_edges(&self, source: NodeIndex) -> AdjacentEdges<'_, N, E> { + self.adjacent_edges(source, OUTGOING) + } + + pub fn incoming_edges(&self, source: NodeIndex) -> AdjacentEdges<'_, N, E> { + self.adjacent_edges(source, INCOMING) + } + + pub fn adjacent_edges( + &self, + source: NodeIndex, + direction: Direction, + ) -> AdjacentEdges<'_, N, E> { + let first_edge = self.node(source).first_edge[direction.repr]; + AdjacentEdges { graph: self, direction, next: first_edge } + } + + pub fn successor_nodes<'a>( + &'a self, + source: NodeIndex, + ) -> impl Iterator + 'a { + self.outgoing_edges(source).targets() + } + + pub fn predecessor_nodes<'a>( + &'a self, + target: NodeIndex, + ) -> impl Iterator + 'a { + self.incoming_edges(target).sources() + } + + pub fn depth_traverse( + &self, + start: NodeIndex, + direction: Direction, + ) -> DepthFirstTraversal<'_, N, E> { + DepthFirstTraversal::with_start_node(self, start, direction) + } + + pub fn nodes_in_postorder( + &self, + direction: Direction, + entry_node: NodeIndex, + ) -> Vec { + let mut visited = BitSet::new_empty(self.len_nodes()); + let mut stack = vec![]; + let mut result = Vec::with_capacity(self.len_nodes()); + let mut push_node = |stack: &mut Vec<_>, node: NodeIndex| { + if visited.insert(node.0) { + stack.push((node, self.adjacent_edges(node, direction))); + } + }; + + for node in + Some(entry_node).into_iter().chain(self.enumerated_nodes().map(|(node, _)| node)) + { + push_node(&mut stack, node); + while let Some((node, mut iter)) = stack.pop() { + if let Some((_, child)) = iter.next() { + let target = child.source_or_target(direction); + // the current node needs more processing, so + // add it back to the stack + stack.push((node, iter)); + // and then push the new node + push_node(&mut stack, target); + } else { + result.push(node); + } + } + } + + assert_eq!(result.len(), self.len_nodes()); + result + } +} + +// # Iterators + +pub struct AdjacentEdges<'g, N, E> { + graph: &'g Graph, + direction: Direction, + next: EdgeIndex, +} + +impl<'g, N: Debug, E: Debug> AdjacentEdges<'g, N, E> { + fn targets(self) -> impl Iterator + 'g { + self.map(|(_, edge)| edge.target) + } + + fn sources(self) -> impl Iterator + 'g { + self.map(|(_, edge)| edge.source) + } +} + +impl<'g, N: Debug, E: Debug> Iterator for AdjacentEdges<'g, N, E> { + type Item = (EdgeIndex, &'g Edge); + + fn next(&mut self) -> Option<(EdgeIndex, &'g Edge)> { + let edge_index = self.next; + if edge_index == INVALID_EDGE_INDEX { + return None; + } + + let edge = self.graph.edge(edge_index); + self.next = edge.next_edge[self.direction.repr]; + Some((edge_index, edge)) + } + + fn size_hint(&self) -> (usize, Option) { + // At most, all the edges in the graph. + (0, Some(self.graph.len_edges())) + } +} + +pub struct DepthFirstTraversal<'g, N, E> { + graph: &'g Graph, + stack: Vec, + visited: BitSet, + direction: Direction, +} + +impl<'g, N: Debug, E: Debug> DepthFirstTraversal<'g, N, E> { + pub fn with_start_node( + graph: &'g Graph, + start_node: NodeIndex, + direction: Direction, + ) -> Self { + let mut visited = BitSet::new_empty(graph.len_nodes()); + visited.insert(start_node.node_id()); + DepthFirstTraversal { graph, stack: vec![start_node], visited, direction } + } + + fn visit(&mut self, node: NodeIndex) { + if self.visited.insert(node.node_id()) { + self.stack.push(node); + } + } +} + +impl<'g, N: Debug, E: Debug> Iterator for DepthFirstTraversal<'g, N, E> { + type Item = NodeIndex; + + fn next(&mut self) -> Option { + let next = self.stack.pop(); + if let Some(idx) = next { + for (_, edge) in self.graph.adjacent_edges(idx, self.direction) { + let target = edge.source_or_target(self.direction); + self.visit(target); + } + } + next + } + + fn size_hint(&self) -> (usize, Option) { + // We will visit every node in the graph exactly once. + let remaining = self.graph.len_nodes() - self.visited.count(); + (remaining, Some(remaining)) + } +} + +impl<'g, N: Debug, E: Debug> ExactSizeIterator for DepthFirstTraversal<'g, N, E> {} + +impl Edge { + pub fn source(&self) -> NodeIndex { + self.source + } + + pub fn target(&self) -> NodeIndex { + self.target + } + + pub fn source_or_target(&self, direction: Direction) -> NodeIndex { + if direction == OUTGOING { self.target } else { self.source } + } +} diff --git a/compiler/rustc_data_structures/src/graph/implementation/tests.rs b/compiler/rustc_data_structures/src/graph/implementation/tests.rs new file mode 100644 index 000000000..e4e4d0d44 --- /dev/null +++ b/compiler/rustc_data_structures/src/graph/implementation/tests.rs @@ -0,0 +1,131 @@ +use crate::graph::implementation::*; +use std::fmt::Debug; + +type TestGraph = Graph<&'static str, &'static str>; + +fn create_graph() -> TestGraph { + let mut graph = Graph::new(); + + // Create a simple graph + // + // F + // | + // V + // A --> B --> C + // | ^ + // v | + // D --> E + + let a = graph.add_node("A"); + let b = graph.add_node("B"); + let c = graph.add_node("C"); + let d = graph.add_node("D"); + let e = graph.add_node("E"); + let f = graph.add_node("F"); + + graph.add_edge(a, b, "AB"); + graph.add_edge(b, c, "BC"); + graph.add_edge(b, d, "BD"); + graph.add_edge(d, e, "DE"); + graph.add_edge(e, c, "EC"); + graph.add_edge(f, b, "FB"); + + return graph; +} + +#[test] +fn each_node() { + let graph = create_graph(); + let expected = ["A", "B", "C", "D", "E", "F"]; + graph.each_node(|idx, node| { + assert_eq!(&expected[idx.0], graph.node_data(idx)); + assert_eq!(expected[idx.0], node.data); + true + }); +} + +#[test] +fn each_edge() { + let graph = create_graph(); + let expected = ["AB", "BC", "BD", "DE", "EC", "FB"]; + graph.each_edge(|idx, edge| { + assert_eq!(expected[idx.0], edge.data); + true + }); +} + +fn test_adjacent_edges( + graph: &Graph, + start_index: NodeIndex, + start_data: N, + expected_incoming: &[(E, N)], + expected_outgoing: &[(E, N)], +) { + assert!(graph.node_data(start_index) == &start_data); + + let mut counter = 0; + for (edge_index, edge) in graph.incoming_edges(start_index) { + assert!(counter < expected_incoming.len()); + debug!( + "counter={:?} expected={:?} edge_index={:?} edge={:?}", + counter, expected_incoming[counter], edge_index, edge + ); + match expected_incoming[counter] { + (ref e, ref n) => { + assert!(e == &edge.data); + assert!(n == graph.node_data(edge.source())); + assert!(start_index == edge.target); + } + } + counter += 1; + } + assert_eq!(counter, expected_incoming.len()); + + let mut counter = 0; + for (edge_index, edge) in graph.outgoing_edges(start_index) { + assert!(counter < expected_outgoing.len()); + debug!( + "counter={:?} expected={:?} edge_index={:?} edge={:?}", + counter, expected_outgoing[counter], edge_index, edge + ); + match expected_outgoing[counter] { + (ref e, ref n) => { + assert!(e == &edge.data); + assert!(start_index == edge.source); + assert!(n == graph.node_data(edge.target)); + } + } + counter += 1; + } + assert_eq!(counter, expected_outgoing.len()); +} + +#[test] +fn each_adjacent_from_a() { + let graph = create_graph(); + test_adjacent_edges(&graph, NodeIndex(0), "A", &[], &[("AB", "B")]); +} + +#[test] +fn each_adjacent_from_b() { + let graph = create_graph(); + test_adjacent_edges( + &graph, + NodeIndex(1), + "B", + &[("FB", "F"), ("AB", "A")], + &[("BD", "D"), ("BC", "C")], + ); +} + +#[test] +fn each_adjacent_from_c() { + let graph = create_graph(); + test_adjacent_edges(&graph, NodeIndex(2), "C", &[("EC", "E"), ("BC", "B")], &[]); +} + +#[test] +fn each_adjacent_from_d() { + let graph = create_graph(); + test_adjacent_edges(&graph, NodeIndex(3), "D", &[("BD", "B")], &[("DE", "E")]); +} diff --git a/compiler/rustc_data_structures/src/graph/iterate/mod.rs b/compiler/rustc_data_structures/src/graph/iterate/mod.rs new file mode 100644 index 000000000..57007611a --- /dev/null +++ b/compiler/rustc_data_structures/src/graph/iterate/mod.rs @@ -0,0 +1,353 @@ +use super::{DirectedGraph, WithNumNodes, WithStartNode, WithSuccessors}; +use rustc_index::bit_set::BitSet; +use rustc_index::vec::IndexVec; +use std::ops::ControlFlow; + +#[cfg(test)] +mod tests; + +pub fn post_order_from( + graph: &G, + start_node: G::Node, +) -> Vec { + post_order_from_to(graph, start_node, None) +} + +pub fn post_order_from_to( + graph: &G, + start_node: G::Node, + end_node: Option, +) -> Vec { + let mut visited: IndexVec = IndexVec::from_elem_n(false, graph.num_nodes()); + let mut result: Vec = Vec::with_capacity(graph.num_nodes()); + if let Some(end_node) = end_node { + visited[end_node] = true; + } + post_order_walk(graph, start_node, &mut result, &mut visited); + result +} + +fn post_order_walk( + graph: &G, + node: G::Node, + result: &mut Vec, + visited: &mut IndexVec, +) { + struct PostOrderFrame { + node: Node, + iter: Iter, + } + + if visited[node] { + return; + } + + let mut stack = vec![PostOrderFrame { node, iter: graph.successors(node) }]; + + 'recurse: while let Some(frame) = stack.last_mut() { + let node = frame.node; + visited[node] = true; + + while let Some(successor) = frame.iter.next() { + if !visited[successor] { + stack.push(PostOrderFrame { node: successor, iter: graph.successors(successor) }); + continue 'recurse; + } + } + + let _ = stack.pop(); + result.push(node); + } +} + +pub fn reverse_post_order( + graph: &G, + start_node: G::Node, +) -> Vec { + let mut vec = post_order_from(graph, start_node); + vec.reverse(); + vec +} + +/// A "depth-first search" iterator for a directed graph. +pub struct DepthFirstSearch<'graph, G> +where + G: ?Sized + DirectedGraph + WithNumNodes + WithSuccessors, +{ + graph: &'graph G, + stack: Vec, + visited: BitSet, +} + +impl<'graph, G> DepthFirstSearch<'graph, G> +where + G: ?Sized + DirectedGraph + WithNumNodes + WithSuccessors, +{ + pub fn new(graph: &'graph G) -> Self { + Self { graph, stack: vec![], visited: BitSet::new_empty(graph.num_nodes()) } + } + + /// Version of `push_start_node` that is convenient for chained + /// use. + pub fn with_start_node(mut self, start_node: G::Node) -> Self { + self.push_start_node(start_node); + self + } + + /// Pushes another start node onto the stack. If the node + /// has not already been visited, then you will be able to + /// walk its successors (and so forth) after the current + /// contents of the stack are drained. If multiple start nodes + /// are added into the walk, then their mutual successors + /// will all be walked. You can use this method once the + /// iterator has been completely drained to add additional + /// start nodes. + pub fn push_start_node(&mut self, start_node: G::Node) { + if self.visited.insert(start_node) { + self.stack.push(start_node); + } + } + + /// Searches all nodes reachable from the current start nodes. + /// This is equivalent to just invoke `next` repeatedly until + /// you get a `None` result. + pub fn complete_search(&mut self) { + while let Some(_) = self.next() {} + } + + /// Returns true if node has been visited thus far. + /// A node is considered "visited" once it is pushed + /// onto the internal stack; it may not yet have been yielded + /// from the iterator. This method is best used after + /// the iterator is completely drained. + pub fn visited(&self, node: G::Node) -> bool { + self.visited.contains(node) + } +} + +impl std::fmt::Debug for DepthFirstSearch<'_, G> +where + G: ?Sized + DirectedGraph + WithNumNodes + WithSuccessors, +{ + fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut f = fmt.debug_set(); + for n in self.visited.iter() { + f.entry(&n); + } + f.finish() + } +} + +impl Iterator for DepthFirstSearch<'_, G> +where + G: ?Sized + DirectedGraph + WithNumNodes + WithSuccessors, +{ + type Item = G::Node; + + fn next(&mut self) -> Option { + let DepthFirstSearch { stack, visited, graph } = self; + let n = stack.pop()?; + stack.extend(graph.successors(n).filter(|&m| visited.insert(m))); + Some(n) + } +} + +/// The status of a node in the depth-first search. +/// +/// See the documentation of `TriColorDepthFirstSearch` to see how a node's status is updated +/// during DFS. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum NodeStatus { + /// This node has been examined by the depth-first search but is not yet `Settled`. + /// + /// Also referred to as "gray" or "discovered" nodes in [CLR]. + /// + /// [CLR]: https://en.wikipedia.org/wiki/Introduction_to_Algorithms + Visited, + + /// This node and all nodes reachable from it have been examined by the depth-first search. + /// + /// Also referred to as "black" or "finished" nodes in [CLR]. + /// + /// [CLR]: https://en.wikipedia.org/wiki/Introduction_to_Algorithms + Settled, +} + +struct Event { + node: N, + becomes: NodeStatus, +} + +/// A depth-first search that also tracks when all successors of a node have been examined. +/// +/// This is based on the DFS described in [Introduction to Algorithms (1st ed.)][CLR], hereby +/// referred to as **CLR**. However, we use the terminology in [`NodeStatus`] above instead of +/// "discovered"/"finished" or "white"/"grey"/"black". Each node begins the search with no status, +/// becomes `Visited` when it is first examined by the DFS and is `Settled` when all nodes +/// reachable from it have been examined. This allows us to differentiate between "tree", "back" +/// and "forward" edges (see [`TriColorVisitor::node_examined`]). +/// +/// Unlike the pseudocode in [CLR], this implementation is iterative and does not use timestamps. +/// We accomplish this by storing `Event`s on the stack that result in a (possible) state change +/// for each node. A `Visited` event signifies that we should examine this node if it has not yet +/// been `Visited` or `Settled`. When a node is examined for the first time, we mark it as +/// `Visited` and push a `Settled` event for it on stack followed by `Visited` events for all of +/// its predecessors, scheduling them for examination. Multiple `Visited` events for a single node +/// may exist on the stack simultaneously if a node has multiple predecessors, but only one +/// `Settled` event will ever be created for each node. After all `Visited` events for a node's +/// successors have been popped off the stack (as well as any new events triggered by visiting +/// those successors), we will pop off that node's `Settled` event. +/// +/// [CLR]: https://en.wikipedia.org/wiki/Introduction_to_Algorithms +pub struct TriColorDepthFirstSearch<'graph, G> +where + G: ?Sized + DirectedGraph + WithNumNodes + WithSuccessors, +{ + graph: &'graph G, + stack: Vec>, + visited: BitSet, + settled: BitSet, +} + +impl<'graph, G> TriColorDepthFirstSearch<'graph, G> +where + G: ?Sized + DirectedGraph + WithNumNodes + WithSuccessors, +{ + pub fn new(graph: &'graph G) -> Self { + TriColorDepthFirstSearch { + graph, + stack: vec![], + visited: BitSet::new_empty(graph.num_nodes()), + settled: BitSet::new_empty(graph.num_nodes()), + } + } + + /// Performs a depth-first search, starting from the given `root`. + /// + /// This won't visit nodes that are not reachable from `root`. + pub fn run_from(mut self, root: G::Node, visitor: &mut V) -> Option + where + V: TriColorVisitor, + { + use NodeStatus::{Settled, Visited}; + + self.stack.push(Event { node: root, becomes: Visited }); + + loop { + match self.stack.pop()? { + Event { node, becomes: Settled } => { + let not_previously_settled = self.settled.insert(node); + assert!(not_previously_settled, "A node should be settled exactly once"); + if let ControlFlow::Break(val) = visitor.node_settled(node) { + return Some(val); + } + } + + Event { node, becomes: Visited } => { + let not_previously_visited = self.visited.insert(node); + let prior_status = if not_previously_visited { + None + } else if self.settled.contains(node) { + Some(Settled) + } else { + Some(Visited) + }; + + if let ControlFlow::Break(val) = visitor.node_examined(node, prior_status) { + return Some(val); + } + + // If this node has already been examined, we are done. + if prior_status.is_some() { + continue; + } + + // Otherwise, push a `Settled` event for this node onto the stack, then + // schedule its successors for examination. + self.stack.push(Event { node, becomes: Settled }); + for succ in self.graph.successors(node) { + if !visitor.ignore_edge(node, succ) { + self.stack.push(Event { node: succ, becomes: Visited }); + } + } + } + } + } + } +} + +impl TriColorDepthFirstSearch<'_, G> +where + G: ?Sized + DirectedGraph + WithNumNodes + WithSuccessors + WithStartNode, +{ + /// Performs a depth-first search, starting from `G::start_node()`. + /// + /// This won't visit nodes that are not reachable from the start node. + pub fn run_from_start(self, visitor: &mut V) -> Option + where + V: TriColorVisitor, + { + let root = self.graph.start_node(); + self.run_from(root, visitor) + } +} + +/// What to do when a node is examined or becomes `Settled` during DFS. +pub trait TriColorVisitor +where + G: ?Sized + DirectedGraph, +{ + /// The value returned by this search. + type BreakVal; + + /// Called when a node is examined by the depth-first search. + /// + /// By checking the value of `prior_status`, this visitor can determine whether the edge + /// leading to this node was a tree edge (`None`), forward edge (`Some(Settled)`) or back edge + /// (`Some(Visited)`). For a full explanation of each edge type, see the "Depth-first Search" + /// chapter in [CLR] or [wikipedia]. + /// + /// If you want to know *both* nodes linked by each edge, you'll need to modify + /// `TriColorDepthFirstSearch` to store a `source` node for each `Visited` event. + /// + /// [wikipedia]: https://en.wikipedia.org/wiki/Depth-first_search#Output_of_a_depth-first_search + /// [CLR]: https://en.wikipedia.org/wiki/Introduction_to_Algorithms + fn node_examined( + &mut self, + _node: G::Node, + _prior_status: Option, + ) -> ControlFlow { + ControlFlow::CONTINUE + } + + /// Called after all nodes reachable from this one have been examined. + fn node_settled(&mut self, _node: G::Node) -> ControlFlow { + ControlFlow::CONTINUE + } + + /// Behave as if no edges exist from `source` to `target`. + fn ignore_edge(&mut self, _source: G::Node, _target: G::Node) -> bool { + false + } +} + +/// This `TriColorVisitor` looks for back edges in a graph, which indicate that a cycle exists. +pub struct CycleDetector; + +impl TriColorVisitor for CycleDetector +where + G: ?Sized + DirectedGraph, +{ + type BreakVal = (); + + fn node_examined( + &mut self, + _node: G::Node, + prior_status: Option, + ) -> ControlFlow { + match prior_status { + Some(NodeStatus::Visited) => ControlFlow::BREAK, + _ => ControlFlow::CONTINUE, + } + } +} diff --git a/compiler/rustc_data_structures/src/graph/iterate/tests.rs b/compiler/rustc_data_structures/src/graph/iterate/tests.rs new file mode 100644 index 000000000..c498c2893 --- /dev/null +++ b/compiler/rustc_data_structures/src/graph/iterate/tests.rs @@ -0,0 +1,38 @@ +use super::super::tests::TestGraph; + +use super::*; + +#[test] +fn diamond_post_order() { + let graph = TestGraph::new(0, &[(0, 1), (0, 2), (1, 3), (2, 3)]); + + let result = post_order_from(&graph, 0); + assert_eq!(result, vec![3, 1, 2, 0]); +} + +#[test] +fn is_cyclic() { + use super::super::is_cyclic; + + let diamond_acyclic = TestGraph::new(0, &[(0, 1), (0, 2), (1, 3), (2, 3)]); + let diamond_cyclic = TestGraph::new(0, &[(0, 1), (1, 2), (2, 3), (3, 0)]); + + assert!(!is_cyclic(&diamond_acyclic)); + assert!(is_cyclic(&diamond_cyclic)); +} + +#[test] +fn dfs() { + let graph = TestGraph::new(0, &[(0, 1), (0, 2), (1, 3), (2, 3), (3, 0)]); + + let result: Vec = DepthFirstSearch::new(&graph).with_start_node(0).collect(); + assert_eq!(result, vec![0, 2, 3, 1]); +} + +#[test] +fn dfs_debug() { + let graph = TestGraph::new(0, &[(0, 1), (0, 2), (1, 3), (2, 3), (3, 0)]); + let mut dfs = DepthFirstSearch::new(&graph).with_start_node(0); + dfs.complete_search(); + assert_eq!(format!("{{0, 1, 2, 3}}"), format!("{:?}", dfs)); +} diff --git a/compiler/rustc_data_structures/src/graph/mod.rs b/compiler/rustc_data_structures/src/graph/mod.rs new file mode 100644 index 000000000..3560df6e5 --- /dev/null +++ b/compiler/rustc_data_structures/src/graph/mod.rs @@ -0,0 +1,81 @@ +use rustc_index::vec::Idx; + +pub mod dominators; +pub mod implementation; +pub mod iterate; +mod reference; +pub mod scc; +pub mod vec_graph; + +#[cfg(test)] +mod tests; + +pub trait DirectedGraph { + type Node: Idx; +} + +pub trait WithNumNodes: DirectedGraph { + fn num_nodes(&self) -> usize; +} + +pub trait WithNumEdges: DirectedGraph { + fn num_edges(&self) -> usize; +} + +pub trait WithSuccessors: DirectedGraph +where + Self: for<'graph> GraphSuccessors<'graph, Item = ::Node>, +{ + fn successors(&self, node: Self::Node) -> >::Iter; + + fn depth_first_search(&self, from: Self::Node) -> iterate::DepthFirstSearch<'_, Self> + where + Self: WithNumNodes, + { + iterate::DepthFirstSearch::new(self).with_start_node(from) + } +} + +#[allow(unused_lifetimes)] +pub trait GraphSuccessors<'graph> { + type Item; + type Iter: Iterator; +} + +pub trait WithPredecessors: DirectedGraph +where + Self: for<'graph> GraphPredecessors<'graph, Item = ::Node>, +{ + fn predecessors(&self, node: Self::Node) -> >::Iter; +} + +#[allow(unused_lifetimes)] +pub trait GraphPredecessors<'graph> { + type Item; + type Iter: Iterator; +} + +pub trait WithStartNode: DirectedGraph { + fn start_node(&self) -> Self::Node; +} + +pub trait ControlFlowGraph: + DirectedGraph + WithStartNode + WithPredecessors + WithSuccessors + WithNumNodes +{ + // convenient trait +} + +impl ControlFlowGraph for T where + T: DirectedGraph + WithStartNode + WithPredecessors + WithSuccessors + WithNumNodes +{ +} + +/// Returns `true` if the graph has a cycle that is reachable from the start node. +pub fn is_cyclic(graph: &G) -> bool +where + G: ?Sized + DirectedGraph + WithStartNode + WithSuccessors + WithNumNodes, +{ + iterate::TriColorDepthFirstSearch::new(graph) + .run_from_start(&mut iterate::CycleDetector) + .is_some() +} diff --git a/compiler/rustc_data_structures/src/graph/reference.rs b/compiler/rustc_data_structures/src/graph/reference.rs new file mode 100644 index 000000000..c259fe56c --- /dev/null +++ b/compiler/rustc_data_structures/src/graph/reference.rs @@ -0,0 +1,39 @@ +use super::*; + +impl<'graph, G: DirectedGraph> DirectedGraph for &'graph G { + type Node = G::Node; +} + +impl<'graph, G: WithNumNodes> WithNumNodes for &'graph G { + fn num_nodes(&self) -> usize { + (**self).num_nodes() + } +} + +impl<'graph, G: WithStartNode> WithStartNode for &'graph G { + fn start_node(&self) -> Self::Node { + (**self).start_node() + } +} + +impl<'graph, G: WithSuccessors> WithSuccessors for &'graph G { + fn successors(&self, node: Self::Node) -> >::Iter { + (**self).successors(node) + } +} + +impl<'graph, G: WithPredecessors> WithPredecessors for &'graph G { + fn predecessors(&self, node: Self::Node) -> >::Iter { + (**self).predecessors(node) + } +} + +impl<'iter, 'graph, G: WithPredecessors> GraphPredecessors<'iter> for &'graph G { + type Item = G::Node; + type Iter = >::Iter; +} + +impl<'iter, 'graph, G: WithSuccessors> GraphSuccessors<'iter> for &'graph G { + type Item = G::Node; + type Iter = >::Iter; +} diff --git a/compiler/rustc_data_structures/src/graph/scc/mod.rs b/compiler/rustc_data_structures/src/graph/scc/mod.rs new file mode 100644 index 000000000..7099ca7eb --- /dev/null +++ b/compiler/rustc_data_structures/src/graph/scc/mod.rs @@ -0,0 +1,567 @@ +//! Routine to compute the strongly connected components (SCCs) of a graph. +//! +//! Also computes as the resulting DAG if each SCC is replaced with a +//! node in the graph. This uses [Tarjan's algorithm]( +//! https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm) +//! that completes in *O*(*n*) time. + +use crate::fx::FxHashSet; +use crate::graph::vec_graph::VecGraph; +use crate::graph::{DirectedGraph, GraphSuccessors, WithNumEdges, WithNumNodes, WithSuccessors}; +use rustc_index::vec::{Idx, IndexVec}; +use std::cmp::Ord; +use std::ops::Range; + +#[cfg(test)] +mod tests; + +/// Strongly connected components (SCC) of a graph. The type `N` is +/// the index type for the graph nodes and `S` is the index type for +/// the SCCs. We can map from each node to the SCC that it +/// participates in, and we also have the successors of each SCC. +pub struct Sccs { + /// For each node, what is the SCC index of the SCC to which it + /// belongs. + scc_indices: IndexVec, + + /// Data about each SCC. + scc_data: SccData, +} + +struct SccData { + /// For each SCC, the range of `all_successors` where its + /// successors can be found. + ranges: IndexVec>, + + /// Contains the successors for all the Sccs, concatenated. The + /// range of indices corresponding to a given SCC is found in its + /// SccData. + all_successors: Vec, +} + +impl Sccs { + pub fn new(graph: &(impl DirectedGraph + WithNumNodes + WithSuccessors)) -> Self { + SccsConstruction::construct(graph) + } + + /// Returns the number of SCCs in the graph. + pub fn num_sccs(&self) -> usize { + self.scc_data.len() + } + + /// Returns an iterator over the SCCs in the graph. + /// + /// The SCCs will be iterated in **dependency order** (or **post order**), + /// meaning that if `S1 -> S2`, we will visit `S2` first and `S1` after. + /// This is convenient when the edges represent dependencies: when you visit + /// `S1`, the value for `S2` will already have been computed. + pub fn all_sccs(&self) -> impl Iterator { + (0..self.scc_data.len()).map(S::new) + } + + /// Returns the SCC to which a node `r` belongs. + pub fn scc(&self, r: N) -> S { + self.scc_indices[r] + } + + /// Returns the successors of the given SCC. + pub fn successors(&self, scc: S) -> &[S] { + self.scc_data.successors(scc) + } + + /// Construct the reverse graph of the SCC graph. + pub fn reverse(&self) -> VecGraph { + VecGraph::new( + self.num_sccs(), + self.all_sccs() + .flat_map(|source| { + self.successors(source).iter().map(move |&target| (target, source)) + }) + .collect(), + ) + } +} + +impl DirectedGraph for Sccs { + type Node = S; +} + +impl WithNumNodes for Sccs { + fn num_nodes(&self) -> usize { + self.num_sccs() + } +} + +impl WithNumEdges for Sccs { + fn num_edges(&self) -> usize { + self.scc_data.all_successors.len() + } +} + +impl<'graph, N: Idx, S: Idx> GraphSuccessors<'graph> for Sccs { + type Item = S; + + type Iter = std::iter::Cloned>; +} + +impl WithSuccessors for Sccs { + fn successors(&self, node: S) -> >::Iter { + self.successors(node).iter().cloned() + } +} + +impl SccData { + /// Number of SCCs, + fn len(&self) -> usize { + self.ranges.len() + } + + /// Returns the successors of the given SCC. + fn successors(&self, scc: S) -> &[S] { + // Annoyingly, `range` does not implement `Copy`, so we have + // to do `range.start..range.end`: + let range = &self.ranges[scc]; + &self.all_successors[range.start..range.end] + } + + /// Creates a new SCC with `successors` as its successors and + /// returns the resulting index. + fn create_scc(&mut self, successors: impl IntoIterator) -> S { + // Store the successors on `scc_successors_vec`, remembering + // the range of indices. + let all_successors_start = self.all_successors.len(); + self.all_successors.extend(successors); + let all_successors_end = self.all_successors.len(); + + debug!( + "create_scc({:?}) successors={:?}", + self.ranges.len(), + &self.all_successors[all_successors_start..all_successors_end], + ); + + self.ranges.push(all_successors_start..all_successors_end) + } +} + +struct SccsConstruction<'c, G: DirectedGraph + WithNumNodes + WithSuccessors, S: Idx> { + graph: &'c G, + + /// The state of each node; used during walk to record the stack + /// and after walk to record what cycle each node ended up being + /// in. + node_states: IndexVec>, + + /// The stack of nodes that we are visiting as part of the DFS. + node_stack: Vec, + + /// The stack of successors: as we visit a node, we mark our + /// position in this stack, and when we encounter a successor SCC, + /// we push it on the stack. When we complete an SCC, we can pop + /// everything off the stack that was found along the way. + successors_stack: Vec, + + /// A set used to strip duplicates. As we accumulate successors + /// into the successors_stack, we sometimes get duplicate entries. + /// We use this set to remove those -- we also keep its storage + /// around between successors to amortize memory allocation costs. + duplicate_set: FxHashSet, + + scc_data: SccData, +} + +#[derive(Copy, Clone, Debug)] +enum NodeState { + /// This node has not yet been visited as part of the DFS. + /// + /// After SCC construction is complete, this state ought to be + /// impossible. + NotVisited, + + /// This node is currently being walk as part of our DFS. It is on + /// the stack at the depth `depth`. + /// + /// After SCC construction is complete, this state ought to be + /// impossible. + BeingVisited { depth: usize }, + + /// Indicates that this node is a member of the given cycle. + InCycle { scc_index: S }, + + /// Indicates that this node is a member of whatever cycle + /// `parent` is a member of. This state is transient: whenever we + /// see it, we try to overwrite it with the current state of + /// `parent` (this is the "path compression" step of a union-find + /// algorithm). + InCycleWith { parent: N }, +} + +#[derive(Copy, Clone, Debug)] +enum WalkReturn { + Cycle { min_depth: usize }, + Complete { scc_index: S }, +} + +impl<'c, G, S> SccsConstruction<'c, G, S> +where + G: DirectedGraph + WithNumNodes + WithSuccessors, + S: Idx, +{ + /// Identifies SCCs in the graph `G` and computes the resulting + /// DAG. This uses a variant of [Tarjan's + /// algorithm][wikipedia]. The high-level summary of the algorithm + /// is that we do a depth-first search. Along the way, we keep a + /// stack of each node whose successors are being visited. We + /// track the depth of each node on this stack (there is no depth + /// if the node is not on the stack). When we find that some node + /// N with depth D can reach some other node N' with lower depth + /// D' (i.e., D' < D), we know that N, N', and all nodes in + /// between them on the stack are part of an SCC. + /// + /// [wikipedia]: https://bit.ly/2EZIx84 + fn construct(graph: &'c G) -> Sccs { + let num_nodes = graph.num_nodes(); + + let mut this = Self { + graph, + node_states: IndexVec::from_elem_n(NodeState::NotVisited, num_nodes), + node_stack: Vec::with_capacity(num_nodes), + successors_stack: Vec::new(), + scc_data: SccData { ranges: IndexVec::new(), all_successors: Vec::new() }, + duplicate_set: FxHashSet::default(), + }; + + let scc_indices = (0..num_nodes) + .map(G::Node::new) + .map(|node| match this.start_walk_from(node) { + WalkReturn::Complete { scc_index } => scc_index, + WalkReturn::Cycle { min_depth } => panic!( + "`start_walk_node({:?})` returned cycle with depth {:?}", + node, min_depth + ), + }) + .collect(); + + Sccs { scc_indices, scc_data: this.scc_data } + } + + fn start_walk_from(&mut self, node: G::Node) -> WalkReturn { + if let Some(result) = self.inspect_node(node) { + result + } else { + self.walk_unvisited_node(node) + } + } + + /// Inspect a node during the DFS. We first examine its current + /// state -- if it is not yet visited (`NotVisited`), return `None` so + /// that the caller might push it onto the stack and start walking its + /// successors. + /// + /// If it is already on the DFS stack it will be in the state + /// `BeingVisited`. In that case, we have found a cycle and we + /// return the depth from the stack. + /// + /// Otherwise, we are looking at a node that has already been + /// completely visited. We therefore return `WalkReturn::Complete` + /// with its associated SCC index. + fn inspect_node(&mut self, node: G::Node) -> Option> { + Some(match self.find_state(node) { + NodeState::InCycle { scc_index } => WalkReturn::Complete { scc_index }, + + NodeState::BeingVisited { depth: min_depth } => WalkReturn::Cycle { min_depth }, + + NodeState::NotVisited => return None, + + NodeState::InCycleWith { parent } => panic!( + "`find_state` returned `InCycleWith({:?})`, which ought to be impossible", + parent + ), + }) + } + + /// Fetches the state of the node `r`. If `r` is recorded as being + /// in a cycle with some other node `r2`, then fetches the state + /// of `r2` (and updates `r` to reflect current result). This is + /// basically the "find" part of a standard union-find algorithm + /// (with path compression). + fn find_state(&mut self, mut node: G::Node) -> NodeState { + // To avoid recursion we temporarily reuse the `parent` of each + // InCycleWith link to encode a downwards link while compressing + // the path. After we have found the root or deepest node being + // visited, we traverse the reverse links and correct the node + // states on the way. + // + // **Note**: This mutation requires that this is a leaf function + // or at least that none of the called functions inspects the + // current node states. Luckily, we are a leaf. + + // Remember one previous link. The termination condition when + // following links downwards is then simply as soon as we have + // found the initial self-loop. + let mut previous_node = node; + + // Ultimately assigned by the parent when following + // `InCycleWith` upwards. + let node_state = loop { + debug!("find_state(r = {:?} in state {:?})", node, self.node_states[node]); + match self.node_states[node] { + NodeState::InCycle { scc_index } => break NodeState::InCycle { scc_index }, + NodeState::BeingVisited { depth } => break NodeState::BeingVisited { depth }, + NodeState::NotVisited => break NodeState::NotVisited, + NodeState::InCycleWith { parent } => { + // We test this, to be extremely sure that we never + // ever break our termination condition for the + // reverse iteration loop. + assert!(node != parent, "Node can not be in cycle with itself"); + // Store the previous node as an inverted list link + self.node_states[node] = NodeState::InCycleWith { parent: previous_node }; + // Update to parent node. + previous_node = node; + node = parent; + } + } + }; + + // The states form a graph where up to one outgoing link is stored at + // each node. Initially in general, + // + // E + // ^ + // | + // InCycleWith/BeingVisited/NotVisited + // | + // A-InCycleWith->B-InCycleWith…>C-InCycleWith->D-+ + // | + // = node, previous_node + // + // After the first loop, this will look like + // E + // ^ + // | + // InCycleWith/BeingVisited/NotVisited + // | + // +>A<-InCycleWith-B<…InCycleWith-C<-InCycleWith-D-+ + // | | | | + // | InCycleWith | = node + // +-+ =previous_node + // + // Note in particular that A will be linked to itself in a self-cycle + // and no other self-cycles occur due to how InCycleWith is assigned in + // the find phase implemented by `walk_unvisited_node`. + // + // We now want to compress the path, that is assign the state of the + // link D-E to all other links. + // + // We can then walk backwards, starting from `previous_node`, and assign + // each node in the list with the updated state. The loop terminates + // when we reach the self-cycle. + + // Move backwards until we found the node where we started. We + // will know when we hit the state where previous_node == node. + loop { + // Back at the beginning, we can return. + if previous_node == node { + return node_state; + } + // Update to previous node in the link. + match self.node_states[previous_node] { + NodeState::InCycleWith { parent: previous } => { + node = previous_node; + previous_node = previous; + } + // Only InCycleWith nodes were added to the reverse linked list. + other => panic!("Invalid previous link while compressing cycle: {:?}", other), + } + + debug!("find_state: parent_state = {:?}", node_state); + + // Update the node state from the parent state. The assigned + // state is actually a loop invariant but it will only be + // evaluated if there is at least one backlink to follow. + // Fully trusting llvm here to find this loop optimization. + match node_state { + // Path compression, make current node point to the same root. + NodeState::InCycle { .. } => { + self.node_states[node] = node_state; + } + // Still visiting nodes, compress to cycle to the node + // at that depth. + NodeState::BeingVisited { depth } => { + self.node_states[node] = + NodeState::InCycleWith { parent: self.node_stack[depth] }; + } + // These are never allowed as parent nodes. InCycleWith + // should have been followed to a real parent and + // NotVisited can not be part of a cycle since it should + // have instead gotten explored. + NodeState::NotVisited | NodeState::InCycleWith { .. } => { + panic!("invalid parent state: {:?}", node_state) + } + } + } + } + + /// Walks a node that has never been visited before. + /// + /// Call this method when `inspect_node` has returned `None`. Having the + /// caller decide avoids mutual recursion between the two methods and allows + /// us to maintain an allocated stack for nodes on the path between calls. + #[instrument(skip(self, initial), level = "debug")] + fn walk_unvisited_node(&mut self, initial: G::Node) -> WalkReturn { + struct VisitingNodeFrame { + node: G::Node, + iter: Option, + depth: usize, + min_depth: usize, + successors_len: usize, + min_cycle_root: G::Node, + successor_node: G::Node, + } + + // Move the stack to a local variable. We want to utilize the existing allocation and + // mutably borrow it without borrowing self at the same time. + let mut successors_stack = core::mem::take(&mut self.successors_stack); + debug_assert_eq!(successors_stack.len(), 0); + + let mut stack: Vec> = vec![VisitingNodeFrame { + node: initial, + depth: 0, + min_depth: 0, + iter: None, + successors_len: 0, + min_cycle_root: initial, + successor_node: initial, + }]; + + let mut return_value = None; + + 'recurse: while let Some(frame) = stack.last_mut() { + let VisitingNodeFrame { + node, + depth, + iter, + successors_len, + min_depth, + min_cycle_root, + successor_node, + } = frame; + + let node = *node; + let depth = *depth; + + let successors = match iter { + Some(iter) => iter, + None => { + // This None marks that we still have the initialize this node's frame. + debug!(?depth, ?node); + + debug_assert!(matches!(self.node_states[node], NodeState::NotVisited)); + + // Push `node` onto the stack. + self.node_states[node] = NodeState::BeingVisited { depth }; + self.node_stack.push(node); + + // Walk each successor of the node, looking to see if any of + // them can reach a node that is presently on the stack. If + // so, that means they can also reach us. + *successors_len = successors_stack.len(); + // Set and return a reference, this is currently empty. + iter.get_or_insert(self.graph.successors(node)) + } + }; + + // Now that iter is initialized, this is a constant for this frame. + let successors_len = *successors_len; + + // Construct iterators for the nodes and walk results. There are two cases: + // * The walk of a successor node returned. + // * The remaining successor nodes. + let returned_walk = + return_value.take().into_iter().map(|walk| (*successor_node, Some(walk))); + + let successor_walk = successors.by_ref().map(|successor_node| { + debug!(?node, ?successor_node); + (successor_node, self.inspect_node(successor_node)) + }); + + for (successor_node, walk) in returned_walk.chain(successor_walk) { + match walk { + Some(WalkReturn::Cycle { min_depth: successor_min_depth }) => { + // Track the minimum depth we can reach. + assert!(successor_min_depth <= depth); + if successor_min_depth < *min_depth { + debug!(?node, ?successor_min_depth); + *min_depth = successor_min_depth; + *min_cycle_root = successor_node; + } + } + + Some(WalkReturn::Complete { scc_index: successor_scc_index }) => { + // Push the completed SCC indices onto + // the `successors_stack` for later. + debug!(?node, ?successor_scc_index); + successors_stack.push(successor_scc_index); + } + + None => { + let depth = depth + 1; + debug!(?depth, ?successor_node); + // Remember which node the return value will come from. + frame.successor_node = successor_node; + // Start a new stack frame the step into it. + stack.push(VisitingNodeFrame { + node: successor_node, + depth, + iter: None, + successors_len: 0, + min_depth: depth, + min_cycle_root: successor_node, + successor_node, + }); + continue 'recurse; + } + } + } + + // Completed walk, remove `node` from the stack. + let r = self.node_stack.pop(); + debug_assert_eq!(r, Some(node)); + + // Remove the frame, it's done. + let frame = stack.pop().unwrap(); + + // If `min_depth == depth`, then we are the root of the + // cycle: we can't reach anyone further down the stack. + + // Pass the 'return value' down the stack. + // We return one frame at a time so there can't be another return value. + debug_assert!(return_value.is_none()); + return_value = Some(if frame.min_depth == depth { + // Note that successor stack may have duplicates, so we + // want to remove those: + let deduplicated_successors = { + let duplicate_set = &mut self.duplicate_set; + duplicate_set.clear(); + successors_stack + .drain(successors_len..) + .filter(move |&i| duplicate_set.insert(i)) + }; + let scc_index = self.scc_data.create_scc(deduplicated_successors); + self.node_states[node] = NodeState::InCycle { scc_index }; + WalkReturn::Complete { scc_index } + } else { + // We are not the head of the cycle. Return back to our + // caller. They will take ownership of the + // `self.successors` data that we pushed. + self.node_states[node] = NodeState::InCycleWith { parent: frame.min_cycle_root }; + WalkReturn::Cycle { min_depth: frame.min_depth } + }); + } + + // Keep the allocation we used for successors_stack. + self.successors_stack = successors_stack; + debug_assert_eq!(self.successors_stack.len(), 0); + + return_value.unwrap() + } +} diff --git a/compiler/rustc_data_structures/src/graph/scc/tests.rs b/compiler/rustc_data_structures/src/graph/scc/tests.rs new file mode 100644 index 000000000..9940fee60 --- /dev/null +++ b/compiler/rustc_data_structures/src/graph/scc/tests.rs @@ -0,0 +1,216 @@ +extern crate test; + +use super::*; +use crate::graph::tests::TestGraph; + +#[test] +fn diamond() { + let graph = TestGraph::new(0, &[(0, 1), (0, 2), (1, 3), (2, 3)]); + let sccs: Sccs<_, usize> = Sccs::new(&graph); + assert_eq!(sccs.num_sccs(), 4); + assert_eq!(sccs.num_sccs(), 4); +} + +#[test] +fn test_big_scc() { + // The order in which things will be visited is important to this + // test. + // + // We will visit: + // + // 0 -> 1 -> 2 -> 0 + // + // and at this point detect a cycle. 2 will return back to 1 which + // will visit 3. 3 will visit 2 before the cycle is complete, and + // hence it too will return a cycle. + + /* + +-> 0 + | | + | v + | 1 -> 3 + | | | + | v | + +-- 2 <--+ + */ + let graph = TestGraph::new(0, &[(0, 1), (1, 2), (1, 3), (2, 0), (3, 2)]); + let sccs: Sccs<_, usize> = Sccs::new(&graph); + assert_eq!(sccs.num_sccs(), 1); +} + +#[test] +fn test_three_sccs() { + /* + 0 + | + v + +-> 1 3 + | | | + | v | + +-- 2 <--+ + */ + let graph = TestGraph::new(0, &[(0, 1), (1, 2), (2, 1), (3, 2)]); + let sccs: Sccs<_, usize> = Sccs::new(&graph); + assert_eq!(sccs.num_sccs(), 3); + assert_eq!(sccs.scc(0), 1); + assert_eq!(sccs.scc(1), 0); + assert_eq!(sccs.scc(2), 0); + assert_eq!(sccs.scc(3), 2); + assert_eq!(sccs.successors(0), &[]); + assert_eq!(sccs.successors(1), &[0]); + assert_eq!(sccs.successors(2), &[0]); +} + +#[test] +fn test_find_state_2() { + // The order in which things will be visited is important to this + // test. It tests part of the `find_state` behavior. Here is the + // graph: + // + // + // /----+ + // 0 <--+ | + // | | | + // v | | + // +-> 1 -> 3 4 + // | | | + // | v | + // +-- 2 <----+ + + let graph = TestGraph::new(0, &[(0, 1), (0, 4), (1, 2), (1, 3), (2, 1), (3, 0), (4, 2)]); + + // For this graph, we will start in our DFS by visiting: + // + // 0 -> 1 -> 2 -> 1 + // + // and at this point detect a cycle. The state of 2 will thus be + // `InCycleWith { 1 }`. We will then visit the 1 -> 3 edge, which + // will attempt to visit 0 as well, thus going to the state + // `InCycleWith { 0 }`. Finally, node 1 will complete; the lowest + // depth of any successor was 3 which had depth 0, and thus it + // will be in the state `InCycleWith { 3 }`. + // + // When we finally traverse the `0 -> 4` edge and then visit node 2, + // the states of the nodes are: + // + // 0 BeingVisited { 0 } + // 1 InCycleWith { 3 } + // 2 InCycleWith { 1 } + // 3 InCycleWith { 0 } + // + // and hence 4 will traverse the links, finding an ultimate depth of 0. + // If will also collapse the states to the following: + // + // 0 BeingVisited { 0 } + // 1 InCycleWith { 3 } + // 2 InCycleWith { 1 } + // 3 InCycleWith { 0 } + + let sccs: Sccs<_, usize> = Sccs::new(&graph); + assert_eq!(sccs.num_sccs(), 1); + assert_eq!(sccs.scc(0), 0); + assert_eq!(sccs.scc(1), 0); + assert_eq!(sccs.scc(2), 0); + assert_eq!(sccs.scc(3), 0); + assert_eq!(sccs.scc(4), 0); + assert_eq!(sccs.successors(0), &[]); +} + +#[test] +fn test_find_state_3() { + /* + /----+ + 0 <--+ | + | | | + v | | + +-> 1 -> 3 4 5 + | | | | + | v | | + +-- 2 <----+-+ + */ + let graph = + TestGraph::new(0, &[(0, 1), (0, 4), (1, 2), (1, 3), (2, 1), (3, 0), (4, 2), (5, 2)]); + let sccs: Sccs<_, usize> = Sccs::new(&graph); + assert_eq!(sccs.num_sccs(), 2); + assert_eq!(sccs.scc(0), 0); + assert_eq!(sccs.scc(1), 0); + assert_eq!(sccs.scc(2), 0); + assert_eq!(sccs.scc(3), 0); + assert_eq!(sccs.scc(4), 0); + assert_eq!(sccs.scc(5), 1); + assert_eq!(sccs.successors(0), &[]); + assert_eq!(sccs.successors(1), &[0]); +} + +#[test] +fn test_deep_linear() { + /* + 0 + | + v + 1 + | + v + 2 + | + v + … + */ + #[cfg(not(miri))] + const NR_NODES: usize = 1 << 14; + #[cfg(miri)] + const NR_NODES: usize = 1 << 3; + let mut nodes = vec![]; + for i in 1..NR_NODES { + nodes.push((i - 1, i)); + } + let graph = TestGraph::new(0, nodes.as_slice()); + let sccs: Sccs<_, usize> = Sccs::new(&graph); + assert_eq!(sccs.num_sccs(), NR_NODES); + assert_eq!(sccs.scc(0), NR_NODES - 1); + assert_eq!(sccs.scc(NR_NODES - 1), 0); +} + +#[bench] +fn bench_sccc(b: &mut test::Bencher) { + // Like `test_three_sccs` but each state is replaced by a group of + // three or four to have some amount of test data. + /* + 0-3 + | + v + +->4-6 11-14 + | | | + | v | + +--7-10<-+ + */ + fn make_3_clique(slice: &mut [(usize, usize)], base: usize) { + slice[0] = (base + 0, base + 1); + slice[1] = (base + 1, base + 2); + slice[2] = (base + 2, base + 0); + } + // Not actually a clique but strongly connected. + fn make_4_clique(slice: &mut [(usize, usize)], base: usize) { + slice[0] = (base + 0, base + 1); + slice[1] = (base + 1, base + 2); + slice[2] = (base + 2, base + 3); + slice[3] = (base + 3, base + 0); + slice[4] = (base + 1, base + 3); + slice[5] = (base + 2, base + 1); + } + + let mut graph = [(0, 0); 6 + 3 + 6 + 3 + 4]; + make_4_clique(&mut graph[0..6], 0); + make_3_clique(&mut graph[6..9], 4); + make_4_clique(&mut graph[9..15], 7); + make_3_clique(&mut graph[15..18], 11); + graph[18] = (0, 4); + graph[19] = (5, 7); + graph[20] = (11, 10); + graph[21] = (7, 4); + let graph = TestGraph::new(0, &graph[..]); + b.iter(|| { + let sccs: Sccs<_, usize> = Sccs::new(&graph); + assert_eq!(sccs.num_sccs(), 3); + }); +} diff --git a/compiler/rustc_data_structures/src/graph/tests.rs b/compiler/rustc_data_structures/src/graph/tests.rs new file mode 100644 index 000000000..7f4ef906b --- /dev/null +++ b/compiler/rustc_data_structures/src/graph/tests.rs @@ -0,0 +1,73 @@ +use crate::fx::FxHashMap; +use std::cmp::max; +use std::iter; +use std::slice; + +use super::*; + +pub struct TestGraph { + num_nodes: usize, + start_node: usize, + successors: FxHashMap>, + predecessors: FxHashMap>, +} + +impl TestGraph { + pub fn new(start_node: usize, edges: &[(usize, usize)]) -> Self { + let mut graph = TestGraph { + num_nodes: start_node + 1, + start_node, + successors: FxHashMap::default(), + predecessors: FxHashMap::default(), + }; + for &(source, target) in edges { + graph.num_nodes = max(graph.num_nodes, source + 1); + graph.num_nodes = max(graph.num_nodes, target + 1); + graph.successors.entry(source).or_default().push(target); + graph.predecessors.entry(target).or_default().push(source); + } + for node in 0..graph.num_nodes { + graph.successors.entry(node).or_default(); + graph.predecessors.entry(node).or_default(); + } + graph + } +} + +impl DirectedGraph for TestGraph { + type Node = usize; +} + +impl WithStartNode for TestGraph { + fn start_node(&self) -> usize { + self.start_node + } +} + +impl WithNumNodes for TestGraph { + fn num_nodes(&self) -> usize { + self.num_nodes + } +} + +impl WithPredecessors for TestGraph { + fn predecessors(&self, node: usize) -> >::Iter { + self.predecessors[&node].iter().cloned() + } +} + +impl WithSuccessors for TestGraph { + fn successors(&self, node: usize) -> >::Iter { + self.successors[&node].iter().cloned() + } +} + +impl<'graph> GraphPredecessors<'graph> for TestGraph { + type Item = usize; + type Iter = iter::Cloned>; +} + +impl<'graph> GraphSuccessors<'graph> for TestGraph { + type Item = usize; + type Iter = iter::Cloned>; +} diff --git a/compiler/rustc_data_structures/src/graph/vec_graph/mod.rs b/compiler/rustc_data_structures/src/graph/vec_graph/mod.rs new file mode 100644 index 000000000..3d91bcade --- /dev/null +++ b/compiler/rustc_data_structures/src/graph/vec_graph/mod.rs @@ -0,0 +1,109 @@ +use std::cmp::Ord; + +use crate::graph::{DirectedGraph, GraphSuccessors, WithNumEdges, WithNumNodes, WithSuccessors}; +use rustc_index::vec::{Idx, IndexVec}; + +#[cfg(test)] +mod tests; + +pub struct VecGraph { + /// Maps from a given node to an index where the set of successors + /// for that node starts. The index indexes into the `edges` + /// vector. To find the range for a given node, we look up the + /// start for that node and then the start for the next node + /// (i.e., with an index 1 higher) and get the range between the + /// two. This vector always has an extra entry so that this works + /// even for the max element. + node_starts: IndexVec, + + edge_targets: Vec, +} + +impl VecGraph { + pub fn new(num_nodes: usize, mut edge_pairs: Vec<(N, N)>) -> Self { + // Sort the edges by the source -- this is important. + edge_pairs.sort(); + + let num_edges = edge_pairs.len(); + + // Store the *target* of each edge into `edge_targets`. + let edge_targets: Vec = edge_pairs.iter().map(|&(_, target)| target).collect(); + + // Create the *edge starts* array. We are iterating over over + // the (sorted) edge pairs. We maintain the invariant that the + // length of the `node_starts` array is enough to store the + // current source node -- so when we see that the source node + // for an edge is greater than the current length, we grow the + // edge-starts array by just enough. + let mut node_starts = IndexVec::with_capacity(num_edges); + for (index, &(source, _)) in edge_pairs.iter().enumerate() { + // If we have a list like `[(0, x), (2, y)]`: + // + // - Start out with `node_starts` of `[]` + // - Iterate to `(0, x)` at index 0: + // - Push one entry because `node_starts.len()` (0) is <= the source (0) + // - Leaving us with `node_starts` of `[0]` + // - Iterate to `(2, y)` at index 1: + // - Push one entry because `node_starts.len()` (1) is <= the source (2) + // - Push one entry because `node_starts.len()` (2) is <= the source (2) + // - Leaving us with `node_starts` of `[0, 1, 1]` + // - Loop terminates + while node_starts.len() <= source.index() { + node_starts.push(index); + } + } + + // Pad out the `node_starts` array so that it has `num_nodes + + // 1` entries. Continuing our example above, if `num_nodes` is + // be `3`, we would push one more index: `[0, 1, 1, 2]`. + // + // Interpretation of that vector: + // + // [0, 1, 1, 2] + // ---- range for N=2 + // ---- range for N=1 + // ---- range for N=0 + while node_starts.len() <= num_nodes { + node_starts.push(edge_targets.len()); + } + + assert_eq!(node_starts.len(), num_nodes + 1); + + Self { node_starts, edge_targets } + } + + /// Gets the successors for `source` as a slice. + pub fn successors(&self, source: N) -> &[N] { + let start_index = self.node_starts[source]; + let end_index = self.node_starts[source.plus(1)]; + &self.edge_targets[start_index..end_index] + } +} + +impl DirectedGraph for VecGraph { + type Node = N; +} + +impl WithNumNodes for VecGraph { + fn num_nodes(&self) -> usize { + self.node_starts.len() - 1 + } +} + +impl WithNumEdges for VecGraph { + fn num_edges(&self) -> usize { + self.edge_targets.len() + } +} + +impl<'graph, N: Idx> GraphSuccessors<'graph> for VecGraph { + type Item = N; + + type Iter = std::iter::Cloned>; +} + +impl WithSuccessors for VecGraph { + fn successors(&self, node: N) -> >::Iter { + self.successors(node).iter().cloned() + } +} diff --git a/compiler/rustc_data_structures/src/graph/vec_graph/tests.rs b/compiler/rustc_data_structures/src/graph/vec_graph/tests.rs new file mode 100644 index 000000000..c8f979267 --- /dev/null +++ b/compiler/rustc_data_structures/src/graph/vec_graph/tests.rs @@ -0,0 +1,42 @@ +use super::*; + +fn create_graph() -> VecGraph { + // Create a simple graph + // + // 5 + // | + // V + // 0 --> 1 --> 2 + // | + // v + // 3 --> 4 + // + // 6 + + VecGraph::new(7, vec![(0, 1), (1, 2), (1, 3), (3, 4), (5, 1)]) +} + +#[test] +fn num_nodes() { + let graph = create_graph(); + assert_eq!(graph.num_nodes(), 7); +} + +#[test] +fn successors() { + let graph = create_graph(); + assert_eq!(graph.successors(0), &[1]); + assert_eq!(graph.successors(1), &[2, 3]); + assert_eq!(graph.successors(2), &[]); + assert_eq!(graph.successors(3), &[4]); + assert_eq!(graph.successors(4), &[]); + assert_eq!(graph.successors(5), &[1]); + assert_eq!(graph.successors(6), &[]); +} + +#[test] +fn dfs() { + let graph = create_graph(); + let dfs: Vec<_> = graph.depth_first_search(0).collect(); + assert_eq!(dfs, vec![0, 1, 3, 4, 2]); +} -- cgit v1.2.3