summaryrefslogtreecommitdiffstats
path: root/third_party/rust/itertools/src/groupbylazy.rs
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/itertools/src/groupbylazy.rs')
-rw-r--r--third_party/rust/itertools/src/groupbylazy.rs571
1 files changed, 571 insertions, 0 deletions
diff --git a/third_party/rust/itertools/src/groupbylazy.rs b/third_party/rust/itertools/src/groupbylazy.rs
new file mode 100644
index 0000000000..a5a321df45
--- /dev/null
+++ b/third_party/rust/itertools/src/groupbylazy.rs
@@ -0,0 +1,571 @@
+use std::cell::{Cell, RefCell};
+use alloc::vec::{self, Vec};
+
+/// A trait to unify `FnMut` for `GroupBy` with the chunk key in `IntoChunks`
+trait KeyFunction<A> {
+ type Key;
+ fn call_mut(&mut self, arg: A) -> Self::Key;
+}
+
+impl<A, K, F: ?Sized> KeyFunction<A> for F
+ where F: FnMut(A) -> K
+{
+ type Key = K;
+ #[inline]
+ fn call_mut(&mut self, arg: A) -> Self::Key {
+ (*self)(arg)
+ }
+}
+
+
+/// `ChunkIndex` acts like the grouping key function for `IntoChunks`
+#[derive(Debug)]
+struct ChunkIndex {
+ size: usize,
+ index: usize,
+ key: usize,
+}
+
+impl ChunkIndex {
+ #[inline(always)]
+ fn new(size: usize) -> Self {
+ ChunkIndex {
+ size,
+ index: 0,
+ key: 0,
+ }
+ }
+}
+
+impl<A> KeyFunction<A> for ChunkIndex {
+ type Key = usize;
+ #[inline(always)]
+ fn call_mut(&mut self, _arg: A) -> Self::Key {
+ if self.index == self.size {
+ self.key += 1;
+ self.index = 0;
+ }
+ self.index += 1;
+ self.key
+ }
+}
+
+
+struct GroupInner<K, I, F>
+ where I: Iterator
+{
+ key: F,
+ iter: I,
+ current_key: Option<K>,
+ current_elt: Option<I::Item>,
+ /// flag set if iterator is exhausted
+ done: bool,
+ /// Index of group we are currently buffering or visiting
+ top_group: usize,
+ /// Least index for which we still have elements buffered
+ oldest_buffered_group: usize,
+ /// Group index for `buffer[0]` -- the slots
+ /// bottom_group..oldest_buffered_group are unused and will be erased when
+ /// that range is large enough.
+ bottom_group: usize,
+ /// Buffered groups, from `bottom_group` (index 0) to `top_group`.
+ buffer: Vec<vec::IntoIter<I::Item>>,
+ /// index of last group iter that was dropped, usize::MAX == none
+ dropped_group: usize,
+}
+
+impl<K, I, F> GroupInner<K, I, F>
+ where I: Iterator,
+ F: for<'a> KeyFunction<&'a I::Item, Key=K>,
+ K: PartialEq,
+{
+ /// `client`: Index of group that requests next element
+ #[inline(always)]
+ fn step(&mut self, client: usize) -> Option<I::Item> {
+ /*
+ println!("client={}, bottom_group={}, oldest_buffered_group={}, top_group={}, buffers=[{}]",
+ client, self.bottom_group, self.oldest_buffered_group,
+ self.top_group,
+ self.buffer.iter().map(|elt| elt.len()).format(", "));
+ */
+ if client < self.oldest_buffered_group {
+ None
+ } else if client < self.top_group ||
+ (client == self.top_group &&
+ self.buffer.len() > self.top_group - self.bottom_group)
+ {
+ self.lookup_buffer(client)
+ } else if self.done {
+ None
+ } else if self.top_group == client {
+ self.step_current()
+ } else {
+ self.step_buffering(client)
+ }
+ }
+
+ #[inline(never)]
+ fn lookup_buffer(&mut self, client: usize) -> Option<I::Item> {
+ // if `bufidx` doesn't exist in self.buffer, it might be empty
+ let bufidx = client - self.bottom_group;
+ if client < self.oldest_buffered_group {
+ return None;
+ }
+ let elt = self.buffer.get_mut(bufidx).and_then(|queue| queue.next());
+ if elt.is_none() && client == self.oldest_buffered_group {
+ // FIXME: VecDeque is unfortunately not zero allocation when empty,
+ // so we do this job manually.
+ // `bottom_group..oldest_buffered_group` is unused, and if it's large enough, erase it.
+ self.oldest_buffered_group += 1;
+ // skip forward further empty queues too
+ while self.buffer.get(self.oldest_buffered_group - self.bottom_group)
+ .map_or(false, |buf| buf.len() == 0)
+ {
+ self.oldest_buffered_group += 1;
+ }
+
+ let nclear = self.oldest_buffered_group - self.bottom_group;
+ if nclear > 0 && nclear >= self.buffer.len() / 2 {
+ let mut i = 0;
+ self.buffer.retain(|buf| {
+ i += 1;
+ debug_assert!(buf.len() == 0 || i > nclear);
+ i > nclear
+ });
+ self.bottom_group = self.oldest_buffered_group;
+ }
+ }
+ elt
+ }
+
+ /// Take the next element from the iterator, and set the done
+ /// flag if exhausted. Must not be called after done.
+ #[inline(always)]
+ fn next_element(&mut self) -> Option<I::Item> {
+ debug_assert!(!self.done);
+ match self.iter.next() {
+ None => { self.done = true; None }
+ otherwise => otherwise,
+ }
+ }
+
+
+ #[inline(never)]
+ fn step_buffering(&mut self, client: usize) -> Option<I::Item> {
+ // requested a later group -- walk through the current group up to
+ // the requested group index, and buffer the elements (unless
+ // the group is marked as dropped).
+ // Because the `Groups` iterator is always the first to request
+ // each group index, client is the next index efter top_group.
+ debug_assert!(self.top_group + 1 == client);
+ let mut group = Vec::new();
+
+ if let Some(elt) = self.current_elt.take() {
+ if self.top_group != self.dropped_group {
+ group.push(elt);
+ }
+ }
+ let mut first_elt = None; // first element of the next group
+
+ while let Some(elt) = self.next_element() {
+ let key = self.key.call_mut(&elt);
+ match self.current_key.take() {
+ None => {}
+ Some(old_key) => if old_key != key {
+ self.current_key = Some(key);
+ first_elt = Some(elt);
+ break;
+ },
+ }
+ self.current_key = Some(key);
+ if self.top_group != self.dropped_group {
+ group.push(elt);
+ }
+ }
+
+ if self.top_group != self.dropped_group {
+ self.push_next_group(group);
+ }
+ if first_elt.is_some() {
+ self.top_group += 1;
+ debug_assert!(self.top_group == client);
+ }
+ first_elt
+ }
+
+ fn push_next_group(&mut self, group: Vec<I::Item>) {
+ // When we add a new buffered group, fill up slots between oldest_buffered_group and top_group
+ while self.top_group - self.bottom_group > self.buffer.len() {
+ if self.buffer.is_empty() {
+ self.bottom_group += 1;
+ self.oldest_buffered_group += 1;
+ } else {
+ self.buffer.push(Vec::new().into_iter());
+ }
+ }
+ self.buffer.push(group.into_iter());
+ debug_assert!(self.top_group + 1 - self.bottom_group == self.buffer.len());
+ }
+
+ /// This is the immediate case, where we use no buffering
+ #[inline]
+ fn step_current(&mut self) -> Option<I::Item> {
+ debug_assert!(!self.done);
+ if let elt @ Some(..) = self.current_elt.take() {
+ return elt;
+ }
+ match self.next_element() {
+ None => None,
+ Some(elt) => {
+ let key = self.key.call_mut(&elt);
+ match self.current_key.take() {
+ None => {}
+ Some(old_key) => if old_key != key {
+ self.current_key = Some(key);
+ self.current_elt = Some(elt);
+ self.top_group += 1;
+ return None;
+ },
+ }
+ self.current_key = Some(key);
+ Some(elt)
+ }
+ }
+ }
+
+ /// Request the just started groups' key.
+ ///
+ /// `client`: Index of group
+ ///
+ /// **Panics** if no group key is available.
+ fn group_key(&mut self, client: usize) -> K {
+ // This can only be called after we have just returned the first
+ // element of a group.
+ // Perform this by simply buffering one more element, grabbing the
+ // next key.
+ debug_assert!(!self.done);
+ debug_assert!(client == self.top_group);
+ debug_assert!(self.current_key.is_some());
+ debug_assert!(self.current_elt.is_none());
+ let old_key = self.current_key.take().unwrap();
+ if let Some(elt) = self.next_element() {
+ let key = self.key.call_mut(&elt);
+ if old_key != key {
+ self.top_group += 1;
+ }
+ self.current_key = Some(key);
+ self.current_elt = Some(elt);
+ }
+ old_key
+ }
+}
+
+impl<K, I, F> GroupInner<K, I, F>
+ where I: Iterator,
+{
+ /// Called when a group is dropped
+ fn drop_group(&mut self, client: usize) {
+ // It's only useful to track the maximal index
+ if self.dropped_group == !0 || client > self.dropped_group {
+ self.dropped_group = client;
+ }
+ }
+}
+
+/// `GroupBy` is the storage for the lazy grouping operation.
+///
+/// If the groups are consumed in their original order, or if each
+/// group is dropped without keeping it around, then `GroupBy` uses
+/// no allocations. It needs allocations only if several group iterators
+/// are alive at the same time.
+///
+/// This type implements [`IntoIterator`] (it is **not** an iterator
+/// itself), because the group iterators need to borrow from this
+/// value. It should be stored in a local variable or temporary and
+/// iterated.
+///
+/// See [`.group_by()`](crate::Itertools::group_by) for more information.
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+pub struct GroupBy<K, I, F>
+ where I: Iterator,
+{
+ inner: RefCell<GroupInner<K, I, F>>,
+ // the group iterator's current index. Keep this in the main value
+ // so that simultaneous iterators all use the same state.
+ index: Cell<usize>,
+}
+
+/// Create a new
+pub fn new<K, J, F>(iter: J, f: F) -> GroupBy<K, J::IntoIter, F>
+ where J: IntoIterator,
+ F: FnMut(&J::Item) -> K,
+{
+ GroupBy {
+ inner: RefCell::new(GroupInner {
+ key: f,
+ iter: iter.into_iter(),
+ current_key: None,
+ current_elt: None,
+ done: false,
+ top_group: 0,
+ oldest_buffered_group: 0,
+ bottom_group: 0,
+ buffer: Vec::new(),
+ dropped_group: !0,
+ }),
+ index: Cell::new(0),
+ }
+}
+
+impl<K, I, F> GroupBy<K, I, F>
+ where I: Iterator,
+{
+ /// `client`: Index of group that requests next element
+ fn step(&self, client: usize) -> Option<I::Item>
+ where F: FnMut(&I::Item) -> K,
+ K: PartialEq,
+ {
+ self.inner.borrow_mut().step(client)
+ }
+
+ /// `client`: Index of group
+ fn drop_group(&self, client: usize) {
+ self.inner.borrow_mut().drop_group(client);
+ }
+}
+
+impl<'a, K, I, F> IntoIterator for &'a GroupBy<K, I, F>
+ where I: Iterator,
+ I::Item: 'a,
+ F: FnMut(&I::Item) -> K,
+ K: PartialEq
+{
+ type Item = (K, Group<'a, K, I, F>);
+ type IntoIter = Groups<'a, K, I, F>;
+
+ fn into_iter(self) -> Self::IntoIter {
+ Groups { parent: self }
+ }
+}
+
+
+/// An iterator that yields the Group iterators.
+///
+/// Iterator element type is `(K, Group)`:
+/// the group's key `K` and the group's iterator.
+///
+/// See [`.group_by()`](crate::Itertools::group_by) for more information.
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+pub struct Groups<'a, K: 'a, I: 'a, F: 'a>
+ where I: Iterator,
+ I::Item: 'a
+{
+ parent: &'a GroupBy<K, I, F>,
+}
+
+impl<'a, K, I, F> Iterator for Groups<'a, K, I, F>
+ where I: Iterator,
+ I::Item: 'a,
+ F: FnMut(&I::Item) -> K,
+ K: PartialEq
+{
+ type Item = (K, Group<'a, K, I, F>);
+
+ #[inline]
+ fn next(&mut self) -> Option<Self::Item> {
+ let index = self.parent.index.get();
+ self.parent.index.set(index + 1);
+ let inner = &mut *self.parent.inner.borrow_mut();
+ inner.step(index).map(|elt| {
+ let key = inner.group_key(index);
+ (key, Group {
+ parent: self.parent,
+ index,
+ first: Some(elt),
+ })
+ })
+ }
+}
+
+/// An iterator for the elements in a single group.
+///
+/// Iterator element type is `I::Item`.
+pub struct Group<'a, K: 'a, I: 'a, F: 'a>
+ where I: Iterator,
+ I::Item: 'a,
+{
+ parent: &'a GroupBy<K, I, F>,
+ index: usize,
+ first: Option<I::Item>,
+}
+
+impl<'a, K, I, F> Drop for Group<'a, K, I, F>
+ where I: Iterator,
+ I::Item: 'a,
+{
+ fn drop(&mut self) {
+ self.parent.drop_group(self.index);
+ }
+}
+
+impl<'a, K, I, F> Iterator for Group<'a, K, I, F>
+ where I: Iterator,
+ I::Item: 'a,
+ F: FnMut(&I::Item) -> K,
+ K: PartialEq,
+{
+ type Item = I::Item;
+ #[inline]
+ fn next(&mut self) -> Option<Self::Item> {
+ if let elt @ Some(..) = self.first.take() {
+ return elt;
+ }
+ self.parent.step(self.index)
+ }
+}
+
+///// IntoChunks /////
+
+/// Create a new
+pub fn new_chunks<J>(iter: J, size: usize) -> IntoChunks<J::IntoIter>
+ where J: IntoIterator,
+{
+ IntoChunks {
+ inner: RefCell::new(GroupInner {
+ key: ChunkIndex::new(size),
+ iter: iter.into_iter(),
+ current_key: None,
+ current_elt: None,
+ done: false,
+ top_group: 0,
+ oldest_buffered_group: 0,
+ bottom_group: 0,
+ buffer: Vec::new(),
+ dropped_group: !0,
+ }),
+ index: Cell::new(0),
+ }
+}
+
+
+/// `ChunkLazy` is the storage for a lazy chunking operation.
+///
+/// `IntoChunks` behaves just like `GroupBy`: it is iterable, and
+/// it only buffers if several chunk iterators are alive at the same time.
+///
+/// This type implements [`IntoIterator`] (it is **not** an iterator
+/// itself), because the chunk iterators need to borrow from this
+/// value. It should be stored in a local variable or temporary and
+/// iterated.
+///
+/// Iterator element type is `Chunk`, each chunk's iterator.
+///
+/// See [`.chunks()`](crate::Itertools::chunks) for more information.
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+pub struct IntoChunks<I>
+ where I: Iterator,
+{
+ inner: RefCell<GroupInner<usize, I, ChunkIndex>>,
+ // the chunk iterator's current index. Keep this in the main value
+ // so that simultaneous iterators all use the same state.
+ index: Cell<usize>,
+}
+
+
+impl<I> IntoChunks<I>
+ where I: Iterator,
+{
+ /// `client`: Index of chunk that requests next element
+ fn step(&self, client: usize) -> Option<I::Item> {
+ self.inner.borrow_mut().step(client)
+ }
+
+ /// `client`: Index of chunk
+ fn drop_group(&self, client: usize) {
+ self.inner.borrow_mut().drop_group(client);
+ }
+}
+
+impl<'a, I> IntoIterator for &'a IntoChunks<I>
+ where I: Iterator,
+ I::Item: 'a,
+{
+ type Item = Chunk<'a, I>;
+ type IntoIter = Chunks<'a, I>;
+
+ fn into_iter(self) -> Self::IntoIter {
+ Chunks {
+ parent: self,
+ }
+ }
+}
+
+
+/// An iterator that yields the Chunk iterators.
+///
+/// Iterator element type is `Chunk`.
+///
+/// See [`.chunks()`](crate::Itertools::chunks) for more information.
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+pub struct Chunks<'a, I: 'a>
+ where I: Iterator,
+ I::Item: 'a,
+{
+ parent: &'a IntoChunks<I>,
+}
+
+impl<'a, I> Iterator for Chunks<'a, I>
+ where I: Iterator,
+ I::Item: 'a,
+{
+ type Item = Chunk<'a, I>;
+
+ #[inline]
+ fn next(&mut self) -> Option<Self::Item> {
+ let index = self.parent.index.get();
+ self.parent.index.set(index + 1);
+ let inner = &mut *self.parent.inner.borrow_mut();
+ inner.step(index).map(|elt| {
+ Chunk {
+ parent: self.parent,
+ index,
+ first: Some(elt),
+ }
+ })
+ }
+}
+
+/// An iterator for the elements in a single chunk.
+///
+/// Iterator element type is `I::Item`.
+pub struct Chunk<'a, I: 'a>
+ where I: Iterator,
+ I::Item: 'a,
+{
+ parent: &'a IntoChunks<I>,
+ index: usize,
+ first: Option<I::Item>,
+}
+
+impl<'a, I> Drop for Chunk<'a, I>
+ where I: Iterator,
+ I::Item: 'a,
+{
+ fn drop(&mut self) {
+ self.parent.drop_group(self.index);
+ }
+}
+
+impl<'a, I> Iterator for Chunk<'a, I>
+ where I: Iterator,
+ I::Item: 'a,
+{
+ type Item = I::Item;
+ #[inline]
+ fn next(&mut self) -> Option<Self::Item> {
+ if let elt @ Some(..) = self.first.take() {
+ return elt;
+ }
+ self.parent.step(self.index)
+ }
+}