use std::hash::Hash; mod private { use std::collections::HashMap; use std::hash::Hash; use std::fmt; #[derive(Clone)] #[must_use = "iterator adaptors are lazy and do nothing unless consumed"] pub struct DuplicatesBy { pub(crate) iter: I, pub(crate) meta: Meta, } impl fmt::Debug for DuplicatesBy where I: Iterator + fmt::Debug, V: fmt::Debug + Hash + Eq, { debug_fmt_fields!(DuplicatesBy, iter, meta.used); } impl DuplicatesBy { pub(crate) fn new(iter: I, key_method: F) -> Self { DuplicatesBy { iter, meta: Meta { used: HashMap::new(), pending: 0, key_method, }, } } } #[derive(Clone)] pub struct Meta { used: HashMap, pending: usize, key_method: F, } impl Meta where Key: Eq + Hash, { /// Takes an item and returns it back to the caller if it's the second time we see it. /// Otherwise the item is consumed and None is returned #[inline(always)] fn filter(&mut self, item: I) -> Option where F: KeyMethod, { let kv = self.key_method.make(item); match self.used.get_mut(kv.key_ref()) { None => { self.used.insert(kv.key(), false); self.pending += 1; None } Some(true) => None, Some(produced) => { *produced = true; self.pending -= 1; Some(kv.value()) } } } } impl Iterator for DuplicatesBy where I: Iterator, Key: Eq + Hash, F: KeyMethod, { type Item = I::Item; fn next(&mut self) -> Option { let DuplicatesBy { iter, meta } = self; iter.find_map(|v| meta.filter(v)) } #[inline] fn size_hint(&self) -> (usize, Option) { let (_, hi) = self.iter.size_hint(); let hi = hi.map(|hi| { if hi <= self.meta.pending { // fewer or equally many iter-remaining elements than pending elements // => at most, each iter-remaining element is matched hi } else { // fewer pending elements than iter-remaining elements // => at most: // * each pending element is matched // * the other iter-remaining elements come in pairs self.meta.pending + (hi - self.meta.pending) / 2 } }); // The lower bound is always 0 since we might only get unique items from now on (0, hi) } } impl DoubleEndedIterator for DuplicatesBy where I: DoubleEndedIterator, Key: Eq + Hash, F: KeyMethod, { fn next_back(&mut self) -> Option { let DuplicatesBy { iter, meta } = self; iter.rev().find_map(|v| meta.filter(v)) } } /// A keying method for use with `DuplicatesBy` pub trait KeyMethod { type Container: KeyXorValue; fn make(&mut self, value: V) -> Self::Container; } /// Apply the identity function to elements before checking them for equality. #[derive(Debug)] pub struct ById; impl KeyMethod for ById { type Container = JustValue; fn make(&mut self, v: V) -> Self::Container { JustValue(v) } } /// Apply a user-supplied function to elements before checking them for equality. pub struct ByFn(pub(crate) F); impl fmt::Debug for ByFn { debug_fmt_fields!(ByFn,); } impl KeyMethod for ByFn where F: FnMut(&V) -> K, { type Container = KeyValue; fn make(&mut self, v: V) -> Self::Container { KeyValue((self.0)(&v), v) } } // Implementors of this trait can hold onto a key and a value but only give access to one of them // at a time. This allows the key and the value to be the same value internally pub trait KeyXorValue { fn key_ref(&self) -> &K; fn key(self) -> K; fn value(self) -> V; } #[derive(Debug)] pub struct KeyValue(K, V); impl KeyXorValue for KeyValue { fn key_ref(&self) -> &K { &self.0 } fn key(self) -> K { self.0 } fn value(self) -> V { self.1 } } #[derive(Debug)] pub struct JustValue(V); impl KeyXorValue for JustValue { fn key_ref(&self) -> &V { &self.0 } fn key(self) -> V { self.0 } fn value(self) -> V { self.0 } } } /// An iterator adapter to filter for duplicate elements. /// /// See [`.duplicates_by()`](crate::Itertools::duplicates_by) for more information. #[must_use = "iterator adaptors are lazy and do nothing unless consumed"] pub type DuplicatesBy = private::DuplicatesBy>; /// Create a new `DuplicatesBy` iterator. pub fn duplicates_by(iter: I, f: F) -> DuplicatesBy where Key: Eq + Hash, F: FnMut(&I::Item) -> Key, I: Iterator, { DuplicatesBy::new(iter, private::ByFn(f)) } /// An iterator adapter to filter out duplicate elements. /// /// See [`.duplicates()`](crate::Itertools::duplicates) for more information. pub type Duplicates = private::DuplicatesBy::Item, private::ById>; /// Create a new `Duplicates` iterator. pub fn duplicates(iter: I) -> Duplicates where I: Iterator, I::Item: Eq + Hash, { Duplicates::new(iter, private::ById) }