use std::collections::HashMap; use std::hash::Hash; use super::DiffableStrRef; // quick and dirty way to get an upper sequence ratio. pub fn upper_seq_ratio(seq1: &[T], seq2: &[T]) -> f32 { let n = seq1.len() + seq2.len(); if n == 0 { 1.0 } else { 2.0 * seq1.len().min(seq2.len()) as f32 / n as f32 } } /// Internal utility to calculate an upper bound for a ratio for /// [`get_close_matches`]. This is based on Python's difflib approach /// of considering the two sets to be multisets. /// /// It counts the number of matches without regard to order, which is an /// obvious upper bound. pub struct QuickSeqRatio<'a, T: DiffableStrRef + ?Sized>(HashMap<&'a T, i32>); impl<'a, T: DiffableStrRef + Hash + Eq + ?Sized> QuickSeqRatio<'a, T> { pub fn new(seq: &[&'a T]) -> QuickSeqRatio<'a, T> { let mut counts = HashMap::new(); for &word in seq { *counts.entry(word).or_insert(0) += 1; } QuickSeqRatio(counts) } pub fn calc(&self, seq: &[&T]) -> f32 { let n = self.0.len() + seq.len(); if n == 0 { return 1.0; } let mut available = HashMap::new(); let mut matches = 0; for &word in seq { let x = if let Some(count) = available.get(&word) { *count } else { self.0.get(&word).copied().unwrap_or(0) }; available.insert(word, x - 1); if x > 0 { matches += 1; } } 2.0 * matches as f32 / n as f32 } }