/*! An implementation of the [Shift-Or substring search algorithm][shiftor]. [shiftor]: https://en.wikipedia.org/wiki/Bitap_algorithm */ use alloc::boxed::Box; /// The type of our mask. /// /// While we don't expose anyway to configure this in the public API, if one /// really needs less memory usage or support for longer needles, then it is /// suggested to copy the code from this module and modify it to fit your /// needs. The code below is written to be correct regardless of whether Mask /// is a u8, u16, u32, u64 or u128. type Mask = u16; /// A forward substring searcher using the Shift-Or algorithm. #[derive(Debug)] pub struct Finder { masks: Box<[Mask; 256]>, needle_len: usize, } impl Finder { const MAX_NEEDLE_LEN: usize = (Mask::BITS - 1) as usize; /// Create a new Shift-Or forward searcher for the given `needle`. /// /// The needle may be empty. The empty needle matches at every byte offset. #[inline] pub fn new(needle: &[u8]) -> Option { let needle_len = needle.len(); if needle_len > Finder::MAX_NEEDLE_LEN { // A match is found when bit 7 is set in 'result' in the search // routine below. So our needle can't be bigger than 7. We could // permit bigger needles by using u16, u32 or u64 for our mask // entries. But this is all we need for this example. return None; } let mut searcher = Finder { masks: Box::from([!0; 256]), needle_len }; for (i, &byte) in needle.iter().enumerate() { searcher.masks[usize::from(byte)] &= !(1 << i); } Some(searcher) } /// Return the first occurrence of the needle given to `Finder::new` in /// the `haystack` given. If no such occurrence exists, then `None` is /// returned. /// /// Unlike most other substring search implementations in this crate, this /// finder does not require passing the needle at search time. A match can /// be determined without the needle at all since the required information /// is already encoded into this finder at construction time. /// /// The maximum value this can return is `haystack.len()`, which can only /// occur when the needle and haystack both have length zero. Otherwise, /// for non-empty haystacks, the maximum value is `haystack.len() - 1`. #[inline] pub fn find(&self, haystack: &[u8]) -> Option { if self.needle_len == 0 { return Some(0); } let mut result = !1; for (i, &byte) in haystack.iter().enumerate() { result |= self.masks[usize::from(byte)]; result <<= 1; if result & (1 << self.needle_len) == 0 { return Some(i + 1 - self.needle_len); } } None } } #[cfg(test)] mod tests { use super::*; define_substring_forward_quickcheck!(|h, n| Some(Finder::new(n)?.find(h))); #[test] fn forward() { crate::tests::substring::Runner::new() .fwd(|h, n| Some(Finder::new(n)?.find(h))) .run(); } }