/*!
Contains architecture independent routines. These routines are often used as
a "fallback" implementation when the more specialized architecture dependent
routines are unavailable.
*/

pub mod memchr;
pub mod packedpair;
pub mod rabinkarp;
#[cfg(feature = "alloc")]
pub mod shiftor;
pub mod twoway;

/// Returns true if and only if `needle` is a prefix of `haystack`.
///
/// This uses a latency optimized variant of `memcmp` internally which *might*
/// make this faster for very short strings.
///
/// # Inlining
///
/// This routine is marked `inline(always)`. If you want to call this function
/// in a way that is not always inlined, you'll need to wrap a call to it in
/// another function that is marked as `inline(never)` or just `inline`.
#[inline(always)]
pub fn is_prefix(haystack: &[u8], needle: &[u8]) -> bool {
    needle.len() <= haystack.len()
        && is_equal(&haystack[..needle.len()], needle)
}

/// Returns true if and only if `needle` is a suffix of `haystack`.
///
/// This uses a latency optimized variant of `memcmp` internally which *might*
/// make this faster for very short strings.
///
/// # Inlining
///
/// This routine is marked `inline(always)`. If you want to call this function
/// in a way that is not always inlined, you'll need to wrap a call to it in
/// another function that is marked as `inline(never)` or just `inline`.
#[inline(always)]
pub fn is_suffix(haystack: &[u8], needle: &[u8]) -> bool {
    needle.len() <= haystack.len()
        && is_equal(&haystack[haystack.len() - needle.len()..], needle)
}

/// Compare corresponding bytes in `x` and `y` for equality.
///
/// That is, this returns true if and only if `x.len() == y.len()` and
/// `x[i] == y[i]` for all `0 <= i < x.len()`.
///
/// # Inlining
///
/// This routine is marked `inline(always)`. If you want to call this function
/// in a way that is not always inlined, you'll need to wrap a call to it in
/// another function that is marked as `inline(never)` or just `inline`.
///
/// # Motivation
///
/// Why not use slice equality instead? Well, slice equality usually results in
/// a call out to the current platform's `libc` which might not be inlineable
/// or have other overhead. This routine isn't guaranteed to be a win, but it
/// might be in some cases.
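///
/// # Examples
///
/// A brief usage sketch. This assumes the routine is reachable at
/// `memchr::arch::all::is_equal`, as in the `memchr` crate:
///
/// ```
/// use memchr::arch::all::is_equal;
///
/// assert!(is_equal(b"foo", b"foo"));
/// assert!(!is_equal(b"foo", b"bar"));
/// // Slices of different lengths are never equal.
/// assert!(!is_equal(b"foo", b"foobar"));
/// ```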
#[inline(always)]
pub fn is_equal(x: &[u8], y: &[u8]) -> bool {
    if x.len() != y.len() {
        return false;
    }
    // SAFETY: Our pointers are derived directly from borrowed slices which
    // uphold all of our safety guarantees except for length. We account for
    // length with the check above.
    unsafe { is_equal_raw(x.as_ptr(), y.as_ptr(), x.len()) }
}

/// Compare `n` bytes at the given pointers for equality.
///
/// This returns true if and only if `*x.add(i) == *y.add(i)` for all
/// `0 <= i < n`.
///
/// # Inlining
///
/// This routine is marked `inline(always)`. If you want to call this function
/// in a way that is not always inlined, you'll need to wrap a call to it in
/// another function that is marked as `inline(never)` or just `inline`.
///
/// # Motivation
///
/// Why not use slice equality instead? Well, slice equality usually results in
/// a call out to the current platform's `libc` which might not be inlineable
/// or have other overhead. This routine isn't guaranteed to be a win, but it
/// might be in some cases.
///
/// # Safety
///
/// * Both `x` and `y` must be valid for reads of up to `n` bytes.
/// * Both `x` and `y` must point to an initialized value.
/// * Both `x` and `y` must each point to an allocated object and
///   must either be in bounds or at most one byte past the end of the
///   allocated object. `x` and `y` do not need to point to the same allocated
///   object, but they may.
/// * Both `x` and `y` must be _derived from_ a pointer to their respective
///   allocated objects.
/// * The distance between `x` and `x+n` must not overflow `isize`. Similarly
///   for `y` and `y+n`.
/// * The distance being in bounds must not rely on "wrapping around" the
///   address space.
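///
/// # Examples
///
/// A brief sketch of correct usage. Both pointers are derived from live
/// slices, which satisfies the safety requirements above. This assumes the
/// routine is reachable at `memchr::arch::all::is_equal_raw`, as in the
/// `memchr` crate:
///
/// ```
/// use memchr::arch::all::is_equal_raw;
///
/// let (x, y) = (b"quux", b"quux");
/// // SAFETY: `x` and `y` each point to 4 initialized, in-bounds bytes.
/// assert!(unsafe { is_equal_raw(x.as_ptr(), y.as_ptr(), 4) });
/// ```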
#[inline(always)]
pub unsafe fn is_equal_raw(
    mut x: *const u8,
    mut y: *const u8,
    n: usize,
) -> bool {
    // If we don't have enough bytes to do 4-byte at a time loads, then
    // handle each possible length specially. Note that I used to have a
    // byte-at-a-time loop here and that turned out to be quite a bit slower
    // for the memmem/pathological/defeat-simple-vector-alphabet benchmark.
    if n < 4 {
        return match n {
            0 => true,
            1 => x.read() == y.read(),
            2 => {
                x.cast::<u16>().read_unaligned()
                    == y.cast::<u16>().read_unaligned()
            }
            // I also tried copy_nonoverlapping here and it looks like the
            // codegen is the same.
            3 => x.cast::<[u8; 3]>().read() == y.cast::<[u8; 3]>().read(),
            _ => unreachable!(),
        };
    }
    // When we have 4 or more bytes to compare, then proceed in chunks of 4 at
    // a time using unaligned loads.
    //
    // Also, why do 4 byte loads instead of, say, 8 byte loads? The reason is
    // that this particular version of memcmp is likely to be called with tiny
    // needles. That means that if we do 8 byte loads, then a higher proportion
    // of memcmp calls will use the slower variant above. With that said, this
    // is a hypothesis and is only loosely supported by benchmarks. There's
    // likely some improvement that could be made here. The main thing here
    // though is to optimize for latency, not throughput.

    // SAFETY: The caller is responsible for ensuring the pointers we get are
    // valid and readable for at least `n` bytes. We also do unaligned loads,
    // so there's no need to ensure we're aligned. (This is justified by this
    // routine being specifically for short strings.)
    let xend = x.add(n.wrapping_sub(4));
    let yend = y.add(n.wrapping_sub(4));
    while x < xend {
        let vx = x.cast::<u32>().read_unaligned();
        let vy = y.cast::<u32>().read_unaligned();
        if vx != vy {
            return false;
        }
        x = x.add(4);
        y = y.add(4);
    }
    // This final load overlaps bytes already compared in the loop whenever
    // `n` is not a multiple of 4. Re-comparing a few bytes is harmless and
    // avoids a byte-at-a-time tail.
    let vx = xend.cast::<u32>().read_unaligned();
    let vy = yend.cast::<u32>().read_unaligned();
    vx == vy
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn equals_different_lengths() {
        assert!(!is_equal(b"", b"a"));
        assert!(!is_equal(b"a", b""));
        assert!(!is_equal(b"ab", b"a"));
        assert!(!is_equal(b"a", b"ab"));
    }

    #[test]
    fn equals_mismatch() {
        let one_mismatch = [
            (&b"a"[..], &b"x"[..]),
            (&b"ab"[..], &b"ax"[..]),
            (&b"abc"[..], &b"abx"[..]),
            (&b"abcd"[..], &b"abcx"[..]),
            (&b"abcde"[..], &b"abcdx"[..]),
            (&b"abcdef"[..], &b"abcdex"[..]),
            (&b"abcdefg"[..], &b"abcdefx"[..]),
            (&b"abcdefgh"[..], &b"abcdefgx"[..]),
            (&b"abcdefghi"[..], &b"abcdefghx"[..]),
            (&b"abcdefghij"[..], &b"abcdefghix"[..]),
            (&b"abcdefghijk"[..], &b"abcdefghijx"[..]),
            (&b"abcdefghijkl"[..], &b"abcdefghijkx"[..]),
            (&b"abcdefghijklm"[..], &b"abcdefghijklx"[..]),
            (&b"abcdefghijklmn"[..], &b"abcdefghijklmx"[..]),
        ];
        for (x, y) in one_mismatch {
            assert_eq!(x.len(), y.len(), "lengths should match");
            assert!(!is_equal(x, y));
            assert!(!is_equal(y, x));
        }
    }

    #[test]
    fn equals_yes() {
        assert!(is_equal(b"", b""));
        assert!(is_equal(b"a", b"a"));
        assert!(is_equal(b"ab", b"ab"));
        assert!(is_equal(b"abc", b"abc"));
        assert!(is_equal(b"abcd", b"abcd"));
        assert!(is_equal(b"abcde", b"abcde"));
        assert!(is_equal(b"abcdef", b"abcdef"));
        assert!(is_equal(b"abcdefg", b"abcdefg"));
        assert!(is_equal(b"abcdefgh", b"abcdefgh"));
        assert!(is_equal(b"abcdefghi", b"abcdefghi"));
    }

    #[test]
    fn prefix() {
        assert!(is_prefix(b"", b""));
        assert!(is_prefix(b"a", b""));
        assert!(is_prefix(b"ab", b""));
        assert!(is_prefix(b"foo", b"foo"));
        assert!(is_prefix(b"foobar", b"foo"));

        assert!(!is_prefix(b"foo", b"fob"));
        assert!(!is_prefix(b"foobar", b"fob"));
    }

    #[test]
    fn suffix() {
        assert!(is_suffix(b"", b""));
        assert!(is_suffix(b"a", b""));
        assert!(is_suffix(b"ab", b""));
        assert!(is_suffix(b"foo", b"foo"));
        assert!(is_suffix(b"foobar", b"bar"));

        assert!(!is_suffix(b"foo", b"goo"));
        assert!(!is_suffix(b"foobar", b"gar"));
    }
}
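
// A supplementary sketch exercising `is_equal` at lengths that straddle the
// 4-byte chunking in `is_equal_raw`, including the overlapping final load
// used when `n` is not a multiple of 4. The module and test names here are
// illustrative additions.
#[cfg(test)]
mod chunk_boundary_tests {
    use super::*;

    #[test]
    fn equals_around_chunk_boundary() {
        let base = b"abcdefghijkl";
        for n in 4..=base.len() {
            let x = &base[..n];
            let mut y = [0u8; 12];
            y[..n].copy_from_slice(x);
            assert!(is_equal(x, &y[..n]));
            // Flip the final byte: only the last (possibly overlapping)
            // 4-byte load can observe this mismatch.
            y[n - 1] ^= 1;
            assert!(!is_equal(x, &y[..n]));
        }
    }
}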