diff options
Diffstat (limited to 'vendor/bstr/src/byteset')
-rw-r--r-- | vendor/bstr/src/byteset/mod.rs | 114 | ||||
-rw-r--r-- | vendor/bstr/src/byteset/scalar.rs | 295 |
2 files changed, 0 insertions, 409 deletions
diff --git a/vendor/bstr/src/byteset/mod.rs b/vendor/bstr/src/byteset/mod.rs deleted file mode 100644 index 043d30986..000000000 --- a/vendor/bstr/src/byteset/mod.rs +++ /dev/null @@ -1,114 +0,0 @@ -use memchr::{memchr, memchr2, memchr3, memrchr, memrchr2, memrchr3}; -mod scalar; - -#[inline] -fn build_table(byteset: &[u8]) -> [u8; 256] { - let mut table = [0u8; 256]; - for &b in byteset { - table[b as usize] = 1; - } - table -} - -#[inline] -pub(crate) fn find(haystack: &[u8], byteset: &[u8]) -> Option<usize> { - match byteset.len() { - 0 => return None, - 1 => memchr(byteset[0], haystack), - 2 => memchr2(byteset[0], byteset[1], haystack), - 3 => memchr3(byteset[0], byteset[1], byteset[2], haystack), - _ => { - let table = build_table(byteset); - scalar::forward_search_bytes(haystack, |b| table[b as usize] != 0) - } - } -} - -#[inline] -pub(crate) fn rfind(haystack: &[u8], byteset: &[u8]) -> Option<usize> { - match byteset.len() { - 0 => return None, - 1 => memrchr(byteset[0], haystack), - 2 => memrchr2(byteset[0], byteset[1], haystack), - 3 => memrchr3(byteset[0], byteset[1], byteset[2], haystack), - _ => { - let table = build_table(byteset); - scalar::reverse_search_bytes(haystack, |b| table[b as usize] != 0) - } - } -} - -#[inline] -pub(crate) fn find_not(haystack: &[u8], byteset: &[u8]) -> Option<usize> { - if haystack.is_empty() { - return None; - } - match byteset.len() { - 0 => return Some(0), - 1 => scalar::inv_memchr(byteset[0], haystack), - 2 => scalar::forward_search_bytes(haystack, |b| { - b != byteset[0] && b != byteset[1] - }), - 3 => scalar::forward_search_bytes(haystack, |b| { - b != byteset[0] && b != byteset[1] && b != byteset[2] - }), - _ => { - let table = build_table(byteset); - scalar::forward_search_bytes(haystack, |b| table[b as usize] == 0) - } - } -} -#[inline] -pub(crate) fn rfind_not(haystack: &[u8], byteset: &[u8]) -> Option<usize> { - if haystack.is_empty() { - return None; - } - match byteset.len() { - 0 => return Some(haystack.len() - 1), - 1 => scalar::inv_memrchr(byteset[0], haystack), - 2 => scalar::reverse_search_bytes(haystack, |b| { - b != byteset[0] && b != byteset[1] - }), - 3 => scalar::reverse_search_bytes(haystack, |b| { - b != byteset[0] && b != byteset[1] && b != byteset[2] - }), - _ => { - let table = build_table(byteset); - scalar::reverse_search_bytes(haystack, |b| table[b as usize] == 0) - } - } -} - -#[cfg(test)] -mod tests { - quickcheck::quickcheck! { - fn qc_byteset_forward_matches_naive( - haystack: Vec<u8>, - needles: Vec<u8> - ) -> bool { - super::find(&haystack, &needles) - == haystack.iter().position(|b| needles.contains(b)) - } - fn qc_byteset_backwards_matches_naive( - haystack: Vec<u8>, - needles: Vec<u8> - ) -> bool { - super::rfind(&haystack, &needles) - == haystack.iter().rposition(|b| needles.contains(b)) - } - fn qc_byteset_forward_not_matches_naive( - haystack: Vec<u8>, - needles: Vec<u8> - ) -> bool { - super::find_not(&haystack, &needles) - == haystack.iter().position(|b| !needles.contains(b)) - } - fn qc_byteset_backwards_not_matches_naive( - haystack: Vec<u8>, - needles: Vec<u8> - ) -> bool { - super::rfind_not(&haystack, &needles) - == haystack.iter().rposition(|b| !needles.contains(b)) - } - } -} diff --git a/vendor/bstr/src/byteset/scalar.rs b/vendor/bstr/src/byteset/scalar.rs deleted file mode 100644 index 9bd34a8f9..000000000 --- a/vendor/bstr/src/byteset/scalar.rs +++ /dev/null @@ -1,295 +0,0 @@ -// This is adapted from `fallback.rs` from rust-memchr. It's modified to return -// the 'inverse' query of memchr, e.g. finding the first byte not in the provided -// set. This is simple for the 1-byte case. - -use core::cmp; -use core::usize; - -#[cfg(target_pointer_width = "32")] -const USIZE_BYTES: usize = 4; - -#[cfg(target_pointer_width = "64")] -const USIZE_BYTES: usize = 8; - -// The number of bytes to loop at in one iteration of memchr/memrchr. -const LOOP_SIZE: usize = 2 * USIZE_BYTES; - -/// Repeat the given byte into a word size number. That is, every 8 bits -/// is equivalent to the given byte. For example, if `b` is `\x4E` or -/// `01001110` in binary, then the returned value on a 32-bit system would be: -/// `01001110_01001110_01001110_01001110`. -#[inline(always)] -fn repeat_byte(b: u8) -> usize { - (b as usize) * (usize::MAX / 255) -} - -pub fn inv_memchr(n1: u8, haystack: &[u8]) -> Option<usize> { - let vn1 = repeat_byte(n1); - let confirm = |byte| byte != n1; - let loop_size = cmp::min(LOOP_SIZE, haystack.len()); - let align = USIZE_BYTES - 1; - let start_ptr = haystack.as_ptr(); - let end_ptr = haystack[haystack.len()..].as_ptr(); - let mut ptr = start_ptr; - - unsafe { - if haystack.len() < USIZE_BYTES { - return forward_search(start_ptr, end_ptr, ptr, confirm); - } - - let chunk = read_unaligned_usize(ptr); - if (chunk ^ vn1) != 0 { - return forward_search(start_ptr, end_ptr, ptr, confirm); - } - - ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & align)); - debug_assert!(ptr > start_ptr); - debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr); - while loop_size == LOOP_SIZE && ptr <= end_ptr.sub(loop_size) { - debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES); - - let a = *(ptr as *const usize); - let b = *(ptr.add(USIZE_BYTES) as *const usize); - let eqa = (a ^ vn1) != 0; - let eqb = (b ^ vn1) != 0; - if eqa || eqb { - break; - } - ptr = ptr.add(LOOP_SIZE); - } - forward_search(start_ptr, end_ptr, ptr, confirm) - } -} - -/// Return the last index not matching the byte `x` in `text`. -pub fn inv_memrchr(n1: u8, haystack: &[u8]) -> Option<usize> { - let vn1 = repeat_byte(n1); - let confirm = |byte| byte != n1; - let loop_size = cmp::min(LOOP_SIZE, haystack.len()); - let align = USIZE_BYTES - 1; - let start_ptr = haystack.as_ptr(); - let end_ptr = haystack[haystack.len()..].as_ptr(); - let mut ptr = end_ptr; - - unsafe { - if haystack.len() < USIZE_BYTES { - return reverse_search(start_ptr, end_ptr, ptr, confirm); - } - - let chunk = read_unaligned_usize(ptr.sub(USIZE_BYTES)); - if (chunk ^ vn1) != 0 { - return reverse_search(start_ptr, end_ptr, ptr, confirm); - } - - ptr = (end_ptr as usize & !align) as *const u8; - debug_assert!(start_ptr <= ptr && ptr <= end_ptr); - while loop_size == LOOP_SIZE && ptr >= start_ptr.add(loop_size) { - debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES); - - let a = *(ptr.sub(2 * USIZE_BYTES) as *const usize); - let b = *(ptr.sub(1 * USIZE_BYTES) as *const usize); - let eqa = (a ^ vn1) != 0; - let eqb = (b ^ vn1) != 0; - if eqa || eqb { - break; - } - ptr = ptr.sub(loop_size); - } - reverse_search(start_ptr, end_ptr, ptr, confirm) - } -} - -#[inline(always)] -unsafe fn forward_search<F: Fn(u8) -> bool>( - start_ptr: *const u8, - end_ptr: *const u8, - mut ptr: *const u8, - confirm: F, -) -> Option<usize> { - debug_assert!(start_ptr <= ptr); - debug_assert!(ptr <= end_ptr); - - while ptr < end_ptr { - if confirm(*ptr) { - return Some(sub(ptr, start_ptr)); - } - ptr = ptr.offset(1); - } - None -} - -#[inline(always)] -unsafe fn reverse_search<F: Fn(u8) -> bool>( - start_ptr: *const u8, - end_ptr: *const u8, - mut ptr: *const u8, - confirm: F, -) -> Option<usize> { - debug_assert!(start_ptr <= ptr); - debug_assert!(ptr <= end_ptr); - - while ptr > start_ptr { - ptr = ptr.offset(-1); - if confirm(*ptr) { - return Some(sub(ptr, start_ptr)); - } - } - None -} - -unsafe fn read_unaligned_usize(ptr: *const u8) -> usize { - (ptr as *const usize).read_unaligned() -} - -/// Subtract `b` from `a` and return the difference. `a` should be greater than -/// or equal to `b`. -fn sub(a: *const u8, b: *const u8) -> usize { - debug_assert!(a >= b); - (a as usize) - (b as usize) -} - -/// Safe wrapper around `forward_search` -#[inline] -pub(crate) fn forward_search_bytes<F: Fn(u8) -> bool>( - s: &[u8], - confirm: F, -) -> Option<usize> { - unsafe { - let start = s.as_ptr(); - let end = start.add(s.len()); - forward_search(start, end, start, confirm) - } -} - -/// Safe wrapper around `reverse_search` -#[inline] -pub(crate) fn reverse_search_bytes<F: Fn(u8) -> bool>( - s: &[u8], - confirm: F, -) -> Option<usize> { - unsafe { - let start = s.as_ptr(); - let end = start.add(s.len()); - reverse_search(start, end, end, confirm) - } -} - -#[cfg(test)] -mod tests { - use super::{inv_memchr, inv_memrchr}; - // search string, search byte, inv_memchr result, inv_memrchr result. - // these are expanded into a much larger set of tests in build_tests - const TESTS: &[(&[u8], u8, usize, usize)] = &[ - (b"z", b'a', 0, 0), - (b"zz", b'a', 0, 1), - (b"aza", b'a', 1, 1), - (b"zaz", b'a', 0, 2), - (b"zza", b'a', 0, 1), - (b"zaa", b'a', 0, 0), - (b"zzz", b'a', 0, 2), - ]; - - type TestCase = (Vec<u8>, u8, Option<(usize, usize)>); - - fn build_tests() -> Vec<TestCase> { - let mut result = vec![]; - for &(search, byte, fwd_pos, rev_pos) in TESTS { - result.push((search.to_vec(), byte, Some((fwd_pos, rev_pos)))); - for i in 1..515 { - // add a bunch of copies of the search byte to the end. - let mut suffixed: Vec<u8> = search.into(); - suffixed.extend(std::iter::repeat(byte).take(i)); - result.push((suffixed, byte, Some((fwd_pos, rev_pos)))); - - // add a bunch of copies of the search byte to the start. - let mut prefixed: Vec<u8> = - std::iter::repeat(byte).take(i).collect(); - prefixed.extend(search); - result.push(( - prefixed, - byte, - Some((fwd_pos + i, rev_pos + i)), - )); - - // add a bunch of copies of the search byte to both ends. - let mut surrounded: Vec<u8> = - std::iter::repeat(byte).take(i).collect(); - surrounded.extend(search); - surrounded.extend(std::iter::repeat(byte).take(i)); - result.push(( - surrounded, - byte, - Some((fwd_pos + i, rev_pos + i)), - )); - } - } - - // build non-matching tests for several sizes - for i in 0..515 { - result.push(( - std::iter::repeat(b'\0').take(i).collect(), - b'\0', - None, - )); - } - - result - } - - #[test] - fn test_inv_memchr() { - use crate::{ByteSlice, B}; - for (search, byte, matching) in build_tests() { - assert_eq!( - inv_memchr(byte, &search), - matching.map(|m| m.0), - "inv_memchr when searching for {:?} in {:?}", - byte as char, - // better printing - B(&search).as_bstr(), - ); - assert_eq!( - inv_memrchr(byte, &search), - matching.map(|m| m.1), - "inv_memrchr when searching for {:?} in {:?}", - byte as char, - // better printing - B(&search).as_bstr(), - ); - // Test a rather large number off offsets for potential alignment issues - for offset in 1..130 { - if offset >= search.len() { - break; - } - // If this would cause us to shift the results off the end, skip - // it so that we don't have to recompute them. - if let Some((f, r)) = matching { - if offset > f || offset > r { - break; - } - } - let realigned = &search[offset..]; - - let forward_pos = matching.map(|m| m.0 - offset); - let reverse_pos = matching.map(|m| m.1 - offset); - - assert_eq!( - inv_memchr(byte, &realigned), - forward_pos, - "inv_memchr when searching (realigned by {}) for {:?} in {:?}", - offset, - byte as char, - realigned.as_bstr(), - ); - assert_eq!( - inv_memrchr(byte, &realigned), - reverse_pos, - "inv_memrchr when searching (realigned by {}) for {:?} in {:?}", - offset, - byte as char, - realigned.as_bstr(), - ); - } - } - } -} |