Diffstat (limited to 'vendor/bstr/src/ascii.rs')
-rw-r--r-- | vendor/bstr/src/ascii.rs | 336 |
1 file changed, 0 insertions, 336 deletions
diff --git a/vendor/bstr/src/ascii.rs b/vendor/bstr/src/ascii.rs
deleted file mode 100644
index bb2b67949..000000000
--- a/vendor/bstr/src/ascii.rs
+++ /dev/null
@@ -1,336 +0,0 @@
-use core::mem;
-
-// The following ~400 lines of code exist for exactly one purpose, which is
-// to optimize this code:
-//
-//     byte_slice.iter().position(|&b| b > 0x7F).unwrap_or(byte_slice.len())
-//
-// Yes... Overengineered is a word that comes to mind, but this is effectively
-// a very similar problem to memchr, and virtually nobody has been able to
-// resist optimizing the crap out of that (except for perhaps the BSD and MUSL
-// folks). In particular, this routine makes a very common case (ASCII) very
-// fast, which seems worth it. We do stop short of adding AVX variants of the
-// code below in order to retain our sanity and also to avoid needing to deal
-// with runtime target feature detection. RESIST!
-//
-// In order to understand the SIMD version below, it would be good to read this
-// comment describing how my memchr routine works:
-// https://github.com/BurntSushi/rust-memchr/blob/b0a29f267f4a7fad8ffcc8fe8377a06498202883/src/x86/sse2.rs#L19-L106
-//
-// The primary difference with memchr is that for ASCII, we can do a bit less
-// work. In particular, we don't need to detect the presence of a specific
-// byte, but rather, whether any byte has its most significant bit set. That
-// means we can effectively skip the _mm_cmpeq_epi8 step and jump straight to
-// _mm_movemask_epi8.
-
-#[cfg(any(test, not(target_arch = "x86_64")))]
-const USIZE_BYTES: usize = mem::size_of::<usize>();
-#[cfg(any(test, not(target_arch = "x86_64")))]
-const FALLBACK_LOOP_SIZE: usize = 2 * USIZE_BYTES;
-
-// This is a mask where the most significant bit of each byte in the usize
-// is set. We test this bit to determine whether a character is ASCII or not.
-// Namely, a single byte is regarded as an ASCII codepoint if and only if its
-// most significant bit is not set.
-#[cfg(any(test, not(target_arch = "x86_64")))]
-const ASCII_MASK_U64: u64 = 0x8080808080808080;
-#[cfg(any(test, not(target_arch = "x86_64")))]
-const ASCII_MASK: usize = ASCII_MASK_U64 as usize;
-
-/// Returns the index of the first non-ASCII byte in the given slice.
-///
-/// If the slice only contains ASCII bytes, then the length of the slice is
-/// returned.
-pub fn first_non_ascii_byte(slice: &[u8]) -> usize {
-    #[cfg(not(target_arch = "x86_64"))]
-    {
-        first_non_ascii_byte_fallback(slice)
-    }
-
-    #[cfg(target_arch = "x86_64")]
-    {
-        first_non_ascii_byte_sse2(slice)
-    }
-}
-
-#[cfg(any(test, not(target_arch = "x86_64")))]
-fn first_non_ascii_byte_fallback(slice: &[u8]) -> usize {
-    let align = USIZE_BYTES - 1;
-    let start_ptr = slice.as_ptr();
-    let end_ptr = slice[slice.len()..].as_ptr();
-    let mut ptr = start_ptr;
-
-    unsafe {
-        if slice.len() < USIZE_BYTES {
-            return first_non_ascii_byte_slow(start_ptr, end_ptr, ptr);
-        }
-
-        let chunk = read_unaligned_usize(ptr);
-        let mask = chunk & ASCII_MASK;
-        if mask != 0 {
-            return first_non_ascii_byte_mask(mask);
-        }
-
-        ptr = ptr_add(ptr, USIZE_BYTES - (start_ptr as usize & align));
-        debug_assert!(ptr > start_ptr);
-        debug_assert!(ptr_sub(end_ptr, USIZE_BYTES) >= start_ptr);
-        if slice.len() >= FALLBACK_LOOP_SIZE {
-            while ptr <= ptr_sub(end_ptr, FALLBACK_LOOP_SIZE) {
-                debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
-
-                let a = *(ptr as *const usize);
-                let b = *(ptr_add(ptr, USIZE_BYTES) as *const usize);
-                if (a | b) & ASCII_MASK != 0 {
-                    // What a kludge. We wrap the position finding code into
-                    // a non-inlineable function, which makes the codegen in
-                    // the tight loop above a bit better by avoiding a
-                    // couple extra movs. We pay for it by two additional
-                    // stores, but only in the case of finding a non-ASCII
-                    // byte.
-                    #[inline(never)]
-                    unsafe fn findpos(
-                        start_ptr: *const u8,
-                        ptr: *const u8,
-                    ) -> usize {
-                        let a = *(ptr as *const usize);
-                        let b = *(ptr_add(ptr, USIZE_BYTES) as *const usize);
-
-                        let mut at = sub(ptr, start_ptr);
-                        let maska = a & ASCII_MASK;
-                        if maska != 0 {
-                            return at + first_non_ascii_byte_mask(maska);
-                        }
-
-                        at += USIZE_BYTES;
-                        let maskb = b & ASCII_MASK;
-                        debug_assert!(maskb != 0);
-                        return at + first_non_ascii_byte_mask(maskb);
-                    }
-                    return findpos(start_ptr, ptr);
-                }
-                ptr = ptr_add(ptr, FALLBACK_LOOP_SIZE);
-            }
-        }
-        first_non_ascii_byte_slow(start_ptr, end_ptr, ptr)
-    }
-}
-
-#[cfg(target_arch = "x86_64")]
-fn first_non_ascii_byte_sse2(slice: &[u8]) -> usize {
-    use core::arch::x86_64::*;
-
-    const VECTOR_SIZE: usize = mem::size_of::<__m128i>();
-    const VECTOR_ALIGN: usize = VECTOR_SIZE - 1;
-    const VECTOR_LOOP_SIZE: usize = 4 * VECTOR_SIZE;
-
-    let start_ptr = slice.as_ptr();
-    let end_ptr = slice[slice.len()..].as_ptr();
-    let mut ptr = start_ptr;
-
-    unsafe {
-        if slice.len() < VECTOR_SIZE {
-            return first_non_ascii_byte_slow(start_ptr, end_ptr, ptr);
-        }
-
-        let chunk = _mm_loadu_si128(ptr as *const __m128i);
-        let mask = _mm_movemask_epi8(chunk);
-        if mask != 0 {
-            return mask.trailing_zeros() as usize;
-        }
-
-        ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
-        debug_assert!(ptr > start_ptr);
-        debug_assert!(end_ptr.sub(VECTOR_SIZE) >= start_ptr);
-        if slice.len() >= VECTOR_LOOP_SIZE {
-            while ptr <= ptr_sub(end_ptr, VECTOR_LOOP_SIZE) {
-                debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
-
-                let a = _mm_load_si128(ptr as *const __m128i);
-                let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
-                let c =
-                    _mm_load_si128(ptr.add(2 * VECTOR_SIZE) as *const __m128i);
-                let d =
-                    _mm_load_si128(ptr.add(3 * VECTOR_SIZE) as *const __m128i);
-
-                let or1 = _mm_or_si128(a, b);
-                let or2 = _mm_or_si128(c, d);
-                let or3 = _mm_or_si128(or1, or2);
-                if _mm_movemask_epi8(or3) != 0 {
-                    let mut at = sub(ptr, start_ptr);
-                    let mask = _mm_movemask_epi8(a);
-                    if mask != 0 {
-                        return at + mask.trailing_zeros() as usize;
-                    }
-
-                    at += VECTOR_SIZE;
-                    let mask = _mm_movemask_epi8(b);
-                    if mask != 0 {
-                        return at + mask.trailing_zeros() as usize;
-                    }
-
-                    at += VECTOR_SIZE;
-                    let mask = _mm_movemask_epi8(c);
-                    if mask != 0 {
-                        return at + mask.trailing_zeros() as usize;
-                    }
-
-                    at += VECTOR_SIZE;
-                    let mask = _mm_movemask_epi8(d);
-                    debug_assert!(mask != 0);
-                    return at + mask.trailing_zeros() as usize;
-                }
-                ptr = ptr_add(ptr, VECTOR_LOOP_SIZE);
-            }
-        }
-        while ptr <= end_ptr.sub(VECTOR_SIZE) {
-            debug_assert!(sub(end_ptr, ptr) >= VECTOR_SIZE);
-
-            let chunk = _mm_loadu_si128(ptr as *const __m128i);
-            let mask = _mm_movemask_epi8(chunk);
-            if mask != 0 {
-                return sub(ptr, start_ptr) + mask.trailing_zeros() as usize;
-            }
-            ptr = ptr.add(VECTOR_SIZE);
-        }
-        first_non_ascii_byte_slow(start_ptr, end_ptr, ptr)
-    }
-}
-
-#[inline(always)]
-unsafe fn first_non_ascii_byte_slow(
-    start_ptr: *const u8,
-    end_ptr: *const u8,
-    mut ptr: *const u8,
-) -> usize {
-    debug_assert!(start_ptr <= ptr);
-    debug_assert!(ptr <= end_ptr);
-
-    while ptr < end_ptr {
-        if *ptr > 0x7F {
-            return sub(ptr, start_ptr);
-        }
-        ptr = ptr.offset(1);
-    }
-    sub(end_ptr, start_ptr)
-}
-
-/// Compute the position of the first non-ASCII byte in the given mask.
-///
-/// The mask should be computed by `chunk & ASCII_MASK`, where `chunk` is
-/// 8 contiguous bytes of the slice being checked where *at least* one of those
-/// bytes is not an ASCII byte.
-///
-/// The position returned is always in the inclusive range [0, 7].
-#[cfg(any(test, not(target_arch = "x86_64")))]
-fn first_non_ascii_byte_mask(mask: usize) -> usize {
-    #[cfg(target_endian = "little")]
-    {
-        mask.trailing_zeros() as usize / 8
-    }
-    #[cfg(target_endian = "big")]
-    {
-        mask.leading_zeros() as usize / 8
-    }
-}
-
-/// Increment the given pointer by the given amount.
-unsafe fn ptr_add(ptr: *const u8, amt: usize) -> *const u8 {
-    debug_assert!(amt < ::core::isize::MAX as usize);
-    ptr.offset(amt as isize)
-}
-
-/// Decrement the given pointer by the given amount.
-unsafe fn ptr_sub(ptr: *const u8, amt: usize) -> *const u8 {
-    debug_assert!(amt < ::core::isize::MAX as usize);
-    ptr.offset((amt as isize).wrapping_neg())
-}
-
-#[cfg(any(test, not(target_arch = "x86_64")))]
-unsafe fn read_unaligned_usize(ptr: *const u8) -> usize {
-    use core::ptr;
-
-    let mut n: usize = 0;
-    ptr::copy_nonoverlapping(ptr, &mut n as *mut _ as *mut u8, USIZE_BYTES);
-    n
-}
-
-/// Subtract `b` from `a` and return the difference. `a` should be greater
-/// than or equal to `b`.
-fn sub(a: *const u8, b: *const u8) -> usize {
-    debug_assert!(a >= b);
-    (a as usize) - (b as usize)
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    // Our testing approach here is to try and exhaustively test every case.
-    // This includes the position at which a non-ASCII byte occurs in addition
-    // to the alignment of the slice that we're searching.
-
-    #[test]
-    fn positive_fallback_forward() {
-        for i in 0..517 {
-            let s = "a".repeat(i);
-            assert_eq!(
-                i,
-                first_non_ascii_byte_fallback(s.as_bytes()),
-                "i: {:?}, len: {:?}, s: {:?}",
-                i,
-                s.len(),
-                s
-            );
-        }
-    }
-
-    #[test]
-    #[cfg(target_arch = "x86_64")]
-    fn positive_sse2_forward() {
-        for i in 0..517 {
-            let b = "a".repeat(i).into_bytes();
-            assert_eq!(b.len(), first_non_ascii_byte_sse2(&b));
-        }
-    }
-
-    #[test]
-    fn negative_fallback_forward() {
-        for i in 0..517 {
-            for align in 0..65 {
-                let mut s = "a".repeat(i);
-                s.push_str("☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃");
-                let s = s.get(align..).unwrap_or("");
-                assert_eq!(
-                    i.saturating_sub(align),
-                    first_non_ascii_byte_fallback(s.as_bytes()),
-                    "i: {:?}, align: {:?}, len: {:?}, s: {:?}",
-                    i,
-                    align,
-                    s.len(),
-                    s
-                );
-            }
-        }
-    }
-
-    #[test]
-    #[cfg(target_arch = "x86_64")]
-    fn negative_sse2_forward() {
-        for i in 0..517 {
-            for align in 0..65 {
-                let mut s = "a".repeat(i);
-                s.push_str("☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃");
-                let s = s.get(align..).unwrap_or("");
-                assert_eq!(
-                    i.saturating_sub(align),
-                    first_non_ascii_byte_sse2(s.as_bytes()),
-                    "i: {:?}, align: {:?}, len: {:?}, s: {:?}",
-                    i,
-                    align,
-                    s.len(),
-                    s
-                );
-            }
-        }
-    }
-}
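For reference, the deleted fallback's core trick reduces to this: a byte is ASCII iff its most significant bit is clear, so ANDing an 8-byte chunk against 0x8080808080808080 yields a nonzero mask exactly when the chunk contains a non-ASCII byte, and on little-endian targets `trailing_zeros() / 8` locates the first offender. Below is a minimal safe sketch of that word-at-a-time idea; the function names are illustrative, not bstr's API, and it assumes little-endian byte order, matching the deleted code's little-endian branch.

// Safe, illustrative sketch of the deleted fallback's SWAR trick.
const ASCII_MASK_U64: u64 = 0x8080_8080_8080_8080;

/// The scalar baseline quoted in the deleted file's header comment.
fn first_non_ascii_byte_scalar(bytes: &[u8]) -> usize {
    bytes.iter().position(|&b| b > 0x7F).unwrap_or(bytes.len())
}

/// Word-at-a-time variant: `word & ASCII_MASK_U64` is nonzero exactly when
/// the 8-byte chunk holds a byte with its high bit set (i.e. non-ASCII).
fn first_non_ascii_byte_swar(bytes: &[u8]) -> usize {
    let mut chunks = bytes.chunks_exact(8);
    let mut at = 0;
    for chunk in chunks.by_ref() {
        let word = u64::from_le_bytes(chunk.try_into().unwrap());
        let mask = word & ASCII_MASK_U64;
        if mask != 0 {
            // On little-endian, the lowest set bit belongs to the first
            // non-ASCII byte; each byte contributes 8 bits.
            return at + mask.trailing_zeros() as usize / 8;
        }
        at += 8;
    }
    // Finish the sub-8-byte tail with the scalar scan.
    at + first_non_ascii_byte_scalar(chunks.remainder())
}

fn main() {
    let b = "hello ☃ world".as_bytes();
    // "hello " is 6 ASCII bytes; the snowman's first UTF-8 byte is at index 6.
    assert_eq!(first_non_ascii_byte_scalar(b), 6);
    assert_eq!(first_non_ascii_byte_swar(b), 6);
}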
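The SSE2 path leans on the same observation, as the header comment notes: `_mm_movemask_epi8` already collects the most significant bit of each of the 16 bytes, so the `_mm_cmpeq_epi8` comparison a memchr kernel needs can be skipped entirely. Here is a sketch of the single-vector check with an illustrative name; the deleted loop amortized this further by ORing four aligned vectors together and testing one combined movemask per 64 bytes, re-deriving per-vector masks only after a hit.

// Illustrative single-vector SSE2 check. SSE2 is part of the x86_64
// baseline, so no runtime feature detection is needed; the unsafe block is
// only for the raw intrinsics.
#[cfg(target_arch = "x86_64")]
fn first_non_ascii_in_chunk(chunk: &[u8; 16]) -> Option<usize> {
    use core::arch::x86_64::*;
    unsafe {
        let v = _mm_loadu_si128(chunk.as_ptr() as *const __m128i);
        // One bit per byte: bit i is set iff byte i has its high bit set,
        // i.e. iff byte i is non-ASCII.
        let mask = _mm_movemask_epi8(v);
        if mask != 0 {
            Some(mask.trailing_zeros() as usize)
        } else {
            None
        }
    }
}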