extern crate packed_simd; #[cfg(not(feature = "runtime-dispatch-simd"))] use core::mem; #[cfg(feature = "runtime-dispatch-simd")] use std::mem; use self::packed_simd::{u8x32, u8x64, FromCast}; const MASK: [u8; 64] = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]; unsafe fn u8x64_from_offset(slice: &[u8], offset: usize) -> u8x64 { u8x64::from_slice_unaligned_unchecked(slice.get_unchecked(offset..)) } unsafe fn u8x32_from_offset(slice: &[u8], offset: usize) -> u8x32 { u8x32::from_slice_unaligned_unchecked(slice.get_unchecked(offset..)) } fn sum_x64(u8s: &u8x64) -> usize { let mut store = [0; mem::size_of::()]; u8s.write_to_slice_unaligned(&mut store); store.iter().map(|&e| e as usize).sum() } fn sum_x32(u8s: &u8x32) -> usize { let mut store = [0; mem::size_of::()]; u8s.write_to_slice_unaligned(&mut store); store.iter().map(|&e| e as usize).sum() } pub fn chunk_count(haystack: &[u8], needle: u8) -> usize { assert!(haystack.len() >= 32); unsafe { let mut offset = 0; let mut count = 0; let needles_x64 = u8x64::splat(needle); // 16320 while haystack.len() >= offset + 64 * 255 { let mut counts = u8x64::splat(0); for _ in 0..255 { counts -= u8x64::from_cast(u8x64_from_offset(haystack, offset).eq(needles_x64)); offset += 64; } count += sum_x64(&counts); } // 8192 if haystack.len() >= offset + 64 * 128 { let mut counts = u8x64::splat(0); for _ in 0..128 { counts -= u8x64::from_cast(u8x64_from_offset(haystack, offset).eq(needles_x64)); offset += 64; } count += sum_x64(&counts); } let needles_x32 = u8x32::splat(needle); // 32 let mut counts = u8x32::splat(0); for i in 0..(haystack.len() - offset) / 32 { counts -= u8x32::from_cast(u8x32_from_offset(haystack, offset + i * 32).eq(needles_x32)); } count += sum_x32(&counts); // Straggler; need to reset counts because prior loop can run 255 times counts = u8x32::splat(0); if haystack.len() % 32 != 0 { counts -= u8x32::from_cast(u8x32_from_offset(haystack, haystack.len() - 32).eq(needles_x32)) & u8x32_from_offset(&MASK, haystack.len() % 32); } count += sum_x32(&counts); count } } fn is_leading_utf8_byte_x64(u8s: u8x64) -> u8x64 { u8x64::from_cast((u8s & u8x64::splat(0b1100_0000)).ne(u8x64::splat(0b1000_0000))) } fn is_leading_utf8_byte_x32(u8s: u8x32) -> u8x32 { u8x32::from_cast((u8s & u8x32::splat(0b1100_0000)).ne(u8x32::splat(0b1000_0000))) } pub fn chunk_num_chars(utf8_chars: &[u8]) -> usize { assert!(utf8_chars.len() >= 32); unsafe { let mut offset = 0; let mut count = 0; // 16320 while utf8_chars.len() >= offset + 64 * 255 { let mut counts = u8x64::splat(0); for _ in 0..255 { counts -= is_leading_utf8_byte_x64(u8x64_from_offset(utf8_chars, offset)); offset += 64; } count += sum_x64(&counts); } // 8192 if utf8_chars.len() >= offset + 64 * 128 { let mut counts = u8x64::splat(0); for _ in 0..128 { counts -= is_leading_utf8_byte_x64(u8x64_from_offset(utf8_chars, offset)); offset += 64; } count += sum_x64(&counts); } // 32 let mut counts = u8x32::splat(0); for i in 0..(utf8_chars.len() - offset) / 32 { counts -= is_leading_utf8_byte_x32(u8x32_from_offset(utf8_chars, offset + i * 32)); } count += sum_x32(&counts); // Straggler; need to reset counts because prior loop can run 255 times counts = u8x32::splat(0); if utf8_chars.len() % 32 != 0 { counts -= is_leading_utf8_byte_x32(u8x32_from_offset(utf8_chars, utf8_chars.len() - 32)) & u8x32_from_offset(&MASK, utf8_chars.len() % 32); } count += sum_x32(&counts); count } }