From 698f8c2f01ea549d77d7dc3338a12e04c11057b9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:02:58 +0200 Subject: Adding upstream version 1.64.0+dfsg1. Signed-off-by: Daniel Baumann --- library/core/benches/ascii/is_ascii.rs | 82 ++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 library/core/benches/ascii/is_ascii.rs (limited to 'library/core/benches/ascii') diff --git a/library/core/benches/ascii/is_ascii.rs b/library/core/benches/ascii/is_ascii.rs new file mode 100644 index 000000000..a42a1dcfe --- /dev/null +++ b/library/core/benches/ascii/is_ascii.rs @@ -0,0 +1,82 @@ +use super::{LONG, MEDIUM, SHORT}; +use test::black_box; +use test::Bencher; + +macro_rules! benches { + ($( fn $name: ident($arg: ident: &[u8]) $body: block )+) => { + benches!(mod short SHORT[..] $($name $arg $body)+); + benches!(mod medium MEDIUM[..] $($name $arg $body)+); + benches!(mod long LONG[..] $($name $arg $body)+); + // Ensure we benchmark cases where the functions are called with strings + // that are not perfectly aligned or have a length which is not a + // multiple of size_of::() (or both) + benches!(mod unaligned_head MEDIUM[1..] $($name $arg $body)+); + benches!(mod unaligned_tail MEDIUM[..(MEDIUM.len() - 1)] $($name $arg $body)+); + benches!(mod unaligned_both MEDIUM[1..(MEDIUM.len() - 1)] $($name $arg $body)+); + }; + + (mod $mod_name: ident $input: ident [$range: expr] $($name: ident $arg: ident $body: block)+) => { + mod $mod_name { + use super::*; + $( + #[bench] + fn $name(bencher: &mut Bencher) { + bencher.bytes = $input[$range].len() as u64; + let mut vec = $input.as_bytes().to_vec(); + bencher.iter(|| { + let $arg: &[u8] = &black_box(&mut vec)[$range]; + black_box($body) + }) + } + )+ + } + }; +} + +benches! { + fn case00_libcore(bytes: &[u8]) { + bytes.is_ascii() + } + + fn case01_iter_all(bytes: &[u8]) { + bytes.iter().all(|b| b.is_ascii()) + } + + fn case02_align_to(bytes: &[u8]) { + is_ascii_align_to(bytes) + } + + fn case03_align_to_unrolled(bytes: &[u8]) { + is_ascii_align_to_unrolled(bytes) + } +} + +// These are separate since it's easier to debug errors if they don't go through +// macro expansion first. +fn is_ascii_align_to(bytes: &[u8]) -> bool { + if bytes.len() < core::mem::size_of::() { + return bytes.iter().all(|b| b.is_ascii()); + } + // SAFETY: transmuting a sequence of `u8` to `usize` is always fine + let (head, body, tail) = unsafe { bytes.align_to::() }; + head.iter().all(|b| b.is_ascii()) + && body.iter().all(|w| !contains_nonascii(*w)) + && tail.iter().all(|b| b.is_ascii()) +} + +fn is_ascii_align_to_unrolled(bytes: &[u8]) -> bool { + if bytes.len() < core::mem::size_of::() { + return bytes.iter().all(|b| b.is_ascii()); + } + // SAFETY: transmuting a sequence of `u8` to `[usize; 2]` is always fine + let (head, body, tail) = unsafe { bytes.align_to::<[usize; 2]>() }; + head.iter().all(|b| b.is_ascii()) + && body.iter().all(|w| !contains_nonascii(w[0] | w[1])) + && tail.iter().all(|b| b.is_ascii()) +} + +#[inline] +fn contains_nonascii(v: usize) -> bool { + const NONASCII_MASK: usize = usize::from_ne_bytes([0x80; core::mem::size_of::()]); + (NONASCII_MASK & v) != 0 +} -- cgit v1.2.3