library/core/benches/ascii.rs


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353

mod is_ascii;

// Lower-case ASCII 'a' is the first byte that has its highest bit set
// after wrap-adding 0x1F:
//
//     b'a' + 0x1F == 0x80 == 0b1000_0000
//     b'z' + 0x1F == 0x98 == 0b1001_1000
//
// Lower-case ASCII 'z' is the last byte that has its highest bit unset
// after wrap-adding 0x05:
//
//     b'a' + 0x05 == 0x66 == 0b0110_0110
//     b'z' + 0x05 == 0x7F == 0b0111_1111
//
// … except for 0xFB to 0xFF, but those are in the range of bytes
// that have the highest bit unset again after adding 0x1F.
//
// So `(byte + 0x1f) & !(byte + 5)` has its highest bit set
// iff `byte` is a lower-case ASCII letter.
//
// Lower-case ASCII letters all have the 0x20 bit set.
// (Two positions right of 0x80, the highest bit.)
// Unsetting that bit produces the same letter, in upper-case.
//
// Therefore:
fn branchless_to_ascii_upper_case(byte: u8) -> u8 {
    byte & !((byte.wrapping_add(0x1f) & !byte.wrapping_add(0x05) & 0x80) >> 2)
}

macro_rules! benches {
    ($( fn $name: ident($arg: ident: &mut [u8]) $body: block )+ @iter $( $is_: ident, )+) => {
        benches! {@
            $( fn $name($arg: &mut [u8]) $body )+
            $( fn $is_(bytes: &mut [u8]) { bytes.iter().all(u8::$is_) } )+
        }
    };

    (@$( fn $name: ident($arg: ident: &mut [u8]) $body: block )+) => {
        benches!(mod short SHORT $($name $arg $body)+);
        benches!(mod medium MEDIUM $($name $arg $body)+);
        benches!(mod long LONG $($name $arg $body)+);
    };

    (mod $mod_name: ident $input: ident $($name: ident $arg: ident $body: block)+) => {
        mod $mod_name {
            use super::*;

            $(
                #[bench]
                fn $name(bencher: &mut Bencher) {
                    bencher.bytes = $input.len() as u64;
                    bencher.iter(|| {
                        let mut vec = $input.as_bytes().to_vec();
                        {
                            let $arg = &mut vec[..];
                            black_box($body);
                        }
                        vec
                    })
                }
            )+
        }
    }
}

use test::black_box;
use test::Bencher;

const ASCII_CASE_MASK: u8 = 0b0010_0000;

benches! {
    fn case00_alloc_only(_bytes: &mut [u8]) {}

    fn case01_black_box_read_each_byte(bytes: &mut [u8]) {
        for byte in bytes {
            black_box(*byte);
        }
    }

    fn case02_lookup_table(bytes: &mut [u8]) {
        for byte in bytes {
            *byte = ASCII_UPPERCASE_MAP[*byte as usize]
        }
    }

    fn case03_branch_and_subtract(bytes: &mut [u8]) {
        for byte in bytes {
            *byte = if b'a' <= *byte && *byte <= b'z' {
                *byte - b'a' + b'A'
            } else {
                *byte
            }
        }
    }

    fn case04_branch_and_mask(bytes: &mut [u8]) {
        for byte in bytes {
            *byte = if b'a' <= *byte && *byte <= b'z' {
                *byte & !0x20
            } else {
                *byte
            }
        }
    }

    fn case05_branchless(bytes: &mut [u8]) {
        for byte in bytes {
            *byte = branchless_to_ascii_upper_case(*byte)
        }
    }

    fn case06_libcore(bytes: &mut [u8]) {
        bytes.make_ascii_uppercase()
    }

    fn case07_fake_simd_u32(bytes: &mut [u8]) {
        // SAFETY: transmuting a sequence of `u8` to `u32` is always fine
        let (before, aligned, after) = unsafe {
            bytes.align_to_mut::<u32>()
        };
        for byte in before {
            *byte = branchless_to_ascii_upper_case(*byte)
        }
        for word in aligned {
            // FIXME: this is incorrect for some byte values:
            // addition within a byte can carry/overflow into the next byte.
            // Test case: b"\xFFz  "
            *word &= !(
                (
                    word.wrapping_add(0x1f1f1f1f) &
                    !word.wrapping_add(0x05050505) &
                    0x80808080
                ) >> 2
            )
        }
        for byte in after {
            *byte = branchless_to_ascii_upper_case(*byte)
        }
    }

    fn case08_fake_simd_u64(bytes: &mut [u8]) {
        // SAFETY: transmuting a sequence of `u8` to `u64` is always fine
        let (before, aligned, after) = unsafe {
            bytes.align_to_mut::<u64>()
        };
        for byte in before {
            *byte = branchless_to_ascii_upper_case(*byte)
        }
        for word in aligned {
            // FIXME: like above, this is incorrect for some byte values.
            *word &= !(
                (
                    word.wrapping_add(0x1f1f1f1f_1f1f1f1f) &
                    !word.wrapping_add(0x05050505_05050505) &
                    0x80808080_80808080
                ) >> 2
            )
        }
        for byte in after {
            *byte = branchless_to_ascii_upper_case(*byte)
        }
    }

    fn case09_mask_mult_bool_branchy_lookup_table(bytes: &mut [u8]) {
        fn is_ascii_lowercase(b: u8) -> bool {
            if b >= 0x80 { return false }
            match ASCII_CHARACTER_CLASS[b as usize] {
                L | Lx => true,
                _ => false,
            }
        }
        for byte in bytes {
            *byte &= !(0x20 * (is_ascii_lowercase(*byte) as u8))
        }
    }

    fn case10_mask_mult_bool_lookup_table(bytes: &mut [u8]) {
        fn is_ascii_lowercase(b: u8) -> bool {
            match ASCII_CHARACTER_CLASS[b as usize] {
                L | Lx => true,
                _ => false
            }
        }
        for byte in bytes {
            *byte &= !(0x20 * (is_ascii_lowercase(*byte) as u8))
        }
    }

    fn case11_mask_mult_bool_match_range(bytes: &mut [u8]) {
        fn is_ascii_lowercase(b: u8) -> bool {
            match b {
                b'a'..=b'z' => true,
                _ => false
            }
        }
        for byte in bytes {
            *byte &= !(0x20 * (is_ascii_lowercase(*byte) as u8))
        }
    }

    fn case12_mask_shifted_bool_match_range(bytes: &mut [u8]) {
        fn is_ascii_lowercase(b: u8) -> bool {
            match b {
                b'a'..=b'z' => true,
                _ => false
            }
        }
        for byte in bytes {
            *byte &= !((is_ascii_lowercase(*byte) as u8) * ASCII_CASE_MASK)
        }
    }

    fn case13_subtract_shifted_bool_match_range(bytes: &mut [u8]) {
        fn is_ascii_lowercase(b: u8) -> bool {
            match b {
                b'a'..=b'z' => true,
                _ => false
            }
        }
        for byte in bytes {
            *byte -= (is_ascii_lowercase(*byte) as u8) * ASCII_CASE_MASK
        }
    }

    fn case14_subtract_multiplied_bool_match_range(bytes: &mut [u8]) {
        fn is_ascii_lowercase(b: u8) -> bool {
            match b {
                b'a'..=b'z' => true,
                _ => false
            }
        }
        for byte in bytes {
            *byte -= (b'a' - b'A') * is_ascii_lowercase(*byte) as u8
        }
    }

    @iter

    is_ascii,
    is_ascii_alphabetic,
    is_ascii_uppercase,
    is_ascii_lowercase,
    is_ascii_alphanumeric,
    is_ascii_digit,
    is_ascii_hexdigit,
    is_ascii_punctuation,
    is_ascii_graphic,
    is_ascii_whitespace,
    is_ascii_control,
}

macro_rules! repeat {
    ($s: expr) => {
        concat!($s, $s, $s, $s, $s, $s, $s, $s, $s, $s)
    };
}

const SHORT: &str = "Alice's";
const MEDIUM: &str = "Alice's Adventures in Wonderland";
const LONG: &str = repeat!(
    r#"
    La Guida di Bragia, a Ballad Opera for the Marionette Theatre (around 1850)
    Alice's Adventures in Wonderland (1865)
    Phantasmagoria and Other Poems (1869)
    Through the Looking-Glass, and What Alice Found There
        (includes "Jabberwocky" and "The Walrus and the Carpenter") (1871)
    The Hunting of the Snark (1876)
    Rhyme? And Reason? (1883) – shares some contents with the 1869 collection,
        including the long poem "Phantasmagoria"
    A Tangled Tale (1885)
    Sylvie and Bruno (1889)
    Sylvie and Bruno Concluded (1893)
    Pillow Problems (1893)
    What the Tortoise Said to Achilles (1895)
    Three Sunsets and Other Poems (1898)
    The Manlet (1903)[106]
"#
);

#[rustfmt::skip]
const ASCII_UPPERCASE_MAP: [u8; 256] = [
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
    b' ', b'!', b'"', b'#', b'$', b'%', b'&', b'\'',
    b'(', b')', b'*', b'+', b',', b'-', b'.', b'/',
    b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7',
    b'8', b'9', b':', b';', b'<', b'=', b'>', b'?',
    b'@', b'A', b'B', b'C', b'D', b'E', b'F', b'G',
    b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O',
    b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W',
    b'X', b'Y', b'Z', b'[', b'\\', b']', b'^', b'_',
    b'`',

          b'A', b'B', b'C', b'D', b'E', b'F', b'G',
    b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O',
    b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W',
    b'X', b'Y', b'Z',

                      b'{', b'|', b'}', b'~', 0x7f,
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
    0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
    0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
    0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
    0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
    0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
    0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
    0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
    0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
    0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
    0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
    0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
    0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
];

enum AsciiCharacterClass {
    C,  // control
    Cw, // control whitespace
    W,  // whitespace
    D,  // digit
    L,  // lowercase
    Lx, // lowercase hex digit
    U,  // uppercase
    Ux, // uppercase hex digit
    P,  // punctuation
    N,  // Non-ASCII
}
use self::AsciiCharacterClass::*;

#[rustfmt::skip]
static ASCII_CHARACTER_CLASS: [AsciiCharacterClass; 256] = [
//  _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _a _b _c _d _e _f
    C, C, C, C, C, C, C, C, C, Cw,Cw,C, Cw,Cw,C, C, // 0_
    C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, // 1_
    W, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, // 2_
    D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, P, // 3_
    P, Ux,Ux,Ux,Ux,Ux,Ux,U, U, U, U, U, U, U, U, U, // 4_
    U, U, U, U, U, U, U, U, U, U, U, P, P, P, P, P, // 5_
    P, Lx,Lx,Lx,Lx,Lx,Lx,L, L, L, L, L, L, L, L, L, // 6_
    L, L, L, L, L, L, L, L, L, L, L, P, P, P, P, C, // 7_
    N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
    N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
    N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
    N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
    N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
    N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
    N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
    N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
];