From 698f8c2f01ea549d77d7dc3338a12e04c11057b9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:02:58 +0200 Subject: Adding upstream version 1.64.0+dfsg1. Signed-off-by: Daniel Baumann --- vendor/bytecount/.cargo-checksum.json | 1 + vendor/bytecount/Cargo.toml | 53 +++++++++ vendor/bytecount/LICENSE.Apache2 | 201 ++++++++++++++++++++++++++++++++++ vendor/bytecount/LICENSE.MIT | 19 ++++ vendor/bytecount/README.md | 75 +++++++++++++ vendor/bytecount/benches/bench.rs | 83 ++++++++++++++ vendor/bytecount/ci/miri.sh | 15 +++ vendor/bytecount/src/integer_simd.rs | 111 +++++++++++++++++++ vendor/bytecount/src/lib.rs | 138 +++++++++++++++++++++++ vendor/bytecount/src/naive.rs | 42 +++++++ vendor/bytecount/src/simd/generic.rs | 137 +++++++++++++++++++++++ vendor/bytecount/src/simd/mod.rs | 17 +++ vendor/bytecount/src/simd/x86_avx2.rs | 161 +++++++++++++++++++++++++++ vendor/bytecount/src/simd/x86_sse2.rs | 171 +++++++++++++++++++++++++++++ vendor/bytecount/tests/check.rs | 95 ++++++++++++++++ 15 files changed, 1319 insertions(+) create mode 100644 vendor/bytecount/.cargo-checksum.json create mode 100644 vendor/bytecount/Cargo.toml create mode 100644 vendor/bytecount/LICENSE.Apache2 create mode 100644 vendor/bytecount/LICENSE.MIT create mode 100644 vendor/bytecount/README.md create mode 100644 vendor/bytecount/benches/bench.rs create mode 100755 vendor/bytecount/ci/miri.sh create mode 100644 vendor/bytecount/src/integer_simd.rs create mode 100644 vendor/bytecount/src/lib.rs create mode 100644 vendor/bytecount/src/naive.rs create mode 100644 vendor/bytecount/src/simd/generic.rs create mode 100644 vendor/bytecount/src/simd/mod.rs create mode 100644 vendor/bytecount/src/simd/x86_avx2.rs create mode 100644 vendor/bytecount/src/simd/x86_sse2.rs create mode 100644 vendor/bytecount/tests/check.rs (limited to 'vendor/bytecount') diff --git a/vendor/bytecount/.cargo-checksum.json b/vendor/bytecount/.cargo-checksum.json new file mode 100644 index 000000000..700fc57f2 --- /dev/null +++ b/vendor/bytecount/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"90f053e609e5ba2abe4f05ecca43423ed11efebf55d6070fd9983252a81c24a8","LICENSE.Apache2":"b40930bbcf80744c86c46a12bc9da056641d722716c378f5659b9e555ef833e1","LICENSE.MIT":"a5dea80c1f383cb5f80a6bb0da5e55a2beb9f24adb123ce6300af2cbaaa3bf65","README.md":"84e452fd64997865cf8f55790717703efb8efc7c43c82252a23545d50e73fa28","benches/bench.rs":"085734cc7b3c3f6da30eeab8bf98d95fc2a9c0886c63f93edd98df356674bf8e","ci/miri.sh":"fb8d52144352c6b09ac925fb089177d30f46ab8b17dd378d68e414c6b6b5b709","src/integer_simd.rs":"5cd97182cd5aea5c5eab1e1cb5d9d409f263de9083c8e69cd7f9d1dd379dfb52","src/lib.rs":"000a6591306852b9ec892995cd6ab4247b8a727f4befa9dd6a9851c20a5f6552","src/naive.rs":"068dae3fba7d721227bb7a9ed9bdfe4a12cfb373a9de4852d6a517630fabf0c8","src/simd/generic.rs":"2f01963147fa97b5dbd7a166975e6f6660d3197f4b6d0a78ed138da9f78a269e","src/simd/mod.rs":"8ce4aac79520bd0d49f6e48471bf32a2d6d3cd8a3a1c65e393f4f4fae6c7f502","src/simd/x86_avx2.rs":"0345d5fbb74d907e3e6cd0d361427ce75645e7abf4c6efa17ef20ee8a4d8fc18","src/simd/x86_sse2.rs":"e7caab115d77118e6a7bca8daea4a9b6d2d318bd3e76ddc23dc4f023aced1f77","tests/check.rs":"e5075559155a1aae6326932e68410996ca52e4140db4b26eb8b9f03a07241b87"},"package":"72feb31ffc86498dacdbd0fcebb56138e7177a8cc5cea4516031d15ae85a742e"} \ No newline at end of file diff --git a/vendor/bytecount/Cargo.toml b/vendor/bytecount/Cargo.toml new file mode 100644 index 000000000..e3571c5c1 --- /dev/null +++ b/vendor/bytecount/Cargo.toml @@ -0,0 +1,53 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies +# +# If you believe there's an error in this file please file an +# issue against the rust-lang/cargo repository. If you're +# editing this file be aware that the upstream Cargo.toml +# will likely look very different (and much more reasonable) + +[package] +edition = "2018" +name = "bytecount" +version = "0.6.2" +authors = ["Andre Bogus ", "Joshua Landau "] +exclude = ["/.travis.yml", "/appveyor.yml"] +description = "count occurrences of a given byte, or the number of UTF-8 code points, in a byte slice, fast" +readme = "README.md" +categories = ["algorithms", "no-std"] +license = "Apache-2.0/MIT" +repository = "https://github.com/llogiq/bytecount" + +[lib] +bench = false + +[[bench]] +name = "bench" +harness = false +[dependencies.packed_simd] +version = "0.3.4" +optional = true +package = "packed_simd_2" +[dev-dependencies.criterion] +version = "0.3" +default-features = false + +[dev-dependencies.quickcheck] +version = "0.9" + +[dev-dependencies.rand] +version = "0.7" + +[features] +generic-simd = ["packed_simd"] +html_report = [] +runtime-dispatch-simd = [] +[badges.appveyor] +repository = "llogiq/bytecount" + +[badges.travis-ci] +repository = "llogiq/bytecount" diff --git a/vendor/bytecount/LICENSE.Apache2 b/vendor/bytecount/LICENSE.Apache2 new file mode 100644 index 000000000..8dada3eda --- /dev/null +++ b/vendor/bytecount/LICENSE.Apache2 @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/bytecount/LICENSE.MIT b/vendor/bytecount/LICENSE.MIT new file mode 100644 index 000000000..e0f734e45 --- /dev/null +++ b/vendor/bytecount/LICENSE.MIT @@ -0,0 +1,19 @@ +Copyright (c) 2017 The bytecount Developers + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/vendor/bytecount/README.md b/vendor/bytecount/README.md new file mode 100644 index 000000000..9847c2a89 --- /dev/null +++ b/vendor/bytecount/README.md @@ -0,0 +1,75 @@ +# bytecount + +Counting bytes really fast + +[![Build Status](https://travis-ci.org/llogiq/bytecount.svg?branch=master)](https://travis-ci.org/llogiq/bytecount) +[![Windows build status](https://ci.appveyor.com/api/projects/status/github/llogiq/bytecount?svg=true)](https://ci.appveyor.com/project/llogiq/bytecount) +[![Current Version](http://meritbadge.herokuapp.com/bytecount)](https://crates.io/crates/bytecount) +[![License: Apache 2.0/MIT](https://img.shields.io/crates/l/bytecount.svg)](#license) + +This uses the "hyperscreamingcount" algorithm by Joshua Landau to count bytes faster than anything else. +The [newlinebench](https://github.com/llogiq/newlinebench) repository has further benchmarks for old versions of this repository. + +To use bytecount in your crate, if you have [cargo-edit](https://github.com/killercup/cargo-edit), just type +`cargo add bytecount` in a terminal with the crate root as the current path. Otherwise you can manually edit your +`Cargo.toml` to add `bytecount = 0.5.1` to your `[dependencies]` section. + +In your crate root (`lib.rs` or `main.rs`, depending on if you are writing a +library or application), add `extern crate bytecount;`. Now you can simply use +`bytecount::count` as follows: + +```Rust +extern crate bytecount; + +fn main() { + let mytext = "some potentially large text, perhaps read from disk?"; + let spaces = bytecount::count(mytext.as_bytes(), b' '); + .. +} +``` + +bytecount supports two features to make use of modern CPU's features to speed up counting considerably. To allow your +users to use them, add the following to your `Cargo.toml`: + +``` +[features] +runtime-dispatch-simd = ["bytecount/runtime-dispatch-simd"] +generic-simd = ["bytecount/generic-simd"] +``` + +The first, `runtime-dispatch-simd`, enables detection of SIMD capabilities at runtime, which allows using the SSE2 and +AVX2 codepaths, but cannot be used with `no_std`. + +Your users can then compile with runtime dispatch using: + +``` +cargo build --release --features runtime-dispatch-simd +``` + +The second, `generic-simd`, uses `packed_simd` to provide a fast +architecture-agnostic SIMD codepath, but requires running on nightly. + +Your users can compile with this codepath using: + +``` +cargo build --release --features generic-simd +``` + +Building for a more specific architecture will also improve performance. +You can do this with + +``` +RUSTFLAGS="-C target-cpu=native" cargo build --release +``` + +The scalar algorithm is explained in depth [here](https://llogiq.github.io/2016/09/27/count.html). + +**Note: Versions until 0.4.0 worked with Rust as of 1.20.0. Version 0.5.0 until 0.6.0 requires Rust 1.26 or later, +and at least 1.27.2 to use SIMD. Versions from 0.6.0 require Rust 1.32.0 or later.** + +## License + +Licensed under either of at your discretion: + +- [Apache 2.0](LICENSE.Apache2) +- [MIT](LICENSE.MIT) diff --git a/vendor/bytecount/benches/bench.rs b/vendor/bytecount/benches/bench.rs new file mode 100644 index 000000000..85d04dbb4 --- /dev/null +++ b/vendor/bytecount/benches/bench.rs @@ -0,0 +1,83 @@ +#[macro_use] +extern crate criterion; +extern crate rand; +extern crate bytecount; + +use std::env; +use std::time::Duration; +use rand::RngCore; +use criterion::{Bencher, Criterion, ParameterizedBenchmark}; + +use bytecount::{ + count, naive_count, naive_count_32, + num_chars, naive_num_chars, +}; + +fn random_bytes(len: usize) -> Vec { + let mut result = vec![0; len]; + rand::thread_rng().fill_bytes(&mut result); + result +} + +static COUNTS : &[usize] = &[0, 10, 20, 30, 40, 50, 60, 70, 80, 90, + 100, 120, 140, 170, 210, 250, 300, 400, 500, 600, 700, 800, 900, + 1000, 1_000, 1_200, 1_400, 1_700, 2_100, 2_500, 3_000, 4_000, + 5_000, 6_000, 7_000, 8_000, 9_000, 10_000, 12_000, 14_000, 17_000, + 21_000, 25_000, 30_000, 100_000, 1_000_000]; + +fn get_counts() -> Vec { + env::var("COUNTS").map( + |s| s.split(',').map( + |n| str::parse::(n).unwrap()).collect()) + .unwrap_or(COUNTS.to_owned()) +} + +fn get_config() -> Criterion { + if env::var("CI").is_ok() { + Criterion::default().nresamples(5_000) + .without_plots() + .measurement_time(Duration::new(2, 0)) + .warm_up_time(Duration::new(1, 0)) + } else { + Criterion::default() + } +} + +fn bench_counts(criterion: &mut Criterion) { + fn naive(b: &mut Bencher, s: &usize) { + let haystack = random_bytes(*s); + b.iter(|| naive_count(&haystack, 10)) + } + fn naive_32(b: &mut Bencher, s: &usize) { + let haystack = random_bytes(*s); + b.iter(|| naive_count_32(&haystack, 10)) + } + fn hyper(b: &mut Bencher, s: &usize) { + let haystack = random_bytes(*s); + b.iter(|| count(&haystack, 10)) + } + let counts = get_counts(); + criterion.bench("counts", + ParameterizedBenchmark::new("naive", naive, counts) + .with_function("naive_32", naive_32) + .with_function("hyper", hyper)); +} + +fn bench_num_chars(criterion: &mut Criterion) { + fn naive(b: &mut Bencher, s: &usize) { + let haystack = random_bytes(*s); + b.iter(|| naive_num_chars(&haystack)) + } + fn hyper(b: &mut Bencher, s: &usize) { + let haystack = random_bytes(*s); + b.iter(|| num_chars(&haystack)) + } + let counts = get_counts(); + criterion.bench("num_chars", + ParameterizedBenchmark::new("naive", naive, counts) + .with_function("hyper", hyper)); +} + +criterion_group!(name = count_bench; config = get_config(); targets = bench_counts); +criterion_group!(name = num_chars_bench; config = get_config(); targets = bench_num_chars); +criterion_main!(count_bench, num_chars_bench); diff --git a/vendor/bytecount/ci/miri.sh b/vendor/bytecount/ci/miri.sh new file mode 100755 index 000000000..8704ebc1d --- /dev/null +++ b/vendor/bytecount/ci/miri.sh @@ -0,0 +1,15 @@ +#!/bin/bash +set -ex + +# Setup +MIRI_NIGHTLY=nightly-$(curl -s https://rust-lang.github.io/rustup-components-history/x86_64-unknown-linux-gnu/miri) +echo "Installing latest nightly with Miri: $MIRI_NIGHTLY" +rustup default "$MIRI_NIGHTLY" +rustup component add miri + +# Run tests +cargo miri test +cargo miri test --target=mips64-unknown-linux-gnuabi64 # big-endian architecture + +# Restore old state in case Travis uses this cache for other jobs. +rustup default nightly diff --git a/vendor/bytecount/src/integer_simd.rs b/vendor/bytecount/src/integer_simd.rs new file mode 100644 index 000000000..48f2ee8d9 --- /dev/null +++ b/vendor/bytecount/src/integer_simd.rs @@ -0,0 +1,111 @@ +#[cfg(not(feature = "runtime-dispatch-simd"))] +use core::{mem, ptr, usize}; +#[cfg(feature = "runtime-dispatch-simd")] +use std::{mem, ptr, usize}; + +fn splat(byte: u8) -> usize { + let lo = usize::MAX / 0xFF; + lo * byte as usize +} + +unsafe fn usize_load_unchecked(bytes: &[u8], offset: usize) -> usize { + let mut output = 0; + ptr::copy_nonoverlapping( + bytes.as_ptr().add(offset), + &mut output as *mut usize as *mut u8, + mem::size_of::() + ); + output +} + +fn bytewise_equal(lhs: usize, rhs: usize) -> usize { + let lo = usize::MAX / 0xFF; + let hi = lo << 7; + + let x = lhs ^ rhs; + !((((x & !hi) + !hi) | x) >> 7) & lo +} + +fn sum_usize(values: usize) -> usize { + let every_other_byte_lo = usize::MAX / 0xFFFF; + let every_other_byte = every_other_byte_lo * 0xFF; + + // Pairwise reduction to avoid overflow on next step. + let pair_sum: usize = (values & every_other_byte) + ((values >> 8) & every_other_byte); + + // Multiplication results in top two bytes holding sum. + pair_sum.wrapping_mul(every_other_byte_lo) >> ((mem::size_of::() - 2) * 8) +} + +fn is_leading_utf8_byte(values: usize) -> usize { + // a leading UTF-8 byte is one which does not start with the bits 10. + ((!values >> 7) | (values >> 6)) & splat(1) +} + +pub fn chunk_count(haystack: &[u8], needle: u8) -> usize { + let chunksize = mem::size_of::(); + assert!(haystack.len() >= chunksize); + + unsafe { + let mut offset = 0; + let mut count = 0; + + let needles = splat(needle); + + // 2040 + while haystack.len() >= offset + chunksize * 255 { + let mut counts = 0; + for _ in 0..255 { + counts += bytewise_equal(usize_load_unchecked(haystack, offset), needles); + offset += chunksize; + } + count += sum_usize(counts); + } + + // 8 + let mut counts = 0; + for i in 0..(haystack.len() - offset) / chunksize { + counts += bytewise_equal(usize_load_unchecked(haystack, offset + i * chunksize), needles); + } + if haystack.len() % 8 != 0 { + let mask = usize::from_le(!(!0 >> ((haystack.len() % chunksize) * 8))); + counts += bytewise_equal(usize_load_unchecked(haystack, haystack.len() - chunksize), needles) & mask; + } + count += sum_usize(counts); + + count + } +} + +pub fn chunk_num_chars(utf8_chars: &[u8]) -> usize { + let chunksize = mem::size_of::(); + assert!(utf8_chars.len() >= chunksize); + + unsafe { + let mut offset = 0; + let mut count = 0; + + // 2040 + while utf8_chars.len() >= offset + chunksize * 255 { + let mut counts = 0; + for _ in 0..255 { + counts += is_leading_utf8_byte(usize_load_unchecked(utf8_chars, offset)); + offset += chunksize; + } + count += sum_usize(counts); + } + + // 8 + let mut counts = 0; + for i in 0..(utf8_chars.len() - offset) / chunksize { + counts += is_leading_utf8_byte(usize_load_unchecked(utf8_chars, offset + i * chunksize)); + } + if utf8_chars.len() % 8 != 0 { + let mask = usize::from_le(!(!0 >> ((utf8_chars.len() % chunksize) * 8))); + counts += is_leading_utf8_byte(usize_load_unchecked(utf8_chars, utf8_chars.len() - chunksize)) & mask; + } + count += sum_usize(counts); + + count + } +} diff --git a/vendor/bytecount/src/lib.rs b/vendor/bytecount/src/lib.rs new file mode 100644 index 000000000..ef4235c26 --- /dev/null +++ b/vendor/bytecount/src/lib.rs @@ -0,0 +1,138 @@ +//! count occurrences of a given byte, or the number of UTF-8 code points, in a +//! byte slice, fast. +//! +//! This crate has the [`count`](fn.count.html) method to count byte +//! occurrences (for example newlines) in a larger `&[u8]` slice. +//! +//! For example: +//! +//! ```rust +//! assert_eq!(5, bytecount::count(b"Hello, this is the bytecount crate!", b' ')); +//! ``` +//! +//! Also there is a [`num_chars`](fn.num_chars.html) method to count +//! the number of UTF8 characters in a slice. It will work the same as +//! `str::chars().count()` for byte slices of correct UTF-8 character +//! sequences. The result will likely be off for invalid sequences, +//! although the result is guaranteed to be between `0` and +//! `[_]::len()`, inclusive. +//! +//! Example: +//! +//! ```rust +//! let sequence = "Wenn ich ein Vöglein wär, flög ich zu Dir!"; +//! assert_eq!(sequence.chars().count(), +//! bytecount::num_chars(sequence.as_bytes())); +//! ``` +//! +//! For completeness and easy comparison, the "naive" versions of both +//! count and num_chars are provided. Those are also faster if used on +//! predominantly small strings. The +//! [`naive_count_32`](fn.naive_count_32.html) method can be faster +//! still on small strings. + +#![deny(missing_docs)] + +#![cfg_attr(not(feature = "runtime-dispatch-simd"), no_std)] + +#[cfg(not(feature = "runtime-dispatch-simd"))] +use core::mem; +#[cfg(feature = "runtime-dispatch-simd")] +use std::mem; + +mod naive; +pub use naive::*; +mod integer_simd; + +#[cfg(any( + all(feature = "runtime-dispatch-simd", any(target_arch = "x86", target_arch = "x86_64")), + feature = "generic-simd" +))] +mod simd; + +/// Count occurrences of a byte in a slice of bytes, fast +/// +/// # Examples +/// +/// ``` +/// let s = b"This is a Text with spaces"; +/// let number_of_spaces = bytecount::count(s, b' '); +/// assert_eq!(number_of_spaces, 5); +/// ``` +pub fn count(haystack: &[u8], needle: u8) -> usize { + if haystack.len() >= 32 { + #[cfg(all(feature = "runtime-dispatch-simd", target_arch = "x86_64"))] + { + if is_x86_feature_detected!("avx2") { + unsafe { return simd::x86_avx2::chunk_count(haystack, needle); } + } + } + + #[cfg(feature = "generic-simd")] + return simd::generic::chunk_count(haystack, needle); + } + + if haystack.len() >= 16 { + #[cfg(all( + feature = "runtime-dispatch-simd", + any(target_arch = "x86", target_arch = "x86_64"), + not(feature = "generic-simd") + ))] + { + if is_x86_feature_detected!("sse2") { + unsafe { return simd::x86_sse2::chunk_count(haystack, needle); } + } + } + } + + if haystack.len() >= mem::size_of::() { + return integer_simd::chunk_count(haystack, needle); + } + + naive_count(haystack, needle) +} + +/// Count the number of UTF-8 encoded Unicode codepoints in a slice of bytes, fast +/// +/// This function is safe to use on any byte array, valid UTF-8 or not, +/// but the output is only meaningful for well-formed UTF-8. +/// +/// # Example +/// +/// ``` +/// let swordfish = "メカジキ"; +/// let char_count = bytecount::num_chars(swordfish.as_bytes()); +/// assert_eq!(char_count, 4); +/// ``` +pub fn num_chars(utf8_chars: &[u8]) -> usize { + if utf8_chars.len() >= 32 { + #[cfg(all(feature = "runtime-dispatch-simd", target_arch = "x86_64"))] + { + if is_x86_feature_detected!("avx2") { + unsafe { return simd::x86_avx2::chunk_num_chars(utf8_chars); } + } + } + + #[cfg(feature = "generic-simd")] + return simd::generic::chunk_num_chars(utf8_chars); + } + + if utf8_chars.len() >= 16 { + #[cfg(all( + feature = "runtime-dispatch-simd", + any(target_arch = "x86", target_arch = "x86_64"), + not(feature = "generic-simd") + ))] + { + if is_x86_feature_detected!("sse2") { + unsafe { return simd::x86_sse2::chunk_num_chars(utf8_chars); } + } + } + } + + if utf8_chars.len() >= mem::size_of::() { + return integer_simd::chunk_num_chars(utf8_chars); + } + + naive_num_chars(utf8_chars) +} diff --git a/vendor/bytecount/src/naive.rs b/vendor/bytecount/src/naive.rs new file mode 100644 index 000000000..315c4b675 --- /dev/null +++ b/vendor/bytecount/src/naive.rs @@ -0,0 +1,42 @@ +/// Count up to `(2^32)-1` occurrences of a byte in a slice +/// of bytes, simple +/// +/// # Example +/// +/// ``` +/// let s = b"This is yet another Text with spaces"; +/// let number_of_spaces = bytecount::naive_count_32(s, b' '); +/// assert_eq!(number_of_spaces, 6); +/// ``` +pub fn naive_count_32(haystack: &[u8], needle: u8) -> usize { + haystack.iter().fold(0, |n, c| n + (*c == needle) as u32) as usize +} + +/// Count occurrences of a byte in a slice of bytes, simple +/// +/// # Example +/// +/// ``` +/// let s = b"This is yet another Text with spaces"; +/// let number_of_spaces = bytecount::naive_count(s, b' '); +/// assert_eq!(number_of_spaces, 6); +/// ``` +pub fn naive_count(utf8_chars: &[u8], needle: u8) -> usize { + utf8_chars.iter().fold(0, |n, c| n + (*c == needle) as usize) +} + +/// Count the number of UTF-8 encoded Unicode codepoints in a slice of bytes, simple +/// +/// This function is safe to use on any byte array, valid UTF-8 or not, +/// but the output is only meaningful for well-formed UTF-8. +/// +/// # Example +/// +/// ``` +/// let swordfish = "メカジキ"; +/// let char_count = bytecount::naive_num_chars(swordfish.as_bytes()); +/// assert_eq!(char_count, 4); +/// ``` +pub fn naive_num_chars(utf8_chars: &[u8]) -> usize { + utf8_chars.iter().filter(|&&byte| (byte >> 6) != 0b10).count() +} diff --git a/vendor/bytecount/src/simd/generic.rs b/vendor/bytecount/src/simd/generic.rs new file mode 100644 index 000000000..2031e730e --- /dev/null +++ b/vendor/bytecount/src/simd/generic.rs @@ -0,0 +1,137 @@ +extern crate packed_simd; + +#[cfg(not(feature = "runtime-dispatch-simd"))] +use core::mem; +#[cfg(feature = "runtime-dispatch-simd")] +use std::mem; + +use self::packed_simd::{u8x32, u8x64, FromCast}; + +const MASK: [u8; 64] = [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, +]; + +unsafe fn u8x64_from_offset(slice: &[u8], offset: usize) -> u8x64 { + u8x64::from_slice_unaligned_unchecked(slice.get_unchecked(offset..)) +} +unsafe fn u8x32_from_offset(slice: &[u8], offset: usize) -> u8x32 { + u8x32::from_slice_unaligned_unchecked(slice.get_unchecked(offset..)) +} + +fn sum_x64(u8s: &u8x64) -> usize { + let mut store = [0; mem::size_of::()]; + u8s.write_to_slice_unaligned(&mut store); + store.iter().map(|&e| e as usize).sum() +} +fn sum_x32(u8s: &u8x32) -> usize { + let mut store = [0; mem::size_of::()]; + u8s.write_to_slice_unaligned(&mut store); + store.iter().map(|&e| e as usize).sum() +} + +pub fn chunk_count(haystack: &[u8], needle: u8) -> usize { + assert!(haystack.len() >= 32); + + unsafe { + let mut offset = 0; + let mut count = 0; + + let needles_x64 = u8x64::splat(needle); + + // 16320 + while haystack.len() >= offset + 64 * 255 { + let mut counts = u8x64::splat(0); + for _ in 0..255 { + counts -= u8x64::from_cast(u8x64_from_offset(haystack, offset).eq(needles_x64)); + offset += 64; + } + count += sum_x64(&counts); + } + + // 8192 + if haystack.len() >= offset + 64 * 128 { + let mut counts = u8x64::splat(0); + for _ in 0..128 { + counts -= u8x64::from_cast(u8x64_from_offset(haystack, offset).eq(needles_x64)); + offset += 64; + } + count += sum_x64(&counts); + } + + let needles_x32 = u8x32::splat(needle); + + // 32 + let mut counts = u8x32::splat(0); + for i in 0..(haystack.len() - offset) / 32 { + counts -= u8x32::from_cast(u8x32_from_offset(haystack, offset + i * 32).eq(needles_x32)); + } + count += sum_x32(&counts); + + // Straggler; need to reset counts because prior loop can run 255 times + counts = u8x32::splat(0); + if haystack.len() % 32 != 0 { + counts -= u8x32::from_cast(u8x32_from_offset(haystack, haystack.len() - 32).eq(needles_x32)) & + u8x32_from_offset(&MASK, haystack.len() % 32); + } + count += sum_x32(&counts); + + count + } +} + +fn is_leading_utf8_byte_x64(u8s: u8x64) -> u8x64 { + u8x64::from_cast((u8s & u8x64::splat(0b1100_0000)).ne(u8x64::splat(0b1000_0000))) +} + +fn is_leading_utf8_byte_x32(u8s: u8x32) -> u8x32 { + u8x32::from_cast((u8s & u8x32::splat(0b1100_0000)).ne(u8x32::splat(0b1000_0000))) +} + +pub fn chunk_num_chars(utf8_chars: &[u8]) -> usize { + assert!(utf8_chars.len() >= 32); + + unsafe { + let mut offset = 0; + let mut count = 0; + + // 16320 + while utf8_chars.len() >= offset + 64 * 255 { + let mut counts = u8x64::splat(0); + for _ in 0..255 { + counts -= is_leading_utf8_byte_x64(u8x64_from_offset(utf8_chars, offset)); + offset += 64; + } + count += sum_x64(&counts); + } + + // 8192 + if utf8_chars.len() >= offset + 64 * 128 { + let mut counts = u8x64::splat(0); + for _ in 0..128 { + counts -= is_leading_utf8_byte_x64(u8x64_from_offset(utf8_chars, offset)); + offset += 64; + } + count += sum_x64(&counts); + } + + // 32 + let mut counts = u8x32::splat(0); + for i in 0..(utf8_chars.len() - offset) / 32 { + counts -= is_leading_utf8_byte_x32(u8x32_from_offset(utf8_chars, offset + i * 32)); + } + count += sum_x32(&counts); + + // Straggler; need to reset counts because prior loop can run 255 times + counts = u8x32::splat(0); + if utf8_chars.len() % 32 != 0 { + counts -= is_leading_utf8_byte_x32(u8x32_from_offset(utf8_chars, utf8_chars.len() - 32)) & + u8x32_from_offset(&MASK, utf8_chars.len() % 32); + } + count += sum_x32(&counts); + + count + } +} diff --git a/vendor/bytecount/src/simd/mod.rs b/vendor/bytecount/src/simd/mod.rs new file mode 100644 index 000000000..d144e1847 --- /dev/null +++ b/vendor/bytecount/src/simd/mod.rs @@ -0,0 +1,17 @@ +#[cfg(feature = "generic-simd")] +pub mod generic; + +// This is like generic, but written explicitly +// because generic SIMD requires nightly. +#[cfg(all( + feature = "runtime-dispatch-simd", + any(target_arch = "x86", target_arch = "x86_64"), + not(feature = "generic-simd") +))] +pub mod x86_sse2; + +// Modern x86 machines can do lots of fun stuff; +// this is where the *real* optimizations go. +// Runtime feature detection is not available with no_std. +#[cfg(all(feature = "runtime-dispatch-simd", target_arch = "x86_64"))] +pub mod x86_avx2; diff --git a/vendor/bytecount/src/simd/x86_avx2.rs b/vendor/bytecount/src/simd/x86_avx2.rs new file mode 100644 index 000000000..90a55c0fb --- /dev/null +++ b/vendor/bytecount/src/simd/x86_avx2.rs @@ -0,0 +1,161 @@ +use std::arch::x86_64::{ + __m256i, + _mm256_and_si256, + _mm256_cmpeq_epi8, + _mm256_extract_epi64, + _mm256_loadu_si256, + _mm256_sad_epu8, + _mm256_set1_epi8, + _mm256_setzero_si256, + _mm256_sub_epi8, + _mm256_xor_si256, +}; + +#[target_feature(enable = "avx2")] +pub unsafe fn _mm256_set1_epu8(a: u8) -> __m256i { + _mm256_set1_epi8(a as i8) +} + +#[target_feature(enable = "avx2")] +pub unsafe fn mm256_cmpneq_epi8(a: __m256i, b: __m256i) -> __m256i { + _mm256_xor_si256(_mm256_cmpeq_epi8(a, b), _mm256_set1_epi8(-1)) +} + +const MASK: [u8; 64] = [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, +]; + +#[target_feature(enable = "avx2")] +unsafe fn mm256_from_offset(slice: &[u8], offset: usize) -> __m256i { + _mm256_loadu_si256(slice.as_ptr().add(offset) as *const _) +} + +#[target_feature(enable = "avx2")] +unsafe fn sum(u8s: &__m256i) -> usize { + let sums = _mm256_sad_epu8(*u8s, _mm256_setzero_si256()); + ( + _mm256_extract_epi64(sums, 0) + _mm256_extract_epi64(sums, 1) + + _mm256_extract_epi64(sums, 2) + _mm256_extract_epi64(sums, 3) + ) as usize +} + +#[target_feature(enable = "avx2")] +pub unsafe fn chunk_count(haystack: &[u8], needle: u8) -> usize { + assert!(haystack.len() >= 32); + + let mut offset = 0; + let mut count = 0; + + let needles = _mm256_set1_epu8(needle); + + // 8160 + while haystack.len() >= offset + 32 * 255 { + let mut counts = _mm256_setzero_si256(); + for _ in 0..255 { + counts = _mm256_sub_epi8( + counts, + _mm256_cmpeq_epi8(mm256_from_offset(haystack, offset), needles) + ); + offset += 32; + } + count += sum(&counts); + } + + // 4096 + if haystack.len() >= offset + 32 * 128 { + let mut counts = _mm256_setzero_si256(); + for _ in 0..128 { + counts = _mm256_sub_epi8( + counts, + _mm256_cmpeq_epi8(mm256_from_offset(haystack, offset), needles) + ); + offset += 32; + } + count += sum(&counts); + } + + // 32 + let mut counts = _mm256_setzero_si256(); + for i in 0..(haystack.len() - offset) / 32 { + counts = _mm256_sub_epi8( + counts, + _mm256_cmpeq_epi8(mm256_from_offset(haystack, offset + i * 32), needles) + ); + } + if haystack.len() % 32 != 0 { + counts = _mm256_sub_epi8( + counts, + _mm256_and_si256( + _mm256_cmpeq_epi8(mm256_from_offset(haystack, haystack.len() - 32), needles), + mm256_from_offset(&MASK, haystack.len() % 32) + ) + ); + } + count += sum(&counts); + + count +} + +#[target_feature(enable = "avx2")] +unsafe fn is_leading_utf8_byte(u8s: __m256i) -> __m256i { + mm256_cmpneq_epi8(_mm256_and_si256(u8s, _mm256_set1_epu8(0b1100_0000)), _mm256_set1_epu8(0b1000_0000)) +} + +#[target_feature(enable = "avx2")] +pub unsafe fn chunk_num_chars(utf8_chars: &[u8]) -> usize { + assert!(utf8_chars.len() >= 32); + + let mut offset = 0; + let mut count = 0; + + // 8160 + while utf8_chars.len() >= offset + 32 * 255 { + let mut counts = _mm256_setzero_si256(); + + for _ in 0..255 { + counts = _mm256_sub_epi8( + counts, + is_leading_utf8_byte(mm256_from_offset(utf8_chars, offset)) + ); + offset += 32; + } + count += sum(&counts); + } + + // 4096 + if utf8_chars.len() >= offset + 32 * 128 { + let mut counts = _mm256_setzero_si256(); + for _ in 0..128 { + counts = _mm256_sub_epi8( + counts, + is_leading_utf8_byte(mm256_from_offset(utf8_chars, offset)) + ); + offset += 32; + } + count += sum(&counts); + } + + // 32 + let mut counts = _mm256_setzero_si256(); + for i in 0..(utf8_chars.len() - offset) / 32 { + counts = _mm256_sub_epi8( + counts, + is_leading_utf8_byte(mm256_from_offset(utf8_chars, offset + i * 32)) + ); + } + if utf8_chars.len() % 32 != 0 { + counts = _mm256_sub_epi8( + counts, + _mm256_and_si256( + is_leading_utf8_byte(mm256_from_offset(utf8_chars, utf8_chars.len() - 32)), + mm256_from_offset(&MASK, utf8_chars.len() % 32) + ) + ); + } + count += sum(&counts); + + count +} diff --git a/vendor/bytecount/src/simd/x86_sse2.rs b/vendor/bytecount/src/simd/x86_sse2.rs new file mode 100644 index 000000000..63d295eae --- /dev/null +++ b/vendor/bytecount/src/simd/x86_sse2.rs @@ -0,0 +1,171 @@ +#[cfg(target_arch = "x86")] +use std::arch::x86::{ + __m128i, + _mm_and_si128, + _mm_cmpeq_epi8, + _mm_extract_epi32, + _mm_loadu_si128, + _mm_sad_epu8, + _mm_set1_epi8, + _mm_setzero_si128, + _mm_sub_epi8, + _mm_xor_si128, +}; + +#[cfg(target_arch = "x86_64")] +use std::arch::x86_64::{ + __m128i, + _mm_and_si128, + _mm_cmpeq_epi8, + _mm_extract_epi32, + _mm_loadu_si128, + _mm_sad_epu8, + _mm_set1_epi8, + _mm_setzero_si128, + _mm_sub_epi8, + _mm_xor_si128, +}; + +#[target_feature(enable = "sse2")] +pub unsafe fn _mm_set1_epu8(a: u8) -> __m128i { + _mm_set1_epi8(a as i8) +} + +#[target_feature(enable = "sse2")] +pub unsafe fn mm_cmpneq_epi8(a: __m128i, b: __m128i) -> __m128i { + _mm_xor_si128(_mm_cmpeq_epi8(a, b), _mm_set1_epi8(-1)) +} + +const MASK: [u8; 32] = [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, +]; + +#[target_feature(enable = "sse2")] +unsafe fn mm_from_offset(slice: &[u8], offset: usize) -> __m128i { + _mm_loadu_si128(slice.as_ptr().offset(offset as isize) as *const _) +} + +#[target_feature(enable = "sse2")] +unsafe fn sum(u8s: &__m128i) -> usize { + let sums = _mm_sad_epu8(*u8s, _mm_setzero_si128()); + (_mm_extract_epi32(sums, 0) + _mm_extract_epi32(sums, 2)) as usize +} + +#[target_feature(enable = "sse2")] +pub unsafe fn chunk_count(haystack: &[u8], needle: u8) -> usize { + assert!(haystack.len() >= 16); + + let mut offset = 0; + let mut count = 0; + + let needles = _mm_set1_epu8(needle); + + // 4080 + while haystack.len() >= offset + 16 * 255 { + let mut counts = _mm_setzero_si128(); + for _ in 0..255 { + counts = _mm_sub_epi8( + counts, + _mm_cmpeq_epi8(mm_from_offset(haystack, offset), needles) + ); + offset += 16; + } + count += sum(&counts); + } + + // 2048 + if haystack.len() >= offset + 16 * 128 { + let mut counts = _mm_setzero_si128(); + for _ in 0..128 { + counts = _mm_sub_epi8( + counts, + _mm_cmpeq_epi8(mm_from_offset(haystack, offset), needles) + ); + offset += 16; + } + count += sum(&counts); + } + + // 16 + let mut counts = _mm_setzero_si128(); + for i in 0..(haystack.len() - offset) / 16 { + counts = _mm_sub_epi8( + counts, + _mm_cmpeq_epi8(mm_from_offset(haystack, offset + i * 16), needles) + ); + } + if haystack.len() % 16 != 0 { + counts = _mm_sub_epi8( + counts, + _mm_and_si128( + _mm_cmpeq_epi8(mm_from_offset(haystack, haystack.len() - 16), needles), + mm_from_offset(&MASK, haystack.len() % 16) + ) + ); + } + count += sum(&counts); + + count +} + +#[target_feature(enable = "sse2")] +unsafe fn is_leading_utf8_byte(u8s: __m128i) -> __m128i { + mm_cmpneq_epi8(_mm_and_si128(u8s, _mm_set1_epu8(0b1100_0000)), _mm_set1_epu8(0b1000_0000)) +} + +#[target_feature(enable = "sse2")] +pub unsafe fn chunk_num_chars(utf8_chars: &[u8]) -> usize { + assert!(utf8_chars.len() >= 16); + + let mut offset = 0; + let mut count = 0; + + // 4080 + while utf8_chars.len() >= offset + 16 * 255 { + let mut counts = _mm_setzero_si128(); + + for _ in 0..255 { + counts = _mm_sub_epi8( + counts, + is_leading_utf8_byte(mm_from_offset(utf8_chars, offset)) + ); + offset += 16; + } + count += sum(&counts); + } + + // 2048 + if utf8_chars.len() >= offset + 16 * 128 { + let mut counts = _mm_setzero_si128(); + for _ in 0..128 { + counts = _mm_sub_epi8( + counts, + is_leading_utf8_byte(mm_from_offset(utf8_chars, offset)) + ); + offset += 16; + } + count += sum(&counts); + } + + // 16 + let mut counts = _mm_setzero_si128(); + for i in 0..(utf8_chars.len() - offset) / 16 { + counts = _mm_sub_epi8( + counts, + is_leading_utf8_byte(mm_from_offset(utf8_chars, offset + i * 16)) + ); + } + if utf8_chars.len() % 16 != 0 { + counts = _mm_sub_epi8( + counts, + _mm_and_si128( + is_leading_utf8_byte(mm_from_offset(utf8_chars, utf8_chars.len() - 16)), + mm_from_offset(&MASK, utf8_chars.len() % 16) + ) + ); + } + count += sum(&counts); + + count +} diff --git a/vendor/bytecount/tests/check.rs b/vendor/bytecount/tests/check.rs new file mode 100644 index 000000000..147b466fc --- /dev/null +++ b/vendor/bytecount/tests/check.rs @@ -0,0 +1,95 @@ +extern crate bytecount; +#[macro_use] +extern crate quickcheck; +extern crate rand; + +use bytecount::{ + count, naive_count, + num_chars, naive_num_chars, +}; +use rand::RngCore; + +fn random_bytes(len: usize) -> Vec { + let mut result = vec![0; len]; + rand::thread_rng().fill_bytes(&mut result); + result +} + +quickcheck! { + fn check_count_correct(x: (Vec, u8)) -> bool { + let (haystack, needle) = x; + count(&haystack, needle) == naive_count(&haystack, needle) + } +} + +#[test] +fn check_count_large() { + let haystack = vec![0u8; if cfg!(miri) { 2_000 } else { 10_000_000 }]; + assert_eq!(naive_count(&haystack, 0), count(&haystack, 0)); + assert_eq!(naive_count(&haystack, 1), count(&haystack, 1)); +} + +#[test] +fn check_count_large_rand() { + let haystack = random_bytes(if cfg!(miri) { 200 } else { 100_000 }); + for i in 0..=255 { + assert_eq!(naive_count(&haystack, i), count(&haystack, i)); + } +} + +#[test] +fn check_count_some() { + let haystack = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 68]; + let needle = 68; + assert_eq!(count(&haystack, needle), naive_count(&haystack, needle)); +} + +#[test] +fn check_count_overflow() { + let haystack = vec![0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let needle = 2; + assert_eq!(count(&haystack, needle), naive_count(&haystack, needle)); +} + +#[test] +fn check_count_overflow_many() { + let string = [b'x'; 20000]; + for i in 0..20000 { + assert_eq!(count(&string[..i], b'x'), i); + } +} + + + +quickcheck! { + fn check_num_chars_correct(haystack: Vec) -> bool { + num_chars(&haystack) == naive_num_chars(&haystack) + } +} + +#[test] +fn check_num_chars_large() { + let haystack = vec![0u8; if cfg!(miri) { 2_000 } else { 10_000_000 }]; + assert_eq!(naive_num_chars(&haystack), num_chars(&haystack)); + assert_eq!(naive_num_chars(&haystack), num_chars(&haystack)); +} + +#[test] +fn check_num_chars_some() { + let haystack = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 68]; + assert_eq!(num_chars(&haystack), naive_num_chars(&haystack)); +} + +#[test] +fn check_num_chars_overflow() { + let haystack = vec![0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + assert_eq!(num_chars(&haystack), naive_num_chars(&haystack)); +} + +#[test] +fn check_num_chars_overflow_many() { + let string = [b'x'; 20000]; + for i in 0..20000 { + assert_eq!(num_chars(&string[..i]), i); + } +} -- cgit v1.2.3