summaryrefslogtreecommitdiffstats
path: root/vendor/bytecount
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/bytecount')
-rw-r--r--vendor/bytecount/.cargo-checksum.json1
-rw-r--r--vendor/bytecount/Cargo.toml53
-rw-r--r--vendor/bytecount/LICENSE.Apache2201
-rw-r--r--vendor/bytecount/LICENSE.MIT19
-rw-r--r--vendor/bytecount/README.md75
-rw-r--r--vendor/bytecount/benches/bench.rs83
-rwxr-xr-xvendor/bytecount/ci/miri.sh15
-rw-r--r--vendor/bytecount/src/integer_simd.rs111
-rw-r--r--vendor/bytecount/src/lib.rs138
-rw-r--r--vendor/bytecount/src/naive.rs42
-rw-r--r--vendor/bytecount/src/simd/generic.rs137
-rw-r--r--vendor/bytecount/src/simd/mod.rs17
-rw-r--r--vendor/bytecount/src/simd/x86_avx2.rs161
-rw-r--r--vendor/bytecount/src/simd/x86_sse2.rs171
-rw-r--r--vendor/bytecount/tests/check.rs95
15 files changed, 1319 insertions, 0 deletions
diff --git a/vendor/bytecount/.cargo-checksum.json b/vendor/bytecount/.cargo-checksum.json
new file mode 100644
index 000000000..700fc57f2
--- /dev/null
+++ b/vendor/bytecount/.cargo-checksum.json
@@ -0,0 +1 @@
+{"files":{"Cargo.toml":"90f053e609e5ba2abe4f05ecca43423ed11efebf55d6070fd9983252a81c24a8","LICENSE.Apache2":"b40930bbcf80744c86c46a12bc9da056641d722716c378f5659b9e555ef833e1","LICENSE.MIT":"a5dea80c1f383cb5f80a6bb0da5e55a2beb9f24adb123ce6300af2cbaaa3bf65","README.md":"84e452fd64997865cf8f55790717703efb8efc7c43c82252a23545d50e73fa28","benches/bench.rs":"085734cc7b3c3f6da30eeab8bf98d95fc2a9c0886c63f93edd98df356674bf8e","ci/miri.sh":"fb8d52144352c6b09ac925fb089177d30f46ab8b17dd378d68e414c6b6b5b709","src/integer_simd.rs":"5cd97182cd5aea5c5eab1e1cb5d9d409f263de9083c8e69cd7f9d1dd379dfb52","src/lib.rs":"000a6591306852b9ec892995cd6ab4247b8a727f4befa9dd6a9851c20a5f6552","src/naive.rs":"068dae3fba7d721227bb7a9ed9bdfe4a12cfb373a9de4852d6a517630fabf0c8","src/simd/generic.rs":"2f01963147fa97b5dbd7a166975e6f6660d3197f4b6d0a78ed138da9f78a269e","src/simd/mod.rs":"8ce4aac79520bd0d49f6e48471bf32a2d6d3cd8a3a1c65e393f4f4fae6c7f502","src/simd/x86_avx2.rs":"0345d5fbb74d907e3e6cd0d361427ce75645e7abf4c6efa17ef20ee8a4d8fc18","src/simd/x86_sse2.rs":"e7caab115d77118e6a7bca8daea4a9b6d2d318bd3e76ddc23dc4f023aced1f77","tests/check.rs":"e5075559155a1aae6326932e68410996ca52e4140db4b26eb8b9f03a07241b87"},"package":"72feb31ffc86498dacdbd0fcebb56138e7177a8cc5cea4516031d15ae85a742e"} \ No newline at end of file
diff --git a/vendor/bytecount/Cargo.toml b/vendor/bytecount/Cargo.toml
new file mode 100644
index 000000000..e3571c5c1
--- /dev/null
+++ b/vendor/bytecount/Cargo.toml
@@ -0,0 +1,53 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies
+#
+# If you believe there's an error in this file please file an
+# issue against the rust-lang/cargo repository. If you're
+# editing this file be aware that the upstream Cargo.toml
+# will likely look very different (and much more reasonable)
+
+[package]
+edition = "2018"
+name = "bytecount"
+version = "0.6.2"
+authors = ["Andre Bogus <bogusandre@gmail.de>", "Joshua Landau <joshua@landau.ws>"]
+exclude = ["/.travis.yml", "/appveyor.yml"]
+description = "count occurrences of a given byte, or the number of UTF-8 code points, in a byte slice, fast"
+readme = "README.md"
+categories = ["algorithms", "no-std"]
+license = "Apache-2.0/MIT"
+repository = "https://github.com/llogiq/bytecount"
+
+[lib]
+bench = false
+
+[[bench]]
+name = "bench"
+harness = false
+[dependencies.packed_simd]
+version = "0.3.4"
+optional = true
+package = "packed_simd_2"
+[dev-dependencies.criterion]
+version = "0.3"
+default-features = false
+
+[dev-dependencies.quickcheck]
+version = "0.9"
+
+[dev-dependencies.rand]
+version = "0.7"
+
+[features]
+generic-simd = ["packed_simd"]
+html_report = []
+runtime-dispatch-simd = []
+[badges.appveyor]
+repository = "llogiq/bytecount"
+
+[badges.travis-ci]
+repository = "llogiq/bytecount"
diff --git a/vendor/bytecount/LICENSE.Apache2 b/vendor/bytecount/LICENSE.Apache2
new file mode 100644
index 000000000..8dada3eda
--- /dev/null
+++ b/vendor/bytecount/LICENSE.Apache2
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "{}"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright {yyyy} {name of copyright owner}
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/vendor/bytecount/LICENSE.MIT b/vendor/bytecount/LICENSE.MIT
new file mode 100644
index 000000000..e0f734e45
--- /dev/null
+++ b/vendor/bytecount/LICENSE.MIT
@@ -0,0 +1,19 @@
+Copyright (c) 2017 The bytecount Developers
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/vendor/bytecount/README.md b/vendor/bytecount/README.md
new file mode 100644
index 000000000..9847c2a89
--- /dev/null
+++ b/vendor/bytecount/README.md
@@ -0,0 +1,75 @@
+# bytecount
+
+Counting bytes really fast
+
+[![Build Status](https://travis-ci.org/llogiq/bytecount.svg?branch=master)](https://travis-ci.org/llogiq/bytecount)
+[![Windows build status](https://ci.appveyor.com/api/projects/status/github/llogiq/bytecount?svg=true)](https://ci.appveyor.com/project/llogiq/bytecount)
+[![Current Version](http://meritbadge.herokuapp.com/bytecount)](https://crates.io/crates/bytecount)
+[![License: Apache 2.0/MIT](https://img.shields.io/crates/l/bytecount.svg)](#license)
+
+This uses the "hyperscreamingcount" algorithm by Joshua Landau to count bytes faster than anything else.
+The [newlinebench](https://github.com/llogiq/newlinebench) repository has further benchmarks for old versions of this repository.
+
+To use bytecount in your crate, if you have [cargo-edit](https://github.com/killercup/cargo-edit), just type
+`cargo add bytecount` in a terminal with the crate root as the current path. Otherwise you can manually edit your
+`Cargo.toml` to add `bytecount = 0.5.1` to your `[dependencies]` section.
+
+In your crate root (`lib.rs` or `main.rs`, depending on if you are writing a
+library or application), add `extern crate bytecount;`. Now you can simply use
+`bytecount::count` as follows:
+
+```Rust
+extern crate bytecount;
+
+fn main() {
+ let mytext = "some potentially large text, perhaps read from disk?";
+ let spaces = bytecount::count(mytext.as_bytes(), b' ');
+ ..
+}
+```
+
+bytecount supports two features to make use of modern CPU's features to speed up counting considerably. To allow your
+users to use them, add the following to your `Cargo.toml`:
+
+```
+[features]
+runtime-dispatch-simd = ["bytecount/runtime-dispatch-simd"]
+generic-simd = ["bytecount/generic-simd"]
+```
+
+The first, `runtime-dispatch-simd`, enables detection of SIMD capabilities at runtime, which allows using the SSE2 and
+AVX2 codepaths, but cannot be used with `no_std`.
+
+Your users can then compile with runtime dispatch using:
+
+```
+cargo build --release --features runtime-dispatch-simd
+```
+
+The second, `generic-simd`, uses `packed_simd` to provide a fast
+architecture-agnostic SIMD codepath, but requires running on nightly.
+
+Your users can compile with this codepath using:
+
+```
+cargo build --release --features generic-simd
+```
+
+Building for a more specific architecture will also improve performance.
+You can do this with
+
+```
+RUSTFLAGS="-C target-cpu=native" cargo build --release
+```
+
+The scalar algorithm is explained in depth [here](https://llogiq.github.io/2016/09/27/count.html).
+
+**Note: Versions until 0.4.0 worked with Rust as of 1.20.0. Version 0.5.0 until 0.6.0 requires Rust 1.26 or later,
+and at least 1.27.2 to use SIMD. Versions from 0.6.0 require Rust 1.32.0 or later.**
+
+## License
+
+Licensed under either of at your discretion:
+
+- [Apache 2.0](LICENSE.Apache2)
+- [MIT](LICENSE.MIT)
diff --git a/vendor/bytecount/benches/bench.rs b/vendor/bytecount/benches/bench.rs
new file mode 100644
index 000000000..85d04dbb4
--- /dev/null
+++ b/vendor/bytecount/benches/bench.rs
@@ -0,0 +1,83 @@
+#[macro_use]
+extern crate criterion;
+extern crate rand;
+extern crate bytecount;
+
+use std::env;
+use std::time::Duration;
+use rand::RngCore;
+use criterion::{Bencher, Criterion, ParameterizedBenchmark};
+
+use bytecount::{
+ count, naive_count, naive_count_32,
+ num_chars, naive_num_chars,
+};
+
+fn random_bytes(len: usize) -> Vec<u8> {
+ let mut result = vec![0; len];
+ rand::thread_rng().fill_bytes(&mut result);
+ result
+}
+
+static COUNTS : &[usize] = &[0, 10, 20, 30, 40, 50, 60, 70, 80, 90,
+ 100, 120, 140, 170, 210, 250, 300, 400, 500, 600, 700, 800, 900,
+ 1000, 1_000, 1_200, 1_400, 1_700, 2_100, 2_500, 3_000, 4_000,
+ 5_000, 6_000, 7_000, 8_000, 9_000, 10_000, 12_000, 14_000, 17_000,
+ 21_000, 25_000, 30_000, 100_000, 1_000_000];
+
+fn get_counts() -> Vec<usize> {
+ env::var("COUNTS").map(
+ |s| s.split(',').map(
+ |n| str::parse::<usize>(n).unwrap()).collect())
+ .unwrap_or(COUNTS.to_owned())
+}
+
+fn get_config() -> Criterion {
+ if env::var("CI").is_ok() {
+ Criterion::default().nresamples(5_000)
+ .without_plots()
+ .measurement_time(Duration::new(2, 0))
+ .warm_up_time(Duration::new(1, 0))
+ } else {
+ Criterion::default()
+ }
+}
+
+fn bench_counts(criterion: &mut Criterion) {
+ fn naive(b: &mut Bencher, s: &usize) {
+ let haystack = random_bytes(*s);
+ b.iter(|| naive_count(&haystack, 10))
+ }
+ fn naive_32(b: &mut Bencher, s: &usize) {
+ let haystack = random_bytes(*s);
+ b.iter(|| naive_count_32(&haystack, 10))
+ }
+ fn hyper(b: &mut Bencher, s: &usize) {
+ let haystack = random_bytes(*s);
+ b.iter(|| count(&haystack, 10))
+ }
+ let counts = get_counts();
+ criterion.bench("counts",
+ ParameterizedBenchmark::new("naive", naive, counts)
+ .with_function("naive_32", naive_32)
+ .with_function("hyper", hyper));
+}
+
+fn bench_num_chars(criterion: &mut Criterion) {
+ fn naive(b: &mut Bencher, s: &usize) {
+ let haystack = random_bytes(*s);
+ b.iter(|| naive_num_chars(&haystack))
+ }
+ fn hyper(b: &mut Bencher, s: &usize) {
+ let haystack = random_bytes(*s);
+ b.iter(|| num_chars(&haystack))
+ }
+ let counts = get_counts();
+ criterion.bench("num_chars",
+ ParameterizedBenchmark::new("naive", naive, counts)
+ .with_function("hyper", hyper));
+}
+
+criterion_group!(name = count_bench; config = get_config(); targets = bench_counts);
+criterion_group!(name = num_chars_bench; config = get_config(); targets = bench_num_chars);
+criterion_main!(count_bench, num_chars_bench);
diff --git a/vendor/bytecount/ci/miri.sh b/vendor/bytecount/ci/miri.sh
new file mode 100755
index 000000000..8704ebc1d
--- /dev/null
+++ b/vendor/bytecount/ci/miri.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+set -ex
+
+# Setup
+MIRI_NIGHTLY=nightly-$(curl -s https://rust-lang.github.io/rustup-components-history/x86_64-unknown-linux-gnu/miri)
+echo "Installing latest nightly with Miri: $MIRI_NIGHTLY"
+rustup default "$MIRI_NIGHTLY"
+rustup component add miri
+
+# Run tests
+cargo miri test
+cargo miri test --target=mips64-unknown-linux-gnuabi64 # big-endian architecture
+
+# Restore old state in case Travis uses this cache for other jobs.
+rustup default nightly
diff --git a/vendor/bytecount/src/integer_simd.rs b/vendor/bytecount/src/integer_simd.rs
new file mode 100644
index 000000000..48f2ee8d9
--- /dev/null
+++ b/vendor/bytecount/src/integer_simd.rs
@@ -0,0 +1,111 @@
+#[cfg(not(feature = "runtime-dispatch-simd"))]
+use core::{mem, ptr, usize};
+#[cfg(feature = "runtime-dispatch-simd")]
+use std::{mem, ptr, usize};
+
+fn splat(byte: u8) -> usize {
+ let lo = usize::MAX / 0xFF;
+ lo * byte as usize
+}
+
+unsafe fn usize_load_unchecked(bytes: &[u8], offset: usize) -> usize {
+ let mut output = 0;
+ ptr::copy_nonoverlapping(
+ bytes.as_ptr().add(offset),
+ &mut output as *mut usize as *mut u8,
+ mem::size_of::<usize>()
+ );
+ output
+}
+
+fn bytewise_equal(lhs: usize, rhs: usize) -> usize {
+ let lo = usize::MAX / 0xFF;
+ let hi = lo << 7;
+
+ let x = lhs ^ rhs;
+ !((((x & !hi) + !hi) | x) >> 7) & lo
+}
+
+fn sum_usize(values: usize) -> usize {
+ let every_other_byte_lo = usize::MAX / 0xFFFF;
+ let every_other_byte = every_other_byte_lo * 0xFF;
+
+ // Pairwise reduction to avoid overflow on next step.
+ let pair_sum: usize = (values & every_other_byte) + ((values >> 8) & every_other_byte);
+
+ // Multiplication results in top two bytes holding sum.
+ pair_sum.wrapping_mul(every_other_byte_lo) >> ((mem::size_of::<usize>() - 2) * 8)
+}
+
+fn is_leading_utf8_byte(values: usize) -> usize {
+ // a leading UTF-8 byte is one which does not start with the bits 10.
+ ((!values >> 7) | (values >> 6)) & splat(1)
+}
+
+pub fn chunk_count(haystack: &[u8], needle: u8) -> usize {
+ let chunksize = mem::size_of::<usize>();
+ assert!(haystack.len() >= chunksize);
+
+ unsafe {
+ let mut offset = 0;
+ let mut count = 0;
+
+ let needles = splat(needle);
+
+ // 2040
+ while haystack.len() >= offset + chunksize * 255 {
+ let mut counts = 0;
+ for _ in 0..255 {
+ counts += bytewise_equal(usize_load_unchecked(haystack, offset), needles);
+ offset += chunksize;
+ }
+ count += sum_usize(counts);
+ }
+
+ // 8
+ let mut counts = 0;
+ for i in 0..(haystack.len() - offset) / chunksize {
+ counts += bytewise_equal(usize_load_unchecked(haystack, offset + i * chunksize), needles);
+ }
+ if haystack.len() % 8 != 0 {
+ let mask = usize::from_le(!(!0 >> ((haystack.len() % chunksize) * 8)));
+ counts += bytewise_equal(usize_load_unchecked(haystack, haystack.len() - chunksize), needles) & mask;
+ }
+ count += sum_usize(counts);
+
+ count
+ }
+}
+
+pub fn chunk_num_chars(utf8_chars: &[u8]) -> usize {
+ let chunksize = mem::size_of::<usize>();
+ assert!(utf8_chars.len() >= chunksize);
+
+ unsafe {
+ let mut offset = 0;
+ let mut count = 0;
+
+ // 2040
+ while utf8_chars.len() >= offset + chunksize * 255 {
+ let mut counts = 0;
+ for _ in 0..255 {
+ counts += is_leading_utf8_byte(usize_load_unchecked(utf8_chars, offset));
+ offset += chunksize;
+ }
+ count += sum_usize(counts);
+ }
+
+ // 8
+ let mut counts = 0;
+ for i in 0..(utf8_chars.len() - offset) / chunksize {
+ counts += is_leading_utf8_byte(usize_load_unchecked(utf8_chars, offset + i * chunksize));
+ }
+ if utf8_chars.len() % 8 != 0 {
+ let mask = usize::from_le(!(!0 >> ((utf8_chars.len() % chunksize) * 8)));
+ counts += is_leading_utf8_byte(usize_load_unchecked(utf8_chars, utf8_chars.len() - chunksize)) & mask;
+ }
+ count += sum_usize(counts);
+
+ count
+ }
+}
diff --git a/vendor/bytecount/src/lib.rs b/vendor/bytecount/src/lib.rs
new file mode 100644
index 000000000..ef4235c26
--- /dev/null
+++ b/vendor/bytecount/src/lib.rs
@@ -0,0 +1,138 @@
+//! count occurrences of a given byte, or the number of UTF-8 code points, in a
+//! byte slice, fast.
+//!
+//! This crate has the [`count`](fn.count.html) method to count byte
+//! occurrences (for example newlines) in a larger `&[u8]` slice.
+//!
+//! For example:
+//!
+//! ```rust
+//! assert_eq!(5, bytecount::count(b"Hello, this is the bytecount crate!", b' '));
+//! ```
+//!
+//! Also there is a [`num_chars`](fn.num_chars.html) method to count
+//! the number of UTF8 characters in a slice. It will work the same as
+//! `str::chars().count()` for byte slices of correct UTF-8 character
+//! sequences. The result will likely be off for invalid sequences,
+//! although the result is guaranteed to be between `0` and
+//! `[_]::len()`, inclusive.
+//!
+//! Example:
+//!
+//! ```rust
+//! let sequence = "Wenn ich ein Vöglein wär, flög ich zu Dir!";
+//! assert_eq!(sequence.chars().count(),
+//! bytecount::num_chars(sequence.as_bytes()));
+//! ```
+//!
+//! For completeness and easy comparison, the "naive" versions of both
+//! count and num_chars are provided. Those are also faster if used on
+//! predominantly small strings. The
+//! [`naive_count_32`](fn.naive_count_32.html) method can be faster
+//! still on small strings.
+
+#![deny(missing_docs)]
+
+#![cfg_attr(not(feature = "runtime-dispatch-simd"), no_std)]
+
+#[cfg(not(feature = "runtime-dispatch-simd"))]
+use core::mem;
+#[cfg(feature = "runtime-dispatch-simd")]
+use std::mem;
+
+mod naive;
+pub use naive::*;
+mod integer_simd;
+
+#[cfg(any(
+ all(feature = "runtime-dispatch-simd", any(target_arch = "x86", target_arch = "x86_64")),
+ feature = "generic-simd"
+))]
+mod simd;
+
+/// Count occurrences of a byte in a slice of bytes, fast
+///
+/// # Examples
+///
+/// ```
+/// let s = b"This is a Text with spaces";
+/// let number_of_spaces = bytecount::count(s, b' ');
+/// assert_eq!(number_of_spaces, 5);
+/// ```
+pub fn count(haystack: &[u8], needle: u8) -> usize {
+ if haystack.len() >= 32 {
+ #[cfg(all(feature = "runtime-dispatch-simd", target_arch = "x86_64"))]
+ {
+ if is_x86_feature_detected!("avx2") {
+ unsafe { return simd::x86_avx2::chunk_count(haystack, needle); }
+ }
+ }
+
+ #[cfg(feature = "generic-simd")]
+ return simd::generic::chunk_count(haystack, needle);
+ }
+
+ if haystack.len() >= 16 {
+ #[cfg(all(
+ feature = "runtime-dispatch-simd",
+ any(target_arch = "x86", target_arch = "x86_64"),
+ not(feature = "generic-simd")
+ ))]
+ {
+ if is_x86_feature_detected!("sse2") {
+ unsafe { return simd::x86_sse2::chunk_count(haystack, needle); }
+ }
+ }
+ }
+
+ if haystack.len() >= mem::size_of::<usize>() {
+ return integer_simd::chunk_count(haystack, needle);
+ }
+
+ naive_count(haystack, needle)
+}
+
+/// Count the number of UTF-8 encoded Unicode codepoints in a slice of bytes, fast
+///
+/// This function is safe to use on any byte array, valid UTF-8 or not,
+/// but the output is only meaningful for well-formed UTF-8.
+///
+/// # Example
+///
+/// ```
+/// let swordfish = "メカジキ";
+/// let char_count = bytecount::num_chars(swordfish.as_bytes());
+/// assert_eq!(char_count, 4);
+/// ```
+pub fn num_chars(utf8_chars: &[u8]) -> usize {
+ if utf8_chars.len() >= 32 {
+ #[cfg(all(feature = "runtime-dispatch-simd", target_arch = "x86_64"))]
+ {
+ if is_x86_feature_detected!("avx2") {
+ unsafe { return simd::x86_avx2::chunk_num_chars(utf8_chars); }
+ }
+ }
+
+ #[cfg(feature = "generic-simd")]
+ return simd::generic::chunk_num_chars(utf8_chars);
+ }
+
+ if utf8_chars.len() >= 16 {
+ #[cfg(all(
+ feature = "runtime-dispatch-simd",
+ any(target_arch = "x86", target_arch = "x86_64"),
+ not(feature = "generic-simd")
+ ))]
+ {
+ if is_x86_feature_detected!("sse2") {
+ unsafe { return simd::x86_sse2::chunk_num_chars(utf8_chars); }
+ }
+ }
+ }
+
+ if utf8_chars.len() >= mem::size_of::<usize>() {
+ return integer_simd::chunk_num_chars(utf8_chars);
+ }
+
+ naive_num_chars(utf8_chars)
+}
diff --git a/vendor/bytecount/src/naive.rs b/vendor/bytecount/src/naive.rs
new file mode 100644
index 000000000..315c4b675
--- /dev/null
+++ b/vendor/bytecount/src/naive.rs
@@ -0,0 +1,42 @@
+/// Count up to `(2^32)-1` occurrences of a byte in a slice
+/// of bytes, simple
+///
+/// # Example
+///
+/// ```
+/// let s = b"This is yet another Text with spaces";
+/// let number_of_spaces = bytecount::naive_count_32(s, b' ');
+/// assert_eq!(number_of_spaces, 6);
+/// ```
+pub fn naive_count_32(haystack: &[u8], needle: u8) -> usize {
+ haystack.iter().fold(0, |n, c| n + (*c == needle) as u32) as usize
+}
+
+/// Count occurrences of a byte in a slice of bytes, simple
+///
+/// # Example
+///
+/// ```
+/// let s = b"This is yet another Text with spaces";
+/// let number_of_spaces = bytecount::naive_count(s, b' ');
+/// assert_eq!(number_of_spaces, 6);
+/// ```
+pub fn naive_count(utf8_chars: &[u8], needle: u8) -> usize {
+ utf8_chars.iter().fold(0, |n, c| n + (*c == needle) as usize)
+}
+
+/// Count the number of UTF-8 encoded Unicode codepoints in a slice of bytes, simple
+///
+/// This function is safe to use on any byte array, valid UTF-8 or not,
+/// but the output is only meaningful for well-formed UTF-8.
+///
+/// # Example
+///
+/// ```
+/// let swordfish = "メカジキ";
+/// let char_count = bytecount::naive_num_chars(swordfish.as_bytes());
+/// assert_eq!(char_count, 4);
+/// ```
+pub fn naive_num_chars(utf8_chars: &[u8]) -> usize {
+ utf8_chars.iter().filter(|&&byte| (byte >> 6) != 0b10).count()
+}
diff --git a/vendor/bytecount/src/simd/generic.rs b/vendor/bytecount/src/simd/generic.rs
new file mode 100644
index 000000000..2031e730e
--- /dev/null
+++ b/vendor/bytecount/src/simd/generic.rs
@@ -0,0 +1,137 @@
+extern crate packed_simd;
+
+#[cfg(not(feature = "runtime-dispatch-simd"))]
+use core::mem;
+#[cfg(feature = "runtime-dispatch-simd")]
+use std::mem;
+
+use self::packed_simd::{u8x32, u8x64, FromCast};
+
+const MASK: [u8; 64] = [
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+];
+
+unsafe fn u8x64_from_offset(slice: &[u8], offset: usize) -> u8x64 {
+ u8x64::from_slice_unaligned_unchecked(slice.get_unchecked(offset..))
+}
+unsafe fn u8x32_from_offset(slice: &[u8], offset: usize) -> u8x32 {
+ u8x32::from_slice_unaligned_unchecked(slice.get_unchecked(offset..))
+}
+
+fn sum_x64(u8s: &u8x64) -> usize {
+ let mut store = [0; mem::size_of::<u8x64>()];
+ u8s.write_to_slice_unaligned(&mut store);
+ store.iter().map(|&e| e as usize).sum()
+}
+fn sum_x32(u8s: &u8x32) -> usize {
+ let mut store = [0; mem::size_of::<u8x32>()];
+ u8s.write_to_slice_unaligned(&mut store);
+ store.iter().map(|&e| e as usize).sum()
+}
+
+pub fn chunk_count(haystack: &[u8], needle: u8) -> usize {
+ assert!(haystack.len() >= 32);
+
+ unsafe {
+ let mut offset = 0;
+ let mut count = 0;
+
+ let needles_x64 = u8x64::splat(needle);
+
+ // 16320
+ while haystack.len() >= offset + 64 * 255 {
+ let mut counts = u8x64::splat(0);
+ for _ in 0..255 {
+ counts -= u8x64::from_cast(u8x64_from_offset(haystack, offset).eq(needles_x64));
+ offset += 64;
+ }
+ count += sum_x64(&counts);
+ }
+
+ // 8192
+ if haystack.len() >= offset + 64 * 128 {
+ let mut counts = u8x64::splat(0);
+ for _ in 0..128 {
+ counts -= u8x64::from_cast(u8x64_from_offset(haystack, offset).eq(needles_x64));
+ offset += 64;
+ }
+ count += sum_x64(&counts);
+ }
+
+ let needles_x32 = u8x32::splat(needle);
+
+ // 32
+ let mut counts = u8x32::splat(0);
+ for i in 0..(haystack.len() - offset) / 32 {
+ counts -= u8x32::from_cast(u8x32_from_offset(haystack, offset + i * 32).eq(needles_x32));
+ }
+ count += sum_x32(&counts);
+
+ // Straggler; need to reset counts because prior loop can run 255 times
+ counts = u8x32::splat(0);
+ if haystack.len() % 32 != 0 {
+ counts -= u8x32::from_cast(u8x32_from_offset(haystack, haystack.len() - 32).eq(needles_x32)) &
+ u8x32_from_offset(&MASK, haystack.len() % 32);
+ }
+ count += sum_x32(&counts);
+
+ count
+ }
+}
+
+fn is_leading_utf8_byte_x64(u8s: u8x64) -> u8x64 {
+ u8x64::from_cast((u8s & u8x64::splat(0b1100_0000)).ne(u8x64::splat(0b1000_0000)))
+}
+
+fn is_leading_utf8_byte_x32(u8s: u8x32) -> u8x32 {
+ u8x32::from_cast((u8s & u8x32::splat(0b1100_0000)).ne(u8x32::splat(0b1000_0000)))
+}
+
+pub fn chunk_num_chars(utf8_chars: &[u8]) -> usize {
+ assert!(utf8_chars.len() >= 32);
+
+ unsafe {
+ let mut offset = 0;
+ let mut count = 0;
+
+ // 16320
+ while utf8_chars.len() >= offset + 64 * 255 {
+ let mut counts = u8x64::splat(0);
+ for _ in 0..255 {
+ counts -= is_leading_utf8_byte_x64(u8x64_from_offset(utf8_chars, offset));
+ offset += 64;
+ }
+ count += sum_x64(&counts);
+ }
+
+ // 8192
+ if utf8_chars.len() >= offset + 64 * 128 {
+ let mut counts = u8x64::splat(0);
+ for _ in 0..128 {
+ counts -= is_leading_utf8_byte_x64(u8x64_from_offset(utf8_chars, offset));
+ offset += 64;
+ }
+ count += sum_x64(&counts);
+ }
+
+ // 32
+ let mut counts = u8x32::splat(0);
+ for i in 0..(utf8_chars.len() - offset) / 32 {
+ counts -= is_leading_utf8_byte_x32(u8x32_from_offset(utf8_chars, offset + i * 32));
+ }
+ count += sum_x32(&counts);
+
+ // Straggler; need to reset counts because prior loop can run 255 times
+ counts = u8x32::splat(0);
+ if utf8_chars.len() % 32 != 0 {
+ counts -= is_leading_utf8_byte_x32(u8x32_from_offset(utf8_chars, utf8_chars.len() - 32)) &
+ u8x32_from_offset(&MASK, utf8_chars.len() % 32);
+ }
+ count += sum_x32(&counts);
+
+ count
+ }
+}
diff --git a/vendor/bytecount/src/simd/mod.rs b/vendor/bytecount/src/simd/mod.rs
new file mode 100644
index 000000000..d144e1847
--- /dev/null
+++ b/vendor/bytecount/src/simd/mod.rs
@@ -0,0 +1,17 @@
+#[cfg(feature = "generic-simd")]
+pub mod generic;
+
+// This is like generic, but written explicitly
+// because generic SIMD requires nightly.
+#[cfg(all(
+ feature = "runtime-dispatch-simd",
+ any(target_arch = "x86", target_arch = "x86_64"),
+ not(feature = "generic-simd")
+))]
+pub mod x86_sse2;
+
+// Modern x86 machines can do lots of fun stuff;
+// this is where the *real* optimizations go.
+// Runtime feature detection is not available with no_std.
+#[cfg(all(feature = "runtime-dispatch-simd", target_arch = "x86_64"))]
+pub mod x86_avx2;
diff --git a/vendor/bytecount/src/simd/x86_avx2.rs b/vendor/bytecount/src/simd/x86_avx2.rs
new file mode 100644
index 000000000..90a55c0fb
--- /dev/null
+++ b/vendor/bytecount/src/simd/x86_avx2.rs
@@ -0,0 +1,161 @@
+use std::arch::x86_64::{
+ __m256i,
+ _mm256_and_si256,
+ _mm256_cmpeq_epi8,
+ _mm256_extract_epi64,
+ _mm256_loadu_si256,
+ _mm256_sad_epu8,
+ _mm256_set1_epi8,
+ _mm256_setzero_si256,
+ _mm256_sub_epi8,
+ _mm256_xor_si256,
+};
+
+#[target_feature(enable = "avx2")]
+pub unsafe fn _mm256_set1_epu8(a: u8) -> __m256i {
+ _mm256_set1_epi8(a as i8)
+}
+
+#[target_feature(enable = "avx2")]
+pub unsafe fn mm256_cmpneq_epi8(a: __m256i, b: __m256i) -> __m256i {
+ _mm256_xor_si256(_mm256_cmpeq_epi8(a, b), _mm256_set1_epi8(-1))
+}
+
+const MASK: [u8; 64] = [
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+];
+
+#[target_feature(enable = "avx2")]
+unsafe fn mm256_from_offset(slice: &[u8], offset: usize) -> __m256i {
+ _mm256_loadu_si256(slice.as_ptr().add(offset) as *const _)
+}
+
+#[target_feature(enable = "avx2")]
+unsafe fn sum(u8s: &__m256i) -> usize {
+ let sums = _mm256_sad_epu8(*u8s, _mm256_setzero_si256());
+ (
+ _mm256_extract_epi64(sums, 0) + _mm256_extract_epi64(sums, 1) +
+ _mm256_extract_epi64(sums, 2) + _mm256_extract_epi64(sums, 3)
+ ) as usize
+}
+
+#[target_feature(enable = "avx2")]
+pub unsafe fn chunk_count(haystack: &[u8], needle: u8) -> usize {
+ assert!(haystack.len() >= 32);
+
+ let mut offset = 0;
+ let mut count = 0;
+
+ let needles = _mm256_set1_epu8(needle);
+
+ // 8160
+ while haystack.len() >= offset + 32 * 255 {
+ let mut counts = _mm256_setzero_si256();
+ for _ in 0..255 {
+ counts = _mm256_sub_epi8(
+ counts,
+ _mm256_cmpeq_epi8(mm256_from_offset(haystack, offset), needles)
+ );
+ offset += 32;
+ }
+ count += sum(&counts);
+ }
+
+ // 4096
+ if haystack.len() >= offset + 32 * 128 {
+ let mut counts = _mm256_setzero_si256();
+ for _ in 0..128 {
+ counts = _mm256_sub_epi8(
+ counts,
+ _mm256_cmpeq_epi8(mm256_from_offset(haystack, offset), needles)
+ );
+ offset += 32;
+ }
+ count += sum(&counts);
+ }
+
+ // 32
+ let mut counts = _mm256_setzero_si256();
+ for i in 0..(haystack.len() - offset) / 32 {
+ counts = _mm256_sub_epi8(
+ counts,
+ _mm256_cmpeq_epi8(mm256_from_offset(haystack, offset + i * 32), needles)
+ );
+ }
+ if haystack.len() % 32 != 0 {
+ counts = _mm256_sub_epi8(
+ counts,
+ _mm256_and_si256(
+ _mm256_cmpeq_epi8(mm256_from_offset(haystack, haystack.len() - 32), needles),
+ mm256_from_offset(&MASK, haystack.len() % 32)
+ )
+ );
+ }
+ count += sum(&counts);
+
+ count
+}
+
+#[target_feature(enable = "avx2")]
+unsafe fn is_leading_utf8_byte(u8s: __m256i) -> __m256i {
+ mm256_cmpneq_epi8(_mm256_and_si256(u8s, _mm256_set1_epu8(0b1100_0000)), _mm256_set1_epu8(0b1000_0000))
+}
+
+#[target_feature(enable = "avx2")]
+pub unsafe fn chunk_num_chars(utf8_chars: &[u8]) -> usize {
+ assert!(utf8_chars.len() >= 32);
+
+ let mut offset = 0;
+ let mut count = 0;
+
+ // 8160
+ while utf8_chars.len() >= offset + 32 * 255 {
+ let mut counts = _mm256_setzero_si256();
+
+ for _ in 0..255 {
+ counts = _mm256_sub_epi8(
+ counts,
+ is_leading_utf8_byte(mm256_from_offset(utf8_chars, offset))
+ );
+ offset += 32;
+ }
+ count += sum(&counts);
+ }
+
+ // 4096
+ if utf8_chars.len() >= offset + 32 * 128 {
+ let mut counts = _mm256_setzero_si256();
+ for _ in 0..128 {
+ counts = _mm256_sub_epi8(
+ counts,
+ is_leading_utf8_byte(mm256_from_offset(utf8_chars, offset))
+ );
+ offset += 32;
+ }
+ count += sum(&counts);
+ }
+
+ // 32
+ let mut counts = _mm256_setzero_si256();
+ for i in 0..(utf8_chars.len() - offset) / 32 {
+ counts = _mm256_sub_epi8(
+ counts,
+ is_leading_utf8_byte(mm256_from_offset(utf8_chars, offset + i * 32))
+ );
+ }
+ if utf8_chars.len() % 32 != 0 {
+ counts = _mm256_sub_epi8(
+ counts,
+ _mm256_and_si256(
+ is_leading_utf8_byte(mm256_from_offset(utf8_chars, utf8_chars.len() - 32)),
+ mm256_from_offset(&MASK, utf8_chars.len() % 32)
+ )
+ );
+ }
+ count += sum(&counts);
+
+ count
+}
diff --git a/vendor/bytecount/src/simd/x86_sse2.rs b/vendor/bytecount/src/simd/x86_sse2.rs
new file mode 100644
index 000000000..63d295eae
--- /dev/null
+++ b/vendor/bytecount/src/simd/x86_sse2.rs
@@ -0,0 +1,171 @@
+#[cfg(target_arch = "x86")]
+use std::arch::x86::{
+ __m128i,
+ _mm_and_si128,
+ _mm_cmpeq_epi8,
+ _mm_extract_epi32,
+ _mm_loadu_si128,
+ _mm_sad_epu8,
+ _mm_set1_epi8,
+ _mm_setzero_si128,
+ _mm_sub_epi8,
+ _mm_xor_si128,
+};
+
+#[cfg(target_arch = "x86_64")]
+use std::arch::x86_64::{
+ __m128i,
+ _mm_and_si128,
+ _mm_cmpeq_epi8,
+ _mm_extract_epi32,
+ _mm_loadu_si128,
+ _mm_sad_epu8,
+ _mm_set1_epi8,
+ _mm_setzero_si128,
+ _mm_sub_epi8,
+ _mm_xor_si128,
+};
+
+#[target_feature(enable = "sse2")]
+pub unsafe fn _mm_set1_epu8(a: u8) -> __m128i {
+ _mm_set1_epi8(a as i8)
+}
+
+#[target_feature(enable = "sse2")]
+pub unsafe fn mm_cmpneq_epi8(a: __m128i, b: __m128i) -> __m128i {
+ _mm_xor_si128(_mm_cmpeq_epi8(a, b), _mm_set1_epi8(-1))
+}
+
+const MASK: [u8; 32] = [
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+];
+
+#[target_feature(enable = "sse2")]
+unsafe fn mm_from_offset(slice: &[u8], offset: usize) -> __m128i {
+ _mm_loadu_si128(slice.as_ptr().offset(offset as isize) as *const _)
+}
+
+#[target_feature(enable = "sse2")]
+unsafe fn sum(u8s: &__m128i) -> usize {
+ let sums = _mm_sad_epu8(*u8s, _mm_setzero_si128());
+ (_mm_extract_epi32(sums, 0) + _mm_extract_epi32(sums, 2)) as usize
+}
+
+#[target_feature(enable = "sse2")]
+pub unsafe fn chunk_count(haystack: &[u8], needle: u8) -> usize {
+ assert!(haystack.len() >= 16);
+
+ let mut offset = 0;
+ let mut count = 0;
+
+ let needles = _mm_set1_epu8(needle);
+
+ // 4080
+ while haystack.len() >= offset + 16 * 255 {
+ let mut counts = _mm_setzero_si128();
+ for _ in 0..255 {
+ counts = _mm_sub_epi8(
+ counts,
+ _mm_cmpeq_epi8(mm_from_offset(haystack, offset), needles)
+ );
+ offset += 16;
+ }
+ count += sum(&counts);
+ }
+
+ // 2048
+ if haystack.len() >= offset + 16 * 128 {
+ let mut counts = _mm_setzero_si128();
+ for _ in 0..128 {
+ counts = _mm_sub_epi8(
+ counts,
+ _mm_cmpeq_epi8(mm_from_offset(haystack, offset), needles)
+ );
+ offset += 16;
+ }
+ count += sum(&counts);
+ }
+
+ // 16
+ let mut counts = _mm_setzero_si128();
+ for i in 0..(haystack.len() - offset) / 16 {
+ counts = _mm_sub_epi8(
+ counts,
+ _mm_cmpeq_epi8(mm_from_offset(haystack, offset + i * 16), needles)
+ );
+ }
+ if haystack.len() % 16 != 0 {
+ counts = _mm_sub_epi8(
+ counts,
+ _mm_and_si128(
+ _mm_cmpeq_epi8(mm_from_offset(haystack, haystack.len() - 16), needles),
+ mm_from_offset(&MASK, haystack.len() % 16)
+ )
+ );
+ }
+ count += sum(&counts);
+
+ count
+}
+
+#[target_feature(enable = "sse2")]
+unsafe fn is_leading_utf8_byte(u8s: __m128i) -> __m128i {
+ mm_cmpneq_epi8(_mm_and_si128(u8s, _mm_set1_epu8(0b1100_0000)), _mm_set1_epu8(0b1000_0000))
+}
+
+#[target_feature(enable = "sse2")]
+pub unsafe fn chunk_num_chars(utf8_chars: &[u8]) -> usize {
+ assert!(utf8_chars.len() >= 16);
+
+ let mut offset = 0;
+ let mut count = 0;
+
+ // 4080
+ while utf8_chars.len() >= offset + 16 * 255 {
+ let mut counts = _mm_setzero_si128();
+
+ for _ in 0..255 {
+ counts = _mm_sub_epi8(
+ counts,
+ is_leading_utf8_byte(mm_from_offset(utf8_chars, offset))
+ );
+ offset += 16;
+ }
+ count += sum(&counts);
+ }
+
+ // 2048
+ if utf8_chars.len() >= offset + 16 * 128 {
+ let mut counts = _mm_setzero_si128();
+ for _ in 0..128 {
+ counts = _mm_sub_epi8(
+ counts,
+ is_leading_utf8_byte(mm_from_offset(utf8_chars, offset))
+ );
+ offset += 16;
+ }
+ count += sum(&counts);
+ }
+
+ // 16
+ let mut counts = _mm_setzero_si128();
+ for i in 0..(utf8_chars.len() - offset) / 16 {
+ counts = _mm_sub_epi8(
+ counts,
+ is_leading_utf8_byte(mm_from_offset(utf8_chars, offset + i * 16))
+ );
+ }
+ if utf8_chars.len() % 16 != 0 {
+ counts = _mm_sub_epi8(
+ counts,
+ _mm_and_si128(
+ is_leading_utf8_byte(mm_from_offset(utf8_chars, utf8_chars.len() - 16)),
+ mm_from_offset(&MASK, utf8_chars.len() % 16)
+ )
+ );
+ }
+ count += sum(&counts);
+
+ count
+}
diff --git a/vendor/bytecount/tests/check.rs b/vendor/bytecount/tests/check.rs
new file mode 100644
index 000000000..147b466fc
--- /dev/null
+++ b/vendor/bytecount/tests/check.rs
@@ -0,0 +1,95 @@
+extern crate bytecount;
+#[macro_use]
+extern crate quickcheck;
+extern crate rand;
+
+use bytecount::{
+ count, naive_count,
+ num_chars, naive_num_chars,
+};
+use rand::RngCore;
+
+fn random_bytes(len: usize) -> Vec<u8> {
+ let mut result = vec![0; len];
+ rand::thread_rng().fill_bytes(&mut result);
+ result
+}
+
+quickcheck! {
+ fn check_count_correct(x: (Vec<u8>, u8)) -> bool {
+ let (haystack, needle) = x;
+ count(&haystack, needle) == naive_count(&haystack, needle)
+ }
+}
+
+#[test]
+fn check_count_large() {
+ let haystack = vec![0u8; if cfg!(miri) { 2_000 } else { 10_000_000 }];
+ assert_eq!(naive_count(&haystack, 0), count(&haystack, 0));
+ assert_eq!(naive_count(&haystack, 1), count(&haystack, 1));
+}
+
+#[test]
+fn check_count_large_rand() {
+ let haystack = random_bytes(if cfg!(miri) { 200 } else { 100_000 });
+ for i in 0..=255 {
+ assert_eq!(naive_count(&haystack, i), count(&haystack, i));
+ }
+}
+
+#[test]
+fn check_count_some() {
+ let haystack = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 68];
+ let needle = 68;
+ assert_eq!(count(&haystack, needle), naive_count(&haystack, needle));
+}
+
+#[test]
+fn check_count_overflow() {
+ let haystack = vec![0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
+ let needle = 2;
+ assert_eq!(count(&haystack, needle), naive_count(&haystack, needle));
+}
+
+#[test]
+fn check_count_overflow_many() {
+ let string = [b'x'; 20000];
+ for i in 0..20000 {
+ assert_eq!(count(&string[..i], b'x'), i);
+ }
+}
+
+
+
+quickcheck! {
+ fn check_num_chars_correct(haystack: Vec<u8>) -> bool {
+ num_chars(&haystack) == naive_num_chars(&haystack)
+ }
+}
+
+#[test]
+fn check_num_chars_large() {
+ let haystack = vec![0u8; if cfg!(miri) { 2_000 } else { 10_000_000 }];
+ assert_eq!(naive_num_chars(&haystack), num_chars(&haystack));
+ assert_eq!(naive_num_chars(&haystack), num_chars(&haystack));
+}
+
+#[test]
+fn check_num_chars_some() {
+ let haystack = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 68];
+ assert_eq!(num_chars(&haystack), naive_num_chars(&haystack));
+}
+
+#[test]
+fn check_num_chars_overflow() {
+ let haystack = vec![0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
+ assert_eq!(num_chars(&haystack), naive_num_chars(&haystack));
+}
+
+#[test]
+fn check_num_chars_overflow_many() {
+ let string = [b'x'; 20000];
+ for i in 0..20000 {
+ assert_eq!(num_chars(&string[..i]), i);
+ }
+}