author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-17 12:02:58 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-17 12:02:58 +0000
commit     698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch)
tree       173a775858bd501c378080a10dca74132f05bc50 /vendor/odht
parent     Initial commit. (diff)
Adding upstream version 1.64.0+dfsg1. (ref: upstream/1.64.0+dfsg1)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/odht')
-rw-r--r--  vendor/odht/.cargo-checksum.json                     1
-rw-r--r--  vendor/odht/CODE_OF_CONDUCT.md                       3
-rw-r--r--  vendor/odht/Cargo.toml                              33
-rw-r--r--  vendor/odht/LICENSE-APACHE                         176
-rw-r--r--  vendor/odht/LICENSE-MIT                             23
-rw-r--r--  vendor/odht/README.md                               17
-rw-r--r--  vendor/odht/benches/bench.rs                       228
-rw-r--r--  vendor/odht/src/error.rs                             9
-rw-r--r--  vendor/odht/src/fxhash.rs                           58
-rw-r--r--  vendor/odht/src/lib.rs                             963
-rw-r--r--  vendor/odht/src/memory_layout.rs                   369
-rw-r--r--  vendor/odht/src/raw_table.rs                       659
-rw-r--r--  vendor/odht/src/swisstable_group_query/mod.rs      117
-rw-r--r--  vendor/odht/src/swisstable_group_query/no_simd.rs   70
-rw-r--r--  vendor/odht/src/swisstable_group_query/sse2.rs      54
-rw-r--r--  vendor/odht/src/unhash.rs                           15
16 files changed, 2795 insertions, 0 deletions
diff --git a/vendor/odht/.cargo-checksum.json b/vendor/odht/.cargo-checksum.json
new file mode 100644
index 000000000..57a139d68
--- /dev/null
+++ b/vendor/odht/.cargo-checksum.json
@@ -0,0 +1 @@
+{"files":{"CODE_OF_CONDUCT.md":"3c4d1c4de2e6991695f5dc495f7530ecb188dfafdb1f47a1323ce7159987accd","Cargo.toml":"b3cafcf8d6e4c2d5c82c0c7b60597164e6b8ec2c26cf4417aebb528f3df45d1b","LICENSE-APACHE":"62c7a1e35f56406896d7aa7ca52d0cc0d272ac022b5d2796e7d6905db8a3636a","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","README.md":"68519c9260fe1ce04b20683d856874ae7a61229691403bb8d2b732dccfb536fb","benches/bench.rs":"a0c51d176847fef2a9a4481354e9b5a536e8899192bbbeda4cb68c1824b99fdc","src/error.rs":"8b4abe83d01dc8269bbf8ec2717d1b7886f53c6bd351accd0105ebe8bd4e8700","src/fxhash.rs":"565713c9b5d6bb09a9100d95b49607faa59c55c5d30a6d94dc65d31c2ecc0e69","src/lib.rs":"46e57754376968caaad59c5a1889a9fef481615d35882dfaf599c1ee47142eda","src/memory_layout.rs":"bb6bed0b35fd18755fbc6787b511a1359cfa4b65a6d3c8d9580e0a6898a5c209","src/raw_table.rs":"75a39c4a4410ea45b8c2f7f74f47f29aa174c40bb3c3d1fd53428912275301f3","src/swisstable_group_query/mod.rs":"9813e31bb0cc870aee9462899e46e8a8b60064e719729e1da12038c3eda54983","src/swisstable_group_query/no_simd.rs":"e58c4e1aed09ac8f2ade2e8f9e62444764459b56da4dd2a732df168338e61763","src/swisstable_group_query/sse2.rs":"56331addbe5e364907ea3992fed5c6b33f0d86f1aef10f7ef94df6c285f7bec2","src/unhash.rs":"c13d599cd9d54475b5d6512ef11308f69e1dd165e704de1ee206a20f8e8952c3"},"package":"5a518809ac14b25b569624d0268eba1e88498f71615893dca57982bed7621abb"} \ No newline at end of file
diff --git a/vendor/odht/CODE_OF_CONDUCT.md b/vendor/odht/CODE_OF_CONDUCT.md
new file mode 100644
index 000000000..e3708bc48
--- /dev/null
+++ b/vendor/odht/CODE_OF_CONDUCT.md
@@ -0,0 +1,3 @@
+# The Rust Code of Conduct
+
+The Code of Conduct for this repository [can be found online](https://www.rust-lang.org/conduct.html).
diff --git a/vendor/odht/Cargo.toml b/vendor/odht/Cargo.toml
new file mode 100644
index 000000000..f41eb35cb
--- /dev/null
+++ b/vendor/odht/Cargo.toml
@@ -0,0 +1,33 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies.
+#
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
+
+[package]
+edition = "2018"
+name = "odht"
+version = "0.3.1"
+exclude = ["/.github/*"]
+description = "A Rust crate for hash tables that can be mapped from disk into memory without the need for up-front decoding."
+license = "MIT OR Apache-2.0"
+repository = "https://github.com/rust-lang/odht"
+[dependencies.cfg-if]
+version = "1.0.0"
+[dev-dependencies.quickcheck]
+version = "1"
+
+[dev-dependencies.rand]
+version = "0.8.2"
+
+[dev-dependencies.rustc-hash]
+version = "1.1.0"
+
+[features]
+nightly = []
+no_simd = []
diff --git a/vendor/odht/LICENSE-APACHE b/vendor/odht/LICENSE-APACHE
new file mode 100644
index 000000000..1b5ec8b78
--- /dev/null
+++ b/vendor/odht/LICENSE-APACHE
@@ -0,0 +1,176 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
diff --git a/vendor/odht/LICENSE-MIT b/vendor/odht/LICENSE-MIT
new file mode 100644
index 000000000..31aa79387
--- /dev/null
+++ b/vendor/odht/LICENSE-MIT
@@ -0,0 +1,23 @@
+Permission is hereby granted, free of charge, to any
+person obtaining a copy of this software and associated
+documentation files (the "Software"), to deal in the
+Software without restriction, including without
+limitation the rights to use, copy, modify, merge,
+publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software
+is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice
+shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/vendor/odht/README.md b/vendor/odht/README.md
new file mode 100644
index 000000000..ef76b45dc
--- /dev/null
+++ b/vendor/odht/README.md
@@ -0,0 +1,17 @@
+![CI Status](https://github.com/rust-lang/odht/actions/workflows/ci.yml/badge.svg)
+
+# odht
+
+A Rust crate for hash tables that can be mapped from disk into memory without the need for up-front decoding.
+The goal of the implementation is to provide a data structure that
+
+- can be used exactly in the format it is stored on disk,
+- provides roughly the same performance as a `HashMap` from Rust's standard library,
+- has a completely deterministic binary representation,
+- is platform and endianness independent, so that data serialized on one system can be used on any other system, and
+- is independent of alignment requirements so that
+ - its use is not restricted to certain classes of CPUs, and
+ - the data structure can be mapped to arbitrary memory addresses.
+
+This crate is developed and maintained by the Rust compiler team for internal use within `rustc`.
+This crate will have regular breaking changes and provides no stability guarantees.
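
The README above lists the crate's design goals; the following is an editorial sketch (not part of the vendored sources) of the intended round trip, using only the public API documented in `src/lib.rs` below: build a table with `HashTableOwned`, persist its raw bytes, and re-open them with `HashTable::from_raw_bytes`. The `MyConfig` implementation and the `table.odht` file name are assumptions made for this example.

```rust
use odht::{Config, FxHashFn, HashTable, HashTableOwned};

// A hypothetical configuration: u64 keys and u32 values, encoded as
// little-endian byte arrays so the binary format is deterministic.
struct MyConfig;

impl Config for MyConfig {
    type Key = u64;
    type Value = u32;
    type EncodedKey = [u8; 8];
    type EncodedValue = [u8; 4];
    type H = FxHashFn;

    #[inline] fn encode_key(k: &u64) -> [u8; 8] { k.to_le_bytes() }
    #[inline] fn encode_value(v: &u32) -> [u8; 4] { v.to_le_bytes() }
    #[inline] fn decode_key(k: &[u8; 8]) -> u64 { u64::from_le_bytes(*k) }
    #[inline] fn decode_value(v: &[u8; 4]) -> u32 { u32::from_le_bytes(*v) }
}

fn main() -> std::io::Result<()> {
    let mut builder = HashTableOwned::<MyConfig>::with_capacity(2, 87);
    builder.insert(&1, &10);
    builder.insert(&2, &20);

    // Persist the table exactly as it is laid out in memory ...
    std::fs::write("table.odht", builder.raw_bytes())?;

    // ... and use it again without any decoding step. Only a `&[u8]` view is
    // needed, so a memory-mapped file would work just as well.
    let bytes = std::fs::read("table.odht")?;
    let table = HashTable::<MyConfig, &[u8]>::from_raw_bytes(&bytes[..]).unwrap();
    assert_eq!(table.get(&1), Some(10));
    assert_eq!(table.get(&2), Some(20));
    Ok(())
}
```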
diff --git a/vendor/odht/benches/bench.rs b/vendor/odht/benches/bench.rs
new file mode 100644
index 000000000..081e3ef9e
--- /dev/null
+++ b/vendor/odht/benches/bench.rs
@@ -0,0 +1,228 @@
+#![feature(test)]
+
+extern crate test;
+
+use odht::{Config, FxHashFn, HashTable, HashTableOwned};
+use rustc_hash::FxHashMap;
+
+#[repr(C)]
+#[derive(Copy, Clone, Hash, Eq, PartialEq)]
+struct TestKey(u64, u64);
+
+struct FxConfig;
+
+impl Config for FxConfig {
+ type Key = TestKey;
+ type Value = u32;
+
+ type EncodedKey = [u8; 16];
+ type EncodedValue = [u8; 4];
+
+ type H = FxHashFn;
+
+ #[inline]
+ fn encode_key(k: &Self::Key) -> Self::EncodedKey {
+ let mut result = [0u8; 16];
+
+ result[0..8].copy_from_slice(&k.0.to_le_bytes());
+ result[8..16].copy_from_slice(&k.1.to_le_bytes());
+
+ result
+ }
+
+ #[inline]
+ fn encode_value(v: &Self::Value) -> Self::EncodedValue {
+ v.to_le_bytes()
+ }
+
+ #[inline]
+ fn decode_key(_k: &Self::EncodedKey) -> Self::Key {
+ panic!()
+ }
+
+ #[inline]
+ fn decode_value(v: &Self::EncodedValue) -> Self::Value {
+ u32::from_le_bytes(*v)
+ }
+}
+
+fn index_contained(i: usize) -> bool {
+ i % 10 != 3
+}
+
+// Call functions being benchmarked through an #[inline(never)] wrapper
+// so that we get a clear inlining barrier to look at in profilers.
+#[inline(never)]
+fn get_value<F: Fn(&TestKey) -> Option<u32>>(f: &F, key: &TestKey) -> Option<u32> {
+ f(key)
+}
+
+#[inline(never)]
+fn insert_value<F: FnMut(TestKey, u32) -> Option<u32>>(
+ f: &mut F,
+ key: TestKey,
+ value: u32,
+) -> Option<u32> {
+ f(key, value)
+}
+
+fn generate_hash_table(
+ test_data: &[(TestKey, u32)],
+ load_factor_percent: u8,
+) -> HashTableOwned<FxConfig> {
+ let values: Vec<_> = test_data
+ .iter()
+ .enumerate()
+ .filter(|&(i, _)| index_contained(i))
+ .map(|(_, x)| x)
+ .collect();
+
+ let mut table = HashTableOwned::with_capacity(values.len(), load_factor_percent);
+
+ for (key, value) in values {
+ table.insert(key, value);
+ }
+
+ table
+}
+
+fn generate_std_hash_table(test_data: &[(TestKey, u32)]) -> FxHashMap<TestKey, u32> {
+ let mut table = FxHashMap::default();
+
+ let values: Vec<_> = test_data
+ .iter()
+ .enumerate()
+ .filter(|&(i, _)| index_contained(i))
+ .map(|(_, x)| *x)
+ .collect();
+
+ for (key, value) in values {
+ table.insert(key, value);
+ }
+
+ table
+}
+
+fn generate_test_data(num_values: usize) -> Vec<(TestKey, u32)> {
+ use rand::prelude::*;
+
+ (0..num_values)
+ .map(|_| (TestKey(random(), random()), random()))
+ .collect()
+}
+
+const LOOKUP_ITERATIONS: usize = 10;
+const INSERT_ITERATIONS: usize = 10;
+
+fn bench_odht_fx_lookup(b: &mut test::Bencher, num_values: usize, load_factor_percent: u8) {
+ let test_data = crate::generate_test_data(num_values);
+ let table = crate::generate_hash_table(&test_data, load_factor_percent);
+
+ let mut serialized = table.raw_bytes().to_owned();
+
+ // Shift the data so we mess up alignment. We want to test the table under
+ // realistic conditions where we cannot expect any specific alignment.
+ serialized.insert(0, 0xFFu8);
+
+ let table = HashTable::<FxConfig, _>::from_raw_bytes(&serialized[1..]).unwrap();
+
+ let get = |key: &TestKey| table.get(key);
+
+ b.iter(|| {
+ for _ in 0..LOOKUP_ITERATIONS {
+ for (index, &(key, value)) in test_data.iter().enumerate() {
+ if index_contained(index) {
+ assert!(get_value(&get, &key) == Some(value));
+ } else {
+ assert!(get_value(&get, &key).is_none());
+ }
+ }
+ }
+ })
+}
+
+fn bench_odht_fx_insert(b: &mut test::Bencher, num_values: usize, load_factor_percent: u8) {
+ let test_data = crate::generate_test_data(num_values);
+
+ b.iter(|| {
+ for _ in 0..INSERT_ITERATIONS {
+ let mut table = HashTableOwned::<FxConfig>::with_capacity(10, load_factor_percent);
+
+ let mut insert = |key: TestKey, value: u32| table.insert(&key, &value);
+
+ for (key, value) in test_data.iter() {
+ assert!(insert_value(&mut insert, *key, *value).is_none());
+ }
+ }
+ })
+}
+
+fn bench_std_fx_lookup(b: &mut test::Bencher, num_values: usize) {
+ let test_data = crate::generate_test_data(num_values);
+ let table = crate::generate_std_hash_table(&test_data);
+
+ let get = |key: &TestKey| table.get(key).cloned();
+
+ b.iter(|| {
+ for _ in 0..LOOKUP_ITERATIONS {
+ for (index, &(key, value)) in test_data.iter().enumerate() {
+ if index_contained(index) {
+ assert!(get_value(&get, &key) == Some(value));
+ } else {
+ assert!(get_value(&get, &key).is_none());
+ }
+ }
+ }
+ })
+}
+
+fn bench_std_fx_insert(b: &mut test::Bencher, num_values: usize) {
+ let test_data = crate::generate_test_data(num_values);
+
+ b.iter(|| {
+ for _ in 0..INSERT_ITERATIONS {
+ let mut table = FxHashMap::default();
+
+ let mut insert = |key: TestKey, value: u32| -> Option<u32> { table.insert(key, value) };
+
+ for (key, value) in test_data.iter() {
+ assert!(insert_value(&mut insert, *key, *value).is_none());
+ }
+ }
+ })
+}
+
+macro_rules! bench {
+ ($name:ident, $num_values:expr) => {
+ mod $name {
+ #[bench]
+ fn lookup_odht_fx_load_87(b: &mut test::Bencher) {
+ crate::bench_odht_fx_lookup(b, $num_values, 87);
+ }
+
+ #[bench]
+ fn insert_odht_fx_load_87(b: &mut test::Bencher) {
+ crate::bench_odht_fx_insert(b, $num_values, 87);
+ }
+
+ #[bench]
+ fn lookup_std_fx(b: &mut test::Bencher) {
+ crate::bench_std_fx_lookup(b, $num_values);
+ }
+
+ #[bench]
+ fn insert_std_fx(b: &mut test::Bencher) {
+ crate::bench_std_fx_insert(b, $num_values);
+ }
+ }
+ };
+}
+
+// These numbers are chosen so that we get an actual load factor of ~87%
+// taking into account that slot counts are always rounded up to the next
+// power of two.
+bench!(____n13, 13);
+bench!(____n55, 55);
+bench!(___n444, 444);
+bench!(__n3550, 3550);
+bench!(_n57000, 57000);
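
As an aside on the sizes chosen above: the comment explains that they target an actual load factor of roughly 87% once slot counts are rounded up to the next power of two (see `slots_needed` in `src/lib.rs` below). A rough editorial sketch of that calculation, using plain percentages instead of the crate's 16-bit fixed-point `Factor` math, might look like this; the smallest size (13) lands closer to 81% because the rounding is coarser for tiny tables.

```rust
// Approximate the realized load factor for `items` entries under a maximum
// load factor given in percent, mimicking the power-of-two rounding that
// `slots_needed` performs (the real code uses fixed-point math and also
// enforces a minimum group size).
fn approx_load_percent(items: usize, max_load_percent: usize) -> f64 {
    // ceil(items * 100 / max_load_percent), rounded up to a power of two
    let slots = ((items * 100 + max_load_percent - 1) / max_load_percent)
        .next_power_of_two();
    100.0 * items as f64 / slots as f64
}

fn main() {
    for &n in &[13usize, 55, 444, 3550, 57000] {
        // e.g. 3550 items -> 4096 slots -> ~86.7% realized load
        println!("{:>6} items -> ~{:.1}%", n, approx_load_percent(n, 87));
    }
}
```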
diff --git a/vendor/odht/src/error.rs b/vendor/odht/src/error.rs
new file mode 100644
index 000000000..5a72c265b
--- /dev/null
+++ b/vendor/odht/src/error.rs
@@ -0,0 +1,9 @@
+#[derive(Eq, PartialEq, Debug)]
+pub(crate) struct Error(pub String);
+
+impl std::error::Error for Error {}
+impl std::fmt::Display for Error {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(f, "{}", self.0)
+ }
+}
diff --git a/vendor/odht/src/fxhash.rs b/vendor/odht/src/fxhash.rs
new file mode 100644
index 000000000..a163bc3e7
--- /dev/null
+++ b/vendor/odht/src/fxhash.rs
@@ -0,0 +1,58 @@
+use crate::HashFn;
+use std::convert::TryInto;
+
+/// Implements the hash function from the rustc-hash crate.
+#[derive(Eq, PartialEq)]
+pub struct FxHashFn;
+
+impl HashFn for FxHashFn {
+ // This function is marked as #[inline] because that allows LLVM to know the
+ // actual size of `bytes` and thus eliminate all unneeded branches below.
+ #[inline]
+ fn hash(mut bytes: &[u8]) -> u32 {
+ let mut hash_value = 0;
+
+ while bytes.len() >= 8 {
+ hash_value = add_to_hash(hash_value, read_u64(bytes));
+ bytes = &bytes[8..];
+ }
+
+ if bytes.len() >= 4 {
+ hash_value = add_to_hash(
+ hash_value,
+ u32::from_le_bytes(bytes[..4].try_into().unwrap()) as u64,
+ );
+ bytes = &bytes[4..];
+ }
+
+ if bytes.len() >= 2 {
+ hash_value = add_to_hash(
+ hash_value,
+ u16::from_le_bytes(bytes[..2].try_into().unwrap()) as u64,
+ );
+ bytes = &bytes[2..];
+ }
+
+ if bytes.len() >= 1 {
+ hash_value = add_to_hash(hash_value, bytes[0] as u64);
+ }
+
+ return hash_value as u32;
+
+ #[inline]
+ fn add_to_hash(current_hash: u64, value: u64) -> u64 {
+ use std::ops::BitXor;
+ current_hash
+ .rotate_left(5)
+ .bitxor(value)
+ // This constant is part of FxHash's definition:
+ // https://github.com/rust-lang/rustc-hash/blob/5e09ea0a1/src/lib.rs#L67
+ .wrapping_mul(0x517cc1b727220a95)
+ }
+
+ #[inline]
+ fn read_u64(bytes: &[u8]) -> u64 {
+ u64::from_le_bytes(bytes[..8].try_into().unwrap())
+ }
+ }
+}
diff --git a/vendor/odht/src/lib.rs b/vendor/odht/src/lib.rs
new file mode 100644
index 000000000..fc8bb86a2
--- /dev/null
+++ b/vendor/odht/src/lib.rs
@@ -0,0 +1,963 @@
+//! This crate implements a hash table that can be used as is in its binary, on-disk format.
+//! The goal is to provide a high-performance data structure that can be used without any significant up-front decoding.
+//! The implementation makes no assumptions about alignment or endianness of the underlying data,
+//! so a table encoded on one platform can be used on any other platform and
+//! the binary data can be mapped into memory at arbitrary addresses.
+//!
+//!
+//! ## Usage
+//!
+//! In order to use the hash table one needs to implement the `Config` trait.
+//! This trait defines how the table is encoded and what hash function is used.
+//! With a `Config` in place the `HashTableOwned` type can be used to build and serialize a hash table.
+//! The `HashTable` type can then be used to create an almost zero-cost view of the serialized hash table.
+//!
+//! ```rust
+//!
+//! use odht::{HashTable, HashTableOwned, Config, FxHashFn};
+//!
+//! struct MyConfig;
+//!
+//! impl Config for MyConfig {
+//!
+//! type Key = u64;
+//! type Value = u32;
+//!
+//! type EncodedKey = [u8; 8];
+//! type EncodedValue = [u8; 4];
+//!
+//! type H = FxHashFn;
+//!
+//! #[inline] fn encode_key(k: &Self::Key) -> Self::EncodedKey { k.to_le_bytes() }
+//! #[inline] fn encode_value(v: &Self::Value) -> Self::EncodedValue { v.to_le_bytes() }
+//! #[inline] fn decode_key(k: &Self::EncodedKey) -> Self::Key { u64::from_le_bytes(*k) }
+//! #[inline] fn decode_value(v: &Self::EncodedValue) -> Self::Value { u32::from_le_bytes(*v)}
+//! }
+//!
+//! fn main() {
+//! let mut builder = HashTableOwned::<MyConfig>::with_capacity(3, 95);
+//!
+//! builder.insert(&1, &2);
+//! builder.insert(&3, &4);
+//! builder.insert(&5, &6);
+//!
+//! let serialized = builder.raw_bytes().to_owned();
+//!
+//! let table = HashTable::<MyConfig, &[u8]>::from_raw_bytes(
+//! &serialized[..]
+//! ).unwrap();
+//!
+//! assert_eq!(table.get(&1), Some(2));
+//! assert_eq!(table.get(&3), Some(4));
+//! assert_eq!(table.get(&5), Some(6));
+//! }
+//! ```
+
+#![cfg_attr(feature = "nightly", feature(core_intrinsics))]
+
+#[cfg(test)]
+extern crate quickcheck;
+
+#[cfg(feature = "nightly")]
+macro_rules! likely {
+ ($x:expr) => {
+ std::intrinsics::likely($x)
+ };
+}
+
+#[cfg(not(feature = "nightly"))]
+macro_rules! likely {
+ ($x:expr) => {
+ $x
+ };
+}
+
+#[cfg(feature = "nightly")]
+macro_rules! unlikely {
+ ($x:expr) => {
+ std::intrinsics::unlikely($x)
+ };
+}
+
+#[cfg(not(feature = "nightly"))]
+macro_rules! unlikely {
+ ($x:expr) => {
+ $x
+ };
+}
+
+mod error;
+mod fxhash;
+mod memory_layout;
+mod raw_table;
+mod swisstable_group_query;
+mod unhash;
+
+use error::Error;
+use memory_layout::Header;
+use std::borrow::{Borrow, BorrowMut};
+use swisstable_group_query::REFERENCE_GROUP_SIZE;
+
+pub use crate::fxhash::FxHashFn;
+pub use crate::unhash::UnHashFn;
+
+use crate::raw_table::{ByteArray, RawIter, RawTable, RawTableMut};
+
+/// This trait provides a complete "configuration" for a hash table, i.e. it
+/// defines the key and value types, how these are encoded and what hash
+/// function is being used.
+///
+/// Implementations of the `encode_key` and `encode_value` methods must encode
+/// the given key/value into a fixed size array. The encoding must be
+/// deterministic (i.e. no random padding bytes) and must be independent of
+/// platform endianness. It is always highly recommended to mark these methods
+/// as `#[inline]`.
+pub trait Config {
+ type Key;
+ type Value;
+
+ // The EncodedKey and EncodedValue types must always be a fixed size array of bytes,
+ // e.g. [u8; 4].
+ type EncodedKey: ByteArray;
+ type EncodedValue: ByteArray;
+
+ type H: HashFn;
+
+ /// Implementations of the `encode_key` and `encode_value` methods must encode
+ /// the given key/value into a fixed size array. See above for requirements.
+ fn encode_key(k: &Self::Key) -> Self::EncodedKey;
+
+ /// Implementations of the `encode_key` and `encode_value` methods must encode
+ /// the given key/value into a fixed size array. See above for requirements.
+ fn encode_value(v: &Self::Value) -> Self::EncodedValue;
+
+ fn decode_key(k: &Self::EncodedKey) -> Self::Key;
+ fn decode_value(v: &Self::EncodedValue) -> Self::Value;
+}
+
+/// This trait represents hash functions as used by HashTable and
+/// HashTableOwned.
+pub trait HashFn: Eq {
+ fn hash(bytes: &[u8]) -> u32;
+}
+
+/// A [HashTableOwned] keeps the underlying data on the heap and
+/// can resize itself on demand.
+#[derive(Clone)]
+pub struct HashTableOwned<C: Config> {
+ allocation: memory_layout::Allocation<C, Box<[u8]>>,
+}
+
+impl<C: Config> Default for HashTableOwned<C> {
+ fn default() -> Self {
+ HashTableOwned::with_capacity(12, 87)
+ }
+}
+
+impl<C: Config> HashTableOwned<C> {
+ /// Creates a new [HashTableOwned] that can hold at least `max_item_count`
+ /// items while maintaining the specified load factor.
+ pub fn with_capacity(max_item_count: usize, max_load_factor_percent: u8) -> HashTableOwned<C> {
+ assert!(max_load_factor_percent <= 100);
+ assert!(max_load_factor_percent > 0);
+
+ Self::with_capacity_internal(
+ max_item_count,
+ Factor::from_percent(max_load_factor_percent),
+ )
+ }
+
+ fn with_capacity_internal(max_item_count: usize, max_load_factor: Factor) -> HashTableOwned<C> {
+ let slots_needed = slots_needed(max_item_count, max_load_factor);
+ assert!(slots_needed > 0);
+
+ let allocation = memory_layout::allocate(slots_needed, 0, max_load_factor);
+
+ HashTableOwned { allocation }
+ }
+
+ /// Retrieves the value for the given key. Returns `None` if no entry is found.
+ #[inline]
+ pub fn get(&self, key: &C::Key) -> Option<C::Value> {
+ let encoded_key = C::encode_key(key);
+ if let Some(encoded_value) = self.as_raw().find(&encoded_key) {
+ Some(C::decode_value(encoded_value))
+ } else {
+ None
+ }
+ }
+
+ #[inline]
+ pub fn contains_key(&self, key: &C::Key) -> bool {
+ let encoded_key = C::encode_key(key);
+ self.as_raw().find(&encoded_key).is_some()
+ }
+
+ /// Inserts the given key-value pair into the table.
+ /// Grows the table if necessary.
+ #[inline]
+ pub fn insert(&mut self, key: &C::Key, value: &C::Value) -> Option<C::Value> {
+ let (item_count, max_item_count) = {
+ let header = self.allocation.header();
+ let max_item_count = max_item_count_for(header.slot_count(), header.max_load_factor());
+ (header.item_count(), max_item_count)
+ };
+
+ if unlikely!(item_count == max_item_count) {
+ self.grow();
+ }
+
+ debug_assert!(
+ item_count
+ < max_item_count_for(
+ self.allocation.header().slot_count(),
+ self.allocation.header().max_load_factor()
+ )
+ );
+
+ let encoded_key = C::encode_key(key);
+ let encoded_value = C::encode_value(value);
+
+ with_raw_mut(&mut self.allocation, |header, mut raw_table| {
+ if let Some(old_value) = raw_table.insert(encoded_key, encoded_value) {
+ Some(C::decode_value(&old_value))
+ } else {
+ header.set_item_count(item_count + 1);
+ None
+ }
+ })
+ }
+
+ #[inline]
+ pub fn iter(&self) -> Iter<'_, C> {
+ let (entry_metadata, entry_data) = self.allocation.data_slices();
+ Iter(RawIter::new(entry_metadata, entry_data))
+ }
+
+ pub fn from_iterator<I: IntoIterator<Item = (C::Key, C::Value)>>(
+ it: I,
+ max_load_factor_percent: u8,
+ ) -> Self {
+ let it = it.into_iter();
+
+ let known_size = match it.size_hint() {
+ (min, Some(max)) => {
+ if min == max {
+ Some(max)
+ } else {
+ None
+ }
+ }
+ _ => None,
+ };
+
+ if let Some(known_size) = known_size {
+ let mut table = HashTableOwned::with_capacity(known_size, max_load_factor_percent);
+
+ let initial_slot_count = table.allocation.header().slot_count();
+
+ for (k, v) in it {
+ table.insert(&k, &v);
+ }
+
+            // The input may have contained duplicate keys, so len() can be less than known_size.
+ assert!(table.len() <= known_size);
+ assert_eq!(table.allocation.header().slot_count(), initial_slot_count);
+
+ table
+ } else {
+ let items: Vec<_> = it.collect();
+ Self::from_iterator(items, max_load_factor_percent)
+ }
+ }
+
+ /// Constructs a [HashTableOwned] from its raw byte representation.
+    /// The provided data must have exactly the right number of bytes.
+ ///
+ /// This method has linear time complexity as it needs to make its own
+ /// copy of the given data.
+ ///
+ /// The method will verify the header of the given data and return an
+ /// error if the verification fails.
+ pub fn from_raw_bytes(data: &[u8]) -> Result<HashTableOwned<C>, Box<dyn std::error::Error>> {
+ let data = data.to_owned().into_boxed_slice();
+ let allocation = memory_layout::Allocation::from_raw_bytes(data)?;
+
+ Ok(HashTableOwned { allocation })
+ }
+
+ #[inline]
+ pub unsafe fn from_raw_bytes_unchecked(data: &[u8]) -> HashTableOwned<C> {
+ let data = data.to_owned().into_boxed_slice();
+ let allocation = memory_layout::Allocation::from_raw_bytes_unchecked(data);
+
+ HashTableOwned { allocation }
+ }
+
+ /// Returns the number of items stored in the hash table.
+ #[inline]
+ pub fn len(&self) -> usize {
+ self.allocation.header().item_count()
+ }
+
+ #[inline]
+ pub fn raw_bytes(&self) -> &[u8] {
+ self.allocation.raw_bytes()
+ }
+
+ #[inline]
+ fn as_raw(&self) -> RawTable<'_, C::EncodedKey, C::EncodedValue, C::H> {
+ let (entry_metadata, entry_data) = self.allocation.data_slices();
+ RawTable::new(entry_metadata, entry_data)
+ }
+
+ #[inline(never)]
+ #[cold]
+ fn grow(&mut self) {
+ let initial_slot_count = self.allocation.header().slot_count();
+ let initial_item_count = self.allocation.header().item_count();
+ let initial_max_load_factor = self.allocation.header().max_load_factor();
+
+ let mut new_table =
+ Self::with_capacity_internal(initial_item_count * 2, initial_max_load_factor);
+
+ // Copy the entries over with the internal `insert_entry()` method,
+ // which allows us to do insertions without hashing everything again.
+ {
+ with_raw_mut(&mut new_table.allocation, |header, mut raw_table| {
+ for (_, entry_data) in self.as_raw().iter() {
+ raw_table.insert(entry_data.key, entry_data.value);
+ }
+
+ header.set_item_count(initial_item_count);
+ });
+ }
+
+ *self = new_table;
+
+ assert!(
+ self.allocation.header().slot_count() >= 2 * initial_slot_count,
+ "Allocation did not grow properly. Slot count is {} but was expected to be \
+ at least {}",
+ self.allocation.header().slot_count(),
+ 2 * initial_slot_count
+ );
+ assert_eq!(self.allocation.header().item_count(), initial_item_count);
+ assert_eq!(
+ self.allocation.header().max_load_factor(),
+ initial_max_load_factor
+ );
+ }
+}
+
+impl<C: Config> std::fmt::Debug for HashTableOwned<C> {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ let header = self.allocation.header();
+
+ writeln!(
+ f,
+ "(item_count={}, max_item_count={}, max_load_factor={}%)",
+ header.item_count(),
+ max_item_count_for(header.slot_count(), header.max_load_factor()),
+ header.max_load_factor().to_percent(),
+ )?;
+
+ writeln!(f, "{:?}", self.as_raw())
+ }
+}
+
+/// The [HashTable] type provides a cheap way to construct a non-resizable view
+/// of a persisted hash table. If the underlying data storage `D` implements
+/// `BorrowMut<[u8]>` then the table can be modified in place.
+#[derive(Clone, Copy)]
+pub struct HashTable<C: Config, D: Borrow<[u8]>> {
+ allocation: memory_layout::Allocation<C, D>,
+}
+
+impl<C: Config, D: Borrow<[u8]>> HashTable<C, D> {
+ /// Constructs a [HashTable] from its raw byte representation.
+    /// The provided data must have exactly the right number of bytes.
+ ///
+ /// This method has constant time complexity and will only verify the header
+ /// data of the hash table. It will not copy any data.
+ pub fn from_raw_bytes(data: D) -> Result<HashTable<C, D>, Box<dyn std::error::Error>> {
+ let allocation = memory_layout::Allocation::from_raw_bytes(data)?;
+ Ok(HashTable { allocation })
+ }
+
+ /// Constructs a [HashTable] from its raw byte representation without doing
+ /// any verification of the underlying data. It is the user's responsibility
+ /// to make sure that the underlying data is actually a valid hash table.
+ ///
+ /// The [HashTable::from_raw_bytes] method provides a safe alternative to this
+ /// method.
+ #[inline]
+ pub unsafe fn from_raw_bytes_unchecked(data: D) -> HashTable<C, D> {
+ HashTable {
+ allocation: memory_layout::Allocation::from_raw_bytes_unchecked(data),
+ }
+ }
+
+ #[inline]
+ pub fn get(&self, key: &C::Key) -> Option<C::Value> {
+ let encoded_key = C::encode_key(key);
+ self.as_raw().find(&encoded_key).map(C::decode_value)
+ }
+
+ #[inline]
+ pub fn contains_key(&self, key: &C::Key) -> bool {
+ let encoded_key = C::encode_key(key);
+ self.as_raw().find(&encoded_key).is_some()
+ }
+
+ #[inline]
+ pub fn iter(&self) -> Iter<'_, C> {
+ let (entry_metadata, entry_data) = self.allocation.data_slices();
+ Iter(RawIter::new(entry_metadata, entry_data))
+ }
+
+ /// Returns the number of items stored in the hash table.
+ #[inline]
+ pub fn len(&self) -> usize {
+ self.allocation.header().item_count()
+ }
+
+ #[inline]
+ pub fn raw_bytes(&self) -> &[u8] {
+ self.allocation.raw_bytes()
+ }
+
+ #[inline]
+ fn as_raw(&self) -> RawTable<'_, C::EncodedKey, C::EncodedValue, C::H> {
+ let (entry_metadata, entry_data) = self.allocation.data_slices();
+ RawTable::new(entry_metadata, entry_data)
+ }
+}
+
+impl<C: Config, D: Borrow<[u8]> + BorrowMut<[u8]>> HashTable<C, D> {
+ pub fn init_in_place(
+ mut data: D,
+ max_item_count: usize,
+ max_load_factor_percent: u8,
+ ) -> Result<HashTable<C, D>, Box<dyn std::error::Error>> {
+ let max_load_factor = Factor::from_percent(max_load_factor_percent);
+ let byte_count = bytes_needed_internal::<C>(max_item_count, max_load_factor);
+ if data.borrow_mut().len() != byte_count {
+ return Err(Error(format!(
+ "byte slice to initialize has wrong length ({} instead of {})",
+ data.borrow_mut().len(),
+ byte_count
+ )))?;
+ }
+
+ let slot_count = slots_needed(max_item_count, max_load_factor);
+ let allocation = memory_layout::init_in_place::<C, _>(data, slot_count, 0, max_load_factor);
+ Ok(HashTable { allocation })
+ }
+
+ /// Inserts the given key-value pair into the table.
+ /// Unlike [HashTableOwned::insert] this method cannot grow the underlying table
+ /// if there is not enough space for the new item. Instead the call will panic.
+ #[inline]
+ pub fn insert(&mut self, key: &C::Key, value: &C::Value) -> Option<C::Value> {
+ let item_count = self.allocation.header().item_count();
+ let max_load_factor = self.allocation.header().max_load_factor();
+ let slot_count = self.allocation.header().slot_count();
+        // FIXME: This is actually a bit too conservative because it does not account for
+ // cases where an entry is overwritten and thus the item count does not
+ // change.
+ assert!(item_count < max_item_count_for(slot_count, max_load_factor));
+
+ let encoded_key = C::encode_key(key);
+ let encoded_value = C::encode_value(value);
+
+ with_raw_mut(&mut self.allocation, |header, mut raw_table| {
+ if let Some(old_value) = raw_table.insert(encoded_key, encoded_value) {
+ Some(C::decode_value(&old_value))
+ } else {
+ header.set_item_count(item_count + 1);
+ None
+ }
+ })
+ }
+}
+
+/// Computes the exact number of bytes needed for storing a HashTable with the
+/// given max item count and load factor. The result can be used for allocating
+/// storage to be passed into [HashTable::init_in_place].
+pub fn bytes_needed<C: Config>(max_item_count: usize, max_load_factor_percent: u8) -> usize {
+ let max_load_factor = Factor::from_percent(max_load_factor_percent);
+ bytes_needed_internal::<C>(max_item_count, max_load_factor)
+}
+
+fn bytes_needed_internal<C: Config>(max_item_count: usize, max_load_factor: Factor) -> usize {
+ let slot_count = slots_needed(max_item_count, max_load_factor);
+ memory_layout::bytes_needed::<C>(slot_count)
+}
+
+pub struct Iter<'a, C: Config>(RawIter<'a, C::EncodedKey, C::EncodedValue>);
+
+impl<'a, C: Config> Iterator for Iter<'a, C> {
+ type Item = (C::Key, C::Value);
+
+ fn next(&mut self) -> Option<Self::Item> {
+ self.0.next().map(|(_, entry)| {
+ let key = C::decode_key(&entry.key);
+ let value = C::decode_value(&entry.value);
+
+ (key, value)
+ })
+ }
+}
+
+// We use integer math here so as not to run into any issues with
+// platform-specific floating point math implementations.
+fn slots_needed(item_count: usize, max_load_factor: Factor) -> usize {
+ // Note: we round up here
+ let slots_needed = max_load_factor.apply_inverse(item_count);
+ std::cmp::max(
+ slots_needed.checked_next_power_of_two().unwrap(),
+ REFERENCE_GROUP_SIZE,
+ )
+}
+
+fn max_item_count_for(slot_count: usize, max_load_factor: Factor) -> usize {
+ // Note: we round down here
+ max_load_factor.apply(slot_count)
+}
+
+#[inline]
+fn with_raw_mut<C, M, F, R>(allocation: &mut memory_layout::Allocation<C, M>, f: F) -> R
+where
+ C: Config,
+ M: BorrowMut<[u8]>,
+ F: FnOnce(&mut Header, RawTableMut<'_, C::EncodedKey, C::EncodedValue, C::H>) -> R,
+{
+ allocation.with_mut_parts(|header, entry_metadata, entry_data| {
+ f(header, RawTableMut::new(entry_metadata, entry_data))
+ })
+}
+
+/// This type is used for computing max item counts for a given load factor
+/// efficiently. We use integer math here so that things are the same on
+/// all platforms and with all compiler settings.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+struct Factor(pub u16);
+
+impl Factor {
+ const BASE: usize = u16::MAX as usize;
+
+ #[inline]
+ fn from_percent(percent: u8) -> Factor {
+ let percent = percent as usize;
+ Factor(((percent * Self::BASE) / 100) as u16)
+ }
+
+ fn to_percent(self) -> usize {
+ (self.0 as usize * 100) / Self::BASE
+ }
+
+ // Note: we round down here
+ #[inline]
+ fn apply(self, x: usize) -> usize {
+ // Let's make sure there's no overflow during the
+ // calculation below by doing everything with 128 bits.
+ let x = x as u128;
+ let factor = self.0 as u128;
+ ((x * factor) >> 16) as usize
+ }
+
+ // Note: we round up here
+ #[inline]
+ fn apply_inverse(self, x: usize) -> usize {
+ // Let's make sure there's no overflow during the
+ // calculation below by doing everything with 128 bits.
+ let x = x as u128;
+ let factor = self.0 as u128;
+ let base = Self::BASE as u128;
+ ((base * x + factor - 1) / factor) as usize
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use std::convert::TryInto;
+
+ enum TestConfig {}
+
+ impl Config for TestConfig {
+ type EncodedKey = [u8; 4];
+ type EncodedValue = [u8; 4];
+
+ type Key = u32;
+ type Value = u32;
+
+ type H = FxHashFn;
+
+ fn encode_key(k: &Self::Key) -> Self::EncodedKey {
+ k.to_le_bytes()
+ }
+
+ fn encode_value(v: &Self::Value) -> Self::EncodedValue {
+ v.to_le_bytes()
+ }
+
+ fn decode_key(k: &Self::EncodedKey) -> Self::Key {
+ u32::from_le_bytes(k[..].try_into().unwrap())
+ }
+
+ fn decode_value(v: &Self::EncodedValue) -> Self::Value {
+ u32::from_le_bytes(v[..].try_into().unwrap())
+ }
+ }
+
+ fn make_test_items(count: usize) -> Vec<(u32, u32)> {
+ if count == 0 {
+ return vec![];
+ }
+
+ let mut items = vec![];
+
+ if count > 1 {
+ let steps = (count - 1) as u32;
+ let step = u32::MAX / steps;
+
+ for i in 0..steps {
+ let x = i * step;
+ items.push((x, u32::MAX - x));
+ }
+ }
+
+ items.push((u32::MAX, 0));
+
+ items.sort();
+ items.dedup();
+ assert_eq!(items.len(), count);
+
+ items
+ }
+
+ #[test]
+ fn from_iterator() {
+ for count in 0..33 {
+ let items = make_test_items(count);
+ let table = HashTableOwned::<TestConfig>::from_iterator(items.clone(), 95);
+ assert_eq!(table.len(), items.len());
+
+ let mut actual_items: Vec<_> = table.iter().collect();
+ actual_items.sort();
+
+ assert_eq!(items, actual_items);
+ }
+ }
+
+ #[test]
+ fn init_in_place() {
+ for count in 0..33 {
+ let items = make_test_items(count);
+ let byte_count = bytes_needed::<TestConfig>(items.len(), 87);
+ let data = vec![0u8; byte_count];
+
+ let mut table =
+ HashTable::<TestConfig, _>::init_in_place(data, items.len(), 87).unwrap();
+
+ for (i, (k, v)) in items.iter().enumerate() {
+ assert_eq!(table.len(), i);
+ assert_eq!(table.insert(k, v), None);
+ assert_eq!(table.len(), i + 1);
+
+ // Make sure we still can find all items previously inserted.
+ for (k, v) in items.iter().take(i) {
+ assert_eq!(table.get(k), Some(*v));
+ }
+ }
+
+ let mut actual_items: Vec<_> = table.iter().collect();
+ actual_items.sort();
+
+ assert_eq!(items, actual_items);
+ }
+ }
+
+ #[test]
+ fn hash_table_at_different_alignments() {
+ let items = make_test_items(33);
+
+ let mut serialized = {
+ let table: HashTableOwned<TestConfig> =
+ HashTableOwned::from_iterator(items.clone(), 95);
+
+ assert_eq!(table.len(), items.len());
+
+ table.raw_bytes().to_owned()
+ };
+
+ for alignment_shift in 0..4 {
+ let data = &serialized[alignment_shift..];
+
+ let table = HashTable::<TestConfig, _>::from_raw_bytes(data).unwrap();
+
+ assert_eq!(table.len(), items.len());
+
+ for (key, value) in items.iter() {
+ assert_eq!(table.get(key), Some(*value));
+ }
+
+ serialized.insert(0, 0xFFu8);
+ }
+ }
+
+ #[test]
+ fn load_factor_and_item_count() {
+ assert_eq!(
+ slots_needed(0, Factor::from_percent(100)),
+ REFERENCE_GROUP_SIZE
+ );
+ assert_eq!(slots_needed(6, Factor::from_percent(60)), 16);
+ assert_eq!(slots_needed(5, Factor::from_percent(50)), 16);
+ assert_eq!(slots_needed(5, Factor::from_percent(49)), 16);
+ assert_eq!(slots_needed(1000, Factor::from_percent(100)), 1024);
+
+        // Factor can never be a full 100% because of the rounding involved.
+ assert_eq!(max_item_count_for(10, Factor::from_percent(100)), 9);
+ assert_eq!(max_item_count_for(10, Factor::from_percent(50)), 4);
+ assert_eq!(max_item_count_for(11, Factor::from_percent(50)), 5);
+ assert_eq!(max_item_count_for(12, Factor::from_percent(50)), 5);
+ }
+
+ #[test]
+ fn grow() {
+ let items = make_test_items(100);
+ let mut table = HashTableOwned::<TestConfig>::with_capacity(10, 87);
+
+ for (key, value) in items.iter() {
+ assert_eq!(table.insert(key, value), None);
+ }
+ }
+
+ #[test]
+ fn factor_from_percent() {
+ assert_eq!(Factor::from_percent(100), Factor(u16::MAX));
+ assert_eq!(Factor::from_percent(0), Factor(0));
+ assert_eq!(Factor::from_percent(50), Factor(u16::MAX / 2));
+ }
+
+ #[test]
+ fn factor_apply() {
+ assert_eq!(Factor::from_percent(100).apply(12345), 12344);
+ assert_eq!(Factor::from_percent(0).apply(12345), 0);
+ assert_eq!(Factor::from_percent(50).apply(66), 32);
+
+ // Make sure we can handle large numbers without overflow
+ assert_basically_equal(Factor::from_percent(100).apply(usize::MAX), usize::MAX);
+ }
+
+ #[test]
+ fn factor_apply_inverse() {
+ assert_eq!(Factor::from_percent(100).apply_inverse(12345), 12345);
+ assert_eq!(Factor::from_percent(10).apply_inverse(100), 1001);
+ assert_eq!(Factor::from_percent(50).apply_inverse(33), 67);
+
+        // Make sure we can handle large numbers without overflow
+ assert_basically_equal(
+ Factor::from_percent(100).apply_inverse(usize::MAX),
+ usize::MAX,
+ );
+ }
+
+ fn assert_basically_equal(x: usize, y: usize) {
+ let larger_number = std::cmp::max(x, y) as f64;
+ let abs_difference = (x as f64 - y as f64).abs();
+ let difference_in_percent = (abs_difference / larger_number) * 100.0;
+
+ const MAX_ALLOWED_DIFFERENCE_IN_PERCENT: f64 = 0.01;
+
+ assert!(
+ difference_in_percent < MAX_ALLOWED_DIFFERENCE_IN_PERCENT,
+            "{} and {} differ by {:.4} percent but the maximum allowed difference \
+ is {:.2} percent. Large differences might be caused by integer overflow.",
+ x,
+ y,
+ difference_in_percent,
+ MAX_ALLOWED_DIFFERENCE_IN_PERCENT
+ );
+ }
+
+ mod quickchecks {
+ use super::*;
+ use crate::raw_table::ByteArray;
+ use quickcheck::{Arbitrary, Gen};
+ use rustc_hash::FxHashMap;
+
+ #[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)]
+ struct Bytes<const BYTE_COUNT: usize>([u8; BYTE_COUNT]);
+
+ impl<const L: usize> Arbitrary for Bytes<L> {
+ fn arbitrary(gen: &mut Gen) -> Self {
+ let mut xs = [0; L];
+ for x in xs.iter_mut() {
+ *x = u8::arbitrary(gen);
+ }
+ Bytes(xs)
+ }
+ }
+
+ impl<const L: usize> Default for Bytes<L> {
+ fn default() -> Self {
+ Bytes([0; L])
+ }
+ }
+
+ impl<const L: usize> ByteArray for Bytes<L> {
+ #[inline(always)]
+ fn zeroed() -> Self {
+ Bytes([0u8; L])
+ }
+
+ #[inline(always)]
+ fn as_slice(&self) -> &[u8] {
+ &self.0[..]
+ }
+
+ #[inline(always)]
+ fn equals(&self, other: &Self) -> bool {
+ self.as_slice() == other.as_slice()
+ }
+ }
+
+ macro_rules! mk_quick_tests {
+ ($name: ident, $key_len:expr, $value_len:expr) => {
+ mod $name {
+ use super::*;
+ use quickcheck::quickcheck;
+
+ struct Cfg;
+
+ type Key = Bytes<$key_len>;
+ type Value = Bytes<$value_len>;
+
+ impl Config for Cfg {
+ type EncodedKey = Key;
+ type EncodedValue = Value;
+
+ type Key = Key;
+ type Value = Value;
+
+ type H = FxHashFn;
+
+ fn encode_key(k: &Self::Key) -> Self::EncodedKey {
+ *k
+ }
+
+ fn encode_value(v: &Self::Value) -> Self::EncodedValue {
+ *v
+ }
+
+ fn decode_key(k: &Self::EncodedKey) -> Self::Key {
+ *k
+ }
+
+ fn decode_value(v: &Self::EncodedValue) -> Self::Value {
+ *v
+ }
+ }
+
+ fn from_std_hashmap(m: &FxHashMap<Key, Value>) -> HashTableOwned<Cfg> {
+ HashTableOwned::<Cfg>::from_iterator(m.iter().map(|(x, y)| (*x, *y)), 87)
+ }
+
+ quickcheck! {
+ fn len(xs: FxHashMap<Key, Value>) -> bool {
+ let table = from_std_hashmap(&xs);
+
+ xs.len() == table.len()
+ }
+ }
+
+ quickcheck! {
+ fn lookup(xs: FxHashMap<Key, Value>) -> bool {
+ let table = from_std_hashmap(&xs);
+ xs.iter().all(|(k, v)| table.get(k) == Some(*v))
+ }
+ }
+
+ quickcheck! {
+ fn insert_with_duplicates(xs: Vec<(Key, Value)>) -> bool {
+ let mut reference = FxHashMap::default();
+ let mut table = HashTableOwned::<Cfg>::default();
+
+ for (k, v) in xs {
+ let expected = reference.insert(k, v);
+ let actual = table.insert(&k, &v);
+
+ if expected != actual {
+ return false;
+ }
+ }
+
+ true
+ }
+ }
+
+ quickcheck! {
+ fn bytes_deterministic(xs: FxHashMap<Key, Value>) -> bool {
+ // NOTE: We only guarantee this given the exact same
+ // insertion order.
+ let table0 = from_std_hashmap(&xs);
+ let table1 = from_std_hashmap(&xs);
+
+ table0.raw_bytes() == table1.raw_bytes()
+ }
+ }
+
+ quickcheck! {
+ fn from_iterator_vs_manual_insertion(xs: Vec<(Key, Value)>) -> bool {
+ let mut table0 = HashTableOwned::<Cfg>::with_capacity(xs.len(), 87);
+
+ for (k, v) in xs.iter() {
+ table0.insert(k, v);
+ }
+
+ let table1 = HashTableOwned::<Cfg>::from_iterator(xs.into_iter(), 87);
+
+ // Requiring bit for bit equality might be a bit too much in this case,
+ // as long as it works ...
+ table0.raw_bytes() == table1.raw_bytes()
+ }
+ }
+ }
+ };
+ }
+
+ // Test zero sized key and values
+ mk_quick_tests!(k0_v0, 0, 0);
+ mk_quick_tests!(k1_v0, 1, 0);
+ mk_quick_tests!(k2_v0, 2, 0);
+ mk_quick_tests!(k3_v0, 3, 0);
+ mk_quick_tests!(k4_v0, 4, 0);
+ mk_quick_tests!(k8_v0, 8, 0);
+ mk_quick_tests!(k15_v0, 15, 0);
+ mk_quick_tests!(k16_v0, 16, 0);
+ mk_quick_tests!(k17_v0, 17, 0);
+ mk_quick_tests!(k63_v0, 63, 0);
+ mk_quick_tests!(k64_v0, 64, 0);
+
+ // Test a few different key sizes
+ mk_quick_tests!(k2_v4, 2, 4);
+ mk_quick_tests!(k4_v4, 4, 4);
+ mk_quick_tests!(k8_v4, 8, 4);
+ mk_quick_tests!(k17_v4, 17, 4);
+ mk_quick_tests!(k20_v4, 20, 4);
+ mk_quick_tests!(k64_v4, 64, 4);
+
+ // Test a few different value sizes
+ mk_quick_tests!(k16_v1, 16, 1);
+ mk_quick_tests!(k16_v2, 16, 2);
+ mk_quick_tests!(k16_v3, 16, 3);
+ mk_quick_tests!(k16_v4, 16, 4);
+ mk_quick_tests!(k16_v8, 16, 8);
+ mk_quick_tests!(k16_v16, 16, 16);
+ mk_quick_tests!(k16_v17, 16, 17);
+ }
+}
diff --git a/vendor/odht/src/memory_layout.rs b/vendor/odht/src/memory_layout.rs
new file mode 100644
index 000000000..de59e9898
--- /dev/null
+++ b/vendor/odht/src/memory_layout.rs
@@ -0,0 +1,369 @@
+use std::{
+ borrow::{Borrow, BorrowMut},
+ convert::TryInto,
+ marker::PhantomData,
+ mem::{align_of, size_of},
+};
+
+use crate::{
+ error::Error,
+ raw_table::{Entry, EntryMetadata, RawTable},
+ Factor,
+};
+use crate::{swisstable_group_query::REFERENCE_GROUP_SIZE, Config};
+
+const CURRENT_FILE_FORMAT_VERSION: [u8; 4] = [0, 0, 0, 2];
+
+#[repr(C)]
+#[derive(Clone)]
+pub(crate) struct Header {
+ tag: [u8; 4],
+ size_of_metadata: u8,
+ size_of_key: u8,
+ size_of_value: u8,
+ size_of_header: u8,
+
+ item_count: [u8; 8],
+ slot_count: [u8; 8],
+
+ file_format_version: [u8; 4],
+ max_load_factor: [u8; 2],
+
+ // Let's keep things at least 8 byte aligned
+ padding: [u8; 2],
+}
+
+const HEADER_TAG: [u8; 4] = *b"ODHT";
+const HEADER_SIZE: usize = size_of::<Header>();
+
+impl Header {
+ pub fn sanity_check<C: Config>(&self, raw_bytes_len: usize) -> Result<(), Error> {
+ assert!(align_of::<Header>() == 1);
+ assert!(HEADER_SIZE % 8 == 0);
+
+ if self.tag != HEADER_TAG {
+ return Err(Error(format!(
+ "Expected header tag {:?} but found {:?}",
+ HEADER_TAG, self.tag
+ )));
+ }
+
+ if self.file_format_version != CURRENT_FILE_FORMAT_VERSION {
+ return Err(Error(format!(
+ "Expected file format version {:?} but found {:?}",
+ CURRENT_FILE_FORMAT_VERSION, self.file_format_version
+ )));
+ }
+
+ check_expected_size::<EntryMetadata>("EntryMetadata", self.size_of_metadata)?;
+ check_expected_size::<C::EncodedKey>("Config::EncodedKey", self.size_of_key)?;
+ check_expected_size::<C::EncodedValue>("Config::EncodedValue", self.size_of_value)?;
+ check_expected_size::<Header>("Header", self.size_of_header)?;
+
+ if raw_bytes_len != bytes_needed::<C>(self.slot_count()) {
+ return Err(Error(format!(
+ "Provided allocation has wrong size for slot count {}. \
+ The allocation's size is {} but the expected size is {}.",
+ self.slot_count(),
+ raw_bytes_len,
+ bytes_needed::<C>(self.slot_count()),
+ )));
+ }
+
+ // This should never actually be a problem because it should be impossible to
+ // create the underlying memory slice in the first place:
+ assert!(u64::from_le_bytes(self.slot_count) <= usize::MAX as u64);
+
+ if !self.slot_count().is_power_of_two() {
+ return Err(Error(format!(
+ "Slot count of hashtable should be a power of two but is {}",
+ self.slot_count()
+ )));
+ }
+
+ return Ok(());
+
+ fn check_expected_size<T>(name: &str, expected_size: u8) -> Result<(), Error> {
+ if expected_size as usize != size_of::<T>() {
+ Err(Error(format!(
+ "Expected size of {} to be {} but the encoded \
+ table specifies {}. This indicates an encoding mismatch.",
+ name,
+ size_of::<T>(),
+ expected_size
+ )))
+ } else {
+ Ok(())
+ }
+ }
+ }
+
+ #[inline]
+ pub fn item_count(&self) -> usize {
+ u64::from_le_bytes(self.item_count) as usize
+ }
+
+ #[inline]
+ pub fn set_item_count(&mut self, item_count: usize) {
+ self.item_count = (item_count as u64).to_le_bytes();
+ }
+
+ #[inline]
+ pub fn slot_count(&self) -> usize {
+ u64::from_le_bytes(self.slot_count) as usize
+ }
+
+ #[inline]
+ pub fn max_load_factor(&self) -> Factor {
+ Factor(u16::from_le_bytes(self.max_load_factor))
+ }
+
+ #[inline]
+ fn metadata_offset<C: Config>(&self) -> isize {
+ self.entry_data_offset() + self.entry_data_size_in_bytes::<C>() as isize
+ }
+
+ #[inline]
+ fn entry_data_size_in_bytes<C: Config>(&self) -> usize {
+ let slot_count = self.slot_count();
+ let size_of_entry = size_of::<Entry<C::EncodedKey, C::EncodedValue>>();
+ slot_count * size_of_entry
+ }
+
+ #[inline]
+ fn entry_data_offset(&self) -> isize {
+ HEADER_SIZE as isize
+ }
+
+ fn initialize<C: Config>(
+ raw_bytes: &mut [u8],
+ slot_count: usize,
+ item_count: usize,
+ max_load_factor: Factor,
+ ) {
+ assert_eq!(raw_bytes.len(), bytes_needed::<C>(slot_count));
+
+ let header = Header {
+ tag: HEADER_TAG,
+ size_of_metadata: size_of::<EntryMetadata>().try_into().unwrap(),
+ size_of_key: size_of::<C::EncodedKey>().try_into().unwrap(),
+ size_of_value: size_of::<C::EncodedValue>().try_into().unwrap(),
+ size_of_header: size_of::<Header>().try_into().unwrap(),
+ item_count: (item_count as u64).to_le_bytes(),
+ slot_count: (slot_count as u64).to_le_bytes(),
+ file_format_version: CURRENT_FILE_FORMAT_VERSION,
+ max_load_factor: max_load_factor.0.to_le_bytes(),
+ padding: [0u8; 2],
+ };
+
+ assert_eq!(header.sanity_check::<C>(raw_bytes.len()), Ok(()));
+
+ unsafe {
+ *(raw_bytes.as_mut_ptr() as *mut Header) = header;
+ }
+ }
+}
+
+/// An allocation holds a byte array that is guaranteed to conform to the
+/// hash table's binary layout.
+#[derive(Clone, Copy)]
+pub(crate) struct Allocation<C, M>
+where
+ C: Config,
+{
+ bytes: M,
+ _config: PhantomData<C>,
+}
+
+impl<C, M> Allocation<C, M>
+where
+ C: Config,
+ M: Borrow<[u8]>,
+{
+ pub fn from_raw_bytes(raw_bytes: M) -> Result<Allocation<C, M>, Error> {
+ let allocation = Allocation {
+ bytes: raw_bytes,
+ _config: PhantomData::default(),
+ };
+
+ allocation
+ .header()
+ .sanity_check::<C>(allocation.bytes.borrow().len())?;
+
+ // Check that the hash function provides the expected hash values.
+ {
+ let (entry_metadata, entry_data) = allocation.data_slices();
+ RawTable::<C::EncodedKey, C::EncodedValue, C::H>::new(entry_metadata, entry_data)
+ .sanity_check_hashes(10)?;
+ }
+
+ Ok(allocation)
+ }
+
+ #[inline]
+ pub unsafe fn from_raw_bytes_unchecked(raw_bytes: M) -> Allocation<C, M> {
+ Allocation {
+ bytes: raw_bytes,
+ _config: PhantomData::default(),
+ }
+ }
+
+ #[inline]
+ pub fn header(&self) -> &Header {
+ let raw_bytes = self.bytes.borrow();
+ debug_assert!(raw_bytes.len() >= HEADER_SIZE);
+
+ let header: &Header = unsafe { &*(raw_bytes.as_ptr() as *const Header) };
+
+ debug_assert_eq!(header.sanity_check::<C>(raw_bytes.len()), Ok(()));
+
+ header
+ }
+
+ #[inline]
+ pub fn data_slices(&self) -> (&[EntryMetadata], &[Entry<C::EncodedKey, C::EncodedValue>]) {
+ let raw_bytes = self.bytes.borrow();
+ let slot_count = self.header().slot_count();
+ let entry_data_offset = self.header().entry_data_offset();
+ let metadata_offset = self.header().metadata_offset::<C>();
+
+ let entry_metadata = unsafe {
+ std::slice::from_raw_parts(
+ raw_bytes.as_ptr().offset(metadata_offset) as *const EntryMetadata,
+ slot_count + REFERENCE_GROUP_SIZE,
+ )
+ };
+
+ let entry_data = unsafe {
+ std::slice::from_raw_parts(
+ raw_bytes.as_ptr().offset(entry_data_offset)
+ as *const Entry<C::EncodedKey, C::EncodedValue>,
+ slot_count,
+ )
+ };
+
+ debug_assert_eq!(
+ entry_data.as_ptr_range().start as usize,
+ raw_bytes.as_ptr_range().start as usize + HEADER_SIZE,
+ );
+
+ debug_assert_eq!(
+ entry_data.as_ptr_range().end as usize,
+ entry_metadata.as_ptr_range().start as usize,
+ );
+
+ debug_assert_eq!(
+ raw_bytes.as_ptr_range().end as usize,
+ entry_metadata.as_ptr_range().end as usize,
+ );
+
+ (entry_metadata, entry_data)
+ }
+
+ #[inline]
+ pub fn raw_bytes(&self) -> &[u8] {
+ self.bytes.borrow()
+ }
+}
+
+impl<C, M> Allocation<C, M>
+where
+ C: Config,
+ M: BorrowMut<[u8]>,
+{
+ #[inline]
+ pub fn with_mut_parts<R>(
+ &mut self,
+ f: impl FnOnce(
+ &mut Header,
+ &mut [EntryMetadata],
+ &mut [Entry<C::EncodedKey, C::EncodedValue>],
+ ) -> R,
+ ) -> R {
+ let raw_bytes = self.bytes.borrow_mut();
+
+        // Copy the address as an integer so we can use it for the debug assertion
+ // below without accessing `raw_bytes` again.
+ let _raw_bytes_end_addr = raw_bytes.as_ptr_range().end as usize;
+
+ let (header, rest) = raw_bytes.split_at_mut(HEADER_SIZE);
+ let header: &mut Header = unsafe { &mut *(header.as_mut_ptr() as *mut Header) };
+
+ let slot_count = header.slot_count();
+ let entry_data_size_in_bytes = header.entry_data_size_in_bytes::<C>();
+
+ let (entry_data_bytes, metadata_bytes) = rest.split_at_mut(entry_data_size_in_bytes);
+
+ let entry_metadata = unsafe {
+ std::slice::from_raw_parts_mut(
+ metadata_bytes.as_mut_ptr() as *mut EntryMetadata,
+ slot_count + REFERENCE_GROUP_SIZE,
+ )
+ };
+
+ let entry_data = unsafe {
+ std::slice::from_raw_parts_mut(
+ entry_data_bytes.as_mut_ptr() as *mut Entry<C::EncodedKey, C::EncodedValue>,
+ slot_count,
+ )
+ };
+
+ debug_assert_eq!(
+ entry_data.as_ptr_range().start as usize,
+ header as *mut Header as usize + HEADER_SIZE,
+ );
+
+ debug_assert_eq!(
+ entry_data.as_ptr_range().end as usize,
+ entry_metadata.as_ptr_range().start as usize,
+ );
+
+ debug_assert_eq!(
+ _raw_bytes_end_addr,
+ entry_metadata.as_ptr_range().end as usize,
+ );
+
+ f(header, entry_metadata, entry_data)
+ }
+}
+
+#[inline]
+pub(crate) fn bytes_needed<C: Config>(slot_count: usize) -> usize {
+ assert!(slot_count.is_power_of_two());
+ let size_of_entry = size_of::<Entry<C::EncodedKey, C::EncodedValue>>();
+ let size_of_metadata = size_of::<EntryMetadata>();
+
+ HEADER_SIZE
+ + slot_count * size_of_entry
+ + (slot_count + REFERENCE_GROUP_SIZE) * size_of_metadata
+}
+
+pub(crate) fn allocate<C: Config>(
+ slot_count: usize,
+ item_count: usize,
+ max_load_factor: Factor,
+) -> Allocation<C, Box<[u8]>> {
+ let bytes = vec![0u8; bytes_needed::<C>(slot_count)].into_boxed_slice();
+ init_in_place::<C, _>(bytes, slot_count, item_count, max_load_factor)
+}
+
+pub(crate) fn init_in_place<C: Config, M: BorrowMut<[u8]>>(
+ mut bytes: M,
+ slot_count: usize,
+ item_count: usize,
+ max_load_factor: Factor,
+) -> Allocation<C, M> {
+ Header::initialize::<C>(bytes.borrow_mut(), slot_count, item_count, max_load_factor);
+
+ let mut allocation = Allocation {
+ bytes,
+ _config: PhantomData::default(),
+ };
+
+ allocation.with_mut_parts(|_, metadata, data| {
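+        // 0xFF has the high bit set, so `is_empty_or_deleted` in raw_table.rs
+        // treats every freshly initialized slot as empty; the entry data is
+        // zeroed so that empty slots have all-zero contents, as expected by
+        // `sanity_check_hashes`.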
+ metadata.fill(0xFF);
+ data.fill(Default::default());
+ });
+
+ allocation
+}
diff --git a/vendor/odht/src/raw_table.rs b/vendor/odht/src/raw_table.rs
new file mode 100644
index 000000000..1ad574d9e
--- /dev/null
+++ b/vendor/odht/src/raw_table.rs
@@ -0,0 +1,659 @@
+//! This module implements the actual hash table logic. It works solely with
+//! byte arrays and does not know anything about the unencoded key and value
+//! types.
+//!
+//! The implementation makes sure that the encoded data contains no padding
+//! bytes (which makes it deterministic) and nothing in it requires any specific
+//! alignment.
+//!
+//! Many functions in this module are marked as `#[inline]`. This allows
+//! LLVM to retain the information about byte array sizes, even though they are
+//! converted to slices (of unknown size) from time to time.
+
+use crate::swisstable_group_query::{GroupQuery, GROUP_SIZE, REFERENCE_GROUP_SIZE};
+use crate::{error::Error, HashFn};
+use std::convert::TryInto;
+use std::{fmt, marker::PhantomData, mem::size_of};
+
+/// Values of this type represent key-value pairs *as they are stored on-disk*.
+/// `#[repr(C)]` makes sure we have deterministic field order and the fields
+/// being byte arrays makes sure that there are no padding bytes, alignment is
+/// not restricted, and the data is endian-independent.
+///
+/// It is a strict requirement that any `&[Entry<K, V>]` can be transmuted
+/// into a `&[u8]` and back, regardless of whether the byte array has been
+/// moved in the meantime.
+#[repr(C)]
+#[derive(PartialEq, Eq, Clone, Copy, Debug)]
+pub(crate) struct Entry<K: ByteArray, V: ByteArray> {
+ pub key: K,
+ pub value: V,
+}
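+
+// The "no padding" requirement above is enforced at runtime by the asserts in
+// `RawTable::new` and `RawTableMut::new` below, which check that
+// size_of::<Entry<K, V>>() == size_of::<K>() + size_of::<V>() and that the
+// alignment of `Entry<K, V>` is 1.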
+
+impl<K: ByteArray, V: ByteArray> Entry<K, V> {
+ #[inline]
+ fn new(key: K, value: V) -> Entry<K, V> {
+ Entry { key, value }
+ }
+}
+
+impl<K: ByteArray, V: ByteArray> Default for Entry<K, V> {
+ #[inline]
+ fn default() -> Entry<K, V> {
+ Entry {
+ key: K::zeroed(),
+ value: V::zeroed(),
+ }
+ }
+}
+
+impl<'a, K: ByteArray, V: ByteArray, H: HashFn> fmt::Debug for RawTable<'a, K, V, H> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ writeln!(
+ f,
+ "RawTable (slot_count={}, hash_fn={}) {{",
+ self.data.len(),
+ std::any::type_name::<H>(),
+ )?;
+ for (index, (&metadata, entry)) in self.metadata.iter().zip(self.data.iter()).enumerate() {
+ if is_empty_or_deleted(metadata) {
+ writeln!(f, "slot {}: empty", index)?;
+ } else {
+ writeln!(
+ f,
+ "slot {}: key={:?}, value={:?}, control_byte={}",
+ index, entry.key, entry.value, metadata
+ )?;
+ }
+ }
+ writeln!(f, "}}")
+ }
+}
+
+pub(crate) type EntryMetadata = u8;
+
+type HashValue = u32;
+
+#[inline]
+fn h1(h: HashValue) -> usize {
+ h as usize
+}
+
+#[inline]
+fn h2(h: HashValue) -> u8 {
+ const SHIFT: usize = size_of::<HashValue>() * 8 - 7;
+ (h >> SHIFT) as u8
+}
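+
+// A quick illustration of the split above: with the 32-bit `HashValue` used
+// here, SHIFT is 32 - 7 = 25, so `h2` keeps only the top 7 bits of the hash.
+// That means an occupied slot's control byte is always <= 127 and its high bit
+// is never set, which is what distinguishes it from empty slots marked with
+// EMPTY_CONTROL_BYTE (0b1000_0000) below.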
+
+/// This type implements the sequence in which slots are probed when resolving
+/// hash value conflicts. Note that probing is always done as if `GROUP_SIZE`
+/// was 16, even though on targets without sse2 `GROUP_SIZE` is going to be
+/// smaller. By keeping the probing pattern constant (i.e. always visiting
+/// the same slots in the same order, independently of `GROUP_SIZE`) we enable
+/// the hash table layout to be target-independent. In other words: A hash
+/// table created and persisted on a target with `GROUP_SIZE` x can always
+/// be loaded and read on a target with a different `GROUP_SIZE` y.
+struct ProbeSeq<const GROUP_SIZE: usize> {
+ index: usize,
+ base: usize,
+ chunk_index: usize,
+ stride: usize,
+}
+
+impl<const GROUP_SIZE: usize> ProbeSeq<GROUP_SIZE> {
+ #[inline]
+ fn new(h1: usize, mask: usize) -> Self {
+ let initial_index = h1 & mask;
+
+ ProbeSeq {
+ index: initial_index,
+ base: initial_index,
+ chunk_index: 0,
+ stride: 0,
+ }
+ }
+
+ #[inline]
+ fn advance(&mut self, mask: usize) {
+ debug_assert!(GROUP_SIZE <= REFERENCE_GROUP_SIZE);
+ debug_assert!(REFERENCE_GROUP_SIZE % GROUP_SIZE == 0);
+
+ // The effect of the code in the two branches is
+ // identical if GROUP_SIZE==REFERENCE_GROUP_SIZE
+ // but the if statement should make it very easy
+ // for the compiler to discard the more costly
+ // version and only emit the simplified version.
+
+ if GROUP_SIZE == REFERENCE_GROUP_SIZE {
+ self.stride += REFERENCE_GROUP_SIZE;
+ self.index += self.stride;
+ self.index &= mask;
+ } else {
+ self.chunk_index += GROUP_SIZE;
+
+ if self.chunk_index == REFERENCE_GROUP_SIZE {
+ self.chunk_index = 0;
+ self.stride += REFERENCE_GROUP_SIZE;
+ self.base += self.stride;
+ }
+
+ self.index = (self.base + self.chunk_index) & mask;
+ }
+ }
+}
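+
+// For reference, the sequence above is quadratic probing in units of
+// REFERENCE_GROUP_SIZE: starting from `base = h1 & mask`, the visited groups
+// begin at base, base + 16, base + 48, base + 96, ... (wrapped by `mask`), and
+// each 16-slot window is scanned in order. Smaller GROUP_SIZEs walk the same
+// windows in GROUP_SIZE-sized chunks, as the `probe_seq` test below verifies.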
+
+#[inline]
+fn group_starting_at<'a>(control_bytes: &'a [u8], index: usize) -> &'a [u8; GROUP_SIZE] {
+ debug_assert!(index < control_bytes.len() - GROUP_SIZE);
+ unsafe {
+ std::slice::from_raw_parts(control_bytes.as_ptr().offset(index as isize), GROUP_SIZE)
+ .try_into()
+ .unwrap()
+ }
+}
+
+#[inline]
+fn is_empty_or_deleted(control_byte: u8) -> bool {
+ (control_byte & EMPTY_CONTROL_BYTE) != 0
+}
+
+const EMPTY_CONTROL_BYTE: u8 = 0b1000_0000;
+
+/// This type provides a readonly view of the given table data.
+#[derive(PartialEq, Eq)]
+pub(crate) struct RawTable<'a, K, V, H>
+where
+ K: ByteArray,
+ V: ByteArray,
+ H: HashFn,
+{
+ metadata: &'a [EntryMetadata],
+ data: &'a [Entry<K, V>],
+ _hash_fn: PhantomData<H>,
+}
+
+impl<'a, K, V, H> RawTable<'a, K, V, H>
+where
+ K: ByteArray,
+ V: ByteArray,
+ H: HashFn,
+{
+ #[inline]
+ pub(crate) fn new(metadata: &'a [EntryMetadata], data: &'a [Entry<K, V>]) -> Self {
+ // Make sure Entry<K, V> does not contain any padding bytes and can be
+        // stored at arbitrary addresses.
+ assert!(size_of::<Entry<K, V>>() == size_of::<K>() + size_of::<V>());
+ assert!(std::mem::align_of::<Entry<K, V>>() == 1);
+
+ debug_assert!(data.len().is_power_of_two());
+ debug_assert!(metadata.len() == data.len() + REFERENCE_GROUP_SIZE);
+
+ Self {
+ metadata,
+ data,
+ _hash_fn: PhantomData::default(),
+ }
+ }
+
+ #[inline]
+ pub(crate) fn find(&self, key: &K) -> Option<&V> {
+ debug_assert!(self.data.len().is_power_of_two());
+ debug_assert!(self.metadata.len() == self.data.len() + REFERENCE_GROUP_SIZE);
+
+ let mask = self.data.len() - 1;
+ let hash = H::hash(key.as_slice());
+ let mut ps = ProbeSeq::<GROUP_SIZE>::new(h1(hash), mask);
+ let h2 = h2(hash);
+
+ loop {
+ let mut group_query = GroupQuery::from(group_starting_at(self.metadata, ps.index), h2);
+
+ for m in &mut group_query {
+ let index = (ps.index + m) & mask;
+
+ let entry = entry_at(self.data, index);
+
+ if likely!(entry.key.equals(key)) {
+ return Some(&entry.value);
+ }
+ }
+
+ if likely!(group_query.any_empty()) {
+ return None;
+ }
+
+ ps.advance(mask);
+ }
+ }
+
+ #[inline]
+ pub(crate) fn iter(&'a self) -> RawIter<'a, K, V> {
+ RawIter::new(self.metadata, self.data)
+ }
+
+    /// Check (for the first `slots_to_check` slots) whether the computed and
+    /// the stored hash values match.
+ ///
+ /// A mismatch is an indication that the table has been deserialized with
+ /// the wrong hash function.
+ pub(crate) fn sanity_check_hashes(&self, slots_to_check: usize) -> Result<(), Error> {
+ let slots_to_check = std::cmp::min(slots_to_check, self.data.len());
+
+ for i in 0..slots_to_check {
+ let metadata = self.metadata[i];
+ let entry = &self.data[i];
+
+ if is_empty_or_deleted(metadata) {
+ if !entry.key.is_all_zeros() || !entry.value.is_all_zeros() {
+ let message = format!("Found empty entry with non-zero contents at {}", i);
+
+ return Err(Error(message));
+ }
+ } else {
+ let hash = H::hash(entry.key.as_slice());
+ let h2 = h2(hash);
+
+ if metadata != h2 {
+ let message = format!(
+ "Control byte does not match hash value for entry {}. Expected {}, found {}.",
+ i, h2, metadata
+ );
+
+ return Err(Error(message));
+ }
+ }
+ }
+
+ Ok(())
+ }
+}
+
+#[inline]
+fn entry_at<K: ByteArray, V: ByteArray>(data: &[Entry<K, V>], index: usize) -> &Entry<K, V> {
+ debug_assert!(index < data.len());
+ unsafe { data.get_unchecked(index) }
+}
+
+#[inline]
+fn metadata_at(metadata: &[EntryMetadata], index: usize) -> &EntryMetadata {
+ debug_assert!(index < metadata.len());
+ unsafe { metadata.get_unchecked(index) }
+}
+
+/// This type provides a mutable view of the given table data. It allows for
+/// inserting new entries but does *not* allow for growing the table.
+#[derive(PartialEq, Eq)]
+pub(crate) struct RawTableMut<'a, K, V, H>
+where
+ K: ByteArray,
+ V: ByteArray,
+ H: HashFn,
+{
+ metadata: &'a mut [EntryMetadata],
+ data: &'a mut [Entry<K, V>],
+ _hash_fn: PhantomData<H>,
+}
+
+impl<'a, K, V, H> RawTableMut<'a, K, V, H>
+where
+ K: ByteArray,
+ V: ByteArray,
+ H: HashFn,
+{
+ #[inline]
+ pub(crate) fn new(metadata: &'a mut [EntryMetadata], data: &'a mut [Entry<K, V>]) -> Self {
+ // Make sure Entry<K, V> does not contain any padding bytes and can be
+        // stored at arbitrary addresses.
+ assert!(size_of::<Entry<K, V>>() == size_of::<K>() + size_of::<V>());
+ assert!(std::mem::align_of::<Entry<K, V>>() == 1);
+
+ debug_assert!(data.len().is_power_of_two());
+ debug_assert_eq!(metadata.len(), data.len() + REFERENCE_GROUP_SIZE);
+
+ Self {
+ metadata,
+ data,
+ _hash_fn: PhantomData::default(),
+ }
+ }
+
+ /// Inserts the given key value pair into the hash table.
+ ///
+ /// WARNING: This method assumes that there is free space in the hash table
+    /// somewhere. If there isn't, it will end up in an infinite loop.
+ #[inline]
+ pub(crate) fn insert(&mut self, key: K, value: V) -> Option<V> {
+ debug_assert!(self.data.len().is_power_of_two());
+ debug_assert!(self.metadata.len() == self.data.len() + REFERENCE_GROUP_SIZE);
+
+ let mask = self.data.len() - 1;
+ let hash = H::hash(key.as_slice());
+
+ let mut ps = ProbeSeq::<GROUP_SIZE>::new(h1(hash), mask);
+ let h2 = h2(hash);
+
+ loop {
+ let mut group_query = GroupQuery::from(group_starting_at(self.metadata, ps.index), h2);
+
+ for m in &mut group_query {
+ let index = (ps.index + m) & mask;
+
+ let entry = entry_at_mut(self.data, index);
+
+ if likely!(entry.key.equals(&key)) {
+ let ret = Some(entry.value);
+ entry.value = value;
+ return ret;
+ }
+ }
+
+ if let Some(first_empty) = group_query.first_empty() {
+ let index = (ps.index + first_empty) & mask;
+ *entry_at_mut(self.data, index) = Entry::new(key, value);
+ *metadata_at_mut(self.metadata, index) = h2;
+
+ if index < REFERENCE_GROUP_SIZE {
+ let first_mirror = self.data.len();
+ *metadata_at_mut(self.metadata, first_mirror + index) = h2;
+ debug_assert_eq!(
+ self.metadata[..REFERENCE_GROUP_SIZE],
+ self.metadata[self.data.len()..]
+ );
+ }
+
+ return None;
+ }
+
+ ps.advance(mask);
+ }
+ }
+}
+
+#[inline]
+fn entry_at_mut<K: ByteArray, V: ByteArray>(
+ data: &mut [Entry<K, V>],
+ index: usize,
+) -> &mut Entry<K, V> {
+ debug_assert!(index < data.len());
+ unsafe { data.get_unchecked_mut(index) }
+}
+
+#[inline]
+fn metadata_at_mut(metadata: &mut [EntryMetadata], index: usize) -> &mut EntryMetadata {
+ debug_assert!(index < metadata.len());
+ unsafe { metadata.get_unchecked_mut(index) }
+}
+
+impl<'a, K: ByteArray, V: ByteArray, H: HashFn> fmt::Debug for RawTableMut<'a, K, V, H> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ let readonly = RawTable::<'_, K, V, H>::new(self.metadata, self.data);
+ write!(f, "{:?}", readonly)
+ }
+}
+
+pub(crate) struct RawIter<'a, K, V>
+where
+ K: ByteArray,
+ V: ByteArray,
+{
+ metadata: &'a [EntryMetadata],
+ data: &'a [Entry<K, V>],
+ current_index: usize,
+}
+
+impl<'a, K, V> RawIter<'a, K, V>
+where
+ K: ByteArray,
+ V: ByteArray,
+{
+ pub(crate) fn new(metadata: &'a [EntryMetadata], data: &'a [Entry<K, V>]) -> RawIter<'a, K, V> {
+ debug_assert!(data.len().is_power_of_two());
+ debug_assert!(metadata.len() == data.len() + REFERENCE_GROUP_SIZE);
+
+ RawIter {
+ metadata,
+ data,
+ current_index: 0,
+ }
+ }
+}
+
+impl<'a, K, V> Iterator for RawIter<'a, K, V>
+where
+ K: ByteArray,
+ V: ByteArray,
+{
+ type Item = (EntryMetadata, &'a Entry<K, V>);
+
+ fn next(&mut self) -> Option<Self::Item> {
+ loop {
+ if self.current_index >= self.data.len() {
+ return None;
+ }
+
+ let index = self.current_index;
+
+ self.current_index += 1;
+
+ let entry_metadata = *metadata_at(self.metadata, index);
+ if !is_empty_or_deleted(entry_metadata) {
+ return Some((entry_metadata, entry_at(self.data, index)));
+ }
+ }
+ }
+}
+
+/// A trait that lets us abstract over different lengths of fixed size byte
+/// arrays. Don't implement it for anything other than fixed size byte arrays!
+pub trait ByteArray: Sized + Copy + Eq + Clone + PartialEq + fmt::Debug + 'static {
+ fn zeroed() -> Self;
+ fn as_slice(&self) -> &[u8];
+ fn equals(&self, other: &Self) -> bool;
+ fn is_all_zeros(&self) -> bool {
+ self.as_slice().iter().all(|b| *b == 0)
+ }
+}
+
+impl<const LEN: usize> ByteArray for [u8; LEN] {
+ #[inline(always)]
+ fn zeroed() -> Self {
+ [0u8; LEN]
+ }
+
+ #[inline(always)]
+ fn as_slice(&self) -> &[u8] {
+ &self[..]
+ }
+
+ // This custom implementation of comparing the fixed size arrays
+    // seems to make a big difference for performance (at least for
+    // 16+ byte keys).
+ #[inline]
+ fn equals(&self, other: &Self) -> bool {
+ // Most branches here are optimized away at compile time
+ // because they depend on values known at compile time.
+
+ const USIZE: usize = size_of::<usize>();
+
+ // Special case a few cases we care about
+ if size_of::<Self>() == USIZE {
+ return read_usize(&self[..], 0) == read_usize(&other[..], 0);
+ }
+
+ if size_of::<Self>() == USIZE * 2 {
+ return read_usize(&self[..], 0) == read_usize(&other[..], 0)
+ && read_usize(&self[..], 1) == read_usize(&other[..], 1);
+ }
+
+ if size_of::<Self>() == USIZE * 3 {
+ return read_usize(&self[..], 0) == read_usize(&other[..], 0)
+ && read_usize(&self[..], 1) == read_usize(&other[..], 1)
+ && read_usize(&self[..], 2) == read_usize(&other[..], 2);
+ }
+
+ if size_of::<Self>() == USIZE * 4 {
+ return read_usize(&self[..], 0) == read_usize(&other[..], 0)
+ && read_usize(&self[..], 1) == read_usize(&other[..], 1)
+ && read_usize(&self[..], 2) == read_usize(&other[..], 2)
+ && read_usize(&self[..], 3) == read_usize(&other[..], 3);
+ }
+
+ // fallback
+ return &self[..] == &other[..];
+
+ #[inline(always)]
+ fn read_usize(bytes: &[u8], index: usize) -> usize {
+ const STRIDE: usize = size_of::<usize>();
+ usize::from_le_bytes(
+ bytes[STRIDE * index..STRIDE * (index + 1)]
+ .try_into()
+ .unwrap(),
+ )
+ }
+ }
+}
+
+#[cfg(test)]
+#[rustfmt::skip]
+mod tests {
+ use super::*;
+ use crate::FxHashFn;
+
+ fn make_table<
+ I: Iterator<Item = (K, V)> + ExactSizeIterator,
+ K: ByteArray,
+ V: ByteArray,
+ H: HashFn,
+ >(
+ xs: I,
+ ) -> (Vec<EntryMetadata>, Vec<Entry<K, V>>) {
+ let size = xs.size_hint().0.next_power_of_two();
+ let mut data = vec![Entry::default(); size];
+ let mut metadata = vec![255; size + REFERENCE_GROUP_SIZE];
+
+ assert!(metadata.iter().all(|b| is_empty_or_deleted(*b)));
+
+ {
+ let mut table: RawTableMut<K, V, H> = RawTableMut {
+ metadata: &mut metadata[..],
+ data: &mut data[..],
+ _hash_fn: Default::default(),
+ };
+
+ for (k, v) in xs {
+ table.insert(k, v);
+ }
+ }
+
+ (metadata, data)
+ }
+
+ #[test]
+ fn stress() {
+ let xs: Vec<[u8; 2]> = (0 ..= u16::MAX).map(|x| x.to_le_bytes()).collect();
+
+ let (mut metadata, mut data) = make_table::<_, _, _, FxHashFn>(xs.iter().map(|x| (*x, *x)));
+
+ {
+ let table: RawTable<_, _, FxHashFn> = RawTable {
+ metadata: &metadata[..],
+ data: &data[..],
+ _hash_fn: PhantomData::default(),
+ };
+
+ for x in xs.iter() {
+ assert_eq!(table.find(x), Some(x));
+ }
+ }
+
+ // overwrite all values
+ {
+ let mut table: RawTableMut<_, _, FxHashFn> = RawTableMut {
+ metadata: &mut metadata[..],
+ data: &mut data[..],
+ _hash_fn: PhantomData::default(),
+ };
+
+ for (i, x) in xs.iter().enumerate() {
+ assert_eq!(table.insert(*x, [i as u8, (i + 1) as u8]), Some(*x));
+ }
+ }
+
+ // Check if we find the new expected values
+ {
+ let table: RawTable<_, _, FxHashFn> = RawTable {
+ metadata: &metadata[..],
+ data: &data[..],
+ _hash_fn: PhantomData::default(),
+ };
+
+ for (i, x) in xs.iter().enumerate() {
+ assert_eq!(table.find(x), Some(&[i as u8, (i + 1) as u8]));
+ }
+ }
+ }
+
+ // This test makes sure that ProbeSeq will always visit the same slots
+ // in the same order, regardless of the actual GROUP_SIZE.
+ #[test]
+ fn probe_seq() {
+ struct ReferenceProbeSeq {
+ index: usize,
+ stride: usize,
+ }
+
+ impl ReferenceProbeSeq {
+ fn new(h1: usize, mask: usize) -> ReferenceProbeSeq {
+ ReferenceProbeSeq {
+ index: h1 & mask,
+ stride: 0,
+ }
+ }
+
+ fn advance(&mut self, mask: usize) {
+ self.stride += REFERENCE_GROUP_SIZE;
+ self.index += self.stride;
+ self.index &= mask;
+ }
+ }
+
+ fn test_with_group_size<const GROUP_SIZE: usize>() {
+ assert!(REFERENCE_GROUP_SIZE % GROUP_SIZE == 0);
+ assert!(REFERENCE_GROUP_SIZE >= GROUP_SIZE);
+
+ for i in 4 .. 17 {
+ let item_count = 1 << i;
+ assert!(item_count % REFERENCE_GROUP_SIZE == 0);
+ assert!(item_count % GROUP_SIZE == 0);
+
+ let mask = item_count - 1;
+
+ let mut expected = Vec::with_capacity(item_count);
+
+ let mut refseq = ReferenceProbeSeq::new(0, mask);
+ for _ in 0 .. item_count / REFERENCE_GROUP_SIZE {
+ for index in refseq.index .. refseq.index + REFERENCE_GROUP_SIZE {
+ expected.push(index & mask);
+ }
+ refseq.advance(mask);
+ }
+
+ let mut actual = Vec::with_capacity(item_count);
+
+ let mut seq = ProbeSeq::<GROUP_SIZE>::new(0, mask);
+ for _ in 0 .. item_count / GROUP_SIZE {
+ for index in seq.index .. seq.index + GROUP_SIZE {
+ actual.push(index & mask);
+ }
+ seq.advance(mask);
+ }
+
+ assert_eq!(expected, actual);
+ }
+ }
+
+ test_with_group_size::<4>();
+ test_with_group_size::<8>();
+ test_with_group_size::<16>();
+ }
+}
diff --git a/vendor/odht/src/swisstable_group_query/mod.rs b/vendor/odht/src/swisstable_group_query/mod.rs
new file mode 100644
index 000000000..ed7f5f71f
--- /dev/null
+++ b/vendor/odht/src/swisstable_group_query/mod.rs
@@ -0,0 +1,117 @@
+cfg_if::cfg_if! {
+ if #[cfg(all(
+ target_feature = "sse2",
+ any(target_arch = "x86", target_arch = "x86_64"),
+ not(miri),
+ not(feature = "no_simd"),
+ ))] {
+ mod sse2;
+ use sse2 as imp;
+ } else {
+ mod no_simd;
+ use no_simd as imp;
+ }
+}
+
+pub use imp::GroupQuery;
+pub use imp::GROUP_SIZE;
+
+// While GROUP_SIZE is target-dependent for performance reasons,
+// we always do probing as if it was the same as REFERENCE_GROUP_SIZE.
+// This way the same slot indices will be assigned to the same
+// entries no matter the underlying target. This allows the
+// binary format of the table to be portable.
+pub const REFERENCE_GROUP_SIZE: usize = 16;
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ const EMPTY_GROUP: [u8; GROUP_SIZE] = [255; GROUP_SIZE];
+
+ fn full_group() -> [u8; GROUP_SIZE] {
+ let mut group = [0; GROUP_SIZE];
+ for i in 0..group.len() {
+ group[i] = i as u8;
+ }
+ group
+ }
+
+ #[test]
+ fn full() {
+ let mut q = GroupQuery::from(&full_group(), 42);
+
+ assert_eq!(Iterator::count(&mut q), 0);
+ assert!(!q.any_empty());
+ assert_eq!(q.first_empty(), None);
+ }
+
+ #[test]
+ fn all_empty() {
+ let mut q = GroupQuery::from(&EMPTY_GROUP, 31);
+
+ assert_eq!(Iterator::count(&mut q), 0);
+ assert!(q.any_empty());
+ assert_eq!(q.first_empty(), Some(0));
+ }
+
+ #[test]
+ fn partially_filled() {
+ for filled_up_to_index in 0..=(GROUP_SIZE - 2) {
+ let mut group = EMPTY_GROUP;
+
+ for i in 0..=filled_up_to_index {
+ group[i] = 42;
+ }
+
+ let mut q = GroupQuery::from(&group, 77);
+
+ assert_eq!(Iterator::count(&mut q), 0);
+ assert!(q.any_empty());
+ assert_eq!(q.first_empty(), Some(filled_up_to_index + 1));
+ }
+ }
+
+ #[test]
+ fn match_iter() {
+ let expected: Vec<_> = (0..GROUP_SIZE).filter(|x| x % 3 == 0).collect();
+
+ let mut group = full_group();
+
+ for i in &expected {
+ group[*i] = 103;
+ }
+
+ let mut q = GroupQuery::from(&group, 103);
+
+ let matches: Vec<usize> = (&mut q).collect();
+
+ assert_eq!(matches, expected);
+ assert!(!q.any_empty());
+ assert_eq!(q.first_empty(), None);
+ }
+
+ #[test]
+ fn match_iter_with_empty() {
+ let expected: Vec<_> = (0..GROUP_SIZE).filter(|x| x % 3 == 2).collect();
+
+ let mut group = full_group();
+
+ for i in &expected {
+ group[*i] = 99;
+ }
+
+ // Clear a few slots
+ group[3] = 255;
+ group[4] = 255;
+ group[GROUP_SIZE - 1] = 255;
+
+ let mut q = GroupQuery::from(&group, 99);
+
+ let matches: Vec<usize> = (&mut q).collect();
+
+ assert_eq!(matches, expected);
+ assert!(q.any_empty());
+ assert_eq!(q.first_empty(), Some(3));
+ }
+}
diff --git a/vendor/odht/src/swisstable_group_query/no_simd.rs b/vendor/odht/src/swisstable_group_query/no_simd.rs
new file mode 100644
index 000000000..81898fe47
--- /dev/null
+++ b/vendor/odht/src/swisstable_group_query/no_simd.rs
@@ -0,0 +1,70 @@
+use std::num::NonZeroU64;
+
+pub const GROUP_SIZE: usize = 8;
+
+type GroupWord = u64;
+type NonZeroGroupWord = NonZeroU64;
+
+pub struct GroupQuery {
+ eq_mask: GroupWord,
+ empty_mask: GroupWord,
+}
+
+#[inline]
+fn repeat(byte: u8) -> GroupWord {
+ GroupWord::from_ne_bytes([byte; GROUP_SIZE])
+}
+
+impl GroupQuery {
+ #[inline]
+ pub fn from(group: &[u8; GROUP_SIZE], h2: u8) -> GroupQuery {
+ // Adapted from this gem:
+ // https://github.com/rust-lang/hashbrown/blob/bbb5d3bb1c23569c15e54c670bc0c3669ae3e7dc/src/raw/generic.rs#L93-L109
+ // which in turn is based on
+ // http://graphics.stanford.edu/~seander/bithacks.html##ValueInWord
+ //
+        // Note that the mask generated by the code below can contain false
+ // positives. But we don't care because it's rare and we need
+ // to compare keys anyway. In other words, a false positive here
+ // has pretty much the same effect as a hash collision, something
+ // that we need to deal with in any case anyway.
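+        //
+        // As an illustration (with made-up values): if h2 == 0x3A and the i-th
+        // byte of `group` is also 0x3A, then the i-th byte of `cmp` is 0x00.
+        // `!cmp & 0x80` keeps that byte's high bit (the byte was < 0x80), and
+        // `cmp - 0x01` wraps the zero byte around so its high bit is set too.
+        // The i-th byte of `eq_mask` therefore ends up as 0x80, and the
+        // iterator below reports index i via `trailing_zeros() / 8`.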
+
+ let group = GroupWord::from_le_bytes(*group);
+ let cmp = group ^ repeat(h2);
+ let high_bit_greater_than_128 = (!cmp) & repeat(0x80);
+ let high_bit_greater_than_128_or_zero = cmp.wrapping_sub(repeat(0x01));
+ let eq_mask = high_bit_greater_than_128_or_zero & high_bit_greater_than_128;
+
+ let empty_mask = group & repeat(0x80);
+
+ GroupQuery {
+ eq_mask,
+ empty_mask,
+ }
+ }
+
+ #[inline]
+ pub fn any_empty(&self) -> bool {
+ self.empty_mask != 0
+ }
+
+ #[inline]
+ pub fn first_empty(&self) -> Option<usize> {
+ Some((NonZeroGroupWord::new(self.empty_mask)?.trailing_zeros() / 8) as usize)
+ }
+}
+
+impl Iterator for GroupQuery {
+ type Item = usize;
+
+ #[inline]
+ fn next(&mut self) -> Option<usize> {
+ let index = NonZeroGroupWord::new(self.eq_mask)?.trailing_zeros() / 8;
+
+ // Clear the lowest bit
+ // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetKernighan
+ self.eq_mask &= self.eq_mask - 1;
+
+ Some(index as usize)
+ }
+}
diff --git a/vendor/odht/src/swisstable_group_query/sse2.rs b/vendor/odht/src/swisstable_group_query/sse2.rs
new file mode 100644
index 000000000..89e41aa19
--- /dev/null
+++ b/vendor/odht/src/swisstable_group_query/sse2.rs
@@ -0,0 +1,54 @@
+use core::num::NonZeroU16;
+
+#[cfg(target_arch = "x86")]
+use core::arch::x86;
+#[cfg(target_arch = "x86_64")]
+use core::arch::x86_64 as x86;
+
+pub const GROUP_SIZE: usize = 16;
+
+pub struct GroupQuery {
+ matches: u16,
+ empty: u16,
+}
+
+impl GroupQuery {
+ #[inline]
+ pub fn from(group: &[u8; GROUP_SIZE], h2: u8) -> GroupQuery {
+ assert!(std::mem::size_of::<x86::__m128i>() == GROUP_SIZE);
+
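+        // What the intrinsics below compute, roughly: `_mm_cmpeq_epi8` sets a
+        // lane to 0xFF wherever the control byte equals h2, and
+        // `_mm_movemask_epi8` collects the high bit of every lane into a 16-bit
+        // mask. Applied to `group` directly, movemask yields one bit per slot
+        // whose control byte has the high bit set, i.e. per empty slot.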
+ unsafe {
+ let group = x86::_mm_loadu_si128(group as *const _ as *const x86::__m128i);
+ let cmp_byte = x86::_mm_cmpeq_epi8(group, x86::_mm_set1_epi8(h2 as i8));
+ let matches = x86::_mm_movemask_epi8(cmp_byte) as u16;
+ let empty = x86::_mm_movemask_epi8(group) as u16;
+
+ GroupQuery { matches, empty }
+ }
+ }
+
+ #[inline]
+ pub fn any_empty(&self) -> bool {
+ self.empty != 0
+ }
+
+ #[inline]
+ pub fn first_empty(&self) -> Option<usize> {
+ Some(NonZeroU16::new(self.empty)?.trailing_zeros() as usize)
+ }
+}
+
+impl Iterator for GroupQuery {
+ type Item = usize;
+
+ #[inline]
+ fn next(&mut self) -> Option<usize> {
+ let index = NonZeroU16::new(self.matches)?.trailing_zeros();
+
+ // Clear the lowest bit
+ // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetKernighan
+ self.matches &= self.matches - 1;
+
+ Some(index as usize)
+ }
+}
diff --git a/vendor/odht/src/unhash.rs b/vendor/odht/src/unhash.rs
new file mode 100644
index 000000000..2d1f905eb
--- /dev/null
+++ b/vendor/odht/src/unhash.rs
@@ -0,0 +1,15 @@
+use crate::HashFn;
+use std::convert::TryInto;
+
+/// A hash function that simply takes the last 4 bytes of a given key as the
+/// hash value.
+#[derive(Eq, PartialEq)]
+pub struct UnHashFn;
+
+impl HashFn for UnHashFn {
+ #[inline]
+ fn hash(bytes: &[u8]) -> u32 {
+ let len = bytes.len();
+ u32::from_le_bytes(bytes[len - 4..].try_into().unwrap())
+ }
+}
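+
+// Illustrative example: for an 8-byte key, UnHashFn::hash(&[1, 2, 3, 4, 5, 6, 7, 8])
+// reads the trailing bytes [5, 6, 7, 8] and returns
+// u32::from_le_bytes([5, 6, 7, 8]) == 0x08070605.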