Diffstat (limited to 'vendor/phf_generator/src/lib.rs')
-rw-r--r--  vendor/phf_generator/src/lib.rs  105
1 file changed, 105 insertions(+), 0 deletions(-)
diff --git a/vendor/phf_generator/src/lib.rs b/vendor/phf_generator/src/lib.rs
new file mode 100644
index 000000000..6c848ce5a
--- /dev/null
+++ b/vendor/phf_generator/src/lib.rs
@@ -0,0 +1,105 @@
+#![doc(html_root_url = "https://docs.rs/phf_generator/0.9")]
+use phf_shared::{HashKey, PhfHash};
+use rand::distributions::Standard;
+use rand::rngs::SmallRng;
+use rand::{Rng, SeedableRng};
+
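+/// Target average number of keys per bucket; the number of buckets is
+/// `ceil(entries.len() / DEFAULT_LAMBDA)`.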
+const DEFAULT_LAMBDA: usize = 5;
+
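+/// Fixed RNG seed so that PHF generation is deterministic and
+/// reproducible from build to build.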
+const FIXED_SEED: u64 = 1234567890;
+
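+/// The state of a generated perfect hash function: the hash key that was
+/// used, one `(d1, d2)` displacement pair per bucket, and the mapping from
+/// each table slot to the index of the entry stored there.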
+pub struct HashState {
+    pub key: HashKey,
+    pub disps: Vec<(u32, u32)>,
+    pub map: Vec<usize>,
+}
+
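+/// Generates a perfect hash function over `entries`, repeatedly drawing
+/// random hash keys from a deterministically seeded RNG until the
+/// construction succeeds.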
+pub fn generate_hash<H: PhfHash>(entries: &[H]) -> HashState {
+    SmallRng::seed_from_u64(FIXED_SEED)
+        .sample_iter(Standard)
+        .find_map(|key| try_generate_hash(entries, key))
+        .expect("failed to solve PHF")
+}
+
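+// Attempts a CHD-style construction for a single hash key: hash every
+// entry, group the entries into buckets by the `g` component of their
+// hash, then (largest bucket first) search for a displacement pair that
+// places all of the bucket's keys into distinct, empty table slots.
+// Returns None if any bucket cannot be placed.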
+fn try_generate_hash<H: PhfHash>(entries: &[H], key: HashKey) -> Option<HashState> {
+    struct Bucket {
+        idx: usize,
+        keys: Vec<usize>,
+    }
+
+    let hashes: Vec<_> = entries
+        .iter()
+        .map(|entry| phf_shared::hash(entry, &key))
+        .collect();
+
+    let buckets_len = (hashes.len() + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA;
+    let mut buckets = (0..buckets_len)
+        .map(|i| Bucket {
+            idx: i,
+            keys: vec![],
+        })
+        .collect::<Vec<_>>();
+
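+    // Assign each entry to a bucket based on the `g` component of its hash.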
+    for (i, hash) in hashes.iter().enumerate() {
+        buckets[(hash.g % (buckets_len as u32)) as usize]
+            .keys
+            .push(i);
+    }
+
+    // Sort the buckets in descending order of size so that the largest
+    // (hardest to place) buckets are handled first.
+    buckets.sort_by(|a, b| a.keys.len().cmp(&b.keys.len()).reverse());
+
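+    // map[slot] will hold the index of the entry placed in that slot;
+    // disps[i] holds the displacement pair chosen for bucket i.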
+    let table_len = hashes.len();
+    let mut map = vec![None; table_len];
+    let mut disps = vec![(0u32, 0u32); buckets_len];
+
+    // Tracks whether an element of the bucket currently being placed
+    // occupies a given position, allowing for efficient overlap checks.
+    // Each cell stores the generation in which it was last claimed, and
+    // every placement attempt uses a fresh generation, so a cell is only
+    // genuinely occupied by the current attempt if its generation matches
+    // the current one. (A u64 is far too large to overflow in a
+    // reasonable time on current hardware.)
+    let mut try_map = vec![0u64; table_len];
+    let mut generation = 0u64;
+
+    // The values corresponding to the markers above, as (index, key)
+    // pairs, to be added to the main map once the right disps have been
+    // chosen.
+    let mut values_to_add = vec![];
+
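+    // For each bucket, largest first, search for a displacement pair
+    // (d1, d2) that sends every key in the bucket to a distinct,
+    // still-unoccupied slot of the table.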
+    'buckets: for bucket in &buckets {
+        for d1 in 0..(table_len as u32) {
+            'disps: for d2 in 0..(table_len as u32) {
+                values_to_add.clear();
+                generation += 1;
+
+                for &key in &bucket.keys {
+                    let idx = (phf_shared::displace(hashes[key].f1, hashes[key].f2, d1, d2)
+                        % (table_len as u32)) as usize;
+                    if map[idx].is_some() || try_map[idx] == generation {
+                        continue 'disps;
+                    }
+                    try_map[idx] = generation;
+                    values_to_add.push((idx, key));
+                }
+
+                // We've picked a good set of disps
+                disps[bucket.idx] = (d1, d2);
+                for &(idx, key) in &values_to_add {
+                    map[idx] = Some(key);
+                }
+                continue 'buckets;
+            }
+        }
+
+        // Unable to find displacements for a bucket
+        return None;
+    }
+
+    Some(HashState {
+        key,
+        disps,
+        map: map.into_iter().map(|i| i.unwrap()).collect(),
+    })
+}
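
For reference, looking an entry up against the generated state mirrors the placement logic above: hash the entry with the chosen key, select a bucket from `g`, apply that bucket's displacement pair, and index into `map`. A minimal sketch, not part of this file; the helper name `lookup_index` is hypothetical:

    // Hypothetical helper, sketching how a generated HashState is consumed.
    fn lookup_index<H: phf_shared::PhfHash>(state: &HashState, entry: &H) -> usize {
        let h = phf_shared::hash(entry, &state.key);
        let bucket = (h.g % (state.disps.len() as u32)) as usize;
        let (d1, d2) = state.disps[bucket];
        let slot = (phf_shared::displace(h.f1, h.f2, d1, d2) % (state.map.len() as u32)) as usize;
        state.map[slot] // index of `entry` in the original `entries` slice
    }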