1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
|
use std::hash::{BuildHasher, Hash, Hasher};
use criterion::*;
use fxhash::FxHasher;
use ahash::{AHasher, CallHasher, RandomState};
fn gen_word_pairs() -> Vec<String> {
let words: Vec<_> = r#"
a, ability, able, about, above, accept, according, account, across, act, action,
activity, actually, add, address, administration, admit, adult, affect, after,
again, against, age, agency, agent, ago, agree, agreement, ahead, air, all,
allow, almost, alone, along, already, also, although, always, American, among,
amount, analysis, and, animal, another, answer, any, anyone, anything, appear,
apply, approach, area, argue, arm, around, arrive, art, article, artist, as,
ask, assume, at, attack, attention, attorney, audience, author, authority,
available, avoid, away, baby, back, bad, bag, ball, bank, bar, base, be, beat,
beautiful, because, become, bed, before, begin, behavior, behind, believe,
benefit, best, better, between, beyond, big, bill, billion, bit, black, blood,
blue, board, body, book, born, both, box, boy, break, bring, brother, budget,
build, building, business, but, buy, by, call, camera, campaign, can, cancer,
candidate, capital, car, card, care, career, carry, case, catch, cause, cell,
center, central, century, certain, certainly, chair, challenge, chance, change,
character, charge, check, child, choice, choose, church, citizen, city, civil,
claim, class, clear, clearly, close, coach, cold, collection, college, color,
come, commercial, common, community, company, compare, computer, concern,
condition, conference, Congress, consider, consumer, contain, continue, control,
cost, could, country, couple, course, court, cover, create, crime, cultural,
culture, cup, current, customer, cut, dark, data, daughter, day, dead, deal,
death, debate, decade, decide, decision, deep, defense, degree, Democrat,
democratic, describe, design, despite, detail, determine, develop, development,
die, difference, different, difficult, dinner, direction, director, discover,
discuss, discussion, disease, do, doctor, dog, door, down, draw, dream, drive,
drop, drug, during, each, early, east, easy, eat, economic, economy, edge,
education, effect, effort, eight, either, election, else, employee, end, energy,
enjoy, enough, enter, entire, environment, environmental, especially, establish,
even, evening, event, ever, every, everybody, everyone, everything, evidence,
exactly, example, executive, exist, expect, experience, expert, explain, eye,
face, fact, factor, fail, fall, family, far, fast, father, fear, federal, feel,
feeling, few, field, fight, figure, fill, film, final, finally, financial, find,
fine, finger, finish, fire, firm, first, fish, five, floor, fly, focus, follow,
food, foot, for, force, foreign, forget, form, former, forward, four, free,
friend, from, front, full, fund, future, game, garden, gas, general, generation,
get, girl, give, glass, go, goal, good, government, great, green, ground, group,
grow, growth, guess, gun, guy, hair, half, hand, hang, happen, happy, hard,
have, he, head, health, hear, heart, heat, heavy, help, her, here, herself,
high, him, himself, his, history, hit, hold, home, hope, hospital, hot, hotel,
hour, house, how, however, huge, human, hundred, husband, I, idea, identify, if,
image, imagine, impact, important, improve, in, include, including, increase,
indeed, indicate, individual, industry, information, inside, instead,
institution, interest, interesting, international, interview, into, investment,
involve, issue, it, item, its, itself, job, join, just, keep, key, kid, kill,
kind, kitchen, know, knowledge, land, language, large, last, late, later, laugh,
law, lawyer, lay, lead, leader, learn, least, leave, left, leg, legal, less,
let, letter, level, lie, life, light, like, likely, line, list, listen, little,
live, local, long, look, lose, loss, lot, love, low, machine, magazine, main,
maintain, major, majority, make, man, manage, management, manager, many, market,
marriage, material, matter, may, maybe, me, mean, measure, media, medical, meet,
meeting, member, memory, mention, message, method, middle, might, military,
million, mind, minute, miss, mission, model, modern, moment, money, month, more,
morning, most, mother, mouth, move, movement, movie, Mr, Mrs, much, music, must,
my, myself, name, nation, national, natural, nature, near, nearly, necessary,
need, network, never, new, news, newspaper, next, nice, night, no, none, nor,
north, not, note, nothing, notice, now, n't, number, occur, of, off, offer,
office, officer, official, often, oh, oil, ok, old, on, once, one, only, onto,
open, operation, opportunity, option, or, order, organization, other, others,
our, out, outside, over, own, owner, page, pain, painting, paper, parent, part,
participant, particular, particularly, partner, party, pass, past, patient,
pattern, pay, peace, people, per, perform, performance, perhaps, period, person,
personal, phone, physical, pick, picture, piece, place, plan, plant, play,
player, PM, point, police, policy, political, politics, poor, popular,
population, position, positive, possible, power, practice, prepare, present,
president, pressure, pretty, prevent, price, private, probably, problem,
process, produce, product, production, professional, professor, program,
project, property, protect, prove, provide, public, pull, purpose, push, put,
quality, question, quickly, quite, race, radio, raise, range, rate, rather,
reach, read, ready, real, reality, realize, really, reason, receive, recent,
recently, recognize, record, red, reduce, reflect, region, relate, relationship,
religious, remain, remember, remove, report, represent, Republican, require,
research, resource, respond, response, responsibility, rest, result, return,
reveal, rich, right, rise, risk, road, rock, role, room, rule, run, safe, same,
save, say, scene, school, science, scientist, score, sea, season, seat, second,
section, security, see, seek, seem, sell, send, senior, sense, series, serious,
serve, service, set, seven, several, sex, sexual, shake, share, she, shoot,
short, shot, should, shoulder, show, side, sign, significant, similar, simple,
simply, since, sing, single, sister, sit, site, situation, six, size, skill,
skin, small, smile, so, social, society, soldier, some, somebody, someone,
something, sometimes, son, song, soon, sort, sound, source, south, southern,
space, speak, special, specific, speech, spend, sport, spring, staff, stage,
stand, standard, star, start, state, statement, station, stay, step, still,
stock, stop, store, story, strategy, street, strong, structure, student, study,
stuff, style, subject, success, successful, such, suddenly, suffer, suggest,
summer, support, sure, surface, system, table, take, talk, task, tax, teach,
teacher, team, technology, television, tell, ten, tend, term, test, than, thank,
that, the, their, them, themselves, then, theory, there, these, they, thing,
think, third, this, those, though, thought, thousand, threat, three, through,
throughout, throw, thus, time, to, today, together, tonight, too, top, total,
tough, toward, town, trade, traditional, training, travel, treat, treatment,
tree, trial, trip, trouble, true, truth, try, turn, TV, two, type, under,
understand, unit, until, up, upon, us, use, usually, value, various, very,
victim, view, violence, visit, voice, vote, wait, walk, wall, want, war, watch,
water, way, we, weapon, wear, week, weight, well, west, western, what, whatever,
when, where, whether, which, while, white, who, whole, whom, whose, why, wide,
wife, will, win, wind, window, wish, with, within, without, woman, wonder, word,
work, worker, world, worry, would, write, writer, wrong, yard, yeah, year, yes,
yet, you, young, your, yourself"#
.split(',')
.map(|word| word.trim())
.collect();
let mut word_pairs: Vec<_> = Vec::new();
for word in &words {
for other_word in &words {
word_pairs.push(word.to_string() + " " + other_word);
}
}
assert_eq!(1_000_000, word_pairs.len());
word_pairs
}
#[allow(unused)] // False positive
fn test_hash_common_words<B: BuildHasher>(build_hasher: &B) {
let word_pairs: Vec<_> = gen_word_pairs();
check_for_collisions(build_hasher, &word_pairs, 32);
}
#[allow(unused)] // False positive
fn check_for_collisions<H: Hash, B: BuildHasher>(build_hasher: &B, items: &[H], bucket_count: usize) {
let mut buckets = vec![0; bucket_count];
for item in items {
let value = hash(item, build_hasher) as usize;
buckets[value % bucket_count] += 1;
}
let mean = items.len() / bucket_count;
let max = *buckets.iter().max().unwrap();
let min = *buckets.iter().min().unwrap();
assert!(
(min as f64) > (mean as f64) * 0.95,
"min: {}, max:{}, {:?}",
min,
max,
buckets
);
assert!(
(max as f64) < (mean as f64) * 1.05,
"min: {}, max:{}, {:?}",
min,
max,
buckets
);
}
#[allow(unused)] // False positive
fn hash<H: Hash, B: BuildHasher>(b: &H, build_hasher: &B) -> u64 {
H::get_hash(b, build_hasher)
}
#[test]
fn test_bucket_distribution() {
let build_hasher = RandomState::with_seeds(1, 2, 3, 4);
test_hash_common_words(&build_hasher);
let sequence: Vec<_> = (0..320000).collect();
check_for_collisions(&build_hasher, &sequence, 32);
let sequence: Vec<_> = (0..2560000).collect();
check_for_collisions(&build_hasher, &sequence, 256);
let sequence: Vec<_> = (0..320000).map(|i| i * 1024).collect();
check_for_collisions(&build_hasher, &sequence, 32);
let sequence: Vec<_> = (0..2560000_u64).map(|i| i * 1024).collect();
check_for_collisions(&build_hasher, &sequence, 256);
}
fn ahash_vec<H: Hash>(b: &Vec<H>) -> u64 {
let mut total: u64 = 0;
for item in b {
let mut hasher = AHasher::new_with_keys(1234, 5678);
item.hash(&mut hasher);
total = total.wrapping_add(hasher.finish());
}
total
}
fn fxhash_vec<H: Hash>(b: &Vec<H>) -> u64 {
let mut total: u64 = 0;
for item in b {
let mut hasher = FxHasher::default();
item.hash(&mut hasher);
total = total.wrapping_add(hasher.finish());
}
total
}
fn bench_ahash_words(c: &mut Criterion) {
let words = gen_word_pairs();
c.bench_function("aes_words", |b| b.iter(|| black_box(ahash_vec(&words))));
}
fn bench_fx_words(c: &mut Criterion) {
let words = gen_word_pairs();
c.bench_function("fx_words", |b| b.iter(|| black_box(fxhash_vec(&words))));
}
criterion_main!(benches);
criterion_group!(benches, bench_ahash_words, bench_fx_words,);
|