1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
|
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use std::borrow::ToOwned;
use std::collections::hash_map::{Entry, HashMap};
use tendril::StrTendril;
/// Groups the space-separated words of `input` by their first character.
///
/// Words are split on the ASCII space character only; other whitespace
/// (tabs, newlines) stays part of the surrounding word. Empty pieces produced
/// by leading, trailing or repeated spaces are ignored.
///
/// Returns a map from each word's first `char` to the owned words starting
/// with it, in the order they appear in `input`.
fn index_words_string(input: &String) -> HashMap<char, Vec<String>> {
    let mut index: HashMap<char, Vec<String>> = HashMap::new();
    for word in input.split(' ') {
        // An empty piece has no first character; skipping here replaces the
        // separate length check and removes the need for `unwrap()`.
        let first = match word.chars().next() {
            Some(c) => c,
            None => continue,
        };
        // Single lookup: create the bucket on first sight, then append.
        index
            .entry(first)
            .or_insert_with(Vec::new)
            .push(word.to_owned());
    }
    index
}
fn index_words_tendril(input: &StrTendril) -> HashMap<char, Vec<StrTendril>> {
let mut index = HashMap::new();
let mut t = input.clone();
loop {
match t.pop_front_char_run(|c| c != ' ') {
None => return index,
Some((_, false)) => (),
Some((word, true)) => match index.entry(word.chars().next().unwrap()) {
Entry::Occupied(mut e) => {
e.get_mut().push(word);
}
Entry::Vacant(e) => {
e.insert(vec![word]);
}
},
}
}
}
// Sample texts used as benchmark/test inputs; each is selected by name
// through the `bench!` macro invocations in `index_words`.

/// Short, single-line English sample (ASCII only).
static EN_1: &'static str = "Days turn to nights turn to paper into rocks into plastic";
/// Longer English sample: two paragraphs separated by `\n\n`, including a
/// non-ASCII em dash in the final sentence.
static EN_2: &'static str =
"Here the notes in my laboratory journal cease. I was able to write the last \
words only with great effort. By now it was already clear to me that LSD had \
been the cause of the remarkable experience of the previous Friday, for the \
altered perceptions were of the same type as before, only much more intense. I \
had to struggle to speak intelligibly. I asked my laboratory assistant, who was \
informed of the self-experiment, to escort me home. We went by bicycle, no \
automobile being available because of wartime restrictions on their use. On the \
way home, my condition began to assume threatening forms. Everything in my \
field of vision wavered and was distorted as if seen in a curved mirror. I also \
had the sensation of being unable to move from the spot. Nevertheless, my \
assistant later told me that we had traveled very rapidly. Finally, we arrived \
at home safe and sound, and I was just barely capable of asking my companion to \
summon our family doctor and request milk from the neighbors.\n\n\
In spite of my delirious, bewildered condition, I had brief periods of clear \
and effective thinking—and chose milk as a nonspecific antidote for poisoning.";
/// Korean sample mixing Hangul with ASCII words and punctuation.
static KR_1: &'static str =
"러스트(Rust)는 모질라(mozilla.org)에서 개발하고 있는, 메모리-안전하고 병렬 \
프로그래밍이 쉬운 차세대 프로그래밍 언어입니다. 아직 \
개발 단계이며 많은 기능이 구현 중으로, MIT/Apache2 라이선스로 배포됩니다.";
/// Korean sample wrapped in HTML markup (`<p>` and `<a href="...">` tags).
static HTML_KR_1: &'static str =
"<p>러스트(<a href=\"http://rust-lang.org\">Rust</a>)는 모질라(<a href=\"\
https://www.mozilla.org/\">mozilla.org</a>)에서 개발하고 있는, \
메모리-안전하고 병렬 프로그래밍이 쉬운 차세대 프로그래밍 언어입니다. \
아직 개발 단계이며 많은 기능이 구현 중으로, MIT/Apache2 라이선스로 배포됩니다.</p>";
mod index_words {
macro_rules! bench {
($txt:ident) => {
#[allow(non_snake_case)]
mod $txt {
const SMALL_SIZE: usize = 65536;
const LARGE_SIZE: usize = (1 << 20);
#[bench]
fn index_words_string(b: &mut ::test::Bencher) {
let mut s = String::new();
while s.len() < SMALL_SIZE {
s.push_str(::tendril::bench::$txt);
}
b.iter(|| ::tendril::bench::index_words_string(&s));
}
#[bench]
fn index_words_tendril(b: &mut ::test::Bencher) {
let mut t = ::tendril::StrTendril::new();
while t.len() < SMALL_SIZE {
t.push_slice(::tendril::bench::$txt);
}
b.iter(|| ::tendril::bench::index_words_tendril(&t));
}
#[bench]
fn index_words_big_string(b: &mut ::test::Bencher) {
let mut s = String::new();
while s.len() < LARGE_SIZE {
s.push_str(::tendril::bench::$txt);
}
b.iter(|| ::tendril::bench::index_words_string(&s));
}
#[bench]
fn index_words_big_tendril(b: &mut ::test::Bencher) {
let mut t = ::tendril::StrTendril::new();
while t.len() < LARGE_SIZE {
t.push_slice(::tendril::bench::$txt);
}
b.iter(|| ::tendril::bench::index_words_tendril(&t));
}
#[test]
fn correctness() {
use std::borrow::ToOwned;
use tendril::bench::{index_words_string, index_words_tendril};
use tendril::SliceExt;
let txt = ::tendril::bench::$txt;
let input_string = txt.to_owned();
let count_s = index_words_string(&input_string);
let mut keys: Vec<char> = count_s.keys().cloned().collect();
keys.sort();
let input_tendril = txt.to_tendril();
let count_t = index_words_tendril(&input_tendril);
let mut keys_t: Vec<char> = count_t.keys().cloned().collect();
keys_t.sort();
assert_eq!(keys, keys_t);
for k in &keys {
let vs = &count_s[k];
let vt = &count_t[k];
assert_eq!(vs.len(), vt.len());
assert!(vs.iter().zip(vt.iter()).all(|(s, t)| **s == **t));
}
}
}
};
}
bench!(EN_1);
bench!(EN_2);
bench!(KR_1);
bench!(HTML_KR_1);
}
|