summaryrefslogtreecommitdiffstats
path: root/vendor/html5ever/benches
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:02:58 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:02:58 +0000
commit698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch)
tree173a775858bd501c378080a10dca74132f05bc50 /vendor/html5ever/benches
parentInitial commit. (diff)
downloadrustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz
rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip
Adding upstream version 1.64.0+dfsg1. (tag: upstream/1.64.0+dfsg1)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/html5ever/benches')
-rw-r--r--  vendor/html5ever/benches/html5ever.rs  81
1 file changed, 81 insertions, 0 deletions
diff --git a/vendor/html5ever/benches/html5ever.rs b/vendor/html5ever/benches/html5ever.rs
new file mode 100644
index 000000000..ff20c4f70
--- /dev/null
+++ b/vendor/html5ever/benches/html5ever.rs
@@ -0,0 +1,81 @@
+#[macro_use]
+extern crate criterion;
+extern crate html5ever;
+
+use std::fs;
+use std::path::PathBuf;
+
+use criterion::{black_box, Criterion};
+
+use html5ever::tendril::*;
+use html5ever::tokenizer::{BufferQueue, Token, TokenSink, TokenSinkResult, Tokenizer};
+
+struct Sink;
+
+impl TokenSink for Sink {
+ type Handle = ();
+
+ fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
+ // Don't use the token, but make sure we don't get
+ // optimized out entirely.
+ black_box(token);
+ TokenSinkResult::Continue
+ }
+}
+
+fn run_bench(c: &mut Criterion, name: &str) {
+ let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+ path.push("data/bench/");
+ path.push(name);
+ let mut file = fs::File::open(&path).ok().expect("can't open file");
+
+ // Read the file and treat it as an infinitely repeating sequence of characters.
+ let mut file_input = ByteTendril::new();
+ file.read_to_tendril(&mut file_input)
+ .ok()
+ .expect("can't read file");
+ let file_input: StrTendril = file_input.try_reinterpret().unwrap();
+ let size = file_input.len();
+ let mut stream = file_input.chars().cycle();
+
+ // Break the input into chunks of 1024 chars (= a few kB).
+ // This simulates reading from the network.
+ let mut input = vec![];
+ let mut total = 0usize;
+ while total < size {
+ // The by_ref() call is important, otherwise we get wrong results!
+ // See rust-lang/rust#18045.
+ let sz = std::cmp::min(1024, size - total);
+ input.push(stream.by_ref().take(sz).collect::<String>().to_tendril());
+ total += sz;
+ }
+
+ let test_name = format!("html tokenizing {}", name);
+
+ c.bench_function(&test_name, move |b| {
+ b.iter(|| {
+ let mut tok = Tokenizer::new(Sink, Default::default());
+ let mut buffer = BufferQueue::new();
+ // We are doing clone inside the bench function, this is not ideal, but possibly
+ // necessary since our iterator consumes the underlying buffer.
+ for buf in input.clone().into_iter() {
+ buffer.push_back(buf);
+ let _ = tok.feed(&mut buffer);
+ }
+ let _ = tok.feed(&mut buffer);
+ tok.end();
+ })
+ });
+}
+
+fn html5ever_benchmark(c: &mut Criterion) {
+ run_bench(c, "lipsum.html");
+ run_bench(c, "lipsum-zh.html");
+ run_bench(c, "medium-fragment.html");
+ run_bench(c, "small-fragment.html");
+ run_bench(c, "tiny-fragment.html");
+ run_bench(c, "strong.html");
+}
+
+criterion_group!(benches, html5ever_benchmark);
+criterion_main!(benches);