From 698f8c2f01ea549d77d7dc3338a12e04c11057b9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:02:58 +0200 Subject: Adding upstream version 1.64.0+dfsg1. Signed-off-by: Daniel Baumann --- vendor/html5ever/benches/html5ever.rs | 81 +++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 vendor/html5ever/benches/html5ever.rs (limited to 'vendor/html5ever/benches') diff --git a/vendor/html5ever/benches/html5ever.rs b/vendor/html5ever/benches/html5ever.rs new file mode 100644 index 000000000..ff20c4f70 --- /dev/null +++ b/vendor/html5ever/benches/html5ever.rs @@ -0,0 +1,81 @@ +#[macro_use] +extern crate criterion; +extern crate html5ever; + +use std::fs; +use std::path::PathBuf; + +use criterion::{black_box, Criterion}; + +use html5ever::tendril::*; +use html5ever::tokenizer::{BufferQueue, Token, TokenSink, TokenSinkResult, Tokenizer}; + +struct Sink; + +impl TokenSink for Sink { + type Handle = (); + + fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> { + // Don't use the token, but make sure we don't get + // optimized out entirely. + black_box(token); + TokenSinkResult::Continue + } +} + +fn run_bench(c: &mut Criterion, name: &str) { + let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + path.push("data/bench/"); + path.push(name); + let mut file = fs::File::open(&path).ok().expect("can't open file"); + + // Read the file and treat it as an infinitely repeating sequence of characters. + let mut file_input = ByteTendril::new(); + file.read_to_tendril(&mut file_input) + .ok() + .expect("can't read file"); + let file_input: StrTendril = file_input.try_reinterpret().unwrap(); + let size = file_input.len(); + let mut stream = file_input.chars().cycle(); + + // Break the input into chunks of 1024 chars (= a few kB). + // This simulates reading from the network. + let mut input = vec![]; + let mut total = 0usize; + while total < size { + // The by_ref() call is important, otherwise we get wrong results! + // See rust-lang/rust#18045. + let sz = std::cmp::min(1024, size - total); + input.push(stream.by_ref().take(sz).collect::().to_tendril()); + total += sz; + } + + let test_name = format!("html tokenizing {}", name); + + c.bench_function(&test_name, move |b| { + b.iter(|| { + let mut tok = Tokenizer::new(Sink, Default::default()); + let mut buffer = BufferQueue::new(); + // We are doing clone inside the bench function, this is not ideal, but possibly + // necessary since our iterator consumes the underlying buffer. + for buf in input.clone().into_iter() { + buffer.push_back(buf); + let _ = tok.feed(&mut buffer); + } + let _ = tok.feed(&mut buffer); + tok.end(); + }) + }); +} + +fn html5ever_benchmark(c: &mut Criterion) { + run_bench(c, "lipsum.html"); + run_bench(c, "lipsum-zh.html"); + run_bench(c, "medium-fragment.html"); + run_bench(c, "small-fragment.html"); + run_bench(c, "tiny-fragment.html"); + run_bench(c, "strong.html"); +} + +criterion_group!(benches, html5ever_benchmark); +criterion_main!(benches); -- cgit v1.2.3