#[macro_use] extern crate criterion; extern crate html5ever; use std::fs; use std::path::PathBuf; use criterion::{black_box, Criterion}; use html5ever::tendril::*; use html5ever::tokenizer::{BufferQueue, Token, TokenSink, TokenSinkResult, Tokenizer}; struct Sink; impl TokenSink for Sink { type Handle = (); fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> { // Don't use the token, but make sure we don't get // optimized out entirely. black_box(token); TokenSinkResult::Continue } } fn run_bench(c: &mut Criterion, name: &str) { let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); path.push("data/bench/"); path.push(name); let mut file = fs::File::open(&path).ok().expect("can't open file"); // Read the file and treat it as an infinitely repeating sequence of characters. let mut file_input = ByteTendril::new(); file.read_to_tendril(&mut file_input) .ok() .expect("can't read file"); let file_input: StrTendril = file_input.try_reinterpret().unwrap(); let size = file_input.len(); let mut stream = file_input.chars().cycle(); // Break the input into chunks of 1024 chars (= a few kB). // This simulates reading from the network. let mut input = vec![]; let mut total = 0usize; while total < size { // The by_ref() call is important, otherwise we get wrong results! // See rust-lang/rust#18045. let sz = std::cmp::min(1024, size - total); input.push(stream.by_ref().take(sz).collect::().to_tendril()); total += sz; } let test_name = format!("html tokenizing {}", name); c.bench_function(&test_name, move |b| { b.iter(|| { let mut tok = Tokenizer::new(Sink, Default::default()); let mut buffer = BufferQueue::new(); // We are doing clone inside the bench function, this is not ideal, but possibly // necessary since our iterator consumes the underlying buffer. for buf in input.clone().into_iter() { buffer.push_back(buf); let _ = tok.feed(&mut buffer); } let _ = tok.feed(&mut buffer); tok.end(); }) }); } fn html5ever_benchmark(c: &mut Criterion) { run_bench(c, "lipsum.html"); run_bench(c, "lipsum-zh.html"); run_bench(c, "medium-fragment.html"); run_bench(c, "small-fragment.html"); run_bench(c, "tiny-fragment.html"); run_bench(c, "strong.html"); } criterion_group!(benches, html5ever_benchmark); criterion_main!(benches);