summary | refs | log | tree | commit | diff | stats
path: root/vendor/html5ever/examples/tokenize.rs
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-17 12:02:58 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-17 12:02:58 +0000
commit 698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch)
tree 173a775858bd501c378080a10dca74132f05bc50 /vendor/html5ever/examples/tokenize.rs
parentInitial commit. (diff)
downloadrustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz
rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip
Adding upstream version 1.64.0+dfsg1. upstream/1.64.0+dfsg1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/html5ever/examples/tokenize.rs')
-rw-r--r-- vendor/html5ever/examples/tokenize.rs 103
1 files changed, 103 insertions, 0 deletions
diff --git a/vendor/html5ever/examples/tokenize.rs b/vendor/html5ever/examples/tokenize.rs
new file mode 100644
index 000000000..039ffb796
--- /dev/null
+++ b/vendor/html5ever/examples/tokenize.rs
@@ -0,0 +1,103 @@
+// Copyright 2014-2017 The html5ever Project Developers. See the
+// COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+extern crate html5ever;
+
+use std::default::Default;
+use std::io;
+
+use html5ever::tendril::*;
+use html5ever::tokenizer::BufferQueue;
+use html5ever::tokenizer::{CharacterTokens, EndTag, NullCharacterToken, StartTag, TagToken};
+use html5ever::tokenizer::{
+ ParseError, Token, TokenSink, TokenSinkResult, Tokenizer, TokenizerOpts,
+};
+
+/// Token sink that prints every token it receives to stdout.
+///
+/// Consecutive character tokens are merged into one quoted `CHAR` line;
+/// `in_char_run` remembers whether such a line is currently open.
+#[derive(Clone, Copy)]
+struct TokenPrinter {
+    /// `true` after the opening quote of a character run has been printed
+    /// and before its closing quote has been printed.
+    in_char_run: bool,
+}
+
+impl TokenPrinter {
+    /// Records whether the token being handled is character data, opening or
+    /// closing the quoted `CHAR` line when that state changes.
+    ///
+    /// Entering a run prints the `CHAR : "` prefix; leaving one prints the
+    /// closing quote followed by a newline. If the state does not change,
+    /// nothing is printed.
+    fn is_char(&mut self, is_char: bool) {
+        let was_in_run = self.in_char_run;
+        if is_char && !was_in_run {
+            print!("CHAR : \"");
+        } else if was_in_run && !is_char {
+            println!("\"");
+        }
+        self.in_char_run = is_char;
+    }
+
+    /// Prints a single character of a run in escaped form, opening the run
+    /// first if one is not already open.
+    fn do_char(&mut self, c: char) {
+        self.is_char(true);
+        // The escape iterator implements `Display`, so it can be printed
+        // directly and yields the same text as collecting it into a `String`.
+        print!("{}", c.escape_default());
+    }
+}
+
+impl TokenSink for TokenPrinter {
+    type Handle = ();
+
+    /// Prints one token and always asks the tokenizer to continue.
+    ///
+    /// Character and null-character tokens are appended to the current
+    /// `CHAR` run. Every other token first closes any open run and is then
+    /// printed on its own line (`TAG`, `ERROR` or `OTHER`). The line number
+    /// argument is ignored.
+    fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
+        match token {
+            CharacterTokens(text) => text.chars().for_each(|ch| self.do_char(ch)),
+            NullCharacterToken => self.do_char('\0'),
+            TagToken(tag) => {
+                self.is_char(false);
+                // This is not proper HTML serialization, of course.
+                // The escape sequences only colour the terminal output.
+                match tag.kind {
+                    StartTag => print!("TAG : <\x1b[32m{}\x1b[0m", tag.name),
+                    EndTag => print!("TAG : <\x1b[31m/{}\x1b[0m", tag.name),
+                }
+                for attribute in &tag.attrs {
+                    print!(
+                        " \x1b[36m{}\x1b[0m='\x1b[34m{}\x1b[0m'",
+                        attribute.name.local, attribute.value
+                    );
+                }
+                if tag.self_closing {
+                    print!(" \x1b[31m/\x1b[0m");
+                }
+                println!(">");
+            },
+            ParseError(message) => {
+                self.is_char(false);
+                println!("ERROR: {}", message);
+            },
+            // Any remaining token kind is dumped with its `Debug` output.
+            other => {
+                self.is_char(false);
+                println!("OTHER: {:?}", other);
+            },
+        }
+        TokenSinkResult::Continue
+    }
+}
+
+/// Reads all of stdin, feeds it to the html5ever tokenizer and prints each
+/// token through a [`TokenPrinter`].
+///
+/// # Panics
+///
+/// Panics if stdin cannot be read, if the bytes read cannot be reinterpreted
+/// as the string tendril the input queue expects, or if the tokenizer leaves
+/// input unconsumed after `feed`.
+fn main() {
+    let sink = TokenPrinter { in_char_run: false };
+
+    let mut chunk = ByteTendril::new();
+    io::stdin()
+        .read_to_tendril(&mut chunk)
+        .expect("failed to read stdin");
+
+    let mut input = BufferQueue::new();
+    input.push_back(
+        chunk
+            .try_reinterpret()
+            .expect("stdin could not be reinterpreted as text"),
+    );
+
+    let mut tok = Tokenizer::new(
+        sink,
+        TokenizerOpts {
+            profile: true,
+            ..Default::default()
+        },
+    );
+    let _ = tok.feed(&mut input);
+    assert!(input.is_empty());
+    tok.end();
+
+    // `TokenPrinter` is `Copy`, so the tokenizer holds its own copy of the
+    // sink. Close any character run that is still open on that copy; the
+    // local `sink` never sees a token and is never in a run.
+    tok.sink.is_char(false);
+}