diff options
Diffstat (limited to '')
-rw-r--r-- | third_party/rust/xml-rs/src/analyze.rs | 99 |
1 files changed, 99 insertions, 0 deletions
diff --git a/third_party/rust/xml-rs/src/analyze.rs b/third_party/rust/xml-rs/src/analyze.rs new file mode 100644 index 0000000000..d369d2f014 --- /dev/null +++ b/third_party/rust/xml-rs/src/analyze.rs @@ -0,0 +1,99 @@ +#![forbid(unsafe_code)] + +extern crate xml; + +use std::cmp; +use std::env; +use std::io::{self, Read, Write, BufReader}; +use std::fs::File; +use std::collections::HashSet; + +use xml::ParserConfig; +use xml::reader::XmlEvent; + +macro_rules! abort { + ($code:expr) => {::std::process::exit($code)}; + ($code:expr, $($args:tt)+) => {{ + writeln!(&mut ::std::io::stderr(), $($args)+).unwrap(); + ::std::process::exit($code); + }} +} + +fn main() { + let mut file; + let mut stdin; + let source: &mut Read = match env::args().nth(1) { + Some(file_name) => { + file = File::open(file_name) + .unwrap_or_else(|e| abort!(1, "Cannot open input file: {}", e)); + &mut file + } + None => { + stdin = io::stdin(); + &mut stdin + } + }; + + let reader = ParserConfig::new() + .whitespace_to_characters(true) + .ignore_comments(false) + .create_reader(BufReader::new(source)); + + let mut processing_instructions = 0; + let mut elements = 0; + let mut character_blocks = 0; + let mut cdata_blocks = 0; + let mut characters = 0; + let mut comment_blocks = 0; + let mut comment_characters = 0; + let mut namespaces = HashSet::new(); + let mut depth = 0; + let mut max_depth = 0; + + for e in reader { + match e { + Ok(e) => match e { + XmlEvent::StartDocument { version, encoding, standalone } => + println!( + "XML document version {}, encoded in {}, {}standalone", + version, encoding, if standalone.unwrap_or(false) { "" } else { "not " } + ), + XmlEvent::EndDocument => println!("Document finished"), + XmlEvent::ProcessingInstruction { .. } => processing_instructions += 1, + XmlEvent::Whitespace(_) => {} // can't happen due to configuration + XmlEvent::Characters(s) => { + character_blocks += 1; + characters += s.len(); + } + XmlEvent::CData(s) => { + cdata_blocks += 1; + characters += s.len(); + } + XmlEvent::Comment(s) => { + comment_blocks += 1; + comment_characters += s.len(); + } + XmlEvent::StartElement { namespace, .. } => { + depth += 1; + max_depth = cmp::max(max_depth, depth); + elements += 1; + namespaces.extend(namespace.0.into_iter().map(|(_, ns_uri)| ns_uri)); + } + XmlEvent::EndElement { .. } => { + depth -= 1; + } + }, + Err(e) => abort!(1, "Error parsing XML document: {}", e) + } + } + namespaces.remove(xml::namespace::NS_EMPTY_URI); + namespaces.remove(xml::namespace::NS_XMLNS_URI); + namespaces.remove(xml::namespace::NS_XML_URI); + + println!("Elements: {}, maximum depth: {}", elements, max_depth); + println!("Namespaces (excluding built-in): {}", namespaces.len()); + println!("Characters: {}, characters blocks: {}, CDATA blocks: {}", + characters, character_blocks, cdata_blocks); + println!("Comment blocks: {}, comment characters: {}", comment_blocks, comment_characters); + println!("Processing instructions (excluding built-in): {}", processing_instructions); +} |