1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
|
extern crate xml;
use std::cmp;
use std::env;
use std::io::{self, Read, Write, BufReader};
use std::fs::File;
use std::collections::HashSet;
use xml::ParserConfig;
use xml::reader::XmlEvent;
/// Terminates the process with the given exit code, optionally printing a
/// formatted message to standard error first.
///
/// * `abort!(code)` exits immediately with `code`.
/// * `abort!(code, fmt, args...)` writes the formatted message plus a newline
///   to stderr and then exits with `code`.
///
/// Both forms diverge, so the macro can be used wherever a value of any type
/// is expected (for example inside `unwrap_or_else`).
macro_rules! abort {
    ($code:expr) => {
        ::std::process::exit($code)
    };
    ($code:expr, $($args:tt)+) => {{
        // `eprintln!` needs no trait import at the call site, whereas
        // `writeln!` only compiles when `std::io::Write` is in scope there.
        eprintln!($($args)+);
        ::std::process::exit($code)
    }};
}
fn main() {
let mut file;
let mut stdin;
let source: &mut Read = match env::args().nth(1) {
Some(file_name) => {
file = File::open(file_name)
.unwrap_or_else(|e| abort!(1, "Cannot open input file: {}", e));
&mut file
}
None => {
stdin = io::stdin();
&mut stdin
}
};
let reader = ParserConfig::new()
.whitespace_to_characters(true)
.ignore_comments(false)
.create_reader(BufReader::new(source));
let mut processing_instructions = 0;
let mut elements = 0;
let mut character_blocks = 0;
let mut cdata_blocks = 0;
let mut characters = 0;
let mut comment_blocks = 0;
let mut comment_characters = 0;
let mut namespaces = HashSet::new();
let mut depth = 0;
let mut max_depth = 0;
for e in reader {
match e {
Ok(e) => match e {
XmlEvent::StartDocument { version, encoding, standalone } =>
println!(
"XML document version {}, encoded in {}, {}standalone",
version, encoding, if standalone.unwrap_or(false) { "" } else { "not " }
),
XmlEvent::EndDocument => println!("Document finished"),
XmlEvent::ProcessingInstruction { .. } => processing_instructions += 1,
XmlEvent::Whitespace(_) => {} // can't happen due to configuration
XmlEvent::Characters(s) => {
character_blocks += 1;
characters += s.len();
}
XmlEvent::CData(s) => {
cdata_blocks += 1;
characters += s.len();
}
XmlEvent::Comment(s) => {
comment_blocks += 1;
comment_characters += s.len();
}
XmlEvent::StartElement { namespace, .. } => {
depth += 1;
max_depth = cmp::max(max_depth, depth);
elements += 1;
namespaces.extend(namespace.0.into_iter().map(|(_, ns_uri)| ns_uri));
}
XmlEvent::EndElement { .. } => {
depth -= 1;
}
},
Err(e) => abort!(1, "Error parsing XML document: {}", e)
}
}
namespaces.remove(xml::namespace::NS_EMPTY_URI);
namespaces.remove(xml::namespace::NS_XMLNS_URI);
namespaces.remove(xml::namespace::NS_XML_URI);
println!("Elements: {}, maximum depth: {}", elements, max_depth);
println!("Namespaces (excluding built-in): {}", namespaces.len());
println!("Characters: {}, characters blocks: {}, CDATA blocks: {}",
characters, character_blocks, cdata_blocks);
println!("Comment blocks: {}, comment characters: {}", comment_blocks, comment_characters);
println!("Processing instructions (excluding built-in): {}", processing_instructions);
}
|