#![forbid(unsafe_code)] extern crate xml; #[macro_use] extern crate lazy_static; use std::env; use std::fmt; use std::fs::File; use std::io::{BufRead, BufReader, Write, stderr}; use std::path::Path; use xml::name::OwnedName; use xml::common::Position; use xml::reader::{Result, XmlEvent, ParserConfig, EventReader}; /// Dummy function that opens a file, parses it, and returns a `Result`. /// There can be IO errors (from `File::open`) and XML errors (from the parser). /// Having `impl From for xml::reader::Error` allows the user to /// do this without defining their own error type. #[allow(dead_code)] fn count_event_in_file(name: &Path) -> Result { let mut event_count = 0; for event in EventReader::new(BufReader::new(try!(File::open(name)))) { try!(event); event_count += 1; } Ok(event_count) } #[test] fn sample_1_short() { test( include_bytes!("documents/sample_1.xml"), include_bytes!("documents/sample_1_short.txt"), ParserConfig::new() .ignore_comments(true) .whitespace_to_characters(true) .cdata_to_characters(true) .trim_whitespace(true) .coalesce_characters(true), false ); } #[test] fn sample_1_full() { test( include_bytes!("documents/sample_1.xml"), include_bytes!("documents/sample_1_full.txt"), ParserConfig::new() .ignore_comments(false) .whitespace_to_characters(false) .cdata_to_characters(false) .trim_whitespace(false) .coalesce_characters(false), false ); } #[test] fn sample_2_short() { test( include_bytes!("documents/sample_2.xml"), include_bytes!("documents/sample_2_short.txt"), ParserConfig::new() .ignore_comments(true) .whitespace_to_characters(true) .cdata_to_characters(true) .trim_whitespace(true) .coalesce_characters(true), false ); } #[test] fn sample_2_full() { test( include_bytes!("documents/sample_2.xml"), include_bytes!("documents/sample_2_full.txt"), ParserConfig::new() .ignore_comments(false) .whitespace_to_characters(false) .cdata_to_characters(false) .trim_whitespace(false) .coalesce_characters(false), false ); } #[test] fn sample_3_short() { test( include_bytes!("documents/sample_3.xml"), include_bytes!("documents/sample_3_short.txt"), ParserConfig::new() .ignore_comments(true) .whitespace_to_characters(true) .cdata_to_characters(true) .trim_whitespace(true) .coalesce_characters(true), true ); } #[test] fn sample_3_full() { test( include_bytes!("documents/sample_3.xml"), include_bytes!("documents/sample_3_full.txt"), ParserConfig::new() .ignore_comments(false) .whitespace_to_characters(false) .cdata_to_characters(false) .trim_whitespace(false) .coalesce_characters(false), true ); } #[test] fn sample_4_short() { test( include_bytes!("documents/sample_4.xml"), include_bytes!("documents/sample_4_short.txt"), ParserConfig::new() .ignore_comments(true) .whitespace_to_characters(true) .cdata_to_characters(true) .trim_whitespace(true) .coalesce_characters(true), false ); } #[test] fn sample_4_full() { test( include_bytes!("documents/sample_4.xml"), include_bytes!("documents/sample_4_full.txt"), ParserConfig::new() .ignore_comments(false) .whitespace_to_characters(false) .cdata_to_characters(false) .trim_whitespace(false) .coalesce_characters(false), false ); } #[test] fn sample_5_short() { test( include_bytes!("documents/sample_5.xml"), include_bytes!("documents/sample_5_short.txt"), ParserConfig::new() .ignore_comments(true) .whitespace_to_characters(true) .cdata_to_characters(true) .trim_whitespace(true) .coalesce_characters(true) .add_entity("nbsp", " ") .add_entity("copy", "©") .add_entity("NotEqualTilde", "≂̸"), false ); } #[test] fn sample_6_full() { test( include_bytes!("documents/sample_6.xml"), include_bytes!("documents/sample_6_full.txt"), ParserConfig::new() .ignore_root_level_whitespace(false) .ignore_comments(false) .whitespace_to_characters(false) .cdata_to_characters(false) .trim_whitespace(false) .coalesce_characters(false), false ); } #[test] fn eof_1() { test( br#""#, br#" |1:14 Unexpected token '--' before ' ' "#, ParserConfig::new(), false ); test( br#""#, br#" |1:14 Unexpected token '--' before '-' "#, ParserConfig::new(), false ); } #[test] fn tabs_1() { test( b"\t\t", br#" |1:2 StartDocument(1.0, UTF-8) |1:2 StartElement(a) |1:6 StartElement(b) |1:6 EndElement(b) |1:10 EndElement(a) |1:14 EndDocument "#, ParserConfig::new() .trim_whitespace(true), true ); } #[test] fn issue_32_unescaped_cdata_end() { test( br#"]]>"#, br#" |StartDocument(1.0, UTF-8) |StartElement(hello) |Characters("]]>") |EndElement(hello) |EndDocument "#, ParserConfig::new(), false ); } #[test] fn issue_unescaped_processing_instruction_end() { test( br#"?>"#, br#" |StartDocument(1.0, UTF-8) |StartElement(hello) |Characters("?>") |EndElement(hello) |EndDocument "#, ParserConfig::new(), false ); } #[test] fn issue_unescaped_empty_tag_end() { test( br#"/>"#, br#" |StartDocument(1.0, UTF-8) |StartElement(hello) |Characters("/>") |EndElement(hello) |EndDocument "#, ParserConfig::new(), false ); } #[test] fn issue_83_duplicate_attributes() { test( br#""#, br#" |StartDocument(1.0, UTF-8) |StartElement(hello) |1:30 Attribute 'a' is redefined "#, ParserConfig::new(), false ); } #[test] fn issue_93_large_characters_in_entity_references() { test( r#"&𤶼;"#.as_bytes(), r#" |StartDocument(1.0, UTF-8) |StartElement(hello) |1:10 Unexpected entity: 𤶼 "#.as_bytes(), // FIXME: it shouldn't be 10, looks like indices are off slightly ParserConfig::new(), false ) } #[test] fn issue_98_cdata_ending_with_right_bracket() { test( br#""#, br#" |StartDocument(1.0, UTF-8) |StartElement(hello) |CData("Foo [Bar]") |EndElement(hello) |EndDocument "#, ParserConfig::new(), false ) } #[test] fn issue_105_unexpected_double_dash() { test( br#"-- "#, br#" |StartDocument(1.0, UTF-8) |StartElement(hello) |Characters("-- ") |EndElement(hello) |EndDocument "#, ParserConfig::new(), false ); test( br#"--"#, br#" |StartDocument(1.0, UTF-8) |StartElement(hello) |Characters("--") |EndElement(hello) |EndDocument "#, ParserConfig::new(), false ); test( br#"-->"#, br#" |StartDocument(1.0, UTF-8) |StartElement(hello) |Characters("-->") |EndElement(hello) |EndDocument "#, ParserConfig::new(), false ); test( br#""#, br#" |StartDocument(1.0, UTF-8) |StartElement(hello) |CData("--") |EndElement(hello) |EndDocument "#, ParserConfig::new(), false ); } #[test] fn issue_attribues_have_no_default_namespace () { test( br#""#, br#" |StartDocument(1.0, UTF-8) |StartElement({urn:foo}hello [x="y"]) |EndElement({urn:foo}hello) |EndDocument "#, ParserConfig::new(), false ); } #[test] fn issue_replacement_character_entity_reference() { test( br#"��"#, br#" |StartDocument(1.0, UTF-8) |StartElement(doc) |1:13 Invalid decimal character number in an entity: #55357 "#, ParserConfig::new(), false, ); test( br#"��"#, br#" |StartDocument(1.0, UTF-8) |StartElement(doc) |1:13 Invalid hexadecimal character number in an entity: #xd83d "#, ParserConfig::new(), false, ); test( br#"��"#, format!( r#" |StartDocument(1.0, UTF-8) |StartElement(doc) |Characters("{replacement_character}{replacement_character}") |EndElement(doc) |EndDocument "#, replacement_character = "\u{fffd}" ) .as_bytes(), ParserConfig::new() .replace_unknown_entity_references(true), false, ); test( br#"��"#, format!( r#" |StartDocument(1.0, UTF-8) |StartElement(doc) |Characters("{replacement_character}{replacement_character}") |EndElement(doc) |EndDocument "#, replacement_character = "\u{fffd}" ) .as_bytes(), ParserConfig::new() .replace_unknown_entity_references(true), false, ); } lazy_static! { // If PRINT_SPEC env variable is set, print the lines // to stderr instead of comparing with the output // it can be used like this: // PRINT_SPEC=1 cargo test --test event_reader sample_1_full 2> sample_1_full.txt static ref PRINT: bool = { for (key, value) in env::vars() { if key == "PRINT_SPEC" && value == "1" { return true; } } false }; } // clones a lot but that's fine fn trim_until_bar(s: String) -> String { match s.trim() { ts if ts.starts_with('|') => return ts[1..].to_owned(), _ => {} } s } fn test(input: &[u8], output: &[u8], config: ParserConfig, test_position: bool) { let mut reader = config.create_reader(input); let mut spec_lines = BufReader::new(output).lines() .map(|line| line.unwrap()) .enumerate() .map(|(i, line)| (i, trim_until_bar(line))) .filter(|&(_, ref line)| !line.trim().is_empty()); loop { let e = reader.next(); let line = if test_position { format!("{} {}", reader.position(), Event(&e)) } else { format!("{}", Event(&e)) }; if *PRINT { writeln!(&mut stderr(), "{}", line).unwrap(); } else { if let Some((n, spec)) = spec_lines.next() { if line != spec { const SPLITTER: &'static str = "-------------------"; panic!("\n{}\nUnexpected event at line {}:\nExpected: {}\nFound: {}\n{}\n", SPLITTER, n + 1, spec, line, std::str::from_utf8(output).unwrap()); } } else { panic!("Unexpected event: {}", line); } } match e { Ok(XmlEvent::EndDocument) | Err(_) => break, _ => {}, } } } // Here we define our own string representation of events so we don't depend // on the specifics of Display implementation for XmlEvent and OwnedName. struct Name<'a>(&'a OwnedName); impl <'a> fmt::Display for Name<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { if let Some(ref namespace) = self.0.namespace { try! { write!(f, "{{{}}}", namespace) } } if let Some(ref prefix) = self.0.prefix { try! { write!(f, "{}:", prefix) } } write!(f, "{}", self.0.local_name) } } struct Event<'a>(&'a Result); impl<'a> fmt::Display for Event<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let empty = String::new(); match *self.0 { Ok(ref e) => match *e { XmlEvent::StartDocument { ref version, ref encoding, .. } => write!(f, "StartDocument({}, {})", version, encoding), XmlEvent::EndDocument => write!(f, "EndDocument"), XmlEvent::ProcessingInstruction { ref name, ref data } => write!(f, "ProcessingInstruction({}={:?})", name, data.as_ref().unwrap_or(&empty)), XmlEvent::StartElement { ref name, ref attributes, .. } => { if attributes.is_empty() { write!(f, "StartElement({})", Name(name)) } else { let attrs: Vec<_> = attributes.iter() .map(|a| format!("{}={:?}", Name(&a.name), a.value)) .collect(); write!(f, "StartElement({} [{}])", Name(name), attrs.join(", ")) } }, XmlEvent::EndElement { ref name } => write!(f, "EndElement({})", Name(name)), XmlEvent::Comment(ref data) => write!(f, r#"Comment("{}")"#, data.escape_debug()), XmlEvent::CData(ref data) => write!(f, r#"CData("{}")"#, data.escape_debug()), XmlEvent::Characters(ref data) => write!(f, r#"Characters("{}")"#, data.escape_debug()), XmlEvent::Whitespace(ref data) => write!(f, r#"Whitespace("{}")"#, data.escape_debug()), }, Err(ref e) => e.fmt(f), } } }