summaryrefslogtreecommitdiffstats
path: root/third_party/rust/xml-rs/src
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 19:33:14 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 19:33:14 +0000
commit36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/rust/xml-rs/src
parentInitial commit. (diff)
downloadfirefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz
firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip
Adding upstream version 115.7.0esr.upstream/115.7.0esrupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/xml-rs/src')
-rw-r--r--third_party/rust/xml-rs/src/analyze.rs99
-rw-r--r--third_party/rust/xml-rs/src/attribute.rs99
-rw-r--r--third_party/rust/xml-rs/src/common.rs142
-rw-r--r--third_party/rust/xml-rs/src/escape.rs126
-rw-r--r--third_party/rust/xml-rs/src/lib.rs29
-rw-r--r--third_party/rust/xml-rs/src/macros.rs30
-rw-r--r--third_party/rust/xml-rs/src/name.rs301
-rw-r--r--third_party/rust/xml-rs/src/namespace.rs485
-rw-r--r--third_party/rust/xml-rs/src/reader/config.rs181
-rw-r--r--third_party/rust/xml-rs/src/reader/error.rs121
-rw-r--r--third_party/rust/xml-rs/src/reader/events.rs219
-rw-r--r--third_party/rust/xml-rs/src/reader/lexer.rs867
-rw-r--r--third_party/rust/xml-rs/src/reader/mod.rs129
-rw-r--r--third_party/rust/xml-rs/src/reader/parser/inside_cdata.rs32
-rw-r--r--third_party/rust/xml-rs/src/reader/parser/inside_closing_tag_name.rs34
-rw-r--r--third_party/rust/xml-rs/src/reader/parser/inside_comment.rs32
-rw-r--r--third_party/rust/xml-rs/src/reader/parser/inside_declaration.rs151
-rw-r--r--third_party/rust/xml-rs/src/reader/parser/inside_doctype.rs16
-rw-r--r--third_party/rust/xml-rs/src/reader/parser/inside_opening_tag.rs108
-rw-r--r--third_party/rust/xml-rs/src/reader/parser/inside_processing_instruction.rs96
-rw-r--r--third_party/rust/xml-rs/src/reader/parser/inside_reference.rs89
-rw-r--r--third_party/rust/xml-rs/src/reader/parser/mod.rs622
-rw-r--r--third_party/rust/xml-rs/src/reader/parser/outside_tag.rs130
-rw-r--r--third_party/rust/xml-rs/src/util.rs107
-rw-r--r--third_party/rust/xml-rs/src/writer/config.rs157
-rw-r--r--third_party/rust/xml-rs/src/writer/emitter.rs447
-rw-r--r--third_party/rust/xml-rs/src/writer/events.rs241
-rw-r--r--third_party/rust/xml-rs/src/writer/mod.rs93
28 files changed, 5183 insertions, 0 deletions
diff --git a/third_party/rust/xml-rs/src/analyze.rs b/third_party/rust/xml-rs/src/analyze.rs
new file mode 100644
index 0000000000..d369d2f014
--- /dev/null
+++ b/third_party/rust/xml-rs/src/analyze.rs
@@ -0,0 +1,99 @@
+#![forbid(unsafe_code)]
+
+extern crate xml;
+
+use std::cmp;
+use std::env;
+use std::io::{self, Read, Write, BufReader};
+use std::fs::File;
+use std::collections::HashSet;
+
+use xml::ParserConfig;
+use xml::reader::XmlEvent;
+
+// Terminates the process with the given exit code. When a format string and
+// arguments follow the code, the formatted line is written to stderr first.
+macro_rules! abort {
+    ($code:expr) => {
+        ::std::process::exit($code)
+    };
+    ($code:expr, $($args:tt)+) => {{
+        let mut stderr = ::std::io::stderr();
+        writeln!(stderr, $($args)+).unwrap();
+        ::std::process::exit($code);
+    }}
+}
+
+/// Analyzes an XML document and prints summary statistics to standard output.
+///
+/// The document is read from the file named by the first command-line argument,
+/// or from standard input when no argument is given. If the file cannot be
+/// opened or the document fails to parse, a message is written to stderr and
+/// the process exits with status 1.
+fn main() {
+    let mut file;
+    let mut stdin;
+    // `dyn Read` spells the trait object explicitly; the bare `&mut Read` form is deprecated.
+    let source: &mut dyn Read = match env::args().nth(1) {
+        Some(file_name) => {
+            file = File::open(file_name)
+                .unwrap_or_else(|e| abort!(1, "Cannot open input file: {}", e));
+            &mut file
+        }
+        None => {
+            stdin = io::stdin();
+            &mut stdin
+        }
+    };
+
+    let reader = ParserConfig::new()
+        .whitespace_to_characters(true)
+        .ignore_comments(false)
+        .create_reader(BufReader::new(source));
+
+    let mut processing_instructions = 0;
+    let mut elements = 0;
+    let mut character_blocks = 0;
+    let mut cdata_blocks = 0;
+    let mut characters = 0;
+    let mut comment_blocks = 0;
+    let mut comment_characters = 0;
+    let mut namespaces = HashSet::new();
+    let mut depth = 0;
+    let mut max_depth = 0;
+
+    for e in reader {
+        match e {
+            Ok(e) => match e {
+                XmlEvent::StartDocument { version, encoding, standalone } =>
+                    println!(
+                        "XML document version {}, encoded in {}, {}standalone",
+                        version, encoding, if standalone.unwrap_or(false) { "" } else { "not " }
+                    ),
+                XmlEvent::EndDocument => println!("Document finished"),
+                XmlEvent::ProcessingInstruction { .. } => processing_instructions += 1,
+                XmlEvent::Whitespace(_) => {} // can't happen due to configuration
+                XmlEvent::Characters(s) => {
+                    character_blocks += 1;
+                    // Count Unicode scalar values: `len()` would report UTF-8 bytes.
+                    characters += s.chars().count();
+                }
+                XmlEvent::CData(s) => {
+                    cdata_blocks += 1;
+                    characters += s.chars().count();
+                }
+                XmlEvent::Comment(s) => {
+                    comment_blocks += 1;
+                    comment_characters += s.chars().count();
+                }
+                XmlEvent::StartElement { namespace, .. } => {
+                    depth += 1;
+                    max_depth = cmp::max(max_depth, depth);
+                    elements += 1;
+                    // Collect every namespace URI visible at this element.
+                    namespaces.extend(namespace.0.into_iter().map(|(_, ns_uri)| ns_uri));
+                }
+                XmlEvent::EndElement { .. } => {
+                    depth -= 1;
+                }
+            },
+            Err(e) => abort!(1, "Error parsing XML document: {}", e)
+        }
+    }
+    // The built-in namespaces are not reported.
+    namespaces.remove(xml::namespace::NS_EMPTY_URI);
+    namespaces.remove(xml::namespace::NS_XMLNS_URI);
+    namespaces.remove(xml::namespace::NS_XML_URI);
+
+    println!("Elements: {}, maximum depth: {}", elements, max_depth);
+    println!("Namespaces (excluding built-in): {}", namespaces.len());
+    println!("Characters: {}, characters blocks: {}, CDATA blocks: {}",
+             characters, character_blocks, cdata_blocks);
+    println!("Comment blocks: {}, comment characters: {}", comment_blocks, comment_characters);
+    println!("Processing instructions (excluding built-in): {}", processing_instructions);
+}
diff --git a/third_party/rust/xml-rs/src/attribute.rs b/third_party/rust/xml-rs/src/attribute.rs
new file mode 100644
index 0000000000..8728f496d7
--- /dev/null
+++ b/third_party/rust/xml-rs/src/attribute.rs
@@ -0,0 +1,99 @@
+//! Contains XML attributes manipulation types and functions.
+//!
+
+use std::fmt;
+
+use name::{Name, OwnedName};
+use escape::escape_str_attribute;
+
+/// Borrowed view of a single XML attribute.
+///
+/// Pairs a borrowed qualified name with a borrowed string value.
+#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
+pub struct Attribute<'a> {
+    /// Qualified name of the attribute.
+    pub name: Name<'a>,
+
+    /// Value of the attribute.
+    pub value: &'a str
+}
+
+impl<'a> fmt::Display for Attribute<'a> {
+    /// Writes `name="value"`, passing the value through `escape_str_attribute`.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let escaped = escape_str_attribute(self.value);
+        write!(f, "{}=\"{}\"", self.name, escaped)
+    }
+}
+
+impl<'a> Attribute<'a> {
+    /// Builds a borrowed attribute from a borrowed name and a borrowed string value.
+    #[inline]
+    pub fn new(name: Name<'a>, value: &'a str) -> Attribute<'a> {
+        Attribute { name, value }
+    }
+
+    /// Converts this borrowed attribute into an owned one.
+    #[inline]
+    pub fn to_owned(&self) -> OwnedAttribute {
+        let name = OwnedName::from(self.name);
+        let value = String::from(self.value);
+        OwnedAttribute { name, value }
+    }
+}
+
+/// Owned counterpart of `Attribute`.
+///
+/// Holds an owned qualified name together with an owned string value.
+#[derive(Clone, Eq, PartialEq, Hash, Debug)]
+pub struct OwnedAttribute {
+    /// Qualified name of the attribute.
+    pub name: OwnedName,
+
+    /// Value of the attribute.
+    pub value: String
+}
+
+impl OwnedAttribute {
+    /// Builds an owned attribute from an owned name and anything convertible into a `String`.
+    #[inline]
+    pub fn new<S: Into<String>>(name: OwnedName, value: S) -> OwnedAttribute {
+        let value: String = value.into();
+        OwnedAttribute { name, value }
+    }
+
+    /// Produces a borrowed `Attribute` that points into this owned one.
+    pub fn borrow(&self) -> Attribute {
+        Attribute::new(self.name.borrow(), self.value.as_str())
+    }
+}
+
+impl fmt::Display for OwnedAttribute {
+    /// Formats exactly like the borrowed `Attribute`: `name="escaped value"`.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        fmt::Display::fmt(&self.borrow(), f)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::Attribute;
+
+    use name::Name;
+
+    /// Formatting an attribute prints the namespace-qualified name and escapes the value.
+    #[test]
+    fn attribute_display() {
+        let name = Name::qualified("attribute", "urn:namespace", Some("n"));
+        let attr = Attribute::new(name, "its value with > & \" ' < weird symbols");
+
+        let rendered = attr.to_string();
+        let expected =
+            "{urn:namespace}n:attribute=\"its value with &gt; &amp; &quot; &apos; &lt; weird symbols\"";
+
+        assert_eq!(rendered, expected);
+    }
+}
diff --git a/third_party/rust/xml-rs/src/common.rs b/third_party/rust/xml-rs/src/common.rs
new file mode 100644
index 0000000000..029e8515af
--- /dev/null
+++ b/third_party/rust/xml-rs/src/common.rs
@@ -0,0 +1,142 @@
+//! Contains common types and functions used throughout the library.
+
+use std::fmt;
+
+/// Represents a position inside some textual document.
+#[derive(Copy, Clone, PartialEq, Eq)]
+pub struct TextPosition {
+    /// Row, counting from 0
+    pub row: u64,
+    /// Column, counting from 0
+    pub column: u64,
+}
+
+impl TextPosition {
+    /// Creates a new position initialized to the beginning of the document
+    /// (row 0, column 0).
+    #[inline]
+    pub fn new() -> TextPosition {
+        TextPosition { row: 0, column: 0 }
+    }
+
+    /// Advances the position in a line by `count` columns.
+    #[inline]
+    pub fn advance(&mut self, count: u8) {
+        self.column += u64::from(count);
+    }
+
+    /// Advances the position in a line to the next tab position, where tab
+    /// positions are the multiples of `width`.
+    ///
+    /// A `width` of 0 leaves the position unchanged; without this guard the
+    /// remainder computation would panic on a zero divisor.
+    #[inline]
+    pub fn advance_to_tab(&mut self, width: u8) {
+        if width == 0 {
+            return;
+        }
+        let width = u64::from(width);
+        self.column += width - self.column % width;
+    }
+
+    /// Advances the position to the beginning of the next line.
+    #[inline]
+    pub fn new_line(&mut self) {
+        self.column = 0;
+        self.row += 1;
+    }
+}
+
+impl fmt::Debug for TextPosition {
+    /// Writes the position as one-based `row:column`.
+    #[inline]
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let (line, col) = (self.row + 1, self.column + 1);
+        f.write_fmt(format_args!("{}:{}", line, col))
+    }
+}
+
+impl fmt::Display for TextPosition {
+    /// Writes the position as one-based `row:column`.
+    #[inline]
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let one_based_row = self.row + 1;
+        let one_based_column = self.column + 1;
+        write!(f, "{}:{}", one_based_row, one_based_column)
+    }
+}
+
+/// Reports the position in the document that corresponds to an object.
+///
+/// This trait is implemented by parsers, lexers and errors.
+pub trait Position {
+    /// Returns the current position or a position corresponding to the object.
+    fn position(&self) -> TextPosition;
+}
+
+impl Position for TextPosition {
+    /// A text position is its own position; a copy of it is returned.
+    #[inline]
+    fn position(&self) -> TextPosition {
+        let TextPosition { row, column } = *self;
+        TextPosition { row, column }
+    }
+}
+
+/// XML version enumeration.
+#[derive(Copy, Clone, PartialEq, Eq)]
+pub enum XmlVersion {
+    /// XML version 1.0.
+    Version10,
+
+    /// XML version 1.1.
+    Version11
+}
+
+impl fmt::Display for XmlVersion {
+    /// Writes the version as `1.0` or `1.1`.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let text = match *self {
+            XmlVersion::Version10 => "1.0",
+            XmlVersion::Version11 => "1.1"
+        };
+        f.write_str(text)
+    }
+}
+
+impl fmt::Debug for XmlVersion {
+    /// Debug output is identical to the `Display` output.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}", self)
+    }
+}
+
+/// Tells whether `c` is one of the four XML white space characters (`S`):
+/// space, tab, carriage return or line feed, as defined by the XML 1.1
+/// specification, [section 2.3][1].
+///
+/// [1]: http://www.w3.org/TR/2006/REC-xml11-20060816/#sec-common-syn
+pub fn is_whitespace_char(c: char) -> bool {
+    c == ' ' || c == '\t' || c == '\r' || c == '\n'
+}
+
+/// Tells whether every character of `s` is a white space character (`S`)
+/// according to `is_whitespace_char`. An empty string yields `true`.
+pub fn is_whitespace_str(s: &str) -> bool {
+    for c in s.chars() {
+        if !is_whitespace_char(c) {
+            return false;
+        }
+    }
+    true
+}
+
+/// Checks whether the given character is a name start character (`NameStartChar`)
+/// as is defined by XML 1.1 specification, [section 2.3][1].
+///
+/// [1]: http://www.w3.org/TR/2006/REC-xml11-20060816/#sec-common-syn
+pub fn is_name_start_char(c: char) -> bool {
+    // Inclusive ranges use `..=`; the older `...` pattern syntax is deprecated.
+    match c {
+        ':' | 'A'..='Z' | '_' | 'a'..='z' |
+        '\u{C0}'..='\u{D6}' | '\u{D8}'..='\u{F6}' | '\u{F8}'..='\u{2FF}' |
+        '\u{370}'..='\u{37D}' | '\u{37F}'..='\u{1FFF}' |
+        '\u{200C}'..='\u{200D}' | '\u{2070}'..='\u{218F}' |
+        '\u{2C00}'..='\u{2FEF}' | '\u{3001}'..='\u{D7FF}' |
+        '\u{F900}'..='\u{FDCF}' | '\u{FDF0}'..='\u{FFFD}' |
+        '\u{10000}'..='\u{EFFFF}' => true,
+        _ => false
+    }
+}
+
+/// Checks whether the given character is a name character (`NameChar`)
+/// as is defined by XML 1.1 specification, [section 2.3][1].
+///
+/// Every name start character is also a name character.
+///
+/// [1]: http://www.w3.org/TR/2006/REC-xml11-20060816/#sec-common-syn
+pub fn is_name_char(c: char) -> bool {
+    match c {
+        _ if is_name_start_char(c) => true,
+        '-' | '.' | '0'..='9' | '\u{B7}' |
+        '\u{300}'..='\u{36F}' | '\u{203F}'..='\u{2040}' => true,
+        _ => false
+    }
+}
diff --git a/third_party/rust/xml-rs/src/escape.rs b/third_party/rust/xml-rs/src/escape.rs
new file mode 100644
index 0000000000..18298b9be1
--- /dev/null
+++ b/third_party/rust/xml-rs/src/escape.rs
@@ -0,0 +1,126 @@
+//! Contains functions for performing XML special characters escaping.
+
+use std::borrow::Cow;
+
+/// Outcome of classifying a single input character for escaping.
+enum Value {
+    /// The character is kept as it is.
+    Char(char),
+    /// The character is replaced by this string.
+    Str(&'static str)
+}
+
+impl Value {
+    /// Classifies `c` for output inside an attribute value.
+    fn dispatch_for_attribute(c: char) -> Value {
+        let replacement = match c {
+            '<' => "&lt;",
+            '>' => "&gt;",
+            '"' => "&quot;",
+            '\'' => "&apos;",
+            '&' => "&amp;",
+            '\n' => "&#xA;",
+            '\r' => "&#xD;",
+            _ => return Value::Char(c)
+        };
+        Value::Str(replacement)
+    }
+
+    /// Classifies `c` for output inside PCDATA: only `<` and `&` are replaced.
+    fn dispatch_for_pcdata(c: char) -> Value {
+        if c == '<' {
+            Value::Str("&lt;")
+        } else if c == '&' {
+            Value::Str("&amp;")
+        } else {
+            Value::Char(c)
+        }
+    }
+}
+
+// Accumulator used while escaping a string.
+//
+// It stays `Borrowed` (holding the untouched input) until the first replacement
+// string arrives; from then on it is `Owned` and holds the output built so far.
+enum Process<'a> {
+ Borrowed(&'a str),
+ Owned(String)
+}
+
+impl<'a> Process<'a> {
+ // Feeds one classified character into the accumulator.
+ //
+ // `i` is used as a byte index into the borrowed input: everything before it
+ // is copied verbatim when the accumulator first switches to `Owned`.
+ fn process(&mut self, (i, next): (usize, Value)) {
+ match next {
+ Value::Str(s) => match *self {
+ Process::Owned(ref mut o) => o.push_str(s),
+ Process::Borrowed(b) => {
+ // First replacement: allocate, copy the unchanged prefix `b[..i]`,
+ // append the replacement and switch to the owned state.
+ let mut r = String::with_capacity(b.len() + s.len());
+ r.push_str(&b[..i]);
+ r.push_str(s);
+ *self = Process::Owned(r);
+ }
+ },
+ Value::Char(c) => match *self {
+ // Nothing has been replaced yet, so the borrowed input still matches the output.
+ Process::Borrowed(_) => {}
+ Process::Owned(ref mut o) => o.push(c)
+ }
+ }
+ }
+
+ // Converts the accumulator into a `Cow`, borrowed when no replacement happened.
+ fn into_result(self) -> Cow<'a, str> {
+ match self {
+ Process::Borrowed(b) => Cow::Borrowed(b),
+ Process::Owned(o) => Cow::Owned(o)
+ }
+ }
+}
+
+impl<'a> Extend<(usize, Value)> for Process<'a> {
+    /// Feeds every `(index, value)` pair of `it` to `process`, in order.
+    fn extend<I: IntoIterator<Item=(usize, Value)>>(&mut self, it: I) {
+        it.into_iter().for_each(|item| self.process(item));
+    }
+}
+
+/// Escapes `s` by classifying each character with `dispatch` and feeding the
+/// result, together with the character's byte offset, into a `Process`.
+fn escape_str(s: &str, dispatch: fn(char) -> Value) -> Cow<str> {
+    let mut state = Process::Borrowed(s);
+    for (offset, ch) in s.char_indices() {
+        state.process((offset, dispatch(ch)));
+    }
+    state.into_result()
+}
+
+/// Performs escaping of common XML characters inside an attribute value.
+///
+/// This function replaces several important markup characters with their
+/// entity or character reference equivalents:
+///
+/// * `<` → `&lt;`
+/// * `>` → `&gt;`
+/// * `"` → `&quot;`
+/// * `'` → `&apos;`
+/// * `&` → `&amp;`
+/// * line feed (`\n`) → `&#xA;`
+/// * carriage return (`\r`) → `&#xD;`
+///
+/// The resulting string is safe to use inside XML attribute values or in PCDATA sections.
+///
+/// Does not perform allocations if the given string does not contain escapable characters.
+#[inline]
+pub fn escape_str_attribute(s: &str) -> Cow<str> {
+ escape_str(s, Value::dispatch_for_attribute)
+}
+
+/// Escapes the markup characters that are significant inside PCDATA.
+///
+/// Two characters are replaced with their entity equivalents:
+///
+/// * `<` → `&lt;`
+/// * `&` → `&amp;`
+///
+/// The resulting string is safe to use inside PCDATA sections but NOT inside attribute values.
+///
+/// No allocation happens when the given string contains neither character.
+#[inline]
+pub fn escape_str_pcdata(s: &str) -> Cow<str> {
+    let classify: fn(char) -> Value = Value::dispatch_for_pcdata;
+    escape_str(s, classify)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{escape_str_attribute, escape_str_pcdata};
+
+    // TODO: add more tests
+
+    /// A multi-byte character in front of an escaped one must be copied intact.
+    #[test]
+    fn test_escape_multibyte_code_points() {
+        let input = "☃<";
+        let expected = "☃&lt;";
+
+        assert_eq!(escape_str_attribute(input), expected);
+        assert_eq!(escape_str_pcdata(input), expected);
+    }
+}
+
diff --git a/third_party/rust/xml-rs/src/lib.rs b/third_party/rust/xml-rs/src/lib.rs
new file mode 100644
index 0000000000..fb672efea0
--- /dev/null
+++ b/third_party/rust/xml-rs/src/lib.rs
@@ -0,0 +1,29 @@
+// Crate-level lint configuration.
+// NOTE(review): the disabled line below uses the lint name `missing_doc`;
+// current compilers spell it `missing_docs` — confirm before re-enabling.
+//#![warn(missing_doc)]
+#![allow(dead_code)]
+#![allow(unused_variables)]
+#![forbid(non_camel_case_types)]
+#![forbid(unsafe_code)]
+
+//! This crate currently provides an almost XML 1.0/1.1-compliant pull parser.
+
+// Only compiled under `cfg(doctest)`: presumably runs the code samples of the
+// README as documentation tests via the `doc_comment` crate.
+#[cfg(doctest)]
+#[macro_use]
+extern crate doc_comment;
+
+#[cfg(doctest)]
+doctest!("../Readme.md");
+
+// Re-exports of the reader and writer entry points and their configuration types.
+pub use reader::EventReader;
+pub use reader::ParserConfig;
+pub use writer::EventWriter;
+pub use writer::EmitterConfig;
+
+// Public modules; `util` is the only private one.
+pub mod macros;
+pub mod name;
+pub mod attribute;
+pub mod common;
+pub mod escape;
+pub mod namespace;
+pub mod reader;
+pub mod writer;
+mod util;
diff --git a/third_party/rust/xml-rs/src/macros.rs b/third_party/rust/xml-rs/src/macros.rs
new file mode 100644
index 0000000000..1cce3d6a5e
--- /dev/null
+++ b/third_party/rust/xml-rs/src/macros.rs
@@ -0,0 +1,30 @@
+#![macro_use]
+
+//! Contains several macros used in this crate.
+
+// Generates an `impl $target` block containing one by-value setter method
+// named after `$field`. The method stores the new value in `self.$field` and
+// returns the updated `self`.
+//
+// Two forms are accepted:
+//   `field : into Type` — the setter takes any `T: Into<Type>` and stores `value.into()`;
+//   `field : val Type`  — the setter takes a `Type` and stores it directly.
+macro_rules! gen_setter {
+ ($target:ty, $field:ident : into $t:ty) => {
+ impl $target {
+ /// Sets the field to the provided value and returns updated config object.
+ pub fn $field<T: Into<$t>>(mut self, value: T) -> $target {
+ self.$field = value.into();
+ self
+ }
+ }
+ };
+ ($target:ty, $field:ident : val $t:ty) => {
+ impl $target {
+ /// Sets the field to the provided value and returns updated config object.
+ pub fn $field(mut self, value: $t) -> $target {
+ self.$field = value;
+ self
+ }
+ }
+ }
+}
+
+// Invokes `gen_setter!` once for every comma-separated `field : kind Type`
+// entry; `kind` is forwarded as a single token (`into` or `val` are the forms
+// `gen_setter!` accepts).
+macro_rules! gen_setters {
+ ($target:ty, $($field:ident : $k:tt $tpe:ty),+) => ($(
+ gen_setter! { $target, $field : $k $tpe }
+ )+)
+}
diff --git a/third_party/rust/xml-rs/src/name.rs b/third_party/rust/xml-rs/src/name.rs
new file mode 100644
index 0000000000..a20eae2f10
--- /dev/null
+++ b/third_party/rust/xml-rs/src/name.rs
@@ -0,0 +1,301 @@
+//! Contains XML qualified names manipulation types and functions.
+//!
+
+use std::fmt;
+use std::str::FromStr;
+
+use namespace::NS_NO_PREFIX;
+
+/// Represents a qualified XML name.
+///
+/// A qualified name always consists at least of a local name. It can optionally contain
+/// a prefix; when reading an XML document, if it contains a prefix, it must also contain a
+/// namespace URI, but this is not enforced statically; see below. The name can contain a
+/// namespace without a prefix; in that case a default, empty prefix is assumed.
+///
+/// When writing XML documents, it is possible to omit the namespace URI, leaving only
+/// the prefix. In this case the writer will check that the specified prefix is bound to some
+/// URI in the current namespace context. If both prefix and namespace URI are specified,
+/// it is checked that the current namespace context contains this exact correspondence
+/// between prefix and namespace URI.
+///
+/// # Prefixes and URIs
+///
+/// A qualified name with a prefix must always contain a proper namespace URI --- names with
+/// a prefix but without a namespace associated with that prefix are meaningless. However,
+/// it is impossible to obtain proper namespace URI by a prefix without a context, and such
+/// context is only available when parsing a document (or it can be constructed manually
+/// when writing a document). Tying a name to a context statically seems impractical. This
+/// may change in the future, though.
+///
+/// # Conversions
+///
+/// `Name` implements some `From` instances for conversion from strings and tuples. For example:
+///
+/// ```rust
+/// # use xml::name::Name;
+/// let n1: Name = "p:some-name".into();
+/// let n2: Name = ("p", "some-name").into();
+///
+/// assert_eq!(n1, n2);
+/// assert_eq!(n1.local_name, "some-name");
+/// assert_eq!(n1.prefix, Some("p"));
+/// assert!(n1.namespace.is_none());
+/// ```
+///
+/// This is added to support easy specification of XML elements when writing XML documents.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
+pub struct Name<'a> {
+ /// A local name, e.g. `string` in `xsi:string`.
+ pub local_name: &'a str,
+
+ /// A namespace URI, e.g. `http://www.w3.org/2000/xmlns/`.
+ pub namespace: Option<&'a str>,
+
+ /// A name prefix, e.g. `xsi` in `xsi:string`.
+ pub prefix: Option<&'a str>
+}
+
+impl<'a> From<&'a str> for Name<'a> {
+    /// Splits `s` at its first `:` into a prefix and a local name; a string
+    /// without `:` becomes a plain local name.
+    fn from(s: &'a str) -> Name<'a> {
+        match s.find(':') {
+            Some(colon) => Name::prefixed(&s[colon + 1..], &s[..colon]),
+            None => Name::local(s)
+        }
+    }
+}
+
+impl<'a> From<(&'a str, &'a str)> for Name<'a> {
+    /// Interprets the tuple as `(prefix, local_name)`.
+    fn from(pair: (&'a str, &'a str)) -> Name<'a> {
+        let (prefix, local_name) = pair;
+        Name::prefixed(local_name, prefix)
+    }
+}
+
+impl<'a> fmt::Display for Name<'a> {
+    /// Writes `{namespace}prefix:local_name`, leaving out the namespace and
+    /// prefix parts when they are absent.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self.namespace {
+            Some(uri) => write!(f, "{{{}}}", uri)?,
+            None => {}
+        }
+
+        match self.prefix {
+            Some(prefix) => write!(f, "{}:{}", prefix, self.local_name),
+            None => write!(f, "{}", self.local_name)
+        }
+    }
+}
+
+impl<'a> Name<'a> {
+    /// Copies every part of this name into a newly allocated `OwnedName`.
+    pub fn to_owned(&self) -> OwnedName {
+        let local_name = String::from(self.local_name);
+        let namespace = self.namespace.map(String::from);
+        let prefix = self.prefix.map(String::from);
+        OwnedName { local_name, namespace, prefix }
+    }
+
+    /// Builds a `Name` that consists of a local name only.
+    #[inline]
+    pub fn local(local_name: &str) -> Name {
+        Name { local_name, namespace: None, prefix: None }
+    }
+
+    /// Builds a `Name` from a local name and a prefix, without a namespace URI.
+    #[inline]
+    pub fn prefixed(local_name: &'a str, prefix: &'a str) -> Name<'a> {
+        Name { local_name, namespace: None, prefix: Some(prefix) }
+    }
+
+    /// Builds a `Name` with a namespace URI and an optional prefix.
+    #[inline]
+    pub fn qualified(local_name: &'a str, namespace: &'a str, prefix: Option<&'a str>) -> Name<'a> {
+        Name { local_name, namespace: Some(namespace), prefix }
+    }
+
+    /// Renders the prefix and local name the way they are written in XML.
+    ///
+    /// Unlike the automatically available `to_string()`, the result does not
+    /// contain the namespace URI.
+    pub fn to_repr(&self) -> String {
+        format!("{}", self.repr_display())
+    }
+
+    /// Wraps this name in a value whose `Display` output is the prefix and
+    /// local name, so that they can be formatted without building an
+    /// intermediate `String`.
+    #[inline]
+    pub fn repr_display(&self) -> ReprDisplay {
+        ReprDisplay(self)
+    }
+
+    /// Yields the prefix of this name, or `namespace::NS_NO_PREFIX` when there is none.
+    #[inline]
+    pub fn prefix_repr(&self) -> &str {
+        match self.prefix {
+            Some(prefix) => prefix,
+            None => NS_NO_PREFIX
+        }
+    }
+}
+
+/// Adapter over `Name` whose `Display` output is the name as it is written in
+/// an XML document: `prefix:local_name`, or just `local_name` without a prefix.
+pub struct ReprDisplay<'a, 'b:'a>(&'a Name<'b>);
+
+impl<'a, 'b:'a> fmt::Display for ReprDisplay<'a, 'b> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let name = self.0;
+        if let Some(prefix) = name.prefix {
+            write!(f, "{}:", prefix)?;
+        }
+        write!(f, "{}", name.local_name)
+    }
+}
+
+/// Owned counterpart of `Name`.
+///
+/// Everything said about `Name` holds for this structure too.
+#[derive(Clone, PartialEq, Eq, Hash, Debug)]
+pub struct OwnedName {
+    /// A local name, e.g. `string` in `xsi:string`.
+    pub local_name: String,
+
+    /// A namespace URI, e.g. `http://www.w3.org/2000/xmlns/`.
+    pub namespace: Option<String>,
+
+    /// A name prefix, e.g. `xsi` in `xsi:string`.
+    pub prefix: Option<String>,
+}
+
+impl fmt::Display for OwnedName {
+    /// Formats exactly like the borrowed `Name` obtained from `borrow()`.
+    #[inline]
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let borrowed = self.borrow();
+        fmt::Display::fmt(&borrowed, f)
+    }
+}
+
+impl OwnedName {
+    /// Builds a borrowed `Name` that points into this owned name.
+    pub fn borrow(&self) -> Name {
+        Name {
+            local_name: self.local_name.as_str(),
+            namespace: self.namespace_ref(),
+            prefix: self.prefix_ref(),
+        }
+    }
+
+    /// Builds an `OwnedName` that consists of a local name only.
+    #[inline]
+    pub fn local<S>(local_name: S) -> OwnedName where S: Into<String> {
+        let local_name: String = local_name.into();
+        OwnedName { local_name, namespace: None, prefix: None }
+    }
+
+    /// Builds an `OwnedName` with a namespace URI and an optional prefix.
+    #[inline]
+    pub fn qualified<S1, S2, S3>(local_name: S1, namespace: S2, prefix: Option<S3>) -> OwnedName
+        where S1: Into<String>, S2: Into<String>, S3: Into<String>
+    {
+        let local_name: String = local_name.into();
+        let namespace: Option<String> = Some(namespace.into());
+        let prefix: Option<String> = prefix.map(Into::into);
+        OwnedName { local_name, namespace, prefix }
+    }
+
+    /// Borrows the optional prefix; equivalent to `self.borrow().prefix`
+    /// without building the whole borrowed name.
+    #[inline]
+    pub fn prefix_ref(&self) -> Option<&str> {
+        self.prefix.as_ref().map(String::as_str)
+    }
+
+    /// Borrows the optional namespace; equivalent to `self.borrow().namespace`
+    /// without building the whole borrowed name.
+    #[inline]
+    pub fn namespace_ref(&self) -> Option<&str> {
+        self.namespace.as_ref().map(String::as_str)
+    }
+}
+
+impl<'a> From<Name<'a>> for OwnedName {
+    /// Converts through `Name::to_owned`.
+    #[inline]
+    fn from(n: Name<'a>) -> OwnedName {
+        Name::to_owned(&n)
+    }
+}
+
+impl FromStr for OwnedName {
+    type Err = ();
+
+    /// Parses a string slice of the form `local_name` or `prefix:local_name`
+    /// into a qualified name.
+    ///
+    /// When it finishes successfully, this function always returns a name
+    /// without a namespace (`name.namespace == None`); the namespace is meant
+    /// to be filled in later using a proper `NamespaceStack`.
+    ///
+    /// The characters of the argument are assumed to be valid as defined by
+    /// the XML specification. The only checks performed are that no part is
+    /// empty and that at most one `:` is present.
+    fn from_str(s: &str) -> Result<OwnedName, ()> {
+        let mut pieces = s.split(':');
+        let first = pieces.next();
+        let second = pieces.next();
+
+        // A third piece means the input contained more than one colon.
+        if pieces.next().is_some() {
+            return Err(());
+        }
+
+        let (prefix, local_name) = match (first, second) {
+            (Some(prefix), Some(local_name)) => (Some(prefix), local_name),
+            (Some(local_name), None) => (None, local_name),
+            (None, _) => return Err(())
+        };
+
+        let has_empty_part = local_name.is_empty() || prefix.map_or(false, |p| p.is_empty());
+        if has_empty_part {
+            return Err(());
+        }
+
+        Ok(OwnedName {
+            local_name: local_name.into(),
+            namespace: None,
+            prefix: prefix.map(|p| p.into())
+        })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::OwnedName;
+
+    /// Well-formed names parse into their parts; malformed ones are rejected.
+    #[test]
+    fn test_owned_name_from_str() {
+        let prefixed = OwnedName {
+            local_name: "name".into(),
+            namespace: None,
+            prefix: Some("prefix".into())
+        };
+        assert_eq!("prefix:name".parse(), Ok(prefixed));
+
+        let plain = OwnedName {
+            local_name: "name".into(),
+            namespace: None,
+            prefix: None
+        };
+        assert_eq!("name".parse(), Ok(plain));
+
+        for malformed in &["", ":", ":a", "a:", "a:b:c"] {
+            assert_eq!(malformed.parse::<OwnedName>(), Err(()));
+        }
+    }
+}
diff --git a/third_party/rust/xml-rs/src/namespace.rs b/third_party/rust/xml-rs/src/namespace.rs
new file mode 100644
index 0000000000..1ab4a5c025
--- /dev/null
+++ b/third_party/rust/xml-rs/src/namespace.rs
@@ -0,0 +1,485 @@
+//! Contains namespace manipulation types and functions.
+
+use std::iter::{Map, Rev};
+use std::collections::btree_map::{BTreeMap, Entry};
+use std::collections::btree_map::Iter as Entries;
+use std::collections::HashSet;
+use std::slice::Iter;
+
+/// Designates prefix for namespace definitions.
+///
+/// See [Namespaces in XML][namespace] spec for more information.
+///
+/// [namespace]: http://www.w3.org/TR/xml-names/#ns-decl
+pub const NS_XMLNS_PREFIX: &'static str = "xmlns";
+
+/// Designates the standard URI for `xmlns` prefix.
+///
+/// See [A Namespace Name for xmlns Attributes][1] for more information.
+///
+/// [1]: http://www.w3.org/2000/xmlns/
+pub const NS_XMLNS_URI: &'static str = "http://www.w3.org/2000/xmlns/";
+
+/// Designates prefix for a namespace containing several special predefined attributes.
+///
+/// See [2.10 White Space handling][1], [2.1 Language Identification][2],
+/// [XML Base specification][3] and [xml:id specification][4] for more information.
+///
+/// [1]: http://www.w3.org/TR/REC-xml/#sec-white-space
+/// [2]: http://www.w3.org/TR/REC-xml/#sec-lang-tag
+/// [3]: http://www.w3.org/TR/xmlbase/
+/// [4]: http://www.w3.org/TR/xml-id/
+pub const NS_XML_PREFIX: &'static str = "xml";
+
+/// Designates the standard URI for `xml` prefix.
+///
+/// See `NS_XML_PREFIX` documentation for more information.
+pub const NS_XML_URI: &'static str = "http://www.w3.org/XML/1998/namespace";
+
+/// Designates the absence of prefix in a qualified name.
+///
+/// This constant should be used to define or query default namespace which should be used
+/// for element or attribute names without prefix. For example, if a namespace mapping
+/// at a particular point in the document contains correspondence like
+///
+/// ```none
+/// NS_NO_PREFIX --> urn:some:namespace
+/// ```
+///
+/// then for all names declared without an explicit prefix, `urn:some:namespace` is assumed
+/// as the namespace URI.
+///
+/// By default empty prefix corresponds to absence of namespace, but this can change either
+/// when writing an XML document (manually) or when reading an XML document (based on namespace
+/// declarations).
+pub const NS_NO_PREFIX: &'static str = "";
+
+/// Designates an empty namespace URI, which is equivalent to absence of namespace.
+///
+/// This constant should not usually be used directly; it is used to designate that
+/// empty prefix corresponds to absent namespace in `NamespaceStack` instances created with
+/// `NamespaceStack::default()`. Therefore, it can be used to restore `NS_NO_PREFIX` mapping
+/// in a namespace back to its default value.
+pub const NS_EMPTY_URI: &'static str = "";
+
+/// Namespace is a map from prefixes to namespace URIs.
+///
+/// No prefix (i.e. default namespace) is designated by `NS_NO_PREFIX` constant.
+#[derive(PartialEq, Eq, Clone, Debug)]
+pub struct Namespace(pub BTreeMap<String, String>);
+
+impl Namespace {
+ /// Returns an empty namespace.
+ #[inline]
+ pub fn empty() -> Namespace { Namespace(BTreeMap::new()) }
+
+ /// Checks whether this namespace is empty.
+ #[inline]
+ pub fn is_empty(&self) -> bool {
+ self.0.is_empty()
+ }
+
+    /// Checks whether this namespace is essentially empty, that is, whether every
+    /// mapping it contains is one of the default mappings.
+    pub fn is_essentially_empty(&self) -> bool {
+        // Only three default mappings are recognized below, so a larger namespace
+        // is definitely not empty.
+        if self.0.len() > 3 {
+            return false;
+        }
+
+        for (prefix, uri) in &self.0 {
+            let is_default = match (prefix.as_str(), uri.as_str()) {
+                (NS_NO_PREFIX, NS_EMPTY_URI) |
+                (NS_XMLNS_PREFIX, NS_XMLNS_URI) |
+                (NS_XML_PREFIX, NS_XML_URI) => true,
+                _ => false
+            };
+            if !is_default {
+                return false;
+            }
+        }
+        true
+    }
+
+ /// Checks whether this namespace mapping contains the given prefix.
+ ///
+ /// # Parameters
+ /// * `prefix` --- namespace prefix.
+ ///
+ /// # Return value
+ /// `true` if this namespace contains the given prefix, `false` otherwise.
+ #[inline]
+ pub fn contains<P: ?Sized+AsRef<str>>(&self, prefix: &P) -> bool {
+ self.0.contains_key(prefix.as_ref())
+ }
+
+    /// Puts a mapping into this namespace unless the prefix is already mapped.
+    ///
+    /// This method never overrides an already existing mapping: if `prefix` is
+    /// present, the namespace is left unchanged.
+    ///
+    /// # Parameters
+    /// * `prefix` --- namespace prefix;
+    /// * `uri` --- namespace URI.
+    ///
+    /// # Return value
+    /// `true` if `prefix` has been inserted successfully; `false` if the `prefix`
+    /// was already present in the namespace.
+    pub fn put<P, U>(&mut self, prefix: P, uri: U) -> bool
+        where P: Into<String>, U: Into<String>
+    {
+        if let Entry::Vacant(slot) = self.0.entry(prefix.into()) {
+            slot.insert(uri.into());
+            true
+        } else {
+            false
+        }
+    }
+
+ /// Puts a mapping into this namespace forcefully.
+ ///
+ /// This method, unlike `put()`, does replace an already existing mapping.
+ ///
+ /// Returns previous URI which was assigned to the given prefix, if it is present.
+ ///
+ /// # Parameters
+ /// * `prefix` --- namespace prefix;
+ /// * `uri` --- namespace URI.
+ ///
+ /// # Return value
+ /// `Some(uri)` with `uri` being a previous URI assigned to the `prefix`, or
+ /// `None` if such prefix was not present in the namespace before.
+ pub fn force_put<P, U>(&mut self, prefix: P, uri: U) -> Option<String>
+ where P: Into<String>, U: Into<String>
+ {
+ self.0.insert(prefix.into(), uri.into())
+ }
+
+ /// Queries the namespace for the given prefix.
+ ///
+ /// # Parameters
+ /// * `prefix` --- namespace prefix.
+ ///
+ /// # Return value
+ /// Namespace URI corresponding to the given prefix, if it is present.
+ pub fn get<'a, P: ?Sized+AsRef<str>>(&'a self, prefix: &P) -> Option<&'a str> {
+ self.0.get(prefix.as_ref()).map(|s| &**s)
+ }
+}
+
+/// An alias for iterator type for namespace mappings contained in a namespace.
+pub type NamespaceMappings<'a> = Map<
+ Entries<'a, String, String>,
+ for<'b> fn((&'b String, &'b String)) -> UriMapping<'b>
+>;
+
+impl<'a> IntoIterator for &'a Namespace {
+ type Item = UriMapping<'a>;
+ type IntoIter = NamespaceMappings<'a>;
+
+ fn into_iter(self) -> Self::IntoIter {
+ fn mapper<'a>((prefix, uri): (&'a String, &'a String)) -> UriMapping<'a> {
+ (&*prefix, &*uri)
+ }
+ self.0.iter().map(mapper)
+ }
+}
+
+/// Namespace stack is a sequence of namespaces.
+///
+/// Namespace stack is used to represent cumulative namespace consisting of
+/// combined namespaces from nested elements.
+#[derive(Clone, Eq, PartialEq, Debug)]
+pub struct NamespaceStack(pub Vec<Namespace>);
+
+impl NamespaceStack {
+ /// Returns an empty namespace stack.
+ #[inline]
+ pub fn empty() -> NamespaceStack { NamespaceStack(Vec::with_capacity(2)) }
+
+ /// Returns a namespace stack with default items in it.
+ ///
+ /// Default items are the following:
+ ///
+ /// * `xml` → `http://www.w3.org/XML/1998/namespace`;
+ /// * `xmlns` → `http://www.w3.org/2000/xmlns/`.
+ #[inline]
+ pub fn default() -> NamespaceStack {
+ let mut nst = NamespaceStack::empty();
+ nst.push_empty();
+ // xml namespace
+ nst.put(NS_XML_PREFIX, NS_XML_URI);
+ // xmlns namespace
+ nst.put(NS_XMLNS_PREFIX, NS_XMLNS_URI);
+ // empty namespace
+ nst.put(NS_NO_PREFIX, NS_EMPTY_URI);
+ nst
+ }
+
+ /// Adds an empty namespace to the top of this stack.
+ #[inline]
+ pub fn push_empty(&mut self) -> &mut NamespaceStack {
+ self.0.push(Namespace::empty());
+ self
+ }
+
+ /// Removes the topmost namespace in this stack.
+ ///
+ /// Panics if the stack is empty.
+ #[inline]
+ pub fn pop(&mut self) -> Namespace {
+ self.0.pop().unwrap()
+ }
+
+ /// Removes the topmost namespace in this stack.
+ ///
+ /// Returns `Some(namespace)` if this stack is not empty and `None` otherwise.
+ #[inline]
+ pub fn try_pop(&mut self) -> Option<Namespace> {
+ self.0.pop()
+ }
+
+ /// Borrows the topmost namespace mutably, leaving the stack intact.
+ ///
+ /// Panics if the stack is empty.
+ #[inline]
+ pub fn peek_mut(&mut self) -> &mut Namespace {
+ self.0.last_mut().unwrap()
+ }
+
+ /// Borrows the topmost namespace immutably, leaving the stack intact.
+ ///
+ /// Panics if the stack is empty.
+ #[inline]
+ pub fn peek(&self) -> &Namespace {
+ self.0.last().unwrap()
+ }
+
+    /// Puts a mapping into the topmost namespace unless the same mapping is already
+    /// in effect in this stack.
+    ///
+    /// The prefix is first resolved against the whole stack from top to bottom (the
+    /// same lookup `get()` performs). If it already resolves to `uri`, nothing is
+    /// changed. Otherwise, that is, when the prefix is not mapped at all or currently
+    /// resolves to a different URI, the mapping is put into the topmost namespace
+    /// using `put()`.
+    ///
+    /// Only the effective mapping of the prefix is consulted: a matching mapping in a
+    /// lower namespace which is shadowed by a different URI higher in the stack does
+    /// not count as already present.
+    ///
+    /// Panics if an insertion is attempted while the stack is empty.
+    ///
+    /// # Parameters
+    /// * `prefix` --- namespace prefix;
+    /// * `uri` --- namespace URI.
+    ///
+    /// # Return value
+    /// `true` if the mapping has been inserted into the topmost namespace; `false` if
+    /// the mapping was already in effect, or if the topmost namespace already maps
+    /// `prefix` to a different URI (`put()` never overrides an existing mapping).
+    pub fn put_checked<P, U>(&mut self, prefix: P, uri: U) -> bool
+        where P: Into<String> + AsRef<str>,
+              U: Into<String> + AsRef<str>
+    {
+        // Compare against the effective mapping only; a shadowed match further down
+        // the stack must not suppress the insertion.
+        if self.get(&prefix) == Some(uri.as_ref()) {
+            return false;
+        }
+        // Report what `put()` actually did instead of unconditionally claiming success.
+        self.put(prefix, uri)
+    }
+
+ /// Puts a mapping into the topmost namespace in this stack.
+ ///
+ /// This method does not override a mapping in the topmost namespace if it is
+ /// already present, however, it does not depend on other namespaces in the stack,
+ /// so it is possible to put a mapping which is present in lower namespaces.
+ ///
+ /// Returns a boolean flag indicating whether the insertion has completed successfully.
+ ///
+ /// # Parameters
+ /// * `prefix` --- namespace prefix;
+ /// * `uri` --- namespace URI.
+ ///
+ /// # Return value
+ /// `true` if `prefix` has been inserted successfully; `false` if the `prefix`
+ /// was already present in the namespace.
+ #[inline]
+ pub fn put<P, U>(&mut self, prefix: P, uri: U) -> bool
+ where P: Into<String>, U: Into<String>
+ {
+ self.0.last_mut().unwrap().put(prefix, uri)
+ }
+
+    /// Looks up the given prefix in the whole stack.
+    ///
+    /// The namespaces are queried from the top of the stack to the bottom, and the
+    /// URI from the first namespace which contains the prefix is returned. If none
+    /// of the namespaces contains the prefix, `None` is returned.
+    ///
+    /// # Parameters
+    /// * `prefix` --- namespace prefix.
+    #[inline]
+    pub fn get<'a, P: ?Sized+AsRef<str>>(&'a self, prefix: &P) -> Option<&'a str> {
+        let key = prefix.as_ref();
+        self.0.iter()
+            .rev()
+            .filter_map(|ns| ns.get(key))
+            .next()
+    }
+
+    /// Flattens this stack of namespaces into a single namespace.
+    ///
+    /// The namespaces are merged from the bottom of the stack to the top, so a mapping
+    /// from a namespace closer to the top replaces a mapping for the same prefix from
+    /// a namespace below it.
+    pub fn squash(&self) -> Namespace {
+        let mut merged = BTreeMap::new();
+        for level in &self.0 {
+            for (prefix, uri) in &level.0 {
+                // Later (higher) levels overwrite earlier ones.
+                merged.insert(prefix.clone(), uri.clone());
+            }
+        }
+        Namespace(merged)
+    }
+
+ /// Returns an object which implements `Extend` using `put_checked()` instead of `put()`.
+ ///
+ /// See `CheckedTarget` for more information.
+ #[inline]
+ pub fn checked_target(&mut self) -> CheckedTarget {
+ CheckedTarget(self)
+ }
+
+ /// Returns an iterator over all mappings in this namespace stack.
+ #[inline]
+ pub fn iter(&self) -> NamespaceStackMappings {
+ self.into_iter()
+ }
+}
+
+/// An iterator over mappings from prefixes to URIs in a namespace stack.
+///
+/// # Example
+/// ```
+/// # use xml::namespace::NamespaceStack;
+/// let mut nst = NamespaceStack::empty();
+/// nst.push_empty();
+/// nst.put("a", "urn:A");
+/// nst.put("b", "urn:B");
+/// nst.push_empty();
+/// nst.put("c", "urn:C");
+///
+/// assert_eq!(vec![("c", "urn:C"), ("a", "urn:A"), ("b", "urn:B")], nst.iter().collect::<Vec<_>>());
+/// ```
+pub struct NamespaceStackMappings<'a> {
+ namespaces: Rev<Iter<'a, Namespace>>,
+ current_namespace: Option<NamespaceMappings<'a>>,
+ used_keys: HashSet<&'a str>
+}
+
+impl<'a> NamespaceStackMappings<'a> {
+ fn go_to_next_namespace(&mut self) -> bool {
+ self.current_namespace = self.namespaces.next().map(|ns| ns.into_iter());
+ self.current_namespace.is_some()
+ }
+}
+
+impl<'a> Iterator for NamespaceStackMappings<'a> {
+    type Item = UriMapping<'a>;
+
+    /// Returns the next mapping whose prefix has not been yielded yet.
+    ///
+    /// Namespaces are visited in the order supplied by `namespaces`; a prefix which
+    /// has already been returned from an earlier namespace is skipped in later ones.
+    /// Returns `None` once every namespace is exhausted.
+    ///
+    /// This is written as a loop rather than a recursive call so that long runs of
+    /// skipped prefixes or empty namespaces cannot grow the call stack.
+    fn next(&mut self) -> Option<UriMapping<'a>> {
+        loop {
+            // Pull from the current namespace, if any; `None` here means either that
+            // iteration has not started yet or that the current namespace is exhausted.
+            let next_item = self.current_namespace
+                .as_mut()
+                .and_then(|mappings| mappings.next());
+
+            match next_item {
+                Some((k, v)) => {
+                    // `insert` returns `true` only for a key which was not seen before,
+                    // so this both records the key and filters out shadowed mappings.
+                    if self.used_keys.insert(k) {
+                        return Some((k, v));
+                    }
+                }
+                None => {
+                    // Advance to the next namespace, or finish if there is none left.
+                    if !self.go_to_next_namespace() {
+                        return None;
+                    }
+                }
+            }
+        }
+    }
+}
+
+/// Iterating over a `&NamespaceStack` yields `(prefix, uri)` pairs through
+/// `NamespaceStackMappings`.
+impl<'a> IntoIterator for &'a NamespaceStack {
+ type Item = UriMapping<'a>;
+ type IntoIter = NamespaceStackMappings<'a>;
+
+ fn into_iter(self) -> Self::IntoIter {
+ NamespaceStackMappings {
+ // the underlying vector is walked in reverse, i.e. the last pushed namespace comes first
+ namespaces: self.0.iter().rev(),
+ // no namespace is selected until the first call to `next()`
+ current_namespace: None,
+ used_keys: HashSet::new()
+ }
+ }
+}
+
+/// A type alias for a pair of `(prefix, uri)` values returned by namespace iterators.
+pub type UriMapping<'a> = (&'a str, &'a str);
+
+/// Adds every `(prefix, uri)` pair to the namespace using `put()`, so mappings which
+/// are already present are not overridden. The result of each `put()` call is ignored.
+impl<'a> Extend<UriMapping<'a>> for Namespace {
+ fn extend<T>(&mut self, iterable: T) where T: IntoIterator<Item=UriMapping<'a>> {
+ for (prefix, uri) in iterable {
+ self.put(prefix, uri);
+ }
+ }
+}
+
+/// Adds every `(prefix, uri)` pair to the topmost namespace of the stack using
+/// `NamespaceStack::put()`. The result of each `put()` call is ignored.
+impl<'a> Extend<UriMapping<'a>> for NamespaceStack {
+ fn extend<T>(&mut self, iterable: T) where T: IntoIterator<Item=UriMapping<'a>> {
+ for (prefix, uri) in iterable {
+ self.put(prefix, uri);
+ }
+ }
+}
+
+/// A wrapper around `NamespaceStack` which implements `Extend` using `put_checked()`.
+///
+/// # Example
+///
+/// ```
+/// # use xml::namespace::NamespaceStack;
+///
+/// let mut nst = NamespaceStack::empty();
+/// nst.push_empty();
+/// nst.put("a", "urn:A");
+/// nst.put("b", "urn:B");
+/// nst.push_empty();
+/// nst.put("c", "urn:C");
+///
+/// nst.checked_target().extend(vec![("a", "urn:Z"), ("b", "urn:B"), ("c", "urn:Y"), ("d", "urn:D")]);
+/// assert_eq!(
+/// vec![("a", "urn:Z"), ("c", "urn:C"), ("d", "urn:D"), ("b", "urn:B")],
+/// nst.iter().collect::<Vec<_>>()
+/// );
+/// ```
+///
+/// Compare:
+///
+/// ```
+/// # use xml::namespace::NamespaceStack;
+/// # let mut nst = NamespaceStack::empty();
+/// # nst.push_empty();
+/// # nst.put("a", "urn:A");
+/// # nst.put("b", "urn:B");
+/// # nst.push_empty();
+/// # nst.put("c", "urn:C");
+///
+/// nst.extend(vec![("a", "urn:Z"), ("b", "urn:B"), ("c", "urn:Y"), ("d", "urn:D")]);
+/// assert_eq!(
+/// vec![("a", "urn:Z"), ("b", "urn:B"), ("c", "urn:C"), ("d", "urn:D")],
+/// nst.iter().collect::<Vec<_>>()
+/// );
+/// ```
+pub struct CheckedTarget<'a>(&'a mut NamespaceStack);
+
+/// Feeds every `(prefix, uri)` pair to `NamespaceStack::put_checked()` on the wrapped
+/// stack. The result of each `put_checked()` call is ignored.
+impl<'a, 'b> Extend<UriMapping<'b>> for CheckedTarget<'a> {
+ fn extend<T>(&mut self, iterable: T) where T: IntoIterator<Item=UriMapping<'b>> {
+ for (prefix, uri) in iterable {
+ self.0.put_checked(prefix, uri);
+ }
+ }
+}
diff --git a/third_party/rust/xml-rs/src/reader/config.rs b/third_party/rust/xml-rs/src/reader/config.rs
new file mode 100644
index 0000000000..0abb165cf4
--- /dev/null
+++ b/third_party/rust/xml-rs/src/reader/config.rs
@@ -0,0 +1,181 @@
+//! Contains parser configuration structure.
+use std::io::Read;
+use std::collections::HashMap;
+
+use reader::EventReader;
+
+/// Parser configuration structure.
+///
+/// This structure contains various configuration options which affect
+/// behavior of the parser.
+#[derive(Clone, PartialEq, Eq, Debug)]
+pub struct ParserConfig {
+ /// Whether or not should whitespace in textual events be removed. Default is false.
+ ///
+ /// When true, all standalone whitespace will be removed (this means no
+ /// `Whitespace` events will be emitted), and leading and trailing whitespace
+ /// from `Characters` events will be deleted. If after trimming a `Characters`
+ /// event is empty, it will also be omitted from output stream. This is
+ /// possible, however, only if `whitespace_to_characters` or
+ /// `cdata_to_characters` options are set.
+ ///
+ /// This option does not affect CDATA events, unless `cdata_to_characters`
+ /// option is also set. In that case CDATA content will also be trimmed.
+ pub trim_whitespace: bool,
+
+ /// Whether or not should whitespace be converted to characters.
+ /// Default is false.
+ ///
+ /// If true, instead of `Whitespace` events `Characters` events with the
+ /// same content will be emitted. If `trim_whitespace` is also true, these
+ /// events will be trimmed to nothing and, consequently, not emitted.
+ pub whitespace_to_characters: bool,
+
+ /// Whether or not should CDATA be converted to characters.
+ /// Default is false.
+ ///
+ /// If true, instead of `CData` events `Characters` events with the same
+ /// content will be emitted. If `trim_whitespace` is also true, these events
+ /// will be trimmed. If corresponding CDATA contained nothing but whitespace,
+ /// this event will be omitted from the stream.
+ pub cdata_to_characters: bool,
+
+ /// Whether or not should comments be omitted. Default is true.
+ ///
+ /// If true, `Comment` events will not be emitted at all.
+ pub ignore_comments: bool,
+
+ /// Whether or not should sequential `Characters` events be merged.
+ /// Default is true.
+ ///
+ /// If true, multiple sequential `Characters` events will be merged into
+ /// a single event, that is, their data will be concatenated.
+ ///
+ /// Multiple sequential `Characters` events are only possible if either
+ /// `cdata_to_characters` or `ignore_comments` are set. Otherwise character
+ /// events will always be separated by other events.
+ pub coalesce_characters: bool,
+
+ /// A map of extra entities recognized by the parser. Default is an empty map.
+ ///
+ /// By default the XML parser recognizes the entities defined in the XML spec. Sometimes,
+ /// however, it is convenient to make the parser recognize additional entities which
+ /// are also not available through the DTD definitions (especially given that at the moment
+ /// DTD parsing is not supported).
+ pub extra_entities: HashMap<String, String>,
+
+ /// Whether or not the parser should ignore the end of stream. Default is false.
+ ///
+ /// By default the parser will either error out when it encounters a premature end of
+ /// stream or complete normally if the end of stream was expected. If you want to continue
+ /// reading from a stream whose input is supplied progressively, you can set this option to true.
+ /// In this case the parser will allow you to invoke the next() method even if a supposed end
+ /// of stream has happened.
+ ///
+ /// Note that support for this functionality is incomplete; for example, the parser will fail if
+ /// the premature end of stream happens inside PCDATA. Therefore, use this option at your own risk.
+ pub ignore_end_of_stream: bool,
+
+ /// Whether or not non-unicode entity references get replaced with the replacement character.
+ /// Default is false.
+ ///
+ /// When true, any decimal or hexadecimal character reference that cannot be converted from a
+ /// u32 to a char using [std::char::from_u32](https://doc.rust-lang.org/std/char/fn.from_u32.html)
+ /// will be converted into the unicode REPLACEMENT CHARACTER (U+FFFD).
+ pub replace_unknown_entity_references: bool,
+
+ /// Whether or not whitespace at the root level of the document is ignored. Default is true.
+ ///
+ /// By default any whitespace that is not enclosed within at least one level of elements will be
+ /// ignored. Setting this value to false will cause root level whitespace events to be emitted.
+ pub ignore_root_level_whitespace: bool,
+}
+
+impl ParserConfig {
+ /// Returns a new config with default values.
+ ///
+ /// You can tweak default values using builder-like pattern:
+ ///
+ /// ```rust
+ /// use xml::reader::ParserConfig;
+ ///
+ /// let config = ParserConfig::new()
+ /// .trim_whitespace(true)
+ /// .ignore_comments(true)
+ /// .coalesce_characters(false);
+ /// ```
+ pub fn new() -> ParserConfig {
+ ParserConfig {
+ trim_whitespace: false,
+ whitespace_to_characters: false,
+ cdata_to_characters: false,
+ ignore_comments: true,
+ coalesce_characters: true,
+ extra_entities: HashMap::new(),
+ ignore_end_of_stream: false,
+ replace_unknown_entity_references: false,
+ ignore_root_level_whitespace: true,
+ }
+ }
+
+ /// Creates an XML reader with this configuration.
+ ///
+ /// This is a convenience method for configuring and creating a reader at the same time:
+ ///
+ /// ```rust
+ /// use xml::reader::ParserConfig;
+ ///
+ /// let mut source: &[u8] = b"...";
+ ///
+ /// let reader = ParserConfig::new()
+ /// .trim_whitespace(true)
+ /// .ignore_comments(true)
+ /// .coalesce_characters(false)
+ /// .create_reader(&mut source);
+ /// ```
+ ///
+ /// This method is exactly equivalent to calling `EventReader::new_with_config()` with
+ /// this configuration object.
+ #[inline]
+ pub fn create_reader<R: Read>(self, source: R) -> EventReader<R> {
+ EventReader::new_with_config(source, self)
+ }
+
+ /// Adds a new entity mapping and returns an updated config object.
+ ///
+ /// This is a convenience method for adding external entities mappings to the XML parser.
+ /// An example:
+ ///
+ /// ```rust
+ /// use xml::reader::ParserConfig;
+ ///
+ /// let mut source: &[u8] = b"...";
+ ///
+ /// let reader = ParserConfig::new()
+ /// .add_entity("nbsp", " ")
+ /// .add_entity("copy", "©")
+ /// .add_entity("reg", "®")
+ /// .create_reader(&mut source);
+ /// ```
+ pub fn add_entity<S: Into<String>, T: Into<String>>(mut self, entity: S, value: T) -> ParserConfig {
+ self.extra_entities.insert(entity.into(), value.into());
+ self
+ }
+}
+
+impl Default for ParserConfig {
+ #[inline]
+ fn default() -> ParserConfig {
+ ParserConfig::new()
+ }
+}
+
+gen_setters! { ParserConfig,
+ trim_whitespace: val bool,
+ whitespace_to_characters: val bool,
+ cdata_to_characters: val bool,
+ ignore_comments: val bool,
+ coalesce_characters: val bool,
+ ignore_end_of_stream: val bool,
+ replace_unknown_entity_references: val bool,
+ ignore_root_level_whitespace: val bool
+}
diff --git a/third_party/rust/xml-rs/src/reader/error.rs b/third_party/rust/xml-rs/src/reader/error.rs
new file mode 100644
index 0000000000..92378e6373
--- /dev/null
+++ b/third_party/rust/xml-rs/src/reader/error.rs
@@ -0,0 +1,121 @@
+
+use std::io;
+use std::borrow::Cow;
+use std::fmt;
+use std::error;
+use std::str;
+
+use util;
+use common::{Position, TextPosition};
+
+/// The kind of failure described by an `Error`.
+#[derive(Debug)]
+pub enum ErrorKind {
+ /// A syntax error carrying a textual message.
+ Syntax(Cow<'static, str>),
+ /// An underlying I/O error.
+ Io(io::Error),
+ /// A UTF-8 decoding error.
+ Utf8(str::Utf8Error),
+ /// The input ended unexpectedly.
+ UnexpectedEof,
+}
+
+/// An XML parsing error.
+///
+/// Consists of a 2D position in a document and a textual message describing the error.
+#[derive(Clone, PartialEq, Eq, Debug)]
+pub struct Error {
+ pos: TextPosition,
+ kind: ErrorKind,
+}
+
+impl fmt::Display for Error {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "{} {}", self.pos, self.msg())
+ }
+}
+
+impl Position for Error {
+ #[inline]
+ fn position(&self) -> TextPosition { self.pos }
+}
+
+impl Error {
+    /// Returns a reference to the message describing this error.
+    ///
+    /// For syntax errors this is the stored message, for I/O and UTF-8 errors it is
+    /// the description of the underlying error, and for an unexpected end of input
+    /// it is a fixed string.
+    #[inline]
+    pub fn msg(&self) -> &str {
+        match self.kind {
+            ErrorKind::Syntax(ref text) => text.as_ref(),
+            ErrorKind::Io(ref cause) => error_description(cause),
+            ErrorKind::Utf8(ref cause) => error_description(cause),
+            ErrorKind::UnexpectedEof => "Unexpected EOF",
+        }
+    }
+
+    /// Returns the kind of this error.
+    pub fn kind(&self) -> &ErrorKind { &self.kind }
+}
+
+impl error::Error for Error {
+ #[inline]
+ fn description(&self) -> &str { self.msg() }
+}
+
+impl<'a, P, M> From<(&'a P, M)> for Error where P: Position, M: Into<Cow<'static, str>> {
+ fn from(orig: (&'a P, M)) -> Self {
+ Error{
+ pos: orig.0.position(),
+ kind: ErrorKind::Syntax(orig.1.into())
+ }
+ }
+}
+
+impl From<util::CharReadError> for Error {
+ fn from(e: util::CharReadError) -> Self {
+ use util::CharReadError::*;
+ Error{
+ pos: TextPosition::new(),
+ kind: match e {
+ UnexpectedEof => ErrorKind::UnexpectedEof,
+ Utf8(reason) => ErrorKind::Utf8(reason),
+ Io(io_error) => ErrorKind::Io(io_error),
+ }
+ }
+ }
+}
+
+impl From<io::Error> for Error {
+ fn from(e: io::Error) -> Self {
+ Error {
+ pos: TextPosition::new(),
+ kind: ErrorKind::Io(e),
+ }
+ }
+}
+
+/// Manual `Clone` implementation; the `Io` variant is cloned by building a new
+/// `io::Error` instead of cloning the original one.
+impl Clone for ErrorKind {
+ fn clone(&self) -> Self {
+ use self::ErrorKind::*;
+ match *self {
+ UnexpectedEof => UnexpectedEof,
+ Utf8(ref reason) => Utf8(reason.clone()),
+ // the copy is rebuilt from the original's kind and description only
+ Io(ref io_error) => Io(io::Error::new(io_error.kind(), error_description(io_error))),
+ Syntax(ref msg) => Syntax(msg.clone()),
+ }
+ }
+}
+/// Manual `PartialEq` implementation; two `Io` values are compared by their kind
+/// and description.
+impl PartialEq for ErrorKind {
+ fn eq(&self, other: &ErrorKind) -> bool {
+ use self::ErrorKind::*;
+ match (self, other) {
+ (&UnexpectedEof, &UnexpectedEof) => true,
+ (&Utf8(ref left), &Utf8(ref right)) => left == right,
+ // I/O errors are considered equal when both kind and description match
+ (&Io(ref left), &Io(ref right)) =>
+ left.kind() == right.kind() &&
+ error_description(left) == error_description(right),
+ (&Syntax(ref left), &Syntax(ref right)) =>
+ left == right,
+
+ // different variants are never equal
+ (_, _) => false,
+ }
+ }
+}
+impl Eq for ErrorKind {}
+
+fn error_description(e: &error::Error) -> &str { e.description() }
diff --git a/third_party/rust/xml-rs/src/reader/events.rs b/third_party/rust/xml-rs/src/reader/events.rs
new file mode 100644
index 0000000000..46d7621a87
--- /dev/null
+++ b/third_party/rust/xml-rs/src/reader/events.rs
@@ -0,0 +1,219 @@
+//! Contains `XmlEvent` datatype, instances of which are emitted by the parser.
+
+use std::fmt;
+use std::borrow::Cow;
+
+use name::OwnedName;
+use attribute::OwnedAttribute;
+use common::XmlVersion;
+use namespace::Namespace;
+
+/// An element of an XML input stream.
+///
+/// Items of this enum are emitted by `reader::EventReader`. They correspond to different
+/// elements of an XML document.
+#[derive(PartialEq, Clone)]
+pub enum XmlEvent {
+ /// Corresponds to XML document declaration.
+ ///
+ /// This event is always emitted before any other event. It is emitted
+ /// even if the actual declaration is not present in the document.
+ StartDocument {
+ /// XML version.
+ ///
+ /// If XML declaration is not present, defaults to `Version10`.
+ version: XmlVersion,
+
+ /// XML document encoding.
+ ///
+ /// If XML declaration is not present or does not contain `encoding` attribute,
+ /// defaults to `"UTF-8"`. This field is currently used for no other purpose than
+ /// informational.
+ encoding: String,
+
+ /// XML standalone declaration.
+ ///
+ /// If XML declaration is not present or does not contain `standalone` attribute,
+ /// defaults to `None`. This field is currently used for no other purpose than
+ /// informational.
+ standalone: Option<bool>
+ },
+
+ /// Denotes the end of the document stream.
+ ///
+ /// This event is always emitted after any other event (except `Error`). After it
+ /// is emitted for the first time, it will always be emitted on next event pull attempts.
+ EndDocument,
+
+ /// Denotes an XML processing instruction.
+ ///
+ /// This event contains a processing instruction target (`name`) and opaque `data`. It
+ /// is up to the application to process them.
+ ProcessingInstruction {
+ /// Processing instruction target.
+ name: String,
+
+ /// Processing instruction content.
+ data: Option<String>
+ },
+
+ /// Denotes a beginning of an XML element.
+ ///
+ /// This event is emitted after parsing opening tags or after parsing bodiless tags. In the
+ /// latter case `EndElement` event immediately follows.
+ StartElement {
+ /// Qualified name of the element.
+ name: OwnedName,
+
+ /// A list of attributes associated with the element.
+ ///
+ /// Currently attributes are not checked for duplicates (TODO)
+ attributes: Vec<OwnedAttribute>,
+
+ /// Contents of the namespace mapping at this point of the document.
+ namespace: Namespace,
+ },
+
+ /// Denotes an end of an XML element.
+ ///
+ /// This event is emitted after parsing closing tags or after parsing bodiless tags. In the
+ /// latter case it is emitted immediately after corresponding `StartElement` event.
+ EndElement {
+ /// Qualified name of the element.
+ name: OwnedName
+ },
+
+ /// Denotes CDATA content.
+ ///
+ /// This event contains unparsed data. No unescaping will be performed.
+ ///
+ /// It is possible to configure a parser to emit `Characters` event instead of `CData`. See
+ /// `reader::ParserConfig` structure for more information.
+ CData(String),
+
+ /// Denotes a comment.
+ ///
+ /// It is possible to configure a parser to ignore comments, so this event will never be emitted.
+ /// See `reader::ParserConfig` structure for more information.
+ Comment(String),
+
+ /// Denotes character data outside of tags.
+ ///
+ /// Contents of this event will always be unescaped, so no entities like `&lt;` or `&amp;` or `&#123;`
+ /// will appear in it.
+ ///
+ /// It is possible to configure a parser to trim leading and trailing whitespace for this event.
+ /// See `reader::ParserConfig` structure for more information.
+ Characters(String),
+
+ /// Denotes a chunk of whitespace outside of tags.
+ ///
+ /// It is possible to configure a parser to emit `Characters` event instead of `Whitespace`.
+ /// See `reader::ParserConfig` structure for more information. When combined with whitespace
+ /// trimming, it will eliminate standalone whitespace from the event stream completely.
+ Whitespace(String)
+}
+
+impl fmt::Debug for XmlEvent {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ match *self {
+ XmlEvent::StartDocument { ref version, ref encoding, ref standalone } =>
+ write!(f, "StartDocument({}, {}, {:?})", version, *encoding, *standalone),
+ XmlEvent::EndDocument =>
+ write!(f, "EndDocument"),
+ XmlEvent::ProcessingInstruction { ref name, ref data } =>
+ write!(f, "ProcessingInstruction({}{})", *name, match *data {
+ Some(ref data) => format!(", {}", data),
+ None => String::new()
+ }),
+ XmlEvent::StartElement { ref name, ref attributes, namespace: Namespace(ref namespace) } =>
+ write!(f, "StartElement({}, {:?}{})", name, namespace, if attributes.is_empty() {
+ String::new()
+ } else {
+ let attributes: Vec<String> = attributes.iter().map(
+ |a| format!("{} -> {}", a.name, a.value)
+ ).collect();
+ format!(", [{}]", attributes.join(", "))
+ }),
+ XmlEvent::EndElement { ref name } =>
+ write!(f, "EndElement({})", name),
+ XmlEvent::Comment(ref data) =>
+ write!(f, "Comment({})", data),
+ XmlEvent::CData(ref data) =>
+ write!(f, "CData({})", data),
+ XmlEvent::Characters(ref data) =>
+ write!(f, "Characters({})", data),
+ XmlEvent::Whitespace(ref data) =>
+ write!(f, "Whitespace({})", data)
+ }
+ }
+}
+
+impl XmlEvent {
+ /// Obtains a writer event from this reader event.
+ ///
+ /// This method is useful for streaming processing of XML documents where the output
+ /// is also an XML document. With this method it is possible to process some events
+ /// while passing other events through to the writer unchanged:
+ ///
+ /// ```rust
+ /// use std::str;
+ ///
+ /// use xml::{EventReader, EventWriter};
+ /// use xml::reader::XmlEvent as ReaderEvent;
+ /// use xml::writer::XmlEvent as WriterEvent;
+ ///
+ /// let mut input: &[u8] = b"<hello>world</hello>";
+ /// let mut output: Vec<u8> = Vec::new();
+ ///
+ /// {
+ /// let mut reader = EventReader::new(&mut input);
+ /// let mut writer = EventWriter::new(&mut output);
+ ///
+ /// for e in reader {
+ /// match e.unwrap() {
+ /// ReaderEvent::Characters(s) =>
+ /// writer.write(WriterEvent::characters(&s.to_uppercase())).unwrap(),
+ /// e => if let Some(e) = e.as_writer_event() {
+ /// writer.write(e).unwrap()
+ /// }
+ /// }
+ /// }
+ /// }
+ ///
+ /// assert_eq!(
+ /// str::from_utf8(&output).unwrap(),
+ /// r#"<?xml version="1.0" encoding="UTF-8"?><hello>WORLD</hello>"#
+ /// );
+ /// ```
+ ///
+ /// Note that this API may change or get additions in future to improve its ergonomics.
+ pub fn as_writer_event<'a>(&'a self) -> Option<::writer::events::XmlEvent<'a>> {
+ match *self {
+ XmlEvent::StartDocument { version, ref encoding, standalone } =>
+ Some(::writer::events::XmlEvent::StartDocument {
+ version: version,
+ encoding: Some(encoding),
+ standalone: standalone
+ }),
+ XmlEvent::ProcessingInstruction { ref name, ref data } =>
+ Some(::writer::events::XmlEvent::ProcessingInstruction {
+ name: name,
+ data: data.as_ref().map(|s| &s[..])
+ }),
+ XmlEvent::StartElement { ref name, ref attributes, ref namespace } =>
+ Some(::writer::events::XmlEvent::StartElement {
+ name: name.borrow(),
+ attributes: attributes.iter().map(|a| a.borrow()).collect(),
+ namespace: Cow::Borrowed(namespace)
+ }),
+ XmlEvent::EndElement { ref name } =>
+ Some(::writer::events::XmlEvent::EndElement { name: Some(name.borrow()) }),
+ XmlEvent::Comment(ref data) => Some(::writer::events::XmlEvent::Comment(data)),
+ XmlEvent::CData(ref data) => Some(::writer::events::XmlEvent::CData(data)),
+ XmlEvent::Characters(ref data) => Some(::writer::events::XmlEvent::Characters(data)),
+ XmlEvent::Whitespace(ref data) => Some(::writer::events::XmlEvent::Characters(data)),
+ _ => None
+ }
+ }
+}
diff --git a/third_party/rust/xml-rs/src/reader/lexer.rs b/third_party/rust/xml-rs/src/reader/lexer.rs
new file mode 100644
index 0000000000..c466db9210
--- /dev/null
+++ b/third_party/rust/xml-rs/src/reader/lexer.rs
@@ -0,0 +1,867 @@
+//! Contains simple lexer for XML documents.
+//!
+//! This module is for internal use. Use `xml::pull` module to do parsing.
+
+use std::fmt;
+use std::collections::VecDeque;
+use std::io::Read;
+use std::result;
+use std::borrow::Cow;
+
+use common::{Position, TextPosition, is_whitespace_char, is_name_char};
+use reader::Error;
+use util;
+
+/// `Token` represents a single lexeme of an XML document. These lexemes
+/// are used to perform actual parsing.
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub enum Token {
+ /// `<?`
+ ProcessingInstructionStart,
+ /// `?>`
+ ProcessingInstructionEnd,
+ /// `<!DOCTYPE`
+ DoctypeStart,
+ /// `<`
+ OpeningTagStart,
+ /// `</`
+ ClosingTagStart,
+ /// `>`
+ TagEnd,
+ /// `/>`
+ EmptyTagEnd,
+ /// `<!--`
+ CommentStart,
+ /// `-->`
+ CommentEnd,
+ /// A chunk of characters, used for error recovery.
+ Chunk(&'static str),
+ /// Any non-special character except whitespace.
+ Character(char),
+ /// Whitespace character.
+ Whitespace(char),
+ /// `=`
+ EqualsSign,
+ /// `'`
+ SingleQuote,
+ /// `"`
+ DoubleQuote,
+ /// `<![CDATA[`
+ CDataStart,
+ /// `]]>`
+ CDataEnd,
+ /// `&`
+ ReferenceStart,
+ /// `;`
+ ReferenceEnd,
+}
+
+impl fmt::Display for Token {
+    /// Writes the token using the same text it stands for in an XML document.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let text = match *self {
+            // Single-character tokens have no fixed spelling; print the character itself.
+            Token::Character(c) | Token::Whitespace(c) => return write!(f, "{}", c),
+            Token::Chunk(s) => s,
+            Token::OpeningTagStart => "<",
+            Token::ProcessingInstructionStart => "<?",
+            Token::DoctypeStart => "<!DOCTYPE",
+            Token::ClosingTagStart => "</",
+            Token::CommentStart => "<!--",
+            Token::CDataStart => "<![CDATA[",
+            Token::TagEnd => ">",
+            Token::EmptyTagEnd => "/>",
+            Token::ProcessingInstructionEnd => "?>",
+            Token::CommentEnd => "-->",
+            Token::CDataEnd => "]]>",
+            Token::ReferenceStart => "&",
+            Token::ReferenceEnd => ";",
+            Token::EqualsSign => "=",
+            Token::SingleQuote => "'",
+            Token::DoubleQuote => "\""
+        };
+        write!(f, "{}", text)
+    }
+}
+
+impl Token {
+    /// Returns the fixed text of this token, or `None` for the tokens that carry
+    /// a single character (`Character` and `Whitespace`).
+    pub fn as_static_str(&self) -> Option<&'static str> {
+        let text = match *self {
+            Token::Character(_) | Token::Whitespace(_) => return None,
+            Token::Chunk(s) => s,
+            Token::OpeningTagStart => "<",
+            Token::ProcessingInstructionStart => "<?",
+            Token::DoctypeStart => "<!DOCTYPE",
+            Token::ClosingTagStart => "</",
+            Token::CommentStart => "<!--",
+            Token::CDataStart => "<![CDATA[",
+            Token::TagEnd => ">",
+            Token::EmptyTagEnd => "/>",
+            Token::ProcessingInstructionEnd => "?>",
+            Token::CommentEnd => "-->",
+            Token::CDataEnd => "]]>",
+            Token::ReferenceStart => "&",
+            Token::ReferenceEnd => ";",
+            Token::EqualsSign => "=",
+            Token::SingleQuote => "'",
+            Token::DoubleQuote => "\""
+        };
+        Some(text)
+    }
+
+    /// Appends the text of this token to `target`.
+    // using String.push_str(token.to_string()) is simply way too slow
+    pub fn push_to_string(&self, target: &mut String) {
+        match *self {
+            Token::Character(c) | Token::Whitespace(c) => target.push(c),
+            _ => match self.as_static_str() {
+                Some(s) => target.push_str(s),
+                // every token other than `Character`/`Whitespace` has a static form
+                None => unreachable!()
+            }
+        }
+    }
+
+    /// Returns `true` if this token contains data that can be interpreted
+    /// as a part of the text. Surprisingly, this also means '>' and '=' and '"' and "'" and '-->'.
+    #[inline]
+    pub fn contains_char_data(&self) -> bool {
+        match *self {
+            // Tokens that open a construct or delimit a reference are never text.
+            Token::ProcessingInstructionStart | Token::DoctypeStart |
+            Token::OpeningTagStart | Token::ClosingTagStart |
+            Token::CommentStart | Token::CDataStart |
+            Token::ReferenceStart | Token::ReferenceEnd => false,
+
+            Token::Whitespace(_) | Token::Chunk(_) | Token::Character(_) |
+            Token::CommentEnd | Token::TagEnd | Token::EqualsSign |
+            Token::DoubleQuote | Token::SingleQuote | Token::CDataEnd |
+            Token::ProcessingInstructionEnd | Token::EmptyTagEnd => true
+        }
+    }
+
+    /// Returns `true` if this token corresponds to a white space character.
+    #[inline]
+    pub fn is_whitespace(&self) -> bool {
+        if let Token::Whitespace(_) = *self {
+            true
+        } else {
+            false
+        }
+    }
+}
+
+enum State { // internal lexer state: which multi-character token, if any, is partially read
+ /// Triggered on '<'
+ TagStarted,
+ /// Triggered on '<!'
+ CommentOrCDataOrDoctypeStarted,
+ /// Triggered on '<!-'
+ CommentStarted,
+ /// Triggered on '<!D' up to '<!DOCTYPE'
+ DoctypeStarted(DoctypeStartedSubstate),
+ /// Triggered after DoctypeStarted to handle sub elements; the counter starts at 1 and tracks unclosed '<'
+ DoctypeFinishing(u8),
+ /// Triggered on '<![' up to '<![CDATA'
+ CDataStarted(CDataStartedSubstate),
+ /// Triggered on '?'
+ ProcessingInstructionClosing,
+ /// Triggered on '/'
+ EmptyTagClosing,
+ /// Triggered on '-' up to '--'
+ CommentClosing(ClosingSubstate),
+ /// Triggered on ']' up to ']]'
+ CDataClosing(ClosingSubstate),
+ /// Default state
+ Normal
+}
+
+#[derive(Copy, Clone)]
+enum ClosingSubstate { // progress through a two-character closing prefix ('--' or ']]')
+ First, Second
+}
+
+#[derive(Copy, Clone)]
+enum DoctypeStartedSubstate { // letters of 'DOCTYPE' read so far after '<!'
+ D, DO, DOC, DOCT, DOCTY, DOCTYP
+}
+
+#[derive(Copy, Clone)]
+enum CDataStartedSubstate { // `E` is the state right after '<![' (nothing of 'CDATA' read yet); the rest name the letters read so far
+ E, C, CD, CDA, CDAT, CDATA
+}
+
+/// `Result` represents lexing result. It is either an optional token (`None` when no token was produced) or an error.
+pub type Result = result::Result<Option<Token>, Error>;
+
+/// Helps to set up a dispatch table for lexing large unambiguous tokens like
+/// `<![CDATA[` or `<!DOCTYPE `.
+macro_rules! dispatch_on_enum_state(
+ ($_self:ident, $s:expr, $c:expr, $is:expr, // lexer, current substate, current char, State constructor
+ $($st:ident; $stc:expr ; $next_st:ident ; $chunk:expr),+; // substate; expected char; next substate; text read so far
+ $end_st:ident ; $end_c:expr ; $end_chunk:expr ; $e:expr) => ( // final substate; final char; text read so far; expression producing the result
+ match $s {
+ $(
+ $st => match $c {
+ $stc => $_self.move_to($is($next_st)), // expected char: advance to the next substate
+ _ => $_self.handle_error($chunk, $c) // anything else: report/recover with the text read so far
+ },
+ )+
+ $end_st => match $c {
+ $end_c => $e,
+ _ => $_self.handle_error($end_chunk, $c)
+ }
+ }
+ )
+);
+
+/// `Lexer` is a lexer for XML documents, which implements pull API.
+///
+/// Main method is `next_token` which accepts an `std::io::Read` instance and
+/// tries to read the next lexeme from it.
+///
+/// When `skip_errors` flag is set, invalid lexemes will be returned as `Chunk`s.
+/// When it is not set, errors will be reported as `Err` objects with a string message.
+/// By default this flag is not set. Use `enable_errors` and `disable_errors` methods
+/// to toggle the behavior.
+pub struct Lexer {
+ pos: TextPosition, // value returned by `position()`; copied from `head_pos` when a new token starts
+ head_pos: TextPosition, // advanced per dispatched character while the unread queue is empty
+ char_queue: VecDeque<char>, // characters pushed back to be lexed again before reading more input
+ st: State, // current lexer state
+ skip_errors: bool, // when set, invalid lexemes become `Token::Chunk`s instead of errors
+ inside_comment: bool, // toggled by `inside_comment()` / `outside_comment()`
+ inside_token: bool, // true from the start of `next_token` until a token is returned
+ eof_handled: bool // set once end of stream was processed; cleared by `reset_eof_handled()`
+}
+
+impl Position for Lexer {
+ #[inline]
+ /// Returns the position of the last token produced by the lexer (the stored `pos` field).
+ fn position(&self) -> TextPosition { self.pos }
+}
+
+impl Lexer {
+ /// Returns a new lexer with default state: `State::Normal`, errors enabled, empty unread queue.
+ pub fn new() -> Lexer {
+ Lexer {
+ pos: TextPosition::new(),
+ head_pos: TextPosition::new(),
+ char_queue: VecDeque::with_capacity(4), // TODO: check size
+ st: State::Normal,
+ skip_errors: false, // errors are reported until `disable_errors()` is called
+ inside_comment: false,
+ inside_token: false,
+ eof_handled: false
+ }
+ }
+
+ /// Enables error handling so `next_token` will return `Err(..)`
+ /// upon invalid lexeme (except for non-`--` chunks while inside a comment, see `handle_error`).
+ #[inline]
+ pub fn enable_errors(&mut self) { self.skip_errors = false; }
+
+ /// Disables error handling so `next_token` will return `Ok(Some(Token::Chunk(..)))`
+ /// upon invalid lexeme with this lexeme content.
+ #[inline]
+ pub fn disable_errors(&mut self) { self.skip_errors = true; }
+
+ /// Enables special handling of some lexemes which should be done when we're parsing comment
+ /// internals.
+ #[inline]
+ pub fn inside_comment(&mut self) { self.inside_comment = true; }
+
+ /// Disables the effect of `inside_comment()` method.
+ #[inline]
+ pub fn outside_comment(&mut self) { self.inside_comment = false; }
+
+ /// Reset the eof handled flag of the lexer, so `next_token` reads input again instead of returning `Ok(None)`.
+ #[inline]
+ pub fn reset_eof_handled(&mut self) { self.eof_handled = false; }
+
+    /// Reads characters from `b` until one complete token has been recognized.
+    ///
+    /// It is possible to pass different readers each time this method is called,
+    /// but the resulting behavior is undefined in this case.
+    ///
+    /// Return value:
+    /// * `Err(reason) where reason: reader::Error` - when an error occurs;
+    /// * `Ok(None)` - once the end of the stream has been reached;
+    /// * `Ok(Some(token)) where token: Token` - when a complete token has been read from the stream.
+    pub fn next_token<B: Read>(&mut self, b: &mut B) -> Result {
+        // The end of the stream was reported already; nothing more to produce.
+        if self.eof_handled {
+            return Ok(None);
+        }
+
+        // A fresh token begins at the current head position.
+        if !self.inside_token {
+            self.inside_token = true;
+            self.pos = self.head_pos;
+        }
+
+        // Characters pushed back by earlier steps are lexed before any new input.
+        while let Some(queued) = self.char_queue.pop_front() {
+            if let Some(token) = try!(self.read_next_token(queued)) {
+                self.inside_token = false;
+                return Ok(Some(token));
+            }
+        }
+
+        // TODO: this should handle multiple encodings
+        while let Some(fresh) = try!(util::next_char_from(b)) {
+            if let Some(token) = try!(self.read_next_token(fresh)) {
+                self.inside_token = false;
+                return Ok(Some(token));
+            }
+        }
+
+        // Nothing is left to read: flush whatever the current state still holds,
+        // or fail if the input stopped in the middle of a multi-character token.
+        self.eof_handled = true;
+        self.pos = self.head_pos;
+        match self.st {
+            State::Normal =>
+                Ok(None),
+            State::ProcessingInstructionClosing =>
+                Ok(Some(Token::Character('?'))),
+            State::EmptyTagClosing =>
+                Ok(Some(Token::Character('/'))),
+            State::CommentClosing(ClosingSubstate::First) =>
+                Ok(Some(Token::Character('-'))),
+            State::CDataClosing(ClosingSubstate::First) =>
+                Ok(Some(Token::Character(']'))),
+            State::CDataClosing(ClosingSubstate::Second) =>
+                Ok(Some(Token::Chunk("]]"))),
+            State::TagStarted | State::CommentOrCDataOrDoctypeStarted |
+            State::CommentStarted | State::CDataStarted(_) | State::DoctypeStarted(_) |
+            State::DoctypeFinishing(_) |
+            State::CommentClosing(ClosingSubstate::Second) =>
+                Err(self.error("Unexpected end of stream"))
+        }
+    }
+
+ #[inline]
+ fn error<M: Into<Cow<'static, str>>>(&self, msg: M) -> Error { // builds an `Error` by converting the (lexer, message) pair
+ (self, msg).into()
+ }
+
+    #[inline]
+    fn read_next_token(&mut self, c: char) -> Result {
+        let outcome = self.dispatch_char(c);
+
+        // The head position only moves while no characters are waiting in the
+        // unread queue.
+        if !self.char_queue.is_empty() {
+            return outcome;
+        }
+
+        match c {
+            '\n' => { self.head_pos.new_line(); }
+            _ => { self.head_pos.advance(1); }
+        }
+        outcome
+    }
+
+ fn dispatch_char(&mut self, c: char) -> Result { // routes `c` to the handler for the current state
+ match self.st {
+ State::Normal => self.normal(c),
+ State::TagStarted => self.tag_opened(c),
+ State::CommentOrCDataOrDoctypeStarted => self.comment_or_cdata_or_doctype_started(c),
+ State::CommentStarted => self.comment_started(c),
+ State::CDataStarted(s) => self.cdata_started(c, s),
+ State::DoctypeStarted(s) => self.doctype_started(c, s),
+ State::DoctypeFinishing(d) => self.doctype_finishing(c, d),
+ State::ProcessingInstructionClosing => self.processing_instruction_closing(c),
+ State::EmptyTagClosing => self.empty_element_closing(c),
+ State::CommentClosing(s) => self.comment_closing(c, s),
+ State::CDataClosing(s) => self.cdata_closing(c, s)
+ }
+ }
+
+ #[inline]
+ fn move_to(&mut self, st: State) -> Result { // switches state without producing a token
+ self.st = st;
+ Ok(None)
+ }
+
+ #[inline]
+ fn move_to_with(&mut self, st: State, token: Token) -> Result { // switches state and produces `token`
+ self.st = st;
+ Ok(Some(token))
+ }
+
+ #[inline]
+ fn move_to_with_unread(&mut self, st: State, cs: &[char], token: Token) -> Result { // like `move_to_with`, but first queues `cs` to be lexed again
+ self.char_queue.extend(cs.iter().cloned());
+ self.move_to_with(st, token)
+ }
+
+    /// Handles an invalid lexeme: `chunk` is the text read so far and `c` is the
+    /// character that could not continue it.
+    ///
+    /// `c` is always queued to be lexed again. Depending on the lexer flags the
+    /// chunk is either returned as `Token::Chunk` (moving back to the normal state)
+    /// or reported as an error.
+    fn handle_error(&mut self, chunk: &'static str, c: char) -> Result {
+        self.char_queue.push_back(c);
+
+        // FIXME: looks hacky
+        let recover = self.skip_errors || (self.inside_comment && chunk != "--");
+        if !recover {
+            return Err(self.error(format!("Unexpected token '{}' before '{}'", chunk, c)));
+        }
+        self.move_to_with(State::Normal, Token::Chunk(chunk))
+    }
+
+    /// Encountered a char while in the default state.
+    fn normal(&mut self, c: char) -> Result {
+        // Characters that may begin a longer token only switch the state; the
+        // token is decided once the following characters are known.
+        let pending = match c {
+            '<' => State::TagStarted,
+            '/' => State::EmptyTagClosing,
+            '?' => State::ProcessingInstructionClosing,
+            '-' => State::CommentClosing(ClosingSubstate::First),
+            ']' => State::CDataClosing(ClosingSubstate::First),
+            // Everything else is a complete token on its own.
+            _ => return Ok(Some(match c {
+                '>' => Token::TagEnd,
+                '=' => Token::EqualsSign,
+                '"' => Token::DoubleQuote,
+                '\'' => Token::SingleQuote,
+                '&' => Token::ReferenceStart,
+                ';' => Token::ReferenceEnd,
+                _ if is_whitespace_char(c) => Token::Whitespace(c),
+                _ => Token::Character(c)
+            }))
+        };
+        self.move_to(pending)
+    }
+
+    /// Encountered '<'
+    fn tag_opened(&mut self, c: char) -> Result {
+        let token = match c {
+            '!' => return self.move_to(State::CommentOrCDataOrDoctypeStarted),
+            '?' => Token::ProcessingInstructionStart,
+            '/' => Token::ClosingTagStart,
+            _ => {
+                // Whitespace or a name character ends the '<' token; that character
+                // is queued so it is lexed again in the normal state.
+                if is_whitespace_char(c) || is_name_char(c) {
+                    return self.move_to_with_unread(State::Normal, &[c], Token::OpeningTagStart);
+                }
+                return self.handle_error("<", c);
+            }
+        };
+        self.move_to_with(State::Normal, token)
+    }
+
+    /// Encountered '<!'
+    fn comment_or_cdata_or_doctype_started(&mut self, c: char) -> Result {
+        // The character after '<!' decides which construct is being opened.
+        let next = match c {
+            '-' => State::CommentStarted,
+            '[' => State::CDataStarted(CDataStartedSubstate::E),
+            'D' => State::DoctypeStarted(DoctypeStartedSubstate::D),
+            _ => return self.handle_error("<!", c)
+        };
+        self.move_to(next)
+    }
+
+    /// Encountered '<!-'
+    fn comment_started(&mut self, c: char) -> Result {
+        // Only a second dash completes the comment opening.
+        if c == '-' {
+            self.move_to_with(State::Normal, Token::CommentStart)
+        } else {
+            self.handle_error("<!-", c)
+        }
+    }
+
+ /// Encountered '<!['; matches the remaining characters of '<![CDATA[' one at a time.
+ fn cdata_started(&mut self, c: char, s: CDataStartedSubstate) -> Result {
+ use self::CDataStartedSubstate::{E, C, CD, CDA, CDAT, CDATA};
+ dispatch_on_enum_state!(self, s, c, State::CDataStarted, // rows: substate ; expected char ; next substate ; text read so far
+ E ; 'C' ; C ; "<![",
+ C ; 'D' ; CD ; "<![C",
+ CD ; 'A' ; CDA ; "<![CD",
+ CDA ; 'T' ; CDAT ; "<![CDA",
+ CDAT ; 'A' ; CDATA ; "<![CDAT";
+ CDATA ; '[' ; "<![CDATA" ; self.move_to_with(State::Normal, Token::CDataStart)
+ )
+ }
+
+ /// Encountered '<!D'; matches the remaining characters of '<!DOCTYPE' one at a time.
+ fn doctype_started(&mut self, c: char, s: DoctypeStartedSubstate) -> Result {
+ use self::DoctypeStartedSubstate::{D, DO, DOC, DOCT, DOCTY, DOCTYP};
+ dispatch_on_enum_state!(self, s, c, State::DoctypeStarted, // rows: substate ; expected char ; next substate ; text read so far
+ D ; 'O' ; DO ; "<!D",
+ DO ; 'C' ; DOC ; "<!DO",
+ DOC ; 'T' ; DOCT ; "<!DOC",
+ DOCT ; 'Y' ; DOCTY ; "<!DOCT",
+ DOCTY ; 'P' ; DOCTYP ; "<!DOCTY";
+ DOCTYP ; 'E' ; "<!DOCTYP" ; self.move_to_with(State::DoctypeFinishing(1), Token::DoctypeStart) // depth 1: the '<' of '<!DOCTYPE' itself
+ )
+ }
+
+    /// State used while awaiting the closing bracket for the <!DOCTYPE tag.
+    ///
+    /// `d` counts the `<` characters that have not yet been matched by a `>`;
+    /// it starts at 1 for the `<` of `<!DOCTYPE` itself. All other characters
+    /// are skipped without producing tokens.
+    ///
+    /// Returns an error if more `<` are left open than the `u8` counter can
+    /// represent, instead of overflowing the counter.
+    fn doctype_finishing(&mut self, c: char, d: u8) -> Result {
+        match c {
+            '<' => match d.checked_add(1) {
+                Some(deeper) => self.move_to(State::DoctypeFinishing(deeper)),
+                // An unchecked `d + 1` would panic in debug builds and wrap in
+                // release builds, which would later make `d - 1` underflow.
+                None => Err(self.error("Too many nested '<' inside <!DOCTYPE"))
+            },
+            // `<=` rather than `==` so that the subtraction below can never underflow.
+            '>' if d <= 1 => self.move_to_with(State::Normal, Token::TagEnd),
+            '>' => self.move_to(State::DoctypeFinishing(d - 1)),
+            _ => Ok(None),
+        }
+    }
+
+    /// Encountered '?'
+    fn processing_instruction_closing(&mut self, c: char) -> Result {
+        if c == '>' {
+            return self.move_to_with(State::Normal, Token::ProcessingInstructionEnd);
+        }
+        // A lone '?' is an ordinary character; the character after it is lexed again.
+        self.move_to_with_unread(State::Normal, &[c], Token::Character('?'))
+    }
+
+    /// Encountered '/'
+    fn empty_element_closing(&mut self, c: char) -> Result {
+        if c == '>' {
+            return self.move_to_with(State::Normal, Token::EmptyTagEnd);
+        }
+        // A lone '/' is an ordinary character; the character after it is lexed again.
+        self.move_to_with_unread(State::Normal, &[c], Token::Character('/'))
+    }
+
+    /// Encountered '-'
+    fn comment_closing(&mut self, c: char, s: ClosingSubstate) -> Result {
+        match (s, c) {
+            (ClosingSubstate::First, '-') =>
+                self.move_to(State::CommentClosing(ClosingSubstate::Second)),
+            (ClosingSubstate::First, _) =>
+                self.move_to_with_unread(State::Normal, &[c], Token::Character('-')),
+
+            (ClosingSubstate::Second, '>') =>
+                self.move_to_with(State::Normal, Token::CommentEnd),
+            // double dash not followed by a greater-than is a hard error inside comment
+            (ClosingSubstate::Second, _) if self.inside_comment =>
+                self.handle_error("--", c),
+            // nothing else except comment closing starts with a double dash, and comment
+            // closing can never be after another dash, and also we're outside of a comment,
+            // therefore it is safe to push only the last read character to the list of unread
+            // characters and pass the double dash directly to the output
+            (ClosingSubstate::Second, _) =>
+                self.move_to_with_unread(State::Normal, &[c], Token::Chunk("--"))
+        }
+    }
+
+    /// Encountered ']'
+    fn cdata_closing(&mut self, c: char, s: ClosingSubstate) -> Result {
+        match (s, c) {
+            (ClosingSubstate::First, ']') =>
+                self.move_to(State::CDataClosing(ClosingSubstate::Second)),
+            (ClosingSubstate::First, _) =>
+                self.move_to_with_unread(State::Normal, &[c], Token::Character(']')),
+
+            (ClosingSubstate::Second, '>') =>
+                self.move_to_with(State::Normal, Token::CDataEnd),
+            // Only the first ']' is emitted; the second one is queued again together
+            // with `c`, so that input such as "]]]>" still ends with a CDATA end token.
+            (ClosingSubstate::Second, _) =>
+                self.move_to_with_unread(State::Normal, &[']', c], Token::Character(']'))
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+ use common::{Position};
+ use std::io::{BufReader, Cursor};
+
+ use super::{Lexer, Token};
+
+ macro_rules! assert_oks( // asserts that the lexer yields each listed token, in order
+ (for $lex:ident and $buf:ident ; $($e:expr)+) => ({
+ $(
+ assert_eq!(Ok(Some($e)), $lex.next_token(&mut $buf));
+ )+
+ })
+ );
+
+ macro_rules! assert_err( // asserts that the next call fails at the given row/column with the given message
+ (for $lex:ident and $buf:ident expect row $r:expr ; $c:expr, $s:expr) => ({
+ let err = $lex.next_token(&mut $buf);
+ assert!(err.is_err());
+ let err = err.unwrap_err();
+ assert_eq!($r as u64, err.position().row);
+ assert_eq!($c as u64, err.position().column);
+ assert_eq!($s, err.msg());
+ })
+ );
+
+ macro_rules! assert_none( // asserts that the lexer reports end of stream
+ (for $lex:ident and $buf:ident) => (
+ assert_eq!(Ok(None), $lex.next_token(&mut $buf));
+ )
+ );
+
+ fn make_lex_and_buf(s: &str) -> (Lexer, BufReader<Cursor<Vec<u8>>>) { // fresh lexer plus an in-memory reader over the bytes of `s`
+ (Lexer::new(), BufReader::new(Cursor::new(s.to_owned().into_bytes())))
+ }
+
+    /// Lexes a small document that exercises tags, attributes, a processing
+    /// instruction, a comment and a reference, and checks every produced token.
+    #[test]
+    fn simple_lexer_test() {
+        // The whitespace after `d` must be a tab (see `Token::Whitespace('\t')` below).
+        // It is written as an escape so that editors or tools which normalize
+        // whitespace cannot silently turn it into a space.
+        let (mut lex, mut buf) = make_lex_and_buf(
+            "<a p='q'> x<b z=\"y\">d\t</b></a><p/> <?nm ?> <!-- a c --> &nbsp;"
+        );
+
+        assert_oks!(for lex and buf ;
+            Token::OpeningTagStart
+            Token::Character('a')
+            Token::Whitespace(' ')
+            Token::Character('p')
+            Token::EqualsSign
+            Token::SingleQuote
+            Token::Character('q')
+            Token::SingleQuote
+            Token::TagEnd
+            Token::Whitespace(' ')
+            Token::Character('x')
+            Token::OpeningTagStart
+            Token::Character('b')
+            Token::Whitespace(' ')
+            Token::Character('z')
+            Token::EqualsSign
+            Token::DoubleQuote
+            Token::Character('y')
+            Token::DoubleQuote
+            Token::TagEnd
+            Token::Character('d')
+            Token::Whitespace('\t')
+            Token::ClosingTagStart
+            Token::Character('b')
+            Token::TagEnd
+            Token::ClosingTagStart
+            Token::Character('a')
+            Token::TagEnd
+            Token::OpeningTagStart
+            Token::Character('p')
+            Token::EmptyTagEnd
+            Token::Whitespace(' ')
+            Token::ProcessingInstructionStart
+            Token::Character('n')
+            Token::Character('m')
+            Token::Whitespace(' ')
+            Token::ProcessingInstructionEnd
+            Token::Whitespace(' ')
+            Token::CommentStart
+            Token::Whitespace(' ')
+            Token::Character('a')
+            Token::Whitespace(' ')
+            Token::Character('c')
+            Token::Whitespace(' ')
+            Token::CommentEnd
+            Token::Whitespace(' ')
+            Token::ReferenceStart
+            Token::Character('n')
+            Token::Character('b')
+            Token::Character('s')
+            Token::Character('p')
+            Token::ReferenceEnd
+        );
+        assert_none!(for lex and buf);
+    }
+
+ #[test]
+ fn special_chars_test() { // characters that could start a longer token fall back to plain characters/chunks
+ let (mut lex, mut buf) = make_lex_and_buf(
+ r#"?x!+ // -| ]z]]"#
+ );
+
+ assert_oks!(for lex and buf ;
+ Token::Character('?')
+ Token::Character('x')
+ Token::Character('!')
+ Token::Character('+')
+ Token::Whitespace(' ')
+ Token::Character('/')
+ Token::Character('/')
+ Token::Whitespace(' ')
+ Token::Character('-')
+ Token::Character('|')
+ Token::Whitespace(' ')
+ Token::Character(']')
+ Token::Character('z')
+ Token::Chunk("]]")
+ );
+ assert_none!(for lex and buf);
+ }
+
+ #[test]
+ fn cdata_test() { // a CDATA section is delimited by CDataStart / CDataEnd tokens
+ let (mut lex, mut buf) = make_lex_and_buf(
+ r#"<a><![CDATA[x y ?]]> </a>"#
+ );
+
+ assert_oks!(for lex and buf ;
+ Token::OpeningTagStart
+ Token::Character('a')
+ Token::TagEnd
+ Token::CDataStart
+ Token::Character('x')
+ Token::Whitespace(' ')
+ Token::Character('y')
+ Token::Whitespace(' ')
+ Token::Character('?')
+ Token::CDataEnd
+ Token::Whitespace(' ')
+ Token::ClosingTagStart
+ Token::Character('a')
+ Token::TagEnd
+ );
+ assert_none!(for lex and buf);
+ }
+
+ #[test]
+ fn doctype_test() { // the DOCTYPE body produces no tokens between DoctypeStart and TagEnd
+ let (mut lex, mut buf) = make_lex_and_buf(
+ r#"<a><!DOCTYPE ab xx z> "#
+ );
+ assert_oks!(for lex and buf ;
+ Token::OpeningTagStart
+ Token::Character('a')
+ Token::TagEnd
+ Token::DoctypeStart
+ Token::TagEnd
+ Token::Whitespace(' ')
+ );
+ assert_none!(for lex and buf)
+ }
+
+ #[test]
+ fn doctype_with_internal_subset_test() { // a nested '<...>' inside the DOCTYPE does not end it early
+ let (mut lex, mut buf) = make_lex_and_buf(
+ r#"<a><!DOCTYPE ab[<!ELEMENT ba> ]> "#
+ );
+ assert_oks!(for lex and buf ;
+ Token::OpeningTagStart
+ Token::Character('a')
+ Token::TagEnd
+ Token::DoctypeStart
+ Token::TagEnd
+ Token::Whitespace(' ')
+ );
+ assert_none!(for lex and buf)
+ }
+
+ #[test]
+ fn end_of_stream_handling_ok() { // pending prefixes that are valid text are flushed as tokens at end of stream
+ macro_rules! eof_check(
+ ($data:expr ; $token:expr) => ({
+ let (mut lex, mut buf) = make_lex_and_buf($data);
+ assert_oks!(for lex and buf ; $token);
+ assert_none!(for lex and buf);
+ })
+ );
+ eof_check!("?" ; Token::Character('?'));
+ eof_check!("/" ; Token::Character('/'));
+ eof_check!("-" ; Token::Character('-'));
+ eof_check!("]" ; Token::Character(']'));
+ eof_check!("]]" ; Token::Chunk("]]"));
+ }
+
+ #[test]
+ fn end_of_stream_handling_error() { // input ending inside an unfinished multi-character token is an error
+ macro_rules! eof_check(
+ ($data:expr; $r:expr, $c:expr) => ({
+ let (mut lex, mut buf) = make_lex_and_buf($data);
+ assert_err!(for lex and buf expect row $r ; $c, "Unexpected end of stream");
+ assert_none!(for lex and buf);
+ })
+ );
+ eof_check!("<" ; 0, 1);
+ eof_check!("<!" ; 0, 2);
+ eof_check!("<!-" ; 0, 3);
+ eof_check!("<![" ; 0, 3);
+ eof_check!("<![C" ; 0, 4);
+ eof_check!("<![CD" ; 0, 5);
+ eof_check!("<![CDA" ; 0, 6);
+ eof_check!("<![CDAT" ; 0, 7);
+ eof_check!("<![CDATA" ; 0, 8);
+ eof_check!("--" ; 0, 2);
+ }
+
+ #[test]
+ fn error_in_comment_or_cdata_prefix() { // '<!' followed by an unexpected char: error by default, chunk when errors are disabled
+ let (mut lex, mut buf) = make_lex_and_buf("<!x");
+ assert_err!(for lex and buf expect row 0 ; 0,
+ "Unexpected token '<!' before 'x'"
+ );
+
+ let (mut lex, mut buf) = make_lex_and_buf("<!x");
+ lex.disable_errors();
+ assert_oks!(for lex and buf ;
+ Token::Chunk("<!")
+ Token::Character('x')
+ );
+ assert_none!(for lex and buf);
+ }
+
+ #[test]
+ fn error_in_comment_started() { // '<!-' not followed by a second dash: error by default, chunk when errors are disabled
+ let (mut lex, mut buf) = make_lex_and_buf("<!-\t");
+ assert_err!(for lex and buf expect row 0 ; 0,
+ "Unexpected token '<!-' before '\t'"
+ );
+
+ let (mut lex, mut buf) = make_lex_and_buf("<!-\t");
+ lex.disable_errors();
+ assert_oks!(for lex and buf ;
+ Token::Chunk("<!-")
+ Token::Whitespace('\t')
+ );
+ assert_none!(for lex and buf);
+ }
+
+ #[test]
+ fn error_in_comment_two_dashes_not_at_end() { // '--' without '>' is an error inside a comment, a chunk outside of one
+ let (mut lex, mut buf) = make_lex_and_buf("--x");
+ lex.inside_comment();
+ assert_err!(for lex and buf expect row 0; 0,
+ "Unexpected token '--' before 'x'"
+ );
+
+ let (mut lex, mut buf) = make_lex_and_buf("--x");
+ assert_oks!(for lex and buf ;
+ Token::Chunk("--")
+ Token::Character('x')
+ );
+ }
+
+ macro_rules! check_case( // checks one malformed input twice: error with errors enabled, chunk + char with errors disabled
+ ($chunk:expr, $app:expr; $data:expr; $r:expr, $c:expr, $s:expr) => ({
+ let (mut lex, mut buf) = make_lex_and_buf($data);
+ assert_err!(for lex and buf expect row $r ; $c, $s);
+
+ let (mut lex, mut buf) = make_lex_and_buf($data);
+ lex.disable_errors();
+ assert_oks!(for lex and buf ;
+ Token::Chunk($chunk)
+ Token::Character($app)
+ );
+ assert_none!(for lex and buf);
+ })
+ );
+
+ #[test]
+ fn error_in_cdata_started() { // every truncated prefix of '<![CDATA[' followed by a wrong character
+ check_case!("<![", '['; "<![[" ; 0, 0, "Unexpected token '<![' before '['");
+ check_case!("<![C", '['; "<![C[" ; 0, 0, "Unexpected token '<![C' before '['");
+ check_case!("<![CD", '['; "<![CD[" ; 0, 0, "Unexpected token '<![CD' before '['");
+ check_case!("<![CDA", '['; "<![CDA[" ; 0, 0, "Unexpected token '<![CDA' before '['");
+ check_case!("<![CDAT", '['; "<![CDAT[" ; 0, 0, "Unexpected token '<![CDAT' before '['");
+ check_case!("<![CDATA", '|'; "<![CDATA|" ; 0, 0, "Unexpected token '<![CDATA' before '|'");
+ }
+
+ #[test]
+ fn error_in_doctype_started() { // every truncated prefix of '<!DOCTYPE' followed by a wrong character
+ check_case!("<!D", 'a'; "<!Da" ; 0, 0, "Unexpected token '<!D' before 'a'");
+ check_case!("<!DO", 'b'; "<!DOb" ; 0, 0, "Unexpected token '<!DO' before 'b'");
+ check_case!("<!DOC", 'c'; "<!DOCc" ; 0, 0, "Unexpected token '<!DOC' before 'c'");
+ check_case!("<!DOCT", 'd'; "<!DOCTd" ; 0, 0, "Unexpected token '<!DOCT' before 'd'");
+ check_case!("<!DOCTY", 'e'; "<!DOCTYe" ; 0, 0, "Unexpected token '<!DOCTY' before 'e'");
+ check_case!("<!DOCTYP", 'f'; "<!DOCTYPf" ; 0, 0, "Unexpected token '<!DOCTYP' before 'f'");
+ }
+
+
+
+ #[test]
+ fn issue_98_cdata_ending_with_right_bracket() { // ']]]>' must yield Character(']') followed by CDataEnd
+ let (mut lex, mut buf) = make_lex_and_buf(
+ r#"<![CDATA[Foo [Bar]]]>"#
+ );
+
+ assert_oks!(for lex and buf ;
+ Token::CDataStart
+ Token::Character('F')
+ Token::Character('o')
+ Token::Character('o')
+ Token::Whitespace(' ')
+ Token::Character('[')
+ Token::Character('B')
+ Token::Character('a')
+ Token::Character('r')
+ Token::Character(']')
+ Token::CDataEnd
+ );
+ assert_none!(for lex and buf);
+ }
+}
diff --git a/third_party/rust/xml-rs/src/reader/mod.rs b/third_party/rust/xml-rs/src/reader/mod.rs
new file mode 100644
index 0000000000..90f5b52c56
--- /dev/null
+++ b/third_party/rust/xml-rs/src/reader/mod.rs
@@ -0,0 +1,129 @@
+//! Contains high-level interface for a pull-based XML parser.
+//!
+//! The most important type in this module is `EventReader`, which provides an iterator
+//! view for events in XML document.
+
+use std::io::{Read};
+use std::result;
+
+use common::{Position, TextPosition};
+
+pub use self::config::ParserConfig;
+pub use self::events::XmlEvent;
+
+use self::parser::PullParser;
+
+mod lexer;
+mod parser;
+mod config;
+mod events;
+
+mod error;
+pub use self::error::{Error, ErrorKind};
+
+/// A result type yielded by `EventReader`.
+pub type Result<T> = result::Result<T, Error>;
+
+/// A wrapper around an `std::io::Read` instance which provides pull-based XML parsing.
+pub struct EventReader<R: Read> {
+ source: R, // the underlying stream events are read from
+ parser: PullParser // parser driven by `next()`
+}
+
+impl<R: Read> EventReader<R> {
+ /// Creates a new reader, consuming the given stream.
+ #[inline]
+ pub fn new(source: R) -> EventReader<R> {
+ EventReader::new_with_config(source, ParserConfig::new())
+ }
+
+ /// Creates a new reader with the provided configuration, consuming the given stream.
+ #[inline]
+ pub fn new_with_config(source: R, config: ParserConfig) -> EventReader<R> {
+ EventReader { source: source, parser: PullParser::new(config) }
+ }
+
+ /// Pulls and returns next XML event from the stream.
+ ///
+ /// If the returned value is an `Err` or `XmlEvent::EndDocument`, then
+ /// further calls to this method will return this event again.
+ #[inline]
+ pub fn next(&mut self) -> Result<XmlEvent> {
+ self.parser.next(&mut self.source)
+ }
+
+ pub fn source(&self) -> &R { &self.source } // shared access to the underlying stream
+ pub fn source_mut(&mut self) -> &mut R { &mut self.source } // mutable access to the underlying stream
+
+ /// Unwraps this `EventReader`, returning the underlying reader.
+ ///
+ /// Note that this operation is destructive; unwrapping the reader and wrapping it
+ /// again with `EventReader::new()` will create a fresh reader which will attempt
+ /// to parse an XML document from the beginning.
+ pub fn into_inner(self) -> R {
+ self.source
+ }
+}
+
+impl<B: Read> Position for EventReader<B> {
+ /// Returns the position of the last event produced by the reader.
+ #[inline]
+ fn position(&self) -> TextPosition {
+ self.parser.position() // delegated to the underlying parser
+ }
+}
+
+impl<R: Read> IntoIterator for EventReader<R> {
+ type Item = Result<XmlEvent>;
+ type IntoIter = Events<R>;
+
+ fn into_iter(self) -> Events<R> { // wraps the reader into an `Events` iterator that has not finished yet
+ Events { reader: self, finished: false }
+ }
+}
+
+/// An iterator over XML events created from some type implementing `Read`.
+///
+/// When the next item is an `Err` or `XmlEvent::EndDocument`, then it will be returned
+/// by the iterator once, and then it will stop producing events (unless the parser ignores end of stream).
+pub struct Events<R: Read> {
+ reader: EventReader<R>,
+ finished: bool // set after an `Err` or `XmlEvent::EndDocument` has been yielded
+}
+
+impl<R: Read> Events<R> {
+ /// Unwraps the iterator, returning the internal `EventReader`.
+ #[inline]
+ pub fn into_inner(self) -> EventReader<R> {
+ self.reader
+ }
+
+ pub fn source(&self) -> &R { &self.reader.source } // shared access to the stream of the wrapped reader
+ pub fn source_mut(&mut self) -> &mut R { &mut self.reader.source } // mutable access to the stream of the wrapped reader
+
+}
+
+impl<R: Read> Iterator for Events<R> {
+    type Item = Result<XmlEvent>;
+
+    /// Yields the next parsing result.
+    ///
+    /// After an `Err` or `XmlEvent::EndDocument` has been yielded the iterator is
+    /// exhausted, unless the parser is configured to ignore the end of the stream,
+    /// in which case the reader keeps being polled.
+    #[inline]
+    fn next(&mut self) -> Option<Result<XmlEvent>> {
+        if self.finished && !self.reader.parser.is_ignoring_end_of_stream() {
+            return None;
+        }
+
+        let ev = self.reader.next();
+        let is_final = match ev {
+            Ok(XmlEvent::EndDocument) | Err(_) => true,
+            _ => false
+        };
+        if is_final {
+            self.finished = true;
+        }
+        Some(ev)
+    }
+}
+
+impl<'r> EventReader<&'r [u8]> {
+ /// A convenience method to create an `EventReader` from a string slice.
+ #[inline]
+ pub fn from_str(source: &'r str) -> EventReader<&'r [u8]> {
+ EventReader::new(source.as_bytes())
+ }
+}
diff --git a/third_party/rust/xml-rs/src/reader/parser/inside_cdata.rs b/third_party/rust/xml-rs/src/reader/parser/inside_cdata.rs
new file mode 100644
index 0000000000..3269fb4d6b
--- /dev/null
+++ b/third_party/rust/xml-rs/src/reader/parser/inside_cdata.rs
@@ -0,0 +1,32 @@
+use reader::events::XmlEvent;
+use reader::lexer::Token;
+
+use super::{Result, PullParser, State};
+
+impl PullParser {
+    /// Handles one token read while inside a CDATA section.
+    ///
+    /// Tokens are appended to the text buffer until `Token::CDataEnd` arrives. At
+    /// that point lexer errors are enabled again and the parser moves to
+    /// `State::OutsideTag`, emitting an `XmlEvent::CData` event with the buffered
+    /// text unless `cdata_to_characters` is set (then the buffer is left untouched
+    /// and no event is emitted here).
+    pub fn inside_cdata(&mut self, t: Token) -> Option<Result> {
+        if t == Token::CDataEnd {
+            self.lexer.enable_errors();
+            let event = if self.config.cdata_to_characters {
+                None
+            } else {
+                Some(Ok(XmlEvent::CData(self.take_buf())))
+            };
+            return self.into_state(State::OutsideTag, event);
+        }
+
+        // Any non-whitespace token clears the whitespace-only flag.
+        if !t.is_whitespace() {
+            self.inside_whitespace = false;
+        }
+        t.push_to_string(&mut self.buf);
+        None
+    }
+}
diff --git a/third_party/rust/xml-rs/src/reader/parser/inside_closing_tag_name.rs b/third_party/rust/xml-rs/src/reader/parser/inside_closing_tag_name.rs
new file mode 100644
index 0000000000..1d8074a5a3
--- /dev/null
+++ b/third_party/rust/xml-rs/src/reader/parser/inside_closing_tag_name.rs
@@ -0,0 +1,34 @@
+use namespace;
+
+use reader::lexer::Token;
+
+use super::{Result, PullParser, State, QualifiedNameTarget, ClosingTagSubstate};
+
+impl PullParser {
+ pub fn inside_closing_tag_name(&mut self, t: Token, s: ClosingTagSubstate) -> Option<Result> { // handles one token inside a closing tag
+ match s {
+ ClosingTagSubstate::CTInsideName => self.read_qualified_name(t, QualifiedNameTarget::ClosingTagNameTarget, |this, token, name| {
+ match name.prefix_ref() {
+ Some(prefix) if prefix == namespace::NS_XML_PREFIX ||
+ prefix == namespace::NS_XMLNS_PREFIX =>
+ // TODO: {:?} is bad, need something better
+ Some(self_error!(this; "'{:?}' cannot be an element name prefix", name.prefix)),
+ _ => {
+ this.data.element_name = Some(name.clone()); // remember the name for the end-element event
+ match token {
+ Token::Whitespace(_) => this.into_state_continue(State::InsideClosingTag(ClosingTagSubstate::CTAfterName)),
+ Token::TagEnd => this.emit_end_element(),
+ _ => Some(self_error!(this; "Unexpected token inside closing tag: {}", token))
+ }
+ }
+ }
+ }),
+ ClosingTagSubstate::CTAfterName => match t { // after the name only whitespace and '>' are accepted
+ Token::Whitespace(_) => None, // Skip whitespace
+ Token::TagEnd => self.emit_end_element(),
+ _ => Some(self_error!(self; "Unexpected token inside closing tag: {}", t))
+ }
+ }
+ }
+
+}
diff --git a/third_party/rust/xml-rs/src/reader/parser/inside_comment.rs b/third_party/rust/xml-rs/src/reader/parser/inside_comment.rs
new file mode 100644
index 0000000000..fc983205ac
--- /dev/null
+++ b/third_party/rust/xml-rs/src/reader/parser/inside_comment.rs
@@ -0,0 +1,32 @@
+use reader::events::XmlEvent;
+use reader::lexer::Token;
+
+use super::{Result, PullParser, State};
+
+impl PullParser {
+    /// Handles one token read while inside a comment.
+    ///
+    /// Comment text is accumulated in the buffer unless `ignore_comments` is set.
+    /// `Token::CommentEnd` switches the lexer out of comment mode and returns the
+    /// parser to `State::OutsideTag`, emitting an `XmlEvent::Comment` when
+    /// comments are not ignored.
+    pub fn inside_comment(&mut self, t: Token) -> Option<Result> {
+        match t {
+            // Double dash is illegal inside a comment
+            Token::Chunk("--") => Some(self_error!(self; "Unexpected token inside a comment: --")),
+
+            Token::CommentEnd => {
+                self.lexer.outside_comment();
+                if self.config.ignore_comments {
+                    self.into_state_continue(State::OutsideTag)
+                } else {
+                    let data = self.take_buf();
+                    self.into_state_emit(State::OutsideTag, Ok(XmlEvent::Comment(data)))
+                }
+            }
+
+            _ => {
+                // Do not modify buffer if ignoring the comment
+                if !self.config.ignore_comments {
+                    t.push_to_string(&mut self.buf);
+                }
+                None
+            }
+        }
+    }
+
+}
diff --git a/third_party/rust/xml-rs/src/reader/parser/inside_declaration.rs b/third_party/rust/xml-rs/src/reader/parser/inside_declaration.rs
new file mode 100644
index 0000000000..af39d10d86
--- /dev/null
+++ b/third_party/rust/xml-rs/src/reader/parser/inside_declaration.rs
@@ -0,0 +1,151 @@
+
+use common::XmlVersion;
+
+use reader::events::XmlEvent;
+use reader::lexer::Token;
+
+use super::{
+ Result, PullParser, State, DeclarationSubstate, QualifiedNameTarget,
+ DEFAULT_VERSION, DEFAULT_ENCODING
+};
+
+impl PullParser {
+ // TODO: remove redundancy via macros or extra methods
+ pub fn inside_declaration(&mut self, t: Token, s: DeclarationSubstate) -> Option<Result> { // Handles one token of the `<?xml ... ?>` declaration; `s` is the current position inside it. `None` means "keep reading"
+ macro_rules! unexpected_token( // builds `Some(Err(..))` carrying an "Unexpected token inside XML declaration" message
+ ($this:expr; $t:expr) => (Some($this.error(format!("Unexpected token inside XML declaration: {}", $t))));
+ ($t:expr) => (unexpected_token!(self; $t));
+ );
+
+ #[inline]
+ fn emit_start_document(this: &mut PullParser) -> Option<Result> { // emits `StartDocument` from the collected values and switches to `State::OutsideTag`
+ this.parsed_declaration = true;
+ let version = this.data.take_version();
+ let encoding = this.data.take_encoding();
+ let standalone = this.data.take_standalone();
+ this.into_state_emit(State::OutsideTag, Ok(XmlEvent::StartDocument {
+ version: version.unwrap_or(DEFAULT_VERSION), // defaults are used for values the declaration did not provide
+ encoding: encoding.unwrap_or(DEFAULT_ENCODING.into()),
+ standalone: standalone
+ }))
+ }
+
+ match s {
+ DeclarationSubstate::BeforeVersion => match t {
+ Token::Whitespace(_) => None, // continue
+ Token::Character('v') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideVersion)), // the leading 'v' is consumed here without being buffered, hence the "ersion" comparison below
+ _ => unexpected_token!(t)
+ },
+
+ DeclarationSubstate::InsideVersion => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| {
+ match &name.local_name[..] {
+ "ersion" if name.namespace.is_none() => // rest of "version"; the terminating token decides whether '=' was already seen
+ this.into_state_continue(State::InsideDeclaration(
+ if token == Token::EqualsSign {
+ DeclarationSubstate::InsideVersionValue
+ } else {
+ DeclarationSubstate::AfterVersion
+ }
+ )),
+ _ => unexpected_token!(this; name)
+ }
+ }),
+
+ DeclarationSubstate::AfterVersion => match t { // name was followed by whitespace: wait for '='
+ Token::Whitespace(_) => None,
+ Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideVersionValue)),
+ _ => unexpected_token!(t)
+ },
+
+ DeclarationSubstate::InsideVersionValue => self.read_attribute_value(t, |this, value| {
+ this.data.version = match &value[..] { // only "1.0" and "1.1" are recognized
+ "1.0" => Some(XmlVersion::Version10),
+ "1.1" => Some(XmlVersion::Version11),
+ _ => None
+ };
+ if this.data.version.is_some() {
+ this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterVersionValue))
+ } else {
+ Some(self_error!(this; "Unexpected XML version value: {}", value))
+ }
+ }),
+
+ DeclarationSubstate::AfterVersionValue => match t { // encoding and standalone are both optional after the version
+ Token::Whitespace(_) => None, // skip whitespace
+ Token::Character('e') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideEncoding)), // 'e' consumed here, "ncoding" is matched below
+ Token::Character('s') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDecl)), // 's' consumed here, "tandalone" is matched below
+ Token::ProcessingInstructionEnd => emit_start_document(self),
+ _ => unexpected_token!(t)
+ },
+
+ DeclarationSubstate::InsideEncoding => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| {
+ match &name.local_name[..] {
+ "ncoding" if name.namespace.is_none() => // rest of "encoding"
+ this.into_state_continue(State::InsideDeclaration(
+ if token == Token::EqualsSign { DeclarationSubstate::InsideEncodingValue } else { DeclarationSubstate::AfterEncoding }
+ )),
+ _ => unexpected_token!(this; name)
+ }
+ }),
+
+ DeclarationSubstate::AfterEncoding => match t { // name was followed by whitespace: wait for '='
+ Token::Whitespace(_) => None,
+ Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideEncodingValue)),
+ _ => unexpected_token!(t)
+ },
+
+ DeclarationSubstate::InsideEncodingValue => self.read_attribute_value(t, |this, value| {
+ this.data.encoding = Some(value); // stored as given; no validation of the encoding name happens here
+ this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeStandaloneDecl))
+ }),
+
+ DeclarationSubstate::BeforeStandaloneDecl => match t { // after the encoding only standalone or the end of the declaration may follow
+ Token::Whitespace(_) => None, // skip whitespace
+ Token::Character('s') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDecl)),
+ Token::ProcessingInstructionEnd => emit_start_document(self),
+ _ => unexpected_token!(t)
+ },
+
+ DeclarationSubstate::InsideStandaloneDecl => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| {
+ match &name.local_name[..] {
+ "tandalone" if name.namespace.is_none() => // rest of "standalone"
+ this.into_state_continue(State::InsideDeclaration(
+ if token == Token::EqualsSign {
+ DeclarationSubstate::InsideStandaloneDeclValue
+ } else {
+ DeclarationSubstate::AfterStandaloneDecl
+ }
+ )),
+ _ => unexpected_token!(this; name)
+ }
+ }),
+
+ DeclarationSubstate::AfterStandaloneDecl => match t { // name was followed by whitespace: wait for '='
+ Token::Whitespace(_) => None,
+ Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDeclValue)),
+ _ => unexpected_token!(t)
+ },
+
+ DeclarationSubstate::InsideStandaloneDeclValue => self.read_attribute_value(t, |this, value| {
+ let standalone = match &value[..] { // only the exact values "yes" and "no" are accepted
+ "yes" => Some(true),
+ "no" => Some(false),
+ _ => None
+ };
+ if standalone.is_some() {
+ this.data.standalone = standalone;
+ this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterStandaloneDeclValue))
+ } else {
+ Some(self_error!(this; "Invalid standalone declaration value: {}", value))
+ }
+ }),
+
+ DeclarationSubstate::AfterStandaloneDeclValue => match t { // nothing but `?>` may follow the standalone value
+ Token::Whitespace(_) => None, // skip whitespace
+ Token::ProcessingInstructionEnd => emit_start_document(self),
+ _ => unexpected_token!(t)
+ }
+ }
+ }
+
+}
diff --git a/third_party/rust/xml-rs/src/reader/parser/inside_doctype.rs b/third_party/rust/xml-rs/src/reader/parser/inside_doctype.rs
new file mode 100644
index 0000000000..8dcf367bc6
--- /dev/null
+++ b/third_party/rust/xml-rs/src/reader/parser/inside_doctype.rs
@@ -0,0 +1,16 @@
+use reader::lexer::Token;
+
+use super::{Result, PullParser, State};
+
+impl PullParser {
+ pub fn inside_doctype(&mut self, t: Token) -> Option<Result> { // Handles one token while in `State::InsideDoctype`; the DOCTYPE content is skipped and never produces an event
+ match t {
+ Token::TagEnd => { // the first `>` ends the DOCTYPE
+ self.lexer.enable_errors(); // presumably undoes a `disable_errors()` done when the DOCTYPE started (not visible here) -- confirm in the caller
+ self.into_state_continue(State::OutsideTag)
+ }
+
+ _ => None // everything else is discarded
+ }
+ }
+}
diff --git a/third_party/rust/xml-rs/src/reader/parser/inside_opening_tag.rs b/third_party/rust/xml-rs/src/reader/parser/inside_opening_tag.rs
new file mode 100644
index 0000000000..533874fb81
--- /dev/null
+++ b/third_party/rust/xml-rs/src/reader/parser/inside_opening_tag.rs
@@ -0,0 +1,108 @@
+use common::is_name_start_char;
+use attribute::OwnedAttribute;
+use namespace;
+
+use reader::lexer::Token;
+
+use super::{Result, PullParser, State, OpeningTagSubstate, QualifiedNameTarget};
+
+impl PullParser {
+ pub fn inside_opening_tag(&mut self, t: Token, s: OpeningTagSubstate) -> Option<Result> { // Handles one token of an opening tag (`<name attr="value" ...>`); `s` is the current position inside the tag. `None` means "keep reading"
+ macro_rules! unexpected_token(($t:expr) => (Some(self_error!(self; "Unexpected token inside opening tag: {}", $t))));
+ match s {
+ OpeningTagSubstate::InsideName => self.read_qualified_name(t, QualifiedNameTarget::OpeningTagNameTarget, |this, token, name| {
+ match name.prefix_ref() {
+ Some(prefix) if prefix == namespace::NS_XML_PREFIX ||
+ prefix == namespace::NS_XMLNS_PREFIX => // these two prefixes are rejected for element names
+ Some(self_error!(this; "'{:?}' cannot be an element name prefix", name.prefix)),
+ _ => {
+ this.data.element_name = Some(name.clone());
+ match token {
+ Token::TagEnd => this.emit_start_element(false),
+ Token::EmptyTagEnd => this.emit_start_element(true),
+ Token::Whitespace(_) => this.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideTag)),
+ _ => unreachable!() // for `OpeningTagNameTarget`, `read_qualified_name` invokes the callback only with the three tokens above
+ }
+ }
+ }
+ }),
+
+ OpeningTagSubstate::InsideTag => match t { // between attributes: expect an attribute name or the end of the tag
+ Token::Whitespace(_) => None, // skip whitespace
+ Token::Character(c) if is_name_start_char(c) => {
+ self.buf.push(c); // first character of the attribute name; the rest is collected by `read_qualified_name`
+ self.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideAttributeName))
+ }
+ Token::TagEnd => self.emit_start_element(false),
+ Token::EmptyTagEnd => self.emit_start_element(true),
+ _ => unexpected_token!(t)
+ },
+
+ OpeningTagSubstate::InsideAttributeName => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| {
+ this.data.attr_name = Some(name);
+ match token {
+ Token::Whitespace(_) => this.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::AfterAttributeName)),
+ Token::EqualsSign => this.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideAttributeValue)),
+ _ => unreachable!() // for `AttributeNameTarget`, the callback is invoked only on whitespace or '='
+ }
+ }),
+
+ OpeningTagSubstate::AfterAttributeName => match t { // name was followed by whitespace: wait for '='
+ Token::Whitespace(_) => None,
+ Token::EqualsSign => self.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideAttributeValue)),
+ _ => unexpected_token!(t)
+ },
+
+ OpeningTagSubstate::InsideAttributeValue => self.read_attribute_value(t, |this, value| {
+ let name = this.data.take_attr_name().unwrap(); // unwrap() will always succeed here
+
+ // check that no attribute with such name is already present
+ // if there is one, XML is not well-formed
+ if this.data.attributes.iter().find(|a| a.name == name).is_some() { // TODO: looks bad
+ // TODO: ideally this error should point to the beginning of the attribute,
+ // TODO: not the end of its value
+ Some(self_error!(this; "Attribute '{}' is redefined", name))
+ } else {
+ match name.prefix_ref() {
+ // declaring a new prefix; it is sufficient to check prefix only
+ // because "xmlns" prefix is reserved
+ Some(namespace::NS_XMLNS_PREFIX) => {
+ let ln = &name.local_name[..]; // the prefix being declared, i.e. `ln` in `xmlns:ln="value"`
+ if ln == namespace::NS_XMLNS_PREFIX {
+ Some(self_error!(this; "Cannot redefine prefix '{}'", namespace::NS_XMLNS_PREFIX))
+ } else if ln == namespace::NS_XML_PREFIX && &value[..] != namespace::NS_XML_URI {
+ Some(self_error!(this; "Prefix '{}' cannot be rebound to another value", namespace::NS_XML_PREFIX))
+ } else if value.is_empty() {
+ Some(self_error!(this; "Cannot undefine prefix '{}'", ln))
+ } else {
+ this.nst.put(name.local_name.clone(), value); // the declaration goes to the namespace stack, not to the attribute list
+ this.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideTag))
+ }
+ }
+
+ // declaring default namespace
+ None if &name.local_name[..] == namespace::NS_XMLNS_PREFIX =>
+ match &value[..] { // NOTE(review): the value is compared against the reserved *prefix* constants; the reserved namespace URIs may have been intended -- confirm
+ namespace::NS_XMLNS_PREFIX | namespace::NS_XML_PREFIX =>
+ Some(self_error!(this; "Namespace '{}' cannot be default", value)),
+ _ => {
+ this.nst.put(namespace::NS_NO_PREFIX, value.clone());
+ this.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideTag))
+ }
+ },
+
+ // regular attribute
+ _ => {
+ this.data.attributes.push(OwnedAttribute {
+ name: name.clone(),
+ value: value
+ });
+ this.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideTag))
+ }
+ }
+ }
+ })
+ }
+ }
+
+}
diff --git a/third_party/rust/xml-rs/src/reader/parser/inside_processing_instruction.rs b/third_party/rust/xml-rs/src/reader/parser/inside_processing_instruction.rs
new file mode 100644
index 0000000000..8ddf6b8d51
--- /dev/null
+++ b/third_party/rust/xml-rs/src/reader/parser/inside_processing_instruction.rs
@@ -0,0 +1,96 @@
+use common::{
+ is_name_start_char, is_name_char,
+};
+
+use reader::events::XmlEvent;
+use reader::lexer::Token;
+
+use super::{Result, PullParser, State, ProcessingInstructionSubstate, DeclarationSubstate};
+
+impl PullParser {
+ pub fn inside_processing_instruction(&mut self, t: Token, s: ProcessingInstructionSubstate) -> Option<Result> { // Handles one token of a processing instruction (`<?name data?>`); also detects the `<?xml` declaration and hands it over to `State::InsideDeclaration`
+ match s {
+ ProcessingInstructionSubstate::PIInsideName => match t {
+ Token::Character(c) if !self.buf_has_data() && is_name_start_char(c) ||
+ self.buf_has_data() && is_name_char(c) => self.append_char_continue(c), // collect the PI name in `self.buf`
+
+ Token::ProcessingInstructionEnd => { // `?>` right after the name: a PI without data
+ // self.buf contains PI name
+ let name = self.take_buf();
+
+ // Don't need to check for declaration because it has mandatory attributes
+ // but there is none
+ match &name[..] {
+ // Name is empty, it is an error
+ "" => Some(self_error!(self; "Encountered processing instruction without name")),
+
+ // Found <?xml-like PI not at the beginning of a document,
+ // it is an error - see section 2.6 of XML 1.1 spec
+ "xml"|"xmL"|"xMl"|"xML"|"Xml"|"XmL"|"XMl"|"XML" =>
+ Some(self_error!(self; "Invalid processing instruction: <?{}", name)),
+
+ // All is ok, emitting event
+ _ => {
+ self.into_state_emit(
+ State::OutsideTag,
+ Ok(XmlEvent::ProcessingInstruction {
+ name: name,
+ data: None
+ })
+ )
+ }
+ }
+ }
+
+ Token::Whitespace(_) => { // whitespace ends the name; what follows is either the XML declaration body or PI data
+ // self.buf contains PI name
+ let name = self.take_buf();
+
+ match &name[..] {
+ // We have not ever encountered an element and have not parsed XML declaration
+ "xml" if !self.encountered_element && !self.parsed_declaration =>
+ self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeVersion)),
+
+ // Found <?xml-like PI after the beginning of a document,
+ // it is an error - see section 2.6 of XML 1.1 spec
+ "xml"|"xmL"|"xMl"|"xML"|"Xml"|"XmL"|"XMl"|"XML"
+ if self.encountered_element || self.parsed_declaration => // NOTE(review): a mixed-case name such as `<?XML ` seen before any element or declaration fails this guard and is handled by the arm below as an ordinary PI -- confirm this is intended
+ Some(self_error!(self; "Invalid processing instruction: <?{}", name)),
+
+ // All is ok, starting parsing PI data
+ _ => {
+ self.lexer.disable_errors(); // data is arbitrary, so disable errors
+ self.data.name = name;
+ self.into_state_continue(State::InsideProcessingInstruction(ProcessingInstructionSubstate::PIInsideData))
+ }
+
+ }
+ }
+
+ _ => Some(self_error!(self; "Unexpected token: <?{}{}", self.buf, t))
+ },
+
+ ProcessingInstructionSubstate::PIInsideData => match t {
+ Token::ProcessingInstructionEnd => { // `?>` ends the data: emit the PI with the name saved earlier
+ self.lexer.enable_errors();
+ let name = self.data.take_name();
+ let data = self.take_buf();
+ self.into_state_emit(
+ State::OutsideTag,
+ Ok(XmlEvent::ProcessingInstruction {
+ name: name,
+ data: Some(data)
+ })
+ )
+ },
+
+ // Any other token should be treated as plain characters
+ _ => {
+ t.push_to_string(&mut self.buf);
+ None
+ }
+ },
+ }
+ }
+
+}
diff --git a/third_party/rust/xml-rs/src/reader/parser/inside_reference.rs b/third_party/rust/xml-rs/src/reader/parser/inside_reference.rs
new file mode 100644
index 0000000000..60026d5572
--- /dev/null
+++ b/third_party/rust/xml-rs/src/reader/parser/inside_reference.rs
@@ -0,0 +1,89 @@
+use std::char;
+
+use common::{is_name_start_char, is_name_char, is_whitespace_str};
+
+use reader::lexer::Token;
+
+use super::{Result, PullParser, State};
+
+impl PullParser {
+ /// Handles one token while the parser is inside a reference (`&...;`).
+ ///
+ /// Name characters are collected in `self.data.ref_data`. When `Token::ReferenceEnd` arrives
+ /// the reference is resolved and its replacement text is appended to `self.buf`, after which
+ /// the parser goes back to `prev_st`, the state it was in when the reference started.
+ ///
+ /// Returns `None` to keep reading tokens, or `Some(Err(..))` when the reference is malformed,
+ /// unknown, or denotes the null character.
+ pub fn inside_reference(&mut self, t: Token, prev_st: State) -> Option<Result> {
+     match t {
+         // '#' is accepted only as the very first character, where it marks a numeric reference
+         Token::Character(c) if !self.data.ref_data.is_empty() && is_name_char(c) ||
+                          self.data.ref_data.is_empty() && (is_name_start_char(c) || c == '#') => {
+             self.data.ref_data.push(c);
+             None
+         }
+
+         Token::ReferenceEnd => {
+             // TODO: check for unicode correctness
+             let name = self.data.take_ref_data();
+             let replacement = match &name[..] {
+                 "lt" => Ok('<'.to_string()),
+                 "gt" => Ok('>'.to_string()),
+                 "amp" => Ok('&'.to_string()),
+                 "apos" => Ok('\''.to_string()),
+                 "quot" => Ok('"'.to_string()),
+                 "" => Err(self_error!(self; "Encountered empty entity")),
+                 // `&#x;` has no digits after the marker, so it falls through to the decimal arm
+                 // and is rejected there as an invalid decimal number
+                 _ if name.len() > 2 && name.starts_with("#x") =>
+                     self.char_reference_text(&name, &name[2..], 16, "hexadecimal"),
+                 _ if name.len() > 1 && name.starts_with('#') =>
+                     self.char_reference_text(&name, &name[1..], 10, "decimal"),
+                 // anything else must be one of the user-supplied entities
+                 _ => match self.config.extra_entities.get(&name) {
+                     Some(text) => Ok(text.clone()),
+                     None => Err(self_error!(self; "Unexpected entity: {}", name))
+                 }
+             };
+             match replacement {
+                 Ok(text) => {
+                     self.buf.push_str(&text);
+                     // a reference that expands to non-whitespace makes the surrounding text non-whitespace
+                     if prev_st == State::OutsideTag && !is_whitespace_str(&text) {
+                         self.inside_whitespace = false;
+                     }
+                     self.into_state_continue(prev_st)
+                 }
+                 Err(e) => Some(e)
+             }
+         }
+
+         _ => Some(self_error!(self; "Unexpected token inside an entity: {}", t))
+     }
+ }
+
+ /// Resolves a numeric character reference into its replacement text.
+ ///
+ /// `name` is the whole reference name (used in error messages), `digits` is the part after
+ /// `#` / `#x`, `radix` is 10 or 16 and `kind` is the word describing the radix in messages.
+ ///
+ /// The null character is rejected by *value*, so spellings with leading zeros such as `&#00;`
+ /// or `&#x000;` are refused just like `&#0;`. A number that is not a valid Unicode scalar value
+ /// becomes U+FFFD when `config.replace_unknown_entity_references` is set and is an error
+ /// otherwise.
+ fn char_reference_text(&self, name: &str, digits: &str, radix: u32, kind: &str)
+     -> ::std::result::Result<String, Result> {
+     match u32::from_str_radix(digits, radix) {
+         Ok(0) => Err(self_error!(self; "Null character entity is not allowed")),
+         Ok(code) => match char::from_u32(code) {
+             Some(c) => Ok(c.to_string()),
+             None if self.config.replace_unknown_entity_references => Ok('\u{fffd}'.to_string()),
+             None => Err(self_error!(self; "Invalid {} character number in an entity: {}", kind, name))
+         },
+         // not a number in the given radix, or too large for `u32`
+         Err(_) => Err(self_error!(self; "Invalid {} character number in an entity: {}", kind, name))
+     }
+ }
+}
diff --git a/third_party/rust/xml-rs/src/reader/parser/mod.rs b/third_party/rust/xml-rs/src/reader/parser/mod.rs
new file mode 100644
index 0000000000..58ca3a6b1e
--- /dev/null
+++ b/third_party/rust/xml-rs/src/reader/parser/mod.rs
@@ -0,0 +1,622 @@
+//! Contains an implementation of pull-based XML parser.
+
+use std::mem;
+use std::borrow::Cow;
+use std::io::prelude::*;
+
+use common::{
+ self,
+ XmlVersion, Position, TextPosition,
+ is_name_start_char, is_name_char,
+};
+use name::OwnedName;
+use attribute::OwnedAttribute;
+use namespace::NamespaceStack;
+
+use reader::events::XmlEvent;
+use reader::config::ParserConfig;
+use reader::lexer::{Lexer, Token};
+
+ macro_rules! gen_takes( // for every `field -> method, type, default` entry generates `MarkupData::method()`, which returns the field's value and leaves `default` in its place
+ ($($field:ident -> $method:ident, $t:ty, $def:expr);+) => (
+ $(
+ impl MarkupData {
+ #[inline]
+ fn $method(&mut self) -> $t {
+ mem::replace(&mut self.$field, $def)
+ }
+ }
+ )+
+ )
+ );
+
+gen_takes!(
+ name -> take_name, String, String::new();
+ ref_data -> take_ref_data, String, String::new();
+
+ version -> take_version, Option<common::XmlVersion>, None;
+ encoding -> take_encoding, Option<String>, None;
+ standalone -> take_standalone, Option<bool>, None;
+
+ element_name -> take_element_name, Option<OwnedName>, None;
+
+ attr_name -> take_attr_name, Option<OwnedName>, None;
+ attributes -> take_attributes, Vec<OwnedAttribute>, vec!()
+);
+
+ macro_rules! self_error( // shorthand for `$this.error(..)`: either a plain message or a `format!`-style message with arguments
+ ($this:ident; $msg:expr) => ($this.error($msg));
+ ($this:ident; $fmt:expr, $($arg:expr),+) => ($this.error(format!($fmt, $($arg),+)))
+ );
+
+mod outside_tag;
+mod inside_processing_instruction;
+mod inside_declaration;
+mod inside_doctype;
+mod inside_opening_tag;
+mod inside_closing_tag_name;
+mod inside_comment;
+mod inside_cdata;
+mod inside_reference;
+
+static DEFAULT_VERSION: XmlVersion = XmlVersion::Version10;
+static DEFAULT_ENCODING: &'static str = "UTF-8";
+static DEFAULT_STANDALONE: Option<bool> = None;
+
+type ElementStack = Vec<OwnedName>;
+pub type Result = super::Result<XmlEvent>;
+
+ /// Pull-based XML parser.
+ pub struct PullParser {
+ config: ParserConfig,
+ lexer: Lexer,
+ st: State, // current state of the parsing state machine
+ buf: String, // text accumulated for the item being read (name, attribute value, comment text, ...)
+ nst: NamespaceStack,
+
+ data: MarkupData,
+ final_result: Option<Result>, // terminal event or error; once set, `next()` keeps returning a clone of it
+ next_event: Option<Result>, // event queued to be returned by the following `next()` call
+ est: ElementStack, // names of the currently open elements, innermost last
+ pos: Vec<TextPosition>, // queue of recorded positions; `pos[0]` is what `position()` reports
+
+ encountered_element: bool,
+ parsed_declaration: bool, // set when the XML declaration has been processed
+ inside_whitespace: bool,
+ read_prefix_separator: bool, // whether the qualified name being read already contains its ':'
+ pop_namespace: bool // when set, the next `next()` call pops the namespace stack before reading tokens
+ }
+
+impl PullParser {
+ /// Returns a new parser using the given config.
+ pub fn new(config: ParserConfig) -> PullParser {
+ PullParser {
+ config: config,
+ lexer: Lexer::new(),
+ st: State::OutsideTag, // parsing starts outside of any markup
+ buf: String::new(),
+ nst: NamespaceStack::default(),
+
+ data: MarkupData {
+ name: String::new(),
+ version: None,
+ encoding: None,
+ standalone: None,
+ ref_data: String::new(),
+ element_name: None,
+ quote: None,
+ attr_name: None,
+ attributes: Vec::new()
+ },
+ final_result: None,
+ next_event: None,
+ est: Vec::new(),
+ pos: vec![TextPosition::new()], // starts with one entry; `next_pos` never removes the last one, so `pos[0]` is always valid
+
+ encountered_element: false,
+ parsed_declaration: false,
+ inside_whitespace: true,
+ read_prefix_separator: false,
+ pop_namespace: false
+ }
+ }
+
+ /// Checks if this parser ignores the end of stream errors.
+ pub fn is_ignoring_end_of_stream(&self) -> bool { self.config.ignore_end_of_stream }
+}
+
+impl Position for PullParser {
+ /// Returns the position of the last event produced by the parser
+ #[inline]
+ fn position(&self) -> TextPosition {
+ self.pos[0]
+ }
+}
+
+ #[derive(Clone, PartialEq)]
+ pub enum State { // states of the parser's state machine; `dispatch_token` selects the handler for each of them
+ OutsideTag,
+ InsideOpeningTag(OpeningTagSubstate),
+ InsideClosingTag(ClosingTagSubstate),
+ InsideProcessingInstruction(ProcessingInstructionSubstate),
+ InsideComment,
+ InsideCData,
+ InsideDeclaration(DeclarationSubstate),
+ InsideDoctype,
+ InsideReference(Box<State>) // the boxed value is the state to return to once the reference has been resolved
+ }
+
+#[derive(Clone, PartialEq)]
+pub enum OpeningTagSubstate {
+ InsideName,
+
+ InsideTag,
+
+ InsideAttributeName,
+ AfterAttributeName,
+
+ InsideAttributeValue,
+}
+
+#[derive(Clone, PartialEq)]
+pub enum ClosingTagSubstate {
+ CTInsideName,
+ CTAfterName
+}
+
+#[derive(Clone, PartialEq)]
+pub enum ProcessingInstructionSubstate {
+ PIInsideName,
+ PIInsideData
+}
+
+#[derive(Clone, PartialEq)]
+pub enum DeclarationSubstate {
+ BeforeVersion,
+ InsideVersion,
+ AfterVersion,
+
+ InsideVersionValue,
+ AfterVersionValue,
+
+ InsideEncoding,
+ AfterEncoding,
+
+ InsideEncodingValue,
+
+ BeforeStandaloneDecl,
+ InsideStandaloneDecl,
+ AfterStandaloneDecl,
+
+ InsideStandaloneDeclValue,
+ AfterStandaloneDeclValue
+}
+
+#[derive(PartialEq)]
+enum QualifiedNameTarget {
+ AttributeNameTarget,
+ OpeningTagNameTarget,
+ ClosingTagNameTarget
+}
+
+#[derive(Copy, Clone, PartialEq, Eq)]
+enum QuoteToken {
+ SingleQuoteToken,
+ DoubleQuoteToken
+}
+
+ impl QuoteToken {
+ fn from_token(t: &Token) -> QuoteToken { // converts a quote token; panics for any other token, so callers must pass only `SingleQuote` / `DoubleQuote`
+ match *t {
+ Token::SingleQuote => QuoteToken::SingleQuoteToken,
+ Token::DoubleQuote => QuoteToken::DoubleQuoteToken,
+ _ => panic!("Unexpected token: {}", t)
+ }
+ }
+
+ fn as_token(self) -> Token { // inverse of `from_token`, used to compare a remembered opening quote with an incoming token
+ match self {
+ QuoteToken::SingleQuoteToken => Token::SingleQuote,
+ QuoteToken::DoubleQuoteToken => Token::DoubleQuote
+ }
+ }
+ }
+
+struct MarkupData {
+ name: String, // used for processing instruction name
+ ref_data: String, // used for reference content
+
+ version: Option<common::XmlVersion>, // used for XML declaration version
+ encoding: Option<String>, // used for XML declaration encoding
+ standalone: Option<bool>, // used for XML declaration standalone parameter
+
+ element_name: Option<OwnedName>, // used for element name
+
+ quote: Option<QuoteToken>, // used to hold opening quote for attribute value
+ attr_name: Option<OwnedName>, // used to hold attribute name
+ attributes: Vec<OwnedAttribute> // used to hold all accumulated attributes
+}
+
+impl PullParser {
+ /// Returns the next event read from the given buffer.
+ ///
+ /// This method should always be called with the same buffer. If you call it
+ /// providing different buffers each time, the result will be undefined.
+ pub fn next<R: Read>(&mut self, r: &mut R) -> Result {
+ if let Some(ref ev) = self.final_result { // a terminal event or error was already produced: keep returning it
+ return ev.clone();
+ }
+
+ if let Some(ev) = self.next_event.take() { // an event queued by the previous call, e.g. the `EndElement` of an empty-element tag
+ return ev;
+ }
+
+ if self.pop_namespace { // deferred pop requested when the previous end of an element was emitted
+ self.pop_namespace = false;
+ self.nst.pop();
+ }
+
+ loop {
+ // While lexer gives us Ok(maybe_token) -- we loop.
+ // Upon having a complete XML-event -- we return from the whole function.
+ match self.lexer.next_token(r) {
+ Ok(maybe_token) =>
+ match maybe_token {
+ None => break, // no more tokens: fall through to the end-of-stream handling below
+ Some(token) =>
+ match self.dispatch_token(token) {
+ None => {} // continue
+ Some(Ok(XmlEvent::EndDocument)) =>
+ return {
+ self.next_pos();
+ self.set_final_result(Ok(XmlEvent::EndDocument))
+ },
+ Some(Ok(xml_event)) =>
+ return {
+ self.next_pos();
+ Ok(xml_event)
+ },
+ Some(Err(xml_error)) => // parse errors are terminal
+ return {
+ self.next_pos();
+ self.set_final_result(Err(xml_error))
+ },
+ }
+ },
+ Err(lexer_error) => // lexer errors are terminal as well
+ return self.set_final_result(Err(lexer_error)),
+ }
+ }
+
+ // Handle end of stream
+ // Forward pos to the lexer head
+ self.next_pos();
+ let ev = if self.depth() == 0 { // no element is open
+ if self.encountered_element && self.st == State::OutsideTag { // all is ok
+ Ok(XmlEvent::EndDocument)
+ } else if !self.encountered_element {
+ self_error!(self; "Unexpected end of stream: no root element found")
+ } else { // self.st != State::OutsideTag
+ self_error!(self; "Unexpected end of stream") // TODO: add expected hint?
+ }
+ } else {
+ if self.config.ignore_end_of_stream { // the error is returned but not stored as the final result, presumably so the caller can call `next()` again once more data is available
+ self.final_result = None;
+ self.lexer.reset_eof_handled();
+ return self_error!(self; "Unexpected end of stream: still inside the root element");
+ } else {
+ self_error!(self; "Unexpected end of stream: still inside the root element")
+ }
+ };
+ self.set_final_result(ev)
+ }
+
+ // To be called when a terminal event or error is reached. Stores a clone of `result` in
+ // `self.final_result`, so that every later `next()` call returns it again, and returns `result`.
+ fn set_final_result(&mut self, result: Result) -> Result {
+ self.final_result = Some(result.clone());
+ result
+ }
+
+ #[inline]
+ fn error<M: Into<Cow<'static, str>>>(&self, msg: M) -> Result { // builds an `Err` from `msg` and the lexer; the lexer is presumably used as the source of the error position -- confirm in the error module
+ Err((&self.lexer, msg).into())
+ }
+
+ #[inline]
+ fn next_pos(&mut self) { // advances `pos[0]`, the position reported by `position()`
+ if self.pos.len() > 1 {
+ self.pos.remove(0); // a position recorded by `push_pos` is waiting: make it the current one
+ } else {
+ self.pos[0] = self.lexer.position(); // nothing is waiting: take the lexer's current position
+ }
+ }
+
+ #[inline]
+ fn push_pos(&mut self) { // records the lexer's current position at the back of the queue, to be picked up by a later `next_pos` call
+ self.pos.push(self.lexer.position());
+ }
+
+ fn dispatch_token(&mut self, t: Token) -> Option<Result> { // routes the token to the handler of the current state; `None` means no event is ready yet
+ match self.st.clone() { // the state is cloned because the handlers take `&mut self` and may replace `self.st`
+ State::OutsideTag => self.outside_tag(t),
+ State::InsideProcessingInstruction(s) => self.inside_processing_instruction(t, s),
+ State::InsideDeclaration(s) => self.inside_declaration(t, s),
+ State::InsideDoctype => self.inside_doctype(t),
+ State::InsideOpeningTag(s) => self.inside_opening_tag(t, s),
+ State::InsideClosingTag(s) => self.inside_closing_tag_name(t, s),
+ State::InsideComment => self.inside_comment(t),
+ State::InsideCData => self.inside_cdata(t),
+ State::InsideReference(s) => self.inside_reference(t, *s) // `*s` is the state to return to after the reference
+ }
+ }
+
+ #[inline]
+ fn depth(&self) -> usize {
+ self.est.len()
+ }
+
+ #[inline]
+ fn buf_has_data(&self) -> bool { // true when `self.buf` holds at least one byte of accumulated text
+ !self.buf.is_empty()
+ }
+
+ #[inline]
+ fn take_buf(&mut self) -> String {
+ mem::replace(&mut self.buf, String::new())
+ }
+
+ #[inline]
+ fn append_char_continue(&mut self, c: char) -> Option<Result> {
+ self.buf.push(c);
+ None
+ }
+
+ #[inline]
+ fn into_state(&mut self, st: State, ev: Option<Result>) -> Option<Result> { // switches the state machine to `st` and passes `ev` through unchanged, so a handler can do both in one expression
+ self.st = st;
+ ev
+ }
+
+ #[inline]
+ fn into_state_continue(&mut self, st: State) -> Option<Result> {
+ self.into_state(st, None)
+ }
+
+ #[inline]
+ fn into_state_emit(&mut self, st: State, ev: Result) -> Option<Result> {
+ self.into_state(st, Some(ev))
+ }
+
+ /// Dispatches tokens in order to process qualified name. If qualified name cannot be parsed,
+ /// an error is returned.
+ ///
+ /// # Parameters
+ /// * `t` --- next token; `target` --- kind of name being read, which decides which tokens may end it;
+ /// * `on_name` --- a callback executed with the terminating token (whitespace, `=`, `>` or `/>`) and the parsed name.
+ fn read_qualified_name<F>(&mut self, t: Token, target: QualifiedNameTarget, on_name: F) -> Option<Result>
+ where F: Fn(&mut PullParser, Token, OwnedName) -> Option<Result> {
+ // We can get here for the first time only when self.buf contains zero or one character,
+ // but first character cannot be a colon anyway
+ if self.buf.len() <= 1 {
+ self.read_prefix_separator = false;
+ }
+
+ let invoke_callback = |this: &mut PullParser, t| { // takes the collected text out of the buffer, parses it and hands the name to `on_name`
+ let name = this.take_buf();
+ match name.parse() {
+ Ok(name) => on_name(this, t, name),
+ Err(_) => Some(self_error!(this; "Qualified name is invalid: {}", name))
+ }
+ };
+
+ match t {
+ // There can be only one colon, and not as the first character
+ Token::Character(':') if self.buf_has_data() && !self.read_prefix_separator => {
+ self.buf.push(':');
+ self.read_prefix_separator = true;
+ None
+ }
+
+ Token::Character(c) if c != ':' && (!self.buf_has_data() && is_name_start_char(c) ||
+ self.buf_has_data() && is_name_char(c)) =>
+ self.append_char_continue(c),
+
+ Token::EqualsSign if target == QualifiedNameTarget::AttributeNameTarget => invoke_callback(self, t), // '=' ends attribute names only
+
+ Token::EmptyTagEnd if target == QualifiedNameTarget::OpeningTagNameTarget => invoke_callback(self, t), // '/>' ends opening tag names only
+
+ Token::TagEnd if target == QualifiedNameTarget::OpeningTagNameTarget ||
+ target == QualifiedNameTarget::ClosingTagNameTarget => invoke_callback(self, t), // '>' ends element names in opening and closing tags
+
+ Token::Whitespace(_) => invoke_callback(self, t), // whitespace ends every kind of name
+
+ _ => Some(self_error!(self; "Unexpected token inside qualified name: {}", t))
+ }
+ }
+
+ /// Dispatches tokens in order to process attribute value.
+ ///
+ /// # Parameters
+ /// * `t` --- next token;
+ /// * `on_value` --- a callback which is called with the collected value when terminating quote is encountered.
+ fn read_attribute_value<F>(&mut self, t: Token, on_value: F) -> Option<Result>
+ where F: Fn(&mut PullParser, String) -> Option<Result> {
+ match t {
+ Token::Whitespace(_) if self.data.quote.is_none() => None, // skip leading whitespace
+
+ Token::DoubleQuote | Token::SingleQuote => match self.data.quote {
+ None => { // Entered attribute value
+ self.data.quote = Some(QuoteToken::from_token(&t)); // remember which quote opened the value
+ None
+ }
+ Some(q) if q.as_token() == t => { // the same kind of quote closes the value
+ self.data.quote = None;
+ let value = self.take_buf();
+ on_value(self, value)
+ }
+ _ => { // the other kind of quote is ordinary text inside the value
+ t.push_to_string(&mut self.buf);
+ None
+ }
+ },
+
+ Token::ReferenceStart => { // `&` starts a reference; the current state is saved so parsing of the value resumes afterwards
+ let st = Box::new(self.st.clone());
+ self.into_state_continue(State::InsideReference(st))
+ }
+
+ Token::OpeningTagStart =>
+ Some(self_error!(self; "Unexpected token inside attribute value: <")),
+
+ // Every character except " and ' and < is okay
+ _ => {
+ t.push_to_string(&mut self.buf);
+ None
+ }
+ }
+ }
+
+ fn emit_start_element(&mut self, emit_end_element: bool) -> Option<Result> { // Emits `StartElement` for the collected name and attributes; `emit_end_element` is true for an empty-element tag (`<a/>`), whose `EndElement` is queued as the next event
+ let mut name = self.data.take_element_name().unwrap();
+ let mut attributes = self.data.take_attributes();
+
+ // check whether the name prefix is bound and fix its namespace
+ match self.nst.get(name.borrow().prefix_repr()) {
+ Some("") => name.namespace = None, // default namespace
+ Some(ns) => name.namespace = Some(ns.into()),
+ None => return Some(self_error!(self; "Element {} prefix is unbound", name))
+ }
+
+ // check and fix accumulated attributes prefixes
+ for attr in attributes.iter_mut() {
+ if let Some(ref pfx) = attr.name.prefix { // attributes without a prefix are left untouched
+ let new_ns = match self.nst.get(pfx) {
+ Some("") => None, // default namespace
+ Some(ns) => Some(ns.into()),
+ None => return Some(self_error!(self; "Attribute {} prefix is unbound", attr.name))
+ };
+ attr.name.namespace = new_ns;
+ }
+ }
+
+ if emit_end_element {
+ self.pop_namespace = true; // the namespace stack is popped by the `next()` call that follows the queued `EndElement`
+ self.next_event = Some(Ok(XmlEvent::EndElement {
+ name: name.clone()
+ }));
+ } else {
+ self.est.push(name.clone()); // remember the open element for matching against its closing tag
+ }
+ let namespace = self.nst.squash();
+ self.into_state_emit(State::OutsideTag, Ok(XmlEvent::StartElement {
+ name: name,
+ attributes: attributes,
+ namespace: namespace
+ }))
+ }
+
+ /// Completes a closing tag: resolves the namespace of the name stored in
+ /// `self.data.element_name`, checks it against the innermost open element and emits
+ /// `EndElement`.
+ ///
+ /// Returns `Some(Err(..))` when the name prefix is unbound, when the name differs from the
+ /// element being closed, or when no element is open at all.
+ fn emit_end_element(&mut self) -> Option<Result> {
+     // the closing tag handler stores the name before calling this method
+     let mut name = self.data.take_element_name().unwrap();
+
+     // check whether the name prefix is bound and fix its namespace
+     match self.nst.get(name.borrow().prefix_repr()) {
+         Some("") => name.namespace = None, // default namespace
+         Some(ns) => name.namespace = Some(ns.into()),
+         None => return Some(self_error!(self; "Element {} prefix is unbound", name))
+     }
+
+     // A closing tag while no element is open (e.g. `<a/></b>`) is malformed input rather than
+     // a broken parser invariant, so it is reported as an error instead of panicking
+     let op_name = match self.est.pop() {
+         Some(op_name) => op_name,
+         None => return Some(self_error!(self; "Unexpected closing tag: {}, no element is open", name))
+     };
+
+     if name == op_name {
+         self.pop_namespace = true; // the namespace stack is popped at the start of the next `next()` call
+         self.into_state_emit(State::OutsideTag, Ok(XmlEvent::EndElement { name: name }))
+     } else {
+         Some(self_error!(self; "Unexpected closing tag: {}, expected {}", name, op_name))
+     }
+ }
+
+}
+
+#[cfg(test)]
+mod tests {
+ use std::io::BufReader;
+
+ use common::{Position, TextPosition};
+ use name::OwnedName;
+ use attribute::OwnedAttribute;
+ use reader::parser::PullParser;
+ use reader::ParserConfig;
+ use reader::events::XmlEvent;
+
+ fn new_parser() -> PullParser {
+ PullParser::new(ParserConfig::new())
+ }
+
+ macro_rules! expect_event(
+ ($r:expr, $p:expr, $t:pat) => (
+ match $p.next(&mut $r) {
+ $t => {}
+ e => panic!("Unexpected event: {:?}", e)
+ }
+ );
+ ($r:expr, $p:expr, $t:pat => $c:expr ) => (
+ match $p.next(&mut $r) {
+ $t if $c => {}
+ e => panic!("Unexpected event: {:?}", e)
+ }
+ )
+ );
+
+ macro_rules! test_data(
+ ($d:expr) => ({
+ static DATA: &'static str = $d;
+ let r = BufReader::new(DATA.as_bytes());
+ let p = new_parser();
+ (r, p)
+ })
+ );
+
+ #[test]
+ fn issue_3_semicolon_in_attribute_value() {
+ let (mut r, mut p) = test_data!(r#"
+ <a attr="zzz;zzz" />
+ "#);
+
+ expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
+ expect_event!(r, p, Ok(XmlEvent::StartElement { ref name, ref attributes, ref namespace }) =>
+ *name == OwnedName::local("a") &&
+ attributes.len() == 1 &&
+ attributes[0] == OwnedAttribute::new(OwnedName::local("attr"), "zzz;zzz") &&
+ namespace.is_essentially_empty()
+ );
+ expect_event!(r, p, Ok(XmlEvent::EndElement { ref name }) => *name == OwnedName::local("a"));
+ expect_event!(r, p, Ok(XmlEvent::EndDocument));
+ }
+
+ #[test]
+ fn issue_140_entity_reference_inside_tag() {
+ let (mut r, mut p) = test_data!(r#"
+ <bla>&#9835;</bla>
+ "#);
+
+ expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
+ expect_event!(r, p, Ok(XmlEvent::StartElement { ref name, .. }) => *name == OwnedName::local("bla"));
+ expect_event!(r, p, Ok(XmlEvent::Characters(ref s)) => s == "\u{266b}");
+ expect_event!(r, p, Ok(XmlEvent::EndElement { ref name, .. }) => *name == OwnedName::local("bla"));
+ expect_event!(r, p, Ok(XmlEvent::EndDocument));
+ }
+
+ #[test]
+ fn opening_tag_in_attribute_value() {
+ let (mut r, mut p) = test_data!(r#"
+ <a attr="zzz<zzz" />
+ "#);
+
+ expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
+ expect_event!(r, p, Err(ref e) =>
+ e.msg() == "Unexpected token inside attribute value: <" &&
+ e.position() == TextPosition { row: 1, column: 24 }
+ );
+ }
+}
diff --git a/third_party/rust/xml-rs/src/reader/parser/outside_tag.rs b/third_party/rust/xml-rs/src/reader/parser/outside_tag.rs
new file mode 100644
index 0000000000..d3f7598f75
--- /dev/null
+++ b/third_party/rust/xml-rs/src/reader/parser/outside_tag.rs
@@ -0,0 +1,130 @@
+use common::is_whitespace_char;
+
+use reader::events::XmlEvent;
+use reader::lexer::Token;
+
+use super::{
+ Result, PullParser, State, ClosingTagSubstate, OpeningTagSubstate,
+ ProcessingInstructionSubstate, DEFAULT_VERSION, DEFAULT_ENCODING, DEFAULT_STANDALONE
+};
+
+ impl PullParser {
+ /// Processes one lexer token while the parser is outside of any tag.
+ ///
+ /// The arms fall into two groups:
+ ///
+ /// * text-like input (whitespace, character data, a stray `;`) is skipped or appended to
+ ///   the parser's buffer;
+ /// * any other token is treated as markup: the buffered text, if any, is first turned
+ ///   into a pending `Whitespace`/`Characters` event (or dropped, depending on the
+ ///   configuration) and the parser then switches to the state matching the token via
+ ///   `into_state`, handing the pending event over.
+ ///
+ /// Character data at depth 0 and tokens not valid at this point produce `Some(Err(..))`
+ /// built with `self_error!`.
+ pub fn outside_tag(&mut self, t: Token) -> Option<Result> {
+ match t {
+ // `&` starts a reference; `OutsideTag` is recorded as the state to come back to.
+ Token::ReferenceStart =>
+ self.into_state_continue(State::InsideReference(Box::new(State::OutsideTag))),
+
+ Token::Whitespace(_) if self.depth() == 0 && self.config.ignore_root_level_whitespace => None, // skip whitespace outside of the root element
+
+ // With trimming enabled, whitespace arriving while the buffer is empty is dropped.
+ Token::Whitespace(_) if self.config.trim_whitespace && !self.buf_has_data() => None,
+
+ Token::Whitespace(c) => {
+ // Record the position only for the first character of a new text run.
+ if !self.buf_has_data() {
+ self.push_pos();
+ }
+ self.append_char_continue(c)
+ }
+
+ _ if t.contains_char_data() && self.depth() == 0 =>
+ Some(self_error!(self; "Unexpected characters outside the root element: {}", t)),
+
+ _ if t.contains_char_data() => { // Non-whitespace char data
+ if !self.buf_has_data() {
+ self.push_pos();
+ }
+ self.inside_whitespace = false;
+ t.push_to_string(&mut self.buf);
+ None
+ }
+
+ Token::ReferenceEnd => { // Semi-colon in a text outside an entity
+ self.inside_whitespace = false;
+ Token::ReferenceEnd.push_to_string(&mut self.buf);
+ None
+ }
+
+ // Ignored comment while coalescing: the buffer is not flushed here, the parser just
+ // enters the comment state.
+ Token::CommentStart if self.config.coalesce_characters && self.config.ignore_comments => {
+ // We need to switch the lexer into a comment mode inside comments
+ self.lexer.inside_comment();
+ self.into_state_continue(State::InsideComment)
+ }
+
+ // CDATA converted to characters while coalescing: likewise no flush of the buffer.
+ Token::CDataStart if self.config.coalesce_characters && self.config.cdata_to_characters => {
+ if !self.buf_has_data() {
+ self.push_pos();
+ }
+ // We need to disable lexing errors inside CDATA
+ self.lexer.disable_errors();
+ self.into_state_continue(State::InsideCData)
+ }
+
+ _ => {
+ // Encountered some markup event, flush the buffer as characters
+ // or a whitespace
+ let mut next_event = if self.buf_has_data() {
+ let buf = self.take_buf();
+ if self.inside_whitespace && self.config.trim_whitespace {
+ // whitespace-only run and trimming is on: nothing to report
+ None
+ } else if self.inside_whitespace && !self.config.whitespace_to_characters {
+ Some(Ok(XmlEvent::Whitespace(buf)))
+ } else if self.config.trim_whitespace {
+ Some(Ok(XmlEvent::Characters(buf.trim_matches(is_whitespace_char).into())))
+ } else {
+ Some(Ok(XmlEvent::Characters(buf)))
+ }
+ } else { None };
+ self.inside_whitespace = true; // Reset inside_whitespace flag
+ self.push_pos();
+ match t {
+ Token::ProcessingInstructionStart =>
+ self.into_state(State::InsideProcessingInstruction(ProcessingInstructionSubstate::PIInsideName), next_event),
+
+ // A doctype is only accepted before the first element has been seen.
+ Token::DoctypeStart if !self.encountered_element => {
+ // We don't have a doctype event so skip this position
+ // FIXME: update when we have a doctype event
+ self.next_pos();
+ self.lexer.disable_errors();
+ self.into_state(State::InsideDoctype, next_event)
+ }
+
+ Token::OpeningTagStart => {
+ // If declaration was not parsed and we have encountered an element,
+ // emit this declaration as the next event.
+ if !self.parsed_declaration {
+ self.parsed_declaration = true;
+ let sd_event = XmlEvent::StartDocument {
+ version: DEFAULT_VERSION,
+ encoding: DEFAULT_ENCODING.into(),
+ standalone: DEFAULT_STANDALONE
+ };
+ // next_event is always none here because we're outside of
+ // the root element
+ next_event = Some(Ok(sd_event));
+ self.push_pos();
+ }
+ self.encountered_element = true;
+ self.nst.push_empty();
+ self.into_state(State::InsideOpeningTag(OpeningTagSubstate::InsideName), next_event)
+ }
+
+ // A closing tag is only valid while at least one element is open.
+ Token::ClosingTagStart if self.depth() > 0 =>
+ self.into_state(State::InsideClosingTag(ClosingTagSubstate::CTInsideName), next_event),
+
+ Token::CommentStart => {
+ // We need to switch the lexer into a comment mode inside comments
+ self.lexer.inside_comment();
+ self.into_state(State::InsideComment, next_event)
+ }
+
+ Token::CDataStart => {
+ // We need to disable lexing errors inside CDATA
+ self.lexer.disable_errors();
+ self.into_state(State::InsideCData, next_event)
+ }
+
+ _ => Some(self_error!(self; "Unexpected token: {}", t))
+ }
+ }
+ }
+ }
+ }
diff --git a/third_party/rust/xml-rs/src/util.rs b/third_party/rust/xml-rs/src/util.rs
new file mode 100644
index 0000000000..23fee04eed
--- /dev/null
+++ b/third_party/rust/xml-rs/src/util.rs
@@ -0,0 +1,107 @@
+use std::io::{self, Read};
+use std::str;
+use std::fmt;
+
+ /// Errors which `next_char_from` can return while reading a single character.
+ #[derive(Debug)]
+ pub enum CharReadError {
+ /// The stream ended after some, but not all, bytes of a character had been read.
+ UnexpectedEof,
+ /// The bytes read do not form valid UTF-8.
+ Utf8(str::Utf8Error),
+ /// The underlying reader reported an I/O error.
+ Io(io::Error)
+ }
+
+ impl From<str::Utf8Error> for CharReadError {
+     /// Wraps a UTF-8 decoding error in the `Utf8` variant.
+     fn from(cause: str::Utf8Error) -> CharReadError {
+         CharReadError::Utf8(cause)
+     }
+ }
+
+ impl From<io::Error> for CharReadError {
+     /// Wraps an I/O error in the `Io` variant.
+     fn from(cause: io::Error) -> CharReadError {
+         CharReadError::Io(cause)
+     }
+ }
+
+ impl fmt::Display for CharReadError {
+     /// Formats a short human-readable message; wrapped errors are appended to a fixed prefix.
+     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+         match *self {
+             CharReadError::UnexpectedEof => write!(f, "unexpected end of stream"),
+             CharReadError::Utf8(ref cause) => write!(f, "UTF-8 decoding error: {}", cause),
+             CharReadError::Io(ref cause) => write!(f, "I/O error: {}", cause)
+         }
+     }
+ }
+
+ /// Reads the next UTF-8 encoded character from `source`, one byte at a time.
+ ///
+ /// Returns `Ok(None)` when the stream is already at its end, `Ok(Some(c))` as soon as the
+ /// bytes read so far decode to a character, `CharReadError::UnexpectedEof` when the stream
+ /// ends in the middle of a character, `CharReadError::Utf8` when four bytes have been read
+ /// without forming valid UTF-8, and `CharReadError::Io` when the reader fails.
+ pub fn next_char_from<R: Read>(source: &mut R) -> Result<Option<char>, CharReadError> {
+     const MAX_CODEPOINT_LEN: usize = 4;
+
+     let mut encoded = [0u8; MAX_CODEPOINT_LEN];
+     let mut filled = 0;
+
+     for byte in source.bytes() {
+         encoded[filled] = byte?;
+         filled += 1;
+
+         match str::from_utf8(&encoded[..filled]) {
+             Ok(decoded) => return Ok(decoded.chars().next()), // always Some(..)
+             Err(e) => {
+                 // Keep reading until the longest possible encoding has been tried.
+                 if filled == MAX_CODEPOINT_LEN {
+                     return Err(e.into());
+                 }
+             }
+         }
+     }
+
+     // The stream ended: either cleanly between characters or in the middle of one.
+     if filled == 0 {
+         Ok(None)
+     } else {
+         Err(CharReadError::UnexpectedEof)
+     }
+ }
+
+ #[cfg(test)]
+ mod tests {
+     /// Exercises `next_char_from` on valid input of every encoded length, on empty input,
+     /// on a truncated code point, on invalid bytes and on a failing reader.
+     #[test]
+     fn test_next_char_from() {
+         use std::io;
+
+         let mut bytes: &[u8] = "correct".as_bytes(); // correct ASCII
+         assert_eq!(super::next_char_from(&mut bytes).unwrap(), Some('c'));
+
+         let mut bytes: &[u8] = "правильно".as_bytes(); // correct BMP
+         assert_eq!(super::next_char_from(&mut bytes).unwrap(), Some('п'));
+
+         let mut bytes: &[u8] = "😊".as_bytes(); // correct non-BMP
+         assert_eq!(super::next_char_from(&mut bytes).unwrap(), Some('😊'));
+
+         let mut bytes: &[u8] = b""; // empty
+         assert_eq!(super::next_char_from(&mut bytes).unwrap(), None);
+
+         let mut bytes: &[u8] = b"\xf0\x9f\x98"; // incomplete code point
+         match super::next_char_from(&mut bytes).unwrap_err() {
+             super::CharReadError::UnexpectedEof => {},
+             e => panic!("Unexpected result: {:?}", e)
+         };
+
+         let mut bytes: &[u8] = b"\xff\x9f\x98\x32"; // invalid code point
+         match super::next_char_from(&mut bytes).unwrap_err() {
+             super::CharReadError::Utf8(_) => {},
+             e => panic!("Unexpected result: {:?}", e)
+         };
+
+
+         // error during read
+         struct ErrorReader;
+         impl io::Read for ErrorReader {
+             // The buffer is never touched: every read fails immediately.
+             fn read(&mut self, _buf: &mut [u8]) -> io::Result<usize> {
+                 Err(io::Error::new(io::ErrorKind::Other, "test error"))
+             }
+         }
+
+         let mut r = ErrorReader;
+         match super::next_char_from(&mut r).unwrap_err() {
+             // `Display` of a custom `io::Error` prints the message it was created with;
+             // this replaces the deprecated `Error::description()`.
+             super::CharReadError::Io(ref e) if e.kind() == io::ErrorKind::Other &&
+                                                e.to_string() == "test error" => {},
+             e => panic!("Unexpected result: {:?}", e)
+         }
+     }
+ }
diff --git a/third_party/rust/xml-rs/src/writer/config.rs b/third_party/rust/xml-rs/src/writer/config.rs
new file mode 100644
index 0000000000..ebabf181f0
--- /dev/null
+++ b/third_party/rust/xml-rs/src/writer/config.rs
@@ -0,0 +1,157 @@
+//! Contains emitter configuration structure.
+
+use std::io::Write;
+use std::borrow::Cow;
+
+use writer::EventWriter;
+
+ /// Emitter configuration structure.
+ ///
+ /// This structure contains various options which control XML document emitter behavior.
+ #[derive(Clone, PartialEq, Eq, Debug)]
+ pub struct EmitterConfig {
+ /// Line separator used to separate lines in formatted output. Default is `"\n"`.
+ pub line_separator: Cow<'static, str>,
+
+ /// A string which will be used for a single level of indentation. Default is `"  "`
+ /// (two spaces).
+ pub indent_string: Cow<'static, str>,
+
+ /// Whether or not the emitted document should be indented. Default is false.
+ ///
+ /// The emitter is capable of performing automatic indentation of the emitted XML document.
+ /// It is done in stream-like fashion and does not require the knowledge of the whole
+ /// document in advance.
+ ///
+ /// Sometimes, however, automatic indentation is undesirable, e.g. when you want to keep
+ /// existing layout when processing an existing XML document. Also the indentation algorithm
+ /// is not thoroughly tested. Hence by default it is disabled.
+ pub perform_indent: bool,
+
+ /// Whether or not characters in output events will be escaped. Default is true.
+ ///
+ /// The emitter can automatically escape characters which can't appear in PCDATA sections
+ /// or element attributes of an XML document, like `<` or `"` (in attributes). This may
+ /// introduce some overhead because then every corresponding piece of character data
+ /// should be scanned for invalid characters.
+ ///
+ /// If this option is disabled, the XML writer may produce non-well-formed documents, so
+ /// use `false` value for this option with care.
+ pub perform_escaping: bool,
+
+ /// Whether or not to write XML document declaration at the beginning of a document.
+ /// Default is true.
+ ///
+ /// This option controls whether the document declaration should be emitted automatically
+ /// before a root element is written if it was not emitted explicitly by the user.
+ pub write_document_declaration: bool,
+
+ /// Whether or not to convert elements with empty content to empty elements. Default is true.
+ ///
+ /// This option allows turning elements like `<a></a>` (an element with empty content)
+ /// into `<a />` (an empty element).
+ pub normalize_empty_elements: bool,
+
+ /// Whether or not to emit CDATA events as plain characters. Default is false.
+ ///
+ /// This option forces the emitter to convert CDATA events into regular character events,
+ /// performing all the necessary escaping beforehand. This may be occasionally useful
+ /// for feeding the document into incorrect parsers which do not support CDATA.
+ pub cdata_to_characters: bool,
+
+ /// Whether or not to keep element names to support `EndElement` events without explicit names.
+ /// Default is true.
+ ///
+ /// This option makes the emitter keep names of written elements in order to allow
+ /// omitting names when writing closing element tags. This could incur some memory overhead.
+ pub keep_element_names_stack: bool,
+
+ /// Whether or not to automatically insert leading and trailing spaces in emitted comments,
+ /// if necessary. Default is true.
+ ///
+ /// This is a convenience option in order for the user not to append spaces before and after
+ /// comments text in order to get more pretty comments: `<!-- something -->` instead of
+ /// `<!--something-->`.
+ pub autopad_comments: bool,
+
+ /// Whether or not to automatically insert spaces before the trailing `/>` in self-closing
+ /// elements. Default is true.
+ ///
+ /// This option is only meaningful if `normalize_empty_elements` is true. For example, the
+ /// element `<a></a>` would be unaffected. When `normalize_empty_elements` is true, then when
+ /// this option is also true, the same element would appear `<a />`. If this option is false,
+ /// then the same element would appear `<a/>`.
+ pub pad_self_closing: bool,
+ }
+
+ impl EmitterConfig {
+     /// Creates an emitter configuration with default values.
+     ///
+     /// The defaults are the ones stated on the fields of this structure; in particular the
+     /// indentation unit is two spaces and the line separator is `"\n"`.
+     ///
+     /// You can tweak default options with builder-like pattern:
+     ///
+     /// ```rust
+     /// use xml::writer::EmitterConfig;
+     ///
+     /// let config = EmitterConfig::new()
+     ///     .line_separator("\r\n")
+     ///     .perform_indent(true)
+     ///     .normalize_empty_elements(false);
+     /// ```
+     #[inline]
+     pub fn new() -> EmitterConfig {
+         EmitterConfig {
+             line_separator: "\n".into(),
+             indent_string: "  ".into(),  // two spaces
+             perform_indent: false,
+             perform_escaping: true,
+             write_document_declaration: true,
+             normalize_empty_elements: true,
+             cdata_to_characters: false,
+             keep_element_names_stack: true,
+             autopad_comments: true,
+             pad_self_closing: true
+         }
+     }
+
+     /// Creates an XML writer with this configuration.
+     ///
+     /// This is a convenience method for configuring and creating a writer at the same time:
+     ///
+     /// ```rust
+     /// use xml::writer::EmitterConfig;
+     ///
+     /// let mut target: Vec<u8> = Vec::new();
+     ///
+     /// let writer = EmitterConfig::new()
+     ///     .line_separator("\r\n")
+     ///     .perform_indent(true)
+     ///     .normalize_empty_elements(false)
+     ///     .create_writer(&mut target);
+     /// ```
+     ///
+     /// This method is exactly equivalent to calling `EventWriter::new_with_config()` with
+     /// this configuration object.
+     #[inline]
+     pub fn create_writer<W: Write>(self, sink: W) -> EventWriter<W> {
+         EventWriter::new_with_config(sink, self)
+     }
+ }
+
+ impl Default for EmitterConfig {
+     /// Same as `EmitterConfig::new()`.
+     #[inline]
+     fn default() -> EmitterConfig {
+         Self::new()
+     }
+ }
+
+ // Builder-style setters for `EmitterConfig` (presumably one method per listed field; see
+ // the `gen_setters!` definition). `perform_escaping` is listed so that escaping can be
+ // switched off through the builder like every other option.
+ gen_setters!(EmitterConfig,
+     line_separator: into Cow<'static, str>,
+     indent_string: into Cow<'static, str>,
+     perform_indent: val bool,
+     perform_escaping: val bool,
+     write_document_declaration: val bool,
+     normalize_empty_elements: val bool,
+     cdata_to_characters: val bool,
+     keep_element_names_stack: val bool,
+     autopad_comments: val bool,
+     pad_self_closing: val bool
+ );
diff --git a/third_party/rust/xml-rs/src/writer/emitter.rs b/third_party/rust/xml-rs/src/writer/emitter.rs
new file mode 100644
index 0000000000..ba80f66781
--- /dev/null
+++ b/third_party/rust/xml-rs/src/writer/emitter.rs
@@ -0,0 +1,447 @@
+use std::io;
+use std::io::prelude::*;
+use std::fmt;
+use std::result;
+use std::borrow::Cow;
+use std::error::Error;
+
+use common;
+use name::{Name, OwnedName};
+use attribute::Attribute;
+use escape::{escape_str_attribute, escape_str_pcdata};
+use common::XmlVersion;
+use namespace::{NamespaceStack, NS_NO_PREFIX, NS_EMPTY_URI, NS_XMLNS_PREFIX, NS_XML_PREFIX};
+
+use writer::config::EmitterConfig;
+
+ /// An error which may be returned by `EventWriter` when writing XML events.
+ #[derive(Debug)]
+ pub enum EmitterError {
+ /// An I/O error occurred in the underlying `Write` instance.
+ Io(io::Error),
+
+ /// Document declaration has already been written to the output stream.
+ DocumentStartAlreadyEmitted,
+
+ /// The name of the last opening element is not available.
+ LastElementNameNotAvailable,
+
+ /// The name of the last opening element is not equal to the name of the provided
+ /// closing element.
+ EndElementNameIsNotEqualToLastStartElementName,
+
+ /// End element name is not specified when it is needed, for example, when automatic
+ /// closing is not enabled in configuration.
+ EndElementNameIsNotSpecified
+ }
+
+ impl From<io::Error> for EmitterError {
+     /// Wraps an I/O error in the `Io` variant, which lets `?` be used on write calls.
+     fn from(cause: io::Error) -> EmitterError {
+         EmitterError::Io(cause)
+     }
+ }
+
+ impl fmt::Display for EmitterError {
+     /// Writes `emitter error: ` followed by the wrapped I/O error, or by the fixed
+     /// description of any other variant.
+     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+         write!(f, "emitter error: ")?;
+         if let EmitterError::Io(ref cause) = *self {
+             write!(f, "I/O error: {}", cause)
+         } else {
+             write!(f, "{}", self.description())
+         }
+     }
+ }
+
+ impl Error for EmitterError {
+     /// Returns a fixed, static description for each variant.
+     fn description(&self) -> &str {
+         use self::EmitterError::*;
+         match *self {
+             Io(_) => "I/O error",
+             DocumentStartAlreadyEmitted => "document start event has already been emitted",
+             LastElementNameNotAvailable => "last element name is not available",
+             EndElementNameIsNotEqualToLastStartElementName =>
+                 "end element name is not equal to last start element name",
+             EndElementNameIsNotSpecified =>
+                 "end element name is not specified and can't be inferred",
+         }
+     }
+ }
+
+ /// A result type yielded by the emitter and by `EventWriter`; the error type is `EmitterError`.
+ pub type Result<T> = result::Result<T, EmitterError>;
+
+ // TODO: split into a low-level fast writer without any checks and formatting logic and a
+ // high-level indenting validating writer
+ /// Low-level XML emitter: holds the formatting state and writes pieces of a document to a
+ /// `Write` target that is passed to every `emit_*` call.
+ pub struct Emitter {
+ // Options controlling the output format.
+ config: EmitterConfig,
+
+ // Namespace stack; exposed through `namespace_stack_mut()` and read when namespace
+ // attributes are written.
+ nst: NamespaceStack,
+
+ // Number of currently open elements, used as the indentation depth.
+ indent_level: usize,
+ // What was written last at each nesting level; the last entry describes the current level.
+ indent_stack: Vec<IndentFlags>,
+
+ // Names of the open elements, filled only when `keep_element_names_stack` is enabled.
+ element_names: Vec<OwnedName>,
+
+ // Set once the document declaration has been written.
+ start_document_emitted: bool,
+ // True right after a start tag has been written and nothing else since.
+ just_wrote_start_element: bool
+ }
+
+ impl Emitter {
+     /// Creates an emitter in its initial state: no declaration written, no open elements,
+     /// an empty namespace stack and a single `WroteNothing` entry for the document level.
+     pub fn new(config: EmitterConfig) -> Emitter {
+         let document_level = vec![IndentFlags::WroteNothing];
+         Emitter {
+             start_document_emitted: false,
+             just_wrote_start_element: false,
+             indent_level: 0,
+             indent_stack: document_level,
+             element_names: Vec::new(),
+             nst: NamespaceStack::empty(),
+             config,
+         }
+     }
+ }
+
+ /// Records what kind of output was written last at one nesting level; the indentation
+ /// logic consults it to decide whether a line break may be inserted.
+ #[derive(Copy, Clone, Eq, PartialEq, Debug)]
+ enum IndentFlags {
+ // Nothing has been written at this level yet.
+ WroteNothing,
+ // The last thing written at this level was markup.
+ WroteMarkup,
+ // The last thing written at this level was text.
+ WroteText,
+ }
+
+impl Emitter {
+ /// Returns the current state of namespaces, as a mutable reference to the emitter's
+ /// namespace stack.
+ #[inline]
+ pub fn namespace_stack_mut(&mut self) -> &mut NamespaceStack {
+ &mut self.nst
+ }
+
+ /// Whether the last thing written at the current nesting level was text.
+ #[inline]
+ fn wrote_text(&self) -> bool {
+     let current = *self.indent_stack.last().unwrap();
+     current == IndentFlags::WroteText
+ }
+
+ /// Whether the last thing written at the current nesting level was markup.
+ #[inline]
+ fn wrote_markup(&self) -> bool {
+     let current = *self.indent_stack.last().unwrap();
+     current == IndentFlags::WroteMarkup
+ }
+
+ /// Marks the current nesting level as having text written last.
+ #[inline]
+ fn set_wrote_text(&mut self) {
+     let current = self.indent_stack.last_mut().unwrap();
+     *current = IndentFlags::WroteText;
+ }
+
+ /// Marks the current nesting level as having markup written last.
+ #[inline]
+ fn set_wrote_markup(&mut self) {
+     let current = self.indent_stack.last_mut().unwrap();
+     *current = IndentFlags::WroteMarkup;
+ }
+
+ /// Resets the current nesting level to the "nothing written yet" state.
+ #[inline]
+ fn reset_state(&mut self) {
+     let current = self.indent_stack.last_mut().unwrap();
+     *current = IndentFlags::WroteNothing;
+ }
+
+ /// Writes the configured line separator followed by `level` copies of the indent string.
+ fn write_newline<W: Write>(&mut self, target: &mut W, level: usize) -> Result<()> {
+     let separator = self.config.line_separator.as_bytes();
+     target.write_all(separator)?;
+
+     let indent_unit = self.config.indent_string.as_bytes();
+     for _ in 0..level {
+         target.write_all(indent_unit)?;
+     }
+     Ok(())
+ }
+
+ /// Writes the line break and indentation which precede a piece of markup, when needed.
+ ///
+ /// Nothing is written unless `perform_indent` is enabled, the last thing written at the
+ /// current level was not text, and either the emitter is inside an element or markup has
+ /// already been written at the current level.
+ fn before_markup<W: Write>(&mut self, target: &mut W) -> Result<()> {
+ if self.config.perform_indent && !self.wrote_text() &&
+ (self.indent_level > 0 || self.wrote_markup()) {
+ let indent_level = self.indent_level;
+ self.write_newline(target, indent_level)?;
+ // Only when nested and with a non-empty indent string is the level marked as
+ // having markup written at this point.
+ if self.indent_level > 0 && self.config.indent_string.len() > 0 {
+ self.after_markup();
+ }
+ }
+ Ok(())
+ }
+
+ /// Records that markup was the last thing written at the current nesting level.
+ fn after_markup(&mut self) {
+     *self.indent_stack.last_mut().unwrap() = IndentFlags::WroteMarkup;
+ }
+
+ /// Performs the pre-markup indentation and, if that succeeded, opens a fresh nesting
+ /// level for the element which is about to be written.
+ fn before_start_element<W: Write>(&mut self, target: &mut W) -> Result<()> {
+     let outcome = self.before_markup(target);
+     if outcome.is_ok() {
+         self.indent_stack.push(IndentFlags::WroteNothing);
+     }
+     outcome
+ }
+
+ /// Bookkeeping after a start tag: one level deeper, and the new level has seen markup.
+ fn after_start_element(&mut self) {
+     self.indent_level += 1;
+     self.after_markup();
+ }
+
+ /// Writes the line break and indentation which precede a closing tag, when needed.
+ ///
+ /// The closing tag is placed on its own line, one level shallower than the element's
+ /// content, only when indentation is enabled, the emitter is inside an element and the
+ /// last thing written at this level was markup.
+ fn before_end_element<W: Write>(&mut self, target: &mut W) -> Result<()> {
+     let needs_newline = self.config.perform_indent && self.indent_level > 0 &&
+         self.wrote_markup() && !self.wrote_text();
+     if !needs_newline {
+         return Ok(());
+     }
+     let parent_level = self.indent_level - 1;
+     self.write_newline(target, parent_level)
+ }
+
+ /// Bookkeeping after an element has been closed: leaves its nesting level (if any is
+ /// open) and marks the enclosing level as having markup written last.
+ fn after_end_element(&mut self) {
+     if self.indent_level > 0 {
+         self.indent_stack.pop();
+         self.indent_level -= 1;
+     }
+     self.set_wrote_markup();
+ }
+
+ /// Records that text was the last thing written at the current nesting level.
+ fn after_text(&mut self) {
+     *self.indent_stack.last_mut().unwrap() = IndentFlags::WroteText;
+ }
+
+ /// Writes the XML declaration `<?xml version=".." encoding=".." [standalone=".."]?>`.
+ ///
+ /// Fails with `DocumentStartAlreadyEmitted` if a declaration has been written before.
+ /// The "declaration emitted" flag is set before anything is written, and the current
+ /// level is marked as having markup even when the write itself fails.
+ pub fn emit_start_document<W: Write>(&mut self, target: &mut W,
+                                      version: XmlVersion,
+                                      encoding: &str,
+                                      standalone: Option<bool>) -> Result<()> {
+     if self.start_document_emitted {
+         return Err(EmitterError::DocumentStartAlreadyEmitted);
+     }
+     self.start_document_emitted = true;
+
+     self.before_markup(target)?;
+
+     let standalone_text = standalone.map(|yes| if yes { "yes" } else { "no" });
+     let outcome = (move || -> Result<()> {
+         write!(target, "<?xml version=\"{}\" encoding=\"{}\"", version, encoding)?;
+         if let Some(text) = standalone_text {
+             write!(target, " standalone=\"{}\"", text)?;
+         }
+         write!(target, "?>")?;
+         Ok(())
+     })();
+
+     self.after_markup();
+
+     outcome
+ }
+
+ /// Writes the default declaration (XML 1.0, `utf-8`, no standalone attribute) if none has
+ /// been written yet and `write_document_declaration` is enabled; otherwise does nothing.
+ fn check_document_started<W: Write>(&mut self, target: &mut W) -> Result<()> {
+     if self.start_document_emitted || !self.config.write_document_declaration {
+         return Ok(());
+     }
+     self.emit_start_document(target, common::XmlVersion::Version10, "utf-8", None)
+ }
+
+ /// Closes a pending start tag with `>` once it is known that the element has content.
+ ///
+ /// With `normalize_empty_elements` enabled the `>` of a start tag is held back so that an
+ /// immediately following end event can produce a self-closing tag instead.
+ fn fix_non_empty_element<W: Write>(&mut self, target: &mut W) -> Result<()> {
+     let tag_is_pending = self.config.normalize_empty_elements && self.just_wrote_start_element;
+     if !tag_is_pending {
+         return Ok(());
+     }
+     self.just_wrote_start_element = false;
+     target.write_all(b">")?;
+     Ok(())
+ }
+
+ /// Writes a processing instruction: `<?name?>`, or `<?name data?>` when data is given.
+ ///
+ /// The declaration is written first if still required and a pending start tag is closed.
+ /// Neither `name` nor `data` is escaped or validated here.
+ pub fn emit_processing_instruction<W: Write>(&mut self,
+                                              target: &mut W,
+                                              name: &str,
+                                              data: Option<&str>) -> Result<()> {
+     self.check_document_started(target)?;
+     self.fix_non_empty_element(target)?;
+
+     self.before_markup(target)?;
+
+     let outcome = (|| -> Result<()> {
+         write!(target, "<?{}", name)?;
+         if let Some(text) = data {
+             write!(target, " {}", text)?;
+         }
+         write!(target, "?>")?;
+         Ok(())
+     })();
+
+     // The level is marked as having markup even when the write failed.
+     self.after_markup();
+
+     outcome
+ }
+
+ /// Writes the opening part of a start tag: `<name`, the namespace attributes of the
+ /// current namespace scope and the given attributes. The terminating `>` is not written
+ /// here.
+ ///
+ /// Before that, the declaration is written if still required, a pending start tag of the
+ /// parent is closed and indentation is applied.
+ fn emit_start_element_initial<W>(&mut self, target: &mut W,
+ name: Name,
+ attributes: &[Attribute]) -> Result<()>
+ where W: Write
+ {
+ self.check_document_started(target)?;
+ self.fix_non_empty_element(target)?;
+ self.before_start_element(target)?;
+ write!(target, "<{}", name.repr_display())?;
+ self.emit_current_namespace_attributes(target)?;
+ self.emit_attributes(target, attributes)?;
+ self.after_start_element();
+ Ok(())
+ }
+
+ /// Writes a start tag with its namespace declarations and attributes.
+ ///
+ /// The element name is remembered when `keep_element_names_stack` is enabled. With
+ /// `normalize_empty_elements` enabled the closing `>` is held back until it is known
+ /// whether the element has content; otherwise it is written right away.
+ pub fn emit_start_element<W>(&mut self, target: &mut W,
+                              name: Name,
+                              attributes: &[Attribute]) -> Result<()>
+     where W: Write
+ {
+     if self.config.keep_element_names_stack {
+         let remembered = name.to_owned();
+         self.element_names.push(remembered);
+     }
+
+     self.emit_start_element_initial(target, name, attributes)?;
+     self.just_wrote_start_element = true;
+
+     if self.config.normalize_empty_elements {
+         return Ok(());
+     }
+     write!(target, ">")?;
+     Ok(())
+ }
+
+ /// Writes an `xmlns`/`xmlns:prefix` attribute for every mapping yielded by
+ /// `self.nst.peek()`.
+ ///
+ /// Mappings for `NS_XMLNS_PREFIX` and `NS_XML_PREFIX` are skipped, and the default
+ /// namespace is written only when its URI differs from `NS_EMPTY_URI`.
+ ///
+ /// NOTE(review): URIs are written verbatim, without attribute escaping — confirm that
+ /// callers never pass URIs containing `"`, `<` or `&`.
+ pub fn emit_current_namespace_attributes<W>(&mut self, target: &mut W) -> Result<()>
+ where W: Write
+ {
+ for (prefix, uri) in self.nst.peek() {
+ match prefix {
+ // internal namespaces are not emitted
+ NS_XMLNS_PREFIX | NS_XML_PREFIX => Ok(()),
+ //// there is already a namespace binding with this prefix in scope
+ //prefix if self.nst.get(prefix) == Some(uri) => Ok(()),
+ // emit xmlns only if it is overridden
+ NS_NO_PREFIX => if uri != NS_EMPTY_URI {
+ write!(target, " xmlns=\"{}\"", uri)
+ } else { Ok(()) },
+ // everything else
+ prefix => write!(target, " xmlns:{}=\"{}\"", prefix, uri)
+ }?;
+ }
+ Ok(())
+ }
+
+ /// Writes every attribute as ` name="value"`, escaping the value when `perform_escaping`
+ /// is enabled and writing it verbatim otherwise.
+ pub fn emit_attributes<W: Write>(&mut self, target: &mut W,
+                                  attributes: &[Attribute]) -> Result<()> {
+     for attr in attributes {
+         let value = if self.config.perform_escaping {
+             escape_str_attribute(attr.value)
+         } else {
+             Cow::Borrowed(attr.value)
+         };
+         write!(target, " {}=\"{}\"", attr.name.repr_display(), value)?;
+     }
+     Ok(())
+ }
+
+ /// Closes the current element, either as `</name>` or — when the start tag is still
+ /// pending and `normalize_empty_elements` is enabled — by finishing it as a self-closing
+ /// tag.
+ ///
+ /// The name comes from the remembered element names when `keep_element_names_stack` is
+ /// enabled, otherwise from the `name` argument.
+ ///
+ /// Errors:
+ /// * `LastElementNameNotAvailable` — names are tracked but no element is open;
+ /// * `EndElementNameIsNotEqualToLastStartElementName` — `name` is given and differs from
+ ///   the tracked one;
+ /// * `EndElementNameIsNotSpecified` — names are not tracked and `name` is `None`;
+ /// * `Io` — the write failed.
+ pub fn emit_end_element<W: Write>(&mut self, target: &mut W,
+ name: Option<Name>) -> Result<()> {
+ let owned_name = if self.config.keep_element_names_stack {
+ Some(self.element_names.pop().ok_or(EmitterError::LastElementNameNotAvailable)?)
+ } else {
+ None
+ };
+
+ // Check that last started element name equals to the provided name, if there are both
+ if let Some(ref last_name) = owned_name {
+ if let Some(ref name) = name {
+ if last_name.borrow() != *name {
+ return Err(EmitterError::EndElementNameIsNotEqualToLastStartElementName);
+ }
+ }
+ }
+
+ // The tracked name takes precedence over the one supplied by the caller.
+ if let Some(name) = owned_name.as_ref().map(|n| n.borrow()).or(name) {
+ if self.config.normalize_empty_elements && self.just_wrote_start_element {
+ self.just_wrote_start_element = false;
+ let termination = if self.config.pad_self_closing { " />" } else { "/>" };
+ let result = target.write_all(termination.as_bytes()).map_err(From::from);
+ self.after_end_element();
+ result
+ } else {
+ self.just_wrote_start_element = false;
+
+ self.before_end_element(target)?;
+ let result = write!(target, "</{}>", name.repr_display()).map_err(From::from);
+ self.after_end_element();
+
+ result
+ }
+ } else {
+ Err(EmitterError::EndElementNameIsNotSpecified)
+ }
+ }
+
+ /// Writes `content` as a CDATA section, or as regular (escaped) character data when
+ /// `cdata_to_characters` is enabled.
+ ///
+ /// The sequence `]]>` cannot appear inside a CDATA section because it terminates the
+ /// section. Every occurrence in `content` is therefore split across two adjacent
+ /// sections: `]]` ends one section and `>` starts the next one. A parser which
+ /// concatenates the sections gets the original text back.
+ pub fn emit_cdata<W: Write>(&mut self, target: &mut W, content: &str) -> Result<()> {
+     self.fix_non_empty_element(target)?;
+     if self.config.cdata_to_characters {
+         self.emit_characters(target, content)
+     } else {
+         target.write_all(b"<![CDATA[")?;
+
+         let mut pieces = content.split("]]>");
+         // `split` always yields at least one (possibly empty) piece.
+         if let Some(first) = pieces.next() {
+             target.write_all(first.as_bytes())?;
+         }
+         for piece in pieces {
+             // `]]` + end of section + start of a new section + `>`
+             target.write_all(b"]]]]><![CDATA[>")?;
+             target.write_all(piece.as_bytes())?;
+         }
+
+         target.write_all(b"]]>")?;
+
+         self.after_text();
+
+         Ok(())
+     }
+ }
+
+ /// Writes character data, escaped for PCDATA when `perform_escaping` is enabled and
+ /// verbatim otherwise. The declaration is written first if still required and a pending
+ /// start tag is closed.
+ pub fn emit_characters<W: Write>(&mut self, target: &mut W,
+                                  content: &str) -> Result<()> {
+     self.check_document_started(target)?;
+     self.fix_non_empty_element(target)?;
+
+     let text = if self.config.perform_escaping {
+         escape_str_pcdata(content)
+     } else {
+         Cow::Borrowed(content)
+     };
+     target.write_all(text.as_bytes())?;
+
+     self.after_text();
+     Ok(())
+ }
+
+ /// Writes a comment `<!--content-->`.
+ ///
+ /// With `autopad_comments` enabled a single space is inserted after `<!--` and before
+ /// `-->` unless the content already starts, respectively ends, with whitespace. The
+ /// content itself is written as-is.
+ pub fn emit_comment<W: Write>(&mut self, target: &mut W, content: &str) -> Result<()> {
+     self.fix_non_empty_element(target)?;
+
+     // TODO: add escaping dashes at the end of the comment
+
+     let autopad = self.config.autopad_comments;
+
+     self.before_markup(target)?;
+     let outcome = (|| -> Result<()> {
+         target.write_all(b"<!--")?;
+
+         if autopad && !content.starts_with(char::is_whitespace) {
+             target.write_all(b" ")?;
+         }
+
+         target.write_all(content.as_bytes())?;
+
+         if autopad && !content.ends_with(char::is_whitespace) {
+             target.write_all(b" ")?;
+         }
+
+         target.write_all(b"-->")?;
+
+         Ok(())
+     })();
+     // The level is marked as having markup even when the write failed.
+     self.after_markup();
+
+     outcome
+ }
+}
diff --git a/third_party/rust/xml-rs/src/writer/events.rs b/third_party/rust/xml-rs/src/writer/events.rs
new file mode 100644
index 0000000000..1f7040f66a
--- /dev/null
+++ b/third_party/rust/xml-rs/src/writer/events.rs
@@ -0,0 +1,241 @@
+//! Contains `XmlEvent` datatype, instances of which are consumed by the writer.
+
+use std::borrow::Cow;
+
+use name::Name;
+use attribute::Attribute;
+use common::XmlVersion;
+use namespace::{Namespace, NS_NO_PREFIX};
+
+ /// A part of an XML output stream.
+ ///
+ /// Objects of this enum are consumed by `EventWriter`. They correspond to different parts of
+ /// an XML document.
+ #[derive(Debug)]
+ pub enum XmlEvent<'a> {
+ /// Corresponds to XML document declaration.
+ ///
+ /// This event should always be written before any other event. If it is not written
+ /// at all, a default XML declaration will be output if the corresponding option
+ /// is set in the configuration. Otherwise an error will be returned.
+ StartDocument {
+ /// XML version.
+ ///
+ /// Defaults to `XmlVersion::Version10`.
+ version: XmlVersion,
+
+ /// XML document encoding.
+ ///
+ /// Defaults to `Some("UTF-8")`.
+ encoding: Option<&'a str>,
+
+ /// XML standalone declaration.
+ ///
+ /// Defaults to `None`.
+ standalone: Option<bool>
+ },
+
+ /// Denotes an XML processing instruction.
+ ProcessingInstruction {
+ /// Processing instruction target.
+ name: &'a str,
+
+ /// Processing instruction content.
+ data: Option<&'a str>
+ },
+
+ /// Denotes a beginning of an XML element.
+ StartElement {
+ /// Qualified name of the element.
+ name: Name<'a>,
+
+ /// A list of attributes associated with the element.
+ ///
+ /// Currently attributes are not checked for duplicates (TODO). Attribute values
+ /// will be escaped, and all characters invalid for attribute values like `"` or `<`
+ /// will be changed into character entities.
+ attributes: Cow<'a, [Attribute<'a>]>,
+
+ /// Contents of the namespace mapping at this point of the document.
+ ///
+ /// This mapping will be inspected for "new" entries, and if at this point of the document
+ /// a particular pair of prefix and namespace URI is already defined, no namespace
+ /// attributes will be emitted.
+ namespace: Cow<'a, Namespace>,
+ },
+
+ /// Denotes an end of an XML element.
+ EndElement {
+ /// Optional qualified name of the element.
+ ///
+ /// If `None`, then it is assumed that the element name should be the last valid one.
+ /// If `Some` and element names tracking is enabled, then the writer will check it for
+ /// correctness.
+ name: Option<Name<'a>>
+ },
+
+ /// Denotes CDATA content.
+ ///
+ /// This event contains unparsed data, and no escaping will be performed when writing it
+ /// to the output stream.
+ CData(&'a str),
+
+ /// Denotes a comment.
+ ///
+ /// The string will be checked for invalid sequences and an error will be returned by the
+ /// write operation.
+ ///
+ /// NOTE(review): the emitter's `emit_comment` writes the text as-is and carries a TODO
+ /// about escaping dashes — confirm whether any such check is actually performed.
+ Comment(&'a str),
+
+ /// Denotes character data outside of tags.
+ ///
+ /// Contents of this event will be escaped if `perform_escaping` option is enabled,
+ /// that is, every character invalid for PCDATA will appear as a character entity.
+ Characters(&'a str)
+ }
+
+ impl<'a> XmlEvent<'a> {
+     /// Returns a writer event for a processing instruction.
+     #[inline]
+     pub fn processing_instruction(name: &'a str, data: Option<&'a str>) -> XmlEvent<'a> {
+         XmlEvent::ProcessingInstruction { name, data }
+     }
+
+     /// Returns a builder for a starting element.
+     ///
+     /// This builder can then be used to tweak attributes and namespace starting at
+     /// this element.
+     #[inline]
+     pub fn start_element<S>(name: S) -> StartElementBuilder<'a> where S: Into<Name<'a>> {
+         let element_name = name.into();
+         StartElementBuilder {
+             name: element_name,
+             attributes: Vec::new(),
+             namespace: Namespace::empty()
+         }
+     }
+
+     /// Returns a builder for a closing element.
+     ///
+     /// This method, unlike `start_element()`, does not accept a name because by default
+     /// the writer is able to determine it automatically. However, when this functionality
+     /// is disabled, it is possible to specify the name with `name()` method on the builder.
+     #[inline]
+     pub fn end_element() -> EndElementBuilder<'a> {
+         EndElementBuilder { name: None }
+     }
+
+     /// Returns a CDATA event.
+     ///
+     /// Naturally, the provided string won't be escaped, except for closing CDATA token `]]>`
+     /// (depending on the configuration).
+     #[inline]
+     pub fn cdata(data: &'a str) -> XmlEvent<'a> {
+         XmlEvent::CData(data)
+     }
+
+     /// Returns a regular characters (PCDATA) event.
+     ///
+     /// All offending symbols, in particular, `&` and `<`, will be escaped by the writer.
+     #[inline]
+     pub fn characters(data: &'a str) -> XmlEvent<'a> {
+         XmlEvent::Characters(data)
+     }
+
+     /// Returns a comment event.
+     #[inline]
+     pub fn comment(data: &'a str) -> XmlEvent<'a> {
+         XmlEvent::Comment(data)
+     }
+ }
+
+ impl<'a> From<&'a str> for XmlEvent<'a> {
+     /// A plain string slice converts into a `Characters` event.
+     #[inline]
+     fn from(text: &'a str) -> XmlEvent<'a> {
+         XmlEvent::Characters(text)
+     }
+ }
+
+ /// A builder for a closing element event, created by `XmlEvent::end_element()`.
+ pub struct EndElementBuilder<'a> {
+ // Explicit element name; `None` leaves it to the writer to determine the name.
+ name: Option<Name<'a>>
+ }
+
+ /// Configuration methods of the closing element event builder.
+ impl<'a> EndElementBuilder<'a> {
+     /// Sets the name of this closing element.
+     ///
+     /// Usually the writer is able to determine closing element names automatically. If
+     /// this functionality is enabled (by default it is), then this name is checked for correctness.
+     /// It is possible, however, to disable such behavior; then the user must ensure that
+     /// closing element name is correct manually.
+     #[inline]
+     pub fn name<N>(self, name: N) -> EndElementBuilder<'a> where N: Into<Name<'a>> {
+         let mut builder = self;
+         builder.name = Some(name.into());
+         builder
+     }
+ }
+
+ impl<'a> From<EndElementBuilder<'a>> for XmlEvent<'a> {
+     /// Finishes the builder, producing an `EndElement` event with the name set so far.
+     fn from(builder: EndElementBuilder<'a>) -> XmlEvent<'a> {
+         let EndElementBuilder { name } = builder;
+         XmlEvent::EndElement { name }
+     }
+ }
+
+ /// A builder for a starting element event, created by `XmlEvent::start_element()`.
+ pub struct StartElementBuilder<'a> {
+ // Qualified name of the element.
+ name: Name<'a>,
+ // Attributes added so far with `attr()`, in insertion order.
+ attributes: Vec<Attribute<'a>>,
+ // Namespace mappings added so far with `ns()` and `default_ns()`.
+ namespace: Namespace
+ }
+
+ impl<'a> StartElementBuilder<'a> {
+     /// Sets an attribute value of this element to the given string.
+     ///
+     /// This method can be used to add attributes to the starting element. Name is a qualified
+     /// name; its namespace is ignored, but its prefix is checked for correctness, that is,
+     /// it is checked that the prefix is bound to some namespace in the current context.
+     ///
+     /// Currently attributes are not checked for duplicates. Note that duplicate attributes
+     /// are a violation of XML document well-formedness.
+     ///
+     /// The writer checks that you don't specify reserved prefix names, for example `xmlns`.
+     #[inline]
+     pub fn attr<N>(mut self, name: N, value: &'a str) -> StartElementBuilder<'a>
+         where N: Into<Name<'a>>
+     {
+         let attribute = Attribute::new(name.into(), value);
+         self.attributes.push(attribute);
+         self
+     }
+
+     /// Adds a namespace to the current namespace context.
+     ///
+     /// If no namespace URI was bound to the provided prefix at this point of the document,
+     /// then the mapping from the prefix to the provided namespace URI will be written as
+     /// a part of this element attribute set.
+     ///
+     /// If the same namespace URI was bound to the provided prefix at this point of the document,
+     /// then no namespace attributes will be emitted.
+     ///
+     /// If some other namespace URI was bound to the provided prefix at this point of the document,
+     /// then another binding will be added as a part of this element attribute set, shadowing
+     /// the outer binding.
+     #[inline]
+     pub fn ns<S1, S2>(mut self, prefix: S1, uri: S2) -> StartElementBuilder<'a>
+         where S1: Into<String>, S2: Into<String>
+     {
+         {
+             let mappings = &mut self.namespace;
+             mappings.put(prefix, uri);
+         }
+         self
+     }
+
+     /// Adds a default namespace mapping to the current namespace context.
+     ///
+     /// Same rules as for `ns()` are also valid for the default namespace mapping.
+     #[inline]
+     pub fn default_ns<S>(mut self, uri: S) -> StartElementBuilder<'a>
+         where S: Into<String>
+     {
+         {
+             let mappings = &mut self.namespace;
+             mappings.put(NS_NO_PREFIX, uri);
+         }
+         self
+     }
+ }
+
+ impl<'a> From<StartElementBuilder<'a>> for XmlEvent<'a> {
+     /// Finishes the builder, producing a `StartElement` event which owns the collected
+     /// attributes and namespace mappings.
+     #[inline]
+     fn from(builder: StartElementBuilder<'a>) -> XmlEvent<'a> {
+         let StartElementBuilder { name, attributes, namespace } = builder;
+         XmlEvent::StartElement {
+             name,
+             attributes: Cow::Owned(attributes),
+             namespace: Cow::Owned(namespace)
+         }
+     }
+ }
diff --git a/third_party/rust/xml-rs/src/writer/mod.rs b/third_party/rust/xml-rs/src/writer/mod.rs
new file mode 100644
index 0000000000..ea1b24266f
--- /dev/null
+++ b/third_party/rust/xml-rs/src/writer/mod.rs
@@ -0,0 +1,93 @@
+//! Contains the high-level interface for an event-based XML emitter.
+//!
+//! The most important type in this module is `EventWriter`, which allows writing an XML document
+//! to some output stream.
+
+pub use self::emitter::Result;
+pub use self::emitter::EmitterError as Error;
+pub use self::config::EmitterConfig;
+pub use self::events::XmlEvent;
+
+use self::emitter::Emitter;
+
+use std::io::prelude::*;
+
+mod emitter;
+mod config;
+pub mod events;
+
+/// A wrapper around a `std::io::Write` instance which emits an XML document according to the
+/// provided events.
+pub struct EventWriter<W> {
+ sink: W, // destination handed to the emitter on every write
+ emitter: Emitter // emission state, including the namespace stack
+}
+
+impl<W: Write> EventWriter<W> {
+ /// Creates a new `EventWriter` around a `std::io::Write` instance, using the default
+ /// configuration (`EmitterConfig::new()`).
+ #[inline]
+ pub fn new(sink: W) -> EventWriter<W> {
+ EventWriter::new_with_config(sink, EmitterConfig::new())
+ }
+
+ /// Creates a new `EventWriter` around a `std::io::Write` instance, using the provided
+ /// configuration to set up its emitter.
+ #[inline]
+ pub fn new_with_config(sink: W, config: EmitterConfig) -> EventWriter<W> {
+ EventWriter {
+ sink,
+ emitter: Emitter::new(config)
+ }
+ }
+
+ /// Writes the next piece of the XML document according to the provided event.
+ ///
+ /// Anything convertible into `XmlEvent` is accepted. Note that the output may not correspond
+ /// exactly to the written event because of various configuration options: for example,
+ /// `XmlEvent::EndElement` may produce a separate closing element or an empty element, and
+ /// `XmlEvent::CData` may be represented as characters in the output stream. A missing
+ /// document encoding defaults to `"UTF-8"`.
+ pub fn write<'a, E>(&mut self, event: E) -> Result<()> where E: Into<XmlEvent<'a>> {
+ match event.into() {
+ XmlEvent::StartDocument { version, encoding, standalone } =>
+ self.emitter.emit_start_document(&mut self.sink, version, encoding.unwrap_or("UTF-8"), standalone),
+ XmlEvent::ProcessingInstruction { name, data } =>
+ self.emitter.emit_processing_instruction(&mut self.sink, name, data),
+ XmlEvent::StartElement { name, attributes, namespace } => { // push an empty namespace-stack entry, extend it with the event's bindings, then emit
+ self.emitter.namespace_stack_mut().push_empty().checked_target().extend(namespace.as_ref());
+ self.emitter.emit_start_element(&mut self.sink, name, &attributes)
+ }
+ XmlEvent::EndElement { name } => { // `try_pop` runs whether or not emitting succeeded; the emit result is what gets returned
+ let r = self.emitter.emit_end_element(&mut self.sink, name);
+ self.emitter.namespace_stack_mut().try_pop();
+ r
+ }
+ XmlEvent::Comment(content) =>
+ self.emitter.emit_comment(&mut self.sink, content),
+ XmlEvent::CData(content) =>
+ self.emitter.emit_cdata(&mut self.sink, content),
+ XmlEvent::Characters(content) =>
+ self.emitter.emit_characters(&mut self.sink, content)
+ }
+ }
+
+ /// Returns a mutable reference to the underlying writer.
+ ///
+ /// Note that having a reference to the underlying sink makes it very easy to emit invalid XML
+ /// documents. Use this method with care. Valid use cases for this method include accessing
+ /// methods like `Write::flush`, which do not emit new data but rather change the state
+ /// of the stream itself.
+ pub fn inner_mut(&mut self) -> &mut W {
+ &mut self.sink
+ }
+
+ /// Unwraps this `EventWriter`, returning the underlying writer.
+ ///
+ /// Note that this is a destructive operation: unwrapping a writer and then wrapping
+ /// it again with `EventWriter::new()` will create a fresh writer whose state will be
+ /// blank; for example, accumulated namespaces will be reset.
+ pub fn into_inner(self) -> W {
+ self.sink
+ }
+}