summaryrefslogtreecommitdiffstats
path: root/third_party/rust/xml-rs/src/reader/parser
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/xml-rs/src/reader/parser')
-rw-r--r--third_party/rust/xml-rs/src/reader/parser/inside_cdata.rs32
-rw-r--r--third_party/rust/xml-rs/src/reader/parser/inside_closing_tag_name.rs34
-rw-r--r--third_party/rust/xml-rs/src/reader/parser/inside_comment.rs32
-rw-r--r--third_party/rust/xml-rs/src/reader/parser/inside_declaration.rs151
-rw-r--r--third_party/rust/xml-rs/src/reader/parser/inside_doctype.rs16
-rw-r--r--third_party/rust/xml-rs/src/reader/parser/inside_opening_tag.rs108
-rw-r--r--third_party/rust/xml-rs/src/reader/parser/inside_processing_instruction.rs96
-rw-r--r--third_party/rust/xml-rs/src/reader/parser/inside_reference.rs89
-rw-r--r--third_party/rust/xml-rs/src/reader/parser/mod.rs622
-rw-r--r--third_party/rust/xml-rs/src/reader/parser/outside_tag.rs130
10 files changed, 1310 insertions, 0 deletions
diff --git a/third_party/rust/xml-rs/src/reader/parser/inside_cdata.rs b/third_party/rust/xml-rs/src/reader/parser/inside_cdata.rs
new file mode 100644
index 0000000000..3269fb4d6b
--- /dev/null
+++ b/third_party/rust/xml-rs/src/reader/parser/inside_cdata.rs
@@ -0,0 +1,32 @@
+use reader::events::XmlEvent;
+use reader::lexer::Token;
+
+use super::{Result, PullParser, State};
+
+impl PullParser {
+    /// Consumes one token while the parser is inside a CDATA section.
+    ///
+    /// Every token other than `Token::CDataEnd` is appended to `self.buf` through
+    /// `push_to_string`. On `Token::CDataEnd` lexer errors are re-enabled and the parser
+    /// returns to `State::OutsideTag`; a `CData` event carrying the buffered text is
+    /// produced unless `cdata_to_characters` is set, in which case the buffer is not
+    /// taken and no event is produced here.
+    pub fn inside_cdata(&mut self, t: Token) -> Option<Result> {
+        if let Token::CDataEnd = t {
+            self.lexer.enable_errors();
+            if self.config.cdata_to_characters {
+                return self.into_state(State::OutsideTag, None);
+            }
+            let text = self.take_buf();
+            return self.into_state(State::OutsideTag, Some(Ok(XmlEvent::CData(text))));
+        }
+
+        // Any non-whitespace token clears the "only whitespace so far" flag.
+        match t {
+            Token::Whitespace(_) => {}
+            _ => self.inside_whitespace = false,
+        }
+        t.push_to_string(&mut self.buf);
+        None
+    }
+}
diff --git a/third_party/rust/xml-rs/src/reader/parser/inside_closing_tag_name.rs b/third_party/rust/xml-rs/src/reader/parser/inside_closing_tag_name.rs
new file mode 100644
index 0000000000..1d8074a5a3
--- /dev/null
+++ b/third_party/rust/xml-rs/src/reader/parser/inside_closing_tag_name.rs
@@ -0,0 +1,34 @@
+use namespace;
+
+use reader::lexer::Token;
+
+use super::{Result, PullParser, State, QualifiedNameTarget, ClosingTagSubstate};
+
+impl PullParser {
+    /// Handles one token while the parser is inside a closing tag.
+    ///
+    /// * `CTInsideName` — the qualified element name is still being read. Once it is
+    ///   complete, the reserved prefixes `xml` and `xmlns` are rejected, the name is stored
+    ///   in `self.data.element_name`, and the terminating token decides what happens next:
+    ///   whitespace moves to `CTAfterName`, `>` emits the end-element event.
+    /// * `CTAfterName` — only whitespace (skipped) and `>` are accepted.
+    ///
+    /// Returns `None` when more tokens are needed, otherwise the produced event or error.
+    pub fn inside_closing_tag_name(&mut self, t: Token, s: ClosingTagSubstate) -> Option<Result> {
+        match s {
+            ClosingTagSubstate::CTInsideName => self.read_qualified_name(t, QualifiedNameTarget::ClosingTagNameTarget, |this, token, name| {
+                match name.prefix_ref() {
+                    // Report the bare prefix; formatting the `Option` with `{:?}` would
+                    // leak `Some("...")` into the user-visible message.
+                    Some(prefix) if prefix == namespace::NS_XML_PREFIX ||
+                                    prefix == namespace::NS_XMLNS_PREFIX =>
+                        Some(self_error!(this; "'{}' cannot be an element name prefix", prefix)),
+                    _ => {
+                        this.data.element_name = Some(name.clone());
+                        match token {
+                            Token::Whitespace(_) => this.into_state_continue(State::InsideClosingTag(ClosingTagSubstate::CTAfterName)),
+                            Token::TagEnd => this.emit_end_element(),
+                            _ => Some(self_error!(this; "Unexpected token inside closing tag: {}", token))
+                        }
+                    }
+                }
+            }),
+            ClosingTagSubstate::CTAfterName => match t {
+                Token::Whitespace(_) => None,  // Skip whitespace
+                Token::TagEnd => self.emit_end_element(),
+                _ => Some(self_error!(self; "Unexpected token inside closing tag: {}", t))
+            }
+        }
+    }
+}
diff --git a/third_party/rust/xml-rs/src/reader/parser/inside_comment.rs b/third_party/rust/xml-rs/src/reader/parser/inside_comment.rs
new file mode 100644
index 0000000000..fc983205ac
--- /dev/null
+++ b/third_party/rust/xml-rs/src/reader/parser/inside_comment.rs
@@ -0,0 +1,32 @@
+use reader::events::XmlEvent;
+use reader::lexer::Token;
+
+use super::{Result, PullParser, State};
+
+impl PullParser {
+    /// Consumes one token while the parser is inside a comment.
+    ///
+    /// A `--` chunk is always an error. `Token::CommentEnd` tells the lexer the comment is
+    /// over and returns the parser to `State::OutsideTag`, emitting a `Comment` event with
+    /// the buffered text unless `ignore_comments` is set. Any other token is appended to
+    /// `self.buf`, or dropped when comments are being ignored.
+    pub fn inside_comment(&mut self, t: Token) -> Option<Result> {
+        let ignoring = self.config.ignore_comments;
+        match t {
+            // Double dash is illegal inside a comment
+            Token::Chunk(ref s) if &s[..] == "--" =>
+                Some(self_error!(self; "Unexpected token inside a comment: --")),
+
+            Token::CommentEnd => {
+                self.lexer.outside_comment();
+                if ignoring {
+                    self.into_state_continue(State::OutsideTag)
+                } else {
+                    let text = self.take_buf();
+                    self.into_state_emit(State::OutsideTag, Ok(XmlEvent::Comment(text)))
+                }
+            }
+
+            other => {
+                // The buffer is left alone while comments are being ignored.
+                if !ignoring {
+                    other.push_to_string(&mut self.buf);
+                }
+                None
+            }
+        }
+    }
+}
diff --git a/third_party/rust/xml-rs/src/reader/parser/inside_declaration.rs b/third_party/rust/xml-rs/src/reader/parser/inside_declaration.rs
new file mode 100644
index 0000000000..af39d10d86
--- /dev/null
+++ b/third_party/rust/xml-rs/src/reader/parser/inside_declaration.rs
@@ -0,0 +1,151 @@
+
+use common::XmlVersion;
+
+use reader::events::XmlEvent;
+use reader::lexer::Token;
+
+use super::{
+ Result, PullParser, State, DeclarationSubstate, QualifiedNameTarget,
+ DEFAULT_VERSION, DEFAULT_ENCODING
+};
+
+impl PullParser {
+    /// Advances the XML declaration state machine by one token.
+    ///
+    /// `s` is the current position inside the declaration. Returns `None` while more
+    /// tokens are needed, `Some(Ok(XmlEvent::StartDocument { .. }))` when `?>` closes the
+    /// declaration, and `Some(Err(..))` for a token or value that is not accepted at the
+    /// current position.
+    ///
+    /// The first letter of each pseudo-attribute (`v`, `e`, `s`) is consumed by the
+    /// preceding state without being buffered, so the names read afterwards are compared
+    /// against the remaining suffixes (`ersion`, `ncoding`, `tandalone`).
+    // TODO: the After* states are still near-duplicates of each other
+    pub fn inside_declaration(&mut self, t: Token, s: DeclarationSubstate) -> Option<Result> {
+        macro_rules! unexpected_token(
+            ($t:expr) => (Some(self.error(format!("Unexpected token inside XML declaration: {}", $t))));
+        );
+
+        // Marks the declaration as parsed and emits StartDocument, substituting the
+        // defaults for anything the declaration did not specify.
+        #[inline]
+        fn emit_start_document(this: &mut PullParser) -> Option<Result> {
+            this.parsed_declaration = true;
+            let version = this.data.take_version();
+            let encoding = this.data.take_encoding();
+            let standalone = this.data.take_standalone();
+            this.into_state_emit(State::OutsideTag, Ok(XmlEvent::StartDocument {
+                version: version.unwrap_or(DEFAULT_VERSION),
+                // Build the default string only when no encoding was declared.
+                encoding: encoding.unwrap_or_else(|| DEFAULT_ENCODING.into()),
+                standalone: standalone
+            }))
+        }
+
+        match s {
+            DeclarationSubstate::BeforeVersion => match t {
+                Token::Whitespace(_) => None,  // continue
+                Token::Character('v') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideVersion)),
+                _ => unexpected_token!(t)
+            },
+
+            DeclarationSubstate::InsideVersion => self.read_declaration_attr_name(
+                t, "ersion",
+                DeclarationSubstate::InsideVersionValue,
+                DeclarationSubstate::AfterVersion
+            ),
+
+            DeclarationSubstate::AfterVersion => match t {
+                Token::Whitespace(_) => None,
+                Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideVersionValue)),
+                _ => unexpected_token!(t)
+            },
+
+            DeclarationSubstate::InsideVersionValue => self.read_attribute_value(t, |this, value| {
+                this.data.version = match &value[..] {
+                    "1.0" => Some(XmlVersion::Version10),
+                    "1.1" => Some(XmlVersion::Version11),
+                    _     => None
+                };
+                if this.data.version.is_some() {
+                    this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterVersionValue))
+                } else {
+                    Some(self_error!(this; "Unexpected XML version value: {}", value))
+                }
+            }),
+
+            DeclarationSubstate::AfterVersionValue => match t {
+                Token::Whitespace(_) => None,  // skip whitespace
+                Token::Character('e') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideEncoding)),
+                Token::Character('s') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDecl)),
+                Token::ProcessingInstructionEnd => emit_start_document(self),
+                _ => unexpected_token!(t)
+            },
+
+            DeclarationSubstate::InsideEncoding => self.read_declaration_attr_name(
+                t, "ncoding",
+                DeclarationSubstate::InsideEncodingValue,
+                DeclarationSubstate::AfterEncoding
+            ),
+
+            DeclarationSubstate::AfterEncoding => match t {
+                Token::Whitespace(_) => None,
+                Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideEncodingValue)),
+                _ => unexpected_token!(t)
+            },
+
+            DeclarationSubstate::InsideEncodingValue => self.read_attribute_value(t, |this, value| {
+                this.data.encoding = Some(value);
+                this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeStandaloneDecl))
+            }),
+
+            DeclarationSubstate::BeforeStandaloneDecl => match t {
+                Token::Whitespace(_) => None,  // skip whitespace
+                Token::Character('s') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDecl)),
+                Token::ProcessingInstructionEnd => emit_start_document(self),
+                _ => unexpected_token!(t)
+            },
+
+            DeclarationSubstate::InsideStandaloneDecl => self.read_declaration_attr_name(
+                t, "tandalone",
+                DeclarationSubstate::InsideStandaloneDeclValue,
+                DeclarationSubstate::AfterStandaloneDecl
+            ),
+
+            DeclarationSubstate::AfterStandaloneDecl => match t {
+                Token::Whitespace(_) => None,
+                Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDeclValue)),
+                _ => unexpected_token!(t)
+            },
+
+            DeclarationSubstate::InsideStandaloneDeclValue => self.read_attribute_value(t, |this, value| {
+                let standalone = match &value[..] {
+                    "yes" => Some(true),
+                    "no"  => Some(false),
+                    _     => None
+                };
+                if standalone.is_some() {
+                    this.data.standalone = standalone;
+                    this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterStandaloneDeclValue))
+                } else {
+                    Some(self_error!(this; "Invalid standalone declaration value: {}", value))
+                }
+            }),
+
+            DeclarationSubstate::AfterStandaloneDeclValue => match t {
+                Token::Whitespace(_) => None,  // skip whitespace
+                Token::ProcessingInstructionEnd => emit_start_document(self),
+                _ => unexpected_token!(t)
+            }
+        }
+    }
+
+    /// Reads the remainder of a declaration pseudo-attribute name.
+    ///
+    /// The completed name must have no namespace and a local part equal to `expected`;
+    /// otherwise an "Unexpected token inside XML declaration" error is returned. On a
+    /// match the parser moves to `value_state` when the name was terminated by `=`, and
+    /// to `after_name_state` for any other terminator.
+    fn read_declaration_attr_name(&mut self, t: Token, expected: &str,
+                                  value_state: DeclarationSubstate,
+                                  after_name_state: DeclarationSubstate) -> Option<Result> {
+        self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| {
+            if &name.local_name[..] == expected && name.namespace.is_none() {
+                let next = if token == Token::EqualsSign {
+                    value_state.clone()
+                } else {
+                    after_name_state.clone()
+                };
+                this.into_state_continue(State::InsideDeclaration(next))
+            } else {
+                Some(this.error(format!("Unexpected token inside XML declaration: {}", name)))
+            }
+        })
+    }
+}
diff --git a/third_party/rust/xml-rs/src/reader/parser/inside_doctype.rs b/third_party/rust/xml-rs/src/reader/parser/inside_doctype.rs
new file mode 100644
index 0000000000..8dcf367bc6
--- /dev/null
+++ b/third_party/rust/xml-rs/src/reader/parser/inside_doctype.rs
@@ -0,0 +1,16 @@
+use reader::lexer::Token;
+
+use super::{Result, PullParser, State};
+
+impl PullParser {
+    /// Consumes one token while the parser is inside a DOCTYPE declaration.
+    ///
+    /// All tokens are discarded. `Token::TagEnd` re-enables lexer errors and returns the
+    /// parser to `State::OutsideTag`; no event is ever produced from this state.
+    pub fn inside_doctype(&mut self, t: Token) -> Option<Result> {
+        if let Token::TagEnd = t {
+            self.lexer.enable_errors();
+            return self.into_state_continue(State::OutsideTag);
+        }
+        None
+    }
+}
diff --git a/third_party/rust/xml-rs/src/reader/parser/inside_opening_tag.rs b/third_party/rust/xml-rs/src/reader/parser/inside_opening_tag.rs
new file mode 100644
index 0000000000..533874fb81
--- /dev/null
+++ b/third_party/rust/xml-rs/src/reader/parser/inside_opening_tag.rs
@@ -0,0 +1,108 @@
+use common::is_name_start_char;
+use attribute::OwnedAttribute;
+use namespace;
+
+use reader::lexer::Token;
+
+use super::{Result, PullParser, State, OpeningTagSubstate, QualifiedNameTarget};
+
+impl PullParser {
+    /// Handles one token while the parser is inside an opening (or empty-element) tag.
+    ///
+    /// `s` selects the position inside the tag:
+    ///
+    /// * `InsideName` — reading the element name; `xml` and `xmlns` prefixes are rejected.
+    /// * `InsideTag` — between attributes; a name-start character begins an attribute,
+    ///   `>` and `/>` emit the start-element event.
+    /// * `InsideAttributeName` / `AfterAttributeName` — reading an attribute name and the
+    ///   optional whitespace before `=`.
+    /// * `InsideAttributeValue` — reading the quoted value. Duplicate attribute names are
+    ///   an error; `xmlns:prefix` and `xmlns` attributes are stored in the namespace
+    ///   stack instead of the attribute list.
+    ///
+    /// Returns `None` when more tokens are needed, otherwise the produced event or error.
+    pub fn inside_opening_tag(&mut self, t: Token, s: OpeningTagSubstate) -> Option<Result> {
+        macro_rules! unexpected_token(($t:expr) => (Some(self_error!(self; "Unexpected token inside opening tag: {}", $t))));
+        match s {
+            OpeningTagSubstate::InsideName => self.read_qualified_name(t, QualifiedNameTarget::OpeningTagNameTarget, |this, token, name| {
+                match name.prefix_ref() {
+                    // Report the bare prefix; formatting the `Option` with `{:?}` would
+                    // leak `Some("...")` into the user-visible message.
+                    Some(prefix) if prefix == namespace::NS_XML_PREFIX ||
+                                    prefix == namespace::NS_XMLNS_PREFIX =>
+                        Some(self_error!(this; "'{}' cannot be an element name prefix", prefix)),
+                    _ => {
+                        this.data.element_name = Some(name.clone());
+                        match token {
+                            Token::TagEnd => this.emit_start_element(false),
+                            Token::EmptyTagEnd => this.emit_start_element(true),
+                            Token::Whitespace(_) => this.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideTag)),
+                            // read_qualified_name only invokes the callback for the tokens above
+                            _ => unreachable!()
+                        }
+                    }
+                }
+            }),
+
+            OpeningTagSubstate::InsideTag => match t {
+                Token::Whitespace(_) => None,  // skip whitespace
+                Token::Character(c) if is_name_start_char(c) => {
+                    self.buf.push(c);
+                    self.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideAttributeName))
+                }
+                Token::TagEnd => self.emit_start_element(false),
+                Token::EmptyTagEnd => self.emit_start_element(true),
+                _ => unexpected_token!(t)
+            },
+
+            OpeningTagSubstate::InsideAttributeName => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| {
+                this.data.attr_name = Some(name);
+                match token {
+                    Token::Whitespace(_) => this.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::AfterAttributeName)),
+                    Token::EqualsSign => this.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideAttributeValue)),
+                    // read_qualified_name only invokes the callback for the tokens above
+                    _ => unreachable!()
+                }
+            }),
+
+            OpeningTagSubstate::AfterAttributeName => match t {
+                Token::Whitespace(_) => None,
+                Token::EqualsSign => self.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideAttributeValue)),
+                _ => unexpected_token!(t)
+            },
+
+            OpeningTagSubstate::InsideAttributeValue => self.read_attribute_value(t, |this, value| {
+                let name = this.data.take_attr_name().unwrap();  // unwrap() will always succeed here
+
+                // check that no attribute with such name is already present
+                // if there is one, XML is not well-formed
+                if this.data.attributes.iter().any(|a| a.name == name) {
+                    // TODO: ideally this error should point to the beginning of the attribute,
+                    // TODO: not the end of its value
+                    Some(self_error!(this; "Attribute '{}' is redefined", name))
+                } else {
+                    match name.prefix_ref() {
+                        // declaring a new prefix; it is sufficient to check prefix only
+                        // because "xmlns" prefix is reserved
+                        Some(namespace::NS_XMLNS_PREFIX) => {
+                            let ln = &name.local_name[..];
+                            if ln == namespace::NS_XMLNS_PREFIX {
+                                Some(self_error!(this; "Cannot redefine prefix '{}'", namespace::NS_XMLNS_PREFIX))
+                            } else if ln == namespace::NS_XML_PREFIX && &value[..] != namespace::NS_XML_URI {
+                                Some(self_error!(this; "Prefix '{}' cannot be rebound to another value", namespace::NS_XML_PREFIX))
+                            } else if value.is_empty() {
+                                Some(self_error!(this; "Cannot undefine prefix '{}'", ln))
+                            } else {
+                                this.nst.put(name.local_name.clone(), value);
+                                this.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideTag))
+                            }
+                        }
+
+                        // declaring default namespace
+                        None if &name.local_name[..] == namespace::NS_XMLNS_PREFIX =>
+                            match &value[..] {
+                                namespace::NS_XMLNS_PREFIX | namespace::NS_XML_PREFIX =>
+                                    Some(self_error!(this; "Namespace '{}' cannot be default", value)),
+                                _ => {
+                                    this.nst.put(namespace::NS_NO_PREFIX, value.clone());
+                                    this.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideTag))
+                                }
+                            },
+
+                        // regular attribute
+                        _ => {
+                            this.data.attributes.push(OwnedAttribute {
+                                name: name.clone(),
+                                value: value
+                            });
+                            this.into_state_continue(State::InsideOpeningTag(OpeningTagSubstate::InsideTag))
+                        }
+                    }
+                }
+            })
+        }
+    }
+}
diff --git a/third_party/rust/xml-rs/src/reader/parser/inside_processing_instruction.rs b/third_party/rust/xml-rs/src/reader/parser/inside_processing_instruction.rs
new file mode 100644
index 0000000000..8ddf6b8d51
--- /dev/null
+++ b/third_party/rust/xml-rs/src/reader/parser/inside_processing_instruction.rs
@@ -0,0 +1,96 @@
+use common::{
+ is_name_start_char, is_name_char,
+};
+
+use reader::events::XmlEvent;
+use reader::lexer::Token;
+
+use super::{Result, PullParser, State, ProcessingInstructionSubstate, DeclarationSubstate};
+
+impl PullParser {
+    /// Handles one token while the parser is inside a processing instruction (`<? ... ?>`).
+    ///
+    /// * `PIInsideName` — the target name is accumulated in `self.buf`. `?>` ends a PI
+    ///   without data; whitespace ends the name and either starts the XML declaration
+    ///   (exactly `xml`, before any element or declaration was seen) or switches to
+    ///   `PIInsideData`. A target spelling `xml` in any letter case is rejected in the
+    ///   situations listed in the comments below.
+    /// * `PIInsideData` — every token is appended to `self.buf` until `?>` emits the
+    ///   `ProcessingInstruction` event.
+    pub fn inside_processing_instruction(&mut self, t: Token, s: ProcessingInstructionSubstate) -> Option<Result> {
+        match s {
+            ProcessingInstructionSubstate::PIInsideName => match t {
+                Token::Character(c) if !self.buf_has_data() && is_name_start_char(c) ||
+                                       self.buf_has_data() && is_name_char(c) => self.append_char_continue(c),
+
+                Token::ProcessingInstructionEnd => {
+                    // self.buf contains PI name
+                    let name = self.take_buf();
+
+                    // No declaration check is needed here: a declaration has mandatory
+                    // attributes, and this PI has none.
+                    if name.is_empty() {
+                        Some(self_error!(self; "Encountered processing instruction without name"))
+                    } else if name.eq_ignore_ascii_case("xml") {
+                        // An <?xml-like PI without attributes is never valid,
+                        // see section 2.6 of XML 1.1 spec
+                        Some(self_error!(self; "Invalid processing instruction: <?{}", name))
+                    } else {
+                        let event = XmlEvent::ProcessingInstruction { name: name, data: None };
+                        self.into_state_emit(State::OutsideTag, Ok(event))
+                    }
+                }
+
+                Token::Whitespace(_) => {
+                    // self.buf contains PI name
+                    let name = self.take_buf();
+                    let at_document_start = !self.encountered_element && !self.parsed_declaration;
+
+                    if at_document_start && &name[..] == "xml" {
+                        // No element and no declaration seen yet: this is the XML declaration
+                        self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeVersion))
+                    } else if !at_document_start && name.eq_ignore_ascii_case("xml") {
+                        // Found <?xml-like PI after the beginning of a document,
+                        // it is an error - see section 2.6 of XML 1.1 spec
+                        Some(self_error!(self; "Invalid processing instruction: <?{}", name))
+                    } else {
+                        // NOTE(review): an empty name also ends up here — confirm that is intended
+                        self.lexer.disable_errors();  // data is arbitrary, so disable errors
+                        self.data.name = name;
+                        self.into_state_continue(State::InsideProcessingInstruction(ProcessingInstructionSubstate::PIInsideData))
+                    }
+                }
+
+                _ => Some(self_error!(self; "Unexpected token: <?{}{}", self.buf, t))
+            },
+
+            ProcessingInstructionSubstate::PIInsideData => {
+                if let Token::ProcessingInstructionEnd = t {
+                    self.lexer.enable_errors();
+                    let name = self.data.take_name();
+                    let data = self.take_buf();
+                    let event = XmlEvent::ProcessingInstruction { name: name, data: Some(data) };
+                    self.into_state_emit(State::OutsideTag, Ok(event))
+                } else {
+                    // Any other token should be treated as plain characters
+                    t.push_to_string(&mut self.buf);
+                    None
+                }
+            }
+        }
+    }
+}
diff --git a/third_party/rust/xml-rs/src/reader/parser/inside_reference.rs b/third_party/rust/xml-rs/src/reader/parser/inside_reference.rs
new file mode 100644
index 0000000000..60026d5572
--- /dev/null
+++ b/third_party/rust/xml-rs/src/reader/parser/inside_reference.rs
@@ -0,0 +1,89 @@
+use std::char;
+
+use common::{is_name_start_char, is_name_char, is_whitespace_str};
+
+use reader::lexer::Token;
+
+use super::{Result, PullParser, State};
+
+impl PullParser {
+    /// Handles one token while the parser is inside an entity or character reference.
+    ///
+    /// Characters are accumulated in `self.data.ref_data` until `Token::ReferenceEnd`.
+    /// The reference is then resolved — the five predefined entities, `#x..` / `#..`
+    /// character references, or an entry of `config.extra_entities` — its replacement
+    /// text is appended to `self.buf`, and the parser returns to `prev_st`, the state
+    /// that was active when the reference started.
+    ///
+    /// Returns `None` while the reference is incomplete, and `Some(Err(..))` for an
+    /// empty, unknown or invalid reference or an unexpected token.
+    pub fn inside_reference(&mut self, t: Token, prev_st: State) -> Option<Result> {
+        match t {
+            Token::Character(c) if !self.data.ref_data.is_empty() && is_name_char(c) ||
+                             self.data.ref_data.is_empty() && (is_name_start_char(c) || c == '#') => {
+                self.data.ref_data.push(c);
+                None
+            }
+
+            Token::ReferenceEnd => {
+                // TODO: check for unicode correctness
+                let name = self.data.take_ref_data();
+                let name_len = name.len();  // compute once
+                let c = match &name[..] {
+                    "lt"   => Ok('<'.to_string()),
+                    "gt"   => Ok('>'.to_string()),
+                    "amp"  => Ok('&'.to_string()),
+                    "apos" => Ok('\''.to_string()),
+                    "quot" => Ok('"'.to_string()),
+                    ""     => Err(self_error!(self; "Encountered empty entity")),
+                    _ if name_len > 2 && name.starts_with("#x") =>
+                        self.resolve_char_reference(&name, &name[2..], 16),
+                    // A bare "#x" lands here and is reported as an invalid decimal number.
+                    _ if name_len > 1 && name.starts_with('#') =>
+                        self.resolve_char_reference(&name, &name[1..], 10),
+                    _ => {
+                        if let Some(v) = self.config.extra_entities.get(&name) {
+                            Ok(v.clone())
+                        } else {
+                            Err(self_error!(self; "Unexpected entity: {}", name))
+                        }
+                    }
+                };
+                match c {
+                    Ok(c) => {
+                        self.buf.push_str(&c);
+                        if prev_st == State::OutsideTag && !is_whitespace_str(&c) {
+                            self.inside_whitespace = false;
+                        }
+                        self.into_state_continue(prev_st)
+                    }
+                    Err(e) => Some(e)
+                }
+            }
+
+            _ => Some(self_error!(self; "Unexpected token inside an entity: {}", t))
+        }
+    }
+
+    /// Resolves a numeric character reference.
+    ///
+    /// `name` is the full reference text (used in error messages), `digits` the part after
+    /// `#` or `#x`, and `radix` is 10 or 16. The null character is rejected by comparing
+    /// the parsed value, so spellings such as `00` are caught as well as `0`. A number
+    /// that is not a valid `char` becomes U+FFFD when `replace_unknown_entity_references`
+    /// is set and is an error otherwise; digits that do not parse are always an error.
+    fn resolve_char_reference(&self, name: &str, digits: &str, radix: u32) -> ::std::result::Result<String, Result> {
+        let kind = if radix == 16 { "hexadecimal" } else { "decimal" };
+        let code = match u32::from_str_radix(digits, radix) {
+            Ok(0) => return Err(self_error!(self; "Null character entity is not allowed")),
+            Ok(code) => code,
+            Err(_) => return Err(self_error!(self; "Invalid {} character number in an entity: {}", kind, name))
+        };
+        match char::from_u32(code) {
+            Some(c) => Ok(c.to_string()),
+            None if self.config.replace_unknown_entity_references => Ok('\u{fffd}'.to_string()),
+            None => Err(self_error!(self; "Invalid {} character number in an entity: {}", kind, name))
+        }
+    }
+}
diff --git a/third_party/rust/xml-rs/src/reader/parser/mod.rs b/third_party/rust/xml-rs/src/reader/parser/mod.rs
new file mode 100644
index 0000000000..58ca3a6b1e
--- /dev/null
+++ b/third_party/rust/xml-rs/src/reader/parser/mod.rs
@@ -0,0 +1,622 @@
+//! Contains an implementation of pull-based XML parser.
+
+use std::mem;
+use std::borrow::Cow;
+use std::io::prelude::*;
+
+use common::{
+ self,
+ XmlVersion, Position, TextPosition,
+ is_name_start_char, is_name_char,
+};
+use name::OwnedName;
+use attribute::OwnedAttribute;
+use namespace::NamespaceStack;
+
+use reader::events::XmlEvent;
+use reader::config::ParserConfig;
+use reader::lexer::{Lexer, Token};
+
+macro_rules! gen_takes( // Generates one `take_*` method on MarkupData per listed field
+ ($($field:ident -> $method:ident, $t:ty, $def:expr);+) => ( // entry syntax: field -> method, Type, replacement-expr
+ $(
+ impl MarkupData {
+ #[inline]
+ fn $method(&mut self) -> $t {
+ mem::replace(&mut self.$field, $def) // move the current value out, leaving $def in its place
+ }
+ }
+ )+
+ )
+);
+
+gen_takes!( // each take_* returns the stored value and resets the field to the listed default
+ name -> take_name, String, String::new();
+ ref_data -> take_ref_data, String, String::new();
+
+ version -> take_version, Option<common::XmlVersion>, None;
+ encoding -> take_encoding, Option<String>, None;
+ standalone -> take_standalone, Option<bool>, None;
+
+ element_name -> take_element_name, Option<OwnedName>, None;
+
+ attr_name -> take_attr_name, Option<OwnedName>, None;
+ attributes -> take_attributes, Vec<OwnedAttribute>, vec!()
+);
+
+macro_rules! self_error( // Shorthand for calling `.error(..)` on a parser binding
+ ($this:ident; $msg:expr) => ($this.error($msg)); // message passed through unchanged
+ ($this:ident; $fmt:expr, $($arg:expr),+) => ($this.error(format!($fmt, $($arg),+))) // message built with format!
+);
+
+mod outside_tag;
+mod inside_processing_instruction;
+mod inside_declaration;
+mod inside_doctype;
+mod inside_opening_tag;
+mod inside_closing_tag_name;
+mod inside_comment;
+mod inside_cdata;
+mod inside_reference;
+
+static DEFAULT_VERSION: XmlVersion = XmlVersion::Version10; // reported in StartDocument when the declaration gives no version
+static DEFAULT_ENCODING: &'static str = "UTF-8"; // reported in StartDocument when the declaration gives no encoding
+static DEFAULT_STANDALONE: Option<bool> = None; // NOTE(review): no use visible in this part of the file
+
+type ElementStack = Vec<OwnedName>; // type of PullParser::est; its length is the parser depth
+pub type Result = super::Result<XmlEvent>; // result of one parsing step: an event or an error
+
+/// Pull-based XML parser: a token-driven state machine that `next` advances until an event or error is produced.
+pub struct PullParser {
+ config: ParserConfig, // parser options consulted by the state handlers
+ lexer: Lexer, // token source; also supplies positions for events and errors
+ st: State, // current state, dispatched on in dispatch_token
+ buf: String, // text accumulated for the name, value or content being read
+ nst: NamespaceStack, // namespace declarations; filled from xmlns attributes
+
+ data: MarkupData, // scratch storage for the markup item being parsed
+ final_result: Option<Result>, // terminal result; once set, next() returns a clone of it
+ next_event: Option<Result>, // pending event that next() returns before reading more tokens
+ est: ElementStack, // element name stack; depth() is its length
+ pos: Vec<TextPosition>, // position queue; position() reports the first entry
+
+ encountered_element: bool, // consulted for the xml-declaration and end-of-stream checks
+ parsed_declaration: bool, // set once the XML declaration has been emitted
+ inside_whitespace: bool, // cleared when non-whitespace content is buffered
+ read_prefix_separator: bool, // set once ':' has been read in the current qualified name
+ pop_namespace: bool // when set, the next call to next() pops `nst` first
+}
+
+impl PullParser {
+    /// Creates a parser that uses `config` and starts in `State::OutsideTag`.
+    ///
+    /// All buffers and scratch fields start empty, the position queue holds a single
+    /// fresh `TextPosition`, and `inside_whitespace` starts out `true`.
+    pub fn new(config: ParserConfig) -> PullParser {
+        let scratch = MarkupData {
+            name: String::new(),
+            ref_data: String::new(),
+            version: None,
+            encoding: None,
+            standalone: None,
+            element_name: None,
+            quote: None,
+            attr_name: None,
+            attributes: Vec::new()
+        };
+
+        PullParser {
+            config: config,
+            lexer: Lexer::new(),
+            st: State::OutsideTag,
+            buf: String::new(),
+            nst: NamespaceStack::default(),
+
+            data: scratch,
+            final_result: None,
+            next_event: None,
+            est: Vec::new(),
+            pos: vec![TextPosition::new()],
+
+            encountered_element: false,
+            parsed_declaration: false,
+            inside_whitespace: true,
+            read_prefix_separator: false,
+            pop_namespace: false
+        }
+    }
+
+    /// Reports whether the configuration asks the parser to ignore end-of-stream errors.
+    pub fn is_ignoring_end_of_stream(&self) -> bool {
+        self.config.ignore_end_of_stream
+    }
+}
+
+impl Position for PullParser {
+ /// Returns the position of the last event produced by the parser
+ #[inline]
+ fn position(&self) -> TextPosition {
+ self.pos[0] // front of the position queue; `new` seeds it and next_pos keeps at least one entry
+ }
+}
+
+#[derive(Clone, PartialEq)]
+pub enum State { // Top-level parser state; dispatch_token picks the handler from it
+ OutsideTag, // handled by outside_tag
+ InsideOpeningTag(OpeningTagSubstate), // handled by inside_opening_tag
+ InsideClosingTag(ClosingTagSubstate), // handled by inside_closing_tag_name
+ InsideProcessingInstruction(ProcessingInstructionSubstate), // handled by inside_processing_instruction
+ InsideComment, // handled by inside_comment
+ InsideCData, // handled by inside_cdata
+ InsideDeclaration(DeclarationSubstate), // handled by inside_declaration
+ InsideDoctype, // handled by inside_doctype
+ InsideReference(Box<State>) // handled by inside_reference; the boxed state is resumed afterwards
+}
+
+#[derive(Clone, PartialEq)]
+pub enum OpeningTagSubstate { // Position inside an opening tag
+ InsideName, // reading the element name
+
+ InsideTag, // after the name or an attribute: expects whitespace, an attribute, '>' or '/>'
+
+ InsideAttributeName, // reading an attribute name
+ AfterAttributeName, // whitespace between an attribute name and '='
+
+ InsideAttributeValue, // reading the quoted attribute value
+}
+
+#[derive(Clone, PartialEq)]
+pub enum ClosingTagSubstate { // Position inside a closing tag
+ CTInsideName, // reading the element name
+ CTAfterName // name complete; only whitespace and '>' are accepted
+}
+
+#[derive(Clone, PartialEq)]
+pub enum ProcessingInstructionSubstate { // Position inside a processing instruction
+ PIInsideName, // reading the target name
+ PIInsideData // reading the data that follows the name, up to '?>'
+}
+
+#[derive(Clone, PartialEq)]
+pub enum DeclarationSubstate { // Position inside the XML declaration
+ BeforeVersion, // expects whitespace or the 'v' that starts "version"
+ InsideVersion, // reading the rest of the "version" name
+ AfterVersion, // whitespace between "version" and '='
+
+ InsideVersionValue, // reading the quoted version value
+ AfterVersionValue, // expects "encoding", "standalone" or '?>'
+
+ InsideEncoding, // reading the rest of the "encoding" name
+ AfterEncoding, // whitespace between "encoding" and '='
+
+ InsideEncodingValue, // reading the quoted encoding value
+
+ BeforeStandaloneDecl, // after the encoding value: expects "standalone" or '?>'
+ InsideStandaloneDecl, // reading the rest of the "standalone" name
+ AfterStandaloneDecl, // whitespace between "standalone" and '='
+
+ InsideStandaloneDeclValue, // reading the quoted standalone value
+ AfterStandaloneDeclValue // expects whitespace or '?>'
+}
+
+#[derive(PartialEq)]
+enum QualifiedNameTarget { // Tells read_qualified_name which tokens may end the name besides whitespace
+ AttributeNameTarget, // '=' ends the name
+ OpeningTagNameTarget, // '>' and '/>' end the name
+ ClosingTagNameTarget // '>' ends the name
+}
+
+#[derive(Copy, Clone, PartialEq, Eq)]
+enum QuoteToken { // Which quote character opened the attribute value being read
+ SingleQuoteToken, // corresponds to Token::SingleQuote
+ DoubleQuoteToken // corresponds to Token::DoubleQuote
+}
+
+impl QuoteToken {
+    /// Converts a quote token into the matching `QuoteToken`.
+    ///
+    /// # Panics
+    /// Panics when `t` is neither `Token::SingleQuote` nor `Token::DoubleQuote`.
+    fn from_token(t: &Token) -> QuoteToken {
+        match t {
+            &Token::DoubleQuote => QuoteToken::DoubleQuoteToken,
+            &Token::SingleQuote => QuoteToken::SingleQuoteToken,
+            other => panic!("Unexpected token: {}", other)
+        }
+    }
+
+    /// Converts back into the lexer token for this quote kind.
+    fn as_token(self) -> Token {
+        if self == QuoteToken::SingleQuoteToken {
+            Token::SingleQuote
+        } else {
+            Token::DoubleQuote
+        }
+    }
+}
+
+struct MarkupData { // Scratch storage for the markup item being parsed; the gen_takes! methods move values out
+ name: String, // used for processing instruction name
+ ref_data: String, // used for reference content
+
+ version: Option<common::XmlVersion>, // used for XML declaration version
+ encoding: Option<String>, // used for XML declaration encoding
+ standalone: Option<bool>, // used for XML declaration standalone parameter
+
+ element_name: Option<OwnedName>, // used for element name
+
+ quote: Option<QuoteToken>, // used to hold opening quote for attribute value; None outside a value
+ attr_name: Option<OwnedName>, // used to hold attribute name
+ attributes: Vec<OwnedAttribute> // used to hold all accumulated attributes
+}
+
+impl PullParser {
+    /// Returns the next event read from the given reader.
+    ///
+    /// This method should always be called with the same reader. If a different reader
+    /// is supplied on each call, the result is undefined.
+    ///
+    /// A stored terminal result or a pending event is returned without reading anything.
+    /// Otherwise tokens are pulled from the lexer and dispatched until a handler produces
+    /// an event or an error. `EndDocument` and errors become the terminal result.
+    pub fn next<R: Read>(&mut self, r: &mut R) -> Result {
+        if let Some(ref ev) = self.final_result {
+            return ev.clone();
+        }
+
+        if let Some(ev) = self.next_event.take() {
+            return ev;
+        }
+
+        if self.pop_namespace {
+            self.pop_namespace = false;
+            self.nst.pop();
+        }
+
+        loop {
+            // Keep pulling tokens until the lexer runs dry or a handler yields a result.
+            let token = match self.lexer.next_token(r) {
+                Ok(Some(token)) => token,
+                Ok(None) => break,
+                Err(lexer_error) => return self.set_final_result(Err(lexer_error)),
+            };
+
+            if let Some(outcome) = self.dispatch_token(token) {
+                self.next_pos();
+                return match outcome {
+                    Ok(XmlEvent::EndDocument) => self.set_final_result(Ok(XmlEvent::EndDocument)),
+                    Ok(xml_event) => Ok(xml_event),
+                    Err(xml_error) => self.set_final_result(Err(xml_error)),
+                };
+            }
+        }
+
+        // Handle end of stream
+        // Forward pos to the lexer head
+        self.next_pos();
+
+        if self.depth() != 0 {
+            if self.config.ignore_end_of_stream {
+                // Not terminal: the caller may supply more data and call again.
+                self.final_result = None;
+                self.lexer.reset_eof_handled();
+                return self_error!(self; "Unexpected end of stream: still inside the root element");
+            }
+            let unfinished = self_error!(self; "Unexpected end of stream: still inside the root element");
+            return self.set_final_result(unfinished);
+        }
+
+        let ev = if !self.encountered_element {
+            self_error!(self; "Unexpected end of stream: no root element found")
+        } else if self.st == State::OutsideTag {  // all is ok
+            Ok(XmlEvent::EndDocument)
+        } else {
+            self_error!(self; "Unexpected end of stream")  // TODO: add expected hint?
+        };
+        self.set_final_result(ev)
+    }
+
+ // This function is to be called when a terminal event is reached.
+ // The function sets up the `self.final_result` into `Some(result)` and return `result`.
+ fn set_final_result(&mut self, result: Result) -> Result {
+ self.final_result = Some(result.clone());
+ result
+ }
+
+ #[inline] // Builds an `Err` result from a message
+ fn error<M: Into<Cow<'static, str>>>(&self, msg: M) -> Result {
+ Err((&self.lexer, msg).into()) // the lexer is passed along with the message; presumably it supplies the error position
+ }
+
+    /// Advances the position reported by `position()`.
+    ///
+    /// With queued positions the oldest one is dropped; with a single entry it is
+    /// overwritten by the lexer's current position.
+    #[inline]
+    fn next_pos(&mut self) {
+        if self.pos.len() <= 1 {
+            self.pos[0] = self.lexer.position();
+        } else {
+            self.pos.remove(0);
+        }
+    }
+
+ #[inline] // Queues the lexer's current position behind the ones already stored
+ fn push_pos(&mut self) {
+ self.pos.push(self.lexer.position());
+ }
+
+    /// Routes a token to the handler for the current state.
+    ///
+    /// The state is cloned first so that handlers can take `&mut self` and replace it.
+    fn dispatch_token(&mut self, t: Token) -> Option<Result> {
+        let current = self.st.clone();
+        match current {
+            State::OutsideTag => self.outside_tag(t),
+            State::InsideDoctype => self.inside_doctype(t),
+            State::InsideComment => self.inside_comment(t),
+            State::InsideCData => self.inside_cdata(t),
+            State::InsideOpeningTag(sub) => self.inside_opening_tag(t, sub),
+            State::InsideClosingTag(sub) => self.inside_closing_tag_name(t, sub),
+            State::InsideProcessingInstruction(sub) => self.inside_processing_instruction(t, sub),
+            State::InsideDeclaration(sub) => self.inside_declaration(t, sub),
+            State::InsideReference(previous) => self.inside_reference(t, *previous)
+        }
+    }
+
+ #[inline] // Current nesting depth: the number of entries on the element stack
+ fn depth(&self) -> usize {
+ self.est.len()
+ }
+
+    /// Tells whether the accumulation buffer currently holds any text.
+    #[inline]
+    fn buf_has_data(&self) -> bool {
+        !self.buf.is_empty()
+    }
+
+    /// Moves the accumulated text out, leaving an empty buffer behind.
+    #[inline]
+    fn take_buf(&mut self) -> String {
+        let mut taken = String::new();
+        mem::swap(&mut taken, &mut self.buf);
+        taken
+    }
+
+ #[inline] // Appends `c` to the buffer and reports that no event is ready yet
+ fn append_char_continue(&mut self, c: char) -> Option<Result> {
+ self.buf.push(c);
+ None // None means: keep feeding tokens
+ }
+
+ #[inline] // Switches the parser to state `st` and passes `ev` through unchanged
+ fn into_state(&mut self, st: State, ev: Option<Result>) -> Option<Result> {
+ self.st = st;
+ ev // Some(..) is an event or error to report, None means keep going
+ }
+
+    /// Switches to state `st` without producing an event.
+    #[inline]
+    fn into_state_continue(&mut self, st: State) -> Option<Result> {
+        self.st = st;
+        None
+    }
+
+    /// Switches to state `st` and produces `ev` as the outcome of this step.
+    #[inline]
+    fn into_state_emit(&mut self, st: State, ev: Result) -> Option<Result> {
+        self.st = st;
+        Some(ev)
+    }
+
+    /// Feeds one token into the qualified-name reader. Name characters are accumulated in
+    /// `self.buf`; when a terminating token arrives the buffer is parsed into a name and
+    /// handed to `on_name`. A buffer that does not parse yields an error.
+    ///
+    /// # Parameters
+    /// * `t` --- next token;
+    /// * `target` --- decides which tokens besides whitespace end the name: `=` for
+    ///   attribute names, `>` and `/>` for opening tag names, `>` for closing tag names;
+    /// * `on_name` --- a callback invoked with the terminating token and the parsed name.
+    fn read_qualified_name<F>(&mut self, t: Token, target: QualifiedNameTarget, on_name: F) -> Option<Result>
+      where F: Fn(&mut PullParser, Token, OwnedName) -> Option<Result> {
+        // With at most one character in the buffer no colon can have been read yet
+        // (a colon is never accepted as the first character), so reset the flag.
+        if self.buf.len() <= 1 {
+            self.read_prefix_separator = false;
+        }
+
+        let ends_name = match t {
+            Token::Whitespace(_) => true,
+            Token::EqualsSign => target == QualifiedNameTarget::AttributeNameTarget,
+            Token::EmptyTagEnd => target == QualifiedNameTarget::OpeningTagNameTarget,
+            Token::TagEnd => target == QualifiedNameTarget::OpeningTagNameTarget ||
+                             target == QualifiedNameTarget::ClosingTagNameTarget,
+            _ => false
+        };
+
+        if ends_name {
+            let raw = self.take_buf();
+            return match raw.parse() {
+                Ok(name) => on_name(self, t, name),
+                Err(_) => Some(self_error!(self; "Qualified name is invalid: {}", raw))
+            };
+        }
+
+        match t {
+            // There can be only one colon, and not as the first character
+            Token::Character(':') if self.buf_has_data() && !self.read_prefix_separator => {
+                self.buf.push(':');
+                self.read_prefix_separator = true;
+                None
+            }
+
+            Token::Character(c) if c != ':' && (!self.buf_has_data() && is_name_start_char(c) ||
+                                          self.buf_has_data() && is_name_char(c)) =>
+                self.append_char_continue(c),
+
+            _ => Some(self_error!(self; "Unexpected token inside qualified name: {}", t))
+        }
+    }
+
+    /// Feeds one token into the attribute value being accumulated in `self.buf`.
+    ///
+    /// # Parameters
+    /// * `t` --- next token;
+    /// * `on_value` --- called with the collected value when the matching closing quote arrives.
+    fn read_attribute_value<F>(&mut self, t: Token, on_value: F) -> Option<Result>
+        where F: Fn(&mut PullParser, String) -> Option<Result> {
+        match t {
+            Token::OpeningTagStart => // the `<` token is never accepted here
+                Some(self_error!(self; "Unexpected token inside attribute value: <")),
+            // A reference: the current state travels along inside `InsideReference`
+            Token::ReferenceStart => {
+                let resume_at = Box::new(self.st.clone());
+                self.into_state_continue(State::InsideReference(resume_at))
+            }
+            // Whitespace seen before the opening quote is skipped
+            Token::Whitespace(_) if self.data.quote.is_none() => None,
+            Token::DoubleQuote | Token::SingleQuote => {
+                // `None`: no quote open yet; `Some(true)`: `t` matches the opening quote
+                let quote_state = self.data.quote.map(|open| open.as_token() == t);
+                match quote_state {
+                    None => { // entering the value: remember which quote opened it
+                        self.data.quote = Some(QuoteToken::from_token(&t));
+                        None
+                    }
+                    Some(true) => { // matching quote: the value is complete
+                        self.data.quote = None;
+                        let value = self.take_buf();
+                        on_value(self, value)
+                    }
+                    Some(false) => { // the other quote kind is plain text inside the value
+                        t.push_to_string(&mut self.buf);
+                        None
+                    }
+                }
+            }
+            _ => { // every remaining token is appended to the value
+                t.push_to_string(&mut self.buf);
+                None
+            }
+        }
+    }
+
+    /// Finishes an opening tag: resolves the namespaces of the element and of its prefixed
+    /// attributes through `self.nst`, then emits `StartElement` and returns to `OutsideTag`.
+    /// When `emit_end_element` is true a matching `EndElement` is queued in `next_event` and
+    /// `pop_namespace` is set; otherwise the element name is pushed onto `est`.
+    fn emit_start_element(&mut self, emit_end_element: bool) -> Option<Result> {
+        let mut name = self.data.take_element_name().unwrap();
+        let mut attributes = self.data.take_attributes();
+        // An empty binding stands for the default namespace and is stored as `None`
+        let element_ns = match self.nst.get(name.borrow().prefix_repr()) {
+            Some("") => None,
+            Some(uri) => Some(uri.into()),
+            None => return Some(self_error!(self; "Element {} prefix is unbound", name))
+        };
+        name.namespace = element_ns;
+        // Only attributes that carry an explicit prefix have their namespace rewritten
+        for attr in attributes.iter_mut() {
+            if let Some(ref pfx) = attr.name.prefix {
+                let resolved = match self.nst.get(pfx) {
+                    Some("") => None,
+                    Some(uri) => Some(uri.into()),
+                    None => return Some(self_error!(self; "Attribute {} prefix is unbound", attr.name))
+                };
+                attr.name.namespace = resolved;
+            }
+        }
+
+        if !emit_end_element {
+            self.est.push(name.clone());
+        } else {
+            self.pop_namespace = true;
+            self.next_event = Some(Ok(XmlEvent::EndElement { name: name.clone() }));
+        }
+        let namespace = self.nst.squash();
+        let started = XmlEvent::StartElement {
+            name: name, attributes: attributes, namespace: namespace
+        };
+        self.into_state_emit(State::OutsideTag, Ok(started))
+    }
+
+    /// Finishes a closing tag: resolves the element's namespace, pops the innermost open
+    /// element from `est` and emits `EndElement` if both names match (an error otherwise).
+    fn emit_end_element(&mut self) -> Option<Result> {
+        let mut name = self.data.take_element_name().unwrap();
+
+        // An empty binding stands for the default namespace and is stored as `None`
+        let resolved = match self.nst.get(name.borrow().prefix_repr()) {
+            Some("") => None,
+            Some(uri) => Some(uri.into()),
+            None => return Some(self_error!(self; "Element {} prefix is unbound", name))
+        };
+        name.namespace = resolved;
+        let op_name = self.est.pop().unwrap();
+        if name != op_name {
+            return Some(self_error!(self; "Unexpected closing tag: {}, expected {}", name, op_name));
+        }
+        self.pop_namespace = true;
+        self.into_state_emit(State::OutsideTag, Ok(XmlEvent::EndElement { name: name }))
+    }
+
+}
+
+#[cfg(test)]
+mod tests { // Regression tests that drive `PullParser::next` over small in-memory documents.
+ use std::io::BufReader;
+
+ use common::{Position, TextPosition};
+ use name::OwnedName;
+ use attribute::OwnedAttribute;
+ use reader::parser::PullParser;
+ use reader::ParserConfig;
+ use reader::events::XmlEvent;
+
+ fn new_parser() -> PullParser { // Parser built from `ParserConfig::new()`.
+ PullParser::new(ParserConfig::new())
+ }
+
+ macro_rules! expect_event( // Pulls the next event of parser `$p` from reader `$r`; panics unless it matches `$t`.
+ ($r:expr, $p:expr, $t:pat) => (
+ match $p.next(&mut $r) {
+ $t => {}
+ e => panic!("Unexpected event: {:?}", e)
+ }
+ );
+ ($r:expr, $p:expr, $t:pat => $c:expr ) => ( // Same, with an extra boolean guard `$c` on the match.
+ match $p.next(&mut $r) {
+ $t if $c => {}
+ e => panic!("Unexpected event: {:?}", e)
+ }
+ )
+ );
+
+ macro_rules! test_data( // Wraps the literal `$d` in a `BufReader` and pairs it with a fresh parser.
+ ($d:expr) => ({
+ static DATA: &'static str = $d;
+ let r = BufReader::new(DATA.as_bytes());
+ let p = new_parser();
+ (r, p)
+ })
+ );
+
+ #[test]
+ fn issue_3_semicolon_in_attribute_value() { // A ';' inside an attribute value must stay literal text.
+ let (mut r, mut p) = test_data!(r#"
+ <a attr="zzz;zzz" />
+ "#);
+
+ expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
+ expect_event!(r, p, Ok(XmlEvent::StartElement { ref name, ref attributes, ref namespace }) =>
+ *name == OwnedName::local("a") &&
+ attributes.len() == 1 &&
+ attributes[0] == OwnedAttribute::new(OwnedName::local("attr"), "zzz;zzz") &&
+ namespace.is_essentially_empty()
+ );
+ expect_event!(r, p, Ok(XmlEvent::EndElement { ref name }) => *name == OwnedName::local("a"));
+ expect_event!(r, p, Ok(XmlEvent::EndDocument));
+ }
+
+ #[test]
+ fn issue_140_entity_reference_inside_tag() { // A numeric character reference in element content is decoded into `Characters`.
+ let (mut r, mut p) = test_data!(r#"
+ <bla>&#9835;</bla>
+ "#);
+
+ expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
+ expect_event!(r, p, Ok(XmlEvent::StartElement { ref name, .. }) => *name == OwnedName::local("bla"));
+ expect_event!(r, p, Ok(XmlEvent::Characters(ref s)) => s == "\u{266b}");
+ expect_event!(r, p, Ok(XmlEvent::EndElement { ref name, .. }) => *name == OwnedName::local("bla"));
+ expect_event!(r, p, Ok(XmlEvent::EndDocument));
+ }
+
+ #[test]
+ fn opening_tag_in_attribute_value() { // A raw '<' inside an attribute value must be reported as an error.
+ let (mut r, mut p) = test_data!(r#"
+ <a attr="zzz<zzz" />
+ "#);
+
+ expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
+ expect_event!(r, p, Err(ref e) =>
+ e.msg() == "Unexpected token inside attribute value: <" &&
+ e.position() == TextPosition { row: 1, column: 24 } // NOTE(review): depends on the literal's exact leading whitespace, which looks collapsed in this rendering -- confirm
+ );
+ }
+}
diff --git a/third_party/rust/xml-rs/src/reader/parser/outside_tag.rs b/third_party/rust/xml-rs/src/reader/parser/outside_tag.rs
new file mode 100644
index 0000000000..d3f7598f75
--- /dev/null
+++ b/third_party/rust/xml-rs/src/reader/parser/outside_tag.rs
@@ -0,0 +1,130 @@
+use common::is_whitespace_char;
+
+use reader::events::XmlEvent;
+use reader::lexer::Token;
+
+use super::{
+ Result, PullParser, State, ClosingTagSubstate, OpeningTagSubstate,
+ ProcessingInstructionSubstate, DEFAULT_VERSION, DEFAULT_ENCODING, DEFAULT_STANDALONE
+};
+
+impl PullParser {
+ pub fn outside_tag(&mut self, t: Token) -> Option<Result> { // Handles a token seen outside of any tag: buffers text, or flushes it and switches state on markup.
+ match t {
+ Token::ReferenceStart => // reference start: enter InsideReference, carrying OutsideTag inside it
+ self.into_state_continue(State::InsideReference(Box::new(State::OutsideTag))),
+
+ Token::Whitespace(_) if self.depth() == 0 && self.config.ignore_root_level_whitespace => None, // skip whitespace outside of the root element
+
+ Token::Whitespace(_) if self.config.trim_whitespace && !self.buf_has_data() => None, // when trimming, whitespace arriving on an empty buffer is dropped
+
+ Token::Whitespace(c) => { // any other whitespace is buffered
+ if !self.buf_has_data() { // first buffered character; push_pos() presumably records where the pending event starts -- confirm
+ self.push_pos();
+ }
+ self.append_char_continue(c)
+ }
+
+ _ if t.contains_char_data() && self.depth() == 0 => // character data is rejected while no element is open
+ Some(self_error!(self; "Unexpected characters outside the root element: {}", t)),
+
+ _ if t.contains_char_data() => { // Non-whitespace char data
+ if !self.buf_has_data() {
+ self.push_pos();
+ }
+ self.inside_whitespace = false; // the buffer is no longer whitespace-only
+ t.push_to_string(&mut self.buf);
+ None
+ }
+
+ Token::ReferenceEnd => { // Semi-colon in a text outside an entity
+ self.inside_whitespace = false;
+ Token::ReferenceEnd.push_to_string(&mut self.buf);
+ None
+ }
+
+ Token::CommentStart if self.config.coalesce_characters && self.config.ignore_comments => { // no flush: the text buffer is kept across the comment
+ // We need to switch the lexer into a comment mode inside comments
+ self.lexer.inside_comment();
+ self.into_state_continue(State::InsideComment)
+ }
+
+ Token::CDataStart if self.config.coalesce_characters && self.config.cdata_to_characters => { // no flush: the text buffer is kept while entering CDATA
+ if !self.buf_has_data() {
+ self.push_pos();
+ }
+ // We need to disable lexing errors inside CDATA
+ self.lexer.disable_errors();
+ self.into_state_continue(State::InsideCData)
+ }
+
+ _ => {
+ // Encountered some markup event: flush the buffer as a Characters
+ // or a Whitespace event (or drop it), then dispatch on the token
+ let mut next_event = if self.buf_has_data() {
+ let buf = self.take_buf();
+ if self.inside_whitespace && self.config.trim_whitespace { // whitespace-only text is dropped when trimming
+ None
+ } else if self.inside_whitespace && !self.config.whitespace_to_characters { // whitespace-only text gets its own event kind
+ Some(Ok(XmlEvent::Whitespace(buf)))
+ } else if self.config.trim_whitespace { // other text is trimmed at both ends
+ Some(Ok(XmlEvent::Characters(buf.trim_matches(is_whitespace_char).into())))
+ } else {
+ Some(Ok(XmlEvent::Characters(buf)))
+ }
+ } else { None };
+ self.inside_whitespace = true; // Reset inside_whitespace flag
+ self.push_pos();
+ match t {
+ Token::ProcessingInstructionStart =>
+ self.into_state(State::InsideProcessingInstruction(ProcessingInstructionSubstate::PIInsideName), next_event),
+
+ Token::DoctypeStart if !self.encountered_element => { // a doctype is only accepted before the first element
+ // We don't have a doctype event so skip this position
+ // FIXME: update when we have a doctype event
+ self.next_pos();
+ self.lexer.disable_errors();
+ self.into_state(State::InsideDoctype, next_event)
+ }
+
+ Token::OpeningTagStart => {
+ // No XML declaration was parsed before this element: synthesize a
+ // StartDocument event from the DEFAULT_* values and emit it first.
+ if !self.parsed_declaration {
+ self.parsed_declaration = true;
+ let sd_event = XmlEvent::StartDocument {
+ version: DEFAULT_VERSION,
+ encoding: DEFAULT_ENCODING.into(),
+ standalone: DEFAULT_STANDALONE
+ };
+ // NOTE(review): this overwrites next_event; the original comment says it is always None here
+ // (outside the root element) -- confirm for ignore_root_level_whitespace == false
+ next_event = Some(Ok(sd_event));
+ self.push_pos();
+ }
+ self.encountered_element = true;
+ self.nst.push_empty(); // presumably opens a fresh namespace scope for this element -- confirm
+ self.into_state(State::InsideOpeningTag(OpeningTagSubstate::InsideName), next_event)
+ }
+
+ Token::ClosingTagStart if self.depth() > 0 => // a closing tag is only accepted while an element is open
+ self.into_state(State::InsideClosingTag(ClosingTagSubstate::CTInsideName), next_event),
+
+ Token::CommentStart => {
+ // We need to switch the lexer into a comment mode inside comments
+ self.lexer.inside_comment();
+ self.into_state(State::InsideComment, next_event)
+ }
+
+ Token::CDataStart => {
+ // We need to disable lexing errors inside CDATA
+ self.lexer.disable_errors();
+ self.into_state(State::InsideCData, next_event)
+ }
+
+ _ => Some(self_error!(self; "Unexpected token: {}", t))
+ }
+ }
+ }
+ }
+}