summaryrefslogtreecommitdiffstats
path: root/third_party/rust/yaml-rust/src
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/yaml-rust/src')
-rw-r--r--third_party/rust/yaml-rust/src/emitter.rs635
-rw-r--r--third_party/rust/yaml-rust/src/lib.rs121
-rw-r--r--third_party/rust/yaml-rust/src/parser.rs858
-rw-r--r--third_party/rust/yaml-rust/src/scanner.rs2182
-rw-r--r--third_party/rust/yaml-rust/src/yaml.rs739
5 files changed, 4535 insertions, 0 deletions
diff --git a/third_party/rust/yaml-rust/src/emitter.rs b/third_party/rust/yaml-rust/src/emitter.rs
new file mode 100644
index 0000000000..f20a3ed679
--- /dev/null
+++ b/third_party/rust/yaml-rust/src/emitter.rs
@@ -0,0 +1,635 @@
+use std::convert::From;
+use std::error::Error;
+use std::fmt::{self, Display};
+use crate::yaml::{Hash, Yaml};
+
/// Errors that can occur while emitting YAML.
#[derive(Copy, Clone, Debug)]
pub enum EmitError {
    /// The underlying `fmt::Write` sink reported an error.
    FmtError(fmt::Error),
    /// A mapping key was unsuitable for emission.
    /// (Not constructed anywhere in the code visible here.)
    BadHashmapKey,
}
+
+impl Error for EmitError {
+ fn cause(&self) -> Option<&dyn Error> {
+ None
+ }
+}
+
+impl Display for EmitError {
+ fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+ match *self {
+ EmitError::FmtError(ref err) => Display::fmt(err, formatter),
+ EmitError::BadHashmapKey => formatter.write_str("bad hashmap key"),
+ }
+ }
+}
+
/// Allow `?` on `fmt::Result` inside the emitter by converting the
/// formatter error into an `EmitError`.
impl From<fmt::Error> for EmitError {
    fn from(f: fmt::Error) -> Self {
        EmitError::FmtError(f)
    }
}
+
/// Serializer that writes a `Yaml` document tree as YAML text into a
/// `fmt::Write` sink.
pub struct YamlEmitter<'a> {
    // Destination for the emitted text.
    writer: &'a mut dyn fmt::Write,
    // Spaces written per indentation level (set to 2 in `new`).
    best_indent: usize,
    // Whether nested blocks use compact inline notation (see `compact`).
    compact: bool,

    // Current block nesting depth; -1 means "not inside any node yet".
    level: isize,
}

/// Result type returned by all emitter operations.
pub type EmitResult = Result<(), EmitError>;
+
/// Write `v` to `wr` as a double-quoted, JSON-style escaped string
/// (originally adapted from serialize::json).
///
/// Runs of bytes that need no escaping are flushed with a single
/// `write_str`; every escape trigger is an ASCII byte, so slicing the
/// input at those positions always lands on a char boundary.
fn escape_str(wr: &mut dyn fmt::Write, v: &str) -> Result<(), fmt::Error> {
    wr.write_str("\"")?;

    // Start of the pending run of not-yet-written plain bytes.
    let mut chunk_start = 0;

    for (idx, byte) in v.bytes().enumerate() {
        let fixed: &str = match byte {
            b'"' => "\\\"",
            b'\\' => "\\\\",
            b'\x08' => "\\b",
            b'\t' => "\\t",
            b'\n' => "\\n",
            b'\x0c' => "\\f",
            b'\r' => "\\r",
            // Remaining C0 controls and DEL take the \u00XX form.
            b'\x00'..=b'\x1f' | b'\x7f' => {
                if chunk_start < idx {
                    wr.write_str(&v[chunk_start..idx])?;
                }
                write!(wr, "\\u{:04x}", byte)?;
                chunk_start = idx + 1;
                continue;
            }
            // Anything else (including multi-byte UTF-8) passes through.
            _ => continue,
        };

        if chunk_start < idx {
            wr.write_str(&v[chunk_start..idx])?;
        }
        wr.write_str(fixed)?;
        chunk_start = idx + 1;
    }

    if chunk_start != v.len() {
        wr.write_str(&v[chunk_start..])?;
    }

    wr.write_str("\"")
}
+
impl<'a> YamlEmitter<'a> {
    /// Create an emitter writing to `writer`, with 2-space indentation
    /// and compact inline notation enabled by default.
    pub fn new(writer: &'a mut dyn fmt::Write) -> YamlEmitter {
        YamlEmitter {
            writer,
            best_indent: 2,
            compact: true,
            level: -1,
        }
    }

    /// Set 'compact inline notation' on or off, as described for block
    /// [sequences](http://www.yaml.org/spec/1.2/spec.html#id2797382)
    /// and
    /// [mappings](http://www.yaml.org/spec/1.2/spec.html#id2798057).
    ///
    /// In this form, blocks cannot have any properties (such as anchors
    /// or tags), which should be OK, because this emitter doesn't
    /// (currently) emit those anyways.
    pub fn compact(&mut self, compact: bool) {
        self.compact = compact;
    }

    /// Determine if this emitter is using 'compact inline notation'.
    pub fn is_compact(&self) -> bool {
        self.compact
    }

    /// Emit `doc` as one YAML document, preceded by the `---` marker.
    pub fn dump(&mut self, doc: &Yaml) -> EmitResult {
        // write DocumentStart
        writeln!(self.writer, "---")?;
        self.level = -1;
        self.emit_node(doc)
    }

    /// Write `best_indent` spaces per nesting level; levels <= 0 get none.
    fn write_indent(&mut self) -> EmitResult {
        if self.level <= 0 {
            return Ok(());
        }
        for _ in 0..self.level {
            for _ in 0..self.best_indent {
                write!(self.writer, " ")?;
            }
        }
        Ok(())
    }

    /// Emit a single node. `Null` and `BadValue` both render as `~`;
    /// aliases are not emitted (see the XXX comment below).
    fn emit_node(&mut self, node: &Yaml) -> EmitResult {
        match *node {
            Yaml::Array(ref v) => self.emit_array(v),
            Yaml::Hash(ref h) => self.emit_hash(h),
            Yaml::String(ref v) => {
                // Quote only when the plain form would be ambiguous or
                // change type on reload.
                if need_quotes(v) {
                    escape_str(self.writer, v)?;
                } else {
                    write!(self.writer, "{}", v)?;
                }
                Ok(())
            }
            Yaml::Boolean(v) => {
                if v {
                    self.writer.write_str("true")?;
                } else {
                    self.writer.write_str("false")?;
                }
                Ok(())
            }
            Yaml::Integer(v) => {
                write!(self.writer, "{}", v)?;
                Ok(())
            }
            Yaml::Real(ref v) => {
                // `Real` carries its original string form; write it as-is.
                write!(self.writer, "{}", v)?;
                Ok(())
            }
            Yaml::Null | Yaml::BadValue => {
                write!(self.writer, "~")?;
                Ok(())
            }
            // XXX(chenyh) Alias
            _ => Ok(()),
        }
    }

    /// Emit a block sequence; an empty one renders as `[]`.
    fn emit_array(&mut self, v: &[Yaml]) -> EmitResult {
        if v.is_empty() {
            write!(self.writer, "[]")?;
        } else {
            self.level += 1;
            for (cnt, x) in v.iter().enumerate() {
                // The first entry continues the current line; later
                // entries each start on their own indented line.
                if cnt > 0 {
                    writeln!(self.writer)?;
                    self.write_indent()?;
                }
                write!(self.writer, "-")?;
                self.emit_val(true, x)?;
            }
            self.level -= 1;
        }
        Ok(())
    }

    /// Emit a block mapping; an empty one renders as `{}`. Non-scalar
    /// keys use the explicit `? key` / `: value` form.
    fn emit_hash(&mut self, h: &Hash) -> EmitResult {
        if h.is_empty() {
            self.writer.write_str("{}")?;
        } else {
            self.level += 1;
            for (cnt, (k, v)) in h.iter().enumerate() {
                // Sequences and mappings cannot appear inline as a key.
                let complex_key = match *k {
                    Yaml::Hash(_) | Yaml::Array(_) => true,
                    _ => false,
                };
                if cnt > 0 {
                    writeln!(self.writer)?;
                    self.write_indent()?;
                }
                if complex_key {
                    write!(self.writer, "?")?;
                    self.emit_val(true, k)?;
                    writeln!(self.writer)?;
                    self.write_indent()?;
                    write!(self.writer, ":")?;
                    self.emit_val(true, v)?;
                } else {
                    self.emit_node(k)?;
                    write!(self.writer, ":")?;
                    self.emit_val(false, v)?;
                }
            }
            self.level -= 1;
        }
        Ok(())
    }

    /// Emit a yaml as a hash or array value: i.e., which should appear
    /// following a ":" or "-", either after a space, or on a new line.
    /// If `inline` is true, then the preceding characters are distinct
    /// and short enough to respect the compact flag.
    fn emit_val(&mut self, inline: bool, val: &Yaml) -> EmitResult {
        match *val {
            Yaml::Array(ref v) => {
                if (inline && self.compact) || v.is_empty() {
                    write!(self.writer, " ")?;
                } else {
                    writeln!(self.writer)?;
                    // Indent one level deeper for the nested block, then
                    // restore; emit_array manages its own level counter.
                    self.level += 1;
                    self.write_indent()?;
                    self.level -= 1;
                }
                self.emit_array(v)
            }
            Yaml::Hash(ref h) => {
                if (inline && self.compact) || h.is_empty() {
                    write!(self.writer, " ")?;
                } else {
                    writeln!(self.writer)?;
                    self.level += 1;
                    self.write_indent()?;
                    self.level -= 1;
                }
                self.emit_hash(h)
            }
            _ => {
                // Scalars always follow the ":"/"-" after a single space.
                write!(self.writer, " ")?;
                self.emit_node(val)
            }
        }
    }
}
+
/// Check if the string requires quoting.
/// Strings starting with any of the following characters must be quoted.
/// :, &, *, ?, |, -, <, >, =, !, %, @
/// Strings containing any of the following characters must be quoted.
/// {, }, [, ], ,, #, `
///
/// If the string contains any of the following control characters, it must be escaped with double quotes:
/// \0, \x01, \x02, \x03, \x04, \x05, \x06, \a, \b, \t, \n, \v, \f, \r, \x0e, \x0f, \x10, \x11, \x12, \x13, \x14, \x15, \x16, \x17, \x18, \x19, \x1a, \e, \x1c, \x1d, \x1e, \x1f, \N, \_, \L, \P
///
/// Finally, there are other cases when the strings must be quoted, no matter if you're using single or double quotes:
/// * When the string is true or false (otherwise, it would be treated as a boolean value);
/// * When the string is null or ~ (otherwise, it would be considered as a null value);
/// * When the string looks like a number, such as integers (e.g. 2, 14, etc.), floats (e.g. 2.6, 14.9) and exponential numbers (e.g. 12e7, etc.) (otherwise, it would be treated as a numeric value);
/// * NOTE: date-like strings (e.g. 2014-12-31) are deliberately NOT quoted
///   by this implementation — see `test_emit_avoid_quotes`, which expects
///   `date: 2014-12-31` to round-trip unquoted.
fn need_quotes(string: &str) -> bool {
    // A loader would strip leading/trailing spaces from a plain scalar.
    fn need_quotes_spaces(string: &str) -> bool {
        string.starts_with(' ') || string.ends_with(' ')
    }

    string.is_empty()
        || need_quotes_spaces(string)
        || string.starts_with(|character: char| {
            matches!(
                character,
                '&' | '*' | '?' | '|' | '-' | '<' | '>' | '=' | '!' | '%' | '@'
            )
        })
        || string.contains(|character: char| {
            matches!(
                character,
                ':' | '{'
                    | '}'
                    | '['
                    | ']'
                    | ','
                    | '#'
                    | '`'
                    | '\"'
                    | '\''
                    | '\\'
                    | '\0'..='\x06'
                    | '\t'
                    | '\n'
                    | '\r'
                    | '\x0e'..='\x1a'
                    | '\x1c'..='\x1f'
            )
        })
        || [
            // http://yaml.org/type/bool.html
            // Note: 'y', 'Y', 'n', 'N', is not quoted deliberately, as in libyaml. PyYAML also parse
            // them as string, not booleans, although it is violating the YAML 1.1 specification.
            // See https://github.com/dtolnay/serde-yaml/pull/83#discussion_r152628088.
            "yes", "Yes", "YES", "no", "No", "NO", "True", "TRUE", "true", "False", "FALSE",
            "false", "on", "On", "ON", "off", "Off", "OFF",
            // http://yaml.org/type/null.html
            "null", "Null", "NULL", "~",
        ]
        .contains(&string)
        || string.starts_with('.')
        || string.starts_with("0x")
        || string.parse::<i64>().is_ok()
        || string.parse::<f64>().is_ok()
}
+
#[cfg(test)]
mod test {
    use super::*;
    use crate::YamlLoader;

    /// Round-trip check: emitted output must re-parse to an equal document.
    #[test]
    fn test_emit_simple() {
        let s = "
# comment
a0 bb: val
a1:
    b1: 4
    b2: d
a2: 4 # i'm comment
a3: [1, 2, 3]
a4:
    - [a1, a2]
    - 2
";

        let docs = YamlLoader::load_from_str(&s).unwrap();
        let doc = &docs[0];
        let mut writer = String::new();
        {
            let mut emitter = YamlEmitter::new(&mut writer);
            emitter.dump(doc).unwrap();
        }
        println!("original:\n{}", s);
        println!("emitted:\n{}", writer);
        let docs_new = match YamlLoader::load_from_str(&writer) {
            Ok(y) => y,
            // `panic!(format!(...))` is deprecated and a hard error in the
            // 2021 edition; pass the format arguments to `panic!` directly.
            Err(e) => panic!("{}", e),
        };
        let doc_new = &docs_new[0];

        assert_eq!(doc, doc_new);
    }

    /// Round-trip a document exercising anchors/aliases and complex
    /// (non-scalar) mapping keys.
    #[test]
    fn test_emit_complex() {
        let s = r#"
cataloge:
  product: &coffee { name: Coffee, price: 2.5 , unit: 1l }
  product: &cookies { name: Cookies!, price: 3.40 , unit: 400g}

products:
  *coffee:
    amount: 4
  *cookies:
    amount: 4
  [1,2,3,4]:
    array key
  2.4:
    real key
  true:
    bool key
  {}:
    empty hash key
  "#;
        let docs = YamlLoader::load_from_str(&s).unwrap();
        let doc = &docs[0];
        let mut writer = String::new();
        {
            let mut emitter = YamlEmitter::new(&mut writer);
            emitter.dump(doc).unwrap();
        }
        let docs_new = match YamlLoader::load_from_str(&writer) {
            Ok(y) => y,
            // See the note in `test_emit_simple` about `panic!` arguments.
            Err(e) => panic!("{}", e),
        };
        let doc_new = &docs_new[0];
        assert_eq!(doc, doc_new);
    }

    /// The emitter must leave plain-safe strings unquoted and quote
    /// everything that would otherwise change type on reload; the output
    /// must match this fixture byte-for-byte.
    #[test]
    fn test_emit_avoid_quotes() {
        let s = r#"---
a7: 你好
boolean: "true"
boolean2: "false"
date: 2014-12-31
empty_string: ""
empty_string1: " "
empty_string2: " a"
empty_string3: " a "
exp: "12e7"
field: ":"
field2: "{"
field3: "\\"
field4: "\n"
field5: "can't avoid quote"
float: "2.6"
int: "4"
nullable: "null"
nullable2: "~"
products:
  "*coffee":
    amount: 4
  "*cookies":
    amount: 4
  ".milk":
    amount: 1
  "2.4": real key
  "[1,2,3,4]": array key
  "true": bool key
  "{}": empty hash key
x: test
y: avoid quoting here
z: string with spaces"#;

        let docs = YamlLoader::load_from_str(&s).unwrap();
        let doc = &docs[0];
        let mut writer = String::new();
        {
            let mut emitter = YamlEmitter::new(&mut writer);
            emitter.dump(doc).unwrap();
        }

        assert_eq!(s, writer, "actual:\n\n{}\n", writer);
    }

    /// Strings that a YAML 1.1 loader would read back as booleans or null
    /// must be emitted quoted; genuine booleans stay plain, and so do the
    /// single letters y/Y/n/N (deliberately, as in libyaml).
    #[test]
    fn emit_quoted_bools() {
        let input = r#"---
string0: yes
string1: no
string2: "true"
string3: "false"
string4: "~"
null0: ~
[true, false]: real_bools
[True, TRUE, False, FALSE, y,Y,yes,Yes,YES,n,N,no,No,NO,on,On,ON,off,Off,OFF]: false_bools
bool0: true
bool1: false"#;
        let expected = r#"---
string0: "yes"
string1: "no"
string2: "true"
string3: "false"
string4: "~"
null0: ~
? - true
  - false
: real_bools
? - "True"
  - "TRUE"
  - "False"
  - "FALSE"
  - y
  - Y
  - "yes"
  - "Yes"
  - "YES"
  - n
  - N
  - "no"
  - "No"
  - "NO"
  - "on"
  - "On"
  - "ON"
  - "off"
  - "Off"
  - "OFF"
: false_bools
bool0: true
bool1: false"#;

        let docs = YamlLoader::load_from_str(&input).unwrap();
        let doc = &docs[0];
        let mut writer = String::new();
        {
            let mut emitter = YamlEmitter::new(&mut writer);
            emitter.dump(doc).unwrap();
        }

        assert_eq!(
            expected, writer,
            "expected:\n{}\nactual:\n{}\n",
            expected, writer
        );
    }

    #[test]
    fn test_empty_and_nested() {
        test_empty_and_nested_flag(false)
    }

    #[test]
    fn test_empty_and_nested_compact() {
        test_empty_and_nested_flag(true)
    }

    /// Shared fixture: nested empty hash/array values, checked both with
    /// and without compact inline notation.
    fn test_empty_and_nested_flag(compact: bool) {
        let s = if compact {
            r#"---
a:
  b:
    c: hello
  d: {}
e:
  - f
  - g
  - h: []"#
        } else {
            r#"---
a:
  b:
    c: hello
  d: {}
e:
  - f
  - g
  -
    h: []"#
        };

        let docs = YamlLoader::load_from_str(&s).unwrap();
        let doc = &docs[0];
        let mut writer = String::new();
        {
            let mut emitter = YamlEmitter::new(&mut writer);
            emitter.compact(compact);
            emitter.dump(doc).unwrap();
        }

        assert_eq!(s, writer);
    }

    /// Arrays nested directly inside arrays must keep their alignment.
    #[test]
    fn test_nested_arrays() {
        let s = r#"---
a:
  - b
  - - c
    - d
  - - e
    - f"#;

        let docs = YamlLoader::load_from_str(&s).unwrap();
        let doc = &docs[0];
        let mut writer = String::new();
        {
            let mut emitter = YamlEmitter::new(&mut writer);
            emitter.dump(doc).unwrap();
        }
        println!("original:\n{}", s);
        println!("emitted:\n{}", writer);

        assert_eq!(s, writer);
    }

    /// Deeper array-in-array nesting than `test_nested_arrays`.
    #[test]
    fn test_deeply_nested_arrays() {
        let s = r#"---
a:
  - b
  - - c
    - d
  - - e
    - - f
      - - e"#;

        let docs = YamlLoader::load_from_str(&s).unwrap();
        let doc = &docs[0];
        let mut writer = String::new();
        {
            let mut emitter = YamlEmitter::new(&mut writer);
            emitter.dump(doc).unwrap();
        }
        println!("original:\n{}", s);
        println!("emitted:\n{}", writer);

        assert_eq!(s, writer);
    }

    /// Hashes nested inside hashes must indent one level per depth.
    #[test]
    fn test_nested_hashes() {
        let s = r#"---
a:
  b:
    c:
      d:
        e: f"#;

        let docs = YamlLoader::load_from_str(&s).unwrap();
        let doc = &docs[0];
        let mut writer = String::new();
        {
            let mut emitter = YamlEmitter::new(&mut writer);
            emitter.dump(doc).unwrap();
        }
        println!("original:\n{}", s);
        println!("emitted:\n{}", writer);

        assert_eq!(s, writer);
    }
}
diff --git a/third_party/rust/yaml-rust/src/lib.rs b/third_party/rust/yaml-rust/src/lib.rs
new file mode 100644
index 0000000000..6cf87c7c5e
--- /dev/null
+++ b/third_party/rust/yaml-rust/src/lib.rs
@@ -0,0 +1,121 @@
+// Copyright 2015, Yuheng Chen. See the LICENSE file at the top-level
+// directory of this distribution.
+
+//! YAML 1.2 implementation in pure Rust.
+//!
+//! # Usage
+//!
+//! This crate is [on github](https://github.com/chyh1990/yaml-rust) and can be
+//! used by adding `yaml-rust` to the dependencies in your project's `Cargo.toml`.
+//!
+//! ```toml
+//! [dependencies.yaml-rust]
+//! git = "https://github.com/chyh1990/yaml-rust.git"
+//! ```
+//!
+//! And this in your crate root:
+//!
+//! ```rust
+//! extern crate yaml_rust;
+//! ```
+//!
+//! Parse a string into `Vec<Yaml>` and then serialize it as a YAML string.
+//!
+//! # Examples
+//!
+//! ```
+//! use yaml_rust::{YamlLoader, YamlEmitter};
+//!
+//! let docs = YamlLoader::load_from_str("[1, 2, 3]").unwrap();
+//! let doc = &docs[0]; // select the first document
+//! assert_eq!(doc[0].as_i64().unwrap(), 1); // access elements by index
+//!
+//! let mut out_str = String::new();
+//! let mut emitter = YamlEmitter::new(&mut out_str);
+//! emitter.dump(doc).unwrap(); // dump the YAML object to a String
+//!
+//! ```
+
+#![doc(html_root_url = "https://docs.rs/yaml-rust/0.4.5")]
+#![cfg_attr(feature = "cargo-clippy", allow(renamed_and_removed_lints))]
+#![cfg_attr(feature = "cargo-clippy", warn(cyclomatic_complexity))]
+#![cfg_attr(
+ feature = "cargo-clippy",
+ allow(match_same_arms, should_implement_trait)
+)]
+
+extern crate linked_hash_map;
+
+pub mod emitter;
+pub mod parser;
+pub mod scanner;
+pub mod yaml;
+
+// reexport key APIs
+pub use crate::emitter::{EmitError, YamlEmitter};
+pub use crate::parser::Event;
+pub use crate::scanner::ScanError;
+pub use crate::yaml::{Yaml, YamlLoader};
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke-test of the public load + emit API. The YAML literal's
    /// nested-mapping indentation is restored here; without it the
    /// fixture would not parse.
    #[test]
    fn test_api() {
        let s = "
# from yaml-cpp example
- name: Ogre
  position: [0, 5, 0]
  powers:
    - name: Club
      damage: 10
    - name: Fist
      damage: 8
- name: Dragon
  position: [1, 0, 10]
  powers:
    - name: Fire Breath
      damage: 25
    - name: Claws
      damage: 15
- name: Wizard
  position: [5, -3, 0]
  powers:
    - name: Acid Rain
      damage: 50
    - name: Staff
      damage: 3
";
        let docs = YamlLoader::load_from_str(s).unwrap();
        let doc = &docs[0];

        assert_eq!(doc[0]["name"].as_str().unwrap(), "Ogre");

        let mut writer = String::new();
        {
            let mut emitter = YamlEmitter::new(&mut writer);
            emitter.dump(doc).unwrap();
        }

        assert!(!writer.is_empty());
    }

    /// Helper proving `?` propagates `ScanError` from the loader.
    fn try_fail(s: &str) -> Result<Vec<Yaml>, ScanError> {
        let t = YamlLoader::load_from_str(s)?;
        Ok(t)
    }

    /// Malformed input must error through both the direct and `?` paths.
    #[test]
    fn test_fail() {
        let s = "
# syntax error
scalar
key: [1, 2]]
key1:a2
";
        assert!(YamlLoader::load_from_str(s).is_err());
        assert!(try_fail(s).is_err());
    }
}
diff --git a/third_party/rust/yaml-rust/src/parser.rs b/third_party/rust/yaml-rust/src/parser.rs
new file mode 100644
index 0000000000..4a63146f13
--- /dev/null
+++ b/third_party/rust/yaml-rust/src/parser.rs
@@ -0,0 +1,858 @@
+use crate::scanner::*;
+use std::collections::HashMap;
+
/// Internal parser state-machine states. Each variant names the grammar
/// production the parser expects to handle next; `states` in `Parser`
/// stacks the state to return to when a nested production completes.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
enum State {
    StreamStart,
    ImplicitDocumentStart,
    DocumentStart,
    DocumentContent,
    DocumentEnd,
    BlockNode,
    // BlockNodeOrIndentlessSequence,
    // FlowNode,
    BlockSequenceFirstEntry,
    BlockSequenceEntry,
    IndentlessSequenceEntry,
    BlockMappingFirstKey,
    BlockMappingKey,
    BlockMappingValue,
    FlowSequenceFirstEntry,
    FlowSequenceEntry,
    FlowSequenceEntryMappingKey,
    FlowSequenceEntryMappingValue,
    FlowSequenceEntryMappingEnd,
    FlowMappingFirstKey,
    FlowMappingKey,
    FlowMappingValue,
    FlowMappingEmptyValue,
    End,
}
+
/// `Event` is used with the low-level event base parsing API,
/// see `EventReceiver` trait.
#[derive(Clone, PartialEq, Debug, Eq)]
pub enum Event {
    /// Reserved for internal use
    Nothing,
    /// First event of every stream.
    StreamStart,
    /// Last event of every stream.
    StreamEnd,
    /// Start of a document.
    DocumentStart,
    /// End of a document.
    DocumentEnd,
    /// Refer to an anchor ID
    Alias(usize),
    /// Value, style, anchor_id, tag
    Scalar(String, TScalarStyle, usize, Option<TokenType>),
    /// Anchor ID
    SequenceStart(usize),
    /// End of the most recently started sequence.
    SequenceEnd,
    /// Anchor ID
    MappingStart(usize),
    /// End of the most recently started mapping.
    MappingEnd,
}
+
impl Event {
    /// A plain null scalar (`~`), used where the grammar allows an
    /// omitted node such as a missing mapping value.
    fn empty_scalar() -> Event {
        // a null scalar
        Event::Scalar("~".to_owned(), TScalarStyle::Plain, 0, None)
    }

    /// An empty scalar carrying an anchor id and/or tag, used when node
    /// properties were parsed but no node content followed (spec ex. 7.2).
    fn empty_scalar_with_anchor(anchor: usize, tag: Option<TokenType>) -> Event {
        Event::Scalar("".to_owned(), TScalarStyle::Plain, anchor, tag)
    }
}
+
/// Event-based YAML parser driving a `Scanner` over a char iterator.
#[derive(Debug)]
pub struct Parser<T> {
    // Token source.
    scanner: Scanner<T>,
    // Stacked states to resume when nested productions finish.
    states: Vec<State>,
    // Current state-machine state.
    state: State,
    // Saved source positions (not pushed anywhere in the visible code).
    marks: Vec<Marker>,
    // One-token lookahead buffer filled by `peek_token`.
    token: Option<Token>,
    // One-event lookahead buffer filled by `peek`.
    current: Option<(Event, Marker)>,
    // Maps anchor names to the numeric ids reported in events.
    anchors: HashMap<String, usize>,
    // Next anchor id to hand out; valid ids start from 1 (0 = "no anchor").
    anchor_id: usize,
}
+
/// Receiver for parse events without source-position information.
pub trait EventReceiver {
    fn on_event(&mut self, ev: Event);
}

/// Receiver for parse events together with the source position
/// (`Marker`) at which each event was produced.
pub trait MarkedEventReceiver {
    fn on_event(&mut self, ev: Event, _mark: Marker);
}

/// Every plain `EventReceiver` can act as a `MarkedEventReceiver` by
/// discarding the marker.
impl<R: EventReceiver> MarkedEventReceiver for R {
    fn on_event(&mut self, ev: Event, _mark: Marker) {
        self.on_event(ev)
    }
}

/// One parsed event plus its position, or a scan error.
pub type ParseResult = Result<(Event, Marker), ScanError>;
+
+impl<T: Iterator<Item = char>> Parser<T> {
+ pub fn new(src: T) -> Parser<T> {
+ Parser {
+ scanner: Scanner::new(src),
+ states: Vec::new(),
+ state: State::StreamStart,
+ marks: Vec::new(),
+ token: None,
+ current: None,
+
+ anchors: HashMap::new(),
+ // valid anchor_id starts from 1
+ anchor_id: 1,
+ }
+ }
+
+ pub fn peek(&mut self) -> Result<&(Event, Marker), ScanError> {
+ match self.current {
+ Some(ref x) => Ok(x),
+ None => {
+ self.current = Some(self.next()?);
+ self.peek()
+ }
+ }
+ }
+
+ pub fn next(&mut self) -> ParseResult {
+ match self.current {
+ None => self.parse(),
+ Some(_) => Ok(self.current.take().unwrap()),
+ }
+ }
+
+ fn peek_token(&mut self) -> Result<&Token, ScanError> {
+ match self.token {
+ None => {
+ self.token = Some(self.scan_next_token()?);
+ Ok(self.token.as_ref().unwrap())
+ }
+ Some(ref tok) => Ok(tok),
+ }
+ }
+
+ fn scan_next_token(&mut self) -> Result<Token, ScanError> {
+ let token = self.scanner.next();
+ match token {
+ None => match self.scanner.get_error() {
+ None => Err(ScanError::new(self.scanner.mark(), "unexpected eof")),
+ Some(e) => Err(e),
+ },
+ Some(tok) => Ok(tok),
+ }
+ }
+
+ fn fetch_token(&mut self) -> Token {
+ self.token
+ .take()
+ .expect("fetch_token needs to be preceded by peek_token")
+ }
+
+ fn skip(&mut self) {
+ self.token = None;
+ //self.peek_token();
+ }
+ fn pop_state(&mut self) {
+ self.state = self.states.pop().unwrap()
+ }
+ fn push_state(&mut self, state: State) {
+ self.states.push(state);
+ }
+
+ fn parse(&mut self) -> ParseResult {
+ if self.state == State::End {
+ return Ok((Event::StreamEnd, self.scanner.mark()));
+ }
+ let (ev, mark) = self.state_machine()?;
+ // println!("EV {:?}", ev);
+ Ok((ev, mark))
+ }
+
+ pub fn load<R: MarkedEventReceiver>(
+ &mut self,
+ recv: &mut R,
+ multi: bool,
+ ) -> Result<(), ScanError> {
+ if !self.scanner.stream_started() {
+ let (ev, mark) = self.next()?;
+ assert_eq!(ev, Event::StreamStart);
+ recv.on_event(ev, mark);
+ }
+
+ if self.scanner.stream_ended() {
+ // XXX has parsed?
+ recv.on_event(Event::StreamEnd, self.scanner.mark());
+ return Ok(());
+ }
+ loop {
+ let (ev, mark) = self.next()?;
+ if ev == Event::StreamEnd {
+ recv.on_event(ev, mark);
+ return Ok(());
+ }
+ // clear anchors before a new document
+ self.anchors.clear();
+ self.load_document(ev, mark, recv)?;
+ if !multi {
+ break;
+ }
+ }
+ Ok(())
+ }
+
+ fn load_document<R: MarkedEventReceiver>(
+ &mut self,
+ first_ev: Event,
+ mark: Marker,
+ recv: &mut R,
+ ) -> Result<(), ScanError> {
+ assert_eq!(first_ev, Event::DocumentStart);
+ recv.on_event(first_ev, mark);
+
+ let (ev, mark) = self.next()?;
+ self.load_node(ev, mark, recv)?;
+
+ // DOCUMENT-END is expected.
+ let (ev, mark) = self.next()?;
+ assert_eq!(ev, Event::DocumentEnd);
+ recv.on_event(ev, mark);
+
+ Ok(())
+ }
+
+ fn load_node<R: MarkedEventReceiver>(
+ &mut self,
+ first_ev: Event,
+ mark: Marker,
+ recv: &mut R,
+ ) -> Result<(), ScanError> {
+ match first_ev {
+ Event::Alias(..) | Event::Scalar(..) => {
+ recv.on_event(first_ev, mark);
+ Ok(())
+ }
+ Event::SequenceStart(_) => {
+ recv.on_event(first_ev, mark);
+ self.load_sequence(recv)
+ }
+ Event::MappingStart(_) => {
+ recv.on_event(first_ev, mark);
+ self.load_mapping(recv)
+ }
+ _ => {
+ println!("UNREACHABLE EVENT: {:?}", first_ev);
+ unreachable!();
+ }
+ }
+ }
+
+ fn load_mapping<R: MarkedEventReceiver>(&mut self, recv: &mut R) -> Result<(), ScanError> {
+ let (mut key_ev, mut key_mark) = self.next()?;
+ while key_ev != Event::MappingEnd {
+ // key
+ self.load_node(key_ev, key_mark, recv)?;
+
+ // value
+ let (ev, mark) = self.next()?;
+ self.load_node(ev, mark, recv)?;
+
+ // next event
+ let (ev, mark) = self.next()?;
+ key_ev = ev;
+ key_mark = mark;
+ }
+ recv.on_event(key_ev, key_mark);
+ Ok(())
+ }
+
+ fn load_sequence<R: MarkedEventReceiver>(&mut self, recv: &mut R) -> Result<(), ScanError> {
+ let (mut ev, mut mark) = self.next()?;
+ while ev != Event::SequenceEnd {
+ self.load_node(ev, mark, recv)?;
+
+ // next event
+ let (next_ev, next_mark) = self.next()?;
+ ev = next_ev;
+ mark = next_mark;
+ }
+ recv.on_event(ev, mark);
+ Ok(())
+ }
+
+ fn state_machine(&mut self) -> ParseResult {
+ // let next_tok = self.peek_token()?;
+ // println!("cur_state {:?}, next tok: {:?}", self.state, next_tok);
+ match self.state {
+ State::StreamStart => self.stream_start(),
+
+ State::ImplicitDocumentStart => self.document_start(true),
+ State::DocumentStart => self.document_start(false),
+ State::DocumentContent => self.document_content(),
+ State::DocumentEnd => self.document_end(),
+
+ State::BlockNode => self.parse_node(true, false),
+ // State::BlockNodeOrIndentlessSequence => self.parse_node(true, true),
+ // State::FlowNode => self.parse_node(false, false),
+ State::BlockMappingFirstKey => self.block_mapping_key(true),
+ State::BlockMappingKey => self.block_mapping_key(false),
+ State::BlockMappingValue => self.block_mapping_value(),
+
+ State::BlockSequenceFirstEntry => self.block_sequence_entry(true),
+ State::BlockSequenceEntry => self.block_sequence_entry(false),
+
+ State::FlowSequenceFirstEntry => self.flow_sequence_entry(true),
+ State::FlowSequenceEntry => self.flow_sequence_entry(false),
+
+ State::FlowMappingFirstKey => self.flow_mapping_key(true),
+ State::FlowMappingKey => self.flow_mapping_key(false),
+ State::FlowMappingValue => self.flow_mapping_value(false),
+
+ State::IndentlessSequenceEntry => self.indentless_sequence_entry(),
+
+ State::FlowSequenceEntryMappingKey => self.flow_sequence_entry_mapping_key(),
+ State::FlowSequenceEntryMappingValue => self.flow_sequence_entry_mapping_value(),
+ State::FlowSequenceEntryMappingEnd => self.flow_sequence_entry_mapping_end(),
+ State::FlowMappingEmptyValue => self.flow_mapping_value(true),
+
+ /* impossible */
+ State::End => unreachable!(),
+ }
+ }
+
+ fn stream_start(&mut self) -> ParseResult {
+ match *self.peek_token()? {
+ Token(mark, TokenType::StreamStart(_)) => {
+ self.state = State::ImplicitDocumentStart;
+ self.skip();
+ Ok((Event::StreamStart, mark))
+ }
+ Token(mark, _) => Err(ScanError::new(mark, "did not find expected <stream-start>")),
+ }
+ }
+
+ fn document_start(&mut self, implicit: bool) -> ParseResult {
+ if !implicit {
+ while let TokenType::DocumentEnd = self.peek_token()?.1 {
+ self.skip();
+ }
+ }
+
+ match *self.peek_token()? {
+ Token(mark, TokenType::StreamEnd) => {
+ self.state = State::End;
+ self.skip();
+ Ok((Event::StreamEnd, mark))
+ }
+ Token(_, TokenType::VersionDirective(..))
+ | Token(_, TokenType::TagDirective(..))
+ | Token(_, TokenType::DocumentStart) => {
+ // explicit document
+ self._explicit_document_start()
+ }
+ Token(mark, _) if implicit => {
+ self.parser_process_directives()?;
+ self.push_state(State::DocumentEnd);
+ self.state = State::BlockNode;
+ Ok((Event::DocumentStart, mark))
+ }
+ _ => {
+ // explicit document
+ self._explicit_document_start()
+ }
+ }
+ }
+
+ fn parser_process_directives(&mut self) -> Result<(), ScanError> {
+ loop {
+ match self.peek_token()?.1 {
+ TokenType::VersionDirective(_, _) => {
+ // XXX parsing with warning according to spec
+ //if major != 1 || minor > 2 {
+ // return Err(ScanError::new(tok.0,
+ // "found incompatible YAML document"));
+ //}
+ }
+ TokenType::TagDirective(..) => {
+ // TODO add tag directive
+ }
+ _ => break,
+ }
+ self.skip();
+ }
+ // TODO tag directive
+ Ok(())
+ }
+
+ fn _explicit_document_start(&mut self) -> ParseResult {
+ self.parser_process_directives()?;
+ match *self.peek_token()? {
+ Token(mark, TokenType::DocumentStart) => {
+ self.push_state(State::DocumentEnd);
+ self.state = State::DocumentContent;
+ self.skip();
+ Ok((Event::DocumentStart, mark))
+ }
+ Token(mark, _) => Err(ScanError::new(
+ mark,
+ "did not find expected <document start>",
+ )),
+ }
+ }
+
+ fn document_content(&mut self) -> ParseResult {
+ match *self.peek_token()? {
+ Token(mark, TokenType::VersionDirective(..))
+ | Token(mark, TokenType::TagDirective(..))
+ | Token(mark, TokenType::DocumentStart)
+ | Token(mark, TokenType::DocumentEnd)
+ | Token(mark, TokenType::StreamEnd) => {
+ self.pop_state();
+ // empty scalar
+ Ok((Event::empty_scalar(), mark))
+ }
+ _ => self.parse_node(true, false),
+ }
+ }
+
+ fn document_end(&mut self) -> ParseResult {
+ let mut _implicit = true;
+ let marker: Marker = match *self.peek_token()? {
+ Token(mark, TokenType::DocumentEnd) => {
+ self.skip();
+ _implicit = false;
+ mark
+ }
+ Token(mark, _) => mark,
+ };
+
+ // TODO tag handling
+ self.state = State::DocumentStart;
+ Ok((Event::DocumentEnd, marker))
+ }
+
+ fn register_anchor(&mut self, name: String, _: &Marker) -> Result<usize, ScanError> {
+ // anchors can be overridden/reused
+ // if self.anchors.contains_key(name) {
+ // return Err(ScanError::new(*mark,
+ // "while parsing anchor, found duplicated anchor"));
+ // }
+ let new_id = self.anchor_id;
+ self.anchor_id += 1;
+ self.anchors.insert(name, new_id);
+ Ok(new_id)
+ }
+
+ fn parse_node(&mut self, block: bool, indentless_sequence: bool) -> ParseResult {
+ let mut anchor_id = 0;
+ let mut tag = None;
+ match *self.peek_token()? {
+ Token(_, TokenType::Alias(_)) => {
+ self.pop_state();
+ if let Token(mark, TokenType::Alias(name)) = self.fetch_token() {
+ match self.anchors.get(&name) {
+ None => {
+ return Err(ScanError::new(
+ mark,
+ "while parsing node, found unknown anchor",
+ ))
+ }
+ Some(id) => return Ok((Event::Alias(*id), mark)),
+ }
+ } else {
+ unreachable!()
+ }
+ }
+ Token(_, TokenType::Anchor(_)) => {
+ if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() {
+ anchor_id = self.register_anchor(name, &mark)?;
+ if let TokenType::Tag(..) = self.peek_token()?.1 {
+ if let tg @ TokenType::Tag(..) = self.fetch_token().1 {
+ tag = Some(tg);
+ } else {
+ unreachable!()
+ }
+ }
+ } else {
+ unreachable!()
+ }
+ }
+ Token(_, TokenType::Tag(..)) => {
+ if let tg @ TokenType::Tag(..) = self.fetch_token().1 {
+ tag = Some(tg);
+ if let TokenType::Anchor(_) = self.peek_token()?.1 {
+ if let Token(mark, TokenType::Anchor(name)) = self.fetch_token() {
+ anchor_id = self.register_anchor(name, &mark)?;
+ } else {
+ unreachable!()
+ }
+ }
+ } else {
+ unreachable!()
+ }
+ }
+ _ => {}
+ }
+ match *self.peek_token()? {
+ Token(mark, TokenType::BlockEntry) if indentless_sequence => {
+ self.state = State::IndentlessSequenceEntry;
+ Ok((Event::SequenceStart(anchor_id), mark))
+ }
+ Token(_, TokenType::Scalar(..)) => {
+ self.pop_state();
+ if let Token(mark, TokenType::Scalar(style, v)) = self.fetch_token() {
+ Ok((Event::Scalar(v, style, anchor_id, tag), mark))
+ } else {
+ unreachable!()
+ }
+ }
+ Token(mark, TokenType::FlowSequenceStart) => {
+ self.state = State::FlowSequenceFirstEntry;
+ Ok((Event::SequenceStart(anchor_id), mark))
+ }
+ Token(mark, TokenType::FlowMappingStart) => {
+ self.state = State::FlowMappingFirstKey;
+ Ok((Event::MappingStart(anchor_id), mark))
+ }
+ Token(mark, TokenType::BlockSequenceStart) if block => {
+ self.state = State::BlockSequenceFirstEntry;
+ Ok((Event::SequenceStart(anchor_id), mark))
+ }
+ Token(mark, TokenType::BlockMappingStart) if block => {
+ self.state = State::BlockMappingFirstKey;
+ Ok((Event::MappingStart(anchor_id), mark))
+ }
+ // ex 7.2, an empty scalar can follow a secondary tag
+ Token(mark, _) if tag.is_some() || anchor_id > 0 => {
+ self.pop_state();
+ Ok((Event::empty_scalar_with_anchor(anchor_id, tag), mark))
+ }
+ Token(mark, _) => Err(ScanError::new(
+ mark,
+ "while parsing a node, did not find expected node content",
+ )),
+ }
+ }
+
+ fn block_mapping_key(&mut self, first: bool) -> ParseResult {
+ // skip BlockMappingStart
+ if first {
+ let _ = self.peek_token()?;
+ //self.marks.push(tok.0);
+ self.skip();
+ }
+ match *self.peek_token()? {
+ Token(_, TokenType::Key) => {
+ self.skip();
+ match *self.peek_token()? {
+ Token(mark, TokenType::Key)
+ | Token(mark, TokenType::Value)
+ | Token(mark, TokenType::BlockEnd) => {
+ self.state = State::BlockMappingValue;
+ // empty scalar
+ Ok((Event::empty_scalar(), mark))
+ }
+ _ => {
+ self.push_state(State::BlockMappingValue);
+ self.parse_node(true, true)
+ }
+ }
+ }
+ // XXX(chenyh): libyaml failed to parse spec 1.2, ex8.18
+ Token(mark, TokenType::Value) => {
+ self.state = State::BlockMappingValue;
+ Ok((Event::empty_scalar(), mark))
+ }
+ Token(mark, TokenType::BlockEnd) => {
+ self.pop_state();
+ self.skip();
+ Ok((Event::MappingEnd, mark))
+ }
+ Token(mark, _) => Err(ScanError::new(
+ mark,
+ "while parsing a block mapping, did not find expected key",
+ )),
+ }
+ }
+
    /// Parse the value half of a block mapping entry.
    ///
    /// A `Value` token followed immediately by another structural token means
    /// the value was omitted, producing an empty scalar; a missing `Value`
    /// token altogether also yields an empty scalar.
    fn block_mapping_value(&mut self) -> ParseResult {
        match *self.peek_token()? {
            Token(_, TokenType::Value) => {
                self.skip();
                match *self.peek_token()? {
                    Token(mark, TokenType::Key)
                    | Token(mark, TokenType::Value)
                    | Token(mark, TokenType::BlockEnd) => {
                        self.state = State::BlockMappingKey;
                        // empty scalar
                        Ok((Event::empty_scalar(), mark))
                    }
                    _ => {
                        self.push_state(State::BlockMappingKey);
                        self.parse_node(true, true)
                    }
                }
            }
            Token(mark, _) => {
                self.state = State::BlockMappingKey;
                // empty scalar
                Ok((Event::empty_scalar(), mark))
            }
        }
    }
+
    /// Parse one key of a flow mapping, or its terminating `}`.
    ///
    /// Expects a `,` separator before every entry but the first, then either
    /// an explicit `? key`, a bare `:` (empty key), a plain node used as a
    /// key, or `FlowMappingEnd`, which falls through to emit `MappingEnd`.
    fn flow_mapping_key(&mut self, first: bool) -> ParseResult {
        // Skip the FlowMappingStart token.
        if first {
            let _ = self.peek_token()?;
            self.skip();
        }
        let marker: Marker =
            {
                match *self.peek_token()? {
                    Token(mark, TokenType::FlowMappingEnd) => mark,
                    Token(mark, _) => {
                        if !first {
                            match *self.peek_token()? {
                                Token(_, TokenType::FlowEntry) => self.skip(),
                                Token(mark, _) => return Err(ScanError::new(mark,
                                    "while parsing a flow mapping, did not find expected ',' or '}'"))
                            }
                        }

                        match *self.peek_token()? {
                            Token(_, TokenType::Key) => {
                                self.skip();
                                match *self.peek_token()? {
                                    Token(mark, TokenType::Value)
                                    | Token(mark, TokenType::FlowEntry)
                                    | Token(mark, TokenType::FlowMappingEnd) => {
                                        // `? ` with no key: empty scalar key.
                                        self.state = State::FlowMappingValue;
                                        return Ok((Event::empty_scalar(), mark));
                                    }
                                    _ => {
                                        self.push_state(State::FlowMappingValue);
                                        return self.parse_node(false, false);
                                    }
                                }
                            }
                            Token(marker, TokenType::Value) => {
                                // Bare `:` — the key is empty.
                                self.state = State::FlowMappingValue;
                                return Ok((Event::empty_scalar(), marker));
                            }
                            Token(_, TokenType::FlowMappingEnd) => (),
                            _ => {
                                self.push_state(State::FlowMappingEmptyValue);
                                return self.parse_node(false, false);
                            }
                        }

                        mark
                    }
                }
            };

        // Only reached on FlowMappingEnd: close the mapping.
        self.pop_state();
        self.skip();
        Ok((Event::MappingEnd, marker))
    }
+
    /// Parse the value of a flow mapping entry.
    ///
    /// With `empty` (the `FlowMappingEmptyValue` state) the caller already
    /// knows the value is missing, and an empty scalar is emitted immediately.
    fn flow_mapping_value(&mut self, empty: bool) -> ParseResult {
        let mark: Marker = {
            if empty {
                let Token(mark, _) = *self.peek_token()?;
                self.state = State::FlowMappingKey;
                return Ok((Event::empty_scalar(), mark));
            } else {
                match *self.peek_token()? {
                    Token(marker, TokenType::Value) => {
                        self.skip();
                        match self.peek_token()?.1 {
                            // `:` followed by `,` or `}`: value omitted.
                            TokenType::FlowEntry | TokenType::FlowMappingEnd => {}
                            _ => {
                                self.push_state(State::FlowMappingKey);
                                return self.parse_node(false, false);
                            }
                        }
                        marker
                    }
                    Token(marker, _) => marker,
                }
            }
        };

        self.state = State::FlowMappingKey;
        Ok((Event::empty_scalar(), mark))
    }
+
    /// Parse one entry of a flow sequence, or its terminating `]`.
    ///
    /// A `Key` token inside a flow sequence starts an implicit single-pair
    /// mapping (`[a: b]`), emitted as a `MappingStart` with no anchor (id 0).
    fn flow_sequence_entry(&mut self, first: bool) -> ParseResult {
        // Skip the FlowSequenceStart token.
        if first {
            let _ = self.peek_token()?;
            self.skip();
        }
        match *self.peek_token()? {
            Token(mark, TokenType::FlowSequenceEnd) => {
                self.pop_state();
                self.skip();
                return Ok((Event::SequenceEnd, mark));
            }
            Token(_, TokenType::FlowEntry) if !first => {
                self.skip();
            }
            Token(mark, _) if !first => {
                // Entries after the first must be separated by ','.
                return Err(ScanError::new(
                    mark,
                    "while parsing a flow sequence, expected ',' or ']'",
                ));
            }
            _ => { /* next */ }
        }
        match *self.peek_token()? {
            Token(mark, TokenType::FlowSequenceEnd) => {
                // Trailing ',' before ']' is allowed.
                self.pop_state();
                self.skip();
                Ok((Event::SequenceEnd, mark))
            }
            Token(mark, TokenType::Key) => {
                self.state = State::FlowSequenceEntryMappingKey;
                self.skip();
                Ok((Event::MappingStart(0), mark))
            }
            _ => {
                self.push_state(State::FlowSequenceEntry);
                self.parse_node(false, false)
            }
        }
    }
+
    /// Parse an entry of an indentless sequence (a `-` list at the same
    /// indentation as its mapping key). Any non-`BlockEntry` token ends the
    /// sequence without being consumed.
    fn indentless_sequence_entry(&mut self) -> ParseResult {
        match *self.peek_token()? {
            Token(_, TokenType::BlockEntry) => (),
            Token(mark, _) => {
                self.pop_state();
                return Ok((Event::SequenceEnd, mark));
            }
        }
        self.skip();
        match *self.peek_token()? {
            Token(mark, TokenType::BlockEntry)
            | Token(mark, TokenType::Key)
            | Token(mark, TokenType::Value)
            | Token(mark, TokenType::BlockEnd) => {
                // `-` followed by another structural token: empty entry.
                self.state = State::IndentlessSequenceEntry;
                Ok((Event::empty_scalar(), mark))
            }
            _ => {
                self.push_state(State::IndentlessSequenceEntry);
                self.parse_node(true, false)
            }
        }
    }
+
    /// Parse one `- item` entry of a block sequence, or its `BlockEnd`.
    fn block_sequence_entry(&mut self, first: bool) -> ParseResult {
        // Skip the BLOCK-SEQUENCE-START token.
        if first {
            let _ = self.peek_token()?;
            self.skip();
        }
        match *self.peek_token()? {
            Token(mark, TokenType::BlockEnd) => {
                self.pop_state();
                self.skip();
                Ok((Event::SequenceEnd, mark))
            }
            Token(_, TokenType::BlockEntry) => {
                self.skip();
                match *self.peek_token()? {
                    Token(mark, TokenType::BlockEntry) | Token(mark, TokenType::BlockEnd) => {
                        // `-` immediately followed by `-` or dedent: empty entry.
                        self.state = State::BlockSequenceEntry;
                        Ok((Event::empty_scalar(), mark))
                    }
                    _ => {
                        self.push_state(State::BlockSequenceEntry);
                        self.parse_node(true, false)
                    }
                }
            }
            Token(mark, _) => Err(ScanError::new(
                mark,
                "while parsing a block collection, did not find expected '-' indicator",
            )),
        }
    }
+
    /// Parse the key of the implicit single-pair mapping started inside a
    /// flow sequence (after its `Key` token has been consumed).
    fn flow_sequence_entry_mapping_key(&mut self) -> ParseResult {
        match *self.peek_token()? {
            Token(mark, TokenType::Value)
            | Token(mark, TokenType::FlowEntry)
            | Token(mark, TokenType::FlowSequenceEnd) => {
                // Missing key: consume the token and emit an empty scalar.
                self.skip();
                self.state = State::FlowSequenceEntryMappingValue;
                Ok((Event::empty_scalar(), mark))
            }
            _ => {
                self.push_state(State::FlowSequenceEntryMappingValue);
                self.parse_node(false, false)
            }
        }
    }
+
    /// Parse the value of the implicit single-pair mapping inside a flow
    /// sequence; a missing `Value` token yields an empty scalar.
    fn flow_sequence_entry_mapping_value(&mut self) -> ParseResult {
        match *self.peek_token()? {
            Token(_, TokenType::Value) => {
                self.skip();
                // NOTE(review): this assignment is overwritten on every path
                // below; kept for parity with the libyaml state machine.
                self.state = State::FlowSequenceEntryMappingValue;
                match *self.peek_token()? {
                    Token(mark, TokenType::FlowEntry) | Token(mark, TokenType::FlowSequenceEnd) => {
                        // `:` followed by `,` or `]`: value omitted.
                        self.state = State::FlowSequenceEntryMappingEnd;
                        Ok((Event::empty_scalar(), mark))
                    }
                    _ => {
                        self.push_state(State::FlowSequenceEntryMappingEnd);
                        self.parse_node(false, false)
                    }
                }
            }
            Token(mark, _) => {
                self.state = State::FlowSequenceEntryMappingEnd;
                Ok((Event::empty_scalar(), mark))
            }
        }
    }
+
    /// Close the implicit single-pair mapping opened inside a flow sequence
    /// and resume regular flow-sequence parsing.
    fn flow_sequence_entry_mapping_end(&mut self) -> ParseResult {
        self.state = State::FlowSequenceEntry;
        Ok((Event::MappingEnd, self.scanner.mark()))
    }
+}
+
#[cfg(test)]
mod test {
    use super::{Event, Parser};

    /// `peek()` must return exactly the event that the following `next()`
    /// yields, for every event up to and including `StreamEnd`.
    #[test]
    fn test_peek_eq_parse() {
        let s = "
a0 bb: val
a1: &x
    b1: 4
    b2: d
a2: 4
a3: [1, 2, 3]
a4:
    - [a1, a2]
    - 2
a5: *x
";
        let mut p = Parser::new(s.chars());
        while {
            let event_peek = p.peek().unwrap().clone();
            let event = p.next().unwrap();
            assert_eq!(event, event_peek);
            event.0 != Event::StreamEnd
        } {}
    }
}
diff --git a/third_party/rust/yaml-rust/src/scanner.rs b/third_party/rust/yaml-rust/src/scanner.rs
new file mode 100644
index 0000000000..a8659a8522
--- /dev/null
+++ b/third_party/rust/yaml-rust/src/scanner.rs
@@ -0,0 +1,2182 @@
+use std::collections::VecDeque;
+use std::error::Error;
+use std::{char, fmt};
+
/// Character encoding of the input stream; only UTF-8 is supported.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
pub enum TEncoding {
    Utf8,
}
+
/// Presentation style of a scalar token.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
pub enum TScalarStyle {
    Any,
    Plain,
    SingleQuoted,
    DoubleQuoted,

    /// Block scalar introduced by `|`.
    Literal,
    /// Block scalar introduced by `>`.
    /// NOTE(review): misspelling of "Folded"; renaming this public variant
    /// would break downstream matches, so it is kept as-is.
    Foled,
}
+
/// A position in the input stream.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
pub struct Marker {
    /// Absolute character offset from the start of the input.
    index: usize,
    /// Line number, starting at 1.
    line: usize,
    /// Column number, starting at 0.
    col: usize,
}
+
impl Marker {
    /// Build a marker at the given index/line/column.
    fn new(index: usize, line: usize, col: usize) -> Marker {
        Marker { index, line, col }
    }

    /// Absolute character offset from the start of the input.
    pub fn index(&self) -> usize {
        self.index
    }

    /// Line number (starts at 1).
    pub fn line(&self) -> usize {
        self.line
    }

    /// Column number (starts at 0).
    pub fn col(&self) -> usize {
        self.col
    }
}
+
/// A scanning error: a message plus the position it occurred at.
#[derive(Clone, PartialEq, Debug, Eq)]
pub struct ScanError {
    mark: Marker,
    info: String,
}
+
impl ScanError {
    /// Create an error with message `info` located at `loc`.
    pub fn new(loc: Marker, info: &str) -> ScanError {
        ScanError {
            mark: loc,
            info: info.to_owned(),
        }
    }

    /// Position at which the error occurred.
    pub fn marker(&self) -> &Marker {
        &self.mark
    }
}
+
impl Error for ScanError {
    // NOTE(review): `description` is deprecated in favour of `Display`;
    // kept here for compatibility with older std consumers.
    fn description(&self) -> &str {
        self.info.as_ref()
    }

    fn cause(&self) -> Option<&dyn Error> {
        None
    }
}
+
+impl fmt::Display for ScanError {
+ // col starts from 0
+ fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+ write!(
+ formatter,
+ "{} at line {} column {}",
+ self.info,
+ self.mark.line,
+ self.mark.col + 1
+ )
+ }
+}
+
/// The kinds of tokens the scanner produces.
#[derive(Clone, PartialEq, Debug, Eq)]
pub enum TokenType {
    NoToken,
    StreamStart(TEncoding),
    StreamEnd,
    /// major, minor
    VersionDirective(u32, u32),
    /// handle, prefix
    TagDirective(String, String),
    /// `---`
    DocumentStart,
    /// `...`
    DocumentEnd,
    BlockSequenceStart,
    BlockMappingStart,
    BlockEnd,
    /// `[`
    FlowSequenceStart,
    /// `]`
    FlowSequenceEnd,
    /// `{`
    FlowMappingStart,
    /// `}`
    FlowMappingEnd,
    /// `-` entry in block context
    BlockEntry,
    /// `,` in flow context
    FlowEntry,
    Key,
    Value,
    /// `*name`
    Alias(String),
    /// `&name`
    Anchor(String),
    /// handle, suffix
    Tag(String, String),
    Scalar(TScalarStyle, String),
}
+
/// A scanned token together with its start position.
#[derive(Clone, PartialEq, Debug, Eq)]
pub struct Token(pub Marker, pub TokenType);
+
/// Bookkeeping for a potential implicit mapping key ("simple key").
#[derive(Clone, PartialEq, Debug, Eq)]
struct SimpleKey {
    /// Whether a simple key may still start at `mark`.
    possible: bool,
    /// Whether a key is mandatory at this position (error if it goes stale).
    required: bool,
    /// Index of the token this key would precede.
    token_number: usize,
    /// Where the candidate key starts.
    mark: Marker,
}
+
impl SimpleKey {
    /// A fresh, not-yet-possible key candidate at `mark`.
    fn new(mark: Marker) -> SimpleKey {
        SimpleKey {
            possible: false,
            required: false,
            token_number: 0,
            mark,
        }
    }
}
+
/// The YAML tokenizer: turns a stream of `char`s into `Token`s.
#[derive(Debug)]
pub struct Scanner<T> {
    /// Source of input characters.
    rdr: T,
    /// Current position in the input.
    mark: Marker,
    /// Tokens scanned but not yet handed out.
    tokens: VecDeque<Token>,
    /// Lookahead characters read from `rdr` (`'\0'` marks end of input).
    buffer: VecDeque<char>,
    /// First error encountered, if any; iteration stops after it.
    error: Option<ScanError>,

    stream_start_produced: bool,
    stream_end_produced: bool,
    /// Input index at which a `:` may directly follow a value in flow context.
    adjacent_value_allowed_at: usize,
    /// Whether a simple (implicit) key may start at the current position.
    simple_key_allowed: bool,
    /// One simple-key slot per flow level, plus one for the block level.
    simple_keys: Vec<SimpleKey>,
    /// Current block indentation (-1 before the stream starts).
    indent: isize,
    /// Stack of enclosing indentation levels.
    indents: Vec<isize>,
    /// Nesting depth of `[`/`{` collections.
    flow_level: u8,
    /// Number of tokens already handed to the consumer.
    tokens_parsed: usize,
    /// Whether the front of `tokens` is ready for `next_token`.
    token_available: bool,
}
+
impl<T: Iterator<Item = char>> Iterator for Scanner<T> {
    type Item = Token;
    /// Yields tokens until end of stream or the first error; the error is
    /// stored (see `get_error`) and iteration ends.
    fn next(&mut self) -> Option<Token> {
        if self.error.is_some() {
            return None;
        }
        match self.next_token() {
            Ok(tok) => tok,
            Err(e) => {
                self.error = Some(e);
                None
            }
        }
    }
}
+
/// True for the NUL sentinel that `lookahead` uses to mark end of input.
#[inline]
fn is_z(c: char) -> bool {
    c == '\0'
}
/// True for the YAML line-break characters LF and CR.
#[inline]
fn is_break(c: char) -> bool {
    matches!(c, '\n' | '\r')
}
/// True for a line break or end of input.
#[inline]
fn is_breakz(c: char) -> bool {
    is_break(c) || is_z(c)
}
/// True for an ASCII space or tab.
#[inline]
fn is_blank(c: char) -> bool {
    matches!(c, ' ' | '\t')
}
/// True for a space, tab, line break, or end of input.
#[inline]
fn is_blankz(c: char) -> bool {
    is_blank(c) || is_breakz(c)
}
/// True for an ASCII decimal digit `0`-`9`.
#[inline]
fn is_digit(c: char) -> bool {
    // Idiomatic stdlib form of the manual `'0' <= c && c <= '9'` range
    // check (clippy: manual_range_contains).
    c.is_ascii_digit()
}
/// True for the characters allowed in YAML anchor/alias/directive names:
/// ASCII alphanumerics plus `_` and `-`.
#[inline]
fn is_alpha(c: char) -> bool {
    // `matches!` replaces the boolean-returning match
    // (clippy: match_like_matches_macro).
    matches!(c, '0'..='9' | 'a'..='z' | 'A'..='Z' | '_' | '-')
}
/// True for an ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`).
#[inline]
fn is_hex(c: char) -> bool {
    // Stdlib equivalent of the three manual range checks.
    c.is_ascii_hexdigit()
}
/// Numeric value (0-15) of an ASCII hex digit.
///
/// # Panics
/// Panics if `c` is not a hex digit; callers guard with `is_hex` first.
#[inline]
fn as_hex(c: char) -> u32 {
    // `char::to_digit(16)` covers all three ranges of the original
    // hand-rolled match; `None` corresponds to its `unreachable!()` arm.
    c.to_digit(16).expect("as_hex called on a non-hex digit")
}
/// True for the flow-context indicator characters `,` `[` `]` `{` `}`.
#[inline]
fn is_flow(c: char) -> bool {
    // `matches!` replaces the boolean-returning match
    // (clippy: match_like_matches_macro).
    matches!(c, ',' | '[' | ']' | '{' | '}')
}
+
+pub type ScanResult = Result<(), ScanError>;
+
+impl<T: Iterator<Item = char>> Scanner<T> {
+ /// Creates the YAML tokenizer.
+ pub fn new(rdr: T) -> Scanner<T> {
+ Scanner {
+ rdr,
+ buffer: VecDeque::new(),
+ mark: Marker::new(0, 1, 0),
+ tokens: VecDeque::new(),
+ error: None,
+
+ stream_start_produced: false,
+ stream_end_produced: false,
+ adjacent_value_allowed_at: 0,
+ simple_key_allowed: true,
+ simple_keys: Vec::new(),
+ indent: -1,
+ indents: Vec::new(),
+ flow_level: 0,
+ tokens_parsed: 0,
+ token_available: false,
+ }
+ }
+ #[inline]
+ pub fn get_error(&self) -> Option<ScanError> {
+ match self.error {
+ None => None,
+ Some(ref e) => Some(e.clone()),
+ }
+ }
+
    /// Ensure at least `count` characters are buffered, reading from the
    /// input and padding with `'\0'` once it is exhausted.
    #[inline]
    fn lookahead(&mut self, count: usize) {
        if self.buffer.len() >= count {
            return;
        }
        for _ in 0..(count - self.buffer.len()) {
            self.buffer.push_back(self.rdr.next().unwrap_or('\0'));
        }
    }
    /// Consume one buffered character, updating index/line/column.
    /// Requires at least one character in the buffer (see `lookahead`).
    #[inline]
    fn skip(&mut self) {
        let c = self.buffer.pop_front().unwrap();

        self.mark.index += 1;
        if c == '\n' {
            self.mark.line += 1;
            self.mark.col = 0;
        } else {
            self.mark.col += 1;
        }
    }
    /// Consume one line break, treating `\r\n` as a single break.
    /// Requires two characters of lookahead.
    #[inline]
    fn skip_line(&mut self) {
        if self.buffer[0] == '\r' && self.buffer[1] == '\n' {
            self.skip();
            self.skip();
        } else if is_break(self.buffer[0]) {
            self.skip();
        }
    }
    /// Current (first buffered) character; requires prior `lookahead(1)`.
    #[inline]
    fn ch(&self) -> char {
        self.buffer[0]
    }
    /// Whether the current character equals `c`.
    #[inline]
    fn ch_is(&self, c: char) -> bool {
        self.buffer[0] == c
    }
    /// Whether the current character is the end-of-input sentinel.
    #[allow(dead_code)]
    #[inline]
    fn eof(&self) -> bool {
        self.ch_is('\0')
    }
    /// Whether the StreamStart token has been produced.
    #[inline]
    pub fn stream_started(&self) -> bool {
        self.stream_start_produced
    }
    /// Whether the StreamEnd token has been produced.
    #[inline]
    pub fn stream_ended(&self) -> bool {
        self.stream_end_produced
    }
    /// Current position in the input.
    #[inline]
    pub fn mark(&self) -> Marker {
        self.mark
    }
    /// Consume one line break and append a normalized `'\n'` to `s`
    /// (`\r\n` and lone `\r` both become `\n`). The caller must have
    /// verified a break is present, with two characters of lookahead.
    #[inline]
    fn read_break(&mut self, s: &mut String) {
        if self.buffer[0] == '\r' && self.buffer[1] == '\n' {
            s.push('\n');
            self.skip();
            self.skip();
        } else if self.buffer[0] == '\r' || self.buffer[0] == '\n' {
            s.push('\n');
            self.skip();
        } else {
            unreachable!();
        }
    }
+ fn insert_token(&mut self, pos: usize, tok: Token) {
+ let old_len = self.tokens.len();
+ assert!(pos <= old_len);
+ self.tokens.push_back(tok);
+ for i in 0..old_len - pos {
+ self.tokens.swap(old_len - i, old_len - i - 1);
+ }
+ }
    /// Allow an implicit (simple) key to start at the current position.
    fn allow_simple_key(&mut self) {
        self.simple_key_allowed = true;
    }
    /// Forbid an implicit (simple) key at the current position.
    fn disallow_simple_key(&mut self) {
        self.simple_key_allowed = false;
    }
+
    /// Scan one more token (or token group) from the input and queue it on
    /// `self.tokens`, dispatching on the first significant character.
    pub fn fetch_next_token(&mut self) -> ScanResult {
        self.lookahead(1);

        if !self.stream_start_produced {
            self.fetch_stream_start();
            return Ok(());
        }
        // Move past whitespace, comments and line breaks.
        self.skip_to_next_token();

        // Invalidate simple-key candidates that can no longer be keys.
        self.stale_simple_keys()?;

        let mark = self.mark;
        // Close any block collections ending at this column.
        self.unroll_indent(mark.col as isize);

        // The longest patterns below ("---"/"..." plus a blank) need 4 chars.
        self.lookahead(4);

        if is_z(self.ch()) {
            self.fetch_stream_end()?;
            return Ok(());
        }

        // Is it a directive?
        if self.mark.col == 0 && self.ch_is('%') {
            return self.fetch_directive();
        }

        // "---" at column 0 followed by a blank: document start.
        if self.mark.col == 0
            && self.buffer[0] == '-'
            && self.buffer[1] == '-'
            && self.buffer[2] == '-'
            && is_blankz(self.buffer[3])
        {
            self.fetch_document_indicator(TokenType::DocumentStart)?;
            return Ok(());
        }

        // "..." at column 0 followed by a blank: document end.
        if self.mark.col == 0
            && self.buffer[0] == '.'
            && self.buffer[1] == '.'
            && self.buffer[2] == '.'
            && is_blankz(self.buffer[3])
        {
            self.fetch_document_indicator(TokenType::DocumentEnd)?;
            return Ok(());
        }

        let c = self.buffer[0];
        let nc = self.buffer[1];
        match c {
            '[' => self.fetch_flow_collection_start(TokenType::FlowSequenceStart),
            '{' => self.fetch_flow_collection_start(TokenType::FlowMappingStart),
            ']' => self.fetch_flow_collection_end(TokenType::FlowSequenceEnd),
            '}' => self.fetch_flow_collection_end(TokenType::FlowMappingEnd),
            ',' => self.fetch_flow_entry(),
            '-' if is_blankz(nc) => self.fetch_block_entry(),
            '?' if is_blankz(nc) => self.fetch_key(),
            // ':' starts a value when followed by a blank, or in flow context
            // when adjacent to a flow indicator or an allowed adjacent value.
            ':' if is_blankz(nc)
                || (self.flow_level > 0
                    && (is_flow(nc) || self.mark.index == self.adjacent_value_allowed_at)) =>
            {
                self.fetch_value()
            }
            // Is it an alias?
            '*' => self.fetch_anchor(true),
            // Is it an anchor?
            '&' => self.fetch_anchor(false),
            '!' => self.fetch_tag(),
            // Is it a literal scalar?
            '|' if self.flow_level == 0 => self.fetch_block_scalar(true),
            // Is it a folded scalar?
            '>' if self.flow_level == 0 => self.fetch_block_scalar(false),
            '\'' => self.fetch_flow_scalar(true),
            '"' => self.fetch_flow_scalar(false),
            // plain scalar
            '-' if !is_blankz(nc) => self.fetch_plain_scalar(),
            ':' | '?' if !is_blankz(nc) && self.flow_level == 0 => self.fetch_plain_scalar(),
            '%' | '@' | '`' => Err(ScanError::new(
                self.mark,
                &format!("unexpected character: `{}'", c),
            )),
            _ => self.fetch_plain_scalar(),
        }
    }
+
    /// Hand out the next token, fetching more from the input as needed.
    /// Returns `Ok(None)` once the stream has ended.
    pub fn next_token(&mut self) -> Result<Option<Token>, ScanError> {
        if self.stream_end_produced {
            return Ok(None);
        }

        if !self.token_available {
            self.fetch_more_tokens()?;
        }
        let t = self.tokens.pop_front().unwrap();
        self.token_available = false;
        self.tokens_parsed += 1;

        if let TokenType::StreamEnd = t.1 {
            self.stream_end_produced = true;
        }
        Ok(Some(t))
    }
+
+ pub fn fetch_more_tokens(&mut self) -> ScanResult {
+ let mut need_more;
+ loop {
+ need_more = false;
+ if self.tokens.is_empty() {
+ need_more = true;
+ } else {
+ self.stale_simple_keys()?;
+ for sk in &self.simple_keys {
+ if sk.possible && sk.token_number == self.tokens_parsed {
+ need_more = true;
+ break;
+ }
+ }
+ }
+
+ if !need_more {
+ break;
+ }
+ self.fetch_next_token()?;
+ }
+ self.token_available = true;
+
+ Ok(())
+ }
+
    /// Invalidate simple-key candidates that can no longer become keys: a
    /// simple key must stay on one line and within 1024 characters of its
    /// start. A *required* key going stale is an error.
    fn stale_simple_keys(&mut self) -> ScanResult {
        for sk in &mut self.simple_keys {
            if sk.possible
                && (sk.mark.line < self.mark.line || sk.mark.index + 1024 < self.mark.index)
            {
                if sk.required {
                    return Err(ScanError::new(self.mark, "simple key expect ':'"));
                }
                sk.possible = false;
            }
        }
        Ok(())
    }
+
    /// Skip whitespace, comments and line breaks up to the next token.
    /// Tabs are only skipped where a simple key cannot start; a line break
    /// re-enables simple keys in block context.
    fn skip_to_next_token(&mut self) {
        loop {
            self.lookahead(1);
            // TODO(chenyh) BOM
            match self.ch() {
                ' ' => self.skip(),
                '\t' if self.flow_level > 0 || !self.simple_key_allowed => self.skip(),
                '\n' | '\r' => {
                    self.lookahead(2);
                    self.skip_line();
                    if self.flow_level == 0 {
                        self.allow_simple_key();
                    }
                }
                '#' => {
                    // A comment runs to the end of the line.
                    while !is_breakz(self.ch()) {
                        self.skip();
                        self.lookahead(1);
                    }
                }
                _ => break,
            }
        }
    }
+
    /// Emit the StreamStart token and set up the block-level simple-key slot.
    fn fetch_stream_start(&mut self) {
        let mark = self.mark;
        self.indent = -1;
        self.stream_start_produced = true;
        self.allow_simple_key();
        self.tokens
            .push_back(Token(mark, TokenType::StreamStart(TEncoding::Utf8)));
        self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
    }
+
    /// Close all open indents and emit the StreamEnd token.
    fn fetch_stream_end(&mut self) -> ScanResult {
        // force new line
        if self.mark.col != 0 {
            self.mark.col = 0;
            self.mark.line += 1;
        }

        self.unroll_indent(-1);
        self.remove_simple_key()?;
        self.disallow_simple_key();

        self.tokens
            .push_back(Token(self.mark, TokenType::StreamEnd));
        Ok(())
    }
+
    /// Scan a `%...` directive line and queue the resulting token.
    /// Directives reset indentation and cannot contain simple keys.
    fn fetch_directive(&mut self) -> ScanResult {
        self.unroll_indent(-1);
        self.remove_simple_key()?;

        self.disallow_simple_key();

        let tok = self.scan_directive()?;

        self.tokens.push_back(tok);

        Ok(())
    }
+
    /// Scan a directive: `%YAML major.minor`, `%TAG handle prefix`, or an
    /// unknown directive (which is skipped and returned as an empty
    /// `TagDirective` token rather than an error).
    fn scan_directive(&mut self) -> Result<Token, ScanError> {
        let start_mark = self.mark;
        // Eat '%'.
        self.skip();

        let name = self.scan_directive_name()?;
        let tok = match name.as_ref() {
            "YAML" => self.scan_version_directive_value(&start_mark)?,
            "TAG" => self.scan_tag_directive_value(&start_mark)?,
            // XXX This should be a warning instead of an error
            _ => {
                // skip current line
                self.lookahead(1);
                while !is_breakz(self.ch()) {
                    self.skip();
                    self.lookahead(1);
                }
                // XXX return an empty TagDirective token
                Token(
                    start_mark,
                    TokenType::TagDirective(String::new(), String::new()),
                )
            }
        };
        self.lookahead(1);

        // Eat trailing whitespace.
        while is_blank(self.ch()) {
            self.skip();
            self.lookahead(1);
        }

        // Eat a trailing comment.
        if self.ch() == '#' {
            while !is_breakz(self.ch()) {
                self.skip();
                self.lookahead(1);
            }
        }

        if !is_breakz(self.ch()) {
            return Err(ScanError::new(
                start_mark,
                "while scanning a directive, did not find expected comment or line break",
            ));
        }

        // Eat a line break
        if is_break(self.ch()) {
            self.lookahead(2);
            self.skip_line();
        }

        Ok(tok)
    }
+
    /// Scan the `major.minor` version number after `%YAML`.
    fn scan_version_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> {
        self.lookahead(1);

        // Eat whitespace before the number.
        while is_blank(self.ch()) {
            self.skip();
            self.lookahead(1);
        }

        let major = self.scan_version_directive_number(mark)?;

        if self.ch() != '.' {
            return Err(ScanError::new(
                *mark,
                "while scanning a YAML directive, did not find expected digit or '.' character",
            ));
        }

        // Eat '.'.
        self.skip();

        let minor = self.scan_version_directive_number(mark)?;

        Ok(Token(*mark, TokenType::VersionDirective(major, minor)))
    }
+
    /// Scan the alphanumeric name immediately after the `%` of a directive.
    /// It must be non-empty and followed by a blank or line break.
    fn scan_directive_name(&mut self) -> Result<String, ScanError> {
        let start_mark = self.mark;
        let mut string = String::new();
        self.lookahead(1);
        while is_alpha(self.ch()) {
            string.push(self.ch());
            self.skip();
            self.lookahead(1);
        }

        if string.is_empty() {
            return Err(ScanError::new(
                start_mark,
                "while scanning a directive, could not find expected directive name",
            ));
        }

        if !is_blankz(self.ch()) {
            return Err(ScanError::new(
                start_mark,
                "while scanning a directive, found unexpected non-alphabetical character",
            ));
        }

        Ok(string)
    }
+
    /// Scan one component (major or minor) of a `%YAML` version number.
    /// Limited to 9 digits, so the `u32` accumulator cannot overflow.
    fn scan_version_directive_number(&mut self, mark: &Marker) -> Result<u32, ScanError> {
        let mut val = 0u32;
        let mut length = 0usize;
        self.lookahead(1);
        while is_digit(self.ch()) {
            if length + 1 > 9 {
                return Err(ScanError::new(
                    *mark,
                    "while scanning a YAML directive, found extremely long version number",
                ));
            }
            length += 1;
            val = val * 10 + ((self.ch() as u32) - ('0' as u32));
            self.skip();
            self.lookahead(1);
        }

        if length == 0 {
            return Err(ScanError::new(
                *mark,
                "while scanning a YAML directive, did not find expected version number",
            ));
        }

        Ok(val)
    }
+
    /// Scan the `!handle! prefix` pair after `%TAG`.
    fn scan_tag_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> {
        self.lookahead(1);
        /* Eat whitespaces. */
        while is_blank(self.ch()) {
            self.skip();
            self.lookahead(1);
        }
        let handle = self.scan_tag_handle(true, mark)?;

        self.lookahead(1);
        /* Eat whitespaces. */
        while is_blank(self.ch()) {
            self.skip();
            self.lookahead(1);
        }

        // "!!" is the secondary tag handle.
        let is_secondary = handle == "!!";
        let prefix = self.scan_tag_uri(true, is_secondary, &String::new(), mark)?;

        self.lookahead(1);

        if is_blankz(self.ch()) {
            Ok(Token(*mark, TokenType::TagDirective(handle, prefix)))
        } else {
            Err(ScanError::new(
                *mark,
                "while scanning TAG, did not find expected whitespace or line break",
            ))
        }
    }
+
    /// Scan a `!...` tag token; a tag may end a simple key but cannot start
    /// one.
    fn fetch_tag(&mut self) -> ScanResult {
        self.save_simple_key()?;
        self.disallow_simple_key();

        let tok = self.scan_tag()?;
        self.tokens.push_back(tok);
        Ok(())
    }
+
    /// Scan a tag token in any of its forms: verbatim `!<uri>`,
    /// `!handle!suffix`, shorthand `!suffix`, or the bare `!` tag.
    fn scan_tag(&mut self) -> Result<Token, ScanError> {
        let start_mark = self.mark;
        let mut handle = String::new();
        let mut suffix;
        let mut secondary = false;

        // Check if the tag is in the canonical form (verbatim).
        self.lookahead(2);

        if self.buffer[1] == '<' {
            // Eat '!<'
            self.skip();
            self.skip();
            suffix = self.scan_tag_uri(false, false, &String::new(), &start_mark)?;

            if self.ch() != '>' {
                return Err(ScanError::new(
                    start_mark,
                    "while scanning a tag, did not find the expected '>'",
                ));
            }

            self.skip();
        } else {
            // The tag has either the '!suffix' or the '!handle!suffix'
            handle = self.scan_tag_handle(false, &start_mark)?;
            // Check if it is, indeed, handle.
            if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
                if handle == "!!" {
                    secondary = true;
                }
                suffix = self.scan_tag_uri(false, secondary, &String::new(), &start_mark)?;
            } else {
                suffix = self.scan_tag_uri(false, false, &handle, &start_mark)?;
                handle = "!".to_owned();
                // A special case: the '!' tag. Set the handle to '' and the
                // suffix to '!'.
                if suffix.is_empty() {
                    handle.clear();
                    suffix = "!".to_owned();
                }
            }
        }

        self.lookahead(1);
        if is_blankz(self.ch()) {
            // XXX: ex 7.2, an empty scalar can follow a secondary tag
            Ok(Token(start_mark, TokenType::Tag(handle, suffix)))
        } else {
            Err(ScanError::new(
                start_mark,
                "while scanning a tag, did not find expected whitespace or line break",
            ))
        }
    }
+
    /// Scan a tag handle: `!`, `!!`, or `!name!`. When scanning a `%TAG`
    /// directive (`directive == true`) anything not ending in `!` (other
    /// than a lone `!`) is an error.
    fn scan_tag_handle(&mut self, directive: bool, mark: &Marker) -> Result<String, ScanError> {
        let mut string = String::new();
        self.lookahead(1);
        if self.ch() != '!' {
            return Err(ScanError::new(
                *mark,
                "while scanning a tag, did not find expected '!'",
            ));
        }

        string.push(self.ch());
        self.skip();

        self.lookahead(1);
        while is_alpha(self.ch()) {
            string.push(self.ch());
            self.skip();
            self.lookahead(1);
        }

        // Check if the trailing character is '!' and copy it.
        if self.ch() == '!' {
            string.push(self.ch());
            self.skip();
        } else if directive && string != "!" {
            // It's either the '!' tag or not really a tag handle. If it's a %TAG
            // directive, it's an error. If it's a tag token, it must be a part of
            // URI.
            return Err(ScanError::new(
                *mark,
                "while parsing a tag directive, did not find expected '!'",
            ));
        }
        Ok(string)
    }
+
    /// Scan a tag URI (the suffix of a tag or the prefix of a `%TAG`
    /// directive), decoding `%XX` escapes as it goes. `head` is a handle
    /// prefix to copy in front (its leading `!` is dropped).
    fn scan_tag_uri(
        &mut self,
        directive: bool,
        _is_secondary: bool,
        head: &str,
        mark: &Marker,
    ) -> Result<String, ScanError> {
        let mut length = head.len();
        let mut string = String::new();

        // Copy the head if needed.
        // Note that we don't copy the leading '!' character.
        if length > 1 {
            string.extend(head.chars().skip(1));
        }

        self.lookahead(1);
        /*
         * The set of characters that may appear in URI is as follows:
         *
         * '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
         * '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']',
         * '%'.
         */
        while match self.ch() {
            ';' | '/' | '?' | ':' | '@' | '&' => true,
            '=' | '+' | '$' | ',' | '.' | '!' | '~' | '*' | '\'' | '(' | ')' | '[' | ']' => true,
            '%' => true,
            c if is_alpha(c) => true,
            _ => false,
        } {
            // Check if it is a URI-escape sequence.
            if self.ch() == '%' {
                string.push(self.scan_uri_escapes(directive, mark)?);
            } else {
                string.push(self.ch());
                self.skip();
            }

            length += 1;
            self.lookahead(1);
        }

        if length == 0 {
            return Err(ScanError::new(
                *mark,
                "while parsing a tag, did not find expected tag URI",
            ));
        }

        Ok(string)
    }
+
+ fn scan_uri_escapes(&mut self, _directive: bool, mark: &Marker) -> Result<char, ScanError> {
+ let mut width = 0usize;
+ let mut code = 0u32;
+ loop {
+ self.lookahead(3);
+
+ if !(self.ch() == '%' && is_hex(self.buffer[1]) && is_hex(self.buffer[2])) {
+ return Err(ScanError::new(
+ *mark,
+ "while parsing a tag, did not find URI escaped octet",
+ ));
+ }
+
+ let octet = (as_hex(self.buffer[1]) << 4) + as_hex(self.buffer[2]);
+ if width == 0 {
+ width = match octet {
+ _ if octet & 0x80 == 0x00 => 1,
+ _ if octet & 0xE0 == 0xC0 => 2,
+ _ if octet & 0xF0 == 0xE0 => 3,
+ _ if octet & 0xF8 == 0xF0 => 4,
+ _ => {
+ return Err(ScanError::new(
+ *mark,
+ "while parsing a tag, found an incorrect leading UTF-8 octet",
+ ));
+ }
+ };
+ code = octet;
+ } else {
+ if octet & 0xc0 != 0x80 {
+ return Err(ScanError::new(
+ *mark,
+ "while parsing a tag, found an incorrect trailing UTF-8 octet",
+ ));
+ }
+ code = (code << 8) + octet;
+ }
+
+ self.skip();
+ self.skip();
+ self.skip();
+
+ width -= 1;
+ if width == 0 {
+ break;
+ }
+ }
+
+ match char::from_u32(code) {
+ Some(ch) => Ok(ch),
+ None => Err(ScanError::new(
+ *mark,
+ "while parsing a tag, found an invalid UTF-8 codepoint",
+ )),
+ }
+ }
+
    /// Scan an alias (`*name`, `alias == true`) or an anchor (`&name`).
    fn fetch_anchor(&mut self, alias: bool) -> ScanResult {
        self.save_simple_key()?;
        self.disallow_simple_key();

        let tok = self.scan_anchor(alias)?;

        self.tokens.push_back(tok);

        Ok(())
    }
+
    /// Scan the name of an anchor or alias (after `&`/`*`). The name must be
    /// non-empty and terminated by a blank, break, EOF, or one of the
    /// characters `?:,]}%@\``.
    fn scan_anchor(&mut self, alias: bool) -> Result<Token, ScanError> {
        let mut string = String::new();
        let start_mark = self.mark;

        // Eat the '&' or '*' indicator.
        self.skip();
        self.lookahead(1);
        while is_alpha(self.ch()) {
            string.push(self.ch());
            self.skip();
            self.lookahead(1);
        }

        if string.is_empty()
            || match self.ch() {
                c if is_blankz(c) => false,
                '?' | ':' | ',' | ']' | '}' | '%' | '@' | '`' => false,
                _ => true,
            }
        {
            return Err(ScanError::new(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
        }

        if alias {
            Ok(Token(start_mark, TokenType::Alias(string)))
        } else {
            Ok(Token(start_mark, TokenType::Anchor(string)))
        }
    }
+
    /// Enter a flow collection at `[` or `{` and queue its start token.
    fn fetch_flow_collection_start(&mut self, tok: TokenType) -> ScanResult {
        // The indicators '[' and '{' may start a simple key.
        self.save_simple_key()?;

        self.increase_flow_level()?;

        // A simple key may follow the open indicator.
        self.allow_simple_key();

        let start_mark = self.mark;
        self.skip();

        self.tokens.push_back(Token(start_mark, tok));
        Ok(())
    }
+
    /// Leave a flow collection at `]` or `}` and queue its end token.
    fn fetch_flow_collection_end(&mut self, tok: TokenType) -> ScanResult {
        self.remove_simple_key()?;
        self.decrease_flow_level();

        // No simple key may follow a close indicator.
        self.disallow_simple_key();

        let start_mark = self.mark;
        self.skip();

        self.tokens.push_back(Token(start_mark, tok));
        Ok(())
    }
+
    /// Queue a FlowEntry token for a `,` separator; a simple key may follow.
    fn fetch_flow_entry(&mut self) -> ScanResult {
        self.remove_simple_key()?;
        self.allow_simple_key();

        let start_mark = self.mark;
        self.skip();

        self.tokens
            .push_back(Token(start_mark, TokenType::FlowEntry));
        Ok(())
    }
+
    /// Enter one level of flow nesting, pushing a fresh simple-key slot.
    /// Errors if the `u8` nesting counter would overflow (recursion limit).
    fn increase_flow_level(&mut self) -> ScanResult {
        self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
        self.flow_level = self
            .flow_level
            .checked_add(1)
            .ok_or_else(|| ScanError::new(self.mark, "recursion limit exceeded"))?;
        Ok(())
    }
    /// Leave one level of flow nesting, discarding its simple-key slot.
    /// Does nothing at level 0 (unbalanced close indicator).
    fn decrease_flow_level(&mut self) {
        if self.flow_level > 0 {
            self.flow_level -= 1;
            self.simple_keys.pop().unwrap();
        }
    }
+
    /// Queue a BlockEntry token for a `- ` indicator, starting a block
    /// sequence (and its BlockSequenceStart token) if this is the first
    /// entry at this indentation.
    fn fetch_block_entry(&mut self) -> ScanResult {
        if self.flow_level == 0 {
            // Check if we are allowed to start a new entry.
            if !self.simple_key_allowed {
                return Err(ScanError::new(
                    self.mark,
                    "block sequence entries are not allowed in this context",
                ));
            }

            let mark = self.mark;
            // generate BLOCK-SEQUENCE-START if indented
            self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark);
        } else {
            // - * only allowed in block
            return Err(ScanError::new(
                self.mark,
                r#""-" is only valid inside a block"#,
            ));
        }
        self.remove_simple_key()?;
        self.allow_simple_key();

        let start_mark = self.mark;
        self.skip();

        self.tokens
            .push_back(Token(start_mark, TokenType::BlockEntry));
        Ok(())
    }
+
    /// Queue a DocumentStart/DocumentEnd token for a `---`/`...` marker,
    /// consuming its three characters and closing all open indents.
    fn fetch_document_indicator(&mut self, t: TokenType) -> ScanResult {
        self.unroll_indent(-1);
        self.remove_simple_key()?;
        self.disallow_simple_key();

        let mark = self.mark;

        // Eat the three indicator characters.
        self.skip();
        self.skip();
        self.skip();

        self.tokens.push_back(Token(mark, t));
        Ok(())
    }
+
    /// Scan a block scalar: literal (`|`) when `literal`, folded (`>`)
    /// otherwise.
    fn fetch_block_scalar(&mut self, literal: bool) -> ScanResult {
        self.save_simple_key()?;
        self.allow_simple_key();
        let tok = self.scan_block_scalar(literal)?;

        self.tokens.push_back(tok);
        Ok(())
    }
+
    /// Scan a block scalar body after its `|` or `>` indicator.
    ///
    /// Handles the optional chomping (`+` keep, `-` strip) and explicit
    /// indentation-indicator digits in either order, skips the rest of the
    /// header line, then collects content lines. In folded (`>`) mode a
    /// single break between two non-blank lines becomes a space.
    fn scan_block_scalar(&mut self, literal: bool) -> Result<Token, ScanError> {
        let start_mark = self.mark;
        // chomping: -1 strip, 0 clip (default), 1 keep.
        let mut chomping: i32 = 0;
        // Explicit indentation indicator (1-9); 0 means auto-detect.
        let mut increment: usize = 0;
        let mut indent: usize = 0;
        let mut trailing_blank: bool;
        let mut leading_blank: bool = false;

        let mut string = String::new();
        let mut leading_break = String::new();
        let mut trailing_breaks = String::new();

        // skip '|' or '>'
        self.skip();
        self.lookahead(1);

        // Header form 1: chomping indicator first, optional digit second.
        if self.ch() == '+' || self.ch() == '-' {
            if self.ch() == '+' {
                chomping = 1;
            } else {
                chomping = -1;
            }
            self.skip();
            self.lookahead(1);
            if is_digit(self.ch()) {
                if self.ch() == '0' {
                    return Err(ScanError::new(
                        start_mark,
                        "while scanning a block scalar, found an indentation indicator equal to 0",
                    ));
                }
                increment = (self.ch() as usize) - ('0' as usize);
                self.skip();
            }
        // Header form 2: digit first, optional chomping indicator second.
        } else if is_digit(self.ch()) {
            if self.ch() == '0' {
                return Err(ScanError::new(
                    start_mark,
                    "while scanning a block scalar, found an indentation indicator equal to 0",
                ));
            }

            increment = (self.ch() as usize) - ('0' as usize);
            self.skip();
            self.lookahead(1);
            if self.ch() == '+' || self.ch() == '-' {
                if self.ch() == '+' {
                    chomping = 1;
                } else {
                    chomping = -1;
                }
                self.skip();
            }
        }

        // Eat whitespaces and comments to the end of the line.
        self.lookahead(1);

        while is_blank(self.ch()) {
            self.skip();
            self.lookahead(1);
        }

        if self.ch() == '#' {
            while !is_breakz(self.ch()) {
                self.skip();
                self.lookahead(1);
            }
        }

        // Check if we are at the end of the line.
        if !is_breakz(self.ch()) {
            return Err(ScanError::new(
                start_mark,
                "while scanning a block scalar, did not find expected comment or line break",
            ));
        }

        if is_break(self.ch()) {
            self.lookahead(2);
            self.skip_line();
        }

        // Explicit indicator: content indent is relative to the parent node.
        if increment > 0 {
            indent = if self.indent >= 0 {
                (self.indent + increment as isize) as usize
            } else {
                increment
            }
        }
        // Scan the leading line breaks and determine the indentation level if needed.
        self.block_scalar_breaks(&mut indent, &mut trailing_breaks)?;

        self.lookahead(1);

        let start_mark = self.mark;

        while self.mark.col == indent && !is_z(self.ch()) {
            // We are at the beginning of a non-empty line.
            trailing_blank = is_blank(self.ch());
            if !literal && !leading_break.is_empty() && !leading_blank && !trailing_blank {
                // Folded mode: a single break between text lines folds to a
                // space; more breaks are kept as-is.
                if trailing_breaks.is_empty() {
                    string.push(' ');
                }
                leading_break.clear();
            } else {
                string.push_str(&leading_break);
                leading_break.clear();
            }

            string.push_str(&trailing_breaks);
            trailing_breaks.clear();

            leading_blank = is_blank(self.ch());

            while !is_breakz(self.ch()) {
                string.push(self.ch());
                self.skip();
                self.lookahead(1);
            }
            // break on EOF
            if is_z(self.ch()) {
                break;
            }

            self.lookahead(2);
            self.read_break(&mut leading_break);

            // Eat the following indentation spaces and line breaks.
            self.block_scalar_breaks(&mut indent, &mut trailing_breaks)?;
        }

        // Chomp the tail: keep the final break unless stripping…
        if chomping != -1 {
            string.push_str(&leading_break);
        }

        // …and keep all trailing breaks only when keeping.
        if chomping == 1 {
            string.push_str(&trailing_breaks);
        }

        if literal {
            Ok(Token(
                start_mark,
                TokenType::Scalar(TScalarStyle::Literal, string),
            ))
        } else {
            Ok(Token(
                start_mark,
                TokenType::Scalar(TScalarStyle::Foled, string),
            ))
        }
    }
+
    /// Consume the leading line breaks and indentation spaces of a block
    /// scalar, appending every consumed break to `breaks`.
    ///
    /// While `*indent` is still 0 (no explicit indentation indicator was
    /// given, so the indentation has not been determined yet) this also
    /// tracks the deepest column reached, from which the scalar's
    /// indentation is derived on exit.
    fn block_scalar_breaks(&mut self, indent: &mut usize, breaks: &mut String) -> ScanResult {
        let mut max_indent = 0;
        loop {
            self.lookahead(1);
            // Eat indentation spaces, but never past an already-known indent.
            while (*indent == 0 || self.mark.col < *indent) && self.buffer[0] == ' ' {
                self.skip();
                self.lookahead(1);
            }

            if self.mark.col > max_indent {
                max_indent = self.mark.col;
            }

            // Check for a tab character messing the indentation.
            if (*indent == 0 || self.mark.col < *indent) && self.buffer[0] == '\t' {
                return Err(ScanError::new(self.mark,
                        "while scanning a block scalar, found a tab character where an indentation space is expected"));
            }

            if !is_break(self.ch()) {
                break;
            }

            self.lookahead(2);
            // Consume the line break.
            self.read_break(breaks);
        }

        // Determine the indentation level if it was not given explicitly:
        // the deepest column seen, but at least one deeper than the parent
        // indent and never less than 1.
        if *indent == 0 {
            *indent = max_indent;
            if *indent < (self.indent + 1) as usize {
                *indent = (self.indent + 1) as usize;
            }
            if *indent < 1 {
                *indent = 1;
            }
        }
        Ok(())
    }
+
+ fn fetch_flow_scalar(&mut self, single: bool) -> ScanResult {
+ self.save_simple_key()?;
+ self.disallow_simple_key();
+
+ let tok = self.scan_flow_scalar(single)?;
+
+ // From spec: To ensure JSON compatibility, if a key inside a flow mapping is JSON-like,
+ // YAML allows the following value to be specified adjacent to the “:”.
+ self.adjacent_value_allowed_at = self.mark.index;
+
+ self.tokens.push_back(tok);
+ Ok(())
+ }
+
    /// Scan a single- (`single == true`) or double-quoted scalar whose
    /// opening quote is at the current position. Handles the `''` escape
    /// (single-quoted only), backslash escapes including the \x/\u/\U hex
    /// forms (double-quoted only), and folds interior line breaks.
    fn scan_flow_scalar(&mut self, single: bool) -> Result<Token, ScanError> {
        let start_mark = self.mark;

        let mut string = String::new();
        let mut leading_break = String::new();
        let mut trailing_breaks = String::new();
        let mut whitespaces = String::new();
        let mut leading_blanks;

        /* Eat the left quote. */
        self.skip();

        loop {
            /* Check for a document indicator. */
            self.lookahead(4);

            if self.mark.col == 0
                && (((self.buffer[0] == '-') && (self.buffer[1] == '-') && (self.buffer[2] == '-'))
                    || ((self.buffer[0] == '.')
                        && (self.buffer[1] == '.')
                        && (self.buffer[2] == '.')))
                && is_blankz(self.buffer[3])
            {
                return Err(ScanError::new(
                    start_mark,
                    "while scanning a quoted scalar, found unexpected document indicator",
                ));
            }

            if is_z(self.ch()) {
                return Err(ScanError::new(
                    start_mark,
                    "while scanning a quoted scalar, found unexpected end of stream",
                ));
            }

            self.lookahead(2);

            leading_blanks = false;
            // Consume non-blank characters.

            while !is_blankz(self.ch()) {
                match self.ch() {
                    // Check for an escaped single quote.
                    '\'' if self.buffer[1] == '\'' && single => {
                        string.push('\'');
                        self.skip();
                        self.skip();
                    }
                    // Check for the right quote.
                    '\'' if single => break,
                    '"' if !single => break,
                    // Check for an escaped line break.
                    '\\' if !single && is_break(self.buffer[1]) => {
                        self.lookahead(3);
                        self.skip();
                        self.skip_line();
                        leading_blanks = true;
                        break;
                    }
                    // Check for an escape sequence.
                    '\\' if !single => {
                        // Number of hex digits that follow: \x=2, \u=4, \U=8.
                        let mut code_length = 0usize;
                        match self.buffer[1] {
                            '0' => string.push('\0'),
                            'a' => string.push('\x07'),
                            'b' => string.push('\x08'),
                            't' | '\t' => string.push('\t'),
                            'n' => string.push('\n'),
                            'v' => string.push('\x0b'),
                            'f' => string.push('\x0c'),
                            'r' => string.push('\x0d'),
                            'e' => string.push('\x1b'),
                            ' ' => string.push('\x20'),
                            '"' => string.push('"'),
                            '\'' => string.push('\''),
                            '\\' => string.push('\\'),
                            // NEL (#x85)
                            'N' => string.push(char::from_u32(0x85).unwrap()),
                            // #xA0
                            '_' => string.push(char::from_u32(0xA0).unwrap()),
                            // LS (#x2028)
                            'L' => string.push(char::from_u32(0x2028).unwrap()),
                            // PS (#x2029)
                            'P' => string.push(char::from_u32(0x2029).unwrap()),
                            'x' => code_length = 2,
                            'u' => code_length = 4,
                            'U' => code_length = 8,
                            _ => {
                                return Err(ScanError::new(
                                    start_mark,
                                    "while parsing a quoted scalar, found unknown escape character",
                                ))
                            }
                        }
                        self.skip();
                        self.skip();
                        // Consume an arbitrary escape code.
                        if code_length > 0 {
                            self.lookahead(code_length);
                            let mut value = 0u32;
                            for i in 0..code_length {
                                if !is_hex(self.buffer[i]) {
                                    return Err(ScanError::new(start_mark,
                                        "while parsing a quoted scalar, did not find expected hexadecimal number"));
                                }
                                value = (value << 4) + as_hex(self.buffer[i]);
                            }

                            // from_u32 rejects surrogate halves and
                            // out-of-range code points.
                            let ch = match char::from_u32(value) {
                                Some(v) => v,
                                None => {
                                    return Err(ScanError::new(start_mark,
                                        "while parsing a quoted scalar, found invalid Unicode character escape code"));
                                }
                            };
                            string.push(ch);

                            for _ in 0..code_length {
                                self.skip();
                            }
                        }
                    }
                    c => {
                        string.push(c);
                        self.skip();
                    }
                }
                self.lookahead(2);
            }
            self.lookahead(1);
            // Stop at the closing quote.
            match self.ch() {
                '\'' if single => break,
                '"' if !single => break,
                _ => {}
            }

            // Consume blank characters.
            while is_blank(self.ch()) || is_break(self.ch()) {
                if is_blank(self.ch()) {
                    // Consume a space or a tab character.
                    if leading_blanks {
                        self.skip();
                    } else {
                        whitespaces.push(self.ch());
                        self.skip();
                    }
                } else {
                    self.lookahead(2);
                    // Check if it is a first line break.
                    if leading_blanks {
                        self.read_break(&mut trailing_breaks);
                    } else {
                        whitespaces.clear();
                        self.read_break(&mut leading_break);
                        leading_blanks = true;
                    }
                }
                self.lookahead(1);
            }
            // Join the whitespaces or fold line breaks.
            if leading_blanks {
                // NOTE(review): this tests `leading_break.is_empty()` where
                // line folding would normally key off the non-empty case;
                // it mirrors upstream yaml-rust — confirm before changing.
                if leading_break.is_empty() {
                    string.push_str(&leading_break);
                    string.push_str(&trailing_breaks);
                    trailing_breaks.clear();
                    leading_break.clear();
                } else {
                    if trailing_breaks.is_empty() {
                        string.push(' ');
                    } else {
                        string.push_str(&trailing_breaks);
                        trailing_breaks.clear();
                    }
                    leading_break.clear();
                }
            } else {
                string.push_str(&whitespaces);
                whitespaces.clear();
            }
        } // loop

        // Eat the right quote.
        self.skip();

        if single {
            Ok(Token(
                start_mark,
                TokenType::Scalar(TScalarStyle::SingleQuoted, string),
            ))
        } else {
            Ok(Token(
                start_mark,
                TokenType::Scalar(TScalarStyle::DoubleQuoted, string),
            ))
        }
    }
+
+ fn fetch_plain_scalar(&mut self) -> ScanResult {
+ self.save_simple_key()?;
+ self.disallow_simple_key();
+
+ let tok = self.scan_plain_scalar()?;
+
+ self.tokens.push_back(tok);
+ Ok(())
+ }
+
    /// Scan a plain (unquoted) scalar. A plain scalar ends at a comment, a
    /// document indicator at column 0, an indentation drop below the parent
    /// indent (block context), or a flow indicator / ": " (flow context).
    /// Interior line breaks are folded into spaces.
    fn scan_plain_scalar(&mut self) -> Result<Token, ScanError> {
        // Content must be indented deeper than the enclosing block node.
        let indent = self.indent + 1;
        let start_mark = self.mark;

        let mut string = String::new();
        let mut leading_break = String::new();
        let mut trailing_breaks = String::new();
        let mut whitespaces = String::new();
        let mut leading_blanks = false;

        loop {
            /* Check for a document indicator. */
            self.lookahead(4);

            if self.mark.col == 0
                && (((self.buffer[0] == '-') && (self.buffer[1] == '-') && (self.buffer[2] == '-'))
                    || ((self.buffer[0] == '.')
                        && (self.buffer[1] == '.')
                        && (self.buffer[2] == '.')))
                && is_blankz(self.buffer[3])
            {
                break;
            }

            if self.ch() == '#' {
                break;
            }
            while !is_blankz(self.ch()) {
                // indicators can end a plain scalar, see 7.3.3. Plain Style
                match self.ch() {
                    ':' if is_blankz(self.buffer[1])
                        || (self.flow_level > 0 && is_flow(self.buffer[1])) =>
                    {
                        break;
                    }
                    ',' | '[' | ']' | '{' | '}' if self.flow_level > 0 => break,
                    _ => {}
                }

                // Flush any folded breaks / pending whitespace before the
                // next run of content characters.
                if leading_blanks || !whitespaces.is_empty() {
                    if leading_blanks {
                        // NOTE(review): the `is_empty` branch mirrors
                        // scan_flow_scalar's fold logic — confirm upstream
                        // before changing.
                        if leading_break.is_empty() {
                            string.push_str(&leading_break);
                            string.push_str(&trailing_breaks);
                            trailing_breaks.clear();
                            leading_break.clear();
                        } else {
                            if trailing_breaks.is_empty() {
                                string.push(' ');
                            } else {
                                string.push_str(&trailing_breaks);
                                trailing_breaks.clear();
                            }
                            leading_break.clear();
                        }
                        leading_blanks = false;
                    } else {
                        string.push_str(&whitespaces);
                        whitespaces.clear();
                    }
                }

                string.push(self.ch());
                self.skip();
                self.lookahead(2);
            }
            // is the end?
            if !(is_blank(self.ch()) || is_break(self.ch())) {
                break;
            }
            self.lookahead(1);

            while is_blank(self.ch()) || is_break(self.ch()) {
                if is_blank(self.ch()) {
                    // Tabs may not be used for indentation of a continuation
                    // line.
                    if leading_blanks && (self.mark.col as isize) < indent && self.ch() == '\t' {
                        return Err(ScanError::new(
                            start_mark,
                            "while scanning a plain scalar, found a tab",
                        ));
                    }

                    if leading_blanks {
                        self.skip();
                    } else {
                        whitespaces.push(self.ch());
                        self.skip();
                    }
                } else {
                    self.lookahead(2);
                    // Check if it is a first line break
                    if leading_blanks {
                        self.read_break(&mut trailing_breaks);
                    } else {
                        whitespaces.clear();
                        self.read_break(&mut leading_break);
                        leading_blanks = true;
                    }
                }
                self.lookahead(1);
            }

            // check indentation level
            if self.flow_level == 0 && (self.mark.col as isize) < indent {
                break;
            }
        }

        // A scalar that ended at a line break permits a simple key on the
        // following line.
        if leading_blanks {
            self.allow_simple_key();
        }

        Ok(Token(
            start_mark,
            TokenType::Scalar(TScalarStyle::Plain, string),
        ))
    }
+
+ fn fetch_key(&mut self) -> ScanResult {
+ let start_mark = self.mark;
+ if self.flow_level == 0 {
+ // Check if we are allowed to start a new key (not necessarily simple).
+ if !self.simple_key_allowed {
+ return Err(ScanError::new(
+ self.mark,
+ "mapping keys are not allowed in this context",
+ ));
+ }
+ self.roll_indent(
+ start_mark.col,
+ None,
+ TokenType::BlockMappingStart,
+ start_mark,
+ );
+ }
+
+ self.remove_simple_key()?;
+
+ if self.flow_level == 0 {
+ self.allow_simple_key();
+ } else {
+ self.disallow_simple_key();
+ }
+
+ self.skip();
+ self.tokens.push_back(Token(start_mark, TokenType::Key));
+ Ok(())
+ }
+
    /// Handle a ':' value indicator. If a simple-key candidate is pending,
    /// retroactively insert the KEY token (and, in block context, a
    /// BLOCK-MAPPING-START) at the position the key was saved; otherwise
    /// treat the ':' as following an explicit (complex) key.
    fn fetch_value(&mut self) -> ScanResult {
        let sk = self.simple_keys.last().unwrap().clone();
        let start_mark = self.mark;
        if sk.possible {
            // insert simple key
            let tok = Token(sk.mark, TokenType::Key);
            let tokens_parsed = self.tokens_parsed;
            // token_number is absolute; convert to an offset into the queue.
            self.insert_token(sk.token_number - tokens_parsed, tok);

            // Add the BLOCK-MAPPING-START token if needed.
            self.roll_indent(
                sk.mark.col,
                Some(sk.token_number),
                TokenType::BlockMappingStart,
                start_mark,
            );

            // A simple key cannot immediately follow another simple key.
            self.simple_keys.last_mut().unwrap().possible = false;
            self.disallow_simple_key();
        } else {
            // The ':' indicator follows a complex key.
            if self.flow_level == 0 {
                if !self.simple_key_allowed {
                    return Err(ScanError::new(
                        start_mark,
                        "mapping values are not allowed in this context",
                    ));
                }

                self.roll_indent(
                    start_mark.col,
                    None,
                    TokenType::BlockMappingStart,
                    start_mark,
                );
            }

            // Simple keys become legal again after ':' only in block context.
            if self.flow_level == 0 {
                self.allow_simple_key();
            } else {
                self.disallow_simple_key();
            }
        }
        self.skip();
        self.tokens.push_back(Token(start_mark, TokenType::Value));

        Ok(())
    }
+
+ fn roll_indent(&mut self, col: usize, number: Option<usize>, tok: TokenType, mark: Marker) {
+ if self.flow_level > 0 {
+ return;
+ }
+
+ if self.indent < col as isize {
+ self.indents.push(self.indent);
+ self.indent = col as isize;
+ let tokens_parsed = self.tokens_parsed;
+ match number {
+ Some(n) => self.insert_token(n - tokens_parsed, Token(mark, tok)),
+ None => self.tokens.push_back(Token(mark, tok)),
+ }
+ }
+ }
+
+ fn unroll_indent(&mut self, col: isize) {
+ if self.flow_level > 0 {
+ return;
+ }
+ while self.indent > col {
+ self.tokens.push_back(Token(self.mark, TokenType::BlockEnd));
+ self.indent = self.indents.pop().unwrap();
+ }
+ }
+
    /// Record the current position as a potential simple-key candidate
    /// (a key without an explicit '?'), replacing the candidate saved for
    /// the innermost level.
    fn save_simple_key(&mut self) -> Result<(), ScanError> {
        // NOTE(review): libyaml computes `required` for *block* context
        // (flow_level == 0 at the node's indent); the `> 0` here looks
        // inverted — confirm against upstream before changing.
        let required = self.flow_level > 0 && self.indent == (self.mark.col as isize);
        if self.simple_key_allowed {
            let mut sk = SimpleKey::new(self.mark);
            sk.possible = true;
            sk.required = required;
            // Absolute position at which the KEY token would be inserted.
            sk.token_number = self.tokens_parsed + self.tokens.len();

            self.remove_simple_key()?;

            // Replace the candidate for the innermost level.
            self.simple_keys.pop();
            self.simple_keys.push(sk);
        }
        Ok(())
    }
+
+ fn remove_simple_key(&mut self) -> ScanResult {
+ let last = self.simple_keys.last_mut().unwrap();
+ if last.possible && last.required {
+ return Err(ScanError::new(self.mark, "simple key expected"));
+ }
+
+ last.possible = false;
+ Ok(())
+ }
+}
+
#[cfg(test)]
mod test {
    use super::TokenType::*;
    use super::*;

    // Assert that the next token's type matches the given pattern.
    macro_rules! next {
        ($p:ident, $tk:pat) => {{
            let tok = $p.next().unwrap();
            match tok.1 {
                $tk => {}
                _ => panic!("unexpected token: {:?}", tok),
            }
        }};
    }

    // Assert that the next token is a Scalar with the given style and value.
    macro_rules! next_scalar {
        ($p:ident, $tk:expr, $v:expr) => {{
            let tok = $p.next().unwrap();
            match tok.1 {
                Scalar(style, ref v) => {
                    assert_eq!(style, $tk);
                    assert_eq!(v, $v);
                }
                _ => panic!("unexpected token: {:?}", tok),
            }
        }};
    }

    // Assert that the token stream is exhausted.
    macro_rules! end {
        ($p:ident) => {{
            assert_eq!($p.next(), None);
        }};
    }
    /// test cases in libyaml scanner.c
    #[test]
    fn test_empty() {
        let s = "";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, StreamEnd);
        end!(p);
    }

    #[test]
    fn test_scalar() {
        let s = "a scalar";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, Scalar(TScalarStyle::Plain, _));
        next!(p, StreamEnd);
        end!(p);
    }

    #[test]
    fn test_explicit_scalar() {
        let s = "---
'a scalar'
...
";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, DocumentStart);
        next!(p, Scalar(TScalarStyle::SingleQuoted, _));
        next!(p, DocumentEnd);
        next!(p, StreamEnd);
        end!(p);
    }

    #[test]
    fn test_multiple_documents() {
        let s = "
'a scalar'
---
'a scalar'
---
'a scalar'
";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, Scalar(TScalarStyle::SingleQuoted, _));
        next!(p, DocumentStart);
        next!(p, Scalar(TScalarStyle::SingleQuoted, _));
        next!(p, DocumentStart);
        next!(p, Scalar(TScalarStyle::SingleQuoted, _));
        next!(p, StreamEnd);
        end!(p);
    }

    #[test]
    fn test_a_flow_sequence() {
        let s = "[item 1, item 2, item 3]";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, FlowSequenceStart);
        next_scalar!(p, TScalarStyle::Plain, "item 1");
        next!(p, FlowEntry);
        next!(p, Scalar(TScalarStyle::Plain, _));
        next!(p, FlowEntry);
        next!(p, Scalar(TScalarStyle::Plain, _));
        next!(p, FlowSequenceEnd);
        next!(p, StreamEnd);
        end!(p);
    }

    #[test]
    fn test_a_flow_mapping() {
        let s = "
{
    a simple key: a value, # Note that the KEY token is produced.
    ? a complex key: another value,
}
";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, FlowMappingStart);
        next!(p, Key);
        next!(p, Scalar(TScalarStyle::Plain, _));
        next!(p, Value);
        next!(p, Scalar(TScalarStyle::Plain, _));
        next!(p, FlowEntry);
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "a complex key");
        next!(p, Value);
        next!(p, Scalar(TScalarStyle::Plain, _));
        next!(p, FlowEntry);
        next!(p, FlowMappingEnd);
        next!(p, StreamEnd);
        end!(p);
    }

    #[test]
    fn test_block_sequences() {
        let s = "
- item 1
- item 2
-
  - item 3.1
  - item 3.2
-
  key 1: value 1
  key 2: value 2
";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, BlockSequenceStart);
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "item 1");
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "item 2");
        next!(p, BlockEntry);
        next!(p, BlockSequenceStart);
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "item 3.1");
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "item 3.2");
        next!(p, BlockEnd);
        next!(p, BlockEntry);
        next!(p, BlockMappingStart);
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "key 1");
        next!(p, Value);
        next_scalar!(p, TScalarStyle::Plain, "value 1");
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "key 2");
        next!(p, Value);
        next_scalar!(p, TScalarStyle::Plain, "value 2");
        next!(p, BlockEnd);
        next!(p, BlockEnd);
        next!(p, StreamEnd);
        end!(p);
    }

    #[test]
    fn test_block_mappings() {
        let s = "
a simple key: a value # The KEY token is produced here.
? a complex key
: another value
a mapping:
  key 1: value 1
  key 2: value 2
a sequence:
  - item 1
  - item 2
";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, BlockMappingStart);
        next!(p, Key);
        next!(p, Scalar(_, _));
        next!(p, Value);
        next!(p, Scalar(_, _));
        next!(p, Key);
        next!(p, Scalar(_, _));
        next!(p, Value);
        next!(p, Scalar(_, _));
        next!(p, Key);
        next!(p, Scalar(_, _));
        next!(p, Value); // libyaml comment seems to be wrong
        next!(p, BlockMappingStart);
        next!(p, Key);
        next!(p, Scalar(_, _));
        next!(p, Value);
        next!(p, Scalar(_, _));
        next!(p, Key);
        next!(p, Scalar(_, _));
        next!(p, Value);
        next!(p, Scalar(_, _));
        next!(p, BlockEnd);
        next!(p, Key);
        next!(p, Scalar(_, _));
        next!(p, Value);
        next!(p, BlockSequenceStart);
        next!(p, BlockEntry);
        next!(p, Scalar(_, _));
        next!(p, BlockEntry);
        next!(p, Scalar(_, _));
        next!(p, BlockEnd);
        next!(p, BlockEnd);
        next!(p, StreamEnd);
        end!(p);
    }

    #[test]
    fn test_no_block_sequence_start() {
        let s = "
key:
- item 1
- item 2
";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, BlockMappingStart);
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "key");
        next!(p, Value);
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "item 1");
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "item 2");
        next!(p, BlockEnd);
        next!(p, StreamEnd);
        end!(p);
    }

    #[test]
    fn test_collections_in_sequence() {
        let s = "
- - item 1
  - item 2
- key 1: value 1
  key 2: value 2
- ? complex key
  : complex value
";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, BlockSequenceStart);
        next!(p, BlockEntry);
        next!(p, BlockSequenceStart);
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "item 1");
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "item 2");
        next!(p, BlockEnd);
        next!(p, BlockEntry);
        next!(p, BlockMappingStart);
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "key 1");
        next!(p, Value);
        next_scalar!(p, TScalarStyle::Plain, "value 1");
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "key 2");
        next!(p, Value);
        next_scalar!(p, TScalarStyle::Plain, "value 2");
        next!(p, BlockEnd);
        next!(p, BlockEntry);
        next!(p, BlockMappingStart);
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "complex key");
        next!(p, Value);
        next_scalar!(p, TScalarStyle::Plain, "complex value");
        next!(p, BlockEnd);
        next!(p, BlockEnd);
        next!(p, StreamEnd);
        end!(p);
    }

    #[test]
    fn test_collections_in_mapping() {
        let s = "
? a sequence
: - item 1
  - item 2
? a mapping
: key 1: value 1
  key 2: value 2
";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, BlockMappingStart);
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "a sequence");
        next!(p, Value);
        next!(p, BlockSequenceStart);
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "item 1");
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "item 2");
        next!(p, BlockEnd);
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "a mapping");
        next!(p, Value);
        next!(p, BlockMappingStart);
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "key 1");
        next!(p, Value);
        next_scalar!(p, TScalarStyle::Plain, "value 1");
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "key 2");
        next!(p, Value);
        next_scalar!(p, TScalarStyle::Plain, "value 2");
        next!(p, BlockEnd);
        next!(p, BlockEnd);
        next!(p, StreamEnd);
        end!(p);
    }

    #[test]
    fn test_spec_ex7_3() {
        let s = "
{
    ? foo :,
    : bar,
}
";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, FlowMappingStart);
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "foo");
        next!(p, Value);
        next!(p, FlowEntry);
        next!(p, Value);
        next_scalar!(p, TScalarStyle::Plain, "bar");
        next!(p, FlowEntry);
        next!(p, FlowMappingEnd);
        next!(p, StreamEnd);
        end!(p);
    }

    #[test]
    fn test_plain_scalar_starting_with_indicators_in_flow() {
        // "Plain scalars must not begin with most indicators, as this would cause ambiguity with
        // other YAML constructs. However, the ":", "?" and "-" indicators may be used as the first
        // character if followed by a non-space "safe" character, as this causes no ambiguity."

        let s = "{a: :b}";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, FlowMappingStart);
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "a");
        next!(p, Value);
        next_scalar!(p, TScalarStyle::Plain, ":b");
        next!(p, FlowMappingEnd);
        next!(p, StreamEnd);
        end!(p);

        let s = "{a: ?b}";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, FlowMappingStart);
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "a");
        next!(p, Value);
        next_scalar!(p, TScalarStyle::Plain, "?b");
        next!(p, FlowMappingEnd);
        next!(p, StreamEnd);
        end!(p);
    }

    #[test]
    fn test_plain_scalar_starting_with_indicators_in_block() {
        let s = ":a";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next_scalar!(p, TScalarStyle::Plain, ":a");
        next!(p, StreamEnd);
        end!(p);

        let s = "?a";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next_scalar!(p, TScalarStyle::Plain, "?a");
        next!(p, StreamEnd);
        end!(p);
    }

    #[test]
    fn test_plain_scalar_containing_indicators_in_block() {
        let s = "a:,b";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next_scalar!(p, TScalarStyle::Plain, "a:,b");
        next!(p, StreamEnd);
        end!(p);

        let s = ":,b";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next_scalar!(p, TScalarStyle::Plain, ":,b");
        next!(p, StreamEnd);
        end!(p);
    }

    #[test]
    fn test_scanner_cr() {
        let s = "---\r\n- tok1\r\n- tok2";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, DocumentStart);
        next!(p, BlockSequenceStart);
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "tok1");
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "tok2");
        next!(p, BlockEnd);
        next!(p, StreamEnd);
        end!(p);
    }

    #[test]
    fn test_uri() {
        // TODO
    }

    #[test]
    fn test_uri_escapes() {
        // TODO
    }
}
diff --git a/third_party/rust/yaml-rust/src/yaml.rs b/third_party/rust/yaml-rust/src/yaml.rs
new file mode 100644
index 0000000000..4bb70da531
--- /dev/null
+++ b/third_party/rust/yaml-rust/src/yaml.rs
@@ -0,0 +1,739 @@
+use linked_hash_map::LinkedHashMap;
+use crate::parser::*;
+use crate::scanner::{Marker, ScanError, TScalarStyle, TokenType};
+use std::collections::BTreeMap;
+use std::f64;
+use std::i64;
+use std::mem;
+use std::ops::Index;
+use std::string;
+use std::vec;
+
/// A YAML node is stored as this `Yaml` enumeration, which provides an easy way to
/// access your YAML document.
///
/// # Examples
///
/// ```
/// use yaml_rust::Yaml;
/// let foo = Yaml::from_str("-123"); // convert the string to the appropriate YAML type
/// assert_eq!(foo.as_i64().unwrap(), -123);
///
/// // iterate over an Array
/// let vec = Yaml::Array(vec![Yaml::Integer(1), Yaml::Integer(2)]);
/// for v in vec.as_vec().unwrap() {
///     assert!(v.as_i64().is_some());
/// }
/// ```
#[derive(Clone, PartialEq, PartialOrd, Debug, Eq, Ord, Hash)]
pub enum Yaml {
    /// Float types are stored as String and parsed on demand.
    /// Note that f64 does NOT implement Eq trait and can NOT be stored in BTreeMap.
    /// Consequently, two `Real` values compare and hash by their string form.
    Real(string::String),
    /// YAML int is stored as i64.
    Integer(i64),
    /// YAML scalar.
    String(string::String),
    /// YAML bool, e.g. `true` or `false`.
    Boolean(bool),
    /// YAML array, can be accessed as a `Vec`.
    Array(self::Array),
    /// YAML hash, can be accessed as a `LinkedHashMap`.
    ///
    /// Insertion order will match the order of insertion into the map.
    Hash(self::Hash),
    /// Alias, not fully supported yet.
    Alias(usize),
    /// YAML null, e.g. `null` or `~`.
    Null,
    /// Accessing a nonexistent node via the Index trait returns `BadValue`. This
    /// simplifies error handling in the calling code. Invalid type conversion also
    /// returns `BadValue`.
    BadValue,
}
+
/// Payload of `Yaml::Array`: an ordered sequence of nodes.
pub type Array = Vec<Yaml>;
/// Payload of `Yaml::Hash`: a mapping that preserves insertion order.
pub type Hash = LinkedHashMap<Yaml, Yaml>;
+
// parse f64 as Core schema
// See: https://github.com/chyh1990/yaml-rust/issues/51
/// Parse `v` as an `f64` following the YAML 1.2 Core schema, which spells
/// the special values `.inf`/`.Inf`/`.INF` (optionally signed) and
/// `.nan`/`.NaN`/`.NAN`. Returns `None` when `v` is not a float.
fn parse_f64(v: &str) -> Option<f64> {
    match v {
        ".inf" | ".Inf" | ".INF" | "+.inf" | "+.Inf" | "+.INF" => Some(f64::INFINITY),
        "-.inf" | "-.Inf" | "-.INF" => Some(f64::NEG_INFINITY),
        // ".NaN" is the Core-schema mixed-case spelling (previously missed);
        // the bare "NaN" form is kept for backward compatibility.
        ".nan" | ".NaN" | "NaN" | ".NAN" => Some(f64::NAN),
        _ => v.parse::<f64>().ok(),
    }
}
+
/// Builds `Yaml` document trees from the event stream of a `Parser`.
pub struct YamlLoader {
    // Finished documents, in input order.
    docs: Vec<Yaml>,
    // states
    // (current node, anchor_id) tuple
    doc_stack: Vec<(Yaml, usize)>,
    // One pending key per open mapping; BadValue means "waiting for a key".
    key_stack: Vec<Yaml>,
    // anchor id -> anchored node, for resolving `*alias` references.
    anchor_map: BTreeMap<usize, Yaml>,
}
+
/// Builds the document tree incrementally as the parser emits events.
impl MarkedEventReceiver for YamlLoader {
    fn on_event(&mut self, ev: Event, _: Marker) {
        // println!("EV {:?}", ev);
        match ev {
            Event::DocumentStart => {
                // do nothing
            }
            Event::DocumentEnd => {
                match self.doc_stack.len() {
                    // empty document
                    0 => self.docs.push(Yaml::BadValue),
                    // the finished root node is the only stack entry left
                    1 => self.docs.push(self.doc_stack.pop().unwrap().0),
                    _ => unreachable!(),
                }
            }
            Event::SequenceStart(aid) => {
                self.doc_stack.push((Yaml::Array(Vec::new()), aid));
            }
            Event::SequenceEnd => {
                let node = self.doc_stack.pop().unwrap();
                self.insert_new_node(node);
            }
            Event::MappingStart(aid) => {
                self.doc_stack.push((Yaml::Hash(Hash::new()), aid));
                // BadValue marks "waiting for the next key".
                self.key_stack.push(Yaml::BadValue);
            }
            Event::MappingEnd => {
                self.key_stack.pop().unwrap();
                let node = self.doc_stack.pop().unwrap();
                self.insert_new_node(node);
            }
            Event::Scalar(v, style, aid, tag) => {
                let node = if style != TScalarStyle::Plain {
                    // Quoted scalars are always strings.
                    Yaml::String(v)
                } else if let Some(TokenType::Tag(ref handle, ref suffix)) = tag {
                    // XXX tag:yaml.org,2002:
                    if handle == "!!" {
                        match suffix.as_ref() {
                            "bool" => {
                                // "true" or "false"
                                match v.parse::<bool>() {
                                    Err(_) => Yaml::BadValue,
                                    Ok(v) => Yaml::Boolean(v),
                                }
                            }
                            "int" => match v.parse::<i64>() {
                                Err(_) => Yaml::BadValue,
                                Ok(v) => Yaml::Integer(v),
                            },
                            "float" => match parse_f64(&v) {
                                Some(_) => Yaml::Real(v),
                                None => Yaml::BadValue,
                            },
                            "null" => match v.as_ref() {
                                "~" | "null" => Yaml::Null,
                                _ => Yaml::BadValue,
                            },
                            _ => Yaml::String(v),
                        }
                    } else {
                        Yaml::String(v)
                    }
                } else {
                    // Datatype is not specified, or unrecognized
                    Yaml::from_str(&v)
                };

                self.insert_new_node((node, aid));
            }
            Event::Alias(id) => {
                // Unknown anchors degrade to BadValue instead of erroring.
                let n = match self.anchor_map.get(&id) {
                    Some(v) => v.clone(),
                    None => Yaml::BadValue,
                };
                self.insert_new_node((n, 0));
            }
            _ => { /* ignore */ }
        }
        // println!("DOC {:?}", self.doc_stack);
    }
}
+
+impl YamlLoader {
+ fn insert_new_node(&mut self, node: (Yaml, usize)) {
+ // valid anchor id starts from 1
+ if node.1 > 0 {
+ self.anchor_map.insert(node.1, node.0.clone());
+ }
+ if self.doc_stack.is_empty() {
+ self.doc_stack.push(node);
+ } else {
+ let parent = self.doc_stack.last_mut().unwrap();
+ match *parent {
+ (Yaml::Array(ref mut v), _) => v.push(node.0),
+ (Yaml::Hash(ref mut h), _) => {
+ let cur_key = self.key_stack.last_mut().unwrap();
+ // current node is a key
+ if cur_key.is_badvalue() {
+ *cur_key = node.0;
+ // current node is a value
+ } else {
+ let mut newkey = Yaml::BadValue;
+ mem::swap(&mut newkey, cur_key);
+ h.insert(newkey, node.0);
+ }
+ }
+ _ => unreachable!(),
+ }
+ }
+ }
+
+ pub fn load_from_str(source: &str) -> Result<Vec<Yaml>, ScanError> {
+ let mut loader = YamlLoader {
+ docs: Vec::new(),
+ doc_stack: Vec::new(),
+ key_stack: Vec::new(),
+ anchor_map: BTreeMap::new(),
+ };
+ let mut parser = Parser::new(source.chars());
+ parser.load(&mut loader, true)?;
+ Ok(loader.docs)
+ }
+}
+
// Generates an accessor returning a copy of the payload for Copy variants,
// e.g. `as_bool(&self) -> Option<bool>`.
macro_rules! define_as (
    ($name:ident, $t:ident, $yt:ident) => (
pub fn $name(&self) -> Option<$t> {
    match *self {
        Yaml::$yt(v) => Some(v),
        _ => None
    }
}
    );
);

// Generates a borrowing accessor, e.g. `as_str(&self) -> Option<&str>`.
macro_rules! define_as_ref (
    ($name:ident, $t:ty, $yt:ident) => (
pub fn $name(&self) -> Option<$t> {
    match *self {
        Yaml::$yt(ref v) => Some(v),
        _ => None
    }
}
    );
);

// Generates a consuming accessor, e.g. `into_string(self) -> Option<String>`.
macro_rules! define_into (
    ($name:ident, $t:ty, $yt:ident) => (
pub fn $name(self) -> Option<$t> {
    match self {
        Yaml::$yt(v) => Some(v),
        _ => None
    }
}
    );
);
+
+impl Yaml {
+ define_as!(as_bool, bool, Boolean);
+ define_as!(as_i64, i64, Integer);
+
+ define_as_ref!(as_str, &str, String);
+ define_as_ref!(as_hash, &Hash, Hash);
+ define_as_ref!(as_vec, &Array, Array);
+
+ define_into!(into_bool, bool, Boolean);
+ define_into!(into_i64, i64, Integer);
+ define_into!(into_string, String, String);
+ define_into!(into_hash, Hash, Hash);
+ define_into!(into_vec, Array, Array);
+
+ pub fn is_null(&self) -> bool {
+ match *self {
+ Yaml::Null => true,
+ _ => false,
+ }
+ }
+
+ pub fn is_badvalue(&self) -> bool {
+ match *self {
+ Yaml::BadValue => true,
+ _ => false,
+ }
+ }
+
+ pub fn is_array(&self) -> bool {
+ match *self {
+ Yaml::Array(_) => true,
+ _ => false,
+ }
+ }
+
+ pub fn as_f64(&self) -> Option<f64> {
+ match *self {
+ Yaml::Real(ref v) => parse_f64(v),
+ _ => None,
+ }
+ }
+
+ pub fn into_f64(self) -> Option<f64> {
+ match self {
+ Yaml::Real(ref v) => parse_f64(v),
+ _ => None,
+ }
+ }
+}
+
+#[cfg_attr(feature = "cargo-clippy", allow(should_implement_trait))]
+impl Yaml {
+ // Not implementing FromStr because there is no possibility of Error.
+ // This function falls back to Yaml::String if nothing else matches.
+ pub fn from_str(v: &str) -> Yaml {
+ if v.starts_with("0x") {
+ if let Ok(i) = i64::from_str_radix(&v[2..], 16) {
+ return Yaml::Integer(i);
+ }
+ }
+ if v.starts_with("0o") {
+ if let Ok(i) = i64::from_str_radix(&v[2..], 8) {
+ return Yaml::Integer(i);
+ }
+ }
+ if v.starts_with('+') {
+ if let Ok(i) = v[1..].parse::<i64>() {
+ return Yaml::Integer(i);
+ }
+ }
+ match v {
+ "~" | "null" => Yaml::Null,
+ "true" => Yaml::Boolean(true),
+ "false" => Yaml::Boolean(false),
+ _ if v.parse::<i64>().is_ok() => Yaml::Integer(v.parse::<i64>().unwrap()),
+ // try parsing as f64
+ _ if parse_f64(v).is_some() => Yaml::Real(v.to_owned()),
+ _ => Yaml::String(v.to_owned()),
+ }
+ }
+}
+
+static BAD_VALUE: Yaml = Yaml::BadValue;
+impl<'a> Index<&'a str> for Yaml {
+ type Output = Yaml;
+
+ fn index(&self, idx: &'a str) -> &Yaml {
+ let key = Yaml::String(idx.to_owned());
+ match self.as_hash() {
+ Some(h) => h.get(&key).unwrap_or(&BAD_VALUE),
+ None => &BAD_VALUE,
+ }
+ }
+}
+
+impl Index<usize> for Yaml {
+ type Output = Yaml;
+
+ fn index(&self, idx: usize) -> &Yaml {
+ if let Some(v) = self.as_vec() {
+ v.get(idx).unwrap_or(&BAD_VALUE)
+ } else if let Some(v) = self.as_hash() {
+ let key = Yaml::Integer(idx as i64);
+ v.get(&key).unwrap_or(&BAD_VALUE)
+ } else {
+ &BAD_VALUE
+ }
+ }
+}
+
+impl IntoIterator for Yaml {
+ type Item = Yaml;
+ type IntoIter = YamlIter;
+
+ fn into_iter(self) -> Self::IntoIter {
+ YamlIter {
+ yaml: self.into_vec().unwrap_or_else(Vec::new).into_iter(),
+ }
+ }
+}
+
/// Owning iterator over the elements of a `Yaml::Array`.
pub struct YamlIter {
    // Underlying vector iterator (empty for non-array values).
    yaml: vec::IntoIter<Yaml>,
}

impl Iterator for YamlIter {
    type Item = Yaml;

    fn next(&mut self) -> Option<Yaml> {
        self.yaml.next()
    }
}
+
#[cfg(test)]
mod test {
    use std::f64;
    use crate::yaml::*;

    // NOTE(review): indentation inside the YAML string literals below is
    // significant to the scanner; several tests exercise it deliberately.

    /// Scalars coerce to the requested native type; indexing a missing key
    /// yields `BadValue` instead of panicking.
    #[test]
    fn test_coerce() {
        let s = "---
a: 1
b: 2.2
c: [1, 2]
";
        let out = YamlLoader::load_from_str(&s).unwrap();
        let doc = &out[0];
        assert_eq!(doc["a"].as_i64().unwrap(), 1i64);
        assert_eq!(doc["b"].as_f64().unwrap(), 2.2f64);
        assert_eq!(doc["c"][1].as_i64().unwrap(), 2i64);
        assert!(doc["d"][0].is_badvalue());
    }

    /// Empty input loads cleanly (no documents); a bare `---` yields `Null`.
    #[test]
    fn test_empty_doc() {
        let s: String = "".to_owned();
        YamlLoader::load_from_str(&s).unwrap();
        let s: String = "---".to_owned();
        assert_eq!(YamlLoader::load_from_str(&s).unwrap()[0], Yaml::Null);
    }

    /// Smoke test over a document mixing comments, block nesting, flow
    /// sequences, both quoting styles, and non-ASCII scalars.
    #[test]
    fn test_parser() {
        let s: String = "
# comment
a0 bb: val
a1:
    b1: 4
    b2: d
a2: 4 # i'm comment
a3: [1, 2, 3]
a4:
    - - a1
      - a2
    - 2
a5: 'single_quoted'
a6: \"double_quoted\"
a7: 你好
"
        .to_owned();
        let out = YamlLoader::load_from_str(&s).unwrap();
        let doc = &out[0];
        assert_eq!(doc["a7"].as_str().unwrap(), "你好");
    }

    /// `---` separators split one stream into multiple documents.
    #[test]
    fn test_multi_doc() {
        let s = "
'a scalar'
---
'a scalar'
---
'a scalar'
";
        let out = YamlLoader::load_from_str(&s).unwrap();
        assert_eq!(out.len(), 3);
    }

    /// An alias (`*DEFAULT`) resolves to the node bound by its anchor.
    #[test]
    fn test_anchor() {
        let s = "
a1: &DEFAULT
    b1: 4
    b2: d
a2: *DEFAULT
";
        let out = YamlLoader::load_from_str(&s).unwrap();
        let doc = &out[0];
        assert_eq!(doc["a2"]["b1"].as_i64().unwrap(), 4);
    }

    /// An alias used inside the node that is still defining its own anchor
    /// cannot be resolved and degrades to `BadValue`.
    #[test]
    fn test_bad_anchor() {
        let s = "
a1: &DEFAULT
    b1: 4
    b2: *DEFAULT
";
        let out = YamlLoader::load_from_str(&s).unwrap();
        let doc = &out[0];
        assert_eq!(doc["a1"]["b2"], Yaml::BadValue);
    }

    /// An anchor with no following node yields an empty scalar.
    #[test]
    fn test_github_27() {
        // https://github.com/chyh1990/yaml-rust/issues/27
        let s = "&a";
        let out = YamlLoader::load_from_str(&s).unwrap();
        let doc = &out[0];
        assert_eq!(doc.as_str().unwrap(), "");
    }

    /// Plain-scalar resolution and explicit `!!` tags: well-formed values
    /// coerce to their tagged type; mismatches become `BadValue`.
    #[test]
    fn test_plain_datatype() {
        let s = "
- 'string'
- \"string\"
- string
- 123
- -321
- 1.23
- -1e4
- ~
- null
- true
- false
- !!str 0
- !!int 100
- !!float 2
- !!null ~
- !!bool true
- !!bool false
- 0xFF
# bad values
- !!int string
- !!float string
- !!bool null
- !!null val
- 0o77
- [ 0xF, 0xF ]
- +12345
- [ true, false ]
";
        let out = YamlLoader::load_from_str(&s).unwrap();
        let doc = &out[0];

        assert_eq!(doc[0].as_str().unwrap(), "string");
        assert_eq!(doc[1].as_str().unwrap(), "string");
        assert_eq!(doc[2].as_str().unwrap(), "string");
        assert_eq!(doc[3].as_i64().unwrap(), 123);
        assert_eq!(doc[4].as_i64().unwrap(), -321);
        assert_eq!(doc[5].as_f64().unwrap(), 1.23);
        assert_eq!(doc[6].as_f64().unwrap(), -1e4);
        assert!(doc[7].is_null());
        assert!(doc[8].is_null());
        assert_eq!(doc[9].as_bool().unwrap(), true);
        assert_eq!(doc[10].as_bool().unwrap(), false);
        assert_eq!(doc[11].as_str().unwrap(), "0");
        assert_eq!(doc[12].as_i64().unwrap(), 100);
        assert_eq!(doc[13].as_f64().unwrap(), 2.0);
        assert!(doc[14].is_null());
        assert_eq!(doc[15].as_bool().unwrap(), true);
        assert_eq!(doc[16].as_bool().unwrap(), false);
        assert_eq!(doc[17].as_i64().unwrap(), 255);
        assert!(doc[18].is_badvalue());
        assert!(doc[19].is_badvalue());
        assert!(doc[20].is_badvalue());
        assert!(doc[21].is_badvalue());
        assert_eq!(doc[22].as_i64().unwrap(), 63);
        assert_eq!(doc[23][0].as_i64().unwrap(), 15);
        assert_eq!(doc[23][1].as_i64().unwrap(), 15);
        assert_eq!(doc[24].as_i64().unwrap(), 12345);
        assert!(doc[25][0].as_bool().unwrap());
        assert!(!doc[25][1].as_bool().unwrap());
    }

    /// An unterminated flow mapping is a parse error, not a hang.
    #[test]
    fn test_bad_hyphen() {
        // See: https://github.com/chyh1990/yaml-rust/issues/23
        let s = "{-";
        assert!(YamlLoader::load_from_str(&s).is_err());
    }

    /// Fuzzer-found input with stray escapes/carriage returns must error
    /// out rather than panic.
    #[test]
    fn test_issue_65() {
        // See: https://github.com/chyh1990/yaml-rust/issues/65
        let b = "\n\"ll\\\"ll\\\r\n\"ll\\\"ll\\\r\r\r\rU\r\r\rU";
        assert!(YamlLoader::load_from_str(&b).is_err());
    }

    /// `---` fused to other text is a plain scalar, and `---` followed only
    /// by a comment is an empty (Null) document — neither may loop forever.
    #[test]
    fn test_bad_docstart() {
        assert!(YamlLoader::load_from_str("---This used to cause an infinite loop").is_ok());
        assert_eq!(
            YamlLoader::load_from_str("----"),
            Ok(vec![Yaml::String(String::from("----"))])
        );
        assert_eq!(
            YamlLoader::load_from_str("--- #here goes a comment"),
            Ok(vec![Yaml::Null])
        );
        assert_eq!(
            YamlLoader::load_from_str("---- #here goes a comment"),
            Ok(vec![Yaml::String(String::from("----"))])
        );
    }

    /// Same datatype matrix as `test_plain_datatype`, exercised through the
    /// consuming `into_*` accessors and `IntoIterator`.
    #[test]
    fn test_plain_datatype_with_into_methods() {
        let s = "
- 'string'
- \"string\"
- string
- 123
- -321
- 1.23
- -1e4
- true
- false
- !!str 0
- !!int 100
- !!float 2
- !!bool true
- !!bool false
- 0xFF
- 0o77
- +12345
- -.INF
- .NAN
- !!float .INF
";
        let mut out = YamlLoader::load_from_str(&s).unwrap().into_iter();
        let mut doc = out.next().unwrap().into_iter();

        assert_eq!(doc.next().unwrap().into_string().unwrap(), "string");
        assert_eq!(doc.next().unwrap().into_string().unwrap(), "string");
        assert_eq!(doc.next().unwrap().into_string().unwrap(), "string");
        assert_eq!(doc.next().unwrap().into_i64().unwrap(), 123);
        assert_eq!(doc.next().unwrap().into_i64().unwrap(), -321);
        assert_eq!(doc.next().unwrap().into_f64().unwrap(), 1.23);
        assert_eq!(doc.next().unwrap().into_f64().unwrap(), -1e4);
        assert_eq!(doc.next().unwrap().into_bool().unwrap(), true);
        assert_eq!(doc.next().unwrap().into_bool().unwrap(), false);
        assert_eq!(doc.next().unwrap().into_string().unwrap(), "0");
        assert_eq!(doc.next().unwrap().into_i64().unwrap(), 100);
        assert_eq!(doc.next().unwrap().into_f64().unwrap(), 2.0);
        assert_eq!(doc.next().unwrap().into_bool().unwrap(), true);
        assert_eq!(doc.next().unwrap().into_bool().unwrap(), false);
        assert_eq!(doc.next().unwrap().into_i64().unwrap(), 255);
        assert_eq!(doc.next().unwrap().into_i64().unwrap(), 63);
        assert_eq!(doc.next().unwrap().into_i64().unwrap(), 12345);
        assert_eq!(doc.next().unwrap().into_f64().unwrap(), f64::NEG_INFINITY);
        // NaN never equals itself, so only check that it parsed as a float.
        assert!(doc.next().unwrap().into_f64().is_some());
        assert_eq!(doc.next().unwrap().into_f64().unwrap(), f64::INFINITY);
    }

    /// The hash type preserves document insertion order (LinkedHashMap-style
    /// behavior, not arbitrary HashMap order).
    #[test]
    fn test_hash_order() {
        let s = "---
b: ~
a: ~
c: ~
";
        let out = YamlLoader::load_from_str(&s).unwrap();
        let first = out.into_iter().next().unwrap();
        let mut iter = first.into_hash().unwrap().into_iter();
        assert_eq!(
            Some((Yaml::String("b".to_owned()), Yaml::Null)),
            iter.next()
        );
        assert_eq!(
            Some((Yaml::String("a".to_owned()), Yaml::Null)),
            iter.next()
        );
        assert_eq!(
            Some((Yaml::String("c".to_owned()), Yaml::Null)),
            iter.next()
        );
        assert_eq!(None, iter.next());
    }

    /// Integer mapping keys are reachable through `Index<usize>`.
    #[test]
    fn test_integer_key() {
        let s = "
0:
    important: true
1:
    important: false
";
        let out = YamlLoader::load_from_str(&s).unwrap();
        let first = out.into_iter().next().unwrap();
        assert_eq!(first[0]["important"].as_bool().unwrap(), true);
    }

    /// Documents differing only in indentation width parse to equal values.
    #[test]
    fn test_indentation_equality() {
        let four_spaces = YamlLoader::load_from_str(
            r#"
hash:
    with:
        indentations
"#,
        )
        .unwrap()
        .into_iter()
        .next()
        .unwrap();

        let two_spaces = YamlLoader::load_from_str(
            r#"
hash:
  with:
    indentations
"#,
        )
        .unwrap()
        .into_iter()
        .next()
        .unwrap();

        let one_space = YamlLoader::load_from_str(
            r#"
hash:
 with:
  indentations
"#,
        )
        .unwrap()
        .into_iter()
        .next()
        .unwrap();

        let mixed_spaces = YamlLoader::load_from_str(
            r#"
hash:
     with:
               indentations
"#,
        )
        .unwrap()
        .into_iter()
        .next()
        .unwrap();

        assert_eq!(four_spaces, two_spaces);
        assert_eq!(two_spaces, one_space);
        assert_eq!(four_spaces, mixed_spaces);
    }

    /// Regression for clap-rs#965: `about:` at the same indent as `server:`
    /// is a sibling key (so `server` is Null); deeper indents nest a hash.
    #[test]
    fn test_two_space_indentations() {
        // https://github.com/kbknapp/clap-rs/issues/965

        let s = r#"
subcommands:
  - server:
    about: server related commands
subcommands2:
  - server:
      about: server related commands
subcommands3:
 - server:
    about: server related commands
        "#;

        let out = YamlLoader::load_from_str(&s).unwrap();
        let doc = &out.into_iter().next().unwrap();

        println!("{:#?}", doc);
        assert_eq!(doc["subcommands"][0]["server"], Yaml::Null);
        assert!(doc["subcommands2"][0]["server"].as_hash().is_some());
        assert!(doc["subcommands3"][0]["server"].as_hash().is_some());
    }

    /// Deeply nested flow mappings must hit the recursion-depth limit and
    /// return an error instead of overflowing the stack.
    #[test]
    fn test_recursion_depth_check_objects() {
        let s = "{a:".repeat(10_000) + &"}".repeat(10_000);
        assert!(YamlLoader::load_from_str(&s).is_err());
    }

    /// Same depth limit for deeply nested flow sequences.
    #[test]
    fn test_recursion_depth_check_arrays() {
        let s = "[".repeat(10_000) + &"]".repeat(10_000);
        assert!(YamlLoader::load_from_str(&s).is_err());
    }
}