summaryrefslogtreecommitdiffstats
path: root/third_party/rust/yaml-rust/src/scanner.rs
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--third_party/rust/yaml-rust/src/scanner.rs2182
1 files changed, 2182 insertions, 0 deletions
diff --git a/third_party/rust/yaml-rust/src/scanner.rs b/third_party/rust/yaml-rust/src/scanner.rs
new file mode 100644
index 0000000000..a8659a8522
--- /dev/null
+++ b/third_party/rust/yaml-rust/src/scanner.rs
@@ -0,0 +1,2182 @@
+use std::collections::VecDeque;
+use std::error::Error;
+use std::{char, fmt};
+
+#[derive(Clone, Copy, PartialEq, Debug, Eq)]
+pub enum TEncoding {
+ Utf8,
+}
+
+#[derive(Clone, Copy, PartialEq, Debug, Eq)]
+pub enum TScalarStyle {
+ Any,
+ Plain,
+ SingleQuoted,
+ DoubleQuoted,
+
+ Literal,
+ Foled,
+}
+
+#[derive(Clone, Copy, PartialEq, Debug, Eq)]
+pub struct Marker {
+ index: usize,
+ line: usize,
+ col: usize,
+}
+
+impl Marker {
+ fn new(index: usize, line: usize, col: usize) -> Marker {
+ Marker { index, line, col }
+ }
+
+ pub fn index(&self) -> usize {
+ self.index
+ }
+
+ pub fn line(&self) -> usize {
+ self.line
+ }
+
+ pub fn col(&self) -> usize {
+ self.col
+ }
+}
+
+#[derive(Clone, PartialEq, Debug, Eq)]
+pub struct ScanError {
+ mark: Marker,
+ info: String,
+}
+
+impl ScanError {
+ pub fn new(loc: Marker, info: &str) -> ScanError {
+ ScanError {
+ mark: loc,
+ info: info.to_owned(),
+ }
+ }
+
+ pub fn marker(&self) -> &Marker {
+ &self.mark
+ }
+}
+
+impl Error for ScanError {
+ fn description(&self) -> &str {
+ self.info.as_ref()
+ }
+
+ fn cause(&self) -> Option<&dyn Error> {
+ None
+ }
+}
+
+impl fmt::Display for ScanError {
+ // col starts from 0
+ fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+ write!(
+ formatter,
+ "{} at line {} column {}",
+ self.info,
+ self.mark.line,
+ self.mark.col + 1
+ )
+ }
+}
+
+#[derive(Clone, PartialEq, Debug, Eq)]
+pub enum TokenType {
+ NoToken,
+ StreamStart(TEncoding),
+ StreamEnd,
+ /// major, minor
+ VersionDirective(u32, u32),
+ /// handle, prefix
+ TagDirective(String, String),
+ DocumentStart,
+ DocumentEnd,
+ BlockSequenceStart,
+ BlockMappingStart,
+ BlockEnd,
+ FlowSequenceStart,
+ FlowSequenceEnd,
+ FlowMappingStart,
+ FlowMappingEnd,
+ BlockEntry,
+ FlowEntry,
+ Key,
+ Value,
+ Alias(String),
+ Anchor(String),
+ /// handle, suffix
+ Tag(String, String),
+ Scalar(TScalarStyle, String),
+}
+
+#[derive(Clone, PartialEq, Debug, Eq)]
+pub struct Token(pub Marker, pub TokenType);
+
+#[derive(Clone, PartialEq, Debug, Eq)]
+struct SimpleKey {
+ possible: bool,
+ required: bool,
+ token_number: usize,
+ mark: Marker,
+}
+
+impl SimpleKey {
+ fn new(mark: Marker) -> SimpleKey {
+ SimpleKey {
+ possible: false,
+ required: false,
+ token_number: 0,
+ mark,
+ }
+ }
+}
+
+#[derive(Debug)]
+pub struct Scanner<T> {
+ rdr: T,
+ mark: Marker,
+ tokens: VecDeque<Token>,
+ buffer: VecDeque<char>,
+ error: Option<ScanError>,
+
+ stream_start_produced: bool,
+ stream_end_produced: bool,
+ adjacent_value_allowed_at: usize,
+ simple_key_allowed: bool,
+ simple_keys: Vec<SimpleKey>,
+ indent: isize,
+ indents: Vec<isize>,
+ flow_level: u8,
+ tokens_parsed: usize,
+ token_available: bool,
+}
+
+impl<T: Iterator<Item = char>> Iterator for Scanner<T> {
+ type Item = Token;
+ fn next(&mut self) -> Option<Token> {
+ if self.error.is_some() {
+ return None;
+ }
+ match self.next_token() {
+ Ok(tok) => tok,
+ Err(e) => {
+ self.error = Some(e);
+ None
+ }
+ }
+ }
+}
+
+#[inline]
+fn is_z(c: char) -> bool {
+ c == '\0'
+}
+#[inline]
+fn is_break(c: char) -> bool {
+ c == '\n' || c == '\r'
+}
+#[inline]
+fn is_breakz(c: char) -> bool {
+ is_break(c) || is_z(c)
+}
+#[inline]
+fn is_blank(c: char) -> bool {
+ c == ' ' || c == '\t'
+}
+#[inline]
+fn is_blankz(c: char) -> bool {
+ is_blank(c) || is_breakz(c)
+}
+#[inline]
+fn is_digit(c: char) -> bool {
+ c >= '0' && c <= '9'
+}
+#[inline]
+fn is_alpha(c: char) -> bool {
+ match c {
+ '0'..='9' | 'a'..='z' | 'A'..='Z' => true,
+ '_' | '-' => true,
+ _ => false,
+ }
+}
+#[inline]
+fn is_hex(c: char) -> bool {
+ (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')
+}
+#[inline]
+fn as_hex(c: char) -> u32 {
+ match c {
+ '0'..='9' => (c as u32) - ('0' as u32),
+ 'a'..='f' => (c as u32) - ('a' as u32) + 10,
+ 'A'..='F' => (c as u32) - ('A' as u32) + 10,
+ _ => unreachable!(),
+ }
+}
+#[inline]
+fn is_flow(c: char) -> bool {
+ match c {
+ ',' | '[' | ']' | '{' | '}' => true,
+ _ => false,
+ }
+}
+
+pub type ScanResult = Result<(), ScanError>;
+
+impl<T: Iterator<Item = char>> Scanner<T> {
+ /// Creates the YAML tokenizer.
+ pub fn new(rdr: T) -> Scanner<T> {
+ Scanner {
+ rdr,
+ buffer: VecDeque::new(),
+ mark: Marker::new(0, 1, 0),
+ tokens: VecDeque::new(),
+ error: None,
+
+ stream_start_produced: false,
+ stream_end_produced: false,
+ adjacent_value_allowed_at: 0,
+ simple_key_allowed: true,
+ simple_keys: Vec::new(),
+ indent: -1,
+ indents: Vec::new(),
+ flow_level: 0,
+ tokens_parsed: 0,
+ token_available: false,
+ }
+ }
+ #[inline]
+ pub fn get_error(&self) -> Option<ScanError> {
+ match self.error {
+ None => None,
+ Some(ref e) => Some(e.clone()),
+ }
+ }
+
+ #[inline]
+ fn lookahead(&mut self, count: usize) {
+ if self.buffer.len() >= count {
+ return;
+ }
+ for _ in 0..(count - self.buffer.len()) {
+ self.buffer.push_back(self.rdr.next().unwrap_or('\0'));
+ }
+ }
+ #[inline]
+ fn skip(&mut self) {
+ let c = self.buffer.pop_front().unwrap();
+
+ self.mark.index += 1;
+ if c == '\n' {
+ self.mark.line += 1;
+ self.mark.col = 0;
+ } else {
+ self.mark.col += 1;
+ }
+ }
+ #[inline]
+ fn skip_line(&mut self) {
+ if self.buffer[0] == '\r' && self.buffer[1] == '\n' {
+ self.skip();
+ self.skip();
+ } else if is_break(self.buffer[0]) {
+ self.skip();
+ }
+ }
+ #[inline]
+ fn ch(&self) -> char {
+ self.buffer[0]
+ }
+ #[inline]
+ fn ch_is(&self, c: char) -> bool {
+ self.buffer[0] == c
+ }
+ #[allow(dead_code)]
+ #[inline]
+ fn eof(&self) -> bool {
+ self.ch_is('\0')
+ }
+ #[inline]
+ pub fn stream_started(&self) -> bool {
+ self.stream_start_produced
+ }
+ #[inline]
+ pub fn stream_ended(&self) -> bool {
+ self.stream_end_produced
+ }
+ #[inline]
+ pub fn mark(&self) -> Marker {
+ self.mark
+ }
+ #[inline]
+ fn read_break(&mut self, s: &mut String) {
+ if self.buffer[0] == '\r' && self.buffer[1] == '\n' {
+ s.push('\n');
+ self.skip();
+ self.skip();
+ } else if self.buffer[0] == '\r' || self.buffer[0] == '\n' {
+ s.push('\n');
+ self.skip();
+ } else {
+ unreachable!();
+ }
+ }
+ fn insert_token(&mut self, pos: usize, tok: Token) {
+ let old_len = self.tokens.len();
+ assert!(pos <= old_len);
+ self.tokens.push_back(tok);
+ for i in 0..old_len - pos {
+ self.tokens.swap(old_len - i, old_len - i - 1);
+ }
+ }
+ fn allow_simple_key(&mut self) {
+ self.simple_key_allowed = true;
+ }
+ fn disallow_simple_key(&mut self) {
+ self.simple_key_allowed = false;
+ }
+
+ pub fn fetch_next_token(&mut self) -> ScanResult {
+ self.lookahead(1);
+ // println!("--> fetch_next_token Cur {:?} {:?}", self.mark, self.ch());
+
+ if !self.stream_start_produced {
+ self.fetch_stream_start();
+ return Ok(());
+ }
+ self.skip_to_next_token();
+
+ self.stale_simple_keys()?;
+
+ let mark = self.mark;
+ self.unroll_indent(mark.col as isize);
+
+ self.lookahead(4);
+
+ if is_z(self.ch()) {
+ self.fetch_stream_end()?;
+ return Ok(());
+ }
+
+ // Is it a directive?
+ if self.mark.col == 0 && self.ch_is('%') {
+ return self.fetch_directive();
+ }
+
+ if self.mark.col == 0
+ && self.buffer[0] == '-'
+ && self.buffer[1] == '-'
+ && self.buffer[2] == '-'
+ && is_blankz(self.buffer[3])
+ {
+ self.fetch_document_indicator(TokenType::DocumentStart)?;
+ return Ok(());
+ }
+
+ if self.mark.col == 0
+ && self.buffer[0] == '.'
+ && self.buffer[1] == '.'
+ && self.buffer[2] == '.'
+ && is_blankz(self.buffer[3])
+ {
+ self.fetch_document_indicator(TokenType::DocumentEnd)?;
+ return Ok(());
+ }
+
+ let c = self.buffer[0];
+ let nc = self.buffer[1];
+ match c {
+ '[' => self.fetch_flow_collection_start(TokenType::FlowSequenceStart),
+ '{' => self.fetch_flow_collection_start(TokenType::FlowMappingStart),
+ ']' => self.fetch_flow_collection_end(TokenType::FlowSequenceEnd),
+ '}' => self.fetch_flow_collection_end(TokenType::FlowMappingEnd),
+ ',' => self.fetch_flow_entry(),
+ '-' if is_blankz(nc) => self.fetch_block_entry(),
+ '?' if is_blankz(nc) => self.fetch_key(),
+ ':' if is_blankz(nc)
+ || (self.flow_level > 0
+ && (is_flow(nc) || self.mark.index == self.adjacent_value_allowed_at)) =>
+ {
+ self.fetch_value()
+ }
+ // Is it an alias?
+ '*' => self.fetch_anchor(true),
+ // Is it an anchor?
+ '&' => self.fetch_anchor(false),
+ '!' => self.fetch_tag(),
+ // Is it a literal scalar?
+ '|' if self.flow_level == 0 => self.fetch_block_scalar(true),
+ // Is it a folded scalar?
+ '>' if self.flow_level == 0 => self.fetch_block_scalar(false),
+ '\'' => self.fetch_flow_scalar(true),
+ '"' => self.fetch_flow_scalar(false),
+ // plain scalar
+ '-' if !is_blankz(nc) => self.fetch_plain_scalar(),
+ ':' | '?' if !is_blankz(nc) && self.flow_level == 0 => self.fetch_plain_scalar(),
+ '%' | '@' | '`' => Err(ScanError::new(
+ self.mark,
+ &format!("unexpected character: `{}'", c),
+ )),
+ _ => self.fetch_plain_scalar(),
+ }
+ }
+
+ pub fn next_token(&mut self) -> Result<Option<Token>, ScanError> {
+ if self.stream_end_produced {
+ return Ok(None);
+ }
+
+ if !self.token_available {
+ self.fetch_more_tokens()?;
+ }
+ let t = self.tokens.pop_front().unwrap();
+ self.token_available = false;
+ self.tokens_parsed += 1;
+
+ if let TokenType::StreamEnd = t.1 {
+ self.stream_end_produced = true;
+ }
+ Ok(Some(t))
+ }
+
+ pub fn fetch_more_tokens(&mut self) -> ScanResult {
+ let mut need_more;
+ loop {
+ need_more = false;
+ if self.tokens.is_empty() {
+ need_more = true;
+ } else {
+ self.stale_simple_keys()?;
+ for sk in &self.simple_keys {
+ if sk.possible && sk.token_number == self.tokens_parsed {
+ need_more = true;
+ break;
+ }
+ }
+ }
+
+ if !need_more {
+ break;
+ }
+ self.fetch_next_token()?;
+ }
+ self.token_available = true;
+
+ Ok(())
+ }
+
+ fn stale_simple_keys(&mut self) -> ScanResult {
+ for sk in &mut self.simple_keys {
+ if sk.possible
+ && (sk.mark.line < self.mark.line || sk.mark.index + 1024 < self.mark.index)
+ {
+ if sk.required {
+ return Err(ScanError::new(self.mark, "simple key expect ':'"));
+ }
+ sk.possible = false;
+ }
+ }
+ Ok(())
+ }
+
+ fn skip_to_next_token(&mut self) {
+ loop {
+ self.lookahead(1);
+ // TODO(chenyh) BOM
+ match self.ch() {
+ ' ' => self.skip(),
+ '\t' if self.flow_level > 0 || !self.simple_key_allowed => self.skip(),
+ '\n' | '\r' => {
+ self.lookahead(2);
+ self.skip_line();
+ if self.flow_level == 0 {
+ self.allow_simple_key();
+ }
+ }
+ '#' => {
+ while !is_breakz(self.ch()) {
+ self.skip();
+ self.lookahead(1);
+ }
+ }
+ _ => break,
+ }
+ }
+ }
+
+ fn fetch_stream_start(&mut self) {
+ let mark = self.mark;
+ self.indent = -1;
+ self.stream_start_produced = true;
+ self.allow_simple_key();
+ self.tokens
+ .push_back(Token(mark, TokenType::StreamStart(TEncoding::Utf8)));
+ self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
+ }
+
+ fn fetch_stream_end(&mut self) -> ScanResult {
+ // force new line
+ if self.mark.col != 0 {
+ self.mark.col = 0;
+ self.mark.line += 1;
+ }
+
+ self.unroll_indent(-1);
+ self.remove_simple_key()?;
+ self.disallow_simple_key();
+
+ self.tokens
+ .push_back(Token(self.mark, TokenType::StreamEnd));
+ Ok(())
+ }
+
+ fn fetch_directive(&mut self) -> ScanResult {
+ self.unroll_indent(-1);
+ self.remove_simple_key()?;
+
+ self.disallow_simple_key();
+
+ let tok = self.scan_directive()?;
+
+ self.tokens.push_back(tok);
+
+ Ok(())
+ }
+
+ fn scan_directive(&mut self) -> Result<Token, ScanError> {
+ let start_mark = self.mark;
+ self.skip();
+
+ let name = self.scan_directive_name()?;
+ let tok = match name.as_ref() {
+ "YAML" => self.scan_version_directive_value(&start_mark)?,
+ "TAG" => self.scan_tag_directive_value(&start_mark)?,
+ // XXX This should be a warning instead of an error
+ _ => {
+ // skip current line
+ self.lookahead(1);
+ while !is_breakz(self.ch()) {
+ self.skip();
+ self.lookahead(1);
+ }
+ // XXX return an empty TagDirective token
+ Token(
+ start_mark,
+ TokenType::TagDirective(String::new(), String::new()),
+ )
+ // return Err(ScanError::new(start_mark,
+ // "while scanning a directive, found unknown directive name"))
+ }
+ };
+ self.lookahead(1);
+
+ while is_blank(self.ch()) {
+ self.skip();
+ self.lookahead(1);
+ }
+
+ if self.ch() == '#' {
+ while !is_breakz(self.ch()) {
+ self.skip();
+ self.lookahead(1);
+ }
+ }
+
+ if !is_breakz(self.ch()) {
+ return Err(ScanError::new(
+ start_mark,
+ "while scanning a directive, did not find expected comment or line break",
+ ));
+ }
+
+ // Eat a line break
+ if is_break(self.ch()) {
+ self.lookahead(2);
+ self.skip_line();
+ }
+
+ Ok(tok)
+ }
+
+ fn scan_version_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> {
+ self.lookahead(1);
+
+ while is_blank(self.ch()) {
+ self.skip();
+ self.lookahead(1);
+ }
+
+ let major = self.scan_version_directive_number(mark)?;
+
+ if self.ch() != '.' {
+ return Err(ScanError::new(
+ *mark,
+ "while scanning a YAML directive, did not find expected digit or '.' character",
+ ));
+ }
+
+ self.skip();
+
+ let minor = self.scan_version_directive_number(mark)?;
+
+ Ok(Token(*mark, TokenType::VersionDirective(major, minor)))
+ }
+
+ fn scan_directive_name(&mut self) -> Result<String, ScanError> {
+ let start_mark = self.mark;
+ let mut string = String::new();
+ self.lookahead(1);
+ while is_alpha(self.ch()) {
+ string.push(self.ch());
+ self.skip();
+ self.lookahead(1);
+ }
+
+ if string.is_empty() {
+ return Err(ScanError::new(
+ start_mark,
+ "while scanning a directive, could not find expected directive name",
+ ));
+ }
+
+ if !is_blankz(self.ch()) {
+ return Err(ScanError::new(
+ start_mark,
+ "while scanning a directive, found unexpected non-alphabetical character",
+ ));
+ }
+
+ Ok(string)
+ }
+
+ fn scan_version_directive_number(&mut self, mark: &Marker) -> Result<u32, ScanError> {
+ let mut val = 0u32;
+ let mut length = 0usize;
+ self.lookahead(1);
+ while is_digit(self.ch()) {
+ if length + 1 > 9 {
+ return Err(ScanError::new(
+ *mark,
+ "while scanning a YAML directive, found extremely long version number",
+ ));
+ }
+ length += 1;
+ val = val * 10 + ((self.ch() as u32) - ('0' as u32));
+ self.skip();
+ self.lookahead(1);
+ }
+
+ if length == 0 {
+ return Err(ScanError::new(
+ *mark,
+ "while scanning a YAML directive, did not find expected version number",
+ ));
+ }
+
+ Ok(val)
+ }
+
+ fn scan_tag_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> {
+ self.lookahead(1);
+ /* Eat whitespaces. */
+ while is_blank(self.ch()) {
+ self.skip();
+ self.lookahead(1);
+ }
+ let handle = self.scan_tag_handle(true, mark)?;
+
+ self.lookahead(1);
+ /* Eat whitespaces. */
+ while is_blank(self.ch()) {
+ self.skip();
+ self.lookahead(1);
+ }
+
+ let is_secondary = handle == "!!";
+ let prefix = self.scan_tag_uri(true, is_secondary, &String::new(), mark)?;
+
+ self.lookahead(1);
+
+ if is_blankz(self.ch()) {
+ Ok(Token(*mark, TokenType::TagDirective(handle, prefix)))
+ } else {
+ Err(ScanError::new(
+ *mark,
+ "while scanning TAG, did not find expected whitespace or line break",
+ ))
+ }
+ }
+
+ fn fetch_tag(&mut self) -> ScanResult {
+ self.save_simple_key()?;
+ self.disallow_simple_key();
+
+ let tok = self.scan_tag()?;
+ self.tokens.push_back(tok);
+ Ok(())
+ }
+
+ fn scan_tag(&mut self) -> Result<Token, ScanError> {
+ let start_mark = self.mark;
+ let mut handle = String::new();
+ let mut suffix;
+ let mut secondary = false;
+
+ // Check if the tag is in the canonical form (verbatim).
+ self.lookahead(2);
+
+ if self.buffer[1] == '<' {
+ // Eat '!<'
+ self.skip();
+ self.skip();
+ suffix = self.scan_tag_uri(false, false, &String::new(), &start_mark)?;
+
+ if self.ch() != '>' {
+ return Err(ScanError::new(
+ start_mark,
+ "while scanning a tag, did not find the expected '>'",
+ ));
+ }
+
+ self.skip();
+ } else {
+ // The tag has either the '!suffix' or the '!handle!suffix'
+ handle = self.scan_tag_handle(false, &start_mark)?;
+ // Check if it is, indeed, handle.
+ if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
+ if handle == "!!" {
+ secondary = true;
+ }
+ suffix = self.scan_tag_uri(false, secondary, &String::new(), &start_mark)?;
+ } else {
+ suffix = self.scan_tag_uri(false, false, &handle, &start_mark)?;
+ handle = "!".to_owned();
+ // A special case: the '!' tag. Set the handle to '' and the
+ // suffix to '!'.
+ if suffix.is_empty() {
+ handle.clear();
+ suffix = "!".to_owned();
+ }
+ }
+ }
+
+ self.lookahead(1);
+ if is_blankz(self.ch()) {
+ // XXX: ex 7.2, an empty scalar can follow a secondary tag
+ Ok(Token(start_mark, TokenType::Tag(handle, suffix)))
+ } else {
+ Err(ScanError::new(
+ start_mark,
+ "while scanning a tag, did not find expected whitespace or line break",
+ ))
+ }
+ }
+
+ fn scan_tag_handle(&mut self, directive: bool, mark: &Marker) -> Result<String, ScanError> {
+ let mut string = String::new();
+ self.lookahead(1);
+ if self.ch() != '!' {
+ return Err(ScanError::new(
+ *mark,
+ "while scanning a tag, did not find expected '!'",
+ ));
+ }
+
+ string.push(self.ch());
+ self.skip();
+
+ self.lookahead(1);
+ while is_alpha(self.ch()) {
+ string.push(self.ch());
+ self.skip();
+ self.lookahead(1);
+ }
+
+ // Check if the trailing character is '!' and copy it.
+ if self.ch() == '!' {
+ string.push(self.ch());
+ self.skip();
+ } else if directive && string != "!" {
+ // It's either the '!' tag or not really a tag handle. If it's a %TAG
+ // directive, it's an error. If it's a tag token, it must be a part of
+ // URI.
+ return Err(ScanError::new(
+ *mark,
+ "while parsing a tag directive, did not find expected '!'",
+ ));
+ }
+ Ok(string)
+ }
+
+ fn scan_tag_uri(
+ &mut self,
+ directive: bool,
+ _is_secondary: bool,
+ head: &str,
+ mark: &Marker,
+ ) -> Result<String, ScanError> {
+ let mut length = head.len();
+ let mut string = String::new();
+
+ // Copy the head if needed.
+ // Note that we don't copy the leading '!' character.
+ if length > 1 {
+ string.extend(head.chars().skip(1));
+ }
+
+ self.lookahead(1);
+ /*
+ * The set of characters that may appear in URI is as follows:
+ *
+ * '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
+ * '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']',
+ * '%'.
+ */
+ while match self.ch() {
+ ';' | '/' | '?' | ':' | '@' | '&' => true,
+ '=' | '+' | '$' | ',' | '.' | '!' | '~' | '*' | '\'' | '(' | ')' | '[' | ']' => true,
+ '%' => true,
+ c if is_alpha(c) => true,
+ _ => false,
+ } {
+ // Check if it is a URI-escape sequence.
+ if self.ch() == '%' {
+ string.push(self.scan_uri_escapes(directive, mark)?);
+ } else {
+ string.push(self.ch());
+ self.skip();
+ }
+
+ length += 1;
+ self.lookahead(1);
+ }
+
+ if length == 0 {
+ return Err(ScanError::new(
+ *mark,
+ "while parsing a tag, did not find expected tag URI",
+ ));
+ }
+
+ Ok(string)
+ }
+
+ fn scan_uri_escapes(&mut self, _directive: bool, mark: &Marker) -> Result<char, ScanError> {
+ let mut width = 0usize;
+ let mut code = 0u32;
+ loop {
+ self.lookahead(3);
+
+ if !(self.ch() == '%' && is_hex(self.buffer[1]) && is_hex(self.buffer[2])) {
+ return Err(ScanError::new(
+ *mark,
+ "while parsing a tag, did not find URI escaped octet",
+ ));
+ }
+
+ let octet = (as_hex(self.buffer[1]) << 4) + as_hex(self.buffer[2]);
+ if width == 0 {
+ width = match octet {
+ _ if octet & 0x80 == 0x00 => 1,
+ _ if octet & 0xE0 == 0xC0 => 2,
+ _ if octet & 0xF0 == 0xE0 => 3,
+ _ if octet & 0xF8 == 0xF0 => 4,
+ _ => {
+ return Err(ScanError::new(
+ *mark,
+ "while parsing a tag, found an incorrect leading UTF-8 octet",
+ ));
+ }
+ };
+ code = octet;
+ } else {
+ if octet & 0xc0 != 0x80 {
+ return Err(ScanError::new(
+ *mark,
+ "while parsing a tag, found an incorrect trailing UTF-8 octet",
+ ));
+ }
+ code = (code << 8) + octet;
+ }
+
+ self.skip();
+ self.skip();
+ self.skip();
+
+ width -= 1;
+ if width == 0 {
+ break;
+ }
+ }
+
+ match char::from_u32(code) {
+ Some(ch) => Ok(ch),
+ None => Err(ScanError::new(
+ *mark,
+ "while parsing a tag, found an invalid UTF-8 codepoint",
+ )),
+ }
+ }
+
+ fn fetch_anchor(&mut self, alias: bool) -> ScanResult {
+ self.save_simple_key()?;
+ self.disallow_simple_key();
+
+ let tok = self.scan_anchor(alias)?;
+
+ self.tokens.push_back(tok);
+
+ Ok(())
+ }
+
+ fn scan_anchor(&mut self, alias: bool) -> Result<Token, ScanError> {
+ let mut string = String::new();
+ let start_mark = self.mark;
+
+ self.skip();
+ self.lookahead(1);
+ while is_alpha(self.ch()) {
+ string.push(self.ch());
+ self.skip();
+ self.lookahead(1);
+ }
+
+ if string.is_empty()
+ || match self.ch() {
+ c if is_blankz(c) => false,
+ '?' | ':' | ',' | ']' | '}' | '%' | '@' | '`' => false,
+ _ => true,
+ }
+ {
+ return Err(ScanError::new(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
+ }
+
+ if alias {
+ Ok(Token(start_mark, TokenType::Alias(string)))
+ } else {
+ Ok(Token(start_mark, TokenType::Anchor(string)))
+ }
+ }
+
+ fn fetch_flow_collection_start(&mut self, tok: TokenType) -> ScanResult {
+ // The indicators '[' and '{' may start a simple key.
+ self.save_simple_key()?;
+
+ self.increase_flow_level()?;
+
+ self.allow_simple_key();
+
+ let start_mark = self.mark;
+ self.skip();
+
+ self.tokens.push_back(Token(start_mark, tok));
+ Ok(())
+ }
+
+ fn fetch_flow_collection_end(&mut self, tok: TokenType) -> ScanResult {
+ self.remove_simple_key()?;
+ self.decrease_flow_level();
+
+ self.disallow_simple_key();
+
+ let start_mark = self.mark;
+ self.skip();
+
+ self.tokens.push_back(Token(start_mark, tok));
+ Ok(())
+ }
+
+ fn fetch_flow_entry(&mut self) -> ScanResult {
+ self.remove_simple_key()?;
+ self.allow_simple_key();
+
+ let start_mark = self.mark;
+ self.skip();
+
+ self.tokens
+ .push_back(Token(start_mark, TokenType::FlowEntry));
+ Ok(())
+ }
+
+ fn increase_flow_level(&mut self) -> ScanResult {
+ self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
+ self.flow_level = self
+ .flow_level
+ .checked_add(1)
+ .ok_or_else(|| ScanError::new(self.mark, "recursion limit exceeded"))?;
+ Ok(())
+ }
+ fn decrease_flow_level(&mut self) {
+ if self.flow_level > 0 {
+ self.flow_level -= 1;
+ self.simple_keys.pop().unwrap();
+ }
+ }
+
+ fn fetch_block_entry(&mut self) -> ScanResult {
+ if self.flow_level == 0 {
+ // Check if we are allowed to start a new entry.
+ if !self.simple_key_allowed {
+ return Err(ScanError::new(
+ self.mark,
+ "block sequence entries are not allowed in this context",
+ ));
+ }
+
+ let mark = self.mark;
+ // generate BLOCK-SEQUENCE-START if indented
+ self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark);
+ } else {
+ // - * only allowed in block
+ return Err(ScanError::new(
+ self.mark,
+ r#""-" is only valid inside a block"#,
+ ));
+ }
+ self.remove_simple_key()?;
+ self.allow_simple_key();
+
+ let start_mark = self.mark;
+ self.skip();
+
+ self.tokens
+ .push_back(Token(start_mark, TokenType::BlockEntry));
+ Ok(())
+ }
+
+ fn fetch_document_indicator(&mut self, t: TokenType) -> ScanResult {
+ self.unroll_indent(-1);
+ self.remove_simple_key()?;
+ self.disallow_simple_key();
+
+ let mark = self.mark;
+
+ self.skip();
+ self.skip();
+ self.skip();
+
+ self.tokens.push_back(Token(mark, t));
+ Ok(())
+ }
+
+ fn fetch_block_scalar(&mut self, literal: bool) -> ScanResult {
+ self.save_simple_key()?;
+ self.allow_simple_key();
+ let tok = self.scan_block_scalar(literal)?;
+
+ self.tokens.push_back(tok);
+ Ok(())
+ }
+
+ fn scan_block_scalar(&mut self, literal: bool) -> Result<Token, ScanError> {
+ let start_mark = self.mark;
+ let mut chomping: i32 = 0;
+ let mut increment: usize = 0;
+ let mut indent: usize = 0;
+ let mut trailing_blank: bool;
+ let mut leading_blank: bool = false;
+
+ let mut string = String::new();
+ let mut leading_break = String::new();
+ let mut trailing_breaks = String::new();
+
+ // skip '|' or '>'
+ self.skip();
+ self.lookahead(1);
+
+ if self.ch() == '+' || self.ch() == '-' {
+ if self.ch() == '+' {
+ chomping = 1;
+ } else {
+ chomping = -1;
+ }
+ self.skip();
+ self.lookahead(1);
+ if is_digit(self.ch()) {
+ if self.ch() == '0' {
+ return Err(ScanError::new(
+ start_mark,
+ "while scanning a block scalar, found an indentation indicator equal to 0",
+ ));
+ }
+ increment = (self.ch() as usize) - ('0' as usize);
+ self.skip();
+ }
+ } else if is_digit(self.ch()) {
+ if self.ch() == '0' {
+ return Err(ScanError::new(
+ start_mark,
+ "while scanning a block scalar, found an indentation indicator equal to 0",
+ ));
+ }
+
+ increment = (self.ch() as usize) - ('0' as usize);
+ self.skip();
+ self.lookahead(1);
+ if self.ch() == '+' || self.ch() == '-' {
+ if self.ch() == '+' {
+ chomping = 1;
+ } else {
+ chomping = -1;
+ }
+ self.skip();
+ }
+ }
+
+ // Eat whitespaces and comments to the end of the line.
+ self.lookahead(1);
+
+ while is_blank(self.ch()) {
+ self.skip();
+ self.lookahead(1);
+ }
+
+ if self.ch() == '#' {
+ while !is_breakz(self.ch()) {
+ self.skip();
+ self.lookahead(1);
+ }
+ }
+
+ // Check if we are at the end of the line.
+ if !is_breakz(self.ch()) {
+ return Err(ScanError::new(
+ start_mark,
+ "while scanning a block scalar, did not find expected comment or line break",
+ ));
+ }
+
+ if is_break(self.ch()) {
+ self.lookahead(2);
+ self.skip_line();
+ }
+
+ if increment > 0 {
+ indent = if self.indent >= 0 {
+ (self.indent + increment as isize) as usize
+ } else {
+ increment
+ }
+ }
+ // Scan the leading line breaks and determine the indentation level if needed.
+ self.block_scalar_breaks(&mut indent, &mut trailing_breaks)?;
+
+ self.lookahead(1);
+
+ let start_mark = self.mark;
+
+ while self.mark.col == indent && !is_z(self.ch()) {
+ // We are at the beginning of a non-empty line.
+ trailing_blank = is_blank(self.ch());
+ if !literal && !leading_break.is_empty() && !leading_blank && !trailing_blank {
+ if trailing_breaks.is_empty() {
+ string.push(' ');
+ }
+ leading_break.clear();
+ } else {
+ string.push_str(&leading_break);
+ leading_break.clear();
+ }
+
+ string.push_str(&trailing_breaks);
+ trailing_breaks.clear();
+
+ leading_blank = is_blank(self.ch());
+
+ while !is_breakz(self.ch()) {
+ string.push(self.ch());
+ self.skip();
+ self.lookahead(1);
+ }
+ // break on EOF
+ if is_z(self.ch()) {
+ break;
+ }
+
+ self.lookahead(2);
+ self.read_break(&mut leading_break);
+
+ // Eat the following indentation spaces and line breaks.
+ self.block_scalar_breaks(&mut indent, &mut trailing_breaks)?;
+ }
+
+ // Chomp the tail.
+ if chomping != -1 {
+ string.push_str(&leading_break);
+ }
+
+ if chomping == 1 {
+ string.push_str(&trailing_breaks);
+ }
+
+ if literal {
+ Ok(Token(
+ start_mark,
+ TokenType::Scalar(TScalarStyle::Literal, string),
+ ))
+ } else {
+ Ok(Token(
+ start_mark,
+ TokenType::Scalar(TScalarStyle::Foled, string),
+ ))
+ }
+ }
+
+ fn block_scalar_breaks(&mut self, indent: &mut usize, breaks: &mut String) -> ScanResult {
+ let mut max_indent = 0;
+ loop {
+ self.lookahead(1);
+ while (*indent == 0 || self.mark.col < *indent) && self.buffer[0] == ' ' {
+ self.skip();
+ self.lookahead(1);
+ }
+
+ if self.mark.col > max_indent {
+ max_indent = self.mark.col;
+ }
+
+ // Check for a tab character messing the indentation.
+ if (*indent == 0 || self.mark.col < *indent) && self.buffer[0] == '\t' {
+ return Err(ScanError::new(self.mark,
+ "while scanning a block scalar, found a tab character where an indentation space is expected"));
+ }
+
+ if !is_break(self.ch()) {
+ break;
+ }
+
+ self.lookahead(2);
+ // Consume the line break.
+ self.read_break(breaks);
+ }
+
+ if *indent == 0 {
+ *indent = max_indent;
+ if *indent < (self.indent + 1) as usize {
+ *indent = (self.indent + 1) as usize;
+ }
+ if *indent < 1 {
+ *indent = 1;
+ }
+ }
+ Ok(())
+ }
+
+ fn fetch_flow_scalar(&mut self, single: bool) -> ScanResult {
+ self.save_simple_key()?;
+ self.disallow_simple_key();
+
+ let tok = self.scan_flow_scalar(single)?;
+
+ // From spec: To ensure JSON compatibility, if a key inside a flow mapping is JSON-like,
+ // YAML allows the following value to be specified adjacent to the “:”.
+ self.adjacent_value_allowed_at = self.mark.index;
+
+ self.tokens.push_back(tok);
+ Ok(())
+ }
+
+ fn scan_flow_scalar(&mut self, single: bool) -> Result<Token, ScanError> {
+ let start_mark = self.mark;
+
+ let mut string = String::new();
+ let mut leading_break = String::new();
+ let mut trailing_breaks = String::new();
+ let mut whitespaces = String::new();
+ let mut leading_blanks;
+
+ /* Eat the left quote. */
+ self.skip();
+
+ loop {
+ /* Check for a document indicator. */
+ self.lookahead(4);
+
+ if self.mark.col == 0
+ && (((self.buffer[0] == '-') && (self.buffer[1] == '-') && (self.buffer[2] == '-'))
+ || ((self.buffer[0] == '.')
+ && (self.buffer[1] == '.')
+ && (self.buffer[2] == '.')))
+ && is_blankz(self.buffer[3])
+ {
+ return Err(ScanError::new(
+ start_mark,
+ "while scanning a quoted scalar, found unexpected document indicator",
+ ));
+ }
+
+ if is_z(self.ch()) {
+ return Err(ScanError::new(
+ start_mark,
+ "while scanning a quoted scalar, found unexpected end of stream",
+ ));
+ }
+
+ self.lookahead(2);
+
+ leading_blanks = false;
+ // Consume non-blank characters.
+
+ while !is_blankz(self.ch()) {
+ match self.ch() {
+ // Check for an escaped single quote.
+ '\'' if self.buffer[1] == '\'' && single => {
+ string.push('\'');
+ self.skip();
+ self.skip();
+ }
+ // Check for the right quote.
+ '\'' if single => break,
+ '"' if !single => break,
+ // Check for an escaped line break.
+ '\\' if !single && is_break(self.buffer[1]) => {
+ self.lookahead(3);
+ self.skip();
+ self.skip_line();
+ leading_blanks = true;
+ break;
+ }
+ // Check for an escape sequence.
+ '\\' if !single => {
+ let mut code_length = 0usize;
+ match self.buffer[1] {
+ '0' => string.push('\0'),
+ 'a' => string.push('\x07'),
+ 'b' => string.push('\x08'),
+ 't' | '\t' => string.push('\t'),
+ 'n' => string.push('\n'),
+ 'v' => string.push('\x0b'),
+ 'f' => string.push('\x0c'),
+ 'r' => string.push('\x0d'),
+ 'e' => string.push('\x1b'),
+ ' ' => string.push('\x20'),
+ '"' => string.push('"'),
+ '\'' => string.push('\''),
+ '\\' => string.push('\\'),
+ // NEL (#x85)
+ 'N' => string.push(char::from_u32(0x85).unwrap()),
+ // #xA0
+ '_' => string.push(char::from_u32(0xA0).unwrap()),
+ // LS (#x2028)
+ 'L' => string.push(char::from_u32(0x2028).unwrap()),
+ // PS (#x2029)
+ 'P' => string.push(char::from_u32(0x2029).unwrap()),
+ 'x' => code_length = 2,
+ 'u' => code_length = 4,
+ 'U' => code_length = 8,
+ _ => {
+ return Err(ScanError::new(
+ start_mark,
+ "while parsing a quoted scalar, found unknown escape character",
+ ))
+ }
+ }
+ self.skip();
+ self.skip();
+ // Consume an arbitrary escape code.
+ if code_length > 0 {
+ self.lookahead(code_length);
+ let mut value = 0u32;
+ for i in 0..code_length {
+ if !is_hex(self.buffer[i]) {
+ return Err(ScanError::new(start_mark,
+ "while parsing a quoted scalar, did not find expected hexadecimal number"));
+ }
+ value = (value << 4) + as_hex(self.buffer[i]);
+ }
+
+ let ch = match char::from_u32(value) {
+ Some(v) => v,
+ None => {
+ return Err(ScanError::new(start_mark,
+ "while parsing a quoted scalar, found invalid Unicode character escape code"));
+ }
+ };
+ string.push(ch);
+
+ for _ in 0..code_length {
+ self.skip();
+ }
+ }
+ }
+ c => {
+ string.push(c);
+ self.skip();
+ }
+ }
+ self.lookahead(2);
+ }
+ self.lookahead(1);
+ match self.ch() {
+ '\'' if single => break,
+ '"' if !single => break,
+ _ => {}
+ }
+
+ // Consume blank characters.
+ while is_blank(self.ch()) || is_break(self.ch()) {
+ if is_blank(self.ch()) {
+ // Consume a space or a tab character.
+ if leading_blanks {
+ self.skip();
+ } else {
+ whitespaces.push(self.ch());
+ self.skip();
+ }
+ } else {
+ self.lookahead(2);
+ // Check if it is a first line break.
+ if leading_blanks {
+ self.read_break(&mut trailing_breaks);
+ } else {
+ whitespaces.clear();
+ self.read_break(&mut leading_break);
+ leading_blanks = true;
+ }
+ }
+ self.lookahead(1);
+ }
+ // Join the whitespaces or fold line breaks.
+ if leading_blanks {
+ if leading_break.is_empty() {
+ string.push_str(&leading_break);
+ string.push_str(&trailing_breaks);
+ trailing_breaks.clear();
+ leading_break.clear();
+ } else {
+ if trailing_breaks.is_empty() {
+ string.push(' ');
+ } else {
+ string.push_str(&trailing_breaks);
+ trailing_breaks.clear();
+ }
+ leading_break.clear();
+ }
+ } else {
+ string.push_str(&whitespaces);
+ whitespaces.clear();
+ }
+ } // loop
+
+ // Eat the right quote.
+ self.skip();
+
+ if single {
+ Ok(Token(
+ start_mark,
+ TokenType::Scalar(TScalarStyle::SingleQuoted, string),
+ ))
+ } else {
+ Ok(Token(
+ start_mark,
+ TokenType::Scalar(TScalarStyle::DoubleQuoted, string),
+ ))
+ }
+ }
+
+ fn fetch_plain_scalar(&mut self) -> ScanResult {
+ self.save_simple_key()?;
+ self.disallow_simple_key();
+
+ let tok = self.scan_plain_scalar()?;
+
+ self.tokens.push_back(tok);
+ Ok(())
+ }
+
+ fn scan_plain_scalar(&mut self) -> Result<Token, ScanError> {
+ let indent = self.indent + 1;
+ let start_mark = self.mark;
+
+ let mut string = String::new();
+ let mut leading_break = String::new();
+ let mut trailing_breaks = String::new();
+ let mut whitespaces = String::new();
+ let mut leading_blanks = false;
+
+ loop {
+ /* Check for a document indicator. */
+ self.lookahead(4);
+
+ if self.mark.col == 0
+ && (((self.buffer[0] == '-') && (self.buffer[1] == '-') && (self.buffer[2] == '-'))
+ || ((self.buffer[0] == '.')
+ && (self.buffer[1] == '.')
+ && (self.buffer[2] == '.')))
+ && is_blankz(self.buffer[3])
+ {
+ break;
+ }
+
+ if self.ch() == '#' {
+ break;
+ }
+ while !is_blankz(self.ch()) {
+ // indicators can end a plain scalar, see 7.3.3. Plain Style
+ match self.ch() {
+ ':' if is_blankz(self.buffer[1])
+ || (self.flow_level > 0 && is_flow(self.buffer[1])) =>
+ {
+ break;
+ }
+ ',' | '[' | ']' | '{' | '}' if self.flow_level > 0 => break,
+ _ => {}
+ }
+
+ if leading_blanks || !whitespaces.is_empty() {
+ if leading_blanks {
+ if leading_break.is_empty() {
+ string.push_str(&leading_break);
+ string.push_str(&trailing_breaks);
+ trailing_breaks.clear();
+ leading_break.clear();
+ } else {
+ if trailing_breaks.is_empty() {
+ string.push(' ');
+ } else {
+ string.push_str(&trailing_breaks);
+ trailing_breaks.clear();
+ }
+ leading_break.clear();
+ }
+ leading_blanks = false;
+ } else {
+ string.push_str(&whitespaces);
+ whitespaces.clear();
+ }
+ }
+
+ string.push(self.ch());
+ self.skip();
+ self.lookahead(2);
+ }
+ // is the end?
+ if !(is_blank(self.ch()) || is_break(self.ch())) {
+ break;
+ }
+ self.lookahead(1);
+
+ while is_blank(self.ch()) || is_break(self.ch()) {
+ if is_blank(self.ch()) {
+ if leading_blanks && (self.mark.col as isize) < indent && self.ch() == '\t' {
+ return Err(ScanError::new(
+ start_mark,
+ "while scanning a plain scalar, found a tab",
+ ));
+ }
+
+ if leading_blanks {
+ self.skip();
+ } else {
+ whitespaces.push(self.ch());
+ self.skip();
+ }
+ } else {
+ self.lookahead(2);
+ // Check if it is a first line break
+ if leading_blanks {
+ self.read_break(&mut trailing_breaks);
+ } else {
+ whitespaces.clear();
+ self.read_break(&mut leading_break);
+ leading_blanks = true;
+ }
+ }
+ self.lookahead(1);
+ }
+
+ // check indentation level
+ if self.flow_level == 0 && (self.mark.col as isize) < indent {
+ break;
+ }
+ }
+
+ if leading_blanks {
+ self.allow_simple_key();
+ }
+
+ Ok(Token(
+ start_mark,
+ TokenType::Scalar(TScalarStyle::Plain, string),
+ ))
+ }
+
+ fn fetch_key(&mut self) -> ScanResult {
+ let start_mark = self.mark;
+ if self.flow_level == 0 {
+ // Check if we are allowed to start a new key (not necessarily simple).
+ if !self.simple_key_allowed {
+ return Err(ScanError::new(
+ self.mark,
+ "mapping keys are not allowed in this context",
+ ));
+ }
+ self.roll_indent(
+ start_mark.col,
+ None,
+ TokenType::BlockMappingStart,
+ start_mark,
+ );
+ }
+
+ self.remove_simple_key()?;
+
+ if self.flow_level == 0 {
+ self.allow_simple_key();
+ } else {
+ self.disallow_simple_key();
+ }
+
+ self.skip();
+ self.tokens.push_back(Token(start_mark, TokenType::Key));
+ Ok(())
+ }
+
+ fn fetch_value(&mut self) -> ScanResult {
+ let sk = self.simple_keys.last().unwrap().clone();
+ let start_mark = self.mark;
+ if sk.possible {
+ // insert simple key
+ let tok = Token(sk.mark, TokenType::Key);
+ let tokens_parsed = self.tokens_parsed;
+ self.insert_token(sk.token_number - tokens_parsed, tok);
+
+ // Add the BLOCK-MAPPING-START token if needed.
+ self.roll_indent(
+ sk.mark.col,
+ Some(sk.token_number),
+ TokenType::BlockMappingStart,
+ start_mark,
+ );
+
+ self.simple_keys.last_mut().unwrap().possible = false;
+ self.disallow_simple_key();
+ } else {
+ // The ':' indicator follows a complex key.
+ if self.flow_level == 0 {
+ if !self.simple_key_allowed {
+ return Err(ScanError::new(
+ start_mark,
+ "mapping values are not allowed in this context",
+ ));
+ }
+
+ self.roll_indent(
+ start_mark.col,
+ None,
+ TokenType::BlockMappingStart,
+ start_mark,
+ );
+ }
+
+ if self.flow_level == 0 {
+ self.allow_simple_key();
+ } else {
+ self.disallow_simple_key();
+ }
+ }
+ self.skip();
+ self.tokens.push_back(Token(start_mark, TokenType::Value));
+
+ Ok(())
+ }
+
+ fn roll_indent(&mut self, col: usize, number: Option<usize>, tok: TokenType, mark: Marker) {
+ if self.flow_level > 0 {
+ return;
+ }
+
+ if self.indent < col as isize {
+ self.indents.push(self.indent);
+ self.indent = col as isize;
+ let tokens_parsed = self.tokens_parsed;
+ match number {
+ Some(n) => self.insert_token(n - tokens_parsed, Token(mark, tok)),
+ None => self.tokens.push_back(Token(mark, tok)),
+ }
+ }
+ }
+
+ fn unroll_indent(&mut self, col: isize) {
+ if self.flow_level > 0 {
+ return;
+ }
+ while self.indent > col {
+ self.tokens.push_back(Token(self.mark, TokenType::BlockEnd));
+ self.indent = self.indents.pop().unwrap();
+ }
+ }
+
+ fn save_simple_key(&mut self) -> Result<(), ScanError> {
+ let required = self.flow_level > 0 && self.indent == (self.mark.col as isize);
+ if self.simple_key_allowed {
+ let mut sk = SimpleKey::new(self.mark);
+ sk.possible = true;
+ sk.required = required;
+ sk.token_number = self.tokens_parsed + self.tokens.len();
+
+ self.remove_simple_key()?;
+
+ self.simple_keys.pop();
+ self.simple_keys.push(sk);
+ }
+ Ok(())
+ }
+
+ fn remove_simple_key(&mut self) -> ScanResult {
+ let last = self.simple_keys.last_mut().unwrap();
+ if last.possible && last.required {
+ return Err(ScanError::new(self.mark, "simple key expected"));
+ }
+
+ last.possible = false;
+ Ok(())
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::TokenType::*;
+ use super::*;
+
+ macro_rules! next {
+ ($p:ident, $tk:pat) => {{
+ let tok = $p.next().unwrap();
+ match tok.1 {
+ $tk => {}
+ _ => panic!("unexpected token: {:?}", tok),
+ }
+ }};
+ }
+
+ macro_rules! next_scalar {
+ ($p:ident, $tk:expr, $v:expr) => {{
+ let tok = $p.next().unwrap();
+ match tok.1 {
+ Scalar(style, ref v) => {
+ assert_eq!(style, $tk);
+ assert_eq!(v, $v);
+ }
+ _ => panic!("unexpected token: {:?}", tok),
+ }
+ }};
+ }
+
+ macro_rules! end {
+ ($p:ident) => {{
+ assert_eq!($p.next(), None);
+ }};
+ }
+ /// test cases in libyaml scanner.c
+ #[test]
+ fn test_empty() {
+ let s = "";
+ let mut p = Scanner::new(s.chars());
+ next!(p, StreamStart(..));
+ next!(p, StreamEnd);
+ end!(p);
+ }
+
+ #[test]
+ fn test_scalar() {
+ let s = "a scalar";
+ let mut p = Scanner::new(s.chars());
+ next!(p, StreamStart(..));
+ next!(p, Scalar(TScalarStyle::Plain, _));
+ next!(p, StreamEnd);
+ end!(p);
+ }
+
+ #[test]
+ fn test_explicit_scalar() {
+ let s = "---
+'a scalar'
+...
+";
+ let mut p = Scanner::new(s.chars());
+ next!(p, StreamStart(..));
+ next!(p, DocumentStart);
+ next!(p, Scalar(TScalarStyle::SingleQuoted, _));
+ next!(p, DocumentEnd);
+ next!(p, StreamEnd);
+ end!(p);
+ }
+
+ #[test]
+ fn test_multiple_documents() {
+ let s = "
+'a scalar'
+---
+'a scalar'
+---
+'a scalar'
+";
+ let mut p = Scanner::new(s.chars());
+ next!(p, StreamStart(..));
+ next!(p, Scalar(TScalarStyle::SingleQuoted, _));
+ next!(p, DocumentStart);
+ next!(p, Scalar(TScalarStyle::SingleQuoted, _));
+ next!(p, DocumentStart);
+ next!(p, Scalar(TScalarStyle::SingleQuoted, _));
+ next!(p, StreamEnd);
+ end!(p);
+ }
+
+ #[test]
+ fn test_a_flow_sequence() {
+ let s = "[item 1, item 2, item 3]";
+ let mut p = Scanner::new(s.chars());
+ next!(p, StreamStart(..));
+ next!(p, FlowSequenceStart);
+ next_scalar!(p, TScalarStyle::Plain, "item 1");
+ next!(p, FlowEntry);
+ next!(p, Scalar(TScalarStyle::Plain, _));
+ next!(p, FlowEntry);
+ next!(p, Scalar(TScalarStyle::Plain, _));
+ next!(p, FlowSequenceEnd);
+ next!(p, StreamEnd);
+ end!(p);
+ }
+
+ #[test]
+ fn test_a_flow_mapping() {
+ let s = "
+{
+ a simple key: a value, # Note that the KEY token is produced.
+ ? a complex key: another value,
+}
+";
+ let mut p = Scanner::new(s.chars());
+ next!(p, StreamStart(..));
+ next!(p, FlowMappingStart);
+ next!(p, Key);
+ next!(p, Scalar(TScalarStyle::Plain, _));
+ next!(p, Value);
+ next!(p, Scalar(TScalarStyle::Plain, _));
+ next!(p, FlowEntry);
+ next!(p, Key);
+ next_scalar!(p, TScalarStyle::Plain, "a complex key");
+ next!(p, Value);
+ next!(p, Scalar(TScalarStyle::Plain, _));
+ next!(p, FlowEntry);
+ next!(p, FlowMappingEnd);
+ next!(p, StreamEnd);
+ end!(p);
+ }
+
+ #[test]
+ fn test_block_sequences() {
+ let s = "
+- item 1
+- item 2
+-
+ - item 3.1
+ - item 3.2
+-
+ key 1: value 1
+ key 2: value 2
+";
+ let mut p = Scanner::new(s.chars());
+ next!(p, StreamStart(..));
+ next!(p, BlockSequenceStart);
+ next!(p, BlockEntry);
+ next_scalar!(p, TScalarStyle::Plain, "item 1");
+ next!(p, BlockEntry);
+ next_scalar!(p, TScalarStyle::Plain, "item 2");
+ next!(p, BlockEntry);
+ next!(p, BlockSequenceStart);
+ next!(p, BlockEntry);
+ next_scalar!(p, TScalarStyle::Plain, "item 3.1");
+ next!(p, BlockEntry);
+ next_scalar!(p, TScalarStyle::Plain, "item 3.2");
+ next!(p, BlockEnd);
+ next!(p, BlockEntry);
+ next!(p, BlockMappingStart);
+ next!(p, Key);
+ next_scalar!(p, TScalarStyle::Plain, "key 1");
+ next!(p, Value);
+ next_scalar!(p, TScalarStyle::Plain, "value 1");
+ next!(p, Key);
+ next_scalar!(p, TScalarStyle::Plain, "key 2");
+ next!(p, Value);
+ next_scalar!(p, TScalarStyle::Plain, "value 2");
+ next!(p, BlockEnd);
+ next!(p, BlockEnd);
+ next!(p, StreamEnd);
+ end!(p);
+ }
+
+ #[test]
+ fn test_block_mappings() {
+ let s = "
+a simple key: a value # The KEY token is produced here.
+? a complex key
+: another value
+a mapping:
+ key 1: value 1
+ key 2: value 2
+a sequence:
+ - item 1
+ - item 2
+";
+ let mut p = Scanner::new(s.chars());
+ next!(p, StreamStart(..));
+ next!(p, BlockMappingStart);
+ next!(p, Key);
+ next!(p, Scalar(_, _));
+ next!(p, Value);
+ next!(p, Scalar(_, _));
+ next!(p, Key);
+ next!(p, Scalar(_, _));
+ next!(p, Value);
+ next!(p, Scalar(_, _));
+ next!(p, Key);
+ next!(p, Scalar(_, _));
+ next!(p, Value); // libyaml comment seems to be wrong
+ next!(p, BlockMappingStart);
+ next!(p, Key);
+ next!(p, Scalar(_, _));
+ next!(p, Value);
+ next!(p, Scalar(_, _));
+ next!(p, Key);
+ next!(p, Scalar(_, _));
+ next!(p, Value);
+ next!(p, Scalar(_, _));
+ next!(p, BlockEnd);
+ next!(p, Key);
+ next!(p, Scalar(_, _));
+ next!(p, Value);
+ next!(p, BlockSequenceStart);
+ next!(p, BlockEntry);
+ next!(p, Scalar(_, _));
+ next!(p, BlockEntry);
+ next!(p, Scalar(_, _));
+ next!(p, BlockEnd);
+ next!(p, BlockEnd);
+ next!(p, StreamEnd);
+ end!(p);
+ }
+
+ #[test]
+ fn test_no_block_sequence_start() {
+ let s = "
+key:
+- item 1
+- item 2
+";
+ let mut p = Scanner::new(s.chars());
+ next!(p, StreamStart(..));
+ next!(p, BlockMappingStart);
+ next!(p, Key);
+ next_scalar!(p, TScalarStyle::Plain, "key");
+ next!(p, Value);
+ next!(p, BlockEntry);
+ next_scalar!(p, TScalarStyle::Plain, "item 1");
+ next!(p, BlockEntry);
+ next_scalar!(p, TScalarStyle::Plain, "item 2");
+ next!(p, BlockEnd);
+ next!(p, StreamEnd);
+ end!(p);
+ }
+
+ #[test]
+ fn test_collections_in_sequence() {
+ let s = "
+- - item 1
+ - item 2
+- key 1: value 1
+ key 2: value 2
+- ? complex key
+ : complex value
+";
+ let mut p = Scanner::new(s.chars());
+ next!(p, StreamStart(..));
+ next!(p, BlockSequenceStart);
+ next!(p, BlockEntry);
+ next!(p, BlockSequenceStart);
+ next!(p, BlockEntry);
+ next_scalar!(p, TScalarStyle::Plain, "item 1");
+ next!(p, BlockEntry);
+ next_scalar!(p, TScalarStyle::Plain, "item 2");
+ next!(p, BlockEnd);
+ next!(p, BlockEntry);
+ next!(p, BlockMappingStart);
+ next!(p, Key);
+ next_scalar!(p, TScalarStyle::Plain, "key 1");
+ next!(p, Value);
+ next_scalar!(p, TScalarStyle::Plain, "value 1");
+ next!(p, Key);
+ next_scalar!(p, TScalarStyle::Plain, "key 2");
+ next!(p, Value);
+ next_scalar!(p, TScalarStyle::Plain, "value 2");
+ next!(p, BlockEnd);
+ next!(p, BlockEntry);
+ next!(p, BlockMappingStart);
+ next!(p, Key);
+ next_scalar!(p, TScalarStyle::Plain, "complex key");
+ next!(p, Value);
+ next_scalar!(p, TScalarStyle::Plain, "complex value");
+ next!(p, BlockEnd);
+ next!(p, BlockEnd);
+ next!(p, StreamEnd);
+ end!(p);
+ }
+
+ #[test]
+ fn test_collections_in_mapping() {
+ let s = "
+? a sequence
+: - item 1
+ - item 2
+? a mapping
+: key 1: value 1
+ key 2: value 2
+";
+ let mut p = Scanner::new(s.chars());
+ next!(p, StreamStart(..));
+ next!(p, BlockMappingStart);
+ next!(p, Key);
+ next_scalar!(p, TScalarStyle::Plain, "a sequence");
+ next!(p, Value);
+ next!(p, BlockSequenceStart);
+ next!(p, BlockEntry);
+ next_scalar!(p, TScalarStyle::Plain, "item 1");
+ next!(p, BlockEntry);
+ next_scalar!(p, TScalarStyle::Plain, "item 2");
+ next!(p, BlockEnd);
+ next!(p, Key);
+ next_scalar!(p, TScalarStyle::Plain, "a mapping");
+ next!(p, Value);
+ next!(p, BlockMappingStart);
+ next!(p, Key);
+ next_scalar!(p, TScalarStyle::Plain, "key 1");
+ next!(p, Value);
+ next_scalar!(p, TScalarStyle::Plain, "value 1");
+ next!(p, Key);
+ next_scalar!(p, TScalarStyle::Plain, "key 2");
+ next!(p, Value);
+ next_scalar!(p, TScalarStyle::Plain, "value 2");
+ next!(p, BlockEnd);
+ next!(p, BlockEnd);
+ next!(p, StreamEnd);
+ end!(p);
+ }
+
+ #[test]
+ fn test_spec_ex7_3() {
+ let s = "
+{
+ ? foo :,
+ : bar,
+}
+";
+ let mut p = Scanner::new(s.chars());
+ next!(p, StreamStart(..));
+ next!(p, FlowMappingStart);
+ next!(p, Key);
+ next_scalar!(p, TScalarStyle::Plain, "foo");
+ next!(p, Value);
+ next!(p, FlowEntry);
+ next!(p, Value);
+ next_scalar!(p, TScalarStyle::Plain, "bar");
+ next!(p, FlowEntry);
+ next!(p, FlowMappingEnd);
+ next!(p, StreamEnd);
+ end!(p);
+ }
+
+ #[test]
+ fn test_plain_scalar_starting_with_indicators_in_flow() {
+ // "Plain scalars must not begin with most indicators, as this would cause ambiguity with
+ // other YAML constructs. However, the “:”, “?” and “-” indicators may be used as the first
+ // character if followed by a non-space “safe” character, as this causes no ambiguity."
+
+ let s = "{a: :b}";
+ let mut p = Scanner::new(s.chars());
+ next!(p, StreamStart(..));
+ next!(p, FlowMappingStart);
+ next!(p, Key);
+ next_scalar!(p, TScalarStyle::Plain, "a");
+ next!(p, Value);
+ next_scalar!(p, TScalarStyle::Plain, ":b");
+ next!(p, FlowMappingEnd);
+ next!(p, StreamEnd);
+ end!(p);
+
+ let s = "{a: ?b}";
+ let mut p = Scanner::new(s.chars());
+ next!(p, StreamStart(..));
+ next!(p, FlowMappingStart);
+ next!(p, Key);
+ next_scalar!(p, TScalarStyle::Plain, "a");
+ next!(p, Value);
+ next_scalar!(p, TScalarStyle::Plain, "?b");
+ next!(p, FlowMappingEnd);
+ next!(p, StreamEnd);
+ end!(p);
+ }
+
+ #[test]
+ fn test_plain_scalar_starting_with_indicators_in_block() {
+ let s = ":a";
+ let mut p = Scanner::new(s.chars());
+ next!(p, StreamStart(..));
+ next_scalar!(p, TScalarStyle::Plain, ":a");
+ next!(p, StreamEnd);
+ end!(p);
+
+ let s = "?a";
+ let mut p = Scanner::new(s.chars());
+ next!(p, StreamStart(..));
+ next_scalar!(p, TScalarStyle::Plain, "?a");
+ next!(p, StreamEnd);
+ end!(p);
+ }
+
+ #[test]
+ fn test_plain_scalar_containing_indicators_in_block() {
+ let s = "a:,b";
+ let mut p = Scanner::new(s.chars());
+ next!(p, StreamStart(..));
+ next_scalar!(p, TScalarStyle::Plain, "a:,b");
+ next!(p, StreamEnd);
+ end!(p);
+
+ let s = ":,b";
+ let mut p = Scanner::new(s.chars());
+ next!(p, StreamStart(..));
+ next_scalar!(p, TScalarStyle::Plain, ":,b");
+ next!(p, StreamEnd);
+ end!(p);
+ }
+
+ #[test]
+ fn test_scanner_cr() {
+ let s = "---\r\n- tok1\r\n- tok2";
+ let mut p = Scanner::new(s.chars());
+ next!(p, StreamStart(..));
+ next!(p, DocumentStart);
+ next!(p, BlockSequenceStart);
+ next!(p, BlockEntry);
+ next_scalar!(p, TScalarStyle::Plain, "tok1");
+ next!(p, BlockEntry);
+ next_scalar!(p, TScalarStyle::Plain, "tok2");
+ next!(p, BlockEnd);
+ next!(p, StreamEnd);
+ end!(p);
+ }
+
+ #[test]
+ fn test_uri() {
+ // TODO
+ }
+
+ #[test]
+ fn test_uri_escapes() {
+ // TODO
+ }
+}