summaryrefslogtreecommitdiffstats
path: root/third_party/rust/sfv/src/parser.rs
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/sfv/src/parser.rs')
-rw-r--r--third_party/rust/sfv/src/parser.rs473
1 files changed, 473 insertions, 0 deletions
diff --git a/third_party/rust/sfv/src/parser.rs b/third_party/rust/sfv/src/parser.rs
new file mode 100644
index 0000000000..943380f279
--- /dev/null
+++ b/third_party/rust/sfv/src/parser.rs
@@ -0,0 +1,473 @@
+use crate::utils;
+use crate::{
+ BareItem, Decimal, Dictionary, FromStr, InnerList, Item, List, ListEntry, Num, Parameters,
+ SFVResult,
+};
+use std::iter::Peekable;
+use std::str::{from_utf8, Chars};
+
+/// Implements parsing logic for each structured field value type.
+pub trait ParseValue {
+ /// This method should not be used for parsing input into structured field value.
+ /// Use `Parser::parse_item`, `Parser::parse_list` or `Parsers::parse_dictionary` for that.
+ fn parse(input_chars: &mut Peekable<Chars>) -> SFVResult<Self>
+ where
+ Self: Sized;
+}
+
+/// If structured field value of List or Dictionary type is split into multiple lines,
+/// allows to parse more lines and merge them into already existing structure field value.
+pub trait ParseMore {
+ /// If structured field value is split across lines,
+ /// parses and merges next line into a single structured field value.
+ /// # Examples
+ /// ```
+ /// # use sfv::{Parser, SerializeValue, ParseMore};
+ ///
+ /// let mut list_field = Parser::parse_list("11, (12 13)".as_bytes()).unwrap();
+ /// list_field.parse_more("\"foo\", \"bar\"".as_bytes()).unwrap();
+ ///
+ /// assert_eq!(list_field.serialize_value().unwrap(), "11, (12 13), \"foo\", \"bar\"");
+ fn parse_more(&mut self, input_bytes: &[u8]) -> SFVResult<()>
+ where
+ Self: Sized;
+}
+
+impl ParseValue for Item {
+ fn parse(input_chars: &mut Peekable<Chars>) -> SFVResult<Item> {
+ // https://httpwg.org/specs/rfc8941.html#parse-item
+ let bare_item = Parser::parse_bare_item(input_chars)?;
+ let params = Parser::parse_parameters(input_chars)?;
+
+ Ok(Item { bare_item, params })
+ }
+}
+
+impl ParseValue for List {
+ fn parse(input_chars: &mut Peekable<Chars>) -> SFVResult<List> {
+ // https://httpwg.org/specs/rfc8941.html#parse-list
+ // List represents an array of (item_or_inner_list, parameters)
+
+ let mut members = vec![];
+
+ while input_chars.peek().is_some() {
+ members.push(Parser::parse_list_entry(input_chars)?);
+
+ utils::consume_ows_chars(input_chars);
+
+ if input_chars.peek().is_none() {
+ return Ok(members);
+ }
+
+ if let Some(c) = input_chars.next() {
+ if c != ',' {
+ return Err("parse_list: trailing characters after list member");
+ }
+ }
+
+ utils::consume_ows_chars(input_chars);
+
+ if input_chars.peek().is_none() {
+ return Err("parse_list: trailing comma");
+ }
+ }
+
+ Ok(members)
+ }
+}
+
+impl ParseValue for Dictionary {
+ fn parse(input_chars: &mut Peekable<Chars>) -> SFVResult<Dictionary> {
+ let mut dict = Dictionary::new();
+
+ while input_chars.peek().is_some() {
+ let this_key = Parser::parse_key(input_chars)?;
+
+ if let Some('=') = input_chars.peek() {
+ input_chars.next();
+ let member = Parser::parse_list_entry(input_chars)?;
+ dict.insert(this_key, member);
+ } else {
+ let value = true;
+ let params = Parser::parse_parameters(input_chars)?;
+ let member = Item {
+ bare_item: BareItem::Boolean(value),
+ params,
+ };
+ dict.insert(this_key, member.into());
+ }
+
+ utils::consume_ows_chars(input_chars);
+
+ if input_chars.peek().is_none() {
+ return Ok(dict);
+ }
+
+ if let Some(c) = input_chars.next() {
+ if c != ',' {
+ return Err("parse_dict: trailing characters after dictionary member");
+ }
+ }
+
+ utils::consume_ows_chars(input_chars);
+
+ if input_chars.peek().is_none() {
+ return Err("parse_dict: trailing comma");
+ }
+ }
+ Ok(dict)
+ }
+}
+
+impl ParseMore for List {
+ fn parse_more(&mut self, input_bytes: &[u8]) -> SFVResult<()> {
+ let parsed_list = Parser::parse_list(input_bytes)?;
+ self.extend(parsed_list);
+ Ok(())
+ }
+}
+
+impl ParseMore for Dictionary {
+ fn parse_more(&mut self, input_bytes: &[u8]) -> SFVResult<()> {
+ let parsed_dict = Parser::parse_dictionary(input_bytes)?;
+ self.extend(parsed_dict);
+ Ok(())
+ }
+}
+
+/// Exposes methods for parsing input into structured field value.
+pub struct Parser;
+
+impl Parser {
+ /// Parses input into structured field value of Dictionary type
+ pub fn parse_dictionary(input_bytes: &[u8]) -> SFVResult<Dictionary> {
+ Self::parse::<Dictionary>(input_bytes)
+ }
+
+ /// Parses input into structured field value of List type
+ pub fn parse_list(input_bytes: &[u8]) -> SFVResult<List> {
+ Self::parse::<List>(input_bytes)
+ }
+
+ /// Parses input into structured field value of Item type
+ pub fn parse_item(input_bytes: &[u8]) -> SFVResult<Item> {
+ Self::parse::<Item>(input_bytes)
+ }
+
+ // Generic parse method for checking input before parsing
+ // and handling trailing text error
+ fn parse<T: ParseValue>(input_bytes: &[u8]) -> SFVResult<T> {
+ // https://httpwg.org/specs/rfc8941.html#text-parse
+ if !input_bytes.is_ascii() {
+ return Err("parse: non-ascii characters in input");
+ }
+
+ let mut input_chars = from_utf8(input_bytes)
+ .map_err(|_| "parse: conversion from bytes to str failed")?
+ .chars()
+ .peekable();
+ utils::consume_sp_chars(&mut input_chars);
+
+ let output = T::parse(&mut input_chars)?;
+
+ utils::consume_sp_chars(&mut input_chars);
+
+ if input_chars.next().is_some() {
+ return Err("parse: trailing characters after parsed value");
+ };
+ Ok(output)
+ }
+
+ fn parse_list_entry(input_chars: &mut Peekable<Chars>) -> SFVResult<ListEntry> {
+ // https://httpwg.org/specs/rfc8941.html#parse-item-or-list
+ // ListEntry represents a tuple (item_or_inner_list, parameters)
+
+ match input_chars.peek() {
+ Some('(') => {
+ let parsed = Self::parse_inner_list(input_chars)?;
+ Ok(ListEntry::InnerList(parsed))
+ }
+ _ => {
+ let parsed = Item::parse(input_chars)?;
+ Ok(ListEntry::Item(parsed))
+ }
+ }
+ }
+
+ pub(crate) fn parse_inner_list(input_chars: &mut Peekable<Chars>) -> SFVResult<InnerList> {
+ // https://httpwg.org/specs/rfc8941.html#parse-innerlist
+
+ if Some('(') != input_chars.next() {
+ return Err("parse_inner_list: input does not start with '('");
+ }
+
+ let mut inner_list = Vec::new();
+ while input_chars.peek().is_some() {
+ utils::consume_sp_chars(input_chars);
+
+ if Some(&')') == input_chars.peek() {
+ input_chars.next();
+ let params = Self::parse_parameters(input_chars)?;
+ return Ok(InnerList {
+ items: inner_list,
+ params,
+ });
+ }
+
+ let parsed_item = Item::parse(input_chars)?;
+ inner_list.push(parsed_item);
+
+ if let Some(c) = input_chars.peek() {
+ if c != &' ' && c != &')' {
+ return Err("parse_inner_list: bad delimitation");
+ }
+ }
+ }
+
+ Err("parse_inner_list: the end of the inner list was not found")
+ }
+
+ pub(crate) fn parse_bare_item(input_chars: &mut Peekable<Chars>) -> SFVResult<BareItem> {
+ // https://httpwg.org/specs/rfc8941.html#parse-bare-item
+ if input_chars.peek().is_none() {
+ return Err("parse_bare_item: empty item");
+ }
+
+ match input_chars.peek() {
+ Some(&'?') => Ok(BareItem::Boolean(Self::parse_bool(input_chars)?)),
+ Some(&'"') => Ok(BareItem::String(Self::parse_string(input_chars)?)),
+ Some(&':') => Ok(BareItem::ByteSeq(Self::parse_byte_sequence(input_chars)?)),
+ Some(&c) if c == '*' || c.is_ascii_alphabetic() => {
+ Ok(BareItem::Token(Self::parse_token(input_chars)?))
+ }
+ Some(&c) if c == '-' || c.is_ascii_digit() => match Self::parse_number(input_chars)? {
+ Num::Decimal(val) => Ok(BareItem::Decimal(val)),
+ Num::Integer(val) => Ok(BareItem::Integer(val)),
+ },
+ _ => Err("parse_bare_item: item type can't be identified"),
+ }
+ }
+
+ pub(crate) fn parse_bool(input_chars: &mut Peekable<Chars>) -> SFVResult<bool> {
+ // https://httpwg.org/specs/rfc8941.html#parse-boolean
+
+ if input_chars.next() != Some('?') {
+ return Err("parse_bool: first character is not '?'");
+ }
+
+ match input_chars.next() {
+ Some('0') => Ok(false),
+ Some('1') => Ok(true),
+ _ => Err("parse_bool: invalid variant"),
+ }
+ }
+
+ pub(crate) fn parse_string(input_chars: &mut Peekable<Chars>) -> SFVResult<String> {
+ // https://httpwg.org/specs/rfc8941.html#parse-string
+
+ if input_chars.next() != Some('\"') {
+ return Err("parse_string: first character is not '\"'");
+ }
+
+ let mut output_string = String::from("");
+ while let Some(curr_char) = input_chars.next() {
+ match curr_char {
+ '\"' => return Ok(output_string),
+ '\x7f' | '\x00'..='\x1f' => return Err("parse_string: not a visible character"),
+ '\\' => match input_chars.next() {
+ Some(c) if c == '\\' || c == '\"' => {
+ output_string.push(c);
+ }
+ None => return Err("parse_string: last input character is '\\'"),
+ _ => return Err("parse_string: disallowed character after '\\'"),
+ },
+ _ => output_string.push(curr_char),
+ }
+ }
+ Err("parse_string: no closing '\"'")
+ }
+
+ pub(crate) fn parse_token(input_chars: &mut Peekable<Chars>) -> SFVResult<String> {
+ // https://httpwg.org/specs/rfc8941.html#parse-token
+
+ if let Some(first_char) = input_chars.peek() {
+ if !first_char.is_ascii_alphabetic() && first_char != &'*' {
+ return Err("parse_token: first character is not ALPHA or '*'");
+ }
+ } else {
+ return Err("parse_token: empty input string");
+ }
+
+ let mut output_string = String::from("");
+ while let Some(curr_char) = input_chars.peek() {
+ if !utils::is_tchar(*curr_char) && curr_char != &':' && curr_char != &'/' {
+ return Ok(output_string);
+ }
+
+ match input_chars.next() {
+ Some(c) => output_string.push(c),
+ None => return Err("parse_token: end of the string"),
+ }
+ }
+ Ok(output_string)
+ }
+
+ pub(crate) fn parse_byte_sequence(input_chars: &mut Peekable<Chars>) -> SFVResult<Vec<u8>> {
+ // https://httpwg.org/specs/rfc8941.html#parse-binary
+
+ if input_chars.next() != Some(':') {
+ return Err("parse_byte_seq: first char is not ':'");
+ }
+
+ if !input_chars.clone().any(|c| c == ':') {
+ return Err("parse_byte_seq: no closing ':'");
+ }
+
+ let b64_content = input_chars.take_while(|c| c != &':').collect::<String>();
+ if !b64_content.chars().all(utils::is_allowed_b64_content) {
+ return Err("parse_byte_seq: invalid char in byte sequence");
+ }
+ match utils::base64()?.decode(b64_content.as_bytes()) {
+ Ok(content) => Ok(content),
+ Err(_) => Err("parse_byte_seq: decoding error"),
+ }
+ }
+
+ pub(crate) fn parse_number(input_chars: &mut Peekable<Chars>) -> SFVResult<Num> {
+ // https://httpwg.org/specs/rfc8941.html#parse-number
+
+ let mut sign = 1;
+ if let Some('-') = input_chars.peek() {
+ sign = -1;
+ input_chars.next();
+ }
+
+ match input_chars.peek() {
+ Some(c) if !c.is_ascii_digit() => {
+ return Err("parse_number: input number does not start with a digit")
+ }
+ None => return Err("parse_number: input number lacks a digit"),
+ _ => (),
+ }
+
+ // Get number from input as a string and identify whether it's a decimal or integer
+ let (is_integer, input_number) = Self::extract_digits(input_chars)?;
+
+ // Parse input_number from string into integer
+ if is_integer {
+ let output_number = input_number
+ .parse::<i64>()
+ .map_err(|_err| "parse_number: parsing i64 failed")?
+ * sign;
+
+ let (min_int, max_int) = (-999_999_999_999_999_i64, 999_999_999_999_999_i64);
+ if !(min_int <= output_number && output_number <= max_int) {
+ return Err("parse_number: integer number is out of range");
+ }
+
+ return Ok(Num::Integer(output_number));
+ }
+
+ // Parse input_number from string into decimal
+ let chars_after_dot = input_number
+ .find('.')
+ .map(|dot_pos| input_number.len() - dot_pos - 1);
+
+ match chars_after_dot {
+ Some(0) => Err("parse_number: decimal ends with '.'"),
+ Some(1..=3) => {
+ let mut output_number = Decimal::from_str(&input_number)
+ .map_err(|_err| "parse_number: parsing f64 failed")?;
+
+ if sign == -1 {
+ output_number.set_sign_negative(true)
+ }
+
+ Ok(Num::Decimal(output_number))
+ }
+ _ => Err("parse_number: invalid decimal fraction length"),
+ }
+ }
+
+ fn extract_digits(input_chars: &mut Peekable<Chars>) -> SFVResult<(bool, String)> {
+ let mut is_integer = true;
+ let mut input_number = String::from("");
+ while let Some(curr_char) = input_chars.peek() {
+ if curr_char.is_ascii_digit() {
+ input_number.push(*curr_char);
+ input_chars.next();
+ } else if curr_char == &'.' && is_integer {
+ if input_number.len() > 12 {
+ return Err(
+ "parse_number: decimal too long, illegal position for decimal point",
+ );
+ }
+ input_number.push(*curr_char);
+ is_integer = false;
+ input_chars.next();
+ } else {
+ break;
+ }
+
+ if is_integer && input_number.len() > 15 {
+ return Err("parse_number: integer too long, length > 15");
+ }
+
+ if !is_integer && input_number.len() > 16 {
+ return Err("parse_number: decimal too long, length > 16");
+ }
+ }
+ Ok((is_integer, input_number))
+ }
+
+ pub(crate) fn parse_parameters(input_chars: &mut Peekable<Chars>) -> SFVResult<Parameters> {
+ // https://httpwg.org/specs/rfc8941.html#parse-param
+
+ let mut params = Parameters::new();
+
+ while let Some(curr_char) = input_chars.peek() {
+ if curr_char == &';' {
+ input_chars.next();
+ } else {
+ break;
+ }
+
+ utils::consume_sp_chars(input_chars);
+
+ let param_name = Self::parse_key(input_chars)?;
+ let param_value = match input_chars.peek() {
+ Some('=') => {
+ input_chars.next();
+ Self::parse_bare_item(input_chars)?
+ }
+ _ => BareItem::Boolean(true),
+ };
+ params.insert(param_name, param_value);
+ }
+
+ // If parameters already contains a name param_name (comparing character-for-character), overwrite its value.
+ // Note that when duplicate Parameter keys are encountered, this has the effect of ignoring all but the last instance.
+ Ok(params)
+ }
+
+ pub(crate) fn parse_key(input_chars: &mut Peekable<Chars>) -> SFVResult<String> {
+ match input_chars.peek() {
+ Some(c) if c == &'*' || c.is_ascii_lowercase() => (),
+ _ => return Err("parse_key: first character is not lcalpha or '*'"),
+ }
+
+ let mut output = String::new();
+ while let Some(curr_char) = input_chars.peek() {
+ if !curr_char.is_ascii_lowercase()
+ && !curr_char.is_ascii_digit()
+ && !"_-*.".contains(*curr_char)
+ {
+ return Ok(output);
+ }
+
+ output.push(*curr_char);
+ input_chars.next();
+ }
+ Ok(output)
+ }
+}