//! AST for parsing format descriptions. use alloc::boxed::Box; use alloc::string::String; use alloc::vec::Vec; use core::iter; use super::{lexer, unused, Error, Location, Spanned, SpannedValue, Unused}; use crate::internal_macros::bug; /// One part of a complete format description. pub(super) enum Item<'a> { /// A literal string, formatted and parsed as-is. /// /// This should never be present inside a nested format description. Literal(Spanned<&'a [u8]>), /// A sequence of brackets. The first acts as the escape character. /// /// This should never be present if the lexer has `BACKSLASH_ESCAPE` set to `true`. EscapedBracket { /// The first bracket. _first: Unused, /// The second bracket. _second: Unused, }, /// Part of a type, along with its modifiers. Component { /// Where the opening bracket was in the format string. _opening_bracket: Unused, /// Whitespace between the opening bracket and name. _leading_whitespace: Unused>>, /// The name of the component. name: Spanned<&'a [u8]>, /// The modifiers for the component. modifiers: Box<[Modifier<'a>]>, /// Whitespace between the modifiers and closing bracket. _trailing_whitespace: Unused>>, /// Where the closing bracket was in the format string. _closing_bracket: Unused, }, /// An optional sequence of items. Optional { /// Where the opening bracket was in the format string. opening_bracket: Location, /// Whitespace between the opening bracket and "optional". _leading_whitespace: Unused>>, /// The "optional" keyword. _optional_kw: Unused>, /// Whitespace between the "optional" keyword and the opening bracket. _whitespace: Unused>, /// The items within the optional sequence. nested_format_description: NestedFormatDescription<'a>, /// Where the closing bracket was in the format string. closing_bracket: Location, }, /// The first matching parse of a sequence of items. First { /// Where the opening bracket was in the format string. opening_bracket: Location, /// Whitespace between the opening bracket and "first". _leading_whitespace: Unused>>, /// The "first" keyword. _first_kw: Unused>, /// Whitespace between the "first" keyword and the opening bracket. _whitespace: Unused>, /// The sequences of items to try. nested_format_descriptions: Box<[NestedFormatDescription<'a>]>, /// Where the closing bracket was in the format string. closing_bracket: Location, }, } /// A format description that is nested within another format description. pub(super) struct NestedFormatDescription<'a> { /// Where the opening bracket was in the format string. pub(super) _opening_bracket: Unused, /// The items within the nested format description. pub(super) items: Box<[Item<'a>]>, /// Where the closing bracket was in the format string. pub(super) _closing_bracket: Unused, /// Whitespace between the closing bracket and the next item. pub(super) _trailing_whitespace: Unused>>, } /// A modifier for a component. pub(super) struct Modifier<'a> { /// Whitespace preceding the modifier. pub(super) _leading_whitespace: Unused>, /// The key of the modifier. pub(super) key: Spanned<&'a [u8]>, /// Where the colon of the modifier was in the format string. pub(super) _colon: Unused, /// The value of the modifier. pub(super) value: Spanned<&'a [u8]>, } /// Parse the provided tokens into an AST. pub(super) fn parse< 'item: 'iter, 'iter, I: Iterator, Error>>, const VERSION: usize, >( tokens: &'iter mut lexer::Lexed, ) -> impl Iterator, Error>> + 'iter { validate_version!(VERSION); parse_inner::<_, false, VERSION>(tokens) } /// Parse the provided tokens into an AST. The const generic indicates whether the resulting /// [`Item`] will be used directly or as part of a [`NestedFormatDescription`]. fn parse_inner< 'item, I: Iterator, Error>>, const NESTED: bool, const VERSION: usize, >( tokens: &mut lexer::Lexed, ) -> impl Iterator, Error>> + '_ { validate_version!(VERSION); iter::from_fn(move || { if NESTED && tokens.peek_closing_bracket().is_some() { return None; } let next = match tokens.next()? { Ok(token) => token, Err(err) => return Some(Err(err)), }; Some(match next { lexer::Token::Literal(Spanned { value: _, span: _ }) if NESTED => { bug!("literal should not be present in nested description") } lexer::Token::Literal(value) => Ok(Item::Literal(value)), lexer::Token::Bracket { kind: lexer::BracketKind::Opening, location, } => { if version!(..=1) { if let Some(second_location) = tokens.next_if_opening_bracket() { Ok(Item::EscapedBracket { _first: unused(location), _second: unused(second_location), }) } else { parse_component::<_, VERSION>(location, tokens) } } else { parse_component::<_, VERSION>(location, tokens) } } lexer::Token::Bracket { kind: lexer::BracketKind::Closing, location: _, } if NESTED => { bug!("closing bracket should be caught by the `if` statement") } lexer::Token::Bracket { kind: lexer::BracketKind::Closing, location: _, } => { bug!("closing bracket should have been consumed by `parse_component`") } lexer::Token::ComponentPart { kind: _, // whitespace is significant in nested components value, } if NESTED => Ok(Item::Literal(value)), lexer::Token::ComponentPart { kind: _, value: _ } => { bug!("component part should have been consumed by `parse_component`") } }) }) } /// Parse a component. This assumes that the opening bracket has already been consumed. fn parse_component< 'a, I: Iterator, Error>>, const VERSION: usize, >( opening_bracket: Location, tokens: &mut lexer::Lexed, ) -> Result, Error> { validate_version!(VERSION); let leading_whitespace = tokens.next_if_whitespace(); let Some(name) = tokens.next_if_not_whitespace() else { let span = match leading_whitespace { Some(Spanned { value: _, span }) => span, None => opening_bracket.to(opening_bracket), }; return Err(Error { _inner: unused(span.error("expected component name")), public: crate::error::InvalidFormatDescription::MissingComponentName { index: span.start.byte as _, }, }); }; if *name == b"optional" { let Some(whitespace) = tokens.next_if_whitespace() else { return Err(Error { _inner: unused(name.span.error("expected whitespace after `optional`")), public: crate::error::InvalidFormatDescription::Expected { what: "whitespace after `optional`", index: name.span.end.byte as _, }, }); }; let nested = parse_nested::<_, VERSION>(whitespace.span.end, tokens)?; let Some(closing_bracket) = tokens.next_if_closing_bracket() else { return Err(Error { _inner: unused(opening_bracket.error("unclosed bracket")), public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket { index: opening_bracket.byte as _, }, }); }; return Ok(Item::Optional { opening_bracket, _leading_whitespace: unused(leading_whitespace), _optional_kw: unused(name), _whitespace: unused(whitespace), nested_format_description: nested, closing_bracket, }); } if *name == b"first" { let Some(whitespace) = tokens.next_if_whitespace() else { return Err(Error { _inner: unused(name.span.error("expected whitespace after `first`")), public: crate::error::InvalidFormatDescription::Expected { what: "whitespace after `first`", index: name.span.end.byte as _, }, }); }; let mut nested_format_descriptions = Vec::new(); while let Ok(description) = parse_nested::<_, VERSION>(whitespace.span.end, tokens) { nested_format_descriptions.push(description); } let Some(closing_bracket) = tokens.next_if_closing_bracket() else { return Err(Error { _inner: unused(opening_bracket.error("unclosed bracket")), public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket { index: opening_bracket.byte as _, }, }); }; return Ok(Item::First { opening_bracket, _leading_whitespace: unused(leading_whitespace), _first_kw: unused(name), _whitespace: unused(whitespace), nested_format_descriptions: nested_format_descriptions.into_boxed_slice(), closing_bracket, }); } let mut modifiers = Vec::new(); let trailing_whitespace = loop { let Some(whitespace) = tokens.next_if_whitespace() else { break None; }; // This is not necessary for proper parsing, but provides a much better error when a nested // description is used where it's not allowed. if let Some(location) = tokens.next_if_opening_bracket() { return Err(Error { _inner: unused( location .to(location) .error("modifier must be of the form `key:value`"), ), public: crate::error::InvalidFormatDescription::InvalidModifier { value: String::from("["), index: location.byte as _, }, }); } let Some(Spanned { value, span }) = tokens.next_if_not_whitespace() else { break Some(whitespace); }; let Some(colon_index) = value.iter().position(|&b| b == b':') else { return Err(Error { _inner: unused(span.error("modifier must be of the form `key:value`")), public: crate::error::InvalidFormatDescription::InvalidModifier { value: String::from_utf8_lossy(value).into_owned(), index: span.start.byte as _, }, }); }; let key = &value[..colon_index]; let value = &value[colon_index + 1..]; if key.is_empty() { return Err(Error { _inner: unused(span.shrink_to_start().error("expected modifier key")), public: crate::error::InvalidFormatDescription::InvalidModifier { value: String::new(), index: span.start.byte as _, }, }); } if value.is_empty() { return Err(Error { _inner: unused(span.shrink_to_end().error("expected modifier value")), public: crate::error::InvalidFormatDescription::InvalidModifier { value: String::new(), index: span.shrink_to_end().start.byte as _, }, }); } modifiers.push(Modifier { _leading_whitespace: unused(whitespace), key: key.spanned(span.shrink_to_before(colon_index as _)), _colon: unused(span.start.offset(colon_index as _)), value: value.spanned(span.shrink_to_after(colon_index as _)), }); }; let Some(closing_bracket) = tokens.next_if_closing_bracket() else { return Err(Error { _inner: unused(opening_bracket.error("unclosed bracket")), public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket { index: opening_bracket.byte as _, }, }); }; Ok(Item::Component { _opening_bracket: unused(opening_bracket), _leading_whitespace: unused(leading_whitespace), name, modifiers: modifiers.into_boxed_slice(), _trailing_whitespace: unused(trailing_whitespace), _closing_bracket: unused(closing_bracket), }) } /// Parse a nested format description. The location provided is the the most recent one consumed. fn parse_nested<'a, I: Iterator, Error>>, const VERSION: usize>( last_location: Location, tokens: &mut lexer::Lexed, ) -> Result, Error> { validate_version!(VERSION); let Some(opening_bracket) = tokens.next_if_opening_bracket() else { return Err(Error { _inner: unused(last_location.error("expected opening bracket")), public: crate::error::InvalidFormatDescription::Expected { what: "opening bracket", index: last_location.byte as _, }, }); }; let items = parse_inner::<_, true, VERSION>(tokens).collect::>()?; let Some(closing_bracket) = tokens.next_if_closing_bracket() else { return Err(Error { _inner: unused(opening_bracket.error("unclosed bracket")), public: crate::error::InvalidFormatDescription::UnclosedOpeningBracket { index: opening_bracket.byte as _, }, }); }; let trailing_whitespace = tokens.next_if_whitespace(); Ok(NestedFormatDescription { _opening_bracket: unused(opening_bracket), items, _closing_bracket: unused(closing_bracket), _trailing_whitespace: unused(trailing_whitespace), }) }