diff options
Diffstat (limited to 'third_party/rust/fluent-syntax/src')
16 files changed, 3307 insertions, 0 deletions
diff --git a/third_party/rust/fluent-syntax/src/ast/helper.rs b/third_party/rust/fluent-syntax/src/ast/helper.rs new file mode 100644 index 0000000000..923437d23b --- /dev/null +++ b/third_party/rust/fluent-syntax/src/ast/helper.rs @@ -0,0 +1,25 @@ +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; + +use super::Comment; +// This is a helper struct used to properly deserialize referential +// JSON comments which are a single continuous String, into a vec of +// content slices. +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(untagged))] +pub enum CommentDef<S> { + Single { content: S }, + Multi { content: Vec<S> }, +} + +impl<'s, S> From<CommentDef<S>> for Comment<S> { + fn from(input: CommentDef<S>) -> Self { + match input { + CommentDef::Single { content } => Self { + content: vec![content], + }, + CommentDef::Multi { content } => Self { content }, + } + } +} diff --git a/third_party/rust/fluent-syntax/src/ast/mod.rs b/third_party/rust/fluent-syntax/src/ast/mod.rs new file mode 100644 index 0000000000..5b79bb3e02 --- /dev/null +++ b/third_party/rust/fluent-syntax/src/ast/mod.rs @@ -0,0 +1,1446 @@ +//! Abstract Syntax Tree representation of the Fluent Translation List. +//! +//! The AST of Fluent contains all node structures to represent a complete +//! representation of the FTL resource. +//! +//! The tree preserves all semantic information and allows for round-trip +//! of a canonically written FTL resource. +//! +//! The root node is called [`Resource`] and contains a list of [`Entry`] nodes +//! representing all possible entries in the Fluent Translation List. +//! +//! # Example +//! +//! ``` +//! use fluent_syntax::parser; +//! use fluent_syntax::ast; +//! +//! let ftl = r#" +//! +//! ## This is a message comment +//! hello-world = Hello World! +//! .tooltip = Tooltip for you, { $userName }. +//! +//! "#; +//! +//! let resource = parser::parse(ftl) +//! 
.expect("Failed to parse an FTL resource."); +//! +//! assert_eq!( +//! resource.body[0], +//! ast::Entry::Message( +//! ast::Message { +//! id: ast::Identifier { +//! name: "hello-world" +//! }, +//! value: Some(ast::Pattern { +//! elements: vec![ +//! ast::PatternElement::TextElement { +//! value: "Hello World!" +//! }, +//! ] +//! }), +//! attributes: vec![ +//! ast::Attribute { +//! id: ast::Identifier { +//! name: "tooltip" +//! }, +//! value: ast::Pattern { +//! elements: vec![ +//! ast::PatternElement::TextElement { +//! value: "Tooltip for you, " +//! }, +//! ast::PatternElement::Placeable { +//! expression: ast::Expression::Inline( +//! ast::InlineExpression::VariableReference { +//! id: ast::Identifier { +//! name: "userName" +//! } +//! } +//! ) +//! }, +//! ast::PatternElement::TextElement { +//! value: "." +//! }, +//! ] +//! } +//! } +//! ], +//! comment: Some( +//! ast::Comment { +//! content: vec!["This is a message comment"] +//! } +//! ) +//! } +//! ), +//! ); +//! ``` +//! +//! ## Errors +//! +//! Fluent AST preserves blocks containing invalid syntax as [`Entry::Junk`]. +//! +//! ## White space +//! +//! At the moment, AST does not preserve white space. As a result, only a +//! canonical form of the AST is suitable for a round-trip. +mod helper; + +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; + +/// Root node of a Fluent Translation List. +/// +/// A [`Resource`] contains a body with a list of [`Entry`] nodes. 
+/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = ""; +/// +/// let resource = parser::parse(ftl) +/// .expect("Failed to parse an FTL resource."); +/// +/// assert_eq!( +/// resource, +/// ast::Resource { +/// body: vec![] +/// } +/// ); +/// ``` +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct Resource<S> { + pub body: Vec<Entry<S>>, +} + +/// A top-level node representing an entry of a [`Resource`]. +/// +/// Every [`Entry`] is a standalone element and the parser is capable +/// of recovering from errors by identifying a beginning of a next entry. +/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// +/// key = Value +/// +/// "#; +/// +/// let resource = parser::parse(ftl) +/// .expect("Failed to parse an FTL resource."); +/// +/// assert_eq!( +/// resource, +/// ast::Resource { +/// body: vec![ +/// ast::Entry::Message( +/// ast::Message { +/// id: ast::Identifier { +/// name: "key" +/// }, +/// value: Some(ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Value" +/// }, +/// ] +/// }), +/// attributes: vec![], +/// comment: None, +/// } +/// ) +/// ] +/// } +/// ); +/// ``` +/// +/// # Junk Entry +/// +/// If FTL source contains invalid FTL content, it will be preserved +/// in form of [`Entry::Junk`] nodes. 
+/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// +/// g@rb@ge En!ry +/// +/// "#; +/// +/// let (resource, _) = parser::parse(ftl) +/// .expect_err("Failed to parse an FTL resource."); +/// +/// assert_eq!( +/// resource, +/// ast::Resource { +/// body: vec![ +/// ast::Entry::Junk { +/// content: "g@rb@ge En!ry\n\n" +/// } +/// ] +/// } +/// ); +/// ``` +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(tag = "type"))] +pub enum Entry<S> { + Message(Message<S>), + Term(Term<S>), + Comment(Comment<S>), + GroupComment(Comment<S>), + ResourceComment(Comment<S>), + Junk { content: S }, +} + +/// Message node represents the most common [`Entry`] in an FTL [`Resource`]. +/// +/// A message is a localization unit with an [`Identifier`] unique within a given +/// [`Resource`], and a value or attributes with associated [`Pattern`]. +/// +/// A message can contain a simple text value, or a compound combination of value +/// and attributes which together can be used to localize a complex User Interface +/// element. +/// +/// Finally, each [`Message`] may have an associated [`Comment`]. +/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// +/// hello-world = Hello, World! +/// +/// "#; +/// +/// let resource = parser::parse(ftl) +/// .expect("Failed to parse an FTL resource."); +/// +/// assert_eq!( +/// resource, +/// ast::Resource { +/// body: vec![ +/// ast::Entry::Message(ast::Message { +/// id: ast::Identifier { +/// name: "hello-world" +/// }, +/// value: Some(ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Hello, World!" 
+/// } +/// ] +/// }), +/// attributes: vec![], +/// comment: None, +/// }) +/// ] +/// } +/// ); +/// ``` +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct Message<S> { + pub id: Identifier<S>, + pub value: Option<Pattern<S>>, + pub attributes: Vec<Attribute<S>>, + pub comment: Option<Comment<S>>, +} + +/// A Fluent [`Term`]. +/// +/// Terms are semantically similar to [`Message`] nodes, but +/// they represent a separate concept in Fluent system. +/// +/// Every term has to have a value, and the parser will +/// report errors when term references are used in wrong positions. +/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// +/// -brand-name = Nightly +/// +/// "#; +/// +/// let resource = parser::parse(ftl) +/// .expect("Failed to parse an FTL resource."); +/// +/// assert_eq!( +/// resource, +/// ast::Resource { +/// body: vec![ +/// ast::Entry::Term(ast::Term { +/// id: ast::Identifier { +/// name: "brand-name" +/// }, +/// value: ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Nightly" +/// } +/// ] +/// }, +/// attributes: vec![], +/// comment: None, +/// }) +/// ] +/// } +/// ); +/// ``` +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct Term<S> { + pub id: Identifier<S>, + pub value: Pattern<S>, + pub attributes: Vec<Attribute<S>>, + pub comment: Option<Comment<S>>, +} + +/// Pattern contains a value of a [`Message`], [`Term`] or an [`Attribute`]. +/// +/// Each pattern is a list of [`PatternElement`] nodes representing +/// either a simple textual value, or a combination of text literals +/// and placeholder [`Expression`] nodes. +/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// +/// hello-world = Hello, World! 
+/// +/// welcome = Welcome, { $userName }. +/// +/// "#; +/// +/// let resource = parser::parse(ftl) +/// .expect("Failed to parse an FTL resource."); +/// +/// assert_eq!( +/// resource, +/// ast::Resource { +/// body: vec![ +/// ast::Entry::Message(ast::Message { +/// id: ast::Identifier { +/// name: "hello-world" +/// }, +/// value: Some(ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Hello, World!" +/// } +/// ] +/// }), +/// attributes: vec![], +/// comment: None, +/// }), +/// ast::Entry::Message(ast::Message { +/// id: ast::Identifier { +/// name: "welcome" +/// }, +/// value: Some(ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Welcome, " +/// }, +/// ast::PatternElement::Placeable { +/// expression: ast::Expression::Inline( +/// ast::InlineExpression::VariableReference { +/// id: ast::Identifier { +/// name: "userName" +/// } +/// } +/// ) +/// }, +/// ast::PatternElement::TextElement { +/// value: "." +/// } +/// ] +/// }), +/// attributes: vec![], +/// comment: None, +/// }), +/// ] +/// } +/// ); +/// ``` +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct Pattern<S> { + pub elements: Vec<PatternElement<S>>, +} + +/// PatternElement is an element of a [`Pattern`]. +/// +/// Each [`PatternElement`] node represents +/// either a simple textual value, or a combination of text literals +/// and placeholder [`Expression`] nodes. +/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// +/// hello-world = Hello, World! +/// +/// welcome = Welcome, { $userName }. 
+/// +/// "#; +/// +/// let resource = parser::parse(ftl) +/// .expect("Failed to parse an FTL resource."); +/// +/// assert_eq!( +/// resource, +/// ast::Resource { +/// body: vec![ +/// ast::Entry::Message(ast::Message { +/// id: ast::Identifier { +/// name: "hello-world" +/// }, +/// value: Some(ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Hello, World!" +/// } +/// ] +/// }), +/// attributes: vec![], +/// comment: None, +/// }), +/// ast::Entry::Message(ast::Message { +/// id: ast::Identifier { +/// name: "welcome" +/// }, +/// value: Some(ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Welcome, " +/// }, +/// ast::PatternElement::Placeable { +/// expression: ast::Expression::Inline( +/// ast::InlineExpression::VariableReference { +/// id: ast::Identifier { +/// name: "userName" +/// } +/// } +/// ) +/// }, +/// ast::PatternElement::TextElement { +/// value: "." +/// } +/// ] +/// }), +/// attributes: vec![], +/// comment: None, +/// }), +/// ] +/// } +/// ); +/// ``` +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(tag = "type"))] +pub enum PatternElement<S> { + TextElement { value: S }, + Placeable { expression: Expression<S> }, +} + +/// Attribute represents a part of a [`Message`] or [`Term`]. +/// +/// Attributes are used to express a compound list of keyed +/// [`Pattern`] elements on an entry. 
+/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// +/// hello-world = +/// .title = This is a title +/// .accesskey = T +/// +/// "#; +/// +/// let resource = parser::parse(ftl) +/// .expect("Failed to parse an FTL resource."); +/// +/// assert_eq!( +/// resource, +/// ast::Resource { +/// body: vec![ +/// ast::Entry::Message(ast::Message { +/// id: ast::Identifier { +/// name: "hello-world" +/// }, +/// value: None, +/// attributes: vec![ +/// ast::Attribute { +/// id: ast::Identifier { +/// name: "title" +/// }, +/// value: ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "This is a title" +/// }, +/// ] +/// } +/// }, +/// ast::Attribute { +/// id: ast::Identifier { +/// name: "accesskey" +/// }, +/// value: ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "T" +/// }, +/// ] +/// } +/// } +/// ], +/// comment: None, +/// }), +/// ] +/// } +/// ); +/// ``` +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct Attribute<S> { + pub id: Identifier<S>, + pub value: Pattern<S>, +} + +/// Identifier is part of nodes such as [`Message`], [`Term`] and [`Attribute`]. +/// +/// It is used to associate a unique key with an [`Entry`] or an [`Attribute`] +/// and in [`Expression`] nodes to refer to another entry. 
+/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// +/// hello-world = Value +/// +/// "#; +/// +/// let resource = parser::parse(ftl) +/// .expect("Failed to parse an FTL resource."); +/// +/// assert_eq!( +/// resource, +/// ast::Resource { +/// body: vec![ +/// ast::Entry::Message(ast::Message { +/// id: ast::Identifier { +/// name: "hello-world" +/// }, +/// value: Some(ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Value" +/// } +/// ] +/// }), +/// attributes: vec![], +/// comment: None, +/// }), +/// ] +/// } +/// ); +/// ``` +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct Identifier<S> { + pub name: S, +} + +/// Variant is a single branch of a value in a [`Select`](Expression::Select) expression. +/// +/// It's a pair of [`VariantKey`] and [`Pattern`]. If the selector matches the +/// key, then the value of the variant is returned as the value of the expression. 
+/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// +/// hello-world = { $var -> +/// [key1] Value 1 +/// *[other] Value 2 +/// } +/// +/// "#; +/// +/// let resource = parser::parse(ftl) +/// .expect("Failed to parse an FTL resource."); +/// +/// assert_eq!( +/// resource, +/// ast::Resource { +/// body: vec![ +/// ast::Entry::Message(ast::Message { +/// id: ast::Identifier { +/// name: "hello-world" +/// }, +/// value: Some(ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::Placeable { +/// expression: ast::Expression::Select { +/// selector: ast::InlineExpression::VariableReference { +/// id: ast::Identifier { name: "var" }, +/// }, +/// variants: vec![ +/// ast::Variant { +/// key: ast::VariantKey::Identifier { +/// name: "key1" +/// }, +/// value: ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Value 1", +/// } +/// ] +/// }, +/// default: false, +/// }, +/// ast::Variant { +/// key: ast::VariantKey::Identifier { +/// name: "other" +/// }, +/// value: ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Value 2", +/// } +/// ] +/// }, +/// default: true, +/// }, +/// ] +/// } +/// } +/// ] +/// }), +/// attributes: vec![], +/// comment: None, +/// }), +/// ] +/// } +/// ); +/// ``` +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(tag = "type"))] +pub struct Variant<S> { + pub key: VariantKey<S>, + pub value: Pattern<S>, + pub default: bool, +} + +/// A key of a [`Variant`]. +/// +/// Variant key can be either an identifier or a number. 
+/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// +/// hello-world = { $var -> +/// [0] Value 1 +/// *[other] Value 2 +/// } +/// +/// "#; +/// +/// let resource = parser::parse(ftl) +/// .expect("Failed to parse an FTL resource."); +/// +/// assert_eq!( +/// resource, +/// ast::Resource { +/// body: vec![ +/// ast::Entry::Message(ast::Message { +/// id: ast::Identifier { +/// name: "hello-world" +/// }, +/// value: Some(ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::Placeable { +/// expression: ast::Expression::Select { +/// selector: ast::InlineExpression::VariableReference { +/// id: ast::Identifier { name: "var" }, +/// }, +/// variants: vec![ +/// ast::Variant { +/// key: ast::VariantKey::NumberLiteral { +/// value: "0" +/// }, +/// value: ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Value 1", +/// } +/// ] +/// }, +/// default: false, +/// }, +/// ast::Variant { +/// key: ast::VariantKey::Identifier { +/// name: "other" +/// }, +/// value: ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Value 2", +/// } +/// ] +/// }, +/// default: true, +/// }, +/// ] +/// } +/// } +/// ] +/// }), +/// attributes: vec![], +/// comment: None, +/// }), +/// ] +/// } +/// ); +/// ``` +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(tag = "type"))] +pub enum VariantKey<S> { + Identifier { name: S }, + NumberLiteral { value: S }, +} + +/// Fluent [`Comment`]. +/// +/// In Fluent, comments may be standalone, or associated with +/// an entry such as [`Term`] or [`Message`]. 
+/// +/// When used as a standalone [`Entry`], comments may appear in one of +/// three levels: +/// +/// * Standalone comment +/// * Group comment associated with a group of messages +/// * Resource comment associated with the whole resource +/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// ## A standalone level comment +/// "#; +/// +/// let resource = parser::parse(ftl) +/// .expect("Failed to parse an FTL resource."); +/// +/// assert_eq!( +/// resource, +/// ast::Resource { +/// body: vec![ +/// ast::Entry::Comment(ast::Comment { +/// content: vec![ +/// "A standalone level comment" +/// ] +/// }) +/// ] +/// } +/// ); +/// ``` +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(from = "helper::CommentDef<S>"))] +pub struct Comment<S> { + pub content: Vec<S>, +} + +/// List of arguments for a [`FunctionReference`](InlineExpression::FunctionReference) or a +/// [`TermReference`](InlineExpression::TermReference). +/// +/// Function and Term reference may contain a list of positional and +/// named arguments passed to them. 
+/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// +/// key = { FUNC($var1, "literal", style: "long") } +/// +/// "#; +/// +/// let resource = parser::parse(ftl) +/// .expect("Failed to parse an FTL resource."); +/// +/// assert_eq!( +/// resource, +/// ast::Resource { +/// body: vec![ +/// ast::Entry::Message( +/// ast::Message { +/// id: ast::Identifier { +/// name: "key" +/// }, +/// value: Some(ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::Placeable { +/// expression: ast::Expression::Inline( +/// ast::InlineExpression::FunctionReference { +/// id: ast::Identifier { +/// name: "FUNC" +/// }, +/// arguments: ast::CallArguments { +/// positional: vec![ +/// ast::InlineExpression::VariableReference { +/// id: ast::Identifier { +/// name: "var1" +/// } +/// }, +/// ast::InlineExpression::StringLiteral { +/// value: "literal", +/// } +/// ], +/// named: vec![ +/// ast::NamedArgument { +/// name: ast::Identifier { +/// name: "style" +/// }, +/// value: ast::InlineExpression::StringLiteral +/// { +/// value: "long" +/// } +/// } +/// ], +/// } +/// } +/// ) +/// }, +/// ] +/// }), +/// attributes: vec![], +/// comment: None, +/// } +/// ) +/// ] +/// } +/// ); +/// ``` +#[derive(Debug, PartialEq, Clone, Default)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(tag = "type"))] +pub struct CallArguments<S> { + pub positional: Vec<InlineExpression<S>>, + pub named: Vec<NamedArgument<S>>, +} + +/// A key-value pair used in [`CallArguments`]. 
+/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// +/// key = { FUNC(style: "long") } +/// +/// "#; +/// +/// let resource = parser::parse(ftl) +/// .expect("Failed to parse an FTL resource."); +/// +/// assert_eq!( +/// resource, +/// ast::Resource { +/// body: vec![ +/// ast::Entry::Message( +/// ast::Message { +/// id: ast::Identifier { +/// name: "key" +/// }, +/// value: Some(ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::Placeable { +/// expression: ast::Expression::Inline( +/// ast::InlineExpression::FunctionReference { +/// id: ast::Identifier { +/// name: "FUNC" +/// }, +/// arguments: ast::CallArguments { +/// positional: vec![], +/// named: vec![ +/// ast::NamedArgument { +/// name: ast::Identifier { +/// name: "style" +/// }, +/// value: ast::InlineExpression::StringLiteral +/// { +/// value: "long" +/// } +/// } +/// ], +/// } +/// } +/// ) +/// }, +/// ] +/// }), +/// attributes: vec![], +/// comment: None, +/// } +/// ) +/// ] +/// } +/// ); +/// ``` +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(tag = "type"))] +pub struct NamedArgument<S> { + pub name: Identifier<S>, + pub value: InlineExpression<S>, +} + +/// A subset of expressions which can be used as [`Placeable`](PatternElement::Placeable), +/// [`selector`](Expression::Select), or in [`CallArguments`]. 
+/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// +/// key = { $emailCount } +/// +/// "#; +/// +/// let resource = parser::parse(ftl) +/// .expect("Failed to parse an FTL resource."); +/// +/// assert_eq!( +/// resource, +/// ast::Resource { +/// body: vec![ +/// ast::Entry::Message( +/// ast::Message { +/// id: ast::Identifier { +/// name: "key" +/// }, +/// value: Some(ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::Placeable { +/// expression: ast::Expression::Inline( +/// ast::InlineExpression::VariableReference { +/// id: ast::Identifier { +/// name: "emailCount" +/// }, +/// } +/// ) +/// }, +/// ] +/// }), +/// attributes: vec![], +/// comment: None, +/// } +/// ) +/// ] +/// } +/// ); +/// ``` +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(tag = "type"))] +pub enum InlineExpression<S> { + /// Single line string literal enclosed in `"`. + /// + /// # Example + /// + /// ``` + /// use fluent_syntax::parser; + /// use fluent_syntax::ast; + /// + /// let ftl = r#" + /// + /// key = { "this is a literal" } + /// + /// "#; + /// + /// let resource = parser::parse(ftl) + /// .expect("Failed to parse an FTL resource."); + /// + /// assert_eq!( + /// resource, + /// ast::Resource { + /// body: vec![ + /// ast::Entry::Message( + /// ast::Message { + /// id: ast::Identifier { + /// name: "key" + /// }, + /// value: Some(ast::Pattern { + /// elements: vec![ + /// ast::PatternElement::Placeable { + /// expression: ast::Expression::Inline( + /// ast::InlineExpression::StringLiteral { + /// value: "this is a literal", + /// } + /// ) + /// }, + /// ] + /// }), + /// attributes: vec![], + /// comment: None, + /// } + /// ) + /// ] + /// } + /// ); + /// ``` + StringLiteral { value: S }, + /// A number literal. 
+ /// + /// # Example + /// + /// ``` + /// use fluent_syntax::parser; + /// use fluent_syntax::ast; + /// + /// let ftl = r#" + /// + /// key = { -0.5 } + /// + /// "#; + /// + /// let resource = parser::parse(ftl) + /// .expect("Failed to parse an FTL resource."); + /// + /// assert_eq!( + /// resource, + /// ast::Resource { + /// body: vec![ + /// ast::Entry::Message( + /// ast::Message { + /// id: ast::Identifier { + /// name: "key" + /// }, + /// value: Some(ast::Pattern { + /// elements: vec![ + /// ast::PatternElement::Placeable { + /// expression: ast::Expression::Inline( + /// ast::InlineExpression::NumberLiteral { + /// value: "-0.5", + /// } + /// ) + /// }, + /// ] + /// }), + /// attributes: vec![], + /// comment: None, + /// } + /// ) + /// ] + /// } + /// ); + /// ``` + NumberLiteral { value: S }, + /// A function reference. + /// + /// # Example + /// + /// ``` + /// use fluent_syntax::parser; + /// use fluent_syntax::ast; + /// + /// let ftl = r#" + /// + /// key = { FUNC() } + /// + /// "#; + /// + /// let resource = parser::parse(ftl) + /// .expect("Failed to parse an FTL resource."); + /// + /// assert_eq!( + /// resource, + /// ast::Resource { + /// body: vec![ + /// ast::Entry::Message( + /// ast::Message { + /// id: ast::Identifier { + /// name: "key" + /// }, + /// value: Some(ast::Pattern { + /// elements: vec![ + /// ast::PatternElement::Placeable { + /// expression: ast::Expression::Inline( + /// ast::InlineExpression::FunctionReference { + /// id: ast::Identifier { + /// name: "FUNC" + /// }, + /// arguments: ast::CallArguments::default(), + /// } + /// ) + /// }, + /// ] + /// }), + /// attributes: vec![], + /// comment: None, + /// } + /// ) + /// ] + /// } + /// ); + /// ``` + FunctionReference { + id: Identifier<S>, + arguments: CallArguments<S>, + }, + /// A reference to another message. 
+ /// + /// # Example + /// + /// ``` + /// use fluent_syntax::parser; + /// use fluent_syntax::ast; + /// + /// let ftl = r#" + /// + /// key = { key2 } + /// + /// "#; + /// + /// let resource = parser::parse(ftl) + /// .expect("Failed to parse an FTL resource."); + /// + /// assert_eq!( + /// resource, + /// ast::Resource { + /// body: vec![ + /// ast::Entry::Message( + /// ast::Message { + /// id: ast::Identifier { + /// name: "key" + /// }, + /// value: Some(ast::Pattern { + /// elements: vec![ + /// ast::PatternElement::Placeable { + /// expression: ast::Expression::Inline( + /// ast::InlineExpression::MessageReference { + /// id: ast::Identifier { + /// name: "key2" + /// }, + /// attribute: None, + /// } + /// ) + /// }, + /// ] + /// }), + /// attributes: vec![], + /// comment: None, + /// } + /// ) + /// ] + /// } + /// ); + /// ``` + MessageReference { + id: Identifier<S>, + attribute: Option<Identifier<S>>, + }, + /// A reference to a term. + /// + /// # Example + /// + /// ``` + /// use fluent_syntax::parser; + /// use fluent_syntax::ast; + /// + /// let ftl = r#" + /// + /// key = { -brand-name } + /// + /// "#; + /// + /// let resource = parser::parse(ftl) + /// .expect("Failed to parse an FTL resource."); + /// + /// assert_eq!( + /// resource, + /// ast::Resource { + /// body: vec![ + /// ast::Entry::Message( + /// ast::Message { + /// id: ast::Identifier { + /// name: "key" + /// }, + /// value: Some(ast::Pattern { + /// elements: vec![ + /// ast::PatternElement::Placeable { + /// expression: ast::Expression::Inline( + /// ast::InlineExpression::TermReference { + /// id: ast::Identifier { + /// name: "brand-name" + /// }, + /// attribute: None, + /// arguments: None, + /// } + /// ) + /// }, + /// ] + /// }), + /// attributes: vec![], + /// comment: None, + /// } + /// ) + /// ] + /// } + /// ); + /// ``` + TermReference { + id: Identifier<S>, + attribute: Option<Identifier<S>>, + arguments: Option<CallArguments<S>>, + }, + /// A reference to a 
variable. + /// + /// # Example + /// + /// ``` + /// use fluent_syntax::parser; + /// use fluent_syntax::ast; + /// + /// let ftl = r#" + /// + /// key = { $var1 } + /// + /// "#; + /// + /// let resource = parser::parse(ftl) + /// .expect("Failed to parse an FTL resource."); + /// + /// assert_eq!( + /// resource, + /// ast::Resource { + /// body: vec![ + /// ast::Entry::Message( + /// ast::Message { + /// id: ast::Identifier { + /// name: "key" + /// }, + /// value: Some(ast::Pattern { + /// elements: vec![ + /// ast::PatternElement::Placeable { + /// expression: ast::Expression::Inline( + /// ast::InlineExpression::VariableReference { + /// id: ast::Identifier { + /// name: "var1" + /// }, + /// } + /// ) + /// }, + /// ] + /// }), + /// attributes: vec![], + /// comment: None, + /// } + /// ) + /// ] + /// } + /// ); + /// ``` + VariableReference { id: Identifier<S> }, + /// A placeable which may contain another expression. + /// + /// # Example + /// + /// ``` + /// use fluent_syntax::parser; + /// use fluent_syntax::ast; + /// + /// let ftl = r#" + /// + /// key = { { "placeable" } } + /// + /// "#; + /// + /// let resource = parser::parse(ftl) + /// .expect("Failed to parse an FTL resource."); + /// + /// assert_eq!( + /// resource, + /// ast::Resource { + /// body: vec![ + /// ast::Entry::Message( + /// ast::Message { + /// id: ast::Identifier { + /// name: "key" + /// }, + /// value: Some(ast::Pattern { + /// elements: vec![ + /// ast::PatternElement::Placeable { + /// expression: ast::Expression::Inline( + /// ast::InlineExpression::Placeable { + /// expression: Box::new( + /// ast::Expression::Inline( + /// ast::InlineExpression::StringLiteral { + /// value: "placeable" + /// } + /// ) + /// ) + /// } + /// ) + /// }, + /// ] + /// }), + /// attributes: vec![], + /// comment: None, + /// } + /// ) + /// ] + /// } + /// ); + /// ``` + Placeable { expression: Box<Expression<S>> }, +} + +/// An expression that is either a select expression or an inline 
expression. +/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// +/// key = { $var -> +/// [key1] Value 1 +/// *[other] Value 2 +/// } +/// +/// "#; +/// +/// let resource = parser::parse(ftl) +/// .expect("Failed to parse an FTL resource."); +/// +/// assert_eq!( +/// resource, +/// ast::Resource { +/// body: vec![ +/// ast::Entry::Message(ast::Message { +/// id: ast::Identifier { +/// name: "key" +/// }, +/// value: Some(ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::Placeable { +/// expression: ast::Expression::Select { +/// selector: ast::InlineExpression::VariableReference { +/// id: ast::Identifier { name: "var" }, +/// }, +/// variants: vec![ +/// ast::Variant { +/// key: ast::VariantKey::Identifier { +/// name: "key1" +/// }, +/// value: ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Value 1", +/// } +/// ] +/// }, +/// default: false, +/// }, +/// ast::Variant { +/// key: ast::VariantKey::Identifier { +/// name: "other" +/// }, +/// value: ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Value 2", +/// } +/// ] +/// }, +/// default: true, +/// }, +/// ] +/// } +/// } +/// ] +/// }), +/// attributes: vec![], +/// comment: None, +/// }), +/// ] +/// } +/// ); +/// ``` +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(untagged))] +pub enum Expression<S> { + Select { + selector: InlineExpression<S>, + variants: Vec<Variant<S>>, + }, + Inline(InlineExpression<S>), +} diff --git a/third_party/rust/fluent-syntax/src/bin/parser.rs b/third_party/rust/fluent-syntax/src/bin/parser.rs new file mode 100644 index 0000000000..46275a7290 --- /dev/null +++ b/third_party/rust/fluent-syntax/src/bin/parser.rs @@ -0,0 +1,42 @@ +use fluent_syntax::parser::parse; +use std::env; +use std::fs::File; +use std::io; +use 
std::io::Read; + +fn read_file(path: &str) -> Result<String, io::Error> { + let mut f = File::open(path)?; + let mut s = String::new(); + f.read_to_string(&mut s)?; + Ok(s) +} + +fn main() { + let args: Vec<String> = env::args().collect(); + let source = read_file(args.get(1).expect("Pass an argument")).expect("Failed to fetch file"); + + let (ast, errors) = match parse(source.as_str()) { + Ok(ast) => (ast, None), + Err((ast, err)) => (ast, Some(err)), + }; + + #[cfg(feature = "json")] + { + let target_json = serde_json::to_string_pretty(&ast).unwrap(); + println!("{}", target_json); + } + #[cfg(not(feature = "json"))] + { + use std::fmt::Write; + let mut result = String::new(); + write!(result, "{:#?}", ast).unwrap(); + println!("{}", result); + } + + if let Some(errors) = errors { + println!("\n======== Errors ========== \n"); + for err in errors { + println!("Err: {:#?}", err); + } + } +} diff --git a/third_party/rust/fluent-syntax/src/bin/update_fixtures.rs b/third_party/rust/fluent-syntax/src/bin/update_fixtures.rs new file mode 100644 index 0000000000..01e7a02af0 --- /dev/null +++ b/third_party/rust/fluent-syntax/src/bin/update_fixtures.rs @@ -0,0 +1,44 @@ +use std::fs; +use std::io; + +use fluent_syntax::parser::parse; + +fn read_file(path: &str) -> Result<String, io::Error> { + fs::read_to_string(path) +} + +fn write_file(path: &str, source: &str) -> Result<(), io::Error> { + fs::write(path, source) +} + +fn main() { + let samples = &["menubar", "preferences", "simple"]; + let contexts = &["browser", "preferences"]; + + for sample in samples { + let path = format!("./benches/{}.ftl", sample); + let source = read_file(&path).unwrap(); + let ast = parse(source).unwrap(); + let target_json = serde_json::to_string_pretty(&ast).unwrap(); + let new_path = format!("./tests/fixtures/benches/{}.json", sample); + write_file(&new_path, &target_json).unwrap(); + } + + for test in contexts { + let paths = fs::read_dir(format!("./benches/contexts/{}", test)).unwrap(); + 
for path in paths.into_iter() { + let p = path.unwrap().path(); + let file_name = p.file_name().unwrap().to_str().unwrap(); + let path = p.to_str().unwrap(); + let source = read_file(path).unwrap(); + let ast = parse(source).unwrap(); + let target_json = serde_json::to_string_pretty(&ast).unwrap(); + let new_path = format!( + "./tests/fixtures/benches/contexts/{}/{}", + test, + file_name.replace(".ftl", ".json") + ); + write_file(&new_path, &target_json).unwrap(); + } + } +} diff --git a/third_party/rust/fluent-syntax/src/lib.rs b/third_party/rust/fluent-syntax/src/lib.rs new file mode 100644 index 0000000000..5b9cbbfe7f --- /dev/null +++ b/third_party/rust/fluent-syntax/src/lib.rs @@ -0,0 +1,51 @@ +//! Fluent is a modern localization system designed to improve how software is translated. +//! +//! `fluent-syntax` is the lowest level component of the [Fluent Localization +//! System](https://www.projectfluent.org). +//! +//! It exposes components necessary for parsing and tooling operations on Fluent Translation Lists ("FTL"). +//! +//! The crate provides a [`parser`] module which allows for parsing of an +//! input string to an Abstract Syntax Tree defined in the [`ast`] module. +//! +//! The [`unicode`] module exposes a set of helper functions used to decode +//! escaped unicode literals according to Fluent specification. +//! +//! # Example +//! +//! ``` +//! use fluent_syntax::parser; +//! use fluent_syntax::ast; +//! +//! let ftl = r#" +//! +//! hello-world = Hello World! +//! +//! "#; +//! +//! let resource = parser::parse(ftl) +//! .expect("Failed to parse an FTL resource."); +//! +//! assert_eq!( +//! resource.body[0], +//! ast::Entry::Message( +//! ast::Message { +//! id: ast::Identifier { +//! name: "hello-world" +//! }, +//! value: Some(ast::Pattern { +//! elements: vec![ +//! ast::PatternElement::TextElement { +//! value: "Hello World!" +//! }, +//! ] +//! }), +//! attributes: vec![], +//! comment: None, +//! } +//! ), +//! ); +//! 
``` +pub mod ast; +pub mod parser; +pub mod unicode; diff --git a/third_party/rust/fluent-syntax/src/parser/comment.rs b/third_party/rust/fluent-syntax/src/parser/comment.rs new file mode 100644 index 0000000000..a63483c1d3 --- /dev/null +++ b/third_party/rust/fluent-syntax/src/parser/comment.rs @@ -0,0 +1,89 @@ +use super::{core::Parser, core::Result, Slice}; +use crate::ast; + +#[derive(Debug, PartialEq, Clone, Copy)] +pub(super) enum Level { + None = 0, + Regular = 1, + Group = 2, + Resource = 3, +} + +impl<'s, S> Parser<S> +where + S: Slice<'s>, +{ + pub(super) fn get_comment(&mut self) -> Result<(ast::Comment<S>, Level)> { + let mut level = Level::None; + let mut content = vec![]; + + while self.ptr < self.length { + let line_level = self.get_comment_level(); + if line_level == Level::None { + self.ptr -= 1; + break; + } else if level != Level::None && line_level != level { + self.ptr -= line_level as usize; + break; + } + + level = line_level; + + if self.ptr == self.length { + break; + } else if self.is_current_byte(b'\n') { + content.push(self.get_comment_line()); + } else { + if let Err(e) = self.expect_byte(b' ') { + if content.is_empty() { + return Err(e); + } else { + self.ptr -= line_level as usize; + break; + } + } + content.push(self.get_comment_line()); + } + self.skip_eol(); + } + + Ok((ast::Comment { content }, level)) + } + + pub(super) fn skip_comment(&mut self) { + loop { + while self.ptr < self.length && !self.is_current_byte(b'\n') { + self.ptr += 1; + } + self.ptr += 1; + if self.is_current_byte(b'#') { + self.ptr += 1; + } else { + break; + } + } + } + + fn get_comment_level(&mut self) -> Level { + if self.take_byte_if(b'#') { + if self.take_byte_if(b'#') { + if self.take_byte_if(b'#') { + return Level::Resource; + } + return Level::Group; + } + return Level::Regular; + } + Level::None + } + + fn get_comment_line(&mut self) -> S { + let start_pos = self.ptr; + + while !self.is_eol() { + self.ptr += 1; + } + + 
self.source.slice(start_pos..self.ptr) + } +} diff --git a/third_party/rust/fluent-syntax/src/parser/core.rs b/third_party/rust/fluent-syntax/src/parser/core.rs new file mode 100644 index 0000000000..68ad8dc0b6 --- /dev/null +++ b/third_party/rust/fluent-syntax/src/parser/core.rs @@ -0,0 +1,307 @@ +use super::{ + comment, + errors::{ErrorKind, ParserError}, + slice::Slice, +}; +use crate::ast; + +pub type Result<T> = std::result::Result<T, ParserError>; + +pub struct Parser<S> { + pub(super) source: S, + pub(super) ptr: usize, + pub(super) length: usize, +} + +impl<'s, S> Parser<S> +where + S: Slice<'s>, +{ + pub fn new(source: S) -> Self { + let length = source.as_ref().as_bytes().len(); + Self { + source, + ptr: 0, + length, + } + } + + pub fn parse( + mut self, + ) -> std::result::Result<ast::Resource<S>, (ast::Resource<S>, Vec<ParserError>)> { + let mut errors = vec![]; + + let mut body = vec![]; + + self.skip_blank_block(); + let mut last_comment = None; + let mut last_blank_count = 0; + + while self.ptr < self.length { + let entry_start = self.ptr; + let mut entry = self.get_entry(entry_start); + + if let Some(comment) = last_comment.take() { + match entry { + Ok(ast::Entry::Message(ref mut msg)) if last_blank_count < 2 => { + msg.comment = Some(comment); + } + Ok(ast::Entry::Term(ref mut term)) if last_blank_count < 2 => { + term.comment = Some(comment); + } + _ => { + body.push(ast::Entry::Comment(comment)); + } + } + } + + match entry { + Ok(ast::Entry::Comment(comment)) => { + last_comment = Some(comment); + } + Ok(entry) => { + body.push(entry); + } + Err(mut err) => { + self.skip_to_next_entry_start(); + err.slice = Some(entry_start..self.ptr); + errors.push(err); + let content = self.source.slice(entry_start..self.ptr); + body.push(ast::Entry::Junk { content }); + } + } + last_blank_count = self.skip_blank_block(); + } + + if let Some(last_comment) = last_comment.take() { + body.push(ast::Entry::Comment(last_comment)); + } + if errors.is_empty() { + 
Ok(ast::Resource { body }) + } else { + Err((ast::Resource { body }, errors)) + } + } + + fn get_entry(&mut self, entry_start: usize) -> Result<ast::Entry<S>> { + let entry = match get_current_byte!(self) { + Some(b'#') => { + let (comment, level) = self.get_comment()?; + match level { + comment::Level::Regular => ast::Entry::Comment(comment), + comment::Level::Group => ast::Entry::GroupComment(comment), + comment::Level::Resource => ast::Entry::ResourceComment(comment), + comment::Level::None => unreachable!(), + } + } + Some(b'-') => ast::Entry::Term(self.get_term(entry_start)?), + _ => ast::Entry::Message(self.get_message(entry_start)?), + }; + Ok(entry) + } + + pub fn get_message(&mut self, entry_start: usize) -> Result<ast::Message<S>> { + let id = self.get_identifier()?; + self.skip_blank_inline(); + self.expect_byte(b'=')?; + let pattern = self.get_pattern()?; + + self.skip_blank_block(); + + let attributes = self.get_attributes(); + + if pattern.is_none() && attributes.is_empty() { + let entry_id = id.name.as_ref().to_owned(); + return error!( + ErrorKind::ExpectedMessageField { entry_id }, + entry_start, self.ptr + ); + } + + Ok(ast::Message { + id, + value: pattern, + attributes, + comment: None, + }) + } + + pub fn get_term(&mut self, entry_start: usize) -> Result<ast::Term<S>> { + self.expect_byte(b'-')?; + let id = self.get_identifier()?; + self.skip_blank_inline(); + self.expect_byte(b'=')?; + self.skip_blank_inline(); + + let value = self.get_pattern()?; + + self.skip_blank_block(); + + let attributes = self.get_attributes(); + + if let Some(value) = value { + Ok(ast::Term { + id, + value, + attributes, + comment: None, + }) + } else { + error!( + ErrorKind::ExpectedTermField { + entry_id: id.name.as_ref().to_owned() + }, + entry_start, self.ptr + ) + } + } + + fn get_attributes(&mut self) -> Vec<ast::Attribute<S>> { + let mut attributes = vec![]; + + loop { + let line_start = self.ptr; + self.skip_blank_inline(); + if !self.take_byte_if(b'.') { + 
self.ptr = line_start; + break; + } + + if let Ok(attr) = self.get_attribute() { + attributes.push(attr); + } else { + self.ptr = line_start; + break; + } + } + attributes + } + + fn get_attribute(&mut self) -> Result<ast::Attribute<S>> { + let id = self.get_identifier()?; + self.skip_blank_inline(); + self.expect_byte(b'=')?; + let pattern = self.get_pattern()?; + + match pattern { + Some(pattern) => Ok(ast::Attribute { id, value: pattern }), + None => error!(ErrorKind::MissingValue, self.ptr), + } + } + + pub(super) fn get_identifier_unchecked(&mut self) -> ast::Identifier<S> { + let mut ptr = self.ptr; + + while matches!(get_byte!(self, ptr), Some(b) if b.is_ascii_alphanumeric() || *b == b'-' || *b == b'_') + { + ptr += 1; + } + + let name = self.source.slice(self.ptr - 1..ptr); + self.ptr = ptr; + + ast::Identifier { name } + } + + pub(super) fn get_identifier(&mut self) -> Result<ast::Identifier<S>> { + if !self.is_identifier_start() { + return error!( + ErrorKind::ExpectedCharRange { + range: "a-zA-Z".to_string() + }, + self.ptr + ); + } + self.ptr += 1; + Ok(self.get_identifier_unchecked()) + } + + pub(super) fn get_attribute_accessor(&mut self) -> Result<Option<ast::Identifier<S>>> { + if self.take_byte_if(b'.') { + let ident = self.get_identifier()?; + Ok(Some(ident)) + } else { + Ok(None) + } + } + + fn get_variant_key(&mut self) -> Result<ast::VariantKey<S>> { + self.skip_blank(); + + let key = if self.is_number_start() { + ast::VariantKey::NumberLiteral { + value: self.get_number_literal()?, + } + } else { + ast::VariantKey::Identifier { + name: self.get_identifier()?.name, + } + }; + + self.skip_blank(); + + self.expect_byte(b']')?; + + Ok(key) + } + + pub(super) fn get_variants(&mut self) -> Result<Vec<ast::Variant<S>>> { + let mut variants = Vec::with_capacity(2); + let mut has_default = false; + + loop { + let default = self.take_byte_if(b'*'); + if default { + if has_default { + return error!(ErrorKind::MultipleDefaultVariants, self.ptr); + } else 
{ + has_default = true; + } + } + + if !self.take_byte_if(b'[') { + break; + } + + let key = self.get_variant_key()?; + + let value = self.get_pattern()?; + + if let Some(value) = value { + variants.push(ast::Variant { + key, + value, + default, + }); + self.skip_blank(); + } else { + return error!(ErrorKind::MissingValue, self.ptr); + } + } + + if has_default { + Ok(variants) + } else { + error!(ErrorKind::MissingDefaultVariant, self.ptr) + } + } + + pub(super) fn get_placeable(&mut self) -> Result<ast::Expression<S>> { + self.skip_blank(); + let exp = self.get_expression()?; + self.skip_blank_inline(); + self.expect_byte(b'}')?; + + let invalid_expression_found = match &exp { + ast::Expression::Inline(ast::InlineExpression::TermReference { + ref attribute, .. + }) => attribute.is_some(), + _ => false, + }; + if invalid_expression_found { + return error!(ErrorKind::TermAttributeAsPlaceable, self.ptr); + } + + Ok(exp) + } +} diff --git a/third_party/rust/fluent-syntax/src/parser/errors.rs b/third_party/rust/fluent-syntax/src/parser/errors.rs new file mode 100644 index 0000000000..2c29f97bbf --- /dev/null +++ b/third_party/rust/fluent-syntax/src/parser/errors.rs @@ -0,0 +1,169 @@ +use std::ops::Range; +use thiserror::Error; + +/// Error containing information about an error encountered by the Fluent Parser. +/// +/// Errors in Fluent Parser are non-fatal, and the syntax has been +/// designed to allow for strong recovery. +/// +/// In result [`ParserError`] is designed to point at the slice of +/// the input that is most likely to be a complete fragment from after +/// the end of a valid entry, to the start of the next valid entry, with +/// the invalid syntax in the middle. 
+/// +/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// key1 = Value 1 +/// +/// g@Rb@ge = #2y ds +/// +/// key2 = Value 2 +/// +/// "#; +/// +/// let (resource, errors) = parser::parse_runtime(ftl) +/// .expect_err("Resource should contain errors."); +/// +/// assert_eq!( +/// errors, +/// vec![ +/// parser::ParserError { +/// pos: 18..19, +/// slice: Some(17..35), +/// kind: parser::ErrorKind::ExpectedToken('=') +/// } +/// ] +/// ); +/// +/// assert_eq!( +/// resource.body[0], +/// ast::Entry::Message( +/// ast::Message { +/// id: ast::Identifier { +/// name: "key1" +/// }, +/// value: Some(ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Value 1" +/// }, +/// ] +/// }), +/// attributes: vec![], +/// comment: None, +/// } +/// ), +/// ); +/// +/// assert_eq!( +/// resource.body[1], +/// ast::Entry::Junk { +/// content: "g@Rb@ge = #2y ds\n\n" +/// } +/// ); +/// +/// assert_eq!( +/// resource.body[2], +/// ast::Entry::Message( +/// ast::Message { +/// id: ast::Identifier { +/// name: "key2" +/// }, +/// value: Some(ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Value 2" +/// }, +/// ] +/// }), +/// attributes: vec![], +/// comment: None, +/// } +/// ), +/// ); +/// ``` +/// +/// The information contained in the `ParserError` should allow the tooling +/// to display rich contextual annotations of the error slice, using +/// crates such as `annotate-snippets`. +#[derive(Error, Debug, PartialEq, Clone)] +#[error("{}", self.kind)] +pub struct ParserError { + /// Precise location of where the parser encountered the error. + pub pos: Range<usize>, + /// Slice of the input from the end of the last valid entry to the beginning + /// of the next valid entry with the invalid syntax in the middle. + pub slice: Option<Range<usize>>, + /// The type of the error that the parser encountered.
+ pub kind: ErrorKind, +} + +// Builds an `Err(ParserError)` with `slice` left unset: the two-argument form covers the single byte at `$start`, the three-argument form covers `$start..$end`. +macro_rules! error { + ($kind:expr, $start:expr) => {{ + Err(ParserError { + pos: $start..$start + 1, + slice: None, + kind: $kind, + }) + }}; + ($kind:expr, $start:expr, $end:expr) => {{ + Err(ParserError { + pos: $start..$end, + slice: None, + kind: $kind, + }) + }}; +} + +/// Kind of an error associated with the [`ParserError`]. +#[derive(Error, Debug, PartialEq, Clone)] +pub enum ErrorKind { + #[error("Expected a token starting with \"{0}\"")] + ExpectedToken(char), + #[error("Expected one of \"{range}\"")] + ExpectedCharRange { range: String }, + #[error("Expected a message field for \"{entry_id}\"")] + ExpectedMessageField { entry_id: String }, + #[error("Expected a term field for \"{entry_id}\"")] + ExpectedTermField { entry_id: String }, + #[error("Callee is not allowed here")] + ForbiddenCallee, + #[error("The select expression must have a default variant")] + MissingDefaultVariant, + #[error("Expected a value")] + MissingValue, + #[error("A select expression can only have one default variant")] + MultipleDefaultVariants, + #[error("Message references can't be used as a selector")] + MessageReferenceAsSelector, + #[error("Term references can't be used as a selector")] + TermReferenceAsSelector, + #[error("Message attributes can't be used as a selector")] + MessageAttributeAsSelector, + /// A term reference carrying an attribute accessor was used as a placeable. + #[error("Term attributes can't be used as a placeable")] + TermAttributeAsPlaceable, + #[error("Unterminated string literal")] + UnterminatedStringLiteral, + #[error("Positional arguments must come before named arguments")] + PositionalArgumentFollowsNamed, + #[error("The \"{0}\" argument appears twice")] + DuplicatedNamedArgument(String), + #[error("Unknown escape sequence")] + UnknownEscapeSequence(String), + #[error("Invalid unicode escape sequence, \"{0}\"")] + InvalidUnicodeEscapeSequence(String), + #[error("Unbalanced closing brace")] + UnbalancedClosingBrace, + #[error("Expected an inline expression")] + ExpectedInlineExpression, +
#[error("Expected a simple expression as selector")] + ExpectedSimpleExpressionAsSelector, + #[error("Expected a string or number literal")] + ExpectedLiteral, +} diff --git a/third_party/rust/fluent-syntax/src/parser/expression.rs b/third_party/rust/fluent-syntax/src/parser/expression.rs new file mode 100644 index 0000000000..c5ccb32bf4 --- /dev/null +++ b/third_party/rust/fluent-syntax/src/parser/expression.rs @@ -0,0 +1,224 @@ +use super::errors::{ErrorKind, ParserError}; +use super::{core::Parser, core::Result, slice::Slice}; +use crate::ast; + +impl<'s, S> Parser<S> +where + S: Slice<'s>, +{ + pub(super) fn get_expression(&mut self) -> Result<ast::Expression<S>> { + let exp = self.get_inline_expression(false)?; + + self.skip_blank(); + + if !self.is_current_byte(b'-') || !self.is_byte_at(b'>', self.ptr + 1) { + if let ast::InlineExpression::TermReference { ref attribute, .. } = exp { + if attribute.is_some() { + return error!(ErrorKind::TermAttributeAsPlaceable, self.ptr); + } + } + return Ok(ast::Expression::Inline(exp)); + } + + match exp { + ast::InlineExpression::MessageReference { ref attribute, .. } => { + if attribute.is_none() { + return error!(ErrorKind::MessageReferenceAsSelector, self.ptr); + } else { + return error!(ErrorKind::MessageAttributeAsSelector, self.ptr); + } + } + ast::InlineExpression::TermReference { ref attribute, .. } => { + if attribute.is_none() { + return error!(ErrorKind::TermReferenceAsSelector, self.ptr); + } + } + ast::InlineExpression::StringLiteral { .. } + | ast::InlineExpression::NumberLiteral { .. } + | ast::InlineExpression::VariableReference { .. } + | ast::InlineExpression::FunctionReference { .. 
} => {} + _ => { + return error!(ErrorKind::ExpectedSimpleExpressionAsSelector, self.ptr); + } + }; + + self.ptr += 2; // -> + + self.skip_blank_inline(); + if !self.skip_eol() { + return error!( + ErrorKind::ExpectedCharRange { + range: "\n | \r\n".to_string() + }, + self.ptr + ); + } + self.skip_blank(); + + let variants = self.get_variants()?; + + Ok(ast::Expression::Select { + selector: exp, + variants, + }) + } + + /// Parses one inline expression starting at the current byte; `only_literal` disables the arms guarded by `!only_literal`. + pub(super) fn get_inline_expression( + &mut self, + only_literal: bool, + ) -> Result<ast::InlineExpression<S>> { + match get_current_byte!(self) { + Some(b'"') => { + self.ptr += 1; // " + let start = self.ptr; + while let Some(b) = get_current_byte!(self) { + match b { + b'\\' => match get_byte!(self, self.ptr + 1) { + Some(b'\\') | Some(b'{') | Some(b'"') => self.ptr += 2, + Some(b'u') => { + self.ptr += 2; + self.skip_unicode_escape_sequence(4)?; + } + Some(b'U') => { + self.ptr += 2; + self.skip_unicode_escape_sequence(6)?; + } + b => { + // Report the offending character itself: formatting the raw `u8` would yield its decimal value (e.g. "110" for `n`). Bytes >= 0x80 map to the matching Latin-1 code point; a missing byte is reported as a space. + let seq = b.copied().map_or(' ', char::from).to_string(); + return error!(ErrorKind::UnknownEscapeSequence(seq), self.ptr); + } + }, + b'"' => { + break; + } + b'\n' => { + return error!(ErrorKind::UnterminatedStringLiteral, self.ptr); + } + _ => self.ptr += 1, + } + } + + self.expect_byte(b'"')?; + let slice = self.source.slice(start..self.ptr - 1); + Ok(ast::InlineExpression::StringLiteral { value: slice }) + } + Some(b) if b.is_ascii_digit() => { + let num = self.get_number_literal()?; + Ok(ast::InlineExpression::NumberLiteral { value: num }) + } + Some(b'-') if !only_literal => { + self.ptr += 1; // - + if self.is_identifier_start() { + self.ptr += 1; + let id = self.get_identifier_unchecked(); + let attribute = self.get_attribute_accessor()?; + let arguments = self.get_call_arguments()?; + Ok(ast::InlineExpression::TermReference { + id, + attribute, + arguments, + }) + } else { + self.ptr -= 1; + let num = self.get_number_literal()?; + Ok(ast::InlineExpression::NumberLiteral { value: num }) + } + } + Some(b'$') if
!only_literal => { + self.ptr += 1; // $ + let id = self.get_identifier()?; + Ok(ast::InlineExpression::VariableReference { id }) + } + Some(b) if b.is_ascii_alphabetic() => { + self.ptr += 1; + let id = self.get_identifier_unchecked(); + let arguments = self.get_call_arguments()?; + if let Some(arguments) = arguments { + if !Self::is_callee(&id.name) { + return error!(ErrorKind::ForbiddenCallee, self.ptr); + } + + Ok(ast::InlineExpression::FunctionReference { id, arguments }) + } else { + let attribute = self.get_attribute_accessor()?; + Ok(ast::InlineExpression::MessageReference { id, attribute }) + } + } + Some(b'{') if !only_literal => { + self.ptr += 1; // { + let exp = self.get_placeable()?; + Ok(ast::InlineExpression::Placeable { + expression: Box::new(exp), + }) + } + _ if only_literal => error!(ErrorKind::ExpectedLiteral, self.ptr), + _ => error!(ErrorKind::ExpectedInlineExpression, self.ptr), + } + } + + pub fn get_call_arguments(&mut self) -> Result<Option<ast::CallArguments<S>>> { + self.skip_blank(); + if !self.take_byte_if(b'(') { + return Ok(None); + } + + let mut positional = vec![]; + let mut named = vec![]; + let mut argument_names = vec![]; + + self.skip_blank(); + + while self.ptr < self.length { + if self.is_current_byte(b')') { + break; + } + + let expr = self.get_inline_expression(false)?; + + if let ast::InlineExpression::MessageReference { + ref id, + attribute: None, + } = expr + { + self.skip_blank(); + if self.is_current_byte(b':') { + if argument_names.contains(&id.name) { + return error!( + ErrorKind::DuplicatedNamedArgument(id.name.as_ref().to_owned()), + self.ptr + ); + } + self.ptr += 1; + self.skip_blank(); + let val = self.get_inline_expression(true)?; + + argument_names.push(id.name.clone()); + named.push(ast::NamedArgument { + name: ast::Identifier { + name: id.name.clone(), + }, + value: val, + }); + } else { + if !argument_names.is_empty() { + return error!(ErrorKind::PositionalArgumentFollowsNamed, self.ptr); + } + 
positional.push(expr); + } + } else { + if !argument_names.is_empty() { + return error!(ErrorKind::PositionalArgumentFollowsNamed, self.ptr); + } + positional.push(expr); + } + + self.skip_blank(); + self.take_byte_if(b','); + self.skip_blank(); + } + + self.expect_byte(b')')?; + + Ok(Some(ast::CallArguments { positional, named })) + } +} diff --git a/third_party/rust/fluent-syntax/src/parser/helper.rs b/third_party/rust/fluent-syntax/src/parser/helper.rs new file mode 100644 index 0000000000..11544d6855 --- /dev/null +++ b/third_party/rust/fluent-syntax/src/parser/helper.rs @@ -0,0 +1,169 @@ +use super::errors::{ErrorKind, ParserError}; +use super::{core::Parser, core::Result, slice::Slice}; + +impl<'s, S> Parser<S> +where + S: Slice<'s>, +{ + pub(super) fn is_current_byte(&self, b: u8) -> bool { + get_current_byte!(self) == Some(&b) + } + + pub(super) fn is_byte_at(&self, b: u8, pos: usize) -> bool { + get_byte!(self, pos) == Some(&b) + } + + pub(super) fn skip_to_next_entry_start(&mut self) { + while let Some(b) = get_current_byte!(self) { + let new_line = self.ptr == 0 || get_byte!(self, self.ptr - 1) == Some(&b'\n'); + + if new_line && (b.is_ascii_alphabetic() || [b'-', b'#'].contains(b)) { + break; + } + + self.ptr += 1; + } + } + + pub(super) fn skip_eol(&mut self) -> bool { + match get_current_byte!(self) { + Some(b'\n') => { + self.ptr += 1; + true + } + Some(b'\r') if self.is_byte_at(b'\n', self.ptr + 1) => { + self.ptr += 2; + true + } + _ => false, + } + } + + pub(super) fn skip_unicode_escape_sequence(&mut self, length: usize) -> Result<()> { + let start = self.ptr; + for _ in 0..length { + match get_current_byte!(self) { + Some(b) if b.is_ascii_hexdigit() => self.ptr += 1, + _ => break, + } + } + if self.ptr - start != length { + let end = if self.ptr >= self.length { + self.ptr + } else { + self.ptr + 1 + }; + let seq = self.source.slice(start..end).as_ref().to_owned(); + return error!(ErrorKind::InvalidUnicodeEscapeSequence(seq), self.ptr); + } + 
Ok(()) + } + + pub(super) fn is_identifier_start(&self) -> bool { + matches!(get_current_byte!(self), Some(b) if b.is_ascii_alphabetic()) + } + + pub(super) fn take_byte_if(&mut self, b: u8) -> bool { + if self.is_current_byte(b) { + self.ptr += 1; + true + } else { + false + } + } + + pub(super) fn skip_blank_block(&mut self) -> usize { + let mut count = 0; + loop { + let start = self.ptr; + self.skip_blank_inline(); + if !self.skip_eol() { + self.ptr = start; + break; + } + count += 1; + } + count + } + + pub(super) fn skip_blank(&mut self) { + loop { + match get_current_byte!(self) { + Some(b' ') | Some(b'\n') => self.ptr += 1, + Some(b'\r') if get_byte!(self, self.ptr + 1) == Some(&b'\n') => self.ptr += 2, + _ => break, + } + } + } + + pub(super) fn skip_blank_inline(&mut self) -> usize { + let start = self.ptr; + while let Some(b' ') = get_current_byte!(self) { + self.ptr += 1; + } + self.ptr - start + } + + pub(super) fn is_byte_pattern_continuation(b: u8) -> bool { + !matches!(b, b'.' 
| b'}' | b'[' | b'*') + } + + pub(super) fn is_callee(name: &S) -> bool { + name.as_ref() + .as_bytes() + .iter() + .all(|c| c.is_ascii_uppercase() || c.is_ascii_digit() || *c == b'_' || *c == b'-') + } + + pub(super) fn expect_byte(&mut self, b: u8) -> Result<()> { + if !self.is_current_byte(b) { + return error!(ErrorKind::ExpectedToken(b as char), self.ptr); + } + self.ptr += 1; + Ok(()) + } + + pub(super) fn is_number_start(&self) -> bool { + matches!(get_current_byte!(self), Some(b) if b.is_ascii_digit() || b == &b'-') + } + + pub(super) fn is_eol(&self) -> bool { + match get_current_byte!(self) { + Some(b'\n') => true, + Some(b'\r') if self.is_byte_at(b'\n', self.ptr + 1) => true, + None => true, + _ => false, + } + } + + pub(super) fn skip_digits(&mut self) -> Result<()> { + let start = self.ptr; + loop { + match get_current_byte!(self) { + Some(b) if b.is_ascii_digit() => self.ptr += 1, + _ => break, + } + } + if start == self.ptr { + error!( + ErrorKind::ExpectedCharRange { + range: "0-9".to_string() + }, + self.ptr + ) + } else { + Ok(()) + } + } + + pub(super) fn get_number_literal(&mut self) -> Result<S> { + let start = self.ptr; + self.take_byte_if(b'-'); + self.skip_digits()?; + if self.take_byte_if(b'.') { + self.skip_digits()?; + } + + Ok(self.source.slice(start..self.ptr)) + } +} diff --git a/third_party/rust/fluent-syntax/src/parser/macros.rs b/third_party/rust/fluent-syntax/src/parser/macros.rs new file mode 100644 index 0000000000..671d543285 --- /dev/null +++ b/third_party/rust/fluent-syntax/src/parser/macros.rs @@ -0,0 +1,11 @@ +macro_rules! get_byte { + ($s:expr, $idx:expr) => { + $s.source.as_ref().as_bytes().get($idx) + }; +} + +macro_rules! 
get_current_byte { + ($s:expr) => { + $s.source.as_ref().as_bytes().get($s.ptr) + }; +} diff --git a/third_party/rust/fluent-syntax/src/parser/mod.rs b/third_party/rust/fluent-syntax/src/parser/mod.rs new file mode 100644 index 0000000000..52edfdc37a --- /dev/null +++ b/third_party/rust/fluent-syntax/src/parser/mod.rs @@ -0,0 +1,278 @@ +//! Fluent Translation List parsing utilities +//! +//! FTL resources can be parsed using one of two methods: +//! * [`parse`] - parses an input into a complete Abstract Syntax Tree representation with all source information preserved. +//! * [`parse_runtime`] - parses an input into a runtime optimized Abstract Syntax Tree +//! representation with comments stripped. +//! +//! # Example +//! +//! ``` +//! use fluent_syntax::parser; +//! use fluent_syntax::ast; +//! +//! let ftl = r#" +//! #### Resource Level Comment +//! +//! ## This is a message comment +//! hello-world = Hello World! +//! +//! "#; +//! +//! let resource = parser::parse(ftl) +//! .expect("Failed to parse an FTL resource."); +//! +//! assert_eq!( +//! resource.body[0], +//! ast::Entry::ResourceComment( +//! ast::Comment { +//! content: vec![ +//! "Resource Level Comment" +//! ] +//! } +//! ) +//! ); +//! assert_eq!( +//! resource.body[1], +//! ast::Entry::Message( +//! ast::Message { +//! id: ast::Identifier { +//! name: "hello-world" +//! }, +//! value: Some(ast::Pattern { +//! elements: vec![ +//! ast::PatternElement::TextElement { +//! value: "Hello World!" +//! }, +//! ] +//! }), +//! attributes: vec![], +//! comment: Some( +//! ast::Comment { +//! content: vec!["This is a message comment"] +//! } +//! ) +//! } +//! ), +//! ); +//! ``` +//! +//! # Error Recovery +//! +//! In both modes the parser is lenient, attempting to recover from errors. +//! +//! The [`Result`] contains the resulting AST in both scenarios, and in the +//! error scenario a vector of [`ParserError`] elements is returned as well. +//! +//!
//! Any unparsed parts of the input are returned as [`ast::Entry::Junk`] elements.

// `#[macro_use]` exposes the macros declared in `errors` and `macros` to the
// modules declared after them (presumably including `error!` and
// `get_current_byte!`, which the pattern and runtime parsers invoke).
#[macro_use]
mod errors;
#[macro_use]
mod macros;
mod comment;
mod core;
mod expression;
mod helper;
mod pattern;
mod runtime;
mod slice;

use crate::ast;
pub use errors::{ErrorKind, ParserError};
pub use slice::Slice;

/// Parser result always returns an AST representation of the input,
/// and if parsing errors were encountered, a list of [`ParserError`] elements
/// is also returned.
///
/// * `Ok(resource)` - the input was parsed without errors.
/// * `Err((resource, errors))` - the AST is still returned, together with the
///   list of errors that were encountered.
///
/// # Example
///
/// ```
/// use fluent_syntax::parser;
/// use fluent_syntax::ast;
///
/// let ftl = r#"
/// key1 = Value 1
///
/// g@Rb@ge = #2y ds
///
/// key2 = Value 2
///
/// "#;
///
/// let (resource, errors) = parser::parse_runtime(ftl)
///     .expect_err("Resource should contain errors.");
///
/// assert_eq!(
///     errors,
///     vec![
///         parser::ParserError {
///             pos: 18..19,
///             slice: Some(17..35),
///             kind: parser::ErrorKind::ExpectedToken('=')
///         }
///     ]
/// );
///
/// assert_eq!(
///     resource.body[0],
///     ast::Entry::Message(
///         ast::Message {
///             id: ast::Identifier {
///                 name: "key1"
///             },
///             value: Some(ast::Pattern {
///                 elements: vec![
///                     ast::PatternElement::TextElement {
///                         value: "Value 1"
///                     },
///                 ]
///             }),
///             attributes: vec![],
///             comment: None,
///         }
///     ),
/// );
///
/// assert_eq!(
///     resource.body[1],
///     ast::Entry::Junk {
///         content: "g@Rb@ge = #2y ds\n\n"
///     }
/// );
///
/// assert_eq!(
///     resource.body[2],
///     ast::Entry::Message(
///         ast::Message {
///             id: ast::Identifier {
///                 name: "key2"
///             },
///             value: Some(ast::Pattern {
///                 elements: vec![
///                     ast::PatternElement::TextElement {
///                         value: "Value 2"
///                     },
///                 ]
///             }),
///             attributes: vec![],
///             comment: None,
///         }
///     ),
/// );
/// ```
pub type Result<S> = std::result::Result<ast::Resource<S>, (ast::Resource<S>, Vec<ParserError>)>;

/// Parses an input into a complete Abstract Syntax Tree representation with
/// all source information preserved.
///
/// This mode is intended for tooling, linters and other scenarios where
/// complete representation, with comments, is preferred over speed and memory
/// utilization.
///
/// # Errors
///
/// Returns `Err((resource, errors))` as described on [`Result`] when the
/// parser reports errors; the resource is returned in both cases.
///
/// # Example
///
/// ```
/// use fluent_syntax::parser;
/// use fluent_syntax::ast;
///
/// let ftl = r#"
/// #### Resource Level Comment
///
/// ## This is a message comment
/// hello-world = Hello World!
///
/// "#;
///
/// let resource = parser::parse(ftl)
///     .expect("Failed to parse an FTL resource.");
///
/// assert_eq!(
///     resource.body[0],
///     ast::Entry::ResourceComment(
///         ast::Comment {
///             content: vec![
///                 "Resource Level Comment"
///             ]
///         }
///     )
/// );
/// assert_eq!(
///     resource.body[1],
///     ast::Entry::Message(
///         ast::Message {
///             id: ast::Identifier {
///                 name: "hello-world"
///             },
///             value: Some(ast::Pattern {
///                 elements: vec![
///                     ast::PatternElement::TextElement {
///                         value: "Hello World!"
///                     },
///                 ]
///             }),
///             attributes: vec![],
///             comment: Some(
///                 ast::Comment {
///                     content: vec!["This is a message comment"]
///                 }
///             )
///         }
///     ),
/// );
/// ```
pub fn parse<'s, S>(input: S) -> Result<S>
where
    S: Slice<'s>,
{
    core::Parser::new(input).parse()
}

/// Parses an input into an Abstract Syntax Tree representation with comments stripped.
///
/// This mode is intended for runtime use of Fluent. It currently strips all
/// comments improving parsing performance and reducing the size of the AST tree.
///
/// # Errors
///
/// Returns `Err((resource, errors))` as described on [`Result`] when the
/// parser reports errors; the resource is returned in both cases.
///
/// # Example
///
/// ```
/// use fluent_syntax::parser;
/// use fluent_syntax::ast;
///
/// let ftl = r#"
/// #### Resource Level Comment
///
/// ## This is a message comment
/// hello-world = Hello World!
///
/// "#;
///
/// let resource = parser::parse_runtime(ftl)
///     .expect("Failed to parse an FTL resource.");
///
/// assert_eq!(
///     resource.body[0],
///     ast::Entry::Message(
///         ast::Message {
///             id: ast::Identifier {
///                 name: "hello-world"
///             },
///             value: Some(ast::Pattern {
///                 elements: vec![
///                     ast::PatternElement::TextElement {
///                         value: "Hello World!"
///                     },
///                 ]
///             }),
///             attributes: vec![],
///             comment: None,
///         }
///     ),
/// );
/// ```
pub fn parse_runtime<'s, S>(input: S) -> Result<S>
where
    S: Slice<'s>,
{
    core::Parser::new(input).parse_runtime()
}

// diff --git a/third_party/rust/fluent-syntax/src/parser/pattern.rs b/third_party/rust/fluent-syntax/src/parser/pattern.rs
// new file mode 100644
// index 0000000000..516326d761
// --- /dev/null
// +++ b/third_party/rust/fluent-syntax/src/parser/pattern.rs
// @@ -0,0 +1,207 @@
use super::errors::{ErrorKind, ParserError};
use super::{core::Parser, core::Result, slice::Slice};
use crate::ast;

// Describes what ended a text slice scanned by `get_text_slice`.
#[derive(Debug, PartialEq)]
enum TextElementTermination {
    // A `\n` was reached and consumed.
    LineFeed,
    // A `\r` immediately followed by `\n` was reached.
    CRLF,
    // A `{` was reached; it is left unconsumed for the caller.
    PlaceableStart,
    // The end of the input was reached.
    EOF,
}

// This enum tracks the placement of the text element in the pattern, which is needed for
// dedentation logic.
#[derive(Debug, PartialEq)]
enum TextElementPosition {
    // The pattern starts on the same line as what precedes it (no end-of-line
    // was skipped before its first element).
    InitialLineStart,
    // The element starts right after a line break.
    LineStart,
    // The element follows a placeable, or a text slice that ended at `{` or at
    // the end of the input.
    Continuation,
}

// This enum allows us to mark pointers in the source which will later become text elements
// but without slicing them out of the source string. This makes the indentation adjustments
// cheaper since they'll happen on the pointers, rather than extracted slices.
#[derive(Debug)]
enum PatternElementPlaceholders<S> {
    Placeable(ast::Expression<S>),
    // (start, end, indent, position)
    TextElement(usize, usize, usize, TextElementPosition),
}

// This enum tracks whether the text element is blank or not.
// This is important to identify text elements which should not be taken into account
// when calculating common indent.
#[derive(Debug, PartialEq)]
enum TextElementType {
    // Only space bytes (`b' '`) were seen in the slice.
    Blank,
    // At least one byte other than a space, a line break or `{` was seen.
    NonBlank,
}

impl<'s, S> Parser<S>
where
    S: Slice<'s>,
{
    /// Parses a pattern (a sequence of text elements and placeables) starting
    /// at the current position.
    ///
    /// Text elements are first recorded as byte offsets and only sliced out of
    /// the source once the common indentation of the pattern is known.
    ///
    /// Returns `Ok(None)` when no placeable and no non-blank text was found.
    /// Errors from `get_placeable` and `get_text_slice` are propagated.
    pub(super) fn get_pattern(&mut self) -> Result<Option<ast::Pattern<S>>> {
        let mut elements = vec![];
        // Index of the last element that is a placeable or non-blank text;
        // everything after it is dropped at the end.
        let mut last_non_blank = None;
        // Smallest indent seen on non-blank lines that start a new line.
        let mut common_indent = None;

        self.skip_blank_inline();

        // If a line break follows, the value starts on a later line.
        let mut text_element_role = if self.skip_eol() {
            self.skip_blank_block();
            TextElementPosition::LineStart
        } else {
            TextElementPosition::InitialLineStart
        };

        while self.ptr < self.length {
            if self.take_byte_if(b'{') {
                // A placeable at the very start of a line means that line has
                // no indentation to strip.
                if text_element_role == TextElementPosition::LineStart {
                    common_indent = Some(0);
                }
                let exp = self.get_placeable()?;
                last_non_blank = Some(elements.len());
                elements.push(PatternElementPlaceholders::Placeable(exp));
                text_element_role = TextElementPosition::Continuation;
            } else {
                let slice_start = self.ptr;
                let mut indent = 0;
                if text_element_role == TextElementPosition::LineStart {
                    // NOTE(review): `skip_blank_inline` is assumed to return the
                    // number of blank bytes it skipped - confirm in core.rs.
                    indent = self.skip_blank_inline();
                    if let Some(b) = get_current_byte!(self) {
                        if indent == 0 {
                            // An unindented line only continues the pattern if
                            // it is an empty line.
                            if b != &b'\r' && b != &b'\n' {
                                break;
                            }
                        } else if !Self::is_byte_pattern_continuation(*b) {
                            // Not part of this pattern: rewind past the indent.
                            self.ptr = slice_start;
                            break;
                        }
                    } else {
                        break;
                    }
                }
                let (start, end, text_element_type, termination_reason) = self.get_text_slice()?;
                if start != end {
                    // Only non-blank lines take part in the common indent.
                    if text_element_role == TextElementPosition::LineStart
                        && text_element_type == TextElementType::NonBlank
                    {
                        if let Some(common) = common_indent {
                            if indent < common {
                                common_indent = Some(indent);
                            }
                        } else {
                            common_indent = Some(indent);
                        }
                    }
                    // Blank line-start slices are kept only when they end in a
                    // line feed.
                    if text_element_role != TextElementPosition::LineStart
                        || text_element_type == TextElementType::NonBlank
                        || termination_reason == TextElementTermination::LineFeed
                    {
                        if text_element_type == TextElementType::NonBlank {
                            last_non_blank = Some(elements.len());
                        }
                        // The recorded start is `slice_start`, i.e. it includes
                        // the indent; the indent is stripped when slicing below.
                        elements.push(PatternElementPlaceholders::TextElement(
                            slice_start,
                            end,
                            indent,
                            text_element_role,
                        ));
                    }
                }

                text_element_role = match termination_reason {
                    TextElementTermination::LineFeed => TextElementPosition::LineStart,
                    TextElementTermination::CRLF => TextElementPosition::LineStart,
                    TextElementTermination::PlaceableStart => TextElementPosition::Continuation,
                    TextElementTermination::EOF => TextElementPosition::Continuation,
                };
            }
        }

        if let Some(last_non_blank) = last_non_blank {
            let elements = elements
                .into_iter()
                // Drop the trailing blank elements.
                .take(last_non_blank + 1)
                .enumerate()
                .map(|(i, elem)| match elem {
                    PatternElementPlaceholders::Placeable(expression) => {
                        ast::PatternElement::Placeable { expression }
                    }
                    PatternElementPlaceholders::TextElement(start, end, indent, role) => {
                        // For elements that start a line, skip the common indent
                        // (never more than the element's own indent).
                        let start = if role == TextElementPosition::LineStart {
                            common_indent.map_or_else(
                                || start + indent,
                                |common_indent| start + std::cmp::min(indent, common_indent),
                            )
                        } else {
                            start
                        };
                        let mut value = self.source.slice(start..end);
                        // `Slice::trim` is applied to the last element only.
                        if last_non_blank == i {
                            value.trim();
                        }
                        ast::PatternElement::TextElement { value }
                    }
                })
                .collect();
            return Ok(Some(ast::Pattern { elements }));
        }

        Ok(None)
    }

    /// Scans one text slice starting at the current position.
    ///
    /// Returns `(start, end, type, termination)`, where `start..end` are byte
    /// offsets into the source, `type` tells whether anything other than
    /// spaces was seen, and `termination` tells what ended the slice.
    ///
    /// Returns an `ErrorKind::UnbalancedClosingBrace` error when a `}` is met.
    fn get_text_slice(
        &mut self,
    ) -> Result<(usize, usize, TextElementType, TextElementTermination)> {
        let start_pos = self.ptr;
        let mut text_element_type = TextElementType::Blank;

        while let Some(b) = get_current_byte!(self) {
            match b {
                b' ' => self.ptr += 1,
                b'\n' => {
                    // The line feed is consumed and included in the slice.
                    self.ptr += 1;
                    return Ok((
                        start_pos,
                        self.ptr,
                        text_element_type,
                        TextElementTermination::LineFeed,
                    ));
                }
                b'\r' if self.is_byte_at(b'\n', self.ptr + 1) => {
                    // Only the `\r` is consumed and it is excluded from the
                    // slice; the `\n` is left at the current position.
                    self.ptr += 1;
                    return Ok((
                        start_pos,
                        self.ptr - 1,
                        text_element_type,
                        TextElementTermination::CRLF,
                    ));
                }
                b'{' => {
                    // The `{` is not consumed.
                    return Ok((
                        start_pos,
                        self.ptr,
                        text_element_type,
                        TextElementTermination::PlaceableStart,
                    ));
                }
                b'}' => {
                    return error!(ErrorKind::UnbalancedClosingBrace, self.ptr);
                }
                _ => {
                    text_element_type = TextElementType::NonBlank;
                    self.ptr += 1
                }
            }
        }
        Ok((
            start_pos,
            self.ptr,
            text_element_type,
            TextElementTermination::EOF,
        ))
    }
}

// diff --git a/third_party/rust/fluent-syntax/src/parser/runtime.rs b/third_party/rust/fluent-syntax/src/parser/runtime.rs
// new file mode 100644
// index 0000000000..e116ceaeed
// --- /dev/null
// +++ b/third_party/rust/fluent-syntax/src/parser/runtime.rs
// @@ -0,0 +1,61 @@
use super::{
    core::{Parser, Result},
    errors::ParserError,
    slice::Slice,
};
use crate::ast;

impl<'s, S> Parser<S>
where
    S: Slice<'s>,
{
    /// Parses the whole input in runtime mode, consuming the parser.
    ///
    /// Comment entries produce no AST node. When an entry fails to parse, the
    /// parser skips to the next entry start, records the error (with the
    /// skipped byte range stored in `slice`) and pushes the skipped source
    /// as an `ast::Entry::Junk`.
    ///
    /// Returns `Ok(resource)` when no error was recorded, otherwise
    /// `Err((resource, errors))`.
    pub fn parse_runtime(
        mut self,
    ) -> std::result::Result<ast::Resource<S>, (ast::Resource<S>, Vec<ParserError>)> {
        let mut errors = vec![];

        // That default allocation gives the lowest
        // number of instructions and cycles in ioi.
        let mut body = Vec::with_capacity(6);

        self.skip_blank_block();

        while self.ptr < self.length {
            let entry_start = self.ptr;
            let entry = self.get_entry_runtime(entry_start);

            match entry {
                Ok(Some(entry)) => {
                    body.push(entry);
                }
                // A comment was skipped; nothing to add.
                Ok(None) => {}
                Err(mut err) => {
                    // Recover: everything from the start of the failed entry up
                    // to the next entry start becomes junk.
                    self.skip_to_next_entry_start();
                    err.slice = Some(entry_start..self.ptr);
                    errors.push(err);
                    let content = self.source.slice(entry_start..self.ptr);
                    body.push(ast::Entry::Junk { content });
                }
            }
            self.skip_blank_block();
        }

        if errors.is_empty() {
            Ok(ast::Resource { body })
        } else {
            Err((ast::Resource { body }, errors))
        }
    }

    /// Parses a single entry in runtime mode, dispatching on the current byte:
    /// `#` skips a comment and yields `Ok(None)`, `-` parses a term, and
    /// anything else is parsed as a message.
    fn get_entry_runtime(&mut self, entry_start: usize) -> Result<Option<ast::Entry<S>>> {
        let entry = match get_current_byte!(self) {
            Some(b'#') => {
                self.skip_comment();
                None
            }
            Some(b'-') => Some(ast::Entry::Term(self.get_term(entry_start)?)),
            _ => Some(ast::Entry::Message(self.get_message(entry_start)?)),
        };
        Ok(entry)
    }
}

// diff --git a/third_party/rust/fluent-syntax/src/parser/slice.rs b/third_party/rust/fluent-syntax/src/parser/slice.rs
// new file mode 100644
// index 0000000000..d44f8251fe
// --- /dev/null
// +++ b/third_party/rust/fluent-syntax/src/parser/slice.rs
// @@ -0,0 +1,25
// @@
use std::ops::Range;

/// Abstraction over the string types the parser can take as input and hand
/// out as AST values (implemented here for `String` and `&str`).
pub trait Slice<'s>: AsRef<str> + Clone + PartialEq {
    /// Returns the part of `self` covered by the byte `range`.
    fn slice(&self, range: Range<usize>) -> Self;
    /// Removes trailing whitespace from `self` in place.
    ///
    /// Despite the name, both implementations only trim the end (`trim_end`).
    fn trim(&mut self);
}

impl<'s> Slice<'s> for String {
    // Copies the selected range into a new `String`.
    fn slice(&self, range: Range<usize>) -> Self {
        self[range].to_string()
    }

    // Replaces the string with a newly allocated, end-trimmed copy.
    fn trim(&mut self) {
        *self = self.trim_end().to_string();
    }
}

impl<'s> Slice<'s> for &'s str {
    // Borrows the selected range; no allocation.
    fn slice(&self, range: Range<usize>) -> Self {
        &self[range]
    }

    // Narrows the borrowed slice; no allocation.
    fn trim(&mut self) {
        *self = self.trim_end();
    }
}

// diff --git a/third_party/rust/fluent-syntax/src/unicode.rs b/third_party/rust/fluent-syntax/src/unicode.rs
// new file mode 100644
// index 0000000000..ab95a86884
// --- /dev/null
// +++ b/third_party/rust/fluent-syntax/src/unicode.rs
// @@ -0,0 +1,159 @@
//! A set of helper functions for unescaping Fluent unicode escape sequences.
//!
//! # Unicode
//!
//! Fluent supports UTF-8 in all FTL resources, but it also allows
//! unicode sequences to be escaped in [`String
//! Literals`](super::ast::InlineExpression::StringLiteral).
//!
//! Escapes made of four hexadecimal digits are written with `\u` and
//! escapes made of six hexadecimal digits are written with `\U`.
//!
//! ## Example
//!
//! ```
//! use fluent_syntax::unicode::unescape_unicode_to_string;
//!
//! assert_eq!(
//!     unescape_unicode_to_string("Foo \\u5bd2 Bar"),
//!     "Foo 寒 Bar"
//! );
//!
//! assert_eq!(
//!     unescape_unicode_to_string("Foo \\U01F68A Bar"),
//!     "Foo 🚊 Bar"
//! );
//! ```
//!
//! # Other unescapes
//!
//! This also allows for a char `"` to be present inside an FTL string literal,
//! and for `\` itself to be escaped.
//!
//! ## Example
//!
//! ```
//! use fluent_syntax::unicode::unescape_unicode_to_string;
//!
//! assert_eq!(
//!     unescape_unicode_to_string("Foo \\\" Bar"),
//!     "Foo \" Bar"
//! );
//! assert_eq!(
//!     unescape_unicode_to_string("Foo \\\\ Bar"),
//!     "Foo \\ Bar"
//! );
//! ```
use std::borrow::Cow;
use std::char;
use std::fmt;

/// Character emitted in place of any escape sequence that cannot be decoded.
const UNKNOWN_CHAR: char = '�';

/// Decodes the hexadecimal digits of a `\u` / `\U` escape into a `char`.
///
/// Returns [`UNKNOWN_CHAR`] when `s` is `None`, contains anything other than
/// ASCII hexadecimal digits, or does not name a valid Unicode scalar value.
fn encode_unicode(s: Option<&str>) -> char {
    // `u32::from_str_radix` also accepts a leading `+`, which is not a valid
    // part of an escape sequence, so the digits are validated first.
    s.filter(|s| s.bytes().all(|b| b.is_ascii_hexdigit()))
        .and_then(|s| u32::from_str_radix(s, 16).ok())
        .and_then(char::from_u32)
        .unwrap_or(UNKNOWN_CHAR)
}

/// Returns the first char boundary of `input` at or after `idx`, clamped to
/// `input.len()`, so that the result can always be used to slice `input`.
fn next_char_boundary(input: &str, idx: usize) -> usize {
    if idx >= input.len() {
        return input.len();
    }
    let mut idx = idx;
    // Terminates because `input.len()` is always a char boundary.
    while !input.is_char_boundary(idx) {
        idx += 1;
    }
    idx
}

/// Unescapes to a writer without allocating.
///
/// Recognized escapes are `\\`, `\"`, `\uXXXX` (four hexadecimal digits) and
/// `\UXXXXXX` (six hexadecimal digits). Any other or malformed escape is
/// written as the replacement character `�`; malformed input never panics.
///
/// # Errors
///
/// Returns the error reported by the writer `w`, if any.
///
/// ## Example
///
/// ```
/// use fluent_syntax::unicode::unescape_unicode;
///
/// let mut s = String::new();
/// unescape_unicode(&mut s, "Foo \\U01F60A Bar");
/// assert_eq!(s, "Foo 😊 Bar");
/// ```
pub fn unescape_unicode<W>(w: &mut W, input: &str) -> fmt::Result
where
    W: fmt::Write,
{
    let bytes = input.as_bytes();

    // Start of the literal run that has not been written yet.
    let mut start = 0;
    let mut ptr = 0;

    while let Some(b) = bytes.get(ptr) {
        if b != &b'\\' {
            ptr += 1;
            continue;
        }
        // `ptr` is at a backslash (an ASCII byte), hence on a char boundary.
        if start != ptr {
            w.write_str(&input[start..ptr])?;
        }

        ptr += 1;

        // The decoded char and the byte offset right after the escape.
        let (new_char, escape_end) = match bytes.get(ptr) {
            Some(b'\\') => ('\\', ptr + 1),
            Some(b'"') => ('"', ptr + 1),
            Some(u @ b'u') | Some(u @ b'U') => {
                let seq_start = ptr + 1;
                let len = if u == &b'u' { 4 } else { 6 };
                (
                    encode_unicode(input.get(seq_start..seq_start + len)),
                    seq_start + len,
                )
            }
            _ => (UNKNOWN_CHAR, ptr + 1),
        };
        // A malformed escape may end inside a multi-byte character or past the
        // end of the input; resume at the next valid boundary instead.
        ptr = next_char_boundary(input, escape_end);
        w.write_char(new_char)?;
        start = ptr;
    }
    if start != ptr {
        w.write_str(&input[start..ptr])?;
    }
    Ok(())
}

/// Unescapes to a `Cow<str>` optionally allocating.
///
/// The input is returned borrowed when it contains no backslash; otherwise an
/// owned string is built with [`unescape_unicode`], so both functions decode
/// escapes identically and text around the escapes is copied unchanged.
///
/// ## Example
///
/// ```
/// use fluent_syntax::unicode::unescape_unicode_to_string;
///
/// assert_eq!(
///     unescape_unicode_to_string("Foo \\U01F60A Bar"),
///     "Foo 😊 Bar"
/// );
/// ```
pub fn unescape_unicode_to_string(input: &str) -> Cow<str> {
    if !input.contains('\\') {
        return Cow::Borrowed(input);
    }

    let mut result = String::with_capacity(input.len());
    unescape_unicode(&mut result, input).expect("writing to a String cannot fail");
    Cow::Owned(result)
}