summaryrefslogtreecommitdiffstats
path: root/third_party/rust/fluent-syntax/src/parser
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
commit36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/rust/fluent-syntax/src/parser
parentInitial commit. (diff)
downloadfirefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz
firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip
Adding upstream version 115.7.0esr.upstream/115.7.0esr
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/fluent-syntax/src/parser')
-rw-r--r--third_party/rust/fluent-syntax/src/parser/comment.rs89
-rw-r--r--third_party/rust/fluent-syntax/src/parser/core.rs307
-rw-r--r--third_party/rust/fluent-syntax/src/parser/errors.rs169
-rw-r--r--third_party/rust/fluent-syntax/src/parser/expression.rs224
-rw-r--r--third_party/rust/fluent-syntax/src/parser/helper.rs169
-rw-r--r--third_party/rust/fluent-syntax/src/parser/macros.rs11
-rw-r--r--third_party/rust/fluent-syntax/src/parser/mod.rs278
-rw-r--r--third_party/rust/fluent-syntax/src/parser/pattern.rs207
-rw-r--r--third_party/rust/fluent-syntax/src/parser/runtime.rs61
-rw-r--r--third_party/rust/fluent-syntax/src/parser/slice.rs25
10 files changed, 1540 insertions, 0 deletions
diff --git a/third_party/rust/fluent-syntax/src/parser/comment.rs b/third_party/rust/fluent-syntax/src/parser/comment.rs
new file mode 100644
index 0000000000..a63483c1d3
--- /dev/null
+++ b/third_party/rust/fluent-syntax/src/parser/comment.rs
@@ -0,0 +1,89 @@
+use super::{core::Parser, core::Result, Slice};
+use crate::ast;
+
+#[derive(Debug, PartialEq, Clone, Copy)]
+pub(super) enum Level {
+ None = 0,
+ Regular = 1,
+ Group = 2,
+ Resource = 3,
+}
+
+impl<'s, S> Parser<S>
+where
+ S: Slice<'s>,
+{
+ pub(super) fn get_comment(&mut self) -> Result<(ast::Comment<S>, Level)> {
+ let mut level = Level::None;
+ let mut content = vec![];
+
+ while self.ptr < self.length {
+ let line_level = self.get_comment_level();
+ if line_level == Level::None {
+ self.ptr -= 1;
+ break;
+ } else if level != Level::None && line_level != level {
+ self.ptr -= line_level as usize;
+ break;
+ }
+
+ level = line_level;
+
+ if self.ptr == self.length {
+ break;
+ } else if self.is_current_byte(b'\n') {
+ content.push(self.get_comment_line());
+ } else {
+ if let Err(e) = self.expect_byte(b' ') {
+ if content.is_empty() {
+ return Err(e);
+ } else {
+ self.ptr -= line_level as usize;
+ break;
+ }
+ }
+ content.push(self.get_comment_line());
+ }
+ self.skip_eol();
+ }
+
+ Ok((ast::Comment { content }, level))
+ }
+
+ pub(super) fn skip_comment(&mut self) {
+ loop {
+ while self.ptr < self.length && !self.is_current_byte(b'\n') {
+ self.ptr += 1;
+ }
+ self.ptr += 1;
+ if self.is_current_byte(b'#') {
+ self.ptr += 1;
+ } else {
+ break;
+ }
+ }
+ }
+
+ fn get_comment_level(&mut self) -> Level {
+ if self.take_byte_if(b'#') {
+ if self.take_byte_if(b'#') {
+ if self.take_byte_if(b'#') {
+ return Level::Resource;
+ }
+ return Level::Group;
+ }
+ return Level::Regular;
+ }
+ Level::None
+ }
+
+ fn get_comment_line(&mut self) -> S {
+ let start_pos = self.ptr;
+
+ while !self.is_eol() {
+ self.ptr += 1;
+ }
+
+ self.source.slice(start_pos..self.ptr)
+ }
+}
diff --git a/third_party/rust/fluent-syntax/src/parser/core.rs b/third_party/rust/fluent-syntax/src/parser/core.rs
new file mode 100644
index 0000000000..68ad8dc0b6
--- /dev/null
+++ b/third_party/rust/fluent-syntax/src/parser/core.rs
@@ -0,0 +1,307 @@
+use super::{
+ comment,
+ errors::{ErrorKind, ParserError},
+ slice::Slice,
+};
+use crate::ast;
+
+pub type Result<T> = std::result::Result<T, ParserError>;
+
+pub struct Parser<S> {
+ pub(super) source: S,
+ pub(super) ptr: usize,
+ pub(super) length: usize,
+}
+
+impl<'s, S> Parser<S>
+where
+ S: Slice<'s>,
+{
+ pub fn new(source: S) -> Self {
+ let length = source.as_ref().as_bytes().len();
+ Self {
+ source,
+ ptr: 0,
+ length,
+ }
+ }
+
+ pub fn parse(
+ mut self,
+ ) -> std::result::Result<ast::Resource<S>, (ast::Resource<S>, Vec<ParserError>)> {
+ let mut errors = vec![];
+
+ let mut body = vec![];
+
+ self.skip_blank_block();
+ let mut last_comment = None;
+ let mut last_blank_count = 0;
+
+ while self.ptr < self.length {
+ let entry_start = self.ptr;
+ let mut entry = self.get_entry(entry_start);
+
+ if let Some(comment) = last_comment.take() {
+ match entry {
+ Ok(ast::Entry::Message(ref mut msg)) if last_blank_count < 2 => {
+ msg.comment = Some(comment);
+ }
+ Ok(ast::Entry::Term(ref mut term)) if last_blank_count < 2 => {
+ term.comment = Some(comment);
+ }
+ _ => {
+ body.push(ast::Entry::Comment(comment));
+ }
+ }
+ }
+
+ match entry {
+ Ok(ast::Entry::Comment(comment)) => {
+ last_comment = Some(comment);
+ }
+ Ok(entry) => {
+ body.push(entry);
+ }
+ Err(mut err) => {
+ self.skip_to_next_entry_start();
+ err.slice = Some(entry_start..self.ptr);
+ errors.push(err);
+ let content = self.source.slice(entry_start..self.ptr);
+ body.push(ast::Entry::Junk { content });
+ }
+ }
+ last_blank_count = self.skip_blank_block();
+ }
+
+ if let Some(last_comment) = last_comment.take() {
+ body.push(ast::Entry::Comment(last_comment));
+ }
+ if errors.is_empty() {
+ Ok(ast::Resource { body })
+ } else {
+ Err((ast::Resource { body }, errors))
+ }
+ }
+
+ fn get_entry(&mut self, entry_start: usize) -> Result<ast::Entry<S>> {
+ let entry = match get_current_byte!(self) {
+ Some(b'#') => {
+ let (comment, level) = self.get_comment()?;
+ match level {
+ comment::Level::Regular => ast::Entry::Comment(comment),
+ comment::Level::Group => ast::Entry::GroupComment(comment),
+ comment::Level::Resource => ast::Entry::ResourceComment(comment),
+ comment::Level::None => unreachable!(),
+ }
+ }
+ Some(b'-') => ast::Entry::Term(self.get_term(entry_start)?),
+ _ => ast::Entry::Message(self.get_message(entry_start)?),
+ };
+ Ok(entry)
+ }
+
+ pub fn get_message(&mut self, entry_start: usize) -> Result<ast::Message<S>> {
+ let id = self.get_identifier()?;
+ self.skip_blank_inline();
+ self.expect_byte(b'=')?;
+ let pattern = self.get_pattern()?;
+
+ self.skip_blank_block();
+
+ let attributes = self.get_attributes();
+
+ if pattern.is_none() && attributes.is_empty() {
+ let entry_id = id.name.as_ref().to_owned();
+ return error!(
+ ErrorKind::ExpectedMessageField { entry_id },
+ entry_start, self.ptr
+ );
+ }
+
+ Ok(ast::Message {
+ id,
+ value: pattern,
+ attributes,
+ comment: None,
+ })
+ }
+
+ pub fn get_term(&mut self, entry_start: usize) -> Result<ast::Term<S>> {
+ self.expect_byte(b'-')?;
+ let id = self.get_identifier()?;
+ self.skip_blank_inline();
+ self.expect_byte(b'=')?;
+ self.skip_blank_inline();
+
+ let value = self.get_pattern()?;
+
+ self.skip_blank_block();
+
+ let attributes = self.get_attributes();
+
+ if let Some(value) = value {
+ Ok(ast::Term {
+ id,
+ value,
+ attributes,
+ comment: None,
+ })
+ } else {
+ error!(
+ ErrorKind::ExpectedTermField {
+ entry_id: id.name.as_ref().to_owned()
+ },
+ entry_start, self.ptr
+ )
+ }
+ }
+
+ fn get_attributes(&mut self) -> Vec<ast::Attribute<S>> {
+ let mut attributes = vec![];
+
+ loop {
+ let line_start = self.ptr;
+ self.skip_blank_inline();
+ if !self.take_byte_if(b'.') {
+ self.ptr = line_start;
+ break;
+ }
+
+ if let Ok(attr) = self.get_attribute() {
+ attributes.push(attr);
+ } else {
+ self.ptr = line_start;
+ break;
+ }
+ }
+ attributes
+ }
+
+ fn get_attribute(&mut self) -> Result<ast::Attribute<S>> {
+ let id = self.get_identifier()?;
+ self.skip_blank_inline();
+ self.expect_byte(b'=')?;
+ let pattern = self.get_pattern()?;
+
+ match pattern {
+ Some(pattern) => Ok(ast::Attribute { id, value: pattern }),
+ None => error!(ErrorKind::MissingValue, self.ptr),
+ }
+ }
+
+ pub(super) fn get_identifier_unchecked(&mut self) -> ast::Identifier<S> {
+ let mut ptr = self.ptr;
+
+ while matches!(get_byte!(self, ptr), Some(b) if b.is_ascii_alphanumeric() || *b == b'-' || *b == b'_')
+ {
+ ptr += 1;
+ }
+
+ let name = self.source.slice(self.ptr - 1..ptr);
+ self.ptr = ptr;
+
+ ast::Identifier { name }
+ }
+
+ pub(super) fn get_identifier(&mut self) -> Result<ast::Identifier<S>> {
+ if !self.is_identifier_start() {
+ return error!(
+ ErrorKind::ExpectedCharRange {
+ range: "a-zA-Z".to_string()
+ },
+ self.ptr
+ );
+ }
+ self.ptr += 1;
+ Ok(self.get_identifier_unchecked())
+ }
+
+ pub(super) fn get_attribute_accessor(&mut self) -> Result<Option<ast::Identifier<S>>> {
+ if self.take_byte_if(b'.') {
+ let ident = self.get_identifier()?;
+ Ok(Some(ident))
+ } else {
+ Ok(None)
+ }
+ }
+
+ fn get_variant_key(&mut self) -> Result<ast::VariantKey<S>> {
+ self.skip_blank();
+
+ let key = if self.is_number_start() {
+ ast::VariantKey::NumberLiteral {
+ value: self.get_number_literal()?,
+ }
+ } else {
+ ast::VariantKey::Identifier {
+ name: self.get_identifier()?.name,
+ }
+ };
+
+ self.skip_blank();
+
+ self.expect_byte(b']')?;
+
+ Ok(key)
+ }
+
+ pub(super) fn get_variants(&mut self) -> Result<Vec<ast::Variant<S>>> {
+ let mut variants = Vec::with_capacity(2);
+ let mut has_default = false;
+
+ loop {
+ let default = self.take_byte_if(b'*');
+ if default {
+ if has_default {
+ return error!(ErrorKind::MultipleDefaultVariants, self.ptr);
+ } else {
+ has_default = true;
+ }
+ }
+
+ if !self.take_byte_if(b'[') {
+ break;
+ }
+
+ let key = self.get_variant_key()?;
+
+ let value = self.get_pattern()?;
+
+ if let Some(value) = value {
+ variants.push(ast::Variant {
+ key,
+ value,
+ default,
+ });
+ self.skip_blank();
+ } else {
+ return error!(ErrorKind::MissingValue, self.ptr);
+ }
+ }
+
+ if has_default {
+ Ok(variants)
+ } else {
+ error!(ErrorKind::MissingDefaultVariant, self.ptr)
+ }
+ }
+
+ pub(super) fn get_placeable(&mut self) -> Result<ast::Expression<S>> {
+ self.skip_blank();
+ let exp = self.get_expression()?;
+ self.skip_blank_inline();
+ self.expect_byte(b'}')?;
+
+ let invalid_expression_found = match &exp {
+ ast::Expression::Inline(ast::InlineExpression::TermReference {
+ ref attribute, ..
+ }) => attribute.is_some(),
+ _ => false,
+ };
+ if invalid_expression_found {
+ return error!(ErrorKind::TermAttributeAsPlaceable, self.ptr);
+ }
+
+ Ok(exp)
+ }
+}
diff --git a/third_party/rust/fluent-syntax/src/parser/errors.rs b/third_party/rust/fluent-syntax/src/parser/errors.rs
new file mode 100644
index 0000000000..2c29f97bbf
--- /dev/null
+++ b/third_party/rust/fluent-syntax/src/parser/errors.rs
@@ -0,0 +1,169 @@
+use std::ops::Range;
+use thiserror::Error;
+
+/// Error containing information about an error encountered by the Fluent Parser.
+///
+/// Errors in Fluent Parser are non-fatal, and the syntax has been
+/// designed to allow for strong recovery.
+///
+/// In result [`ParserError`] is designed to point at the slice of
+/// the input that is most likely to be a complete fragment from after
+/// the end of a valid entry, to the start of the next valid entry, with
+/// the invalid syntax in the middle.
+///
+///
+/// # Example
+///
+/// ```
+/// use fluent_syntax::parser;
+/// use fluent_syntax::ast;
+///
+/// let ftl = r#"
+/// key1 = Value 1
+///
+/// g@Rb@ge = #2y ds
+///
+/// key2 = Value 2
+///
+/// "#;
+///
+/// let (resource, errors) = parser::parse_runtime(ftl)
+/// .expect_err("Resource should contain errors.");
+///
+/// assert_eq!(
+/// errors,
+/// vec![
+/// parser::ParserError {
+/// pos: 18..19,
+/// slice: Some(17..35),
+/// kind: parser::ErrorKind::ExpectedToken('=')
+/// }
+/// ]
+/// );
+///
+/// assert_eq!(
+/// resource.body[0],
+/// ast::Entry::Message(
+/// ast::Message {
+/// id: ast::Identifier {
+/// name: "key1"
+/// },
+/// value: Some(ast::Pattern {
+/// elements: vec![
+/// ast::PatternElement::TextElement {
+/// value: "Value 1"
+/// },
+/// ]
+/// }),
+/// attributes: vec![],
+/// comment: None,
+/// }
+/// ),
+/// );
+///
+/// assert_eq!(
+/// resource.body[1],
+/// ast::Entry::Junk {
+/// content: "g@Rb@ge = #2y ds\n\n"
+/// }
+/// );
+///
+/// assert_eq!(
+/// resource.body[2],
+/// ast::Entry::Message(
+/// ast::Message {
+/// id: ast::Identifier {
+/// name: "key2"
+/// },
+/// value: Some(ast::Pattern {
+/// elements: vec![
+/// ast::PatternElement::TextElement {
+/// value: "Value 2"
+/// },
+/// ]
+/// }),
+/// attributes: vec![],
+/// comment: None,
+/// }
+/// ),
+/// );
+/// ```
+///
+/// The information contained in the `ParserError` should allow the tooling
+/// to display rich contextual annotations of the error slice, using
+/// crates such as `annotate-snippers`.
+#[derive(Error, Debug, PartialEq, Clone)]
+#[error("{}", self.kind)]
+pub struct ParserError {
+ /// Precise location of where the parser encountered the error.
+ pub pos: Range<usize>,
+ /// Slice of the input from the end of the last valid entry to the beginning
+ /// of the next valid entry with the invalid syntax in the middle.
+ pub slice: Option<Range<usize>>,
+ /// The type of the error that the parser encountered.
+ pub kind: ErrorKind,
+}
+
+macro_rules! error {
+ ($kind:expr, $start:expr) => {{
+ Err(ParserError {
+ pos: $start..$start + 1,
+ slice: None,
+ kind: $kind,
+ })
+ }};
+ ($kind:expr, $start:expr, $end:expr) => {{
+ Err(ParserError {
+ pos: $start..$end,
+ slice: None,
+ kind: $kind,
+ })
+ }};
+}
+
+/// Kind of an error associated with the [`ParserError`].
+#[derive(Error, Debug, PartialEq, Clone)]
+pub enum ErrorKind {
+ #[error("Expected a token starting with \"{0}\"")]
+ ExpectedToken(char),
+ #[error("Expected one of \"{range}\"")]
+ ExpectedCharRange { range: String },
+ #[error("Expected a message field for \"{entry_id}\"")]
+ ExpectedMessageField { entry_id: String },
+ #[error("Expected a term field for \"{entry_id}\"")]
+ ExpectedTermField { entry_id: String },
+ #[error("Callee is not allowed here")]
+ ForbiddenCallee,
+ #[error("The select expression must have a default variant")]
+ MissingDefaultVariant,
+ #[error("Expected a value")]
+ MissingValue,
+ #[error("A select expression can only have one default variant")]
+ MultipleDefaultVariants,
+ #[error("Message references can't be used as a selector")]
+ MessageReferenceAsSelector,
+ #[error("Term references can't be used as a selector")]
+ TermReferenceAsSelector,
+ #[error("Message attributes can't be used as a selector")]
+ MessageAttributeAsSelector,
+ #[error("Term attributes can't be used as a selector")]
+ TermAttributeAsPlaceable,
+ #[error("Unterminated string literal")]
+ UnterminatedStringLiteral,
+ #[error("Positional arguments must come before named arguments")]
+ PositionalArgumentFollowsNamed,
+ #[error("The \"{0}\" argument appears twice")]
+ DuplicatedNamedArgument(String),
+ #[error("Unknown escape sequence")]
+ UnknownEscapeSequence(String),
+ #[error("Invalid unicode escape sequence, \"{0}\"")]
+ InvalidUnicodeEscapeSequence(String),
+ #[error("Unbalanced closing brace")]
+ UnbalancedClosingBrace,
+ #[error("Expected an inline expression")]
+ ExpectedInlineExpression,
+ #[error("Expected a simple expression as selector")]
+ ExpectedSimpleExpressionAsSelector,
+ #[error("Expected a string or number literal")]
+ ExpectedLiteral,
+}
diff --git a/third_party/rust/fluent-syntax/src/parser/expression.rs b/third_party/rust/fluent-syntax/src/parser/expression.rs
new file mode 100644
index 0000000000..c5ccb32bf4
--- /dev/null
+++ b/third_party/rust/fluent-syntax/src/parser/expression.rs
@@ -0,0 +1,224 @@
+use super::errors::{ErrorKind, ParserError};
+use super::{core::Parser, core::Result, slice::Slice};
+use crate::ast;
+
+impl<'s, S> Parser<S>
+where
+ S: Slice<'s>,
+{
+ pub(super) fn get_expression(&mut self) -> Result<ast::Expression<S>> {
+ let exp = self.get_inline_expression(false)?;
+
+ self.skip_blank();
+
+ if !self.is_current_byte(b'-') || !self.is_byte_at(b'>', self.ptr + 1) {
+ if let ast::InlineExpression::TermReference { ref attribute, .. } = exp {
+ if attribute.is_some() {
+ return error!(ErrorKind::TermAttributeAsPlaceable, self.ptr);
+ }
+ }
+ return Ok(ast::Expression::Inline(exp));
+ }
+
+ match exp {
+ ast::InlineExpression::MessageReference { ref attribute, .. } => {
+ if attribute.is_none() {
+ return error!(ErrorKind::MessageReferenceAsSelector, self.ptr);
+ } else {
+ return error!(ErrorKind::MessageAttributeAsSelector, self.ptr);
+ }
+ }
+ ast::InlineExpression::TermReference { ref attribute, .. } => {
+ if attribute.is_none() {
+ return error!(ErrorKind::TermReferenceAsSelector, self.ptr);
+ }
+ }
+ ast::InlineExpression::StringLiteral { .. }
+ | ast::InlineExpression::NumberLiteral { .. }
+ | ast::InlineExpression::VariableReference { .. }
+ | ast::InlineExpression::FunctionReference { .. } => {}
+ _ => {
+ return error!(ErrorKind::ExpectedSimpleExpressionAsSelector, self.ptr);
+ }
+ };
+
+ self.ptr += 2; // ->
+
+ self.skip_blank_inline();
+ if !self.skip_eol() {
+ return error!(
+ ErrorKind::ExpectedCharRange {
+ range: "\n | \r\n".to_string()
+ },
+ self.ptr
+ );
+ }
+ self.skip_blank();
+
+ let variants = self.get_variants()?;
+
+ Ok(ast::Expression::Select {
+ selector: exp,
+ variants,
+ })
+ }
+
+ pub(super) fn get_inline_expression(
+ &mut self,
+ only_literal: bool,
+ ) -> Result<ast::InlineExpression<S>> {
+ match get_current_byte!(self) {
+ Some(b'"') => {
+ self.ptr += 1; // "
+ let start = self.ptr;
+ while let Some(b) = get_current_byte!(self) {
+ match b {
+ b'\\' => match get_byte!(self, self.ptr + 1) {
+ Some(b'\\') | Some(b'{') | Some(b'"') => self.ptr += 2,
+ Some(b'u') => {
+ self.ptr += 2;
+ self.skip_unicode_escape_sequence(4)?;
+ }
+ Some(b'U') => {
+ self.ptr += 2;
+ self.skip_unicode_escape_sequence(6)?;
+ }
+ b => {
+ let seq = b.unwrap_or(&b' ').to_string();
+ return error!(ErrorKind::UnknownEscapeSequence(seq), self.ptr);
+ }
+ },
+ b'"' => {
+ break;
+ }
+ b'\n' => {
+ return error!(ErrorKind::UnterminatedStringLiteral, self.ptr);
+ }
+ _ => self.ptr += 1,
+ }
+ }
+
+ self.expect_byte(b'"')?;
+ let slice = self.source.slice(start..self.ptr - 1);
+ Ok(ast::InlineExpression::StringLiteral { value: slice })
+ }
+ Some(b) if b.is_ascii_digit() => {
+ let num = self.get_number_literal()?;
+ Ok(ast::InlineExpression::NumberLiteral { value: num })
+ }
+ Some(b'-') if !only_literal => {
+ self.ptr += 1; // -
+ if self.is_identifier_start() {
+ self.ptr += 1;
+ let id = self.get_identifier_unchecked();
+ let attribute = self.get_attribute_accessor()?;
+ let arguments = self.get_call_arguments()?;
+ Ok(ast::InlineExpression::TermReference {
+ id,
+ attribute,
+ arguments,
+ })
+ } else {
+ self.ptr -= 1;
+ let num = self.get_number_literal()?;
+ Ok(ast::InlineExpression::NumberLiteral { value: num })
+ }
+ }
+ Some(b'$') if !only_literal => {
+ self.ptr += 1; // $
+ let id = self.get_identifier()?;
+ Ok(ast::InlineExpression::VariableReference { id })
+ }
+ Some(b) if b.is_ascii_alphabetic() => {
+ self.ptr += 1;
+ let id = self.get_identifier_unchecked();
+ let arguments = self.get_call_arguments()?;
+ if let Some(arguments) = arguments {
+ if !Self::is_callee(&id.name) {
+ return error!(ErrorKind::ForbiddenCallee, self.ptr);
+ }
+
+ Ok(ast::InlineExpression::FunctionReference { id, arguments })
+ } else {
+ let attribute = self.get_attribute_accessor()?;
+ Ok(ast::InlineExpression::MessageReference { id, attribute })
+ }
+ }
+ Some(b'{') if !only_literal => {
+ self.ptr += 1; // {
+ let exp = self.get_placeable()?;
+ Ok(ast::InlineExpression::Placeable {
+ expression: Box::new(exp),
+ })
+ }
+ _ if only_literal => error!(ErrorKind::ExpectedLiteral, self.ptr),
+ _ => error!(ErrorKind::ExpectedInlineExpression, self.ptr),
+ }
+ }
+
+ pub fn get_call_arguments(&mut self) -> Result<Option<ast::CallArguments<S>>> {
+ self.skip_blank();
+ if !self.take_byte_if(b'(') {
+ return Ok(None);
+ }
+
+ let mut positional = vec![];
+ let mut named = vec![];
+ let mut argument_names = vec![];
+
+ self.skip_blank();
+
+ while self.ptr < self.length {
+ if self.is_current_byte(b')') {
+ break;
+ }
+
+ let expr = self.get_inline_expression(false)?;
+
+ if let ast::InlineExpression::MessageReference {
+ ref id,
+ attribute: None,
+ } = expr
+ {
+ self.skip_blank();
+ if self.is_current_byte(b':') {
+ if argument_names.contains(&id.name) {
+ return error!(
+ ErrorKind::DuplicatedNamedArgument(id.name.as_ref().to_owned()),
+ self.ptr
+ );
+ }
+ self.ptr += 1;
+ self.skip_blank();
+ let val = self.get_inline_expression(true)?;
+
+ argument_names.push(id.name.clone());
+ named.push(ast::NamedArgument {
+ name: ast::Identifier {
+ name: id.name.clone(),
+ },
+ value: val,
+ });
+ } else {
+ if !argument_names.is_empty() {
+ return error!(ErrorKind::PositionalArgumentFollowsNamed, self.ptr);
+ }
+ positional.push(expr);
+ }
+ } else {
+ if !argument_names.is_empty() {
+ return error!(ErrorKind::PositionalArgumentFollowsNamed, self.ptr);
+ }
+ positional.push(expr);
+ }
+
+ self.skip_blank();
+ self.take_byte_if(b',');
+ self.skip_blank();
+ }
+
+ self.expect_byte(b')')?;
+
+ Ok(Some(ast::CallArguments { positional, named }))
+ }
+}
diff --git a/third_party/rust/fluent-syntax/src/parser/helper.rs b/third_party/rust/fluent-syntax/src/parser/helper.rs
new file mode 100644
index 0000000000..11544d6855
--- /dev/null
+++ b/third_party/rust/fluent-syntax/src/parser/helper.rs
@@ -0,0 +1,169 @@
+use super::errors::{ErrorKind, ParserError};
+use super::{core::Parser, core::Result, slice::Slice};
+
+impl<'s, S> Parser<S>
+where
+ S: Slice<'s>,
+{
+ pub(super) fn is_current_byte(&self, b: u8) -> bool {
+ get_current_byte!(self) == Some(&b)
+ }
+
+ pub(super) fn is_byte_at(&self, b: u8, pos: usize) -> bool {
+ get_byte!(self, pos) == Some(&b)
+ }
+
+ pub(super) fn skip_to_next_entry_start(&mut self) {
+ while let Some(b) = get_current_byte!(self) {
+ let new_line = self.ptr == 0 || get_byte!(self, self.ptr - 1) == Some(&b'\n');
+
+ if new_line && (b.is_ascii_alphabetic() || [b'-', b'#'].contains(b)) {
+ break;
+ }
+
+ self.ptr += 1;
+ }
+ }
+
+ pub(super) fn skip_eol(&mut self) -> bool {
+ match get_current_byte!(self) {
+ Some(b'\n') => {
+ self.ptr += 1;
+ true
+ }
+ Some(b'\r') if self.is_byte_at(b'\n', self.ptr + 1) => {
+ self.ptr += 2;
+ true
+ }
+ _ => false,
+ }
+ }
+
+ pub(super) fn skip_unicode_escape_sequence(&mut self, length: usize) -> Result<()> {
+ let start = self.ptr;
+ for _ in 0..length {
+ match get_current_byte!(self) {
+ Some(b) if b.is_ascii_hexdigit() => self.ptr += 1,
+ _ => break,
+ }
+ }
+ if self.ptr - start != length {
+ let end = if self.ptr >= self.length {
+ self.ptr
+ } else {
+ self.ptr + 1
+ };
+ let seq = self.source.slice(start..end).as_ref().to_owned();
+ return error!(ErrorKind::InvalidUnicodeEscapeSequence(seq), self.ptr);
+ }
+ Ok(())
+ }
+
+ pub(super) fn is_identifier_start(&self) -> bool {
+ matches!(get_current_byte!(self), Some(b) if b.is_ascii_alphabetic())
+ }
+
+ pub(super) fn take_byte_if(&mut self, b: u8) -> bool {
+ if self.is_current_byte(b) {
+ self.ptr += 1;
+ true
+ } else {
+ false
+ }
+ }
+
+ pub(super) fn skip_blank_block(&mut self) -> usize {
+ let mut count = 0;
+ loop {
+ let start = self.ptr;
+ self.skip_blank_inline();
+ if !self.skip_eol() {
+ self.ptr = start;
+ break;
+ }
+ count += 1;
+ }
+ count
+ }
+
+ pub(super) fn skip_blank(&mut self) {
+ loop {
+ match get_current_byte!(self) {
+ Some(b' ') | Some(b'\n') => self.ptr += 1,
+ Some(b'\r') if get_byte!(self, self.ptr + 1) == Some(&b'\n') => self.ptr += 2,
+ _ => break,
+ }
+ }
+ }
+
+ pub(super) fn skip_blank_inline(&mut self) -> usize {
+ let start = self.ptr;
+ while let Some(b' ') = get_current_byte!(self) {
+ self.ptr += 1;
+ }
+ self.ptr - start
+ }
+
+ pub(super) fn is_byte_pattern_continuation(b: u8) -> bool {
+ !matches!(b, b'.' | b'}' | b'[' | b'*')
+ }
+
+ pub(super) fn is_callee(name: &S) -> bool {
+ name.as_ref()
+ .as_bytes()
+ .iter()
+ .all(|c| c.is_ascii_uppercase() || c.is_ascii_digit() || *c == b'_' || *c == b'-')
+ }
+
+ pub(super) fn expect_byte(&mut self, b: u8) -> Result<()> {
+ if !self.is_current_byte(b) {
+ return error!(ErrorKind::ExpectedToken(b as char), self.ptr);
+ }
+ self.ptr += 1;
+ Ok(())
+ }
+
+ pub(super) fn is_number_start(&self) -> bool {
+ matches!(get_current_byte!(self), Some(b) if b.is_ascii_digit() || b == &b'-')
+ }
+
+ pub(super) fn is_eol(&self) -> bool {
+ match get_current_byte!(self) {
+ Some(b'\n') => true,
+ Some(b'\r') if self.is_byte_at(b'\n', self.ptr + 1) => true,
+ None => true,
+ _ => false,
+ }
+ }
+
+ pub(super) fn skip_digits(&mut self) -> Result<()> {
+ let start = self.ptr;
+ loop {
+ match get_current_byte!(self) {
+ Some(b) if b.is_ascii_digit() => self.ptr += 1,
+ _ => break,
+ }
+ }
+ if start == self.ptr {
+ error!(
+ ErrorKind::ExpectedCharRange {
+ range: "0-9".to_string()
+ },
+ self.ptr
+ )
+ } else {
+ Ok(())
+ }
+ }
+
+ pub(super) fn get_number_literal(&mut self) -> Result<S> {
+ let start = self.ptr;
+ self.take_byte_if(b'-');
+ self.skip_digits()?;
+ if self.take_byte_if(b'.') {
+ self.skip_digits()?;
+ }
+
+ Ok(self.source.slice(start..self.ptr))
+ }
+}
diff --git a/third_party/rust/fluent-syntax/src/parser/macros.rs b/third_party/rust/fluent-syntax/src/parser/macros.rs
new file mode 100644
index 0000000000..671d543285
--- /dev/null
+++ b/third_party/rust/fluent-syntax/src/parser/macros.rs
@@ -0,0 +1,11 @@
+macro_rules! get_byte {
+ ($s:expr, $idx:expr) => {
+ $s.source.as_ref().as_bytes().get($idx)
+ };
+}
+
+macro_rules! get_current_byte {
+ ($s:expr) => {
+ $s.source.as_ref().as_bytes().get($s.ptr)
+ };
+}
diff --git a/third_party/rust/fluent-syntax/src/parser/mod.rs b/third_party/rust/fluent-syntax/src/parser/mod.rs
new file mode 100644
index 0000000000..52edfdc37a
--- /dev/null
+++ b/third_party/rust/fluent-syntax/src/parser/mod.rs
@@ -0,0 +1,278 @@
+//! Fluent Translation List parsing utilities
+//!
+//! FTL resources can be parsed using one of two methods:
+//! * [`parse`] - parses an input into a complete Abstract Syntax Tree representation with all source information preserved.
+//! * [`parse_runtime`] - parses an input into a runtime optimized Abstract Syntax Tree
+//! representation with comments stripped.
+//!
+//! # Example
+//!
+//! ```
+//! use fluent_syntax::parser;
+//! use fluent_syntax::ast;
+//!
+//! let ftl = r#"
+//! #### Resource Level Comment
+//!
+//! ## This is a message comment
+//! hello-world = Hello World!
+//!
+//! "#;
+//!
+//! let resource = parser::parse(ftl)
+//! .expect("Failed to parse an FTL resource.");
+//!
+//! assert_eq!(
+//! resource.body[0],
+//! ast::Entry::ResourceComment(
+//! ast::Comment {
+//! content: vec![
+//! "Resource Level Comment"
+//! ]
+//! }
+//! )
+//! );
+//! assert_eq!(
+//! resource.body[1],
+//! ast::Entry::Message(
+//! ast::Message {
+//! id: ast::Identifier {
+//! name: "hello-world"
+//! },
+//! value: Some(ast::Pattern {
+//! elements: vec![
+//! ast::PatternElement::TextElement {
+//! value: "Hello World!"
+//! },
+//! ]
+//! }),
+//! attributes: vec![],
+//! comment: Some(
+//! ast::Comment {
+//! content: vec!["This is a message comment"]
+//! }
+//! )
+//! }
+//! ),
+//! );
+//! ```
+//!
+//! # Error Recovery
+//!
+//! In both modes the parser is lenient, attempting to recover from errors.
+//!
+//! The [`Result`] return the resulting AST in both scenarios, and in the
+//! error scenario a vector of [`ParserError`] elements is returned as well.
+//!
+//! Any unparsed parts of the input are returned as [`ast::Entry::Junk`] elements.
+#[macro_use]
+mod errors;
+#[macro_use]
+mod macros;
+mod comment;
+mod core;
+mod expression;
+mod helper;
+mod pattern;
+mod runtime;
+mod slice;
+
+use crate::ast;
+pub use errors::{ErrorKind, ParserError};
+pub use slice::Slice;
+
+/// Parser result always returns an AST representation of the input,
+/// and if parsing errors were encountered, a list of [`ParserError`] elements
+/// is also returned.
+///
+/// # Example
+///
+/// ```
+/// use fluent_syntax::parser;
+/// use fluent_syntax::ast;
+///
+/// let ftl = r#"
+/// key1 = Value 1
+///
+/// g@Rb@ge = #2y ds
+///
+/// key2 = Value 2
+///
+/// "#;
+///
+/// let (resource, errors) = parser::parse_runtime(ftl)
+/// .expect_err("Resource should contain errors.");
+///
+/// assert_eq!(
+/// errors,
+/// vec![
+/// parser::ParserError {
+/// pos: 18..19,
+/// slice: Some(17..35),
+/// kind: parser::ErrorKind::ExpectedToken('=')
+/// }
+/// ]
+/// );
+///
+/// assert_eq!(
+/// resource.body[0],
+/// ast::Entry::Message(
+/// ast::Message {
+/// id: ast::Identifier {
+/// name: "key1"
+/// },
+/// value: Some(ast::Pattern {
+/// elements: vec![
+/// ast::PatternElement::TextElement {
+/// value: "Value 1"
+/// },
+/// ]
+/// }),
+/// attributes: vec![],
+/// comment: None,
+/// }
+/// ),
+/// );
+///
+/// assert_eq!(
+/// resource.body[1],
+/// ast::Entry::Junk {
+/// content: "g@Rb@ge = #2y ds\n\n"
+/// }
+/// );
+///
+/// assert_eq!(
+/// resource.body[2],
+/// ast::Entry::Message(
+/// ast::Message {
+/// id: ast::Identifier {
+/// name: "key2"
+/// },
+/// value: Some(ast::Pattern {
+/// elements: vec![
+/// ast::PatternElement::TextElement {
+/// value: "Value 2"
+/// },
+/// ]
+/// }),
+/// attributes: vec![],
+/// comment: None,
+/// }
+/// ),
+/// );
+/// ```
+pub type Result<S> = std::result::Result<ast::Resource<S>, (ast::Resource<S>, Vec<ParserError>)>;
+
+/// Parses an input into a complete Abstract Syntax Tree representation with
+/// all source information preserved.
+///
+/// This mode is intended for tooling, linters and other scenarios where
+/// complete representation, with comments, is preferred over speed and memory
+/// utilization.
+///
+/// # Example
+///
+/// ```
+/// use fluent_syntax::parser;
+/// use fluent_syntax::ast;
+///
+/// let ftl = r#"
+/// #### Resource Level Comment
+///
+/// ## This is a message comment
+/// hello-world = Hello World!
+///
+/// "#;
+///
+/// let resource = parser::parse(ftl)
+/// .expect("Failed to parse an FTL resource.");
+///
+/// assert_eq!(
+/// resource.body[0],
+/// ast::Entry::ResourceComment(
+/// ast::Comment {
+/// content: vec![
+/// "Resource Level Comment"
+/// ]
+/// }
+/// )
+/// );
+/// assert_eq!(
+/// resource.body[1],
+/// ast::Entry::Message(
+/// ast::Message {
+/// id: ast::Identifier {
+/// name: "hello-world"
+/// },
+/// value: Some(ast::Pattern {
+/// elements: vec![
+/// ast::PatternElement::TextElement {
+/// value: "Hello World!"
+/// },
+/// ]
+/// }),
+/// attributes: vec![],
+/// comment: Some(
+/// ast::Comment {
+/// content: vec!["This is a message comment"]
+/// }
+/// )
+/// }
+/// ),
+/// );
+/// ```
+pub fn parse<'s, S>(input: S) -> Result<S>
+where
+ S: Slice<'s>,
+{
+ core::Parser::new(input).parse()
+}
+
+/// Parses an input into an Abstract Syntax Tree representation with comments stripped.
+///
+/// This mode is intended for runtime use of Fluent. It currently strips all
+/// comments improving parsing performance and reducing the size of the AST tree.
+///
+/// # Example
+///
+/// ```
+/// use fluent_syntax::parser;
+/// use fluent_syntax::ast;
+///
+/// let ftl = r#"
+/// #### Resource Level Comment
+///
+/// ## This is a message comment
+/// hello-world = Hello World!
+///
+/// "#;
+///
+/// let resource = parser::parse_runtime(ftl)
+/// .expect("Failed to parse an FTL resource.");
+///
+/// assert_eq!(
+/// resource.body[0],
+/// ast::Entry::Message(
+/// ast::Message {
+/// id: ast::Identifier {
+/// name: "hello-world"
+/// },
+/// value: Some(ast::Pattern {
+/// elements: vec![
+/// ast::PatternElement::TextElement {
+/// value: "Hello World!"
+/// },
+/// ]
+/// }),
+/// attributes: vec![],
+/// comment: None,
+/// }
+/// ),
+/// );
+/// ```
+pub fn parse_runtime<'s, S>(input: S) -> Result<S>
+where
+ S: Slice<'s>,
+{
+ core::Parser::new(input).parse_runtime()
+}
diff --git a/third_party/rust/fluent-syntax/src/parser/pattern.rs b/third_party/rust/fluent-syntax/src/parser/pattern.rs
new file mode 100644
index 0000000000..516326d761
--- /dev/null
+++ b/third_party/rust/fluent-syntax/src/parser/pattern.rs
@@ -0,0 +1,207 @@
+use super::errors::{ErrorKind, ParserError};
+use super::{core::Parser, core::Result, slice::Slice};
+use crate::ast;
+
+#[derive(Debug, PartialEq)]
+enum TextElementTermination {
+ LineFeed,
+ CRLF,
+ PlaceableStart,
+ EOF,
+}
+
+// This enum tracks the placement of the text element in the pattern, which is needed for
+// dedentation logic.
+#[derive(Debug, PartialEq)]
+enum TextElementPosition {
+ InitialLineStart,
+ LineStart,
+ Continuation,
+}
+
+// This enum allows us to mark pointers in the source which will later become text elements
+// but without slicing them out of the source string. This makes the indentation adjustments
+// cheaper since they'll happen on the pointers, rather than extracted slices.
+#[derive(Debug)]
+enum PatternElementPlaceholders<S> {
+ Placeable(ast::Expression<S>),
+ // (start, end, indent, position)
+ TextElement(usize, usize, usize, TextElementPosition),
+}
+
+// This enum tracks whether the text element is blank or not.
+// This is important to identify text elements which should not be taken into account
+// when calculating common indent.
+#[derive(Debug, PartialEq)]
+enum TextElementType {
+ Blank,
+ NonBlank,
+}
+
+impl<'s, S> Parser<S>
+where
+ S: Slice<'s>,
+{
+ pub(super) fn get_pattern(&mut self) -> Result<Option<ast::Pattern<S>>> {
+ let mut elements = vec![];
+ let mut last_non_blank = None;
+ let mut common_indent = None;
+
+ self.skip_blank_inline();
+
+ let mut text_element_role = if self.skip_eol() {
+ self.skip_blank_block();
+ TextElementPosition::LineStart
+ } else {
+ TextElementPosition::InitialLineStart
+ };
+
+ while self.ptr < self.length {
+ if self.take_byte_if(b'{') {
+ if text_element_role == TextElementPosition::LineStart {
+ common_indent = Some(0);
+ }
+ let exp = self.get_placeable()?;
+ last_non_blank = Some(elements.len());
+ elements.push(PatternElementPlaceholders::Placeable(exp));
+ text_element_role = TextElementPosition::Continuation;
+ } else {
+ let slice_start = self.ptr;
+ let mut indent = 0;
+ if text_element_role == TextElementPosition::LineStart {
+ indent = self.skip_blank_inline();
+ if let Some(b) = get_current_byte!(self) {
+ if indent == 0 {
+ if b != &b'\r' && b != &b'\n' {
+ break;
+ }
+ } else if !Self::is_byte_pattern_continuation(*b) {
+ self.ptr = slice_start;
+ break;
+ }
+ } else {
+ break;
+ }
+ }
+ let (start, end, text_element_type, termination_reason) = self.get_text_slice()?;
+ if start != end {
+ if text_element_role == TextElementPosition::LineStart
+ && text_element_type == TextElementType::NonBlank
+ {
+ if let Some(common) = common_indent {
+ if indent < common {
+ common_indent = Some(indent);
+ }
+ } else {
+ common_indent = Some(indent);
+ }
+ }
+ if text_element_role != TextElementPosition::LineStart
+ || text_element_type == TextElementType::NonBlank
+ || termination_reason == TextElementTermination::LineFeed
+ {
+ if text_element_type == TextElementType::NonBlank {
+ last_non_blank = Some(elements.len());
+ }
+ elements.push(PatternElementPlaceholders::TextElement(
+ slice_start,
+ end,
+ indent,
+ text_element_role,
+ ));
+ }
+ }
+
+ text_element_role = match termination_reason {
+ TextElementTermination::LineFeed => TextElementPosition::LineStart,
+ TextElementTermination::CRLF => TextElementPosition::LineStart,
+ TextElementTermination::PlaceableStart => TextElementPosition::Continuation,
+ TextElementTermination::EOF => TextElementPosition::Continuation,
+ };
+ }
+ }
+
+ if let Some(last_non_blank) = last_non_blank {
+ let elements = elements
+ .into_iter()
+ .take(last_non_blank + 1)
+ .enumerate()
+ .map(|(i, elem)| match elem {
+ PatternElementPlaceholders::Placeable(expression) => {
+ ast::PatternElement::Placeable { expression }
+ }
+ PatternElementPlaceholders::TextElement(start, end, indent, role) => {
+ let start = if role == TextElementPosition::LineStart {
+ common_indent.map_or_else(
+ || start + indent,
+ |common_indent| start + std::cmp::min(indent, common_indent),
+ )
+ } else {
+ start
+ };
+ let mut value = self.source.slice(start..end);
+ if last_non_blank == i {
+ value.trim();
+ }
+ ast::PatternElement::TextElement { value }
+ }
+ })
+ .collect();
+ return Ok(Some(ast::Pattern { elements }));
+ }
+
+ Ok(None)
+ }
+
+ fn get_text_slice(
+ &mut self,
+ ) -> Result<(usize, usize, TextElementType, TextElementTermination)> {
+ let start_pos = self.ptr;
+ let mut text_element_type = TextElementType::Blank;
+
+ while let Some(b) = get_current_byte!(self) {
+ match b {
+ b' ' => self.ptr += 1,
+ b'\n' => {
+ self.ptr += 1;
+ return Ok((
+ start_pos,
+ self.ptr,
+ text_element_type,
+ TextElementTermination::LineFeed,
+ ));
+ }
+ b'\r' if self.is_byte_at(b'\n', self.ptr + 1) => {
+ self.ptr += 1;
+ return Ok((
+ start_pos,
+ self.ptr - 1,
+ text_element_type,
+ TextElementTermination::CRLF,
+ ));
+ }
+ b'{' => {
+ return Ok((
+ start_pos,
+ self.ptr,
+ text_element_type,
+ TextElementTermination::PlaceableStart,
+ ));
+ }
+ b'}' => {
+ return error!(ErrorKind::UnbalancedClosingBrace, self.ptr);
+ }
+ _ => {
+ text_element_type = TextElementType::NonBlank;
+ self.ptr += 1
+ }
+ }
+ }
+ Ok((
+ start_pos,
+ self.ptr,
+ text_element_type,
+ TextElementTermination::EOF,
+ ))
+ }
+}
diff --git a/third_party/rust/fluent-syntax/src/parser/runtime.rs b/third_party/rust/fluent-syntax/src/parser/runtime.rs
new file mode 100644
index 0000000000..e116ceaeed
--- /dev/null
+++ b/third_party/rust/fluent-syntax/src/parser/runtime.rs
@@ -0,0 +1,61 @@
+use super::{
+ core::{Parser, Result},
+ errors::ParserError,
+ slice::Slice,
+};
+use crate::ast;
+
+impl<'s, S> Parser<S>
+where
+ S: Slice<'s>,
+{
+ pub fn parse_runtime(
+ mut self,
+ ) -> std::result::Result<ast::Resource<S>, (ast::Resource<S>, Vec<ParserError>)> {
+ let mut errors = vec![];
+
+ // That default allocation gives the lowest
+ // number of instructions and cycles in ioi.
+ let mut body = Vec::with_capacity(6);
+
+ self.skip_blank_block();
+
+ while self.ptr < self.length {
+ let entry_start = self.ptr;
+ let entry = self.get_entry_runtime(entry_start);
+
+ match entry {
+ Ok(Some(entry)) => {
+ body.push(entry);
+ }
+ Ok(None) => {}
+ Err(mut err) => {
+ self.skip_to_next_entry_start();
+ err.slice = Some(entry_start..self.ptr);
+ errors.push(err);
+ let content = self.source.slice(entry_start..self.ptr);
+ body.push(ast::Entry::Junk { content });
+ }
+ }
+ self.skip_blank_block();
+ }
+
+ if errors.is_empty() {
+ Ok(ast::Resource { body })
+ } else {
+ Err((ast::Resource { body }, errors))
+ }
+ }
+
+ fn get_entry_runtime(&mut self, entry_start: usize) -> Result<Option<ast::Entry<S>>> {
+ let entry = match get_current_byte!(self) {
+ Some(b'#') => {
+ self.skip_comment();
+ None
+ }
+ Some(b'-') => Some(ast::Entry::Term(self.get_term(entry_start)?)),
+ _ => Some(ast::Entry::Message(self.get_message(entry_start)?)),
+ };
+ Ok(entry)
+ }
+}
diff --git a/third_party/rust/fluent-syntax/src/parser/slice.rs b/third_party/rust/fluent-syntax/src/parser/slice.rs
new file mode 100644
index 0000000000..d44f8251fe
--- /dev/null
+++ b/third_party/rust/fluent-syntax/src/parser/slice.rs
@@ -0,0 +1,25 @@
+use std::ops::Range;
+pub trait Slice<'s>: AsRef<str> + Clone + PartialEq {
+ fn slice(&self, range: Range<usize>) -> Self;
+ fn trim(&mut self);
+}
+
+impl<'s> Slice<'s> for String {
+ fn slice(&self, range: Range<usize>) -> Self {
+ self[range].to_string()
+ }
+
+ fn trim(&mut self) {
+ *self = self.trim_end().to_string();
+ }
+}
+
+impl<'s> Slice<'s> for &'s str {
+ fn slice(&self, range: Range<usize>) -> Self {
+ &self[range]
+ }
+
+ fn trim(&mut self) {
+ *self = self.trim_end();
+ }
+}