diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
commit | 2aa4a82499d4becd2284cdb482213d541b8804dd (patch) | |
tree | b80bf8bf13c3766139fbacc530efd0dd9d54394c /third_party/rust/fluent-syntax/src | |
parent | Initial commit. (diff) | |
download | firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.tar.xz firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.zip |
Adding upstream version 86.0.1. (refs: upstream/86.0.1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/fluent-syntax/src')
-rw-r--r-- | third_party/rust/fluent-syntax/src/ast/helper.rs | 25 | ||||
-rw-r--r-- | third_party/rust/fluent-syntax/src/ast/mod.rs | 149 | ||||
-rw-r--r-- | third_party/rust/fluent-syntax/src/bin/parser.rs | 42 | ||||
-rw-r--r-- | third_party/rust/fluent-syntax/src/bin/update_fixtures.rs | 44 | ||||
-rw-r--r-- | third_party/rust/fluent-syntax/src/lib.rs | 3 | ||||
-rw-r--r-- | third_party/rust/fluent-syntax/src/parser/comment.rs | 80 | ||||
-rw-r--r-- | third_party/rust/fluent-syntax/src/parser/errors.rs | 128 | ||||
-rw-r--r-- | third_party/rust/fluent-syntax/src/parser/expression.rs | 148 | ||||
-rw-r--r-- | third_party/rust/fluent-syntax/src/parser/helper.rs | 171 | ||||
-rw-r--r-- | third_party/rust/fluent-syntax/src/parser/mod.rs | 387 | ||||
-rw-r--r-- | third_party/rust/fluent-syntax/src/parser/pattern.rs | 209 | ||||
-rw-r--r-- | third_party/rust/fluent-syntax/src/parser/slice.rs | 25 | ||||
-rw-r--r-- | third_party/rust/fluent-syntax/src/unicode.rs | 91 |
13 files changed, 1502 insertions, 0 deletions
diff --git a/third_party/rust/fluent-syntax/src/ast/helper.rs b/third_party/rust/fluent-syntax/src/ast/helper.rs new file mode 100644 index 0000000000..923437d23b --- /dev/null +++ b/third_party/rust/fluent-syntax/src/ast/helper.rs @@ -0,0 +1,25 @@ +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; + +use super::Comment; +// This is a helper struct used to properly deserialize referential +// JSON comments which are single continous String, into a vec of +// content slices. +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(untagged))] +pub enum CommentDef<S> { + Single { content: S }, + Multi { content: Vec<S> }, +} + +impl<'s, S> From<CommentDef<S>> for Comment<S> { + fn from(input: CommentDef<S>) -> Self { + match input { + CommentDef::Single { content } => Self { + content: vec![content], + }, + CommentDef::Multi { content } => Self { content }, + } + } +} diff --git a/third_party/rust/fluent-syntax/src/ast/mod.rs b/third_party/rust/fluent-syntax/src/ast/mod.rs new file mode 100644 index 0000000000..48583441ca --- /dev/null +++ b/third_party/rust/fluent-syntax/src/ast/mod.rs @@ -0,0 +1,149 @@ +mod helper; + +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; + +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct Resource<S> { + pub body: Vec<Entry<S>>, +} + +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(tag = "type"))] +pub enum Entry<S> { + Message(Message<S>), + Term(Term<S>), + Comment(Comment<S>), + GroupComment(Comment<S>), + ResourceComment(Comment<S>), + Junk { content: S }, +} + +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct Message<S> { + pub id: Identifier<S>, + pub value: Option<Pattern<S>>, + pub attributes: Vec<Attribute<S>>, + 
pub comment: Option<Comment<S>>, +} + +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct Term<S> { + pub id: Identifier<S>, + pub value: Pattern<S>, + pub attributes: Vec<Attribute<S>>, + pub comment: Option<Comment<S>>, +} + +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct Pattern<S> { + pub elements: Vec<PatternElement<S>>, +} + +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(tag = "type"))] +pub enum PatternElement<S> { + TextElement { value: S }, + Placeable { expression: Expression<S> }, +} + +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct Attribute<S> { + pub id: Identifier<S>, + pub value: Pattern<S>, +} + +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct Identifier<S> { + pub name: S, +} + +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(tag = "type"))] +pub struct Variant<S> { + pub key: VariantKey<S>, + pub value: Pattern<S>, + pub default: bool, +} + +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(tag = "type"))] +pub enum VariantKey<S> { + Identifier { name: S }, + NumberLiteral { value: S }, +} + +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(from = "helper::CommentDef<S>"))] +pub struct Comment<S> { + pub content: Vec<S>, +} + +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(tag = "type"))] +pub struct CallArguments<S> { + pub positional: Vec<InlineExpression<S>>, + pub 
named: Vec<NamedArgument<S>>, +} + +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(tag = "type"))] +pub struct NamedArgument<S> { + pub name: Identifier<S>, + pub value: InlineExpression<S>, +} + +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(tag = "type"))] +pub enum InlineExpression<S> { + StringLiteral { + value: S, + }, + NumberLiteral { + value: S, + }, + FunctionReference { + id: Identifier<S>, + arguments: Option<CallArguments<S>>, + }, + MessageReference { + id: Identifier<S>, + attribute: Option<Identifier<S>>, + }, + TermReference { + id: Identifier<S>, + attribute: Option<Identifier<S>>, + arguments: Option<CallArguments<S>>, + }, + VariableReference { + id: Identifier<S>, + }, + Placeable { + expression: Box<Expression<S>>, + }, +} + +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(untagged))] +pub enum Expression<S> { + SelectExpression { + selector: InlineExpression<S>, + variants: Vec<Variant<S>>, + }, + InlineExpression(InlineExpression<S>), +} diff --git a/third_party/rust/fluent-syntax/src/bin/parser.rs b/third_party/rust/fluent-syntax/src/bin/parser.rs new file mode 100644 index 0000000000..4825b4a16d --- /dev/null +++ b/third_party/rust/fluent-syntax/src/bin/parser.rs @@ -0,0 +1,42 @@ +use fluent_syntax::parser::Parser; +use std::env; +use std::fs::File; +use std::io; +use std::io::Read; + +fn read_file(path: &str) -> Result<String, io::Error> { + let mut f = File::open(path)?; + let mut s = String::new(); + f.read_to_string(&mut s)?; + Ok(s) +} + +fn main() { + let args: Vec<String> = env::args().collect(); + let source = read_file(args.get(1).expect("Pass an argument")).expect("Failed to fetch file"); + + let (ast, errors) = match Parser::new(source.as_str()).parse() { + Ok(ast) => 
(ast, None), + Err((ast, err)) => (ast, Some(err)), + }; + + #[cfg(feature = "json")] + { + let target_json = serde_json::to_string_pretty(&ast).unwrap(); + println!("{}", target_json); + } + #[cfg(not(feature = "json"))] + { + use std::fmt::Write; + let mut result = String::new(); + write!(result, "{:#?}", ast).unwrap(); + println!("{}", result); + } + + if let Some(errors) = errors { + println!("\n======== Errors ========== \n"); + for err in errors { + println!("Err: {:#?}", err); + } + } +} diff --git a/third_party/rust/fluent-syntax/src/bin/update_fixtures.rs b/third_party/rust/fluent-syntax/src/bin/update_fixtures.rs new file mode 100644 index 0000000000..5ec34224b8 --- /dev/null +++ b/third_party/rust/fluent-syntax/src/bin/update_fixtures.rs @@ -0,0 +1,44 @@ +use std::fs; +use std::io; + +use fluent_syntax::parser::Parser; + +fn read_file(path: &str) -> Result<String, io::Error> { + fs::read_to_string(path) +} + +fn write_file(path: &str, source: &str) -> Result<(), io::Error> { + fs::write(path, source) +} + +fn main() { + let samples = &["menubar", "preferences", "simple"]; + let contexts = &["browser", "preferences"]; + + for sample in samples { + let path = format!("./benches/{}.ftl", sample); + let source = read_file(&path).unwrap(); + let ast = Parser::new(source).parse().unwrap(); + let target_json = serde_json::to_string_pretty(&ast).unwrap(); + let new_path = format!("./tests/fixtures/benches/{}.json", sample); + write_file(&new_path, &target_json).unwrap(); + } + + for test in contexts { + let paths = fs::read_dir(format!("./benches/contexts/{}", test)).unwrap(); + for path in paths.into_iter() { + let p = path.unwrap().path(); + let file_name = p.file_name().unwrap().to_str().unwrap(); + let path = p.to_str().unwrap(); + let source = read_file(path).unwrap(); + let ast = Parser::new(source).parse().unwrap(); + let target_json = serde_json::to_string_pretty(&ast).unwrap(); + let new_path = format!( + "./tests/fixtures/benches/contexts/{}/{}", + 
test, + file_name.replace(".ftl", ".json") + ); + write_file(&new_path, &target_json).unwrap(); + } + } +} diff --git a/third_party/rust/fluent-syntax/src/lib.rs b/third_party/rust/fluent-syntax/src/lib.rs new file mode 100644 index 0000000000..658fa44a4d --- /dev/null +++ b/third_party/rust/fluent-syntax/src/lib.rs @@ -0,0 +1,3 @@ +pub mod ast; +pub mod parser; +pub mod unicode; diff --git a/third_party/rust/fluent-syntax/src/parser/comment.rs b/third_party/rust/fluent-syntax/src/parser/comment.rs new file mode 100644 index 0000000000..3ab97ffb92 --- /dev/null +++ b/third_party/rust/fluent-syntax/src/parser/comment.rs @@ -0,0 +1,80 @@ +use super::{Parser, Result, Slice}; +use crate::ast; + +#[derive(Debug, PartialEq, Clone, Copy)] +pub(super) enum Level { + None = 0, + Regular = 1, + Group = 2, + Resource = 3, +} + +impl<'s, S> Parser<S> +where + S: Slice<'s>, +{ + pub(super) fn get_comment(&mut self) -> Result<(ast::Comment<S>, Level)> { + let mut level = Level::None; + let mut content = vec![]; + + while self.ptr < self.length { + let line_level = self.get_comment_level(); + if line_level == Level::None { + self.ptr -= 1; + break; + } else if level != Level::None && line_level != level { + self.ptr -= line_level as usize; + break; + } + + level = line_level; + + if self.ptr == self.length { + break; + } else if self.is_current_byte(b'\n') { + content.push(self.get_comment_line()?); + } else { + if let Err(e) = self.expect_byte(b' ') { + if content.is_empty() { + return Err(e); + } else { + self.ptr -= line_level as usize; + break; + } + } + content.push(self.get_comment_line()?); + } + self.skip_eol(); + } + + Ok((ast::Comment { content }, level)) + } + + fn get_comment_level(&mut self) -> Level { + let mut chars = 0; + + for _ in 0..3 { + if self.take_byte_if(b'#') { + chars += 1; + } + } + + match chars { + 0 => Level::None, + 1 => Level::Regular, + 2 => Level::Group, + 3 => Level::Resource, + _ => unreachable!(), + } + } + + fn get_comment_line(&mut self) -> 
Result<S> { + let start_pos = self.ptr; + + while self.ptr < self.length && !self.is_eol() { + self.ptr += 1; + } + + Ok(self.source.slice(start_pos..self.ptr)) + } +} diff --git a/third_party/rust/fluent-syntax/src/parser/errors.rs b/third_party/rust/fluent-syntax/src/parser/errors.rs new file mode 100644 index 0000000000..e1b26bdd73 --- /dev/null +++ b/third_party/rust/fluent-syntax/src/parser/errors.rs @@ -0,0 +1,128 @@ +use std::fmt::{self, Display, Formatter}; + +#[derive(Debug, PartialEq, Clone)] +pub struct ParserError { + pub pos: (usize, usize), + pub slice: Option<(usize, usize)>, + pub kind: ErrorKind, +} + +impl std::error::Error for ParserError {} + +impl Display for ParserError { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + Display::fmt(&self.kind, f) + } +} + +macro_rules! error { + ($kind:expr, $start:expr) => {{ + Err(ParserError { + pos: ($start, $start + 1), + slice: None, + kind: $kind, + }) + }}; + ($kind:expr, $start:expr, $end:expr) => {{ + Err(ParserError { + pos: ($start, $end), + slice: None, + kind: $kind, + }) + }}; +} + +#[derive(Debug, PartialEq, Clone)] +pub enum ErrorKind { + Generic, + ExpectedEntry, + ExpectedToken(char), + ExpectedCharRange { range: String }, + ExpectedMessageField { entry_id: String }, + ExpectedTermField { entry_id: String }, + ForbiddenWhitespace, + ForbiddenCallee, + ForbiddenKey, + MissingDefaultVariant, + MissingVariants, + MissingValue, + MissingVariantKey, + MissingLiteral, + MultipleDefaultVariants, + MessageReferenceAsSelector, + TermReferenceAsSelector, + MessageAttributeAsSelector, + TermAttributeAsPlaceable, + UnterminatedStringExpression, + PositionalArgumentFollowsNamed, + DuplicatedNamedArgument(String), + ForbiddenVariantAccessor, + UnknownEscapeSequence(String), + InvalidUnicodeEscapeSequence(String), + UnbalancedClosingBrace, + ExpectedInlineExpression, + ExpectedSimpleExpressionAsSelector, +} + +impl Display for ErrorKind { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + 
match self { + Self::Generic => write!(f, "An error occurred"), + Self::ExpectedEntry => write!(f, "Expected an entry"), + Self::ExpectedToken(letter) => { + write!(f, "Expected a token starting with \"{}\"", letter) + } + Self::ExpectedCharRange { range } => write!(f, "Expected one of \"{}\"", range), + Self::ExpectedMessageField { entry_id } => { + write!(f, "Expected a message field for \"{}\"", entry_id) + } + Self::ExpectedTermField { entry_id } => { + write!(f, "Expected a term field for \"{}\"", entry_id) + } + Self::ForbiddenWhitespace => write!(f, "Whitespace is not allowed here"), + Self::ForbiddenCallee => write!(f, "Callee is not allowed here"), + Self::ForbiddenKey => write!(f, "Key is not allowed here"), + Self::MissingDefaultVariant => { + write!(f, "The select expression must have a default variant") + } + Self::MissingVariants => { + write!(f, "The select expression must have one or more variants") + } + Self::MissingValue => write!(f, "Expected a value"), + Self::MissingVariantKey => write!(f, "Expected a variant key"), + Self::MissingLiteral => write!(f, "Expected a literal"), + Self::MultipleDefaultVariants => { + write!(f, "A select expression can only have one default variant",) + } + Self::MessageReferenceAsSelector => { + write!(f, "Message references can't be used as a selector") + } + Self::TermReferenceAsSelector => { + write!(f, "Term references can't be used as a selector") + } + Self::MessageAttributeAsSelector => { + write!(f, "Message attributes can't be used as a selector") + } + Self::TermAttributeAsPlaceable => { + write!(f, "Term attributes can't be used as a placeable") + } + Self::UnterminatedStringExpression => write!(f, "Unterminated string expression"), + Self::PositionalArgumentFollowsNamed => { + write!(f, "Positional arguments must come before named arguments",) + } + Self::DuplicatedNamedArgument(name) => { + write!(f, "The \"{}\" argument appears twice", name) + } + Self::ForbiddenVariantAccessor => write!(f, "Forbidden 
variant accessor"), + Self::UnknownEscapeSequence(seq) => write!(f, "Unknown escape sequence, \"{}\"", seq), + Self::InvalidUnicodeEscapeSequence(seq) => { + write!(f, "Invalid unicode escape sequence, \"{}\"", seq) + } + Self::UnbalancedClosingBrace => write!(f, "Unbalanced closing brace"), + Self::ExpectedInlineExpression => write!(f, "Expected an inline expression"), + Self::ExpectedSimpleExpressionAsSelector => { + write!(f, "Expected a simple expression as selector") + } + } + } +} diff --git a/third_party/rust/fluent-syntax/src/parser/expression.rs b/third_party/rust/fluent-syntax/src/parser/expression.rs new file mode 100644 index 0000000000..0a2d3c78c8 --- /dev/null +++ b/third_party/rust/fluent-syntax/src/parser/expression.rs @@ -0,0 +1,148 @@ +use super::errors::{ErrorKind, ParserError}; +use super::{Parser, Result, Slice}; +use crate::ast; + +impl<'s, S> Parser<S> +where + S: Slice<'s>, +{ + pub(super) fn get_expression(&mut self) -> Result<ast::Expression<S>> { + let exp = self.get_inline_expression()?; + + self.skip_blank(); + + if !self.is_current_byte(b'-') || !self.is_byte_at(b'>', self.ptr + 1) { + if let ast::InlineExpression::TermReference { ref attribute, .. } = exp { + if attribute.is_some() { + return error!(ErrorKind::TermAttributeAsPlaceable, self.ptr); + } + } + return Ok(ast::Expression::InlineExpression(exp)); + } + + match exp { + ast::InlineExpression::MessageReference { ref attribute, .. } => { + if attribute.is_none() { + return error!(ErrorKind::MessageReferenceAsSelector, self.ptr); + } else { + return error!(ErrorKind::MessageAttributeAsSelector, self.ptr); + } + } + ast::InlineExpression::TermReference { ref attribute, .. } => { + if attribute.is_none() { + return error!(ErrorKind::TermReferenceAsSelector, self.ptr); + } + } + ast::InlineExpression::StringLiteral { .. } + | ast::InlineExpression::NumberLiteral { .. } + | ast::InlineExpression::VariableReference { .. } + | ast::InlineExpression::FunctionReference { .. 
} => {} + _ => { + return error!(ErrorKind::ExpectedSimpleExpressionAsSelector, self.ptr); + } + }; + + self.ptr += 2; // -> + + self.skip_blank_inline(); + if !self.skip_eol() { + return error!( + ErrorKind::ExpectedCharRange { + range: "\n | \r\n".to_string() + }, + self.ptr + ); + } + self.skip_blank(); + + let variants = self.get_variants()?; + + Ok(ast::Expression::SelectExpression { + selector: exp, + variants, + }) + } + + pub(super) fn get_inline_expression(&mut self) -> Result<ast::InlineExpression<S>> { + match self.source.as_ref().as_bytes().get(self.ptr) { + Some(b'"') => { + self.ptr += 1; // " + let start = self.ptr; + while let Some(b) = self.source.as_ref().as_bytes().get(self.ptr) { + match b { + b'\\' => match self.source.as_ref().as_bytes().get(self.ptr + 1) { + Some(b'\\') | Some(b'{') | Some(b'"') => self.ptr += 2, + Some(b'u') => { + self.ptr += 2; + self.skip_unicode_escape_sequence(4)?; + } + Some(b'U') => { + self.ptr += 2; + self.skip_unicode_escape_sequence(6)?; + } + _ => return error!(ErrorKind::Generic, self.ptr), + }, + b'"' => { + break; + } + b'\n' => { + return error!(ErrorKind::Generic, self.ptr); + } + _ => self.ptr += 1, + } + } + + self.expect_byte(b'"')?; + let slice = self.source.slice(start..self.ptr - 1); + Ok(ast::InlineExpression::StringLiteral { value: slice }) + } + Some(b) if b.is_ascii_digit() => { + let num = self.get_number_literal()?; + Ok(ast::InlineExpression::NumberLiteral { value: num }) + } + Some(b'-') => { + self.ptr += 1; // - + if self.is_identifier_start() { + let id = self.get_identifier()?; + let attribute = self.get_attribute_accessor()?; + let arguments = self.get_call_arguments()?; + Ok(ast::InlineExpression::TermReference { + id, + attribute, + arguments, + }) + } else { + self.ptr -= 1; + let num = self.get_number_literal()?; + Ok(ast::InlineExpression::NumberLiteral { value: num }) + } + } + Some(b'$') => { + self.ptr += 1; // - + let id = self.get_identifier()?; + 
Ok(ast::InlineExpression::VariableReference { id }) + } + Some(b) if b.is_ascii_alphabetic() => { + let id = self.get_identifier()?; + let arguments = self.get_call_arguments()?; + if arguments.is_some() { + if !Self::is_callee(id.name.as_ref().as_bytes()) { + return error!(ErrorKind::ForbiddenCallee, self.ptr); + } + + Ok(ast::InlineExpression::FunctionReference { id, arguments }) + } else { + let attribute = self.get_attribute_accessor()?; + Ok(ast::InlineExpression::MessageReference { id, attribute }) + } + } + Some(b'{') => { + let exp = self.get_placeable()?; + Ok(ast::InlineExpression::Placeable { + expression: Box::new(exp), + }) + } + _ => error!(ErrorKind::ExpectedInlineExpression, self.ptr), + } + } +} diff --git a/third_party/rust/fluent-syntax/src/parser/helper.rs b/third_party/rust/fluent-syntax/src/parser/helper.rs new file mode 100644 index 0000000000..363bba2864 --- /dev/null +++ b/third_party/rust/fluent-syntax/src/parser/helper.rs @@ -0,0 +1,171 @@ +use super::errors::{ErrorKind, ParserError}; +use super::{Parser, Result, Slice}; + +impl<'s, S> Parser<S> +where + S: Slice<'s>, +{ + pub(super) fn is_current_byte(&self, b: u8) -> bool { + self.source.as_ref().as_bytes().get(self.ptr) == Some(&b) + } + + pub(super) fn is_byte_at(&self, b: u8, pos: usize) -> bool { + self.source.as_ref().as_bytes().get(pos) == Some(&b) + } + + pub(super) fn skip_to_next_entry_start(&mut self) { + while let Some(b) = self.source.as_ref().as_bytes().get(self.ptr) { + let new_line = + self.ptr == 0 || self.source.as_ref().as_bytes().get(self.ptr - 1) == Some(&b'\n'); + + if new_line && (b.is_ascii_alphabetic() || [b'-', b'#'].contains(b)) { + break; + } + + self.ptr += 1; + } + } + + pub(super) fn skip_eol(&mut self) -> bool { + match self.source.as_ref().as_bytes().get(self.ptr) { + Some(b'\n') => { + self.ptr += 1; + true + } + Some(b'\r') if self.is_byte_at(b'\n', self.ptr + 1) => { + self.ptr += 2; + true + } + _ => false, + } + } + + pub(super) fn 
skip_unicode_escape_sequence(&mut self, length: usize) -> Result<()> { + let start = self.ptr; + for _ in 0..length { + match self.source.as_ref().as_bytes().get(self.ptr) { + Some(b) if b.is_ascii_hexdigit() => self.ptr += 1, + _ => break, + } + } + if self.ptr - start != length { + let end = if self.ptr >= self.length { + self.ptr + } else { + self.ptr + 1 + }; + let seq = self.source.slice(start..end).as_ref().to_owned(); + return error!(ErrorKind::InvalidUnicodeEscapeSequence(seq), self.ptr); + } + Ok(()) + } + + pub(super) fn is_identifier_start(&self) -> bool { + matches!(self.source.as_ref().as_bytes().get(self.ptr), Some(b) if b.is_ascii_alphabetic()) + } + + pub(super) fn take_byte_if(&mut self, b: u8) -> bool { + if self.is_current_byte(b) { + self.ptr += 1; + true + } else { + false + } + } + + pub(super) fn skip_blank_block(&mut self) -> usize { + let mut count = 0; + loop { + let start = self.ptr; + self.skip_blank_inline(); + if !self.skip_eol() { + self.ptr = start; + break; + } + count += 1; + } + count + } + + pub(super) fn skip_blank(&mut self) { + loop { + match self.source.as_ref().as_bytes().get(self.ptr) { + Some(b' ') | Some(b'\n') => self.ptr += 1, + Some(b'\r') + if self.source.as_ref().as_bytes().get(self.ptr + 1) == Some(&b'\n') => + { + self.ptr += 2 + } + _ => break, + } + } + } + + pub(super) fn skip_blank_inline(&mut self) -> usize { + let start = self.ptr; + while let Some(b' ') = self.source.as_ref().as_bytes().get(self.ptr) { + self.ptr += 1; + } + self.ptr - start + } + + pub(super) fn is_byte_pattern_continuation(b: u8) -> bool { + ![b'}', b'.', b'[', b'*'].contains(&b) + } + + pub(super) fn is_callee(name: &[u8]) -> bool { + name.iter() + .all(|c| c.is_ascii_uppercase() || c.is_ascii_digit() || *c == b'_' || *c == b'-') + } + + pub(super) fn expect_byte(&mut self, b: u8) -> Result<()> { + if !self.is_current_byte(b) { + return error!(ErrorKind::ExpectedToken(b as char), self.ptr); + } + self.ptr += 1; + Ok(()) + } + + pub(super) 
fn is_number_start(&self) -> bool { + matches!(self.source.as_ref().as_bytes().get(self.ptr), Some(b) if (b == &b'-') || b.is_ascii_digit()) + } + + pub(super) fn is_eol(&self) -> bool { + match self.source.as_ref().as_bytes().get(self.ptr) { + Some(b'\n') => true, + Some(b'\r') if self.is_byte_at(b'\n', self.ptr + 1) => true, + _ => false, + } + } + + pub(super) fn skip_digits(&mut self) -> Result<()> { + let start = self.ptr; + loop { + match self.source.as_ref().as_bytes().get(self.ptr) { + Some(b) if b.is_ascii_digit() => self.ptr += 1, + _ => break, + } + } + if start == self.ptr { + error!( + ErrorKind::ExpectedCharRange { + range: "0-9".to_string() + }, + self.ptr + ) + } else { + Ok(()) + } + } + + pub(super) fn get_number_literal(&mut self) -> Result<S> { + let start = self.ptr; + self.take_byte_if(b'-'); + self.skip_digits()?; + if self.take_byte_if(b'.') { + self.skip_digits()?; + } + + Ok(self.source.slice(start..self.ptr)) + } +} diff --git a/third_party/rust/fluent-syntax/src/parser/mod.rs b/third_party/rust/fluent-syntax/src/parser/mod.rs new file mode 100644 index 0000000000..9fc08847dc --- /dev/null +++ b/third_party/rust/fluent-syntax/src/parser/mod.rs @@ -0,0 +1,387 @@ +#[macro_use] +mod errors; +mod comment; +mod expression; +mod helper; +mod pattern; +mod slice; + +use crate::ast; +use slice::Slice; +use std::result; + +pub use errors::{ErrorKind, ParserError}; + +pub type Result<T> = result::Result<T, ParserError>; + +pub struct Parser<S> { + source: S, + ptr: usize, + length: usize, +} + +impl<'s, S> Parser<S> +where + S: Slice<'s>, +{ + pub fn new(source: S) -> Self { + let length = source.as_ref().len(); + Self { + source, + ptr: 0, + length, + } + } + + pub fn parse( + &mut self, + ) -> std::result::Result<ast::Resource<S>, (ast::Resource<S>, Vec<ParserError>)> { + let mut errors = vec![]; + + let mut body = vec![]; + + self.skip_blank_block(); + let mut last_comment = None; + let mut last_blank_count = 0; + + while self.ptr < self.length 
{ + let entry_start = self.ptr; + let mut entry = self.get_entry(entry_start); + + if let Some(comment) = last_comment.take() { + match entry { + Ok(ast::Entry::Message(ref mut msg)) if last_blank_count < 2 => { + msg.comment = Some(comment); + } + Ok(ast::Entry::Term(ref mut term)) if last_blank_count < 2 => { + term.comment = Some(comment); + } + _ => { + body.push(ast::Entry::Comment(comment)); + } + } + } + + match entry { + Ok(ast::Entry::Comment(comment)) => { + last_comment = Some(comment); + } + Ok(entry) => { + body.push(entry); + } + Err(mut err) => { + self.skip_to_next_entry_start(); + err.slice = Some((entry_start, self.ptr)); + errors.push(err); + let content = self.source.slice(entry_start..self.ptr); + body.push(ast::Entry::Junk { content }); + } + } + last_blank_count = self.skip_blank_block(); + } + + if let Some(last_comment) = last_comment.take() { + body.push(ast::Entry::Comment(last_comment)); + } + if errors.is_empty() { + Ok(ast::Resource { body }) + } else { + Err((ast::Resource { body }, errors)) + } + } + + fn get_entry(&mut self, entry_start: usize) -> Result<ast::Entry<S>> { + let entry = match self.source.as_ref().as_bytes().get(self.ptr) { + Some(b'#') => { + let (comment, level) = self.get_comment()?; + match level { + comment::Level::Regular => ast::Entry::Comment(comment), + comment::Level::Group => ast::Entry::GroupComment(comment), + comment::Level::Resource => ast::Entry::ResourceComment(comment), + comment::Level::None => unreachable!(), + } + } + Some(b'-') => ast::Entry::Term(self.get_term(entry_start)?), + _ => ast::Entry::Message(self.get_message(entry_start)?), + }; + Ok(entry) + } + + fn get_message(&mut self, entry_start: usize) -> Result<ast::Message<S>> { + let id = self.get_identifier()?; + self.skip_blank_inline(); + self.expect_byte(b'=')?; + let pattern = self.get_pattern()?; + + self.skip_blank_block(); + + let attributes = self.get_attributes(); + + if pattern.is_none() && attributes.is_empty() { + let entry_id = 
id.name.as_ref().to_owned(); + return error!( + ErrorKind::ExpectedMessageField { entry_id }, + entry_start, self.ptr + ); + } + + Ok(ast::Message { + id, + value: pattern, + attributes, + comment: None, + }) + } + + fn get_term(&mut self, entry_start: usize) -> Result<ast::Term<S>> { + self.expect_byte(b'-')?; + let id = self.get_identifier()?; + self.skip_blank_inline(); + self.expect_byte(b'=')?; + self.skip_blank_inline(); + + let value = self.get_pattern()?; + + self.skip_blank_block(); + + let attributes = self.get_attributes(); + + if let Some(value) = value { + Ok(ast::Term { + id, + value, + attributes, + comment: None, + }) + } else { + error!( + ErrorKind::ExpectedTermField { + entry_id: id.name.as_ref().to_owned() + }, + entry_start, self.ptr + ) + } + } + + fn get_attributes(&mut self) -> Vec<ast::Attribute<S>> { + let mut attributes = vec![]; + + loop { + let line_start = self.ptr; + self.skip_blank_inline(); + if !self.is_current_byte(b'.') { + self.ptr = line_start; + break; + } + + if let Ok(attr) = self.get_attribute() { + attributes.push(attr); + } else { + self.ptr = line_start; + break; + } + } + attributes + } + + fn get_attribute(&mut self) -> Result<ast::Attribute<S>> { + self.expect_byte(b'.')?; + let id = self.get_identifier()?; + self.skip_blank_inline(); + self.expect_byte(b'=')?; + let pattern = self.get_pattern()?; + + match pattern { + Some(pattern) => Ok(ast::Attribute { id, value: pattern }), + None => error!(ErrorKind::MissingValue, self.ptr), + } + } + + fn get_identifier(&mut self) -> Result<ast::Identifier<S>> { + let mut ptr = self.ptr; + + match self.source.as_ref().as_bytes().get(ptr) { + Some(b) if b.is_ascii_alphabetic() => { + ptr += 1; + } + _ => { + return error!( + ErrorKind::ExpectedCharRange { + range: "a-zA-Z".to_string() + }, + ptr + ); + } + } + + while let Some(b) = self.source.as_ref().as_bytes().get(ptr) { + if b.is_ascii_alphabetic() || b.is_ascii_digit() || [b'_', b'-'].contains(b) { + ptr += 1; + } else { + 
break; + } + } + + let name = self.source.slice(self.ptr..ptr); + self.ptr = ptr; + + Ok(ast::Identifier { name }) + } + + fn get_attribute_accessor(&mut self) -> Result<Option<ast::Identifier<S>>> { + if self.take_byte_if(b'.') { + let ident = self.get_identifier()?; + Ok(Some(ident)) + } else { + Ok(None) + } + } + + fn get_variant_key(&mut self) -> Result<ast::VariantKey<S>> { + if !self.take_byte_if(b'[') { + return error!(ErrorKind::ExpectedToken('['), self.ptr); + } + self.skip_blank(); + + let key = if self.is_number_start() { + ast::VariantKey::NumberLiteral { + value: self.get_number_literal()?, + } + } else { + ast::VariantKey::Identifier { + name: self.get_identifier()?.name, + } + }; + + self.skip_blank(); + + self.expect_byte(b']')?; + + Ok(key) + } + + fn get_variants(&mut self) -> Result<Vec<ast::Variant<S>>> { + let mut variants = vec![]; + let mut has_default = false; + + while self.is_current_byte(b'*') || self.is_current_byte(b'[') { + let default = self.take_byte_if(b'*'); + + if default { + if has_default { + return error!(ErrorKind::MultipleDefaultVariants, self.ptr); + } else { + has_default = true; + } + } + + let key = self.get_variant_key()?; + + let value = self.get_pattern()?; + + if let Some(value) = value { + variants.push(ast::Variant { + key, + value, + default, + }); + self.skip_blank(); + } else { + return error!(ErrorKind::MissingValue, self.ptr); + } + } + + if has_default { + Ok(variants) + } else { + error!(ErrorKind::MissingDefaultVariant, self.ptr) + } + } + + fn get_placeable(&mut self) -> Result<ast::Expression<S>> { + self.expect_byte(b'{')?; + self.skip_blank(); + let exp = self.get_expression()?; + self.skip_blank_inline(); + self.expect_byte(b'}')?; + + let invalid_expression_found = match &exp { + ast::Expression::InlineExpression(ast::InlineExpression::TermReference { + ref attribute, + .. 
+ }) => attribute.is_some(), + _ => false, + }; + if invalid_expression_found { + return error!(ErrorKind::TermAttributeAsPlaceable, self.ptr); + } + + Ok(exp) + } + + fn get_call_arguments(&mut self) -> Result<Option<ast::CallArguments<S>>> { + self.skip_blank(); + if !self.take_byte_if(b'(') { + return Ok(None); + } + + let mut positional = vec![]; + let mut named = vec![]; + let mut argument_names = vec![]; + + self.skip_blank(); + + while self.ptr < self.length { + if self.is_current_byte(b')') { + break; + } + + let expr = self.get_inline_expression()?; + + if let ast::InlineExpression::MessageReference { + ref id, + attribute: None, + } = expr + { + self.skip_blank(); + if self.is_current_byte(b':') { + if argument_names.contains(&id.name) { + return error!( + ErrorKind::DuplicatedNamedArgument(id.name.as_ref().to_owned()), + self.ptr + ); + } + self.ptr += 1; + self.skip_blank(); + let val = self.get_inline_expression()?; + + argument_names.push(id.name.clone()); + named.push(ast::NamedArgument { + name: ast::Identifier { + name: id.name.clone(), + }, + value: val, + }); + } else { + if !argument_names.is_empty() { + return error!(ErrorKind::PositionalArgumentFollowsNamed, self.ptr); + } + positional.push(expr); + } + } else { + if !argument_names.is_empty() { + return error!(ErrorKind::PositionalArgumentFollowsNamed, self.ptr); + } + positional.push(expr); + } + + self.skip_blank(); + self.take_byte_if(b','); + self.skip_blank(); + } + + self.expect_byte(b')')?; + + Ok(Some(ast::CallArguments { positional, named })) + } +} diff --git a/third_party/rust/fluent-syntax/src/parser/pattern.rs b/third_party/rust/fluent-syntax/src/parser/pattern.rs new file mode 100644 index 0000000000..84804b0db2 --- /dev/null +++ b/third_party/rust/fluent-syntax/src/parser/pattern.rs @@ -0,0 +1,209 @@ +use super::errors::{ErrorKind, ParserError}; +use super::{Parser, Result, Slice}; +use crate::ast; + +#[derive(Debug, PartialEq)] +enum TextElementTermination { + LineFeed, + 
CRLF, + PlaceableStart, + EOF, +} + +// This enum tracks the placement of the text element in the pattern, which is needed for +// dedentation logic. +#[derive(Debug, PartialEq)] +enum TextElementPosition { + InitialLineStart, + LineStart, + Continuation, +} + +// This enum allows us to mark pointers in the source which will later become text elements +// but without slicing them out of the source string. This makes the indentation adjustments +// cheaper since they'll happen on the pointers, rather than extracted slices. +#[derive(Debug)] +enum PatternElementPlaceholders<S> { + Placeable(ast::Expression<S>), + // (start, end, indent, position) + TextElement(usize, usize, usize, TextElementPosition), +} + +// This enum tracks whether the text element is blank or not. +// This is important to identify text elements which should not be taken into account +// when calculating common indent. +#[derive(Debug, PartialEq)] +enum TextElementType { + Blank, + NonBlank, +} + +impl<'s, S> Parser<S> +where + S: Slice<'s>, +{ + pub(super) fn get_pattern(&mut self) -> Result<Option<ast::Pattern<S>>> { + let mut elements = vec![]; + let mut last_non_blank = None; + let mut common_indent = None; + + self.skip_blank_inline(); + + let mut text_element_role = if self.skip_eol() { + self.skip_blank_block(); + TextElementPosition::LineStart + } else { + TextElementPosition::InitialLineStart + }; + + while self.ptr < self.length { + if self.is_current_byte(b'{') { + if text_element_role == TextElementPosition::LineStart { + common_indent = Some(0); + } + let exp = self.get_placeable()?; + last_non_blank = Some(elements.len()); + elements.push(PatternElementPlaceholders::Placeable(exp)); + text_element_role = TextElementPosition::Continuation; + } else { + let slice_start = self.ptr; + let mut indent = 0; + if text_element_role == TextElementPosition::LineStart { + indent = self.skip_blank_inline(); + if self.ptr >= self.length { + break; + } + let b = 
self.source.as_ref().as_bytes().get(self.ptr); + if indent == 0 { + if b != Some(&b'\n') { + break; + } + } else if !Self::is_byte_pattern_continuation(*b.unwrap()) { + self.ptr = slice_start; + break; + } + } + let (start, end, text_element_type, termination_reason) = self.get_text_slice()?; + if start != end { + if text_element_role == TextElementPosition::LineStart + && text_element_type == TextElementType::NonBlank + { + if let Some(common) = common_indent { + if indent < common { + common_indent = Some(indent); + } + } else { + common_indent = Some(indent); + } + } + if text_element_role != TextElementPosition::LineStart + || text_element_type == TextElementType::NonBlank + || termination_reason == TextElementTermination::LineFeed + { + if text_element_type == TextElementType::NonBlank { + last_non_blank = Some(elements.len()); + } + elements.push(PatternElementPlaceholders::TextElement( + slice_start, + end, + indent, + text_element_role, + )); + } + } + + text_element_role = match termination_reason { + TextElementTermination::LineFeed => TextElementPosition::LineStart, + TextElementTermination::CRLF => TextElementPosition::Continuation, + TextElementTermination::PlaceableStart => TextElementPosition::Continuation, + TextElementTermination::EOF => TextElementPosition::Continuation, + }; + } + } + + if let Some(last_non_blank) = last_non_blank { + let elements = elements + .into_iter() + .take(last_non_blank + 1) + .enumerate() + .map(|(i, elem)| match elem { + PatternElementPlaceholders::Placeable(expression) => { + ast::PatternElement::Placeable { expression } + } + PatternElementPlaceholders::TextElement(start, end, indent, role) => { + let start = if role == TextElementPosition::LineStart { + common_indent.map_or_else( + || start + indent, + |common_indent| start + std::cmp::min(indent, common_indent), + ) + } else { + start + }; + let mut value = self.source.slice(start..end); + if last_non_blank == i { + value.trim(); + ast::PatternElement::TextElement 
{ value } + } else { + ast::PatternElement::TextElement { value } + } + } + }) + .collect(); + return Ok(Some(ast::Pattern { elements })); + } + + Ok(None) + } + + fn get_text_slice( + &mut self, + ) -> Result<(usize, usize, TextElementType, TextElementTermination)> { + let start_pos = self.ptr; + let mut text_element_type = TextElementType::Blank; + + while let Some(b) = self.source.as_ref().as_bytes().get(self.ptr) { + match b { + b' ' => self.ptr += 1, + b'\n' => { + self.ptr += 1; + return Ok(( + start_pos, + self.ptr, + text_element_type, + TextElementTermination::LineFeed, + )); + } + b'\r' if self.is_byte_at(b'\n', self.ptr + 1) => { + self.ptr += 1; + return Ok(( + start_pos, + self.ptr - 1, + text_element_type, + TextElementTermination::CRLF, + )); + } + b'{' => { + return Ok(( + start_pos, + self.ptr, + text_element_type, + TextElementTermination::PlaceableStart, + )); + } + b'}' => { + return error!(ErrorKind::UnbalancedClosingBrace, self.ptr); + } + _ => { + text_element_type = TextElementType::NonBlank; + self.ptr += 1 + } + } + } + Ok(( + start_pos, + self.ptr, + text_element_type, + TextElementTermination::EOF, + )) + } +} diff --git a/third_party/rust/fluent-syntax/src/parser/slice.rs b/third_party/rust/fluent-syntax/src/parser/slice.rs new file mode 100644 index 0000000000..d44f8251fe --- /dev/null +++ b/third_party/rust/fluent-syntax/src/parser/slice.rs @@ -0,0 +1,25 @@ +use std::ops::Range; +pub trait Slice<'s>: AsRef<str> + Clone + PartialEq { + fn slice(&self, range: Range<usize>) -> Self; + fn trim(&mut self); +} + +impl<'s> Slice<'s> for String { + fn slice(&self, range: Range<usize>) -> Self { + self[range].to_string() + } + + fn trim(&mut self) { + *self = self.trim_end().to_string(); + } +} + +impl<'s> Slice<'s> for &'s str { + fn slice(&self, range: Range<usize>) -> Self { + &self[range] + } + + fn trim(&mut self) { + *self = self.trim_end(); + } +} diff --git a/third_party/rust/fluent-syntax/src/unicode.rs 
// Diff header carried over from the flattened page dump this code was embedded in:
// b/third_party/rust/fluent-syntax/src/unicode.rs new file mode 100644 index 0000000000..49301734bd
// --- /dev/null +++ b/third_party/rust/fluent-syntax/src/unicode.rs @@ -0,0 +1,91 @@
//
// Helpers that resolve backslash escape sequences (`\\`, `\"`, `\uHHHH`,
// `\UHHHHHH`) found in a string slice into the characters they stand for.
use std::borrow::Cow;
use std::char;
use std::fmt;

/// Character emitted in place of any escape sequence that cannot be decoded
/// (U+FFFD REPLACEMENT CHARACTER).
const UNKNOWN_CHAR: char = '\u{FFFD}';

/// Decodes the hexadecimal digits of a unicode escape into a `char`.
///
/// Returns [`UNKNOWN_CHAR`] when `s` is `None`, contains anything other than
/// ASCII hex digits, or does not name a valid Unicode scalar value.
fn encode_unicode(s: Option<&str>) -> char {
    // `u32::from_str_radix` tolerates a leading `+`, which is not part of a
    // valid escape sequence, so the digits are validated explicitly first.
    s.filter(|s| s.bytes().all(|b| b.is_ascii_hexdigit()))
        .and_then(|s| u32::from_str_radix(s, 16).ok())
        .and_then(char::from_u32)
        .unwrap_or(UNKNOWN_CHAR)
}

/// Clamps `idx` to `input.len()` and moves it forward to the nearest UTF-8
/// character boundary, so that it is always safe to slice `input` at the
/// returned index.
fn next_char_boundary(input: &str, idx: usize) -> usize {
    let mut idx = idx.min(input.len());
    // `input.len()` is always a boundary, so this loop terminates.
    while !input.is_char_boundary(idx) {
        idx += 1;
    }
    idx
}

/// Writes `input` to `w`, replacing every escape sequence with the character
/// it denotes.
///
/// Recognised sequences are `\\`, `\"`, `\u` followed by 4 hex digits and
/// `\U` followed by 6 hex digits. A backslash followed by anything else, or a
/// unicode escape whose digits are missing or invalid, produces
/// [`UNKNOWN_CHAR`]; the bytes that the malformed sequence would have
/// occupied are skipped.
///
/// # Errors
///
/// Propagates any error returned by the underlying writer.
pub fn unescape_unicode<W>(w: &mut W, input: &str) -> fmt::Result
where
    W: fmt::Write,
{
    let bytes = input.as_bytes();

    // `start` marks the beginning of the pending literal run; `ptr` is the
    // scan position. Both are kept on character boundaries whenever they are
    // used for slicing.
    let mut start = 0;
    let mut ptr = 0;

    while let Some(b) = bytes.get(ptr) {
        if b != &b'\\' {
            ptr += 1;
            continue;
        }
        // Flush the literal text that precedes the backslash.
        if start != ptr {
            w.write_str(&input[start..ptr])?;
        }

        ptr += 1;

        let new_char = match bytes.get(ptr) {
            Some(b'\\') => '\\',
            Some(b'"') => '"',
            Some(u @ b'u') | Some(u @ b'U') => {
                let seq_start = ptr + 1;
                let len = if u == &b'u' { 4 } else { 6 };
                ptr += len;
                // `str::get` yields `None` for an out-of-range or
                // non-boundary range, which decodes to `UNKNOWN_CHAR`.
                encode_unicode(input.get(seq_start..seq_start + len))
            }
            _ => UNKNOWN_CHAR,
        };
        // A malformed sequence may end past the input or in the middle of a
        // multi-byte character; realign so later slicing cannot panic.
        ptr = next_char_boundary(input, ptr + 1);
        w.write_char(new_char)?;
        start = ptr;
    }
    if start != ptr {
        w.write_str(&input[start..ptr])?;
    }
    Ok(())
}

/// Returns `input` with all escape sequences resolved.
///
/// When `input` contains no backslash it is returned borrowed, without
/// allocating; otherwise an owned string is built using the same rules as
/// [`unescape_unicode`], preserving non-ASCII text verbatim.
pub fn unescape_unicode_to_string(input: &str) -> Cow<'_, str> {
    if !input.as_bytes().contains(&b'\\') {
        return Cow::Borrowed(input);
    }

    let mut result = String::with_capacity(input.len());
    unescape_unicode(&mut result, input).expect("writing to a String never fails");
    Cow::Owned(result)
}