summaryrefslogtreecommitdiffstats
path: root/third_party/rust/fluent-syntax/src
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/fluent-syntax/src')
-rw-r--r--third_party/rust/fluent-syntax/src/ast/helper.rs25
-rw-r--r--third_party/rust/fluent-syntax/src/ast/mod.rs149
-rw-r--r--third_party/rust/fluent-syntax/src/bin/parser.rs42
-rw-r--r--third_party/rust/fluent-syntax/src/bin/update_fixtures.rs44
-rw-r--r--third_party/rust/fluent-syntax/src/lib.rs3
-rw-r--r--third_party/rust/fluent-syntax/src/parser/comment.rs80
-rw-r--r--third_party/rust/fluent-syntax/src/parser/errors.rs128
-rw-r--r--third_party/rust/fluent-syntax/src/parser/expression.rs148
-rw-r--r--third_party/rust/fluent-syntax/src/parser/helper.rs171
-rw-r--r--third_party/rust/fluent-syntax/src/parser/mod.rs387
-rw-r--r--third_party/rust/fluent-syntax/src/parser/pattern.rs209
-rw-r--r--third_party/rust/fluent-syntax/src/parser/slice.rs25
-rw-r--r--third_party/rust/fluent-syntax/src/unicode.rs91
13 files changed, 1502 insertions, 0 deletions
diff --git a/third_party/rust/fluent-syntax/src/ast/helper.rs b/third_party/rust/fluent-syntax/src/ast/helper.rs
new file mode 100644
index 0000000000..923437d23b
--- /dev/null
+++ b/third_party/rust/fluent-syntax/src/ast/helper.rs
@@ -0,0 +1,25 @@
+#[cfg(feature = "serde")]
+use serde::{Deserialize, Serialize};
+
+use super::Comment;
+// This is a helper enum used to properly deserialize reference JSON
+// comments, which are a single continuous String, into a vec of
+// content slices.
+//
+// `Single` holds one value and `Multi` holds a list of values; because the
+// enum is `untagged`, serde picks the variant from the shape of `content`.
+#[derive(Debug, PartialEq, Clone)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "serde", serde(untagged))]
+pub enum CommentDef<S> {
+ Single { content: S },
+ Multi { content: Vec<S> },
+}
+
+// Converts the deserialization helper into a `Comment`: a `Single` value is
+// wrapped into a one-element vector, a `Multi` value is used as-is.
+//
+// The impl does not need a lifetime parameter, so none is declared.
+impl<S> From<CommentDef<S>> for Comment<S> {
+ fn from(input: CommentDef<S>) -> Self {
+ let content = match input {
+ CommentDef::Single { content } => vec![content],
+ CommentDef::Multi { content } => content,
+ };
+ Self { content }
+ }
+}
diff --git a/third_party/rust/fluent-syntax/src/ast/mod.rs b/third_party/rust/fluent-syntax/src/ast/mod.rs
new file mode 100644
index 0000000000..48583441ca
--- /dev/null
+++ b/third_party/rust/fluent-syntax/src/ast/mod.rs
@@ -0,0 +1,149 @@
+mod helper;
+
+#[cfg(feature = "serde")]
+use serde::{Deserialize, Serialize};
+
+/// Root node of the AST: the list of entries parsed from one source.
+#[derive(Debug, PartialEq, Clone)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+pub struct Resource<S> {
+ pub body: Vec<Entry<S>>,
+}
+
+/// A top-level item of a `Resource`.
+///
+/// The three comment variants share the `Comment` payload and differ only in
+/// their level. `Junk` carries a piece of source as `content`.
+/// With the `serde` feature the variant name is stored in a `type` field.
+#[derive(Debug, PartialEq, Clone)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "serde", serde(tag = "type"))]
+pub enum Entry<S> {
+ Message(Message<S>),
+ Term(Term<S>),
+ Comment(Comment<S>),
+ GroupComment(Comment<S>),
+ ResourceComment(Comment<S>),
+ Junk { content: S },
+}
+
+/// A message entry: an identifier with an optional value pattern, a list of
+/// attributes and an optional attached comment.
+#[derive(Debug, PartialEq, Clone)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+pub struct Message<S> {
+ pub id: Identifier<S>,
+ pub value: Option<Pattern<S>>,
+ pub attributes: Vec<Attribute<S>>,
+ pub comment: Option<Comment<S>>,
+}
+
+/// A term entry. Unlike `Message`, the value pattern is mandatory.
+#[derive(Debug, PartialEq, Clone)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+pub struct Term<S> {
+ pub id: Identifier<S>,
+ pub value: Pattern<S>,
+ pub attributes: Vec<Attribute<S>>,
+ pub comment: Option<Comment<S>>,
+}
+
+/// A value made of a sequence of text elements and placeables.
+#[derive(Debug, PartialEq, Clone)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+pub struct Pattern<S> {
+ pub elements: Vec<PatternElement<S>>,
+}
+
+/// One element of a `Pattern`: either literal text or a placeable wrapping
+/// an expression. With the `serde` feature the variant name is stored in a
+/// `type` field.
+#[derive(Debug, PartialEq, Clone)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "serde", serde(tag = "type"))]
+pub enum PatternElement<S> {
+ TextElement { value: S },
+ Placeable { expression: Expression<S> },
+}
+
+/// An attribute of a message or term: an identifier with a mandatory value.
+#[derive(Debug, PartialEq, Clone)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+pub struct Attribute<S> {
+ pub id: Identifier<S>,
+ pub value: Pattern<S>,
+}
+
+/// A name used for entries, attributes, references and named arguments.
+#[derive(Debug, PartialEq, Clone)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+pub struct Identifier<S> {
+ pub name: S,
+}
+
+/// One branch of a select expression: its key, its value and whether it was
+/// marked as the default branch.
+#[derive(Debug, PartialEq, Clone)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "serde", serde(tag = "type"))]
+pub struct Variant<S> {
+ pub key: VariantKey<S>,
+ pub value: Pattern<S>,
+ pub default: bool,
+}
+
+/// Key of a `Variant`: either an identifier name or a number literal, both
+/// kept as source text of type `S`.
+#[derive(Debug, PartialEq, Clone)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "serde", serde(tag = "type"))]
+pub enum VariantKey<S> {
+ Identifier { name: S },
+ NumberLiteral { value: S },
+}
+
+/// A comment stored as a list of content pieces (the parser pushes one per
+/// comment line).
+///
+/// With the `serde` feature, deserialization goes through
+/// `helper::CommentDef`, so a single value is accepted as well as a list.
+#[derive(Debug, PartialEq, Clone)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "serde", serde(from = "helper::CommentDef<S>"))]
+pub struct Comment<S> {
+ pub content: Vec<S>,
+}
+
+/// Arguments of a call, split into positional and named arguments.
+#[derive(Debug, PartialEq, Clone)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "serde", serde(tag = "type"))]
+pub struct CallArguments<S> {
+ pub positional: Vec<InlineExpression<S>>,
+ pub named: Vec<NamedArgument<S>>,
+}
+
+/// A `name: value` argument of a call.
+#[derive(Debug, PartialEq, Clone)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "serde", serde(tag = "type"))]
+pub struct NamedArgument<S> {
+ pub name: Identifier<S>,
+ pub value: InlineExpression<S>,
+}
+
+/// An expression that can appear inline: literals, references to functions,
+/// messages, terms and variables, or a nested placeable.
+///
+/// Literal values are kept as source text of type `S`. The nested
+/// `Placeable` boxes its expression because `Expression` contains
+/// `InlineExpression` in turn.
+#[derive(Debug, PartialEq, Clone)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "serde", serde(tag = "type"))]
+pub enum InlineExpression<S> {
+ StringLiteral {
+ value: S,
+ },
+ NumberLiteral {
+ value: S,
+ },
+ FunctionReference {
+ id: Identifier<S>,
+ arguments: Option<CallArguments<S>>,
+ },
+ MessageReference {
+ id: Identifier<S>,
+ attribute: Option<Identifier<S>>,
+ },
+ TermReference {
+ id: Identifier<S>,
+ attribute: Option<Identifier<S>>,
+ arguments: Option<CallArguments<S>>,
+ },
+ VariableReference {
+ id: Identifier<S>,
+ },
+ Placeable {
+ expression: Box<Expression<S>>,
+ },
+}
+
+/// Content of a placeable: either a select expression (a selector plus its
+/// variants) or a plain inline expression.
+///
+/// The enum is `untagged` for serde, so no variant name is written for it.
+#[derive(Debug, PartialEq, Clone)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "serde", serde(untagged))]
+pub enum Expression<S> {
+ SelectExpression {
+ selector: InlineExpression<S>,
+ variants: Vec<Variant<S>>,
+ },
+ InlineExpression(InlineExpression<S>),
+}
diff --git a/third_party/rust/fluent-syntax/src/bin/parser.rs b/third_party/rust/fluent-syntax/src/bin/parser.rs
new file mode 100644
index 0000000000..4825b4a16d
--- /dev/null
+++ b/third_party/rust/fluent-syntax/src/bin/parser.rs
@@ -0,0 +1,42 @@
+use fluent_syntax::parser::Parser;
+use std::env;
+use std::fs::File;
+use std::io;
+use std::io::Read;
+
+fn read_file(path: &str) -> Result<String, io::Error> {
+ let mut f = File::open(path)?;
+ let mut s = String::new();
+ f.read_to_string(&mut s)?;
+ Ok(s)
+}
+
+// Parses the file named by the first command-line argument and prints the
+// resulting AST (as JSON with the `json` feature, as pretty `Debug` output
+// otherwise), followed by the list of parser errors if there were any.
+fn main() {
+ let args: Vec<String> = env::args().collect();
+ let path = args.get(1).expect("Pass an argument");
+ let source = read_file(path).expect("Failed to fetch file");
+
+ // A failed parse still yields an AST, accompanied by the errors.
+ let mut parser = Parser::new(source.as_str());
+ let (ast, errors) = match parser.parse() {
+ Ok(ast) => (ast, None),
+ Err((ast, err)) => (ast, Some(err)),
+ };
+
+ #[cfg(feature = "json")]
+ {
+ let target_json = serde_json::to_string_pretty(&ast).unwrap();
+ println!("{}", target_json);
+ }
+ #[cfg(not(feature = "json"))]
+ {
+ let result = format!("{:#?}", ast);
+ println!("{}", result);
+ }
+
+ if let Some(errors) = errors {
+ println!("\n======== Errors ========== \n");
+ errors.iter().for_each(|err| println!("Err: {:#?}", err));
+ }
+}
diff --git a/third_party/rust/fluent-syntax/src/bin/update_fixtures.rs b/third_party/rust/fluent-syntax/src/bin/update_fixtures.rs
new file mode 100644
index 0000000000..5ec34224b8
--- /dev/null
+++ b/third_party/rust/fluent-syntax/src/bin/update_fixtures.rs
@@ -0,0 +1,44 @@
+use std::fs;
+use std::io;
+
+use fluent_syntax::parser::Parser;
+
+// Reads the whole file at `path` into a `String`.
+fn read_file(path: &str) -> Result<String, io::Error> {
+ fs::read_to_string(path)
+}
+
+// Writes `source` to the file at `path` via `fs::write`.
+fn write_file(path: &str, source: &str) -> Result<(), io::Error> {
+ fs::write(path, source)
+}
+
+// Regenerates the JSON fixtures: every listed benchmark sample and every
+// file found in the listed context directories is parsed and its AST is
+// written as pretty JSON under `./tests/fixtures/benches`.
+//
+// Any I/O, parse or serialization failure aborts through `unwrap`.
+fn main() {
+ let samples = &["menubar", "preferences", "simple"];
+ let contexts = &["browser", "preferences"];
+
+ for sample in samples {
+ let input = format!("./benches/{}.ftl", sample);
+ let output = format!("./tests/fixtures/benches/{}.json", sample);
+ let source = read_file(&input).unwrap();
+ let ast = Parser::new(source).parse().unwrap();
+ let target_json = serde_json::to_string_pretty(&ast).unwrap();
+ write_file(&output, &target_json).unwrap();
+ }
+
+ for context in contexts {
+ let dir = format!("./benches/contexts/{}", context);
+ for entry in fs::read_dir(dir).unwrap() {
+ let input = entry.unwrap().path();
+ let file_name = input.file_name().unwrap().to_str().unwrap();
+ let source = read_file(input.to_str().unwrap()).unwrap();
+ let ast = Parser::new(source).parse().unwrap();
+ let target_json = serde_json::to_string_pretty(&ast).unwrap();
+ let output = format!(
+ "./tests/fixtures/benches/contexts/{}/{}",
+ context,
+ file_name.replace(".ftl", ".json")
+ );
+ write_file(&output, &target_json).unwrap();
+ }
+ }
+}
diff --git a/third_party/rust/fluent-syntax/src/lib.rs b/third_party/rust/fluent-syntax/src/lib.rs
new file mode 100644
index 0000000000..658fa44a4d
--- /dev/null
+++ b/third_party/rust/fluent-syntax/src/lib.rs
@@ -0,0 +1,3 @@
+pub mod ast;
+pub mod parser;
+pub mod unicode;
diff --git a/third_party/rust/fluent-syntax/src/parser/comment.rs b/third_party/rust/fluent-syntax/src/parser/comment.rs
new file mode 100644
index 0000000000..3ab97ffb92
--- /dev/null
+++ b/third_party/rust/fluent-syntax/src/parser/comment.rs
@@ -0,0 +1,80 @@
+use super::{Parser, Result, Slice};
+use crate::ast;
+
+// Comment level. The discriminant equals the number of `#` markers that
+// introduce the comment line (`None` means no marker was found);
+// `get_comment` relies on this when it rewinds the cursor by `level as usize`.
+#[derive(Debug, PartialEq, Clone, Copy)]
+pub(super) enum Level {
+ None = 0,
+ Regular = 1,
+ Group = 2,
+ Resource = 3,
+}
+
+impl<'s, S> Parser<S>
+where
+ S: Slice<'s>,
+{
+ // Parses consecutive comment lines of the same level into one comment.
+ //
+ // Returns the collected lines together with the level of the comment.
+ // Fails only when the very first line has a marker that is followed by
+ // something other than a space or a line feed.
+ pub(super) fn get_comment(&mut self) -> Result<(ast::Comment<S>, Level)> {
+ let mut level = Level::None;
+ let mut content = vec![];
+
+ while self.ptr < self.length {
+ let line_level = self.get_comment_level();
+ if line_level == Level::None {
+ // Not a comment line: step back one byte and stop.
+ // NOTE(review): presumably this lands on the line break consumed by
+ // the previous iteration so the caller can count it — confirm.
+ self.ptr -= 1;
+ break;
+ } else if level != Level::None && line_level != level {
+ // A comment of a different level starts here: give back the `#`
+ // markers that were just consumed and stop.
+ self.ptr -= line_level as usize;
+ break;
+ }
+
+ level = line_level;
+
+ if self.ptr == self.length {
+ break;
+ } else if self.is_current_byte(b'\n') {
+ // Marker directly followed by a line feed: an empty comment line.
+ content.push(self.get_comment_line()?);
+ } else {
+ if let Err(e) = self.expect_byte(b' ') {
+ if content.is_empty() {
+ return Err(e);
+ } else {
+ // Keep the lines collected so far and rewind the markers of
+ // the malformed line.
+ self.ptr -= line_level as usize;
+ break;
+ }
+ }
+ content.push(self.get_comment_line()?);
+ }
+ self.skip_eol();
+ }
+
+ Ok((ast::Comment { content }, level))
+ }
+
+ // Consumes up to three consecutive `#` bytes at the cursor and maps their
+ // count to a comment level.
+ fn get_comment_level(&mut self) -> Level {
+ let mut hashes = 0;
+ while hashes < 3 && self.take_byte_if(b'#') {
+ hashes += 1;
+ }
+
+ match hashes {
+ 0 => Level::None,
+ 1 => Level::Regular,
+ 2 => Level::Group,
+ 3 => Level::Resource,
+ _ => unreachable!(),
+ }
+ }
+
+ // Advances the cursor to the next end of line (or the end of the source)
+ // and returns the slice that was skipped over.
+ fn get_comment_line(&mut self) -> Result<S> {
+ let line_start = self.ptr;
+
+ loop {
+ if self.ptr >= self.length || self.is_eol() {
+ break;
+ }
+ self.ptr += 1;
+ }
+
+ Ok(self.source.slice(line_start..self.ptr))
+ }
+}
diff --git a/third_party/rust/fluent-syntax/src/parser/errors.rs b/third_party/rust/fluent-syntax/src/parser/errors.rs
new file mode 100644
index 0000000000..e1b26bdd73
--- /dev/null
+++ b/third_party/rust/fluent-syntax/src/parser/errors.rs
@@ -0,0 +1,128 @@
+use std::fmt::{self, Display, Formatter};
+
+/// An error reported by the parser.
+///
+/// `pos` is the `(start, end)` byte range the error refers to. `slice` is
+/// `None` when the error is created by the `error!` macro.
+#[derive(Debug, PartialEq, Clone)]
+pub struct ParserError {
+ pub pos: (usize, usize),
+ pub slice: Option<(usize, usize)>,
+ pub kind: ErrorKind,
+}
+
+// Marks `ParserError` as a standard error type.
+impl std::error::Error for ParserError {}
+
+// The user-facing message is the message of the error kind; the positions
+// are not part of it.
+impl Display for ParserError {
+ fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+ Display::fmt(&self.kind, f)
+ }
+}
+
+// Builds an `Err(ParserError { .. })` with `slice` set to `None`.
+//
+// `error!(kind, start)` reports the one-byte range `(start, start + 1)`;
+// `error!(kind, start, end)` reports the explicit range `(start, end)`.
+macro_rules! error {
+ ($kind:expr, $start:expr) => {{
+ Err(ParserError {
+ pos: ($start, $start + 1),
+ slice: None,
+ kind: $kind,
+ })
+ }};
+ ($kind:expr, $start:expr, $end:expr) => {{
+ Err(ParserError {
+ pos: ($start, $end),
+ slice: None,
+ kind: $kind,
+ })
+ }};
+}
+
+/// The kinds of error the parser can report.
+///
+/// Variants with a payload carry the text that is interpolated into the
+/// message produced by the `Display` implementation (an expected character
+/// or range, an entry id, an argument name or an escape sequence).
+#[derive(Debug, PartialEq, Clone)]
+pub enum ErrorKind {
+ Generic,
+ ExpectedEntry,
+ ExpectedToken(char),
+ ExpectedCharRange { range: String },
+ ExpectedMessageField { entry_id: String },
+ ExpectedTermField { entry_id: String },
+ ForbiddenWhitespace,
+ ForbiddenCallee,
+ ForbiddenKey,
+ MissingDefaultVariant,
+ MissingVariants,
+ MissingValue,
+ MissingVariantKey,
+ MissingLiteral,
+ MultipleDefaultVariants,
+ MessageReferenceAsSelector,
+ TermReferenceAsSelector,
+ MessageAttributeAsSelector,
+ TermAttributeAsPlaceable,
+ UnterminatedStringExpression,
+ PositionalArgumentFollowsNamed,
+ DuplicatedNamedArgument(String),
+ ForbiddenVariantAccessor,
+ UnknownEscapeSequence(String),
+ InvalidUnicodeEscapeSequence(String),
+ UnbalancedClosingBrace,
+ ExpectedInlineExpression,
+ ExpectedSimpleExpressionAsSelector,
+}
+
+impl Display for ErrorKind {
+ fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+ match self {
+ Self::Generic => write!(f, "An error occurred"),
+ Self::ExpectedEntry => write!(f, "Expected an entry"),
+ Self::ExpectedToken(letter) => {
+ write!(f, "Expected a token starting with \"{}\"", letter)
+ }
+ Self::ExpectedCharRange { range } => write!(f, "Expected one of \"{}\"", range),
+ Self::ExpectedMessageField { entry_id } => {
+ write!(f, "Expected a message field for \"{}\"", entry_id)
+ }
+ Self::ExpectedTermField { entry_id } => {
+ write!(f, "Expected a term field for \"{}\"", entry_id)
+ }
+ Self::ForbiddenWhitespace => write!(f, "Whitespace is not allowed here"),
+ Self::ForbiddenCallee => write!(f, "Callee is not allowed here"),
+ Self::ForbiddenKey => write!(f, "Key is not allowed here"),
+ Self::MissingDefaultVariant => {
+ write!(f, "The select expression must have a default variant")
+ }
+ Self::MissingVariants => {
+ write!(f, "The select expression must have one or more variants")
+ }
+ Self::MissingValue => write!(f, "Expected a value"),
+ Self::MissingVariantKey => write!(f, "Expected a variant key"),
+ Self::MissingLiteral => write!(f, "Expected a literal"),
+ Self::MultipleDefaultVariants => {
+ write!(f, "A select expression can only have one default variant",)
+ }
+ Self::MessageReferenceAsSelector => {
+ write!(f, "Message references can't be used as a selector")
+ }
+ Self::TermReferenceAsSelector => {
+ write!(f, "Term references can't be used as a selector")
+ }
+ Self::MessageAttributeAsSelector => {
+ write!(f, "Message attributes can't be used as a selector")
+ }
+ Self::TermAttributeAsPlaceable => {
+ write!(f, "Term attributes can't be used as a placeable")
+ }
+ Self::UnterminatedStringExpression => write!(f, "Unterminated string expression"),
+ Self::PositionalArgumentFollowsNamed => {
+ write!(f, "Positional arguments must come before named arguments",)
+ }
+ Self::DuplicatedNamedArgument(name) => {
+ write!(f, "The \"{}\" argument appears twice", name)
+ }
+ Self::ForbiddenVariantAccessor => write!(f, "Forbidden variant accessor"),
+ Self::UnknownEscapeSequence(seq) => write!(f, "Unknown escape sequence, \"{}\"", seq),
+ Self::InvalidUnicodeEscapeSequence(seq) => {
+ write!(f, "Invalid unicode escape sequence, \"{}\"", seq)
+ }
+ Self::UnbalancedClosingBrace => write!(f, "Unbalanced closing brace"),
+ Self::ExpectedInlineExpression => write!(f, "Expected an inline expression"),
+ Self::ExpectedSimpleExpressionAsSelector => {
+ write!(f, "Expected a simple expression as selector")
+ }
+ }
+ }
+}
diff --git a/third_party/rust/fluent-syntax/src/parser/expression.rs b/third_party/rust/fluent-syntax/src/parser/expression.rs
new file mode 100644
index 0000000000..0a2d3c78c8
--- /dev/null
+++ b/third_party/rust/fluent-syntax/src/parser/expression.rs
@@ -0,0 +1,148 @@
+use super::errors::{ErrorKind, ParserError};
+use super::{Parser, Result, Slice};
+use crate::ast;
+
+impl<'s, S> Parser<S>
+where
+ S: Slice<'s>,
+{
+ // Parses the expression inside a placeable.
+ //
+ // An inline expression is read first. If it is not followed by `->` it is
+ // returned as is (a term attribute is rejected there). Otherwise it is
+ // validated as a selector and the variants of the select expression are
+ // parsed after the mandatory line break.
+ pub(super) fn get_expression(&mut self) -> Result<ast::Expression<S>> {
+ let selector = self.get_inline_expression()?;
+
+ self.skip_blank();
+
+ let arrow_follows = self.is_current_byte(b'-') && self.is_byte_at(b'>', self.ptr + 1);
+ if !arrow_follows {
+ return match selector {
+ ast::InlineExpression::TermReference {
+ attribute: Some(_), ..
+ } => error!(ErrorKind::TermAttributeAsPlaceable, self.ptr),
+ inline => Ok(ast::Expression::InlineExpression(inline)),
+ };
+ }
+
+ // Decide whether the inline expression may be used as a selector.
+ let rejection = match &selector {
+ ast::InlineExpression::MessageReference {
+ attribute: None, ..
+ } => Some(ErrorKind::MessageReferenceAsSelector),
+ ast::InlineExpression::MessageReference { .. } => {
+ Some(ErrorKind::MessageAttributeAsSelector)
+ }
+ ast::InlineExpression::TermReference {
+ attribute: None, ..
+ } => Some(ErrorKind::TermReferenceAsSelector),
+ ast::InlineExpression::TermReference { .. }
+ | ast::InlineExpression::StringLiteral { .. }
+ | ast::InlineExpression::NumberLiteral { .. }
+ | ast::InlineExpression::VariableReference { .. }
+ | ast::InlineExpression::FunctionReference { .. } => None,
+ ast::InlineExpression::Placeable { .. } => {
+ Some(ErrorKind::ExpectedSimpleExpressionAsSelector)
+ }
+ };
+ if let Some(kind) = rejection {
+ return error!(kind, self.ptr);
+ }
+
+ self.ptr += 2; // ->
+
+ self.skip_blank_inline();
+ let found_eol = self.skip_eol();
+ if !found_eol {
+ let range = "\n | \r\n".to_string();
+ return error!(ErrorKind::ExpectedCharRange { range }, self.ptr);
+ }
+ self.skip_blank();
+
+ let variants = self.get_variants()?;
+
+ Ok(ast::Expression::SelectExpression { selector, variants })
+ }
+
+ // Parses one inline expression, dispatching on the byte at the cursor:
+ // `"` starts a string literal, a digit a number literal, `-` a term
+ // reference or a negative number, `$` a variable reference, a letter a
+ // function or message reference and `{` a nested placeable.
+ // Any other byte (or the end of the source) is an error.
+ pub(super) fn get_inline_expression(&mut self) -> Result<ast::InlineExpression<S>> {
+ match self.source.as_ref().as_bytes().get(self.ptr) {
+ Some(b'"') => {
+ self.ptr += 1; // "
+ let start = self.ptr;
+ // Scan up to the closing quote, validating escape sequences.
+ while let Some(b) = self.source.as_ref().as_bytes().get(self.ptr) {
+ match b {
+ b'\\' => match self.source.as_ref().as_bytes().get(self.ptr + 1) {
+ Some(b'\\') | Some(b'{') | Some(b'"') => self.ptr += 2,
+ Some(b'u') => {
+ self.ptr += 2;
+ self.skip_unicode_escape_sequence(4)?;
+ }
+ Some(b'U') => {
+ self.ptr += 2;
+ self.skip_unicode_escape_sequence(6)?;
+ }
+ _ => return error!(ErrorKind::Generic, self.ptr),
+ },
+ b'"' => {
+ break;
+ }
+ b'\n' => {
+ // A string literal may not span lines.
+ return error!(ErrorKind::Generic, self.ptr);
+ }
+ _ => self.ptr += 1,
+ }
+ }
+
+ // Fails when the source ended before a closing quote was found.
+ self.expect_byte(b'"')?;
+ // The value excludes both quotes; escapes are left as written.
+ let slice = self.source.slice(start..self.ptr - 1);
+ Ok(ast::InlineExpression::StringLiteral { value: slice })
+ }
+ Some(b) if b.is_ascii_digit() => {
+ let num = self.get_number_literal()?;
+ Ok(ast::InlineExpression::NumberLiteral { value: num })
+ }
+ Some(b'-') => {
+ self.ptr += 1; // -
+ if self.is_identifier_start() {
+ let id = self.get_identifier()?;
+ let attribute = self.get_attribute_accessor()?;
+ let arguments = self.get_call_arguments()?;
+ Ok(ast::InlineExpression::TermReference {
+ id,
+ attribute,
+ arguments,
+ })
+ } else {
+ // Not a term: put the `-` back and parse a negative number.
+ self.ptr -= 1;
+ let num = self.get_number_literal()?;
+ Ok(ast::InlineExpression::NumberLiteral { value: num })
+ }
+ }
+ Some(b'$') => {
+ self.ptr += 1; // $
+ let id = self.get_identifier()?;
+ Ok(ast::InlineExpression::VariableReference { id })
+ }
+ Some(b) if b.is_ascii_alphabetic() => {
+ let id = self.get_identifier()?;
+ let arguments = self.get_call_arguments()?;
+ if arguments.is_some() {
+ // Only names accepted by `is_callee` may be called.
+ if !Self::is_callee(id.name.as_ref().as_bytes()) {
+ return error!(ErrorKind::ForbiddenCallee, self.ptr);
+ }
+
+ Ok(ast::InlineExpression::FunctionReference { id, arguments })
+ } else {
+ let attribute = self.get_attribute_accessor()?;
+ Ok(ast::InlineExpression::MessageReference { id, attribute })
+ }
+ }
+ Some(b'{') => {
+ let exp = self.get_placeable()?;
+ Ok(ast::InlineExpression::Placeable {
+ expression: Box::new(exp),
+ })
+ }
+ _ => error!(ErrorKind::ExpectedInlineExpression, self.ptr),
+ }
+ }
+}
diff --git a/third_party/rust/fluent-syntax/src/parser/helper.rs b/third_party/rust/fluent-syntax/src/parser/helper.rs
new file mode 100644
index 0000000000..363bba2864
--- /dev/null
+++ b/third_party/rust/fluent-syntax/src/parser/helper.rs
@@ -0,0 +1,171 @@
+use super::errors::{ErrorKind, ParserError};
+use super::{Parser, Result, Slice};
+
+impl<'s, S> Parser<S>
+where
+ S: Slice<'s>,
+{
+ // Returns true when the byte at the cursor equals `b`; false at the end
+ // of the source.
+ pub(super) fn is_current_byte(&self, b: u8) -> bool {
+ self.is_byte_at(b, self.ptr)
+ }
+
+ // Returns true when the byte at offset `pos` equals `b`; false when `pos`
+ // is outside the source.
+ pub(super) fn is_byte_at(&self, b: u8, pos: usize) -> bool {
+ matches!(self.source.as_ref().as_bytes().get(pos), Some(found) if *found == b)
+ }
+
+ // Advances the cursor until it sits on a byte that can start an entry
+ // (an ASCII letter, `-` or `#`) located at the beginning of a line, or
+ // until the end of the source.
+ pub(super) fn skip_to_next_entry_start(&mut self) {
+ while let Some(&byte) = self.source.as_ref().as_bytes().get(self.ptr) {
+ // Offset 0 counts as a line start; otherwise the previous byte must
+ // be a line feed.
+ let at_line_start = match self.ptr.checked_sub(1) {
+ None => true,
+ Some(previous) => self.is_byte_at(b'\n', previous),
+ };
+ let starts_entry = byte.is_ascii_alphabetic() || byte == b'-' || byte == b'#';
+
+ if at_line_start && starts_entry {
+ break;
+ }
+
+ self.ptr += 1;
+ }
+ }
+
+ // Consumes one line break (`\n` or `\r\n`) at the cursor.
+ // Returns whether a line break was consumed.
+ pub(super) fn skip_eol(&mut self) -> bool {
+ if self.is_current_byte(b'\n') {
+ self.ptr += 1;
+ true
+ } else if self.is_current_byte(b'\r') && self.is_byte_at(b'\n', self.ptr + 1) {
+ self.ptr += 2;
+ true
+ } else {
+ false
+ }
+ }
+
+ // Consumes exactly `length` hexadecimal digits at the cursor.
+ //
+ // On success the cursor is left after the digits. If fewer digits are
+ // found, an `InvalidUnicodeEscapeSequence` error is returned whose payload
+ // is the text scanned so far plus the offending character, if any.
+ pub(super) fn skip_unicode_escape_sequence(&mut self, length: usize) -> Result<()> {
+ let start = self.ptr;
+ for _ in 0..length {
+ match self.source.as_ref().as_bytes().get(self.ptr) {
+ Some(b) if b.is_ascii_hexdigit() => self.ptr += 1,
+ _ => break,
+ }
+ }
+ if self.ptr - start != length {
+ let mut end = self.ptr;
+ if end < self.length {
+ // Include the offending character in the report. It may be a
+ // multi-byte UTF-8 character, so extend the range to the next
+ // character boundary instead of cutting through it, which would
+ // make a `str`-based slice panic.
+ end += 1;
+ while !self.source.as_ref().is_char_boundary(end) {
+ end += 1;
+ }
+ }
+ let seq = self.source.slice(start..end).as_ref().to_owned();
+ return error!(ErrorKind::InvalidUnicodeEscapeSequence(seq), self.ptr);
+ }
+ Ok(())
+ }
+
+ // Returns true when the byte at the cursor is an ASCII letter.
+ pub(super) fn is_identifier_start(&self) -> bool {
+ self.source
+ .as_ref()
+ .as_bytes()
+ .get(self.ptr)
+ .map_or(false, |b| b.is_ascii_alphabetic())
+ }
+
+ // Consumes the byte at the cursor when it equals `b`.
+ // Returns whether the byte was consumed.
+ pub(super) fn take_byte_if(&mut self, b: u8) -> bool {
+ let matched = self.is_current_byte(b);
+ if matched {
+ self.ptr += 1;
+ }
+ matched
+ }
+
+ // Skips lines that contain only spaces and returns how many were skipped.
+ // The cursor is left at the start of the first line that is not blank.
+ pub(super) fn skip_blank_block(&mut self) -> usize {
+ let mut blank_lines = 0;
+ loop {
+ let line_start = self.ptr;
+ self.skip_blank_inline();
+ if self.skip_eol() {
+ blank_lines += 1;
+ continue;
+ }
+ // The line has content: undo the space skipping and stop.
+ self.ptr = line_start;
+ return blank_lines;
+ }
+ }
+
+ // Skips any run of spaces and line breaks (`\n` or `\r\n`).
+ pub(super) fn skip_blank(&mut self) {
+ loop {
+ if self.is_current_byte(b' ') || self.is_current_byte(b'\n') {
+ self.ptr += 1;
+ } else if self.is_current_byte(b'\r') && self.is_byte_at(b'\n', self.ptr + 1) {
+ self.ptr += 2;
+ } else {
+ break;
+ }
+ }
+ }
+
+ // Skips spaces at the cursor and returns how many were skipped.
+ pub(super) fn skip_blank_inline(&mut self) -> usize {
+ let first = self.ptr;
+ while self.is_current_byte(b' ') {
+ self.ptr += 1;
+ }
+ self.ptr - first
+ }
+
+ // Returns false for `}`, `.`, `[` and `*`, true for every other byte.
+ pub(super) fn is_byte_pattern_continuation(b: u8) -> bool {
+ !matches!(b, b'}' | b'.' | b'[' | b'*')
+ }
+
+ // Returns true when every byte of `name` is an ASCII uppercase letter, an
+ // ASCII digit, `_` or `-` (an empty name is accepted).
+ pub(super) fn is_callee(name: &[u8]) -> bool {
+ name.iter()
+ .all(|&c| matches!(c, b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'-'))
+ }
+
+ // Consumes the byte at the cursor if it equals `b`; otherwise returns an
+ // `ExpectedToken` error at the cursor and leaves the cursor untouched.
+ pub(super) fn expect_byte(&mut self, b: u8) -> Result<()> {
+ if self.take_byte_if(b) {
+ Ok(())
+ } else {
+ error!(ErrorKind::ExpectedToken(b as char), self.ptr)
+ }
+ }
+
+ // Returns true when the byte at the cursor is `-` or an ASCII digit.
+ pub(super) fn is_number_start(&self) -> bool {
+ self.source
+ .as_ref()
+ .as_bytes()
+ .get(self.ptr)
+ .map_or(false, |b| *b == b'-' || b.is_ascii_digit())
+ }
+
+ // Returns true when a line break (`\n` or `\r\n`) starts at the cursor.
+ // Nothing is consumed.
+ pub(super) fn is_eol(&self) -> bool {
+ self.is_current_byte(b'\n')
+ || (self.is_current_byte(b'\r') && self.is_byte_at(b'\n', self.ptr + 1))
+ }
+
+ // Consumes a run of ASCII digits. At least one digit is required;
+ // otherwise an `ExpectedCharRange` error for `0-9` is returned.
+ pub(super) fn skip_digits(&mut self) -> Result<()> {
+ let first = self.ptr;
+ while matches!(self.source.as_ref().as_bytes().get(self.ptr), Some(b) if b.is_ascii_digit())
+ {
+ self.ptr += 1;
+ }
+
+ if self.ptr > first {
+ return Ok(());
+ }
+ let range = "0-9".to_string();
+ error!(ErrorKind::ExpectedCharRange { range }, self.ptr)
+ }
+
+ // Parses a number literal: an optional `-`, one or more digits and an
+ // optional fraction (`.` followed by one or more digits).
+ // Returns the source slice covering the literal.
+ pub(super) fn get_number_literal(&mut self) -> Result<S> {
+ let literal_start = self.ptr;
+
+ let _negative = self.take_byte_if(b'-');
+ self.skip_digits()?;
+
+ let has_fraction = self.take_byte_if(b'.');
+ if has_fraction {
+ self.skip_digits()?;
+ }
+
+ Ok(self.source.slice(literal_start..self.ptr))
+ }
+}
diff --git a/third_party/rust/fluent-syntax/src/parser/mod.rs b/third_party/rust/fluent-syntax/src/parser/mod.rs
new file mode 100644
index 0000000000..9fc08847dc
--- /dev/null
+++ b/third_party/rust/fluent-syntax/src/parser/mod.rs
@@ -0,0 +1,387 @@
+#[macro_use]
+mod errors;
+mod comment;
+mod expression;
+mod helper;
+mod pattern;
+mod slice;
+
+use crate::ast;
+use slice::Slice;
+use std::result;
+
+pub use errors::{ErrorKind, ParserError};
+
+/// Result type of the parsing methods; the error is always a `ParserError`.
+pub type Result<T> = result::Result<T, ParserError>;
+
+/// Parser state: the source being parsed, the byte offset of the cursor and
+/// the source length in bytes (computed once in `new`).
+pub struct Parser<S> {
+ source: S,
+ ptr: usize,
+ length: usize,
+}
+
+impl<'s, S> Parser<S>
+where
+ S: Slice<'s>,
+{
+ /// Creates a parser positioned at the start of `source`.
+ pub fn new(source: S) -> Self {
+ Self {
+ length: source.as_ref().len(),
+ source,
+ ptr: 0,
+ }
+ }
+
+ /// Parses the whole source into a `Resource`.
+ ///
+ /// Returns `Ok(resource)` when no error was found. Otherwise returns
+ /// `Err((resource, errors))`, where the resource still holds everything
+ /// that was parsed and each failed entry appears as `Entry::Junk`.
+ pub fn parse(
+ &mut self,
+ ) -> std::result::Result<ast::Resource<S>, (ast::Resource<S>, Vec<ParserError>)> {
+ let mut errors = vec![];
+
+ let mut body = vec![];
+
+ self.skip_blank_block();
+ // A regular comment is held back for one iteration so that it can be
+ // attached to the message or term that follows it.
+ let mut last_comment = None;
+ let mut last_blank_count = 0;
+
+ while self.ptr < self.length {
+ let entry_start = self.ptr;
+ let mut entry = self.get_entry(entry_start);
+
+ if let Some(comment) = last_comment.take() {
+ match entry {
+ // Attach the comment only when fewer than two blank lines were
+ // counted after it.
+ Ok(ast::Entry::Message(ref mut msg)) if last_blank_count < 2 => {
+ msg.comment = Some(comment);
+ }
+ Ok(ast::Entry::Term(ref mut term)) if last_blank_count < 2 => {
+ term.comment = Some(comment);
+ }
+ _ => {
+ // Otherwise the comment becomes an entry of its own.
+ body.push(ast::Entry::Comment(comment));
+ }
+ }
+ }
+
+ match entry {
+ Ok(ast::Entry::Comment(comment)) => {
+ last_comment = Some(comment);
+ }
+ Ok(entry) => {
+ body.push(entry);
+ }
+ Err(mut err) => {
+ // Recover: skip to the next possible entry start and record the
+ // skipped source as junk, noting its range on the error.
+ self.skip_to_next_entry_start();
+ err.slice = Some((entry_start, self.ptr));
+ errors.push(err);
+ let content = self.source.slice(entry_start..self.ptr);
+ body.push(ast::Entry::Junk { content });
+ }
+ }
+ last_blank_count = self.skip_blank_block();
+ }
+
+ // A comment at the very end has nothing to attach to.
+ if let Some(last_comment) = last_comment.take() {
+ body.push(ast::Entry::Comment(last_comment));
+ }
+ if errors.is_empty() {
+ Ok(ast::Resource { body })
+ } else {
+ Err((ast::Resource { body }, errors))
+ }
+ }
+
+ // Parses one entry, chosen by the byte at the cursor: `#` starts a
+ // comment, `-` a term and anything else is parsed as a message.
+ fn get_entry(&mut self, entry_start: usize) -> Result<ast::Entry<S>> {
+ match self.source.as_ref().as_bytes().get(self.ptr) {
+ Some(b'#') => {
+ let (comment, level) = self.get_comment()?;
+ Ok(match level {
+ comment::Level::Regular => ast::Entry::Comment(comment),
+ comment::Level::Group => ast::Entry::GroupComment(comment),
+ comment::Level::Resource => ast::Entry::ResourceComment(comment),
+ comment::Level::None => unreachable!(),
+ })
+ }
+ Some(b'-') => self.get_term(entry_start).map(ast::Entry::Term),
+ _ => self.get_message(entry_start).map(ast::Entry::Message),
+ }
+ }
+
+ // Parses a message: `identifier = pattern` followed by attributes.
+ // A message with neither a value nor any attribute is rejected with
+ // `ExpectedMessageField`, reported over the whole entry.
+ fn get_message(&mut self, entry_start: usize) -> Result<ast::Message<S>> {
+ let id = self.get_identifier()?;
+ self.skip_blank_inline();
+ self.expect_byte(b'=')?;
+ let value = self.get_pattern()?;
+
+ self.skip_blank_block();
+
+ let attributes = self.get_attributes();
+
+ if value.is_some() || !attributes.is_empty() {
+ return Ok(ast::Message {
+ id,
+ value,
+ attributes,
+ comment: None,
+ });
+ }
+
+ let entry_id = id.name.as_ref().to_owned();
+ error!(
+ ErrorKind::ExpectedMessageField { entry_id },
+ entry_start, self.ptr
+ )
+ }
+
+ // Parses a term: `-identifier = pattern` followed by attributes.
+ // A term without a value is rejected with `ExpectedTermField`, reported
+ // over the whole entry.
+ fn get_term(&mut self, entry_start: usize) -> Result<ast::Term<S>> {
+ self.expect_byte(b'-')?;
+ let id = self.get_identifier()?;
+ self.skip_blank_inline();
+ self.expect_byte(b'=')?;
+ self.skip_blank_inline();
+
+ let pattern = self.get_pattern()?;
+
+ self.skip_blank_block();
+
+ let attributes = self.get_attributes();
+
+ match pattern {
+ Some(value) => Ok(ast::Term {
+ id,
+ value,
+ attributes,
+ comment: None,
+ }),
+ None => {
+ let entry_id = id.name.as_ref().to_owned();
+ error!(
+ ErrorKind::ExpectedTermField { entry_id },
+ entry_start, self.ptr
+ )
+ }
+ }
+ }
+
+ // Collects consecutive attributes. Stops at the first line that does not
+ // start (after spaces) with `.` or whose attribute fails to parse, and
+ // rewinds the cursor to the start of that line.
+ fn get_attributes(&mut self) -> Vec<ast::Attribute<S>> {
+ let mut attributes = Vec::new();
+
+ loop {
+ let line_start = self.ptr;
+ self.skip_blank_inline();
+
+ let parsed = if self.is_current_byte(b'.') {
+ self.get_attribute().ok()
+ } else {
+ None
+ };
+
+ match parsed {
+ Some(attr) => attributes.push(attr),
+ None => {
+ self.ptr = line_start;
+ break;
+ }
+ }
+ }
+ attributes
+ }
+
+ // Parses one attribute: `.identifier = pattern`. The value is mandatory;
+ // a missing one yields `MissingValue`.
+ fn get_attribute(&mut self) -> Result<ast::Attribute<S>> {
+ self.expect_byte(b'.')?;
+ let id = self.get_identifier()?;
+ self.skip_blank_inline();
+ self.expect_byte(b'=')?;
+
+ if let Some(value) = self.get_pattern()? {
+ return Ok(ast::Attribute { id, value });
+ }
+ error!(ErrorKind::MissingValue, self.ptr)
+ }
+
+ // Parses an identifier: an ASCII letter followed by any number of ASCII
+ // letters, digits, `_` or `-`. The cursor only moves on success.
+ fn get_identifier(&mut self) -> Result<ast::Identifier<S>> {
+ let bytes = self.source.as_ref().as_bytes();
+ let start = self.ptr;
+
+ let starts_with_letter = matches!(bytes.get(start), Some(b) if b.is_ascii_alphabetic());
+ if !starts_with_letter {
+ let range = "a-zA-Z".to_string();
+ return error!(ErrorKind::ExpectedCharRange { range }, start);
+ }
+
+ // Length of the run of identifier bytes that follows the first letter.
+ let tail_len = bytes[start + 1..]
+ .iter()
+ .take_while(|b| b.is_ascii_alphanumeric() || **b == b'_' || **b == b'-')
+ .count();
+ let end = start + 1 + tail_len;
+
+ let name = self.source.slice(start..end);
+ self.ptr = end;
+
+ Ok(ast::Identifier { name })
+ }
+
+ // Parses an optional `.identifier` accessor. Returns `Ok(None)` when the
+ // cursor is not on a `.`.
+ fn get_attribute_accessor(&mut self) -> Result<Option<ast::Identifier<S>>> {
+ if !self.take_byte_if(b'.') {
+ return Ok(None);
+ }
+ self.get_identifier().map(Some)
+ }
+
+ // Parses a variant key: `[`, optional blanks, a number literal or an
+ // identifier, optional blanks and `]`.
+ fn get_variant_key(&mut self) -> Result<ast::VariantKey<S>> {
+ self.expect_byte(b'[')?;
+ self.skip_blank();
+
+ let key = if self.is_number_start() {
+ let value = self.get_number_literal()?;
+ ast::VariantKey::NumberLiteral { value }
+ } else {
+ let ast::Identifier { name } = self.get_identifier()?;
+ ast::VariantKey::Identifier { name }
+ };
+
+ self.skip_blank();
+
+ self.expect_byte(b']')?;
+
+ Ok(key)
+ }
+
+ // Parses the variants of a select expression. Each variant is an optional
+ // `*` (default marker), a key and a mandatory value.
+ //
+ // Errors: `MultipleDefaultVariants` on a second `*`, `MissingValue` for a
+ // variant without a value and `MissingDefaultVariant` when no variant was
+ // marked as default.
+ fn get_variants(&mut self) -> Result<Vec<ast::Variant<S>>> {
+ let mut variants = Vec::new();
+ let mut default_seen = false;
+
+ loop {
+ let starts_variant = self.is_current_byte(b'*') || self.is_current_byte(b'[');
+ if !starts_variant {
+ break;
+ }
+
+ let default = self.take_byte_if(b'*');
+ if default && default_seen {
+ return error!(ErrorKind::MultipleDefaultVariants, self.ptr);
+ }
+ default_seen = default_seen || default;
+
+ let key = self.get_variant_key()?;
+
+ match self.get_pattern()? {
+ Some(value) => {
+ variants.push(ast::Variant {
+ key,
+ value,
+ default,
+ });
+ self.skip_blank();
+ }
+ None => return error!(ErrorKind::MissingValue, self.ptr),
+ }
+ }
+
+ if !default_seen {
+ return error!(ErrorKind::MissingDefaultVariant, self.ptr);
+ }
+ Ok(variants)
+ }
+
+ // Parses a placeable: `{`, blanks, an expression, spaces and `}`.
+ // A term reference with an attribute is rejected with
+ // `TermAttributeAsPlaceable`.
+ fn get_placeable(&mut self) -> Result<ast::Expression<S>> {
+ self.expect_byte(b'{')?;
+ self.skip_blank();
+ let exp = self.get_expression()?;
+ self.skip_blank_inline();
+ self.expect_byte(b'}')?;
+
+ let is_term_attribute = matches!(
+ &exp,
+ ast::Expression::InlineExpression(ast::InlineExpression::TermReference {
+ attribute: Some(_),
+ ..
+ })
+ );
+ if is_term_attribute {
+ return error!(ErrorKind::TermAttributeAsPlaceable, self.ptr);
+ }
+
+ Ok(exp)
+ }
+
+ // Parses an optional argument list `( ... )`.
+ //
+ // Returns `Ok(None)` when, after skipping blanks, the cursor is not on a
+ // `(`. Otherwise the arguments are split into positional and named ones.
+ // Errors: `DuplicatedNamedArgument` when a name is used twice,
+ // `PositionalArgumentFollowsNamed` when a positional argument comes after
+ // a named one, and an `ExpectedToken` error when the closing `)` is
+ // missing.
+ fn get_call_arguments(&mut self) -> Result<Option<ast::CallArguments<S>>> {
+ self.skip_blank();
+ if !self.take_byte_if(b'(') {
+ return Ok(None);
+ }
+
+ let mut positional = vec![];
+ let mut named = vec![];
+ // Names seen so far, used to detect duplicates.
+ let mut argument_names = vec![];
+
+ self.skip_blank();
+
+ while self.ptr < self.length {
+ if self.is_current_byte(b')') {
+ break;
+ }
+
+ let expr = self.get_inline_expression()?;
+
+ // A bare identifier parses as a message reference without attribute;
+ // when a `:` follows, it is the name of a named argument.
+ if let ast::InlineExpression::MessageReference {
+ ref id,
+ attribute: None,
+ } = expr
+ {
+ self.skip_blank();
+ if self.is_current_byte(b':') {
+ if argument_names.contains(&id.name) {
+ return error!(
+ ErrorKind::DuplicatedNamedArgument(id.name.as_ref().to_owned()),
+ self.ptr
+ );
+ }
+ self.ptr += 1;
+ self.skip_blank();
+ let val = self.get_inline_expression()?;
+
+ argument_names.push(id.name.clone());
+ named.push(ast::NamedArgument {
+ name: ast::Identifier {
+ name: id.name.clone(),
+ },
+ value: val,
+ });
+ } else {
+ if !argument_names.is_empty() {
+ return error!(ErrorKind::PositionalArgumentFollowsNamed, self.ptr);
+ }
+ positional.push(expr);
+ }
+ } else {
+ if !argument_names.is_empty() {
+ return error!(ErrorKind::PositionalArgumentFollowsNamed, self.ptr);
+ }
+ positional.push(expr);
+ }
+
+ self.skip_blank();
+ // The separating comma is consumed when present but is not required.
+ self.take_byte_if(b',');
+ self.skip_blank();
+ }
+
+ self.expect_byte(b')')?;
+
+ Ok(Some(ast::CallArguments { positional, named }))
+ }
+}
diff --git a/third_party/rust/fluent-syntax/src/parser/pattern.rs b/third_party/rust/fluent-syntax/src/parser/pattern.rs
new file mode 100644
index 0000000000..84804b0db2
--- /dev/null
+++ b/third_party/rust/fluent-syntax/src/parser/pattern.rs
@@ -0,0 +1,209 @@
+use super::errors::{ErrorKind, ParserError};
+use super::{Parser, Result, Slice};
+use crate::ast;
+
+#[derive(Debug, PartialEq)]
+enum TextElementTermination { // Reason `get_text_slice` stopped scanning a run of text.
+ LineFeed, // A `\n` was consumed; the returned end offset is just past it.
+ CRLF, // A `\r` directly followed by `\n` was found; the end offset is the `\r`, the pointer is left on the `\n`.
+ PlaceableStart, // A `{` was found; it is not consumed.
+ EOF, // The source had no more bytes.
+}
+
+// This enum tracks where a text element is placed within the pattern. `get_pattern`
+// needs it for its dedentation logic: only `LineStart` elements have indentation removed.
+#[derive(Debug, PartialEq)]
+enum TextElementPosition {
+ InitialLineStart, // Initial role when `get_pattern` did not skip an end-of-line before the first element.
+ LineStart, // Role after a skipped end-of-line or after a text slice that ended with a line feed.
+ Continuation, // Role after a placeable or after a text slice that ended for any other reason.
+}
+
+// This enum lets us record offsets into the source for spans that will later become text
+// elements, without slicing them out of the source string yet. Indentation adjustments are
+// then plain offset arithmetic, and slicing happens once at the end of `get_pattern`.
+#[derive(Debug)]
+enum PatternElementPlaceholders<S> {
+ Placeable(ast::Expression<S>), // An already parsed placeable expression.
+ // (start, end, indent, position): `start..end` are offsets into the source, `indent` is the
+ // value `skip_blank_inline` returned for this line, `position` is the element's role.
+ TextElement(usize, usize, usize, TextElementPosition),
+}
+
+// This enum tracks whether a text element is blank or not.
+// Blank elements are ignored when the common indent is calculated and never count as the
+// last meaningful element of a pattern.
+#[derive(Debug, PartialEq)]
+enum TextElementType {
+ Blank, // `get_text_slice` saw nothing but spaces before the slice ended.
+ NonBlank, // `get_text_slice` saw at least one byte that is neither a space nor a terminator.
+}
+
+impl<'s, S> Parser<S>
+where
+ S: Slice<'s>,
+{
+    /// Parses a pattern (a sequence of text elements and placeables) starting
+    /// at the current position.
+    ///
+    /// While scanning, text is only recorded as offsets into the source
+    /// (`PatternElementPlaceholders`). Once the end of the pattern is known the
+    /// placeholders are turned into AST elements:
+    ///
+    /// * everything after the last non-blank element is dropped,
+    /// * elements that start a line lose the indentation shared by all
+    ///   non-blank line starts (`common_indent`),
+    /// * the last element is trimmed with `Slice::trim`.
+    ///
+    /// Returns `Ok(None)` when no placeable and no non-blank text was found.
+    ///
+    /// # Errors
+    ///
+    /// Propagates errors from `get_placeable` and `get_text_slice`.
+    pub(super) fn get_pattern(&mut self) -> Result<Option<ast::Pattern<S>>> {
+        let mut elements = vec![];
+        let mut last_non_blank = None;
+        let mut common_indent = None;
+
+        self.skip_blank_inline();
+
+        let mut text_element_role = if self.skip_eol() {
+            self.skip_blank_block();
+            TextElementPosition::LineStart
+        } else {
+            TextElementPosition::InitialLineStart
+        };
+
+        while self.ptr < self.length {
+            if self.is_current_byte(b'{') {
+                // A placeable in the first column of a line means there is no
+                // indentation that could be shared.
+                if text_element_role == TextElementPosition::LineStart {
+                    common_indent = Some(0);
+                }
+                let exp = self.get_placeable()?;
+                last_non_blank = Some(elements.len());
+                elements.push(PatternElementPlaceholders::Placeable(exp));
+                text_element_role = TextElementPosition::Continuation;
+            } else {
+                let slice_start = self.ptr;
+                let mut indent = 0;
+                if text_element_role == TextElementPosition::LineStart {
+                    indent = self.skip_blank_inline();
+                    if self.ptr >= self.length {
+                        break;
+                    }
+                    let b = match self.source.as_ref().as_bytes().get(self.ptr) {
+                        Some(&b) => b,
+                        None => break,
+                    };
+                    if indent == 0 {
+                        // An unindented line only belongs to the pattern when
+                        // it is blank. Both `\n` and `\r\n` end a line, so a
+                        // blank CRLF line must not terminate the pattern any
+                        // more than a blank LF line does.
+                        let is_blank_line = b == b'\n'
+                            || (b == b'\r' && self.is_byte_at(b'\n', self.ptr + 1));
+                        if !is_blank_line {
+                            break;
+                        }
+                    } else if !Self::is_byte_pattern_continuation(b) {
+                        // Not part of this pattern: rewind so the caller sees
+                        // the line from its beginning.
+                        self.ptr = slice_start;
+                        break;
+                    }
+                }
+                let (start, end, text_element_type, termination_reason) = self.get_text_slice()?;
+                if start != end {
+                    // Only non-blank line starts take part in the common
+                    // indent calculation.
+                    if text_element_role == TextElementPosition::LineStart
+                        && text_element_type == TextElementType::NonBlank
+                    {
+                        if let Some(common) = common_indent {
+                            if indent < common {
+                                common_indent = Some(indent);
+                            }
+                        } else {
+                            common_indent = Some(indent);
+                        }
+                    }
+                    if text_element_role != TextElementPosition::LineStart
+                        || text_element_type == TextElementType::NonBlank
+                        || termination_reason == TextElementTermination::LineFeed
+                    {
+                        if text_element_type == TextElementType::NonBlank {
+                            last_non_blank = Some(elements.len());
+                        }
+                        elements.push(PatternElementPlaceholders::TextElement(
+                            slice_start,
+                            end,
+                            indent,
+                            text_element_role,
+                        ));
+                    }
+                }
+
+                // After a CRLF termination the pointer rests on the `\n`, which
+                // the next iteration picks up as its own line-feed slice; that
+                // is why only `LineFeed` switches to `LineStart`.
+                text_element_role = match termination_reason {
+                    TextElementTermination::LineFeed => TextElementPosition::LineStart,
+                    TextElementTermination::CRLF => TextElementPosition::Continuation,
+                    TextElementTermination::PlaceableStart => TextElementPosition::Continuation,
+                    TextElementTermination::EOF => TextElementPosition::Continuation,
+                };
+            }
+        }
+
+        if let Some(last_non_blank) = last_non_blank {
+            let elements = elements
+                .into_iter()
+                .take(last_non_blank + 1)
+                .enumerate()
+                .map(|(i, elem)| match elem {
+                    PatternElementPlaceholders::Placeable(expression) => {
+                        ast::PatternElement::Placeable { expression }
+                    }
+                    PatternElementPlaceholders::TextElement(start, end, indent, role) => {
+                        // Line starts drop the shared indentation; when no
+                        // common indent was established the whole indent goes.
+                        let start = if role == TextElementPosition::LineStart {
+                            common_indent.map_or_else(
+                                || start + indent,
+                                |common_indent| start + std::cmp::min(indent, common_indent),
+                            )
+                        } else {
+                            start
+                        };
+                        let mut value = self.source.slice(start..end);
+                        if last_non_blank == i {
+                            value.trim();
+                        }
+                        ast::PatternElement::TextElement { value }
+                    }
+                })
+                .collect();
+            return Ok(Some(ast::Pattern { elements }));
+        }
+
+        Ok(None)
+    }
+
+    /// Scans one run of text starting at the current position.
+    ///
+    /// Returns `(start, end, type, termination)`: `start` is the position the
+    /// scan began at, `end` the offset where the run stops, `type` tells
+    /// whether anything other than spaces was seen, and `termination` tells
+    /// why the scan stopped.
+    ///
+    /// * `\n` is consumed and included in `start..end`.
+    /// * For `\r\n` the run ends at the `\r` and the pointer is left on the
+    ///   `\n`.
+    /// * `{` is not consumed.
+    ///
+    /// # Errors
+    ///
+    /// `UnbalancedClosingBrace` when a `}` is encountered.
+    fn get_text_slice(
+        &mut self,
+    ) -> Result<(usize, usize, TextElementType, TextElementTermination)> {
+        let start_pos = self.ptr;
+        let mut text_element_type = TextElementType::Blank;
+
+        let (end_pos, termination) = loop {
+            let current = match self.source.as_ref().as_bytes().get(self.ptr) {
+                Some(&byte) => byte,
+                None => break (self.ptr, TextElementTermination::EOF),
+            };
+            match current {
+                b' ' => self.ptr += 1,
+                b'\n' => {
+                    self.ptr += 1;
+                    break (self.ptr, TextElementTermination::LineFeed);
+                }
+                b'\r' if self.is_byte_at(b'\n', self.ptr + 1) => {
+                    // The slice stops before the `\r`; only the `\r` is
+                    // stepped over here.
+                    let carriage_return = self.ptr;
+                    self.ptr += 1;
+                    break (carriage_return, TextElementTermination::CRLF);
+                }
+                b'{' => break (self.ptr, TextElementTermination::PlaceableStart),
+                b'}' => return error!(ErrorKind::UnbalancedClosingBrace, self.ptr),
+                _ => {
+                    text_element_type = TextElementType::NonBlank;
+                    self.ptr += 1;
+                }
+            }
+        };
+
+        Ok((start_pos, end_pos, text_element_type, termination))
+    }
+}
diff --git a/third_party/rust/fluent-syntax/src/parser/slice.rs b/third_party/rust/fluent-syntax/src/parser/slice.rs
new file mode 100644
index 0000000000..d44f8251fe
--- /dev/null
+++ b/third_party/rust/fluent-syntax/src/parser/slice.rs
@@ -0,0 +1,25 @@
+use std::ops::Range;
+/// String-like source type the parser is generic over.
+///
+/// Implemented for `String` (owned results) and `&str` (results borrowed
+/// from the source).
+pub trait Slice<'s>: AsRef<str> + Clone + PartialEq {
+    /// Returns the part of `self` covered by the byte `range`.
+    ///
+    /// # Panics
+    ///
+    /// Panics like `str` indexing does: when `range` is out of bounds or does
+    /// not fall on character boundaries.
+    fn slice(&self, range: Range<usize>) -> Self;
+
+    /// Removes trailing whitespace from `self` in place.
+    fn trim(&mut self);
+}
+
+impl<'s> Slice<'s> for String {
+    fn slice(&self, range: Range<usize>) -> Self {
+        self[range].to_string()
+    }
+
+    fn trim(&mut self) {
+        // Shorten the existing buffer instead of allocating a trimmed copy.
+        let kept = self.trim_end().len();
+        self.truncate(kept);
+    }
+}
+
+impl<'s> Slice<'s> for &'s str {
+    fn slice(&self, range: Range<usize>) -> Self {
+        &self[range]
+    }
+
+    fn trim(&mut self) {
+        *self = self.trim_end();
+    }
+}
diff --git a/third_party/rust/fluent-syntax/src/unicode.rs b/third_party/rust/fluent-syntax/src/unicode.rs
new file mode 100644
index 0000000000..49301734bd
--- /dev/null
+++ b/third_party/rust/fluent-syntax/src/unicode.rs
@@ -0,0 +1,91 @@
+use std::borrow::Cow;
+use std::char;
+use std::fmt;
+
+/// Character emitted in place of any escape sequence that cannot be decoded
+/// (U+FFFD REPLACEMENT CHARACTER).
+const UNKNOWN_CHAR: char = '\u{FFFD}';
+
+/// Converts the hexadecimal digits of a `\uHHHH` / `\UHHHHHH` escape into a
+/// `char`.
+///
+/// Returns [`UNKNOWN_CHAR`] when `s` is `None`, contains anything other than
+/// ASCII hex digits, or does not denote a valid Unicode scalar value.
+fn encode_unicode(s: Option<&str>) -> char {
+    // `u32::from_str_radix` tolerates a leading `+`, which is not part of an
+    // escape sequence, so the digits are validated explicitly first.
+    s.filter(|digits| digits.bytes().all(|b| b.is_ascii_hexdigit()))
+        .and_then(|digits| u32::from_str_radix(digits, 16).ok())
+        .and_then(char::from_u32)
+        .unwrap_or(UNKNOWN_CHAR)
+}
+
+/// Decodes the escape sequence introduced by the backslash at byte offset
+/// `backslash` of `input`.
+///
+/// Returns the decoded character and the byte offset at which scanning
+/// resumes. Sequences that are unknown, malformed or cut short decode to
+/// [`UNKNOWN_CHAR`]. The returned offset never exceeds `input.len()` and is
+/// always a character boundary, so callers can slice `input` at it safely.
+fn decode_escape(input: &str, backslash: usize) -> (char, usize) {
+    let marker = backslash + 1;
+    let (decoded, next) = match input.as_bytes().get(marker) {
+        Some(b'\\') => ('\\', marker + 1),
+        Some(b'"') => ('"', marker + 1),
+        Some(&kind) if kind == b'u' || kind == b'U' => {
+            let len = if kind == b'u' { 4 } else { 6 };
+            let digits_start = marker + 1;
+            let digits_end = digits_start + len;
+            (
+                encode_unicode(input.get(digits_start..digits_end)),
+                digits_end,
+            )
+        }
+        _ => (UNKNOWN_CHAR, marker + 1),
+    };
+
+    // A malformed sequence may end past the input or inside a multi-byte
+    // character; move forward to the closest valid resume position.
+    let mut next = next.min(input.len());
+    while !input.is_char_boundary(next) {
+        next += 1;
+    }
+    (decoded, next)
+}
+
+/// Writes `input` to `w` with its escape sequences resolved.
+///
+/// Recognised sequences are `\\`, `\"`, `\uHHHH` and `\UHHHHHH`. Anything
+/// else that follows a backslash, as well as malformed or truncated unicode
+/// escapes, is replaced by U+FFFD.
+///
+/// # Errors
+///
+/// Returns the error reported by `w`, if any.
+pub fn unescape_unicode<W>(w: &mut W, input: &str) -> fmt::Result
+where
+    W: fmt::Write,
+{
+    // `start` is the beginning of the literal text not yet written.
+    let mut start = 0;
+
+    while let Some(offset) = input[start..].find('\\') {
+        let backslash = start + offset;
+        if offset != 0 {
+            w.write_str(&input[start..backslash])?;
+        }
+        let (decoded, next) = decode_escape(input, backslash);
+        w.write_char(decoded)?;
+        start = next;
+    }
+
+    if start != input.len() {
+        w.write_str(&input[start..])?;
+    }
+    Ok(())
+}
+
+/// Returns `input` with its escape sequences resolved.
+///
+/// Borrows `input` unchanged when it contains no backslash; otherwise
+/// allocates a new `String` with the same result [`unescape_unicode`] would
+/// write.
+pub fn unescape_unicode_to_string(input: &str) -> Cow<'_, str> {
+    if !input.contains('\\') {
+        return Cow::Borrowed(input);
+    }
+
+    let mut unescaped = String::with_capacity(input.len());
+    unescape_unicode(&mut unescaped, input).expect("writing to a String never fails");
+    Cow::Owned(unescaped)
+}