From 26a029d407be480d791972afb5975cf62c9360a6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 02:47:55 +0200 Subject: Adding upstream version 124.0.1. Signed-off-by: Daniel Baumann --- testing/mozbase/rust/mozprofile/Cargo.toml | 16 + testing/mozbase/rust/mozprofile/fuzz/Cargo.toml | 25 + .../mozprofile/fuzz/fuzz_targets/prefreader.rs | 16 + testing/mozbase/rust/mozprofile/src/lib.rs | 241 +++++ testing/mozbase/rust/mozprofile/src/preferences.rs | 138 +++ testing/mozbase/rust/mozprofile/src/prefreader.rs | 1046 ++++++++++++++++++++ testing/mozbase/rust/mozprofile/src/profile.rs | 135 +++ 7 files changed, 1617 insertions(+) create mode 100644 testing/mozbase/rust/mozprofile/Cargo.toml create mode 100644 testing/mozbase/rust/mozprofile/fuzz/Cargo.toml create mode 100644 testing/mozbase/rust/mozprofile/fuzz/fuzz_targets/prefreader.rs create mode 100644 testing/mozbase/rust/mozprofile/src/lib.rs create mode 100644 testing/mozbase/rust/mozprofile/src/preferences.rs create mode 100644 testing/mozbase/rust/mozprofile/src/prefreader.rs create mode 100644 testing/mozbase/rust/mozprofile/src/profile.rs (limited to 'testing/mozbase/rust/mozprofile') diff --git a/testing/mozbase/rust/mozprofile/Cargo.toml b/testing/mozbase/rust/mozprofile/Cargo.toml new file mode 100644 index 0000000000..efc0dc89ca --- /dev/null +++ b/testing/mozbase/rust/mozprofile/Cargo.toml @@ -0,0 +1,16 @@ +[package] +edition = "2021" +name = "mozprofile" +version = "0.9.2" +authors = ["Mozilla"] +description = "Library for working with Mozilla profiles." +keywords = [ + "firefox", + "mozilla", +] +license = "MPL-2.0" +repository = "https://hg.mozilla.org/mozilla-central/file/tip/testing/mozbase/rust/mozprofile" + +[dependencies] +tempfile = "3" +thiserror = "1" diff --git a/testing/mozbase/rust/mozprofile/fuzz/Cargo.toml b/testing/mozbase/rust/mozprofile/fuzz/Cargo.toml new file mode 100644 index 0000000000..53e116143c --- /dev/null +++ b/testing/mozbase/rust/mozprofile/fuzz/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "mozprofile-fuzz" +version = "0.0.0" +authors = ["Automatically generated"] +publish = false +edition = "2018" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" + +[dependencies.mozprofile] +path = ".." + +# Prevent this from interfering with workspaces +[workspace] +members = ["."] + +[[bin]] +name = "prefreader" +path = "fuzz_targets/prefreader.rs" +test = false +doc = false diff --git a/testing/mozbase/rust/mozprofile/fuzz/fuzz_targets/prefreader.rs b/testing/mozbase/rust/mozprofile/fuzz/fuzz_targets/prefreader.rs new file mode 100644 index 0000000000..824eb3c31e --- /dev/null +++ b/testing/mozbase/rust/mozprofile/fuzz/fuzz_targets/prefreader.rs @@ -0,0 +1,16 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#![no_main] +use libfuzzer_sys::fuzz_target; +use std::io::Cursor; +extern crate mozprofile; + +fuzz_target!(|data: &[u8]| { + let buf = Vec::new(); + let mut out = Cursor::new(buf); + mozprofile::prefreader::parse(data).map(|parsed| { + mozprofile::prefreader::serialize(&parsed, &mut out); + }); +}); diff --git a/testing/mozbase/rust/mozprofile/src/lib.rs b/testing/mozbase/rust/mozprofile/src/lib.rs new file mode 100644 index 0000000000..346f291137 --- /dev/null +++ b/testing/mozbase/rust/mozprofile/src/lib.rs @@ -0,0 +1,241 @@ +#![forbid(unsafe_code)] +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +extern crate tempfile; + +pub mod preferences; +pub mod prefreader; +pub mod profile; + +#[cfg(test)] +mod test { + // use std::fs::File; + // use profile::Profile; + use crate::preferences::Pref; + use crate::prefreader::{parse, serialize, tokenize}; + use crate::prefreader::{Position, PrefToken}; + use std::collections::BTreeMap; + use std::io::Cursor; + use std::str; + + #[test] + fn tokenize_simple() { + let prefs = " user_pref ( 'example.pref.string', 'value' ) ;\n \ + pref(\"example.pref.int\", -123); sticky_pref('example.pref.bool',false);"; + + let p = Position::new(); + + let expected = vec![ + PrefToken::UserPrefFunction(p), + PrefToken::Paren('(', p), + PrefToken::String("example.pref.string".into(), p), + PrefToken::Comma(p), + PrefToken::String("value".into(), p), + PrefToken::Paren(')', p), + PrefToken::Semicolon(p), + PrefToken::PrefFunction(p), + PrefToken::Paren('(', p), + PrefToken::String("example.pref.int".into(), p), + PrefToken::Comma(p), + PrefToken::Int(-123, p), + PrefToken::Paren(')', p), + PrefToken::Semicolon(p), + PrefToken::StickyPrefFunction(p), + PrefToken::Paren('(', p), + PrefToken::String("example.pref.bool".into(), p), + PrefToken::Comma(p), + PrefToken::Bool(false, p), + PrefToken::Paren(')', p), + PrefToken::Semicolon(p), + ]; + + tokenize_test(prefs, &expected); + } + + #[test] + fn tokenize_comments() { + let prefs = "# bash style comment\n /*block comment*/ user_pref/*block comment*/(/*block \ + comment*/ 'example.pref.string' /*block comment*/,/*block comment*/ \ + 'value'/*block comment*/ )// line comment"; + + let p = Position::new(); + + let expected = vec![ + PrefToken::CommentBashLine(" bash style comment".into(), p), + PrefToken::CommentBlock("block comment".into(), p), + PrefToken::UserPrefFunction(p), + PrefToken::CommentBlock("block comment".into(), p), + PrefToken::Paren('(', p), + PrefToken::CommentBlock("block comment".into(), p), + PrefToken::String("example.pref.string".into(), p), + PrefToken::CommentBlock("block comment".into(), p), + PrefToken::Comma(p), + PrefToken::CommentBlock("block comment".into(), p), + PrefToken::String("value".into(), p), + PrefToken::CommentBlock("block comment".into(), p), + PrefToken::Paren(')', p), + PrefToken::CommentLine(" line comment".into(), p), + ]; + + tokenize_test(prefs, &expected); + } + + #[test] + fn tokenize_escapes() { + let prefs = r#"user_pref('example\x20pref', "\u0020\u2603\uD800\uDC96\"\'\n\r\\\w)"#; + + let p = Position::new(); + + let expected = vec![ + PrefToken::UserPrefFunction(p), + PrefToken::Paren('(', p), + PrefToken::String("example pref".into(), p), + PrefToken::Comma(p), + PrefToken::String(" ☃𐂖\"'\n\r\\\\w".into(), p), + PrefToken::Paren(')', p), + ]; + + tokenize_test(prefs, &expected); + } + + fn tokenize_test(prefs: &str, expected: &[PrefToken]) { + println!("{}\n", prefs); + + for (e, a) in expected.iter().zip(tokenize(prefs.as_bytes())) { + let success = match (e, &a) { + (&PrefToken::PrefFunction(_), &PrefToken::PrefFunction(_)) => true, + (&PrefToken::UserPrefFunction(_), &PrefToken::UserPrefFunction(_)) => true, + (&PrefToken::StickyPrefFunction(_), &PrefToken::StickyPrefFunction(_)) => true, + ( + &PrefToken::CommentBlock(ref data_e, _), + &PrefToken::CommentBlock(ref data_a, _), + ) => data_e == data_a, + ( + &PrefToken::CommentLine(ref data_e, _), + &PrefToken::CommentLine(ref data_a, _), + ) => data_e == data_a, + ( + &PrefToken::CommentBashLine(ref data_e, _), + &PrefToken::CommentBashLine(ref data_a, _), + ) => data_e == data_a, + (&PrefToken::Paren(data_e, _), &PrefToken::Paren(data_a, _)) => data_e == data_a, + (&PrefToken::Semicolon(_), &PrefToken::Semicolon(_)) => true, + (&PrefToken::Comma(_), &PrefToken::Comma(_)) => true, + (&PrefToken::String(ref data_e, _), &PrefToken::String(ref data_a, _)) => { + data_e == data_a + } + (&PrefToken::Int(data_e, _), &PrefToken::Int(data_a, _)) => data_e == data_a, + (&PrefToken::Bool(data_e, _), &PrefToken::Bool(data_a, _)) => data_e == data_a, + (&PrefToken::Error(ref data_e, _), &PrefToken::Error(ref data_a, _)) => { + *data_e == *data_a + } + (_, _) => false, + }; + if !success { + println!("Expected {:?}, got {:?}", e, a); + } + assert!(success); + } + } + + #[test] + fn parse_simple() { + let input = " user_pref /* block comment */ ( 'example.pref.string', 'value' ) ;\n \ + pref(\"example.pref.int\", -123); sticky_pref('example.pref.bool',false)"; + + let mut expected: BTreeMap = BTreeMap::new(); + expected.insert("example.pref.string".into(), Pref::new("value")); + expected.insert("example.pref.int".into(), Pref::new(-123)); + expected.insert("example.pref.bool".into(), Pref::new_sticky(false)); + + parse_test(input, expected); + } + + #[test] + fn parse_escape() { + let input = r#"user_pref('example\\pref\"string', 'val\x20ue' )"#; + + let mut expected: BTreeMap = BTreeMap::new(); + expected.insert("example\\pref\"string".into(), Pref::new("val ue")); + + parse_test(input, expected); + } + + #[test] + fn parse_empty() { + let inputs = ["", " ", "\n", "\n \n"]; + for input in inputs { + let expected: BTreeMap = BTreeMap::new(); + parse_test(input, expected); + } + } + + #[test] + fn parse_newline() { + let inputs = vec!["\na", "\n\nfoo"]; + for input in inputs { + assert!(parse(input.as_bytes()).is_err()); + } + } + + #[test] + fn parse_minus() { + let inputs = ["pref(-", "user_pref(\"example.pref.int\", -);"]; + for input in inputs { + assert!(parse(input.as_bytes()).is_err()); + } + } + + #[test] + fn parse_boolean_eof() { + let inputs = vec!["pref(true", "pref(false", "pref(false,", "pref(false)"]; + for input in inputs { + assert!(parse(input.as_bytes()).is_err()); + } + } + + fn parse_test(input: &str, expected: BTreeMap) { + match parse(input.as_bytes()) { + Ok(ref actual) => { + println!("Expected:\n{:?}\nActual\n{:?}", expected, actual); + assert_eq!(actual, &expected); + } + Err(e) => { + println!("{}", e); + assert!(false) + } + } + } + + #[test] + fn serialize_simple() { + let input = " user_pref /* block comment */ ( 'example.pref.string', 'value' ) ;\n \ + pref(\"example.pref.int\", -123); sticky_pref('example.pref.bool',false)"; + let expected = "sticky_pref(\"example.pref.bool\", false); +user_pref(\"example.pref.int\", -123); +user_pref(\"example.pref.string\", \"value\");\n"; + + serialize_test(input, expected); + } + + #[test] + fn serialize_quotes() { + let input = r#"user_pref('example\\with"quotes"', '"Value"')"#; + let expected = r#"user_pref("example\\with\"quotes\"", "\"Value\""); +"#; + + serialize_test(input, expected); + } + + fn serialize_test(input: &str, expected: &str) { + let buf = Vec::with_capacity(expected.len()); + let mut out = Cursor::new(buf); + serialize(&parse(input.as_bytes()).unwrap(), &mut out).unwrap(); + let data = out.into_inner(); + let actual = str::from_utf8(&*data).unwrap(); + println!("Expected:\n{:?}\nActual\n{:?}", expected, actual); + assert_eq!(actual, expected); + } +} diff --git a/testing/mozbase/rust/mozprofile/src/preferences.rs b/testing/mozbase/rust/mozprofile/src/preferences.rs new file mode 100644 index 0000000000..2489352384 --- /dev/null +++ b/testing/mozbase/rust/mozprofile/src/preferences.rs @@ -0,0 +1,138 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +use std::collections::BTreeMap; + +pub type Preferences = BTreeMap; + +#[derive(Debug, PartialEq, Clone)] +pub enum PrefValue { + Bool(bool), + String(String), + Int(i64), +} + +impl From for PrefValue { + fn from(value: bool) -> Self { + PrefValue::Bool(value) + } +} + +impl From for PrefValue { + fn from(value: String) -> Self { + PrefValue::String(value) + } +} + +impl From<&'static str> for PrefValue { + fn from(value: &'static str) -> Self { + PrefValue::String(value.into()) + } +} + +impl From for PrefValue { + fn from(value: i8) -> Self { + PrefValue::Int(value.into()) + } +} + +impl From for PrefValue { + fn from(value: u8) -> Self { + PrefValue::Int(value.into()) + } +} + +impl From for PrefValue { + fn from(value: i16) -> Self { + PrefValue::Int(value.into()) + } +} + +impl From for PrefValue { + fn from(value: u16) -> Self { + PrefValue::Int(value.into()) + } +} + +impl From for PrefValue { + fn from(value: i32) -> Self { + PrefValue::Int(value.into()) + } +} + +impl From for PrefValue { + fn from(value: u32) -> Self { + PrefValue::Int(value.into()) + } +} + +impl From for PrefValue { + fn from(value: i64) -> Self { + PrefValue::Int(value) + } +} + +// Implementing From for PrefValue wouldn't be safe +// because it might overflow. + +#[derive(Debug, PartialEq, Clone)] +pub struct Pref { + pub value: PrefValue, + pub sticky: bool, +} + +impl Pref { + /// Create a new preference with `value`. + pub fn new(value: T) -> Pref + where + T: Into, + { + Pref { + value: value.into(), + sticky: false, + } + } + + /// Create a new sticky, or locked, preference with `value`. + /// These cannot be changed by the user in `about:config`. + pub fn new_sticky(value: T) -> Pref + where + T: Into, + { + Pref { + value: value.into(), + sticky: true, + } + } +} + +#[cfg(test)] +mod test { + use super::PrefValue; + + #[test] + fn test_bool() { + assert_eq!(PrefValue::from(true), PrefValue::Bool(true)); + } + + #[test] + fn test_string() { + assert_eq!(PrefValue::from("foo"), PrefValue::String("foo".to_string())); + assert_eq!( + PrefValue::from("foo".to_string()), + PrefValue::String("foo".to_string()) + ); + } + + #[test] + fn test_int() { + assert_eq!(PrefValue::from(42i8), PrefValue::Int(42i64)); + assert_eq!(PrefValue::from(42u8), PrefValue::Int(42i64)); + assert_eq!(PrefValue::from(42i16), PrefValue::Int(42i64)); + assert_eq!(PrefValue::from(42u16), PrefValue::Int(42i64)); + assert_eq!(PrefValue::from(42i32), PrefValue::Int(42i64)); + assert_eq!(PrefValue::from(42u32), PrefValue::Int(42i64)); + assert_eq!(PrefValue::from(42i64), PrefValue::Int(42i64)); + } +} diff --git a/testing/mozbase/rust/mozprofile/src/prefreader.rs b/testing/mozbase/rust/mozprofile/src/prefreader.rs new file mode 100644 index 0000000000..9c94666e7d --- /dev/null +++ b/testing/mozbase/rust/mozprofile/src/prefreader.rs @@ -0,0 +1,1046 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +use crate::preferences::{Pref, PrefValue, Preferences}; +use std::borrow::Borrow; +use std::borrow::Cow; +use std::char; +use std::error::Error; +use std::io::{self, Write}; +use std::iter::Iterator; + +use std::str; +use thiserror::Error; + +impl PrefReaderError { + fn new(message: String, position: Position, parent: Option>) -> PrefReaderError { + PrefReaderError { + message, + position, + parent, + } + } +} + +impl From for PrefReaderError { + fn from(err: io::Error) -> PrefReaderError { + PrefReaderError::new("IOError".into(), Position::new(), Some(err.into())) + } +} + +#[derive(Copy, Clone, Debug, PartialEq)] +enum TokenizerState { + Junk, + CommentStart, + CommentLine, + CommentBlock, + FunctionName, + AfterFunctionName, + FunctionArgs, + FunctionArg, + DoubleQuotedString, + SingleQuotedString, + Number, + Bool, + AfterFunctionArg, + AfterFunction, + Error, +} + +#[derive(Copy, Clone, Debug, Default, PartialEq)] +pub struct Position { + line: u32, + column: u32, +} + +impl Position { + pub fn new() -> Position { + Position { line: 1, column: 0 } + } +} + +#[derive(Copy, Clone, Debug, PartialEq)] +pub enum TokenType { + None, + PrefFunction, + UserPrefFunction, + StickyPrefFunction, + CommentBlock, + CommentLine, + CommentBashLine, + Paren, + Semicolon, + Comma, + String, + Int, + Bool, + Error, +} + +#[derive(Debug, PartialEq)] +pub enum PrefToken<'a> { + PrefFunction(Position), + UserPrefFunction(Position), + StickyPrefFunction(Position), + CommentBlock(Cow<'a, str>, Position), + CommentLine(Cow<'a, str>, Position), + CommentBashLine(Cow<'a, str>, Position), + Paren(char, Position), + Semicolon(Position), + Comma(Position), + String(Cow<'a, str>, Position), + Int(i64, Position), + Bool(bool, Position), + Error(String, Position), +} + +impl<'a> PrefToken<'a> { + fn position(&self) -> Position { + match *self { + PrefToken::PrefFunction(position) => position, + PrefToken::UserPrefFunction(position) => position, + PrefToken::StickyPrefFunction(position) => position, + PrefToken::CommentBlock(_, position) => position, + PrefToken::CommentLine(_, position) => position, + PrefToken::CommentBashLine(_, position) => position, + PrefToken::Paren(_, position) => position, + PrefToken::Semicolon(position) => position, + PrefToken::Comma(position) => position, + PrefToken::String(_, position) => position, + PrefToken::Int(_, position) => position, + PrefToken::Bool(_, position) => position, + PrefToken::Error(_, position) => position, + } + } +} + +#[derive(Debug, Error)] +#[error("{message} at line {}, column {}", .position.line, .position.column)] +pub struct PrefReaderError { + message: String, + position: Position, + #[source] + parent: Option>, +} + +struct TokenData<'a> { + token_type: TokenType, + complete: bool, + position: Position, + data: Cow<'a, str>, + start_pos: usize, +} + +impl<'a> TokenData<'a> { + fn new(token_type: TokenType, position: Position, start_pos: usize) -> TokenData<'a> { + TokenData { + token_type, + complete: false, + position, + data: Cow::Borrowed(""), + start_pos, + } + } + + fn start(&mut self, tokenizer: &PrefTokenizer, token_type: TokenType) { + self.token_type = token_type; + self.position = tokenizer.position; + self.start_pos = tokenizer.pos; + } + + fn end(&mut self, buf: &'a [u8], end_pos: usize) -> Result<(), PrefReaderError> { + self.complete = true; + self.add_slice_to_token(buf, end_pos) + } + + fn add_slice_to_token(&mut self, buf: &'a [u8], end_pos: usize) -> Result<(), PrefReaderError> { + let data = match str::from_utf8(&buf[self.start_pos..end_pos]) { + Ok(x) => x, + Err(_) => { + return Err(PrefReaderError::new( + "Could not convert string to utf8".into(), + self.position, + None, + )); + } + }; + if self.data != "" { + self.data.to_mut().push_str(data) + } else { + self.data = Cow::Borrowed(data) + }; + Ok(()) + } + + fn push_char(&mut self, tokenizer: &PrefTokenizer, data: char) { + self.data.to_mut().push(data); + self.start_pos = tokenizer.pos + 1; + } +} + +pub struct PrefTokenizer<'a> { + data: &'a [u8], + pos: usize, + cur: Option, + position: Position, + state: TokenizerState, + next_state: Option, +} + +impl<'a> PrefTokenizer<'a> { + pub fn new(data: &'a [u8]) -> PrefTokenizer<'a> { + PrefTokenizer { + data, + pos: 0, + cur: None, + position: Position::new(), + state: TokenizerState::Junk, + next_state: Some(TokenizerState::FunctionName), + } + } + + fn make_token(&mut self, token_data: TokenData<'a>) -> PrefToken<'a> { + let buf = token_data.data; + let position = token_data.position; + // Note: the panic! here are for cases where the invalid input is regarded as + // a bug in the caller. In cases where `make_token` can legitimately be called + // with invalid data we must instead return a PrefToken::Error + match token_data.token_type { + TokenType::None => panic!("Got a token without a type"), + TokenType::PrefFunction => PrefToken::PrefFunction(position), + TokenType::UserPrefFunction => PrefToken::UserPrefFunction(position), + TokenType::StickyPrefFunction => PrefToken::StickyPrefFunction(position), + TokenType::CommentBlock => PrefToken::CommentBlock(buf, position), + TokenType::CommentLine => PrefToken::CommentLine(buf, position), + TokenType::CommentBashLine => PrefToken::CommentBashLine(buf, position), + TokenType::Paren => { + if buf.len() != 1 { + panic!("Expected a buffer of length one"); + } + PrefToken::Paren(buf.chars().next().unwrap(), position) + } + TokenType::Semicolon => PrefToken::Semicolon(position), + TokenType::Comma => PrefToken::Comma(position), + TokenType::String => PrefToken::String(buf, position), + TokenType::Int => { + return match buf.parse::() { + Ok(value) => PrefToken::Int(value, position), + Err(_) => PrefToken::Error(format!("Expected integer, got {}", buf), position), + } + } + TokenType::Bool => { + let value = match buf.borrow() { + "true" => true, + "false" => false, + x => panic!("Boolean wasn't 'true' or 'false' (was {})", x), + }; + PrefToken::Bool(value, position) + } + TokenType::Error => panic!("make_token can't construct errors"), + } + } + + fn get_char(&mut self) -> Option { + if self.pos + 1 >= self.data.len() { + self.cur = None; + return None; + }; + if self.cur.is_some() { + self.pos += 1; + } + let c = self.data[self.pos] as char; + if self.cur == Some('\n') { + self.position.line += 1; + self.position.column = 0; + } else if self.cur.is_some() { + self.position.column += 1; + }; + self.cur = Some(c); + self.cur + } + + fn unget_char(&mut self) -> Option { + if self.pos == 0 { + self.position.column = 0; + self.cur = None + } else { + self.pos -= 1; + let c = self.data[self.pos] as char; + if c == '\n' { + self.position.line -= 1; + let mut col_pos = self.pos; + while col_pos > 0 { + col_pos -= 1; + if self.data[col_pos] as char == '\n' { + break; + } + } + self.position.column = (self.pos - col_pos) as u32; + } else { + self.position.column -= 1; + } + self.cur = Some(c); + } + self.cur + } + + fn is_space(c: char) -> bool { + matches!(c, ' ' | '\t' | '\r' | '\n') + } + + fn skip_whitespace(&mut self) -> Option { + while let Some(c) = self.cur { + if PrefTokenizer::is_space(c) { + self.get_char(); + } else { + break; + }; + } + self.cur + } + + fn consume_escape(&mut self, token_data: &mut TokenData<'a>) -> Result<(), PrefReaderError> { + let pos = self.pos; + let escaped = self.read_escape()?; + if let Some(escape_char) = escaped { + token_data.add_slice_to_token(self.data, pos)?; + token_data.push_char(self, escape_char); + }; + Ok(()) + } + + fn read_escape(&mut self) -> Result, PrefReaderError> { + let escape_char = match self.get_char() { + Some('u') => self.read_hex_escape(4, true)?, + Some('x') => self.read_hex_escape(2, true)?, + Some('\\') => '\\' as u32, + Some('"') => '"' as u32, + Some('\'') => '\'' as u32, + Some('r') => '\r' as u32, + Some('n') => '\n' as u32, + Some(_) => return Ok(None), + None => { + return Err(PrefReaderError::new( + "EOF in character escape".into(), + self.position, + None, + )) + } + }; + Ok(Some(char::from_u32(escape_char).ok_or_else(|| { + PrefReaderError::new( + "Invalid codepoint decoded from escape".into(), + self.position, + None, + ) + })?)) + } + + fn read_hex_escape(&mut self, hex_chars: isize, first: bool) -> Result { + let mut value = 0; + for _ in 0..hex_chars { + match self.get_char() { + Some(x) => { + value <<= 4; + match x { + '0'..='9' => value += x as u32 - '0' as u32, + 'a'..='f' => value += x as u32 - 'a' as u32, + 'A'..='F' => value += x as u32 - 'A' as u32, + _ => { + return Err(PrefReaderError::new( + "Unexpected character in escape".into(), + self.position, + None, + )) + } + } + } + None => { + return Err(PrefReaderError::new( + "Unexpected EOF in escape".into(), + self.position, + None, + )) + } + } + } + if first && (0xD800..=0xDBFF).contains(&value) { + // First part of a surrogate pair + if self.get_char() != Some('\\') || self.get_char() != Some('u') { + return Err(PrefReaderError::new( + "Lone high surrogate in surrogate pair".into(), + self.position, + None, + )); + } + self.unget_char(); + let high_surrogate = value; + let low_surrogate = self.read_hex_escape(4, false)?; + let high_value = (high_surrogate - 0xD800) << 10; + let low_value = low_surrogate - 0xDC00; + value = high_value + low_value + 0x10000; + } else if first && (0xDC00..=0xDFFF).contains(&value) { + return Err(PrefReaderError::new( + "Lone low surrogate".into(), + self.position, + None, + )); + } else if !first && !(0xDC00..=0xDFFF).contains(&value) { + return Err(PrefReaderError::new( + "Invalid low surrogate in surrogate pair".into(), + self.position, + None, + )); + } + Ok(value) + } + + fn get_match(&mut self, target: &str, separators: &str) -> bool { + let initial_pos = self.pos; + let mut matched = true; + for c in target.chars() { + if self.cur == Some(c) { + self.get_char(); + } else { + matched = false; + break; + } + } + + if !matched { + for _ in 0..(self.pos - initial_pos) { + self.unget_char(); + } + } else { + // Check that the next character is whitespace or a separator + if let Some(c) = self.cur { + if !(PrefTokenizer::is_space(c) || separators.contains(c) || c == '/') { + matched = false; + } + self.unget_char(); + } + // Otherwise the token was followed by EOF. That's a valid match, but + // will presumably cause a parse error later. + } + + matched + } + + fn next_token(&mut self) -> Result>, PrefReaderError> { + let mut token_data = TokenData::new(TokenType::None, Position::new(), 0); + + loop { + let mut c = match self.get_char() { + Some(x) => x, + None => return Ok(None), + }; + + self.state = match self.state { + TokenizerState::Junk => { + c = match self.skip_whitespace() { + Some(x) => x, + None => return Ok(None), + }; + match c { + '/' => TokenizerState::CommentStart, + '#' => { + token_data.start(self, TokenType::CommentBashLine); + token_data.start_pos = self.pos + 1; + TokenizerState::CommentLine + } + _ => { + self.unget_char(); + let next = match self.next_state { + Some(x) => x, + None => { + return Err(PrefReaderError::new( + "In Junk state without a next state defined".into(), + self.position, + None, + )) + } + }; + self.next_state = None; + next + } + } + } + TokenizerState::CommentStart => match c { + '*' => { + token_data.start(self, TokenType::CommentBlock); + token_data.start_pos = self.pos + 1; + TokenizerState::CommentBlock + } + '/' => { + token_data.start(self, TokenType::CommentLine); + token_data.start_pos = self.pos + 1; + TokenizerState::CommentLine + } + _ => { + return Err(PrefReaderError::new( + "Invalid character after /".into(), + self.position, + None, + )) + } + }, + TokenizerState::CommentLine => match c { + '\n' => { + token_data.end(self.data, self.pos)?; + TokenizerState::Junk + } + _ => TokenizerState::CommentLine, + }, + TokenizerState::CommentBlock => match c { + '*' => { + if self.get_char() == Some('/') { + token_data.end(self.data, self.pos - 1)?; + TokenizerState::Junk + } else { + TokenizerState::CommentBlock + } + } + _ => TokenizerState::CommentBlock, + }, + TokenizerState::FunctionName => { + let position = self.position; + let start_pos = self.pos; + match c { + 'u' => { + if self.get_match("user_pref", "(") { + token_data.start(self, TokenType::UserPrefFunction); + } + } + 's' => { + if self.get_match("sticky_pref", "(") { + token_data.start(self, TokenType::StickyPrefFunction); + } + } + 'p' => { + if self.get_match("pref", "(") { + token_data.start(self, TokenType::PrefFunction); + } + } + _ => {} + }; + if token_data.token_type == TokenType::None { + // We didn't match anything + return Err(PrefReaderError::new( + "Expected a pref function name".into(), + position, + None, + )); + } else { + token_data.start_pos = start_pos; + token_data.position = position; + token_data.end(self.data, self.pos + 1)?; + self.next_state = Some(TokenizerState::AfterFunctionName); + TokenizerState::Junk + } + } + TokenizerState::AfterFunctionName => match c { + '(' => { + self.next_state = Some(TokenizerState::FunctionArgs); + token_data.start(self, TokenType::Paren); + token_data.end(self.data, self.pos + 1)?; + self.next_state = Some(TokenizerState::FunctionArgs); + TokenizerState::Junk + } + _ => { + return Err(PrefReaderError::new( + "Expected an opening paren".into(), + self.position, + None, + )) + } + }, + TokenizerState::FunctionArgs => match c { + ')' => { + token_data.start(self, TokenType::Paren); + token_data.end(self.data, self.pos + 1)?; + self.next_state = Some(TokenizerState::AfterFunction); + TokenizerState::Junk + } + _ => { + self.unget_char(); + TokenizerState::FunctionArg + } + }, + TokenizerState::FunctionArg => match c { + '"' => { + token_data.start(self, TokenType::String); + token_data.start_pos = self.pos + 1; + TokenizerState::DoubleQuotedString + } + '\'' => { + token_data.start(self, TokenType::String); + token_data.start_pos = self.pos + 1; + TokenizerState::SingleQuotedString + } + 't' | 'f' => { + self.unget_char(); + TokenizerState::Bool + } + '0'..='9' | '-' | '+' => { + token_data.start(self, TokenType::Int); + TokenizerState::Number + } + _ => { + return Err(PrefReaderError::new( + "Invalid character at start of function argument".into(), + self.position, + None, + )) + } + }, + TokenizerState::DoubleQuotedString => match c { + '"' => { + token_data.end(self.data, self.pos)?; + self.next_state = Some(TokenizerState::AfterFunctionArg); + TokenizerState::Junk + } + '\n' => { + return Err(PrefReaderError::new( + "EOL in double quoted string".into(), + self.position, + None, + )) + } + '\\' => { + self.consume_escape(&mut token_data)?; + TokenizerState::DoubleQuotedString + } + _ => TokenizerState::DoubleQuotedString, + }, + TokenizerState::SingleQuotedString => match c { + '\'' => { + token_data.end(self.data, self.pos)?; + self.next_state = Some(TokenizerState::AfterFunctionArg); + TokenizerState::Junk + } + '\n' => { + return Err(PrefReaderError::new( + "EOL in single quoted string".into(), + self.position, + None, + )) + } + '\\' => { + self.consume_escape(&mut token_data)?; + TokenizerState::SingleQuotedString + } + _ => TokenizerState::SingleQuotedString, + }, + TokenizerState::Number => match c { + '0'..='9' => TokenizerState::Number, + ')' | ',' => { + token_data.end(self.data, self.pos)?; + self.unget_char(); + self.next_state = Some(TokenizerState::AfterFunctionArg); + TokenizerState::Junk + } + x if PrefTokenizer::is_space(x) => { + token_data.end(self.data, self.pos)?; + self.next_state = Some(TokenizerState::AfterFunctionArg); + TokenizerState::Junk + } + _ => { + return Err(PrefReaderError::new( + "Invalid character in number literal".into(), + self.position, + None, + )) + } + }, + TokenizerState::Bool => { + let start_pos = self.pos; + let position = self.position; + match c { + 't' => { + if self.get_match("true", ",)") { + token_data.start(self, TokenType::Bool) + } + } + 'f' => { + if self.get_match("false", ",)") { + token_data.start(self, TokenType::Bool) + } + } + _ => {} + }; + if token_data.token_type == TokenType::None { + return Err(PrefReaderError::new( + "Unexpected characters in function argument".into(), + position, + None, + )); + } else { + token_data.start_pos = start_pos; + token_data.position = position; + token_data.end(self.data, self.pos + 1)?; + self.next_state = Some(TokenizerState::AfterFunctionArg); + TokenizerState::Junk + } + } + TokenizerState::AfterFunctionArg => match c { + ',' => { + token_data.start(self, TokenType::Comma); + token_data.end(self.data, self.pos + 1)?; + self.next_state = Some(TokenizerState::FunctionArg); + TokenizerState::Junk + } + ')' => { + token_data.start(self, TokenType::Paren); + token_data.end(self.data, self.pos + 1)?; + self.next_state = Some(TokenizerState::AfterFunction); + TokenizerState::Junk + } + _ => { + return Err(PrefReaderError::new( + "Unexpected character after function argument".into(), + self.position, + None, + )) + } + }, + TokenizerState::AfterFunction => match c { + ';' => { + token_data.start(self, TokenType::Semicolon); + token_data.end(self.data, self.pos)?; + self.next_state = Some(TokenizerState::FunctionName); + TokenizerState::Junk + } + _ => { + return Err(PrefReaderError::new( + "Unexpected character after function".into(), + self.position, + None, + )) + } + }, + TokenizerState::Error => TokenizerState::Error, + }; + if token_data.complete { + return Ok(Some(token_data)); + } + } + } +} + +impl<'a> Iterator for PrefTokenizer<'a> { + type Item = PrefToken<'a>; + + fn next(&mut self) -> Option> { + if let TokenizerState::Error = self.state { + return None; + } + let token_data = match self.next_token() { + Err(e) => { + self.state = TokenizerState::Error; + return Some(PrefToken::Error(e.message.clone(), e.position)); + } + Ok(Some(token_data)) => token_data, + Ok(None) => return None, + }; + let token = self.make_token(token_data); + Some(token) + } +} + +pub fn tokenize(data: &[u8]) -> PrefTokenizer { + PrefTokenizer::new(data) +} + +pub fn serialize_token(token: &PrefToken, output: &mut T) -> Result<(), PrefReaderError> { + let mut data_buf = String::new(); + + let data = match *token { + PrefToken::PrefFunction(_) => "pref", + PrefToken::UserPrefFunction(_) => "user_pref", + PrefToken::StickyPrefFunction(_) => "sticky_pref", + PrefToken::CommentBlock(ref data, _) => { + data_buf.reserve(data.len() + 4); + data_buf.push_str("/*"); + data_buf.push_str(data.borrow()); + data_buf.push('*'); + &*data_buf + } + PrefToken::CommentLine(ref data, _) => { + data_buf.reserve(data.len() + 2); + data_buf.push_str("//"); + data_buf.push_str(data.borrow()); + &*data_buf + } + PrefToken::CommentBashLine(ref data, _) => { + data_buf.reserve(data.len() + 1); + data_buf.push('#'); + data_buf.push_str(data.borrow()); + &*data_buf + } + PrefToken::Paren(data, _) => { + data_buf.push(data); + &*data_buf + } + PrefToken::Comma(_) => ",", + PrefToken::Semicolon(_) => ";\n", + PrefToken::String(ref data, _) => { + data_buf.reserve(data.len() + 2); + data_buf.push('"'); + data_buf.push_str(escape_quote(data.borrow()).borrow()); + data_buf.push('"'); + &*data_buf + } + PrefToken::Int(data, _) => { + data_buf.push_str(&data.to_string()); + &*data_buf + } + PrefToken::Bool(data, _) => { + if data { + "true" + } else { + "false" + } + } + PrefToken::Error(ref data, pos) => { + return Err(PrefReaderError::new(data.clone(), pos, None)) + } + }; + output.write_all(data.as_bytes())?; + Ok(()) +} + +pub fn serialize_tokens<'a, I, W>(tokens: I, output: &mut W) -> Result<(), PrefReaderError> +where + I: Iterator>, + W: Write, +{ + for token in tokens { + serialize_token(token, output)?; + } + Ok(()) +} + +fn escape_quote(data: &str) -> Cow { + // Not very efficient… + if data.contains('"') || data.contains('\\') { + Cow::Owned(data.replace('\\', r"\\").replace('"', r#"\""#)) + } else { + Cow::Borrowed(data) + } +} + +#[derive(Debug, PartialEq)] +enum ParserState { + Function, + Key, + Value, +} + +struct PrefBuilder { + key: Option, + value: Option, + sticky: bool, +} + +impl PrefBuilder { + fn new() -> PrefBuilder { + PrefBuilder { + key: None, + value: None, + sticky: false, + } + } +} + +fn skip_comments<'a>(tokenizer: &mut PrefTokenizer<'a>) -> Option> { + loop { + match tokenizer.next() { + Some(PrefToken::CommentBashLine(_, _)) + | Some(PrefToken::CommentBlock(_, _)) + | Some(PrefToken::CommentLine(_, _)) => {} + Some(x) => return Some(x), + None => return None, + } + } +} + +pub fn parse_tokens(tokenizer: &mut PrefTokenizer<'_>) -> Result { + let mut state = ParserState::Function; + let mut current_pref = PrefBuilder::new(); + let mut rv = Preferences::new(); + + loop { + // Not just using a for loop here seems strange, but this restricts the + // scope of the borrow + let token = { + match tokenizer.next() { + Some(x) => x, + None => break, + } + }; + // First deal with comments and errors + match token { + PrefToken::Error(msg, position) => { + return Err(PrefReaderError::new(msg, position, None)); + } + PrefToken::CommentBashLine(_, _) + | PrefToken::CommentLine(_, _) + | PrefToken::CommentBlock(_, _) => continue, + _ => {} + } + state = match state { + ParserState::Function => { + match token { + PrefToken::PrefFunction(_) => { + current_pref.sticky = false; + } + PrefToken::UserPrefFunction(_) => { + current_pref.sticky = false; + } + PrefToken::StickyPrefFunction(_) => { + current_pref.sticky = true; + } + _ => { + return Err(PrefReaderError::new( + "Expected pref function".into(), + token.position(), + None, + )); + } + } + let next = skip_comments(tokenizer); + match next { + Some(PrefToken::Paren('(', _)) => ParserState::Key, + _ => { + return Err(PrefReaderError::new( + "Expected open paren".into(), + next.map(|x| x.position()).unwrap_or(tokenizer.position), + None, + )) + } + } + } + ParserState::Key => { + match token { + PrefToken::String(data, _) => current_pref.key = Some(data.into_owned()), + _ => { + return Err(PrefReaderError::new( + "Expected string".into(), + token.position(), + None, + )); + } + } + let next = skip_comments(tokenizer); + match next { + Some(PrefToken::Comma(_)) => ParserState::Value, + _ => { + return Err(PrefReaderError::new( + "Expected comma".into(), + next.map(|x| x.position()).unwrap_or(tokenizer.position), + None, + )) + } + } + } + ParserState::Value => { + match token { + PrefToken::String(data, _) => { + current_pref.value = Some(PrefValue::String(data.into_owned())) + } + PrefToken::Int(data, _) => current_pref.value = Some(PrefValue::Int(data)), + PrefToken::Bool(data, _) => current_pref.value = Some(PrefValue::Bool(data)), + _ => { + return Err(PrefReaderError::new( + "Expected value".into(), + token.position(), + None, + )) + } + } + let next = skip_comments(tokenizer); + match next { + Some(PrefToken::Paren(')', _)) => {} + _ => { + return Err(PrefReaderError::new( + "Expected close paren".into(), + next.map(|x| x.position()).unwrap_or(tokenizer.position), + None, + )) + } + } + let next = skip_comments(tokenizer); + match next { + Some(PrefToken::Semicolon(_)) | None => {} + _ => { + return Err(PrefReaderError::new( + "Expected semicolon".into(), + next.map(|x| x.position()).unwrap_or(tokenizer.position), + None, + )) + } + } + let key = current_pref.key.take(); + let value = current_pref.value.take(); + let pref = if current_pref.sticky { + Pref::new_sticky(value.unwrap()) + } else { + Pref::new(value.unwrap()) + }; + rv.insert(key.unwrap(), pref); + current_pref.sticky = false; + ParserState::Function + } + } + } + match state { + ParserState::Key | ParserState::Value => { + return Err(PrefReaderError::new( + "EOF in middle of function".into(), + tokenizer.position, + None, + )); + } + _ => {} + } + Ok(rv) +} + +pub fn serialize(prefs: &Preferences, output: &mut W) -> io::Result<()> { + let mut p: Vec<_> = prefs.iter().collect(); + p.sort_by(|a, b| a.0.cmp(b.0)); + for &(key, pref) in &p { + let func = if pref.sticky { + "sticky_pref(" + } else { + "user_pref(" + } + .as_bytes(); + output.write_all(func)?; + output.write_all(b"\"")?; + output.write_all(escape_quote(key).as_bytes())?; + output.write_all(b"\"")?; + output.write_all(b", ")?; + match pref.value { + PrefValue::Bool(x) => { + output.write_all(if x { b"true" } else { b"false" })?; + } + PrefValue::Int(x) => { + output.write_all(x.to_string().as_bytes())?; + } + PrefValue::String(ref x) => { + output.write_all(b"\"")?; + output.write_all(escape_quote(x).as_bytes())?; + output.write_all(b"\"")?; + } + }; + output.write_all(b");\n")?; + } + Ok(()) +} + +pub fn parse(data: &[u8]) -> Result { + let mut tokenizer = tokenize(data); + parse_tokens(&mut tokenizer) +} diff --git a/testing/mozbase/rust/mozprofile/src/profile.rs b/testing/mozbase/rust/mozprofile/src/profile.rs new file mode 100644 index 0000000000..8da0cdd96a --- /dev/null +++ b/testing/mozbase/rust/mozprofile/src/profile.rs @@ -0,0 +1,135 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +use crate::preferences::{Pref, Preferences}; +use crate::prefreader::{parse, serialize, PrefReaderError}; +use std::collections::btree_map::Iter; +use std::fs::File; +use std::io::prelude::*; +use std::io::Result as IoResult; +use std::path::{Path, PathBuf}; +use tempfile::{Builder, TempDir}; + +#[derive(Debug)] +pub struct Profile { + pub path: PathBuf, + pub temp_dir: Option, + prefs: Option, + user_prefs: Option, +} + +impl PartialEq for Profile { + fn eq(&self, other: &Profile) -> bool { + self.path == other.path + } +} + +impl Profile { + pub fn new(temp_root: Option<&Path>) -> IoResult { + let mut dir_builder = Builder::new(); + dir_builder.prefix("rust_mozprofile"); + let dir = if let Some(temp_root) = temp_root { + dir_builder.tempdir_in(temp_root) + } else { + dir_builder.tempdir() + }?; + let path = dir.path().to_path_buf(); + let temp_dir = Some(dir); + Ok(Profile { + path, + temp_dir, + prefs: None, + user_prefs: None, + }) + } + + pub fn new_from_path(p: &Path) -> IoResult { + let path = p.to_path_buf(); + let temp_dir = None; + Ok(Profile { + path, + temp_dir, + prefs: None, + user_prefs: None, + }) + } + + pub fn prefs(&mut self) -> Result<&mut PrefFile, PrefReaderError> { + if self.prefs.is_none() { + let mut pref_path = PathBuf::from(&self.path); + pref_path.push("prefs.js"); + self.prefs = Some(PrefFile::new(pref_path)?) + }; + // This error handling doesn't make much sense + Ok(self.prefs.as_mut().unwrap()) + } + + pub fn user_prefs(&mut self) -> Result<&mut PrefFile, PrefReaderError> { + if self.user_prefs.is_none() { + let mut pref_path = PathBuf::from(&self.path); + pref_path.push("user.js"); + self.user_prefs = Some(PrefFile::new(pref_path)?) + }; + // This error handling doesn't make much sense + Ok(self.user_prefs.as_mut().unwrap()) + } +} + +#[derive(Debug)] +pub struct PrefFile { + pub path: PathBuf, + pub prefs: Preferences, +} + +impl PrefFile { + pub fn new(path: PathBuf) -> Result { + let prefs = if !path.exists() { + Preferences::new() + } else { + let mut f = File::open(&path)?; + let mut buf = String::with_capacity(4096); + f.read_to_string(&mut buf)?; + parse(buf.as_bytes())? + }; + + Ok(PrefFile { path, prefs }) + } + + pub fn write(&self) -> IoResult<()> { + let mut f = File::create(&self.path)?; + serialize(&self.prefs, &mut f) + } + + pub fn insert_slice(&mut self, preferences: &[(K, Pref)]) + where + K: Into + Clone, + { + for (name, value) in preferences.iter() { + self.insert((*name).clone(), (*value).clone()); + } + } + + pub fn insert(&mut self, key: K, value: Pref) + where + K: Into, + { + self.prefs.insert(key.into(), value); + } + + pub fn remove(&mut self, key: &str) -> Option { + self.prefs.remove(key) + } + + pub fn get(&mut self, key: &str) -> Option<&Pref> { + self.prefs.get(key) + } + + pub fn contains_key(&self, key: &str) -> bool { + self.prefs.contains_key(key) + } + + pub fn iter(&self) -> Iter { + self.prefs.iter() + } +} -- cgit v1.2.3