diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
commit | 698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch) | |
tree | 173a775858bd501c378080a10dca74132f05bc50 /vendor/minifier/src | |
parent | Initial commit. (diff) | |
download | rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip |
Adding upstream version 1.64.0+dfsg1.upstream/1.64.0+dfsg1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/minifier/src')
-rw-r--r-- | vendor/minifier/src/css/mod.rs | 40 | ||||
-rw-r--r-- | vendor/minifier/src/css/tests.rs | 286 | ||||
-rw-r--r-- | vendor/minifier/src/css/token.rs | 875 | ||||
-rw-r--r-- | vendor/minifier/src/html.rs | 233 | ||||
-rw-r--r-- | vendor/minifier/src/js/mod.rs | 17 | ||||
-rw-r--r-- | vendor/minifier/src/js/token.rs | 1431 | ||||
-rw-r--r-- | vendor/minifier/src/js/tools.rs | 1156 | ||||
-rw-r--r-- | vendor/minifier/src/js/utils.rs | 575 | ||||
-rw-r--r-- | vendor/minifier/src/json/json_minifier.rs | 50 | ||||
-rw-r--r-- | vendor/minifier/src/json/mod.rs | 114 | ||||
-rw-r--r-- | vendor/minifier/src/json/read/byte_to_char.rs | 132 | ||||
-rw-r--r-- | vendor/minifier/src/json/read/internal_buffer.rs | 44 | ||||
-rw-r--r-- | vendor/minifier/src/json/read/internal_reader.rs | 63 | ||||
-rw-r--r-- | vendor/minifier/src/json/read/json_read.rs | 106 | ||||
-rw-r--r-- | vendor/minifier/src/json/string.rs | 100 | ||||
-rw-r--r-- | vendor/minifier/src/lib.rs | 10 | ||||
-rw-r--r-- | vendor/minifier/src/main.rs | 94 |
17 files changed, 5326 insertions, 0 deletions
diff --git a/vendor/minifier/src/css/mod.rs b/vendor/minifier/src/css/mod.rs new file mode 100644 index 000000000..224ad8126 --- /dev/null +++ b/vendor/minifier/src/css/mod.rs @@ -0,0 +1,40 @@ +// Take a look at the license at the top of the repository in the LICENSE file. + +use std::{fmt, io}; + +mod token; + +/// Minifies a given CSS source code. +/// +/// # Example +/// +/// ```rust +/// use minifier::css::minify; +/// +/// let css = r#" +/// .foo > p { +/// color: red; +/// }"#.into(); +/// let css_minified = minify(css).expect("minification failed"); +/// assert_eq!(&css_minified.to_string(), ".foo>p{color:red;}"); +/// ``` +pub fn minify<'a>(content: &'a str) -> Result<Minified<'a>, &'static str> { + token::tokenize(content).map(Minified) +} + +pub struct Minified<'a>(token::Tokens<'a>); + +impl<'a> Minified<'a> { + pub fn write<W: io::Write>(self, w: W) -> io::Result<()> { + self.0.write(w) + } +} + +impl<'a> fmt::Display for Minified<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} + +#[cfg(test)] +mod tests; diff --git a/vendor/minifier/src/css/tests.rs b/vendor/minifier/src/css/tests.rs new file mode 100644 index 000000000..dd696afde --- /dev/null +++ b/vendor/minifier/src/css/tests.rs @@ -0,0 +1,286 @@ +// Take a look at the license at the top of the repository in the LICENSE file. + +use crate::css::minify; + +/*enum Element { + /// Rule starting with `@`: + /// + /// * charset + /// * font-face + /// * import + /// * keyframes + /// * media + AtRule(AtRule<'a>), + /// Any "normal" CSS rule block. + /// + /// Contains the selector(s) and its content. + ElementRule(Vec<&'a str>, Vec<Property<'a>>), +} + +fn get_property<'a>(source: &'a str, iterator: &mut Peekable<CharIndices>, + start_pos: &mut usize) -> Option<Property<'a>> { + let mut end_pos = None; + // First we get the property name. 
+ while let Some((pos, c)) = iterator.next() { + if let Ok(c) = ReservedChar::try_from(c) { + if c.is_useless() { + continue + } else if c == ReservedChar::OpenCurlyBrace { + return None + } else if c == ReservedChar::Colon { + end_pos = Some(pos); + break + } else { // Invalid character. + return None; + } + } else if (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '-' { + // everything's fine for now... + } else { + return None; // invalid character + } + } + if end_pos.is_none() || end_pos == Some(*start_pos + 1) { + return None; + } + while let Some((pos, c)) = iterator.next() { + if let Ok(c) = ReservedChar::try_from(c) { + if c == ReservedChar::DoubleQuote || c == ReservedChar::Quote { + get_string(source, iterator, &mut 0, c) + } else if c == ReservedChar::SemiColon { + // we reached the end! + let end_pos = end_pos.unwrap(); + *start_pos = pos; + return Property { + name: &source[start_pos..end_pos], + value: &source[end_pos..pos], + } + } + } + } + None +} + +enum Selector<'a> { + Tag(&'a str), + /// '.' + Class(&'a str), + /// '#' + Id(&'a str), + /// '<', '>', '(', ')', '+', ' ', '[', ']' + Operator(char), +} + +struct ElementRule<'a> { + selectors: Vec<Selector<'a>>, + properties: Vec<Property<'a>>, +} + +fn get_element_rule<'a>(source: &'a str, iterator: &mut Peekable<CharIndices>, + c: char) -> Option<Token<'a>> { + let mut selectors = Vec::with_capacity(2); + + while let Some(s) = get_next_selector(source, iterator, c) { + if !selectors.is_empty() || !s.empty_operator() { + } + selectors.push(s); + } +} + +fn get_media_query<'a>(source: &'a str, iterator: &mut Peekable<CharIndices>, + start_pos: &mut usize) -> Option<Token<'a>> { + while let Some((pos, c)) = iterator.next() { + if c == '{' { + ; + } + } + None // An error occurred, sad life... 
+} + + +fn get_properties<'a>(source: &'a str, iterator: &mut Peekable<CharIndices>, + start_pos: &mut usize) -> Vec<Property> { + let mut ret = Vec::with_capacity(2); + while let Some(property) = get_property(source, iterator, start_pos) { + ret.push(property); + } + ret +} + +pub struct Property<'a> { + name: &'a str, + value: &'a str, +} + +pub enum AtRule<'a> { + /// Contains the charset. Supposed to be the first rule in the style sheet and be present + /// only once. + Charset(&'a str), + /// font-face rule. + FontFace(Vec<Property<'a>>), + /// Contains the import. + Import(&'a str), + /// Contains the rule and the block. + Keyframes(&'a str, Tokens<'a>), + /// Contains the rules and the block. + Media(Vec<&'a str>, Tokens<'a>), +} + +impl fmt::Display for AtRule { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "@{}", &match *self { + AtRule::Charset(c) => format!("charset {};", c), + AtRule::FontFace(t) => format!("font-face {{{}}};", t), + AtRule::Import(i) => format!("import {};", i), + AtRule::Keyframes(r, t) => format!("keyframes {} {{{}}}", r, t), + AtRule::Media(r, t) => format!("media {} {{{}}}", r.join(" ").collect::<String>(), t), + }) + } +}*/ + +#[test] +fn check_minification() { + let s = r#" +/** Baguette! */ +.b > p + div:hover { + background: #fff; +} + +a[target = "_blank"] { + /* I like weird tests. */ + border: 1px solid yellow ; +} +"#; + let expected = r#"/*! Baguette! 
*/ +.b>p+div:hover{background:#fff;}a[target="_blank"]{border:1px solid yellow;}"#; + assert_eq!(minify(s).expect("minify failed").to_string(), expected); +} + +#[test] +fn check_minification2() { + let s = r#" +h2, h3:not(.impl):not(.method):not(.type) { + background-color: #0a042f !important; +} + +:target { background: #494a3d; } + +.table-display tr td:first-child { + float: right; +} + +/* just some + * long + * + * very + * long + * comment :) + */ +@media (max-width: 700px) { + .theme-picker { + left: 10px; + top: 54px; + z-index: 1; + background-color: rgba(0, 0 , 0 , 0); + font: 15px "SFMono-Regular", Consolas, "Liberation Mono", Menlo, Courier, monospace; + } +}"#; + let expected = "h2,h3:not(.impl):not(.method):not(.type){background-color:#0a042f !important;}\ + :target{background:#494a3d;}.table-display tr td:first-child{float:right;}\ + @media (max-width:700px){.theme-picker{left:10px;top:54px;z-index:1;\ + background-color:rgba(0,0,0,0);font:15px \"SFMono-Regular\",Consolas,\ + \"Liberation Mono\",Menlo,Courier,monospace;}}"; + assert_eq!(minify(s).expect("minify failed").to_string(), expected); +} + +#[test] +fn check_calc() { + let s = ".foo { width: calc(100% - 34px); }"; + let expected = ".foo{width:calc(100% - 34px);}"; + assert_eq!(minify(s).expect("minify failed").to_string(), expected); +} + +#[test] +fn check_spaces() { + let s = ".line-numbers .line-highlighted { color: #0a042f !important; }"; + let expected = ".line-numbers .line-highlighted{color:#0a042f !important;}"; + assert_eq!(minify(s).expect("minify failed").to_string(), expected); +} + +#[test] +fn check_space_after_paren() { + let s = ".docblock:not(.type-decl) a:not(.srclink) {}"; + let expected = ".docblock:not(.type-decl) a:not(.srclink){}"; + assert_eq!(minify(s).expect("minify failed").to_string(), expected); +} + +#[test] +fn check_space_after_and() { + let s = "@media only screen and (max-width : 600px) {}"; + let expected = "@media only screen and (max-width:600px){}"; + 
assert_eq!(minify(s).expect("minify failed").to_string(), expected); +} + +#[test] +fn check_space_after_or_not() { + let s = "@supports not ((text-align-last: justify) or (-moz-text-align-last: justify)) {}"; + let expected = "@supports not ((text-align-last:justify) or (-moz-text-align-last:justify)){}"; + assert_eq!(minify(s).expect("minify failed").to_string(), expected); +} + +#[test] +fn check_space_after_brackets() { + let s = "#main[data-behavior = \"1\"] {}"; + let expected = "#main[data-behavior=\"1\"]{}"; + assert_eq!(minify(s).expect("minify failed").to_string(), expected); + + let s = "#main[data-behavior = \"1\"] .aclass"; + let expected = "#main[data-behavior=\"1\"] .aclass"; + assert_eq!(minify(s).expect("minify failed").to_string(), expected); + + let s = "#main[data-behavior = \"1\"] ul.aclass"; + let expected = "#main[data-behavior=\"1\"] ul.aclass"; + assert_eq!(minify(s).expect("minify failed").to_string(), expected); +} + +#[test] +fn check_whitespaces_in_calc() { + let s = ".foo { width: calc(130px + 10%); }"; + let expected = ".foo{width:calc(130px + 10%);}"; + assert_eq!(minify(s).expect("minify failed").to_string(), expected); + + let s = ".foo { width: calc(130px + (45% - 10% + (12 * 2px))); }"; + let expected = ".foo{width:calc(130px + (45% - 10% + (12 * 2px)));}"; + assert_eq!(minify(s).expect("minify failed").to_string(), expected); +} + +#[test] +fn check_weird_comments() { + let s = ".test1 { + font-weight: 30em; +}/**/ +.test2 { + font-weight: 30em; +}/**/ +.test3 { + font-weight: 30em; +}/**/"; + let expected = ".test1{font-weight:30em;}.test2{font-weight:30em;}.test3{font-weight:30em;}"; + assert_eq!(minify(s).expect("minify failed").to_string(), expected); +} + +#[test] +fn check_slash_slash() { + let s = "body { + background-image: url(data:image/webp;base64,c//S4KP//ZZ/19Uj/UA==); +}"; + let expected = "body{background-image:url(data:image/webp;base64,c//S4KP//ZZ/19Uj/UA==);}"; + assert_eq!(minify(s).expect("minify 
failed").to_string(), expected); +} + +#[test] +fn issue_80() { + assert_eq!( + minify("@import 'i';t{x: #fff;}").unwrap().to_string(), + "@import 'i';t{x:#fff;}", + ); +} diff --git a/vendor/minifier/src/css/token.rs b/vendor/minifier/src/css/token.rs new file mode 100644 index 000000000..d2d738840 --- /dev/null +++ b/vendor/minifier/src/css/token.rs @@ -0,0 +1,875 @@ +// Take a look at the license at the top of the repository in the LICENSE file. + +use std::convert::TryFrom; +use std::fmt; +use std::iter::Peekable; +use std::str::CharIndices; + +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum ReservedChar { + Comma, + SuperiorThan, + OpenParenthese, + CloseParenthese, + OpenCurlyBrace, + CloseCurlyBrace, + OpenBracket, + CloseBracket, + Colon, + SemiColon, + Slash, + Plus, + EqualSign, + Space, + Tab, + Backline, + Star, + Quote, + DoubleQuote, + Pipe, + Tilde, + Dollar, + Circumflex, +} + +impl fmt::Display for ReservedChar { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{}", + match *self { + ReservedChar::Comma => ',', + ReservedChar::OpenParenthese => '(', + ReservedChar::CloseParenthese => ')', + ReservedChar::OpenCurlyBrace => '{', + ReservedChar::CloseCurlyBrace => '}', + ReservedChar::OpenBracket => '[', + ReservedChar::CloseBracket => ']', + ReservedChar::Colon => ':', + ReservedChar::SemiColon => ';', + ReservedChar::Slash => '/', + ReservedChar::Star => '*', + ReservedChar::Plus => '+', + ReservedChar::EqualSign => '=', + ReservedChar::Space => ' ', + ReservedChar::Tab => '\t', + ReservedChar::Backline => '\n', + ReservedChar::SuperiorThan => '>', + ReservedChar::Quote => '\'', + ReservedChar::DoubleQuote => '"', + ReservedChar::Pipe => '|', + ReservedChar::Tilde => '~', + ReservedChar::Dollar => '$', + ReservedChar::Circumflex => '^', + } + ) + } +} + +impl TryFrom<char> for ReservedChar { + type Error = &'static str; + + fn try_from(value: char) -> Result<ReservedChar, Self::Error> { + match value { + '\'' => 
Ok(ReservedChar::Quote), + '"' => Ok(ReservedChar::DoubleQuote), + ',' => Ok(ReservedChar::Comma), + '(' => Ok(ReservedChar::OpenParenthese), + ')' => Ok(ReservedChar::CloseParenthese), + '{' => Ok(ReservedChar::OpenCurlyBrace), + '}' => Ok(ReservedChar::CloseCurlyBrace), + '[' => Ok(ReservedChar::OpenBracket), + ']' => Ok(ReservedChar::CloseBracket), + ':' => Ok(ReservedChar::Colon), + ';' => Ok(ReservedChar::SemiColon), + '/' => Ok(ReservedChar::Slash), + '*' => Ok(ReservedChar::Star), + '+' => Ok(ReservedChar::Plus), + '=' => Ok(ReservedChar::EqualSign), + ' ' => Ok(ReservedChar::Space), + '\t' => Ok(ReservedChar::Tab), + '\n' | '\r' => Ok(ReservedChar::Backline), + '>' => Ok(ReservedChar::SuperiorThan), + '|' => Ok(ReservedChar::Pipe), + '~' => Ok(ReservedChar::Tilde), + '$' => Ok(ReservedChar::Dollar), + '^' => Ok(ReservedChar::Circumflex), + _ => Err("Unknown reserved char"), + } + } +} + +impl ReservedChar { + fn is_useless(&self) -> bool { + *self == ReservedChar::Space + || *self == ReservedChar::Tab + || *self == ReservedChar::Backline + } + + fn is_operator(&self) -> bool { + Operator::try_from(*self).is_ok() + } +} + +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum Operator { + Plus, + Multiply, + Minus, + Modulo, + Divide, +} + +impl fmt::Display for Operator { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{}", + match *self { + Operator::Plus => '+', + Operator::Multiply => '*', + Operator::Minus => '-', + Operator::Modulo => '%', + Operator::Divide => '/', + } + ) + } +} + +impl TryFrom<char> for Operator { + type Error = &'static str; + + fn try_from(value: char) -> Result<Operator, Self::Error> { + match value { + '+' => Ok(Operator::Plus), + '*' => Ok(Operator::Multiply), + '-' => Ok(Operator::Minus), + '%' => Ok(Operator::Modulo), + '/' => Ok(Operator::Divide), + _ => Err("Unknown operator"), + } + } +} + +impl TryFrom<ReservedChar> for Operator { + type Error = &'static str; + + fn try_from(value: 
ReservedChar) -> Result<Operator, Self::Error> { + match value { + ReservedChar::Slash => Ok(Operator::Divide), + ReservedChar::Star => Ok(Operator::Multiply), + ReservedChar::Plus => Ok(Operator::Plus), + _ => Err("Unknown operator"), + } + } +} + +#[derive(Eq, PartialEq, Clone, Debug)] +pub enum SelectorElement<'a> { + PseudoClass(&'a str), + Class(&'a str), + Id(&'a str), + Tag(&'a str), + Media(&'a str), +} + +impl<'a> TryFrom<&'a str> for SelectorElement<'a> { + type Error = &'static str; + + fn try_from(value: &'a str) -> Result<SelectorElement<'_>, Self::Error> { + if let Some(value) = value.strip_prefix('.') { + if value.is_empty() { + Err("cannot determine selector") + } else { + Ok(SelectorElement::Class(value)) + } + } else if let Some(value) = value.strip_prefix('#') { + if value.is_empty() { + Err("cannot determine selector") + } else { + Ok(SelectorElement::Id(value)) + } + } else if let Some(value) = value.strip_prefix('@') { + if value.is_empty() { + Err("cannot determine selector") + } else { + Ok(SelectorElement::Media(value)) + } + } else if let Some(value) = value.strip_prefix(':') { + if value.is_empty() { + Err("cannot determine selector") + } else { + Ok(SelectorElement::PseudoClass(value)) + } + } else if value.chars().next().unwrap_or(' ').is_alphabetic() { + Ok(SelectorElement::Tag(value)) + } else { + Err("unknown selector") + } + } +} + +impl<'a> fmt::Display for SelectorElement<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match *self { + SelectorElement::Class(c) => write!(f, ".{}", c), + SelectorElement::Id(i) => write!(f, "#{}", i), + SelectorElement::Tag(t) => write!(f, "{}", t), + SelectorElement::Media(m) => write!(f, "@{} ", m), + SelectorElement::PseudoClass(pc) => write!(f, ":{}", pc), + } + } +} + +#[derive(Eq, PartialEq, Clone, Debug, Copy)] +pub enum SelectorOperator { + /// `~=` + OneAttributeEquals, + /// `|=` + EqualsOrStartsWithFollowedByDash, + /// `$=` + EndsWith, + /// `^=` + FirstStartsWith, + 
/// `*=` + Contains, +} + +impl fmt::Display for SelectorOperator { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match *self { + SelectorOperator::OneAttributeEquals => write!(f, "~="), + SelectorOperator::EqualsOrStartsWithFollowedByDash => write!(f, "|="), + SelectorOperator::EndsWith => write!(f, "$="), + SelectorOperator::FirstStartsWith => write!(f, "^="), + SelectorOperator::Contains => write!(f, "*="), + } + } +} + +#[derive(Eq, PartialEq, Clone, Debug)] +pub enum Token<'a> { + /// Comment. + Comment(&'a str), + /// Comment starting with `/**`. + License(&'a str), + Char(ReservedChar), + Other(&'a str), + SelectorElement(SelectorElement<'a>), + String(&'a str), + SelectorOperator(SelectorOperator), + Operator(Operator), +} + +impl<'a> fmt::Display for Token<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match *self { + // Token::AtRule(at_rule) => write!(f, "{}", at_rule, content), + // Token::ElementRule(selectors) => write!(f, "{}", x), + Token::Comment(c) => write!(f, "{}", c), + Token::License(l) => writeln!(f, "/*!{}*/", l), + Token::Char(c) => write!(f, "{}", c), + Token::Other(s) => write!(f, "{}", s), + Token::SelectorElement(ref se) => write!(f, "{}", se), + Token::String(s) => write!(f, "{}", s), + Token::SelectorOperator(so) => write!(f, "{}", so), + Token::Operator(op) => write!(f, "{}", op), + } + } +} + +impl<'a> Token<'a> { + fn is_comment(&self) -> bool { + matches!(*self, Token::Comment(_)) + } + + fn is_char(&self) -> bool { + matches!(*self, Token::Char(_)) + } + + fn get_char(&self) -> Option<ReservedChar> { + match *self { + Token::Char(c) => Some(c), + _ => None, + } + } + + fn is_useless(&self) -> bool { + match *self { + Token::Char(c) => c.is_useless(), + _ => false, + } + } + + fn is_a_media(&self) -> bool { + matches!(*self, Token::SelectorElement(SelectorElement::Media(_))) + } + + fn is_a_license(&self) -> bool { + matches!(*self, Token::License(_)) + } + + fn is_operator(&self) -> bool { + 
match *self { + Token::Operator(_) => true, + Token::Char(c) => c.is_operator(), + _ => false, + } + } +} + +impl<'a> PartialEq<ReservedChar> for Token<'a> { + fn eq(&self, other: &ReservedChar) -> bool { + match *self { + Token::Char(c) => c == *other, + _ => false, + } + } +} + +fn get_comment<'a>( + source: &'a str, + iterator: &mut Peekable<CharIndices<'_>>, + start_pos: &mut usize, +) -> Option<Token<'a>> { + let mut prev = ReservedChar::Quote; + *start_pos += 1; + let builder = if let Some((_, c)) = iterator.next() { + if c == '!' || (c == '*' && iterator.peek().map(|(_, c)| c) != Some(&'/')) { + *start_pos += 1; + Token::License + } else { + if let Ok(c) = ReservedChar::try_from(c) { + prev = c; + } + Token::Comment + } + } else { + Token::Comment + }; + + for (pos, c) in iterator { + if let Ok(c) = ReservedChar::try_from(c) { + if c == ReservedChar::Slash && prev == ReservedChar::Star { + let ret = Some(builder(&source[*start_pos..pos - 1])); + *start_pos = pos; + return ret; + } + prev = c; + } else { + prev = ReservedChar::Space; + } + } + None +} + +fn get_string<'a>( + source: &'a str, + iterator: &mut Peekable<CharIndices<'_>>, + start_pos: &mut usize, + start: ReservedChar, +) -> Option<Token<'a>> { + while let Some((pos, c)) = iterator.next() { + if c == '\\' { + // we skip next character + iterator.next(); + continue; + } + if let Ok(c) = ReservedChar::try_from(c) { + if c == start { + let ret = Some(Token::String(&source[*start_pos..pos + 1])); + *start_pos = pos; + return ret; + } + } + } + None +} + +fn fill_other<'a>( + source: &'a str, + v: &mut Vec<Token<'a>>, + start: usize, + pos: usize, + is_in_block: isize, + is_in_media: bool, + is_in_attribute_selector: bool, +) { + if start < pos { + if !is_in_attribute_selector + && ((is_in_block == 0 && !is_in_media) || (is_in_media && is_in_block == 1)) + { + let mut is_pseudo_class = false; + let mut add = 0; + if let Some(&Token::Char(ReservedChar::Colon)) = v.last() { + is_pseudo_class = true; + 
add = 1; + } + if let Ok(s) = SelectorElement::try_from(&source[start - add..pos]) { + if is_pseudo_class { + v.pop(); + } + v.push(Token::SelectorElement(s)); + } else { + let s = &source[start..pos]; + if !s.starts_with(':') + && !s.starts_with('.') + && !s.starts_with('#') + && !s.starts_with('@') + { + v.push(Token::Other(s)); + } + } + } else { + v.push(Token::Other(&source[start..pos])); + } + } +} + +#[allow(clippy::comparison_chain)] +pub(super) fn tokenize<'a>(source: &'a str) -> Result<Tokens<'a>, &'static str> { + let mut v = Vec::with_capacity(1000); + let mut iterator = source.char_indices().peekable(); + let mut start = 0; + let mut is_in_block: isize = 0; + let mut is_in_media = false; + let mut is_in_attribute_selector = false; + + loop { + let (mut pos, c) = match iterator.next() { + Some(x) => x, + None => { + fill_other( + source, + &mut v, + start, + source.len(), + is_in_block, + is_in_media, + is_in_attribute_selector, + ); + break; + } + }; + if let Ok(c) = ReservedChar::try_from(c) { + fill_other( + source, + &mut v, + start, + pos, + is_in_block, + is_in_media, + is_in_attribute_selector, + ); + is_in_media = is_in_media + || v.last() + .unwrap_or(&Token::Char(ReservedChar::Space)) + .is_a_media(); + match c { + ReservedChar::Quote | ReservedChar::DoubleQuote => { + if let Some(s) = get_string(source, &mut iterator, &mut pos, c) { + v.push(s); + } + } + ReservedChar::Star + if *v.last().unwrap_or(&Token::Char(ReservedChar::Space)) + == ReservedChar::Slash => + { + v.pop(); + if let Some(s) = get_comment(source, &mut iterator, &mut pos) { + v.push(s); + } + } + ReservedChar::OpenBracket => { + if is_in_attribute_selector { + return Err("Already in attribute selector"); + } + is_in_attribute_selector = true; + v.push(Token::Char(c)); + } + ReservedChar::CloseBracket => { + if !is_in_attribute_selector { + return Err("Unexpected ']'"); + } + is_in_attribute_selector = false; + v.push(Token::Char(c)); + } + ReservedChar::OpenCurlyBrace => { + 
is_in_block += 1; + v.push(Token::Char(c)); + } + ReservedChar::CloseCurlyBrace => { + is_in_block -= 1; + if is_in_block < 0 { + return Err("Too much '}'"); + } else if is_in_block == 0 { + is_in_media = false; + } + v.push(Token::Char(c)); + } + ReservedChar::SemiColon if is_in_block == 0 => { + is_in_media = false; + v.push(Token::Char(c)); + } + ReservedChar::EqualSign => { + match match v + .last() + .unwrap_or(&Token::Char(ReservedChar::Space)) + .get_char() + .unwrap_or(ReservedChar::Space) + { + ReservedChar::Tilde => Some(SelectorOperator::OneAttributeEquals), + ReservedChar::Pipe => { + Some(SelectorOperator::EqualsOrStartsWithFollowedByDash) + } + ReservedChar::Dollar => Some(SelectorOperator::EndsWith), + ReservedChar::Circumflex => Some(SelectorOperator::FirstStartsWith), + ReservedChar::Star => Some(SelectorOperator::Contains), + _ => None, + } { + Some(r) => { + v.pop(); + v.push(Token::SelectorOperator(r)); + } + None => v.push(Token::Char(c)), + } + } + c if !c.is_useless() => { + v.push(Token::Char(c)); + } + c => { + if !v + .last() + .unwrap_or(&Token::Char(ReservedChar::Space)) + .is_useless() + && (!v + .last() + .unwrap_or(&Token::Char(ReservedChar::OpenCurlyBrace)) + .is_char() + || v.last() + .unwrap_or(&Token::Char(ReservedChar::OpenCurlyBrace)) + .is_operator() + || v.last() + .unwrap_or(&Token::Char(ReservedChar::OpenCurlyBrace)) + .get_char() + == Some(ReservedChar::CloseParenthese) + || v.last() + .unwrap_or(&Token::Char(ReservedChar::OpenCurlyBrace)) + .get_char() + == Some(ReservedChar::CloseBracket)) + { + v.push(Token::Char(ReservedChar::Space)); + } else if let Ok(op) = Operator::try_from(c) { + v.push(Token::Operator(op)); + } + } + } + start = pos + 1; + } + } + Ok(Tokens(clean_tokens(v))) +} + +fn clean_tokens(mut v: Vec<Token<'_>>) -> Vec<Token<'_>> { + let mut i = 0; + let mut is_in_calc = false; + let mut paren = 0; + + while i < v.len() { + if v[i] == Token::Other("calc") { + is_in_calc = true; + } else if is_in_calc { + if 
v[i] == Token::Char(ReservedChar::CloseParenthese) { + paren -= 1; + is_in_calc = paren != 0; + } else if v[i] == Token::Char(ReservedChar::OpenParenthese) { + paren += 1; + } + } + + if v[i].is_useless() { + if i > 0 && v[i - 1] == Token::Char(ReservedChar::CloseBracket) { + if i + 1 < v.len() + && (v[i + 1].is_useless() + || v[i + 1] == Token::Char(ReservedChar::OpenCurlyBrace)) + { + v.remove(i); + continue; + } + } else if i > 0 + && (v[i - 1] == Token::Other("and") + || v[i - 1] == Token::Other("or") + || v[i - 1] == Token::Other("not")) + { + // retain the space after "and", "or" or "not" + } else if (is_in_calc && v[i - 1].is_useless()) + || !is_in_calc + && ((i > 0 + && ((v[i - 1].is_char() + && v[i - 1] != Token::Char(ReservedChar::CloseParenthese)) + || v[i - 1].is_a_media() + || v[i - 1].is_a_license())) + || (i < v.len() - 1 && v[i + 1].is_char())) + { + v.remove(i); + continue; + } + } else if v[i].is_comment() { + v.remove(i); + continue; + } + i += 1; + } + v +} + +#[derive(Debug, PartialEq, Eq, Clone)] +pub(super) struct Tokens<'a>(Vec<Token<'a>>); + +impl<'a> Tokens<'a> { + pub(super) fn write<W: std::io::Write>(self, mut w: W) -> std::io::Result<()> { + for token in self.0.iter() { + write!(w, "{}", token)?; + } + Ok(()) + } +} + +impl<'a> fmt::Display for Tokens<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for token in self.0.iter() { + write!(f, "{}", token)?; + } + Ok(()) + } +} + +#[test] +fn css_basic() { + let s = r#" +/*! just some license */ +.foo > #bar p:hover { + color: blue; + background: "blue"; +} + +/* a comment! 
*/ +@media screen and (max-width: 640px) { + .block:hover { + display: block; + } +}"#; + let expected = vec![ + Token::License(" just some license "), + Token::SelectorElement(SelectorElement::Class("foo")), + Token::Char(ReservedChar::SuperiorThan), + Token::SelectorElement(SelectorElement::Id("bar")), + Token::Char(ReservedChar::Space), + Token::SelectorElement(SelectorElement::Tag("p")), + Token::SelectorElement(SelectorElement::PseudoClass("hover")), + Token::Char(ReservedChar::OpenCurlyBrace), + Token::Other("color"), + Token::Char(ReservedChar::Colon), + Token::Other("blue"), + Token::Char(ReservedChar::SemiColon), + Token::Other("background"), + Token::Char(ReservedChar::Colon), + Token::String("\"blue\""), + Token::Char(ReservedChar::SemiColon), + Token::Char(ReservedChar::CloseCurlyBrace), + Token::SelectorElement(SelectorElement::Media("media")), + Token::Other("screen"), + Token::Char(ReservedChar::Space), + Token::Other("and"), + Token::Char(ReservedChar::Space), + Token::Char(ReservedChar::OpenParenthese), + Token::Other("max-width"), + Token::Char(ReservedChar::Colon), + Token::Other("640px"), + Token::Char(ReservedChar::CloseParenthese), + Token::Char(ReservedChar::OpenCurlyBrace), + Token::SelectorElement(SelectorElement::Class("block")), + Token::SelectorElement(SelectorElement::PseudoClass("hover")), + Token::Char(ReservedChar::OpenCurlyBrace), + Token::Other("display"), + Token::Char(ReservedChar::Colon), + Token::Other("block"), + Token::Char(ReservedChar::SemiColon), + Token::Char(ReservedChar::CloseCurlyBrace), + Token::Char(ReservedChar::CloseCurlyBrace), + ]; + assert_eq!(tokenize(s), Ok(Tokens(expected))); +} + +#[test] +fn elem_selector() { + let s = r#" +/** just some license */ +a[href*="example"] { + background: yellow; +} +a[href$=".org"] { + font-style: italic; +} +span[lang|="zh"] { + color: red; +} +a[href^="/"] { + background-color: gold; +} +div[value~="test"] { + border-width: 1px; +} +span[lang="pt"] { + font-size: 12em; /* I 
// Take a look at the license at the top of the repository in the LICENSE file.

#[test]
fn check_media() {
    // An `@media` at-rule: the query and the inner declarations are expected
    // to come out as one flat token stream.
    let source = "@media (max-width: 700px) { color: red; }";

    let tokens = vec![
        Token::SelectorElement(SelectorElement::Media("media")),
        Token::Char(ReservedChar::OpenParenthese),
        Token::Other("max-width"),
        Token::Char(ReservedChar::Colon),
        Token::Other("700px"),
        Token::Char(ReservedChar::CloseParenthese),
        Token::Char(ReservedChar::OpenCurlyBrace),
        Token::SelectorElement(SelectorElement::Tag("color")),
        Token::Char(ReservedChar::Colon),
        Token::Other("red"),
        Token::Char(ReservedChar::SemiColon),
        Token::Char(ReservedChar::CloseCurlyBrace),
    ];

    assert_eq!(tokenize(source), Ok(Tokens(tokens)));
}

#[test]
fn check_supports() {
    // An `@supports` at-rule containing a nested rule block.
    let source = "@supports not (display: grid) { div { float: right; } }";

    let tokens = vec![
        Token::SelectorElement(SelectorElement::Media("supports")),
        Token::Other("not"),
        Token::Char(ReservedChar::Space),
        Token::Char(ReservedChar::OpenParenthese),
        Token::Other("display"),
        Token::Char(ReservedChar::Colon),
        Token::Other("grid"),
        Token::Char(ReservedChar::CloseParenthese),
        Token::Char(ReservedChar::OpenCurlyBrace),
        Token::SelectorElement(SelectorElement::Tag("div")),
        Token::Char(ReservedChar::OpenCurlyBrace),
        Token::Other("float"),
        Token::Char(ReservedChar::Colon),
        Token::Other("right"),
        Token::Char(ReservedChar::SemiColon),
        Token::Char(ReservedChar::CloseCurlyBrace),
        Token::Char(ReservedChar::CloseCurlyBrace),
    ];

    assert_eq!(tokenize(source), Ok(Tokens(tokens)));
}

#[test]
fn check_calc() {
    // Spaces inside `calc()` are significant: `100% - 34px` must keep the
    // explicit Space tokens around the minus sign.
    let source = ".foo { width: calc(100% - 34px); }";

    let tokens = vec![
        Token::SelectorElement(SelectorElement::Class("foo")),
        Token::Char(ReservedChar::OpenCurlyBrace),
        Token::Other("width"),
        Token::Char(ReservedChar::Colon),
        Token::Other("calc"),
        Token::Char(ReservedChar::OpenParenthese),
        Token::Other("100%"),
        Token::Char(ReservedChar::Space),
        Token::Other("-"),
        Token::Char(ReservedChar::Space),
        Token::Other("34px"),
        Token::Char(ReservedChar::CloseParenthese),
        Token::Char(ReservedChar::SemiColon),
        Token::Char(ReservedChar::CloseCurlyBrace),
    ];

    assert_eq!(tokenize(source), Ok(Tokens(tokens)));
}
+ let re = Regex::new(r">\s+<").unwrap(); + let source = re.replace_all(source, "> <").into_owned(); + let re = Regex::new(r"\s{2,}|[\r\n]").unwrap(); + re.replace_all(&source, " ").into_owned() + } else { + source.trim().to_owned() + } +} + +fn condense(source: &str) -> String { + let re = Regex::new(r"<(style|script)[\w|\s].*?>").unwrap(); + let type_reg = Regex::new(r#"\s*?type="[\w|\s].*?""#).unwrap(); + re.replace_all(source, |caps: &Captures| { + type_reg.replace_all(&caps[0], "").into_owned() + }) + .into_owned() +} + +fn clean_unneeded_tags(source: &str) -> String { + let useless_tags = [ + "</area>", + "</base>", + "<body>", + "</body>", + "</br>", + "</col>", + "</colgroup>", + "</dd>", + "</dt>", + "<head>", + "</head>", + "</hr>", + "<html>", + "</html>", + "</img>", + "</input>", + "</li>", + "</link>", + "</meta>", + "</option>", + "</param>", + "<tbody>", + "</tbody>", + "</td>", + "</tfoot>", + "</th>", + "</thead>", + "</tr>", + "</basefont>", + "</isindex>", + "</param>", + ]; + let mut res = source.to_owned(); + for useless_tag in &useless_tags { + res = res.replace(useless_tag, ""); + } + res +} + +fn remove_comments(source: &str) -> String { + // "build" and "endbuild" should be matched case insensitively. + let re = Regex::new("<!--(.|\n)*?-->").unwrap(); + re.replace_all(source, |caps: &Captures| { + if caps[0].replace("<!--", " ").trim().starts_with("[") { + caps[0].to_owned() + } else { + " ".to_owned() + } + }) + .into_owned() +} + +fn unquote_attributes(source: &str) -> String { + // Some attributes like width, height, etc... don't need quotes. 
+ let any_tag = Regex::new(r"<\w.*?>").unwrap(); + let extra_spaces = Regex::new(r" \s+|\s +").unwrap(); + let between_words = Regex::new(r"\w\s+\w").unwrap(); + let spaces_before_close = Regex::new(r##""\s+>"##).unwrap(); + let spaces_before_close2 = Regex::new(r"'\s+>").unwrap(); + let extra_spaces2 = Regex::new(r##""\s\s+\w+="|'\s\s+\w+='|"\s\s+\w+=|'\s\s+\w+="##).unwrap(); + let extra_spaces3 = Regex::new(r"\d\s+>").unwrap(); + let quotes_in_tag = Regex::new(r##"([a-zA-Z]+)="([a-zA-Z0-9-_\.]+)""##).unwrap(); + + any_tag + .replace_all(source, |caps: &Captures| { + let cap = format!("{}", &caps[0]); + if cap.starts_with("<!") || cap.find("</").is_some() { + cap + } else { + let tag = spaces_before_close.replace_all(&cap, "\">").into_owned(); + let mut tag = spaces_before_close2.replace_all(&tag, "'>").into_owned(); + let tag_c = tag.clone(); + + let space1_matches: Vec<_> = between_words.find_iter(&tag_c).collect(); + let space6_matches: Vec<_> = extra_spaces3.find_iter(&tag_c).collect(); + let mut pos = 0; + loop { + let replacement = match (space1_matches.get(pos), space6_matches.get(pos)) { + (Some(a), Some(b)) => format!("{}{}", a.as_str(), b.as_str()), + (None, Some(b)) => format!("{}", b.as_str()), + (Some(a), None) => format!("{}", a.as_str()), + _ => break, + }; + pos += 1; + tag = tag.replace( + &replacement, + &extra_spaces.replace_all(&replacement, " ").into_owned(), + ); + } + let mut output = tag.clone(); + for caps in extra_spaces2.find_iter(&tag) { + let c = caps.as_str().chars().next().unwrap_or('\0'); + output = output.replace( + caps.as_str(), + &format!( + "{} {}", + if c == '\0' { + String::new() + } else { + format!("{}", c) + }, + caps.as_str()[1..].trim_start() + ), + ); + } + tag = quotes_in_tag + .replace_all(&output, |caps: &Captures| match &caps[1] { + "width" | "height" => format!("{}={}", &caps[1], &caps[2]), + x => format!("{}=\"{}\"", x, &caps[2]), + }) + .into_owned(); + if cap != tag { + tag + } else { + cap + } + } + }) + 
.trim() + .to_owned() +} + +/// Returns a minified version of the provided HTML source. +pub fn minify(source: &str) -> String { + let source = remove_comments(source); + let source = condense(&source); + let source = clean_unneeded_tags(&source); + let source = condense_whitespace(&source); + unquote_attributes(&source).trim().to_owned() +} + +#[test] +fn html_minify_test() { + let source = r##"<head> + <title>Some huge title</title> + <link rel="stylesheet" type="text/css" href="something.css" > + <style type="text/css"> + .some_class { + color: red; + } + </style> +</head> +<body> + <header> + <div> + <i> <b><a href="www.somewhere.com" class="some_class">Narnia</a> </b> </i> + <h1 style="width:100%;text-align:center;" >Big header</h1> + </div> + <!-- commeeeeeeeents !!! --> + </header> + <div id="some_id"> + <!-- another comment + on +multi +lines --> + <div id="another_id" class="another_class" width="100"> + <h2>A little sub title</h2> + <ul> + <li>A list!</li> + <li>Who doesn't like lists?</li> + <li height="12" class="fooool">Well, who cares...</li> + </ul> + </div> + </div> + <script type="text/javascript" > + console.log("foo"); + </script> + <style type="text/css" src="../foo.css"> + <script src="../foo.js"> +</body> +"##; + + let expected_result = "<title>Some huge title</title> <link rel=\"stylesheet\" \ + type=\"text/css\" href=\"something.css\"> <style> .some_class \ + { color: red; } </style> <header> <div> <i> <b><a \ + href=\"www.somewhere.com\" class=\"some_class\">Narnia</a> </b> </i> \ + <h1 style=\"width:100%;text-align:center;\">Big header</h1> </div> \ + </header> <div id=\"some_id\"> <div id=\"another_id\" \ + class=\"another_class\" width=100> <h2>A little sub \ + title</h2> <ul> <li>A list! <li>Who doesn't like lists? \ + <li height=12 class=\"fooool\">Well, who cares... 
</ul> </div> \ + </div> <script > console.log(\"foo\"); </script> <style \ + src=\"../foo.css\"> <script src=\"../foo.js\">"; + assert_eq!(minify(source), expected_result); +} + +#[test] +fn html_keep_important_comments() { + let source = r#" +<div> + <!-- normal comment --> + <div>content</div> + <!--[if lte IE 8]> + <div class="warning">This old browser is unsupported and will most likely display funky things. + </div> + <![endif]--> +</div> +"#; + + let expected_result = + "<div> <div>content</div> <!--[if lte IE 8]> <div class=\"warning\">This \ + old browser is unsupported and will most likely display funky things. \ + </div> <![endif]--> </div>"; + assert_eq!(minify(source), expected_result); +} diff --git a/vendor/minifier/src/js/mod.rs b/vendor/minifier/src/js/mod.rs new file mode 100644 index 000000000..4e0226bd9 --- /dev/null +++ b/vendor/minifier/src/js/mod.rs @@ -0,0 +1,17 @@ +// Take a look at the license at the top of the repository in the LICENSE file. + +mod token; +mod tools; +mod utils; + +pub use self::token::{tokenize, Condition, Keyword, Operation, ReservedChar, Token, Tokens}; +pub use self::tools::{ + aggregate_strings, aggregate_strings_into_array, aggregate_strings_into_array_filter, + aggregate_strings_into_array_with_separation, + aggregate_strings_into_array_with_separation_filter, aggregate_strings_with_separation, minify, + simple_minify, +}; +pub use self::utils::{ + clean_token, clean_token_except, clean_tokens, clean_tokens_except, + get_variable_name_and_value_positions, replace_token_with, replace_tokens_with, +}; diff --git a/vendor/minifier/src/js/token.rs b/vendor/minifier/src/js/token.rs new file mode 100644 index 000000000..251394cf6 --- /dev/null +++ b/vendor/minifier/src/js/token.rs @@ -0,0 +1,1431 @@ +// Take a look at the license at the top of the repository in the LICENSE file. 
// Take a look at the license at the top of the repository in the LICENSE file.

use std::convert::TryFrom;
use std::fmt;
use std::str::{CharIndices, FromStr};

/// Single characters that carry syntactic meaning for the JS tokenizer.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy)]
pub enum ReservedChar {
    Comma,
    OpenParenthese,
    CloseParenthese,
    OpenCurlyBrace,
    CloseCurlyBrace,
    OpenBracket,
    CloseBracket,
    Colon,
    SemiColon,
    Dot,
    Quote,
    DoubleQuote,
    ExclamationMark,
    QuestionMark,
    Slash,
    Modulo,
    Star,
    Minus,
    Plus,
    EqualSign,
    Backslash,
    Space,
    Tab,
    Backline,
    LessThan,
    SuperiorThan,
    Pipe,
    Ampersand,
    BackTick,
}

impl ReservedChar {
    /// Returns `true` for space, tab and newline.
    pub fn is_white_character(&self) -> bool {
        matches!(
            *self,
            ReservedChar::Space | ReservedChar::Tab | ReservedChar::Backline
        )
    }
}

impl fmt::Display for ReservedChar {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "{}",
            match *self {
                ReservedChar::Comma => ',',
                ReservedChar::OpenParenthese => '(',
                ReservedChar::CloseParenthese => ')',
                ReservedChar::OpenCurlyBrace => '{',
                ReservedChar::CloseCurlyBrace => '}',
                ReservedChar::OpenBracket => '[',
                ReservedChar::CloseBracket => ']',
                ReservedChar::Colon => ':',
                ReservedChar::SemiColon => ';',
                ReservedChar::Dot => '.',
                ReservedChar::Quote => '\'',
                ReservedChar::DoubleQuote => '"',
                ReservedChar::ExclamationMark => '!',
                ReservedChar::QuestionMark => '?',
                ReservedChar::Slash => '/',
                ReservedChar::Modulo => '%',
                ReservedChar::Star => '*',
                ReservedChar::Minus => '-',
                ReservedChar::Plus => '+',
                ReservedChar::EqualSign => '=',
                ReservedChar::Backslash => '\\',
                ReservedChar::Space => ' ',
                ReservedChar::Tab => '\t',
                ReservedChar::Backline => '\n',
                ReservedChar::LessThan => '<',
                ReservedChar::SuperiorThan => '>',
                ReservedChar::Pipe => '|',
                ReservedChar::Ampersand => '&',
                ReservedChar::BackTick => '`',
            }
        )
    }
}

impl TryFrom<char> for ReservedChar {
    type Error = &'static str;

    fn try_from(value: char) -> Result<ReservedChar, Self::Error> {
        match value {
            ',' => Ok(ReservedChar::Comma),
            '(' => Ok(ReservedChar::OpenParenthese),
            ')' => Ok(ReservedChar::CloseParenthese),
            '{' => Ok(ReservedChar::OpenCurlyBrace),
            '}' => Ok(ReservedChar::CloseCurlyBrace),
            '[' => Ok(ReservedChar::OpenBracket),
            ']' => Ok(ReservedChar::CloseBracket),
            ':' => Ok(ReservedChar::Colon),
            ';' => Ok(ReservedChar::SemiColon),
            '.' => Ok(ReservedChar::Dot),
            '\'' => Ok(ReservedChar::Quote),
            '"' => Ok(ReservedChar::DoubleQuote),
            '!' => Ok(ReservedChar::ExclamationMark),
            '?' => Ok(ReservedChar::QuestionMark),
            '/' => Ok(ReservedChar::Slash),
            '%' => Ok(ReservedChar::Modulo),
            '*' => Ok(ReservedChar::Star),
            '-' => Ok(ReservedChar::Minus),
            '+' => Ok(ReservedChar::Plus),
            '=' => Ok(ReservedChar::EqualSign),
            '\\' => Ok(ReservedChar::Backslash),
            ' ' => Ok(ReservedChar::Space),
            '\t' => Ok(ReservedChar::Tab),
            // CR is folded into Backline on purpose.
            '\n' | '\r' => Ok(ReservedChar::Backline),
            '<' => Ok(ReservedChar::LessThan),
            '>' => Ok(ReservedChar::SuperiorThan),
            '|' => Ok(ReservedChar::Pipe),
            '&' => Ok(ReservedChar::Ampersand),
            '`' => Ok(ReservedChar::BackTick),
            _ => Err("Unknown reserved char"),
        }
    }
}

/// JavaScript keywords recognized by the tokenizer.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
pub enum Keyword {
    Break,
    Case,
    Catch,
    Const,
    Continue,
    Default,
    Do,
    Else,
    False,
    Finally,
    Function,
    For,
    If,
    In,
    InstanceOf,
    Let,
    New,
    Null,
    Private,
    Protected,
    Public,
    Return,
    Switch,
    This,
    Throw,
    True,
    Try,
    Typeof,
    Static,
    Var,
    While,
}

impl Keyword {
    /// Keywords that must also be preceded by a space when written out
    /// (infix operators `in` and `instanceof`).
    fn requires_before(&self) -> bool {
        matches!(*self, Keyword::In | Keyword::InstanceOf)
    }
}

impl fmt::Display for Keyword {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "{}",
            match *self {
                Keyword::Break => "break",
                Keyword::Case => "case",
                Keyword::Catch => "catch",
                Keyword::Const => "const",
                Keyword::Continue => "continue",
                Keyword::Default => "default",
                Keyword::Do => "do",
                Keyword::Else => "else",
                Keyword::False => "false",
                Keyword::Finally => "finally",
                Keyword::Function => "function",
                Keyword::For => "for",
                Keyword::If => "if",
                Keyword::In => "in",
                Keyword::InstanceOf => "instanceof",
                Keyword::Let => "let",
                Keyword::New => "new",
                Keyword::Null => "null",
                Keyword::Private => "private",
                Keyword::Protected => "protected",
                Keyword::Public => "public",
                Keyword::Return => "return",
                Keyword::Switch => "switch",
                Keyword::This => "this",
                Keyword::Throw => "throw",
                Keyword::True => "true",
                Keyword::Try => "try",
                Keyword::Typeof => "typeof",
                Keyword::Static => "static",
                Keyword::Var => "var",
                Keyword::While => "while",
            }
        )
    }
}

impl<'a> TryFrom<&'a str> for Keyword {
    type Error = &'static str;

    fn try_from(value: &str) -> Result<Keyword, Self::Error> {
        match value {
            "break" => Ok(Keyword::Break),
            "case" => Ok(Keyword::Case),
            "catch" => Ok(Keyword::Catch),
            "const" => Ok(Keyword::Const),
            "continue" => Ok(Keyword::Continue),
            "default" => Ok(Keyword::Default),
            "do" => Ok(Keyword::Do),
            "else" => Ok(Keyword::Else),
            "false" => Ok(Keyword::False),
            "finally" => Ok(Keyword::Finally),
            "function" => Ok(Keyword::Function),
            "for" => Ok(Keyword::For),
            "if" => Ok(Keyword::If),
            "in" => Ok(Keyword::In),
            "instanceof" => Ok(Keyword::InstanceOf),
            "let" => Ok(Keyword::Let),
            "new" => Ok(Keyword::New),
            "null" => Ok(Keyword::Null),
            "private" => Ok(Keyword::Private),
            "protected" => Ok(Keyword::Protected),
            "public" => Ok(Keyword::Public),
            "return" => Ok(Keyword::Return),
            "switch" => Ok(Keyword::Switch),
            "this" => Ok(Keyword::This),
            "throw" => Ok(Keyword::Throw),
            "true" => Ok(Keyword::True),
            "try" => Ok(Keyword::Try),
            "typeof" => Ok(Keyword::Typeof),
            "static" => Ok(Keyword::Static),
            "var" => Ok(Keyword::Var),
            "while" => Ok(Keyword::While),
            // Fix: was the misspelled "Unkown keyword" (the char conversion
            // above already used the correct spelling).
            _ => Err("Unknown keyword"),
        }
    }
}

/// Comparison/boolean operators.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy)]
pub enum Condition {
    And,
    Or,
    DifferentThan,
    SuperDifferentThan,
    EqualTo,
    SuperEqualTo,
    SuperiorThan,
    SuperiorOrEqualTo,
    InferiorThan,
    InferiorOrEqualTo,
}

impl fmt::Display for Condition {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "{}",
            match *self {
                Condition::And => "&&",
                Condition::Or => "||",
                Condition::DifferentThan => "!=",
                Condition::SuperDifferentThan => "!==",
                Condition::EqualTo => "==",
                Condition::SuperEqualTo => "===",
                Condition::SuperiorThan => ">",
                Condition::SuperiorOrEqualTo => ">=",
                Condition::InferiorThan => "<",
                Condition::InferiorOrEqualTo => "<=",
            }
        )
    }
}

impl TryFrom<ReservedChar> for Condition {
    type Error = &'static str;

    fn try_from(value: ReservedChar) -> Result<Condition, Self::Error> {
        Ok(match value {
            ReservedChar::SuperiorThan => Condition::SuperiorThan,
            ReservedChar::LessThan => Condition::InferiorThan,
            // Fix: was the misspelled "Unkown condition".
            _ => return Err("Unknown condition"),
        })
    }
}

/// Arithmetic and assignment operators.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy)]
pub enum Operation {
    Addition,
    AdditionEqual,
    Subtract,
    SubtractEqual,
    Multiply,
    MultiplyEqual,
    Divide,
    DivideEqual,
    Modulo,
    ModuloEqual,
    Equal,
}

impl Operation {
    /// Returns `true` for every operator that assigns to its left operand.
    pub fn is_assign(&self) -> bool {
        matches!(
            *self,
            Operation::AdditionEqual
                | Operation::SubtractEqual
                | Operation::MultiplyEqual
                | Operation::DivideEqual
                | Operation::ModuloEqual
                | Operation::Equal
        )
    }
}

impl fmt::Display for Operation {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "{}",
            match *self {
                Operation::Addition => "+",
                Operation::AdditionEqual => "+=",
                Operation::Subtract => "-",
                Operation::SubtractEqual => "-=",
                Operation::Multiply => "*",
                Operation::MultiplyEqual => "*=",
                Operation::Divide => "/",
                Operation::DivideEqual => "/=",
                Operation::Modulo => "%",
                Operation::ModuloEqual => "%=",
                Operation::Equal => "=",
            }
        )
    }
}

impl TryFrom<ReservedChar> for Operation {
    type Error = &'static str;

    fn try_from(value: ReservedChar) -> Result<Operation, Self::Error> {
        Ok(match value {
            ReservedChar::Plus => Operation::Addition,
            ReservedChar::Minus => Operation::Subtract,
            ReservedChar::Slash => Operation::Divide,
            ReservedChar::Star => Operation::Multiply,
            ReservedChar::Modulo => Operation::Modulo,
            ReservedChar::EqualSign => Operation::Equal,
            // Fix: was the misspelled "Unkown operation".
            _ => return Err("Unknown operation"),
        })
    }
}

/// A single lexical element of a JavaScript source.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Hash)]
pub enum Token<'a> {
    Keyword(Keyword),
    Char(ReservedChar),
    String(&'a str),
    Comment(&'a str),
    License(&'a str),
    Other(&'a str),
    Regex {
        regex: &'a str,
        is_global: bool,
        is_interactive: bool,
    },
    Condition(Condition),
    Operation(Operation),
    CreatedVarDecl(String),
    CreatedVar(String),
    Number(usize),
    FloatingNumber(&'a str),
}

impl<'a> fmt::Display for Token<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            Token::Keyword(x) => write!(f, "{}", x),
            Token::Char(x) => write!(f, "{}", x),
            Token::String(x) | Token::Comment(x) | Token::Other(x) => write!(f, "{}", x),
            Token::License(x) => write!(f, "/*!{}*/", x),
            Token::Regex {
                regex,
                is_global,
                is_interactive,
            } => {
                // Fix: the previous version stored the Result of the opening
                // `write!` and still wrote the flags afterwards even if it
                // had failed; now the error is propagated immediately.
                write!(f, "/{}/", regex)?;
                if is_global {
                    write!(f, "g")?;
                }
                if is_interactive {
                    write!(f, "i")?;
                }
                Ok(())
            }
            Token::Condition(x) => write!(f, "{}", x),
            Token::Operation(x) => write!(f, "{}", x),
            Token::CreatedVarDecl(ref x) => write!(f, "{}", x),
            Token::CreatedVar(ref x) => write!(f, "{}", x),
            Token::Number(x) => write!(f, "{}", x),
            Token::FloatingNumber(ref x) => write!(f, "{}", x),
        }
    }
}

impl<'a> Token<'a> {
    pub fn is_comment(&self) -> bool {
        matches!(*self, Token::Comment(_))
    }

    pub fn is_license(&self) -> bool {
        matches!(*self, Token::License(_))
    }

    pub fn is_reserved_char(&self) -> bool {
        matches!(*self, Token::Char(_))
    }
}
fn get_char(&self) -> Option<ReservedChar> { + match *self { + Token::Char(c) => Some(c), + _ => None, + } + } + + pub fn eq_char(&self, rc: ReservedChar) -> bool { + match *self { + Token::Char(c) => c == rc, + _ => false, + } + } + + pub fn eq_operation(&self, ope: Operation) -> bool { + match *self { + Token::Operation(o) => o == ope, + _ => false, + } + } + + pub fn is_operation(&self) -> bool { + matches!(*self, Token::Operation(_)) + } + + pub fn eq_condition(&self, cond: Condition) -> bool { + match *self { + Token::Condition(c) => c == cond, + _ => false, + } + } + + pub fn is_condition(&self) -> bool { + matches!(*self, Token::Condition(_)) + } + + pub fn is_other(&self) -> bool { + matches!(*self, Token::Other(_)) + } + + pub fn get_other(&self) -> Option<&str> { + match *self { + Token::Other(s) => Some(s), + _ => None, + } + } + + pub fn is_white_character(&self) -> bool { + match *self { + Token::Char(c) => c.is_white_character(), + _ => false, + } + } + + pub fn is_keyword(&self) -> bool { + matches!(*self, Token::Keyword(_)) + } + + pub fn get_keyword(&self) -> Option<Keyword> { + match *self { + Token::Keyword(k) => Some(k), + _ => None, + } + } + + pub fn is_string(&self) -> bool { + matches!(*self, Token::String(_)) + } + + pub fn get_string(&self) -> Option<&str> { + match *self { + Token::String(s) => Some(s), + _ => None, + } + } + + pub fn is_regex(&self) -> bool { + matches!(*self, Token::Regex { .. 
}) + } + + pub fn is_created_var_decl(&self) -> bool { + matches!(*self, Token::CreatedVarDecl(_)) + } + + pub fn is_created_var(&self) -> bool { + matches!(*self, Token::CreatedVar(_)) + } + + pub fn is_number(&self) -> bool { + matches!(*self, Token::Number(_)) + } + + pub fn is_floating_number(&self) -> bool { + matches!(*self, Token::FloatingNumber(_)) + } + + fn get_required(&self) -> Option<char> { + match *self { + Token::Keyword(_) + | Token::Other(_) + | Token::CreatedVarDecl(_) + | Token::Number(_) + | Token::FloatingNumber(_) => Some(' '), + _ => None, + } + } + + fn requires_before(&self) -> bool { + match *self { + Token::Keyword(k) => k.requires_before(), + _ => false, + } + } +} + +fn get_line_comment<'a>( + source: &'a str, + iterator: &mut MyPeekable<'_>, + start_pos: &mut usize, +) -> Option<Token<'a>> { + *start_pos += 1; + for (pos, c) in iterator { + if let Ok(c) = ReservedChar::try_from(c) { + if c == ReservedChar::Backline { + let ret = Some(Token::Comment(&source[*start_pos..pos])); + *start_pos = pos; + return ret; + } + } + } + None +} + +fn get_regex<'a>( + source: &'a str, + iterator: &mut MyPeekable<'_>, + start_pos: &mut usize, + v: &[Token<'_>], +) -> Option<Token<'a>> { + let mut back = v.len(); + while back > 0 { + back -= 1; + if v[back].is_white_character() || v[back].is_comment() || v[back].is_license() { + continue; + } + match &v[back] { + Token::Char(ReservedChar::SemiColon) + | Token::Char(ReservedChar::Colon) + | Token::Char(ReservedChar::Comma) + | Token::Char(ReservedChar::OpenBracket) + | Token::Char(ReservedChar::OpenParenthese) + | Token::Char(ReservedChar::ExclamationMark) + | Token::Char(ReservedChar::OpenCurlyBrace) + | Token::Char(ReservedChar::QuestionMark) + | Token::Char(ReservedChar::Backline) + | Token::Char(ReservedChar::Pipe) + | Token::Char(ReservedChar::Ampersand) => break, + t if t.is_operation() || t.is_condition() => break, + _ => return None, + } + } + iterator.start_save(); + while let Some((pos, c)) = 
iterator.next() { + if c == '\\' { + // we skip next character + iterator.next(); + continue; + } + if let Ok(c) = ReservedChar::try_from(c) { + if c == ReservedChar::Slash { + let mut is_global = false; + let mut is_interactive = false; + let mut add = 0; + loop { + match iterator.peek() { + Some((_, 'i')) => is_interactive = true, + Some((_, 'g')) => is_global = true, + _ => break, + }; + iterator.next(); + add += 1; + } + let ret = Some(Token::Regex { + regex: &source[*start_pos + 1..pos], + is_interactive, + is_global, + }); + *start_pos = pos + add; + iterator.drop_save(); + return ret; + } else if c == ReservedChar::Backline { + break; + } + } + } + iterator.stop_save(); + None +} + +fn get_comment<'a>( + source: &'a str, + iterator: &mut MyPeekable<'_>, + start_pos: &mut usize, +) -> Token<'a> { + let mut prev = ReservedChar::Quote; + *start_pos += 1; + let builder = if let Some((_, c)) = iterator.next() { + if c == '!' { + *start_pos += 1; + Token::License + } else { + if let Ok(c) = ReservedChar::try_from(c) { + prev = c; + } + Token::Comment + } + } else { + Token::Comment + }; + + let mut current_pos = *start_pos; + for (pos, c) in iterator { + current_pos = pos; + if let Ok(c) = ReservedChar::try_from(c) { + if c == ReservedChar::Slash && prev == ReservedChar::Star { + current_pos -= 2; + break; + } + prev = c; + } else { + prev = ReservedChar::Space; + } + } + // Unclosed comment so returning it anyway... 
+ let ret = builder(&source[*start_pos..=current_pos]); + *start_pos = current_pos + 2; + ret +} + +fn get_string<'a>( + source: &'a str, + iterator: &mut MyPeekable<'_>, + start_pos: &mut usize, + start: ReservedChar, +) -> Option<Token<'a>> { + while let Some((pos, c)) = iterator.next() { + if c == '\\' { + // we skip next character + iterator.next(); + continue; + } + if let Ok(c) = ReservedChar::try_from(c) { + if c == start { + let ret = Some(Token::String(&source[*start_pos..pos + 1])); + *start_pos = pos; + return ret; + } + } + } + None +} + +fn get_backtick_string<'a>( + source: &'a str, + iterator: &mut MyPeekable<'_>, + start_pos: &mut usize, +) -> Option<Token<'a>> { + while let Some((pos, c)) = iterator.next() { + if c == '\\' { + // we skip next character + iterator.next(); + continue; + } + if c == '$' && iterator.peek().map(|(_, c)| c == '{').unwrap_or(false) { + let mut count = 0; + + loop { + if let Some((mut pos, c)) = iterator.next() { + if c == '\\' { + // we skip next character + iterator.next(); + continue; + } else if c == '"' || c == '\'' { + // We don't care about the result + get_string( + source, + iterator, + &mut pos, + ReservedChar::try_from(c) + .expect("ReservedChar::try_from unexpectedly failed..."), + ); + } else if c == '`' { + get_backtick_string(source, iterator, &mut pos); + } else if c == '{' { + count += 1; + } else if c == '}' { + count -= 1; + if count == 0 { + break; + } + } + } else { + return None; + } + } + } else if c == '`' { + let ret = Some(Token::String(&source[*start_pos..pos + 1])); + *start_pos = pos; + return ret; + } + } + None +} + +fn first_useful<'a>(v: &'a [Token<'a>]) -> Option<&'a Token<'a>> { + for x in v.iter().rev() { + if x.is_white_character() { + continue; + } + return Some(x); + } + None +} + +fn fill_other<'a>(source: &'a str, v: &mut Vec<Token<'a>>, start: usize, pos: usize) { + if start < pos { + if let Ok(w) = Keyword::try_from(&source[start..pos]) { + v.push(Token::Keyword(w)); + } else if 
let Ok(n) = usize::from_str(&source[start..pos]) { + v.push(Token::Number(n)) + } else if f64::from_str(&source[start..pos]).is_ok() { + v.push(Token::FloatingNumber(&source[start..pos])) + } else { + v.push(Token::Other(&source[start..pos])); + } + } +} + +fn handle_equal_sign(v: &mut Vec<Token<'_>>, c: ReservedChar) -> bool { + if c != ReservedChar::EqualSign { + return false; + } + match v.last().unwrap_or(&Token::Other("")) { + Token::Operation(Operation::Equal) => { + v.pop(); + v.push(Token::Condition(Condition::EqualTo)); + } + Token::Condition(Condition::EqualTo) => { + v.pop(); + v.push(Token::Condition(Condition::SuperEqualTo)); + } + Token::Char(ReservedChar::ExclamationMark) => { + v.pop(); + v.push(Token::Condition(Condition::DifferentThan)); + } + Token::Condition(Condition::DifferentThan) => { + v.pop(); + v.push(Token::Condition(Condition::SuperDifferentThan)); + } + Token::Operation(Operation::Divide) => { + v.pop(); + v.push(Token::Operation(Operation::DivideEqual)); + } + Token::Operation(Operation::Multiply) => { + v.pop(); + v.push(Token::Operation(Operation::MultiplyEqual)); + } + Token::Operation(Operation::Addition) => { + v.pop(); + v.push(Token::Operation(Operation::AdditionEqual)); + } + Token::Operation(Operation::Subtract) => { + v.pop(); + v.push(Token::Operation(Operation::SubtractEqual)); + } + Token::Operation(Operation::Modulo) => { + v.pop(); + v.push(Token::Operation(Operation::ModuloEqual)); + } + Token::Condition(Condition::SuperiorThan) => { + v.pop(); + v.push(Token::Condition(Condition::SuperiorOrEqualTo)); + } + Token::Condition(Condition::InferiorThan) => { + v.pop(); + v.push(Token::Condition(Condition::InferiorOrEqualTo)); + } + _ => { + return false; + } + } + true +} + +fn check_if_number<'a>( + iterator: &mut MyPeekable<'_>, + start: usize, + pos: usize, + source: &'a str, +) -> bool { + if source[start..pos].find('.').is_some() { + return false; + } else if u64::from_str(&source[start..pos]).is_ok() { + return true; 
+ } else if let Some((_, x)) = iterator.peek() { + return x as u8 >= b'0' && x as u8 <= b'9'; + } + false +} + +struct MyPeekable<'a> { + inner: CharIndices<'a>, + saved: Vec<(usize, char)>, + peeked: Option<(usize, char)>, + is_saving: bool, +} + +impl<'a> MyPeekable<'a> { + fn new(indices: CharIndices<'a>) -> MyPeekable<'a> { + MyPeekable { + inner: indices, + saved: Vec::with_capacity(500), + peeked: None, + is_saving: false, + } + } + + fn start_save(&mut self) { + self.is_saving = true; + if let Some(p) = self.peeked { + self.saved.push(p); + } + } + + fn drop_save(&mut self) { + self.is_saving = false; + self.saved.clear(); + } + + fn stop_save(&mut self) { + self.is_saving = false; + if let Some(p) = self.peeked { + self.saved.push(p); + } + self.peeked = None; + } + + /// Returns None if saving. + fn peek(&mut self) -> Option<(usize, char)> { + if self.peeked.is_none() { + self.peeked = self.inner.next(); + if self.is_saving { + if let Some(p) = self.peeked { + self.saved.push(p); + } + } + } + self.peeked + } +} + +impl<'a> Iterator for MyPeekable<'a> { + type Item = (usize, char); + + fn next(&mut self) -> Option<Self::Item> { + if self.peeked.is_some() { + self.peeked.take() + } else { + if !self.is_saving && !self.saved.is_empty() { + return Some(self.saved.remove(0)); + } + match self.inner.next() { + Some(r) if self.is_saving => { + self.saved.push(r); + Some(r) + } + r => r, + } + } + } +} + +pub fn tokenize(source: &str) -> Tokens<'_> { + let mut v = Vec::with_capacity(1000); + let mut start = 0; + let mut iterator = MyPeekable::new(source.char_indices()); + + loop { + let (mut pos, c) = match iterator.next() { + Some(x) => x, + None => { + fill_other(source, &mut v, start, source.len()); + break; + } + }; + if let Ok(c) = ReservedChar::try_from(c) { + if c == ReservedChar::Dot && check_if_number(&mut iterator, start, pos, source) { + let mut cont = true; + if let Some(x) = iterator.peek() { + if !"0123456789,; \t\n<>/*&|{}[]-+=~%^:!".contains(x.1) 
{ + fill_other(source, &mut v, start, pos); + start = pos; + cont = false; + } + } + if cont { + continue; + } + } + fill_other(source, &mut v, start, pos); + match c { + ReservedChar::Quote | ReservedChar::DoubleQuote => { + if let Some(s) = get_string(source, &mut iterator, &mut pos, c) { + v.push(s); + } + } + ReservedChar::BackTick => { + if let Some(s) = get_backtick_string(source, &mut iterator, &mut pos) { + v.push(s); + } + } + ReservedChar::Slash + if v.last() + .unwrap_or(&Token::Other("")) + .eq_operation(Operation::Divide) => + { + v.pop(); + if let Some(s) = get_line_comment(source, &mut iterator, &mut pos) { + v.push(s); + } + } + ReservedChar::Slash + if iterator.peek().is_some() + && iterator.peek().unwrap().1 != '/' + && iterator.peek().unwrap().1 != '*' + && !first_useful(&v).unwrap_or(&Token::String("")).is_other() => + { + if let Some(r) = get_regex(source, &mut iterator, &mut pos, &v) { + v.push(r); + } else { + v.push(Token::Operation(Operation::Divide)); + } + } + ReservedChar::Star + if v.last() + .unwrap_or(&Token::Other("")) + .eq_operation(Operation::Divide) => + { + v.pop(); + v.push(get_comment(source, &mut iterator, &mut pos)); + } + ReservedChar::Pipe + if v.last() + .unwrap_or(&Token::Other("")) + .eq_char(ReservedChar::Pipe) => + { + v.pop(); + v.push(Token::Condition(Condition::Or)); + } + ReservedChar::Ampersand + if v.last() + .unwrap_or(&Token::Other("")) + .eq_char(ReservedChar::Ampersand) => + { + v.pop(); + v.push(Token::Condition(Condition::And)); + } + _ if handle_equal_sign(&mut v, c) => {} + _ => { + if let Ok(o) = Operation::try_from(c) { + v.push(Token::Operation(o)); + } else if let Ok(o) = Condition::try_from(c) { + v.push(Token::Condition(o)); + } else { + v.push(Token::Char(c)); + } + } + } + start = pos + 1; + } + } + Tokens(v) +} + +#[derive(Debug, PartialEq, Eq)] +pub struct Tokens<'a>(pub Vec<Token<'a>>); + +macro_rules! 
tokens_writer { + ($self:ident, $w:ident) => { + let tokens = &$self.0; + for i in 0..tokens.len() { + if i > 0 + && tokens[i].requires_before() + && !tokens[i - 1].is_keyword() + && !tokens[i - 1].is_other() + && !tokens[i - 1].is_reserved_char() + && !tokens[i - 1].is_string() + { + write!($w, " ")?; + } + write!($w, "{}", tokens[i])?; + if let Some(c) = match tokens[i] { + Token::Keyword(_) | Token::Other(_) if i + 1 < tokens.len() => { + tokens[i + 1].get_required() + } + _ => None, + } { + write!($w, "{}", c)?; + } + } + }; +} + +impl<'a> Tokens<'a> { + pub(super) fn write<W: std::io::Write>(self, mut w: W) -> std::io::Result<()> { + tokens_writer!(self, w); + Ok(()) + } +} + +impl<'a> fmt::Display for Tokens<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + tokens_writer!(self, f); + Ok(()) + } +} + +impl<'a> Tokens<'a> { + #[must_use] + pub fn apply<F>(self, func: F) -> Tokens<'a> + where + F: Fn(Tokens<'a>) -> Tokens<'a>, + { + func(self) + } +} + +pub struct IntoIterTokens<'a> { + inner: Tokens<'a>, +} + +impl<'a> IntoIterator for Tokens<'a> { + type Item = (Token<'a>, Option<&'a Token<'a>>); + type IntoIter = IntoIterTokens<'a>; + + fn into_iter(mut self) -> Self::IntoIter { + self.0.reverse(); + IntoIterTokens { inner: self } + } +} + +impl<'a> Iterator for IntoIterTokens<'a> { + type Item = (Token<'a>, Option<&'a Token<'a>>); + + fn next(&mut self) -> Option<Self::Item> { + if self.inner.0.is_empty() { + None + } else { + let ret = self.inner.0.pop().expect("pop() failed"); + // FIXME once generic traits' types are stabilized, use a second + // lifetime instead of transmute! 
+ Some((ret, unsafe { std::mem::transmute(self.inner.0.last()) })) + } + } +} + +impl<'a> std::ops::Deref for Tokens<'a> { + type Target = Vec<Token<'a>>; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl<'a> From<Vec<Token<'a>>> for Tokens<'a> { + fn from(v: Vec<Token<'a>>) -> Self { + Tokens(v) + } +} + +impl<'a> From<&[Token<'a>]> for Tokens<'a> { + fn from(v: &[Token<'a>]) -> Self { + Tokens(v.to_vec()) + } +} + +#[test] +fn check_regex() { + let source = r#"var x = /"\.x/g;"#; + let expected_result = r#"var x=/"\.x/g"#; + assert_eq!(crate::js::minify(source).to_string(), expected_result); + + let v = tokenize(source).apply(crate::js::clean_tokens); + assert_eq!( + v.0[3], + Token::Regex { + regex: "\"\\.x", + is_global: true, + is_interactive: false, + } + ); + + let source = r#"var x = /"\.x/gigigigig;var x = "hello";"#; + let expected_result = r#"var x=/"\.x/gi;var x="hello""#; + assert_eq!(crate::js::minify(source).to_string(), expected_result); + + let v = tokenize(source).apply(crate::js::clean_tokens); + assert_eq!( + v.0[3], + Token::Regex { + regex: "\"\\.x", + is_global: true, + is_interactive: true, + } + ); +} + +#[test] +fn more_regex() { + let source = r#"var x = /"\.x\/a/i;"#; + let expected_result = r#"var x=/"\.x\/a/i"#; + assert_eq!(crate::js::minify(source).to_string(), expected_result); + + let v = tokenize(source).apply(crate::js::clean_tokens); + assert_eq!( + v.0[3], + Token::Regex { + regex: "\"\\.x\\/a", + is_global: false, + is_interactive: true, + } + ); + + let source = r#"var x = /\\/i;"#; + let expected_result = r#"var x=/\\/i"#; + assert_eq!(crate::js::minify(source).to_string(), expected_result); + + let v = tokenize(source).apply(crate::js::clean_tokens); + assert_eq!( + v.0[3], + Token::Regex { + regex: "\\\\", + is_global: false, + is_interactive: true, + } + ); +} + +#[test] +fn even_more_regex() { + let source = r#"var x = /a-z /;"#; + + let v = tokenize(source).apply(crate::js::clean_tokens); + assert_eq!( + 
v.0[3], + Token::Regex { + regex: "a-z ", + is_global: false, + is_interactive: false, + } + ); +} + +#[test] +fn not_regex_test() { + let source = "( x ) / 2; x / y;x /= y"; + + let v = tokenize(source).apply(crate::js::clean_tokens); + assert_eq!( + &v.0, + &[ + Token::Char(ReservedChar::OpenParenthese), + Token::Other("x"), + Token::Char(ReservedChar::CloseParenthese), + Token::Operation(Operation::Divide), + Token::Number(2), + Token::Char(ReservedChar::SemiColon), + Token::Other("x"), + Token::Operation(Operation::Divide), + Token::Other("y"), + Token::Char(ReservedChar::SemiColon), + Token::Other("x"), + Token::Operation(Operation::DivideEqual), + Token::Other("y") + ] + ); + + let source = "let x = /x\ny/;"; + + let v = tokenize(source).apply(crate::js::clean_tokens); + assert_eq!( + &v.0, + &[ + Token::Keyword(Keyword::Let), + Token::Other("x"), + Token::Operation(Operation::Equal), + Token::Operation(Operation::Divide), + Token::Other("x"), + Token::Other("y"), + Token::Operation(Operation::Divide) + ] + ); +} + +#[test] +fn test_tokens_parsing() { + let source = "true = == 2.3 === 32"; + + let v = tokenize(source).apply(crate::js::clean_tokens); + assert_eq!( + &v.0, + &[ + Token::Keyword(Keyword::True), + Token::Operation(Operation::Equal), + Token::Condition(Condition::EqualTo), + Token::FloatingNumber("2.3"), + Token::Condition(Condition::SuperEqualTo), + Token::Number(32) + ] + ); +} + +#[test] +fn test_string_parsing() { + let source = "var x = 'hello people!'"; + + let v = tokenize(source).apply(crate::js::clean_tokens); + assert_eq!( + &v.0, + &[ + Token::Keyword(Keyword::Var), + Token::Other("x"), + Token::Operation(Operation::Equal), + Token::String("\'hello people!\'") + ] + ); +} + +#[test] +fn test_number_parsing() { + let source = "var x = .12; let y = 4.; var z = 12; .3 4. 
'a' let u = 12.2"; + + let v = tokenize(source).apply(crate::js::clean_tokens); + assert_eq!( + &v.0, + &[ + Token::Keyword(Keyword::Var), + Token::Other("x"), + Token::Operation(Operation::Equal), + Token::FloatingNumber(".12"), + Token::Char(ReservedChar::SemiColon), + Token::Keyword(Keyword::Let), + Token::Other("y"), + Token::Operation(Operation::Equal), + Token::FloatingNumber("4."), + Token::Char(ReservedChar::SemiColon), + Token::Keyword(Keyword::Var), + Token::Other("z"), + Token::Operation(Operation::Equal), + Token::Number(12), + Token::Char(ReservedChar::SemiColon), + Token::FloatingNumber(".3"), + Token::FloatingNumber("4."), + Token::String("'a'"), + Token::Keyword(Keyword::Let), + Token::Other("u"), + Token::Operation(Operation::Equal), + Token::FloatingNumber("12.2") + ] + ); +} + +#[test] +fn test_number_parsing2() { + let source = "var x = 12.a;"; + + let v = tokenize(source).apply(crate::js::clean_tokens); + assert_eq!( + &v.0, + &[ + Token::Keyword(Keyword::Var), + Token::Other("x"), + Token::Operation(Operation::Equal), + Token::Number(12), + Token::Char(ReservedChar::Dot), + Token::Other("a") + ] + ); +} + +#[test] +fn tokens_spaces() { + let source = "t in e"; + + let v = tokenize(source).apply(crate::js::clean_tokens); + assert_eq!( + &v.0, + &[ + Token::Other("t"), + Token::Keyword(Keyword::In), + Token::Other("e") + ] + ); +} + +#[test] +fn division_by_id() { + let source = "100/abc"; + + let v = tokenize(source).apply(crate::js::clean_tokens); + assert_eq!( + &v.0, + &[ + Token::Number(100), + Token::Operation(Operation::Divide), + Token::Other("abc") + ] + ); +} + +#[test] +fn weird_regex() { + let source = "if (!/\\/(contact|legal)\\//.test(a)) {}"; + + let v = tokenize(source).apply(crate::js::clean_tokens); + assert_eq!( + &v.0, + &[ + Token::Keyword(Keyword::If), + Token::Char(ReservedChar::OpenParenthese), + Token::Char(ReservedChar::ExclamationMark), + Token::Regex { + regex: "\\/(contact|legal)\\/", + is_global: false, + 
is_interactive: false + }, + Token::Char(ReservedChar::Dot), + Token::Other("test"), + Token::Char(ReservedChar::OpenParenthese), + Token::Other("a"), + Token::Char(ReservedChar::CloseParenthese), + Token::Char(ReservedChar::CloseParenthese), + Token::Char(ReservedChar::OpenCurlyBrace), + Token::Char(ReservedChar::CloseCurlyBrace), + ] + ); +} + +#[test] +fn test_regexes() { + let source = "/\\/(contact|legal)\\//.test"; + + let v = tokenize(source).apply(crate::js::clean_tokens); + assert_eq!( + &v.0, + &[ + Token::Regex { + regex: "\\/(contact|legal)\\/", + is_global: false, + is_interactive: false + }, + Token::Char(ReservedChar::Dot), + Token::Other("test"), + ] + ); + + let source = "/\\*(contact|legal)/.test"; + + let v = tokenize(source).apply(crate::js::clean_tokens); + assert_eq!( + &v.0, + &[ + Token::Regex { + regex: "\\*(contact|legal)", + is_global: false, + is_interactive: false + }, + Token::Char(ReservedChar::Dot), + Token::Other("test"), + ] + ); +} + +#[test] +fn test_comments() { + let source = "/*(contact|legal)/.test"; + + let v = tokenize(source); + assert_eq!(&v.0, &[Token::Comment("(contact|legal)/.test"),],); + + let source = "/*(contact|legal)/.test*/ a"; + + let v = tokenize(source); + assert_eq!( + &v.0, + &[ + Token::Comment("(contact|legal)/.test"), + Token::Char(ReservedChar::Space), + Token::Other("a"), + ], + ); +} diff --git a/vendor/minifier/src/js/tools.rs b/vendor/minifier/src/js/tools.rs new file mode 100644 index 000000000..42d32aaab --- /dev/null +++ b/vendor/minifier/src/js/tools.rs @@ -0,0 +1,1156 @@ +// Take a look at the license at the top of the repository in the LICENSE file. 
+ +use std::{fmt, io}; + +use crate::js::token::{self, Keyword, ReservedChar, Token, Tokens}; +use crate::js::utils::{get_array, get_variable_name_and_value_positions, VariableNameGenerator}; + +use std::collections::{HashMap, HashSet}; + +/*#[derive(Debug, Clone, PartialEq, Eq)] +enum Elem<'a> { + Function(Function<'a>), + Block(Block<'a>), + Variable(Variable<'a>), + Condition(token::Condition), + Loop(Loop<'a>), + Operation(Operation<'a>), +} + +impl<'a> Elem<'a> { + fn is_condition(&self) -> bool { + match *self { + Elem::Condition(_) => true, + _ => false, + } + } +} + +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +enum ConditionType { + If, + ElseIf, + Else, + Ternary, +} + +#[derive(Clone, PartialEq, Eq, Debug)] +struct Block<'a> { + elems: Vec<Elem<'a>>, +} + +#[derive(Clone, PartialEq, Eq, Debug)] +struct Argument<'a> { + name: &'a str, +} + +#[derive(Clone, PartialEq, Eq, Debug)] +struct Function<'a> { + name: Option<&'a str>, + args: Vec<Argument<'a>>, + block: Block<'a>, +} + +#[derive(Clone, PartialEq, Eq, Debug)] +struct Variable<'a> { + name: &'a str, + value: Option<&'a str>, +} + +/*struct Condition<'a> { + ty_: ConditionType, + condition: &'a str, + block: Block<'a>, +}*/ + +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +enum LoopType { + Do, + For, + While, +} + +#[derive(Clone, PartialEq, Eq, Debug)] +struct Loop<'a> { + ty_: LoopType, + condition: Vec<Elem<'a>>, + block: Block<'a>, +} + +#[derive(Clone, PartialEq, Eq, Debug)] +struct Operation<'a> { + content: &'a str, +} + +fn get_while_condition<'a>(tokens: &[token::Token<'a>], pos: &mut usize) -> Result<Vec<Elem<'a>>, String> { + let tmp = *pos; + *pos += 1; + if let Err(e) = match tokens.get(tmp) { + Some(token::Token::Char(token::ReservedChar::OpenParenthese)) => Ok(()), + Some(e) => Err(format!("Expected \"(\", found \"{:?}\"", e)), + None => Err("Expected \"(\", found nothing...".to_owned()), + } { + return Err(e); + } + let mut elems: Vec<Elem<'a>> = Vec::with_capacity(1); + + while let 
Some(e) = tokens.get(*pos) { + *pos += 1; + match e { + token::Token::Char(token::ReservedChar::CloseParenthese) => return Ok(elems), + token::Token::Condition(e) => { + if let Some(cond) = elems.last() { + if cond.is_condition() { + return Err(format!("\"{:?}\" cannot follow \"{:?}\"", e, cond)); + } + } + } + _ => {} + } + } + Err("Expected \")\", found nothing...".to_owned()) +} + +fn get_do<'a>(tokens: &[token::Token<'a>], pos: &mut usize) -> Result<Elem<'a>, String> { + let tmp = *pos; + *pos += 1; + let block = match tokens.get(tmp) { + Some(token::Token::Char(token::ReservedChar::OpenCurlyBrace)) => get_block(tokens, pos, true), + Some(e) => Err(format!("Expected \"{{\", found \"{:?}\"", e)), + None => Err("Expected \"{\", found nothing...".to_owned()), + }?; + let tmp = *pos; + *pos += 1; + let condition = match tokens.get(tmp) { + Some(token::Token::Keyword(token::Keyword::While)) => get_while_condition(tokens, pos), + Some(e) => Err(format!("Expected \"while\", found \"{:?}\"", e)), + None => Err("Expected \"while\", found nothing...".to_owned()), + }?; + let mut loop_ = Loop { + ty_: LoopType::Do, + condition: condition, + block, + }; + Ok(Elem::Loop(loop_)) +} + +fn get_block<'a>(tokens: &[token::Token<'a>], pos: &mut usize, + start_with_paren: bool) -> Result<Block<'a>, String> { + let mut block = Block { elems: Vec::with_capacity(2) }; + while let Some(e) = tokens.get(*pos) { + *pos += 1; + block.elems.push(match e { + token::Token::Keyword(token::Keyword::Do) => get_do(tokens, pos), + token::Token::Char(token::ReservedChar::CloseCurlyBrace) => { + if start_with_paren { + return Ok(block); + } + return Err("Unexpected \"}\"".to_owned()); + } + }?); + } + if !start_with_paren { + Ok(block) + } else { + Err("Expected \"}\" at the end of the block but didn't find one...".to_owned()) + } +} + +fn build_ast<'a>(v: &[token::Token<'a>]) -> Result<Elem<'a>, String> { + let mut pos = 0; + + match get_block(v, &mut pos, false) { + Ok(ast) => 
Ok(Elem::Block(ast)), + Err(e) => Err(e), + } +}*/ + +/// Minifies a given JS source code. +/// +/// # Example +/// +/// ```rust +/// use minifier::js::minify; +/// +/// let js = r#" +/// function forEach(data, func) { +/// for (var i = 0; i < data.length; ++i) { +/// func(data[i]); +/// } +/// }"#.into(); +/// let js_minified = minify(js); +/// assert_eq!( +/// &js_minified.to_string(), +/// "function forEach(data,func){for(var i=0;i<data.length;++i){func(data[i])}}", +/// ); +/// ``` +#[inline] +pub fn minify(source: &str) -> Minified<'_> { + Minified(token::tokenize(source).apply(crate::js::clean_tokens)) +} + +pub struct Minified<'a>(token::Tokens<'a>); + +impl<'a> Minified<'a> { + pub fn write<W: io::Write>(self, w: W) -> io::Result<()> { + self.0.write(w) + } +} + +impl<'a> fmt::Display for Minified<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} + +// TODO: No scope handling or anything. Might be nice as a second step to add it... +fn get_variables_name<'a>( + tokens: &'a Tokens<'a>, +) -> (HashSet<&'a str>, HashMap<&'a str, (usize, usize)>) { + let mut ret = HashSet::new(); + let mut variables = HashMap::new(); + let mut pos = 0; + + while pos < tokens.len() { + if tokens[pos].is_keyword() || tokens[pos].is_other() { + if let Some((var_pos, Some(value_pos))) = + get_variable_name_and_value_positions(tokens, pos) + { + pos = value_pos; + if let Some(var_name) = tokens[var_pos].get_other() { + if !var_name.starts_with("r_") { + pos += 1; + continue; + } + ret.insert(var_name); + } + if let Some(s) = tokens[value_pos].get_string() { + variables.insert(s, (var_pos, value_pos)); + } + } + } + pos += 1; + } + (ret, variables) +} + +fn aggregate_strings_inner<'a, 'b: 'a>( + mut tokens: Tokens<'a>, + separation_token: Option<Token<'b>>, +) -> Tokens<'a> { + let mut new_vars = Vec::with_capacity(50); + let mut to_replace: Vec<(usize, usize)> = Vec::new(); + + for (var_name, positions) in { + let mut strs: HashMap<&Token<'_>, 
Vec<usize>> = HashMap::with_capacity(1000); + let mut validated: HashMap<&Token<'_>, String> = HashMap::with_capacity(100); + + let mut var_gen = VariableNameGenerator::new(Some("r_"), 2); + let mut next_name = var_gen.to_string(); + + let (all_variables, values) = get_variables_name(&tokens); + while all_variables.contains(&next_name.as_str()) { + var_gen.next(); + next_name = var_gen.to_string(); + } + + for pos in 0..tokens.len() { + let token = &tokens[pos]; + if let Some(str_token) = token.get_string() { + if let Some((var_pos, string_pos)) = values.get(&str_token) { + if pos != *string_pos { + to_replace.push((pos, *var_pos)); + } + continue; + } + let x = strs.entry(token).or_insert_with(|| Vec::with_capacity(1)); + x.push(pos); + if x.len() > 1 && validated.get(token).is_none() { + let len = str_token.len(); + // Computation here is simple, we declare new variables when creating this so + // the total of characters must be shorter than: + // `var r_aa=...;` -> 10 + `r_aa` -> 14 + if (x.len() + 2/* quotes */) * len + > next_name.len() + str_token.len() + 6 /* var _=_;*/ + x.len() * next_name.len() + { + validated.insert(token, next_name.clone()); + var_gen.next(); + next_name = var_gen.to_string(); + while all_variables.contains(&next_name.as_str()) { + var_gen.next(); + next_name = var_gen.to_string(); + } + } + } + } + } + let mut ret = Vec::with_capacity(validated.len()); + + // We need this macro to avoid having to sort the set when not testing the crate. + //#[cfg(test)] + macro_rules! inner_loop { + ($x:ident) => {{ + let mut $x = $x.into_iter().collect::<Vec<_>>(); + $x.sort_unstable_by(|a, b| a.1.cmp(&b.1)); + $x + }}; + } + /*#[cfg(not(test))] + macro_rules! 
inner_loop { + ($x:ident) => { + $x.into_iter() + } + }*/ + + for (token, var_name) in inner_loop!(validated) { + ret.push((var_name, strs.remove(&token).unwrap())); + var_gen.next(); + } + ret + } { + if new_vars.is_empty() { + new_vars.push(Token::Keyword(Keyword::Var)); + } else { + new_vars.push(Token::Char(ReservedChar::Comma)); + } + new_vars.push(Token::CreatedVarDecl(format!( + "{}={}", + var_name, tokens[positions[0]] + ))); + for pos in positions { + tokens.0[pos] = Token::CreatedVar(var_name.clone()); + } + } + if !new_vars.is_empty() { + new_vars.push(Token::Char(ReservedChar::SemiColon)); + } + for (to_replace_pos, variable_pos) in to_replace { + tokens.0[to_replace_pos] = tokens.0[variable_pos].clone(); + } + if let Some(token) = separation_token { + new_vars.push(token); + } + new_vars.append(&mut tokens.0); + Tokens(new_vars) +} + +/// Aggregate litteral strings. For instance, if the string litteral "Oh look over there!" +/// appears more than once, a variable will be created with this value and used everywhere the +/// string appears. Of course, this replacement is only performed when it allows to take +/// less space. +/// +/// # Example +/// +/// ```rust,no_run +/// extern crate minifier; +/// use minifier::js::{aggregate_strings, clean_tokens, simple_minify}; +/// use std::fs; +/// +/// fn main() { +/// let content = fs::read("some_file.js").expect("file not found"); +/// let source = String::from_utf8_lossy(&content); +/// let s = simple_minify(&source); // First we get the tokens list. +/// let s = s.apply(aggregate_strings) // This `apply` aggregates string litterals. +/// .apply(clean_tokens) // This one is used to remove useless chars. +/// .to_string(); // And we finally convert to string. 
+/// println!("result: {}", s); +/// } +/// ``` +#[inline] +pub fn aggregate_strings(tokens: Tokens<'_>) -> Tokens<'_> { + aggregate_strings_inner(tokens, None) +} + +/// Exactly like `aggregate_strings` except this one expects a separation token +/// to be passed. This token will be placed between the created variables for the +/// strings aggregation and the rest. +/// +/// # Example +/// +/// Let's add a backline between the created variables and the rest of the code: +/// +/// ```rust,no_run +/// extern crate minifier; +/// use minifier::js::{ +/// aggregate_strings_with_separation, +/// clean_tokens, +/// simple_minify, +/// Token, +/// ReservedChar, +/// }; +/// use std::fs; +/// +/// fn main() { +/// let content = fs::read("some_file.js").expect("file not found"); +/// let source = String::from_utf8_lossy(&content); +/// let s = simple_minify(&source); // First we get the tokens list. +/// let s = s.apply(|f| { +/// aggregate_strings_with_separation(f, Token::Char(ReservedChar::Backline)) +/// }) // We add a backline between the variable and the rest. +/// .apply(clean_tokens) // We clean the tokens. +/// .to_string(); // And we finally convert to string. 
+/// println!("result: {}", s); +/// } +/// ``` +#[inline] +pub fn aggregate_strings_with_separation<'a, 'b: 'a>( + tokens: Tokens<'a>, + separation_token: Token<'b>, +) -> Tokens<'a> { + aggregate_strings_inner(tokens, Some(separation_token)) +} + +fn aggregate_strings_into_array_inner<'a, 'b: 'a, T: Fn(&Tokens<'a>, usize) -> bool>( + mut tokens: Tokens<'a>, + array_name: &str, + separation_token: Option<Token<'b>>, + filter: T, +) -> Tokens<'a> { + let mut to_insert = Vec::with_capacity(100); + let mut to_replace = Vec::with_capacity(100); + + { + let mut to_ignore = HashSet::new(); + // key: the token string + // value: (position in the array, positions in the tokens list, need creation) + let mut strs: HashMap<&str, (usize, Vec<usize>, bool)> = HashMap::with_capacity(1000); + let (current_array_values, need_recreate, mut end_bracket) = + match get_array(&tokens, array_name) { + Some((s, p)) => (s, false, p), + None => (Vec::new(), true, 0), + }; + let mut validated: HashSet<&str> = HashSet::new(); + + let mut array_pos = 0; + for s in current_array_values.iter() { + if let Some(st) = tokens.0[*s].get_string() { + strs.insert(&st[1..st.len() - 1], (array_pos, vec![], false)); + array_pos += 1; + validated.insert(&st[1..st.len() - 1]); + to_ignore.insert(*s); + } + } + + let mut array_pos_str = array_pos.to_string(); + for pos in 0..tokens.len() { + if to_ignore.contains(&pos) { + continue; + } + let token = &tokens[pos]; + if let Some(str_token) = token.get_string() { + if !filter(&tokens, pos) { + continue; + } + let s = &str_token[1..str_token.len() - 1]; + let x = strs + .entry(s) + .or_insert_with(|| (0, Vec::with_capacity(1), true)); + x.1.push(pos); + if x.1.len() > 1 && !validated.contains(s) { + let len = s.len(); + if len * x.1.len() + > (array_name.len() + array_pos_str.len() + 2) * x.1.len() + + array_pos_str.len() + + 2 + { + validated.insert(&str_token[1..str_token.len() - 1]); + x.0 = array_pos; + array_pos += 1; + array_pos_str = 
array_pos.to_string(); + } + } + } + } + + // TODO: + // 1. Sort strings by length (the smallest should take the smallest numbers + // for bigger gains). + // 2. Compute "score" for all strings of the same length and sort the strings + // of the same length with this score. + // 3. Loop again over strings and remove those who shouldn't be there anymore. + // 4. Repeat. + // + // ALTERNATIVE: + // + // Compute the score based on: + // current number of digits * str length * str occurence + // + // ^ This second solution should bring even better results. + // + // ALSO: if an array with such strings already exists, it'd be worth it to recompute + // everything again. + let mut validated = validated.iter().map(|v| (strs[v].0, v)).collect::<Vec<_>>(); + validated.sort_unstable_by(|(p1, _), (p2, _)| p2.cmp(p1)); + + if need_recreate && !validated.is_empty() { + if let Some(token) = separation_token { + to_insert.push((0, token)); + } + to_insert.push((0, Token::Char(ReservedChar::SemiColon))); + to_insert.push((0, Token::Char(ReservedChar::CloseBracket))); + to_insert.push((0, Token::Char(ReservedChar::OpenBracket))); + to_insert.push((0, Token::CreatedVarDecl(format!("var {}=", array_name)))); + + end_bracket = 2; + } + + let mut iter = validated.iter().peekable(); + while let Some((array_pos, s)) = iter.next() { + let (_, ref tokens_pos, create_array_entry) = strs[*s]; + let array_index = Token::CreatedVar(format!("{}[{}]", array_name, array_pos)); + for token in tokens_pos.iter() { + to_replace.push((*token, array_index.clone())); + } + if !create_array_entry { + continue; + } + to_insert.push((end_bracket, Token::CreatedVar(format!("\"{}\"", *s)))); + if iter.peek().is_none() && current_array_values.is_empty() { + continue; + } + to_insert.push((end_bracket, Token::Char(ReservedChar::Comma))); + } + } + for (pos, rep) in to_replace.into_iter() { + tokens.0[pos] = rep; + } + for (pos, rep) in to_insert.into_iter() { + tokens.0.insert(pos, rep); + } + tokens +} + +/// 
Exactly like `aggregate_strings_into_array` except this one expects a separation token +/// to be passed. This token will be placed between the created array for the +/// strings aggregation and the rest. +/// +/// # Example +/// +/// Let's add a backline between the created variables and the rest of the code: +/// +/// ```rust,no_run +/// extern crate minifier; +/// use minifier::js::{ +/// aggregate_strings_into_array_with_separation, +/// clean_tokens, +/// simple_minify, +/// Token, +/// ReservedChar, +/// }; +/// use std::fs; +/// +/// fn main() { +/// let content = fs::read("some_file.js").expect("file not found"); +/// let source = String::from_utf8_lossy(&content); +/// let s = simple_minify(&source); // First we get the tokens list. +/// let s = s.apply(|f| { +/// aggregate_strings_into_array_with_separation(f, "R", Token::Char(ReservedChar::Backline)) +/// }) // We add a backline between the variable and the rest. +/// .apply(clean_tokens) // We clean the tokens. +/// .to_string(); // And we finally convert to string. +/// println!("result: {}", s); +/// } +/// ``` +#[inline] +pub fn aggregate_strings_into_array_with_separation<'a, 'b: 'a>( + tokens: Tokens<'a>, + array_name: &str, + separation_token: Token<'b>, +) -> Tokens<'a> { + aggregate_strings_into_array_inner(tokens, array_name, Some(separation_token), |_, _| true) +} + +/// Same as [`aggregate_strings_into_array_with_separation`] except it allows certain strings to +/// not be aggregated thanks to the `filter` parameter. If it returns `false`, then the string will +/// be ignored. +#[inline] +pub fn aggregate_strings_into_array_with_separation_filter<'a, 'b: 'a, T>( + tokens: Tokens<'a>, + array_name: &str, + separation_token: Token<'b>, + filter: T, +) -> Tokens<'a> +where + T: Fn(&Tokens<'a>, usize) -> bool, +{ + aggregate_strings_into_array_inner(tokens, array_name, Some(separation_token), filter) +} + +/// Aggregate litteral strings. For instance, if the string litteral "Oh look over there!" 
+/// appears more than once, it will be added to the generated array and used everywhere the +/// string appears. Of course, this replacement is only performed when it allows to take +/// less space. +/// +/// # Example +/// +/// ```rust,no_run +/// extern crate minifier; +/// use minifier::js::{aggregate_strings_into_array, clean_tokens, simple_minify}; +/// use std::fs; +/// +/// fn main() { +/// let content = fs::read("some_file.js").expect("file not found"); +/// let source = String::from_utf8_lossy(&content); +/// let s = simple_minify(&source); // First we get the tokens list. +/// let s = s.apply(|f| aggregate_strings_into_array(f, "R")) // This `apply` aggregates string litterals. +/// .apply(clean_tokens) // This one is used to remove useless chars. +/// .to_string(); // And we finally convert to string. +/// println!("result: {}", s); +/// } +/// ``` +#[inline] +pub fn aggregate_strings_into_array<'a>(tokens: Tokens<'a>, array_name: &str) -> Tokens<'a> { + aggregate_strings_into_array_inner(tokens, array_name, None, |_, _| true) +} + +/// Same as [`aggregate_strings_into_array`] except it allows certain strings to not be aggregated +/// thanks to the `filter` parameter. If it returns `false`, then the string will be ignored. +#[inline] +pub fn aggregate_strings_into_array_filter<'a, T>( + tokens: Tokens<'a>, + array_name: &str, + filter: T, +) -> Tokens<'a> +where + T: Fn(&Tokens<'a>, usize) -> bool, +{ + aggregate_strings_into_array_inner(tokens, array_name, None, filter) +} + +/// Simple function to get the untouched token list. Useful in case you want to perform some +/// actions directly on it. 
+/// +/// # Example +/// +/// ```rust,no_run +/// extern crate minifier; +/// use minifier::js::simple_minify; +/// use std::fs; +/// +/// fn main() { +/// let content = fs::read("some_file.js").expect("file not found"); +/// let source = String::from_utf8_lossy(&content); +/// let s = simple_minify(&source); +/// println!("result: {:?}", s); // We now have the tokens list. +/// } +/// ``` +#[inline] +pub fn simple_minify(source: &str) -> Tokens<'_> { + token::tokenize(source) +} + +#[test] +fn aggregate_strings_in_array() { + let source = r#"var x = ["a nice string", "a nice string", "another nice string", "cake!", + "cake!", "a nice string", "cake!", "cake!", "cake!"];"#; + let expected_result = "var R=[\"a nice string\",\"cake!\"];var x=[R[0],R[0],\ + \"another nice string\",R[1],R[1],R[0],R[1],R[1],R[1]]"; + + let result = simple_minify(source) + .apply(crate::js::clean_tokens) + .apply(|c| aggregate_strings_into_array(c, "R")) + .to_string(); + assert_eq!(result, expected_result); + + let source = r#"var x = ["a nice string", "a nice string", "another nice string", "cake!", + "cake!", "a nice string", "cake!", "cake!", "cake!"];"#; + let expected_result = "var R=[\"a nice string\",\"cake!\"];\nvar x=[R[0],R[0],\ + \"another nice string\",R[1],R[1],R[0],R[1],R[1],R[1]]"; + + let result = simple_minify(source) + .apply(crate::js::clean_tokens) + .apply(|c| { + aggregate_strings_into_array_with_separation( + c, + "R", + Token::Char(ReservedChar::Backline), + ) + }) + .to_string(); + assert_eq!(result, expected_result); + + let source = r#"var x = ["a nice string", "a nice string", "another nice string", "another nice string", "another nice string", "another nice string","cake!","cake!", "a nice string", "cake!", "cake!", "cake!"];"#; + let expected_result = "var R=[\"a nice string\",\"another nice string\",\"cake!\"];\n\ + var x=[R[0],R[0],R[1],R[1],R[1],R[1],R[2],R[2],R[0],R[2],\ + R[2],R[2]]"; + + let result = simple_minify(source) + 
.apply(crate::js::clean_tokens) + .apply(|c| { + aggregate_strings_into_array_with_separation( + c, + "R", + Token::Char(ReservedChar::Backline), + ) + }) + .to_string(); + assert_eq!(result, expected_result); +} + +#[test] +fn aggregate_strings_in_array_filter() { + let source = r#"var searchIndex = {};searchIndex['duplicate_paths'] = {'aaaaaaaa': 'bbbbbbbb', 'bbbbbbbb': 'aaaaaaaa', 'duplicate_paths': 'aaaaaaaa'};"#; + let expected_result = "var R=[\"bbbbbbbb\",\"aaaaaaaa\"];\nvar searchIndex={};searchIndex['duplicate_paths']={R[1]:R[0],R[0]:R[1],'duplicate_paths':R[1]}"; + + let result = simple_minify(source) + .apply(crate::js::clean_tokens) + .apply(|c| { + aggregate_strings_into_array_with_separation_filter( + c, + "R", + Token::Char(ReservedChar::Backline), + |tokens, pos| { + pos < 2 + || !tokens[pos - 1].eq_char(ReservedChar::OpenBracket) + || tokens[pos - 2].get_other() != Some("searchIndex") + }, + ) + }) + .to_string(); + assert_eq!(result, expected_result); + + let source = r#"var searchIndex = {};searchIndex['duplicate_paths'] = {'aaaaaaaa': 'bbbbbbbb', 'bbbbbbbb': 'aaaaaaaa', 'duplicate_paths': 'aaaaaaaa', 'x': 'duplicate_paths'};"#; + let expected_result = "var R=[\"bbbbbbbb\",\"aaaaaaaa\",\"duplicate_paths\"];\nvar searchIndex={};searchIndex['duplicate_paths']={R[1]:R[0],R[0]:R[1],R[2]:R[1],'x':R[2]}"; + + let result = simple_minify(source) + .apply(crate::js::clean_tokens) + .apply(|c| { + aggregate_strings_into_array_with_separation_filter( + c, + "R", + Token::Char(ReservedChar::Backline), + |tokens, pos| { + pos < 2 + || !tokens[pos - 1].eq_char(ReservedChar::OpenBracket) + || tokens[pos - 2].get_other() != Some("searchIndex") + }, + ) + }) + .to_string(); + assert_eq!(result, expected_result); +} + +#[test] +fn aggregate_strings_in_array_existing() { + let source = r#"var R=[];var x = ["a nice string", "a nice string", "another nice string", "cake!", + "cake!", "a nice string", "cake!", "cake!", "cake!"];"#; + let expected_result = "var R=[\"a 
nice string\",\"cake!\"];var x=[R[0],R[0],\ + \"another nice string\",R[1],R[1],R[0],R[1],R[1],R[1]]"; + + let result = simple_minify(source) + .apply(crate::js::clean_tokens) + .apply(|c| aggregate_strings_into_array(c, "R")) + .to_string(); + assert_eq!(result, expected_result); + + let source = r#"var R=["a nice string"];var x = ["a nice string", "a nice string", "another nice string", "cake!", + "cake!", "a nice string", "cake!", "cake!", "cake!"];"#; + let expected_result = "var R=[\"a nice string\",\"cake!\"];var x=[R[0],R[0],\ + \"another nice string\",R[1],R[1],R[0],R[1],R[1],R[1]]"; + + let result = simple_minify(source) + .apply(crate::js::clean_tokens) + .apply(|c| aggregate_strings_into_array(c, "R")) + .to_string(); + assert_eq!(result, expected_result); + + let source = r#"var y = 12;var R=["a nice string"];var x = ["a nice string", "a nice string", "another nice string", "cake!", + "cake!", "a nice string", "cake!", "cake!", "cake!"];"#; + let expected_result = "var y=12;var R=[\"a nice string\",\"cake!\"];var x=[R[0],R[0],\ + \"another nice string\",R[1],R[1],R[0],R[1],R[1],R[1]]"; + + let result = simple_minify(source) + .apply(crate::js::clean_tokens) + .apply(|c| aggregate_strings_into_array(c, "R")) + .to_string(); + assert_eq!(result, expected_result); + + let source = r#"var R=["osef1", "o2", "damn"]; + var x = ["a nice string", "a nice string", "another nice string", "cake!", + "cake!", "a nice string", "cake!", "cake!", "cake!"];"#; + let expected_result = "var R=[\"osef1\",\"o2\",\"damn\",\"a nice string\",\"cake!\"];\ + var x=[R[3],R[3],\"another nice string\",R[4],R[4],R[3],R[4],R[4],R[4]]"; + + let result = simple_minify(source) + .apply(crate::js::clean_tokens) + .apply(|c| aggregate_strings_into_array(c, "R")) + .to_string(); + assert_eq!(result, expected_result); +} + +#[test] +fn string_duplicates() { + let source = r#"var x = ["a nice string", "a nice string", "another nice string", "cake!", + "cake!", "a nice string", "cake!", 
"cake!", "cake!"];"#; + let expected_result = "var r_aa=\"a nice string\",r_ba=\"cake!\";var x=[r_aa,r_aa,\ + \"another nice string\",r_ba,r_ba,r_aa,r_ba,r_ba,r_ba]"; + + let result = simple_minify(source) + .apply(aggregate_strings) + .apply(crate::js::clean_tokens) + .to_string(); + assert_eq!(result, expected_result); +} + +#[test] +fn already_existing_var() { + let source = r#"var r_aa = "a nice string"; var x = ["a nice string", "a nice string", + "another nice string", "cake!", + "cake!", "a nice string", "cake!", "cake!", "cake!"];"#; + let expected_result = "var r_ba=\"cake!\";var r_aa=\"a nice string\";var x=[r_aa,r_aa,\ + \"another nice string\",r_ba,r_ba,r_aa,r_ba,r_ba,r_ba]"; + + let result = simple_minify(source) + .apply(aggregate_strings) + .apply(crate::js::clean_tokens) + .to_string(); + assert_eq!(result, expected_result); +} + +#[test] +fn string_duplicates_variables_already_exist() { + let source = r#"var r_aa=1;var x = ["a nice string", "a nice string", "another nice string", "cake!", + "cake!", "a nice string", "cake!", "cake!", "cake!"];"#; + let expected_result = "var r_ba=\"a nice string\",r_ca=\"cake!\";\ + var r_aa=1;var x=[r_ba,r_ba,\ + \"another nice string\",r_ca,r_ca,r_ba,r_ca,r_ca,r_ca]"; + + let result = simple_minify(source) + .apply(aggregate_strings) + .apply(crate::js::clean_tokens) + .to_string(); + assert_eq!(result, expected_result); +} + +#[test] +fn string_duplicates_with_separator() { + use self::token::ReservedChar; + + let source = r#"var x = ["a nice string", "a nice string", "another nice string", "cake!", + "cake!", "a nice string", "cake!", "cake!", "cake!"];"#; + let expected_result = "var r_aa=\"a nice string\",r_ba=\"cake!\";\nvar x=[r_aa,r_aa,\ + \"another nice string\",r_ba,r_ba,r_aa,r_ba,r_ba,r_ba]"; + let result = simple_minify(source) + .apply(crate::js::clean_tokens) + .apply(|f| aggregate_strings_with_separation(f, Token::Char(ReservedChar::Backline))) + .to_string(); + assert_eq!(result, expected_result); 
+} + +#[test] +fn clean_except() { + use self::token::ReservedChar; + + let source = r#"var x = [1, 2, 3]; +var y = "salut"; +var z = "ok!";"#; + let expected = r#"var x=[1,2,3]; +var y="salut"; +var z="ok!""#; + + let result = simple_minify(source) + .apply(|f| { + crate::js::clean_tokens_except(f, |c| c.get_char() != Some(ReservedChar::Backline)) + }) + .to_string(); + assert_eq!(result, expected); +} + +#[test] +fn clean_except2() { + use self::token::ReservedChar; + + let source = "let x = [ 1, 2, \t3];"; + let expected = "let x = [ 1, 2, 3];"; + + let result = simple_minify(source) + .apply(|f| { + crate::js::clean_tokens_except(f, |c| { + c.get_char() != Some(ReservedChar::Space) + && c.get_char() != Some(ReservedChar::SemiColon) + }) + }) + .to_string(); + assert_eq!(result, expected); +} + +#[test] +fn clean_except3() { + use self::token::ReservedChar; + + let source = "let x = [ 1, 2, \t3];"; + let expected = "let x=[1,2,\t3];"; + + let result = simple_minify(source) + .apply(|f| { + crate::js::clean_tokens_except(f, |c| { + c.get_char() != Some(ReservedChar::Tab) + && c.get_char() != Some(ReservedChar::SemiColon) + }) + }) + .to_string(); + assert_eq!(result, expected); +} + +#[test] +fn name_generator() { + let s = std::iter::repeat('a').take(36).collect::<String>(); + // We need to generate enough long strings to reach the point that the name generator + // generates names with 3 characters. 
+ let s = std::iter::repeat(s) + .take(20000) + .enumerate() + .map(|(pos, s)| format!("{}{}", s, pos)) + .collect::<Vec<_>>(); + let source = format!( + "var x = [{}];", + s.iter() + .map(|s| format!("\"{0}\",\"{0}\"", s)) + .collect::<Vec<_>>() + .join(",") + ); + let result = simple_minify(&source) + .apply(crate::js::clean_tokens) + .apply(aggregate_strings) + .to_string(); + assert!(result.find(",r_aaa=").is_some()); + assert!(result.find(",r_ab=").unwrap() < result.find(",r_ba=").unwrap()); +} + +#[test] +fn simple_quote() { + let source = r#"var x = "\\";"#; + let expected_result = r#"var x="\\""#; + assert_eq!(minify(source).to_string(), expected_result); +} + +#[test] +fn js_minify_test() { + let source = r##" +var foo = "something"; + +var another_var = 2348323; + +// who doesn't like comments? +/* and even longer comments? + +like +on +a +lot +of +lines! + +Fun! +*/ +function far_away(x, y) { + var x2 = x + 4; + return x * x2 + y; +} + +// this call is useless +far_away(another_var, 12); +// this call is useless too +far_away(another_var, 12); +"##; + + let expected_result = "var foo=\"something\";var another_var=2348323;function far_away(x,y){\ + var x2=x+4;return x*x2+y}far_away(another_var,12);far_away(another_var,\ + 12)"; + assert_eq!(minify(source).to_string(), expected_result); +} + +#[test] +fn another_js_test() { + let source = r#" +/*! let's keep this license + * + * because everyone likes licenses! + * + * right? + */ + +function forEach(data, func) { + for (var i = 0; i < data.length; ++i) { + func(data[i]); + } +} + +forEach([0, 1, 2, 3, 4, + 5, 6, 7, 8, 9], function (x) { + console.log(x); + }); +// I think we're done? +console.log('done!'); +"#; + + let expected_result = r#"/*! let's keep this license + * + * because everyone likes licenses! + * + * right? 
+ */function forEach(data,func){for(var i=0;i<data.length;++i){func(data[i])}}forEach([0,1,2,3,4,5,6,7,8,9],function(x){console.log(x)});console.log('done!')"#; + assert_eq!(minify(source).to_string(), expected_result); +} + +#[test] +fn comment_issue() { + let source = r#" +search_input.onchange = function(e) { + // Do NOT e.preventDefault() here. It will prevent pasting. + clearTimeout(searchTimeout); + // zero-timeout necessary here because at the time of event handler execution the + // pasted content is not in the input field yet. Shouldn’t make any difference for + // change, though. + setTimeout(search, 0); +}; +"#; + let expected_result = "search_input.onchange=function(e){clearTimeout(searchTimeout);\ + setTimeout(search,0)}"; + assert_eq!(minify(source).to_string(), expected_result); +} + +#[test] +fn missing_whitespace() { + let source = r#" +for (var entry in results) { + if (results.hasOwnProperty(entry)) { + ar.push(results[entry]); + } +}"#; + let expected_result = "for(var entry in results){if(results.hasOwnProperty(entry)){\ + ar.push(results[entry])}}"; + assert_eq!(minify(source).to_string(), expected_result); +} + +#[test] +fn weird_regex_issue() { + let source = r#" +val = val.replace(/\_/g, ""); + +var valGenerics = extractGenerics(val);"#; + let expected_result = "val=val.replace(/\\_/g,\"\");var valGenerics=extractGenerics(val)"; + assert_eq!(minify(source).to_string(), expected_result); +} + +#[test] +fn keep_space() { + fn inner_double_checks(source: &str, expected: &str) { + assert_eq!(minify(source).to_string(), expected); + let s = minify(source); + let mut out: Vec<u8> = Vec::new(); + s.write(&mut out).unwrap(); + assert_eq!(String::from_utf8(out).unwrap(), expected); + } + + inner_double_checks("return 12;return x;", "return 12;return x"); + inner_double_checks("t in e", "t in e"); + inner_double_checks("t + 1 in e", "t+1 in e"); + inner_double_checks("t - 1 in e", "t-1 in e"); + inner_double_checks("'a' in e", "'a'in e"); + 
inner_double_checks("/a/g in e", "/a/g in e"); + inner_double_checks("/a/i in e", "/a/i in e"); + + inner_double_checks("t instanceof e", "t instanceof e"); + inner_double_checks("t + 1 instanceof e", "t+1 instanceof e"); + inner_double_checks("t - 1 instanceof e", "t-1 instanceof e"); + inner_double_checks("'a' instanceof e", "'a'instanceof e"); + inner_double_checks("/a/g instanceof e", "/a/g instanceof e"); + inner_double_checks("/a/i instanceof e", "/a/i instanceof e"); + + inner_double_checks("function foo() { let x = 12; }", "function foo(){let x=12}"); + inner_double_checks( + r#""use strict"; + +(function() { + const itemTypes = [ + "mod", + "externcrate", + "import", + "struct", + ]; + const TY_PRIMITIVE = itemTypes; + function hasOwnPropertyRustdoc() {} +})();"#, + "\"use strict\";(function(){const itemTypes=[\"mod\",\"externcrate\",\"import\",\"struct\"\ + ,];const TY_PRIMITIVE=itemTypes;function hasOwnPropertyRustdoc(){}})()", + ); +} + +#[test] +fn test_remove_extra_whitespace_before_typeof() { + let source = "var x = typeof 'foo';var y = typeof x;case typeof 'foo': 'bla'"; + + let expected_result = "var x=typeof'foo';var y=typeof x;case typeof'foo':'bla'"; + assert_eq!(minify(source).to_string(), expected_result); +} + +#[test] +fn test_remove_extra_whitespace_before_in() { + let source = r#"if ("key" in ev && typeof ev) { return true; } +if (x in ev && typeof ev) { return true; } +if (true in ev) { return true; }"#; + + let expected_result = r#"if("key"in ev&&typeof ev){return true}if(x in ev&&typeof ev){return true}if(true in ev){return true}"#; + assert_eq!(minify(source).to_string(), expected_result); +} + +#[test] +fn test_remove_extra_whitespace_before_operator() { + let source = "( x ) / 2; x / y;x /= y"; + + let expected_result = "(x)/2;x/y;x/=y"; + assert_eq!(minify(source).to_string(), expected_result); +} + +#[test] +fn check_regex_syntax() { + let source = "console.log(/MSIE|Trident|Edge/.test(window.navigator.userAgent));"; + let expected 
= "console.log(/MSIE|Trident|Edge/.test(window.navigator.userAgent))"; + assert_eq!(minify(source).to_string(), expected); +} + +#[test] +fn minify_minified() { + let source = "function (i, n, a) { i[n].type.replace(/ *;(.|\\s)*/,\"\")===t&&a.push(i[n].MathJax.elementJax);return a}"; + let expected = "function(i,n,a){i[n].type.replace(/ *;(.|\\s)*/,\"\")===t&&a.push(i[n].MathJax.elementJax);return a}"; + assert_eq!(minify(source).to_string(), expected); +} + +#[test] +fn check_string() { + let source = r###" + const a = 123; + const b = "123"; + const c = `the number is ${a} <-- note the spaces here`; + const d = ` ${a} ${b} `; + "###; + let expected = "const a=123;const b=\"123\";const c=`the number is ${a} <-- note the spaces \ + here`;const d=` ${a} ${b} `"; + assert_eq!(minify(source).to_string(), expected); +} + +// TODO: requires AST to fix this issue! +/*#[test] +fn no_semi_colon() { + let source = r#" +console.log(1) +console.log(2) +var x = 12; +"#; + let expected_result = r#"console.log(1);console.log(2);var x=12;"#; + assert_eq!(minify(source).to_string(), expected_result); +}*/ + +// TODO: requires AST to fix this issue! +/*#[test] +fn correct_replace_for_backline() { + let source = r#" +function foo() { + return + 12; +} +"#; + let expected_result = r#"function foo(){return 12;}"#; + assert_eq!(minify(source).to_string(), expected_result); +}*/ diff --git a/vendor/minifier/src/js/utils.rs b/vendor/minifier/src/js/utils.rs new file mode 100644 index 000000000..db765e804 --- /dev/null +++ b/vendor/minifier/src/js/utils.rs @@ -0,0 +1,575 @@ +// Take a look at the license at the top of the repository in the LICENSE file. 
/// Generator for the short variable names (`a`, `b`, …, `z`, `A`, …, `9`, then
/// two-character names, and so on) handed out when string literals are
/// aggregated into variables.
///
/// Implemented as a linked list of one-character counters: `letter` is the
/// most significant character and `lower` (when present) holds the remaining,
/// less significant characters of the name.
pub(crate) struct VariableNameGenerator<'a> {
    // Most significant character of the current name.
    letter: char,
    // Remaining characters of the name, present when it is more than one letter long.
    lower: Option<Box<VariableNameGenerator<'a>>>,
    // Optional prefix placed in front of every generated name (e.g. "r_" in the
    // string-aggregation tests).
    prepend: Option<&'a str>,
}

impl<'a> VariableNameGenerator<'a> {
    /// Creates a generator producing names `nb_letter` characters long,
    /// optionally prefixed with `prepend`. Built recursively: every extra
    /// letter adds one nested generator, each starting at 'a'.
    pub(crate) fn new(prepend: Option<&'a str>, nb_letter: usize) -> VariableNameGenerator<'a> {
        if nb_letter > 1 {
            VariableNameGenerator {
                letter: 'a',
                lower: Some(Box::new(VariableNameGenerator::new(None, nb_letter - 1))),
                prepend,
            }
        } else {
            VariableNameGenerator {
                letter: 'a',
                lower: None,
                prepend,
            }
        }
    }

    /// Advances the generator to the next name in the sequence.
    pub(crate) fn next(&mut self) {
        self.incr_letters();
    }

    // Not an `impl Display` upstream; keep the inherent method for API stability.
    #[allow(clippy::inherent_to_string)]
    /// Renders the current name: prefix (if any) followed by all characters.
    pub(crate) fn to_string(&self) -> String {
        if let Some(ref lower) = self.lower {
            format!(
                "{}{}{}",
                self.prepend.unwrap_or(""),
                self.letter,
                lower.to_string()
            )
        } else {
            format!("{}{}", self.prepend.unwrap_or(""), self.letter)
        }
    }

    /// Length in characters of the current name, prefix included.
    #[allow(dead_code)]
    pub(crate) fn len(&self) -> usize {
        // One character for `letter`, plus the prefix length.
        let first = match self.prepend {
            Some(s) => s.len(),
            None => 0,
        } + 1;
        first
            + match self.lower {
                Some(ref s) => s.len(),
                None => 0,
            }
    }

    /// Steps `letter` through the cycle a→z, then A→Z, then 0→9. When it wraps
    /// past '9' back to 'a', the increment carries into `lower`, creating a new
    /// one-letter generator there if none exists yet (so names grow by one
    /// character once the current width is exhausted).
    pub(crate) fn incr_letters(&mut self) {
        // Each pair is (last char of a run, first char of the next run);
        // ('9', 'a') closes the cycle and triggers the carry.
        let max = [('z', 'A'), ('Z', '0'), ('9', 'a')];

        for (m, next) in &max {
            if self.letter == *m {
                self.letter = *next;
                if self.letter == 'a' {
                    if let Some(ref mut lower) = self.lower {
                        lower.incr_letters();
                    } else {
                        self.lower = Some(Box::new(VariableNameGenerator::new(None, 1)));
                    }
                }
                return;
            }
        }
        // Within a run: plain successor in the ASCII table.
        self.letter = ((self.letter as u8) + 1) as char;
    }
}
+/// +/// # Example +/// +/// ```rust +/// extern crate minifier; +/// use minifier::js::{Keyword, Token, replace_tokens_with, simple_minify}; +/// +/// fn main() { +/// let js = r#" +/// function replaceByNull(data, func) { +/// for (var i = 0; i < data.length; ++i) { +/// if func(data[i]) { +/// data[i] = null; +/// } +/// } +/// } +/// }"#.into(); +/// let js_minified = simple_minify(js) +/// .apply(|f| { +/// replace_tokens_with(f, |t| { +/// if *t == Token::Keyword(Keyword::Null) { +/// Some(Token::Other("N")) +/// } else { +/// None +/// } +/// }) +/// }); +/// println!("{}", js_minified.to_string()); +/// } +/// ``` +/// +/// The previous code will have all its `null` keywords replaced with `N`. In such cases, +/// don't forget to include the definition of `N` in the returned minified javascript: +/// +/// ```js +/// var N = null; +/// ``` +#[inline] +pub fn replace_tokens_with<'a, 'b: 'a, F: Fn(&Token<'a>) -> Option<Token<'b>>>( + mut tokens: Tokens<'a>, + callback: F, +) -> Tokens<'a> { + for token in tokens.0.iter_mut() { + if let Some(t) = callback(token) { + *token = t; + } + } + tokens +} + +/// Replace a given token with another. +#[inline] +pub fn replace_token_with<'a, 'b: 'a, F: Fn(&Token<'a>) -> Option<Token<'b>>>( + token: Token<'a>, + callback: &F, +) -> Token<'a> { + if let Some(t) = callback(&token) { + t + } else { + token + } +} + +/// When looping over `Tokens`, if you encounter `Keyword::Var`, `Keyword::Let` or +/// `Token::Other` using this function will allow you to get the variable name's +/// position and the variable value's position (if any). +/// +/// ## Note +/// +/// It'll return the value only if there is an `Operation::Equal` found. 
pub fn get_variable_name_and_value_positions<'a>(
    tokens: &'a Tokens<'a>,
    pos: usize,
) -> Option<(usize, Option<usize>)> {
    if pos >= tokens.len() {
        return None;
    }
    // `tmp` will end up pointing at the variable-name token.
    let mut tmp = pos;
    match tokens[pos] {
        // `var`/`let`: the name is expected right after the keyword.
        Token::Keyword(Keyword::Let) | Token::Keyword(Keyword::Var) => {
            tmp += 1;
        }
        // An identifier: only accept it as a declared name if, walking
        // backwards over comments/whitespace, the previous meaningful token is
        // a comma (declaration list) or a `var`/`let` keyword.
        // NOTE(review): if the backward scan reaches position 0 while still on
        // comments/whitespace it falls through without validating — presumably
        // acceptable for minifier input; confirm if reusing elsewhere.
        Token::Other(_) if pos > 0 => {
            let mut pos = pos - 1;
            while pos > 0 {
                if tokens[pos].is_comment() || tokens[pos].is_white_character() {
                    pos -= 1;
                } else if tokens[pos] == Token::Char(ReservedChar::Comma)
                    || tokens[pos] == Token::Keyword(Keyword::Let)
                    || tokens[pos] == Token::Keyword(Keyword::Var)
                {
                    break;
                } else {
                    return None;
                }
            }
        }
        _ => return None,
    }
    // Forward scan: the first "other" token is the variable name; then look
    // for an `=` to locate the value.
    while tmp < tokens.len() {
        if tokens[tmp].is_other() {
            let mut tmp2 = tmp + 1;
            while tmp2 < tokens.len() {
                if tokens[tmp2] == Token::Operation(Operation::Equal) {
                    // Found an assignment: the value position is the first
                    // string/identifier/regex/number token after the `=`.
                    tmp2 += 1;
                    while tmp2 < tokens.len() {
                        let token = &tokens[tmp2];
                        if token.is_string()
                            || token.is_other()
                            || token.is_regex()
                            || token.is_number()
                            || token.is_floating_number()
                        {
                            return Some((tmp, Some(tmp2)));
                        } else if !tokens[tmp2].is_comment() && !tokens[tmp2].is_white_character() {
                            // Anything else after `=` is not a simple value.
                            break;
                        }
                        tmp2 += 1;
                    }
                    break;
                } else if matches!(
                    tokens[tmp2].get_char(),
                    Some(ReservedChar::Comma) | Some(ReservedChar::SemiColon)
                ) {
                    // `,` or `;` before any `=`: declaration without a value.
                    return Some((tmp, None));
                } else if !(tokens[tmp2].is_comment()
                    || tokens[tmp2].is_white_character()
                        && tokens[tmp2].get_char() != Some(ReservedChar::Backline))
                {
                    // Only comments and non-backline whitespace may sit between
                    // the name and the `=`; a backline (or any other token)
                    // aborts the search for this name.
                    break;
                }
                tmp2 += 1;
            }
        } else {
            // We don't care about syntax errors.
        }
        tmp += 1;
    }
    None
}
+pub fn clean_token(token: &Token<'_>, next_token: &Option<&Token<'_>>) -> bool { + !token.is_comment() && { + if let Some(x) = token.get_char() { + !x.is_white_character() + && (x != ReservedChar::SemiColon + || *next_token != Some(&Token::Char(ReservedChar::CloseCurlyBrace))) + } else { + true + } + } +} + +#[inline] +fn get_next_except<'a, F: Fn(&Token<'a>) -> bool>( + it: &mut IntoIter<Token<'a>>, + f: &F, +) -> Option<Token<'a>> { + for t in it { + if (t.is_comment() || t.is_white_character()) && f(&t) { + continue; + } + return Some(t); + } + None +} + +/// Same as `clean_tokens` except that if a token is considered as not desired, +/// the callback is called. If the callback returns `false` as well, it will +/// be removed. +/// +/// # Example +/// +/// ```rust,no_run +/// extern crate minifier; +/// +/// use minifier::js::{clean_tokens_except, simple_minify, ReservedChar}; +/// use std::fs; +/// +/// fn main() { +/// let content = fs::read("some_file.js").expect("file not found"); +/// let source = String::from_utf8_lossy(&content); +/// let s = simple_minify(&source); // First we get the tokens list. +/// let s = s.apply(|f| { +/// clean_tokens_except(f, |c| { +/// c.get_char() != Some(ReservedChar::Backline) +/// }) +/// }); // We now have a cleaned token list which kept backlines! 
pub fn clean_tokens_except<'a, F: Fn(&Token<'a>) -> bool>(tokens: Tokens<'a>, f: F) -> Tokens<'a> {
    // Heuristic: roughly two thirds of the tokens survive the cleanup.
    let mut v = Vec::with_capacity(tokens.len() / 3 * 2);
    let mut it = tokens.0.into_iter();

    // `f` is the veto callback: returning `true` means the token may be
    // removed, returning `false` forces it to be kept (see the `clean_except*`
    // tests, which return `false` for the characters they want preserved).
    loop {
        let token = get_next_except(&mut it, &f);
        if token.is_none() {
            break;
        }
        let token = token.unwrap();
        if token.is_white_character() {
            // Whitespace is dropped unless the callback protects it.
            if f(&token) {
                continue;
            }
        } else if token.get_char() == Some(ReservedChar::SemiColon) {
            if v.is_empty() {
                // A leading semicolon is useless: keep it only when protected.
                if !f(&token) {
                    v.push(token);
                }
                continue;
            }
            if let Some(next) = get_next_except(&mut it, &f) {
                // `;}` collapses to `}` unless the callback protects the `;`.
                if next != Token::Char(ReservedChar::CloseCurlyBrace) || !f(&token) {
                    v.push(token);
                }
                v.push(next);
            } else if !f(&token) {
                // Trailing semicolon at end of input: kept only when protected.
                v.push(token);
            }
            continue;
        }
        v.push(token);
    }
    v.into()
}
looking_for_array_start { + match tokens[pos] { + Token::Char(ReservedChar::OpenBracket) => { + looking_for_array_start = false; + getting_values = true; + } + ref s => { + looking_for_array_start = s.is_comment() || s.is_white_character(); + } + } + } else if getting_values { + match &tokens[pos] { + Token::Char(ReservedChar::CloseBracket) => { + return Some((ret, pos)); + } + s if s.is_comment() || s.is_white_character() => {} + _ => { + ret.push(pos); + } + } + } else { + match tokens[pos] { + Token::Keyword(Keyword::Let) | Token::Keyword(Keyword::Var) => { + looking_for_var = true; + } + _ => {} + } + } + } + None +} + +#[test] +fn check_get_array() { + let source = r#"var x = [ ]; var y = ['hello', + 12]; var z = []; var w = 12;"#; + + let tokens = crate::js::token::tokenize(source); + + let ar = get_array(&tokens, "x"); + assert!(ar.is_some()); + assert_eq!(ar.unwrap().1, 9); + + let ar = get_array(&tokens, "y"); + assert!(ar.is_some()); + assert_eq!(ar.unwrap().1, 27); + + let ar = get_array(&tokens, "z"); + assert!(ar.is_some()); + assert_eq!(ar.unwrap().1, 37); + + let ar = get_array(&tokens, "w"); + assert!(ar.is_none()); + + let ar = get_array(&tokens, "W"); + assert!(ar.is_none()); +} + +#[test] +fn check_get_variable_name_and_value_positions() { + let source = r#"var x = 1;var y = "2",we=4;"#; + let mut result = Vec::new(); + let mut pos = 0; + + let tokens = crate::js::token::tokenize(source); + + while pos < tokens.len() { + if let Some(x) = get_variable_name_and_value_positions(&tokens, pos) { + result.push(x); + pos = x.0; + } + pos += 1; + } + assert_eq!(result, vec![(2, Some(6)), (10, Some(18)), (20, Some(22))]); + + let mut result = Vec::new(); + let tokens = crate::js::clean_tokens(tokens); + pos = 0; + + while pos < tokens.len() { + if let Some(x) = get_variable_name_and_value_positions(&tokens, pos) { + result.push(x); + pos = x.0; + } + pos += 1; + } + assert_eq!(result, vec![(1, Some(3)), (6, Some(8)), (10, Some(12))]); +} + +#[test] +fn 
replace_tokens() { + let source = r#" +var x = ['a', 'b', null, 'd', {'x': null, 'e': null, 'z': 'w'}]; +var n = null; +"#; + let expected_result = "var x=['a','b',N,'d',{'x':N,'e':N,'z':'w'}];var n=N"; + + let res = crate::js::simple_minify(source) + .apply(crate::js::clean_tokens) + .apply(|f| { + replace_tokens_with(f, |t| { + if *t == Token::Keyword(Keyword::Null) { + Some(Token::Other("N")) + } else { + None + } + }) + }); + assert_eq!(res.to_string(), expected_result); +} + +#[test] +fn check_iterator() { + let source = r#" +var x = ['a', 'b', null, 'd', {'x': null, 'e': null, 'z': 'w'}]; +var n = null; +"#; + let expected_result = "var x=['a','b',N,'d',{'x':N,'e':N,'z':'w'}];var n=N;"; + + let res: Tokens = crate::js::simple_minify(source) + .into_iter() + .filter(|(x, next)| crate::js::clean_token(x, next)) + .map(|(t, _)| { + if t == Token::Keyword(Keyword::Null) { + Token::Other("N") + } else { + t + } + }) + .collect::<Vec<_>>() + .into(); + assert_eq!(res.to_string(), expected_result); +} diff --git a/vendor/minifier/src/json/json_minifier.rs b/vendor/minifier/src/json/json_minifier.rs new file mode 100644 index 000000000..ad9fae2ce --- /dev/null +++ b/vendor/minifier/src/json/json_minifier.rs @@ -0,0 +1,50 @@ +// Take a look at the license at the top of the repository in the LICENSE file. 
/// State carried from character to character while minifying JSON.
#[derive(Debug, Clone)]
pub struct JsonMinifier {
    /// `true` while the characters being examined lie inside a string literal.
    pub is_string: bool,
    /// Countdown started on `\"` so the escaped quote is not taken for the
    /// closing quote of the string.
    pub escaped_quotation: u8,
}

impl Default for JsonMinifier {
    fn default() -> Self {
        Self::new()
    }
}

impl JsonMinifier {
    /// Creates a minifier in its initial state: outside of any string literal.
    pub fn new() -> Self {
        JsonMinifier {
            is_string: false,
            escaped_quotation: 0,
        }
    }
}

/// Decides whether `item1` belongs in the minified output.
///
/// Control characters are always dropped; whitespace is dropped only when it
/// occurs outside of a string literal. `item2` is the character following
/// `item1`, used to spot escaped quotation marks.
#[inline]
pub fn keep_element(minifier: &mut JsonMinifier, item1: &char, item2: Option<&char>) -> bool {
    // Control characters never survive. The string tracker is deliberately not
    // consulted (nor updated) for them, mirroring the original short-circuit;
    // raw control characters are not valid inside JSON strings anyway.
    if item1.is_ascii_control() {
        return false;
    }
    !is_whitespace_outside_string(minifier, item1, item2)
}

/// Feeds `item1` into the string-tracking state and reports whether it is
/// whitespace located outside of a string literal.
#[inline]
fn is_whitespace_outside_string(
    minifier: &mut JsonMinifier,
    item1: &char,
    item2: Option<&char>,
) -> bool {
    if !minifier.is_string && *item1 == '"' {
        // Opening quote: everything up to the matching quote is literal text.
        minifier.is_string = true;
    } else if minifier.is_string {
        if *item1 == '\\' && item2 == Some(&'"') {
            // NOTE(review): the window of 4 also shields a *real* closing quote
            // that appears within two characters of the escape (e.g. `"\""`),
            // and `\\` before `"` is not special-cased. Upstream behavior is
            // preserved here as-is.
            minifier.escaped_quotation = 4;
        }
        if minifier.escaped_quotation > 0 {
            minifier.escaped_quotation -= 1;
        } else if *item1 == '"' {
            minifier.is_string = false;
        }
    }
    !minifier.is_string && item1.is_whitespace()
}
/// Result of a call to [`minify`]: a lazy wrapper around the filtered
/// character stream. Nothing is produced until it is written or displayed.
#[derive(Debug)]
pub struct Minified<'a>(JsonMultiFilter<'a, JsonMethod>);

impl<'a> Minified<'a> {
    /// Streams the minified JSON into `w`, consuming the wrapper.
    pub fn write<W: io::Write>(self, w: W) -> io::Result<()> {
        self.0.write(w)
    }
}

impl<'a> fmt::Display for Minified<'a> {
    // Delegates formatting to the underlying filter, so `to_string()` yields
    // the minified JSON.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.0.fmt(f)
    }
}
minify_from_read(input) + .read_to_string(&mut actual) + .expect("error at read"); + assert_eq!(actual, expected); +} + +#[test] +fn removal_of_control_characters() { + let input = "\n".into(); + let expected: String = "".into(); + let actual = minify(input); + assert_eq!(actual.to_string(), expected); +} + +#[test] +fn removal_of_whitespace_outside_of_tags() { + let input = r#" + { + "test": "\" test2", + "test2": "", + "test3": " " + } + "# + .into(); + let expected: String = "{\"test\":\"\\\" test2\",\"test2\":\"\",\"test3\":\" \"}".into(); + let actual = minify(input); + assert_eq!(actual.to_string(), expected); +} diff --git a/vendor/minifier/src/json/read/byte_to_char.rs b/vendor/minifier/src/json/read/byte_to_char.rs new file mode 100644 index 000000000..d3618b9cb --- /dev/null +++ b/vendor/minifier/src/json/read/byte_to_char.rs @@ -0,0 +1,132 @@ +// Take a look at the license at the top of the repository in the LICENSE file. + +use crate::json::read::internal_reader::InternalReader; +use std::{ + error, fmt, + io::{Error, Read}, + str::from_utf8, +}; + +pub struct ByteToChar<R> { + iter: InternalReader<R>, +} + +impl<R: Read> ByteToChar<R> { + #[inline] + pub fn new(read: R, buffer_size: usize) -> Result<Self, Error> { + Ok(ByteToChar { + iter: InternalReader::new(read, buffer_size)?, + }) + } + + fn get_next(&mut self) -> Result<Option<u8>, CharsError> { + match self.iter.next() { + None => Ok(None), + Some(item) => match item { + Ok(item) => Ok(Some(item)), + Err(err) => Err(CharsError::Other(err)), + }, + } + } +} + +impl<R: Read + fmt::Debug> fmt::Debug for ByteToChar<R> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Filter").field("iter", &self.iter).finish() + } +} + +impl<R: Read> Iterator for ByteToChar<R> { + type Item = Result<char, CharsError>; + + fn next(&mut self) -> Option<Result<char, CharsError>> { + let first_byte = match self.get_next() { + Err(err) => return Some(Err(err)), + Ok(item) => match item { + 
Some(item) => item, + None => return None, + }, + }; + + let width = utf8_char_width(first_byte); + if width == 1 { + return Some(Ok(first_byte as char)); + } + if width == 0 { + return Some(Err(CharsError::NotUtf8)); + } + let mut buf = [first_byte, 0, 0, 0]; + { + let mut start = 1; + while start < width { + let byte = match self.get_next() { + Err(err) => return Some(Err(err)), + Ok(item) => match item { + Some(item) => item, + None => return Some(Err(CharsError::NotUtf8)), + }, + }; + buf[start] = byte; + start += 1; + } + } + Some(match from_utf8(&buf[..width]).ok() { + Some(s) => Ok(s.chars().next().unwrap()), + None => Err(CharsError::NotUtf8), + }) + } +} + +fn utf8_char_width(b: u8) -> usize { + UTF8_CHAR_WIDTH[b as usize] as usize +} + +// https://tools.ietf.org/html/rfc3629 +static UTF8_CHAR_WIDTH: [u8; 256] = [ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, // 0x1F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, // 0x3F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, // 0x5F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, // 0x7F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, // 0x9F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, // 0xBF + 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, // 0xDF + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xEF + 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xFF +]; + +/// An enumeration of possible errors that can be generated from the `Chars` +/// adapter. +#[derive(Debug)] +pub enum CharsError { + /// Variant representing that the underlying stream was read successfully + /// but it did not contain valid utf8 data. 
+ NotUtf8, + + /// Variant representing that an I/O error occurred. + Other(Error), +} + +impl error::Error for CharsError { + fn cause(&self) -> Option<&dyn error::Error> { + match *self { + CharsError::NotUtf8 => None, + CharsError::Other(ref e) => e.source(), + } + } +} + +impl fmt::Display for CharsError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match *self { + CharsError::NotUtf8 => "byte stream did not contain valid utf8".fmt(f), + CharsError::Other(ref e) => e.fmt(f), + } + } +} diff --git a/vendor/minifier/src/json/read/internal_buffer.rs b/vendor/minifier/src/json/read/internal_buffer.rs new file mode 100644 index 000000000..90eebcd73 --- /dev/null +++ b/vendor/minifier/src/json/read/internal_buffer.rs @@ -0,0 +1,44 @@ +// Take a look at the license at the top of the repository in the LICENSE file. + +const ARRAY_DEFAULT: u8 = 0; + +#[derive(Debug)] +pub struct Buffer { + buffer: Vec<u8>, + read_pos: usize, + buffer_size: usize, + data_size: usize, +} + +impl Buffer { + pub fn new(size: usize) -> Buffer { + Buffer { + buffer: vec![ARRAY_DEFAULT; size], + read_pos: 0, + buffer_size: size, + data_size: 0, + } + } + + pub fn as_mut(&mut self) -> &mut [u8] { + self.buffer.as_mut() + } + + pub fn update_metadata(&mut self, size: usize) { + self.read_pos = 0; + self.data_size = size; + } + + pub fn next(&mut self) -> Option<u8> { + if self.read_pos >= self.data_size { + return None; + } + let item = self.buffer.get(self.read_pos); + self.read_pos += 1; + item.copied() + } + + pub fn cont(&self) -> bool { + self.data_size == self.buffer_size + } +} diff --git a/vendor/minifier/src/json/read/internal_reader.rs b/vendor/minifier/src/json/read/internal_reader.rs new file mode 100644 index 000000000..45f178f08 --- /dev/null +++ b/vendor/minifier/src/json/read/internal_reader.rs @@ -0,0 +1,63 @@ +// Take a look at the license at the top of the repository in the LICENSE file. 
+ +use super::internal_buffer::Buffer; +use std::{ + fmt, + io::{Error, Read}, +}; + +pub struct InternalReader<R> { + read: R, + buffer_size: usize, + buffer: Buffer, +} + +impl<R: Read> InternalReader<R> { + pub fn new(mut read: R, buffer_size: usize) -> Result<Self, Error> { + let mut buffer = Buffer::new(buffer_size); + InternalReader::read_data(&mut read, &mut buffer)?; + Ok(InternalReader { + read, + buffer_size, + buffer, + }) + } + + fn read_data(read: &mut R, buffer: &mut Buffer) -> Result<(), Error> { + let size = read.read(buffer.as_mut())?; + buffer.update_metadata(size); + Ok(()) + } +} + +impl<R: Read + fmt::Debug> fmt::Debug for InternalReader<R> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("JsonReader") + .field("read", &self.read) + .field("buffer_size", &self.buffer_size) + .field("buffer", &self.buffer) + .finish() + } +} + +impl<R: Read> Iterator for InternalReader<R> { + type Item = Result<u8, Error>; + + #[inline] + fn next(&mut self) -> Option<Result<u8, Error>> { + if self.buffer_size == 0 { + return None; + } + loop { + if let Some(item) = self.buffer.next() { + return Some(Ok(item)); + } else if self.buffer.cont() { + if let Err(err) = InternalReader::read_data(&mut self.read, &mut self.buffer) { + return Some(Err(err)); + }; + } else { + return None; + } + } + } +} diff --git a/vendor/minifier/src/json/read/json_read.rs b/vendor/minifier/src/json/read/json_read.rs new file mode 100644 index 000000000..338db13c8 --- /dev/null +++ b/vendor/minifier/src/json/read/json_read.rs @@ -0,0 +1,106 @@ +// Take a look at the license at the top of the repository in the LICENSE file. 
+ +use crate::json::{ + json_minifier::JsonMinifier, + read::byte_to_char::{ByteToChar, CharsError}, +}; +use std::{ + fmt, + io::{Error, ErrorKind, Read}, + vec::IntoIter, +}; + +pub struct JsonRead<P, R> { + minifier: JsonMinifier, + read: Option<R>, + iter: Option<ByteToChar<R>>, + predicate: P, + initialized: bool, + item_iter: Option<IntoIter<u8>>, + item1: Option<char>, +} + +impl<P, R: Read> JsonRead<P, R> { + #[inline] + pub fn new(read: R, predicate: P) -> Self { + JsonRead { + minifier: JsonMinifier::default(), + read: Some(read), + iter: None, + predicate, + initialized: false, + item_iter: None, + item1: None, + } + } + + fn get_next(&mut self) -> Result<Option<char>, CharsError> { + match self.iter.as_mut().unwrap().next() { + None => Ok(None), + Some(item) => match item { + Ok(item) => Ok(Some(item)), + Err(err) => Err(err), + }, + } + } + + fn add_char_to_buffer(&mut self, buf: &mut [u8], buf_pos: &mut usize) { + if let Some(ref mut iter) = self.item_iter { + while *buf_pos < buf.len() { + if let Some(byte) = iter.next() { + buf[*buf_pos] = byte; + *buf_pos += 1; + } else { + break; + } + } + } + } +} + +impl<P, R: Read + fmt::Debug> fmt::Debug for JsonRead<P, R> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Filter") + .field("iter", &self.iter) + .field("initialized", &self.initialized) + .finish() + } +} + +impl<P, R> Read for JsonRead<P, R> +where + R: Read, + P: FnMut(&mut JsonMinifier, &char, Option<&char>) -> bool, +{ + fn read(&mut self, buf: &mut [u8]) -> Result<usize, Error> { + let mut buf_pos: usize = 0; + + if buf.is_empty() { + return Ok(0); + } + + if !self.initialized { + self.iter = Some(ByteToChar::new(self.read.take().unwrap(), buf.len())?); + self.item1 = self.get_next()?; + self.initialized = true; + } + + while let Some(item) = self.item1.take() { + self.item1 = self.get_next()?; + if (self.predicate)(&mut self.minifier, &item, self.item1.as_ref()) { + self.item_iter = 
Some(item.to_string().into_bytes().into_iter()); + self.add_char_to_buffer(buf, &mut buf_pos); + } + if buf_pos >= buf.len() { + break; + } + } + Ok(buf_pos) + } +} + +impl From<CharsError> for Error { + fn from(_: CharsError) -> Self { + Error::from(ErrorKind::InvalidData) + } +} diff --git a/vendor/minifier/src/json/string.rs b/vendor/minifier/src/json/string.rs new file mode 100644 index 000000000..071bf0012 --- /dev/null +++ b/vendor/minifier/src/json/string.rs @@ -0,0 +1,100 @@ +// Take a look at the license at the top of the repository in the LICENSE file. + +use crate::json::json_minifier::JsonMinifier; + +use std::fmt; +use std::str::Chars; + +#[derive(Clone)] +pub struct JsonMultiFilter<'a, P: Clone> { + minifier: JsonMinifier, + iter: Chars<'a>, + predicate: P, + initialized: bool, + item1: Option<<Chars<'a> as Iterator>::Item>, +} + +impl<'a, P: Clone> JsonMultiFilter<'a, P> { + #[inline] + pub fn new(iter: Chars<'a>, predicate: P) -> Self { + JsonMultiFilter { + minifier: JsonMinifier::default(), + iter, + predicate, + initialized: false, + item1: None, + } + } +} + +impl<'a, P: Clone> fmt::Debug for JsonMultiFilter<'a, P> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Filter") + .field("minifier", &self.minifier) + .field("iter", &self.iter) + .field("initialized", &self.initialized) + .finish() + } +} + +impl<'a, P: Clone> Iterator for JsonMultiFilter<'a, P> +where + P: FnMut( + &mut JsonMinifier, + &<Chars<'a> as Iterator>::Item, + Option<&<Chars<'a> as Iterator>::Item>, + ) -> bool, +{ + type Item = <Chars<'a> as Iterator>::Item; + + #[inline] + fn next(&mut self) -> Option<<Chars<'a> as Iterator>::Item> { + if !self.initialized { + self.item1 = self.iter.next(); + self.initialized = true; + } + + while let Some(item) = self.item1.take() { + self.item1 = self.iter.next(); + if (self.predicate)(&mut self.minifier, &item, self.item1.as_ref()) { + return Some(item); + } + } + None + } +} + +impl<'a, P> 
JsonMultiFilter<'a, P> +where + P: FnMut( + &mut JsonMinifier, + &<Chars<'a> as Iterator>::Item, + Option<&<Chars<'a> as Iterator>::Item>, + ) -> bool + + Clone, +{ + pub(super) fn write<W: std::io::Write>(self, mut w: W) -> std::io::Result<()> { + for token in self { + write!(w, "{}", token)?; + } + Ok(()) + } +} + +impl<'a, P> fmt::Display for JsonMultiFilter<'a, P> +where + P: FnMut( + &mut JsonMinifier, + &<Chars<'a> as Iterator>::Item, + Option<&<Chars<'a> as Iterator>::Item>, + ) -> bool + + Clone, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let s = (*self).clone(); + for token in s { + write!(f, "{}", token)?; + } + Ok(()) + } +} diff --git a/vendor/minifier/src/lib.rs b/vendor/minifier/src/lib.rs new file mode 100644 index 000000000..a88f9abb8 --- /dev/null +++ b/vendor/minifier/src/lib.rs @@ -0,0 +1,10 @@ +// Take a look at the license at the top of the repository in the LICENSE file. + +#[cfg(feature = "html")] +extern crate regex; + +pub mod css; +#[cfg(feature = "html")] +pub mod html; +pub mod js; +pub mod json; diff --git a/vendor/minifier/src/main.rs b/vendor/minifier/src/main.rs new file mode 100644 index 000000000..eb420325e --- /dev/null +++ b/vendor/minifier/src/main.rs @@ -0,0 +1,94 @@ +// Take a look at the license at the top of the repository in the LICENSE file. 
+ +extern crate minifier; + +use std::env; +use std::ffi::OsStr; +use std::fs::{File, OpenOptions}; +use std::io::{self, Read, Write}; +use std::path::{Path, PathBuf}; + +use minifier::{css, js, json}; + +fn print_help() { + println!( + r##"For now, this minifier supports the following type of files: + + * .css + * .js + * .json"## + ); +} + +pub fn get_all_data(file_path: &str) -> io::Result<String> { + let mut file = File::open(file_path)?; + let mut data = String::new(); + + file.read_to_string(&mut data).unwrap(); + Ok(data) +} + +fn call_minifier<F>(file_path: &str, func: F) +where + F: Fn(&str) -> String, +{ + match get_all_data(file_path) { + Ok(content) => { + let mut out = PathBuf::from(file_path); + let original_extension = out + .extension() + .unwrap_or_else(|| OsStr::new("")) + .to_str() + .unwrap_or("") + .to_owned(); + out.set_extension(format!("min.{}", original_extension)); + if let Ok(mut file) = OpenOptions::new() + .truncate(true) + .write(true) + .create(true) + .open(out.clone()) + { + if let Err(e) = write!(file, "{}", func(&content)) { + eprintln!("Impossible to write into {:?}: {}", out, e); + } else { + println!("{:?}: done -> generated into {:?}", file_path, out); + } + } else { + eprintln!("Impossible to create new file: {:?}", out); + } + } + Err(e) => eprintln!("\"{}\": {}", file_path, e), + } +} + +fn main() { + let args: Vec<_> = env::args().skip(1).collect(); + + if args.is_empty() { + println!("Missing files to work on...\nExample: ./minifier file.js\n"); + print_help(); + return; + } + for arg in &args { + let p = Path::new(arg); + + if !p.is_file() { + eprintln!("\"{}\" isn't a file", arg); + continue; + } + match p + .extension() + .unwrap_or_else(|| OsStr::new("")) + .to_str() + .unwrap_or("") + { + "css" => call_minifier(arg, |s| { + css::minify(s).expect("css minification failed").to_string() + }), + "js" => call_minifier(arg, |s| js::minify(s).to_string()), + "json" => call_minifier(arg, |s| json::minify(s).to_string()), + 
// "html" | "htm" => call_minifier(arg, html::minify), + x => println!("\"{}\": this format isn't supported", x), + } + } +} |