summary | refs | log | tree | commit | diff | stats
path: root/vendor/minifier/src/html.rs
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-17 12:02:58 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-17 12:02:58 +0000
commit 698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch)
tree 173a775858bd501c378080a10dca74132f05bc50 /vendor/minifier/src/html.rs
parent Initial commit. (diff)
download rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz
rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip
Adding upstream version 1.64.0+dfsg1. (upstream/1.64.0+dfsg1)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/minifier/src/html.rs')
-rw-r--r-- vendor/minifier/src/html.rs 233
1 files changed, 233 insertions, 0 deletions
diff --git a/vendor/minifier/src/html.rs b/vendor/minifier/src/html.rs
new file mode 100644
index 000000000..b6fa99362
--- /dev/null
+++ b/vendor/minifier/src/html.rs
@@ -0,0 +1,233 @@
+// Take a look at the license at the top of the repository in the LICENSE file.
+
+use regex::{Captures, Regex};
+
+/// Collapses insignificant whitespace in `source`.
+///
+/// If the input contains neither `<textarea` nor `<pre` (compared case-insensitively), every
+/// whitespace run sitting between a `>` and a `<` becomes a single space, and afterwards every
+/// run of two or more whitespace characters, as well as every lone `\r` or `\n`, becomes a
+/// single space. Otherwise the input is only trimmed at both ends.
+fn condense_whitespace(source: &str) -> String {
+    let lowered = source.to_lowercase();
+    let has_preformatted = lowered.contains("<textarea") || lowered.contains("<pre");
+    if has_preformatted {
+        // Inner whitespace is left alone for such documents; only the outer edges are trimmed.
+        return source.trim().to_owned();
+    }
+
+    // Both patterns are compiled on every call; a hot caller may want to cache them.
+    let between_tags = Regex::new(r">\s+<").unwrap();
+    let blank_runs = Regex::new(r"\s{2,}|[\r\n]").unwrap();
+    let spaced = between_tags.replace_all(source, "> <");
+    blank_runs.replace_all(&spaced, " ").into_owned()
+}
+
+/// Removes the `type="…"` attribute from `<style …>` and `<script …>` opening tags.
+///
+/// Only opening tags that carry at least one attribute are inspected; everything else in
+/// `source` is returned unchanged.
+///
+/// The tag name must be followed by whitespace, so unrelated elements whose names merely start
+/// with `style`/`script` are not touched. The tag may span several lines. The attribute must be
+/// preceded by whitespace, so attributes that only *end* in `type` (for example `data-type`)
+/// are preserved instead of being cut in half.
+///
+/// NOTE(review): every double-quoted `type` value is dropped, including values such as
+/// `module` that change how a browser treats the element — confirm that this is intended.
+fn condense(source: &str) -> String {
+    // `[^>]*` (rather than `.*?`) lets the opening tag cross line breaks.
+    let opening_tag = Regex::new(r"<(style|script)\s[^>]*>").unwrap();
+    // Requiring leading whitespace anchors the match on a whole attribute name.
+    let type_attr = Regex::new(r#"\s+type="[^"]*""#).unwrap();
+    opening_tag
+        .replace_all(source, |caps: &Captures| {
+            type_attr.replace_all(&caps[0], "").into_owned()
+        })
+        .into_owned()
+}
+
+/// Deletes the tags listed in `USELESS_TAGS` from `source`.
+///
+/// Matching is literal and case-sensitive: a tag is removed only when it appears exactly as
+/// written in the table, so tags that carry attributes (e.g. `<body class="x">`) or use
+/// upper-case letters are left in place. The tags are processed in table order and each one is
+/// removed everywhere it occurs.
+fn clean_unneeded_tags(source: &str) -> String {
+    // Each tag appears once; a repeated entry would only cost an extra scan and allocation.
+    const USELESS_TAGS: &[&str] = &[
+        "</area>",
+        "</base>",
+        "<body>",
+        "</body>",
+        "</br>",
+        "</col>",
+        "</colgroup>",
+        "</dd>",
+        "</dt>",
+        "<head>",
+        "</head>",
+        "</hr>",
+        "<html>",
+        "</html>",
+        "</img>",
+        "</input>",
+        "</li>",
+        "</link>",
+        "</meta>",
+        "</option>",
+        "</param>",
+        "<tbody>",
+        "</tbody>",
+        "</td>",
+        "</tfoot>",
+        "</th>",
+        "</thead>",
+        "</tr>",
+        "</basefont>",
+        "</isindex>",
+    ];
+    USELESS_TAGS
+        .iter()
+        .fold(source.to_owned(), |acc, tag| acc.replace(*tag, ""))
+}
+
+/// Replaces every HTML comment in `source` with a single space.
+///
+/// A comment is kept verbatim when its text starts with `[` once every `<!--` in it has been
+/// blanked out and the surrounding whitespace trimmed. This is the shape of conditional
+/// comments such as `<!--[if lte IE 8]> … <![endif]-->`.
+fn remove_comments(source: &str) -> String {
+    // `(.|\n)` lets the lazily matched body span line breaks, which `.` alone would not.
+    let comment = Regex::new("<!--(.|\n)*?-->").unwrap();
+    let stripped = comment.replace_all(source, |caps: &Captures| {
+        let whole = &caps[0];
+        let is_conditional = whole.replace("<!--", " ").trim().starts_with("[");
+        if !is_conditional {
+            return " ".to_owned();
+        }
+        whole.to_owned()
+    });
+    stripped.into_owned()
+}
+
+/// Tidies whitespace inside opening tags and drops the quotes around `width`/`height` values.
+///
+/// Every match of `<\w.*?>` in `source` is rewritten independently; text outside such matches
+/// is left as is. Matches that contain `</` are returned unchanged. The overall result is
+/// trimmed before being returned.
+fn unquote_attributes(source: &str) -> String {
+ // Some attributes like width, height, etc... don't need quotes.
+ // `<`, one word character, then lazily up to the first `>` on the same line.
+ let any_tag = Regex::new(r"<\w.*?>").unwrap();
+ // A run of two or more whitespace characters that contains at least one plain space.
+ let extra_spaces = Regex::new(r" \s+|\s +").unwrap();
+ // Two word characters separated by whitespace.
+ let between_words = Regex::new(r"\w\s+\w").unwrap();
+ // Whitespace between a closing quote (double / single) and the final `>`.
+ let spaces_before_close = Regex::new(r##""\s+>"##).unwrap();
+ let spaces_before_close2 = Regex::new(r"'\s+>").unwrap();
+ // A quote, two or more whitespace characters, then the next attribute name and its `=`.
+ let extra_spaces2 = Regex::new(r##""\s\s+\w+="|'\s\s+\w+='|"\s\s+\w+=|'\s\s+\w+="##).unwrap();
+ // Whitespace between a digit and the final `>`.
+ let extra_spaces3 = Regex::new(r"\d\s+>").unwrap();
+ // `name="value"` where the value consists only of letters, digits, `-`, `_` and `.`.
+ let quotes_in_tag = Regex::new(r##"([a-zA-Z]+)="([a-zA-Z0-9-_\.]+)""##).unwrap();
+
+ any_tag
+ .replace_all(source, |caps: &Captures| {
+ let cap = format!("{}", &caps[0]);
+ // NOTE(review): `any_tag` requires a word character right after `<`, so the `<!` test
+ // looks unreachable; only the `</` test can fire here.
+ if cap.starts_with("<!") || cap.find("</").is_some() {
+ cap
+ } else {
+ // Step 1: remove whitespace between a closing quote and `>`.
+ let tag = spaces_before_close.replace_all(&cap, "\">").into_owned();
+ let mut tag = spaces_before_close2.replace_all(&tag, "'>").into_owned();
+ // Snapshot used for matching so that `tag` can be mutated inside the loop below.
+ let tag_c = tag.clone();
+
+ // Step 2: collapse whitespace runs found between words and between a digit and `>`.
+ let space1_matches: Vec<_> = between_words.find_iter(&tag_c).collect();
+ let space6_matches: Vec<_> = extra_spaces3.find_iter(&tag_c).collect();
+ let mut pos = 0;
+ loop {
+ // The two match lists are walked in lockstep by index.
+ // NOTE(review): when both lists have an entry at `pos`, the text searched for is
+ // the concatenation of the two matches, which may not occur literally in `tag`
+ // (in which case nothing is replaced for that index) — confirm this is intended.
+ let replacement = match (space1_matches.get(pos), space6_matches.get(pos)) {
+ (Some(a), Some(b)) => format!("{}{}", a.as_str(), b.as_str()),
+ (None, Some(b)) => format!("{}", b.as_str()),
+ (Some(a), None) => format!("{}", a.as_str()),
+ _ => break,
+ };
+ pos += 1;
+ // Every occurrence of the matched text is replaced by its space-collapsed form.
+ tag = tag.replace(
+ &replacement,
+ &extra_spaces.replace_all(&replacement, " ").into_owned(),
+ );
+ }
+ // Step 3: reduce the whitespace between a quote and the next attribute to one space.
+ let mut output = tag.clone();
+ for caps in extra_spaces2.find_iter(&tag) {
+ // First character of the match, i.e. the quote; every alternative of
+ // `extra_spaces2` starts with a quote, so the `'\0'` fallback should never be used.
+ let c = caps.as_str().chars().next().unwrap_or('\0');
+ output = output.replace(
+ caps.as_str(),
+ &format!(
+ "{} {}",
+ if c == '\0' {
+ String::new()
+ } else {
+ format!("{}", c)
+ },
+ caps.as_str()[1..].trim_start()
+ ),
+ );
+ }
+ // Step 4: drop the quotes for `width` and `height`; other attributes are re-emitted
+ // with their quotes.
+ tag = quotes_in_tag
+ .replace_all(&output, |caps: &Captures| match &caps[1] {
+ "width" | "height" => format!("{}={}", &caps[1], &caps[2]),
+ x => format!("{}=\"{}\"", x, &caps[2]),
+ })
+ .into_owned();
+ // Both branches yield the same text; when nothing changed, `cap` equals `tag`.
+ if cap != tag {
+ tag
+ } else {
+ cap
+ }
+ }
+ })
+ .trim()
+ .to_owned()
+}
+
+/// Returns a minified version of the provided HTML source.
+///
+/// The passes run in a fixed order: `remove_comments`, `condense`, `clean_unneeded_tags`,
+/// `condense_whitespace` and finally `unquote_attributes`. The result of the last pass is
+/// trimmed and returned as a new `String`; `source` itself is not modified.
+pub fn minify(source: &str) -> String {
+    let without_comments = remove_comments(source);
+    let condensed = condense(&without_comments);
+    let pruned = clean_unneeded_tags(&condensed);
+    let collapsed = condense_whitespace(&pruned);
+    let tidied = unquote_attributes(&collapsed);
+    tidied.trim().to_owned()
+}
+
+// End-to-end check of `minify` on a small document containing comments, optional tags,
+// `type` attributes on `<style>`/`<script>`/`<link>` and quoted `width`/`height` values.
+#[test]
+fn html_minify_test() {
+ // Input document; it is a raw string, so the line breaks below are part of the input.
+ let source = r##"<head>
+ <title>Some huge title</title>
+ <link rel="stylesheet" type="text/css" href="something.css" >
+ <style type="text/css">
+ .some_class {
+ color: red;
+ }
+ </style>
+</head>
+<body>
+ <header>
+ <div>
+ <i> <b><a href="www.somewhere.com" class="some_class">Narnia</a> </b> </i>
+ <h1 style="width:100%;text-align:center;" >Big header</h1>
+ </div>
+ <!-- commeeeeeeeents !!! -->
+ </header>
+ <div id="some_id">
+ <!-- another comment
+ on
+multi
+lines -->
+ <div id="another_id" class="another_class" width="100">
+ <h2>A little sub title</h2>
+ <ul>
+ <li>A list!</li>
+ <li>Who doesn't like lists?</li>
+ <li height="12" class="fooool">Well, who cares...</li>
+ </ul>
+ </div>
+ </div>
+ <script type="text/javascript" >
+ console.log("foo");
+ </script>
+ <style type="text/css" src="../foo.css">
+ <script src="../foo.js">
+</body>
+"##;
+
+ // Expected output: comments, `<head>`/`<body>` and `</li>` are gone, `type` is removed from
+ // `<style>`/`<script>` but kept on `<link>`, and `width`/`height` values lose their quotes.
+ let expected_result = "<title>Some huge title</title> <link rel=\"stylesheet\" \
+ type=\"text/css\" href=\"something.css\"> <style> .some_class \
+ { color: red; } </style> <header> <div> <i> <b><a \
+ href=\"www.somewhere.com\" class=\"some_class\">Narnia</a> </b> </i> \
+ <h1 style=\"width:100%;text-align:center;\">Big header</h1> </div> \
+ </header> <div id=\"some_id\"> <div id=\"another_id\" \
+ class=\"another_class\" width=100> <h2>A little sub \
+ title</h2> <ul> <li>A list! <li>Who doesn't like lists? \
+ <li height=12 class=\"fooool\">Well, who cares... </ul> </div> \
+ </div> <script > console.log(\"foo\"); </script> <style \
+ src=\"../foo.css\"> <script src=\"../foo.js\">";
+ assert_eq!(minify(source), expected_result);
+}
+
+// Checks that `minify` drops an ordinary comment but keeps a conditional comment
+// (`<!--[if …]> … <![endif]-->`) together with the markup it wraps.
+#[test]
+fn html_keep_important_comments() {
+ // Input with one plain comment and one conditional comment.
+ let source = r#"
+<div>
+ <!-- normal comment -->
+ <div>content</div>
+ <!--[if lte IE 8]>
+ <div class="warning">This old browser is unsupported and will most likely display funky things.
+ </div>
+ <![endif]-->
+</div>
+"#;
+
+ // The plain comment is gone; the conditional comment survives with collapsed whitespace.
+ let expected_result =
+ "<div> <div>content</div> <!--[if lte IE 8]> <div class=\"warning\">This \
+ old browser is unsupported and will most likely display funky things. \
+ </div> <![endif]--> </div>";
+ assert_eq!(minify(source), expected_result);
+}