summaryrefslogtreecommitdiffstats
path: root/src/tools/html-checker/main.rs
blob: 9b4d2c52598068892f8277f1b7bff44a6b23a6f4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
use rayon::iter::{ParallelBridge, ParallelIterator};
use std::env;
use std::path::Path;
use std::process::{Command, Output};

/// Runs `tidy` on `file` and returns the number of lines it reported on stderr.
///
/// Returns `0` when `tidy` exits successfully, or when it exits unsuccessfully with an empty
/// stderr and an exit code other than `2`. In every other case the report is echoed to stderr,
/// preceded by a header naming the file and the exit code, and its line count is returned.
///
/// # Panics
///
/// Panics if the `tidy` command cannot be run.
fn check_html_file(file: &Path) -> usize {
    let to_mute = &[
        // "disabled" on <link> or "autocomplete" on <select> emit this warning
        "PROPRIETARY_ATTRIBUTE",
        // It complains when multiple in the same page link to the same anchor for some reason...
        "ANCHOR_NOT_UNIQUE",
        // If a <span> contains only HTML elements and no text, it complains about it.
        "TRIM_EMPTY_ELEMENT",
        // FIXME: the three next warnings are about <pre> elements which are not supposed to
        //        contain HTML. The solution here would be to replace them with a <div>
        "MISSING_ENDTAG_BEFORE",
        "INSERTING_TAG",
        "DISCARDING_UNEXPECTED",
        // This error is caused by nesting the Notable Traits tooltip within an <h4> tag.
        // The solution is to avoid doing that, but we need to have the <h4> tags for accessibility
        // reasons, and we need the Notable Traits tooltip to help everyone understand the Iterator
        // combinators
        "TAG_NOT_ALLOWED_IN",
    ];
    let to_mute_s = to_mute.join(",");
    let mut command = Command::new("tidy");
    command
        .arg("-errors")
        .arg("-quiet")
        .arg("--mute-id") // this option is useful in case we want to mute more warnings
        .arg("yes")
        .arg("--mute")
        .arg(&to_mute_s)
        .arg(file);

    let Output { status, stderr, .. } = command.output().expect("failed to run tidy command");
    if status.success() {
        return 0;
    }

    // Decode lossily: a stray non-UTF-8 byte in the report must not abort the whole run.
    let stderr = String::from_utf8_lossy(&stderr);
    // An unsuccessful exit with nothing on stderr only counts as a failure for exit code 2
    // (NOTE(review): presumably tidy's "errors" status, as opposed to 1 for warnings - confirm).
    if stderr.is_empty() && status.code() != Some(2) {
        return 0;
    }

    // Emit the header and the report in a single call so that reports produced concurrently
    // for different files cannot be interleaved with each other.
    eprintln!(
        "=> Errors for `{}` (error code: {}) <=\n{}",
        file.display(),
        status.code().unwrap_or(-1),
        stderr
    );
    stderr.lines().count()
}

/// Names of the depth-1 entries of the doc folder that `find_all_html_files` keeps while
/// walking; any other depth-1 entry is filtered out and never descended into.
const DOCS_TO_CHECK: &[&str] =
    &["alloc", "core", "proc_macro", "implementors", "src", "std", "test"];

/// Walks `dir` and checks every `.html` file found in it, in parallel.
///
/// Depth-1 entries whose name is not listed in `DOCS_TO_CHECK` are pruned from the walk.
///
/// Returns `(number of HTML files checked, total number of errors reported)`.
///
/// # Panics
///
/// Panics if a directory entry cannot be read.
fn find_all_html_files(dir: &Path) -> (usize, usize) {
    walkdir::WalkDir::new(dir)
        .into_iter()
        .filter_entry(|candidate| {
            // Only the direct children of `dir` are subject to the allow-list.
            if candidate.depth() != 1 {
                return true;
            }
            candidate.file_name().to_str().map_or(false, |name| DOCS_TO_CHECK.contains(&name))
        })
        .par_bridge()
        .map(|dir_entry| {
            let dir_entry = dir_entry.expect("failed to read file");
            let path = dir_entry.path();
            let is_html_file = dir_entry.file_type().is_file()
                && path.extension().map_or(false, |ext| ext == "html");
            // (Number of files processed, number of errors)
            if is_html_file { (1, check_html_file(path)) } else { (0, 0) }
        })
        .reduce(
            || (0, 0),
            |(files_a, errors_a), (files_b, errors_b)| (files_a + files_b, errors_a + errors_b),
        )
}

/// Rejects the `tidy` binary that ships with macOS.
///
/// The default `tidy` command on macOS is so old that it lacks the `mute-id` and `mute` options.
/// `tidy` on macOS Monterey was released on 31 October 2006, and the same release date was
/// already being reported years earlier at
/// <https://stackoverflow.com/questions/22283382/overwrite-osx-tidy>. Accordingly, a macOS
/// environment using the pre-installed `tidy` should stop the HTML checker immediately and show
/// a hint to install a newer one.
///
/// # Errors
///
/// Returns an error if `tidy -v` cannot be run, or if its output identifies the pre-installed
/// macOS build.
#[cfg(target_os = "macos")]
fn check_tidy_version() -> Result<(), String> {
    let output = Command::new("tidy")
        .arg("-v")
        .output()
        .map_err(|e| format!("failed to run tidy command: {}", e))?;
    // Only a substring match is needed, so a lossy decode is sufficient and cannot fail.
    let version = String::from_utf8_lossy(&output.stdout);
    if !version.contains("HTML Tidy for Mac OS X released on 31 October 2006") {
        return Ok(());
    }

    eprintln!("The pre-installed HTML Tidy for macOS is not supported.");
    eprintln!("Consider installing a newer one and re-running.");
    eprintln!("If you're using Homebrew, you can install it by the following command:");
    eprintln!("    brew install tidy-html5");
    eprintln!();
    Err("HTML check failed: 1 error".to_string())
}

/// Entry point: checks every HTML file under the doc folder given as the only argument.
///
/// # Errors
///
/// Returns an error if the argument count is wrong, if (on macOS) the installed `tidy` is
/// rejected by `check_tidy_version`, or if any HTML error was reported.
fn main() -> Result<(), String> {
    let args = env::args().collect::<Vec<_>>();
    if args.len() != 2 {
        // The argument list may be empty, so do not index into it unconditionally.
        let program = args.first().map_or("html-checker", String::as_str);
        return Err(format!("Usage: {} <doc folder>", program));
    }
    #[cfg(target_os = "macos")]
    check_tidy_version()?;

    println!("Running HTML checker...");

    let (files_read, errors) = find_all_html_files(Path::new(&args[1]));
    println!("Done! Read {} files...", files_read);
    if errors > 0 {
        Err(format!("HTML check failed: {} errors", errors))
    } else {
        println!("No error found!");
        Ok(())
    }
}