use std::{ collections::BTreeSet, env::{current_dir, set_current_dir}, num::NonZeroUsize, path::{Path, PathBuf}, process::ExitCode, }; use clap::Parser; use lets_find_up::{find_up_with, FindUpKind, FindUpOptions}; use miette::{ensure, miette, Context, Diagnostic, IntoDiagnostic, Report, SourceSpan}; use regex::Regex; use crate::{ fs::{copy_dir, create_dir_all, existing_file, remove_file, FileRoot}, path::join_path, process::{which, EasyCommand}, }; mod fs; mod path; mod process; /// Vendor WebGPU CTS tests from a local Git checkout of [our `gpuweb/cts` fork]. /// /// WPT tests are generated into `testing/web-platform/mozilla/tests/webgpu/`. If the set of tests /// changes upstream, make sure that the generated output still matches up with test expectation /// metadata in `testing/web-platform/mozilla/meta/webgpu/`. /// /// [our `gpuweb/cts` fork]: https://github.com/mozilla/gpuweb-cts #[derive(Debug, Parser)] struct CliArgs { /// A path to the top-level directory of your WebGPU CTS checkout. cts_checkout_path: PathBuf, /// The maximum capacity for test variant chunks. /// /// This tools divides the large number of CTS tests generated by upstream and chunks them into /// multiple files. It's important to use a number that does not cause tests to time out in /// Taskcluster. The current default value has been empirically tested for only this criteria. /// Note that the way tests are divided may change in the future. /// /// If you intend to change the value long-term, change the default here. #[clap(long, default_value = "25")] chunk_size: NonZeroUsize, } fn main() -> ExitCode { env_logger::builder() .filter_level(log::LevelFilter::Info) .parse_default_env() .init(); let args = CliArgs::parse(); match run(args) { Ok(()) => ExitCode::SUCCESS, Err(e) => { log::error!("{e:?}"); ExitCode::FAILURE } } } fn run(args: CliArgs) -> miette::Result<()> { let CliArgs { chunk_size, cts_checkout_path, } = args; let orig_working_dir = current_dir().unwrap(); let cts_dir = join_path(["dom", "webgpu", "tests", "cts"]); let cts_vendor_dir = join_path([&*cts_dir, "vendor".as_ref()]); let gecko_ckt = { let failed_find_hg_err = || { miette!( "failed to find a Mercurial repository (`.hgrc`) in any of current working \ directory and its parent directories" ) }; let hg_root = { let mut dir = find_up_with( ".hg", FindUpOptions { cwd: Path::new("."), kind: FindUpKind::Dir, }, ) .map_err(Report::msg) .wrap_err_with(failed_find_hg_err)? .ok_or_else(failed_find_hg_err)?; dir.pop(); dir }; let root = FileRoot::new("gecko", &hg_root)?; log::info!("detected Gecko repository root at {root}"); ensure!( root.try_child(&orig_working_dir) .map_or(false, |c| c.relative_path() == cts_vendor_dir), "It is expected to run this tool from the root of its Cargo project, but this does \ not appear to have been done. Bailing." ); root }; let cts_vendor_dir = gecko_ckt.child(orig_working_dir.parent().unwrap()); let wpt_tests_dir = { let child = gecko_ckt.child(join_path(["testing", "web-platform", "mozilla", "tests"])); ensure!( child.is_dir(), "WPT tests dir ({}) does not appear to exist", child, ); child }; let (cts_ckt_git_dir, cts_ckt) = { let failed_find_git_err = || { miette!( "failed to find a Git repository (`.git` directory) in the provided path and all \ of its parent directories" ) }; let git_dir = find_up_with( ".git", FindUpOptions { cwd: &cts_checkout_path, kind: FindUpKind::Dir, }, ) .map_err(Report::msg) .wrap_err_with(failed_find_git_err)? .ok_or_else(failed_find_git_err)?; let ckt = FileRoot::new("cts", git_dir.parent().unwrap())?; log::debug!("detected CTS checkout root at {ckt}"); (git_dir, ckt) }; let git_bin = which("git", "Git binary")?; let npm_bin = which("npm", "NPM binary")?; // XXX: It'd be nice to expose separate operations for copying in source and generating WPT // cases from the vendored copy. Checks like these really only matter when updating source. let ensure_no_child = |p1: &FileRoot, p2| { ensure!( p1.try_child(p2).is_err(), "{p1} is a child path of {p2}, which is not supported" ); Ok(()) }; ensure_no_child(&cts_ckt, &gecko_ckt)?; ensure_no_child(&gecko_ckt, &cts_ckt)?; log::info!("making a vendored copy of checked-in files from {cts_ckt}…",); gecko_ckt.regen_file( join_path([&*cts_dir, "checkout_commit.txt".as_ref()]), |checkout_commit_file| { let mut git_status_porcelain_cmd = EasyCommand::new(&git_bin, |cmd| { cmd.args(["status", "--porcelain"]) .envs([("GIT_DIR", &*cts_ckt_git_dir), ("GIT_WORK_TREE", &*cts_ckt)]) }); log::info!( " …ensuring the working tree and index are clean with \ {git_status_porcelain_cmd}…" ); let git_status_porcelain_output = git_status_porcelain_cmd.just_stdout_utf8()?; ensure!( git_status_porcelain_output.is_empty(), "expected a clean CTS working tree and index, but {}'s output was not empty; \ for reference, it was:\n\n{}", git_status_porcelain_cmd, git_status_porcelain_output, ); gecko_ckt.regen_dir(&cts_vendor_dir.join("checkout"), |vendored_ckt_dir| { log::info!(" …copying files tracked by Git to {vendored_ckt_dir}…"); let files_to_vendor = { let mut git_ls_files_cmd = EasyCommand::new(&git_bin, |cmd| { cmd.arg("ls-files").env("GIT_DIR", &cts_ckt_git_dir) }); log::debug!(" …getting files to vendor from {git_ls_files_cmd}…"); let output = git_ls_files_cmd.just_stdout_utf8()?; let mut files = output .split_terminator('\n') .map(PathBuf::from) .collect::>(); log::trace!(" …files from {git_ls_files_cmd}: {files:#?}"); log::trace!(" …validating that files from Git repo still exist…"); let files_not_found = files .iter() .filter(|p| !cts_ckt.child(p).exists()) .collect::>(); ensure!( files_not_found.is_empty(), "the following files were returned by `git ls-files`, but do not \ exist on disk: {:#?}", files_not_found, ); log::trace!(" …stripping files we actually don't want to vendor…"); let files_to_actually_not_vendor = [ // There's no reason to bring this over, and lots of reasons to not bring in // security-sensitive content unless we have to. "deploy_key.enc", ] .map(Path::new); log::trace!(" …files we don't want: {files_to_actually_not_vendor:?}"); for path in files_to_actually_not_vendor { ensure!( files.remove(path), "failed to remove {} from list of files to vendor; does it still \ exist?", cts_ckt.child(path) ); } files }; log::debug!(" …now doing the copying…"); for path in files_to_vendor { let vendor_from_path = cts_ckt.child(&path); let vendor_to_path = vendored_ckt_dir.child(&path); if let Some(parent) = vendor_to_path.parent() { create_dir_all(vendored_ckt_dir.child(parent))?; } log::trace!(" …copying {vendor_from_path} to {vendor_to_path}…"); fs::copy(&vendor_from_path, &vendor_to_path)?; } Ok(()) })?; log::info!(" …writing commit ref pointed to by `HEAD` to {checkout_commit_file}…"); let mut git_rev_parse_head_cmd = EasyCommand::new(&git_bin, |cmd| { cmd.args(["rev-parse", "HEAD"]) .env("GIT_DIR", &cts_ckt_git_dir) }); log::trace!(" …getting output of {git_rev_parse_head_cmd}…"); fs::write( checkout_commit_file, git_rev_parse_head_cmd.just_stdout_utf8()?, ) .wrap_err_with(|| format!("failed to write HEAD ref to {checkout_commit_file}")) }, )?; set_current_dir(&*cts_ckt) .into_diagnostic() .wrap_err("failed to change working directory to CTS checkout")?; log::debug!("changed CWD to {cts_ckt}"); let mut npm_ci_cmd = EasyCommand::new(&npm_bin, |cmd| cmd.arg("ci")); log::info!( "ensuring a clean {} directory with {npm_ci_cmd}…", cts_ckt.child("node_modules"), ); npm_ci_cmd.spawn()?; let out_dir = cts_ckt.regen_dir("out", |out_dir| { let mut npm_run_standalone_cmd = EasyCommand::new(&npm_bin, |cmd| cmd.args(["run", "standalone"])); log::info!( "generating standalone runner files into {out_dir} with {npm_run_standalone_cmd}…" ); npm_run_standalone_cmd.spawn() })?; let out_wpt_dir = cts_ckt.regen_dir("out-wpt", |out_wpt_dir| { let mut npm_run_wpt_cmd = EasyCommand::new(&npm_bin, |cmd| cmd.args(["run", "wpt"])); log::info!("generating WPT test cases into {out_wpt_dir} with {npm_run_wpt_cmd}…"); npm_run_wpt_cmd.spawn() })?; let cts_https_html_path = out_wpt_dir.child("cts.https.html"); log::info!("refining the output of {cts_https_html_path} with `npm run gen_wpt_cts_html …`…"); EasyCommand::new(&npm_bin, |cmd| { cmd.args(["run", "gen_wpt_cts_html"]) .arg(existing_file(&cts_https_html_path)) .args([ existing_file(cts_ckt.child(join_path([ "src", "common", "templates", "cts.https.html", ]))), existing_file(cts_vendor_dir.child("arguments.txt")), existing_file(cts_vendor_dir.child("myexpectations.txt")), ]) .arg("") }) .spawn()?; log::info!("stealing standalone runtime files from {out_dir} for {out_wpt_dir}…"); for subdir in [ &["external"] as &[_], &["common", "internal"], &["common", "util"], ] .map(join_path) { let out_subdir = out_dir.child(&subdir); let out_wpt_subdir = out_wpt_dir.child(subdir); log::info!(" …copying from {out_subdir} to {out_wpt_subdir}…"); copy_dir(out_subdir, out_wpt_subdir)? } log::info!(" …done stealing!"); log::info!("analyzing {cts_https_html_path}…"); let cts_https_html_content = fs::read_to_string(&*cts_https_html_path)?; let cts_boilerplate; let cts_cases; { { let (boilerplate, cases_start) = { let cases_start_idx = cts_https_html_content .find("", 1, ) }; log::info!(" …parsing test variants in {cts_https_html_path}…"); cts_cases = cases_start.split_terminator('\n').collect::>(); let mut parsing_failed = false; let meta_variant_regex = Regex::new("^$").unwrap(); cts_cases.iter().for_each(|line| { if !meta_variant_regex.is_match(line) { parsing_failed = true; log::error!("line is not a test case: {line:?}"); } }); ensure!( !parsing_failed, "one or more test case lines failed to parse, fix it and try again" ); }; log::trace!("\"original\" HTML boilerplate:\n\n{}", cts_boilerplate); ensure!( !cts_cases.is_empty(), "no test cases found; this is unexpected!" ); log::info!(" …found {} test cases", cts_cases.len()); } cts_ckt.regen_dir(out_wpt_dir.join("chunked"), |chunked_tests_dir| { // NOTE: We use an extremely simple chunking algorithm here. This was done in the name of // speed of initial implementation. However, this might cause a significant amount of churn // when tests get updated. let chunks = cts_cases.chunks(chunk_size.get()).zip(1u32..); log::info!( "re-distributing tests into {} chunks of {chunk_size}…", chunks.clone().count() ); let mut failed_writing = false; for (chunk, chunk_idx) in chunks { // NOTE: Using `0`-padding here was considered, but it's probably not worth it. That // would be in conflict with stable file paths as the set of tests grows. let chunk_dir = chunked_tests_dir.child(chunk_idx.to_string()); match create_dir_all(&chunk_dir) { Ok(()) => log::trace!("made directory {}", chunk_dir.display()), Err(e) => { failed_writing = true; log::error!("{e:#}"); continue; } } let chunk_file_path = chunk_dir.child("cts.https.html"); let chunk_file_content = { let mut content = cts_boilerplate.as_bytes().to_vec(); for line in chunk { content.extend(line.as_bytes()); content.extend(b"\n"); } content }; match fs::write(&chunk_file_path, &chunk_file_content).wrap_err_with(|| { miette!("failed to write chunked output to path {chunk_file_path}") }) { Ok(()) => log::debug!(" …wrote {chunk_file_path}"), Err(e) => { failed_writing = true; log::error!("{e:#}"); } } } ensure!( !failed_writing, "failed to write one or more chunked WPT test files; see above output for more details" ); log::debug!(" …finished writing new chunked WPT test files!"); log::info!(" …removing {cts_https_html_path}, now that it's been divided into chunks…"); remove_file(&cts_https_html_path)?; Ok(()) })?; gecko_ckt.regen_dir(wpt_tests_dir.join("webgpu"), |wpt_webgpu_tests_dir| { log::info!("copying contents of {out_wpt_dir} to {wpt_webgpu_tests_dir}…"); copy_dir(&out_wpt_dir, wpt_webgpu_tests_dir) })?; log::info!("All done! Now get your CTS _ON_! :)"); Ok(()) }