From 9918693037dce8aa4bb6f08741b6812923486c18 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 19 Jun 2024 11:26:03 +0200 Subject: Merging upstream version 1.76.0+dfsg1. Signed-off-by: Daniel Baumann --- src/tools/collect-license-metadata/src/main.rs | 10 ++++ .../collect-license-metadata/src/path_tree.rs | 56 ++++++++++++++++++++-- src/tools/collect-license-metadata/src/reuse.rs | 8 ++-- 3 files changed, 66 insertions(+), 8 deletions(-) (limited to 'src/tools/collect-license-metadata') diff --git a/src/tools/collect-license-metadata/src/main.rs b/src/tools/collect-license-metadata/src/main.rs index ca2a6f4b8..cbe94af35 100644 --- a/src/tools/collect-license-metadata/src/main.rs +++ b/src/tools/collect-license-metadata/src/main.rs @@ -6,6 +6,16 @@ use crate::licenses::LicensesInterner; use anyhow::Error; use std::path::PathBuf; +// Some directories have too many slight license differences that'd result in a +// huge report, and could be considered a standalone project anyway. Those +// directories are "condensed" into a single licensing block for ease of +// reading, merging the licensing information. +// +// For every `(dir, file)`, every file in `dir` is considered to have the +// license info of `file`. +const CONDENSED_DIRECTORIES: &[(&str, &str)] = + &[("./src/llvm-project/", "./src/llvm-project/README.md")]; + fn main() -> Result<(), Error> { let reuse_exe: PathBuf = std::env::var_os("REUSE_EXE").expect("Missing REUSE_EXE").into(); let dest: PathBuf = std::env::var_os("DEST").expect("Missing DEST").into(); diff --git a/src/tools/collect-license-metadata/src/path_tree.rs b/src/tools/collect-license-metadata/src/path_tree.rs index 709d91897..fc8756d9a 100644 --- a/src/tools/collect-license-metadata/src/path_tree.rs +++ b/src/tools/collect-license-metadata/src/path_tree.rs @@ -4,7 +4,7 @@ //! passes over the tree to remove redundant information.
use crate::licenses::{License, LicenseId, LicensesInterner}; -use std::collections::BTreeMap; +use std::collections::{BTreeMap, BTreeSet}; use std::path::{Path, PathBuf}; #[derive(serde::Serialize)] @@ -12,6 +12,7 @@ use std::path::{Path, PathBuf}; pub(crate) enum Node { Root { children: Vec> }, Directory { name: PathBuf, children: Vec>, license: Option }, + CondensedDirectory { name: PathBuf, licenses: Vec }, File { name: PathBuf, license: L }, Group { files: Vec, directories: Vec, license: L }, Empty, @@ -57,9 +58,9 @@ impl Node { Node::Directory { name, mut children, license: None } => { directories.entry(name).or_insert_with(Vec::new).append(&mut children); } - file @ Node::File { .. } => { - files.push(file); - } + file @ Node::File { .. } => files.push(file), + // Propagate condensed directories as-is. + condensed @ Node::CondensedDirectory { .. } => files.push(condensed), Node::Empty => {} Node::Root { .. } => { panic!("can't have a root inside another element"); @@ -86,6 +87,7 @@ impl Node { } Node::Empty => {} Node::File { .. } => {} + Node::CondensedDirectory { .. } => {} Node::Group { .. } => { panic!("Group should not be present at this stage"); } @@ -132,6 +134,7 @@ impl Node { } } Node::File { .. } => {} + Node::CondensedDirectory { .. } => {} Node::Group { .. } => panic!("group should not be present at this stage"), Node::Empty => {} } @@ -174,6 +177,9 @@ impl Node { Node::Directory { name: child_child_name, .. } => { *child_child_name = child_name.join(&child_child_name); } + Node::CondensedDirectory { name: child_child_name, .. } => { + *child_child_name = child_name.join(&child_child_name); + } Node::File { name: child_child_name, .. } => { *child_child_name = child_name.join(&child_child_name); } @@ -188,6 +194,7 @@ impl Node { } Node::Empty => {} Node::File { .. } => {} + Node::CondensedDirectory { .. } => {} Node::Group { .. } => panic!("Group should not be present at this stage"), } } @@ -255,6 +262,7 @@ impl Node { } } Node::File { .. 
} => {} + Node::CondensedDirectory { .. } => {} Node::Group { .. } => panic!("FileGroup should not be present at this stage"), Node::Empty => {} } @@ -270,6 +278,7 @@ impl Node { } children.retain(|child| !matches!(child, Node::Empty)); } + Node::CondensedDirectory { .. } => {} Node::Group { .. } => {} Node::File { .. } => {} Node::Empty => {} @@ -293,7 +302,24 @@ pub(crate) fn build(mut input: Vec<(PathBuf, LicenseId)>) -> Node { // Ensure reproducibility of all future steps. input.sort(); - for (path, license) in input { + let mut condensed_directories = BTreeMap::new(); + 'outer: for (path, license) in input { + // Files in condensed directories are handled separately. + for (condensed_directory, allowed_file) in super::CONDENSED_DIRECTORIES { + if path.starts_with(condensed_directory) { + if path.as_path() == Path::new(allowed_file) { + // The licence on our allowed file is used to represent the entire directory + condensed_directories + .entry(*condensed_directory) + .or_insert_with(BTreeSet::new) + .insert(license); + } else { + // don't add the file + } + continue 'outer; + } + } + let mut node = Node::File { name: path.file_name().unwrap().into(), license }; for component in path.parent().unwrap_or_else(|| Path::new(".")).components().rev() { node = Node::Directory { @@ -306,6 +332,22 @@ pub(crate) fn build(mut input: Vec<(PathBuf, LicenseId)>) -> Node { children.push(node); } + for (path, licenses) in condensed_directories { + let path = Path::new(path); + let mut node = Node::CondensedDirectory { + name: path.file_name().unwrap().into(), + licenses: licenses.iter().copied().collect(), + }; + for component in path.parent().unwrap_or_else(|| Path::new(".")).components().rev() { + node = Node::Directory { + name: component.as_os_str().into(), + children: vec![node], + license: None, + }; + } + children.push(node); + } + Node::Root { children } } @@ -334,6 +376,10 @@ pub(crate) fn expand_interned_licenses( Node::Group { files, directories, license } => { 
Node::Group { files, directories, license: interner.resolve(license) } } + Node::CondensedDirectory { name, licenses } => Node::CondensedDirectory { + name, + licenses: licenses.into_iter().map(|license| interner.resolve(license)).collect(), + }, Node::Empty => Node::Empty, } } diff --git a/src/tools/collect-license-metadata/src/reuse.rs b/src/tools/collect-license-metadata/src/reuse.rs index d6b3772ba..a5d01935f 100644 --- a/src/tools/collect-license-metadata/src/reuse.rs +++ b/src/tools/collect-license-metadata/src/reuse.rs @@ -17,9 +17,11 @@ pub(crate) fn collect( let mut result = Vec::new(); for file in document.file_information { + let concluded_license = file.concluded_license.expect("File should have licence info"); + let copyright_text = file.copyright_text.expect("File should have copyright text"); let license = interner.intern(License { - spdx: file.concluded_license.to_string(), - copyright: file.copyright_text.split('\n').map(|s| s.into()).collect(), + spdx: concluded_license.to_string(), + copyright: copyright_text.split('\n').map(|s| s.into()).collect(), }); result.push((file.file_name.into(), license)); @@ -30,7 +32,7 @@ pub(crate) fn collect( fn obtain_spdx_document(reuse_exe: &Path) -> Result { let output = Command::new(reuse_exe) - .args(&["spdx", "--add-license-concluded", "--creator-person=bors"]) + .args(&["--include-submodules", "spdx", "--add-license-concluded", "--creator-person=bors"]) .stdout(Stdio::piped()) .spawn()? .wait_with_output()?; -- cgit v1.2.3