author     Daniel Baumann <daniel.baumann@progress-linux.org>   2024-04-14 13:42:30 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>   2024-04-14 13:42:30 +0000
commit     75808db17caf8b960b351e3408e74142f4c85aac (patch)
tree       7989e9c09a4240248bf4658a22208a0a52d991c4 /commands
parent     Initial commit. (diff)
Adding upstream version 2.117.0. (upstream/2.117.0, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'commands')
-rw-r--r--  commands/reporting-harness.pm          |  517
-rw-r--r--  commands/reporting-html-reports.pm     | 1262
-rw-r--r--  commands/reporting-lintian-harness.pm  |  603
-rw-r--r--  commands/reporting-sync-state.pm       |  572
4 files changed, 2954 insertions, 0 deletions
diff --git a/commands/reporting-harness.pm b/commands/reporting-harness.pm
new file mode 100644
index 0000000..89bf2c6
--- /dev/null
+++ b/commands/reporting-harness.pm
@@ -0,0 +1,517 @@
+#!/usr/bin/perl
+#
+# Lintian reporting harness -- Create and maintain Lintian reports automatically
+#
+# Copyright (C) 1998 Christian Schwarz and Richard Braakman
+#
+# This program is free software. It is distributed under the terms of
+# the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any
+# later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, you can find it on the World Wide
+# Web at https://www.gnu.org/copyleft/gpl.html, or write to the Free
+# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+# MA 02110-1301, USA.
+
+package reporting_harness;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use constant BACKLOG_PROCESSING_GROUP_LIMIT => 1024;
+
+use Date::Format qw(time2str);
+use File::Copy;
+use FileHandle;
+use Getopt::Long;
+use Path::Tiny;
+use POSIX qw(strftime);
+use YAML::XS ();
+
+use Lintian::IO::Async qw(safe_qx);
+use Lintian::Processable;
+use Lintian::Relation::Version qw(versions_comparator);
+use Lintian::Reporting::Util qw(load_state_cache save_state_cache);
+use Lintian::Util qw(open_gz);
+
+use constant EMPTY => q{};
+
+sub usage {
+ print <<END;
+Lintian reporting harness
+Create and maintain Lintian reports automatically
+
+Usage: harness [ -i | -f | -r | -c ]
+
+Options:
+ -c clean mode, erase everything and start from scratch (implies -f)
+ -f full mode, blithely overwrite lintian.log
+ -i incremental mode, use old lintian.log data, process changes only
+ -r, --[no-]generate-reports
+ Whether to generate reports. By default, reports will be
+ generated at the end of a run with -i, -f or -c. It can also be
+ used as a stand-alone "mode", where only reports are
+ regenerated.
+ --reporting-config FILE
+ Parse FILE as the primary configuration file. Defines which
+ archives to process, etc. (Default: ./config.yaml)
+ --dry-run pretend to do the actions without actually doing them. The
+ "normal" harness output will go to stdout rather than the
+ harness.log.
+ --to-stdout
+ [For debugging] Have output go to stdout as well as the usual
+ log files. Note, this option has no (extra) effect with --dry-run.
+ --schedule-chunk-size N
+ Schedule at most N groups in a given run of Lintian. If more than N
+ groups need to be processed, harness will invoke Lintian more than
+ once. If N is 0, schedule all groups in one go. (Default: 512)
+ --schedule-limit-groups N
+ Schedule at most N groups in this run of harness. If more than N
+ groups need to be processed, harness leaves the rest for a subsequent
+ run. (Default: ${\BACKLOG_PROCESSING_GROUP_LIMIT})
+
+Incremental mode is the default if you have a lintian.log;
+otherwise, it's full.
+
+Report bugs to <lintian-maint\@debian.org>.
+END
+ #'/# for cperl-mode
+ exit;
+}
+
+my %opt = (
+ 'schedule-chunk-size' => 512,
+ 'schedule-limit-groups' => BACKLOG_PROCESSING_GROUP_LIMIT,
+ 'reporting-config' => './config.yaml',
+);
+
+my %opthash = (
+ 'i' => \$opt{'incremental-mode'},
+ 'c' => \$opt{'clean-mode'},
+ 'f' => \$opt{'full-mode'},
+ 'generate-reports|r!' => \$opt{'generate-reports'},
+ 'reporting-config=s'=> \$opt{'reporting-config'},
+ 'dry-run' => \$opt{'dry-run'},
+ 'schedule-chunk-size=i' => \$opt{'schedule-chunk-size'},
+ 'schedule-limit-groups=i' => \$opt{'schedule-limit-groups'},
+ 'to-stdout' => \$opt{'to-stdout'},
+ 'help|h' => \&usage,
+);
+
+# Global variables
+my (
+ $log_file, $lintian_log, $lintian_perf_log,
+ $html_reports_log,$sync_state_log, $lintian_cmd,
+ $STATE_DIR, $LINTIAN_VERSION, $LOG_FD,
+ $CONFIG,$LOG_DIR, $HTML_DIR,
+ $HTML_TMP_DIR,$LINTIAN_SCRATCH_SPACE, $LINTIAN_BASE,
+ $EXTRA_LINTIAN_OPTIONS,
+);
+
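+# Walk $CONFIG along the given key path and return the value, dying if any
+# key along the path is missing.  For example (the path below is purely
+# illustrative), with a config.yaml containing
+#
+#   storage:
+#     log-dir: /srv/lintian/logs
+#
+# required_cfg_value('storage', 'log-dir') returns "/srv/lintian/logs", while
+# a missing key dies with "Missing required config parameter: storage.log-dir".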
+sub required_cfg_value {
+ my (@keys) = @_;
+ my $v = $CONFIG;
+ for my $key (@keys) {
+ if (not exists($v->{$key})) {
+ my $k = join('.', @keys);
+ die("Missing required config parameter: ${k}\n");
+ }
+ $v = $v->{$key};
+ }
+ return $v;
+}
+
+sub required_cfg_list_value {
+ my (@keys) = @_;
+ my $v = required_cfg_value(@keys);
+ if (not defined($v) or ref($v) ne 'ARRAY') {
+ my $k = join('.', @keys);
+ die("Invalid configuration: ${k} must be a (possibly empty) list\n");
+ }
+ return $v;
+}
+
+sub main {
+ parse_options_and_config();
+
+ # turn file buffering off
+ STDOUT->autoflush;
+
+ unless ($opt{'dry-run'}) {
+ # rotate log files
+ my @rotate_logs
+ = ($log_file, $html_reports_log, $lintian_perf_log, $sync_state_log);
+ safe_qx('savelog', @rotate_logs);
+
+ # create new log file
+ open($LOG_FD, '>', $log_file)
+ or die("cannot open log file $log_file for writing: $!");
+ $LOG_FD->autoflush;
+ } else {
+ $opt{'to-stdout'} = 0;
+ open($LOG_FD, '>&', \*STDOUT)
+ or die "Cannot open log file <stdout> for writing: $!";
+ Log('Running in dry-run mode');
+ }
+ # From here on we can use Log() and Die().
+ if (not $opt{'dry-run'} and $opt{'clean-mode'}) {
+ Log('Purging old state-cache/dir');
+ path($STATE_DIR)->remove_tree;
+ }
+
+ if (not -d $STATE_DIR) {
+ path($STATE_DIR)->mkpath;
+ Log("Created cache dir: $STATE_DIR");
+ }
+
+ if ( !$opt{'generate-reports'}
+ && !$opt{'full-mode'}
+ && !$opt{'incremental-mode'}) {
+ # Nothing explicitly chosen, default to -i if the log is present,
+ # otherwise -f.
+ if (-f $lintian_log) {
+ $opt{'incremental-mode'} = 1;
+ } else {
+ $opt{'full-mode'} = 1;
+ }
+ }
+
+ # Default to yes, if not explicitly disabled.
+ $opt{'generate-reports'} //= 1;
+
+ if ($opt{'incremental-mode'} or $opt{'full-mode'}) {
+ run_lintian();
+ }
+
+ if ($opt{'generate-reports'}) {
+ generate_reports();
+ }
+
+ # ready!!! :-)
+ Log('All done.');
+ exit 0;
+}
+
+# -------------------------------
+
+sub parse_options_and_config {
+
+ # init commandline parser
+ Getopt::Long::config('bundling', 'no_getopt_compat', 'no_auto_abbrev');
+
+ # process commandline options
+ GetOptions(%opthash)
+ or die("error parsing options\n");
+
+ # clean implies full - do this as early as possible, so we can just
+ # check $opt{'full-mode'} rather than a full
+ # ($opt{'clean-mode'} || $opt{'full-mode'})
+ $opt{'full-mode'} = 1 if $opt{'clean-mode'};
+
+ die("Cannot use both incremental and full/clean.\n")
+ if $opt{'incremental-mode'} && $opt{'full-mode'};
+ die("The argument for --schedule-limit-groups must be an integer > 0\n")
+ if $opt{'schedule-limit-groups'} < 1;
+ if (not $opt{'reporting-config'} or not -f $opt{'reporting-config'}) {
+ die("The --reporting-config parameter must point to an existing file\n"
+ );
+ }
+ # read configuration
+ $CONFIG = YAML::XS::LoadFile($opt{'reporting-config'});
+ $LOG_DIR = required_cfg_value('storage', 'log-dir');
+ $HTML_DIR = required_cfg_value('storage', 'reports-dir');
+ $HTML_TMP_DIR = required_cfg_value('storage', 'reports-work-dir');
+ $STATE_DIR = required_cfg_value('storage', 'state-cache');
+ $LINTIAN_SCRATCH_SPACE = required_cfg_value('storage', 'scratch-space');
+
+ if ( exists($CONFIG->{'lintian'})
+ && exists($CONFIG->{'lintian'}{'extra-options'})) {
+ $EXTRA_LINTIAN_OPTIONS
+ = required_cfg_list_value('lintian', 'extra-options');
+ } else {
+ $EXTRA_LINTIAN_OPTIONS = [];
+ }
+
+ $LINTIAN_BASE = $ENV{'LINTIAN_BASE'};
+
+ $lintian_cmd = "$LINTIAN_BASE/bin/lintian";
+
+ $LINTIAN_VERSION= safe_qx("$LINTIAN_BASE/bin/lintian",'--print-version');
+ chomp($LINTIAN_VERSION);
+
+ (
+ $log_file, $lintian_log, $lintian_perf_log,$html_reports_log,
+ $sync_state_log
+ )
+ = map {"$LOG_DIR/$_" }
+ qw(harness.log lintian.log lintian-perf.log html_reports.log sync_state.log);
+
+ return;
+}
+
+sub run_lintian {
+ my @sync_state_args = (
+ '--reporting-config', $opt{'reporting-config'},
+ '--desired-version', $LINTIAN_VERSION,'--debug',
+ );
+ my @lintian_harness_args = (
+ '--lintian-frontend', "$LINTIAN_BASE/bin/lintian",
+ '--lintian-log-dir', $LOG_DIR,
+ '--schedule-chunk-size', $opt{'schedule-chunk-size'},
+ '--schedule-limit-groups', $opt{'schedule-limit-groups'},
+ '--state-dir', $STATE_DIR,
+ # Finish with the lintian command-line
+ '--', @{$EXTRA_LINTIAN_OPTIONS});
+
+ if ($opt{'full-mode'}) {
+ push(@sync_state_args, '--reschedule-all');
+ }
+ if ($opt{'dry-run'}) {
+ push(@sync_state_args, '--dry-run');
+ }
+
+ if ($LINTIAN_SCRATCH_SPACE) {
+ unshift(@lintian_harness_args,
+ '--lintian-scratch-space', $LINTIAN_SCRATCH_SPACE);
+ }
+
+ Log('Updating harness state cache (reading mirror index files)');
+
+ my $loop = IO::Async::Loop->new;
+ my $syncdone = $loop->new_future;
+
+ my @synccommand = ($lintian_cmd, 'reporting-sync-state', @sync_state_args);
+ Log('Command: ' . join(' ', @synccommand));
+
+ my $syncprocess = IO::Async::Process->new(
+ command => [@synccommand],
+ stdout => { via => 'pipe_read' },
+ stderr => { via => 'pipe_read' },
+ on_finish => sub {
+ my ($self, $exitcode) = @_;
+ my $status = ($exitcode >> 8);
+
+ if ($status) {
+ Log("warning: executing reporting-sync-state returned $status"
+ );
+ my $message= "Non-zero status $status from @synccommand";
+ $syncdone->fail($message);
+ return;
+ }
+
+ $syncdone->done("Done with @synccommand");
+ return;
+ });
+
+ my $syncfh = *STDOUT;
+ unless($opt{'dry-run'}) {
+ open($syncfh, '>', $sync_state_log)
+ or die "Could not open file '$sync_state_log': $!";
+ }
+
+ $syncprocess->stdout->configure(
+ on_read => sub {
+ my ($stream, $buffref, $eof) = @_;
+
+ if (length $$buffref) {
+ print {$syncfh} $$buffref;
+ $$buffref = EMPTY;
+ }
+
+ if ($eof) {
+ close($syncfh)
+ unless $opt{'dry-run'};
+ }
+
+ return 0;
+ },
+ );
+
+ $syncprocess->stderr->configure(
+ on_read => sub {
+ my ($stream, $buffref, $eof) = @_;
+
+ if (length $$buffref) {
+ print STDERR $$buffref;
+ $$buffref = EMPTY;
+ }
+
+ return 0;
+ },
+ );
+
+ $loop->add($syncprocess);
+ $syncdone->await;
+
+ Log('Running lintian (via reporting-lintian-harness)');
+ Log(
+ 'Command: '
+ . join(' ',
+ $lintian_cmd, 'reporting-lintian-harness',@lintian_harness_args));
+ my %harness_lintian_opts = (
+ 'pipe_out' => FileHandle->new,
+ 'err' => '&1',
+ 'fail' => 'never',
+ );
+
+ if (not $opt{'dry-run'}) {
+ spawn(\%harness_lintian_opts,
+ [$lintian_cmd, 'reporting-lintian-harness', @lintian_harness_args]
+ );
+ my $child_out = $harness_lintian_opts{'pipe_out'};
+ while (my $line = <$child_out>) {
+ chomp($line);
+ Log_no_ts($line);
+ }
+ close($child_out);
+ if (not reap(\%harness_lintian_opts)) {
+ my $exit_code = $harness_lintian_opts{harness}->full_result;
+ my $res = ($exit_code >> 8) & 0xff;
+ my $sig = $exit_code & 0xff;
+ # Exit code 2 is "time-out", 3 is "lintian got signalled"
+ # 255 => reporting-lintian-harness caught an unhandled trappable
+ # error.
+ if ($res) {
+ if ($res == 255) {
+ Die('Lintian harness died with an unhandled exception');
+ } elsif ($res == 3) {
+ Log('Lintian harness stopped early due to signal');
+ if ($opt{'generate-reports'}) {
+ Log('Skipping report generation');
+ $opt{'generate-reports'} = 0;
+ }
+ } elsif ($res != 2) {
+ Die("Lintian harness terminated with code $res");
+ }
+ } elsif ($sig) {
+ Die("Lintian harness was killed by signal $sig");
+ }
+ }
+ }
+ return;
+}
+
+sub generate_reports {
+ my @html_reports_args
+ = ('--reporting-config',$opt{'reporting-config'},$lintian_log,);
+ # create html reports
+ Log('Creating HTML reports...');
+ Log("Executing $lintian_cmd reporting-html-reports @html_reports_args");
+
+ my $loop = IO::Async::Loop->new;
+ my $htmldone = $loop->new_future;
+
+ my @htmlcommand
+ = ($lintian_cmd, 'reporting-html-reports', @html_reports_args);
+ my $htmlprocess = IO::Async::Process->new(
+ command => [@htmlcommand],
+ stdout => { via => 'pipe_read' },
+ stderr => { via => 'pipe_read' },
+ on_finish => sub {
+ my ($self, $exitcode) = @_;
+ my $status = ($exitcode >> 8);
+
+ if ($status) {
+ Log(
+"warning: executing reporting-html-reports returned $status"
+ );
+ my $message= "Non-zero status $status from @htmlcommand";
+ $htmldone->fail($message);
+ return;
+ }
+
+ $htmldone->done("Done with @htmlcommand");
+ return;
+ });
+
+ open(my $htmlfh, '>', $html_reports_log)
+ or die "Could not open file '$html_reports_log': $!";
+
+ $htmlprocess->stdout->configure(
+ on_read => sub {
+ my ($stream, $buffref, $eof) = @_;
+
+ if (length $$buffref) {
+ print {$htmlfh} $$buffref;
+ $$buffref = EMPTY;
+ }
+
+ close($htmlfh)
+ if $eof;
+
+ return 0;
+ },
+ );
+
+ $htmlprocess->stderr->configure(
+ on_read => sub {
+ my ($stream, $buffref, $eof) = @_;
+
+ if (length $$buffref) {
+ print STDERR $$buffref;
+ $$buffref = EMPTY;
+ }
+
+ return 0;
+ },
+ );
+
+ $loop->add($htmlprocess);
+ $htmldone->await;
+
+ Log('');
+
+ # rotate the statistics file updated by reporting-html-reports
+ if (!$opt{'dry-run'} && -f "$STATE_DIR/statistics") {
+ my $date = time2str('%Y%m%d', time());
+ my $dest = "$LOG_DIR/stats/statistics-${date}";
+ copy("$STATE_DIR/statistics", $dest)
+ or Log('warning: could not rotate the statistics file');
+ }
+
+ # install new html directory
+ Log('Installing HTML reports...');
+ unless ($opt{'dry-run'}) {
+ path($HTML_DIR)->remove_tree;
+ # there is a tiny race window right here
+ rename($HTML_TMP_DIR,$HTML_DIR)
+ or Die("error renaming $HTML_TMP_DIR into $HTML_DIR");
+ }
+ Log('');
+ return;
+}
+
+sub Log {
+ my ($msg) = @_;
+ my $ts = strftime('[%FT%T]', localtime());
+ Log_no_ts("${ts}: ${msg}");
+ return;
+}
+
+sub Log_no_ts {
+ my ($msg) = @_;
+ print {$LOG_FD} $msg,"\n";
+ print $msg, "\n" if $opt{'to-stdout'};
+ return;
+}
+
+sub Die {
+ Log("fatal error: $_[0]");
+ exit 1;
+}
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/commands/reporting-html-reports.pm b/commands/reporting-html-reports.pm
new file mode 100644
index 0000000..2505538
--- /dev/null
+++ b/commands/reporting-html-reports.pm
@@ -0,0 +1,1262 @@
+#!/usr/bin/perl -w
+#
+# Lintian HTML reporting tool -- Create Lintian web reports
+#
+# Copyright (C) 1998 Christian Schwarz and Richard Braakman
+# Copyright (C) 2007 Russ Allbery
+# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
+#
+# This program is free software. It is distributed under the terms of
+# the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any
+# later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, you can find it on the World Wide
+# Web at https://www.gnu.org/copyleft/gpl.html, or write to the Free
+# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+# MA 02110-1301, USA.
+
+package reporting_html_reports;
+
+use v5.20;
+use warnings;
+use utf8;
+use autodie;
+
+use Getopt::Long;
+use POSIX qw(strftime);
+use File::Copy qw(copy);
+use Fcntl qw(SEEK_SET);
+use List::Util qw(first);
+use List::MoreUtils qw(uniq);
+use Path::Tiny;
+use Text::Template ();
+use URI::Escape;
+use YAML::XS ();
+
+use Lintian::Data;
+use Lintian::Deb822::Parser qw(read_dpkg_control_lc);
+use Lintian::IO::Async qw(safe_qx);
+use Lintian::Profile;
+use Lintian::Relation::Version qw(versions_comparator);
+use Lintian::Reporting::ResourceManager;
+use Lintian::Reporting::Util qw(load_state_cache find_backlog);
+use Lintian::Util qw(copy_dir run_cmd locate_executable);
+
+my $CONFIG;
+my %OPT;
+my %OPT_HASH = ('reporting-config=s'=> \$OPT{'reporting-config'},);
+
+# ------------------------------
+# Global variables and configuration
+
+# Some globals initialised in init_global()
+my (
+ $RESOURCE_MANAGER, $LINTIAN_VERSION, $timestamp,
+ $TEMPLATE_CONFIG_VARS,$HARNESS_STATE_DIR, $HISTORY_DIR,
+ $HISTORY, $GRAPHS, $LINTIAN_BASE,
+ $HTML_TMP_DIR, $SCOUR_ENABLED,
+);
+# FIXME: Should become obsolete if gnuplot is replaced by R, as piuparts.d.o /
+# reproducible.d.n already do
+my $GRAPHS_RANGE_DAYS = 366;
+
+# ------------------------------
+# Initialize templates
+
+# This only has to be done once, so do it at the start and then reuse the same
+# templates throughout.
+our %templates;
+
+# %statistics accumulates global statistics. For tags: errors, warnings,
+# experimental, overridden, and info are the keys holding the count of tags of
+# that sort. For packages: binary, udeb, and source are the number of
+# packages of each type with Lintian errors or warnings. For maintainers:
+# maintainers is the number of maintainers with Lintian errors or warnings.
+#
+# %tag_statistics holds a hash of tag-specific statistics. Each tag name is a
+# key, and its value is a hash with the following keys: count and overrides
+# (number of times the tag has been detected and overridden, respectively), and
+# packages (number of packages with at least one such tag).
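+#
+# For instance (tag name and numbers are hypothetical):
+#
+#   %tag_statistics = (
+#       'some-tag' => { count => 42, overrides => 3, packages => 17 },
+#   );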
+my (%statistics, %tag_statistics);
+
+# %by_maint holds a hash of maintainer names to packages and tags. Each
+# maintainer is a key. The value is a hash of package names to hashes. Each
+# package hash is in turn a hash of versions to an anonymous array of hashes,
+# with each hash having keys code, package, type, tag, severity,
+# extra, and xref. xref gets the partial URL of the maintainer page for that
+# source package.
+#
+# In other words, the lintian output line:
+#
+# W: gnubg source: substvar-source-version-is-deprecated gnubg-data
+#
+# for gnubg 0.15~20061120-1 maintained by Russ Allbery <rra@debian.org> is
+# turned into the following structure:
+#
+# { 'gnubg' => {
+# '0.15~20061120-1' => [
+# { code => 'W', # Either 'O' or same as $tag_info->code
+# pkg_info => {
+# package => 'gnubg',
+# version => '0.15~20061120-1',
+# component => 'main',
+# type => 'source',
+# anchor => 'gnubg_0.15~20061120-1',
+# xref => 'rra@debian.org.html#gnubg_0.15~20061120-1'
+# },
+# tag_info => $tag_info, # an instance of Lintian::Tag::Info
+# archs => {
+# # Architectures we have seen this tag for
+# 'amd64' => 1,
+# 'i386' => 1,
+# },
+# extra => 'gnubg-data'
+# } ] } }
+#
+# and then stored under the key 'Russ Allbery <rra@debian.org>'
+#
+# %by_uploader holds the same thing except for packages for which the person
+# is only an uploader.
+#
+# %by_tag is a hash of tag names to an anonymous array of tag information
+# hashes just like the inside-most data structure above.
+my (%by_maint, %by_uploader, %by_tag, %maintainer_table, %delta);
+my @attrs = qw(maintainers source-packages binary-packages udeb-packages
+ errors warnings info experimental pedantic overridden groups-known
+ groups-backlog classifications groups-with-errors);
+
+my @RESTRICTED_CONFIG_DIRS= split(/:/, $ENV{'LINTIAN_RESTRICTED_CONFIG_DIRS'});
+my @CONFIG_DIRS = split(/:/, $ENV{'LINTIAN_CONFIG_DIRS'});
+
+sub load_profile {
+ my ($profile_name, $options) = @_;
+ my %opt = (
+ 'restricted-search-dirs' => \@RESTRICTED_CONFIG_DIRS,
+ %{$options // {}},
+ );
+ require Lintian::Profile;
+
+ my $profile = Lintian::Profile->new;
+ $profile->load($profile_name, \@CONFIG_DIRS, \%opt);
+
+ return $profile;
+}
+
+sub required_cfg_value {
+ my (@keys) = @_;
+ my $v = $CONFIG;
+ for my $key (@keys) {
+ if (not exists($v->{$key})) {
+ my $k = join('.', @keys);
+ die("Missing required config parameter: ${k}\n");
+ }
+ $v = $v->{$key};
+ }
+ return $v;
+}
+
+sub required_cfg_non_empty_list_value {
+ my (@keys) = @_;
+ my $v = required_cfg_value(@keys);
+ if (not defined($v) or ref($v) ne 'ARRAY' or scalar(@{$v}) < 1) {
+ my $k = join('.', @keys);
+ die("Invalid configuration: ${k} must be a non-empty list\n");
+ }
+ return $v;
+}
+
+# ------------------------------
+# Main routine
+
+sub main {
+ my $profile = init_globals();
+
+ setup_output_dir(
+ 'output_dir' => $HTML_TMP_DIR,
+ 'lintian_manual' => "${LINTIAN_BASE}/doc/lintian.html",
+ 'lintian_api_docs' => "${LINTIAN_BASE}/doc/api.html",
+ 'lintian_log_file' => $ARGV[0],
+ 'resource_dirs' =>
+ [map { "${LINTIAN_BASE}/reporting/$_"} qw(images resources)],
+ );
+
+ load_templates("$LINTIAN_BASE/reporting/templates");
+
+ # Create lintian.css from a template, install the output file as a resource
+ # and discard the original output file. We do this after installing all
+ # resources, so the .css file can refer to resources.
+ output_template(
+ 'lintian.css',
+ $templates{'lintian.css'},
+ { 'path_prefix' => '../' });
+ $RESOURCE_MANAGER->install_resource("$HTML_TMP_DIR/lintian.css");
+
+ my $state_cache = load_state_cache($HARNESS_STATE_DIR);
+
+ print "Parsing lintian log...\n";
+ parse_lintian_log($profile, $state_cache);
+
+ process_data($profile, $state_cache);
+ exit(0);
+}
+
+# ------------------------------
+# Utility functions
+
+sub init_globals {
+ Getopt::Long::config('bundling', 'no_getopt_compat', 'no_auto_abbrev');
+ Getopt::Long::GetOptions(%OPT_HASH) or die("error parsing options\n");
+
+ if (not $OPT{'reporting-config'} or not -f $OPT{'reporting-config'}) {
+ die("The --reporting-config parameter must point to an existing file\n"
+ );
+ }
+ $LINTIAN_BASE = $ENV{'LINTIAN_BASE'};
+
+ $CONFIG = YAML::XS::LoadFile($OPT{'reporting-config'});
+ $HARNESS_STATE_DIR = required_cfg_value('storage', 'state-cache');
+ $HTML_TMP_DIR = required_cfg_value('storage', 'reports-work-dir');
+ my $history_key = 'storage.historical-data-dir';
+ if (exists($CONFIG->{'storage'}{'historical-data-dir'})) {
+ $HISTORY = 1;
+ $HISTORY_DIR = required_cfg_value('storage', 'historical-data-dir');
+ print "Enabling history tracking as ${history_key} is set\n";
+ if (length locate_executable('gnuplot')) {
+ $GRAPHS = 1;
+ print "Enabling graphs (gnuplot is in PATH)\n";
+ } else {
+ $GRAPHS = 0;
+ print "No graphs as \"gnuplot\" is not in PATH\n";
+ }
+ if ($GRAPHS) {
+ if (locate_executable('scour')) {
+ $SCOUR_ENABLED = 1;
+ print "Minimizing generated SVG files (scour is in PATH)\n";
+ } else {
+ $SCOUR_ENABLED = 0;
+ print 'No minimization of generated SVG files'
+ . " as \"scour\" is not in PATH\n";
+ }
+ }
+ } else {
+ $HISTORY = 0;
+ $GRAPHS = 0;
+ print "History tracking is disabled (${history_key} is unset)\n";
+ print "Without history tracking, there will be no graphs\n";
+ }
+
+ if (exists($CONFIG->{'template-variables'})) {
+ $TEMPLATE_CONFIG_VARS = $CONFIG->{'template-variables'};
+ } else {
+ $TEMPLATE_CONFIG_VARS = {};
+ }
+ # Provide a default URL for the source code. It might not be correct for
+ # the given installation, but it is better than nothing.
+ $TEMPLATE_CONFIG_VARS->{'LINTIAN_SOURCE'}
+ //= 'https://salsa.debian.org/lintian/lintian.git';
+
+ my $profile = load_profile();
+
+ Lintian::Data->set_vendor($profile);
+
+ $LINTIAN_VERSION = $ENV{LINTIAN_VERSION};
+ $timestamp = safe_qx(qw(date -u --rfc-822));
+ chomp($LINTIAN_VERSION, $timestamp);
+
+ $RESOURCE_MANAGER
+ = Lintian::Reporting::ResourceManager->new('html_dir' => $HTML_TMP_DIR,);
+ return $profile;
+}
+
+sub load_templates {
+ my ($template_dir) = @_;
+ for my $template (
+ qw/head foot clean index maintainer maintainers packages tag
+ tags tags-severity tag-not-seen tags-all/
+ ) {
+ open(my $fd, '<:encoding(UTF-8)', "${template_dir}/$template.tmpl");
+ my %options = (TYPE => 'FILEHANDLE', SOURCE => $fd);
+ $templates{$template} = Text::Template->new(%options)
+ or die "cannot load template $template: $Text::Template::ERROR\n";
+ close($fd);
+ }
+
+ open(my $fd, '<:encoding(UTF-8)', "${template_dir}/lintian.css.tmpl");
+ $templates{'lintian.css'} = Text::Template->new(
+ TYPE => 'FILEHANDLE',
+ SOURCE => $fd,
+ DELIMITERS => ['{{{', '}}}'],
+ )
+ or die("cannot load template for lintian.css: $Text::Template::ERROR\n");
+ close($fd);
+ return;
+}
+
+sub process_data {
+ my ($profile, $state_cache) = @_;
+ my @maintainers = sort(uniq(keys(%by_maint), keys(%by_uploader)));
+ my $statistics_file = "$HARNESS_STATE_DIR/statistics";
+ my ($old_statistics, $archives, @archive_info);
+
+ {
+ # Scoped to allow memory to be re-purposed. The %qa and %sources
+ # structures are only used for a very few isolated items.
+ my (%qa, %sources);
+ print "Collecting statistics...\n";
+ $old_statistics
+ = collect_statistics($profile, $state_cache, $statistics_file,
+ \@maintainers,\%sources, \%qa);
+
+ generate_lookup_tables(\%sources);
+
+ write_qa_list(\%qa);
+
+ generate_package_index_packages(\%sources);
+
+ if ($HISTORY) {
+ update_history_and_make_graphs(\@attrs, \%statistics,
+ \%tag_statistics);
+ }
+ }
+
+ # Build a hash of all maintainers, not just those with Lintian tags. We
+ # use this later to generate stub pages for maintainers whose packages are
+ # all Lintian-clean.
+ my %clean;
+ for my $group_id (sort(keys(%{$state_cache->{'groups'}}))) {
+ my $maintainer
+ = $state_cache->{'groups'}{$group_id}{'mirror-metadata'}
+ {'maintainer'};
+ my $id;
+ next if not $maintainer;
+ $id = maintainer_url($maintainer);
+ $clean{$id} = $maintainer;
+ }
+
+ # Now, walk through the tags by source package (sorted by maintainer).
+ # Output a summary page of errors and warnings for each maintainer, output
+ # a full page that includes info, experimental, and overridden tags, and
+ # assemble the maintainer index and the QA package list as we go.
+
+ for my $maintainer (@maintainers) {
+ my $id = maintainer_url($maintainer);
+ delete $clean{$id};
+
+ # Determine if the maintainer's page is clean. Check all packages for
+ # which they're either maintainer or uploader and set $error_clean if
+ # they have no errors or warnings.
+ #
+ # Also take this opportunity to sort the tags so that all similar tags
+ # will be grouped, which produces better HTML output.
+ my $error_clean = 1;
+ for my $source (
+ keys %{ $by_maint{$maintainer} },
+ keys %{ $by_uploader{$maintainer} }
+ ) {
+ my $versions = $by_maint{$maintainer}{$source}
+ || $by_uploader{$maintainer}{$source};
+ for my $version (keys %$versions) {
+ $versions->{$version}
+ = [sort by_tag @{ $versions->{$version} }];
+ next if not $error_clean;
+ my $tags = $versions->{$version};
+ for my $tag (@$tags) {
+ if ($tag->{code} eq 'E' or $tag->{code} eq 'W') {
+ $error_clean = 0;
+ last;
+ }
+ }
+ }
+ }
+
+ # Determine the parts of the maintainer and the file name for the
+ # maintainer page.
+ my ($name, $email) = extract_name_and_email($maintainer);
+
+ my $regular = "maintainer/$id";
+ my $full = "full/$id";
+
+ # Create the regular maintainer page (only errors and warnings) and the
+ # full maintainer page (all tags, including overrides and info tags).
+ print "Generating page for $id\n";
+ my $q_name = html_quote($name);
+ my %data = (
+ email => html_quote(uri_escape($email)),
+ errors => 1,
+ id => $id,
+ maintainer => html_quote($maintainer),
+ name => $q_name,
+ packages => $by_maint{$maintainer},
+ uploads => $by_uploader{$maintainer},
+ );
+ my $template;
+ if ($error_clean) {
+ $template = $templates{clean};
+ } else {
+ $template = $templates{maintainer};
+ }
+ output_template($regular, $template, \%data);
+ $template = $templates{maintainer};
+ $data{errors} = 0;
+ output_template($full, $template, \%data);
+
+ my %index_data = (url => $id, name => $q_name);
+ # Add this maintainer to the hash of maintainer to URL mappings.
+ $maintainer_table{$maintainer} = \%index_data;
+ }
+ undef(@maintainers);
+
+ # Write out the maintainer index.
+ my %data = (maintainers => \%maintainer_table,);
+ output_template('maintainers.html', $templates{maintainers}, \%data);
+
+ # Now, generate stub pages for every maintainer who has only clean
+ # packages.
+ for my $id (keys %clean) {
+ my $maintainer = $clean{$id};
+ my ($name, $email) = extract_name_and_email($maintainer);
+ my %maint_data = (
+ id => $id,
+ email => html_quote(uri_escape($email)),
+ maintainer => html_quote($maintainer),
+ name => html_quote($name),
+ clean => 1,
+ );
+ print "Generating clean page for $id\n";
+ output_template("maintainer/$id", $templates{clean}, \%maint_data);
+ output_template("full/$id", $templates{clean}, \%maint_data);
+ }
+
+ # Create the pages for each tag. Each page shows the extended description
+ # for the tag and all the packages for which that tag was issued.
+ for my $tag (sort $profile->known_tags) {
+ my $info = $profile->get_taginfo($tag);
+ my $description = $info->description('html', ' ');
+ my ($count, $overrides) = (0, 0);
+ my $tmpl = 'tag-not-seen';
+ my $shown_count = 0;
+ my $tag_list = $by_tag{$tag};
+ my $tag_limit_total = 1024;
+ my $tag_limit_per_package = 3;
+
+ if (exists $by_tag{$tag}) {
+ $tmpl = 'tag';
+ $count = $tag_statistics{$tag}{'count'};
+ $overrides = $tag_statistics{$tag}{'overrides'};
+ $shown_count = $count + $overrides;
+ }
+ if ($shown_count > $tag_limit_total) {
+ my (@replacement_list, %seen);
+ for my $orig_info (
+ sort { $a->{pkg_info}{package} cmp $b->{pkg_info}{package} }
+ @{$tag_list}) {
+ my $pkg_info = $orig_info->{pkg_info};
+ my $key
+ = "$pkg_info->{package} $pkg_info->{type} $pkg_info->{version}";
+ next if ++$seen{$key} > $tag_limit_per_package;
+ push(@replacement_list, $orig_info);
+ last if @replacement_list >= $tag_limit_total;
+ }
+ $tag_list = \@replacement_list;
+ $shown_count = scalar(@replacement_list);
+ }
+
+ my %maint_data = (
+ description => $description,
+ tag => $tag,
+ code => $info->code,
+ tags => $tag_list,
+ shown_count => $shown_count,
+ tag_limit_per_package => $tag_limit_per_package,
+ graphs => $GRAPHS,
+ graphs_days => $GRAPHS_RANGE_DAYS,
+ statistics => {
+ count => $count,
+ overrides => $overrides,
+ total => $count + $overrides,
+ },
+ );
+ output_template("tags/$tag.html", $templates{$tmpl}, \%maint_data);
+ }
+
+ # Create the general tag indices.
+ %data = (
+ tags => \%by_tag,
+ stats => \%tag_statistics,
+ profile => \$profile,
+ );
+ output_template('tags.html', $templates{tags}, \%data);
+ output_template('tags-severity.html', $templates{'tags-severity'}, \%data);
+ output_template('tags-all.html', $templates{'tags-all'}, \%data);
+
+ # Update the statistics file.
+ open(my $stats_fd, '>', $statistics_file);
+ print {$stats_fd} "last-updated: $timestamp\n";
+ for my $attr (@attrs) {
+ print {$stats_fd} "$attr: $statistics{$attr}\n";
+ }
+ print {$stats_fd} "lintian-version: $LINTIAN_VERSION\n";
+ close($stats_fd);
+
+ $archives = required_cfg_value('archives');
+ for my $archive (sort(keys(%{$archives}))) {
+ my $architectures
+ = required_cfg_non_empty_list_value('archives', $archive,
+ 'architectures');
+ my $components
+ = required_cfg_non_empty_list_value('archives', $archive,
+ 'components');
+ my $distributions
+ = required_cfg_non_empty_list_value('archives', $archive,
+ 'distributions');
+ my $path = required_cfg_value('archives', $archive, 'base-dir');
+ my $trace_basename
+ = required_cfg_value('archives', $archive, 'tracefile');
+
+ # The path to the mirror timestamp.
+ my $trace_file= "${path}/project/trace/${trace_basename}";
+ my $mirror_timestamp = path($trace_file)->slurp;
+ $mirror_timestamp =~ s/\n.*//s;
+ $mirror_timestamp
+ = safe_qx('date', '-u', '--rfc-822', '-d', $mirror_timestamp);
+ my %info = (
+ 'name' => $archive,
+ 'architectures' => $architectures,
+ 'components' => $components,
+ 'distributions' => $distributions,
+ 'timestamp' => $mirror_timestamp,
+ );
+ push(@archive_info, \%info);
+ }
+
+ # Finally, we can start creating the index page.
+ %data = (
+ delta => \%delta,
+ archives => \@archive_info,
+ previous => $old_statistics->{'last-updated'},
+ graphs => $GRAPHS,
+ graphs_days => $GRAPHS_RANGE_DAYS,
+ );
+ output_template('index.html', $templates{index}, \%data);
+ return;
+}
+
+sub setup_output_dir {
+ my (%args) = @_;
+ my $output_dir = $args{'output_dir'};
+ my $lintian_manual = $args{'lintian_manual'};
+ my $lintian_api = $args{'lintian_api_docs'};
+ my $resource_dirs = $args{'resource_dirs'} // [];
+ my $lintian_log_file = $args{'lintian_log_file'};
+
+ # Create output directories.
+ mkdir($output_dir, 0777);
+ mkdir("$output_dir/full", 0777);
+ mkdir("$output_dir/maintainer", 0777);
+ mkdir("$output_dir/tags", 0777);
+ symlink('.', "$output_dir/reports");
+ copy_dir($lintian_manual, "$output_dir/manual");
+ copy_dir($lintian_api, "$output_dir/library-api");
+
+ if ($lintian_log_file) {
+ my %opts = (
+ 'in' => $lintian_log_file,
+ 'out' => "$output_dir/lintian.log.gz",
+ );
+ run_cmd(\%opts, 'gzip', '-9nc');
+ $RESOURCE_MANAGER->install_resource("$output_dir/lintian.log.gz");
+ symlink($RESOURCE_MANAGER->resource_URL('lintian.log.gz'),
+ "$output_dir/lintian.log.gz");
+ }
+
+ for my $dir (@{$resource_dirs}) {
+ next if not -d $dir;
+ opendir(my $dirfd, $dir);
+ for my $resname (readdir($dirfd)) {
+ next if $resname eq '.' or $resname eq '..';
+ $RESOURCE_MANAGER->install_resource("$dir/$resname",
+ { install_method => 'copy' });
+ }
+ closedir($dirfd);
+ }
+ return;
+}
+
+sub collect_statistics {
+ my ($profile, $state_cache, $statistics_file, $maintainers_ref,
+ $sources_ref, $qa_list_ref)
+ = @_;
+ my $old_statistics;
+
+ # For each of this maintainer's packages, add statistical information
+ # about the number of each type of tag to the QA data and build the
+ # packages hash used for the package index. We only do this for the
+ # maintainer packages, not the uploader packages, to avoid
+ # double-counting.
+ for my $maintainer (@{$maintainers_ref}) {
+ for my $source (keys %{ $by_maint{$maintainer} }) {
+ my %count;
+ for my $version (
+ sort versions_comparator
+ keys %{ $by_maint{$maintainer}{$source} }){
+ my $tags = $by_maint{$maintainer}{$source}{$version};
+ for my $tag (@{$tags}) {
+ $count{$tag->{code}}++;
+ }
+ if (@$tags) {
+ $sources_ref->{$source}{$version}
+ = $tags->[0]{pkg_info}{xref};
+ }
+ }
+ $qa_list_ref->{$source} = \%count;
+ }
+ }
+
+ for my $tag ($profile->known_tags) {
+ my ($count, $overrides) = (0, 0);
+ my %seen_tags;
+ next if (not exists($by_tag{$tag}));
+ foreach (@{$by_tag{$tag}}) {
+ if ($_->{code} ne 'O') {
+ $count++;
+ $seen_tags{$_->{pkg_info}{xref}}++;
+ } else {
+ $overrides++;
+ }
+ }
+ $tag_statistics{$tag}{'count'} = $count;
+ $tag_statistics{$tag}{'overrides'} = $overrides;
+ $tag_statistics{$tag}{'packages'} = scalar(keys(%seen_tags));
+ }
+
+ # Read in the old statistics file so that we can calculate deltas for
+ # all of our statistics.
+
+ if (-f $statistics_file) {
+ ($old_statistics) = read_dpkg_control_lc($statistics_file);
+ }
+ $statistics{'groups-known'} = scalar(keys(%{$state_cache->{'groups'}}));
+ $statistics{'groups-backlog'}
+ = scalar(find_backlog($LINTIAN_VERSION,$state_cache));
+ my $pkgs_w_errors = 0;
+ for my $group_data (values(%{$state_cache->{'groups'}})) {
+ $pkgs_w_errors++
+ if exists($group_data->{'processing-errors'})
+ and $group_data->{'processing-errors'};
+ }
+ $statistics{'groups-with-errors'} = $pkgs_w_errors;
+
+ for my $attr (@attrs) {
+ my $old = $old_statistics->{$attr} || 0;
+ $statistics{$attr} ||= 0;
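+ # e.g. a current value of 1234 that grew by 56 is rendered as "1234 (+56)"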
+ $delta{$attr}
+ = sprintf('%d (%+d)', $statistics{$attr},$statistics{$attr} - $old);
+ }
+
+ return $old_statistics;
+}
+
+sub extract_name_and_email {
+ my ($maintainer) = @_;
+ my ($name, $email) = ($maintainer =~ /^(.*) <([^>]+)>/);
+ $name = 'Unknown Maintainer' unless $name;
+ $email = 'unknown' unless $email;
+ return ($name, $email);
+}
+
+# Generate the package lists. These are huge, so we break them into four
+# separate pages.
+#
+# FIXME: Does anyone actually use these pages? They're basically unreadable.
+sub generate_package_index_packages {
+ my ($sources_ref) = @_;
+
+ my %list = (
+ '0-9, A-F' => [],
+ 'G-L' => [],
+ 'M-R' => [],
+ 'S-Z' => [],
+ );
+ for my $package (sort(keys(%{$sources_ref}))) {
+ my $first = uc(substr($package, 0, 1));
+ if ($first le 'F') { push(@{ $list{'0-9, A-F'} }, $package) }
+ elsif ($first le 'L') { push(@{ $list{'G-L'} }, $package) }
+ elsif ($first le 'R') { push(@{ $list{'M-R'} }, $package) }
+ else { push(@{ $list{'S-Z'} }, $package) }
+ }
+ my %data = (sources => $sources_ref);
+ my $i = 1;
+ for my $section (sort(keys(%list))) {
+ $data{section} = $section;
+ $data{list} = $list{$section};
+ output_template("packages_$i.html", $templates{packages}, \%data);
+ $i++;
+ }
+ return;
+}
+
+sub run_scour {
+ my ($input_file, $output_file) = @_;
+ run_cmd('scour', '-i',$input_file, '-o',$output_file, '-q',
+ '--enable-id-stripping', '--enable-comment-stripping',
+ '--shorten-ids', '--indent=none');
+ return 1;
+}
+
+sub update_history_and_make_graphs {
+ my ($attrs_ref, $statistics_ref, $tag_statistics_ref) = @_;
+ # Update history.
+ my %versions;
+ my $graph_dir = "$HTML_TMP_DIR/graphs";
+ my $commonf = "$graph_dir/common.gpi";
+ my $unix_time = time();
+ mkdir("$HISTORY_DIR")
+ if (not -d "$HISTORY_DIR");
+ mkdir("$HISTORY_DIR/tags")
+ if (not -d "$HISTORY_DIR/tags");
+
+ my $history_file = "$HISTORY_DIR/statistics.dat";
+ my $stats = '';
+ for my $attr (@{$attrs_ref}) {
+ $stats .= ' ' . $statistics_ref->{$attr};
+ }
+ open(my $hist_fd, '+>>', $history_file);
+ print {$hist_fd} "$unix_time $LINTIAN_VERSION$stats\n";
+
+ if ($GRAPHS) {
+ seek($hist_fd, 0, SEEK_SET);
+ while (<$hist_fd>) {
+ my @fields = split();
+ $versions{$fields[1]} = $fields[0]
+ if not exists $versions{$fields[1]};
+ }
+ }
+ close($hist_fd);
+
+ if ($GRAPHS) {
+ mkdir("$graph_dir", 0777);
+ mkdir("$graph_dir/tags", 0777);
+
+ my $date_min
+ = strftime('%s',
+ localtime($unix_time - 3600 * 24 * $GRAPHS_RANGE_DAYS));
+ my $date_max = strftime('%s', localtime($unix_time));
+
+ # Generate loadable Gnuplot file with common variables and labels/arrows
+ # for Lintian versions.
+ open(my $common, '>', $commonf);
+ print {$common} "history_dir='$HISTORY_DIR'\n";
+ print {$common} "graph_dir='$graph_dir'\n";
+ print {$common} "date_min='$date_min'\n";
+ print {$common} "date_max='$date_max'\n";
+ my $last_version = 0;
+ for my $v (sort { $versions{$a} <=> $versions{$b} } keys %versions) {
+ next unless $versions{$v} > $date_min;
+
+ print {$common} "set arrow from '$versions{$v}',graph 0 to ",
+ "'$versions{$v}',graph 1 nohead lw 0.4\n";
+
+ # Skip label if previous release is too close; graphs can't display
+ # more than ~32 labels.
+ my $min_spacing = 3600 * 24 * $GRAPHS_RANGE_DAYS / 32;
+ if ($versions{$v} - $last_version > $min_spacing) {
+ (my $label = $v) =~ s/\-[\w\d]+$//;
+ print {$common} "set label '$label' at '$versions{$v}',graph ",
+ "1.04 rotate by 90 font ',8'\n";
+
+ $last_version = $versions{$v};
+ }
+ }
+ close($common);
+
+ print "Plotting global statistics...\n";
+ run_cmd({ 'chdir' => $graph_dir},
+ 'gnuplot',"$LINTIAN_BASE/reporting/graphs/statistics.gpi");
+
+ if ($SCOUR_ENABLED) {
+ # Do a little "rename" dance to ensure that we keep the
+ # "statistics.svg"-basename without having to use a
+ # subdirectory.
+ rename(
+ "${graph_dir}/statistics.svg",
+ "${graph_dir}/_statistics-orig.svg"
+ );
+ run_scour(
+ "${graph_dir}/_statistics-orig.svg",
+ "${graph_dir}/statistics.svg"
+ );
+ }
+ $RESOURCE_MANAGER->install_resource("${graph_dir}/statistics.svg");
+ }
+
+ my $gnuplot_fd;
+ if ($GRAPHS) {
+ open($gnuplot_fd, '>', "$graph_dir/call.gpi");
+ }
+
+ for my $tag (sort(keys(%{$tag_statistics_ref}))) {
+ $history_file = "$HISTORY_DIR/tags/$tag.dat";
+ $stats = $tag_statistics_ref->{$tag};
+ open(my $tag_fd, '>>', $history_file);
+ print {$tag_fd} "$unix_time $stats->{'count'} $stats->{'overrides'} "
+ ."$stats->{'packages'}\n";
+ close($tag_fd);
+ if ($GRAPHS) {
+ print {$gnuplot_fd} qq{print 'Plotting $tag statistics...'\n};
+ print {$gnuplot_fd}
+ qq{call '$LINTIAN_BASE/reporting/graphs/tags.gpi' '$tag'\n};
+ print {$gnuplot_fd} qq{reset\n};
+ }
+ }
+
+ if ($GRAPHS) {
+ my $svg_dir = "${graph_dir}/tags";
+ close($gnuplot_fd);
+ run_cmd({'chdir' => $graph_dir}, 'gnuplot', 'call.gpi');
+ unlink($commonf);
+ if ($SCOUR_ENABLED) {
+ # Obvious optimization potential; run scour in parallel
+ my $optimized_dir = "${graph_dir}/tags-optimized";
+ mkdir($optimized_dir);
+ print "Minimizing tag graphs; this may take a while ...\n";
+ for my $tag (sort(keys(%{$tag_statistics_ref}))) {
+ run_scour("${svg_dir}/${tag}.svg",
+ "${optimized_dir}/${tag}.svg");
+ }
+ $svg_dir = $optimized_dir;
+ }
+ for my $tag (sort(keys(%{$tag_statistics_ref}))) {
+ my $graph_file = "${svg_dir}/${tag}.svg";
+ $RESOURCE_MANAGER->install_resource($graph_file);
+ }
+ path($graph_dir)->remove_tree
+ if -d $graph_dir;
+ }
+ return;
+}
+
+# Write out the QA package list. This is a space-delimited file that contains
+# the package name and then the error count, warning count, info count,
+# pedantic count, experimental count, and overridden tag count.
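+#
+# A line therefore looks like (counts are hypothetical):
+#
+#   gnubg 2 5 1 0 0 3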
+sub write_qa_list {
+ my ($qa_data) = @_;
+
+ open(my $qa_fd, '>', "$HTML_TMP_DIR/qa-list.txt");
+ for my $source (sort(keys(%{$qa_data}))) {
+ print {$qa_fd} $source;
+ for my $code (qw/E W I P X O/) {
+ my $count = $qa_data->{$source}{$code} || 0;
+ print {$qa_fd} " $count";
+ }
+ print {$qa_fd} "\n";
+ }
+ close($qa_fd);
+ return;
+}
+
+# Generate a "redirect" lookup table for the webserver to power the
+# "<site>/source/<source>[/<version>]" redirects.
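+# Each line maps a request path to the corresponding report page, e.g.
+# (reusing the example from the header comment):
+#
+#   gnubg full/rra@debian.org.html#gnubg_0.15~20061120-1
+#   gnubg/0.15~20061120-1 full/rra@debian.org.html#gnubg_0.15~20061120-1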
+sub generate_lookup_tables {
+ my ($sources_ref) = @_;
+ mkdir("$HTML_TMP_DIR/lookup-tables");
+ open(my $table, '>', "$HTML_TMP_DIR/lookup-tables/source-packages");
+
+ foreach my $source (sort(keys(%{$sources_ref}))) {
+ my $first = 1;
+ for my $version (
+ sort versions_comparator keys %{ $sources_ref->{$source} }) {
+ my $xref = $sources_ref->{$source}{$version};
+ print {$table} "$source full/$xref\n" if $first;
+ print {$table} "$source/$version full/$xref\n";
+ $first = 0;
+ }
+ }
+
+ close($table);
+ return;
+}
+
+# Determine the file name for the maintainer page given a maintainer. It
+# should be <email>.html where <email> is their email address with all
+# characters other than a-z A-Z 0-9 - _ . @ = + replaced with _. Don't change
+# this without coordinating with QA.
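+#
+# For example, "Russ Allbery <rra@debian.org>" maps to "rra@debian.org.html";
+# a maintainer field without an e-mail address falls back to "unsorted.html".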
+sub maintainer_url {
+ my ($maintainer) = @_;
+ if ($maintainer =~ m/<([^>]+)>/) {
+ my $id = $1;
+ $id =~ tr/a-zA-Z0-9_.@=+-/_/c;
+ return "$id.html";
+ } else {
+ return 'unsorted.html';
+ }
+}
+
+sub parse_lintian_log {
+ my ($profile, $state_cache) = @_;
+ # We take a lintian log file on either standard input or as the
+ # first argument. This log file contains all the tags lintian
+ # found, plus N: tags with informational messages. Ignore all the
+ # N: tags and load everything else into the hashes we use for all
+ # web page generation.
+ #
+ # We keep track of a hash from maintainer page URLs to maintainer
+ # values so that we don't have two maintainers who map to the same
+ # page and overwrite each other's pages. If we find two
+ # maintainers who map to the same URL, just assume that the second
+ # maintainer is the same as the first (but warn about it).
+ #
+ # The "last_*" are optimizations to avoid computing the same
+ # things over and over again when a package has multiple tags.
+ my (%seen, $last_info, $last_maintainer, %unknown_member_id, $info,
+ $last_pi, %map_maint, %arch_map);
+ my %expanded_code = (
+ E => 'errors',
+ W => 'warnings',
+ I => 'info',
+ X => 'experimental',
+ O => 'overridden',
+ P => 'pedantic',
+ C => 'classifications',
+ );
+ while (<>) {
+ my @parts;
+ chomp;
+ @parts = split_tag($_);
+ next unless @parts;
+ my ($code, $package, $type, $version, $arch, $tag, $extra) = @parts;
+ $type = 'binary' unless (defined $type);
+ next
+ unless ($type eq 'source' || $type eq 'binary' || $type eq 'udeb');
+ # Ignore unknown tags - happens if we removed a tag that is
+ # still present in the log file.
+ my $tag_info = $profile->get_taginfo($tag);
+ next
+ if not $tag_info
+ or $tag_info->effective_severity eq 'classification';
+
+ # Update statistics.
+ my $key = $expanded_code{$code};
+ $statistics{$key}++;
+ unless ($seen{"$package $type"}) {
+ $statistics{"$type-packages"}++;
+ $seen{"$package $type"} = 1;
+ }
+
+ # Determine the source package for this package and warn if
+ # there appears to be no source package in the archive.
+ # Determine the maintainer, version, and archive component. Work
+ # around a missing source package by pulling information from
+ # a binary package or udeb of the same name if there is any.
+ my ($source, $component, $source_version, $maintainer, $uploaders);
+ my $member_id
+ = "${type}:${package}/${version}"
+ . ($type ne 'source' ? "/$arch" : q{});
+ my $state_data = $state_cache->{'members-to-groups'}{$member_id};
+ next if exists($unknown_member_id{$member_id});
+ if ($type eq 'source') {
+ $source = $package;
+ $source_version = $version;
+ if (not defined($state_data)) {
+ warn "Source package ${member_id} not found in state-cache!\n";
+ $unknown_member_id{$member_id} = 1;
+ }
+ } elsif (defined($state_data)) {
+ my $src_member
+ = first { s/^source:// } keys(%{$state_data->{'members'}});
+ if ($src_member) {
+ ($source, $source_version) = split(m{/}, $src_member, 2);
+ }
+ } elsif (not defined($state_data)) {
+ warn "Package ${member_id} not found in state-cache!\n";
+ $unknown_member_id{$member_id} = 1;
+ }
+ $state_data //= {};
+ $component = $state_data->{'mirror-metadata'}{'component'} ||= 'main';
+ $maintainer = $state_data->{'mirror-metadata'}{'maintainer'}
+ ||= '(unknown)';
+ $uploaders = $state_data->{'mirror-metadata'}{'uploaders'};
+ $source ||= '';
+ $version = 'unknown'
+ unless (defined($version) and length($version) > 0);
+ $source_version = $version
+ unless (defined($source_version) and length($source_version) > 0);
+
+ # Sanitize, just out of paranoia.
+ $package =~ tr/a-zA-Z0-9.+-/_/c;
+ $source =~ tr/a-zA-Z0-9.+-/_/c;
+ $version =~ tr/a-zA-Z0-9.+:~-/_/c;
+ $source_version =~ tr/a-zA-Z0-9.+:~-/_/c;
+
+ # Conditionally call html_quote if needed. On average, 11-13% of
+ # all tags (emitted on lintian.d.o) have no "extra". That would be
+ # tags like "no-upstream-changelog" (now removed)
+ if (defined($extra)) {
+ $extra = html_quote($extra);
+ } else {
+ $extra = '';
+ }
+
+ # Store binary architectures
+ my $arch_key = join(':', $package, $type, $version, $tag, $extra);
+ $arch_map{$arch_key}{$arch} = 1
+ unless $arch eq 'all' or $arch eq 'source';
+
+ # Don't duplicate entries if they only differ on architecture
+ next if scalar(keys %{$arch_map{$arch_key}}) > 1;
+
+ # Add the tag information to our hashes. Share the data
+ # between the hashes to save space (which means we can't later
+ # do destructive tricks with it).
+ if ( $last_info
+ && $last_pi->{type} eq $type
+ && $last_pi->{package} eq $package
+ && $last_pi->{version} eq $version) {
+
+ # There are something like 622k tags emitted on lintian.d.o,
+ # but only "some" 90k unique package+version(+arch) pairs.
+ # Therefore, we can conclude that the average package will
+ # have ~6 tags and optimise for that case. Indeed, this path
+ # seems to be taken about 90% of the time (561k/624k).
+ # - In fact, we see fewer than "90k" package+version(+arch)
+ # pairs here, since entries without tags never get this far down
+ # in this loop (i.e. they are filtered out by split_tag
+ # above).
+
+ # Copy the last info and then change the bits that can change
+ $info = {%{$last_info}};
+ # Code depends on whether the given tag was overridden or not
+ $info->{code} = $code;
+ $info->{extra} = $extra;
+ if ($info->{tag_info}->name ne $tag) {
+ $info->{tag_info} = $tag_info;
+ }
+ # saves a map_maintainer call
+ $maintainer = $last_maintainer;
+ } else {
+
+ my $anchor = "${source}_${source_version}";
+ # Apparently "+" is not allowed in ids and I am guessing
+ # ":" is not either.
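+ # (e.g. "libfoo_1:2.0+dfsg-1" would become "libfoo_1_x3a2.0_x2bdfsg-1")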
+ if (index($anchor, '+') > -1 or index($anchor, ':') > -1) {
+ $anchor =~ s/[+]/_x2b/g;
+ $anchor =~ s/[:]/_x3a/g;
+ }
+ if (substr($maintainer, 0, 1) eq q{"}) {
+ # Strip out ""-quotes, which is required in d/control for some
+ # maintainers.
+ $maintainer =~ s/^"(.*)" <(.*)>$/$1 <$2>/;
+ }
+
+ # Check if we've seen the URL for this maintainer before
+ # and, if so, map them to the same person as the previous
+ # one.
+
+ $last_maintainer = $maintainer
+ = map_maintainer(\%map_maint, $maintainer);
+
+ # Update maintainer statistics.
+ $statistics{maintainers}++ unless defined $by_maint{$maintainer};
+
+ $last_info = $info = {
+ # Tag instance specific data
+
+ # split_tags ensures that $code is a single upper case letter
+ code => $code,
+ tag_info => $tag_info,
+ # extra is unsafe in general, but we already quote it above.
+ extra => $extra,
+ archs => $arch_map{$arch_key},
+
+ # Shareable data
+ pkg_info => {
+ package => $package,
+ version => $version,
+ # There is a check for type being in a fixed whitelist of
+ # HTML-safe keywords at the start of the loop.
+ type => $type,
+ component => html_quote($component),
+ # should be safe
+ anchor => $anchor,
+ xref => maintainer_url($maintainer). "#${anchor}",
+ 'state_data' => $state_data,
+ maintainer => html_quote($maintainer),
+ },
+ };
+ $last_pi = $info->{pkg_info};
+ if (!$by_maint{$maintainer}{$source}{$source_version}) {
+ my $list_ref = [];
+ $by_maint{$maintainer}{$source}{$source_version} = $list_ref;
+ # If the package had uploaders listed, also add the
+ # information to %by_uploaders (still sharing the data
+ # between hashes).
+ if ($uploaders) {
+ for my $uploader (@{$uploaders}) {
+ if (substr($uploader, 0, 1) eq q{"}) {
+ # Strip out ""-quotes, which is required in
+ # d/control for some uploaders.
+ $uploader =~ s/^"(.*)" <(.*)>$/$1 <$2>/;
+ }
+ $uploader = map_maintainer(\%map_maint, $uploader);
+ next if $uploader eq $maintainer;
+ $by_uploader{$uploader}{$source}{$source_version}
+ = $list_ref;
+ }
+ }
+ }
+ }
+
+ push(@{ $by_maint{$maintainer}{$source}{$source_version} }, $info);
+ $by_tag{$tag} ||= [];
+ push(@{ $by_tag{$tag} }, $info);
+
+ }
+ return;
+}
+
+# Deduplicate maintainers. Maintains a cache of the maintainers we've seen
+# with a given e-mail address and returns the maintainer string that we
+# should use (which is whatever maintainer we saw first with that e-mail).
+sub map_maintainer {
+ my ($urlmap, $maintainer) = @_;
+ my $url = maintainer_url($maintainer);
+ if (defined(my $res = $urlmap->{$url})) {
+ $maintainer = $res;
+ } else {
+ $urlmap->{$url} = $maintainer;
+ }
+ return $maintainer;
+}
+
+# Quote special characters for HTML output.
+sub html_quote {
+ my ($text) = @_;
+ $text ||= '';
+ # Use index to do a quick check before we bother requesting a
+ # subst. On average, this is cheaper than blindly s///'ing, since
+ # we rarely subst (all) of the characters below.
+ if (index($text, '&') > -1) {
+ $text =~ s/&/\&amp;/g;
+ }
+ if (index($text, '<') > -1) {
+ $text =~ s/</\&lt;/g;
+ }
+ if (index($text, '>') > -1) {
+ $text =~ s/>/\&gt;/g;
+ }
+ if (index($text, '/') > -1) {
+ $text =~ s/\//\&#x2f;/g;
+ }
+ return $text;
+}
+
+# Given a file name, a template, and a data hash, fill out the template with
+# that data hash and output the results to the file.
+sub output_template {
+ my ($file, $template, $data) = @_;
+ my $path_prefix = $data->{path_prefix};
+ if (not defined($path_prefix)) {
+ $path_prefix = '';
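+ # e.g. "tags/foo.html" (a hypothetical file name) lives one directory
+ # down, so it needs a "../" prefix on its resource links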
+ if (index($file, '/') > -1) {
+ $path_prefix = '../' x ($file =~ tr|/||);
+ }
+ }
+ $data->{version} ||= $LINTIAN_VERSION;
+ $data->{timestamp} ||= $timestamp;
+ $data->{by_version} ||= \&versions_comparator;
+ $data->{path_prefix} ||= $path_prefix;
+ $data->{html_quote} ||= \&html_quote;
+ $data->{resource_path} ||= sub {
+ return $path_prefix . $RESOURCE_MANAGER->resource_URL($_[0]);
+ };
+ $data->{resource_integrity} ||= sub {
+ return $RESOURCE_MANAGER->resource_integrity_value($_[0]);
+ };
+ $data->{head} ||= sub {
+ $templates{head}->fill_in(
+ HASH => {
+ page_title => $_[0],
+ config_vars => $TEMPLATE_CONFIG_VARS,
+ %{$data},
+ }) or die "Filling out head of $file: $Text::Template::ERROR\n";
+ };
+ $data->{foot} ||= sub {
+ $templates{foot}->fill_in(
+ HASH => {
+ config_vars => $TEMPLATE_CONFIG_VARS,
+ %{$data},
+ }) or die "Filling out footer of $file: $Text::Template::ERROR\n";
+ };
+ $data->{config_vars} ||= $TEMPLATE_CONFIG_VARS;
+ open(my $fd, '>:encoding(UTF-8)', "$HTML_TMP_DIR/$file");
+ $template->fill_in(OUTPUT => $fd, HASH => $data)
+ or die "filling out $file failed: $Text::Template::ERROR\n";
+ close($fd);
+ return;
+}
+
+# Sort function for sorting lists of tags. Sort by package, version, component,
+# type, tag, and then any extra data. This will produce the best HTML output.
+#
+# Note that source tags must come before all other tags, hence the "unfair"
+# priority for those. This is because the first tags listed are assumed to
+# be source package tags.
+sub by_tag {
+ my $a_pi = $a->{pkg_info};
+ my $b_pi = $b->{pkg_info};
+ if ($a_pi->{type} ne $b_pi->{type}) {
+ return -1 if $a_pi->{type} eq 'source';
+ return 1 if $b_pi->{type} eq 'source';
+ }
+ return
+ $a_pi->{package} cmp $b_pi->{package}
+ || $a_pi->{version} cmp $b_pi->{version}
+ || $a_pi->{component} cmp $b_pi->{component}
+ || $a_pi->{type} cmp $b_pi->{type}
+ || $a->{tag_info}->name cmp $b->{tag_info}->name
+ || $a->{extra} cmp $b->{extra};
+}
+
+=item split_tag
+
+=cut
+
+{
+ # Matches something like: (1:2.0-3) [arch1 arch2]
+ # - captures the version and the architectures
+ my $verarchre = qr,(?: \s* \(( [^)]++ )\) \s* \[ ( [^]]++ ) \]),xo;
+ # ^^^^^^^^ ^^^^^^^^^^^^
+ # ( version ) [architecture ]
+
+ # matches the full deal:
+ # 1 222 3333 4444444 5555 666 777
+ # - T: pkg type (version) [arch]: tag [...]
+ # ^^^^^^^^^^^^^^^^^^^^^
+ # Where the marked part(s) are optional values. The numbers above
+ # the example are the capture groups.
+ my $TAG_REGEX
+ = qr/([EWIXOPC]): (\S+)(?: (\S+)(?:$verarchre)?)?: (\S+)(?:\s+(.*))?/;
+
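+ # For example, the log line from the file header comment,
+ #
+ #   W: gnubg source: substvar-source-version-is-deprecated gnubg-data
+ #
+ # splits into ('W', 'gnubg', 'source', undef, undef,
+ # 'substvar-source-version-is-deprecated', 'gnubg-data').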
+ sub split_tag {
+ my ($tag_input) = @_;
+ my $pkg_type;
+ return unless $tag_input =~ /^${TAG_REGEX}$/;
+ # default value...
+ $pkg_type = $3//'binary';
+ return ($1, $2, $pkg_type, $4, $5, $6, $7);
+ }
+}
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/commands/reporting-lintian-harness.pm b/commands/reporting-lintian-harness.pm
new file mode 100644
index 0000000..afca58d
--- /dev/null
+++ b/commands/reporting-lintian-harness.pm
@@ -0,0 +1,603 @@
+#!/usr/bin/perl -w
+
+# Lintian reporting harness -- Run lintian against an archive mirror
+#
+# Copyright (C) 2015 Niels Thykier
+# Copyright (C) 2018 Chris Lamb <lamby@debian.org>
+#
+# Based on "reporting/harness", which was:
+# Copyright (C) 1998 Christian Schwarz and Richard Braakman
+#
+# This program is free software. It is distributed under the terms of
+# the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any
+# later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, you can find it on the World Wide
+# Web at https://www.gnu.org/copyleft/gpl.html, or write to the Free
+# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+# MA 02110-1301, USA.
+
+package reporting_lintian_harness;
+
+use v5.20;
+use warnings;
+use utf8;
+use autodie;
+
+use constant BACKLOG_PROCESSING_TIME_LIMIT => 4 * 3600; # 4hours
+
+use Fcntl qw(F_GETFD F_SETFD FD_CLOEXEC SEEK_END);
+use File::Basename qw(basename);
+use File::Temp qw(tempfile);
+use Getopt::Long();
+use IO::Async::Loop;
+use IO::Async::Process;
+use List::MoreUtils qw(first_index);
+use POSIX qw(strftime);
+
+use Lintian::IO::Async qw(safe_qx);
+use Lintian::Reporting::Util
+ qw(find_backlog load_state_cache save_state_cache);
+
+use constant EMPTY => q{};
+use constant COLON => q{:};
+use constant NEWLINE => qq{\n};
+
+my (@LINTIAN_CMD, $LINTIAN_VERSION);
+
+my @REQUIRED_PARAMETERS = qw(
+ lintian-log-dir
+ schedule-chunk-size
+ schedule-limit-groups
+ state-dir
+);
+my %OPT = ('lintian-frontend' => 'lintian',);
+my %OPT_HASH = (
+ 'schedule-chunk-size=i' => \$OPT{'schedule-chunk-size'},
+ 'schedule-limit-groups=i' => \$OPT{'schedule-limit-groups'},
+ 'state-dir=s' => \$OPT{'state-dir'},
+ 'lintian-frontend=s' => \$OPT{'lintian-frontend'},
+ 'lintian-log-dir=s' => \$OPT{'lintian-log-dir'},
+ 'lintian-scratch-space=s' => \$OPT{'lintian-scratch-space'},
+ 'help|h' => \&usage,
+);
+
+sub main {
+ STDOUT->autoflush;
+ Getopt::Long::config('bundling', 'no_getopt_compat', 'no_auto_abbrev');
+ Getopt::Long::GetOptions(%OPT_HASH) or die("error parsing options\n");
+ check_parameters();
+ $LINTIAN_VERSION= safe_qx($OPT{'lintian-frontend'}, '--print-version');
+ chomp($LINTIAN_VERSION);
+ prepare_lintian_environment_and_cmdline();
+ exit(harness_lintian());
+}
+
+### END OF SCRIPT -- below are helper subroutines ###
+
+=item untaint(VALUE)
+
+Untaint VALUE
+
+=cut
+
+sub untaint {
+ return $_[0] = $1 if $_[0] =~ m/^(.*)$/;
+ return;
+}
+
+sub check_parameters {
+ for my $parameter (@REQUIRED_PARAMETERS) {
+ if (not defined($OPT{$parameter})) {
+ die( "Missing required parameter \"--${parameter}\""
+                . " (use --help for more info)\n");
+ }
+ }
+ if (-d $OPT{'state-dir'}) {
+ untaint($OPT{'state-dir'});
+ } else {
+ die("The --state-dir parameter must point to an existing directory\n");
+ }
+ die("The argument for --schedule-limit-groups must be an > 0\n")
+ if $OPT{'schedule-limit-groups'} < 1;
+
+ return;
+}
+
+sub prepare_lintian_environment_and_cmdline {
+ my $frontend = 'lintian';
+ my $eoa_marker_index = first_index { $_ eq '--' } @ARGV;
+ my $logs_dir = $OPT{'lintian-log-dir'};
+ my @overridable_args = (qw(-EL +>=classification --show-overrides));
+ my @args = (
+ qw(--verbose), # We rely on this for filtering the log
+ qw(--exp-output=format=fullewi --packages-from-file -),
+ qw(--perf-debug --perf-output),
+ "+${logs_dir}/lintian-perf.log",
+ );
+ $frontend = $OPT{'lintian-frontend'} if ($OPT{'lintian-frontend'});
+
+ if ($eoa_marker_index > -1) {
+ # Move known "non-parameters" and the "--" behind our arguments.
+ # It is a misfeature, but at least it does not break
+ # our code. NB: It requires *two* "--" on the command-line to
+ # trigger this case.
+ push(@args, splice(@ARGV, $eoa_marker_index));
+ }
+ # Put "our" arguments after user supplied ones
+ @LINTIAN_CMD = ($frontend, @overridable_args, @ARGV, @args);
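+    # For illustration (the user-supplied arguments and paths are
+    # hypothetical; only the ordering follows from the assignment above),
+    # the resulting command looks roughly like:
+    #
+    #   lintian -EL +>=classification --show-overrides <user args> \
+    #       --verbose --exp-output=format=fullewi --packages-from-file - \
+    #       --perf-debug --perf-output +<lintian-log-dir>/lintian-perf.log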
+
+ # The environment part
+ for my $key (keys(%ENV)) {
+ delete($ENV{$key}) if $key =~ m/^LINTIAN_/;
+ }
+ if ($OPT{'lintian-scratch-space'}) {
+ $ENV{'TMPDIR'} = $OPT{'lintian-scratch-space'};
+ log_msg("Setting TMPDIR to $ENV{'TMPDIR'}");
+ } else {
+        log_msg('Leaving TMPDIR unset (no --lintian-scratch-space)');
+ }
+ return;
+}
+
+sub log_msg {
+ my ($msg) = @_;
+ my $ts = strftime('[%FT%T]: ', localtime());
+ print $ts, $msg, "\n";
+ return;
+}
+
+sub harness_lintian {
+ my (@worklist);
+ my $exit_code = 0;
+ my $state = load_state_cache($OPT{'state-dir'});
+ my $lintian_log_dir = $OPT{'lintian-log-dir'};
+ my $lintian_log = "${lintian_log_dir}/lintian.log";
+ log_msg('Update complete, loading current state information');
+
+ @worklist = find_backlog($LINTIAN_VERSION, $state);
+
+ # Always update the log if it exists, as we may have removed
+ # some entries.
+ if (-f $lintian_log) {
+ my $filter = generate_log_filter($state, {});
+
+ # update lintian.log
+ log_msg('Updating lintian.log...');
+ rewrite_lintian_log($filter);
+ }
+
+ log_msg('');
+
+ if (not @worklist) {
+ log_msg('Skipping Lintian run - nothing to do...');
+ } else {
+ log_msg('Processing backlog...');
+ if (@worklist > $OPT{'schedule-limit-groups'}) {
+ log_msg(
+ "Truncating worklist to size $OPT{'schedule-limit-groups'}"
+ . ' from '
+ . (scalar(@worklist)));
+ @worklist = splice(@worklist, 0, $OPT{'schedule-limit-groups'});
+ }
+ $exit_code= process_worklist(\@worklist, $state, $lintian_log_dir);
+ }
+ return $exit_code;
+}
+
+sub process_worklist {
+ my ($worklist_ref, $state, $lintian_log_dir) = @_;
+ my $round = 0;
+ my $rounds = 1;
+ my @worklist = @{$worklist_ref};
+ my $exit_code = 0;
+ my $schedule_chunk_size = $OPT{'schedule-chunk-size'};
+ my $start_time = time();
+
+ if ($schedule_chunk_size > 0) {
+ # compute the number of rounds needed.
+ my $size_up = scalar @worklist + ($schedule_chunk_size - 1);
+ $rounds = int($size_up / $schedule_chunk_size);
+ }
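+    # For illustration, the expression above is a ceiling division: 2500
+    # groups with --schedule-chunk-size=1000 gives int((2500 + 999) / 1000),
+    # i.e. 3 rounds.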
+
+ log_msg(
+ sprintf(
+ 'Groups to process %d will take %d round(s) [round limit: %s]',
+ scalar @worklist,
+ $rounds,$schedule_chunk_size > 0 ? $schedule_chunk_size : 'none'
+ ));
+
+ log_msg('Command line used: ' . join(q{ }, @LINTIAN_CMD));
+ while (@worklist) {
+ my $len = scalar @worklist;
+ my (@work_splice, @completed, %processed, %errors);
+ my ($status_fd, $lint_status_out);
+ my $got_alarm = 0;
+
+ # Bail if there is less than 5 minutes left
+ if (time() >= $start_time + BACKLOG_PROCESSING_TIME_LIMIT - 300) {
+ log_msg('No more time for processing backlogs');
+ $exit_code = 2;
+ last;
+ }
+
+ $round++;
+ # correct bounds to fit chunk size
+ if ($schedule_chunk_size > 0 and $len > $schedule_chunk_size) {
+ $len = $schedule_chunk_size;
+ }
+
+ # Sort @work_splice to have the "Range:"-line below produce
+ # reasonable output.
+ @work_splice = sort(splice(@worklist, 0, $len));
+
+ log_msg("Running Lintian (round $round/$rounds) ...");
+ if ($len == 1) {
+ log_msg(' - Single group: ' . $work_splice[0]);
+ } else {
+ log_msg(' - Range: GROUP:'
+ . $work_splice[0]
+ . q{ ... GROUP:}
+ . $work_splice[-1]);
+ }
+
+ next if ($OPT{'dry-run'});
+
+ pipe($status_fd, $lint_status_out);
+ my ($nfd, $new_lintian_log)
+ = tempfile('lintian.log-XXXXXXX', DIR => $lintian_log_dir);
+ # We do not mind if anyone reads the lintian log as it is being written
+ chmod(0644, $nfd);
+ log_msg("New lintian log at $new_lintian_log");
+
+ my $loop = IO::Async::Loop->new;
+ my $future = $loop->new_future;
+ my $signalled_lintian = 0;
+
+ push(@LINTIAN_CMD, '--status-log', '&3');
+ my $process = IO::Async::Process->new(
+ command => [@LINTIAN_CMD],
+ stdin => { via => 'pipe_write' },
+ stdout => { via => 'pipe_read' },
+ stderr => { via => 'pipe_read' },
+ fd3 => { via => 'pipe_read' },
+ on_finish => sub {
+ my ($self, $exitcode) = @_;
+ my $status = ($exitcode >> 8);
+ my $signal = ($exitcode & 0xff);
+
+ if ($signal) {
+ log_msg("Lintian terminated by signal: $signal");
+ # If someone is sending us signals (e.g. SIGINT/Ctrl-C)
+ # don't start the next round.
+ log_msg(' - skipping the rest of the worklist');
+ @worklist = ();
+ $future->fail(
+ "Command @LINTIAN_CMD received signal $signal");
+ return;
+ }
+
+ if ($status == 0 || $status == 1) {
+ # exit 1 (policy violations) happens all the time (sadly)
+ # exit 2 (broken packages) also happens all the time...
+ log_msg('Lintian finished successfully');
+ $future->done("Done with @LINTIAN_CMD");
+ return;
+ }
+
+ log_msg("warning: executing lintian returned status $status");
+ if ($got_alarm) {
+ # Ideally, lintian would always die by the signal
+                # but sometimes it catches it and terminates
+ # "normally"
+ log_msg('Stopped by a signal or time out');
+ log_msg(' - skipping the rest of the worklist');
+ @worklist = ();
+ }
+
+ $future->fail("Error status $status from @LINTIAN_CMD");
+ return;
+ });
+
+ $process->stdout->configure(
+ on_read => sub {
+ my ($stream, $buffref, $eof) = @_;
+
+ if (length $$buffref) {
+ print {$nfd} $$buffref;
+ $$buffref = EMPTY;
+ }
+
+ close($nfd)
+ if $eof;
+
+ return 0;
+ },
+ );
+
+ $process->stderr->configure(
+ on_read => sub {
+ my ($stream, $buffref, $eof) = @_;
+
+ if (length $$buffref) {
+ print STDOUT $$buffref;
+ $$buffref = EMPTY;
+ }
+
+ return 0;
+ },
+ );
+
+ $process->fd3->configure(
+ on_read => sub {
+ my ($stream, $buffref, $eof) = @_;
+
+ while($$buffref =~ s/^(.*)\n//) {
+ my $line = $1;
+
+ # listen to status updates from lintian
+ if ($line =~ m/^complete ([^ ]+) \(([^\)]+)\)$/) {
+ my ($group_id, $runtime) = ($1, $2);
+ push(@completed, $group_id);
+ $processed{$group_id} = 1;
+ log_msg(" [lintian] processed $group_id"
+ . " successfully (time: $runtime)");
+ } elsif ($line =~ m/^error ([^ ]+) \(([^\)]+)\)$/) {
+ my ($group_id, $runtime) = ($1, $2);
+ log_msg(" [lintian] error processing $group_id "
+ . "(time: $runtime)");
+ $processed{$group_id} = 1;
+ # We ignore errors if we sent lintian a signal to avoid
+ # *some* false-positives.
+ $errors{$group_id} = 1 if not $signalled_lintian;
+ } elsif ($line =~ m/^ack-signal (SIG\S+)$/) {
+ my $signal = $1;
+ log_msg(
+"Signal $signal acknowledged: disabled timed alarms"
+ );
+ alarm(0);
+ }
+ }
+
+ alarm(0)
+ if $eof;
+
+ return 0;
+ },
+ );
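+        # For illustration, the status lines matched above look like the
+        # following (group names and timings are hypothetical):
+        #
+        #   complete foo/1.0-1 (42.3s)
+        #   error bar/2.0-1 (7.1s)
+        #   ack-signal SIGTERM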
+
+ $loop->add($process);
+
+ my $groups = $state->{'groups'};
+ # Submit the tasks to Lintian
+ foreach my $group_id (@work_splice) {
+ my $members;
+ if (not exists($groups->{$group_id})) {
+ # Sanity check (can in theory happen if an external process
+ # modifies the state cache and we have reloaded it)
+ log_msg(
+ "Group ${group_id} disappeared before we could schedule it"
+ );
+ next;
+ }
+ $members = $groups->{$group_id}{'members'};
+ for my $member_id (sort(keys(%{${members}}))) {
+ my $path = $members->{$member_id}{'path'};
+ $process->stdin->write($path . NEWLINE);
+ }
+ }
+ $process->stdin->close_when_empty;
+
+ eval {
+ my $time_limit
+ = $start_time + BACKLOG_PROCESSING_TIME_LIMIT - time();
+ my $count = 0;
+ my $sig_handler = sub {
+ my ($signal_name) = @_;
+ $signalled_lintian = 1;
+ $count++;
+ if ($signal_name eq 'ALRM') {
+ $got_alarm = 1 if $got_alarm >= 0;
+ } else {
+ $got_alarm = -1;
+ }
+ if ($count < 3) {
+ my $pid = $process->pid;
+ log_msg("Received SIG${signal_name}, "
+ . "sending SIGTERM to $pid [${count}/3]");
+ $process->kill('TERM');
+ if ($signal_name eq 'ALRM') {
+ log_msg(
+ 'Scheduling another alarm in 5 minutes from now...'
+ );
+ alarm(300);
+ }
+ } else {
+ my $pid = $process->pid;
+ log_msg("Received SIG${signal_name} as the third one, "
+ . "sending SIGKILL to $pid");
+ log_msg('You may have to clean up some '
+ . 'temporary directories manually');
+ $process->kill('KILL');
+ }
+ };
+ local $SIG{'TERM'} = $sig_handler;
+ local $SIG{'INT'} = $sig_handler;
+ local $SIG{'ALRM'} = $sig_handler;
+
+ alarm($time_limit);
+ };
+
+ # Wait for lintian to terminate
+ $future->await;
+
+ if ($got_alarm) {
+ if ($got_alarm == 1) {
+ # Lintian was (presumably) killed due to a
+ # time-out from this process
+ $exit_code = 2;
+ } else {
+ # Lintian was killed by another signal; notify
+ # harness that it should skip the rest as well.
+ $exit_code = 3;
+ }
+ }
+
+ log_msg('Updating the lintian log used for reporting');
+ my $filter = generate_log_filter($state, \%processed);
+ seek($nfd, 0, SEEK_END);
+ update_lintian_log($filter, $nfd, $new_lintian_log);
+
+ log_msg('Updating harness state cache');
+ # Reload the state cache, just in case it was modified by an external
+ # process during the lintian run.
+ $state = load_state_cache($OPT{'state-dir'});
+ for my $group_id (@completed) {
+ my $group_data;
+            # In theory, they can disappear - in practice, that requires
+ # an external call to (e.g.) lintian reporting-sync-state.
+ next if not exists($state->{'groups'}{$group_id});
+ $group_data = $state->{'groups'}{$group_id};
+ $group_data->{'last-processed-by'} = $LINTIAN_VERSION;
+ delete($group_data->{'out-of-date'});
+ # Always clear the error counter after a successful run.
+ delete($group_data->{'processing-errors'});
+ delete($group_data->{'last-error-by'});
+ }
+ for my $group_id (sort(keys(%errors))) {
+ my $group_data;
+            # In theory, they can disappear - in practice, that requires
+ # an external call to (e.g.) lintian reporting-sync-state.
+ next if not exists($state->{'groups'}{$group_id});
+ $group_data = $state->{'groups'}{$group_id};
+ if ($errors{$group_id}) {
+ if (not exists($group_data->{'last-error-by'})
+ or $group_data->{'last-error-by'} ne $LINTIAN_VERSION) {
+ # If it is a new lintian version then (re)set the counter
+ # to 1. Case also triggers for the very first issue.
+ $group_data->{'processing-errors'} = 1;
+ } else {
+ # Repeated error with the same version
+ ++$group_data->{'processing-errors'};
+ }
+ # Set the "last-error-by" flag so we can clear the
+ # error if there is a new version of lintian.
+ $group_data->{'last-error-by'} = $LINTIAN_VERSION;
+ } else {
+ delete($group_data->{'processing-errors'});
+ }
+ }
+ save_state_cache($OPT{'state-dir'}, $state);
+ last if $exit_code;
+ }
+ return $exit_code;
+}
+
+sub generate_log_filter {
+ my ($state, $exclude) = @_;
+ my %filter;
+ my $group_map = $state->{'groups'};
+ for my $group_id (keys(%{${group_map}})) {
+ my $members;
+ next if exists($exclude->{$group_id});
+ $members = $group_map->{$group_id}{'members'};
+ for my $member_id (keys(%{$members})) {
+ $filter{$member_id} = 1;
+ }
+ }
+ return \%filter;
+}
+
+sub update_lintian_log {
+ my ($keep_filter, $new_fd, $tmp_path) = @_;
+ my $lintian_log_dir = $OPT{'lintian-log-dir'};
+ my $lintian_log = "${lintian_log_dir}/lintian.log";
+ my $copy_mode = 0;
+ my $first = 1;
+
+ eval {
+ open(my $input, '<', $lintian_log);
+ while (<$input>) {
+ if (
+ m/^N: [ ] Processing [ ] (binary|udeb|source) [ ]
+ package [ ] (\S+) [ ] \(version [ ] (\S+), [ ]
+ arch [ ] (\S+)\)[ ]\.\.\./xsm
+ ) {
+ my ($type, $pkg, $ver, $arch) = ($1,$2, $3, $4);
+ my $k = "$type:$pkg/$ver";
+ $k .= "/$arch" if $type ne 'source';
+ $copy_mode = 0;
+ $copy_mode = 1 if exists($keep_filter->{$k});
+ }
+ if ($copy_mode) {
+ if ($first) {
+ print {$new_fd} "N: ---start-of-old-lintian-log-file---\n";
+ $first = 0;
+ }
+ print {$new_fd} $_;
+ }
+ }
+ close($input);
+ close($new_fd);
+ rename($tmp_path, $lintian_log);
+ };
+ if (my $err = $@) {
+ # Unlink $new_lintian_log, we ignore errors as the one we
+ # already got is more important/interesting.
+ no autodie qw(unlink);
+ unlink($tmp_path) or warn("Cannot unlink $tmp_path: $!");
+ die($err);
+ }
+ return;
+}
+
+sub rewrite_lintian_log {
+ my ($keep_filter) = @_;
+ my $lintian_log_dir = $OPT{'lintian-log-dir'};
+ my ($nfd, $new_lintian_log);
+
+ ($nfd, $new_lintian_log)
+ = tempfile('lintian.log-XXXXXXX', DIR => $lintian_log_dir);
+ chmod(0644, $nfd);
+ update_lintian_log($keep_filter, $nfd, $new_lintian_log);
+ return 1;
+}
+
+sub usage {
+ my $cmd = basename($0);
+ my $me = "lintian $cmd";
+ print <<EOF;
+Internal command for the Lintian reporting framework
+Usage: $me <args> -- <extra lintian args>
+
+ --help Show this text and exit
+
+ --lintian-frontend PROG Use PROG as frontend for lintian (defaults to "lintian")
+ --lintian-log-dir DIR Path to the harness log dir. [!]
+ --lintian-scratch-space DIR Use DIR for temporary files (notably temp labs)
+ --schedule-chunk-size N Run at most N groups in a given lintian run.
+ --schedule-limit-groups N Schedule at most N groups in total. [!]
+ --state-dir DIR Directory containing the state cache (must be
+ writable). [!]
+
+Arguments marked with [!] are required for a successful run.
+EOF
+
+ exit(0);
+}
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
+
diff --git a/commands/reporting-sync-state.pm b/commands/reporting-sync-state.pm
new file mode 100644
index 0000000..af649a7
--- /dev/null
+++ b/commands/reporting-sync-state.pm
@@ -0,0 +1,572 @@
+#!/usr/bin/perl -w
+#
+# reporting-sync-state
+#
+# Copyright (C) 2018 Chris Lamb <lamby@debian.org>
+#
+# This program is free software. It is distributed under the terms of
+# the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any
+# later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, you can find it on the World Wide
+# Web at https://www.gnu.org/copyleft/gpl.html, or write to the Free
+# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+# MA 02110-1301, USA.
+
+package reporting_sync_state;
+
+use v5.20;
+use warnings;
+use utf8;
+use autodie;
+
+use Getopt::Long();
+use File::Basename qw(basename);
+use YAML::XS ();
+use MIME::Base64 qw(encode_base64);
+
+use Lintian::Deb822::Parser qw(visit_dpkg_paragraph_string);
+use Lintian::Relation::Version qw(versions_comparator);
+use Lintian::Reporting::Util qw(
+ find_backlog
+ load_state_cache
+ save_state_cache
+);
+use Lintian::Util qw(
+ open_gz
+);
+
+my $DEFAULT_CHECKSUM = 'Sha256';
+my (%KNOWN_MEMBERS, %ACTIVE_GROUPS);
+my $CONFIG;
+my %OPT;
+my %OPT_HASH= (
+ 'reporting-config=s'=> \$OPT{'reporting-config'},
+ 'desired-version=s' => \$OPT{'desired-version'},
+ 'reschedule-all' => \$OPT{'reschedule-all'},
+ 'help|h' => \&usage,
+ 'debug|d' => \$OPT{'debug'},
+ 'dry-run' => \$OPT{'dry-run'},
+);
+
+sub check_parameters {
+ for my $parameter (qw(reporting-config desired-version)) {
+ if (not defined($OPT{$parameter})) {
+ die( "Missing required parameter \"--${parameter}\""
+                . " (use --help for more info)\n");
+ }
+ }
+ if (not $OPT{'reporting-config'} or not -f $OPT{'reporting-config'}) {
+ die("The --reporting-config parameter must point to an existing file\n"
+ );
+ }
+ return;
+}
+
+sub required_cfg_value {
+ my (@keys) = @_;
+ my $v = $CONFIG;
+ for my $key (@keys) {
+ if (not exists($v->{$key})) {
+ my $k = join('.', @keys);
+ die("Missing required config parameter: ${k}\n");
+ }
+ $v = $v->{$key};
+ }
+ return $v;
+}
+
+sub required_cfg_non_empty_list_value {
+ my (@keys) = @_;
+ my $v = required_cfg_value(@keys);
+ if (not defined($v) or ref($v) ne 'ARRAY' or scalar(@{$v}) < 1) {
+ my $k = join('.', @keys);
+ die("Invalid configuration: ${k} must be a non-empty list\n");
+ }
+ return $v;
+}
+
+sub optional_cfg_list_value {
+ my (@keys) = @_;
+ my $v = $CONFIG;
+ for my $key (@keys) {
+ if (not exists($v->{$key})) {
+ return [];
+ }
+ $v = $v->{$key};
+ }
+ if (ref($v) ne 'ARRAY') {
+ my $k = join('.', @keys);
+ die("Invalid configuration: ${k} must be a list (or missing)\n");
+ }
+ return $v;
+}
+
+sub main {
+ my ($state_dir, $state, $archives, %blacklist);
+ STDOUT->autoflush;
+ Getopt::Long::config('bundling', 'no_getopt_compat', 'no_auto_abbrev');
+ Getopt::Long::GetOptions(%OPT_HASH) or die("error parsing options\n");
+ check_parameters();
+ $CONFIG = YAML::XS::LoadFile($OPT{'reporting-config'});
+ $state_dir = required_cfg_value('storage', 'state-cache');
+ $state = load_state_cache($state_dir);
+
+ if (upgrade_state_cache_if_needed($state)) {
+ log_debug('Updated the state cache');
+ }
+ log_debug('Initial state had '
+ . (scalar(keys(%{$state->{'groups'}})))
+ . ' groups');
+ $archives = required_cfg_value('archives');
+ %blacklist = map { $_ => 1 } @{optional_cfg_list_value('blacklist')};
+ for my $archive (sort(keys(%{$archives}))) {
+ log_debug("Processing archive $archive");
+ my $path = required_cfg_value('archives', $archive, 'base-dir');
+ my $archs = required_cfg_non_empty_list_value('archives', $archive,
+ 'architectures');
+ my $components
+ = required_cfg_non_empty_list_value('archives', $archive,
+ 'components');
+ my $distributions
+ = required_cfg_non_empty_list_value('archives', $archive,
+ 'distributions');
+ local_mirror_manifests($state, $path, $distributions, $components,
+ $archs, \%blacklist);
+ }
+
+ cleanup_state($state);
+ if (not $OPT{'dry-run'}) {
+ save_state_cache($state_dir, $state);
+ }
+ exit(0);
+}
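+# For illustration, a minimal configuration file covering the keys read
+# above could look like this (archive names and paths are hypothetical):
+#
+#   storage:
+#     state-cache: /srv/lintian/state
+#   archives:
+#     debian:
+#       base-dir: /srv/mirror/debian
+#       distributions: [sid]
+#       components: [main, contrib, non-free]
+#       architectures: [amd64, i386]
+#   blacklist: [some-huge-source-package]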
+
+# State:
+# 'groups' => {
+#     group-id => {
+#         'last-processed-by' => <version or undef>,
+#         'out-of-date' => <0|1>, (if omitted => 0, unless "last-processed-by" is omitted as well)
+#         'mirror-metadata' => { 'component' => ..., 'maintainer' => ..., 'uploaders' => [...] },
+#         'members' => {
+#             $member_id => {
+#                 'sha256' => <base64-encoded checksum of the artifact>,
+#                 'path' => <path to the artifact on the local mirror>,
+#             },
+#         },
+#     },
+# },
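+#
+# For illustration, one populated group entry could look like this
+# (names, paths and digests are hypothetical):
+#
+#   'foo/1.0-1' => {
+#       'last-processed-by' => '2.117.0',
+#       'members' => {
+#           'source:foo/1.0-1' => {
+#               'path' => '/srv/mirror/debian/pool/main/f/foo/foo_1.0-1.dsc',
+#               'sha256' => '<base64-encoded SHA-256>',
+#           },
+#           'binary:foo/1.0-1/amd64' => {
+#               'path' => '/srv/mirror/debian/pool/main/f/foo/foo_1.0-1_amd64.deb',
+#               'sha256' => '<base64-encoded SHA-256>',
+#           },
+#       },
+#   },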
+
+sub upgrade_state_cache_if_needed {
+ my ($state) = @_;
+ if (exists($state->{'groups'})) {
+ my $updated = 0;
+ my $groups = $state->{'groups'};
+ for my $group (sort(keys(%{$groups}))) {
+ my $group_data = $groups->{$group};
+ if ( exists($group_data->{'mirror-metadata'})
+ && exists($group_data->{'mirror-metadata'}{'area'})) {
+ if (not exists($group_data->{'mirror-metadata'}{'component'})){
+ $group_data->{'mirror-metadata'}{'component'}
+ = $group_data->{'mirror-metadata'}{'area'};
+ delete($group_data->{'mirror-metadata'}{'area'});
+ $updated = 1;
+ }
+ }
+ }
+ return $updated;
+ }
+ # Migrate the "last-processed-by" version.
+ my $groups = $state->{'groups'} = {};
+ for my $key (sort(keys(%${state}))) {
+        next if $key eq 'groups';
+ if (exists($state->{$key}{'last-processed-by'})) {
+ my $last_version = $state->{$key}{'last-processed-by'};
+ delete($state->{$key});
+ $groups->{$key}{'last-processed-by'} = $last_version;
+ }
+ }
+ return 1;
+}
+
+sub add_member_to_group {
+ my ($state, $group_id, $member_id, $member_data, $group_metadata) = @_;
+ # Fetch members before group_data (relying on autovivification)
+ my $members = $state->{'groups'}{$group_id}{'members'};
+ my $group_data = $state->{'groups'}{$group_id};
+ my $member;
+ my $new_group = 0;
+ if (not defined($members)) {
+ $group_data->{'members'} = $members = {};
+ log_debug(
+ "${group_id} is out-of-date: New group (triggered by ${member_id})"
+ );
+ $new_group = 1;
+ }
+
+ $member = $members->{$member_id};
+ if (not defined($member)) {
+ $members->{$member_id} = $member = {};
+ }
+
+ # Update of path is not sufficient to consider the member out of date
+ # (mirror restructuring happens - although extremely rarely)
+ $member->{'path'} = $member_data->{'path'};
+ if ($member_data->{'mirror-metadata'}
+ && keys(%{$member_data->{'mirror-metadata'}})) {
+ $member->{'mirror-metadata'} = $member_data->{'mirror-metadata'};
+ }
+ if (not exists($group_data->{'mirror-metadata'})) {
+ $group_data->{'mirror-metadata'}= $group_metadata->{'mirror-metadata'};
+ } else {
+ for my $key (keys(%{$group_metadata->{'mirror-metadata'}})) {
+ $group_data->{'mirror-metadata'}{$key}
+ = $group_metadata->{'mirror-metadata'}{$key};
+ }
+ }
+ $KNOWN_MEMBERS{"${group_id} ${member_id}"} = 1;
+ $ACTIVE_GROUPS{$group_id} = 1;
+
+ if (!exists($member->{lc $DEFAULT_CHECKSUM})
+ || $member->{lc $DEFAULT_CHECKSUM} ne
+ $member_data->{lc $DEFAULT_CHECKSUM}) {
+ if (exists($member->{lc $DEFAULT_CHECKSUM})) {
+ # This seems worth a note even if the group is already out of date
+ my $lowercase = lc $DEFAULT_CHECKSUM;
+ log_debug(
+ "${group_id} is out-of-date: ${member_id} checksum mismatch"
+ . " ($member->{$lowercase} != $member_data->{$lowercase})");
+ } elsif (not $group_data->{'out-of-date'} and not $new_group) {
+ log_debug("${group_id} is out-of-date: New member (${member_id})");
+ }
+ $group_data->{'out-of-date'} = 1;
+ $member->{lc $DEFAULT_CHECKSUM} = $member_data->{lc $DEFAULT_CHECKSUM};
+ }
+ delete($member->{'sha1'});
+
+ return;
+}
+
+sub cleanup_state {
+ my ($state) = @_;
+ my %backlog
+ = map { $_ => 1 } find_backlog($OPT{'desired-version'}, $state);
+
+ # Empty 'members-to-groups' to prune "dead links". It will be
+ # recreated by cleanup_group_state below.
+ $state->{'members-to-groups'} = {};
+
+ for my $group_id (sort(keys(%{$state->{'groups'}}))) {
+ cleanup_group_state($state, $group_id, \%backlog);
+ }
+ return;
+}
+
+sub remove_if_empty {
+ my ($hash_ref, $key) = @_;
+ my ($val, $empty);
+ return if not exists($hash_ref->{$key});
+ $val = $hash_ref->{$key};
+ if (defined($val)) {
+ $empty = 1 if (ref($val) eq 'HASH' and not keys(%${val}));
+ $empty = 1 if (ref($val) eq 'ARRAY' and not scalar(@${val}));
+ } else {
+ $empty = 1;
+ }
+ delete($hash_ref->{$key}) if $empty;
+ return;
+}
+
+sub cleanup_group_state {
+ my ($state, $group_id, $backlog) = @_;
+ my ($members);
+ my $group_data = $state->{'groups'}{$group_id};
+ $members = $group_data->{'members'};
+ if (not exists($ACTIVE_GROUPS{$group_id}) or not $members) {
+ # No members left, remove the group entirely
+ delete($state->{'groups'}{$group_id});
+ if (not exists($ACTIVE_GROUPS{$group_id})) {
+ log_debug("Group ${group_id} dropped: It is not an active group");
+ } else {
+ log_debug("Group ${group_id} dropped: No members left (early)");
+ }
+
+ return;
+ }
+
+ for my $member_id (sort(keys(%{$members}))) {
+ if (not exists($KNOWN_MEMBERS{"${group_id} ${member_id}"})) {
+ delete($members->{$member_id});
+ if (not $group_data->{'out-of-date'}) {
+ $group_data->{'out-of-date'} = 1;
+ log_debug(
+ "${group_id} is out-of-date: ${member_id} disappeared");
+ }
+ } else {
+ my $member_data = $members->{$member_id};
+ # Create "member_id to group_data" link
+ $state->{'members-to-groups'}{$member_id} = $group_data;
+ delete($member_data->{'mirror-metadata'}{'component'})
+ if exists($member_data->{'mirror-metadata'});
+ remove_if_empty($member_data, 'mirror-metadata');
+ }
+ }
+
+ # Add the "out-of-date" flag if it is in the backlog OR we were asked
+ # to reschedule all
+ if (not $group_data->{'out-of-date'}) {
+ if ($OPT{'reschedule-all'} or $backlog->{$group_id}) {
+ $group_data->{'out-of-date'} = 1;
+ log_debug("Marking ${group_id} as out of date: In backlog")
+ if $backlog->{$group_id};
+ }
+ }
+    # Check for and possibly clear the error counters
+ if (
+ exists($group_data->{'processing-errors'})
+ and (not exists($group_data->{'last-error-by'})
+ or $group_data->{'last-error-by'} ne $OPT{'desired-version'})
+ ) {
+ log_debug(
+ "Clearing error-counter for ${group_id}: New version of lintian");
+ delete($group_data->{'processing-errors'});
+ # Leave "last-error-by" as we can use that to tell if the previous
+ # version triggered errors.
+ }
+
+ if (not %{$members}) {
+ # No members left, remove the group entirely. This should not happen
+ # as the ACTIVE_GROUPS check above ought to have caught this.
+        delete($state->{'groups'}{$group_id});
+ log_debug("Group ${group_id} dropped: No members left (late)");
+ } else {
+ # remove redundant fields
+ remove_if_empty($group_data, 'out-of-date');
+ for my $metadata_field (qw(component maintainer uploaders)) {
+ remove_if_empty($group_data->{'mirror-metadata'}, $metadata_field);
+ }
+ remove_if_empty($group_data, 'mirror-metadata');
+ }
+
+ return;
+}
+
+# Helper for local_mirror_manifests - it parses a paragraph from a Sources file
+sub _parse_srcs_pg {
+ my ($state, $blacklist, $extra_metadata, $paragraph) = @_;
+ my $dir = $paragraph->{'Directory'}//'';
+ my $group_id = $paragraph->{'Package'} . '/' . $paragraph->{'Version'};
+ my $member_id = "source:${group_id}";
+ my (%data, %group_metadata, $group_mirror_md);
+ if (exists $blacklist->{$paragraph->{'Package'}}) {
+ log_debug("Ignoring blacklisted package src:$paragraph->{'Package'}");
+ return;
+ }
+ # only include the source if it has any binaries to be checked.
+ # - Otherwise we may end up checking a source with no binaries
+ # (happens if the architecture is "behind" in building)
+ return unless $ACTIVE_GROUPS{$group_id};
+ $dir .= '/' if $dir;
+ foreach my $f (split m/\n/, $paragraph->{"Checksums-${DEFAULT_CHECKSUM}"}){
+
+ # trim both ends
+ $f =~ s/^\s+|\s+$//g;
+
+ next unless $f && $f =~ m/\.dsc$/;
+ my ($checksum, undef, $basename) = split(m/\s++/, $f);
+ my $b64_checksum = encode_base64(pack('H*', $checksum));
+ # $dir should end with a slash if it is non-empty.
+ $data{lc $DEFAULT_CHECKSUM} = $b64_checksum;
+ $data{'path'} = $extra_metadata->{'mirror-dir'} . "/$dir" . $basename;
+ last;
+ }
+
+ $group_mirror_md = $group_metadata{'mirror-metadata'} = {};
+ $group_mirror_md->{'component'} = $extra_metadata->{'component'};
+    $group_mirror_md->{'maintainer'} = $paragraph->{'Maintainer'};
+    if (my $uploaders = $paragraph->{'Uploaders'}) {
+ my @ulist = split(/>\K\s*,\s*/, $uploaders);
+ $group_mirror_md->{'uploaders'} = \@ulist;
+ }
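+    # For illustration (addresses are hypothetical): the \K keeps the closing
+    # '>' with each entry, so
+    #   'Jane Doe <jane@example.com>, John Smith <john@example.com>'
+    # is split into
+    #   ('Jane Doe <jane@example.com>', 'John Smith <john@example.com>')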
+
+ add_member_to_group($state, $group_id, $member_id, \%data,
+ \%group_metadata);
+ return;
+}
+
+# Helper for local_mirror_manifests - it parses a paragraph from a Packages file
+sub _parse_pkgs_pg {
+ my ($state, $blacklist, $extra_metadata, $type, $paragraph) = @_;
+ my ($group_id, $member_id, %data, %group_metadata, $b64_checksum);
+ my $package = $paragraph->{'Package'};
+ my $version = $paragraph->{'Version'};
+ my $architecture = $paragraph->{'Architecture'};
+ if (not defined($paragraph->{'Source'})) {
+ $paragraph->{'Source'} = $package;
+ } elsif ($paragraph->{'Source'} =~ /^([-+\.\w]+)\s+\((.+)\)$/) {
+ $paragraph->{'Source'} = $1;
+        $paragraph->{'Source-Version'} = $2;
+ }
+ if (exists $blacklist->{$paragraph->{'Source'}}) {
+ log_debug("Ignoring binary package $package: it is part of "
+ . "blacklisted source package $paragraph->{'Source'}");
+ return;
+ }
+ if (not defined($paragraph->{'Source-Version'})) {
+ $paragraph->{'Source-Version'} = $paragraph->{'Version'};
+ }
+ $group_id = $paragraph->{'Source'} . '/' . $paragraph->{'Source-Version'};
+ $member_id = "${type}:${package}/${version}/${architecture}";
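+    # For illustration (package data is hypothetical): a paragraph with
+    # "Package: libfoo1", "Version: 1.0-1+b1", "Architecture: amd64" and
+    # "Source: foo (1.0-1)" yields group_id "foo/1.0-1" and
+    # member_id "binary:libfoo1/1.0-1+b1/amd64", i.e. binaries are grouped
+    # under their source package and source version.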
+ $data{'path'}
+ = $extra_metadata->{'mirror-dir'} . '/' . $paragraph->{'filename'};
+ $b64_checksum
+ = encode_base64(pack('H*', $paragraph->{lc $DEFAULT_CHECKSUM}));
+ $data{lc $DEFAULT_CHECKSUM} = $b64_checksum;
+
+ $group_metadata{'mirror-metadata'}{'maintainer'}
+ = $paragraph->{'Maintainer'};
+ if (my $uploaders = $paragraph->{'Uploaders'}) {
+ my @ulist = split(/>\K\s*,\s*/, $uploaders);
+ $group_metadata{'mirror-metadata'}{'uploaders'} = \@ulist;
+ }
+
+ add_member_to_group($state, $group_id, $member_id, \%data,
+ \%group_metadata);
+ return;
+}
+
+# local_mirror_manifests ($state, $mirdir, $dists, $components, $archs, $blacklist)
+#
+# Records in $state which "source", "binary" and "udeb" packages are present
+# on the local mirror at $mirdir, based on the "Sources" and "Packages" files
+# of each requested distribution, component and (for binaries) architecture.
+#
+# $state - the harness state cache to update
+# $mirdir - the path to the local mirror
+# $dists - listref of dists to consider (e.g. ['unstable'])
+# $components - listref of components to consider (e.g. ['main', 'contrib', 'non-free'])
+# $archs - listref of archs to consider (e.g. ['i386', 'amd64'])
+# $blacklist - hashref of source packages to ignore (e.g. {'gcc-8-cross-ports' => 1})
+#
+sub local_mirror_manifests {
+ my ($state, $mirdir, $dists, $components, $archs, $blacklist) = @_;
+ foreach my $dist (@$dists) {
+ foreach my $component (@{$components}) {
+ my $srcs = "$mirdir/dists/$dist/$component/source/Sources";
+
+ my %extra_metadata = (
+ 'component' => $component,
+ 'mirror-dir' => $mirdir,
+ );
+
+ # Binaries have a "per arch" file.
+ # - we check those first and then include the source packages that
+ # are referred to by these binaries.
+
+ my $dist_path = "$mirdir/dists/$dist/$component";
+ for my $arch (@{$archs}) {
+
+ my $pkgs = "${dist_path}/binary-$arch/Packages";
+ my $pkgfd = _open_data_file($pkgs);
+ local $/ = undef;
+ my $pkgstring = <$pkgfd>;
+ close $pkgfd;
+
+ my $binsub = sub {
+ _parse_pkgs_pg($state, $blacklist, \%extra_metadata,
+ 'binary', @_);
+ };
+ visit_dpkg_paragraph_string($binsub, $pkgstring);
+
+ my $upkgs
+ = "${dist_path}/debian-installer/binary-$arch/Packages";
+ my $upkgfd = _open_data_file($upkgs);
+ local $/ = undef;
+ my $upkgstring = <$upkgfd>;
+ close $upkgfd;
+
+ my $udebsub = sub {
+ _parse_pkgs_pg($state, $blacklist, \%extra_metadata,
+ 'udeb', @_);
+ };
+ visit_dpkg_paragraph_string($udebsub, $upkgstring);
+ }
+
+ my $srcfd = _open_data_file($srcs);
+ local $/ = undef;
+ my $srcstring = <$srcfd>;
+ close $srcfd;
+
+ my $srcsub
+ = sub { _parse_srcs_pg($state, $blacklist, \%extra_metadata, @_) };
+ visit_dpkg_paragraph_string($srcsub, $srcstring);
+ }
+ }
+ return;
+}
+
+# _open_data_file ($file)
+#
+# Opens $file if it exists, otherwise it tries common extensions (e.g. .gz or .xz) and opens
+# that instead. It may pipe the file through an external decompressor, so the returned
+# file descriptor cannot be assumed to be a file.
+#
+# If $file does not exist and no common extensions are found, this dies. It may also
+# die if it finds a file, but is unable to open it.
+sub _open_data_file {
+ my ($file) = @_;
+ if (-e $file) {
+ open(my $fd, '<:encoding(UTF-8)', $file);
+ return $fd;
+ }
+ if (-e "${file}.gz") {
+ my $fd = open_gz("${file}.gz");
+ binmode($fd, 'encoding(UTF-8)');
+ return $fd;
+ }
+ if (-e "${file}.xz") {
+ open(my $fd, '-|', 'xz', '-dc', "${file}.xz");
+ binmode($fd, 'encoding(UTF-8)');
+ return $fd;
+ }
+ die("Cannot find ${file}: file does not exist");
+}
+
+sub log_debug {
+ if ($OPT{'debug'}) {
+ print "$_[0]\n";
+ }
+ return;
+}
+
+sub usage {
+ my $cmd = basename($0);
+ my $me = "lintian $cmd";
+ print <<EOF;
+Internal command for the Lintian reporting framework
+Usage: $me <args>
+
+ --help Show this text and exit
+ --debug Show/log debugging output
+
+ --reporting-config FILE
+ Path to the configuration file (listing the archive definitions) [!]
+ --desired-version X The desired "last-processed-by" Lintian version. [!]
+
+Arguments marked with [!] are required for a successful run.
+
+EOF
+
+ exit(0);
+}
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et