diff options
Diffstat (limited to 'commands/reporting-lintian-harness.pm')
-rw-r--r-- | commands/reporting-lintian-harness.pm | 603 |
1 files changed, 603 insertions, 0 deletions
diff --git a/commands/reporting-lintian-harness.pm b/commands/reporting-lintian-harness.pm new file mode 100644 index 0000000..afca58d --- /dev/null +++ b/commands/reporting-lintian-harness.pm @@ -0,0 +1,603 @@ +#!/usr/bin/perl -w + +# Lintian reporting harness -- Run lintian against an archive mirror +# +# Copyright (C) 2015 Niels Thykier +# Copyright (C) 2018 Chris Lamb <lamby@debian.org> +# +# Based on "reporting/harness", which was: +# Copyright (C) 1998 Christian Schwarz and Richard Braakman +# +# This program is free software. It is distributed under the terms of +# the GNU General Public License as published by the Free Software +# Foundation; either version 2 of the License, or (at your option) any +# later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, you can find it on the World Wide +# Web at https://www.gnu.org/copyleft/gpl.html, or write to the Free +# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, +# MA 02110-1301, USA. + +package reporting_lintian_harness; + +use v5.20; +use warnings; +use utf8; +use autodie; + +use constant BACKLOG_PROCESSING_TIME_LIMIT => 4 * 3600; # 4hours + +use Fcntl qw(F_GETFD F_SETFD FD_CLOEXEC SEEK_END); +use File::Basename qw(basename); +use File::Temp qw(tempfile); +use Getopt::Long(); +use IO::Async::Loop; +use IO::Async::Process; +use List::MoreUtils qw(first_index); +use POSIX qw(strftime); + +use Lintian::IO::Async qw(safe_qx); +use Lintian::Reporting::Util + qw(find_backlog load_state_cache save_state_cache); + +use constant EMPTY => q{}; +use constant COLON => q{:}; +use constant NEWLINE => qq{\n}; + +my (@LINTIAN_CMD, $LINTIAN_VERSION); + +my @REQUIRED_PARAMETERS = qw( + lintian-log-dir + schedule-chunk-size + schedule-limit-groups + state-dir +); +my %OPT = ('lintian-frontend' => 'lintian',); +my %OPT_HASH = ( + 'schedule-chunk-size=i' => \$OPT{'schedule-chunk-size'}, + 'schedule-limit-groups=i' => \$OPT{'schedule-limit-groups'}, + 'state-dir=s' => \$OPT{'state-dir'}, + 'lintian-frontend=s' => \$OPT{'lintian-frontend'}, + 'lintian-log-dir=s' => \$OPT{'lintian-log-dir'}, + 'lintian-scratch-space=s' => \$OPT{'lintian-scratch-space'}, + 'help|h' => \&usage, +); + +sub main { + STDOUT->autoflush; + Getopt::Long::config('bundling', 'no_getopt_compat', 'no_auto_abbrev'); + Getopt::Long::GetOptions(%OPT_HASH) or die("error parsing options\n"); + check_parameters(); + $LINTIAN_VERSION= safe_qx($OPT{'lintian-frontend'}, '--print-version'); + chomp($LINTIAN_VERSION); + prepare_lintian_environment_and_cmdline(); + exit(harness_lintian()); +} + +### END OF SCRIPT -- below are helper subroutines ### + +=item untaint(VALUE) + +Untaint VALUE + +=cut + +sub untaint { + return $_[0] = $1 if $_[0] =~ m/^(.*)$/; + return; +} + +sub check_parameters { + for my $parameter (@REQUIRED_PARAMETERS) { + if (not defined($OPT{$parameter})) { + die( "Missing required parameter \"--${parameter}\"" + . "(use --help for more info)\n"); + } + } + if (-d $OPT{'state-dir'}) { + untaint($OPT{'state-dir'}); + } else { + die("The --state-dir parameter must point to an existing directory\n"); + } + die("The argument for --schedule-limit-groups must be an > 0\n") + if $OPT{'schedule-limit-groups'} < 1; + + return; +} + +sub prepare_lintian_environment_and_cmdline { + my $frontend = 'lintian'; + my $eoa_marker_index = first_index { $_ eq '--' } @ARGV; + my $logs_dir = $OPT{'lintian-log-dir'}; + my @overridable_args = (qw(-EL +>=classification --show-overrides)); + my @args = ( + qw(--verbose), # We rely on this for filtering the log + qw(--exp-output=format=fullewi --packages-from-file -), + qw(--perf-debug --perf-output), + "+${logs_dir}/lintian-perf.log", + ); + $frontend = $OPT{'lintian-frontend'} if ($OPT{'lintian-frontend'}); + + if ($eoa_marker_index > -1) { + # Move known "non-parameters" and the "--" behind our arguments. + # It is a misfeature, but at least it does not break + # our code. NB: It requires *two* "--" on the command-line to + # trigger this case. + push(@args, splice(@ARGV, $eoa_marker_index)); + } + # Put "our" arguments after user supplied ones + @LINTIAN_CMD = ($frontend, @overridable_args, @ARGV, @args); + + # The environment part + for my $key (keys(%ENV)) { + delete($ENV{$key}) if $key =~ m/^LINTIAN_/; + } + if ($OPT{'lintian-scratch-space'}) { + $ENV{'TMPDIR'} = $OPT{'lintian-scratch-space'}; + log_msg("Setting TMPDIR to $ENV{'TMPDIR'}"); + } else { + log_msg('Leaving TMPDIR unset (no --lintian-scratch-space'); + } + return; +} + +sub log_msg { + my ($msg) = @_; + my $ts = strftime('[%FT%T]: ', localtime()); + print $ts, $msg, "\n"; + return; +} + +sub harness_lintian { + my (@worklist); + my $exit_code = 0; + my $state = load_state_cache($OPT{'state-dir'}); + my $lintian_log_dir = $OPT{'lintian-log-dir'}; + my $lintian_log = "${lintian_log_dir}/lintian.log"; + log_msg('Update complete, loading current state information'); + + @worklist = find_backlog($LINTIAN_VERSION, $state); + + # Always update the log if it exists, as we may have removed + # some entries. + if (-f $lintian_log) { + my $filter = generate_log_filter($state, {}); + + # update lintian.log + log_msg('Updating lintian.log...'); + rewrite_lintian_log($filter); + } + + log_msg(''); + + if (not @worklist) { + log_msg('Skipping Lintian run - nothing to do...'); + } else { + log_msg('Processing backlog...'); + if (@worklist > $OPT{'schedule-limit-groups'}) { + log_msg( + "Truncating worklist to size $OPT{'schedule-limit-groups'}" + . ' from ' + . (scalar(@worklist))); + @worklist = splice(@worklist, 0, $OPT{'schedule-limit-groups'}); + } + $exit_code= process_worklist(\@worklist, $state, $lintian_log_dir); + } + return $exit_code; +} + +sub process_worklist { + my ($worklist_ref, $state, $lintian_log_dir) = @_; + my $round = 0; + my $rounds = 1; + my @worklist = @{$worklist_ref}; + my $exit_code = 0; + my $schedule_chunk_size = $OPT{'schedule-chunk-size'}; + my $start_time = time(); + + if ($schedule_chunk_size > 0) { + # compute the number of rounds needed. + my $size_up = scalar @worklist + ($schedule_chunk_size - 1); + $rounds = int($size_up / $schedule_chunk_size); + } + + log_msg( + sprintf( + 'Groups to process %d will take %d round(s) [round limit: %s]', + scalar @worklist, + $rounds,$schedule_chunk_size > 0 ? $schedule_chunk_size : 'none' + )); + + log_msg('Command line used: ' . join(q{ }, @LINTIAN_CMD)); + while (@worklist) { + my $len = scalar @worklist; + my (@work_splice, @completed, %processed, %errors); + my ($status_fd, $lint_status_out); + my $got_alarm = 0; + + # Bail if there is less than 5 minutes left + if (time() >= $start_time + BACKLOG_PROCESSING_TIME_LIMIT - 300) { + log_msg('No more time for processing backlogs'); + $exit_code = 2; + last; + } + + $round++; + # correct bounds to fit chunk size + if ($schedule_chunk_size > 0 and $len > $schedule_chunk_size) { + $len = $schedule_chunk_size; + } + + # Sort @work_splice to have the "Range:"-line below produce + # reasonable output. + @work_splice = sort(splice(@worklist, 0, $len)); + + log_msg("Running Lintian (round $round/$rounds) ..."); + if ($len == 1) { + log_msg(' - Single group: ' . $work_splice[0]); + } else { + log_msg(' - Range: GROUP:' + . $work_splice[0] + . q{ ... GROUP:} + . $work_splice[-1]); + } + + next if ($OPT{'dry-run'}); + + pipe($status_fd, $lint_status_out); + my ($nfd, $new_lintian_log) + = tempfile('lintian.log-XXXXXXX', DIR => $lintian_log_dir); + # We do not mind if anyone reads the lintian log as it is being written + chmod(0644, $nfd); + log_msg("New lintian log at $new_lintian_log"); + + my $loop = IO::Async::Loop->new; + my $future = $loop->new_future; + my $signalled_lintian = 0; + + push(@LINTIAN_CMD, '--status-log', '&3'); + my $process = IO::Async::Process->new( + command => [@LINTIAN_CMD], + stdin => { via => 'pipe_write' }, + stdout => { via => 'pipe_read' }, + stderr => { via => 'pipe_read' }, + fd3 => { via => 'pipe_read' }, + on_finish => sub { + my ($self, $exitcode) = @_; + my $status = ($exitcode >> 8); + my $signal = ($exitcode & 0xff); + + if ($signal) { + log_msg("Lintian terminated by signal: $signal"); + # If someone is sending us signals (e.g. SIGINT/Ctrl-C) + # don't start the next round. + log_msg(' - skipping the rest of the worklist'); + @worklist = (); + $future->fail( + "Command @LINTIAN_CMD received signal $signal"); + return; + } + + if ($status == 0 || $status == 1) { + # exit 1 (policy violations) happens all the time (sadly) + # exit 2 (broken packages) also happens all the time... + log_msg('Lintian finished successfully'); + $future->done("Done with @LINTIAN_CMD"); + return; + } + + log_msg("warning: executing lintian returned status $status"); + if ($got_alarm) { + # Ideally, lintian would always die by the signal + # but some times it catches it and terminates + # "normally" + log_msg('Stopped by a signal or time out'); + log_msg(' - skipping the rest of the worklist'); + @worklist = (); + } + + $future->fail("Error status $status from @LINTIAN_CMD"); + return; + }); + + $process->stdout->configure( + on_read => sub { + my ($stream, $buffref, $eof) = @_; + + if (length $$buffref) { + print {$nfd} $$buffref; + $$buffref = EMPTY; + } + + close($nfd) + if $eof; + + return 0; + }, + ); + + $process->stderr->configure( + on_read => sub { + my ($stream, $buffref, $eof) = @_; + + if (length $$buffref) { + print STDOUT $$buffref; + $$buffref = EMPTY; + } + + return 0; + }, + ); + + $process->fd3->configure( + on_read => sub { + my ($stream, $buffref, $eof) = @_; + + while($$buffref =~ s/^(.*)\n//) { + my $line = $1; + + # listen to status updates from lintian + if ($line =~ m/^complete ([^ ]+) \(([^\)]+)\)$/) { + my ($group_id, $runtime) = ($1, $2); + push(@completed, $group_id); + $processed{$group_id} = 1; + log_msg(" [lintian] processed $group_id" + . " successfully (time: $runtime)"); + } elsif ($line =~ m/^error ([^ ]+) \(([^\)]+)\)$/) { + my ($group_id, $runtime) = ($1, $2); + log_msg(" [lintian] error processing $group_id " + . "(time: $runtime)"); + $processed{$group_id} = 1; + # We ignore errors if we sent lintian a signal to avoid + # *some* false-positives. + $errors{$group_id} = 1 if not $signalled_lintian; + } elsif ($line =~ m/^ack-signal (SIG\S+)$/) { + my $signal = $1; + log_msg( +"Signal $signal acknowledged: disabled timed alarms" + ); + alarm(0); + } + } + + alarm(0) + if $eof; + + return 0; + }, + ); + + $loop->add($process); + + my $groups = $state->{'groups'}; + # Submit the tasks to Lintian + foreach my $group_id (@work_splice) { + my $members; + if (not exists($groups->{$group_id})) { + # Sanity check (can in theory happen if an external process + # modifies the state cache and we have reloaded it) + log_msg( + "Group ${group_id} disappeared before we could schedule it" + ); + next; + } + $members = $groups->{$group_id}{'members'}; + for my $member_id (sort(keys(%{${members}}))) { + my $path = $members->{$member_id}{'path'}; + $process->stdin->write($path . NEWLINE); + } + } + $process->stdin->close_when_empty; + + eval { + my $time_limit + = $start_time + BACKLOG_PROCESSING_TIME_LIMIT - time(); + my $count = 0; + my $sig_handler = sub { + my ($signal_name) = @_; + $signalled_lintian = 1; + $count++; + if ($signal_name eq 'ALRM') { + $got_alarm = 1 if $got_alarm >= 0; + } else { + $got_alarm = -1; + } + if ($count < 3) { + my $pid = $process->pid; + log_msg("Received SIG${signal_name}, " + . "sending SIGTERM to $pid [${count}/3]"); + $process->kill('TERM'); + if ($signal_name eq 'ALRM') { + log_msg( + 'Scheduling another alarm in 5 minutes from now...' + ); + alarm(300); + } + } else { + my $pid = $process->pid; + log_msg("Received SIG${signal_name} as the third one, " + . "sending SIGKILL to $pid"); + log_msg('You may have to clean up some ' + . 'temporary directories manually'); + $process->kill('KILL'); + } + }; + local $SIG{'TERM'} = $sig_handler; + local $SIG{'INT'} = $sig_handler; + local $SIG{'ALRM'} = $sig_handler; + + alarm($time_limit); + }; + + # Wait for lintian to terminate + $future->await; + + if ($got_alarm) { + if ($got_alarm == 1) { + # Lintian was (presumably) killed due to a + # time-out from this process + $exit_code = 2; + } else { + # Lintian was killed by another signal; notify + # harness that it should skip the rest as well. + $exit_code = 3; + } + } + + log_msg('Updating the lintian log used for reporting'); + my $filter = generate_log_filter($state, \%processed); + seek($nfd, 0, SEEK_END); + update_lintian_log($filter, $nfd, $new_lintian_log); + + log_msg('Updating harness state cache'); + # Reload the state cache, just in case it was modified by an external + # process during the lintian run. + $state = load_state_cache($OPT{'state-dir'}); + for my $group_id (@completed) { + my $group_data; + # In theory, they can disappear - in practise, that requires + # an external call to (e.g.) lintian reporting-sync-state. + next if not exists($state->{'groups'}{$group_id}); + $group_data = $state->{'groups'}{$group_id}; + $group_data->{'last-processed-by'} = $LINTIAN_VERSION; + delete($group_data->{'out-of-date'}); + # Always clear the error counter after a successful run. + delete($group_data->{'processing-errors'}); + delete($group_data->{'last-error-by'}); + } + for my $group_id (sort(keys(%errors))) { + my $group_data; + # In theory, they can disappear - in practise, that requires + # an external call to (e.g.) lintian reporting-sync-state. + next if not exists($state->{'groups'}{$group_id}); + $group_data = $state->{'groups'}{$group_id}; + if ($errors{$group_id}) { + if (not exists($group_data->{'last-error-by'}) + or $group_data->{'last-error-by'} ne $LINTIAN_VERSION) { + # If it is a new lintian version then (re)set the counter + # to 1. Case also triggers for the very first issue. + $group_data->{'processing-errors'} = 1; + } else { + # Repeated error with the same version + ++$group_data->{'processing-errors'}; + } + # Set the "last-error-by" flag so we can clear the + # error if there is a new version of lintian. + $group_data->{'last-error-by'} = $LINTIAN_VERSION; + } else { + delete($group_data->{'processing-errors'}); + } + } + save_state_cache($OPT{'state-dir'}, $state); + last if $exit_code; + } + return $exit_code; +} + +sub generate_log_filter { + my ($state, $exclude) = @_; + my %filter; + my $group_map = $state->{'groups'}; + for my $group_id (keys(%{${group_map}})) { + my $members; + next if exists($exclude->{$group_id}); + $members = $group_map->{$group_id}{'members'}; + for my $member_id (keys(%{$members})) { + $filter{$member_id} = 1; + } + } + return \%filter; +} + +sub update_lintian_log { + my ($keep_filter, $new_fd, $tmp_path) = @_; + my $lintian_log_dir = $OPT{'lintian-log-dir'}; + my $lintian_log = "${lintian_log_dir}/lintian.log"; + my $copy_mode = 0; + my $first = 1; + + eval { + open(my $input, '<', $lintian_log); + while (<$input>) { + if ( + m/^N: [ ] Processing [ ] (binary|udeb|source) [ ] + package [ ] (\S+) [ ] \(version [ ] (\S+), [ ] + arch [ ] (\S+)\)[ ]\.\.\./xsm + ) { + my ($type, $pkg, $ver, $arch) = ($1,$2, $3, $4); + my $k = "$type:$pkg/$ver"; + $k .= "/$arch" if $type ne 'source'; + $copy_mode = 0; + $copy_mode = 1 if exists($keep_filter->{$k}); + } + if ($copy_mode) { + if ($first) { + print {$new_fd} "N: ---start-of-old-lintian-log-file---\n"; + $first = 0; + } + print {$new_fd} $_; + } + } + close($input); + close($new_fd); + rename($tmp_path, $lintian_log); + }; + if (my $err = $@) { + # Unlink $new_lintian_log, we ignore errors as the one we + # already got is more important/interesting. + no autodie qw(unlink); + unlink($tmp_path) or warn("Cannot unlink $tmp_path: $!"); + die($err); + } + return; +} + +sub rewrite_lintian_log { + my ($keep_filter) = @_; + my $lintian_log_dir = $OPT{'lintian-log-dir'}; + my ($nfd, $new_lintian_log); + + ($nfd, $new_lintian_log) + = tempfile('lintian.log-XXXXXXX', DIR => $lintian_log_dir); + chmod(0644, $nfd); + update_lintian_log($keep_filter, $nfd, $new_lintian_log); + return 1; +} + +sub usage { + my $cmd = basename($0); + my $me = "lintian $cmd"; + print <<EOF; +Internal command for the Lintian reporting framework +Usage: $me <args> -- <extra lintian args> + + --help Show this text and exit + + --lintian-frontend PROG Use PROG as frontend for lintian (defaults to "lintian") + --lintian-log-dir DIR Path to the harness log dir. [!] + --lintian-scratch-space DIR Use DIR for temporary files (notably temp labs) + --schedule-chunk-size N Run at most N groups in a given lintian run. + --schedule-limit-groups N Schedule at most N groups in total. [!] + --state-dir DIR Directory containing the state cache (must be + writable). [!] + +Arguments marked with [!] are required for a successful run. +EOF + + exit(0); +} + +1; + +# Local Variables: +# indent-tabs-mode: nil +# cperl-indent-level: 4 +# End: +# vim: syntax=perl sw=4 sts=4 sr et + |