summaryrefslogtreecommitdiffstats
path: root/commands/reporting-sync-state.pm
diff options
context:
space:
mode:
Diffstat (limited to 'commands/reporting-sync-state.pm')
-rw-r--r--commands/reporting-sync-state.pm572
1 files changed, 572 insertions, 0 deletions
diff --git a/commands/reporting-sync-state.pm b/commands/reporting-sync-state.pm
new file mode 100644
index 0000000..af649a7
--- /dev/null
+++ b/commands/reporting-sync-state.pm
@@ -0,0 +1,572 @@
+#!/usr/bin/perl -w
+#
+# reporting-sync-state
+#
+# Copyright (C) 2018 Chris Lamb <lamby@debian.org>
+#
+# This program is free software. It is distributed under the terms of
+# the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any
+# later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, you can find it on the World Wide
+# Web at https://www.gnu.org/copyleft/gpl.html, or write to the Free
+# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+# MA 02110-1301, USA.
+
+package reporting_sync_state;
+
+use v5.20;
+use warnings;
+use utf8;
+use autodie;
+
+use Getopt::Long();
+use File::Basename qw(basename);
+use YAML::XS ();
+use MIME::Base64 qw(encode_base64);
+
+use Lintian::Deb822::Parser qw(visit_dpkg_paragraph_string);
+use Lintian::Relation::Version qw(versions_comparator);
+use Lintian::Reporting::Util qw(
+ find_backlog
+ load_state_cache
+ save_state_cache
+);
+use Lintian::Util qw(
+ open_gz
+);
+
+my $DEFAULT_CHECKSUM = 'Sha256';
+my (%KNOWN_MEMBERS, %ACTIVE_GROUPS);
+my $CONFIG;
+my %OPT;
+my %OPT_HASH= (
+ 'reporting-config=s'=> \$OPT{'reporting-config'},
+ 'desired-version=s' => \$OPT{'desired-version'},
+ 'reschedule-all' => \$OPT{'reschedule-all'},
+ 'help|h' => \&usage,
+ 'debug|d' => \$OPT{'debug'},
+ 'dry-run' => \$OPT{'dry-run'},
+);
+
+sub check_parameters {
+ for my $parameter (qw(reporting-config desired-version)) {
+ if (not defined($OPT{$parameter})) {
+ die( "Missing required parameter \"--${parameter}\""
+ . "(use --help for more info)\n");
+ }
+ }
+ if (not $OPT{'reporting-config'} or not -f $OPT{'reporting-config'}) {
+ die("The --reporting-config parameter must point to an existing file\n"
+ );
+ }
+ return;
+}
+
+sub required_cfg_value {
+ my (@keys) = @_;
+ my $v = $CONFIG;
+ for my $key (@keys) {
+ if (not exists($v->{$key})) {
+ my $k = join('.', @keys);
+ die("Missing required config parameter: ${k}\n");
+ }
+ $v = $v->{$key};
+ }
+ return $v;
+}
+
+sub required_cfg_non_empty_list_value {
+ my (@keys) = @_;
+ my $v = required_cfg_value(@keys);
+ if (not defined($v) or ref($v) ne 'ARRAY' or scalar(@{$v}) < 1) {
+ my $k = join('.', @keys);
+ die("Invalid configuration: ${k} must be a non-empty list\n");
+ }
+ return $v;
+}
+
+sub optional_cfg_list_value {
+ my (@keys) = @_;
+ my $v = $CONFIG;
+ for my $key (@keys) {
+ if (not exists($v->{$key})) {
+ return [];
+ }
+ $v = $v->{$key};
+ }
+ if (ref($v) ne 'ARRAY') {
+ my $k = join('.', @keys);
+ die("Invalid configuration: ${k} must be a list (or missing)\n");
+ }
+ return $v;
+}
+
+sub main {
+ my ($state_dir, $state, $archives, %blacklist);
+ STDOUT->autoflush;
+ Getopt::Long::config('bundling', 'no_getopt_compat', 'no_auto_abbrev');
+ Getopt::Long::GetOptions(%OPT_HASH) or die("error parsing options\n");
+ check_parameters();
+ $CONFIG = YAML::XS::LoadFile($OPT{'reporting-config'});
+ $state_dir = required_cfg_value('storage', 'state-cache');
+ $state = load_state_cache($state_dir);
+
+ if (upgrade_state_cache_if_needed($state)) {
+ log_debug('Updated the state cache');
+ }
+ log_debug('Initial state had '
+ . (scalar(keys(%{$state->{'groups'}})))
+ . ' groups');
+ $archives = required_cfg_value('archives');
+ %blacklist = map { $_ => 1 } @{optional_cfg_list_value('blacklist')};
+ for my $archive (sort(keys(%{$archives}))) {
+ log_debug("Processing archive $archive");
+ my $path = required_cfg_value('archives', $archive, 'base-dir');
+ my $archs = required_cfg_non_empty_list_value('archives', $archive,
+ 'architectures');
+ my $components
+ = required_cfg_non_empty_list_value('archives', $archive,
+ 'components');
+ my $distributions
+ = required_cfg_non_empty_list_value('archives', $archive,
+ 'distributions');
+ local_mirror_manifests($state, $path, $distributions, $components,
+ $archs, \%blacklist);
+ }
+
+ cleanup_state($state);
+ if (not $OPT{'dry-run'}) {
+ save_state_cache($state_dir, $state);
+ }
+ exit(0);
+}
+
+# State:
+# group-id => {
+# 'last-processed-by' => <version or undef>,
+# 'out-of-date' => <0|1>, (# if omitted => 0, unless "last-processed-by" is omitted as well)
+# 'members' => {
+# $member_id => {
+# 'sha1' => <sha1>,
+# 'path' => <path/relative/to/mirror>,
+# }
+# },
+
+sub upgrade_state_cache_if_needed {
+ my ($state) = @_;
+ if (exists($state->{'groups'})) {
+ my $updated = 0;
+ my $groups = $state->{'groups'};
+ for my $group (sort(keys(%{$groups}))) {
+ my $group_data = $groups->{$group};
+ if ( exists($group_data->{'mirror-metadata'})
+ && exists($group_data->{'mirror-metadata'}{'area'})) {
+ if (not exists($group_data->{'mirror-metadata'}{'component'})){
+ $group_data->{'mirror-metadata'}{'component'}
+ = $group_data->{'mirror-metadata'}{'area'};
+ delete($group_data->{'mirror-metadata'}{'area'});
+ $updated = 1;
+ }
+ }
+ }
+ return $updated;
+ }
+ # Migrate the "last-processed-by" version.
+ my $groups = $state->{'groups'} = {};
+ for my $key (sort(keys(%${state}))) {
+ next if $key eq 'group';
+ if (exists($state->{$key}{'last-processed-by'})) {
+ my $last_version = $state->{$key}{'last-processed-by'};
+ delete($state->{$key});
+ $groups->{$key}{'last-processed-by'} = $last_version;
+ }
+ }
+ return 1;
+}
+
+sub add_member_to_group {
+ my ($state, $group_id, $member_id, $member_data, $group_metadata) = @_;
+ # Fetch members before group_data (relying on autovivification)
+ my $members = $state->{'groups'}{$group_id}{'members'};
+ my $group_data = $state->{'groups'}{$group_id};
+ my $member;
+ my $new_group = 0;
+ if (not defined($members)) {
+ $group_data->{'members'} = $members = {};
+ log_debug(
+ "${group_id} is out-of-date: New group (triggered by ${member_id})"
+ );
+ $new_group = 1;
+ }
+
+ $member = $members->{$member_id};
+ if (not defined($member)) {
+ $members->{$member_id} = $member = {};
+ }
+
+ # Update of path is not sufficient to consider the member out of date
+ # (mirror restructuring happens - although extremely rarely)
+ $member->{'path'} = $member_data->{'path'};
+ if ($member_data->{'mirror-metadata'}
+ && keys(%{$member_data->{'mirror-metadata'}})) {
+ $member->{'mirror-metadata'} = $member_data->{'mirror-metadata'};
+ }
+ if (not exists($group_data->{'mirror-metadata'})) {
+ $group_data->{'mirror-metadata'}= $group_metadata->{'mirror-metadata'};
+ } else {
+ for my $key (keys(%{$group_metadata->{'mirror-metadata'}})) {
+ $group_data->{'mirror-metadata'}{$key}
+ = $group_metadata->{'mirror-metadata'}{$key};
+ }
+ }
+ $KNOWN_MEMBERS{"${group_id} ${member_id}"} = 1;
+ $ACTIVE_GROUPS{$group_id} = 1;
+
+ if (!exists($member->{lc $DEFAULT_CHECKSUM})
+ || $member->{lc $DEFAULT_CHECKSUM} ne
+ $member_data->{lc $DEFAULT_CHECKSUM}) {
+ if (exists($member->{lc $DEFAULT_CHECKSUM})) {
+ # This seems worth a note even if the group is already out of date
+ my $lowercase = lc $DEFAULT_CHECKSUM;
+ log_debug(
+ "${group_id} is out-of-date: ${member_id} checksum mismatch"
+ . " ($member->{$lowercase} != $member_data->{$lowercase})");
+ } elsif (not $group_data->{'out-of-date'} and not $new_group) {
+ log_debug("${group_id} is out-of-date: New member (${member_id})");
+ }
+ $group_data->{'out-of-date'} = 1;
+ $member->{lc $DEFAULT_CHECKSUM} = $member_data->{lc $DEFAULT_CHECKSUM};
+ }
+ delete($member->{'sha1'});
+
+ return;
+}
+
+sub cleanup_state {
+ my ($state) = @_;
+ my %backlog
+ = map { $_ => 1 } find_backlog($OPT{'desired-version'}, $state);
+
+ # Empty 'members-to-groups' to prune "dead links". It will be
+ # recreated by cleanup_group_state below.
+ $state->{'members-to-groups'} = {};
+
+ for my $group_id (sort(keys(%{$state->{'groups'}}))) {
+ cleanup_group_state($state, $group_id, \%backlog);
+ }
+ return;
+}
+
+sub remove_if_empty {
+ my ($hash_ref, $key) = @_;
+ my ($val, $empty);
+ return if not exists($hash_ref->{$key});
+ $val = $hash_ref->{$key};
+ if (defined($val)) {
+ $empty = 1 if (ref($val) eq 'HASH' and not keys(%${val}));
+ $empty = 1 if (ref($val) eq 'ARRAY' and not scalar(@${val}));
+ } else {
+ $empty = 1;
+ }
+ delete($hash_ref->{$key}) if $empty;
+ return;
+}
+
+sub cleanup_group_state {
+ my ($state, $group_id, $backlog) = @_;
+ my ($members);
+ my $group_data = $state->{'groups'}{$group_id};
+ $members = $group_data->{'members'};
+ if (not exists($ACTIVE_GROUPS{$group_id}) or not $members) {
+ # No members left, remove the group entirely
+ delete($state->{'groups'}{$group_id});
+ if (not exists($ACTIVE_GROUPS{$group_id})) {
+ log_debug("Group ${group_id} dropped: It is not an active group");
+ } else {
+ log_debug("Group ${group_id} dropped: No members left (early)");
+ }
+
+ return;
+ }
+
+ for my $member_id (sort(keys(%{$members}))) {
+ if (not exists($KNOWN_MEMBERS{"${group_id} ${member_id}"})) {
+ delete($members->{$member_id});
+ if (not $group_data->{'out-of-date'}) {
+ $group_data->{'out-of-date'} = 1;
+ log_debug(
+ "${group_id} is out-of-date: ${member_id} disappeared");
+ }
+ } else {
+ my $member_data = $members->{$member_id};
+ # Create "member_id to group_data" link
+ $state->{'members-to-groups'}{$member_id} = $group_data;
+ delete($member_data->{'mirror-metadata'}{'component'})
+ if exists($member_data->{'mirror-metadata'});
+ remove_if_empty($member_data, 'mirror-metadata');
+ }
+ }
+
+ # Add the "out-of-date" flag if it is in the backlog OR we were asked
+ # to reschedule all
+ if (not $group_data->{'out-of-date'}) {
+ if ($OPT{'reschedule-all'} or $backlog->{$group_id}) {
+ $group_data->{'out-of-date'} = 1;
+ log_debug("Marking ${group_id} as out of date: In backlog")
+ if $backlog->{$group_id};
+ }
+ }
+ # Check for and possible clear the error counters
+ if (
+ exists($group_data->{'processing-errors'})
+ and (not exists($group_data->{'last-error-by'})
+ or $group_data->{'last-error-by'} ne $OPT{'desired-version'})
+ ) {
+ log_debug(
+ "Clearing error-counter for ${group_id}: New version of lintian");
+ delete($group_data->{'processing-errors'});
+ # Leave "last-error-by" as we can use that to tell if the previous
+ # version triggered errors.
+ }
+
+ if (not %{$members}) {
+ # No members left, remove the group entirely. This should not happen
+ # as the ACTIVE_GROUPS check above ought to have caught this.
+ delete($state->{$group_id});
+ log_debug("Group ${group_id} dropped: No members left (late)");
+ } else {
+ # remove redundant fields
+ remove_if_empty($group_data, 'out-of-date');
+ for my $metadata_field (qw(component maintainer uploaders)) {
+ remove_if_empty($group_data->{'mirror-metadata'}, $metadata_field);
+ }
+ remove_if_empty($group_data, 'mirror-metadata');
+ }
+
+ return;
+}
+
+# Helper for local_mirror_manifests - it parses a paragraph from Sources file
+sub _parse_srcs_pg {
+ my ($state, $blacklist, $extra_metadata, $paragraph) = @_;
+ my $dir = $paragraph->{'Directory'}//'';
+ my $group_id = $paragraph->{'Package'} . '/' . $paragraph->{'Version'};
+ my $member_id = "source:${group_id}";
+ my (%data, %group_metadata, $group_mirror_md);
+ if (exists $blacklist->{$paragraph->{'Package'}}) {
+ log_debug("Ignoring blacklisted package src:$paragraph->{'Package'}");
+ return;
+ }
+ # only include the source if it has any binaries to be checked.
+ # - Otherwise we may end up checking a source with no binaries
+ # (happens if the architecture is "behind" in building)
+ return unless $ACTIVE_GROUPS{$group_id};
+ $dir .= '/' if $dir;
+ foreach my $f (split m/\n/, $paragraph->{"Checksums-${DEFAULT_CHECKSUM}"}){
+
+ # trim both ends
+ $f =~ s/^\s+|\s+$//g;
+
+ next unless $f && $f =~ m/\.dsc$/;
+ my ($checksum, undef, $basename) = split(m/\s++/, $f);
+ my $b64_checksum = encode_base64(pack('H*', $checksum));
+ # $dir should end with a slash if it is non-empty.
+ $data{lc $DEFAULT_CHECKSUM} = $b64_checksum;
+ $data{'path'} = $extra_metadata->{'mirror-dir'} . "/$dir" . $basename;
+ last;
+ }
+
+ $group_mirror_md = $group_metadata{'mirror-metadata'} = {};
+ $group_mirror_md->{'component'} = $extra_metadata->{'component'};
+ $group_mirror_md->{'maintainer'} = $paragraph->{'maintainer'};
+ if (my $uploaders = $paragraph->{'uploaders'}) {
+ my @ulist = split(/>\K\s*,\s*/, $uploaders);
+ $group_mirror_md->{'uploaders'} = \@ulist;
+ }
+
+ add_member_to_group($state, $group_id, $member_id, \%data,
+ \%group_metadata);
+ return;
+}
+
+# Helper for local_mirror_manifests - it parses a paragraph from Packages file
+sub _parse_pkgs_pg {
+ my ($state, $blacklist, $extra_metadata, $type, $paragraph) = @_;
+ my ($group_id, $member_id, %data, %group_metadata, $b64_checksum);
+ my $package = $paragraph->{'Package'};
+ my $version = $paragraph->{'Version'};
+ my $architecture = $paragraph->{'Architecture'};
+ if (not defined($paragraph->{'Source'})) {
+ $paragraph->{'Source'} = $package;
+ } elsif ($paragraph->{'Source'} =~ /^([-+\.\w]+)\s+\((.+)\)$/) {
+ $paragraph->{'Source'} = $1;
+ $paragraph->{'Source-version'} = $2;
+ }
+ if (exists $blacklist->{$paragraph->{'Source'}}) {
+ log_debug("Ignoring binary package $package: it is part of "
+ . "blacklisted source package $paragraph->{'Source'}");
+ return;
+ }
+ if (not defined($paragraph->{'Source-Version'})) {
+ $paragraph->{'Source-Version'} = $paragraph->{'Version'};
+ }
+ $group_id = $paragraph->{'Source'} . '/' . $paragraph->{'Source-Version'};
+ $member_id = "${type}:${package}/${version}/${architecture}";
+ $data{'path'}
+ = $extra_metadata->{'mirror-dir'} . '/' . $paragraph->{'filename'};
+ $b64_checksum
+ = encode_base64(pack('H*', $paragraph->{lc $DEFAULT_CHECKSUM}));
+ $data{lc $DEFAULT_CHECKSUM} = $b64_checksum;
+
+ $group_metadata{'mirror-metadata'}{'maintainer'}
+ = $paragraph->{'Maintainer'};
+ if (my $uploaders = $paragraph->{'Uploaders'}) {
+ my @ulist = split(/>\K\s*,\s*/, $uploaders);
+ $group_metadata{'mirror-metadata'}{'uploaders'} = \@ulist;
+ }
+
+ add_member_to_group($state, $group_id, $member_id, \%data,
+ \%group_metadata);
+ return;
+}
+
+# local_mirror_manifests ($mirdir, $dists, $components, $archs)
+#
+# Returns a list of manifests that represents what is on the local mirror
+# at $mirdir. 3 manifests will be returned, one for "source", one for "binary"
+# and one for "udeb" packages. They are populated based on the "Sources" and
+# "Packages" files.
+#
+# $mirdir - the path to the local mirror
+# $dists - listref of dists to consider (e.g. ['unstable'])
+# $components - listref of components to consider (e.g. ['main', 'contrib', 'non-free'])
+# $archs - listref of archs to consider (e.g. ['i386', 'amd64'])
+# $blacklist - hashref of source packages to ignore (e.g. {'gcc-8-cross-ports' => 1})
+#
+sub local_mirror_manifests {
+ my ($state, $mirdir, $dists, $components, $archs, $blacklist) = @_;
+ foreach my $dist (@$dists) {
+ foreach my $component (@{$components}) {
+ my $srcs = "$mirdir/dists/$dist/$component/source/Sources";
+
+ my %extra_metadata = (
+ 'component' => $component,
+ 'mirror-dir' => $mirdir,
+ );
+
+ # Binaries have a "per arch" file.
+ # - we check those first and then include the source packages that
+ # are referred to by these binaries.
+
+ my $dist_path = "$mirdir/dists/$dist/$component";
+ for my $arch (@{$archs}) {
+
+ my $pkgs = "${dist_path}/binary-$arch/Packages";
+ my $pkgfd = _open_data_file($pkgs);
+ local $/ = undef;
+ my $pkgstring = <$pkgfd>;
+ close $pkgfd;
+
+ my $binsub = sub {
+ _parse_pkgs_pg($state, $blacklist, \%extra_metadata,
+ 'binary', @_);
+ };
+ visit_dpkg_paragraph_string($binsub, $pkgstring);
+
+ my $upkgs
+ = "${dist_path}/debian-installer/binary-$arch/Packages";
+ my $upkgfd = _open_data_file($upkgs);
+ local $/ = undef;
+ my $upkgstring = <$upkgfd>;
+ close $upkgfd;
+
+ my $udebsub = sub {
+ _parse_pkgs_pg($state, $blacklist, \%extra_metadata,
+ 'udeb', @_);
+ };
+ visit_dpkg_paragraph_string($udebsub, $upkgstring);
+ }
+
+ my $srcfd = _open_data_file($srcs);
+ local $/ = undef;
+ my $srcstring = <$srcfd>;
+ close $srcfd;
+
+ my $srcsub
+ = sub { _parse_srcs_pg($state, $blacklist, \%extra_metadata, @_) };
+ visit_dpkg_paragraph_string($srcsub, $srcstring);
+ }
+ }
+ return;
+}
+
+# _open_data_file ($file)
+#
+# Opens $file if it exists, otherwise it tries common extensions (i.e. .gz) and opens
+# that instead. It may pipe the file through an external decompressor, so the returned
+# file descriptor cannot be assumed to be a file.
+#
+# If $file does not exists and no common extensions are found, this dies. It may also
+# die if it finds a file, but is unable to open it.
+sub _open_data_file {
+ my ($file) = @_;
+ if (-e $file) {
+ open(my $fd, '<:encoding(UTF-8)', $file);
+ return $fd;
+ }
+ if (-e "${file}.gz") {
+ my $fd = open_gz("${file}.gz");
+ binmode($fd, 'encoding(UTF-8)');
+ return $fd;
+ }
+ if (-e "${file}.xz") {
+ open(my $fd, '-|', 'xz', '-dc', "${file}.xz");
+ binmode($fd, 'encoding(UTF-8)');
+ return $fd;
+ }
+ die("Cannot find ${file}: file does not exist");
+}
+
+sub log_debug {
+ if ($OPT{'debug'}) {
+ print "$_[0]\n";
+ }
+ return;
+}
+
+sub usage {
+ my $cmd = basename($0);
+ my $me = "lintian $cmd";
+ print <<EOF;
+Internal command for the Lintian reporting framework
+Usage: $me <args>
+
+ --help Show this text and exit
+ --debug Show/log debugging output
+
+ --reporting-config FILE
+ Path to the configuration file (listing the archive definitions) [!]
+ --desired-version X The desired "last-processed-by" Lintian version. [!]
+
+Arguments marked with [!] are required for a successful run.
+
+EOF
+
+ exit(0);
+}
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et