#!/usr/bin/perl -w # # reporting-sync-state # # Copyright (C) 2018 Chris Lamb # # This program is free software. It is distributed under the terms of # the GNU General Public License as published by the Free Software # Foundation; either version 2 of the License, or (at your option) any # later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, you can find it on the World Wide # Web at https://www.gnu.org/copyleft/gpl.html, or write to the Free # Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, # MA 02110-1301, USA. package reporting_sync_state; use v5.20; use warnings; use utf8; use autodie; use Getopt::Long(); use File::Basename qw(basename); use YAML::XS (); use MIME::Base64 qw(encode_base64); use Lintian::Deb822::Parser qw(visit_dpkg_paragraph_string); use Lintian::Relation::Version qw(versions_comparator); use Lintian::Reporting::Util qw( find_backlog load_state_cache save_state_cache ); use Lintian::Util qw( open_gz ); my $DEFAULT_CHECKSUM = 'Sha256'; my (%KNOWN_MEMBERS, %ACTIVE_GROUPS); my $CONFIG; my %OPT; my %OPT_HASH= ( 'reporting-config=s'=> \$OPT{'reporting-config'}, 'desired-version=s' => \$OPT{'desired-version'}, 'reschedule-all' => \$OPT{'reschedule-all'}, 'help|h' => \&usage, 'debug|d' => \$OPT{'debug'}, 'dry-run' => \$OPT{'dry-run'}, ); sub check_parameters { for my $parameter (qw(reporting-config desired-version)) { if (not defined($OPT{$parameter})) { die( "Missing required parameter \"--${parameter}\"" . "(use --help for more info)\n"); } } if (not $OPT{'reporting-config'} or not -f $OPT{'reporting-config'}) { die("The --reporting-config parameter must point to an existing file\n" ); } return; } sub required_cfg_value { my (@keys) = @_; my $v = $CONFIG; for my $key (@keys) { if (not exists($v->{$key})) { my $k = join('.', @keys); die("Missing required config parameter: ${k}\n"); } $v = $v->{$key}; } return $v; } sub required_cfg_non_empty_list_value { my (@keys) = @_; my $v = required_cfg_value(@keys); if (not defined($v) or ref($v) ne 'ARRAY' or scalar(@{$v}) < 1) { my $k = join('.', @keys); die("Invalid configuration: ${k} must be a non-empty list\n"); } return $v; } sub optional_cfg_list_value { my (@keys) = @_; my $v = $CONFIG; for my $key (@keys) { if (not exists($v->{$key})) { return []; } $v = $v->{$key}; } if (ref($v) ne 'ARRAY') { my $k = join('.', @keys); die("Invalid configuration: ${k} must be a list (or missing)\n"); } return $v; } sub main { my ($state_dir, $state, $archives, %blacklist); STDOUT->autoflush; Getopt::Long::config('bundling', 'no_getopt_compat', 'no_auto_abbrev'); Getopt::Long::GetOptions(%OPT_HASH) or die("error parsing options\n"); check_parameters(); $CONFIG = YAML::XS::LoadFile($OPT{'reporting-config'}); $state_dir = required_cfg_value('storage', 'state-cache'); $state = load_state_cache($state_dir); if (upgrade_state_cache_if_needed($state)) { log_debug('Updated the state cache'); } log_debug('Initial state had ' . (scalar(keys(%{$state->{'groups'}}))) . ' groups'); $archives = required_cfg_value('archives'); %blacklist = map { $_ => 1 } @{optional_cfg_list_value('blacklist')}; for my $archive (sort(keys(%{$archives}))) { log_debug("Processing archive $archive"); my $path = required_cfg_value('archives', $archive, 'base-dir'); my $archs = required_cfg_non_empty_list_value('archives', $archive, 'architectures'); my $components = required_cfg_non_empty_list_value('archives', $archive, 'components'); my $distributions = required_cfg_non_empty_list_value('archives', $archive, 'distributions'); local_mirror_manifests($state, $path, $distributions, $components, $archs, \%blacklist); } cleanup_state($state); if (not $OPT{'dry-run'}) { save_state_cache($state_dir, $state); } exit(0); } # State: # group-id => { # 'last-processed-by' => , # 'out-of-date' => <0|1>, (# if omitted => 0, unless "last-processed-by" is omitted as well) # 'members' => { # $member_id => { # 'sha1' => , # 'path' => , # } # }, sub upgrade_state_cache_if_needed { my ($state) = @_; if (exists($state->{'groups'})) { my $updated = 0; my $groups = $state->{'groups'}; for my $group (sort(keys(%{$groups}))) { my $group_data = $groups->{$group}; if ( exists($group_data->{'mirror-metadata'}) && exists($group_data->{'mirror-metadata'}{'area'})) { if (not exists($group_data->{'mirror-metadata'}{'component'})){ $group_data->{'mirror-metadata'}{'component'} = $group_data->{'mirror-metadata'}{'area'}; delete($group_data->{'mirror-metadata'}{'area'}); $updated = 1; } } } return $updated; } # Migrate the "last-processed-by" version. my $groups = $state->{'groups'} = {}; for my $key (sort(keys(%${state}))) { next if $key eq 'group'; if (exists($state->{$key}{'last-processed-by'})) { my $last_version = $state->{$key}{'last-processed-by'}; delete($state->{$key}); $groups->{$key}{'last-processed-by'} = $last_version; } } return 1; } sub add_member_to_group { my ($state, $group_id, $member_id, $member_data, $group_metadata) = @_; # Fetch members before group_data (relying on autovivification) my $members = $state->{'groups'}{$group_id}{'members'}; my $group_data = $state->{'groups'}{$group_id}; my $member; my $new_group = 0; if (not defined($members)) { $group_data->{'members'} = $members = {}; log_debug( "${group_id} is out-of-date: New group (triggered by ${member_id})" ); $new_group = 1; } $member = $members->{$member_id}; if (not defined($member)) { $members->{$member_id} = $member = {}; } # Update of path is not sufficient to consider the member out of date # (mirror restructuring happens - although extremely rarely) $member->{'path'} = $member_data->{'path'}; if ($member_data->{'mirror-metadata'} && keys(%{$member_data->{'mirror-metadata'}})) { $member->{'mirror-metadata'} = $member_data->{'mirror-metadata'}; } if (not exists($group_data->{'mirror-metadata'})) { $group_data->{'mirror-metadata'}= $group_metadata->{'mirror-metadata'}; } else { for my $key (keys(%{$group_metadata->{'mirror-metadata'}})) { $group_data->{'mirror-metadata'}{$key} = $group_metadata->{'mirror-metadata'}{$key}; } } $KNOWN_MEMBERS{"${group_id} ${member_id}"} = 1; $ACTIVE_GROUPS{$group_id} = 1; if (!exists($member->{lc $DEFAULT_CHECKSUM}) || $member->{lc $DEFAULT_CHECKSUM} ne $member_data->{lc $DEFAULT_CHECKSUM}) { if (exists($member->{lc $DEFAULT_CHECKSUM})) { # This seems worth a note even if the group is already out of date my $lowercase = lc $DEFAULT_CHECKSUM; log_debug( "${group_id} is out-of-date: ${member_id} checksum mismatch" . " ($member->{$lowercase} != $member_data->{$lowercase})"); } elsif (not $group_data->{'out-of-date'} and not $new_group) { log_debug("${group_id} is out-of-date: New member (${member_id})"); } $group_data->{'out-of-date'} = 1; $member->{lc $DEFAULT_CHECKSUM} = $member_data->{lc $DEFAULT_CHECKSUM}; } delete($member->{'sha1'}); return; } sub cleanup_state { my ($state) = @_; my %backlog = map { $_ => 1 } find_backlog($OPT{'desired-version'}, $state); # Empty 'members-to-groups' to prune "dead links". It will be # recreated by cleanup_group_state below. $state->{'members-to-groups'} = {}; for my $group_id (sort(keys(%{$state->{'groups'}}))) { cleanup_group_state($state, $group_id, \%backlog); } return; } sub remove_if_empty { my ($hash_ref, $key) = @_; my ($val, $empty); return if not exists($hash_ref->{$key}); $val = $hash_ref->{$key}; if (defined($val)) { $empty = 1 if (ref($val) eq 'HASH' and not keys(%${val})); $empty = 1 if (ref($val) eq 'ARRAY' and not scalar(@${val})); } else { $empty = 1; } delete($hash_ref->{$key}) if $empty; return; } sub cleanup_group_state { my ($state, $group_id, $backlog) = @_; my ($members); my $group_data = $state->{'groups'}{$group_id}; $members = $group_data->{'members'}; if (not exists($ACTIVE_GROUPS{$group_id}) or not $members) { # No members left, remove the group entirely delete($state->{'groups'}{$group_id}); if (not exists($ACTIVE_GROUPS{$group_id})) { log_debug("Group ${group_id} dropped: It is not an active group"); } else { log_debug("Group ${group_id} dropped: No members left (early)"); } return; } for my $member_id (sort(keys(%{$members}))) { if (not exists($KNOWN_MEMBERS{"${group_id} ${member_id}"})) { delete($members->{$member_id}); if (not $group_data->{'out-of-date'}) { $group_data->{'out-of-date'} = 1; log_debug( "${group_id} is out-of-date: ${member_id} disappeared"); } } else { my $member_data = $members->{$member_id}; # Create "member_id to group_data" link $state->{'members-to-groups'}{$member_id} = $group_data; delete($member_data->{'mirror-metadata'}{'component'}) if exists($member_data->{'mirror-metadata'}); remove_if_empty($member_data, 'mirror-metadata'); } } # Add the "out-of-date" flag if it is in the backlog OR we were asked # to reschedule all if (not $group_data->{'out-of-date'}) { if ($OPT{'reschedule-all'} or $backlog->{$group_id}) { $group_data->{'out-of-date'} = 1; log_debug("Marking ${group_id} as out of date: In backlog") if $backlog->{$group_id}; } } # Check for and possible clear the error counters if ( exists($group_data->{'processing-errors'}) and (not exists($group_data->{'last-error-by'}) or $group_data->{'last-error-by'} ne $OPT{'desired-version'}) ) { log_debug( "Clearing error-counter for ${group_id}: New version of lintian"); delete($group_data->{'processing-errors'}); # Leave "last-error-by" as we can use that to tell if the previous # version triggered errors. } if (not %{$members}) { # No members left, remove the group entirely. This should not happen # as the ACTIVE_GROUPS check above ought to have caught this. delete($state->{$group_id}); log_debug("Group ${group_id} dropped: No members left (late)"); } else { # remove redundant fields remove_if_empty($group_data, 'out-of-date'); for my $metadata_field (qw(component maintainer uploaders)) { remove_if_empty($group_data->{'mirror-metadata'}, $metadata_field); } remove_if_empty($group_data, 'mirror-metadata'); } return; } # Helper for local_mirror_manifests - it parses a paragraph from Sources file sub _parse_srcs_pg { my ($state, $blacklist, $extra_metadata, $paragraph) = @_; my $dir = $paragraph->{'Directory'}//''; my $group_id = $paragraph->{'Package'} . '/' . $paragraph->{'Version'}; my $member_id = "source:${group_id}"; my (%data, %group_metadata, $group_mirror_md); if (exists $blacklist->{$paragraph->{'Package'}}) { log_debug("Ignoring blacklisted package src:$paragraph->{'Package'}"); return; } # only include the source if it has any binaries to be checked. # - Otherwise we may end up checking a source with no binaries # (happens if the architecture is "behind" in building) return unless $ACTIVE_GROUPS{$group_id}; $dir .= '/' if $dir; foreach my $f (split m/\n/, $paragraph->{"Checksums-${DEFAULT_CHECKSUM}"}){ # trim both ends $f =~ s/^\s+|\s+$//g; next unless $f && $f =~ m/\.dsc$/; my ($checksum, undef, $basename) = split(m/\s++/, $f); my $b64_checksum = encode_base64(pack('H*', $checksum)); # $dir should end with a slash if it is non-empty. $data{lc $DEFAULT_CHECKSUM} = $b64_checksum; $data{'path'} = $extra_metadata->{'mirror-dir'} . "/$dir" . $basename; last; } $group_mirror_md = $group_metadata{'mirror-metadata'} = {}; $group_mirror_md->{'component'} = $extra_metadata->{'component'}; $group_mirror_md->{'maintainer'} = $paragraph->{'maintainer'}; if (my $uploaders = $paragraph->{'uploaders'}) { my @ulist = split(/>\K\s*,\s*/, $uploaders); $group_mirror_md->{'uploaders'} = \@ulist; } add_member_to_group($state, $group_id, $member_id, \%data, \%group_metadata); return; } # Helper for local_mirror_manifests - it parses a paragraph from Packages file sub _parse_pkgs_pg { my ($state, $blacklist, $extra_metadata, $type, $paragraph) = @_; my ($group_id, $member_id, %data, %group_metadata, $b64_checksum); my $package = $paragraph->{'Package'}; my $version = $paragraph->{'Version'}; my $architecture = $paragraph->{'Architecture'}; if (not defined($paragraph->{'Source'})) { $paragraph->{'Source'} = $package; } elsif ($paragraph->{'Source'} =~ /^([-+\.\w]+)\s+\((.+)\)$/) { $paragraph->{'Source'} = $1; $paragraph->{'Source-version'} = $2; } if (exists $blacklist->{$paragraph->{'Source'}}) { log_debug("Ignoring binary package $package: it is part of " . "blacklisted source package $paragraph->{'Source'}"); return; } if (not defined($paragraph->{'Source-Version'})) { $paragraph->{'Source-Version'} = $paragraph->{'Version'}; } $group_id = $paragraph->{'Source'} . '/' . $paragraph->{'Source-Version'}; $member_id = "${type}:${package}/${version}/${architecture}"; $data{'path'} = $extra_metadata->{'mirror-dir'} . '/' . $paragraph->{'filename'}; $b64_checksum = encode_base64(pack('H*', $paragraph->{lc $DEFAULT_CHECKSUM})); $data{lc $DEFAULT_CHECKSUM} = $b64_checksum; $group_metadata{'mirror-metadata'}{'maintainer'} = $paragraph->{'Maintainer'}; if (my $uploaders = $paragraph->{'Uploaders'}) { my @ulist = split(/>\K\s*,\s*/, $uploaders); $group_metadata{'mirror-metadata'}{'uploaders'} = \@ulist; } add_member_to_group($state, $group_id, $member_id, \%data, \%group_metadata); return; } # local_mirror_manifests ($mirdir, $dists, $components, $archs) # # Returns a list of manifests that represents what is on the local mirror # at $mirdir. 3 manifests will be returned, one for "source", one for "binary" # and one for "udeb" packages. They are populated based on the "Sources" and # "Packages" files. # # $mirdir - the path to the local mirror # $dists - listref of dists to consider (e.g. ['unstable']) # $components - listref of components to consider (e.g. ['main', 'contrib', 'non-free']) # $archs - listref of archs to consider (e.g. ['i386', 'amd64']) # $blacklist - hashref of source packages to ignore (e.g. {'gcc-8-cross-ports' => 1}) # sub local_mirror_manifests { my ($state, $mirdir, $dists, $components, $archs, $blacklist) = @_; foreach my $dist (@$dists) { foreach my $component (@{$components}) { my $srcs = "$mirdir/dists/$dist/$component/source/Sources"; my %extra_metadata = ( 'component' => $component, 'mirror-dir' => $mirdir, ); # Binaries have a "per arch" file. # - we check those first and then include the source packages that # are referred to by these binaries. my $dist_path = "$mirdir/dists/$dist/$component"; for my $arch (@{$archs}) { my $pkgs = "${dist_path}/binary-$arch/Packages"; my $pkgfd = _open_data_file($pkgs); local $/ = undef; my $pkgstring = <$pkgfd>; close $pkgfd; my $binsub = sub { _parse_pkgs_pg($state, $blacklist, \%extra_metadata, 'binary', @_); }; visit_dpkg_paragraph_string($binsub, $pkgstring); my $upkgs = "${dist_path}/debian-installer/binary-$arch/Packages"; my $upkgfd = _open_data_file($upkgs); local $/ = undef; my $upkgstring = <$upkgfd>; close $upkgfd; my $udebsub = sub { _parse_pkgs_pg($state, $blacklist, \%extra_metadata, 'udeb', @_); }; visit_dpkg_paragraph_string($udebsub, $upkgstring); } my $srcfd = _open_data_file($srcs); local $/ = undef; my $srcstring = <$srcfd>; close $srcfd; my $srcsub = sub { _parse_srcs_pg($state, $blacklist, \%extra_metadata, @_) }; visit_dpkg_paragraph_string($srcsub, $srcstring); } } return; } # _open_data_file ($file) # # Opens $file if it exists, otherwise it tries common extensions (i.e. .gz) and opens # that instead. It may pipe the file through an external decompressor, so the returned # file descriptor cannot be assumed to be a file. # # If $file does not exists and no common extensions are found, this dies. It may also # die if it finds a file, but is unable to open it. sub _open_data_file { my ($file) = @_; if (-e $file) { open(my $fd, '<:encoding(UTF-8)', $file); return $fd; } if (-e "${file}.gz") { my $fd = open_gz("${file}.gz"); binmode($fd, 'encoding(UTF-8)'); return $fd; } if (-e "${file}.xz") { open(my $fd, '-|', 'xz', '-dc', "${file}.xz"); binmode($fd, 'encoding(UTF-8)'); return $fd; } die("Cannot find ${file}: file does not exist"); } sub log_debug { if ($OPT{'debug'}) { print "$_[0]\n"; } return; } sub usage { my $cmd = basename($0); my $me = "lintian $cmd"; print < --help Show this text and exit --debug Show/log debugging output --reporting-config FILE Path to the configuration file (listing the archive definitions) [!] --desired-version X The desired "last-processed-by" Lintian version. [!] Arguments marked with [!] are required for a successful run. EOF exit(0); } 1; # Local Variables: # indent-tabs-mode: nil # cperl-indent-level: 4 # End: # vim: syntax=perl sw=4 sts=4 sr et