diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-14 13:42:30 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-14 13:42:30 +0000 |
commit | 75808db17caf8b960b351e3408e74142f4c85aac (patch) | |
tree | 7989e9c09a4240248bf4658a22208a0a52d991c4 /commands/reporting-sync-state.pm | |
parent | Initial commit. (diff) | |
download | lintian-75808db17caf8b960b351e3408e74142f4c85aac.tar.xz lintian-75808db17caf8b960b351e3408e74142f4c85aac.zip |
Adding upstream version 2.117.0.upstream/2.117.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'commands/reporting-sync-state.pm')
-rw-r--r-- | commands/reporting-sync-state.pm | 572 |
1 files changed, 572 insertions, 0 deletions
diff --git a/commands/reporting-sync-state.pm b/commands/reporting-sync-state.pm new file mode 100644 index 0000000..af649a7 --- /dev/null +++ b/commands/reporting-sync-state.pm @@ -0,0 +1,572 @@ +#!/usr/bin/perl -w +# +# reporting-sync-state +# +# Copyright (C) 2018 Chris Lamb <lamby@debian.org> +# +# This program is free software. It is distributed under the terms of +# the GNU General Public License as published by the Free Software +# Foundation; either version 2 of the License, or (at your option) any +# later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, you can find it on the World Wide +# Web at https://www.gnu.org/copyleft/gpl.html, or write to the Free +# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, +# MA 02110-1301, USA. + +package reporting_sync_state; + +use v5.20; +use warnings; +use utf8; +use autodie; + +use Getopt::Long(); +use File::Basename qw(basename); +use YAML::XS (); +use MIME::Base64 qw(encode_base64); + +use Lintian::Deb822::Parser qw(visit_dpkg_paragraph_string); +use Lintian::Relation::Version qw(versions_comparator); +use Lintian::Reporting::Util qw( + find_backlog + load_state_cache + save_state_cache +); +use Lintian::Util qw( + open_gz +); + +my $DEFAULT_CHECKSUM = 'Sha256'; +my (%KNOWN_MEMBERS, %ACTIVE_GROUPS); +my $CONFIG; +my %OPT; +my %OPT_HASH= ( + 'reporting-config=s'=> \$OPT{'reporting-config'}, + 'desired-version=s' => \$OPT{'desired-version'}, + 'reschedule-all' => \$OPT{'reschedule-all'}, + 'help|h' => \&usage, + 'debug|d' => \$OPT{'debug'}, + 'dry-run' => \$OPT{'dry-run'}, +); + +sub check_parameters { + for my $parameter (qw(reporting-config desired-version)) { + if (not defined($OPT{$parameter})) { + die( "Missing required parameter \"--${parameter}\"" + . "(use --help for more info)\n"); + } + } + if (not $OPT{'reporting-config'} or not -f $OPT{'reporting-config'}) { + die("The --reporting-config parameter must point to an existing file\n" + ); + } + return; +} + +sub required_cfg_value { + my (@keys) = @_; + my $v = $CONFIG; + for my $key (@keys) { + if (not exists($v->{$key})) { + my $k = join('.', @keys); + die("Missing required config parameter: ${k}\n"); + } + $v = $v->{$key}; + } + return $v; +} + +sub required_cfg_non_empty_list_value { + my (@keys) = @_; + my $v = required_cfg_value(@keys); + if (not defined($v) or ref($v) ne 'ARRAY' or scalar(@{$v}) < 1) { + my $k = join('.', @keys); + die("Invalid configuration: ${k} must be a non-empty list\n"); + } + return $v; +} + +sub optional_cfg_list_value { + my (@keys) = @_; + my $v = $CONFIG; + for my $key (@keys) { + if (not exists($v->{$key})) { + return []; + } + $v = $v->{$key}; + } + if (ref($v) ne 'ARRAY') { + my $k = join('.', @keys); + die("Invalid configuration: ${k} must be a list (or missing)\n"); + } + return $v; +} + +sub main { + my ($state_dir, $state, $archives, %blacklist); + STDOUT->autoflush; + Getopt::Long::config('bundling', 'no_getopt_compat', 'no_auto_abbrev'); + Getopt::Long::GetOptions(%OPT_HASH) or die("error parsing options\n"); + check_parameters(); + $CONFIG = YAML::XS::LoadFile($OPT{'reporting-config'}); + $state_dir = required_cfg_value('storage', 'state-cache'); + $state = load_state_cache($state_dir); + + if (upgrade_state_cache_if_needed($state)) { + log_debug('Updated the state cache'); + } + log_debug('Initial state had ' + . (scalar(keys(%{$state->{'groups'}}))) + . ' groups'); + $archives = required_cfg_value('archives'); + %blacklist = map { $_ => 1 } @{optional_cfg_list_value('blacklist')}; + for my $archive (sort(keys(%{$archives}))) { + log_debug("Processing archive $archive"); + my $path = required_cfg_value('archives', $archive, 'base-dir'); + my $archs = required_cfg_non_empty_list_value('archives', $archive, + 'architectures'); + my $components + = required_cfg_non_empty_list_value('archives', $archive, + 'components'); + my $distributions + = required_cfg_non_empty_list_value('archives', $archive, + 'distributions'); + local_mirror_manifests($state, $path, $distributions, $components, + $archs, \%blacklist); + } + + cleanup_state($state); + if (not $OPT{'dry-run'}) { + save_state_cache($state_dir, $state); + } + exit(0); +} + +# State: +# group-id => { +# 'last-processed-by' => <version or undef>, +# 'out-of-date' => <0|1>, (# if omitted => 0, unless "last-processed-by" is omitted as well) +# 'members' => { +# $member_id => { +# 'sha1' => <sha1>, +# 'path' => <path/relative/to/mirror>, +# } +# }, + +sub upgrade_state_cache_if_needed { + my ($state) = @_; + if (exists($state->{'groups'})) { + my $updated = 0; + my $groups = $state->{'groups'}; + for my $group (sort(keys(%{$groups}))) { + my $group_data = $groups->{$group}; + if ( exists($group_data->{'mirror-metadata'}) + && exists($group_data->{'mirror-metadata'}{'area'})) { + if (not exists($group_data->{'mirror-metadata'}{'component'})){ + $group_data->{'mirror-metadata'}{'component'} + = $group_data->{'mirror-metadata'}{'area'}; + delete($group_data->{'mirror-metadata'}{'area'}); + $updated = 1; + } + } + } + return $updated; + } + # Migrate the "last-processed-by" version. + my $groups = $state->{'groups'} = {}; + for my $key (sort(keys(%${state}))) { + next if $key eq 'group'; + if (exists($state->{$key}{'last-processed-by'})) { + my $last_version = $state->{$key}{'last-processed-by'}; + delete($state->{$key}); + $groups->{$key}{'last-processed-by'} = $last_version; + } + } + return 1; +} + +sub add_member_to_group { + my ($state, $group_id, $member_id, $member_data, $group_metadata) = @_; + # Fetch members before group_data (relying on autovivification) + my $members = $state->{'groups'}{$group_id}{'members'}; + my $group_data = $state->{'groups'}{$group_id}; + my $member; + my $new_group = 0; + if (not defined($members)) { + $group_data->{'members'} = $members = {}; + log_debug( + "${group_id} is out-of-date: New group (triggered by ${member_id})" + ); + $new_group = 1; + } + + $member = $members->{$member_id}; + if (not defined($member)) { + $members->{$member_id} = $member = {}; + } + + # Update of path is not sufficient to consider the member out of date + # (mirror restructuring happens - although extremely rarely) + $member->{'path'} = $member_data->{'path'}; + if ($member_data->{'mirror-metadata'} + && keys(%{$member_data->{'mirror-metadata'}})) { + $member->{'mirror-metadata'} = $member_data->{'mirror-metadata'}; + } + if (not exists($group_data->{'mirror-metadata'})) { + $group_data->{'mirror-metadata'}= $group_metadata->{'mirror-metadata'}; + } else { + for my $key (keys(%{$group_metadata->{'mirror-metadata'}})) { + $group_data->{'mirror-metadata'}{$key} + = $group_metadata->{'mirror-metadata'}{$key}; + } + } + $KNOWN_MEMBERS{"${group_id} ${member_id}"} = 1; + $ACTIVE_GROUPS{$group_id} = 1; + + if (!exists($member->{lc $DEFAULT_CHECKSUM}) + || $member->{lc $DEFAULT_CHECKSUM} ne + $member_data->{lc $DEFAULT_CHECKSUM}) { + if (exists($member->{lc $DEFAULT_CHECKSUM})) { + # This seems worth a note even if the group is already out of date + my $lowercase = lc $DEFAULT_CHECKSUM; + log_debug( + "${group_id} is out-of-date: ${member_id} checksum mismatch" + . " ($member->{$lowercase} != $member_data->{$lowercase})"); + } elsif (not $group_data->{'out-of-date'} and not $new_group) { + log_debug("${group_id} is out-of-date: New member (${member_id})"); + } + $group_data->{'out-of-date'} = 1; + $member->{lc $DEFAULT_CHECKSUM} = $member_data->{lc $DEFAULT_CHECKSUM}; + } + delete($member->{'sha1'}); + + return; +} + +sub cleanup_state { + my ($state) = @_; + my %backlog + = map { $_ => 1 } find_backlog($OPT{'desired-version'}, $state); + + # Empty 'members-to-groups' to prune "dead links". It will be + # recreated by cleanup_group_state below. + $state->{'members-to-groups'} = {}; + + for my $group_id (sort(keys(%{$state->{'groups'}}))) { + cleanup_group_state($state, $group_id, \%backlog); + } + return; +} + +sub remove_if_empty { + my ($hash_ref, $key) = @_; + my ($val, $empty); + return if not exists($hash_ref->{$key}); + $val = $hash_ref->{$key}; + if (defined($val)) { + $empty = 1 if (ref($val) eq 'HASH' and not keys(%${val})); + $empty = 1 if (ref($val) eq 'ARRAY' and not scalar(@${val})); + } else { + $empty = 1; + } + delete($hash_ref->{$key}) if $empty; + return; +} + +sub cleanup_group_state { + my ($state, $group_id, $backlog) = @_; + my ($members); + my $group_data = $state->{'groups'}{$group_id}; + $members = $group_data->{'members'}; + if (not exists($ACTIVE_GROUPS{$group_id}) or not $members) { + # No members left, remove the group entirely + delete($state->{'groups'}{$group_id}); + if (not exists($ACTIVE_GROUPS{$group_id})) { + log_debug("Group ${group_id} dropped: It is not an active group"); + } else { + log_debug("Group ${group_id} dropped: No members left (early)"); + } + + return; + } + + for my $member_id (sort(keys(%{$members}))) { + if (not exists($KNOWN_MEMBERS{"${group_id} ${member_id}"})) { + delete($members->{$member_id}); + if (not $group_data->{'out-of-date'}) { + $group_data->{'out-of-date'} = 1; + log_debug( + "${group_id} is out-of-date: ${member_id} disappeared"); + } + } else { + my $member_data = $members->{$member_id}; + # Create "member_id to group_data" link + $state->{'members-to-groups'}{$member_id} = $group_data; + delete($member_data->{'mirror-metadata'}{'component'}) + if exists($member_data->{'mirror-metadata'}); + remove_if_empty($member_data, 'mirror-metadata'); + } + } + + # Add the "out-of-date" flag if it is in the backlog OR we were asked + # to reschedule all + if (not $group_data->{'out-of-date'}) { + if ($OPT{'reschedule-all'} or $backlog->{$group_id}) { + $group_data->{'out-of-date'} = 1; + log_debug("Marking ${group_id} as out of date: In backlog") + if $backlog->{$group_id}; + } + } + # Check for and possible clear the error counters + if ( + exists($group_data->{'processing-errors'}) + and (not exists($group_data->{'last-error-by'}) + or $group_data->{'last-error-by'} ne $OPT{'desired-version'}) + ) { + log_debug( + "Clearing error-counter for ${group_id}: New version of lintian"); + delete($group_data->{'processing-errors'}); + # Leave "last-error-by" as we can use that to tell if the previous + # version triggered errors. + } + + if (not %{$members}) { + # No members left, remove the group entirely. This should not happen + # as the ACTIVE_GROUPS check above ought to have caught this. + delete($state->{$group_id}); + log_debug("Group ${group_id} dropped: No members left (late)"); + } else { + # remove redundant fields + remove_if_empty($group_data, 'out-of-date'); + for my $metadata_field (qw(component maintainer uploaders)) { + remove_if_empty($group_data->{'mirror-metadata'}, $metadata_field); + } + remove_if_empty($group_data, 'mirror-metadata'); + } + + return; +} + +# Helper for local_mirror_manifests - it parses a paragraph from Sources file +sub _parse_srcs_pg { + my ($state, $blacklist, $extra_metadata, $paragraph) = @_; + my $dir = $paragraph->{'Directory'}//''; + my $group_id = $paragraph->{'Package'} . '/' . $paragraph->{'Version'}; + my $member_id = "source:${group_id}"; + my (%data, %group_metadata, $group_mirror_md); + if (exists $blacklist->{$paragraph->{'Package'}}) { + log_debug("Ignoring blacklisted package src:$paragraph->{'Package'}"); + return; + } + # only include the source if it has any binaries to be checked. + # - Otherwise we may end up checking a source with no binaries + # (happens if the architecture is "behind" in building) + return unless $ACTIVE_GROUPS{$group_id}; + $dir .= '/' if $dir; + foreach my $f (split m/\n/, $paragraph->{"Checksums-${DEFAULT_CHECKSUM}"}){ + + # trim both ends + $f =~ s/^\s+|\s+$//g; + + next unless $f && $f =~ m/\.dsc$/; + my ($checksum, undef, $basename) = split(m/\s++/, $f); + my $b64_checksum = encode_base64(pack('H*', $checksum)); + # $dir should end with a slash if it is non-empty. + $data{lc $DEFAULT_CHECKSUM} = $b64_checksum; + $data{'path'} = $extra_metadata->{'mirror-dir'} . "/$dir" . $basename; + last; + } + + $group_mirror_md = $group_metadata{'mirror-metadata'} = {}; + $group_mirror_md->{'component'} = $extra_metadata->{'component'}; + $group_mirror_md->{'maintainer'} = $paragraph->{'maintainer'}; + if (my $uploaders = $paragraph->{'uploaders'}) { + my @ulist = split(/>\K\s*,\s*/, $uploaders); + $group_mirror_md->{'uploaders'} = \@ulist; + } + + add_member_to_group($state, $group_id, $member_id, \%data, + \%group_metadata); + return; +} + +# Helper for local_mirror_manifests - it parses a paragraph from Packages file +sub _parse_pkgs_pg { + my ($state, $blacklist, $extra_metadata, $type, $paragraph) = @_; + my ($group_id, $member_id, %data, %group_metadata, $b64_checksum); + my $package = $paragraph->{'Package'}; + my $version = $paragraph->{'Version'}; + my $architecture = $paragraph->{'Architecture'}; + if (not defined($paragraph->{'Source'})) { + $paragraph->{'Source'} = $package; + } elsif ($paragraph->{'Source'} =~ /^([-+\.\w]+)\s+\((.+)\)$/) { + $paragraph->{'Source'} = $1; + $paragraph->{'Source-version'} = $2; + } + if (exists $blacklist->{$paragraph->{'Source'}}) { + log_debug("Ignoring binary package $package: it is part of " + . "blacklisted source package $paragraph->{'Source'}"); + return; + } + if (not defined($paragraph->{'Source-Version'})) { + $paragraph->{'Source-Version'} = $paragraph->{'Version'}; + } + $group_id = $paragraph->{'Source'} . '/' . $paragraph->{'Source-Version'}; + $member_id = "${type}:${package}/${version}/${architecture}"; + $data{'path'} + = $extra_metadata->{'mirror-dir'} . '/' . $paragraph->{'filename'}; + $b64_checksum + = encode_base64(pack('H*', $paragraph->{lc $DEFAULT_CHECKSUM})); + $data{lc $DEFAULT_CHECKSUM} = $b64_checksum; + + $group_metadata{'mirror-metadata'}{'maintainer'} + = $paragraph->{'Maintainer'}; + if (my $uploaders = $paragraph->{'Uploaders'}) { + my @ulist = split(/>\K\s*,\s*/, $uploaders); + $group_metadata{'mirror-metadata'}{'uploaders'} = \@ulist; + } + + add_member_to_group($state, $group_id, $member_id, \%data, + \%group_metadata); + return; +} + +# local_mirror_manifests ($mirdir, $dists, $components, $archs) +# +# Returns a list of manifests that represents what is on the local mirror +# at $mirdir. 3 manifests will be returned, one for "source", one for "binary" +# and one for "udeb" packages. They are populated based on the "Sources" and +# "Packages" files. +# +# $mirdir - the path to the local mirror +# $dists - listref of dists to consider (e.g. ['unstable']) +# $components - listref of components to consider (e.g. ['main', 'contrib', 'non-free']) +# $archs - listref of archs to consider (e.g. ['i386', 'amd64']) +# $blacklist - hashref of source packages to ignore (e.g. {'gcc-8-cross-ports' => 1}) +# +sub local_mirror_manifests { + my ($state, $mirdir, $dists, $components, $archs, $blacklist) = @_; + foreach my $dist (@$dists) { + foreach my $component (@{$components}) { + my $srcs = "$mirdir/dists/$dist/$component/source/Sources"; + + my %extra_metadata = ( + 'component' => $component, + 'mirror-dir' => $mirdir, + ); + + # Binaries have a "per arch" file. + # - we check those first and then include the source packages that + # are referred to by these binaries. + + my $dist_path = "$mirdir/dists/$dist/$component"; + for my $arch (@{$archs}) { + + my $pkgs = "${dist_path}/binary-$arch/Packages"; + my $pkgfd = _open_data_file($pkgs); + local $/ = undef; + my $pkgstring = <$pkgfd>; + close $pkgfd; + + my $binsub = sub { + _parse_pkgs_pg($state, $blacklist, \%extra_metadata, + 'binary', @_); + }; + visit_dpkg_paragraph_string($binsub, $pkgstring); + + my $upkgs + = "${dist_path}/debian-installer/binary-$arch/Packages"; + my $upkgfd = _open_data_file($upkgs); + local $/ = undef; + my $upkgstring = <$upkgfd>; + close $upkgfd; + + my $udebsub = sub { + _parse_pkgs_pg($state, $blacklist, \%extra_metadata, + 'udeb', @_); + }; + visit_dpkg_paragraph_string($udebsub, $upkgstring); + } + + my $srcfd = _open_data_file($srcs); + local $/ = undef; + my $srcstring = <$srcfd>; + close $srcfd; + + my $srcsub + = sub { _parse_srcs_pg($state, $blacklist, \%extra_metadata, @_) }; + visit_dpkg_paragraph_string($srcsub, $srcstring); + } + } + return; +} + +# _open_data_file ($file) +# +# Opens $file if it exists, otherwise it tries common extensions (i.e. .gz) and opens +# that instead. It may pipe the file through an external decompressor, so the returned +# file descriptor cannot be assumed to be a file. +# +# If $file does not exists and no common extensions are found, this dies. It may also +# die if it finds a file, but is unable to open it. +sub _open_data_file { + my ($file) = @_; + if (-e $file) { + open(my $fd, '<:encoding(UTF-8)', $file); + return $fd; + } + if (-e "${file}.gz") { + my $fd = open_gz("${file}.gz"); + binmode($fd, 'encoding(UTF-8)'); + return $fd; + } + if (-e "${file}.xz") { + open(my $fd, '-|', 'xz', '-dc', "${file}.xz"); + binmode($fd, 'encoding(UTF-8)'); + return $fd; + } + die("Cannot find ${file}: file does not exist"); +} + +sub log_debug { + if ($OPT{'debug'}) { + print "$_[0]\n"; + } + return; +} + +sub usage { + my $cmd = basename($0); + my $me = "lintian $cmd"; + print <<EOF; +Internal command for the Lintian reporting framework +Usage: $me <args> + + --help Show this text and exit + --debug Show/log debugging output + + --reporting-config FILE + Path to the configuration file (listing the archive definitions) [!] + --desired-version X The desired "last-processed-by" Lintian version. [!] + +Arguments marked with [!] are required for a successful run. + +EOF + + exit(0); +} + +1; + +# Local Variables: +# indent-tabs-mode: nil +# cperl-indent-level: 4 +# End: +# vim: syntax=perl sw=4 sts=4 sr et |