diff options
Diffstat (limited to '')
30 files changed, 8220 insertions, 0 deletions
diff --git a/lib/Lintian/Data.pm b/lib/Lintian/Data.pm new file mode 100644 index 0000000..6a0b227 --- /dev/null +++ b/lib/Lintian/Data.pm @@ -0,0 +1,354 @@ +# Copyright (C) 2011 Niels Thykier <niels@thykier.net> +# Copyright (C) 2018 Chris Lamb <lamby@debian.org> +# Copyright (C) 2021 Felix Lechner +# Copyright (C) 2022 Axel Beckert +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, you can find it on the World Wide +# Web at https://www.gnu.org/copyleft/gpl.html, or write to the Free +# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, +# MA 02110-1301, USA. 
package Lintian::Data;

use v5.20;
use warnings;
use utf8;

use Carp qw(croak);
use Unicode::UTF8 qw(encode_utf8);

use Lintian::Data::Architectures;
use Lintian::Data::Archive::AutoRejection;
use Lintian::Data::Archive::Sections;
use Lintian::Data::Buildflags::Hardening;
use Lintian::Data::Debhelper::Addons;
use Lintian::Data::Debhelper::Commands;
use Lintian::Data::Debhelper::Levels;
use Lintian::Data::Fonts;
use Lintian::Data::InitD::VirtualFacilities;
use Lintian::Data::Policy::Releases;
use Lintian::Data::Provides::MailTransportAgent;
use Lintian::Data::Stylesheet;
use Lintian::Data::Traditional;

use Moo;
use namespace::clean;

with 'Lintian::Data::Authorities';

=head1 NAME

Lintian::Data - Data parser for Lintian

=head1 SYNOPSIS

 my $profile = Lintian::Data->new (vendor => 'debian');

=head1 DESCRIPTION

Lintian::Data handles finding, parsing and implementation of Lintian Data

=head1 INSTANCE METHODS

=over 4

=item vendor

The vendor that is handed to the C<load> method of every data source.

=item data_paths

Array reference that is handed to the C<load> method of every data
source.  An undefined value is coerced to an empty array reference.

=item data_cache

Hash reference, keyed by location, holding the objects created by
L</load>.  An undefined value is coerced to an empty hash reference.

=cut

has vendor => (is => 'rw');

has data_paths => (
    is => 'rw',
    coerce => sub { return $_[0] // []; },
    default => sub { [] }
);

has data_cache => (
    is => 'rw',
    coerce => sub { return $_[0] // {}; },
    default => sub { {} }
);

=item load

Returns the L<Lintian::Data::Traditional> object for C<$location>.  The
object is created, loaded and stored in C<data_cache> on first use; later
calls for the same location return the stored object.  Croaks when no
location is given.

=cut

sub load {
    my ($self, $location, $separator) = @_;

    croak encode_utf8('no data type specified')
      unless $location;

    # serve repeated requests from the cache
    return $self->data_cache->{$location}
      if exists $self->data_cache->{$location};

    my $traditional = Lintian::Data::Traditional->new;
    $traditional->location($location);
    $traditional->separator($separator);

    $traditional->load($self->data_paths, $self->vendor);

    $self->data_cache->{$location} = $traditional;

    return $self->data_cache->{$location};
}

=item all_sources

Returns the data source objects listed below, in that order.  The style
sheet is not part of the list.

=cut

sub all_sources {
    my ($self) = @_;

    my @accessors = qw(
      architectures
      auto_rejection
      debhelper_addons
      debhelper_commands
      debhelper_levels
      fonts
      hardening_buildflags
      mail_transport_agents
      policy_releases
      sections
      virtual_initd_facilities
    );
    # not included: style_sheet

    return map { $self->$_ } @accessors;
}

=item architectures

Lazily created and loaded L<Lintian::Data::Architectures> object.

=cut

has architectures => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my $self = shift;

        my $source = Lintian::Data::Architectures->new;
        $source->load($self->data_paths, $self->vendor);

        return $source;
    }
);

=item auto_rejection

Lazily created and loaded L<Lintian::Data::Archive::AutoRejection> object.

=cut

has auto_rejection => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my $self = shift;

        my $source = Lintian::Data::Archive::AutoRejection->new;
        $source->load($self->data_paths, $self->vendor);

        return $source;
    }
);

=item debhelper_addons

Lazily created and loaded L<Lintian::Data::Debhelper::Addons> object.

=cut

has debhelper_addons => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my $self = shift;

        my $source = Lintian::Data::Debhelper::Addons->new;
        $source->load($self->data_paths, $self->vendor);

        return $source;
    }
);

=item debhelper_commands

Lazily created and loaded L<Lintian::Data::Debhelper::Commands> object.

=cut

has debhelper_commands => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my $self = shift;

        my $source = Lintian::Data::Debhelper::Commands->new;
        $source->load($self->data_paths, $self->vendor);

        return $source;
    }
);

=item debhelper_levels

Lazily created and loaded L<Lintian::Data::Debhelper::Levels> object.

=cut

has debhelper_levels => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my $self = shift;

        my $source = Lintian::Data::Debhelper::Levels->new;
        $source->load($self->data_paths, $self->vendor);

        return $source;
    }
);

=item fonts

Lazily created and loaded L<Lintian::Data::Fonts> object.

=cut

has fonts => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my $self = shift;

        my $source = Lintian::Data::Fonts->new;
        $source->load($self->data_paths, $self->vendor);

        return $source;
    }
);

=item hardening_buildflags

Lazily created and loaded L<Lintian::Data::Buildflags::Hardening> object.

=cut

has hardening_buildflags => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my $self = shift;

        my $source = Lintian::Data::Buildflags::Hardening->new;
        $source->load($self->data_paths, $self->vendor);

        return $source;
    }
);

=item mail_transport_agents

Lazily created and loaded L<Lintian::Data::Provides::MailTransportAgent>
object.

=cut

has mail_transport_agents => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my $self = shift;

        my $source = Lintian::Data::Provides::MailTransportAgent->new;
        $source->load($self->data_paths, $self->vendor);

        return $source;
    }
);

=item policy_releases

Lazily created and loaded L<Lintian::Data::Policy::Releases> object.

=cut

has policy_releases => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my $self = shift;

        my $source = Lintian::Data::Policy::Releases->new;
        $source->load($self->data_paths, $self->vendor);

        return $source;
    }
);

=item sections

Lazily created and loaded L<Lintian::Data::Archive::Sections> object.

=cut

has sections => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my $self = shift;

        my $source = Lintian::Data::Archive::Sections->new;
        $source->load($self->data_paths, $self->vendor);

        return $source;
    }
);

=item style_sheet

Lazily created and loaded L<Lintian::Data::Stylesheet> object.

=cut

has style_sheet => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my $self = shift;

        my $source = Lintian::Data::Stylesheet->new;
        $source->load($self->data_paths, $self->vendor);

        return $source;
    }
);

=item virtual_initd_facilities

Lazily created and loaded L<Lintian::Data::InitD::VirtualFacilities>
object.

=cut

has virtual_initd_facilities => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my $self = shift;

        my $source = Lintian::Data::InitD::VirtualFacilities->new;
        $source->load($self->data_paths, $self->vendor);

        return $source;
    }
);

=back

=head1 AUTHOR

Originally written by Niels Thykier <niels@thykier.net> for Lintian.
+ +=head1 SEE ALSO + +lintian(1) + +=cut + +1; + +# Local Variables: +# indent-tabs-mode: nil +# cperl-indent-level: 4 +# End: +# vim: syntax=perl sw=4 sts=4 sr et diff --git a/lib/Lintian/Data/Architectures.pm b/lib/Lintian/Data/Architectures.pm new file mode 100644 index 0000000..c45ced4 --- /dev/null +++ b/lib/Lintian/Data/Architectures.pm @@ -0,0 +1,441 @@ +# -*- perl -*- + +# Copyright (C) 2011-2012 Niels Thykier <niels@thykier.net> +# - Based on a shell script by Raphael Geissert <atomo64@gmail.com> +# Copyright (C) 2020-2021 Felix Lechner +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# this program. If not, see <http://www.gnu.org/licenses/>. + +package Lintian::Data::Architectures; + +use v5.20; +use warnings; +use utf8; + +use Const::Fast; +use List::SomeUtils qw(first_value); +use Unicode::UTF8 qw(decode_utf8); + +use Lintian::IPC::Run3 qw(safe_qx); + +const my $EMPTY => q{}; +const my $SLASH => q{/}; + +const my $HOST_VARIABLES => q{host_variables}; + +use Moo; +use namespace::clean; + +with 'Lintian::Data::PreambledJSON'; + +=encoding utf-8 + +=head1 NAME + +Lintian::Data::Architectures -- Lintian API for handling architectures and wildcards + +=head1 SYNOPSIS + + use Lintian::Data::Architectures; + +=head1 DESCRIPTION + +Lintian API for checking and expanding architectures and architecture +wildcards. 
The functions are backed by a L<data|Lintian::Data> file,
so it may be out of date (use private/refresh-archs to update it).

Generally all architecture names are in the format "$os-$architecture" and
wildcards are "$os-any" or "any-$cpu", though there are exceptions.

Note that the architecture and cpu name are not always identical
(example architecture "armhf" has cpu name "arm").

=head1 INSTANCE METHODS

=over 4

=item title

=item location

=item host_variables

=item deb_host_multiarch

Hash reference that maps every key of C<host_variables> to the
C<DEB_HOST_MULTIARCH> value stored for it.

=item C<wildcards>

=item C<names>

=cut

has title => (
    is => 'rw',
    default => 'DEB_HOST_* Variables from Dpkg'
);

has location => (
    is => 'rw',
    default => 'architectures/host.json'
);

has host_variables => (
    is => 'rw',
    default => sub { {} },
    coerce => sub { return $_[0] // {}; }
);

has deb_host_multiarch => (
    is => 'rw',
    lazy => 1,
    coerce => sub { return $_[0] // {}; },
    default => sub {
        my $self = shift;

        # one multiarch value per known architecture
        my %multiarch_for = map {
            ($_ => $self->host_variables->{$_}{DEB_HOST_MULTIARCH})
        } keys %{$self->host_variables};

        return \%multiarch_for;
    }
);

# The list of directories searched by default by the dynamic linker.
# Packages installing shared libraries into these directories must call
# ldconfig, must have shlibs files, and must ensure those libraries have
# proper SONAMEs.
#
# Directories listed here must not have leading slashes.
#
# On the topic of multi-arch dirs. Hopefully including the ones not
# native to the local platform won't hurt.
#
# See Bug#469301 and Bug#464796 for more details.
#
# Array reference of folder names, each with a trailing slash: the two
# multiarch variants (lib/<multiarch>/ and usr/lib/<multiarch>/) for every
# value in deb_host_multiarch, followed by a fixed list of library folders.
has ldconfig_folders => (
    is => 'rw',
    lazy => 1,
    # the attribute holds an array reference, so fall back to an empty array
    coerce => sub { my ($arrayref) = @_; return ($arrayref // []); },
    default => sub {
        my ($self) = @_;

        my @multiarch = values %{$self->deb_host_multiarch};
        my @ldconfig_folders = map { ("lib/$_", "usr/lib/$_") } @multiarch;

        my @always = qw{
          lib
          lib32
          lib64
          libx32
          usr/lib
          usr/lib32
          usr/lib64
          usr/libx32
          usr/local/lib
        };
        push(@ldconfig_folders, @always);

        my @with_slash = map { $_ . $SLASH } @ldconfig_folders;

        return \@with_slash;
    }
);

# Valid architecture wildcards.
#
# Hash reference: wildcard => { architecture => 1, ... }, built from the
# ABI, libc, OS and CPU values of every entry in host_variables.
has wildcards => (
    is => 'rw',
    lazy => 1,
    coerce => sub { my ($hashref) = @_; return ($hashref // {}); },
    default => sub {
        my ($self) = @_;

        my %wildcards;

        for my $hyphenated (keys %{$self->host_variables}) {

            my $variables = $self->host_variables->{$hyphenated};

            # NB: "$os-$cpu" is not always equal to $hyphenated
            my $abi = $variables->{DEB_HOST_ARCH_ABI};
            my $libc = $variables->{DEB_HOST_ARCH_LIBC};
            my $os = $variables->{DEB_HOST_ARCH_OS};
            my $cpu = $variables->{DEB_HOST_ARCH_CPU};

            # map $os-any (e.g. "linux-any") and any-$architecture (e.g. "any-amd64") to
            # the relevant architectures.
            $wildcards{'any'}{$hyphenated} = 1;

            $wildcards{'any-any'}{$hyphenated} = 1;
            $wildcards{"any-$cpu"}{$hyphenated} = 1;
            $wildcards{"$os-any"}{$hyphenated} = 1;

            $wildcards{'any-any-any'}{$hyphenated} = 1;
            $wildcards{"any-any-$cpu"}{$hyphenated} = 1;
            $wildcards{"any-$os-any"}{$hyphenated} = 1;
            $wildcards{"any-$os-$cpu"}{$hyphenated} = 1;
            $wildcards{"$libc-any-any"}{$hyphenated} = 1;
            $wildcards{"$libc-any-$cpu"}{$hyphenated} = 1;
            $wildcards{"$libc-$os-any"}{$hyphenated} = 1;

            $wildcards{'any-any-any-any'}{$hyphenated} = 1;
            $wildcards{"any-any-any-$cpu"}{$hyphenated} = 1;
            $wildcards{"any-any-$os-any"}{$hyphenated} = 1;
            $wildcards{"any-any-$os-$cpu"}{$hyphenated} = 1;
            $wildcards{"any-$libc-any-any"}{$hyphenated} = 1;
            $wildcards{"any-$libc-any-$cpu"}{$hyphenated} = 1;
            $wildcards{"any-$libc-$os-any"}{$hyphenated} = 1;
            $wildcards{"any-$libc-$os-$cpu"}{$hyphenated} = 1;
            $wildcards{"$abi-any-any-any"}{$hyphenated} = 1;
            $wildcards{"$abi-any-any-$cpu"}{$hyphenated} = 1;
            $wildcards{"$abi-any-$os-any"}{$hyphenated} = 1;
            $wildcards{"$abi-any-$os-$cpu"}{$hyphenated} = 1;
            $wildcards{"$abi-$libc-any-any"}{$hyphenated} = 1;
            $wildcards{"$abi-$libc-any-$cpu"}{$hyphenated} = 1;
            $wildcards{"$abi-$libc-$os-any"}{$hyphenated} = 1;
        }

        return \%wildcards;
    }
);

# Maps aliases to the "original" arch name.
# (e.g. "linux-amd64" => "amd64")
#
# Every key of host_variables also maps to itself.
has names => (
    is => 'rw',
    lazy => 1,
    coerce => sub { my ($hashref) = @_; return ($hashref // {}); },
    default => sub {
        my ($self) = @_;

        my %names;

        for my $hyphenated (keys %{$self->host_variables}) {

            my $variables = $self->host_variables->{$hyphenated};

            $names{$hyphenated} = $hyphenated;

            # NB: "$os-$cpu" ne $hyphenated in some cases
            my $os = $variables->{DEB_HOST_ARCH_OS};
            my $cpu = $variables->{DEB_HOST_ARCH_CPU};

            if ($os eq 'linux') {

                # Per Policy section 11.1 (3.9.3):
                #
                #"""[architecture] strings are in the format "os-arch", though
                # the OS part is sometimes elided, as when the OS is Linux."""
                #
                # i.e. "linux-amd64" and "amd64" are aliases, so handle them
                # as such. Currently, dpkg-architecture -L gives us "amd64"
                # but in case it changes to "linux-amd64", we are prepared.

                if ($hyphenated =~ /^linux-/) {
                    # It may be tempting to use $cpu here, but it does not work
                    # for (e.g.) arm based architectures. Instead extract the
                    # "short" architecture name from $hyphenated
                    my (undef, $short) = split(/-/, $hyphenated, 2);
                    $names{$short} = $hyphenated;

                } else {
                    # short string in $hyphenated
                    my $long = "$os-$hyphenated";
                    $names{$long} = $hyphenated;
                }
            }
        }

        return \%names;
    }
);

=item is_wildcard ($wildcard)

Returns a truth value if $wildcard is a known architecture wildcard.

Note: 'any' is considered a wildcard and not an architecture.

=cut

sub is_wildcard {
    my ($self, $wildcard) = @_;

    return exists $self->wildcards->{$wildcard};
}

=item is_release_architecture ($architecture)

Returns a truth value if $architecture is (an alias of) a Debian machine
architecture. It returns a false value for
architecture wildcards (including "any") and unknown architectures.
=cut

sub is_release_architecture {
    my ($self, $architecture) = @_;

    # aliases and original names are both keys of the names table
    my $known_names = $self->names;

    return exists $known_names->{$architecture};
}

=item expand_wildcard ($wildcard)

Returns a list of architectures that this wildcard expands to. No
order is guaranteed (even between calls). Returned values must not be
modified. An unknown wildcard expands to an empty list.

Note: This list is based on the architectures in Lintian's data file.
However, many of these are not supported or used in Debian or any of
its derivatives.

The returned values match the list generated by dpkg-architecture -L,
so the returned list may use (e.g.) "amd64" for "linux-amd64".

=cut

sub expand_wildcard {
    my ($self, $wildcard) = @_;

    # fall back to an empty set for wildcards we do not know
    my $members = $self->wildcards->{$wildcard} // {};

    return keys %{$members};
}

=item wildcard_includes ($wildcard, $architecture)

Returns a truth value if $architecture is included in the list of
architectures that $wildcard expands to.

This is generally faster than

 grep { $_ eq $architecture } $self->expand_wildcard($wildcard)

It also properly handles cases like "linux-amd64" and "amd64" being
aliases.
=cut

sub wildcard_includes {
    my ($self, $wildcard, $architecture) = @_;

    # resolve aliases such as "linux-amd64" to the original name
    $architecture = $self->names->{$architecture}
      if exists $self->names->{$architecture};

    # Check the wildcard itself first. Looking straight at the nested key
    # would autovivify an entry for an unknown wildcard, and is_wildcard
    # would then report that wildcard as known.
    return exists $self->wildcards->{$wildcard}
      && exists $self->wildcards->{$wildcard}{$architecture};
}

=item valid_restriction

Returns a truth value if $restriction, after removal of a leading
exclamation mark, is a known architecture (or alias), a known wildcard,
or the string C<all>.

=cut

sub valid_restriction {
    my ($self, $restriction) = @_;

    # strip any negative prefix
    $restriction =~ s/^!//;

    return
         $self->is_release_architecture($restriction)
      || $self->is_wildcard($restriction)
      || $restriction eq 'all';
}

=item restriction_matches

Returns a truth value if $restriction applies to $architecture. A
restriction applies when it equals the architecture or is a wildcard
that includes it; a leading exclamation mark inverts the result.

=cut

sub restriction_matches {
    my ($self, $restriction, $architecture) = @_;

    # look for negative prefix and strip
    my $match_wanted = !($restriction =~ s/^!//);

    return $match_wanted
      if $restriction eq $architecture;

    return $match_wanted
      if $self->is_wildcard($restriction)
      && $self->wildcard_includes($restriction, $architecture);

    return !$match_wanted;
}

=item load

Reads the first existing C<location> file below the folders in
$search_space and stores its contents in C<host_variables>. Returns 1
on success and 0 when the file could not be read.

=cut

sub load {
    my ($self, $search_space, $our_vendor) = @_;

    my @candidates = map { $_ . $SLASH . $self->location } @{$search_space};

    # NOTE(review): $path is undefined when no candidate exists; read_file
    # (from the PreambledJSON role) is presumably expected to cope - confirm
    my $path = first_value { -e } @candidates;

    my $host_variables;

    return 0
      unless $self->read_file($path, \$host_variables);

    $self->host_variables($host_variables);

    return 1;
}

=item refresh

Queries C<dpkg-architecture> for all known architectures, collects the
C<DEB_HOST_*> variables of each, and writes them to C<location> below
$basedir. Returns the status of the write.

=cut

sub refresh {
    my ($self, $archive, $basedir) = @_;

    local $ENV{LC_ALL} = 'C';
    delete local $ENV{DEB_HOST_ARCH};

    # split on newlines already drops the line terminators
    my @architectures
      = split(/\n/, decode_utf8(safe_qx(qw{dpkg-architecture --list-known})));

    my %host_variables;
    for my $architecture (@architectures) {

        my @lines= split(
            /\n/,
            decode_utf8(
                safe_qx(qw{dpkg-architecture --host-arch}, $architecture)
            )
        );

        for my $line (@lines) {
            my ($key, $value) = split(/=/, $line, 2);

            # an empty line yields no key at all
            next
              unless defined $key;

            $host_variables{$architecture}{$key} = $value
              if $key =~ /^DEB_HOST_/;
        }
    }

    # same name as the one passed to write_file below
    $self->cargo($HOST_VARIABLES);

    my $data_path = "$basedir/" . $self->location;
    my $status
      = $self->write_file($HOST_VARIABLES, \%host_variables, $data_path);

    return $status;
}

=back

=cut

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Archive/AutoRejection.pm b/lib/Lintian/Data/Archive/AutoRejection.pm new file mode 100644 index 0000000..d05ae51 --- /dev/null +++ b/lib/Lintian/Data/Archive/AutoRejection.pm @@ -0,0 +1,154 @@
# -*- perl -*-
#
# Copyright (C) 2021 Felix Lechner
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.
package Lintian::Data::Archive::AutoRejection;

use v5.20;
use warnings;
use utf8;

use Carp qw(carp);
use Const::Fast;
use HTTP::Tiny;
use List::SomeUtils qw(first_value uniq);
use Path::Tiny;
use Unicode::UTF8 qw(encode_utf8);
use YAML::XS qw(LoadFile);

const my $EMPTY => q{};
const my $SLASH => q{/};

use Moo;
use namespace::clean;

=head1 NAME

Lintian::Data::Archive::AutoRejection - Lintian interface to the archive's auto-rejection tags

=head1 SYNOPSIS

 use Lintian::Data::Archive::AutoRejection;

=head1 DESCRIPTION

This module provides a way to load data files for the archive's auto-rejection tags

=head1 INSTANCE METHODS

=over 4

=item title

=item location

=item certain

Array reference filled by L</load> from the C<fatal> list of the data
file.

=item preventable

Array reference filled by L</load> from the C<nonfatal> list of the data
file.

=cut

has title => (
    is => 'rw',
    default => 'Archive Auto-Rejection Tags'
);

has location => (
    is => 'rw',
    default => 'archive/auto-rejection.yaml'
);

has certain => (is => 'rw', default => sub { [] });
has preventable => (is => 'rw', default => sub { [] });

=item load

Finds the first existing C<location> file below the folders in
$search_space, parses it as YAML and stores the de-duplicated C<fatal>
and C<nonfatal> lists from its C<lintian> entry. Warns and returns when
no such file exists; dies when the document cannot be used.

=cut

sub load {
    my ($self, $search_space, $our_vendor) = @_;

    my $path
      = first_value { -e } map { $_ . $SLASH . $self->location }
      @{$search_space};

    if (!length $path) {
        carp encode_utf8('Unknown data file: ' . $self->location);
        return;
    }

    my $document = LoadFile($path);
    die encode_utf8('Could not parse YAML file ' . $self->location)
      unless defined $document;

    my $tags = $document->{lintian};
    die encode_utf8('Could not parse document base for ' . $self->location)
      unless defined $tags;

    # each list is optional in the document
    $self->certain([uniq @{ $tags->{fatal} // [] }]);
    $self->preventable([uniq @{ $tags->{nonfatal} // [] }]);

    return;
}

=item refresh

Downloads the auto-rejection tags from ftp-master and writes them,
unchanged, to C<location> below $basedir. Dies when the download fails.

=cut

sub refresh {
    my ($self, $archive, $basedir) = @_;

    my $auto_rejection_url
      = 'https://ftp-master.debian.org/static/lintian.tags';

    my $reply = HTTP::Tiny->new->get($auto_rejection_url);
    die encode_utf8("Failed to get $auto_rejection_url!\n")
      unless $reply->{success};

    my $target = path("$basedir/" . $self->location);

    # create the containing folder on first use
    my $folder = $target->parent->stringify;
    path($folder)->mkpath
      unless -e $folder;

    # already in UTF-8
    $target->spew($reply->{content});

    return 1;
}

=back

=head1 AUTHOR

Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.

=head1 SEE ALSO

lintian(1)

=cut

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Archive/Sections.pm b/lib/Lintian/Data/Archive/Sections.pm new file mode 100644 index 0000000..24a99c7 --- /dev/null +++ b/lib/Lintian/Data/Archive/Sections.pm @@ -0,0 +1,133 @@
# -*- perl -*-
#
# Copyright (C) 2021 Felix Lechner
# Copyright (C) 2022 Axel Beckert
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.

package Lintian::Data::Archive::Sections;

use v5.20;
use warnings;
use utf8;

use Carp qw(carp);
use Const::Fast;
use HTTP::Tiny;
use List::SomeUtils qw(first_value uniq);
use Path::Tiny;
use Unicode::UTF8 qw(encode_utf8);
use Lintian::Deb822;

const my $EMPTY => q{};
const my $SLASH => q{/};

use Moo;
use namespace::clean;

with 'Lintian::Data::JoinedLines';

=head1 NAME

Lintian::Data::Archive::Sections - Lintian interface to the archive's sections

=head1 SYNOPSIS

 use Lintian::Data::Archive::Sections;

=head1 DESCRIPTION

This module provides a way to load the data file for the archive's sections.

=head1 INSTANCE METHODS

=over 4

=item title

=cut

has title => (
    is => 'rw',
    default => 'Archive Sections'
);

=item location

=cut

has location => (
    is => 'rw',
    default => 'fields/archive-sections'
);

=item separator

=cut

has separator => (is => 'rw');

=item refresh

Downloads the section list from ftp-master, keeps the names of all
sections that contain no slash, and writes them, one per line, to
C<location> below $basedir. Dies when the download fails.

=cut

sub refresh {
    my ($self, $archive, $basedir) = @_;

    my $sections_url = 'https://metadata.ftp-master.debian.org/sections.822';

    my $reply = HTTP::Tiny->new->get($sections_url);
    die encode_utf8("Failed to get $sections_url!\n")
      unless $reply->{success};

    # TODO: We should probably save this in the original format and
    # parse it with Lintian::Deb822 at some time.
    my @names;
    for my $line (split(/\n/, $reply->{content})) {

        # only Section fields whose value has no slash
        next
          unless $line =~ m{^Section: [^/]*$};

        push(@names, $line =~ s/^Section: //r);
    }

    my $sections = join("\n", @names) . "\n";

    my $target = path("$basedir/" . $self->location);

    # create the containing folder on first use
    my $folder = $target->parent->stringify;
    path($folder)->mkpath
      unless -e $folder;

    # already in UTF-8
    $target->spew($sections);

    return 1;
}

=back

=head1 AUTHOR

Originally written by Axel Beckert <abe@debian.org> for Lintian.
+ +=head1 SEE ALSO + +lintian(1) + +=cut + +1; + +# Local Variables: +# indent-tabs-mode: nil +# cperl-indent-level: 4 +# End: +# vim: syntax=perl sw=4 sts=4 sr et diff --git a/lib/Lintian/Data/Authorities.pm b/lib/Lintian/Data/Authorities.pm new file mode 100644 index 0000000..fdb77cd --- /dev/null +++ b/lib/Lintian/Data/Authorities.pm @@ -0,0 +1,330 @@ +# Copyright (C) 2011 Niels Thykier <niels@thykier.net> +# Copyright (C) 2018 Chris Lamb <lamby@debian.org> +# Copyright (C) 2021 Felix Lechner +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, you can find it on the World Wide +# Web at https://www.gnu.org/copyleft/gpl.html, or write to the Free +# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, +# MA 02110-1301, USA. 
package Lintian::Data::Authorities;

use v5.20;
use warnings;
use utf8;

use Const::Fast;

use Lintian::Data::Authority::DebconfSpecification;
use Lintian::Data::Authority::DebianPolicy;
use Lintian::Data::Authority::DeveloperReference;
use Lintian::Data::Authority::DocBaseManual;
use Lintian::Data::Authority::FilesystemHierarchy;
use Lintian::Data::Authority::JavaPolicy;
use Lintian::Data::Authority::LintianManual;
use Lintian::Data::Authority::MenuPolicy;
use Lintian::Data::Authority::MenuManual;
use Lintian::Data::Authority::NewMaintainer;
use Lintian::Data::Authority::PerlPolicy;
use Lintian::Data::Authority::PythonPolicy;
use Lintian::Data::Authority::VimPolicy;

const my $EMPTY => q{};

use Moo::Role;
use namespace::clean;

=head1 NAME

Lintian::Data::Authorities - Lintian's Reference Authorities

=head1 SYNOPSIS

 my $data = Lintian::Data->new;

=head1 DESCRIPTION

Lintian::Data::Authorities handles finding, parsing and implementation of Lintian reference authorities

Every attribute below is created lazily and loaded with the consuming
object's C<data_paths> and C<vendor>.

=head1 INSTANCE METHODS

=over 4

=item markdown_authority_reference

Returns the markdown citation for $section from the authority whose
shorthand equals $volume. Returns an empty string when $volume is
undefined or no authority has that shorthand.

=cut

sub markdown_authority_reference {
    my ($self, $volume, $section) = @_;

    # an undefined volume cannot name any authority
    return $EMPTY
      unless defined $volume;

    my @MARKDOWN_CAPABLE = (
        $self->new_maintainer,$self->menu_policy,
        $self->perl_policy,$self->python_policy,
        $self->java_policy,$self->vim_policy,
        $self->lintian_manual,$self->developer_reference,
        $self->policy_manual,$self->debconf_specification,
        $self->menu_manual,$self->doc_base_manual,
        $self->filesystem_hierarchy_standard,
    );

    my %by_shorthand = map { $_->shorthand => $_ } @MARKDOWN_CAPABLE;

    return $EMPTY
      unless exists $by_shorthand{$volume};

    my $manual = $by_shorthand{$volume};

    return $manual->markdown_citation($section);
}

=item debconf_specification

=cut

has debconf_specification => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my ($self) = @_;

        my $manual = Lintian::Data::Authority::DebconfSpecification->new;
        $manual->load($self->data_paths, $self->vendor);

        return $manual;
    }
);

=item developer_reference

=cut

has developer_reference => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my ($self) = @_;

        my $manual = Lintian::Data::Authority::DeveloperReference->new;
        $manual->load($self->data_paths, $self->vendor);

        return $manual;
    }
);

=item doc_base_manual

=cut

has doc_base_manual => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my ($self) = @_;

        my $manual = Lintian::Data::Authority::DocBaseManual->new;
        $manual->load($self->data_paths, $self->vendor);

        return $manual;
    }
);

=item filesystem_hierarchy_standard

=cut

has filesystem_hierarchy_standard => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my ($self) = @_;

        my $manual= Lintian::Data::Authority::FilesystemHierarchy->new;
        $manual->load($self->data_paths, $self->vendor);

        return $manual;
    }
);

=item java_policy

=cut

has java_policy => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my ($self) = @_;

        my $manual = Lintian::Data::Authority::JavaPolicy->new;
        $manual->load($self->data_paths, $self->vendor);

        return $manual;
    }
);

=item lintian_manual

=cut

has lintian_manual => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my ($self) = @_;

        my $manual = Lintian::Data::Authority::LintianManual->new;
        $manual->load($self->data_paths, $self->vendor);

        return $manual;
    }
);

=item menu_manual

=cut

has menu_manual => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my ($self) = @_;

        my $manual = Lintian::Data::Authority::MenuManual->new;
        $manual->load($self->data_paths, $self->vendor);

        return $manual;
    }
);

=item menu_policy

=cut

has menu_policy => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my ($self) = @_;

        my $manual = Lintian::Data::Authority::MenuPolicy->new;
        $manual->load($self->data_paths, $self->vendor);

        return $manual;
    }
);

=item new_maintainer

=cut

has new_maintainer => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my ($self) = @_;

        my $manual = Lintian::Data::Authority::NewMaintainer->new;
        $manual->load($self->data_paths, $self->vendor);

        return $manual;
    }
);

=item perl_policy

=cut

has perl_policy => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my ($self) = @_;

        my $manual = Lintian::Data::Authority::PerlPolicy->new;
        $manual->load($self->data_paths, $self->vendor);

        return $manual;
    }
);

=item policy_manual

=cut

has policy_manual => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my ($self) = @_;

        my $manual = Lintian::Data::Authority::DebianPolicy->new;
        $manual->load($self->data_paths, $self->vendor);

        return $manual;
    }
);

=item python_policy

=cut

has python_policy => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my ($self) = @_;

        my $manual = Lintian::Data::Authority::PythonPolicy->new;
        $manual->load($self->data_paths, $self->vendor);

        return $manual;
    }
);

=item vim_policy

=cut

has vim_policy => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my ($self) = @_;

        my $manual = Lintian::Data::Authority::VimPolicy->new;
        $manual->load($self->data_paths, $self->vendor);

        return $manual;
    }
);

=back

=head1 AUTHOR

Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.
+ +=head1 SEE ALSO + +lintian(1) + +=cut + +1; + +# Local Variables: +# indent-tabs-mode: nil +# cperl-indent-level: 4 +# End: +# vim: syntax=perl sw=4 sts=4 sr et diff --git a/lib/Lintian/Data/Authority/DebconfSpecification.pm b/lib/Lintian/Data/Authority/DebconfSpecification.pm new file mode 100644 index 0000000..661d11e --- /dev/null +++ b/lib/Lintian/Data/Authority/DebconfSpecification.pm @@ -0,0 +1,328 @@ +# -*- perl -*- +# +# Copyright (C) 1998 Christian Schwarz and Richard Braakman +# Copyright (C) 2001 Colin Watson +# Copyright (C) 2008 Jorda Polo +# Copyright (C) 2009 Russ Allbery +# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org> +# Copyright (C) 2020-2021 Felix Lechner +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# this program. If not, see <http://www.gnu.org/licenses/>. 
package Lintian::Data::Authority::DebconfSpecification;

use v5.20;
use warnings;
use utf8;

use Carp qw(croak);
use Const::Fast;
use File::Basename qw(dirname);
use Path::Tiny;
use Unicode::UTF8 qw(encode_utf8);
use WWW::Mechanize ();

use Lintian::Output::Markdown qw(markdown_authority);

const my $SLASH => q{/};
const my $COLON => q{:};
const my $UNDERSCORE => q{_};

const my $TWO_PARTS => 2;

# key of the data line that describes the manual as a whole
const my $VOLUME_KEY => $UNDERSCORE;

# written between the fields of a data line
const my $SEPARATOR => $COLON x 2;

use Moo;
use namespace::clean;

with 'Lintian::Data::JoinedLines';

=head1 NAME

Lintian::Data::Authority::DebconfSpecification - Lintian interface for manual references

=head1 SYNOPSIS

    use Lintian::Data::Authority::DebconfSpecification;

=head1 DESCRIPTION

Lintian::Data::Authority::DebconfSpecification provides a way to load data files for
manual references.

=head1 CLASS METHODS

=over 4

=item title

Name of the manual. Also used as the volume title when the fetched page
has no title of its own.

=item shorthand

Short identifier of the manual. The default data file location is derived
from it.

=item location

Path of the data file below a data directory. Defaults to C<authority/>
followed by the shorthand.

=item separator

Pattern used to split the fields of a data line. Defaults to a double colon.

=cut

has title => (
    is => 'rw',
    default => 'Debconf Specification'
);

has shorthand => (
    is => 'rw',
    default => 'debconf-specification'
);

has location => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my ($self) = @_;

        return 'authority/' . $self->shorthand;
    }
);

has separator => (
    is => 'rw',
    default => sub { qr/::/ }
);

=item consumer(KEY, REMAINDER, PREVIOUS)

Splits REMAINDER at the separator into a title and a URL and returns them
as a hash reference with the keys C<title> and C<url>. Returns C<undef>
when PREVIOUS is defined.

=cut

sub consumer {
    my ($self, $key, $remainder, $previous) = @_;

    # NOTE(review): presumably PREVIOUS is the entry already stored for KEY
    # and undef asks the JoinedLines role to ignore the duplicate; the
    # explicit undef is kept because that contract is not visible here
    return undef
      if defined $previous;

    my ($title, $url) = split($self->separator, $remainder, $TWO_PARTS);

    return {
        title => $title,
        url => $url,
    };
}

=item markdown_citation(SECTION_KEY)

Returns the result of C<markdown_authority> for the manual and the section
SECTION_KEY. The section title and URL are passed as C<undef> when the key
is not recognized. Croaks when SECTION_KEY is the volume key C<_>.

=cut

sub markdown_citation {
    my ($self, $section_key) = @_;

    # the volume key names the manual itself, not a section
    croak encode_utf8("Invalid section $section_key")
      if $section_key eq $VOLUME_KEY;

    # start with the citation to the overall manual.
    my $volume_entry = $self->value($VOLUME_KEY);

    my $section_title;
    my $section_url;

    if ($self->recognizes($section_key)) {

        my $section_entry = $self->value($section_key);

        $section_title = $section_entry->{title};
        $section_url = $section_entry->{url};
    }

    return markdown_authority(
        $volume_entry->{title}, $volume_entry->{url},
        $section_key,$section_title, $section_url
    );
}

=item write_line(DATA_FD, SECTION_KEY, SECTION_TITLE, DESTINATION)

Plain function, not a method. Drops trailing dots from SECTION_KEY,
collapses runs of whitespace in SECTION_TITLE, joins the three fields with
a double colon and prints the UTF-8 encoded line to DATA_FD.

=cut

sub write_line {
    my ($data_fd, $section_key, $section_title, $destination) = @_;

    # drop final dots
    $section_key =~ s{ [.]+ $}{}x;

    # reduce consecutive whitespace
    $section_title =~ s{ \s+ }{ }gx;

    my $line = join($SEPARATOR, $section_key, $section_title, $destination);

    say {$data_fd} encode_utf8($line);

    return;
}

=item write_data_file(BASEDIR, GENERATED)

Writes a fixed explanatory header followed by GENERATED to the data file
at BASEDIR/location, creating the parent directory when needed.

=cut

sub write_data_file {
    my ($self, $basedir, $generated) = @_;

    # literal text; nothing in it is meant to be interpolated
    my $header =<<'HEADER';
# Data about titles, sections, and URLs of manuals, used to expand references
# in tag descriptions and add links for HTML output. Each line of this file
# has three fields separated by double colons:
#
# <section> :: <title> :: <url>
#
# If <section> is an underscore, that line specifies the title and URL for the
# whole manual.

HEADER

    my $data_path = path("$basedir/" . $self->location);

    # succeeds without doing anything when the directory already exists
    $data_path->parent->mkpath;

    # refresh passes GENERATED as already encoded lines
    $data_path->spew(encode_utf8($header) . $generated);

    return;
}

=item extract_sections_from_links(DATA_FD, BASE_URL, PAGE_NAME)

Fetches BASE_URL . PAGE_NAME, writes a volume line for the page and then
one line per link whose text starts with a section number.

=cut

sub extract_sections_from_links {
    my ($self, $data_fd, $base_url, $page_name) = @_;

    my $page_url = $base_url . $page_name;

    my $mechanize = WWW::Mechanize->new();
    $mechanize->get($page_url);

    # title() can be undef, e.g. for content that is not HTML
    my $page_title = $mechanize->title // $self->title;

    # strip explanatory remark
    $page_title =~ s{ \s* \N{EM DASH} .* $}{}x;

    # underscore is a token for the whole page
    write_line($data_fd, $VOLUME_KEY, $page_title, $page_url);

    # title and destination of every section written so far
    my %by_section_key;
    my $in_appendix = 0;

    # https://stackoverflow.com/a/254687
    for my $link ($mechanize->links) {

        my $text = $link->text;
        next
          unless length $text;

        # only links that read like "1.2.3 Some title" are sections
        my ($section_key, $section_title)
          = ($text =~ m{^ \s* ([.\d]+) \s+ (.+) $}x);
        next
          unless defined $section_key;

        # drop final dots
        $section_key =~ s{ [.]+ $}{}x;

        # reduce consecutive whitespace
        $section_title =~ s{ \s+ }{ }gx;

        my $relative_destination = $link->url;

        # fragments attach to the page itself; anything else is taken
        # relative to the folder that holds the page
        my $destination_base = $page_url;
        $destination_base = dirname($page_url) . $SLASH
          unless $destination_base =~ m{ / $}x
          || $relative_destination =~ m{^ [#] }x;

        my $full_destination = $destination_base . $relative_destination;

        # NOTE(review): this lookup uses the key without the appendix
        # prefix, so a link repeated inside the appendix is compared with
        # the regular section of the same number; confirm that is intended
        my $seen = $by_section_key{$section_key};

        next
          if defined $seen
          && ( $seen->{title} eq $section_title
            || $seen->{destination} eq $full_destination);

        # Some manuals reuse section numbers for different references,
        # e.g. the Debian Policy's normal and appendix sections are
        # numbers that clash with each other. Track if we've already
        # seen a section pointing to some other URL than the current one,
        # and prepend it with an indicator
        $in_appendix = 1
          if defined $seen
          && $seen->{destination} ne $full_destination;

        $section_key = "appendix-$section_key"
          if $in_appendix;

        $by_section_key{$section_key} = {
            title => $section_title,
            destination => $full_destination,
        };

        write_line($data_fd, $section_key, $section_title, $full_destination);
    }

    return;
}

=item refresh(ARCHIVE, BASEDIR)

Regenerates the data file below BASEDIR from the Debconf specification
published on www.debian.org. ARCHIVE is not used by this class.

=cut

sub refresh {
    my ($self, $archive, $basedir) = @_;

    # single page
    my $base_url = 'https://www.debian.org/doc/packaging-manuals/';
    my $index_name = 'debconf_specification.html';

    # collect the generated lines in memory first
    my $generated;
    open(my $memory_fd, '>', \$generated)
      or die encode_utf8('Cannot open scalar');

    $self->extract_sections_from_links($memory_fd, $base_url, $index_name);

    close($memory_fd)
      or die encode_utf8('Cannot close scalar');

    $self->write_data_file($basedir, $generated);

    return;
}

=back

=head1 AUTHOR

Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.
+ +=head1 SEE ALSO + +lintian(1) + +=cut + +1; + +# Local Variables: +# indent-tabs-mode: nil +# cperl-indent-level: 4 +# End: +# vim: syntax=perl sw=4 sts=4 sr et diff --git a/lib/Lintian/Data/Authority/DebianPolicy.pm b/lib/Lintian/Data/Authority/DebianPolicy.pm new file mode 100644 index 0000000..177b07d --- /dev/null +++ b/lib/Lintian/Data/Authority/DebianPolicy.pm @@ -0,0 +1,321 @@ +# -*- perl -*- +# +# Copyright (C) 1998 Christian Schwarz and Richard Braakman +# Copyright (C) 2001 Colin Watson +# Copyright (C) 2008 Jorda Polo +# Copyright (C) 2009 Russ Allbery +# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org> +# Copyright (C) 2020-2021 Felix Lechner +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# this program. If not, see <http://www.gnu.org/licenses/>. 
package Lintian::Data::Authority::DebianPolicy;

use v5.20;
use warnings;
use utf8;

use Carp qw(croak);
use Const::Fast;
use Path::Tiny;
use Unicode::UTF8 qw(encode_utf8);
use WWW::Mechanize ();

use Lintian::Output::Markdown qw(markdown_authority);

const my $COLON => q{:};
const my $UNDERSCORE => q{_};

const my $TWO_PARTS => 2;

# key of the data line that describes the manual as a whole
const my $VOLUME_KEY => $UNDERSCORE;

# written between the fields of a data line
const my $SEPARATOR => $COLON x 2;

use Moo;
use namespace::clean;

with 'Lintian::Data::JoinedLines';

=head1 NAME

Lintian::Data::Authority::DebianPolicy - Lintian interface for manual references

=head1 SYNOPSIS

    use Lintian::Data::Authority::DebianPolicy;

=head1 DESCRIPTION

Lintian::Data::Authority::DebianPolicy provides a way to load data files for
manual references.

=head1 CLASS METHODS

=over 4

=item title

Name of the manual. Also used as the volume title when the fetched page
has no title of its own.

=item shorthand

Short identifier of the manual. The default data file location is derived
from it.

=item location

Path of the data file below a data directory. Defaults to C<authority/>
followed by the shorthand.

=item separator

Pattern used to split the fields of a data line. Defaults to a double colon.

=cut

has title => (
    is => 'rw',
    default => 'Debian Policy'
);

has shorthand => (
    is => 'rw',
    default => 'debian-policy'
);

has location => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my ($self) = @_;

        return 'authority/' . $self->shorthand;
    }
);

has separator => (
    is => 'rw',
    default => sub { qr/::/ }
);

=item consumer(KEY, REMAINDER, PREVIOUS)

Splits REMAINDER at the separator into a title and a URL and returns them
as a hash reference with the keys C<title> and C<url>. Returns C<undef>
when PREVIOUS is defined.

=cut

sub consumer {
    my ($self, $key, $remainder, $previous) = @_;

    # NOTE(review): presumably PREVIOUS is the entry already stored for KEY
    # and undef asks the JoinedLines role to ignore the duplicate; the
    # explicit undef is kept because that contract is not visible here
    return undef
      if defined $previous;

    my ($title, $url) = split($self->separator, $remainder, $TWO_PARTS);

    return {
        title => $title,
        url => $url,
    };
}

=item markdown_citation(SECTION_KEY)

Returns the result of C<markdown_authority> for the manual and the section
SECTION_KEY. The section title and URL are passed as C<undef> when the key
is not recognized. Croaks when SECTION_KEY is the volume key C<_>.

=cut

sub markdown_citation {
    my ($self, $section_key) = @_;

    # the volume key names the manual itself, not a section
    croak encode_utf8("Invalid section $section_key")
      if $section_key eq $VOLUME_KEY;

    # start with the citation to the overall manual.
    my $volume_entry = $self->value($VOLUME_KEY);

    my $section_title;
    my $section_url;

    if ($self->recognizes($section_key)) {

        my $section_entry = $self->value($section_key);

        $section_title = $section_entry->{title};
        $section_url = $section_entry->{url};
    }

    return markdown_authority(
        $volume_entry->{title}, $volume_entry->{url},
        $section_key,$section_title, $section_url
    );
}

=item write_line(DATA_FD, SECTION_KEY, SECTION_TITLE, DESTINATION)

Plain function, not a method. Drops trailing dots from SECTION_KEY,
collapses runs of whitespace in SECTION_TITLE, joins the three fields with
a double colon and prints the UTF-8 encoded line to DATA_FD.

=cut

sub write_line {
    my ($data_fd, $section_key, $section_title, $destination) = @_;

    # drop final dots
    $section_key =~ s{ [.]+ $}{}x;

    # reduce consecutive whitespace
    $section_title =~ s{ \s+ }{ }gx;

    my $line = join($SEPARATOR, $section_key, $section_title, $destination);

    say {$data_fd} encode_utf8($line);

    return;
}

=item write_data_file(BASEDIR, GENERATED)

Writes a fixed explanatory header followed by GENERATED to the data file
at BASEDIR/location, creating the parent directory when needed.

=cut

sub write_data_file {
    my ($self, $basedir, $generated) = @_;

    # literal text; nothing in it is meant to be interpolated
    my $header =<<'HEADER';
# Data about titles, sections, and URLs of manuals, used to expand references
# in tag descriptions and add links for HTML output. Each line of this file
# has three fields separated by double colons:
#
# <section> :: <title> :: <url>
#
# If <section> is an underscore, that line specifies the title and URL for the
# whole manual.

HEADER

    my $data_path = path("$basedir/" . $self->location);

    # succeeds without doing anything when the directory already exists
    $data_path->parent->mkpath;

    # refresh passes GENERATED as already encoded lines
    $data_path->spew(encode_utf8($header) . $generated);

    return;
}

=item extract_sections_from_links(DATA_FD, BASE_URL)

Fetches BASE_URL, writes a volume line for the page and then one line per
link whose text starts with a section number. Entries below
C<appendix-10.> are left out.

=cut

sub extract_sections_from_links {
    my ($self, $data_fd, $base_url) = @_;

    my $mechanize = WWW::Mechanize->new();
    $mechanize->get($base_url);

    # title() can be undef, e.g. for content that is not HTML
    my $page_title = $mechanize->title // $self->title;

    # strip explanatory remark
    $page_title =~ s{ \s* \N{EM DASH} .* $}{}x;

    # underscore is a token for the whole page
    write_line($data_fd, $VOLUME_KEY, $page_title, $base_url);

    # title and destination of every section written so far
    my %by_section_key;
    my $in_appendix = 0;

    # https://stackoverflow.com/a/254687
    for my $link ($mechanize->links) {

        my $text = $link->text;
        next
          unless length $text;

        # only links that read like "1.2.3 Some title" are sections
        my ($section_key, $section_title)
          = ($text =~ m{^ \s* ([.\d]+) \s+ (.+) $}x);
        next
          unless defined $section_key;

        # drop final dots
        $section_key =~ s{ [.]+ $}{}x;

        # reduce consecutive whitespace
        $section_title =~ s{ \s+ }{ }gx;

        my $destination = $base_url . $link->url;

        # NOTE(review): this lookup uses the key without the appendix
        # prefix, so a link repeated inside the appendix is compared with
        # the regular section of the same number; confirm that is intended
        my $seen = $by_section_key{$section_key};

        next
          if defined $seen
          && ( $seen->{title} eq $section_title
            || $seen->{destination} eq $destination);

        # Some manuals reuse section numbers for different references,
        # e.g. the Debian Policy's normal and appendix sections are
        # numbers that clash with each other. Track if we've already
        # seen a section pointing to some other URL than the current one,
        # and prepend it with an indicator
        $in_appendix = 1
          if defined $seen
          && $seen->{destination} ne $destination;

        $section_key = "appendix-$section_key"
          if $in_appendix;

        # do not collect the upgrading checklists in appendix 10 of policy
        # the numbering changes all the time
        next
          if $section_key =~ m{^ appendix-10 [.] }x;

        $by_section_key{$section_key} = {
            title => $section_title,
            destination => $destination,
        };

        write_line($data_fd, $section_key, $section_title, $destination);
    }

    return;
}

=item refresh(ARCHIVE, BASEDIR)

Regenerates the data file below BASEDIR from the Debian Policy published
on www.debian.org. ARCHIVE is not used by this class.

=cut

sub refresh {
    my ($self, $archive, $basedir) = @_;

    my $base_url = 'https://www.debian.org/doc/debian-policy/';

    # collect the generated lines in memory first
    my $generated;
    open(my $memory_fd, '>', \$generated)
      or die encode_utf8('Cannot open scalar');

    $self->extract_sections_from_links($memory_fd, $base_url);

    close($memory_fd)
      or die encode_utf8('Cannot close scalar');

    $self->write_data_file($basedir, $generated);

    return;
}

=back

=head1 AUTHOR

Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.

=head1 SEE ALSO

lintian(1)

=cut

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Authority/DeveloperReference.pm b/lib/Lintian/Data/Authority/DeveloperReference.pm
new file mode 100644
index 0000000..676cbf4
--- /dev/null
+++ b/lib/Lintian/Data/Authority/DeveloperReference.pm
@@ -0,0 +1,319 @@
# -*- perl -*-
#
# Copyright (C) 1998 Christian Schwarz and Richard Braakman
# Copyright (C) 2001 Colin Watson
# Copyright (C) 2008 Jorda Polo
# Copyright (C) 2009 Russ Allbery
# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
# Copyright (C) 2020-2021 Felix Lechner
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.

package Lintian::Data::Authority::DeveloperReference;

use v5.20;
use warnings;
use utf8;

use Carp qw(croak);
use Const::Fast;
use Path::Tiny;
use Unicode::UTF8 qw(encode_utf8);
use WWW::Mechanize ();

use Lintian::Output::Markdown qw(markdown_authority);

const my $COLON => q{:};
const my $UNDERSCORE => q{_};

const my $TWO_PARTS => 2;

# key of the data line that describes the manual as a whole
const my $VOLUME_KEY => $UNDERSCORE;

# written between the fields of a data line
const my $SEPARATOR => $COLON x 2;

use Moo;
use namespace::clean;

with 'Lintian::Data::JoinedLines';

=head1 NAME

Lintian::Data::Authority::DeveloperReference - Lintian interface for manual references

=head1 SYNOPSIS

    use Lintian::Data::Authority::DeveloperReference;

=head1 DESCRIPTION

Lintian::Data::Authority::DeveloperReference provides a way to load data files for
manual references.

=head1 CLASS METHODS

=over 4

=item title

Name of the manual. Also used as the volume title when the fetched page
has no title of its own.

=item shorthand

Short identifier of the manual. The default data file location is derived
from it.

=item location

Path of the data file below a data directory. Defaults to C<authority/>
followed by the shorthand.

=item separator

Pattern used to split the fields of a data line. Defaults to a double colon.

=cut

has title => (
    is => 'rw',
    default => q{Developer's Reference}
);

has shorthand => (
    is => 'rw',
    default => 'developer-reference'
);

has location => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my ($self) = @_;

        return 'authority/' . $self->shorthand;
    }
);

has separator => (
    is => 'rw',
    default => sub { qr/::/ }
);

=item consumer(KEY, REMAINDER, PREVIOUS)

Splits REMAINDER at the separator into a title and a URL and returns them
as a hash reference with the keys C<title> and C<url>. Returns C<undef>
when PREVIOUS is defined.

=cut

sub consumer {
    my ($self, $key, $remainder, $previous) = @_;

    # NOTE(review): presumably PREVIOUS is the entry already stored for KEY
    # and undef asks the JoinedLines role to ignore the duplicate; the
    # explicit undef is kept because that contract is not visible here
    return undef
      if defined $previous;

    my ($title, $url) = split($self->separator, $remainder, $TWO_PARTS);

    return {
        title => $title,
        url => $url,
    };
}

=item markdown_citation(SECTION_KEY)

Returns the result of C<markdown_authority> for the manual and the section
SECTION_KEY. The section title and URL are passed as C<undef> when the key
is not recognized. Croaks when SECTION_KEY is the volume key C<_>.

=cut

sub markdown_citation {
    my ($self, $section_key) = @_;

    # the volume key names the manual itself, not a section
    croak encode_utf8("Invalid section $section_key")
      if $section_key eq $VOLUME_KEY;

    # start with the citation to the overall manual.
    my $volume_entry = $self->value($VOLUME_KEY);

    my $section_title;
    my $section_url;

    if ($self->recognizes($section_key)) {

        my $section_entry = $self->value($section_key);

        $section_title = $section_entry->{title};
        $section_url = $section_entry->{url};
    }

    return markdown_authority(
        $volume_entry->{title}, $volume_entry->{url},
        $section_key,$section_title, $section_url
    );
}

=item write_line(DATA_FD, SECTION_KEY, SECTION_TITLE, DESTINATION)

Plain function, not a method. Drops trailing dots from SECTION_KEY,
collapses runs of whitespace in SECTION_TITLE, joins the three fields with
a double colon and prints the UTF-8 encoded line to DATA_FD.

=cut

sub write_line {
    my ($data_fd, $section_key, $section_title, $destination) = @_;

    # drop final dots
    $section_key =~ s{ [.]+ $}{}x;

    # reduce consecutive whitespace
    $section_title =~ s{ \s+ }{ }gx;

    my $line = join($SEPARATOR, $section_key, $section_title, $destination);

    say {$data_fd} encode_utf8($line);

    return;
}

=item write_data_file(BASEDIR, GENERATED)

Writes a fixed explanatory header followed by GENERATED to the data file
at BASEDIR/location, creating the parent directory when needed.

=cut

sub write_data_file {
    my ($self, $basedir, $generated) = @_;

    # literal text; nothing in it is meant to be interpolated
    my $header =<<'HEADER';
# Data about titles, sections, and URLs of manuals, used to expand references
# in tag descriptions and add links for HTML output. Each line of this file
# has three fields separated by double colons:
#
# <section> :: <title> :: <url>
#
# If <section> is an underscore, that line specifies the title and URL for the
# whole manual.

HEADER

    my $data_path = path("$basedir/" . $self->location);

    # succeeds without doing anything when the directory already exists
    $data_path->parent->mkpath;

    # refresh passes GENERATED as already encoded lines
    $data_path->spew(encode_utf8($header) . $generated);

    return;
}

=item extract_sections_from_links(DATA_FD, BASE_URL)

Fetches BASE_URL, writes a volume line for the page and then one line per
link whose text starts with a section number. A two-letter language
component before C<.html> is removed from each destination.

=cut

sub extract_sections_from_links {
    my ($self, $data_fd, $base_url) = @_;

    my $mechanize = WWW::Mechanize->new();
    $mechanize->get($base_url);

    # title() can be undef, e.g. for content that is not HTML
    my $page_title = $mechanize->title // $self->title;

    # strip explanatory remark
    $page_title =~ s{ \s* \N{EM DASH} .* $}{}x;

    # underscore is a token for the whole page
    write_line($data_fd, $VOLUME_KEY, $page_title, $base_url);

    # title and destination of every section written so far
    my %by_section_key;
    my $in_appendix = 0;

    # https://stackoverflow.com/a/254687
    for my $link ($mechanize->links) {

        my $text = $link->text;
        next
          unless length $text;

        # only links that read like "1.2.3 Some title" are sections
        my ($section_key, $section_title)
          = ($text =~ m{^ \s* ([.\d]+) \s+ (.+) $}x);
        next
          unless defined $section_key;

        # drop final dots
        $section_key =~ s{ [.]+ $}{}x;

        # reduce consecutive whitespace
        $section_title =~ s{ \s+ }{ }gx;

        my $destination = $base_url . $link->url;

        # developers reference likes to return locale specific pages
        $destination =~ s{ [.]\w{2}[.]html }{.html}x;

        # NOTE(review): this lookup uses the key without the appendix
        # prefix, so a link repeated inside the appendix is compared with
        # the regular section of the same number; confirm that is intended
        my $seen = $by_section_key{$section_key};

        next
          if defined $seen
          && ( $seen->{title} eq $section_title
            || $seen->{destination} eq $destination);

        # Some manuals reuse section numbers for different references,
        # e.g. the Debian Policy's normal and appendix sections are
        # numbers that clash with each other. Track if we've already
        # seen a section pointing to some other URL than the current one,
        # and prepend it with an indicator
        $in_appendix = 1
          if defined $seen
          && $seen->{destination} ne $destination;

        $section_key = "appendix-$section_key"
          if $in_appendix;

        $by_section_key{$section_key} = {
            title => $section_title,
            destination => $destination,
        };

        write_line($data_fd, $section_key, $section_title, $destination);
    }

    return;
}

=item refresh(ARCHIVE, BASEDIR)

Regenerates the data file below BASEDIR from the Developer's Reference
published on www.debian.org. ARCHIVE is not used by this class.

=cut

sub refresh {
    my ($self, $archive, $basedir) = @_;

    my $base_url = 'https://www.debian.org/doc/developers-reference/';

    # collect the generated lines in memory first
    my $generated;
    open(my $memory_fd, '>', \$generated)
      or die encode_utf8('Cannot open scalar');

    $self->extract_sections_from_links($memory_fd, $base_url);

    close($memory_fd)
      or die encode_utf8('Cannot close scalar');

    $self->write_data_file($basedir, $generated);

    return;
}

=back

=head1 AUTHOR

Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.

=head1 SEE ALSO

lintian(1)

=cut

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Authority/DocBaseManual.pm b/lib/Lintian/Data/Authority/DocBaseManual.pm
new file mode 100644
index 0000000..53cfbcb
--- /dev/null
+++ b/lib/Lintian/Data/Authority/DocBaseManual.pm
@@ -0,0 +1,431 @@
# -*- perl -*-
#
# Copyright (C) 1998 Christian Schwarz and Richard Braakman
# Copyright (C) 2001 Colin Watson
# Copyright (C) 2008 Jorda Polo
# Copyright (C) 2009 Russ Allbery
# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
# Copyright (C) 2020-2021 Felix Lechner
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.

package Lintian::Data::Authority::DocBaseManual;

use v5.20;
use warnings;
use utf8;

use Carp qw(croak);
use Const::Fast;
use File::Basename qw(dirname basename);
use IPC::Run3;
use Path::Tiny;
use Unicode::UTF8 qw(encode_utf8);

# refresh calls URI::file->new_abs; load it instead of relying on another
# module to have done so
use URI::file ();
use WWW::Mechanize ();

use Lintian::Output::Markdown qw(markdown_authority);

const my $SPACE => q{ };
const my $SLASH => q{/};
const my $COLON => q{:};
const my $INDENT => $SPACE x 4;
const my $UNDERSCORE => q{_};

const my $TWO_PARTS => 2;

# key of the data line that describes the manual as a whole
const my $VOLUME_KEY => $UNDERSCORE;

# written between the fields of a data line
const my $SEPARATOR => $COLON x 2;

# the exit code sits in the high byte of the wait status
const my $WAIT_STATUS_SHIFT => 8;

use Moo;
use namespace::clean;

with 'Lintian::Data::JoinedLines';

=head1 NAME

Lintian::Data::Authority::DocBaseManual - Lintian interface for manual references

=head1 SYNOPSIS

    use Lintian::Data::Authority::DocBaseManual;

=head1 DESCRIPTION

Lintian::Data::Authority::DocBaseManual provides a way to load data files for
manual references.

=head1 CLASS METHODS

=over 4

=item title

Name of the manual. Also used as the volume title when the unpacked page
has no title of its own.

=item shorthand

Short identifier of the manual. The default data file location is derived
from it.

=item location

Path of the data file below a data directory. Defaults to C<authority/>
followed by the shorthand.

=item separator

Pattern used to split the fields of a data line. Defaults to a double colon.

=cut

has title => (
    is => 'rw',
    default => 'Doc-Base Manual'
);

has shorthand => (
    is => 'rw',
    default => 'doc-base-manual'
);

has location => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my ($self) = @_;

        return 'authority/' . $self->shorthand;
    }
);

has separator => (
    is => 'rw',
    default => sub { qr/::/ }
);

=item consumer(KEY, REMAINDER, PREVIOUS)

Splits REMAINDER at the separator into a title and a URL and returns them
as a hash reference with the keys C<title> and C<url>. Returns C<undef>
when PREVIOUS is defined.

=cut

sub consumer {
    my ($self, $key, $remainder, $previous) = @_;

    # NOTE(review): presumably PREVIOUS is the entry already stored for KEY
    # and undef asks the JoinedLines role to ignore the duplicate; the
    # explicit undef is kept because that contract is not visible here
    return undef
      if defined $previous;

    my ($title, $url) = split($self->separator, $remainder, $TWO_PARTS);

    return {
        title => $title,
        url => $url,
    };
}

=item markdown_citation(SECTION_KEY)

Returns the result of C<markdown_authority> for the manual and the section
SECTION_KEY. The section title and URL are passed as C<undef> when the key
is not recognized. Croaks when SECTION_KEY is the volume key C<_>.

=cut

sub markdown_citation {
    my ($self, $section_key) = @_;

    # the volume key names the manual itself, not a section
    croak encode_utf8("Invalid section $section_key")
      if $section_key eq $VOLUME_KEY;

    # start with the citation to the overall manual.
    my $volume_entry = $self->value($VOLUME_KEY);

    my $section_title;
    my $section_url;

    if ($self->recognizes($section_key)) {

        my $section_entry = $self->value($section_key);

        $section_title = $section_entry->{title};
        $section_url = $section_entry->{url};
    }

    return markdown_authority(
        $volume_entry->{title}, $volume_entry->{url},
        $section_key,$section_title, $section_url
    );
}

=item write_line(DATA_FD, SECTION_KEY, SECTION_TITLE, DESTINATION)

Plain function, not a method. Drops trailing dots from SECTION_KEY,
collapses runs of whitespace in SECTION_TITLE, joins the three fields with
a double colon and prints the UTF-8 encoded line to DATA_FD.

=cut

sub write_line {
    my ($data_fd, $section_key, $section_title, $destination) = @_;

    # drop final dots
    $section_key =~ s{ [.]+ $}{}x;

    # reduce consecutive whitespace
    $section_title =~ s{ \s+ }{ }gx;

    my $line = join($SEPARATOR, $section_key, $section_title, $destination);

    say {$data_fd} encode_utf8($line);

    return;
}

=item write_data_file(BASEDIR, GENERATED)

Writes a fixed explanatory header followed by GENERATED to the data file
at BASEDIR/location, creating the parent directory when needed.

=cut

sub write_data_file {
    my ($self, $basedir, $generated) = @_;

    # literal text; nothing in it is meant to be interpolated
    my $header =<<'HEADER';
# Data about titles, sections, and URLs of manuals, used to expand references
# in tag descriptions and add links for HTML output. Each line of this file
# has three fields separated by double colons:
#
# <section> :: <title> :: <url>
#
# If <section> is an underscore, that line specifies the title and URL for the
# whole manual.

HEADER

    my $data_path = path("$basedir/" . $self->location);

    # succeeds without doing anything when the directory already exists
    $data_path->parent->mkpath;

    # refresh passes GENERATED as already encoded lines
    $data_path->spew(encode_utf8($header) . $generated);

    return;
}

=item find_installable_name(ARCHIVE, PORT, REQUESTED_PATH)

Reads the Contents files that ARCHIVE provides for the architectures
C<all> and PORT in C<sid>/C<main> and returns the name of the first
installable listed for REQUESTED_PATH. Dies when no installable ships the
path and warns when there are several.

=cut

sub find_installable_name {
    my ($self, $archive, $port, $requested_path) = @_;

    my @installed_by;

    # find installable package
    for my $installable_architecture ('all', $port) {

        my $local_path
          = $archive->contents_gz('sid', 'main', $installable_architecture);

        open(my $fd, '<:gzip', $local_path)
          or die encode_utf8("Cannot open $local_path.");

        while (my $line = <$fd>) {

            chomp $line;

            # a single-space pattern splits on runs of whitespace
            my ($path, $finder) = split($SPACE, $line, 2);
            next
              unless length $path
              && length $finder;

            next
              unless $path eq $requested_path;

            # comma-separated locations; the part after the first slash
            # is recorded as the installable
            for my $location (split(m{,}, $finder)) {

                my (undef, $installable) = split(m{/}, $location, 2);

                push(@installed_by, $installable);
            }
        }

        close $fd;
    }

    die encode_utf8(
        "The path $requested_path is not installed by any package.")
      if @installed_by < 1;

    if (@installed_by > 1) {
        warn encode_utf8(
            "The path $requested_path is installed by multiple packages:\n");
        warn encode_utf8($INDENT . "- $_\n") for @installed_by;
    }

    my $installable_name = shift @installed_by;

    return $installable_name;
}

# Runs COMMAND without a shell and dies unless it finished with a zero
# wait status. Plain function, not a method.
sub _run_or_die {
    my (@command) = @_;

    # run3 takes stdin, stdout and stderr in that order; undef for stdin
    # lets the child inherit ours
    my $stdout;
    my $stderr;
    run3(\@command, undef, \$stdout, \$stderr);

    $stderr //= q{};

    unless ($?) {

        # keep diagnostics of successful commands visible
        print {*STDERR} $stderr
          if length $stderr;

        return;
    }

    # stderr already in UTF-8
    die $stderr
      if length $stderr;

    # a plain exit code check would miss death by signal, so the full
    # wait status is reported as well
    my $exit_code = ($? >> $WAIT_STATUS_SHIFT);
    die encode_utf8(
        "Command $command[0] failed (exit code $exit_code, wait status $?)");
}

=item refresh(ARCHIVE, BASEDIR)

Regenerates the data file below BASEDIR. Locates the installable that
ships the manual's index page, downloads it from the mirror of ARCHIVE
with wget, unpacks it with dpkg-deb into a temporary folder and collects
the numbered links of the page. Destinations are recorded relative to the
installed location of the page, not to the temporary folder.

=cut

sub refresh {
    my ($self, $archive, $basedir) = @_;

    # shipped as part of the doc-base installable
    my $shipped_base = 'usr/share/doc/doc-base/doc-base.html/';
    my $index_name = 'index.html';

    my $shipped_path = $shipped_base . $index_name;
    my $stored_uri = "file:///$shipped_path";

    # neutral sort order
    local $ENV{LC_ALL} = 'C';

    my $port = 'amd64';
    my $installable_name
      = $self->find_installable_name($archive, $port, $shipped_path);

    my $deb822_by_installable_name
      = $archive->deb822_packages_by_installable_name('sid', 'main', $port);

    my $work_folder
      = Path::Tiny->tempdir(TEMPLATE => 'refresh-doc-base-manual-XXXXXXXXXX');

    die encode_utf8("Installable $installable_name not shipped in port $port")
      unless exists $deb822_by_installable_name->{$installable_name};

    my $deb822 = $deb822_by_installable_name->{$installable_name};

    my $pool_path = $deb822->value('Filename');

    my $deb_filename = basename($pool_path);
    my $deb_local_path = "$work_folder/$deb_filename";
    my $deb_url = $archive->mirror_base . $SLASH . $pool_path;

    _run_or_die(qw{wget --quiet}, "--output-document=$deb_local_path",
        $deb_url);

    my $extract_folder = "$work_folder/unpacked/$pool_path";
    path($extract_folder)->mkpath;

    _run_or_die(qw{dpkg-deb --extract}, $deb_local_path, $extract_folder);

    unlink($deb_local_path)
      or die encode_utf8("Cannot delete $deb_local_path");

    # collect the generated lines in memory first
    my $generated;
    open(my $memory_fd, '>', \$generated)
      or die encode_utf8('Cannot open scalar');

    my $mechanize = WWW::Mechanize->new();

    my $fresh_uri = URI::file->new_abs("/$extract_folder/$shipped_path");
    $mechanize->get($fresh_uri);

    # title() can be undef, e.g. for content that is not HTML
    my $page_title = $mechanize->title // $self->title;

    # strip explanatory remark
    $page_title =~ s{ \s* \N{EM DASH} .* $}{}x;

    # underscore is a token for the whole page
    write_line($memory_fd, $VOLUME_KEY, $page_title, $stored_uri);

    # title and destination of every section written so far
    my %by_section_key;
    my $in_appendix = 0;

    # https://stackoverflow.com/a/254687
    for my $link ($mechanize->links) {

        my $text = $link->text;
        next
          unless length $text;

        # only links that read like "1.2.3 Some title" are sections
        my ($section_key, $section_title)
          = ($text =~ m{^ \s* ([.\d]+) \s+ (.+) $}x);
        next
          unless defined $section_key;

        # drop final dots
        $section_key =~ s{ [.]+ $}{}x;

        # reduce consecutive whitespace
        $section_title =~ s{ \s+ }{ }gx;

        my $relative_destination = $link->url;

        # fragments attach to the page itself; anything else is taken
        # relative to the folder that holds the page
        my $destination_base = $stored_uri;
        $destination_base = dirname($stored_uri) . $SLASH
          unless $destination_base =~ m{ / $}x
          || $relative_destination =~ m{^ [#] }x;

        my $full_destination = $destination_base . $relative_destination;

        # NOTE(review): this lookup uses the key without the appendix
        # prefix, so a link repeated inside the appendix is compared with
        # the regular section of the same number; confirm that is intended
        my $seen = $by_section_key{$section_key};

        next
          if defined $seen
          && ( $seen->{title} eq $section_title
            || $seen->{destination} eq $full_destination);

        # Some manuals reuse section numbers for different references,
        # e.g. the Debian Policy's normal and appendix sections are
        # numbers that clash with each other. Track if we've already
        # seen a section pointing to some other URL than the current one,
        # and prepend it with an indicator
        $in_appendix = 1
          if defined $seen
          && $seen->{destination} ne $full_destination;

        $section_key = "appendix-$section_key"
          if $in_appendix;

        $by_section_key{$section_key} = {
            title => $section_title,
            destination => $full_destination,
        };

        write_line($memory_fd, $section_key, $section_title,
            $full_destination);
    }

    close($memory_fd)
      or die encode_utf8('Cannot close scalar');

    $self->write_data_file($basedir, $generated);

    return;
}

=back

=head1 AUTHOR

Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.

=head1 SEE ALSO

lintian(1)

=cut

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Authority/FilesystemHierarchy.pm b/lib/Lintian/Data/Authority/FilesystemHierarchy.pm
new file mode 100644
index 0000000..89fb677
--- /dev/null
+++ b/lib/Lintian/Data/Authority/FilesystemHierarchy.pm
@@ -0,0 +1,333 @@
# -*- perl -*-
#
# Copyright (C) 1998 Christian Schwarz and Richard Braakman
# Copyright (C) 2001 Colin Watson
# Copyright (C) 2008 Jorda Polo
# Copyright (C) 2009 Russ Allbery
# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
# Copyright (C) 2020-2021 Felix Lechner
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Data::Authority::FilesystemHierarchy;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Carp qw(croak);
+use Const::Fast;
+use File::Basename qw(dirname);
+use Path::Tiny;
+use Unicode::UTF8 qw(encode_utf8);
+use WWW::Mechanize ();
+
+use Lintian::Output::Markdown qw(markdown_authority);
+
+const my $EMPTY => q{};
+const my $SPACE => q{ };
+const my $SLASH => q{/};
+const my $COLON => q{:};
+const my $UNDERSCORE => q{_};
+const my $LEFT_PARENTHESIS => q{(};
+const my $RIGHT_PARENTHESIS => q{)};
+
+const my $THREE_PARTS => 3;
+
+const my $VOLUME_KEY => $UNDERSCORE;
+const my $SEPARATOR => $COLON x 2;
+
+use Moo;
+use namespace::clean;
+
+with 'Lintian::Data::JoinedLines';
+
+=head1 NAME
+
+Lintian::Data::Authority::FilesystemHierarchy - Lintian interface for manual references
+
+=head1 SYNOPSIS
+
+    use Lintian::Data::Authority::FilesystemHierarchy;
+
+=head1 DESCRIPTION
+
+Lintian::Data::Authority::FilesystemHierarchy provides a way to load data files for
+manual references.
+
+=head1 CLASS METHODS
+
+=over 4
+
+=item title
+
+Human-readable name of the manual. Defaults to C<Filesystem Hierarchy Standard>.
+
+=item shorthand
+
+Short identifier of the manual. Defaults to C<filesystem-hierarchy>.
+
+=item location
+
+Relative path of the data file. Built lazily as C<authority/> followed by
+the shorthand.
+
+=item separator
+
+Compiled pattern that matches the double colon between fields.
+
+=cut
+
+has title => (
+    is => 'rw',
+    default => 'Filesystem Hierarchy Standard'
+);
+
+has shorthand => (
+    is => 'rw',
+    default => 'filesystem-hierarchy'
+);
+
+has location => (
+    is => 'rw',
+    lazy => 1,
+    default => sub {
+        my ($self) = @_;
+
+        return 'authority/' . $self->shorthand;
+    }
+);
+
+has separator => (
+    is => 'rw',
+    default => sub { qr/::/ }
+);
+
+=item consumer
+
+Turns the remainder of a data line (everything after the key) into a hash
+reference with the fields C<number>, C<title> and C<url>. Returns the
+undefined value when a previous entry is passed in. Presumably called by
+L<Lintian::Data::JoinedLines> once per line; that role is not shown here.
+
+=cut
+
+sub consumer {
+    my ($self, $key, $remainder, $previous) = @_;
+
+    return undef
+      if defined $previous;
+
+    # the limit keeps any further separators inside the URL field
+    my ($number, $title, $url)
+      = split($self->separator, $remainder, $THREE_PARTS);
+
+    my %entry;
+    $entry{title} = $title;
+    $entry{number} = $number;
+    $entry{url} = $url;
+
+    return \%entry;
+}
+
+=item markdown_citation
+
+Returns the result of C<markdown_authority> for the given section key.
+Croaks when the key is the volume key (an underscore), which stands for the
+whole manual. Section details are left undefined for unrecognized keys.
+
+=cut
+
+sub markdown_citation {
+    my ($self, $section_key) = @_;
+
+    croak "Invalid section $section_key"
+      if $section_key eq $VOLUME_KEY;
+
+    my $volume_entry = $self->value($VOLUME_KEY);
+
+    # start with the citation to the overall manual.
+    my $volume_title = $volume_entry->{title};
+    my $volume_url = $volume_entry->{url};
+
+    my $section_title;
+    my $section_number;
+    my $section_url;
+
+    if ($self->recognizes($section_key)) {
+
+        my $section_entry = $self->value($section_key);
+
+        $section_title = $section_entry->{title};
+        $section_number = $section_entry->{number};
+        $section_url = $section_entry->{url};
+    }
+
+    return markdown_authority(
+        $volume_title, $volume_url, $section_number,
+        $section_title, $section_url
+    );
+}
+
+=item write_line
+
+Plain function, not a method. Writes one UTF-8 encoded line with the four
+fields key, number, title and destination, joined by double colons, to the
+given file handle.
+
+=cut
+
+sub write_line {
+    my ($data_fd, $section_key, $section_number, $section_title, $destination)
+      = @_;
+
+    # drop final dots
+    $section_key =~ s{ [.]+ $}{}x;
+
+    # reduce consecutive whitespace
+    $section_title =~ s{ \s+ }{ }gx;
+
+    my $line = join($SEPARATOR,
+        $section_key, $section_number, $section_title, $destination);
+
+    say {$data_fd} encode_utf8($line);
+
+    return;
+}
+
+=item write_data_file
+
+Writes a comment header followed by the already encoded, generated lines to
+the data file below the given base directory. The parent directory is
+created when it does not exist.
+
+=cut
+
+sub write_data_file {
+    my ($self, $basedir, $generated) = @_;
+
+    # write_line emits four fields, and consumer reads three after the key
+    my $header =<<"HEADER";
+# Data about titles, sections, and URLs of manuals, used to expand references
+# in tag descriptions and add links for HTML output. Each line of this file
+# has four fields separated by double colons:
+#
+# <section> :: <number> :: <title> :: <url>
+#
+# If <section> is an underscore, that line specifies the title and URL for the
+# whole manual.
+
+HEADER
+
+    my $data_path = "$basedir/" . $self->location;
+    my $parent_dir = path($data_path)->parent->stringify;
+    path($parent_dir)->mkpath
+      unless -e $parent_dir;
+
+    # the generated part was encoded line by line in write_line
+    my $output = encode_utf8($header) . $generated;
+    path($data_path)->spew($output);
+
+    return;
+}
+
+=item extract_sections_from_links
+
+Fetches the page at the base URL plus page name and writes one line for the
+whole page, followed by one line for each link whose text starts with a
+dotted section number. The lowercased link target, minus a leading hash,
+serves as the key. Only the first link for each key is written.
+
+=cut
+
+sub extract_sections_from_links {
+    my ($self, $data_fd, $base_url, $page_name) = @_;
+
+    my $page_url = $base_url . $page_name;
+
+    my $mechanize = WWW::Mechanize->new();
+    $mechanize->get($page_url);
+
+    my $page_title = $mechanize->title;
+
+    # strip explanatory remark
+    $page_title =~ s{ \s* \N{EM DASH} .* $}{}x;
+
+    # underscore is a token for the whole page
+    write_line($data_fd, $VOLUME_KEY, $EMPTY, $page_title, $page_url);
+
+    my %by_section_key;
+
+    # https://stackoverflow.com/a/254687
+    for my $link ($mechanize->links) {
+
+        next
+          unless length $link->url;
+
+        # make lowercase
+        my $section_key = lc($link->url);
+
+        # strip hash; it's a fragment;
+        $section_key =~ s{^ [#] }{}x;
+
+        next
+          unless length $link->text;
+
+        next
+          if $link->text !~ qr{^ \s* ([.\d]+) \s+ (.+) $}x;
+
+        my $section_number = $1;
+        my $section_title = $2;
+
+        # drop final dot
+        $section_number =~ s{ [.]+ $}{}x;
+
+        # reduce consecutive whitespace
+        $section_title =~ s{ \s+ }{ }gx;
+
+        # includes hash
+        my $relative_destination = $link->url;
+
+        # a bare fragment is appended to the page URL itself; anything else
+        # is taken relative to the directory that holds the page
+        my $destination_base = $page_url;
+        $destination_base = dirname($page_url) . $SLASH
+          unless $destination_base =~ m{ / $}x
+          || $relative_destination =~ m{^ [#] }x;
+
+        my $full_destination = $destination_base . $relative_destination;
+
+        # the first link for a key wins
+        next
+          if exists $by_section_key{$section_key};
+
+        $by_section_key{$section_key}{title} = $section_title;
+        $by_section_key{$section_key}{number} = $section_number;
+        $by_section_key{$section_key}{destination} = $full_destination;
+
+        write_line($data_fd, $section_key, $section_number,
+            $section_title, $full_destination);
+    }
+
+    return;
+}
+
+=item refresh
+
+Regenerates the data file below the given base directory from the single
+page version of FHS 3.0 on the web. The archive argument is not used.
+
+=cut
+
+sub refresh {
+    my ($self, $archive, $basedir) = @_;
+
+    # single page version
+    # plain directory shows a file list
+    my $base_url = 'https://refspecs.linuxfoundation.org/FHS_3.0/';
+    my $index_name = 'fhs-3.0.html';
+
+    # collect the generated lines in memory before touching the data file
+    my $generated;
+    open(my $memory_fd, '>', \$generated)
+      or die encode_utf8("Cannot open scalar: $!");
+
+    $self->extract_sections_from_links($memory_fd, $base_url, $index_name);
+
+    close $memory_fd;
+
+    $self->write_data_file($basedir, $generated);
+
+    return;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Authority/JavaPolicy.pm b/lib/Lintian/Data/Authority/JavaPolicy.pm
new file mode 100644
index 0000000..eaa6704
--- /dev/null
+++ b/lib/Lintian/Data/Authority/JavaPolicy.pm
@@ -0,0 +1,290 @@
+# -*- perl -*-
+#
+# Copyright (C) 1998 Christian Schwarz and Richard Braakman
+# Copyright (C) 2001 Colin Watson
+# Copyright (C) 2008 Jorda Polo
+# Copyright (C) 2009 Russ Allbery
+# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
+# Copyright (C) 2020-2021 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Data::Authority::JavaPolicy;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Carp qw(croak);
+use Const::Fast;
+use List::SomeUtils qw(any first_value);
+use Path::Tiny;
+use Unicode::UTF8 qw(encode_utf8);
+use WWW::Mechanize ();
+
+use Lintian::Output::Markdown qw(markdown_authority);
+
+const my $SLASH => q{/};
+const my $UNDERSCORE => q{_};
+
+const my $VOLUME_KEY => $UNDERSCORE;
+const my $SECTIONS => 'sections';
+
+use Moo;
+use namespace::clean;
+
+with 'Lintian::Data::PreambledJSON';
+
+=head1 NAME
+
+Lintian::Data::Authority::JavaPolicy - Lintian interface for manual references
+
+=head1 SYNOPSIS
+
+    use Lintian::Data::Authority::JavaPolicy;
+
+=head1 DESCRIPTION
+
+Lintian::Data::Authority::JavaPolicy provides a way to load data files for
+manual references.
+
+=head1 CLASS METHODS
+
+=over 4
+
+=item title
+
+Human-readable name of the manual. Defaults to C<Java Policy>.
+
+=item shorthand
+
+Short identifier of the manual. Defaults to C<java-policy>.
+
+=item location
+
+Relative path of the data file. Built lazily as C<authority/>, the
+shorthand and the suffix C<.json>.
+
+=item by_section_key
+
+Hash reference that maps section keys to the section entries stored by
+C<load>.
+
+=cut
+
+has title => (
+    is => 'rw',
+    default => 'Java Policy'
+);
+
+has shorthand => (
+    is => 'rw',
+    default => 'java-policy'
+);
+
+has location => (
+    is => 'rw',
+    lazy => 1,
+    default => sub {
+        my ($self) = @_;
+
+        return 'authority/' . $self->shorthand . '.json';
+    }
+);
+
+has by_section_key => (is => 'rw', default => sub { {} });
+
+=item markdown_citation
+
+Returns the result of C<markdown_authority> for the given section key.
+Croaks when the key is the volume key (an underscore), which stands for the
+whole manual. Section title and URL are left undefined for unrecognized
+keys.
+
+=cut
+
+sub markdown_citation {
+    my ($self, $section_key) = @_;
+
+    croak "Invalid section $section_key"
+      if $section_key eq $VOLUME_KEY;
+
+    my $volume_entry = $self->value($VOLUME_KEY);
+
+    # start with the citation to the overall manual.
+    my $volume_title = $volume_entry->{title};
+    my $volume_url = $volume_entry->{destination};
+
+    my $section_title;
+    my $section_url;
+
+    if ($self->recognizes($section_key)) {
+
+        my $section_entry = $self->value($section_key);
+
+        $section_title = $section_entry->{title};
+        $section_url = $section_entry->{destination};
+    }
+
+    return markdown_authority(
+        $volume_title, $volume_url,$section_key,
+        $section_title, $section_url
+    );
+}
+
+=item recognizes (KEY)
+
+Returns true if KEY is known, and false otherwise.
+
+=cut
+
+sub recognizes {
+    my ($self, $key) = @_;
+
+    # an undefined or empty key is never known
+    return 0
+      unless length $key;
+
+    return 1
+      if exists $self->by_section_key->{$key};
+
+    return 0;
+}
+
+=item value (KEY)
+
+Returns the value attached to KEY if it was listed in the data
+file represented by this Lintian::Data instance and the undefined value
+otherwise.
+
+=cut
+
+sub value {
+    my ($self, $key) = @_;
+
+    return undef
+      unless length $key;
+
+    return $self->by_section_key->{$key};
+}
+
+=item load
+
+Looks for the data file below each directory in the given search space, in
+order, and reads the first one that exists. Returns 0 when C<read_file>
+fails and 1 otherwise. The vendor argument is not used.
+
+=cut
+
+sub load {
+    my ($self, $search_space, $our_vendor) = @_;
+
+    my @candidates = map { $_ . $SLASH . $self->location } @{$search_space};
+
+    # NOTE(review): $path is undefined when no candidate exists; read_file
+    # comes from a role not shown here and presumably copes - confirm
+    my $path = first_value { -e } @candidates;
+
+    my $reference;
+
+    return 0
+      unless $self->read_file($path, \$reference);
+
+    my @sections = @{$reference // []};
+
+    for my $section (@sections) {
+
+        my $key = $section->{key};
+
+        # only store first value for duplicates
+        # silently ignore later values
+        $self->by_section_key->{$key} //= $section;
+    }
+
+    return 1;
+}
+
+=item refresh
+
+Fetches the Java Policy from the web and collects one entry for the whole
+page plus one entry for each link whose text starts with a dotted section
+number. Passes the entries to C<write_file> for the data file below the
+given base directory and returns its status. The archive argument is not
+used.
+
+=cut
+
+sub refresh {
+    my ($self, $archive, $basedir) = @_;
+
+    my $base_url = 'https://www.debian.org/doc/packaging-manuals/java-policy/';
+
+    my $mechanize = WWW::Mechanize->new();
+    $mechanize->get($base_url);
+
+    my $page_title = $mechanize->title;
+
+    my @sections;
+
+    # underscore is a token for the whole page
+    my %volume;
+    $volume{key} = $VOLUME_KEY;
+    $volume{title} = $page_title;
+    $volume{destination} = $base_url;
+
+    # store array to resemble web layout
+    # may contain duplicates
+    push(@sections, \%volume);
+
+    # sticky: once set, every later section gets the appendix prefix
+    my $in_appendix = 0;
+
+    # https://stackoverflow.com/a/254687
+    for my $link ($mechanize->links) {
+
+        next
+          unless length $link->text;
+
+        next
+          if $link->text !~ qr{^ \s* ([.\d]+) \s+ (.+) $}x;
+
+        my $section_key = $1;
+        my $section_title = $2;
+
+        # drop final dots
+        $section_key =~ s{ [.]+ $}{}x;
+
+        # reduce consecutive whitespace
+        $section_title =~ s{ \s+ }{ }gx;
+
+        my $destination = $base_url . $link->url;
+
+        # skip a link that repeats the title or the destination of an
+        # entry already stored under the same key
+        my @similar = grep { $_->{key} eq $section_key } @sections;
+        next
+          if (any { $_->{title} eq $section_title } @similar)
+          || (any { $_->{destination} eq $destination } @similar);
+
+        # Some manuals reuse section numbers for different references,
+        # e.g. the Debian Policy's normal and appendix sections are
+        # numbers that clash with each other. Track if we've already
+        # seen a section pointing to some other URL than the current one,
+        # and prepend it with an indicator
+        $in_appendix = 1
+          if any { $_->{destination} ne $destination } @similar;
+
+        $section_key = "appendix-$section_key"
+          if $in_appendix;
+
+        my %section;
+        $section{key} = $section_key;
+        $section{title} = $section_title;
+        $section{destination} = $destination;
+        push(@sections, \%section);
+    }
+
+    my $data_path = "$basedir/" . $self->location;
+    my $status = $self->write_file($SECTIONS, \@sections, $data_path);
+
+    return $status;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Authority/LintianManual.pm b/lib/Lintian/Data/Authority/LintianManual.pm
new file mode 100644
index 0000000..3fc7bd0
--- /dev/null
+++ b/lib/Lintian/Data/Authority/LintianManual.pm
@@ -0,0 +1,324 @@
+# -*- perl -*-
+#
+# Copyright (C) 1998 Christian Schwarz and Richard Braakman
+# Copyright (C) 2001 Colin Watson
+# Copyright (C) 2008 Jorda Polo
+# Copyright (C) 2009 Russ Allbery
+# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
+# Copyright (C) 2020-2021 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Data::Authority::LintianManual;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Carp qw(croak);
+use Const::Fast;
+use IPC::Run3;
+use Path::Tiny;
+use Unicode::UTF8 qw(encode_utf8);
+use URI::file;
+use WWW::Mechanize ();
+
+use Lintian::Output::Markdown qw(markdown_authority);
+
+const my $EMPTY => q{};
+const my $SPACE => q{ };
+const my $SLASH => q{/};
+const my $COLON => q{:};
+const my $UNDERSCORE => q{_};
+const my $LEFT_PARENTHESIS => q{(};
+const my $RIGHT_PARENTHESIS => q{)};
+
+const my $TWO_PARTS => 2;
+
+const my $VOLUME_KEY => $UNDERSCORE;
+const my $SEPARATOR => $COLON x 2;
+
+const my $WAIT_STATUS_SHIFT => 8;
+
+use Moo;
+use namespace::clean;
+
+with 'Lintian::Data::JoinedLines';
+
+=head1 NAME
+
+Lintian::Data::Authority::LintianManual - Lintian interface for manual references
+
+=head1 SYNOPSIS
+
+    use Lintian::Data::Authority::LintianManual;
+
+=head1 DESCRIPTION
+
+Lintian::Data::Authority::LintianManual provides a way to load data files for
+manual references.
+
+=head1 CLASS METHODS
+
+=over 4
+
+=item title
+
+Human-readable name of the manual. Defaults to C<Lintian Manual>.
+
+=item shorthand
+
+Short identifier of the manual. Defaults to C<lintian-manual>.
+
+=item location
+
+Relative path of the data file. Built lazily as C<authority/> followed by
+the shorthand.
+
+=item separator
+
+Compiled pattern that matches the double colon between fields.
+
+=cut
+
+has title => (
+    is => 'rw',
+    default => 'Lintian Manual'
+);
+
+has shorthand => (
+    is => 'rw',
+    default => 'lintian-manual'
+);
+
+has location => (
+    is => 'rw',
+    lazy => 1,
+    default => sub {
+        my ($self) = @_;
+
+        return 'authority/' . $self->shorthand;
+    }
+);
+
+has separator => (
+    is => 'rw',
+    default => sub { qr/::/ }
+);
+
+=item consumer
+
+Turns the remainder of a data line (everything after the key) into a hash
+reference with the fields C<title> and C<url>. Returns the undefined value
+when a previous entry is passed in. Presumably called by
+L<Lintian::Data::JoinedLines> once per line; that role is not shown here.
+
+=cut
+
+sub consumer {
+    my ($self, $key, $remainder, $previous) = @_;
+
+    return undef
+      if defined $previous;
+
+    # the limit keeps any further separators inside the URL field
+    my ($title, $url) = split($self->separator, $remainder, $TWO_PARTS);
+
+    my %entry;
+    $entry{title} = $title;
+    $entry{url} = $url;
+
+    return \%entry;
+}
+
+=item markdown_citation
+
+Returns the result of C<markdown_authority> for the given section key.
+Croaks when the key is the volume key (an underscore), which stands for the
+whole manual. Section title and URL are left undefined for unrecognized
+keys.
+
+=cut
+
+sub markdown_citation {
+    my ($self, $section_key) = @_;
+
+    croak "Invalid section $section_key"
+      if $section_key eq $VOLUME_KEY;
+
+    my $volume_entry = $self->value($VOLUME_KEY);
+
+    # start with the citation to the overall manual.
+    my $volume_title = $volume_entry->{title};
+    my $volume_url = $volume_entry->{url};
+
+    my $section_title;
+    my $section_url;
+
+    if ($self->recognizes($section_key)) {
+
+        my $section_entry = $self->value($section_key);
+
+        $section_title = $section_entry->{title};
+        $section_url = $section_entry->{url};
+    }
+
+    return markdown_authority(
+        $volume_title, $volume_url, $section_key,
+        $section_title, $section_url
+    );
+}
+
+=item write_line
+
+Plain function, not a method. Writes one UTF-8 encoded line with the three
+fields key, title and destination, joined by double colons, to the given
+file handle.
+
+=cut
+
+sub write_line {
+    my ($data_fd, $section_key, $section_title, $destination) = @_;
+
+    # drop final dots
+    $section_key =~ s{ [.]+ $}{}x;
+
+    # reduce consecutive whitespace
+    $section_title =~ s{ \s+ }{ }gx;
+
+    my $line = join($SEPARATOR, $section_key, $section_title, $destination);
+
+    say {$data_fd} encode_utf8($line);
+
+    return;
+}
+
+=item write_data_file
+
+Writes a comment header followed by the already encoded, generated lines to
+the data file below the given base directory. The parent directory is
+created when it does not exist.
+
+=cut
+
+sub write_data_file {
+    my ($self, $basedir, $generated) = @_;
+
+    my $header =<<"HEADER";
+# Data about titles, sections, and URLs of manuals, used to expand references
+# in tag descriptions and add links for HTML output. Each line of this file
+# has three fields separated by double colons:
+#
+# <section> :: <title> :: <url>
+#
+# If <section> is an underscore, that line specifies the title and URL for the
+# whole manual.
+
+HEADER
+
+    my $data_path = "$basedir/" . $self->location;
+    my $parent_dir = path($data_path)->parent->stringify;
+    path($parent_dir)->mkpath
+      unless -e $parent_dir;
+
+    # the generated part was encoded line by line in write_line
+    my $output = encode_utf8($header) . $generated;
+    path($data_path)->spew($output);
+
+    return;
+}
+
+=item refresh
+
+Renders C<doc/lintian.rst> below the directory named by the environment
+variable C<LINTIAN_BASE> to HTML with C<rst2html>, then regenerates the data
+file below the given base directory from the numbered section links in that
+HTML. Croaks when C<LINTIAN_BASE> is not set and dies when C<rst2html>
+fails. The archive argument is not used.
+
+=cut
+
+sub refresh {
+    my ($self, $archive, $basedir) = @_;
+
+    # fail early instead of rendering /doc/lintian.rst
+    my $lintian_base = $ENV{LINTIAN_BASE};
+    croak encode_utf8('LINTIAN_BASE is not set')
+      unless length $lintian_base;
+
+    # WWW::Mechanize will not parse page title without the suffix
+    my $temp_tiny = Path::Tiny->tempfile(
+        TEMPLATE => 'lintian-manual-XXXXXXXX',
+        SUFFIX => '.html'
+    );
+    my $local_uri = URI::file->new_abs($temp_tiny->stringify);
+
+    # for rst2html
+    local $ENV{LC_ALL} = 'en_US.UTF-8';
+
+    my $stderr;
+    run3(['rst2html', "$lintian_base/doc/lintian.rst"],
+        undef, $local_uri->file, \$stderr);
+    my $wait_status = $?;
+
+    # test the whole wait status; the exit code alone is zero when the
+    # child was killed by a signal
+    if ($wait_status) {
+
+        # stderr already in UTF-8
+        die $stderr
+          if length $stderr;
+
+        my $exit_code = ($wait_status >> $WAIT_STATUS_SHIFT);
+        die encode_utf8(
+"rst2html failed (exit code $exit_code, wait status $wait_status)\n"
+        );
+    }
+
+    # collect the generated lines in memory before touching the data file
+    my $generated;
+    open(my $memory_fd, '>', \$generated)
+      or die encode_utf8("Cannot open scalar: $!");
+
+    # the links are read from the local rendering but recorded relative
+    # to the public address
+    my $page_url = 'https://lintian.debian.org/manual/index.html';
+
+    my $mechanize = WWW::Mechanize->new();
+    $mechanize->get($local_uri);
+
+    my $page_title = $mechanize->title;
+
+    # underscore is a token for the whole page
+    write_line($memory_fd, $VOLUME_KEY, $page_title, $page_url);
+
+    my %by_section_key;
+
+    # sticky: once set, every later section gets the appendix prefix
+    my $in_appendix = 0;
+
+    # https://stackoverflow.com/a/254687
+    for my $link ($mechanize->links) {
+
+        next
+          unless length $link->text;
+
+        next
+          if $link->text !~ qr{^ \s* ([.\d]+) \s+ (.+) $}x;
+
+        my $section_key = $1;
+        my $section_title = $2;
+
+        # drop final dots
+        $section_key =~ s{ [.]+ $}{}x;
+
+        # reduce consecutive whitespace
+        $section_title =~ s{ \s+ }{ }gx;
+
+        my $destination = $page_url . $link->url;
+
+        # skip a link that repeats the title or the destination already
+        # stored under the same key
+        next
+          if exists $by_section_key{$section_key}
+          && ( $by_section_key{$section_key}{title} eq $section_title
+            || $by_section_key{$section_key}{destination} eq $destination);
+
+        # Some manuals reuse section numbers for different references,
+        # e.g. the Debian Policy's normal and appendix sections are
+        # numbers that clash with each other. Track if we've already
+        # seen a section pointing to some other URL than the current one,
+        # and prepend it with an indicator
+        $in_appendix = 1
+          if exists $by_section_key{$section_key}
+          && $by_section_key{$section_key}{destination} ne $destination;
+
+        $section_key = "appendix-$section_key"
+          if $in_appendix;
+
+        $by_section_key{$section_key}{title} = $section_title;
+        $by_section_key{$section_key}{destination} = $destination;
+
+        write_line($memory_fd, $section_key, $section_title, $destination);
+    }
+
+    close $memory_fd;
+
+    $self->write_data_file($basedir, $generated);
+
+    return;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Authority/MenuManual.pm b/lib/Lintian/Data/Authority/MenuManual.pm
new file mode 100644
index 0000000..c8a2878
--- /dev/null
+++ b/lib/Lintian/Data/Authority/MenuManual.pm
@@ -0,0 +1,316 @@
+# -*- perl -*-
+#
+# Copyright (C) 1998 Christian Schwarz and Richard Braakman
+# Copyright (C) 2001 Colin Watson
+# Copyright (C) 2008 Jorda Polo
+# Copyright (C) 2009 Russ Allbery
+# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
+# Copyright (C) 2020-2021 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Data::Authority::MenuManual;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Carp qw(croak);
+use Const::Fast;
+use Path::Tiny;
+use Unicode::UTF8 qw(encode_utf8);
+use WWW::Mechanize ();
+
+use Lintian::Output::Markdown qw(markdown_authority);
+
+const my $EMPTY => q{};
+const my $SPACE => q{ };
+const my $SLASH => q{/};
+const my $COLON => q{:};
+const my $UNDERSCORE => q{_};
+const my $LEFT_PARENTHESIS => q{(};
+const my $RIGHT_PARENTHESIS => q{)};
+
+const my $TWO_PARTS => 2;
+
+const my $VOLUME_KEY => $UNDERSCORE;
+const my $SEPARATOR => $COLON x 2;
+
+use Moo;
+use namespace::clean;
+
+with 'Lintian::Data::JoinedLines';
+
+=head1 NAME
+
+Lintian::Data::Authority::MenuManual - Lintian interface for manual references
+
+=head1 SYNOPSIS
+
+    use Lintian::Data::Authority::MenuManual;
+
+=head1 DESCRIPTION
+
+Lintian::Data::Authority::MenuManual provides a way to load data files for
+manual references.
+
+=head1 CLASS METHODS
+
+=over 4
+
+=item title
+
+Human-readable name of the manual. Defaults to C<Menu Manual>.
+
+=item shorthand
+
+Short identifier of the manual. Defaults to C<menu-manual>.
+
+=item location
+
+Relative path of the data file. Built lazily as C<authority/> followed by
+the shorthand.
+
+=item separator
+
+Compiled pattern that matches the double colon between fields.
+
+=cut
+
+has title => (
+    is => 'rw',
+    default => 'Menu Manual'
+);
+
+has shorthand => (
+    is => 'rw',
+    default => 'menu-manual'
+);
+
+has location => (
+    is => 'rw',
+    lazy => 1,
+    default => sub {
+        my ($self) = @_;
+
+        return 'authority/' . $self->shorthand;
+    }
+);
+
+has separator => (
+    is => 'rw',
+    default => sub { qr/::/ }
+);
+
+=item consumer
+
+Turns the remainder of a data line (everything after the key) into a hash
+reference with the fields C<title> and C<url>. Returns the undefined value
+when a previous entry is passed in. Presumably called by
+L<Lintian::Data::JoinedLines> once per line; that role is not shown here.
+
+=cut
+
+sub consumer {
+    my ($self, $key, $remainder, $previous) = @_;
+
+    return undef
+      if defined $previous;
+
+    # the limit keeps any further separators inside the URL field
+    my ($title, $url) = split($self->separator, $remainder, $TWO_PARTS);
+
+    my %entry;
+    $entry{title} = $title;
+    $entry{url} = $url;
+
+    return \%entry;
+}
+
+=item markdown_citation
+
+Returns the result of C<markdown_authority> for the given section key.
+Croaks when the key is the volume key (an underscore), which stands for the
+whole manual. Section title and URL are left undefined for unrecognized
+keys.
+
+=cut
+
+sub markdown_citation {
+    my ($self, $section_key) = @_;
+
+    croak "Invalid section $section_key"
+      if $section_key eq $VOLUME_KEY;
+
+    my $volume_entry = $self->value($VOLUME_KEY);
+
+    # start with the citation to the overall manual.
+    my $volume_title = $volume_entry->{title};
+    my $volume_url = $volume_entry->{url};
+
+    my $section_title;
+    my $section_url;
+
+    if ($self->recognizes($section_key)) {
+
+        my $section_entry = $self->value($section_key);
+
+        $section_title = $section_entry->{title};
+        $section_url = $section_entry->{url};
+    }
+
+    return markdown_authority(
+        $volume_title, $volume_url, $section_key,
+        $section_title, $section_url
+    );
+}
+
+=item write_line
+
+Plain function, not a method. Writes one UTF-8 encoded line with the three
+fields key, title and destination, joined by double colons, to the given
+file handle.
+
+=cut
+
+sub write_line {
+    my ($data_fd, $section_key, $section_title, $destination) = @_;
+
+    # drop final dots
+    $section_key =~ s{ [.]+ $}{}x;
+
+    # reduce consecutive whitespace
+    $section_title =~ s{ \s+ }{ }gx;
+
+    my $line = join($SEPARATOR, $section_key, $section_title, $destination);
+
+    say {$data_fd} encode_utf8($line);
+
+    return;
+}
+
+=item write_data_file
+
+Writes a comment header followed by the already encoded, generated lines to
+the data file below the given base directory. The parent directory is
+created when it does not exist.
+
+=cut
+
+sub write_data_file {
+    my ($self, $basedir, $generated) = @_;
+
+    my $header =<<"HEADER";
+# Data about titles, sections, and URLs of manuals, used to expand references
+# in tag descriptions and add links for HTML output. Each line of this file
+# has three fields separated by double colons:
+#
+# <section> :: <title> :: <url>
+#
+# If <section> is an underscore, that line specifies the title and URL for the
+# whole manual.
+
+HEADER
+
+    my $data_path = "$basedir/" . $self->location;
+    my $parent_dir = path($data_path)->parent->stringify;
+    path($parent_dir)->mkpath
+      unless -e $parent_dir;
+
+    # the generated part was encoded line by line in write_line
+    my $output = encode_utf8($header) . $generated;
+    path($data_path)->spew($output);
+
+    return;
+}
+
+=item extract_sections_from_links
+
+Fetches the page at the base URL and writes one line for the whole page,
+followed by one line for each link whose text starts with a dotted section
+number. That number, minus trailing dots, serves as the key.
+
+=cut
+
+sub extract_sections_from_links {
+    my ($self, $data_fd, $base_url) = @_;
+
+    my $mechanize = WWW::Mechanize->new();
+    $mechanize->get($base_url);
+
+    my $page_title = $mechanize->title;
+
+    # strip explanatory remark
+    $page_title =~ s{ \s* \N{EM DASH} .* $}{}x;
+
+    # underscore is a token for the whole page
+    write_line($data_fd, $VOLUME_KEY, $page_title, $base_url);
+
+    my %by_section_key;
+
+    # sticky: once set, every later section gets the appendix prefix
+    my $in_appendix = 0;
+
+    # https://stackoverflow.com/a/254687
+    for my $link ($mechanize->links) {
+
+        next
+          unless length $link->text;
+
+        next
+          if $link->text !~ qr{^ \s* ([.\d]+) \s+ (.+) $}x;
+
+        my $section_key = $1;
+        my $section_title = $2;
+
+        # drop final dots
+        $section_key =~ s{ [.]+ $}{}x;
+
+        # reduce consecutive whitespace
+        $section_title =~ s{ \s+ }{ }gx;
+
+        my $destination = $base_url . $link->url;
+
+        # skip a link that repeats the title or the destination already
+        # stored under the same key
+        next
+          if exists $by_section_key{$section_key}
+          && ( $by_section_key{$section_key}{title} eq $section_title
+            || $by_section_key{$section_key}{destination} eq $destination);
+
+        # Some manuals reuse section numbers for different references,
+        # e.g. the Debian Policy's normal and appendix sections are
+        # numbers that clash with each other. Track if we've already
+        # seen a section pointing to some other URL than the current one,
+        # and prepend it with an indicator
+        $in_appendix = 1
+          if exists $by_section_key{$section_key}
+          && $by_section_key{$section_key}{destination} ne $destination;
+
+        $section_key = "appendix-$section_key"
+          if $in_appendix;
+
+        $by_section_key{$section_key}{title} = $section_title;
+        $by_section_key{$section_key}{destination} = $destination;
+
+        write_line($data_fd, $section_key, $section_title, $destination);
+    }
+
+    return;
+}
+
+=item refresh
+
+Regenerates the data file below the given base directory from the Debian
+Menu Manual on the web. The archive argument is not used.
+
+=cut
+
+sub refresh {
+    my ($self, $archive, $basedir) = @_;
+
+    my $base_url = 'https://www.debian.org/doc/packaging-manuals/menu.html/';
+
+    # collect the generated lines in memory before touching the data file
+    my $generated;
+    open(my $memory_fd, '>', \$generated)
+      or die encode_utf8("Cannot open scalar: $!");
+
+    $self->extract_sections_from_links($memory_fd, $base_url);
+
+    close $memory_fd;
+
+    $self->write_data_file($basedir, $generated);
+
+    return;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Authority/MenuPolicy.pm b/lib/Lintian/Data/Authority/MenuPolicy.pm
new file mode 100644
index 0000000..e0f710a
--- /dev/null
+++ b/lib/Lintian/Data/Authority/MenuPolicy.pm
@@ -0,0 +1,316 @@
+# -*- perl -*-
+#
+# Copyright (C) 1998 Christian Schwarz and Richard Braakman
+# Copyright (C) 2001 Colin Watson
+# Copyright (C) 2008 Jorda Polo
+# Copyright (C) 2009 Russ Allbery
+# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
+# Copyright (C) 2020-2021 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Data::Authority::MenuPolicy;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Carp qw(croak);
+use Const::Fast;
+use Path::Tiny;
+use Unicode::UTF8 qw(encode_utf8);
+use WWW::Mechanize ();
+
+use Lintian::Output::Markdown qw(markdown_authority);
+
+const my $EMPTY => q{};
+const my $SPACE => q{ };
+const my $SLASH => q{/};
+const my $COLON => q{:};
+const my $UNDERSCORE => q{_};
+const my $LEFT_PARENTHESIS => q{(};
+const my $RIGHT_PARENTHESIS => q{)};
+
+const my $TWO_PARTS => 2;
+
+const my $VOLUME_KEY => $UNDERSCORE;
+const my $SEPARATOR => $COLON x 2;
+
+use Moo;
+use namespace::clean;
+
+with 'Lintian::Data::JoinedLines';
+
+=head1 NAME
+
+Lintian::Data::Authority::MenuPolicy - Lintian interface for manual references
+
+=head1 SYNOPSIS
+
+    use Lintian::Data::Authority::MenuPolicy;
+
+=head1 DESCRIPTION
+
+Lintian::Data::Authority::MenuPolicy provides a way to load data files for
+manual references.
+
+=head1 CLASS METHODS
+
+=over 4
+
+=item title
+
+Human-readable name of the manual. Defaults to C<Menu Policy>.
+
+=item shorthand
+
+Short identifier of the manual. Defaults to C<menu-policy>.
+
+=item location
+
+Relative path of the data file. Built lazily as C<authority/> followed by
+the shorthand.
+
+=item separator
+
+Compiled pattern that matches the double colon between fields.
+
+=cut
+
+has title => (
+    is => 'rw',
+    default => 'Menu Policy'
+);
+
+has shorthand => (
+    is => 'rw',
+    default => 'menu-policy'
+);
+
+has location => (
+    is => 'rw',
+    lazy => 1,
+    default => sub {
+        my ($self) = @_;
+
+        return 'authority/' . $self->shorthand;
+    }
+);
+
+has separator => (
+    is => 'rw',
+    default => sub { qr/::/ }
+);
+
+=item consumer
+
+Turns the remainder of a data line (everything after the key) into a hash
+reference with the fields C<title> and C<url>. Returns the undefined value
+when a previous entry is passed in. Presumably called by
+L<Lintian::Data::JoinedLines> once per line; that role is not shown here.
+
+=cut
+
+sub consumer {
+    my ($self, $key, $remainder, $previous) = @_;
+
+    return undef
+      if defined $previous;
+
+    # the limit keeps any further separators inside the URL field
+    my ($title, $url) = split($self->separator, $remainder, $TWO_PARTS);
+
+    my %entry;
+    $entry{title} = $title;
+    $entry{url} = $url;
+
+    return \%entry;
+}
+
+=item markdown_citation
+
+Returns the result of C<markdown_authority> for the given section key.
+Croaks when the key is the volume key (an underscore), which stands for the
+whole manual. Section title and URL are left undefined for unrecognized
+keys.
+
+=cut
+
+sub markdown_citation {
+    my ($self, $section_key) = @_;
+
+    croak "Invalid section $section_key"
+      if $section_key eq $VOLUME_KEY;
+
+    my $volume_entry = $self->value($VOLUME_KEY);
+
+    # start with the citation to the overall manual.
+    my $volume_title = $volume_entry->{title};
+    my $volume_url = $volume_entry->{url};
+
+    my $section_title;
+    my $section_url;
+
+    if ($self->recognizes($section_key)) {
+
+        my $section_entry = $self->value($section_key);
+
+        $section_title = $section_entry->{title};
+        $section_url = $section_entry->{url};
+    }
+
+    return markdown_authority(
+        $volume_title, $volume_url, $section_key,
+        $section_title, $section_url
+    );
+}
+
+=item write_line
+
+Plain function, not a method. Writes one UTF-8 encoded line with the three
+fields key, title and destination, joined by double colons, to the given
+file handle.
+
+=cut
+
+sub write_line {
+    my ($data_fd, $section_key, $section_title, $destination) = @_;
+
+    # drop final dots
+    $section_key =~ s{ [.]+ $}{}x;
+
+    # reduce consecutive whitespace
+    $section_title =~ s{ \s+ }{ }gx;
+
+    my $line = join($SEPARATOR, $section_key, $section_title, $destination);
+
+    say {$data_fd} encode_utf8($line);
+
+    return;
+}
+
+=item write_data_file
+
+Writes a comment header followed by the already encoded, generated lines to
+the data file below the given base directory. The parent directory is
+created when it does not exist.
+
+=cut
+
+sub write_data_file {
+    my ($self, $basedir, $generated) = @_;
+
+    my $header =<<"HEADER";
+# Data about titles, sections, and URLs of manuals, used to expand references
+# in tag descriptions and add links for HTML output. Each line of this file
+# has three fields separated by double colons:
+#
+# <section> :: <title> :: <url>
+#
+# If <section> is an underscore, that line specifies the title and URL for the
+# whole manual.
+
+HEADER
+
+    my $data_path = "$basedir/" . $self->location;
+    my $parent_dir = path($data_path)->parent->stringify;
+    path($parent_dir)->mkpath
+      unless -e $parent_dir;
+
+    # the generated part was encoded line by line in write_line
+    my $output = encode_utf8($header) . $generated;
+    path($data_path)->spew($output);
+
+    return;
+}
+
+=item extract_sections_from_links
+
+Fetches the page at the base URL and writes one line for the whole page,
+followed by one line for each link whose text starts with a dotted section
+number. That number, minus trailing dots, serves as the key.
+
+=cut
+
+sub extract_sections_from_links {
+    my ($self, $data_fd, $base_url) = @_;
+
+    my $mechanize = WWW::Mechanize->new();
+    $mechanize->get($base_url);
+
+    my $page_title = $mechanize->title;
+
+    # strip explanatory remark
+    $page_title =~ s{ \s* \N{EM DASH} .* $}{}x;
+
+    # underscore is a token for the whole page
+    write_line($data_fd, $VOLUME_KEY, $page_title, $base_url);
+
+    my %by_section_key;
+
+    # sticky: once set, every later section gets the appendix prefix
+    my $in_appendix = 0;
+
+    # https://stackoverflow.com/a/254687
+    for my $link ($mechanize->links) {
+
+        next
+          unless length $link->text;
+
+        next
+          if $link->text !~ qr{^ \s* ([.\d]+) \s+ (.+) $}x;
+
+        my $section_key = $1;
+        my $section_title = $2;
+
+        # drop final dots
+        $section_key =~ s{ [.]+ $}{}x;
+
+        # reduce consecutive whitespace
+        $section_title =~ s{ \s+ }{ }gx;
+
+        my $destination = $base_url . $link->url;
+
+        # skip a link that repeats the title or the destination already
+        # stored under the same key
+        next
+          if exists $by_section_key{$section_key}
+          && ( $by_section_key{$section_key}{title} eq $section_title
+            || $by_section_key{$section_key}{destination} eq $destination);
+
+        # Some manuals reuse section numbers for different references,
+        # e.g. the Debian Policy's normal and appendix sections are
+        # numbers that clash with each other. Track if we've already
+        # seen a section pointing to some other URL than the current one,
+        # and prepend it with an indicator
+        $in_appendix = 1
+          if exists $by_section_key{$section_key}
+          && $by_section_key{$section_key}{destination} ne $destination;
+
+        $section_key = "appendix-$section_key"
+          if $in_appendix;
+
+        $by_section_key{$section_key}{title} = $section_title;
+        $by_section_key{$section_key}{destination} = $destination;
+
+        write_line($data_fd, $section_key, $section_title, $destination);
+    }
+
+    return;
+}
+
+=item refresh
+
+Regenerates the data file below the given base directory from the Debian
+Menu Policy on the web. The archive argument is not used.
+
+=cut
+
+sub refresh {
+    my ($self, $archive, $basedir) = @_;
+
+    my $base_url = 'https://www.debian.org/doc/packaging-manuals/menu-policy/';
+
+    # collect the generated lines in memory before touching the data file
+    my $generated;
+    open(my $memory_fd, '>', \$generated)
+      or die encode_utf8("Cannot open scalar: $!");
+
+    $self->extract_sections_from_links($memory_fd, $base_url);
+
+    close $memory_fd;
+
+    $self->write_data_file($basedir, $generated);
+
+    return;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Authority/NewMaintainer.pm b/lib/Lintian/Data/Authority/NewMaintainer.pm
new file mode 100644
index 0000000..bd8c933
--- /dev/null
+++ b/lib/Lintian/Data/Authority/NewMaintainer.pm
@@ -0,0 +1,290 @@
+# -*- perl -*-
+#
+# Copyright (C) 1998 Christian Schwarz and Richard Braakman
+# Copyright (C) 2001 Colin Watson
+# Copyright (C) 2008 Jorda Polo
+# Copyright (C) 2009 Russ Allbery
+# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
+# Copyright (C) 2020-2021 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.

package Lintian::Data::Authority::NewMaintainer;

use v5.20;
use warnings;
use utf8;

use Carp qw(croak);
use Const::Fast;
use List::SomeUtils qw(any first_value);
use Path::Tiny;
use Unicode::UTF8 qw(encode_utf8);
use WWW::Mechanize ();

use Lintian::Output::Markdown qw(markdown_authority);

const my $SLASH => q{/};
const my $UNDERSCORE => q{_};

const my $VOLUME_KEY => $UNDERSCORE;
const my $SECTIONS => 'sections';

use Moo;
use namespace::clean;

with 'Lintian::Data::PreambledJSON';

=head1 NAME

Lintian::Data::Authority::NewMaintainer - Lintian interface for manual references

=head1 SYNOPSIS

    use Lintian::Data::Authority::NewMaintainer;

=head1 DESCRIPTION

Lintian::Data::Authority::NewMaintainer provides a way to load data files for
manual references.

=head1 CLASS METHODS

=over 4

=item title

Human-readable name of the manual.

=item shorthand

Short identifier of the manual; it is also used to build C<location>.

=item location

Path of the data file relative to a data directory.  Computed lazily as
C<authority/SHORTHAND.json>.

=item by_section_key

Hash reference that maps a section key to the entry loaded for it.

=cut

has title => (
    is => 'rw',
    default => 'New Maintainer\'s Guide'
);

has shorthand => (
    is => 'rw',
    default => 'new-maintainer'
);

has location => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my ($self) = @_;

        return 'authority/' . $self->shorthand . '.json';
    }
);

has by_section_key => (is => 'rw', default => sub { {} });

=item markdown_citation (SECTION_KEY)

Returns the result of C<markdown_authority> for the manual and, when
SECTION_KEY is known, for the title and destination of that section.
Croaks when SECTION_KEY is the key reserved for the whole manual.

=cut

sub markdown_citation {
    my ($self, $section_key) = @_;

    croak "Invalid section $section_key"
      if $section_key eq $VOLUME_KEY;

    my $volume_entry = $self->value($VOLUME_KEY);

    # start with the citation to the overall manual.
    my $volume_title = $volume_entry->{title};
    my $volume_url = $volume_entry->{destination};

    # both stay undefined for sections that are not in the data
    my $section_title;
    my $section_url;

    if ($self->recognizes($section_key)) {

        my $section_entry = $self->value($section_key);

        $section_title = $section_entry->{title};
        $section_url = $section_entry->{destination};
    }

    return markdown_authority(
        $volume_title, $volume_url,$section_key,
        $section_title, $section_url
    );
}

=item recognizes (KEY)

Returns true if KEY is known, and false otherwise.

=cut

sub recognizes {
    my ($self, $key) = @_;

    # an empty or undefined key is never known
    return 0
      unless length $key;

    return 1
      if exists $self->by_section_key->{$key};

    return 0;
}

=item value (KEY)

Returns the value attached to KEY if it was listed in the data
file represented by this Lintian::Data instance and the undefined value
otherwise.

=cut

sub value {
    my ($self, $key) = @_;

    return undef
      unless length $key;

    return $self->by_section_key->{$key};
}

=item load (SEARCH_SPACE, OUR_VENDOR)

Reads the data file from the first directory in the array reference
SEARCH_SPACE that contains it and fills C<by_section_key>.  Returns 1 on
success and 0 when C<read_file> reports failure.  OUR_VENDOR is accepted
but not used here.

=cut

sub load {
    my ($self, $search_space, $our_vendor) = @_;

    my @candidates = map { $_ . $SLASH . $self->location } @{$search_space};
    my $path = first_value { -e } @candidates;

    my $reference;

    return 0
      unless $self->read_file($path, \$reference);

    my @sections = @{$reference // []};

    for my $section (@sections) {

        my $key = $section->{key};

        # only store first value for duplicates
        # silently ignore later values
        $self->by_section_key->{$key} //= $section;
    }

    return 1;
}

=item refresh (ARCHIVE, BASEDIR)

Regenerates the data file below BASEDIR from the online guide and returns
the status reported by C<write_file>.  ARCHIVE is accepted but not used
here.

=cut

sub refresh {
    my ($self, $archive, $basedir) = @_;

    my $base_url = 'https://www.debian.org/doc/manuals/maint-guide/index.html';

    my $mechanize = WWW::Mechanize->new();
    $mechanize->get($base_url);

    my $page_title = $mechanize->title;

    my @sections;

    # underscore is a token for the whole page
    my %volume;
    $volume{key} = $VOLUME_KEY;
    $volume{title} = $page_title;
    $volume{destination} = $base_url;

    # store array to resemble web layout
    # may contain duplicates
    push(@sections, \%volume);

    # sticky: stays set for the remainder of the page once triggered
    my $in_appendix = 0;

    # https://stackoverflow.com/a/254687
    for my $link ($mechanize->links) {

        next
          unless length $link->text;

        # only links that read "<number or letter> <title>" are sections
        next
          if $link->text !~ qr{^ \s* ([.\d[:upper:]]+) \s+ (.+) $}x;

        my $section_key = $1;
        my $section_title = $2;

        # drop final dots
        $section_key =~ s{ [.]+ $}{}x;

        # reduce consecutive whitespace
        $section_title =~ s{ \s+ }{ }gx;

        # The base URL names a document (index.html), so appending the
        # link to it would yield a malformed address.  Resolve the link
        # against the page it was found on instead.
        my $destination = $link->url_abs->as_string;

        my @similar = grep { $_->{key} eq $section_key } @sections;
        next
          if (any { $_->{title} eq $section_title } @similar)
          || (any { $_->{destination} eq $destination } @similar);

        # Some manuals reuse section numbers for different references,
        # e.g. the Debian Policy's normal and appendix sections are
        # numbers that clash with each other. Track if we've already
        # seen a section pointing to some other URL than the current one,
        # and prepend it with an indicator
        $in_appendix = 1
          if any { $_->{destination} ne $destination } @similar;

        $section_key = "appendix-$section_key"
          if $in_appendix;

        my %section;
        $section{key} = $section_key;
        $section{title} = $section_title;
        $section{destination} = $destination;
        push(@sections, \%section);
    }

    my $data_path = "$basedir/" . $self->location;
    my $status = $self->write_file($SECTIONS, \@sections, $data_path);

    return $status;
}

=back

=head1 AUTHOR

Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.

=head1 SEE ALSO

lintian(1)

=cut

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et

# diff --git a/lib/Lintian/Data/Authority/PerlPolicy.pm b/lib/Lintian/Data/Authority/PerlPolicy.pm
# new file mode 100644
# index 0000000..92dc31a
# --- /dev/null
# +++ b/lib/Lintian/Data/Authority/PerlPolicy.pm
# @@ -0,0 +1,316 @@
# -*- perl -*-
#
# Copyright (C) 1998 Christian Schwarz and Richard Braakman
# Copyright (C) 2001 Colin Watson
# Copyright (C) 2008 Jorda Polo
# Copyright (C) 2009 Russ Allbery
# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
# Copyright (C) 2020-2021 Felix Lechner
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.

package Lintian::Data::Authority::PerlPolicy;

use v5.20;
use warnings;
use utf8;

use Carp qw(croak);
use Const::Fast;
use Path::Tiny;
use Unicode::UTF8 qw(encode_utf8);
use WWW::Mechanize ();

use Lintian::Output::Markdown qw(markdown_authority);

const my $EMPTY => q{};
const my $SPACE => q{ };
const my $SLASH => q{/};
const my $COLON => q{:};
const my $UNDERSCORE => q{_};
const my $LEFT_PARENTHESIS => q{(};
const my $RIGHT_PARENTHESIS => q{)};

const my $TWO_PARTS => 2;

const my $VOLUME_KEY => $UNDERSCORE;
const my $SEPARATOR => $COLON x 2;

use Moo;
use namespace::clean;

with 'Lintian::Data::JoinedLines';

=head1 NAME

Lintian::Data::Authority::PerlPolicy - Lintian interface for manual references

=head1 SYNOPSIS

    use Lintian::Data::Authority::PerlPolicy;

=head1 DESCRIPTION

Lintian::Data::Authority::PerlPolicy provides a way to load data files for
manual references.

=head1 CLASS METHODS

=over 4

=item title

Human-readable name of the manual.

=item shorthand

Short identifier of the manual; it is also used to build C<location>.

=item location

Path of the data file relative to a data directory.  Computed lazily as
C<authority/SHORTHAND>.

=item separator

Compiled pattern that splits the fields of a data line.

=cut

has title => (
    is => 'rw',
    default => 'Perl Policy'
);

has shorthand => (
    is => 'rw',
    default => 'perl-policy'
);

has location => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my ($self) = @_;

        return 'authority/' . $self->shorthand;
    }
);

has separator => (
    is => 'rw',
    default => sub { qr/::/ }
);

=item consumer (KEY, REMAINDER, PREVIOUS)

Splits REMAINDER into a title and a URL and returns them as a hash
reference with the keys C<title> and C<url>.  Returns the undefined value
when PREVIOUS is defined.

=cut

sub consumer {
    my ($self, $key, $remainder, $previous) = @_;

    return undef
      if defined $previous;

    # split at the first separator only; the rest belongs to the URL
    my ($title, $url)= split($self->separator, $remainder, $TWO_PARTS);

    my %entry;
    $entry{title} = $title;
    $entry{url} = $url;

    return \%entry;
}

=item markdown_citation (SECTION_KEY)

Returns the result of C<markdown_authority> for the manual and, when
SECTION_KEY is known, for the title and URL of that section.  Croaks when
SECTION_KEY is the key reserved for the whole manual.

=cut

sub markdown_citation {
    my ($self, $section_key) = @_;

    croak "Invalid section $section_key"
      if $section_key eq $VOLUME_KEY;

    my $volume_entry = $self->value($VOLUME_KEY);

    # start with the citation to the overall manual.
    my $volume_title = $volume_entry->{title};
    my $volume_url = $volume_entry->{url};

    # both stay undefined for sections that are not in the data
    my $section_title;
    my $section_url;

    if ($self->recognizes($section_key)) {

        my $section_entry = $self->value($section_key);

        $section_title = $section_entry->{title};
        $section_url = $section_entry->{url};
    }

    return markdown_authority(
        $volume_title, $volume_url,$section_key,
        $section_title, $section_url
    );
}

=item write_line (DATA_FD, SECTION_KEY, SECTION_TITLE, DESTINATION)

Plain function, not a method.  Prints the three fields to DATA_FD as one
UTF-8 encoded line, joined by double colons.  Trailing dots are removed
from SECTION_KEY and runs of whitespace in SECTION_TITLE are reduced to a
single space.

=cut

sub write_line {
    my ($data_fd, $section_key, $section_title, $destination) = @_;

    # drop final dots
    $section_key =~ s{ [.]+ $}{}x;

    # reduce consecutive whitespace
    $section_title =~ s{ \s+ }{ }gx;

    my $line= join($SEPARATOR,$section_key, $section_title, $destination);

    say {$data_fd} encode_utf8($line);

    return;
}

=item write_data_file (BASEDIR, GENERATED)

Writes a fixed explanatory header followed by GENERATED to the data file
below BASEDIR.  The parent directory is created when it does not exist.

=cut

sub write_data_file {
    my ($self, $basedir, $generated) = @_;

    my $header =<<"HEADER";
# Data about titles, sections, and URLs of manuals, used to expand references
# in tag descriptions and add links for HTML output. Each line of this file
# has three fields separated by double colons:
#
# <section> :: <title> :: <url>
#
# If <section> is an underscore, that line specifies the title and URL for the
# whole manual.

HEADER

    my $data_path = "$basedir/" . $self->location;
    my $parent_dir = path($data_path)->parent->stringify;
    path($parent_dir)->mkpath
      unless -e $parent_dir;

    # only the header needs encoding here; write_line already encodes
    # each generated line
    my $output = encode_utf8($header) . $generated;
    path($data_path)->spew($output);

    return;
}

=item extract_sections_from_links (DATA_FD, BASE_URL)

Fetches BASE_URL and prints one data line to DATA_FD for the page as a
whole, followed by one data line for every link on that page whose text
starts with a section number or appendix letter.

A link is skipped when an entry with the same key and either the same
title or the same destination was already recorded.  Once a key is seen
again with a different destination, that link and all later ones get their
key prefixed with C<appendix->.

=cut

sub extract_sections_from_links {
    my ($self, $data_fd, $base_url)= @_;

    my $mechanize = WWW::Mechanize->new();
    $mechanize->get($base_url);

    my $page_title = $mechanize->title;

    # strip explanatory remark
    $page_title =~ s{ \s* \N{EM DASH} .* $}{}x;

    # underscore is a token for the whole page
    write_line($data_fd, $VOLUME_KEY, $page_title, $base_url);

    # entries recorded so far, used to detect repeated links
    my %by_section_key;

    # sticky: stays set for the remainder of the page once triggered
    my $in_appendix = 0;

    # https://stackoverflow.com/a/254687
    for my $link ($mechanize->links) {

        next
          unless length $link->text;

        # a single capital letter, or digits and dots with an optional
        # leading capital letter, followed by the title
        next
          if $link->text !~ qr{^ \s* ([A-Z]|[A-Z]?[.\d]+) \s+ (.+) $}x;

        my $section_key = $1;
        my $section_title = $2;

        # drop final dots
        $section_key =~ s{ [.]+ $}{}x;

        # reduce consecutive whitespace
        $section_title =~ s{ \s+ }{ }gx;

        my $destination = $base_url . $link->url;

        next
          if exists $by_section_key{$section_key}
          && ( $by_section_key{$section_key}{title} eq $section_title
            || $by_section_key{$section_key}{destination} eq $destination);

        # Some manuals reuse section numbers for different references,
        # e.g. the Debian Policy's normal and appendix sections are
        # numbers that clash with each other. Track if we've already
        # seen a section pointing to some other URL than the current one,
        # and prepend it with an indicator
        $in_appendix = 1
          if exists $by_section_key{$section_key}
          && $by_section_key{$section_key}{destination} ne $destination;

        $section_key = "appendix-$section_key"
          if $in_appendix;

        $by_section_key{$section_key}{title} = $section_title;
        $by_section_key{$section_key}{destination} = $destination;

        write_line($data_fd, $section_key, $section_title, $destination);
    }

    return;
}

=item refresh (ARCHIVE, BASEDIR)

Regenerates the data file below BASEDIR from the online Perl policy.  The
lines are collected in an in-memory file first and are then handed to
C<write_data_file>.  ARCHIVE is accepted but not used here.

=cut

sub refresh {
    my ($self, $archive, $basedir) = @_;

    my $base_url = 'https://www.debian.org/doc/packaging-manuals/perl-policy/';

    my $generated;
    open(my $memory_fd, '>', \$generated)
      or die encode_utf8("Cannot open scalar: $!");

    $self->extract_sections_from_links($memory_fd, $base_url);

    # buffered write errors only surface when the handle is closed
    close($memory_fd)
      or die encode_utf8("Cannot close scalar: $!");

    $self->write_data_file($basedir, $generated);

    return;
}

=back

=head1 AUTHOR

Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.

=head1 SEE ALSO

lintian(1)

=cut

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et

# diff --git a/lib/Lintian/Data/Authority/PythonPolicy.pm b/lib/Lintian/Data/Authority/PythonPolicy.pm
# new file mode 100644
# index 0000000..ebeda04
# --- /dev/null
# +++ b/lib/Lintian/Data/Authority/PythonPolicy.pm
# @@ -0,0 +1,317 @@
# -*- perl -*-
#
# Copyright (C) 1998 Christian Schwarz and Richard Braakman
# Copyright (C) 2001 Colin Watson
# Copyright (C) 2008 Jorda Polo
# Copyright (C) 2009 Russ Allbery
# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
# Copyright (C) 2020-2021 Felix Lechner
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.

package Lintian::Data::Authority::PythonPolicy;

use v5.20;
use warnings;
use utf8;

use Carp qw(croak);
use Const::Fast;
use Path::Tiny;
use Unicode::UTF8 qw(encode_utf8);
use WWW::Mechanize ();

use Lintian::Output::Markdown qw(markdown_authority);

const my $EMPTY => q{};
const my $SPACE => q{ };
const my $SLASH => q{/};
const my $COLON => q{:};
const my $UNDERSCORE => q{_};
const my $LEFT_PARENTHESIS => q{(};
const my $RIGHT_PARENTHESIS => q{)};

const my $TWO_PARTS => 2;

const my $VOLUME_KEY => $UNDERSCORE;
const my $SEPARATOR => $COLON x 2;

use Moo;
use namespace::clean;

with 'Lintian::Data::JoinedLines';

=head1 NAME

Lintian::Data::Authority::PythonPolicy - Lintian interface for manual references

=head1 SYNOPSIS

    use Lintian::Data::Authority::PythonPolicy;

=head1 DESCRIPTION

Lintian::Data::Authority::PythonPolicy provides a way to load data files for
manual references.

=head1 CLASS METHODS

=over 4

=item title

Human-readable name of the manual.

=item shorthand

Short identifier of the manual; it is also used to build C<location>.

=item location

Path of the data file relative to a data directory.  Computed lazily as
C<authority/SHORTHAND>.

=item separator

Compiled pattern that splits the fields of a data line.

=cut

has title => (
    is => 'rw',
    default => 'Python Policy'
);

has shorthand => (
    is => 'rw',
    default => 'python-policy'
);

has location => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my ($self) = @_;

        return 'authority/' . $self->shorthand;
    }
);

has separator => (
    is => 'rw',
    default => sub { qr/::/ }
);

=item consumer (KEY, REMAINDER, PREVIOUS)

Splits REMAINDER into a title and a URL and returns them as a hash
reference with the keys C<title> and C<url>.  Returns the undefined value
when PREVIOUS is defined.

=cut

sub consumer {
    my ($self, $key, $remainder, $previous) = @_;

    return undef
      if defined $previous;

    # split at the first separator only; the rest belongs to the URL
    my ($title, $url)= split($self->separator, $remainder, $TWO_PARTS);

    my %entry;
    $entry{title} = $title;
    $entry{url} = $url;

    return \%entry;
}

=item markdown_citation (SECTION_KEY)

Returns the result of C<markdown_authority> for the manual and, when
SECTION_KEY is known, for the title and URL of that section.  Croaks when
SECTION_KEY is the key reserved for the whole manual.

=cut

sub markdown_citation {
    my ($self, $section_key) = @_;

    croak "Invalid section $section_key"
      if $section_key eq $VOLUME_KEY;

    my $volume_entry = $self->value($VOLUME_KEY);

    # start with the citation to the overall manual.
    my $volume_title = $volume_entry->{title};
    my $volume_url = $volume_entry->{url};

    # both stay undefined for sections that are not in the data
    my $section_title;
    my $section_url;

    if ($self->recognizes($section_key)) {

        my $section_entry = $self->value($section_key);

        $section_title = $section_entry->{title};
        $section_url = $section_entry->{url};
    }

    return markdown_authority(
        $volume_title, $volume_url,$section_key,
        $section_title, $section_url
    );
}

=item write_line (DATA_FD, SECTION_KEY, SECTION_TITLE, DESTINATION)

Plain function, not a method.  Prints the three fields to DATA_FD as one
UTF-8 encoded line, joined by double colons.  Trailing dots are removed
from SECTION_KEY and runs of whitespace in SECTION_TITLE are reduced to a
single space.

=cut

sub write_line {
    my ($data_fd, $section_key, $section_title, $destination) = @_;

    # drop final dots
    $section_key =~ s{ [.]+ $}{}x;

    # reduce consecutive whitespace
    $section_title =~ s{ \s+ }{ }gx;

    my $line= join($SEPARATOR,$section_key, $section_title, $destination);

    say {$data_fd} encode_utf8($line);

    return;
}

=item write_data_file (BASEDIR, GENERATED)

Writes a fixed explanatory header followed by GENERATED to the data file
below BASEDIR.  The parent directory is created when it does not exist.

=cut

sub write_data_file {
    my ($self, $basedir, $generated) = @_;

    my $header =<<"HEADER";
# Data about titles, sections, and URLs of manuals, used to expand references
# in tag descriptions and add links for HTML output. Each line of this file
# has three fields separated by double colons:
#
# <section> :: <title> :: <url>
#
# If <section> is an underscore, that line specifies the title and URL for the
# whole manual.

HEADER

    my $data_path = "$basedir/" . $self->location;
    my $parent_dir = path($data_path)->parent->stringify;
    path($parent_dir)->mkpath
      unless -e $parent_dir;

    # only the header needs encoding here; write_line already encodes
    # each generated line
    my $output = encode_utf8($header) . $generated;
    path($data_path)->spew($output);

    return;
}

=item extract_sections_from_links (DATA_FD, BASE_URL)

Fetches BASE_URL and prints one data line to DATA_FD for the page as a
whole, followed by one data line for every link on that page whose text
starts with a section number.

A link is skipped when an entry with the same section number and either the
same title or the same destination was already recorded.  Once a section
number is seen again with a different destination, that link and all later
ones get their key prefixed with C<appendix->.

=cut

sub extract_sections_from_links {
    my ($self, $data_fd, $base_url)= @_;

    my $mechanize = WWW::Mechanize->new();
    $mechanize->get($base_url);

    my $page_title = $mechanize->title;

    # strip explanatory remark
    $page_title =~ s{ \s* \N{EM DASH} .* $}{}x;

    # underscore is a token for the whole page
    write_line($data_fd, $VOLUME_KEY, $page_title, $base_url);

    # entries recorded so far, used to detect repeated links
    my %by_section_key;

    # sticky: stays set for the remainder of the page once triggered
    my $in_appendix = 0;

    # https://stackoverflow.com/a/254687
    for my $link ($mechanize->links) {

        next
          unless length $link->text;

        # only links that read "<number> <title>" describe sections
        next
          if $link->text !~ qr{^ \s* ([.\d]+) \s+ (.+) $}x;

        my $section_key = $1;
        my $section_title = $2;

        # drop final dots
        $section_key =~ s{ [.]+ $}{}x;

        # reduce consecutive whitespace
        $section_title =~ s{ \s+ }{ }gx;

        my $destination = $base_url . $link->url;

        next
          if exists $by_section_key{$section_key}
          && ( $by_section_key{$section_key}{title} eq $section_title
            || $by_section_key{$section_key}{destination} eq $destination);

        # Some manuals reuse section numbers for different references,
        # e.g. the Debian Policy's normal and appendix sections are
        # numbers that clash with each other. Track if we've already
        # seen a section pointing to some other URL than the current one,
        # and prepend it with an indicator
        $in_appendix = 1
          if exists $by_section_key{$section_key}
          && $by_section_key{$section_key}{destination} ne $destination;

        $section_key = "appendix-$section_key"
          if $in_appendix;

        $by_section_key{$section_key}{title} = $section_title;
        $by_section_key{$section_key}{destination} = $destination;

        write_line($data_fd, $section_key, $section_title, $destination);
    }

    return;
}

=item refresh (ARCHIVE, BASEDIR)

Regenerates the data file below BASEDIR from the online Python policy.  The
lines are collected in an in-memory file first and are then handed to
C<write_data_file>.  ARCHIVE is accepted but not used here.

=cut

sub refresh {
    my ($self, $archive, $basedir) = @_;

    my $base_url
      = 'https://www.debian.org/doc/packaging-manuals/python-policy/';

    my $generated;
    open(my $memory_fd, '>', \$generated)
      or die encode_utf8("Cannot open scalar: $!");

    $self->extract_sections_from_links($memory_fd, $base_url);

    # buffered write errors only surface when the handle is closed
    close($memory_fd)
      or die encode_utf8("Cannot close scalar: $!");

    $self->write_data_file($basedir, $generated);

    return;
}

=back

=head1 AUTHOR

Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.

=head1 SEE ALSO

lintian(1)

=cut

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et

# diff --git a/lib/Lintian/Data/Authority/VimPolicy.pm b/lib/Lintian/Data/Authority/VimPolicy.pm
# new file mode 100644
# index 0000000..6ffbe91
# --- /dev/null
# +++ b/lib/Lintian/Data/Authority/VimPolicy.pm
# @@ -0,0 +1,459 @@
# -*- perl -*-
#
# Copyright (C) 1998 Christian Schwarz and Richard Braakman
# Copyright (C) 2001 Colin Watson
# Copyright (C) 2008 Jorda Polo
# Copyright (C) 2009 Russ Allbery
# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
# Copyright (C) 2020-2021 Felix Lechner
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.

package Lintian::Data::Authority::VimPolicy;

use v5.20;
use warnings;
use utf8;

use Carp qw(croak);
use Const::Fast;
use File::Basename qw(basename);
use IPC::Run3;
use HTML::TokeParser::Simple;
use Path::Tiny;
use Unicode::UTF8 qw(encode_utf8);

use Lintian::Output::Markdown qw(markdown_authority);

const my $EMPTY => q{};
const my $SPACE => q{ };
const my $SLASH => q{/};
const my $COLON => q{:};
const my $INDENT => $SPACE x 4;
const my $UNDERSCORE => q{_};
const my $LEFT_PARENTHESIS => q{(};
const my $RIGHT_PARENTHESIS => q{)};

const my $TWO_PARTS => 2;

const my $VOLUME_KEY => $UNDERSCORE;
const my $SEPARATOR => $COLON x 2;

const my $WAIT_STATUS_SHIFT => 8;

use Moo;
use namespace::clean;

with 'Lintian::Data::JoinedLines';

=head1 NAME

Lintian::Data::Authority::VimPolicy - Lintian interface for manual references

=head1 SYNOPSIS

    use Lintian::Data::Authority::VimPolicy;

=head1 DESCRIPTION

Lintian::Data::Authority::VimPolicy provides a way to load data files for
manual references.

=head1 CLASS METHODS

=over 4

=item title

Human-readable name of the manual.

=item shorthand

Short identifier of the manual; it is also used to build C<location>.

=item location

Path of the data file relative to a data directory.  Computed lazily as
C<authority/SHORTHAND>.

=item separator

Compiled pattern that splits the fields of a data line.

=cut

has title => (
    is => 'rw',
    default => 'Vim Policy'
);

has shorthand => (
    is => 'rw',
    default => 'vim-policy'
);

has location => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my ($self) = @_;

        return 'authority/' . $self->shorthand;
    }
);

has separator => (
    is => 'rw',
    default => sub { qr/::/ }
);

=item consumer (KEY, REMAINDER, PREVIOUS)

Splits REMAINDER into a title and a URL and returns them as a hash
reference with the keys C<title> and C<url>.  Returns the undefined value
when PREVIOUS is defined.

=cut

sub consumer {
    my ($self, $key, $remainder, $previous) = @_;

    return undef
      if defined $previous;

    # split at the first separator only; the rest belongs to the URL
    my ($title, $url)= split($self->separator, $remainder, $TWO_PARTS);

    my %entry;
    $entry{title} = $title;
    $entry{url} = $url;

    return \%entry;
}

=item markdown_citation (SECTION_KEY)

Returns the result of C<markdown_authority> for the manual and, when
SECTION_KEY is known, for the title and URL of that section.  Croaks when
SECTION_KEY is the key reserved for the whole manual.

=cut

sub markdown_citation {
    my ($self, $section_key) = @_;

    croak "Invalid section $section_key"
      if $section_key eq $VOLUME_KEY;

    my $volume_entry = $self->value($VOLUME_KEY);

    # start with the citation to the overall manual.
    my $volume_title = $volume_entry->{title};
    my $volume_url = $volume_entry->{url};

    # both stay undefined for sections that are not in the data
    my $section_title;
    my $section_url;

    if ($self->recognizes($section_key)) {

        my $section_entry = $self->value($section_key);

        $section_title = $section_entry->{title};
        $section_url = $section_entry->{url};
    }

    return markdown_authority(
        $volume_title, $volume_url,$section_key,
        $section_title, $section_url
    );
}

=item write_line (DATA_FD, SECTION_KEY, SECTION_TITLE, DESTINATION)

Plain function, not a method.  Prints the three fields to DATA_FD as one
UTF-8 encoded line, joined by double colons.  Trailing dots are removed
from SECTION_KEY and runs of whitespace in SECTION_TITLE are reduced to a
single space.

=cut

sub write_line {
    my ($data_fd, $section_key, $section_title, $destination) = @_;

    # drop final dots
    $section_key =~ s{ [.]+ $}{}x;

    # reduce consecutive whitespace
    $section_title =~ s{ \s+ }{ }gx;

    my $line= join($SEPARATOR,$section_key, $section_title, $destination);

    say {$data_fd} encode_utf8($line);

    return;
}

=item write_data_file (BASEDIR, GENERATED)

Writes a fixed explanatory header followed by GENERATED to the data file
below BASEDIR.  The parent directory is created when it does not exist.

=cut

sub write_data_file {
    my ($self, $basedir, $generated) = @_;

    my $header =<<"HEADER";
# Data about titles, sections, and URLs of manuals, used to expand references
# in tag descriptions and add links for HTML output. Each line of this file
# has three fields separated by double colons:
#
# <section> :: <title> :: <url>
#
# If <section> is an underscore, that line specifies the title and URL for the
# whole manual.

HEADER

    my $data_path = "$basedir/" . $self->location;
    my $parent_dir = path($data_path)->parent->stringify;
    path($parent_dir)->mkpath
      unless -e $parent_dir;

    # only the header needs encoding here; write_line already encodes
    # each generated line
    my $output = encode_utf8($header) . $generated;
    path($data_path)->spew($output);

    return;
}

=item find_installable_name (ARCHIVE, RELEASE, LIBERTY, PORT, REQUESTED_PATH)

Scans the compressed contents listings for the architectures C<all> and
PORT and returns the name of the first installable found to ship
REQUESTED_PATH.  Dies when no installable ships it, and warns when more
than one does.

=cut

sub find_installable_name {
    my ($self, $archive, $release, $liberty, $port, $requested_path) = @_;

    my @installed_by;

    # find installable package
    for my $installable_architecture ('all', $port) {

        my $local_path
          = $archive->contents_gz($release, $liberty,
            $installable_architecture);

        open(my $fd, '<:gzip', $local_path)
          or die encode_utf8("Cannot open $local_path.");

        while (my $line = <$fd>) {

            chomp $line;

            # first field is the path, the rest lists who ships it
            my ($path, $finder) = split($SPACE, $line, 2);
            next
              unless length $path
              && length $finder;

            next
              unless $path eq $requested_path;

            my @locations = split(m{,}, $finder);
            for my $location (@locations) {

                # everything after the first slash is kept as the name
                my ($section, $installable)= split(m{/}, $location, 2);

                push(@installed_by, $installable);
            }
        }

        close $fd;
    }

    die encode_utf8(
        "The path $requested_path is not installed by any package.")
      if @installed_by < 1;

    if (@installed_by > 1) {
        warn encode_utf8(
            "The path $requested_path is installed by multiple packages:\n");
        warn encode_utf8($INDENT . "- $_\n")for @installed_by;
    }

    my $installable_name = shift @installed_by;

    return $installable_name;
}

=item refresh (ARCHIVE, BASEDIR)

Regenerates the data file below BASEDIR.  The installable that ships the
Vim policy is looked up in ARCHIVE, downloaded with C<wget> and unpacked
with C<dpkg-deb> in a temporary folder.  The shipped HTML index is then
parsed for the page title and for the section entries found in C<dt>
elements.

=cut

sub refresh {
    my ($self, $archive, $basedir) = @_;

    # shipped as part of the vim installable
    my $shipped_base = 'usr/share/doc/vim/vim-policy.html/';
    my $index_name = 'index.html';

    my $shipped_path = $shipped_base . $index_name;
    my $stored_uri = "file:///$shipped_base";

    # neutral sort order
    local $ENV{LC_ALL} = 'C';

    my $release = 'stable';
    my $port = 'amd64';

    my $installable_name
      = $self->find_installable_name($archive, $release, 'main', $port,
        $shipped_path);

    my $deb822_by_installable_name
      = $archive->deb822_packages_by_installable_name($release, 'main', $port);

    my $work_folder
      = Path::Tiny->tempdir(
        TEMPLATE => 'refresh-doc-base-specification-XXXXXXXXXX');

    die encode_utf8("Installable $installable_name not shipped in port $port")
      unless exists $deb822_by_installable_name->{$installable_name};

    my $deb822 = $deb822_by_installable_name->{$installable_name};

    my $pool_path = $deb822->value('Filename');

    my $deb_filename = basename($pool_path);
    my $deb_local_path = "$work_folder/$deb_filename";
    my $deb_url = $archive->mirror_base . $SLASH . $pool_path;

    # run3 takes stdin, stdout and stderr in that order; stdout is
    # captured only to keep it off our own output
    my $stdout;
    my $stderr;
    run3([qw{wget --quiet}, "--output-document=$deb_local_path", $deb_url],
        undef, \$stdout, \$stderr);
    my $status = ($? >> $WAIT_STATUS_SHIFT);

    # stderr already in UTF-8
    die(
        length $stderr
        ? $stderr
        : encode_utf8("Cannot download $deb_url (exit status $status)")
    )if $status;

    my $extract_folder = "$work_folder/unpacked/$pool_path";
    path($extract_folder)->mkpath;

    run3([qw{dpkg-deb --extract}, $deb_local_path, $extract_folder],
        undef, \$stdout, \$stderr);
    $status = ($? >> $WAIT_STATUS_SHIFT);

    # stderr already in UTF-8
    die(
        length $stderr
        ? $stderr
        : encode_utf8("Cannot unpack $deb_local_path (exit status $status)")
    )if $status;

    unlink($deb_local_path)
      or die encode_utf8("Cannot delete $deb_local_path");

    my $generated;
    open(my $memory_fd, '>', \$generated)
      or die encode_utf8("Cannot open scalar: $!");

    # NOTE(review): URI::file is not loaded anywhere in this module; this
    # call relies on some other module having loaded it - confirm
    my $fresh_uri = URI::file->new_abs("/$extract_folder/$shipped_path");

    my $parser = HTML::TokeParser::Simple->new(url => $fresh_uri);

    # parser state: which element we are inside of
    my $in_title = 0;
    my $in_dt_tag = 0;
    my $after_a_tag = 0;

    # text collected for the entry that is currently being read
    my $page_title = $EMPTY;
    my $section_key = $EMPTY;
    my $section_title = $EMPTY;
    my $relative_destination = $EMPTY;

    while (my $token = $parser->get_token) {

        if (length $token->get_tag) {

            if ($token->get_tag eq 'h1') {

                # the class attribute may be absent
                $in_title = ($token->is_start_tag
                      && ($token->get_attr('class') // $EMPTY) eq 'title');

                # not yet leaving title
                next
                  if $in_title;

                # trim both ends
                $page_title =~ s/^\s+|\s+$//g;

                # underscore is a token for the whole page
                write_line($memory_fd, $VOLUME_KEY, $page_title,
                    $stored_uri . $index_name)
                  if length $page_title;

                $page_title = $EMPTY;
            }

            if ($token->get_tag eq 'dt') {

                $in_dt_tag = $token->is_start_tag;

                # not yet leaving dt tag
                next
                  if $in_dt_tag;

                # trim both ends
                $section_key =~ s/^\s+|\s+$//g;
                $section_title =~ s/^\s+|\s+$//g;

                my $full_destination = $stored_uri . $relative_destination;

                write_line(
                    $memory_fd, $section_key,
                    $section_title,$full_destination
                )if length $section_title;

                # start over for the next entry
                $section_key = $EMPTY;
                $section_title = $EMPTY;
                $relative_destination = $EMPTY;
            }

            if ($token->get_tag eq 'a') {

                $after_a_tag = $token->is_start_tag;

                $relative_destination = $token->get_attr('href')
                  if $token->is_start_tag;
            }

        } else {

            # concatenate span objects
            $page_title .= $token->as_is
              if length $token->as_is
              && $in_title
              && $after_a_tag;

            # text in a dt element outside of a link is the section key
            $section_key = $token->as_is
              if length $token->as_is
              && $in_dt_tag
              && !$after_a_tag;

            # concatenate span objects
            $section_title .= $token->as_is
              if length $token->as_is
              && $in_dt_tag
              && $after_a_tag;
        }
    }

    # buffered write errors only surface when the handle is closed
    close($memory_fd)
      or die encode_utf8("Cannot close scalar: $!");

    $self->write_data_file($basedir, $generated);

    return;
}

=back

=head1 AUTHOR

Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.

=head1 SEE ALSO

lintian(1)

=cut

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et

# diff --git a/lib/Lintian/Data/Buildflags/Hardening.pm b/lib/Lintian/Data/Buildflags/Hardening.pm
# new file mode 100644
# index 0000000..75056df
# --- /dev/null
# +++ b/lib/Lintian/Data/Buildflags/Hardening.pm
# @@ -0,0 +1,154 @@
# -*- perl -*-

# Copyright (C) 2011-2012 Niels Thykier <niels@thykier.net>
# - Based on a shell script by Raphael Geissert <atomo64@gmail.com>
# Copyright (C) 2021 Felix Lechner
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.

package Lintian::Data::Buildflags::Hardening;

use v5.20;
use warnings;
use utf8;

use Const::Fast;
use List::SomeUtils qw(first_value uniq);
use Unicode::UTF8 qw(decode_utf8);

use Lintian::Deb822;
use Lintian::IPC::Run3 qw(safe_qx);

use Moo;
use namespace::clean;

const my $EMPTY => q{};
const my $SLASH => q{/};

const my $RECOMMENDED_FEATURES => q{recommended_features};

with 'Lintian::Data::PreambledJSON';

=encoding utf-8

=head1 NAME

Lintian::Data::Buildflags::Hardening -- Lintian API for hardening build flags

=head1 SYNOPSIS

    use Lintian::Data::Buildflags::Hardening;

=head1 DESCRIPTION

Lintian API for hardening build flags.

=head1 INSTANCE METHODS

=over 4

=item title

Human-readable name of this data set.

=item location

Path of the JSON data file, relative to a data directory.

=item recommended_features

Hash reference holding the data read by C<load>. An undefined value is
coerced to an empty hash reference.

=cut

has title => (
    is => 'rw',
    default => 'Hardening Flags from Dpkg'
);

has location => (
    is => 'rw',
    default => 'buildflags/hardening.json'
);

has recommended_features => (
    is => 'rw',
    default => sub { {} },
    coerce => sub { return $_[0] // {}; }
);

=item load

Takes a reference to a list of data directories, picks the first one that
contains C<location>, and stores the parsed content in
C<recommended_features>. Returns 1 on success and 0 when the file could
not be read.

=cut

sub load {
    my ($self, $search_space, $our_vendor) = @_;

    # first data directory that actually carries our file wins
    my $path
      = first_value { -e } map { $_ . $SLASH . $self->location }
      @{$search_space};

    my $features_by_architecture;
    $self->read_file($path, \$features_by_architecture)
      or return 0;

    $self->recommended_features($features_by_architecture);

    return 1;
}

=item refresh

Queries C<dpkg-buildflags> once per architecture listed by
C<dpkg-architecture -L> and writes the sorted names of all enabled
hardening features to C<location> below the given base directory.
Returns the status reported by C<write_file>.

=cut

sub refresh {
    my ($self, $archive, $basedir) = @_;

    # find all recommended hardening features
    local $ENV{LC_ALL} = 'C';
    local $ENV{DEB_BUILD_MAINT_OPTIONS} = 'hardening=+all';

    my $architecture_listing
      = decode_utf8(safe_qx('dpkg-architecture', '-L'));

    my @ports = split(/\n/, $architecture_listing);
    chomp for @ports;

    my @query = qw{dpkg-buildflags --query-features hardening};

    my %features_by_port;
    for my $port (@ports) {

        # dpkg-buildflags answers for the architecture named here
        local $ENV{DEB_HOST_ARCH} = $port;

        my $report = decode_utf8(safe_qx(@query));

        my $parser = Lintian::Deb822->new;
        my @paragraphs = $parser->parse_string($report);

        my @names = uniq map { $_->value('Feature') }
          grep { $_->value('Enabled') eq 'yes' } @paragraphs;

        my @ordered = sort @names;
        $features_by_port{$port} = \@ordered;
    }

    my $destination = "$basedir/" . $self->location;

    return $self->write_file($RECOMMENDED_FEATURES, \%features_by_port,
        $destination);
}

=back

=cut

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Debhelper/Addons.pm b/lib/Lintian/Data/Debhelper/Addons.pm
new file mode 100644
index 0000000..3b8dbb1
--- /dev/null
+++ b/lib/Lintian/Data/Debhelper/Addons.pm
@@ -0,0 +1,215 @@
# -*- perl -*-
#
# Copyright (C) 2008 by Raphael Geissert <atomo64@gmail.com>
# Copyright (C) 2017-2018 Chris Lamb <lamby@debian.org>
# Copyright (C) 2021 Felix Lechner
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.

package Lintian::Data::Debhelper::Addons;

use v5.20;
use warnings;
use utf8;

use Carp qw(croak);
use Const::Fast;
use List::SomeUtils qw(first_value any uniq);
use PerlIO::gzip;
use Unicode::UTF8 qw(encode_utf8);

const my $SPACE => q{ };
const my $SLASH => q{/};

const my $WAIT_STATUS_SHIFT => 8;

const my $ADD_ONS => 'add_ons';

use Moo;
use namespace::clean;

with 'Lintian::Data::PreambledJSON';

=head1 NAME

Lintian::Data::Debhelper::Addons - Lintian interface for debhelper addons.

=head1 SYNOPSIS

    use Lintian::Data::Debhelper::Addons;

=head1 DESCRIPTION

This module provides a way to load data files for debhelper.

=head1 INSTANCE METHODS

=over 4

=item title

Human-readable name of this data set.

=item location

Path of the JSON data file, relative to a data directory.

=item installable_names_by_add_on

Hash reference that maps the name of an add-on to a list of installable
names. Filled by C<load>.

=cut

has title => (
    is => 'rw',
    default => 'Debhelper Add-ons'
);

has location => (
    is => 'rw',
    default => 'debhelper/add_ons.json'
);

has installable_names_by_add_on => (is => 'rw', default => sub { {} });

=item all

Returns the names of all known add-ons.

=cut

sub all {
    my ($self) = @_;

    my $names_by_add_on = $self->installable_names_by_add_on;

    return keys %{$names_by_add_on};
}

=item installed_by

Returns the installable names recorded for the named add-on, or an empty
list when the add-on is unknown. C<debhelper-compat> is appended whenever
C<debhelper> is among them.

=cut

sub installed_by {
    my ($self, $name) = @_;

    my $names_by_add_on = $self->installable_names_by_add_on;

    return ()
      unless exists $names_by_add_on->{$name};

    my @providers = @{$names_by_add_on->{$name} // []};

    if (any { $_ eq 'debhelper' } @providers) {
        push(@providers, 'debhelper-compat');
    }

    return @providers;
}

=item load

Takes a reference to a list of data directories, reads C<location> from
the first one that contains it, and fills C<installable_names_by_add_on>.
Returns 1 on success and 0 when the file could not be read.

=cut

sub load {
    my ($self, $search_space, $our_vendor) = @_;

    # first data directory that actually carries our file wins
    my $path
      = first_value { -e } map { $_ . $SLASH . $self->location }
      @{$search_space};

    my $parsed;
    $self->read_file($path, \$parsed)
      or return 0;

    my %add_ons = %{$parsed // {}};

    # store copies of the lists, not the parsed references themselves
    my %names_by_add_on;
    $names_by_add_on{$_} = [@{$add_ons{$_}{installed_by}}] for keys %add_ons;

    $self->installable_names_by_add_on(\%names_by_add_on);

    return 1;
}

=item refresh

Scans the C<Contents> indices obtained from the archive object for
debhelper sequence modules and writes the installable names that ship
them to C<location> below the given base directory. Returns the status
reported by C<write_file>.

=cut

sub refresh {
    my ($self, $archive, $basedir) = @_;

    # neutral sort order
    local $ENV{LC_ALL} = 'C';

    my $port = 'amd64';

    my %add_ons;

    for my $installable_architecture ('all', $port) {

        my $local_path
          = $archive->contents_gz('sid', 'main', $installable_architecture);

        open(my $contents, '<:gzip', $local_path)
          or die encode_utf8("Cannot open $local_path.");

        while (my $entry = <$contents>) {

            chomp $entry;

            # each entry holds a path, then the locations that ship it
            my ($path, $finder) = split($SPACE, $entry, 2);
            next
              unless length $path
              && length $finder;

            next
              unless $path
              =~ m{^ usr/share/perl5/Debian/Debhelper/Sequence/ (\S+) [.]pm $}x;

            my $add_on = $1;

            for my $location (split(m{,}, $finder)) {

                my ($section, $installable) = split(m{/}, $location, 2);

                push(@{$add_ons{$add_on}{installed_by}}, $installable);
            }
        }

        close $contents;
    }

    my $destination = "$basedir/" . $self->location;

    return $self->write_file($ADD_ONS, \%add_ons, $destination);
}

=back

=head1 AUTHOR

Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.

=head1 SEE ALSO

lintian(1)

=cut

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Debhelper/Commands.pm b/lib/Lintian/Data/Debhelper/Commands.pm
new file mode 100644
index 0000000..bd1ea67
--- /dev/null
+++ b/lib/Lintian/Data/Debhelper/Commands.pm
@@ -0,0 +1,306 @@
# -*- perl -*-
#
# Copyright (C) 2008 by Raphael Geissert <atomo64@gmail.com>
# Copyright (C) 2017-2018 Chris Lamb <lamby@debian.org>
# Copyright (C) 2021 Felix Lechner
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.

package Lintian::Data::Debhelper::Commands;

use v5.20;
use warnings;
use utf8;

use Const::Fast;
use File::Basename;
use File::Find::Rule;
use IPC::Run3;
use List::SomeUtils qw(first_value any uniq);
use Path::Tiny;
use PerlIO::gzip;
use Unicode::UTF8 qw(encode_utf8);

const my $SPACE => q{ };
const my $SLASH => q{/};

const my $WAIT_STATUS_SHIFT => 8;

const my $COMMANDS => 'commands';

use Moo;
use namespace::clean;

with 'Lintian::Data::PreambledJSON';

=head1 NAME

Lintian::Data::Debhelper::Commands - Lintian interface for debhelper commands.

=head1 SYNOPSIS

    use Lintian::Data::Debhelper::Commands;

=head1 DESCRIPTION

This module provides a way to load data files for debhelper.

=head1 INSTANCE METHODS

=over 4

=item title

Human-readable name of this data set.

=item location

Path of the JSON data file, relative to a data directory.

=item installable_names_by_command

Hash reference that maps a command name to a list of installable names.
Filled by C<load>.

=item maint_commands

List reference with the names of all commands flagged C<uses_autoscript>
in the data file.

=item misc_depends_commands

List reference with the names of all commands flagged
C<uses_misc_depends> in the data file, except C<dh_gencontrol>.

=cut

has title => (
    is => 'rw',
    default => 'Debhelper Commands'
);

has location => (
    is => 'rw',
    default => 'debhelper/commands.json'
);

has installable_names_by_command => (is => 'rw', default => sub { {} });
has maint_commands => (is => 'rw', default => sub { [] });
has misc_depends_commands => (is => 'rw', default => sub { [] });

=item all

Returns the names of all known commands.

=cut

sub all {
    my ($self) = @_;

    return keys %{$self->installable_names_by_command};
}

=item installed_by

Returns the installable names recorded for the named command, or an empty
list when the command is unknown. C<debhelper-compat> is appended whenever
C<debhelper> is among them.

=cut

sub installed_by {
    my ($self, $name) = @_;

    return ()
      unless exists $self->installable_names_by_command->{$name};

    my @installed_by = @{$self->installable_names_by_command->{$name} // []};

    push(@installed_by, 'debhelper-compat')
      if any { $_ eq 'debhelper' } @installed_by;

    return @installed_by;
}

=item load

Takes a reference to a list of data directories, reads C<location> from
the first one that contains it, and fills C<installable_names_by_command>,
C<maint_commands> and C<misc_depends_commands>. Returns 1 on success and
0 when the file could not be read.

=cut

sub load {
    my ($self, $search_space, $our_vendor) = @_;

    my @candidates = map { $_ . $SLASH . $self->location } @{$search_space};
    my $path = first_value { -e } @candidates;

    my $reference;
    return 0
      unless $self->read_file($path, \$reference);

    my %commands = %{$reference // {}};

    my %installable_names_by_command;
    my @maint_commands;
    my @misc_depends_commands;

    for my $name (keys %commands) {

        # refresh can write entries that only carry the uses_* flags;
        # treat a missing list as empty instead of dying on the dereference
        my @installable_names = @{$commands{$name}{installed_by} // []};

        $installable_names_by_command{$name} = \@installable_names;

        push(@maint_commands, $name)
          if $commands{$name}{uses_autoscript};

        push(@misc_depends_commands, $name)
          if $commands{$name}{uses_misc_depends}
          && $name ne 'dh_gencontrol';
    }

    $self->installable_names_by_command(\%installable_names_by_command);
    $self->maint_commands(\@maint_commands);
    $self->misc_depends_commands(\@misc_depends_commands);

    return 1;
}

# Runs an external command and dies with its standard error output (or a
# generic message when there is none) if the exit status is non-zero.
sub _run_or_die {
    my (@command) = @_;

    # run3 takes stdin, stdout and stderr in that order; standard error
    # must go into the fourth argument to be available for the message
    my ($stdout, $stderr);
    run3(\@command, undef, \$stdout, \$stderr);
    my $status = ($? >> $WAIT_STATUS_SHIFT);

    # stderr already in UTF-8
    die($stderr
          || encode_utf8("Command failed with status $status: @command"))
      if $status;

    return;
}

=item refresh

Scans the C<Contents> indices obtained from the archive object for
F<usr/bin/dh_*> commands. It then downloads and unpacks each installable
that ships one, to find out which commands mention C<autoscript> or
C<misc:Depends>. The result is written to C<location> below the given
base directory. Returns the status reported by C<write_file>.

=cut

sub refresh {
    my ($self, $archive, $basedir) = @_;

    # neutral sort order
    local $ENV{LC_ALL} = 'C';

    my $port = 'amd64';

    my %commands;

    for my $installable_architecture ('all', $port) {

        my $local_path
          = $archive->contents_gz('sid', 'main', $installable_architecture);

        open(my $fd, '<:gzip', $local_path)
          or die encode_utf8("Cannot open $local_path.");

        while (my $line = <$fd>) {

            chomp $line;

            # each entry holds a path, then the locations that ship it
            my ($path, $finder) = split($SPACE, $line, 2);
            next
              unless length $path
              && length $finder;

            if ($path =~ m{^ usr/bin/ (dh_ \S+) $}x) {

                my $name = $1;

                my @locations = split(m{,}, $finder);
                for my $location (@locations) {

                    my ($section, $installable)= split(m{/}, $location, 2);

                    $commands{$name}{installed_by} //= [];
                    push(@{$commands{$name}{installed_by}}, $installable);
                }

                next;
            }
        }

        close $fd;
    }

    my $deb822_by_installable_name
      = $archive->deb822_packages_by_installable_name('sid', 'main', $port);

    my $work_folder
      = Path::Tiny->tempdir(
        TEMPLATE => 'refresh-debhelper-add-ons-XXXXXXXXXX');

    my @uses_autoscript;
    my @uses_misc_depends;

    my @installable_names
      = uniq map { @{$_->{installed_by} // []} } values %commands;

    for my $installable_name (sort @installable_names) {

        next
          unless exists $deb822_by_installable_name->{$installable_name};

        my $deb822 = $deb822_by_installable_name->{$installable_name};

        my $pool_path = $deb822->value('Filename');

        my $deb_filename = basename($pool_path);
        my $deb_local_path = "$work_folder/$deb_filename";
        my $deb_url = $archive->mirror_base . $SLASH . $pool_path;

        _run_or_die(qw{wget --quiet}, "--output-document=$deb_local_path",
            $deb_url);

        my $extract_folder = "$work_folder/pool/$pool_path";
        path($extract_folder)->mkpath;

        _run_or_die(qw{dpkg-deb --extract}, $deb_local_path,
            $extract_folder);

        unlink($deb_local_path)
          or die encode_utf8("Cannot delete $deb_local_path");

        my $autoscript_rule = File::Find::Rule->file;
        $autoscript_rule->name(qr{^dh_});
        $autoscript_rule->grep(qr{autoscript});
        my @autoscript_matches
          = $autoscript_rule->in("$extract_folder/usr/bin");

        push(@uses_autoscript, map { basename($_) } @autoscript_matches);

        my $misc_depends_rule = File::Find::Rule->file;
        $misc_depends_rule->name(qr{^dh_});
        $misc_depends_rule->grep(qr{misc:Depends});
        my @misc_depends_matches
          = $misc_depends_rule->in("$extract_folder/usr/bin");

        push(@uses_misc_depends, map { basename($_) } @misc_depends_matches);

        # keep the work folder small; only one package is unpacked at a time
        path("$work_folder/pool")->remove_tree;
    }

    $commands{$_}{uses_autoscript} = 1 for @uses_autoscript;

    $commands{$_}{uses_misc_depends} = 1 for @uses_misc_depends;

    my $data_path = "$basedir/" . $self->location;
    my $status = $self->write_file($COMMANDS, \%commands,$data_path);

    return $status;
}

=back

=head1 AUTHOR

Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.

=head1 SEE ALSO

lintian(1)

=cut

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Debhelper/Levels.pm b/lib/Lintian/Data/Debhelper/Levels.pm
new file mode 100644
index 0000000..571ce2c
--- /dev/null
+++ b/lib/Lintian/Data/Debhelper/Levels.pm
@@ -0,0 +1,89 @@
# -*- perl -*-
#
# Copyright (C) 1998 Christian Schwarz and Richard Braakman
# Copyright (C) 2009 Russ Allbery
# Copyright (C) 2020 Felix Lechner
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.

package Lintian::Data::Debhelper::Levels;

use v5.20;
use warnings;
use utf8;

use Moo;
use namespace::clean;

with 'Lintian::Data::JoinedLines';

=head1 NAME

Lintian::Data::Debhelper::Levels - Lintian interface for debhelper
compat levels.

=head1 SYNOPSIS

    use Lintian::Data::Debhelper::Levels;

=head1 DESCRIPTION

This module provides a way to load data files for debhelper.

=head1 INSTANCE METHODS

=over 4

=item title

Human-readable name of this data set. Defaults to C<Debhelper Levels>.

=item location

Path of the data file, relative to a data directory. Defaults to
F<debhelper/compat-level>.

=item separator

Regular expression handed to the C<Lintian::Data::JoinedLines> role for
splitting a line into key and value. Defaults to a single equals sign.

=cut

has title => (
    is => 'rw',
    default => 'Debhelper Levels'
);

has location => (
    is => 'rw',
    default => 'debhelper/compat-level'
);

has separator => (
    is => 'rw',
    default => sub { qr/=/ }
);

=back

=head1 AUTHOR

Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.

=head1 SEE ALSO

lintian(1)

=cut

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Fonts.pm b/lib/Lintian/Data/Fonts.pm
new file mode 100644
index 0000000..4820439
--- /dev/null
+++ b/lib/Lintian/Data/Fonts.pm
@@ -0,0 +1,216 @@
# -*- perl -*-
#
# Copyright (C) 2021 Felix Lechner
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.

package Lintian::Data::Fonts;

use v5.20;
use warnings;
use utf8;

use Const::Fast;
use File::Basename;
use List::SomeUtils qw(first_value uniq);
use PerlIO::gzip;
use Unicode::UTF8 qw(encode_utf8);

const my $SPACE => q{ };
const my $SLASH => q{/};

const my $FONTS => 'fonts';

use Moo;
use namespace::clean;

with 'Lintian::Data::PreambledJSON';

=head1 NAME

Lintian::Data::Fonts - Lintian interface for fonts.

=head1 SYNOPSIS

    use Lintian::Data::Fonts;

=head1 DESCRIPTION

This module provides a way to load data files for fonts.

=head1 INSTANCE METHODS

=over 4

=item title

Human-readable name of this data set.

=item location

Path of the JSON data file, relative to a data directory.

=item installable_names_by_font

Hash reference that maps a lowercase font file name to a list of
installable names. Filled by C<load>.

=cut

has title => (
    is => 'rw',
    default => 'Fonts Available for Installation'
);

has location => (
    is => 'rw',
    default => 'fonts.json'
);

has installable_names_by_font => (is => 'rw', default => sub { {} });

=item all

Returns the names of all known font files.

=cut

sub all {
    my ($self) = @_;

    my $names_by_font = $self->installable_names_by_font;

    return keys %{$names_by_font};
}

=item installed_by

Returns the installable names recorded for the given font file name, or
an empty list when the font is unknown. The lookup ignores case.

=cut

sub installed_by {
    my ($self, $name) = @_;

    my $names_by_font = $self->installable_names_by_font;
    my $font = lc $name;

    return ()
      unless exists $names_by_font->{$font};

    my @providers = @{$names_by_font->{$font} // []};

    return @providers;
}

=item load

Takes a reference to a list of data directories, reads C<location> from
the first one that contains it, and fills C<installable_names_by_font>.
Returns 1 on success and 0 when the file could not be read.

=cut

sub load {
    my ($self, $search_space, $our_vendor) = @_;

    # first data directory that actually carries our file wins
    my $path
      = first_value { -e } map { $_ . $SLASH . $self->location }
      @{$search_space};

    my $parsed;
    $self->read_file($path, \$parsed)
      or return 0;

    my %fonts = %{$parsed // {}};

    # store copies of the lists, not the parsed references themselves
    my %names_by_font;
    $names_by_font{$_} = [@{$fonts{$_}{installed_by}}] for keys %fonts;

    $self->installable_names_by_font(\%names_by_font);

    return 1;
}

=item refresh

Scans the C<Contents> indices obtained from the archive object for font
files and writes the installable names that ship them to C<location>
below the given base directory. Returns the status reported by
C<write_file>.

=cut

sub refresh {
    my ($self, $archive, $basedir) = @_;

    # neutral sort order
    local $ENV{LC_ALL} = 'C';

    my $port = 'amd64';

    my %fonts;

    for my $installable_architecture ('all', $port) {

        my $local_path
          = $archive->contents_gz('sid', 'main', $installable_architecture);

        open(my $contents, '<:gzip', $local_path)
          or die encode_utf8("Cannot open $local_path.");

        while (my $entry = <$contents>) {

            chomp $entry;

            # each entry holds a path, then the locations that ship it
            my ($path, $finder) = split($SPACE, $entry, 2);
            next
              unless length $path
              && length $finder;

            next
              unless $path =~ m{ [.] (?:[to]tf|pfb) $}ix;

            # fonts are keyed by lowercase file name
            my $font = lc(basename($path));

            for my $location (split(m{,}, $finder)) {

                my ($section, $installable_name)
                  = split(m{/}, $location, 2);

                # Record only packages starting with ttf-, otf-, t1-, xfonts- or fonts-
                next
                  unless $installable_name
                  =~ m{^ (?: [to]tf | t1 | x?fonts ) - }x;

                push(@{$fonts{$font}{installed_by}}, $installable_name);
            }
        }

        close $contents;
    }

    my $destination = "$basedir/" . $self->location;

    return $self->write_file($FONTS, \%fonts, $destination);
}

=back

=head1 AUTHOR

Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.

=head1 SEE ALSO

lintian(1)

=cut

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/InitD/VirtualFacilities.pm b/lib/Lintian/Data/InitD/VirtualFacilities.pm
new file mode 100644
index 0000000..fbb4030
--- /dev/null
+++ b/lib/Lintian/Data/InitD/VirtualFacilities.pm
@@ -0,0 +1,256 @@
# -*- perl -*-
#
# Copyright (C) 2008, 2010 by Raphael Geissert <atomo64@gmail.com>
# Copyright (C) 2017 Chris Lamb <lamby@debian.org>
# Copyright (C) 2021 Felix Lechner
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.

package Lintian::Data::InitD::VirtualFacilities;

use v5.20;
use warnings;
use utf8;

use Const::Fast;
use File::Basename;
use File::Find::Rule;
use IPC::Run3;
use List::SomeUtils qw(first_value uniq);
use Path::Tiny;
use PerlIO::gzip;
use Unicode::UTF8 qw(encode_utf8);

const my $EMPTY => q{};
const my $SPACE => q{ };
const my $SLASH => q{/};
const my $DOLLAR => q{$};

const my $NEWLINE => qq{\n};

const my $WAIT_STATUS_SHIFT => 8;

use Moo;
use namespace::clean;

with 'Lintian::Data::JoinedLines';

=head1 NAME

Lintian::Data::InitD::VirtualFacilities - Lintian interface for init.d virtual facilities

=head1 SYNOPSIS

    use Lintian::Data::InitD::VirtualFacilities;

=head1 DESCRIPTION

This module provides a way to load data files for init.d.

=head1 INSTANCE METHODS

=over 4

=item title

Human-readable name of this data set.

=item location

Path of the data file, relative to a data directory.

=item separator

Regular expression for splitting a data line into key and value.
Defaults to a run of whitespace.

=cut

has title => (
    is => 'rw',
    default => 'Init.d Virtual Facilities'
);

has location => (
    is => 'rw',
    default => 'init.d/virtual_facilities'
);

has separator => (
    is => 'rw',
    default => sub { qr{ \s+ }x }
);

# Runs an external command and dies with its standard error output (or a
# generic message when there is none) if the exit status is non-zero.
sub _run_or_die {
    my (@command) = @_;

    # run3 takes stdin, stdout and stderr in that order; standard error
    # must go into the fourth argument to be available for the message
    my ($stdout, $stderr);
    run3(\@command, undef, \$stdout, \$stderr);
    my $status = ($? >> $WAIT_STATUS_SHIFT);

    # stderr already in UTF-8
    die($stderr
          || encode_utf8("Command failed with status $status: @command"))
      if $status;

    return;
}

=item refresh

Scans the C<Contents> indices obtained from the archive object for
F<etc/insserv.conf> and files below F<etc/insserv.conf.d/>. It then
downloads and unpacks every installable that ships one, and collects the
facility names that start a line with a dollar sign. The sorted, unique
names plus C<$all> are written to C<location> below the given base
directory. Returns 1.

=cut

sub refresh {
    my ($self, $archive, $basedir) = @_;

    # neutral sort order
    local $ENV{LC_ALL} = 'C';

    my $port = 'amd64';

    my %paths_by_installable_names;

    for my $installable_architecture ('all', $port) {

        my $local_path
          = $archive->contents_gz('sid', 'main', $installable_architecture);

        open(my $fd, '<:gzip', $local_path)
          or die encode_utf8("Cannot open $local_path.");

        while (my $line = <$fd>) {

            chomp $line;

            # each entry holds a path, then the locations that ship it
            my ($path, $finder) = split($SPACE, $line, 2);
            next
              unless length $path
              && length $finder;

            # catch both monolithic and split configurations; the group
            # must not be optional, or any path that merely starts with
            # etc/insserv.conf would match
            if ($path =~ m{^ etc/insserv[.]conf (?: $ | [.]d / ) }x) {

                my @locations = split(m{,}, $finder);
                for my $location (@locations) {

                    my ($section, $installable)= split(m{/}, $location, 2);

                    $paths_by_installable_names{$installable} //= [];
                    push(@{$paths_by_installable_names{$installable}}, $path);
                }

                next;
            }
        }

        close $fd;
    }

    my $deb822_by_installable_name
      = $archive->deb822_packages_by_installable_name('sid', 'main', $port);

    my $work_folder
      = Path::Tiny->tempdir(
        TEMPLATE => 'refresh-debhelper-add-ons-XXXXXXXXXX');

    my @virtual_facilities;

    my @installable_names = keys %paths_by_installable_names;

    for my $installable_name (sort @installable_names) {

        next
          unless exists $deb822_by_installable_name->{$installable_name};

        my $deb822 = $deb822_by_installable_name->{$installable_name};

        my $pool_path = $deb822->value('Filename');

        my $deb_filename = basename($pool_path);
        my $deb_local_path = "$work_folder/$deb_filename";
        my $deb_url = $archive->mirror_base . $SLASH . $pool_path;

        _run_or_die(qw{wget --quiet}, "--output-document=$deb_local_path",
            $deb_url);

        my $extract_folder = "$work_folder/pool/$pool_path";
        path($extract_folder)->mkpath;

        _run_or_die(qw{dpkg-deb --extract}, $deb_local_path,
            $extract_folder);

        unlink($deb_local_path)
          or die encode_utf8("Cannot delete $deb_local_path");

        my $monolithic_rule = File::Find::Rule->file;
        $monolithic_rule->name('insserv.conf');
        my @files= $monolithic_rule->in("$extract_folder/etc");

        # a package may ship only the monolithic file; do not search a
        # folder that was never unpacked
        my $split_folder = "$extract_folder/etc/insserv.conf.d";
        if (-d $split_folder) {

            my $split_files_rule = File::Find::Rule->file;
            push(@files, $split_files_rule->in($split_folder));
        }

        for my $path (@files) {

            open(my $fd, '<', $path)
              or die encode_utf8("Cannot open $path.");

            while (my $line = <$fd>) {

                # facility names start a line with a dollar sign
                if ($line =~ m{^ ( \$\S+ ) }x) {

                    my $virtual = $1;
                    push(@virtual_facilities, $virtual);
                }
            }

            close $fd;
        }

        # keep the work folder small; only one package is unpacked at a time
        path("$work_folder/pool")->remove_tree;
    }

    push(@virtual_facilities, $DOLLAR . 'all');

    my $generated = $EMPTY;

    # still in UTF-8
    $generated .= $_ . $NEWLINE for sort +uniq @virtual_facilities;

    my $header =<<"HEADER";
# The list of known virtual facilities that init scripts may depend on.
#

HEADER

    my $data_path = "$basedir/" . $self->location;
    my $parent_dir = path($data_path)->parent->stringify;
    path($parent_dir)->mkpath
      unless -e $parent_dir;

    my $output = encode_utf8($header) . $generated;
    path($data_path)->spew($output);

    return 1;
}

=back

=head1 AUTHOR

Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.
+ +=head1 SEE ALSO + +lintian(1) + +=cut + +1; + +# Local Variables: +# indent-tabs-mode: nil +# cperl-indent-level: 4 +# End: +# vim: syntax=perl sw=4 sts=4 sr et diff --git a/lib/Lintian/Data/JoinedLines.pm b/lib/Lintian/Data/JoinedLines.pm new file mode 100644 index 0000000..a753430 --- /dev/null +++ b/lib/Lintian/Data/JoinedLines.pm @@ -0,0 +1,369 @@ +# -*- perl -*- +# Lintian::Data::JoinedLines -- interface to query lists of keywords + +# Copyright (C) 2008 Russ Allbery +# Copyright (C) 2017-2018 Chris Lamb <lamby@debian.org> +# Copyright (C) 2021 Felix Lechner +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# this program. If not, see <http://www.gnu.org/licenses/>. + +package Lintian::Data::JoinedLines; + +use v5.20; +use warnings; +use utf8; + +use Carp qw(carp croak); +use Const::Fast; +use List::SomeUtils qw(any); +use Unicode::UTF8 qw(encode_utf8); + +use Moo::Role; +use namespace::clean; + +const my $EMPTY => q{}; +const my $SLASH => q{/}; + +=head1 NAME + +Lintian::Data::JoinedLines - Lintian interface to query lists of keywords + +=head1 SYNOPSIS + + my $keyword; + my $list = Lintian::Data::JoinedLines->new('type'); + if ($list->recognizes($keyword)) { + # do something ... + } + my $hash = Lintian::Data::JoinedLines->new('another-type', qr{\s++}); + if ($hash->value($keyword) > 1) { + # do something ... + } + if ($list->value($keyword) > 1) { + # do something ... 
+ } + my @keywords = $list->all; + if ($list->matches_any($keyword)) { + # do something ... + } + +=head1 DESCRIPTION + +Lintian::Data::JoinedLines provides a way of loading a list of keywords or key/value +pairs from a file in the Lintian root and then querying that list. +The lists are stored in the F<data> directory of the Lintian root and +consist of one keyword or key/value pair per line. Blank lines and +lines beginning with C<#> are ignored. Leading and trailing whitespace +is stripped. + +If requested, the lines are split into key/value pairs with a given +separator regular expression. Otherwise, keywords are taken verbatim +as they are listed in the file and may include spaces. + +This module allows lists such as menu sections, doc-base sections, +obsolete packages, package fields, and so forth to be stored in simple, +easily editable files. + +NB: By default Lintian::Data::JoinedLines is lazy and defers loading of the data +file until it is actually needed. + +=head2 Interface for the CODE argument + +This section describes the interface for the CODE argument +of the class method new. + +The sub will be called once for each key/value pair with three arguments, +KEY, VALUE and CURVALUE. The first two are the key/value pair parsed +from the data file and CURVALUE is the current value associated with the +key. CURVALUE will be C<undef> the first time the sub is called with +that KEY argument. + +The sub can then modify VALUE in some way and return the new value for +that KEY. If CURVALUE is not C<undef>, the sub may return C<undef> to +indicate that the current value should still be used. It is not +permissible for the sub to return C<undef> if CURVALUE is C<undef>. + +Where Perl semantics allow it, the sub can modify CURVALUE and the +changes will be reflected in the result. As an example, if CURVALUE +is a hashref, new keys can be inserted etc. 

=head1 INSTANCE METHODS

=over 4

=item dataset

Hash reference holding the value stored for each key. An undefined value
is coerced to an empty hash reference.

=item C<keyorder>

List reference holding the keys in the order in which they were first
stored. An undefined value is coerced to an empty list reference.

=cut

has dataset => (
    is => 'rw',
    coerce => sub { return $_[0] // {}; },
    default => sub { {} }
);

has keyorder => (
    is => 'rw',
    coerce => sub { return $_[0] // []; },
    default => sub { [] }
);

=item all

Returns all keywords listed in the data file as a list in original order.
In a scalar context, returns the number of keywords.

=cut

sub all {
    my ($self) = @_;

    my $ordered_keys = $self->keyorder;

    return @{$ordered_keys};
}

=item recognizes (KEY)

Returns true if KEY was listed in the data file represented by this
Lintian::Data::JoinedLines instance and false otherwise. An empty or
undefined KEY is never recognized.

=cut

sub recognizes {
    my ($self, $key) = @_;

    # nothing can be stored under an empty key
    if (length($key) && exists $self->dataset->{$key}) {
        return 1;
    }

    return 0;
}

=item resembles (KEY)

Returns true if the data file contains a key that is a case-insensitive match
to KEY, and false otherwise.

=cut

sub resembles {
    my ($self, $key) = @_;

    return 0
      unless length $key;

    # an exact hit spares us the scan
    return 1
      if $self->recognizes($key);

    for my $known (keys %{$self->dataset}) {

        return 1
          if $known =~ m{^\Q$key\E$}i;
    }

    return 0;
}

=item value (KEY)

Returns the value attached to KEY if it was listed in the data
file represented by this Lintian::Data::JoinedLines instance and the undefined value
otherwise.

=cut

sub value {
    my ($self, $key) = @_;

    return length($key) ? $self->dataset->{$key} : undef;
}

=item matches_any(KEYWORD[, MODIFIERS])

Returns true if KEYWORD matches any regular expression listed in the
data file. The optional MODIFIERS serve as modifiers on all regexes.

=cut

sub matches_any {
    my ($self, $wanted, $modifiers) = @_;

    return 0
      unless length $wanted;

    $modifiers //= $EMPTY;

    # every key of the data file is used as a regular expression, with
    # the modifiers applied inline
    return 1
      if any { $wanted =~ /(?$modifiers)$_/ } $self->all;

    return 0;
}

=item load

Takes a reference to a list of directories and the name of the vendor.
The data file is read from the first directory; if it does not exist
there, the search continues with the remaining directories. Returns 1
after a file was read. Warns and returns 0 when no directory is left.

Lines starting with C<@> are commands. C<@delete KEY> removes a key.
C<@include-parent> loads the same file from the remaining directories
first. C<@if-vendor-is VENDOR REST> and C<@if-vendor-is-not VENDOR REST>
process REST as a line only when the vendor condition holds. Any other
command is a fatal error.

=cut

sub load {
    my ($self, $search_space, $our_vendor) = @_;

    my @remaining_lineage = @{$search_space // []};
    unless (@remaining_lineage) {

        carp encode_utf8('Unknown data file: ' . $self->location);
        return 0;
    }

    my $directory = shift @remaining_lineage;

    my $path = $directory . $SLASH . $self->location;

    # not present here; try the remaining directories
    return $self->load(\@remaining_lineage, $our_vendor)
      unless -e $path;

    open(my $fd, '<:utf8_strict', $path)
      or die encode_utf8("Cannot open $path: $!");

    # line number, used in error messages only
    my $position = 1;
    while (my $line = <$fd>) {

        # trim both ends
        $line =~ s/^\s+|\s+$//g;

        next
          unless length $line;

        # skip comments
        next
          if $line =~ m{^\#};

        # a command
        if ($line =~ s/^\@//) {

            my ($directive, $value) = split(/\s+/, $line, 2);
            if ($directive eq 'delete') {

                croak encode_utf8(
                    "Missing key after \@delete in $path at line $position")
                  unless length $value;

                @{$self->keyorder} = grep { $_ ne $value } @{$self->keyorder};
                delete $self->dataset->{$value};

            } elsif ($directive eq 'include-parent') {

                $self->load(\@remaining_lineage, $our_vendor)
                  or croak encode_utf8("No ancestor data file for $path");

            } elsif ($directive eq 'if-vendor-is'
                || $directive eq 'if-vendor-is-not') {

                my ($specified_vendor, $remain) = split(/\s+/, $value, 2);

                croak encode_utf8("Missing vendor name after \@$directive")
                  unless length $specified_vendor;
                croak encode_utf8(
                    "Missing command after vendor name for \@$directive")
                  unless length $remain;

                # compare only the part before the first slash
                $our_vendor =~ s{/.*$}{};

                next
                  if $directive eq 'if-vendor-is'
                  && $our_vendor ne $specified_vendor;

                next
                  if $directive eq 'if-vendor-is-not'
                  && $our_vendor eq $specified_vendor;

                # process the rest as if it were the line; redo restarts
                # the loop body without reading input and without running
                # the continue block
                $line = $remain;
                redo;

            } else {
                croak encode_utf8(
                    "Unknown operation \@$directive in $path at line $position"
                );
            }
            next;
        }

        my $key = $line;
        my $remainder;

        # without a separator the whole line is the key
        ($key, $remainder) = split($self->separator, $line, 2)
          if defined $self->separator;

        # do not autovivify; 'exists' below
        my $previous;
        $previous = $self->dataset->{$key}
          if exists $self->dataset->{$key};

        my $value;
        if ($self->can('consumer')) {

            # a consuming class may transform the value; an undefined
            # result leaves the stored value untouched
            $value = $self->consumer($key, $remainder, $previous);
            next
              unless defined $value;

        } else {
            $value = $remainder;
        }

        # remember the order of first appearance
        push(@{$self->keyorder}, $key)
          unless exists $self->dataset->{$key};

        $self->dataset->{$key} = $value;

    } continue {
        ++$position;
    }

    close $fd;

    return 1;
}

=back

=head1 FILES

=over 4

=item LINTIAN_INCLUDE_DIR/data

The files loaded by this module must be located in this directory.
Relative paths containing a C</> are permitted, so files may be organized
in subdirectories in this directory.

Note that lintian supports multiple LINTIAN_INCLUDE_DIRs.

=back

=head1 AUTHOR

Originally written by Russ Allbery <rra@debian.org> for Lintian.

=head1 SEE ALSO

lintian(1), L<https://lintian.debian.org/manual/section-2.6.html>

=cut

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Policy/Releases.pm b/lib/Lintian/Data/Policy/Releases.pm
new file mode 100644
index 0000000..540da13
--- /dev/null
+++ b/lib/Lintian/Data/Policy/Releases.pm
@@ -0,0 +1,274 @@
# -*- perl -*-
#
# Copyright (C) 1998 Christian Schwarz and Richard Braakman
# Copyright (C) 2009 Russ Allbery
# Copyright (C) 2020 Felix Lechner
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
+# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# this program. If not, see <http://www.gnu.org/licenses/>. + +package Lintian::Data::Policy::Releases; + +use v5.20; +use warnings; +use utf8; + +use Const::Fast; +use Date::Parse qw(str2time); +use List::SomeUtils qw(first_value); +use IPC::Run3; +use HTTP::Tiny; +use List::SomeUtils qw(minmax); +use List::UtilsBy qw(rev_nsort_by); +use Path::Tiny; +use Time::Moment; +use Unicode::UTF8 qw(decode_utf8 encode_utf8); + +const my $SLASH => q{/}; + +const my $RELEASES => q{releases}; + +const my $WAIT_STATUS_SHIFT => 8; + +use Moo; +use namespace::clean; + +with 'Lintian::Data::PreambledJSON'; + +=head1 NAME + +Lintian::Data::Policy::Releases - Lintian interface for policy releases + +=head1 SYNOPSIS + + use Lintian::Data::Policy::Releases; + +=head1 DESCRIPTION + +This module provides a way to load data files for policy releases. 
=head1 INSTANCE METHODS

=over 4

=item title

Title stored in, and expected from, the preamble of the data file.

=item location

Path of the data file relative to a data directory.

=item ordered_versions

Array reference of release versions as spelled in the data file, sorted
by release time with the newest first.

=item by_version

Hash reference from normalized version to the release record.

=item max_dots

Largest number of dots seen in any release version. Used to pad shorter
versions in C<normalize>.

=cut

has title => (
    is => 'rw',
    default => 'Debian Policy Releases'
);

has location => (
    is => 'rw',
    default => 'debian-policy/releases.json'
);

has ordered_versions => (is => 'rw', default => sub { [] });
has by_version => (is => 'rw', default => sub { {} });
has max_dots => (is => 'rw');

=item latest_version

Returns the first entry of C<ordered_versions>, i.e. the most recent
release, or the undefined value when nothing was loaded.

=cut

sub latest_version {
    my ($self) = @_;

    return $self->ordered_versions->[0];
}

=item normalize (VERSION)

Returns VERSION padded with trailing C<.0> components until it has
C<max_dots> dots. Versions that already have that many dots, or more,
are returned unchanged.

=cut

sub normalize {
    my ($self, $version) = @_;

    my $have = $version =~ tr{\.}{};

    # max_dots is unset until data was loaded; pad nothing in that case
    my $need = ($self->max_dots // 0) - $have;

    # the range is empty when no padding is needed
    $version .= '.0' for (1..$need);

    return $version;
}

=item is_known (VERSION)

Returns true if the normalized VERSION is a known policy release.

=cut

sub is_known {
    my ($self, $version) = @_;

    my $normalized = $self->normalize($version);

    return exists $self->by_version->{$normalized};
}

=item epoch (VERSION)

Returns the release time of VERSION in seconds since the epoch, or the
undefined value for an unknown release.

=cut

sub epoch {
    my ($self, $version) = @_;

    my $normalized = $self->normalize($version);

    my $release = $self->by_version->{$normalized};
    return undef
      unless defined $release;

    return $release->{epoch};
}

=item author (VERSION)

Returns the author recorded for VERSION, or the undefined value for an
unknown release.

=cut

sub author {
    my ($self, $version) = @_;

    my $normalized = $self->normalize($version);

    my $release = $self->by_version->{$normalized};
    return undef
      unless defined $release;

    return $release->{author};
}

=item load (SEARCH_SPACE, VENDOR)

Reads the release data from the first directory in the array reference
SEARCH_SPACE that holds the file at C<location>, and fills
C<ordered_versions>, C<max_dots> and C<by_version>. VENDOR is not used.

Returns 1 on success and 0 when the file could not be read.

=cut

sub load {
    my ($self, $search_space, $our_vendor) = @_;

    my @candidates = map { $_ . $SLASH . $self->location } @{$search_space};
    my $path = first_value { -e } @candidates;

    # read_file also rejects a missing path
    my $reference;
    return 0
      unless $self->read_file($path, \$reference);

    my @releases = @{$reference // []};

    # newest release first
    my @sorted = rev_nsort_by { $_->{epoch} } @releases;
    my @ordered_versions = map { $_->{version} } @sorted;
    $self->ordered_versions(\@ordered_versions);

    my @dot_count = map { tr{\.}{} } @ordered_versions;
    my (undef, $max_dots) = minmax @dot_count;
    $self->max_dots($max_dots);

    # normalize versions; needs max_dots from above
    $_->{version} = $self->normalize($_->{version}) for @releases;

    my %by_version;
    $by_version{$_->{version}} = $_ for @releases;

    $self->by_version(\%by_version);

    return 1;
}

=item refresh (ARCHIVE, BASEDIR)

Downloads the changelog of Debian Policy, parses it with
C<dpkg-parsechangelog> and writes one record per release, newest first,
to the file at C<location> below BASEDIR. ARCHIVE is not used.

Dies when the download or the changelog parser fails. Otherwise returns
the status from C<write_file>.

=cut

sub refresh {
    my ($self, $archive, $basedir) = @_;

    my $changelog_url
      = 'https://salsa.debian.org/dbnpolicy/policy/-/raw/master/debian/changelog?inline=false';

    my $response = HTTP::Tiny->new->get($changelog_url);
    die encode_utf8("Failed to get $changelog_url!\n")
      unless $response->{success};

    my $tempfile_tiny = Path::Tiny->tempfile;
    $tempfile_tiny->spew($response->{content});

    my @command = (
        qw{dpkg-parsechangelog --format rfc822 --all --file},
        $tempfile_tiny->stringify
    );
    my $rfc822;
    my $stderr;
    run3(\@command, \undef, \$rfc822, \$stderr);
    my $dpkg_status = ($? >> $WAIT_STATUS_SHIFT);

    # already in UTF-8
    die $stderr
      if $dpkg_status;

    # NOTE(review): Lintian::Deb822 is not loaded by this file's visible
    # imports; presumably a caller loads it first - confirm
    my $deb822 = Lintian::Deb822->new;
    my @sections = $deb822->parse_string(decode_utf8($rfc822));

    my @releases;
    for my $section (@sections) {

        my $epoch = str2time($section->value('Date'), 'GMT');
        my $moment = Time::Moment->from_epoch($epoch);
        my $timestamp = $moment->strftime('%Y-%m-%dT%H:%M:%S%Z');

        my @closes = sort { $a <=> $b } $section->trimmed_list('Closes');
        my @changes = split(/\n/, $section->text('Changes'));

        my %release;
        $release{version} = $section->value('Version');
        $release{timestamp} = $timestamp;
        $release{epoch} = $epoch;
        $release{closes} = \@closes;
        $release{changes} = \@changes;
        $release{author} = $section->value('Maintainer');

        push(@releases, \%release);
    }

    # newest release first
    my @sorted = rev_nsort_by { $_->{epoch} } @releases;

    # store the sorted list; it was previously computed but never written
    my $data_path = "$basedir/" . $self->location;
    my $write_status = $self->write_file($RELEASES, \@sorted, $data_path);

    return $write_status;
}

=back

=head1 AUTHOR

Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.

=head1 SEE ALSO

lintian(1)

=cut

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et

# diff --git a/lib/Lintian/Data/PreambledJSON.pm b/lib/Lintian/Data/PreambledJSON.pm
# new file mode 100644
# index 0000000..e2af970
# --- /dev/null
# +++ b/lib/Lintian/Data/PreambledJSON.pm
# @@ -0,0 +1,164 @@
# -*- perl -*-

# Copyright (C) 2021 Felix Lechner
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program.  If not, see <http://www.gnu.org/licenses/>.

package Lintian::Data::PreambledJSON;

use v5.20;
use warnings;
use utf8;

use Carp qw(carp);
use Const::Fast;
use JSON::MaybeXS;
use Path::Tiny;
use Time::Piece;
use Unicode::UTF8 qw(encode_utf8);

use Moo::Role;
use namespace::clean;

const my $EMPTY => q{};

const my $PREAMBLE => q{preamble};
const my $TITLE => q{title};
const my $CARGO => q{cargo};

=encoding utf-8

=head1 NAME

Lintian::Data::PreambledJSON -- Data in preambled JSON format

=head1 SYNOPSIS

 use Lintian::Data::PreambledJSON;

=head1 DESCRIPTION

Routines for access and management of preambled JSON data files.

A preambled JSON file is a JSON object with a C<preamble> member, which
names the title of the data and the key under which the payload is
stored, plus that payload member itself.

=head1 INSTANCE METHODS

=over 4

=item cargo

Read-write attribute. An undefined value is coerced to the empty string.

=cut

has cargo => (
    is => 'rw',
    coerce => sub { my ($scalar) = @_; return ($scalar // $EMPTY); }
);

=item read_file (PATH, REFERENCE)

Reads the preambled JSON file at PATH and stores the payload in the
scalar that REFERENCE points to.

Returns 1 on success. Returns 0, after a warning, when PATH is missing,
when the preamble is absent or incomplete, when the stored title differs
from the consumer's C<title>, or when the payload key is not present.
Dies when the file content is not valid JSON.

=cut

sub read_file {
    my ($self, $path, $double_reference) = @_;

    if (!length $path || !-e $path) {

        # PATH may be undefined when no candidate file was found
        carp encode_utf8('Unknown data file: ' . ($path // $EMPTY));
        return 0;
    }

    # raw bytes; decode_json expects UTF-8 encoded input
    my $json = path($path)->slurp;
    my $data = decode_json($json);

    # a file without a preamble object is stale rather than fatal
    unless (ref($data) eq 'HASH' && ref($data->{$PREAMBLE}) eq 'HASH') {
        warn encode_utf8("Please refresh data file $path: invalid format");
        return 0;
    }

    my %preamble = %{$data->{$PREAMBLE}};
    my $stored_title = $preamble{$TITLE};
    my $storage_key = $preamble{$CARGO};

    unless (length $stored_title && length $storage_key) {
        warn encode_utf8("Please refresh data file $path: invalid format");
        return 0;
    }

    unless ($stored_title eq $self->title) {
        warn encode_utf8(
            "Please refresh data file $path: wrong title $stored_title");
        return 0;
    }

    # the payload cannot share the member used for the preamble
    if ($storage_key eq $PREAMBLE) {
        warn encode_utf8(
            "Please refresh data file $path: disallowed cargo key $storage_key"
        );
        return 0;
    }

    if (!exists $data->{$storage_key}) {
        warn encode_utf8(
            "Please refresh data file $path: cargo key $storage_key not found"
        );
        return 0;
    }

    ${$double_reference} = $data->{$storage_key};

    return 1;
}

=item write_file (STORAGE_KEY, REFERENCE, PATH)

Writes the data behind REFERENCE to PATH as preambled JSON, stored under
STORAGE_KEY. The preamble records the consumer's C<title> and
STORAGE_KEY. Missing parent directories are created.

Returns 1. Dies when STORAGE_KEY equals the key reserved for the
preamble.

=cut

sub write_file {
    my ($self, $storage_key, $reference, $path) = @_;

    die
"Cannot write preambled JSON data file $path: disallowed cargo key $storage_key"
      if $storage_key eq $PREAMBLE;

    my %preamble;
    $preamble{$TITLE} = $self->title;
    $preamble{$CARGO} = $storage_key;

    my %combined;
    $combined{$PREAMBLE} = \%preamble;
    $combined{$storage_key} = $reference;

    # convert to UTF-8 prior to encoding in JSON
    # canonical: sorted keys keep the output stable between refreshes
    my $encoder = JSON->new;
    $encoder->canonical;
    $encoder->utf8;
    $encoder->pretty;

    my $json = $encoder->encode(\%combined);

    my $parentdir = path($path)->parent->stringify;
    path($parentdir)->mkpath
      unless -e $parentdir;

    # already in UTF-8
    path($path)->spew($json);

    return 1;
}

=back

=cut

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et

# diff --git a/lib/Lintian/Data/Provides/MailTransportAgent.pm b/lib/Lintian/Data/Provides/MailTransportAgent.pm
# new file mode 100644
# index 0000000..51818f2
# --- /dev/null
# +++ b/lib/Lintian/Data/Provides/MailTransportAgent.pm
# @@ -0,0 +1,193 @@
# -*- perl -*-
#
# Copyright (C) 2008 Niko Tyni
# Copyright (C) 2018 Chris Lamb <lamby@debian.org>
# Copyright (C) 2021 Felix Lechner
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program.  If not, see <http://www.gnu.org/licenses/>.
package Lintian::Data::Provides::MailTransportAgent;

use v5.20;
use warnings;
use utf8;

use Carp qw(carp);
use Const::Fast;
use List::SomeUtils qw(first_value any);
use Path::Tiny;
use Unicode::UTF8 qw(encode_utf8);

use Moo;
use namespace::clean;

const my $SLASH => q{/};

const my $NEWLINE => qq{\n};

=head1 NAME

Lintian::Data::Provides::MailTransportAgent - Lintian interface for mail transport agents.

=head1 SYNOPSIS

    use Lintian::Data::Provides::MailTransportAgent;

=head1 DESCRIPTION

This module provides a way to load data files for mail transport agents.

=head1 INSTANCE METHODS

=over 4

=item title

=item location

=item mail_transport_agents

Array reference with the names read by C<load>, in file order.

=item deb822_by_installable_name

=cut

has title => (
    is => 'rw',
    default => 'Mail Transport Agents'
);

has location => (
    is => 'rw',
    default => 'fields/mail-transport-agents'
);

has mail_transport_agents => (is => 'rw', default => sub { [] });

=item all

Returns the list of all loaded mail transport agents.

=cut

sub all {
    my ($self) = @_;

    # the attribute is an array reference; it cannot be used as a hash
    return @{$self->mail_transport_agents};
}

=item load (SEARCH_SPACE, VENDOR)

Reads the file at C<location> from the first directory in the array
reference SEARCH_SPACE that holds it and appends every entry to
C<mail_transport_agents>. Blank lines and lines starting with C<#> are
skipped. VENDOR is not used.

Returns 1 on success and 0, after a warning, when the file was not found.
Dies when the file cannot be opened.

=cut

sub load {
    my ($self, $search_space, $our_vendor) = @_;

    my @candidates = map { $_ . $SLASH . $self->location } @{$search_space};
    my $path = first_value { -e } @candidates;

    unless (length $path) {
        carp encode_utf8('Unknown data file: ' . $self->location);
        return 0;
    }

    open(my $fd, '<:utf8_strict', $path)
      or die encode_utf8("Cannot open $path: $!");

    while (my $line = <$fd>) {

        # trim both ends
        $line =~ s/^\s+|\s+$//g;

        next
          unless length $line;

        # comment
        next
          if $line =~ m{^ [#]}x;

        # whatever remains is the name of an agent
        push(@{$self->mail_transport_agents}, $line);
    }

    close $fd;

    return 1;
}

=item refresh (ARCHIVE, BASEDIR)

Asks ARCHIVE for the packages in sid/main on amd64, collects the names
of those that provide C<mail-transport-agent> and writes them, sorted,
to the file at C<location> below BASEDIR. Missing parent directories are
created.

Returns 1.

=cut

sub refresh {
    my ($self, $archive, $basedir) = @_;

    my @mail_transport_agents;

    # neutral sort order
    local $ENV{LC_ALL} = 'C';

    my $port = 'amd64';

    my $deb822_by_installable_name
      = $archive->deb822_packages_by_installable_name('sid', 'main', $port);

    for my $installable_name (keys %{$deb822_by_installable_name}) {

        my $deb822 = $deb822_by_installable_name->{$installable_name};

        my @provides = $deb822->trimmed_list('Provides', qr{ \s* , \s* }x);

        push(@mail_transport_agents, $installable_name)
          if any { $_ eq 'mail-transport-agent' } @provides;
    }

    my $text = encode_utf8(<<'EOF');
# Packages that provide mail-transport-agent
#
EOF

    # one package name per line
    $text .= encode_utf8($_ . $NEWLINE) for sort @mail_transport_agents;

    my $datapath = "$basedir/" . $self->location;
    my $parentdir = path($datapath)->parent->stringify;
    path($parentdir)->mkpath
      unless -e $parentdir;

    # already in UTF-8
    path($datapath)->spew($text);

    return 1;
}

=back

=head1 AUTHOR

Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.

+ +=head1 SEE ALSO + +lintian(1) + +=cut + +1; + +# Local Variables: +# indent-tabs-mode: nil +# cperl-indent-level: 4 +# End: +# vim: syntax=perl sw=4 sts=4 sr et diff --git a/lib/Lintian/Data/Stylesheet.pm b/lib/Lintian/Data/Stylesheet.pm new file mode 100644 index 0000000..bfc8c5b --- /dev/null +++ b/lib/Lintian/Data/Stylesheet.pm @@ -0,0 +1,139 @@ +# -*- perl -*- +# +# Copyright (C) 2021 Felix Lechner +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# this program. If not, see <http://www.gnu.org/licenses/>. 
package Lintian::Data::Stylesheet;

use v5.20;
use warnings;
use utf8;

use Carp qw(carp);
use Const::Fast;
use HTTP::Tiny;
use List::SomeUtils qw(first_value);
use Path::Tiny;
use Unicode::UTF8 qw(encode_utf8);

const my $EMPTY => q{};
const my $SLASH => q{/};

use Moo;
use namespace::clean;

=head1 NAME

Lintian::Data::Stylesheet - Lintian interface to CSS style sheets

=head1 SYNOPSIS

    use Lintian::Data::Stylesheet;

=head1 DESCRIPTION

This module provides a way to load data files to CSS style sheets

=head1 INSTANCE METHODS

=over 4

=item title

=item location

Path of the style sheet relative to a data directory.

=item C<css>

Text of the style sheet as read by C<load>. Empty until then.

=cut

has title => (
    is      => 'rw',
    default => 'Lintian CSS Style Sheet',
);

has location => (
    is      => 'rw',
    default => 'stylesheets/lintian.css',
);

has css => (
    is      => 'rw',
    default => $EMPTY,
);

=item load (SEARCH_SPACE, VENDOR)

Reads the style sheet from the first directory in the array reference
SEARCH_SPACE that holds the file at C<location> and stores the decoded
text in C<css>. VENDOR is not used.

Returns 1 on success and 0, after a warning, when the file was not found.

=cut

sub load {
    my ($self, $search_space, $our_vendor) = @_;

    my $relative = $self->location;

    # first existing candidate wins
    my $path = first_value { -e } map { $_ . $SLASH . $relative }
      @{$search_space};

    if (!length $path) {
        carp encode_utf8('Unknown data file: ' . $self->location);
        return 0;
    }

    $self->css(path($path)->slurp_utf8);

    return 1;
}

=item refresh (ARCHIVE, BASEDIR)

Downloads the style sheet and writes it to the file at C<location> below
BASEDIR, creating missing parent directories. ARCHIVE is not used.

Returns 1. Dies when the download fails.

=cut

sub refresh {
    my ($self, $archive, $basedir) = @_;

    my $css_url = 'https://lintian.debian.org/stylesheets/lintian.css';

    my $client = HTTP::Tiny->new;
    my $response = $client->get($css_url);
    if (!$response->{success}) {
        die encode_utf8("Failed to get $css_url!\n");
    }

    my $target = path("$basedir/" . $self->location);

    my $parent_dir = $target->parent->stringify;
    path($parent_dir)->mkpath
      unless -e $parent_dir;

    # already in UTF-8
    $target->spew($response->{content});

    return 1;
}

=back

=head1 AUTHOR

Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.

=head1 SEE ALSO

lintian(1)

=cut

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et

# diff --git a/lib/Lintian/Data/Traditional.pm b/lib/Lintian/Data/Traditional.pm
# new file mode 100644
# index 0000000..9deaf12
# --- /dev/null
# +++ b/lib/Lintian/Data/Traditional.pm
# @@ -0,0 +1,73 @@
# -*- perl -*-
#
# Copyright (C) 2020 Felix Lechner
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program.  If not, see <http://www.gnu.org/licenses/>.

package Lintian::Data::Traditional;

use v5.20;
use warnings;
use utf8;

use Moo;
use namespace::clean;

with 'Lintian::Data::JoinedLines';

=head1 NAME

Lintian::Data::Traditional - Lintian interface for generic data

=head1 SYNOPSIS

    use Lintian::Data::Traditional;

=head1 DESCRIPTION

Lintian::Data::Traditional provides a way to load generic, traditional
data files. The parsing itself comes from the role
Lintian::Data::JoinedLines.

=head1 INSTANCE METHODS

=over 4

=item location

Read-write attribute with no default.

=item separator

Read-write attribute with no default.

=cut

has location => (
    is => 'rw',
);

has separator => (
    is => 'rw',
);

=back

=head1 AUTHOR

Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.

=head1 SEE ALSO

lintian(1)

=cut

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et