diff options
Diffstat (limited to 'lib/Lintian/Data/Authority')
-rw-r--r-- | lib/Lintian/Data/Authority/DebconfSpecification.pm | 328 | ||||
-rw-r--r-- | lib/Lintian/Data/Authority/DebianPolicy.pm | 321 | ||||
-rw-r--r-- | lib/Lintian/Data/Authority/DeveloperReference.pm | 319 | ||||
-rw-r--r-- | lib/Lintian/Data/Authority/DocBaseManual.pm | 431 | ||||
-rw-r--r-- | lib/Lintian/Data/Authority/FilesystemHierarchy.pm | 333 | ||||
-rw-r--r-- | lib/Lintian/Data/Authority/JavaPolicy.pm | 290 | ||||
-rw-r--r-- | lib/Lintian/Data/Authority/LintianManual.pm | 324 | ||||
-rw-r--r-- | lib/Lintian/Data/Authority/MenuManual.pm | 316 | ||||
-rw-r--r-- | lib/Lintian/Data/Authority/MenuPolicy.pm | 316 | ||||
-rw-r--r-- | lib/Lintian/Data/Authority/NewMaintainer.pm | 290 | ||||
-rw-r--r-- | lib/Lintian/Data/Authority/PerlPolicy.pm | 316 | ||||
-rw-r--r-- | lib/Lintian/Data/Authority/PythonPolicy.pm | 317 | ||||
-rw-r--r-- | lib/Lintian/Data/Authority/VimPolicy.pm | 459 |
13 files changed, 4360 insertions, 0 deletions
diff --git a/lib/Lintian/Data/Authority/DebconfSpecification.pm b/lib/Lintian/Data/Authority/DebconfSpecification.pm new file mode 100644 index 0000000..661d11e --- /dev/null +++ b/lib/Lintian/Data/Authority/DebconfSpecification.pm @@ -0,0 +1,328 @@ +# -*- perl -*- +# +# Copyright (C) 1998 Christian Schwarz and Richard Braakman +# Copyright (C) 2001 Colin Watson +# Copyright (C) 2008 Jorda Polo +# Copyright (C) 2009 Russ Allbery +# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org> +# Copyright (C) 2020-2021 Felix Lechner +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# this program. If not, see <http://www.gnu.org/licenses/>. 
+
+package Lintian::Data::Authority::DebconfSpecification;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Carp qw(croak);
+use Const::Fast;
+use File::Basename qw(dirname);
+use Path::Tiny;
+use Unicode::UTF8 qw(encode_utf8);
+use WWW::Mechanize ();
+
+use Lintian::Output::Markdown qw(markdown_authority);
+
+const my $EMPTY => q{};
+const my $SPACE => q{ };
+const my $SLASH => q{/};
+const my $COLON => q{:};
+const my $UNDERSCORE => q{_};
+const my $LEFT_PARENTHESIS => q{(};
+const my $RIGHT_PARENTHESIS => q{)};
+
+const my $TWO_PARTS => 2;
+
+const my $VOLUME_KEY => $UNDERSCORE;
+const my $SEPARATOR => $COLON x 2;
+
+use Moo;
+use namespace::clean;
+
+with 'Lintian::Data::JoinedLines';
+
+=head1 NAME
+
+Lintian::Data::Authority::DebconfSpecification - Lintian interface for manual references
+
+=head1 SYNOPSIS
+
+    use Lintian::Data::Authority::DebconfSpecification;
+
+=head1 DESCRIPTION
+
+Lintian::Data::Authority::DebconfSpecification provides a way to load data files for
+manual references.
+
+=head1 CLASS METHODS
+
+=over 4
+
+=item title
+
+Human-readable name of the manual. Defaults to C<Debconf Specification>.
+
+=item shorthand
+
+Short identifier of the manual. Defaults to C<debconf-specification>.
+
+=item location
+
+Path of the data file relative to the base directory. Defaults to
+C<authority/> followed by the shorthand.
+
+=item separator
+
+Regular expression that separates the fields of a data line. Defaults to a
+double colon.
+
+=cut
+
+has title => (
+    is => 'rw',
+    default => 'Debconf Specification'
+);
+
+has shorthand => (
+    is => 'rw',
+    default => 'debconf-specification'
+);
+
+has location => (
+    is => 'rw',
+    lazy => 1,
+    default => sub {
+        my ($self) = @_;
+
+        return 'authority/' . $self->shorthand;
+    }
+);
+
+has separator => (
+    is => 'rw',
+    default => sub { qr/::/ }
+);
+
+=item consumer
+
+Takes a key, the remainder of a data line, and the value previously stored
+for that key. Returns C<undef> when a previous value is defined. Otherwise
+splits the remainder at the separator into at most two parts and returns a
+hash reference holding them as C<title> and C<url>.
+
+=cut
+
+sub consumer {
+    my ($self, $key, $remainder, $previous) = @_;
+
+    return undef
+      if defined $previous;
+
+    my ($title, $url)= split($self->separator, $remainder, $TWO_PARTS);
+
+    my %entry;
+    $entry{title} = $title;
+    $entry{url} = $url;
+
+    return \%entry;
+}
+
+=item markdown_citation
+
+Returns the result of C<markdown_authority> for the section key. The volume
+title and URL come from the entry stored under the underscore key. The
+section title and URL are only filled in when the section key is recognized.
+Croaks when the section key is the underscore itself.
+
+=cut
+
+sub markdown_citation {
+    my ($self, $section_key) = @_;
+
+    croak "Invalid section $section_key"
+      if $section_key eq $VOLUME_KEY;
+
+    my $volume_entry = $self->value($VOLUME_KEY);
+
+    # start with the citation to the overall manual.
+    my $volume_title = $volume_entry->{title};
+    my $volume_url = $volume_entry->{url};
+
+    my $section_title;
+    my $section_url;
+
+    if ($self->recognizes($section_key)) {
+
+        my $section_entry = $self->value($section_key);
+
+        $section_title = $section_entry->{title};
+        $section_url = $section_entry->{url};
+    }
+
+    return markdown_authority(
+        $volume_title, $volume_url,$section_key,
+        $section_title, $section_url
+    );
+}
+
+=item write_line
+
+Plain function, not a method. Prints one data line to the file handle: the
+section key, section title and destination joined by double colons and
+encoded as UTF-8. Trailing dots are removed from the key and runs of
+whitespace in the title are reduced to a single space.
+
+=cut
+
+sub write_line {
+    my ($data_fd, $section_key, $section_title, $destination) = @_;
+
+    # drop final dots
+    $section_key =~ s{ [.]+ $}{}x;
+
+    # reduce consecutive whitespace
+    $section_title =~ s{ \s+ }{ }gx;
+
+    my $line= join($SEPARATOR,$section_key, $section_title, $destination);
+
+    say {$data_fd} encode_utf8($line);
+
+    return;
+}
+
+=item write_data_file
+
+Writes the explanatory header followed by the generated text to the
+location below the base directory. The parent directory is created when it
+does not exist.
+
+=cut
+
+sub write_data_file {
+    my ($self, $basedir, $generated) = @_;
+
+    my $header =<<"HEADER";
+# Data about titles, sections, and URLs of manuals, used to expand references
+# in tag descriptions and add links for HTML output.  Each line of this file
+# has three fields separated by double colons:
+#
+#     <section> :: <title> :: <url>
+#
+# If <section> is an underscore, that line specifies the title and URL for the
+# whole manual.
+
+HEADER
+
+    my $data_path = "$basedir/" . $self->location;
+    my $parent_dir = path($data_path)->parent->stringify;
+    path($parent_dir)->mkpath
+      unless -e $parent_dir;
+
+    # the generated part was already encoded line by line in write_line
+    my $output = encode_utf8($header) . $generated;
+    path($data_path)->spew($output);
+
+    return;
+}
+
+=item extract_sections_from_links
+
+Fetches the page named by the base URL and page name, writes a line for the
+whole page under the underscore key, and then writes one line for every link
+whose text starts with a dotted section number. A section number that shows
+up again with a different destination switches to appendix mode, in which
+all further keys are prefixed with C<appendix->. Each key is written only
+once.
+
+=cut
+
+sub extract_sections_from_links {
+    my ($self, $data_fd, $base_url, $page_name)= @_;
+
+    my $page_url = $base_url . $page_name;
+
+    my $mechanize = WWW::Mechanize->new();
+    $mechanize->get($page_url);
+
+    # the page may lack a title; fall back to our own name for the manual
+    my $page_title = $mechanize->title // $self->title;
+
+    # strip explanatory remark
+    $page_title =~ s{ \s* \N{EM DASH} .* $}{}x;
+
+    # underscore is a token for the whole page
+    write_line($data_fd, $VOLUME_KEY, $page_title, $page_url);
+
+    my %by_section_key;
+    my $in_appendix = 0;
+
+    # https://stackoverflow.com/a/254687
+    for my $link ($mechanize->links) {
+
+        next
+          unless length $link->text;
+
+        next
+          if $link->text !~ qr{^ \s* ([.\d]+) \s+ (.+) $}x;
+
+        my $section_key = $1;
+        my $section_title = $2;
+
+        # drop final dots
+        $section_key =~ s{ [.]+ $}{}x;
+
+        # reduce consecutive whitespace
+        $section_title =~ s{ \s+ }{ }gx;
+
+        my $relative_destination = $link->url;
+
+        # links other than pure fragments are relative to the page's folder
+        my $destination_base = $page_url;
+        $destination_base = dirname($page_url) . $SLASH
+          unless $destination_base =~ m{ / $}x
+          || $relative_destination =~ m{^ [#] }x;
+
+        my $full_destination = $destination_base . $relative_destination;
+
+        next
+          if exists $by_section_key{$section_key}
+          && ( $by_section_key{$section_key}{title} eq $section_title
+            || $by_section_key{$section_key}{destination} eq$full_destination);
+
+        # Some manuals reuse section numbers for different references,
+        # e.g. the Debian Policy's normal and appendix sections are
+        # numbers that clash with each other. Track if we've already
+        # seen a section pointing to some other URL than the current one,
+        # and prepend it with an indicator
+        $in_appendix = 1
+          if exists $by_section_key{$section_key}
+          && $by_section_key{$section_key}{destination} ne$full_destination;
+
+        $section_key = "appendix-$section_key"
+          if $in_appendix;
+
+        # the duplicate check above only saw the unprefixed key; without
+        # this, a link repeated inside the appendix is written twice
+        next
+          if $in_appendix
+          && exists $by_section_key{$section_key};
+
+        $by_section_key{$section_key}{title} = $section_title;
+        $by_section_key{$section_key}{destination} = $full_destination;
+
+        write_line($data_fd, $section_key, $section_title, $full_destination);
+    }
+
+    return;
+}
+
+=item refresh
+
+Regenerates the data file below the base directory from the single-page
+Debconf specification on www.debian.org. The archive argument is not used
+here.
+
+=cut
+
+sub refresh {
+    my ($self, $archive, $basedir) = @_;
+
+    # single page
+    my $base_url = 'https://www.debian.org/doc/packaging-manuals/';
+    my $index_name = 'debconf_specification.html';
+
+    # collect the generated lines in memory first
+    my $generated;
+    open(my $memory_fd, '>', \$generated)
+      or die encode_utf8('Cannot open scalar');
+
+    $self->extract_sections_from_links($memory_fd, $base_url, $index_name);
+
+    close $memory_fd
+      or die encode_utf8('Cannot close scalar');
+
+    $self->write_data_file($basedir, $generated);
+
+    return;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Authority/DebianPolicy.pm b/lib/Lintian/Data/Authority/DebianPolicy.pm
new file mode 100644
index 0000000..177b07d
--- /dev/null
+++ b/lib/Lintian/Data/Authority/DebianPolicy.pm
@@ -0,0 +1,321 @@
+# -*- perl -*-
+#
+# Copyright (C) 1998 Christian Schwarz and Richard Braakman
+# Copyright (C) 2001 Colin Watson
+# Copyright (C) 2008 Jorda Polo
+# Copyright (C) 2009 Russ Allbery
+# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
+# Copyright (C) 2020-2021 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Data::Authority::DebianPolicy;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Carp qw(croak);
+use Const::Fast;
+use Path::Tiny;
+use Unicode::UTF8 qw(encode_utf8);
+use WWW::Mechanize ();
+
+use Lintian::Output::Markdown qw(markdown_authority);
+
+const my $EMPTY => q{};
+const my $SPACE => q{ };
+const my $SLASH => q{/};
+const my $COLON => q{:};
+const my $UNDERSCORE => q{_};
+const my $LEFT_PARENTHESIS => q{(};
+const my $RIGHT_PARENTHESIS => q{)};
+
+const my $TWO_PARTS => 2;
+
+const my $VOLUME_KEY => $UNDERSCORE;
+const my $SEPARATOR => $COLON x 2;
+
+use Moo;
+use namespace::clean;
+
+with 'Lintian::Data::JoinedLines';
+
+=head1 NAME
+
+Lintian::Data::Authority::DebianPolicy - Lintian interface for manual references
+
+=head1 SYNOPSIS
+
+    use Lintian::Data::Authority::DebianPolicy;
+
+=head1 DESCRIPTION
+
+Lintian::Data::Authority::DebianPolicy provides a way to load data files for
+manual references.
+
+=head1 CLASS METHODS
+
+=over 4
+
+=item title
+
+Human-readable name of the manual. Defaults to C<Debian Policy>.
+
+=item shorthand
+
+Short identifier of the manual. Defaults to C<debian-policy>.
+
+=item location
+
+Path of the data file relative to the base directory. Defaults to
+C<authority/> followed by the shorthand.
+
+=item separator
+
+Regular expression that separates the fields of a data line. Defaults to a
+double colon.
+
+=cut
+
+has title => (
+    is => 'rw',
+    default => 'Debian Policy'
+);
+
+has shorthand => (
+    is => 'rw',
+    default => 'debian-policy'
+);
+
+has location => (
+    is => 'rw',
+    lazy => 1,
+    default => sub {
+        my ($self) = @_;
+
+        return 'authority/' . $self->shorthand;
+    }
+);
+
+has separator => (
+    is => 'rw',
+    default => sub { qr/::/ }
+);
+
+=item consumer
+
+Takes a key, the remainder of a data line, and the value previously stored
+for that key. Returns C<undef> when a previous value is defined. Otherwise
+splits the remainder at the separator into at most two parts and returns a
+hash reference holding them as C<title> and C<url>.
+
+=cut
+
+sub consumer {
+    my ($self, $key, $remainder, $previous) = @_;
+
+    return undef
+      if defined $previous;
+
+    my ($title, $url)= split($self->separator, $remainder, $TWO_PARTS);
+
+    my %entry;
+    $entry{title} = $title;
+    $entry{url} = $url;
+
+    return \%entry;
+}
+
+=item markdown_citation
+
+Returns the result of C<markdown_authority> for the section key. The volume
+title and URL come from the entry stored under the underscore key. The
+section title and URL are only filled in when the section key is recognized.
+Croaks when the section key is the underscore itself.
+
+=cut
+
+sub markdown_citation {
+    my ($self, $section_key) = @_;
+
+    croak "Invalid section $section_key"
+      if $section_key eq $VOLUME_KEY;
+
+    my $volume_entry = $self->value($VOLUME_KEY);
+
+    # start with the citation to the overall manual.
+    my $volume_title = $volume_entry->{title};
+    my $volume_url = $volume_entry->{url};
+
+    my $section_title;
+    my $section_url;
+
+    if ($self->recognizes($section_key)) {
+
+        my $section_entry = $self->value($section_key);
+
+        $section_title = $section_entry->{title};
+        $section_url = $section_entry->{url};
+    }
+
+    return markdown_authority(
+        $volume_title, $volume_url,$section_key,
+        $section_title, $section_url
+    );
+}
+
+=item write_line
+
+Plain function, not a method. Prints one data line to the file handle: the
+section key, section title and destination joined by double colons and
+encoded as UTF-8. Trailing dots are removed from the key and runs of
+whitespace in the title are reduced to a single space.
+
+=cut
+
+sub write_line {
+    my ($data_fd, $section_key, $section_title, $destination) = @_;
+
+    # drop final dots
+    $section_key =~ s{ [.]+ $}{}x;
+
+    # reduce consecutive whitespace
+    $section_title =~ s{ \s+ }{ }gx;
+
+    my $line= join($SEPARATOR,$section_key, $section_title, $destination);
+
+    say {$data_fd} encode_utf8($line);
+
+    return;
+}
+
+=item write_data_file
+
+Writes the explanatory header followed by the generated text to the
+location below the base directory. The parent directory is created when it
+does not exist.
+
+=cut
+
+sub write_data_file {
+    my ($self, $basedir, $generated) = @_;
+
+    my $header =<<"HEADER";
+# Data about titles, sections, and URLs of manuals, used to expand references
+# in tag descriptions and add links for HTML output.  Each line of this file
+# has three fields separated by double colons:
+#
+#     <section> :: <title> :: <url>
+#
+# If <section> is an underscore, that line specifies the title and URL for the
+# whole manual.
+
+HEADER
+
+    my $data_path = "$basedir/" . $self->location;
+    my $parent_dir = path($data_path)->parent->stringify;
+    path($parent_dir)->mkpath
+      unless -e $parent_dir;
+
+    # the generated part was already encoded line by line in write_line
+    my $output = encode_utf8($header) . $generated;
+    path($data_path)->spew($output);
+
+    return;
+}
+
+=item extract_sections_from_links
+
+Fetches the page at the base URL, writes a line for the whole page under the
+underscore key, and then writes one line for every link whose text starts
+with a dotted section number. A section number that shows up again with a
+different destination switches to appendix mode, in which all further keys
+are prefixed with C<appendix->. Sections below appendix 10 are left out.
+Each key is written only once.
+
+=cut
+
+sub extract_sections_from_links {
+    my ($self, $data_fd, $base_url)= @_;
+
+    my $mechanize = WWW::Mechanize->new();
+    $mechanize->get($base_url);
+
+    # the page may lack a title; fall back to our own name for the manual
+    my $page_title = $mechanize->title // $self->title;
+
+    # strip explanatory remark
+    $page_title =~ s{ \s* \N{EM DASH} .* $}{}x;
+
+    # underscore is a token for the whole page
+    write_line($data_fd, $VOLUME_KEY, $page_title, $base_url);
+
+    my %by_section_key;
+    my $in_appendix = 0;
+
+    # https://stackoverflow.com/a/254687
+    for my $link ($mechanize->links) {
+
+        next
+          unless length $link->text;
+
+        next
+          if $link->text !~ qr{^ \s* ([.\d]+) \s+ (.+) $}x;
+
+        my $section_key = $1;
+        my $section_title = $2;
+
+        # drop final dots
+        $section_key =~ s{ [.]+ $}{}x;
+
+        # reduce consecutive whitespace
+        $section_title =~ s{ \s+ }{ }gx;
+
+        my $destination = $base_url . $link->url;
+
+        next
+          if exists $by_section_key{$section_key}
+          && ( $by_section_key{$section_key}{title} eq $section_title
+            || $by_section_key{$section_key}{destination} eq $destination);
+
+        # Some manuals reuse section numbers for different references,
+        # e.g. the Debian Policy's normal and appendix sections are
+        # numbers that clash with each other. Track if we've already
+        # seen a section pointing to some other URL than the current one,
+        # and prepend it with an indicator
+        $in_appendix = 1
+          if exists $by_section_key{$section_key}
+          && $by_section_key{$section_key}{destination} ne $destination;
+
+        $section_key = "appendix-$section_key"
+          if $in_appendix;
+
+        # the duplicate check above only saw the unprefixed key; without
+        # this, a link repeated inside the appendix is written twice
+        next
+          if $in_appendix
+          && exists $by_section_key{$section_key};
+
+        # do not collect the upgrading checklists in appendix 10 of policy
+        # the numbering changes all the time
+        next
+          if $section_key =~ m{^ appendix-10 [.] }x;
+
+        $by_section_key{$section_key}{title} = $section_title;
+        $by_section_key{$section_key}{destination} = $destination;
+
+        write_line($data_fd, $section_key, $section_title, $destination);
+    }
+
+    return;
+}
+
+=item refresh
+
+Regenerates the data file below the base directory from the Debian Policy
+pages on www.debian.org. The archive argument is not used here.
+
+=cut
+
+sub refresh {
+    my ($self, $archive, $basedir) = @_;
+
+    my $base_url = 'https://www.debian.org/doc/debian-policy/';
+
+    # collect the generated lines in memory first
+    my $generated;
+    open(my $memory_fd, '>', \$generated)
+      or die encode_utf8('Cannot open scalar');
+
+    $self->extract_sections_from_links($memory_fd, $base_url);
+
+    close $memory_fd
+      or die encode_utf8('Cannot close scalar');
+
+    $self->write_data_file($basedir, $generated);
+
+    return;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Authority/DeveloperReference.pm b/lib/Lintian/Data/Authority/DeveloperReference.pm
new file mode 100644
index 0000000..676cbf4
--- /dev/null
+++ b/lib/Lintian/Data/Authority/DeveloperReference.pm
@@ -0,0 +1,319 @@
+# -*- perl -*-
+#
+# Copyright (C) 1998 Christian Schwarz and Richard Braakman
+# Copyright (C) 2001 Colin Watson
+# Copyright (C) 2008 Jorda Polo
+# Copyright (C) 2009 Russ Allbery
+# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
+# Copyright (C) 2020-2021 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Data::Authority::DeveloperReference;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Carp qw(croak);
+use Const::Fast;
+use Path::Tiny;
+use Unicode::UTF8 qw(encode_utf8);
+use WWW::Mechanize ();
+
+use Lintian::Output::Markdown qw(markdown_authority);
+
+const my $EMPTY => q{};
+const my $SPACE => q{ };
+const my $SLASH => q{/};
+const my $COLON => q{:};
+const my $UNDERSCORE => q{_};
+const my $LEFT_PARENTHESIS => q{(};
+const my $RIGHT_PARENTHESIS => q{)};
+
+const my $TWO_PARTS => 2;
+
+const my $VOLUME_KEY => $UNDERSCORE;
+const my $SEPARATOR => $COLON x 2;
+
+use Moo;
+use namespace::clean;
+
+with 'Lintian::Data::JoinedLines';
+
+=head1 NAME
+
+Lintian::Data::Authority::DeveloperReference - Lintian interface for manual references
+
+=head1 SYNOPSIS
+
+    use Lintian::Data::Authority::DeveloperReference;
+
+=head1 DESCRIPTION
+
+Lintian::Data::Authority::DeveloperReference provides a way to load data files for
+manual references.
+
+=head1 CLASS METHODS
+
+=over 4
+
+=item title
+
+Human-readable name of the manual. Defaults to C<Developer's Reference>.
+
+=item shorthand
+
+Short identifier of the manual. Defaults to C<developer-reference>.
+
+=item location
+
+Path of the data file relative to the base directory. Defaults to
+C<authority/> followed by the shorthand.
+
+=item separator
+
+Regular expression that separates the fields of a data line. Defaults to a
+double colon.
+
+=cut
+
+has title => (
+    is => 'rw',
+    default => q{Developer's Reference}
+);
+
+has shorthand => (
+    is => 'rw',
+    default => 'developer-reference'
+);
+
+has location => (
+    is => 'rw',
+    lazy => 1,
+    default => sub {
+        my ($self) = @_;
+
+        return 'authority/' . $self->shorthand;
+    }
+);
+
+has separator => (
+    is => 'rw',
+    default => sub { qr/::/ }
+);
+
+=item consumer
+
+Takes a key, the remainder of a data line, and the value previously stored
+for that key. Returns C<undef> when a previous value is defined. Otherwise
+splits the remainder at the separator into at most two parts and returns a
+hash reference holding them as C<title> and C<url>.
+
+=cut
+
+sub consumer {
+    my ($self, $key, $remainder, $previous) = @_;
+
+    return undef
+      if defined $previous;
+
+    my ($title, $url)= split($self->separator, $remainder, $TWO_PARTS);
+
+    my %entry;
+    $entry{title} = $title;
+    $entry{url} = $url;
+
+    return \%entry;
+}
+
+=item markdown_citation
+
+Returns the result of C<markdown_authority> for the section key. The volume
+title and URL come from the entry stored under the underscore key. The
+section title and URL are only filled in when the section key is recognized.
+Croaks when the section key is the underscore itself.
+
+=cut
+
+sub markdown_citation {
+    my ($self, $section_key) = @_;
+
+    croak "Invalid section $section_key"
+      if $section_key eq $VOLUME_KEY;
+
+    my $volume_entry = $self->value($VOLUME_KEY);
+
+    # start with the citation to the overall manual.
+    my $volume_title = $volume_entry->{title};
+    my $volume_url = $volume_entry->{url};
+
+    my $section_title;
+    my $section_url;
+
+    if ($self->recognizes($section_key)) {
+
+        my $section_entry = $self->value($section_key);
+
+        $section_title = $section_entry->{title};
+        $section_url = $section_entry->{url};
+    }
+
+    return markdown_authority(
+        $volume_title, $volume_url,$section_key,
+        $section_title, $section_url
+    );
+}
+
+=item write_line
+
+Plain function, not a method. Prints one data line to the file handle: the
+section key, section title and destination joined by double colons and
+encoded as UTF-8. Trailing dots are removed from the key and runs of
+whitespace in the title are reduced to a single space.
+
+=cut
+
+sub write_line {
+    my ($data_fd, $section_key, $section_title, $destination) = @_;
+
+    # drop final dots
+    $section_key =~ s{ [.]+ $}{}x;
+
+    # reduce consecutive whitespace
+    $section_title =~ s{ \s+ }{ }gx;
+
+    my $line= join($SEPARATOR,$section_key, $section_title, $destination);
+
+    say {$data_fd} encode_utf8($line);
+
+    return;
+}
+
+=item write_data_file
+
+Writes the explanatory header followed by the generated text to the
+location below the base directory. The parent directory is created when it
+does not exist.
+
+=cut
+
+sub write_data_file {
+    my ($self, $basedir, $generated) = @_;
+
+    my $header =<<"HEADER";
+# Data about titles, sections, and URLs of manuals, used to expand references
+# in tag descriptions and add links for HTML output.  Each line of this file
+# has three fields separated by double colons:
+#
+#     <section> :: <title> :: <url>
+#
+# If <section> is an underscore, that line specifies the title and URL for the
+# whole manual.
+
+HEADER
+
+    my $data_path = "$basedir/" . $self->location;
+    my $parent_dir = path($data_path)->parent->stringify;
+    path($parent_dir)->mkpath
+      unless -e $parent_dir;
+
+    # the generated part was already encoded line by line in write_line
+    my $output = encode_utf8($header) . $generated;
+    path($data_path)->spew($output);
+
+    return;
+}
+
+=item extract_sections_from_links
+
+Fetches the page at the base URL, writes a line for the whole page under the
+underscore key, and then writes one line for every link whose text starts
+with a dotted section number. A two-character locale infix before C<.html>
+is removed from each destination. A section number that shows up again with
+a different destination switches to appendix mode, in which all further keys
+are prefixed with C<appendix->. Each key is written only once.
+
+=cut
+
+sub extract_sections_from_links {
+    my ($self, $data_fd, $base_url)= @_;
+
+    my $mechanize = WWW::Mechanize->new();
+    $mechanize->get($base_url);
+
+    # the page may lack a title; fall back to our own name for the manual
+    my $page_title = $mechanize->title // $self->title;
+
+    # strip explanatory remark
+    $page_title =~ s{ \s* \N{EM DASH} .* $}{}x;
+
+    # underscore is a token for the whole page
+    write_line($data_fd, $VOLUME_KEY, $page_title, $base_url);
+
+    my %by_section_key;
+    my $in_appendix = 0;
+
+    # https://stackoverflow.com/a/254687
+    for my $link ($mechanize->links) {
+
+        next
+          unless length $link->text;
+
+        next
+          if $link->text !~ qr{^ \s* ([.\d]+) \s+ (.+) $}x;
+
+        my $section_key = $1;
+        my $section_title = $2;
+
+        # drop final dots
+        $section_key =~ s{ [.]+ $}{}x;
+
+        # reduce consecutive whitespace
+        $section_title =~ s{ \s+ }{ }gx;
+
+        my $destination = $base_url . $link->url;
+
+        # developers reference likes to return locale specific pages
+        $destination =~ s{ [.]\w{2}[.]html }{.html}x;
+
+        next
+          if exists $by_section_key{$section_key}
+          && ( $by_section_key{$section_key}{title} eq $section_title
+            || $by_section_key{$section_key}{destination} eq $destination);
+
+        # Some manuals reuse section numbers for different references,
+        # e.g. the Debian Policy's normal and appendix sections are
+        # numbers that clash with each other. Track if we've already
+        # seen a section pointing to some other URL than the current one,
+        # and prepend it with an indicator
+        $in_appendix = 1
+          if exists $by_section_key{$section_key}
+          && $by_section_key{$section_key}{destination} ne $destination;
+
+        $section_key = "appendix-$section_key"
+          if $in_appendix;
+
+        # the duplicate check above only saw the unprefixed key; without
+        # this, a link repeated inside the appendix is written twice
+        next
+          if $in_appendix
+          && exists $by_section_key{$section_key};
+
+        $by_section_key{$section_key}{title} = $section_title;
+        $by_section_key{$section_key}{destination} = $destination;
+
+        write_line($data_fd, $section_key, $section_title, $destination);
+    }
+
+    return;
+}
+
+=item refresh
+
+Regenerates the data file below the base directory from the Developer's
+Reference pages on www.debian.org. The archive argument is not used here.
+
+=cut
+
+sub refresh {
+    my ($self, $archive, $basedir) = @_;
+
+    my $base_url = 'https://www.debian.org/doc/developers-reference/';
+
+    # collect the generated lines in memory first
+    my $generated;
+    open(my $memory_fd, '>', \$generated)
+      or die encode_utf8('Cannot open scalar');
+
+    $self->extract_sections_from_links($memory_fd, $base_url);
+
+    close $memory_fd
+      or die encode_utf8('Cannot close scalar');
+
+    $self->write_data_file($basedir, $generated);
+
+    return;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Authority/DocBaseManual.pm b/lib/Lintian/Data/Authority/DocBaseManual.pm
new file mode 100644
index 0000000..53cfbcb
--- /dev/null
+++ b/lib/Lintian/Data/Authority/DocBaseManual.pm
@@ -0,0 +1,431 @@
+# -*- perl -*-
+#
+# Copyright (C) 1998 Christian Schwarz and Richard Braakman
+# Copyright (C) 2001 Colin Watson
+# Copyright (C) 2008 Jorda Polo
+# Copyright (C) 2009 Russ Allbery
+# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
+# Copyright (C) 2020-2021 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Data::Authority::DocBaseManual;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Carp qw(croak);
+use Const::Fast;
+use File::Basename qw(dirname basename);
+use IPC::Run3;
+use Path::Tiny;
+use Unicode::UTF8 qw(encode_utf8);
+use URI::file;
+use WWW::Mechanize ();
+
+use Lintian::Output::Markdown qw(markdown_authority);
+
+const my $EMPTY => q{};
+const my $SPACE => q{ };
+const my $SLASH => q{/};
+const my $COLON => q{:};
+const my $INDENT => $SPACE x 4;
+const my $UNDERSCORE => q{_};
+const my $LEFT_PARENTHESIS => q{(};
+const my $RIGHT_PARENTHESIS => q{)};
+
+const my $TWO_PARTS => 2;
+
+const my $VOLUME_KEY => $UNDERSCORE;
+const my $SEPARATOR => $COLON x 2;
+
+const my $WAIT_STATUS_SHIFT => 8;
+
+use Moo;
+use namespace::clean;
+
+with 'Lintian::Data::JoinedLines';
+
+=head1 NAME
+
+Lintian::Data::Authority::DocBaseManual - Lintian interface for manual references
+
+=head1 SYNOPSIS
+
+    use Lintian::Data::Authority::DocBaseManual;
+
+=head1 DESCRIPTION
+
+Lintian::Data::Authority::DocBaseManual provides a way to load data files for
+manual references.
+
+=head1 CLASS METHODS
+
+=over 4
+
+=item title
+
+Human-readable name of the manual. Defaults to C<Doc-Base Manual>.
+
+=item shorthand
+
+Short identifier of the manual. Defaults to C<doc-base-manual>.
+
+=item location
+
+Path of the data file relative to the base directory. Defaults to
+C<authority/> followed by the shorthand.
+
+=item separator
+
+Regular expression that separates the fields of a data line. Defaults to a
+double colon.
+
+=cut
+
+has title => (
+    is => 'rw',
+    default => 'Doc-Base Manual'
+);
+
+has shorthand => (
+    is => 'rw',
+    default => 'doc-base-manual'
+);
+
+has location => (
+    is => 'rw',
+    lazy => 1,
+    default => sub {
+        my ($self) = @_;
+
+        return 'authority/' . $self->shorthand;
+    }
+);
+
+has separator => (
+    is => 'rw',
+    default => sub { qr/::/ }
+);
+
+=item consumer
+
+Takes a key, the remainder of a data line, and the value previously stored
+for that key. Returns C<undef> when a previous value is defined. Otherwise
+splits the remainder at the separator into at most two parts and returns a
+hash reference holding them as C<title> and C<url>.
+
+=cut
+
+sub consumer {
+    my ($self, $key, $remainder, $previous) = @_;
+
+    return undef
+      if defined $previous;
+
+    my ($title, $url)= split($self->separator, $remainder, $TWO_PARTS);
+
+    my %entry;
+    $entry{title} = $title;
+    $entry{url} = $url;
+
+    return \%entry;
+}
+
+=item markdown_citation
+
+Returns the result of C<markdown_authority> for the section key. The volume
+title and URL come from the entry stored under the underscore key. The
+section title and URL are only filled in when the section key is recognized.
+Croaks when the section key is the underscore itself.
+
+=cut
+
+sub markdown_citation {
+    my ($self, $section_key) = @_;
+
+    croak "Invalid section $section_key"
+      if $section_key eq $VOLUME_KEY;
+
+    my $volume_entry = $self->value($VOLUME_KEY);
+
+    # start with the citation to the overall manual.
+    my $volume_title = $volume_entry->{title};
+    my $volume_url = $volume_entry->{url};
+
+    my $section_title;
+    my $section_url;
+
+    if ($self->recognizes($section_key)) {
+
+        my $section_entry = $self->value($section_key);
+
+        $section_title = $section_entry->{title};
+        $section_url = $section_entry->{url};
+    }
+
+    return markdown_authority(
+        $volume_title, $volume_url,$section_key,
+        $section_title, $section_url
+    );
+}
+
+=item write_line
+
+Plain function, not a method. Prints one data line to the file handle: the
+section key, section title and destination joined by double colons and
+encoded as UTF-8. Trailing dots are removed from the key and runs of
+whitespace in the title are reduced to a single space.
+
+=cut
+
+sub write_line {
+    my ($data_fd, $section_key, $section_title, $destination) = @_;
+
+    # drop final dots
+    $section_key =~ s{ [.]+ $}{}x;
+
+    # reduce consecutive whitespace
+    $section_title =~ s{ \s+ }{ }gx;
+
+    my $line= join($SEPARATOR,$section_key, $section_title, $destination);
+
+    say {$data_fd} encode_utf8($line);
+
+    return;
+}
+
+=item write_data_file
+
+Writes the explanatory header followed by the generated text to the
+location below the base directory. The parent directory is created when it
+does not exist.
+
+=cut
+
+sub write_data_file {
+    my ($self, $basedir, $generated) = @_;
+
+    my $header =<<"HEADER";
+# Data about titles, sections, and URLs of manuals, used to expand references
+# in tag descriptions and add links for HTML output.  Each line of this file
+# has three fields separated by double colons:
+#
+#     <section> :: <title> :: <url>
+#
+# If <section> is an underscore, that line specifies the title and URL for the
+# whole manual.
+
+HEADER
+
+    my $data_path = "$basedir/" . $self->location;
+    my $parent_dir = path($data_path)->parent->stringify;
+    path($parent_dir)->mkpath
+      unless -e $parent_dir;
+
+    # the generated part was already encoded line by line in write_line
+    my $output = encode_utf8($header) . $generated;
+    path($data_path)->spew($output);
+
+    return;
+}
+
+=item find_installable_name
+
+Scans the gzipped contents indices of C<sid>/C<main> for the architecture
+C<all> and for the given port, and collects the installables listed for the
+requested path. Dies when there is none. Warns when there are several, and
+returns the first one found.
+
+=cut
+
+sub find_installable_name {
+    my ($self, $archive, $port, $requested_path) = @_;
+
+    my @installed_by;
+
+    # find installable package
+    for my $installable_architecture ('all', $port) {
+
+        my $local_path
+          = $archive->contents_gz('sid', 'main', $installable_architecture);
+
+        open(my $fd, '<:gzip', $local_path)
+          or die encode_utf8("Cannot open $local_path.");
+
+        while (my $line = <$fd>) {
+
+            chomp $line;
+
+            # the path comes first; the rest lists who installs it
+            my ($path, $finder) = split($SPACE, $line, $TWO_PARTS);
+            next
+              unless length $path
+              && length $finder;
+
+            next
+              unless $path eq $requested_path;
+
+            for my $location (split(m{,}, $finder)) {
+
+                # presumably section/installable; only the name is needed
+                my (undef, $installable)
+                  = split(m{/}, $location, $TWO_PARTS);
+
+                # a location without a slash carries no installable name
+                next
+                  unless length $installable;
+
+                push(@installed_by, $installable);
+            }
+        }
+
+        close $fd;
+    }
+
+    die encode_utf8(
+        "The path $requested_path is not installed by any package.")
+      if @installed_by < 1;
+
+    if (@installed_by > 1) {
+        warn encode_utf8(
+            "The path $requested_path is installed by multiple packages:\n");
+        warn encode_utf8($INDENT . "- $_\n")for @installed_by;
+    }
+
+    my $installable_name = shift @installed_by;
+
+    return $installable_name;
+}
+
+=item run_or_die
+
+Plain function, not a method. Runs the command given as an array reference,
+inheriting standard input and output and capturing standard error. Dies
+with the captured standard error, or with a generic message when nothing
+was captured, unless the wait status of the command is zero.
+
+=cut
+
+sub run_or_die {
+    my ($command) = @_;
+
+    # run3 takes stdin, stdout and stderr, in that order
+    my $stderr;
+    run3($command, undef, undef, \$stderr);
+
+    # look at all of $? so that death by signal counts as failure, too
+    return
+      unless $?;
+
+    my $exit_code = ($? >> $WAIT_STATUS_SHIFT);
+
+    # stderr already in UTF-8
+    die $stderr
+      if length $stderr;
+
+    die encode_utf8(
+        "Command $command->[0] failed (exit code $exit_code, wait status $?)");
+}
+
+=item refresh
+
+Regenerates the data file below the base directory from the manual shipped
+in the installable that provides the doc-base HTML index. The installable
+is looked up for the port C<amd64>, downloaded from the archive mirror with
+C<wget>, unpacked with C<dpkg-deb> into a temporary folder, and its index
+page is scanned for numbered section links. The destinations written are
+C<file:> URIs for the installed location, not for the temporary folder.
+
+=cut
+
+sub refresh {
+    my ($self, $archive, $basedir) = @_;
+
+    # shipped as part of the doc-base installable
+    my $shipped_base = 'usr/share/doc/doc-base/doc-base.html/';
+    my $index_name = 'index.html';
+
+    my $shipped_path = $shipped_base . $index_name;
+    my $stored_uri = "file:///$shipped_path";
+
+    # neutral sort order
+    local $ENV{LC_ALL} = 'C';
+
+    my $port = 'amd64';
+    my $installable_name
+      = $self->find_installable_name($archive, $port, $shipped_path);
+
+    my $deb822_by_installable_name
+      = $archive->deb822_packages_by_installable_name('sid', 'main', $port);
+
+    my $work_folder
+      = Path::Tiny->tempdir(TEMPLATE => 'refresh-doc-base-manual-XXXXXXXXXX');
+
+    die encode_utf8("Installable $installable_name not shipped in port $port")
+      unless exists $deb822_by_installable_name->{$installable_name};
+
+    my $deb822 = $deb822_by_installable_name->{$installable_name};
+
+    my $pool_path = $deb822->value('Filename');
+
+    my $deb_filename = basename($pool_path);
+    my $deb_local_path = "$work_folder/$deb_filename";
+    my $deb_url = $archive->mirror_base . $SLASH . $pool_path;
+
+    run_or_die(
+        [qw{wget --quiet}, "--output-document=$deb_local_path", $deb_url]);
+
+    my $extract_folder = "$work_folder/unpacked/$pool_path";
+    path($extract_folder)->mkpath;
+
+    run_or_die([qw{dpkg-deb --extract}, $deb_local_path, $extract_folder]);
+
+    unlink($deb_local_path)
+      or die encode_utf8("Cannot delete $deb_local_path");
+
+    # collect the generated lines in memory first
+    my $generated;
+    open(my $memory_fd, '>', \$generated)
+      or die encode_utf8('Cannot open scalar');
+
+    my $mechanize = WWW::Mechanize->new();
+
+    my $fresh_uri = URI::file->new_abs("/$extract_folder/$shipped_path");
+    $mechanize->get($fresh_uri);
+
+    # the page may lack a title; fall back to our own name for the manual
+    my $page_title = $mechanize->title // $self->title;
+
+    # strip explanatory remark
+    $page_title =~ s{ \s* \N{EM DASH} .* $}{}x;
+
+    # underscore is a token for the whole page
+    write_line($memory_fd, $VOLUME_KEY, $page_title, $stored_uri);
+
+    my %by_section_key;
+    my $in_appendix = 0;
+
+    # https://stackoverflow.com/a/254687
+    for my $link ($mechanize->links) {
+
+        next
+          unless length $link->text;
+
+        next
+          if $link->text !~ qr{^ \s* ([.\d]+) \s+ (.+) $}x;
+
+        my $section_key = $1;
+        my $section_title = $2;
+
+        # drop final dots
+        $section_key =~ s{ [.]+ $}{}x;
+
+        # reduce consecutive whitespace
+        $section_title =~ s{ \s+ }{ }gx;
+
+        my $relative_destination = $link->url;
+
+        # links other than pure fragments are relative to the page's folder
+        my $destination_base = $stored_uri;
+        $destination_base = dirname($stored_uri) . $SLASH
+          unless $destination_base =~ m{ / $}x
+          || $relative_destination =~ m{^ [#] }x;
+
+        my $full_destination = $destination_base . $relative_destination;
+
+        next
+          if exists $by_section_key{$section_key}
+          && ( $by_section_key{$section_key}{title} eq $section_title
+            || $by_section_key{$section_key}{destination} eq$full_destination);
+
+        # Some manuals reuse section numbers for different references,
+        # e.g. the Debian Policy's normal and appendix sections are
+        # numbers that clash with each other. Track if we've already
+        # seen a section pointing to some other URL than the current one,
+        # and prepend it with an indicator
+        $in_appendix = 1
+          if exists $by_section_key{$section_key}
+          && $by_section_key{$section_key}{destination} ne$full_destination;
+
+        $section_key = "appendix-$section_key"
+          if $in_appendix;
+
+        # the duplicate check above only saw the unprefixed key; without
+        # this, a link repeated inside the appendix is written twice
+        next
+          if $in_appendix
+          && exists $by_section_key{$section_key};
+
+        $by_section_key{$section_key}{title} = $section_title;
+        $by_section_key{$section_key}{destination} = $full_destination;
+
+        write_line($memory_fd, $section_key, $section_title,$full_destination);
+    }
+
+    close $memory_fd
+      or die encode_utf8('Cannot close scalar');
+
+    $self->write_data_file($basedir, $generated);
+
+    return;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Authority/FilesystemHierarchy.pm b/lib/Lintian/Data/Authority/FilesystemHierarchy.pm
new file mode 100644
index 0000000..89fb677
--- /dev/null
+++ b/lib/Lintian/Data/Authority/FilesystemHierarchy.pm
@@ -0,0 +1,333 @@
+# -*- perl -*-
+#
+# Copyright (C) 1998 Christian Schwarz and Richard Braakman
+# Copyright (C) 2001 Colin Watson
+# Copyright (C) 2008 Jorda Polo
+# Copyright (C) 2009 Russ Allbery
+# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
+# Copyright (C) 2020-2021 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.

package Lintian::Data::Authority::FilesystemHierarchy;

use v5.20;
use warnings;
use utf8;

use Carp qw(croak);
use Const::Fast;
use File::Basename qw(dirname);
use Path::Tiny;
use Unicode::UTF8 qw(encode_utf8);
use WWW::Mechanize ();

use Lintian::Output::Markdown qw(markdown_authority);

const my $EMPTY => q{};
const my $SPACE => q{ };
const my $SLASH => q{/};
const my $COLON => q{:};
const my $UNDERSCORE => q{_};
const my $LEFT_PARENTHESIS => q{(};
const my $RIGHT_PARENTHESIS => q{)};

# number of fields that follow the key on a data line
const my $THREE_PARTS => 3;

# key of the line describing the manual as a whole
const my $VOLUME_KEY => $UNDERSCORE;
const my $SEPARATOR => $COLON x 2;

use Moo;
use namespace::clean;

with 'Lintian::Data::JoinedLines';

=head1 NAME

Lintian::Data::Authority::FilesystemHierarchy - Lintian interface for manual references

=head1 SYNOPSIS

    use Lintian::Data::Authority::FilesystemHierarchy;

=head1 DESCRIPTION

Lintian::Data::Authority::FilesystemHierarchy provides a way to load data files for
manual references.

=head1 CLASS METHODS

=over 4

=item title

Human-readable name of the manual.

=item shorthand

Short name of the manual; also the last component of C<location>.

=item location

Path of the data file relative to a data directory. Built lazily as
C<authority/> followed by C<shorthand>.

=item separator

Regular expression that separates the fields of a data line.

=cut

has title => (
    is => 'rw',
    default => 'Filesystem Hierarchy Standard'
);

has shorthand => (
    is => 'rw',
    default => 'filesystem-hierarchy'
);

has location => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my ($self) = @_;

        return 'authority/' . $self->shorthand;
    }
);

has separator => (
    is => 'rw',
    default => sub { qr/::/ }
);

=item consumer (KEY, REMAINDER, PREVIOUS)

Turns the REMAINDER of a data line into a hash reference with the entries
C<title>, C<number> and C<url>. Returns the undefined value when PREVIOUS is
defined, so that an earlier value for the same KEY is never replaced.

=cut

sub consumer {
    my ($self, $key, $remainder, $previous) = @_;

    # NOTE(review): presumably called by Lintian::Data::JoinedLines in scalar
    # context; the explicit undef is kept for that caller
    return undef
      if defined $previous;

    my ($number, $title, $url)
      = split($self->separator, $remainder, $THREE_PARTS);

    my %entry;
    $entry{title} = $title;
    $entry{number} = $number;
    $entry{url} = $url;

    return \%entry;
}

=item markdown_citation (SECTION_KEY)

Returns whatever C<markdown_authority> produces for the manual and, when
SECTION_KEY is recognized, for that section. The section details are passed
as undefined values for an unknown SECTION_KEY. Croaks when SECTION_KEY is
the key reserved for the whole manual.

=cut

sub markdown_citation {
    my ($self, $section_key) = @_;

    croak "Invalid section $section_key"
      if $section_key eq $VOLUME_KEY;

    my $volume_entry = $self->value($VOLUME_KEY);

    # start with the citation to the overall manual.
    my $volume_title = $volume_entry->{title};
    my $volume_url = $volume_entry->{url};

    my $section_title;
    my $section_number;
    my $section_url;

    if ($self->recognizes($section_key)) {

        my $section_entry = $self->value($section_key);

        $section_title = $section_entry->{title};
        $section_number = $section_entry->{number};
        $section_url = $section_entry->{url};
    }

    return markdown_authority(
        $volume_title, $volume_url,$section_number,
        $section_title, $section_url
    );
}

=item write_line (DATA_FD, SECTION_KEY, SECTION_NUMBER, SECTION_TITLE, DESTINATION)

Plain function, not a method. Writes one UTF-8 encoded data line to DATA_FD
with the four fields joined by double colons.

=cut

sub write_line {
    my ($data_fd, $section_key, $section_number, $section_title, $destination)
      = @_;

    # drop final dots
    $section_key =~ s{ [.]+ $}{}x;

    # reduce consecutive whitespace
    $section_title =~ s{ \s+ }{ }gx;

    my $line= join($SEPARATOR,
        $section_key, $section_number, $section_title, $destination);

    say {$data_fd} encode_utf8($line);

    return;
}

=item write_data_file (BASEDIR, GENERATED)

Writes an explanatory header followed by the already encoded GENERATED lines
to C<location> below BASEDIR. The parent folder is created when missing.

=cut

sub write_data_file {
    my ($self, $basedir, $generated) = @_;

    # this module writes a key and a section number in front of the title,
    # so each line carries four fields
    my $header =<<"HEADER";
# Data about titles, sections, and URLs of manuals, used to expand references
# in tag descriptions and add links for HTML output. Each line of this file
# has four fields separated by double colons:
#
#     <key> :: <number> :: <title> :: <url>
#
# If <key> is an underscore, that line specifies the title and URL for the
# whole manual, and <number> is empty.

HEADER

    my $data_path = "$basedir/" . $self->location;
    my $parent_dir = path($data_path)->parent->stringify;
    path($parent_dir)->mkpath
      unless -e $parent_dir;

    my $output = encode_utf8($header) . $generated;
    path($data_path)->spew($output);

    return;
}

=item extract_sections_from_links (DATA_FD, BASE_URL, PAGE_NAME)

Fetches BASE_URL followed by PAGE_NAME and writes one line to DATA_FD for
the page itself, plus one line for each link whose text starts with a
section number. The lowercased link target, without a leading hash sign,
serves as the key. Only the first link for each key is written.

=cut

sub extract_sections_from_links {
    my ($self, $data_fd, $base_url, $page_name)= @_;

    my $page_url = $base_url . $page_name;

    my $mechanize = WWW::Mechanize->new();
    $mechanize->get($page_url);

    # the parsed title is undefined for pages without one; fall back to ours
    my $page_title = $mechanize->title // $self->title;

    # strip explanatory remark
    $page_title =~ s{ \s* \N{EM DASH} .* $}{}x;

    # underscore is a token for the whole page
    write_line($data_fd, $VOLUME_KEY, $EMPTY, $page_title, $page_url);

    # keys already written
    my %seen;

    # https://stackoverflow.com/a/254687
    for my $link ($mechanize->links) {

        next
          unless length $link->url;

        # make lowercase
        my $section_key = lc($link->url);

        # strip hash; it's a fragment
        $section_key =~ s{^ [#] }{}x;

        next
          unless length $link->text;

        next
          if $link->text !~ qr{^ \s* ([.\d]+) \s+ (.+) $}x;

        my $section_number = $1;
        my $section_title = $2;

        # drop final dot
        $section_number =~ s{ [.]+ $}{}x;

        # reduce consecutive whitespace
        $section_title =~ s{ \s+ }{ }gx;

        # includes hash
        my $relative_destination = $link->url;

        # fragments attach to the page itself; anything else is taken
        # relative to the folder the page lives in
        my $destination_base = $page_url;
        $destination_base = dirname($page_url) . $SLASH
          unless $destination_base =~ m{ / $}x
          || $relative_destination =~ m{^ [#] }x;

        my $full_destination = $destination_base . $relative_destination;

        next
          if exists $seen{$section_key};

        $seen{$section_key} = 1;

        write_line($data_fd, $section_key, $section_number,
            $section_title, $full_destination);
    }

    return;
}

=item refresh (ARCHIVE, BASEDIR)

Regenerates the data file below BASEDIR from the single-page version of the
standard published online. ARCHIVE is not used here.

=cut

sub refresh {
    my ($self, $archive, $basedir) = @_;

    # single page version
    # plain directory shows a file list
    my $base_url = 'https://refspecs.linuxfoundation.org/FHS_3.0/';
    my $index_name = 'fhs-3.0.html';

    my $generated;
    open(my $memory_fd, '>', \$generated)
      or die encode_utf8("Cannot open scalar: $!");

    $self->extract_sections_from_links($memory_fd, $base_url, $index_name);

    close($memory_fd)
      or die encode_utf8("Cannot close scalar: $!");

    $self->write_data_file($basedir, $generated);

    return;
}

=back

=head1 AUTHOR

Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.

=head1 SEE ALSO

lintian(1)

=cut

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Authority/JavaPolicy.pm b/lib/Lintian/Data/Authority/JavaPolicy.pm
new file mode 100644
index 0000000..eaa6704
--- /dev/null
+++ b/lib/Lintian/Data/Authority/JavaPolicy.pm
@@ -0,0 +1,290 @@
# -*- perl -*-
#
# Copyright (C) 1998 Christian Schwarz and Richard Braakman
# Copyright (C) 2001 Colin Watson
# Copyright (C) 2008 Jorda Polo
# Copyright (C) 2009 Russ Allbery
# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
# Copyright (C) 2020-2021 Felix Lechner
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.

package Lintian::Data::Authority::JavaPolicy;

use v5.20;
use warnings;
use utf8;

use Carp qw(croak);
use Const::Fast;
use List::SomeUtils qw(any first_value);
use Path::Tiny;
use Unicode::UTF8 qw(encode_utf8);
use WWW::Mechanize ();

use Lintian::Output::Markdown qw(markdown_authority);

const my $SLASH => q{/};
const my $UNDERSCORE => q{_};

# key of the entry describing the manual as a whole
const my $VOLUME_KEY => $UNDERSCORE;

# name under which the section list is handed to write_file
const my $SECTIONS => 'sections';

use Moo;
use namespace::clean;

with 'Lintian::Data::PreambledJSON';

=head1 NAME

Lintian::Data::Authority::JavaPolicy - Lintian interface for manual references

=head1 SYNOPSIS

    use Lintian::Data::Authority::JavaPolicy;

=head1 DESCRIPTION

Lintian::Data::Authority::JavaPolicy provides a way to load data files for
manual references.

=head1 CLASS METHODS

=over 4

=item title

Human-readable name of the manual.

=item shorthand

Short name of the manual; also used to build C<location>.

=item location

Path of the data file relative to a data directory. Built lazily as
C<authority/>, C<shorthand> and the suffix C<.json>.

=item by_section_key

Hash reference that maps section keys to the entries read by C<load>.

=cut

has title => (
    is => 'rw',
    default => 'Java Policy'
);

has shorthand => (
    is => 'rw',
    default => 'java-policy'
);

has location => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my ($self) = @_;

        return 'authority/' . $self->shorthand . '.json';
    }
);

has by_section_key => (is => 'rw', default => sub { {} });

=item markdown_citation (SECTION_KEY)

Returns whatever C<markdown_authority> produces for the manual and, when
SECTION_KEY is recognized, for that section. Title and destination of the
section are passed as undefined values for an unknown SECTION_KEY. Croaks
when SECTION_KEY is the key reserved for the whole manual.

=cut

sub markdown_citation {
    my ($self, $section_key) = @_;

    croak "Invalid section $section_key"
      if $section_key eq $VOLUME_KEY;

    my $volume_entry = $self->value($VOLUME_KEY);

    # start with the citation to the overall manual.
    my $volume_title = $volume_entry->{title};
    my $volume_url = $volume_entry->{destination};

    my $section_title;
    my $section_url;

    if ($self->recognizes($section_key)) {

        my $section_entry = $self->value($section_key);

        $section_title = $section_entry->{title};
        $section_url = $section_entry->{destination};
    }

    return markdown_authority(
        $volume_title, $volume_url,$section_key,
        $section_title, $section_url
    );
}

=item recognizes (KEY)

Returns true if KEY is known, and false otherwise.

=cut

sub recognizes {
    my ($self, $key) = @_;

    return 0
      unless length $key;

    return 1
      if exists $self->by_section_key->{$key};

    return 0;
}

=item value (KEY)

Returns the value attached to KEY if it was listed in the data
file represented by this Lintian::Data instance and the undefined value
otherwise.

=cut

sub value {
    my ($self, $key) = @_;

    return undef
      unless length $key;

    return $self->by_section_key->{$key};
}

=item load (SEARCH_SPACE, OUR_VENDOR)

Reads the first existing data file found below the folders in SEARCH_SPACE
and indexes its sections by key. Returns 1 on success and 0 when the file
could not be read. OUR_VENDOR is not used here.

=cut

sub load {
    my ($self, $search_space, $our_vendor) = @_;

    my @candidates = map { $_ . $SLASH . $self->location } @{$search_space};
    my $path = first_value { -e } @candidates;

    my $reference;

    return 0
      unless $self->read_file($path, \$reference);

    my @sections = @{$reference // []};

    for my $section (@sections) {

        my $key = $section->{key};

        # only store first value for duplicates
        # silently ignore later values
        $self->by_section_key->{$key} //= $section;
    }

    return 1;
}

=item refresh (ARCHIVE, BASEDIR)

Regenerates the data file below BASEDIR from the policy published online
and returns the status of C<write_file>. One entry is stored for the page
itself and one for each link whose text starts with a section number.
ARCHIVE is not used here.

=cut

sub refresh {
    my ($self, $archive, $basedir) = @_;

    my $base_url = 'https://www.debian.org/doc/packaging-manuals/java-policy/';

    my $mechanize = WWW::Mechanize->new();
    $mechanize->get($base_url);

    # the parsed title is undefined for pages without one; fall back to ours
    my $page_title = $mechanize->title // $self->title;

    my @sections;

    # underscore is a token for the whole page
    my %volume;
    $volume{key} = $VOLUME_KEY;
    $volume{title} = $page_title;
    $volume{destination} = $base_url;

    # store array to resemble web layout
    # may contain duplicates
    push(@sections, \%volume);

    my $in_appendix = 0;

    # true if a stored section with that key has the same title or target
    my $is_known = sub {
        my ($key, $title, $target) = @_;

        my @similar = grep { $_->{key} eq $key } @sections;

        return (any { $_->{title} eq $title } @similar)
          || (any { $_->{destination} eq $target } @similar);
    };

    # https://stackoverflow.com/a/254687
    for my $link ($mechanize->links) {

        next
          unless length $link->text;

        next
          if $link->text !~ qr{^ \s* ([.\d]+) \s+ (.+) $}x;

        my $section_key = $1;
        my $section_title = $2;

        # drop final dots
        $section_key =~ s{ [.]+ $}{}x;

        # reduce consecutive whitespace
        $section_title =~ s{ \s+ }{ }gx;

        my $destination = $base_url . $link->url;

        next
          if $is_known->($section_key, $section_title, $destination);

        # Some manuals reuse section numbers for different references,
        # e.g. the Debian Policy's normal and appendix sections are
        # numbers that clash with each other. Track if we've already
        # seen a section pointing to some other URL than the current one,
        # and prepend it with an indicator
        $in_appendix = 1
          if any { $_->{key} eq $section_key
              && $_->{destination} ne $destination }@sections;

        if ($in_appendix) {

            $section_key = "appendix-$section_key";

            # the check above only looked at the plain number; look again
            # so a repeated appendix link is not stored a second time
            next
              if $is_known->($section_key, $section_title, $destination);
        }

        my %section;
        $section{key} = $section_key;
        $section{title} = $section_title;
        $section{destination} = $destination;
        push(@sections, \%section);
    }

    my $data_path = "$basedir/" . $self->location;
    my $status = $self->write_file($SECTIONS, \@sections, $data_path);

    return $status;
}

=back

=head1 AUTHOR

Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.

=head1 SEE ALSO

lintian(1)

=cut

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Authority/LintianManual.pm b/lib/Lintian/Data/Authority/LintianManual.pm
new file mode 100644
index 0000000..3fc7bd0
--- /dev/null
+++ b/lib/Lintian/Data/Authority/LintianManual.pm
@@ -0,0 +1,324 @@
# -*- perl -*-
#
# Copyright (C) 1998 Christian Schwarz and Richard Braakman
# Copyright (C) 2001 Colin Watson
# Copyright (C) 2008 Jorda Polo
# Copyright (C) 2009 Russ Allbery
# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
# Copyright (C) 2020-2021 Felix Lechner
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.

package Lintian::Data::Authority::LintianManual;

use v5.20;
use warnings;
use utf8;

use Carp qw(croak);
use Const::Fast;
use IPC::Run3;
use Path::Tiny;
use Unicode::UTF8 qw(encode_utf8);
use URI::file;
use WWW::Mechanize ();

use Lintian::Output::Markdown qw(markdown_authority);

const my $EMPTY => q{};
const my $SPACE => q{ };
const my $SLASH => q{/};
const my $COLON => q{:};
const my $UNDERSCORE => q{_};
const my $LEFT_PARENTHESIS => q{(};
const my $RIGHT_PARENTHESIS => q{)};

# number of fields that follow the key on a data line
const my $TWO_PARTS => 2;

# key of the line describing the manual as a whole
const my $VOLUME_KEY => $UNDERSCORE;
const my $SEPARATOR => $COLON x 2;

# the exit code sits in the high byte of the wait status
const my $WAIT_STATUS_SHIFT => 8;

use Moo;
use namespace::clean;

with 'Lintian::Data::JoinedLines';

=head1 NAME

Lintian::Data::Authority::LintianManual - Lintian interface for manual references

=head1 SYNOPSIS

    use Lintian::Data::Authority::LintianManual;

=head1 DESCRIPTION

Lintian::Data::Authority::LintianManual provides a way to load data files for
manual references.

=head1 CLASS METHODS

=over 4

=item title

Human-readable name of the manual.

=item shorthand

Short name of the manual; also the last component of C<location>.

=item location

Path of the data file relative to a data directory. Built lazily as
C<authority/> followed by C<shorthand>.

=item separator

Regular expression that separates the fields of a data line.

=cut

has title => (
    is => 'rw',
    default => 'Lintian Manual'
);

has shorthand => (
    is => 'rw',
    default => 'lintian-manual'
);

has location => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my ($self) = @_;

        return 'authority/' . $self->shorthand;
    }
);

has separator => (
    is => 'rw',
    default => sub { qr/::/ }
);

=item consumer (KEY, REMAINDER, PREVIOUS)

Turns the REMAINDER of a data line into a hash reference with the entries
C<title> and C<url>. Returns the undefined value when PREVIOUS is defined,
so that an earlier value for the same KEY is never replaced.

=cut

sub consumer {
    my ($self, $key, $remainder, $previous) = @_;

    # NOTE(review): presumably called by Lintian::Data::JoinedLines in scalar
    # context; the explicit undef is kept for that caller
    return undef
      if defined $previous;

    my ($title, $url)= split($self->separator, $remainder, $TWO_PARTS);

    my %entry;
    $entry{title} = $title;
    $entry{url} = $url;

    return \%entry;
}

=item markdown_citation (SECTION_KEY)

Returns whatever C<markdown_authority> produces for the manual and, when
SECTION_KEY is recognized, for that section. Title and URL of the section
are passed as undefined values for an unknown SECTION_KEY. Croaks when
SECTION_KEY is the key reserved for the whole manual.

=cut

sub markdown_citation {
    my ($self, $section_key) = @_;

    croak "Invalid section $section_key"
      if $section_key eq $VOLUME_KEY;

    my $volume_entry = $self->value($VOLUME_KEY);

    # start with the citation to the overall manual.
    my $volume_title = $volume_entry->{title};
    my $volume_url = $volume_entry->{url};

    my $section_title;
    my $section_url;

    if ($self->recognizes($section_key)) {

        my $section_entry = $self->value($section_key);

        $section_title = $section_entry->{title};
        $section_url = $section_entry->{url};
    }

    return markdown_authority(
        $volume_title, $volume_url,$section_key,
        $section_title, $section_url
    );
}

=item write_line (DATA_FD, SECTION_KEY, SECTION_TITLE, DESTINATION)

Plain function, not a method. Writes one UTF-8 encoded data line to DATA_FD
with the three fields joined by double colons.

=cut

sub write_line {
    my ($data_fd, $section_key, $section_title, $destination) = @_;

    # drop final dots
    $section_key =~ s{ [.]+ $}{}x;

    # reduce consecutive whitespace
    $section_title =~ s{ \s+ }{ }gx;

    my $line= join($SEPARATOR,$section_key, $section_title, $destination);

    say {$data_fd} encode_utf8($line);

    return;
}

=item write_data_file (BASEDIR, GENERATED)

Writes an explanatory header followed by the already encoded GENERATED lines
to C<location> below BASEDIR. The parent folder is created when missing.

=cut

sub write_data_file {
    my ($self, $basedir, $generated) = @_;

    my $header =<<"HEADER";
# Data about titles, sections, and URLs of manuals, used to expand references
# in tag descriptions and add links for HTML output. Each line of this file
# has three fields separated by double colons:
#
#     <section> :: <title> :: <url>
#
# If <section> is an underscore, that line specifies the title and URL for the
# whole manual.

HEADER

    my $data_path = "$basedir/" . $self->location;
    my $parent_dir = path($data_path)->parent->stringify;
    path($parent_dir)->mkpath
      unless -e $parent_dir;

    my $output = encode_utf8($header) . $generated;
    path($data_path)->spew($output);

    return;
}

=item refresh (ARCHIVE, BASEDIR)

Regenerates the data file below BASEDIR. The manual is rendered with
C<rst2html> from F<doc/lintian.rst> below the folder named in the
environment variable C<LINTIAN_BASE>, and the numbered links in the result
are recorded relative to the manual's published address. Dies when
C<LINTIAN_BASE> is not set or C<rst2html> fails. ARCHIVE is not used here.

=cut

sub refresh {
    my ($self, $archive, $basedir) = @_;

    die encode_utf8('LINTIAN_BASE is not set')
      unless length $ENV{LINTIAN_BASE};

    # WWW::Mechanize will not parse page title without the suffix
    my $temp_tiny = Path::Tiny->tempfile(
        TEMPLATE => 'lintian-manual-XXXXXXXX',
        SUFFIX => '.html'
    );
    my $local_uri = URI::file->new_abs($temp_tiny->stringify);

    # for rst2html
    local $ENV{LC_ALL} = 'en_US.UTF-8';

    my $stderr;
    run3(['rst2html', "$ENV{LINTIAN_BASE}/doc/lintian.rst"],
        undef, $local_uri->file, \$stderr);

    # look at the whole wait status; the exit code alone is zero when the
    # child was killed by a signal
    my $wait_status = $?;
    my $exit_code = ($wait_status >> $WAIT_STATUS_SHIFT);

    if ($wait_status) {

        # stderr already in UTF-8
        die $stderr
          if length $stderr;

        die encode_utf8(
            "rst2html failed (wait status $wait_status, exit code $exit_code)"
        );
    }

    my $generated;
    open(my $memory_fd, '>', \$generated)
      or die encode_utf8("Cannot open scalar: $!");

    my $page_url = 'https://lintian.debian.org/manual/index.html';

    my $mechanize = WWW::Mechanize->new();
    $mechanize->get($local_uri);

    # the parsed title is undefined for pages without one; fall back to ours
    my $page_title = $mechanize->title // $self->title;

    # underscore is a token for the whole page
    write_line($memory_fd, $VOLUME_KEY, $page_title, $page_url);

    my %by_section_key;
    my $in_appendix = 0;

    # true if the key was recorded with the same title or the same target
    my $is_known = sub {
        my ($key, $title, $target) = @_;

        return 0
          unless exists $by_section_key{$key};

        return $by_section_key{$key}{title} eq $title
          || $by_section_key{$key}{destination} eq $target;
    };

    # https://stackoverflow.com/a/254687
    for my $link ($mechanize->links) {

        next
          unless length $link->text;

        next
          if $link->text !~ qr{^ \s* ([.\d]+) \s+ (.+) $}x;

        my $section_key = $1;
        my $section_title = $2;

        # drop final dots
        $section_key =~ s{ [.]+ $}{}x;

        # reduce consecutive whitespace
        $section_title =~ s{ \s+ }{ }gx;

        my $destination = $page_url . $link->url;

        next
          if $is_known->($section_key, $section_title, $destination);

        # Some manuals reuse section numbers for different references,
        # e.g. the Debian Policy's normal and appendix sections are
        # numbers that clash with each other. Track if we've already
        # seen a section pointing to some other URL than the current one,
        # and prepend it with an indicator
        $in_appendix = 1
          if exists $by_section_key{$section_key}
          && $by_section_key{$section_key}{destination} ne $destination;

        if ($in_appendix) {

            $section_key = "appendix-$section_key";

            # the check above only looked at the plain number; look again
            # so a repeated appendix link is not written a second time
            next
              if $is_known->($section_key, $section_title, $destination);
        }

        $by_section_key{$section_key}{title} = $section_title;
        $by_section_key{$section_key}{destination} = $destination;

        write_line($memory_fd, $section_key, $section_title, $destination);
    }

    close($memory_fd)
      or die encode_utf8("Cannot close scalar: $!");

    $self->write_data_file($basedir, $generated);

    return;
}

=back

=head1 AUTHOR

Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.

=head1 SEE ALSO

lintian(1)

=cut

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Authority/MenuManual.pm b/lib/Lintian/Data/Authority/MenuManual.pm
new file mode 100644
index 0000000..c8a2878
--- /dev/null
+++ b/lib/Lintian/Data/Authority/MenuManual.pm
@@ -0,0 +1,316 @@
# -*- perl -*-
#
# Copyright (C) 1998 Christian Schwarz and Richard Braakman
# Copyright (C) 2001 Colin Watson
# Copyright (C) 2008 Jorda Polo
# Copyright (C) 2009 Russ Allbery
# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
# Copyright (C) 2020-2021 Felix Lechner
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.

package Lintian::Data::Authority::MenuManual;

use v5.20;
use warnings;
use utf8;

use Carp qw(croak);
use Const::Fast;
use Path::Tiny;
use Unicode::UTF8 qw(encode_utf8);
use WWW::Mechanize ();

use Lintian::Output::Markdown qw(markdown_authority);

const my $EMPTY => q{};
const my $SPACE => q{ };
const my $SLASH => q{/};
const my $COLON => q{:};
const my $UNDERSCORE => q{_};
const my $LEFT_PARENTHESIS => q{(};
const my $RIGHT_PARENTHESIS => q{)};

# number of fields that follow the key on a data line
const my $TWO_PARTS => 2;

# key of the line describing the manual as a whole
const my $VOLUME_KEY => $UNDERSCORE;
const my $SEPARATOR => $COLON x 2;

use Moo;
use namespace::clean;

with 'Lintian::Data::JoinedLines';

=head1 NAME

Lintian::Data::Authority::MenuManual - Lintian interface for manual references

=head1 SYNOPSIS

    use Lintian::Data::Authority::MenuManual;

=head1 DESCRIPTION

Lintian::Data::Authority::MenuManual provides a way to load data files for
manual references.

=head1 CLASS METHODS

=over 4

=item title

Human-readable name of the manual.

=item shorthand

Short name of the manual; also the last component of C<location>.

=item location

Path of the data file relative to a data directory. Built lazily as
C<authority/> followed by C<shorthand>.

=item separator

Regular expression that separates the fields of a data line.

=cut

has title => (
    is => 'rw',
    default => 'Menu Manual'
);

has shorthand => (
    is => 'rw',
    default => 'menu-manual'
);

has location => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my ($self) = @_;

        return 'authority/' . $self->shorthand;
    }
);

has separator => (
    is => 'rw',
    default => sub { qr/::/ }
);

=item consumer (KEY, REMAINDER, PREVIOUS)

Turns the REMAINDER of a data line into a hash reference with the entries
C<title> and C<url>. Returns the undefined value when PREVIOUS is defined,
so that an earlier value for the same KEY is never replaced.

=cut

sub consumer {
    my ($self, $key, $remainder, $previous) = @_;

    # NOTE(review): presumably called by Lintian::Data::JoinedLines in scalar
    # context; the explicit undef is kept for that caller
    return undef
      if defined $previous;

    my ($title, $url)= split($self->separator, $remainder, $TWO_PARTS);

    my %entry;
    $entry{title} = $title;
    $entry{url} = $url;

    return \%entry;
}

=item markdown_citation (SECTION_KEY)

Returns whatever C<markdown_authority> produces for the manual and, when
SECTION_KEY is recognized, for that section. Title and URL of the section
are passed as undefined values for an unknown SECTION_KEY. Croaks when
SECTION_KEY is the key reserved for the whole manual.

=cut

sub markdown_citation {
    my ($self, $section_key) = @_;

    croak "Invalid section $section_key"
      if $section_key eq $VOLUME_KEY;

    my $volume_entry = $self->value($VOLUME_KEY);

    # start with the citation to the overall manual.
    my $volume_title = $volume_entry->{title};
    my $volume_url = $volume_entry->{url};

    my $section_title;
    my $section_url;

    if ($self->recognizes($section_key)) {

        my $section_entry = $self->value($section_key);

        $section_title = $section_entry->{title};
        $section_url = $section_entry->{url};
    }

    return markdown_authority(
        $volume_title, $volume_url,$section_key,
        $section_title, $section_url
    );
}

=item write_line (DATA_FD, SECTION_KEY, SECTION_TITLE, DESTINATION)

Plain function, not a method. Writes one UTF-8 encoded data line to DATA_FD
with the three fields joined by double colons.

=cut

sub write_line {
    my ($data_fd, $section_key, $section_title, $destination) = @_;

    # drop final dots
    $section_key =~ s{ [.]+ $}{}x;

    # reduce consecutive whitespace
    $section_title =~ s{ \s+ }{ }gx;

    my $line= join($SEPARATOR,$section_key, $section_title, $destination);

    say {$data_fd} encode_utf8($line);

    return;
}

=item write_data_file (BASEDIR, GENERATED)

Writes an explanatory header followed by the already encoded GENERATED lines
to C<location> below BASEDIR. The parent folder is created when missing.

=cut

sub write_data_file {
    my ($self, $basedir, $generated) = @_;

    my $header =<<"HEADER";
# Data about titles, sections, and URLs of manuals, used to expand references
# in tag descriptions and add links for HTML output. Each line of this file
# has three fields separated by double colons:
#
#     <section> :: <title> :: <url>
#
# If <section> is an underscore, that line specifies the title and URL for the
# whole manual.

HEADER

    my $data_path = "$basedir/" . $self->location;
    my $parent_dir = path($data_path)->parent->stringify;
    path($parent_dir)->mkpath
      unless -e $parent_dir;

    my $output = encode_utf8($header) . $generated;
    path($data_path)->spew($output);

    return;
}

=item extract_sections_from_links (DATA_FD, BASE_URL)

Fetches BASE_URL and writes one line to DATA_FD for the page itself, plus
one line for each link whose text starts with a section number. A link is
skipped when its key was already recorded with the same title or the same
destination. Keys get the prefix C<appendix-> once a section number
reappears with a different destination.

=cut

sub extract_sections_from_links {
    my ($self, $data_fd, $base_url)= @_;

    my $mechanize = WWW::Mechanize->new();
    $mechanize->get($base_url);

    # the parsed title is undefined for pages without one; fall back to ours
    my $page_title = $mechanize->title // $self->title;

    # strip explanatory remark
    $page_title =~ s{ \s* \N{EM DASH} .* $}{}x;

    # underscore is a token for the whole page
    write_line($data_fd, $VOLUME_KEY, $page_title, $base_url);

    my %by_section_key;
    my $in_appendix = 0;

    # true if the key was recorded with the same title or the same target
    my $is_known = sub {
        my ($key, $title, $target) = @_;

        return 0
          unless exists $by_section_key{$key};

        return $by_section_key{$key}{title} eq $title
          || $by_section_key{$key}{destination} eq $target;
    };

    # https://stackoverflow.com/a/254687
    for my $link ($mechanize->links) {

        next
          unless length $link->text;

        next
          if $link->text !~ qr{^ \s* ([.\d]+) \s+ (.+) $}x;

        my $section_key = $1;
        my $section_title = $2;

        # drop final dots
        $section_key =~ s{ [.]+ $}{}x;

        # reduce consecutive whitespace
        $section_title =~ s{ \s+ }{ }gx;

        my $destination = $base_url . $link->url;

        next
          if $is_known->($section_key, $section_title, $destination);

        # Some manuals reuse section numbers for different references,
        # e.g. the Debian Policy's normal and appendix sections are
        # numbers that clash with each other. Track if we've already
        # seen a section pointing to some other URL than the current one,
        # and prepend it with an indicator
        $in_appendix = 1
          if exists $by_section_key{$section_key}
          && $by_section_key{$section_key}{destination} ne $destination;

        if ($in_appendix) {

            $section_key = "appendix-$section_key";

            # the check above only looked at the plain number; look again
            # so a repeated appendix link is not written a second time
            next
              if $is_known->($section_key, $section_title, $destination);
        }

        $by_section_key{$section_key}{title} = $section_title;
        $by_section_key{$section_key}{destination} = $destination;

        write_line($data_fd, $section_key, $section_title, $destination);
    }

    return;
}

=item refresh (ARCHIVE, BASEDIR)

Regenerates the data file below BASEDIR from the manual published online.
ARCHIVE is not used here.

=cut

sub refresh {
    my ($self, $archive, $basedir) = @_;

    my $base_url = 'https://www.debian.org/doc/packaging-manuals/menu.html/';

    my $generated;
    open(my $memory_fd, '>', \$generated)
      or die encode_utf8("Cannot open scalar: $!");

    $self->extract_sections_from_links($memory_fd, $base_url);

    close($memory_fd)
      or die encode_utf8("Cannot close scalar: $!");

    $self->write_data_file($basedir, $generated);

    return;
}

=back

=head1 AUTHOR

Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.

=head1 SEE ALSO

lintian(1)

=cut

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Authority/MenuPolicy.pm b/lib/Lintian/Data/Authority/MenuPolicy.pm
new file mode 100644
index 0000000..e0f710a
--- /dev/null
+++ b/lib/Lintian/Data/Authority/MenuPolicy.pm
@@ -0,0 +1,316 @@
# -*- perl -*-
#
# Copyright (C) 1998 Christian Schwarz and Richard Braakman
# Copyright (C) 2001 Colin Watson
# Copyright (C) 2008 Jorda Polo
# Copyright (C) 2009 Russ Allbery
# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
# Copyright (C) 2020-2021 Felix Lechner
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.

package Lintian::Data::Authority::MenuPolicy;

use v5.20;
use warnings;
use utf8;

use Carp qw(croak);
use Const::Fast;
use Path::Tiny;
use Unicode::UTF8 qw(encode_utf8);
use WWW::Mechanize ();

use Lintian::Output::Markdown qw(markdown_authority);

const my $EMPTY => q{};
const my $SPACE => q{ };
const my $SLASH => q{/};
const my $COLON => q{:};
const my $UNDERSCORE => q{_};
const my $LEFT_PARENTHESIS => q{(};
const my $RIGHT_PARENTHESIS => q{)};

# number of fields that follow the key on a data line
const my $TWO_PARTS => 2;

# key of the line describing the manual as a whole
const my $VOLUME_KEY => $UNDERSCORE;
const my $SEPARATOR => $COLON x 2;

use Moo;
use namespace::clean;

with 'Lintian::Data::JoinedLines';

=head1 NAME

Lintian::Data::Authority::MenuPolicy - Lintian interface for manual references

=head1 SYNOPSIS

    use Lintian::Data::Authority::MenuPolicy;

=head1 DESCRIPTION

Lintian::Data::Authority::MenuPolicy provides a way to load data files for
manual references.

=head1 CLASS METHODS

=over 4

=item title

Human-readable name of the manual.

=item shorthand

Short name of the manual; also the last component of C<location>.

=item location

Path of the data file relative to a data directory. Built lazily as
C<authority/> followed by C<shorthand>.

=item separator

Regular expression that separates the fields of a data line.

=cut

has title => (
    is => 'rw',
    default => 'Menu Policy'
);

has shorthand => (
    is => 'rw',
    default => 'menu-policy'
);

has location => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my ($self) = @_;

        return 'authority/' . $self->shorthand;
    }
);

has separator => (
    is => 'rw',
    default => sub { qr/::/ }
);

=item consumer (KEY, REMAINDER, PREVIOUS)

Turns the REMAINDER of a data line into a hash reference with the entries
C<title> and C<url>. Returns the undefined value when PREVIOUS is defined,
so that an earlier value for the same KEY is never replaced.

=cut

sub consumer {
    my ($self, $key, $remainder, $previous) = @_;

    # NOTE(review): presumably called by Lintian::Data::JoinedLines in scalar
    # context; the explicit undef is kept for that caller
    return undef
      if defined $previous;

    my ($title, $url)= split($self->separator, $remainder, $TWO_PARTS);

    my %entry;
    $entry{title} = $title;
    $entry{url} = $url;

    return \%entry;
}

=item markdown_citation (SECTION_KEY)

Returns whatever C<markdown_authority> produces for the manual and, when
SECTION_KEY is recognized, for that section. Title and URL of the section
are passed as undefined values for an unknown SECTION_KEY. Croaks when
SECTION_KEY is the key reserved for the whole manual.

=cut

sub markdown_citation {
    my ($self, $section_key) = @_;

    croak "Invalid section $section_key"
      if $section_key eq $VOLUME_KEY;

    my $volume_entry = $self->value($VOLUME_KEY);

    # start with the citation to the overall manual.
    my $volume_title = $volume_entry->{title};
    my $volume_url = $volume_entry->{url};

    my $section_title;
    my $section_url;

    if ($self->recognizes($section_key)) {

        my $section_entry = $self->value($section_key);

        $section_title = $section_entry->{title};
        $section_url = $section_entry->{url};
    }

    return markdown_authority(
        $volume_title, $volume_url,$section_key,
        $section_title, $section_url
    );
}

=item write_line (DATA_FD, SECTION_KEY, SECTION_TITLE, DESTINATION)

Plain function, not a method. Writes one UTF-8 encoded data line to DATA_FD
with the three fields joined by double colons.

=cut

sub write_line {
    my ($data_fd, $section_key, $section_title, $destination) = @_;

    # drop final dots
    $section_key =~ s{ [.]+ $}{}x;

    # reduce consecutive whitespace
    $section_title =~ s{ \s+ }{ }gx;

    my $line= join($SEPARATOR,$section_key, $section_title, $destination);

    say {$data_fd} encode_utf8($line);

    return;
}

=item write_data_file (BASEDIR, GENERATED)

Writes an explanatory header followed by the already encoded GENERATED lines
to C<location> below BASEDIR. The parent folder is created when missing.

=cut

sub write_data_file {
    my ($self, $basedir, $generated) = @_;

    my $header =<<"HEADER";
# Data about titles, sections, and URLs of manuals, used to expand references
# in tag descriptions and add links for HTML output. Each line of this file
# has three fields separated by double colons:
#
#     <section> :: <title> :: <url>
#
# If <section> is an underscore, that line specifies the title and URL for the
# whole manual.

HEADER

    my $data_path = "$basedir/" . $self->location;
    my $parent_dir = path($data_path)->parent->stringify;
    path($parent_dir)->mkpath
      unless -e $parent_dir;

    my $output = encode_utf8($header) . $generated;
    path($data_path)->spew($output);

    return;
}

=item extract_sections_from_links (DATA_FD, BASE_URL)

Fetches BASE_URL and writes one line to DATA_FD for the page itself, plus
one line for each link whose text starts with a section number. A link is
skipped when its key was already recorded with the same title or the same
destination. Keys get the prefix C<appendix-> once a section number
reappears with a different destination.

=cut

sub extract_sections_from_links {
    my ($self, $data_fd, $base_url)= @_;

    my $mechanize = WWW::Mechanize->new();
    $mechanize->get($base_url);

    # the parsed title is undefined for pages without one; fall back to ours
    my $page_title = $mechanize->title // $self->title;

    # strip explanatory remark
    $page_title =~ s{ \s* \N{EM DASH} .* $}{}x;

    # underscore is a token for the whole page
    write_line($data_fd, $VOLUME_KEY, $page_title, $base_url);

    my %by_section_key;
    my $in_appendix = 0;

    # true if the key was recorded with the same title or the same target
    my $is_known = sub {
        my ($key, $title, $target) = @_;

        return 0
          unless exists $by_section_key{$key};

        return $by_section_key{$key}{title} eq $title
          || $by_section_key{$key}{destination} eq $target;
    };

    # https://stackoverflow.com/a/254687
    for my $link ($mechanize->links) {

        next
          unless length $link->text;

        next
          if $link->text !~ qr{^ \s* ([.\d]+) \s+ (.+) $}x;

        my $section_key = $1;
        my $section_title = $2;

        # drop final dots
        $section_key =~ s{ [.]+ $}{}x;

        # reduce consecutive whitespace
        $section_title =~ s{ \s+ }{ }gx;

        my $destination = $base_url . $link->url;

        next
          if $is_known->($section_key, $section_title, $destination);

        # Some manuals reuse section numbers for different references,
        # e.g. the Debian Policy's normal and appendix sections are
        # numbers that clash with each other. Track if we've already
        # seen a section pointing to some other URL than the current one,
        # and prepend it with an indicator
        $in_appendix = 1
          if exists $by_section_key{$section_key}
          && $by_section_key{$section_key}{destination} ne $destination;

        if ($in_appendix) {

            $section_key = "appendix-$section_key";

            # the check above only looked at the plain number; look again
            # so a repeated appendix link is not written a second time
            next
              if $is_known->($section_key, $section_title, $destination);
        }

        $by_section_key{$section_key}{title} = $section_title;
        $by_section_key{$section_key}{destination} = $destination;

        write_line($data_fd, $section_key, $section_title, $destination);
    }

    return;
}

=item refresh (ARCHIVE, BASEDIR)

Regenerates the data file below BASEDIR from the policy published online.
ARCHIVE is not used here.

=cut

sub refresh {
    my ($self, $archive, $basedir) = @_;

    my $base_url = 'https://www.debian.org/doc/packaging-manuals/menu-policy/';

    my $generated;
    open(my $memory_fd, '>', \$generated)
      or die encode_utf8("Cannot open scalar: $!");

    $self->extract_sections_from_links($memory_fd, $base_url);

    close($memory_fd)
      or die encode_utf8("Cannot close scalar: $!");

    $self->write_data_file($basedir, $generated);

    return;
}

=back

=head1 AUTHOR

Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.

=head1 SEE ALSO

lintian(1)

=cut

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Authority/NewMaintainer.pm b/lib/Lintian/Data/Authority/NewMaintainer.pm
new file mode 100644
index 0000000..bd8c933
--- /dev/null
+++ b/lib/Lintian/Data/Authority/NewMaintainer.pm
@@ -0,0 +1,290 @@
# -*- perl -*-
#
# Copyright (C) 1998 Christian Schwarz and Richard Braakman
# Copyright (C) 2001 Colin Watson
# Copyright (C) 2008 Jorda Polo
# Copyright (C) 2009 Russ Allbery
# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
# Copyright (C) 2020-2021 Felix Lechner
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.

package Lintian::Data::Authority::NewMaintainer;

use v5.20;
use warnings;
use utf8;

use Carp qw(croak);
use Const::Fast;
use List::SomeUtils qw(any first_value);
use Path::Tiny;
use Unicode::UTF8 qw(encode_utf8);
use WWW::Mechanize ();

use Lintian::Output::Markdown qw(markdown_authority);

const my $SLASH => q{/};
const my $UNDERSCORE => q{_};

const my $VOLUME_KEY => $UNDERSCORE;
const my $SECTIONS => 'sections';

use Moo;
use namespace::clean;

with 'Lintian::Data::PreambledJSON';

=head1 NAME

Lintian::Data::Authority::NewMaintainer - Lintian interface for manual references

=head1 SYNOPSIS

    use Lintian::Data::Authority::NewMaintainer;

=head1 DESCRIPTION

Lintian::Data::Authority::NewMaintainer provides a way to load data files for
manual references. The data is stored as JSON through the
Lintian::Data::PreambledJSON role and can be rebuilt from the online manual
with C<refresh>.

=head1 CLASS METHODS

=over 4

=item title

Name of the manual. Defaults to C<New Maintainer's Guide>.

=item shorthand

Short identifier of the manual. Defaults to C<new-maintainer>.

=item location

Relative path of the data file. Built lazily as C<authority/>, the
shorthand and C<.json>.

=item by_section_key

Hash reference that maps a section key to the entry loaded for it. Each
entry is a hash reference; this class reads its C<key>, C<title> and
C<destination> fields.

=cut

has title => (
    is => 'rw',
    default => 'New Maintainer\'s Guide'
);

has shorthand => (
    is => 'rw',
    default => 'new-maintainer'
);

has location => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my ($self) = @_;

        return 'authority/' . $self->shorthand . '.json';
    }
);

has by_section_key => (is => 'rw', default => sub { {} });

=item markdown_citation (SECTION_KEY)

Returns the citation produced by C<markdown_authority> for SECTION_KEY.
The title and URL of the section are passed as undefined values when the
key is not recognized. Croaks when SECTION_KEY is the volume key.

=cut

sub markdown_citation {
    my ($self, $section_key) = @_;

    # the underscore stands for the manual itself and is not a section
    croak "Invalid section $section_key"
      if $section_key eq $VOLUME_KEY;

    my $volume_entry = $self->value($VOLUME_KEY);

    # start with the citation to the overall manual.
    my $volume_title = $volume_entry->{title};
    my $volume_url = $volume_entry->{destination};

    my $section_title;
    my $section_url;

    if ($self->recognizes($section_key)) {

        my $section_entry = $self->value($section_key);

        $section_title = $section_entry->{title};
        $section_url = $section_entry->{destination};
    }

    return markdown_authority(
        $volume_title, $volume_url,$section_key,
        $section_title, $section_url
    );
}

=item recognizes (KEY)

Returns true if KEY is known, and false otherwise.

=cut

sub recognizes {
    my ($self, $key) = @_;

    return 0
      unless length $key;

    return 1
      if exists $self->by_section_key->{$key};

    return 0;
}

=item value (KEY)

Returns the value attached to KEY if it was listed in the data
file represented by this Lintian::Data instance and the undefined value
otherwise.

=cut

sub value {
    my ($self, $key) = @_;

    return undef
      unless length $key;

    return $self->by_section_key->{$key};
}

=item load (SEARCH_SPACE, OUR_VENDOR)

Reads the first existing data file found below the folders listed in
SEARCH_SPACE and indexes its sections by key. Returns 1 on success and 0
when the file could not be read. OUR_VENDOR is not used.

=cut

sub load {
    my ($self, $search_space, $our_vendor) = @_;

    my @candidates = map { $_ . $SLASH . $self->location } @{$search_space};

    # undefined when no candidate exists
    # NOTE(review): read_file is then called with an undefined path and is
    # presumably expected to return false; confirm in the role
    my $path = first_value { -e } @candidates;

    my $reference;

    return 0
      unless $self->read_file($path, \$reference);

    my @sections = @{$reference // []};

    for my $section (@sections) {

        my $key = $section->{key};

        # only store first value for duplicates
        # silently ignore later values
        $self->by_section_key->{$key} //= $section;
    }

    return 1;
}

=item refresh (ARCHIVE, BASEDIR)

Fetches the table of contents of the online manual and writes one entry
for the manual itself and one for every link whose text starts with a
section number to the data file below BASEDIR. Returns the status
reported by C<write_file>. ARCHIVE is not used.

=cut

sub refresh {
    my ($self, $archive, $basedir) = @_;

    my $base_url = 'https://www.debian.org/doc/manuals/maint-guide/index.html';

    my $mechanize = WWW::Mechanize->new();
    $mechanize->get($base_url);

    my $page_title = $mechanize->title;

    my @sections;

    # underscore is a token for the whole page
    my %volume;
    $volume{key} = $VOLUME_KEY;
    $volume{title} = $page_title;
    $volume{destination} = $base_url;

    # store array to resemble web layout
    # may contain duplicates
    push(@sections, \%volume);

    my $in_appendix = 0;

    # https://stackoverflow.com/a/254687
    for my $link ($mechanize->links) {

        next
          unless length $link->text;

        next
          if $link->text !~ qr{^ \s* ([.\d[:upper:]]+) \s+ (.+) $}x;

        my $section_key = $1;
        my $section_title = $2;

        # drop final dots
        $section_key =~ s{ [.]+ $}{}x;

        # reduce consecutive whitespace
        $section_title =~ s{ \s+ }{ }gx;

        # The base URL names a document (index.html), so appending the
        # link target to it would yield addresses like
        # '.../index.htmlstart.en.html'. Resolve the link against the
        # fetched page instead.
        my $destination = $link->url_abs->as_string;

        my @similar = grep { $_->{key} eq $section_key } @sections;
        next
          if (any { $_->{title} eq $section_title } @similar)
          || (any { $_->{destination} eq $destination } @similar);

        # Some manuals reuse section numbers for different references,
        # e.g. the Debian Policy's normal and appendix sections are
        # numbers that clash with each other. Track if we've already
        # seen a section pointing to some other URL than the current one,
        # and prepend it with an indicator
        $in_appendix = 1
          if any { $_->{destination} ne $destination } @similar;

        $section_key = "appendix-$section_key"
          if $in_appendix;

        my %section;
        $section{key} = $section_key;
        $section{title} = $section_title;
        $section{destination} = $destination;
        push(@sections, \%section);
    }

    my $data_path = "$basedir/" . $self->location;
    my $status = $self->write_file($SECTIONS, \@sections, $data_path);

    return $status;
}

=back

=head1 AUTHOR

Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.

=head1 SEE ALSO

lintian(1)

=cut

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et

# diff --git a/lib/Lintian/Data/Authority/PerlPolicy.pm b/lib/Lintian/Data/Authority/PerlPolicy.pm
# new file mode 100644
# index 0000000..92dc31a
# --- /dev/null
# +++ b/lib/Lintian/Data/Authority/PerlPolicy.pm
# @@ -0,0 +1,316 @@
# -*- perl -*-
#
# Copyright (C) 1998 Christian Schwarz and Richard Braakman
# Copyright (C) 2001 Colin Watson
# Copyright (C) 2008 Jorda Polo
# Copyright (C) 2009 Russ Allbery
# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
# Copyright (C) 2020-2021 Felix Lechner
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.

package Lintian::Data::Authority::PerlPolicy;

use v5.20;
use warnings;
use utf8;

use Carp qw(croak);
use Const::Fast;
use Path::Tiny;
use Unicode::UTF8 qw(encode_utf8);
use WWW::Mechanize ();

use Lintian::Output::Markdown qw(markdown_authority);

const my $EMPTY => q{};
const my $SPACE => q{ };
const my $SLASH => q{/};
const my $COLON => q{:};
const my $UNDERSCORE => q{_};
const my $LEFT_PARENTHESIS => q{(};
const my $RIGHT_PARENTHESIS => q{)};

const my $TWO_PARTS => 2;

const my $VOLUME_KEY => $UNDERSCORE;
const my $SEPARATOR => $COLON x 2;

use Moo;
use namespace::clean;

with 'Lintian::Data::JoinedLines';

=head1 NAME

Lintian::Data::Authority::PerlPolicy - Lintian interface for manual references

=head1 SYNOPSIS

    use Lintian::Data::Authority::PerlPolicy;

=head1 DESCRIPTION

Lintian::Data::Authority::PerlPolicy holds the reference data for the
Perl Policy. It composes the Lintian::Data::JoinedLines role, turns the
fields of a data line into an entry, formats citations, and can rebuild
its data file from the online manual.

=head1 CLASS METHODS

=over 4

=item title

Name of the manual. Defaults to C<Perl Policy>.

=item shorthand

Short identifier of the manual. Defaults to C<perl-policy>.

=item location

Relative path of the data file. Built lazily as C<authority/> followed by
the shorthand.

=item separator

Regular expression separating the fields of a data line. Defaults to a
double colon.

=cut

has title => (is => 'rw', default => 'Perl Policy');

has shorthand => (is => 'rw', default => 'perl-policy');

has location => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my ($authority) = @_;

        return 'authority/' . $authority->shorthand;
    }
);

has separator => (
    is => 'rw',
    default => sub { return qr/::/; }
);

=item consumer (KEY, REMAINDER, PREVIOUS)

Splits REMAINDER at the separator into a title and a URL and returns them
as a hash reference with the keys C<title> and C<url>. Returns the
undefined value when PREVIOUS is defined.

=cut

sub consumer {
    my ($self, $key, $remainder, $previous) = @_;

    # NOTE(review): presumably PREVIOUS is the entry already stored for KEY,
    # so that the first line wins; confirm against the JoinedLines role.
    if (defined $previous) {
        return undef;
    }

    my @fields = split($self->separator, $remainder, $TWO_PARTS);

    return {
        title => $fields[0],
        url => $fields[1],
    };
}

=item markdown_citation (SECTION_KEY)

Returns the citation produced by C<markdown_authority> for SECTION_KEY.
The title and URL of the section are passed as undefined values when the
key is not recognized. Croaks when SECTION_KEY is the volume key.

=cut

sub markdown_citation {
    my ($self, $section_key) = @_;

    # the underscore stands for the manual itself and is not a section
    if ($section_key eq $VOLUME_KEY) {
        croak "Invalid section $section_key";
    }

    # the citation always names the overall manual
    my $manual = $self->value($VOLUME_KEY);
    my $manual_title = $manual->{title};
    my $manual_url = $manual->{url};

    my ($heading, $link);

    if ($self->recognizes($section_key)) {

        my $known = $self->value($section_key);

        $heading = $known->{title};
        $link = $known->{url};
    }

    return markdown_authority($manual_title, $manual_url, $section_key,
        $heading, $link);
}

=item write_line (DATA_FD, SECTION_KEY, SECTION_TITLE, DESTINATION)

Plain function, not a method. Prints one UTF-8 encoded data line with the
three fields joined by double colons to DATA_FD. Trailing dots are removed
from the key and runs of whitespace in the title become single spaces.

=cut

sub write_line {
    my ($data_fd, $section_key, $section_title, $destination) = @_;

    # drop final dots
    my $clean_key = $section_key =~ s{ [.]+ $}{}xr;

    # reduce consecutive whitespace
    my $clean_title = $section_title =~ s{ \s+ }{ }gxr;

    my @fields = ($clean_key, $clean_title, $destination);

    say {$data_fd} encode_utf8(join($SEPARATOR, @fields));

    return;
}

=item write_data_file (BASEDIR, GENERATED)

Writes a fixed explanatory header followed by GENERATED to the data file
below BASEDIR. The parent folder is created when it does not exist.

=cut

sub write_data_file {
    my ($self, $basedir, $generated) = @_;

    my $preamble =<<"HEADER";
# Data about titles, sections, and URLs of manuals, used to expand references
# in tag descriptions and add links for HTML output. Each line of this file
# has three fields separated by double colons:
#
# <section> :: <title> :: <url>
#
# If <section> is an underscore, that line specifies the title and URL for the
# whole manual.

HEADER

    my $target = path("$basedir/" . $self->location);

    my $folder = $target->parent;
    $folder->mkpath
      unless -e $folder->stringify;

    # only the header needs encoding; write_line already emitted bytes
    $target->spew(encode_utf8($preamble) . $generated);

    return;
}

=item extract_sections_from_links (DATA_FD, BASE_URL)

Fetches BASE_URL and writes one data line for the manual itself and one
for every link whose text starts with a section number or appendix letter.

=cut

sub extract_sections_from_links {
    my ($self, $data_fd, $base_url)= @_;

    my $browser = WWW::Mechanize->new();
    $browser->get($base_url);

    # strip explanatory remark
    my $manual_title = $browser->title;
    $manual_title =~ s{ \s* \N{EM DASH} .* $}{}x;

    # underscore is a token for the whole page
    write_line($data_fd, $VOLUME_KEY, $manual_title, $base_url);

    my %seen;
    my $in_appendix = 0;

    # https://stackoverflow.com/a/254687
    for my $link ($browser->links) {

        my $label = $link->text;
        next
          unless length $label;

        my ($section_key, $section_title)
          = $label =~ qr{^ \s* ([A-Z]|[A-Z]?[.\d]+) \s+ (.+) $}x;
        next
          unless defined $section_key;

        # drop final dots
        $section_key =~ s{ [.]+ $}{}x;

        # reduce consecutive whitespace
        $section_title =~ s{ \s+ }{ }gx;

        my $destination = $base_url . $link->url;

        my $earlier = $seen{$section_key};
        if (defined $earlier) {

            # same heading or same target; a repeated link
            next
              if $earlier->{title} eq $section_title
              || $earlier->{destination} eq $destination;

            # Some manuals reuse section numbers for different references,
            # e.g. the Debian Policy's normal and appendix sections are
            # numbers that clash with each other. The key was seen with
            # another URL (an equal URL was skipped above), so this and all
            # later keys get an indicator prepended.
            $in_appendix = 1;
        }

        $section_key = "appendix-$section_key"
          if $in_appendix;

        $seen{$section_key} = {
            title => $section_title,
            destination => $destination,
        };

        write_line($data_fd, $section_key, $section_title, $destination);
    }

    return;
}

=item refresh (ARCHIVE, BASEDIR)

Rebuilds the data file below BASEDIR from the online manual. ARCHIVE is
not used.

=cut

sub refresh {
    my ($self, $archive, $basedir) = @_;

    my $manual_url
      = 'https://www.debian.org/doc/packaging-manuals/perl-policy/';

    # collect the generated lines in memory
    my $collected;
    my $opened = open(my $collector, '>', \$collected);
    die encode_utf8('Cannot open scalar')
      unless $opened;

    $self->extract_sections_from_links($collector, $manual_url);

    close $collector;

    $self->write_data_file($basedir, $collected);

    return;
}

=back

=head1 AUTHOR

Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.

=head1 SEE ALSO

lintian(1)

=cut

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et

# diff --git a/lib/Lintian/Data/Authority/PythonPolicy.pm b/lib/Lintian/Data/Authority/PythonPolicy.pm
# new file mode 100644
# index 0000000..ebeda04
# --- /dev/null
# +++ b/lib/Lintian/Data/Authority/PythonPolicy.pm
# @@ -0,0 +1,317 @@
# -*- perl -*-
#
# Copyright (C) 1998 Christian Schwarz and Richard Braakman
# Copyright (C) 2001 Colin Watson
# Copyright (C) 2008 Jorda Polo
# Copyright (C) 2009 Russ Allbery
# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
# Copyright (C) 2020-2021 Felix Lechner
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.

package Lintian::Data::Authority::PythonPolicy;

use v5.20;
use warnings;
use utf8;

use Carp qw(croak);
use Const::Fast;
use Path::Tiny;
use Unicode::UTF8 qw(encode_utf8);
use WWW::Mechanize ();

use Lintian::Output::Markdown qw(markdown_authority);

const my $EMPTY => q{};
const my $SPACE => q{ };
const my $SLASH => q{/};
const my $COLON => q{:};
const my $UNDERSCORE => q{_};
const my $LEFT_PARENTHESIS => q{(};
const my $RIGHT_PARENTHESIS => q{)};

const my $TWO_PARTS => 2;

const my $VOLUME_KEY => $UNDERSCORE;
const my $SEPARATOR => $COLON x 2;

use Moo;
use namespace::clean;

with 'Lintian::Data::JoinedLines';

=head1 NAME

Lintian::Data::Authority::PythonPolicy - Lintian interface for manual references

=head1 SYNOPSIS

    use Lintian::Data::Authority::PythonPolicy;

=head1 DESCRIPTION

Lintian::Data::Authority::PythonPolicy holds the reference data for the
Python Policy. It composes the Lintian::Data::JoinedLines role, turns the
fields of a data line into an entry, formats citations, and can rebuild
its data file from the online manual.

=head1 CLASS METHODS

=over 4

=item title

Name of the manual. Defaults to C<Python Policy>.

=item shorthand

Short identifier of the manual. Defaults to C<python-policy>.

=item location

Relative path of the data file. Built lazily as C<authority/> followed by
the shorthand.

=item separator

Regular expression separating the fields of a data line. Defaults to a
double colon.

=cut

has title => (is => 'rw', default => 'Python Policy');

has shorthand => (is => 'rw', default => 'python-policy');

has location => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my ($authority) = @_;

        return 'authority/' . $authority->shorthand;
    }
);

has separator => (
    is => 'rw',
    default => sub { return qr/::/; }
);

=item consumer (KEY, REMAINDER, PREVIOUS)

Splits REMAINDER at the separator into a title and a URL and returns them
as a hash reference with the keys C<title> and C<url>. Returns the
undefined value when PREVIOUS is defined.

=cut

sub consumer {
    my ($self, $key, $remainder, $previous) = @_;

    # NOTE(review): presumably PREVIOUS is the entry already stored for KEY,
    # so that the first line wins; confirm against the JoinedLines role.
    if (defined $previous) {
        return undef;
    }

    my @fields = split($self->separator, $remainder, $TWO_PARTS);

    return {
        title => $fields[0],
        url => $fields[1],
    };
}

=item markdown_citation (SECTION_KEY)

Returns the citation produced by C<markdown_authority> for SECTION_KEY.
The title and URL of the section are passed as undefined values when the
key is not recognized. Croaks when SECTION_KEY is the volume key.

=cut

sub markdown_citation {
    my ($self, $section_key) = @_;

    # the underscore stands for the manual itself and is not a section
    if ($section_key eq $VOLUME_KEY) {
        croak "Invalid section $section_key";
    }

    # the citation always names the overall manual
    my $manual = $self->value($VOLUME_KEY);
    my $manual_title = $manual->{title};
    my $manual_url = $manual->{url};

    my ($heading, $link);

    if ($self->recognizes($section_key)) {

        my $known = $self->value($section_key);

        $heading = $known->{title};
        $link = $known->{url};
    }

    return markdown_authority($manual_title, $manual_url, $section_key,
        $heading, $link);
}

=item write_line (DATA_FD, SECTION_KEY, SECTION_TITLE, DESTINATION)

Plain function, not a method. Prints one UTF-8 encoded data line with the
three fields joined by double colons to DATA_FD. Trailing dots are removed
from the key and runs of whitespace in the title become single spaces.

=cut

sub write_line {
    my ($data_fd, $section_key, $section_title, $destination) = @_;

    # drop final dots
    my $clean_key = $section_key =~ s{ [.]+ $}{}xr;

    # reduce consecutive whitespace
    my $clean_title = $section_title =~ s{ \s+ }{ }gxr;

    my @fields = ($clean_key, $clean_title, $destination);

    say {$data_fd} encode_utf8(join($SEPARATOR, @fields));

    return;
}

=item write_data_file (BASEDIR, GENERATED)

Writes a fixed explanatory header followed by GENERATED to the data file
below BASEDIR. The parent folder is created when it does not exist.

=cut

sub write_data_file {
    my ($self, $basedir, $generated) = @_;

    my $preamble =<<"HEADER";
# Data about titles, sections, and URLs of manuals, used to expand references
# in tag descriptions and add links for HTML output. Each line of this file
# has three fields separated by double colons:
#
# <section> :: <title> :: <url>
#
# If <section> is an underscore, that line specifies the title and URL for the
# whole manual.

HEADER

    my $target = path("$basedir/" . $self->location);

    my $folder = $target->parent;
    $folder->mkpath
      unless -e $folder->stringify;

    # only the header needs encoding; write_line already emitted bytes
    $target->spew(encode_utf8($preamble) . $generated);

    return;
}

=item extract_sections_from_links (DATA_FD, BASE_URL)

Fetches BASE_URL and writes one data line for the manual itself and one
for every link whose text starts with a section number.

=cut

sub extract_sections_from_links {
    my ($self, $data_fd, $base_url)= @_;

    my $browser = WWW::Mechanize->new();
    $browser->get($base_url);

    # strip explanatory remark
    my $manual_title = $browser->title;
    $manual_title =~ s{ \s* \N{EM DASH} .* $}{}x;

    # underscore is a token for the whole page
    write_line($data_fd, $VOLUME_KEY, $manual_title, $base_url);

    my %seen;
    my $in_appendix = 0;

    # https://stackoverflow.com/a/254687
    for my $link ($browser->links) {

        my $label = $link->text;
        next
          unless length $label;

        my ($section_key, $section_title)
          = $label =~ qr{^ \s* ([.\d]+) \s+ (.+) $}x;
        next
          unless defined $section_key;

        # drop final dots
        $section_key =~ s{ [.]+ $}{}x;

        # reduce consecutive whitespace
        $section_title =~ s{ \s+ }{ }gx;

        my $destination = $base_url . $link->url;

        my $earlier = $seen{$section_key};
        if (defined $earlier) {

            # same heading or same target; a repeated link
            next
              if $earlier->{title} eq $section_title
              || $earlier->{destination} eq $destination;

            # Some manuals reuse section numbers for different references,
            # e.g. the Debian Policy's normal and appendix sections are
            # numbers that clash with each other. The key was seen with
            # another URL (an equal URL was skipped above), so this and all
            # later keys get an indicator prepended.
            $in_appendix = 1;
        }

        $section_key = "appendix-$section_key"
          if $in_appendix;

        $seen{$section_key} = {
            title => $section_title,
            destination => $destination,
        };

        write_line($data_fd, $section_key, $section_title, $destination);
    }

    return;
}

=item refresh (ARCHIVE, BASEDIR)

Rebuilds the data file below BASEDIR from the online manual. ARCHIVE is
not used.

=cut

sub refresh {
    my ($self, $archive, $basedir) = @_;

    my $manual_url
      = 'https://www.debian.org/doc/packaging-manuals/python-policy/';

    # collect the generated lines in memory
    my $collected;
    my $opened = open(my $collector, '>', \$collected);
    die encode_utf8('Cannot open scalar')
      unless $opened;

    $self->extract_sections_from_links($collector, $manual_url);

    close $collector;

    $self->write_data_file($basedir, $collected);

    return;
}

=back

=head1 AUTHOR

Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.

=head1 SEE ALSO

lintian(1)

=cut

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et

# diff --git a/lib/Lintian/Data/Authority/VimPolicy.pm b/lib/Lintian/Data/Authority/VimPolicy.pm
# new file mode 100644
# index 0000000..6ffbe91
# --- /dev/null
# +++ b/lib/Lintian/Data/Authority/VimPolicy.pm
# @@ -0,0 +1,459 @@
# -*- perl -*-
#
# Copyright (C) 1998 Christian Schwarz and Richard Braakman
# Copyright (C) 2001 Colin Watson
# Copyright (C) 2008 Jorda Polo
# Copyright (C) 2009 Russ Allbery
# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
# Copyright (C) 2020-2021 Felix Lechner
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.

package Lintian::Data::Authority::VimPolicy;

use v5.20;
use warnings;
use utf8;

use Carp qw(croak);
use Const::Fast;
use File::Basename qw(basename);
use IPC::Run3;
use HTML::TokeParser::Simple;
use Path::Tiny;
use Unicode::UTF8 qw(encode_utf8);

# refresh calls URI::file->new_abs, which needs the class loaded
use URI::file;

use Lintian::Output::Markdown qw(markdown_authority);

const my $EMPTY => q{};
const my $SPACE => q{ };
const my $SLASH => q{/};
const my $COLON => q{:};
const my $INDENT => $SPACE x 4;
const my $UNDERSCORE => q{_};
const my $LEFT_PARENTHESIS => q{(};
const my $RIGHT_PARENTHESIS => q{)};

const my $TWO_PARTS => 2;

const my $VOLUME_KEY => $UNDERSCORE;
const my $SEPARATOR => $COLON x 2;

const my $WAIT_STATUS_SHIFT => 8;

use Moo;
use namespace::clean;

with 'Lintian::Data::JoinedLines';

=head1 NAME

Lintian::Data::Authority::VimPolicy - Lintian interface for manual references

=head1 SYNOPSIS

    use Lintian::Data::Authority::VimPolicy;

=head1 DESCRIPTION

Lintian::Data::Authority::VimPolicy provides a way to load data files for
manual references. The data can be rebuilt with C<refresh>, which reads the
HTML copy of the policy shipped in a Debian package.

=head1 CLASS METHODS

=over 4

=item title

Name of the manual. Defaults to C<Vim Policy>.

=item shorthand

Short identifier of the manual. Defaults to C<vim-policy>.

=item location

Relative path of the data file. Built lazily as C<authority/> followed by
the shorthand.

=item separator

Regular expression separating the fields of a data line. Defaults to a
double colon.

=cut

has title => (
    is => 'rw',
    default => 'Vim Policy'
);

has shorthand => (
    is => 'rw',
    default => 'vim-policy'
);

has location => (
    is => 'rw',
    lazy => 1,
    default => sub {
        my ($self) = @_;

        return 'authority/' . $self->shorthand;
    }
);

has separator => (
    is => 'rw',
    default => sub { qr/::/ }
);

=item consumer (KEY, REMAINDER, PREVIOUS)

Splits REMAINDER at the separator into a title and a URL and returns them
as a hash reference with the keys C<title> and C<url>. Returns the
undefined value when PREVIOUS is defined.

=cut

sub consumer {
    my ($self, $key, $remainder, $previous) = @_;

    # NOTE(review): presumably PREVIOUS is the entry already stored for KEY,
    # so that the first line wins; confirm against the JoinedLines role.
    return undef
      if defined $previous;

    my ($title, $url)= split($self->separator, $remainder, $TWO_PARTS);

    my %entry;
    $entry{title} = $title;
    $entry{url} = $url;

    return \%entry;
}

=item markdown_citation (SECTION_KEY)

Returns the citation produced by C<markdown_authority> for SECTION_KEY.
The title and URL of the section are passed as undefined values when the
key is not recognized. Croaks when SECTION_KEY is the volume key.

=cut

sub markdown_citation {
    my ($self, $section_key) = @_;

    # the underscore stands for the manual itself and is not a section
    croak "Invalid section $section_key"
      if $section_key eq $VOLUME_KEY;

    my $volume_entry = $self->value($VOLUME_KEY);

    # start with the citation to the overall manual.
    my $volume_title = $volume_entry->{title};
    my $volume_url = $volume_entry->{url};

    my $section_title;
    my $section_url;

    if ($self->recognizes($section_key)) {

        my $section_entry = $self->value($section_key);

        $section_title = $section_entry->{title};
        $section_url = $section_entry->{url};
    }

    return markdown_authority(
        $volume_title, $volume_url,$section_key,
        $section_title, $section_url
    );
}

=item write_line (DATA_FD, SECTION_KEY, SECTION_TITLE, DESTINATION)

Plain function, not a method. Prints one UTF-8 encoded data line with the
three fields joined by double colons to DATA_FD. Trailing dots are removed
from the key and runs of whitespace in the title become single spaces.

=cut

sub write_line {
    my ($data_fd, $section_key, $section_title, $destination) = @_;

    # drop final dots
    $section_key =~ s{ [.]+ $}{}x;

    # reduce consecutive whitespace
    $section_title =~ s{ \s+ }{ }gx;

    my $line= join($SEPARATOR,$section_key, $section_title, $destination);

    say {$data_fd} encode_utf8($line);

    return;
}

=item write_data_file (BASEDIR, GENERATED)

Writes a fixed explanatory header followed by GENERATED to the data file
below BASEDIR. The parent folder is created when it does not exist.

=cut

sub write_data_file {
    my ($self, $basedir, $generated) = @_;

    my $header =<<"HEADER";
# Data about titles, sections, and URLs of manuals, used to expand references
# in tag descriptions and add links for HTML output. Each line of this file
# has three fields separated by double colons:
#
# <section> :: <title> :: <url>
#
# If <section> is an underscore, that line specifies the title and URL for the
# whole manual.

HEADER

    my $data_path = "$basedir/" . $self->location;
    my $parent_dir = path($data_path)->parent->stringify;
    path($parent_dir)->mkpath
      unless -e $parent_dir;

    # only the header needs encoding; write_line already emitted bytes
    my $output = encode_utf8($header) . $generated;
    path($data_path)->spew($output);

    return;
}

=item find_installable_name (ARCHIVE, RELEASE, LIBERTY, PORT, REQUESTED_PATH)

Searches the Contents files for the architectures C<all> and PORT for
REQUESTED_PATH and returns the name of the first installable that ships
it. Warns when several installables ship the path and dies when none
does.

=cut

sub find_installable_name {
    my ($self, $archive, $release, $liberty, $port, $requested_path) = @_;

    my @installed_by;

    # find installable package
    for my $installable_architecture ('all', $port) {

        my $local_path
          = $archive->contents_gz($release, $liberty,
            $installable_architecture);

        open(my $fd, '<:gzip', $local_path)
          or die encode_utf8("Cannot open $local_path.");

        while (my $line = <$fd>) {

            chomp $line;

            my ($path, $finder) = split($SPACE, $line, $TWO_PARTS);
            next
              unless length $path
              && length $finder;

            next
              unless $path eq $requested_path;

            # the second column is a comma-separated list in which the
            # installable follows the first slash of each element
            for my $location (split(m{,}, $finder)) {

                my ($section, $installable)
                  = split(m{/}, $location, $TWO_PARTS);

                push(@installed_by, $installable);
            }
        }

        close $fd;
    }

    die encode_utf8(
        "The path $requested_path is not installed by any package.")
      if @installed_by < 1;

    if (@installed_by > 1) {
        warn encode_utf8(
            "The path $requested_path is installed by multiple packages:\n");
        warn encode_utf8($INDENT . "- $_\n")for @installed_by;
    }

    my $installable_name = shift @installed_by;

    return $installable_name;
}

=item refresh (ARCHIVE, BASEDIR)

Downloads the installable that ships the HTML copy of the policy from the
mirror of ARCHIVE, unpacks it into a temporary folder, extracts the title
and the table of contents from the index page and writes the data file
below BASEDIR. Dies when the download or the extraction fails.

=cut

sub refresh {
    my ($self, $archive, $basedir) = @_;

    # shipped as part of the vim installable
    my $shipped_base = 'usr/share/doc/vim/vim-policy.html/';
    my $index_name = 'index.html';

    my $shipped_path = $shipped_base . $index_name;
    my $stored_uri = "file:///$shipped_base";

    # neutral sort order
    local $ENV{LC_ALL} = 'C';

    my $release = 'stable';
    my $port = 'amd64';

    my $installable_name
      = $self->find_installable_name($archive, $release, 'main', $port,
        $shipped_path);

    my $deb822_by_installable_name
      = $archive->deb822_packages_by_installable_name($release, 'main', $port);

    my $work_folder
      = Path::Tiny->tempdir(
        TEMPLATE => 'refresh-doc-base-specification-XXXXXXXXXX');

    die encode_utf8("Installable $installable_name not shipped in port $port")
      unless exists $deb822_by_installable_name->{$installable_name};

    my $deb822 = $deb822_by_installable_name->{$installable_name};

    my $pool_path = $deb822->value('Filename');

    my $deb_filename = basename($pool_path);
    my $deb_local_path = "$work_folder/$deb_filename";
    my $deb_url = $archive->mirror_base . $SLASH . $pool_path;

    # run3 takes stdin, stdout and stderr in that order; stdout is captured
    # and discarded so that only standard error ends up in the message
    my $stdout;
    my $stderr;
    run3([qw{wget --quiet}, "--output-document=$deb_local_path", $deb_url],
        undef, \$stdout, \$stderr);
    my $status = ($? >> $WAIT_STATUS_SHIFT);

    # stderr already in UTF-8
    die(
        length $stderr
        ? $stderr
        : encode_utf8("wget exited with status $status"))if $status;

    my $extract_folder = "$work_folder/unpacked/$pool_path";
    path($extract_folder)->mkpath;

    run3([qw{dpkg-deb --extract}, $deb_local_path, $extract_folder],
        undef, \$stdout, \$stderr);
    $status = ($? >> $WAIT_STATUS_SHIFT);

    # stderr already in UTF-8
    die(
        length $stderr
        ? $stderr
        : encode_utf8("dpkg-deb exited with status $status"))if $status;

    unlink($deb_local_path)
      or die encode_utf8("Cannot delete $deb_local_path");

    my $generated;
    open(my $memory_fd, '>', \$generated)
      or die encode_utf8("Cannot open scalar: $!");

    my $fresh_uri = URI::file->new_abs("/$extract_folder/$shipped_path");

    my $parser = HTML::TokeParser::Simple->new(url => $fresh_uri);
    my $in_title = 0;
    my $in_dt_tag = 0;
    my $after_a_tag = 0;

    my $page_title = $EMPTY;
    my $section_key = $EMPTY;
    my $section_title = $EMPTY;
    my $relative_destination = $EMPTY;

    while (my $token = $parser->get_token) {

        if (length $token->get_tag) {

            if ($token->get_tag eq 'h1') {

                # a heading without a class attribute is not the title
                $in_title = ($token->is_start_tag
                      && ($token->get_attr('class') // $EMPTY) eq 'title');

                # not yet leaving title
                next
                  if $in_title;

                # trim both ends
                $page_title =~ s/^\s+|\s+$//g;

                # underscore is a token for the whole page
                write_line($memory_fd, $VOLUME_KEY, $page_title,
                    $stored_uri . $index_name)
                  if length $page_title;

                $page_title = $EMPTY;
            }

            if ($token->get_tag eq 'dt') {

                $in_dt_tag = $token->is_start_tag;

                # not yet leaving dt tag
                next
                  if $in_dt_tag;

                # trim both ends
                $section_key =~ s/^\s+|\s+$//g;
                $section_title =~ s/^\s+|\s+$//g;

                my $full_destination = $stored_uri . $relative_destination;

                write_line(
                    $memory_fd, $section_key,
                    $section_title,$full_destination
                )if length $section_title;

                $section_key = $EMPTY;
                $section_title = $EMPTY;
                $relative_destination = $EMPTY;
            }

            if ($token->get_tag eq 'a') {

                $after_a_tag = $token->is_start_tag;

                $relative_destination = $token->get_attr('href')
                  if $token->is_start_tag;
            }

        } else {

            # concatenate span objects
            $page_title .= $token->as_is
              if length $token->as_is
              && $in_title
              && $after_a_tag;

            # text inside a dt element that is not inside a link is taken
            # as the section key
            $section_key = $token->as_is
              if length $token->as_is
              && $in_dt_tag
              && !$after_a_tag;

            # concatenate span objects
            $section_title .= $token->as_is
              if length $token->as_is
              && $in_dt_tag
              && $after_a_tag;
        }
    }

    close $memory_fd;

    $self->write_data_file($basedir, $generated);

    return;
}

=back

=head1 AUTHOR

Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.

=head1 SEE ALSO

lintian(1)

=cut

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et