# -*- perl -*- # # Copyright (C) 1998 Christian Schwarz and Richard Braakman # Copyright (C) 2001 Colin Watson # Copyright (C) 2008 Jorda Polo # Copyright (C) 2009 Russ Allbery # Copyright (C) 2017-2019 Chris Lamb # Copyright (C) 2020-2021 Felix Lechner # # This program is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the Free # Software Foundation; either version 2 of the License, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # more details. # # You should have received a copy of the GNU General Public License along with # this program. If not, see . package Lintian::Data::Authority::FilesystemHierarchy; use v5.20; use warnings; use utf8; use Carp qw(croak); use Const::Fast; use File::Basename qw(dirname); use Path::Tiny; use Unicode::UTF8 qw(encode_utf8); use WWW::Mechanize (); use Lintian::Output::Markdown qw(markdown_authority); const my $EMPTY => q{}; const my $SPACE => q{ }; const my $SLASH => q{/}; const my $COLON => q{:}; const my $UNDERSCORE => q{_}; const my $LEFT_PARENTHESIS => q{(}; const my $RIGHT_PARENTHESIS => q{)}; const my $THREE_PARTS => 3; const my $VOLUME_KEY => $UNDERSCORE; const my $SEPARATOR => $COLON x 2; use Moo; use namespace::clean; with 'Lintian::Data::JoinedLines'; =head1 NAME Lintian::Data::Authority::FilesystemHierarchy - Lintian interface for manual references =head1 SYNOPSIS use Lintian::Data::Authority::FilesystemHierarchy; =head1 DESCRIPTION Lintian::Data::Authority::FilesystemHierarchy provides a way to load data files for manual references. =head1 CLASS METHODS =over 4 =item title =item shorthand =item location =item separator =cut has title => ( is => 'rw', default => 'Filesystem Hierarchy Standard' ); has shorthand => ( is => 'rw', default => 'filesystem-hierarchy' ); has location => ( is => 'rw', lazy => 1, default => sub { my ($self) = @_; return 'authority/' . $self->shorthand; } ); has separator => ( is => 'rw', default => sub { qr/::/ } ); =item consumer =cut sub consumer { my ($self, $key, $remainder, $previous) = @_; return undef if defined $previous; my ($number, $title, $url) = split($self->separator, $remainder, $THREE_PARTS); my %entry; $entry{title} = $title; $entry{number} = $number; $entry{url} = $url; return \%entry; } =item markdown_citation =cut sub markdown_citation { my ($self, $section_key) = @_; croak "Invalid section $section_key" if $section_key eq $VOLUME_KEY; my $volume_entry = $self->value($VOLUME_KEY); # start with the citation to the overall manual. my $volume_title = $volume_entry->{title}; my $volume_url = $volume_entry->{url}; my $section_title; my $section_number; my $section_url; if ($self->recognizes($section_key)) { my $section_entry = $self->value($section_key); $section_title = $section_entry->{title}; $section_number = $section_entry->{number}; $section_url = $section_entry->{url}; } return markdown_authority( $volume_title, $volume_url,$section_number, $section_title, $section_url ); } =item write_line =cut sub write_line { my ($data_fd, $section_key, $section_number, $section_title, $destination) = @_; # drop final dots $section_key =~ s{ [.]+ $}{}x; # reduce consecutive whitespace $section_title =~ s{ \s+ }{ }gx; my $line= join($SEPARATOR, $section_key, $section_number, $section_title, $destination); say {$data_fd} encode_utf8($line); return; } =item write_data_file =cut sub write_data_file { my ($self, $basedir, $generated) = @_; my $header =<<"HEADER"; # Data about titles, sections, and URLs of manuals, used to expand references # in tag descriptions and add links for HTML output. Each line of this file # has three fields separated by double colons: # #
:: :: <url> # # If <section> is an underscore, that line specifies the title and URL for the # whole manual. HEADER my $data_path = "$basedir/" . $self->location; my $parent_dir = path($data_path)->parent->stringify; path($parent_dir)->mkpath unless -e $parent_dir; my $output = encode_utf8($header) . $generated; path($data_path)->spew($output); return; } =item extract_sections_from_links =cut sub extract_sections_from_links { my ($self, $data_fd, $base_url, $page_name)= @_; my $page_url = $base_url . $page_name; my $mechanize = WWW::Mechanize->new(); $mechanize->get($page_url); my $page_title = $mechanize->title; # strip explanatory remark $page_title =~ s{ \s* \N{EM DASH} .* $}{}x; # underscore is a token for the whole page write_line($data_fd, $VOLUME_KEY, $EMPTY, $page_title, $page_url); my %by_section_key; my $in_appendix = 0; # https://stackoverflow.com/a/254687 for my $link ($mechanize->links) { next unless length $link->url; # make lowercase my $section_key = lc($link->url); # strip hash; it's a fragment; $section_key =~ s{^ [#] }{}x; next unless length $link->text; next if $link->text !~ qr{^ \s* ([.\d]+) \s+ (.+) $}x; my $section_number = $1; my $section_title = $2; # drop final dot $section_number =~ s{ [.]+ $}{}x; # reduce consecutive whitespace $section_title =~ s{ \s+ }{ }gx; # includes hash my $relative_destination = $link->url; my $destination_base = $page_url; $destination_base = dirname($page_url) . $SLASH unless $destination_base =~ m{ / $}x || $relative_destination =~ m{^ [#] }x; my $full_destination = $destination_base . $relative_destination; next if exists $by_section_key{$section_key}; $by_section_key{$section_key}{title} = $section_title; $by_section_key{$section_key}{number} = $section_number; $by_section_key{$section_key}{destination} = $full_destination; write_line($data_fd, $section_key, $section_number, $section_title, $full_destination); } return; } =item refresh =cut sub refresh { my ($self, $archive, $basedir) = @_; # single page version # plain directory shows a file list my $base_url = 'https://refspecs.linuxfoundation.org/FHS_3.0/'; my $index_name = 'fhs-3.0.html'; my $generated; open(my $memory_fd, '>', \$generated) or die encode_utf8('Cannot open scalar'); $self->extract_sections_from_links($memory_fd, $base_url, $index_name); close $memory_fd; $self->write_data_file($basedir, $generated); return; } =back =head1 AUTHOR Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian. =head1 SEE ALSO lintian(1) =cut 1; # Local Variables: # indent-tabs-mode: nil # cperl-indent-level: 4 # End: # vim: syntax=perl sw=4 sts=4 sr et