13 files changed, 4360 insertions, 0 deletions
diff --git a/lib/Lintian/Data/Authority/DebconfSpecification.pm b/lib/Lintian/Data/Authority/DebconfSpecification.pm
new file mode 100644
index 0000000..661d11e
--- /dev/null
+++ b/lib/Lintian/Data/Authority/DebconfSpecification.pm
@@ -0,0 +1,328 @@
+# -*- perl -*-
+#
+# Copyright (C) 1998 Christian Schwarz and Richard Braakman
+# Copyright (C) 2001 Colin Watson
+# Copyright (C) 2008 Jorda Polo
+# Copyright (C) 2009 Russ Allbery
+# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
+# Copyright (C) 2020-2021 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program.  If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Data::Authority::DebconfSpecification;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Carp qw(croak);
+use Const::Fast;
+use File::Basename qw(dirname);
+use Path::Tiny;
+use Unicode::UTF8 qw(encode_utf8);
+use WWW::Mechanize ();
+
+use Lintian::Output::Markdown qw(markdown_authority);
+
+const my $EMPTY => q{};
+const my $SPACE => q{ };
+const my $SLASH => q{/};
+const my $COLON => q{:};
+const my $UNDERSCORE => q{_};
+const my $LEFT_PARENTHESIS => q{(};
+const my $RIGHT_PARENTHESIS => q{)};
+
+const my $TWO_PARTS => 2;    # split limit in consumer: title and URL
+
+const my $VOLUME_KEY => $UNDERSCORE;    # key of the line for the whole manual
+const my $SEPARATOR => $COLON x 2;      # joins the fields of a written line
+
+use Moo;
+use namespace::clean;
+
+with 'Lintian::Data::JoinedLines';
+
+=head1 NAME
+
+Lintian::Data::Authority::DebconfSpecification - Lintian interface for manual references
+
+=head1 SYNOPSIS
+
+    use Lintian::Data::Authority::DebconfSpecification;
+
+=head1 DESCRIPTION
+
+Lintian::Data::Authority::DebconfSpecification provides a way to load data files for
+manual references.
+
+=head1 CLASS METHODS
+
+=over 4
+
+=item title
+
+=item shorthand
+
+=item location
+
+=item separator
+
+=cut
+
+has title => (
+    is => 'rw',
+    default => 'Debconf Specification'    # human-readable name
+);
+
+has shorthand => (
+    is => 'rw',
+    default => 'debconf-specification'    # also names the data file
+);
+
+has location => (
+    is => 'rw',
+    lazy => 1,    # computed on first read, from the shorthand at that time
+    default => sub {
+        my ($self) = @_;
+        # path of the data file, relative to a base directory
+        return 'authority/' . $self->shorthand;
+    }
+);
+
+has separator => (
+    is => 'rw',
+    default => sub { qr/::/ }    # divides the fields of a data line
+);
+
+=item consumer
+
+=cut
+
+sub consumer {
+    my ($self, $key, $remainder, $previous) = @_;
+
+    # $previous is defined, presumably by an earlier line for the same key
+    # NOTE(review): what the caller does with undef is not visible here
+    return undef
+      if defined $previous;
+
+    # the remainder carries the title and the URL, in that order
+    my @fields = split($self->separator, $remainder, $TWO_PARTS);
+
+    my %entry = (title => $fields[0], url => $fields[1]);
+    return \%entry;
+}
+
+=item markdown_citation
+
+=cut
+
+# Build the Markdown citation for one section of this manual. Croaks when
+# given the volume key; unknown sections pass on an undefined title and URL.
+sub markdown_citation {
+    my ($self, $section_key) = @_;
+
+    # the volume key denotes the manual as a whole, never a section
+    croak "Invalid section $section_key"
+      if $section_key eq $VOLUME_KEY;
+
+    # every citation names the overall manual first
+    my $manual = $self->value($VOLUME_KEY);
+    my $manual_title = $manual->{title};
+    my $manual_url = $manual->{url};
+
+    # an empty record stands in for sections absent from the data
+    my $section = {};
+    $section = $self->value($section_key)
+      if $self->recognizes($section_key);
+
+    my $title = $section->{title};
+    my $url = $section->{url};
+
+    # arguments: manual title and URL, section key, section title and URL
+    return markdown_authority(
+        $manual_title, $manual_url, $section_key,
+        $title, $url
+    );
+}
+
+=item write_line
+
+=cut
+
+# Print one data line to $data_fd: key, title and destination joined by
+# the field separator and encoded as UTF-8.
+sub write_line {
+    my ($data_fd, $section_key, $section_title, $destination) = @_;
+
+    my @fields = ($section_key, $section_title, $destination);
+
+    # the key loses trailing dots; whitespace runs in the title collapse
+    $fields[0] =~ s{ [.]+ $}{}x;
+    $fields[1] =~ s{ \s+ }{ }gx;
+
+    say {$data_fd} encode_utf8(join($SEPARATOR, @fields));
+
+    return;
+}
+
+=item write_data_file
+
+=cut
+
+# Store the generated lines under $basedir at this authority's location,
+# preceded by a comment header that explains the file format.
+sub write_data_file {
+    my ($self, $basedir, $generated) = @_;
+
+    # the folder for the data file may not exist yet
+    my $target = path("$basedir/" . $self->location);
+    my $folder = $target->parent->stringify;
+    path($folder)->mkpath
+      unless -e $folder;
+
+    my $preamble =<<"HEADER";
+# Data about titles, sections, and URLs of manuals, used to expand references
+# in tag descriptions and add links for HTML output.  Each line of this file
+# has three fields separated by double colons:
+#
+#     <section> :: <title> :: <url>
+#
+# If <section> is an underscore, that line specifies the title and URL for the
+# whole manual.
+
+HEADER
+    $target->spew(encode_utf8($preamble) . $generated);
+    return;
+}
+
+=item extract_sections_from_links
+
+=cut
+
+sub extract_sections_from_links {
+    my ($self, $data_fd, $base_url, $page_name)= @_;
+    # writes one data line for the page itself and one per numbered link
+    my $page_url = $base_url . $page_name;
+
+    my $mechanize = WWW::Mechanize->new();
+    $mechanize->get($page_url);
+
+    my $page_title = $mechanize->title;
+
+    # strip explanatory remark that follows an em dash
+    $page_title =~ s{ \s* \N{EM DASH} .* $}{}x;
+
+    # underscore is a token for the whole page
+    write_line($data_fd, $VOLUME_KEY, $page_title, $page_url);
+
+    my %by_section_key;
+    my $in_appendix = 0;
+
+    # https://stackoverflow.com/a/254687
+    for my $link ($mechanize->links) {
+
+        next
+          unless length $link->text;
+        # only link texts shaped like "<dotted number> <title>" qualify
+        next
+          if $link->text !~ qr{^ \s* ([.\d]+) \s+ (.+) $}x;
+        # $1 and $2 still hold the captures of the match just above
+        my $section_key = $1;
+        my $section_title = $2;
+
+        # drop final dots
+        $section_key =~ s{ [.]+ $}{}x;
+
+        # reduce consecutive whitespace
+        $section_title =~ s{ \s+ }{ }gx;
+
+        my $relative_destination = $link->url;
+        # fragments extend the page URL; other links extend its parent folder
+        my $destination_base = $page_url;
+        $destination_base = dirname($page_url) . $SLASH
+          unless $destination_base =~ m{ / $}x
+          || $relative_destination =~ m{^ [#] }x;
+
+        my $full_destination = $destination_base . $relative_destination;
+        # skip a key already recorded with this title or this destination
+        next
+          if exists $by_section_key{$section_key}
+          && ( $by_section_key{$section_key}{title} eq $section_title
+            || $by_section_key{$section_key}{destination} eq$full_destination);
+
+        # Some manuals reuse section numbers for different references,
+        # e.g. the Debian Policy's normal and appendix sections are
+        # numbers that clash with each other. Track if we've already
+        # seen a section pointing to some other URL than the current one,
+        # and prepend it with an indicator
+        $in_appendix = 1
+          if exists $by_section_key{$section_key}
+          && $by_section_key{$section_key}{destination} ne$full_destination;
+        # the flag is never reset, so every later key gets the prefix too
+        $section_key = "appendix-$section_key"
+          if $in_appendix;
+
+        $by_section_key{$section_key}{title} = $section_title;
+        $by_section_key{$section_key}{destination} = $full_destination;
+
+        write_line($data_fd, $section_key, $section_title, $full_destination);
+    }
+
+    return;
+}
+
+=item refresh
+
+=cut
+
+# Rebuild the data file from the online Debconf Specification.
+# $archive is accepted but not used here.
+sub refresh {
+    my ($self, $archive, $basedir) = @_;
+
+    # the links are collected in memory first
+    my $collected;
+    open(my $collector, '>', \$collected)
+      or die encode_utf8('Cannot open scalar');
+
+    # the whole specification is a single page
+    my $folder_url = 'https://www.debian.org/doc/packaging-manuals/';
+    my $page = 'debconf_specification.html';
+    $self->extract_sections_from_links($collector, $folder_url, $page);
+    close $collector;
+
+    $self->write_data_file($basedir, $collected);
+    return;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Authority/DebianPolicy.pm b/lib/Lintian/Data/Authority/DebianPolicy.pm
new file mode 100644
index 0000000..177b07d
--- /dev/null
+++ b/lib/Lintian/Data/Authority/DebianPolicy.pm
@@ -0,0 +1,321 @@
+# -*- perl -*-
+#
+# Copyright (C) 1998 Christian Schwarz and Richard Braakman
+# Copyright (C) 2001 Colin Watson
+# Copyright (C) 2008 Jorda Polo
+# Copyright (C) 2009 Russ Allbery
+# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
+# Copyright (C) 2020-2021 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program.  If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Data::Authority::DebianPolicy;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Carp qw(croak);
+use Const::Fast;
+use Path::Tiny;
+use Unicode::UTF8 qw(encode_utf8);
+use WWW::Mechanize ();
+
+use Lintian::Output::Markdown qw(markdown_authority);
+
+const my $EMPTY => q{};
+const my $SPACE => q{ };
+const my $SLASH => q{/};
+const my $COLON => q{:};
+const my $UNDERSCORE => q{_};
+const my $LEFT_PARENTHESIS => q{(};
+const my $RIGHT_PARENTHESIS => q{)};
+
+const my $TWO_PARTS => 2;    # split limit in consumer: title and URL
+
+const my $VOLUME_KEY => $UNDERSCORE;    # key of the line for the whole manual
+const my $SEPARATOR => $COLON x 2;      # joins the fields of a written line
+
+use Moo;
+use namespace::clean;
+
+with 'Lintian::Data::JoinedLines';
+
+=head1 NAME
+
+Lintian::Data::Authority::DebianPolicy - Lintian interface for manual references
+
+=head1 SYNOPSIS
+
+    use Lintian::Data::Authority::DebianPolicy;
+
+=head1 DESCRIPTION
+
+Lintian::Data::Authority::DebianPolicy provides a way to load data files for
+manual references.
+
+=head1 CLASS METHODS
+
+=over 4
+
+=item title
+
+=item shorthand
+
+=item location
+
+=item separator
+
+=cut
+
+has title => (
+    is => 'rw',
+    default => 'Debian Policy'    # human-readable name
+);
+
+has shorthand => (
+    is => 'rw',
+    default => 'debian-policy'    # also names the data file
+);
+
+has location => (
+    is => 'rw',
+    lazy => 1,    # computed on first read, from the shorthand at that time
+    default => sub {
+        my ($self) = @_;
+        # path of the data file, relative to a base directory
+        return 'authority/' . $self->shorthand;
+    }
+);
+
+has separator => (
+    is => 'rw',
+    default => sub { qr/::/ }    # divides the fields of a data line
+);
+
+=item consumer
+
+=cut
+
+sub consumer {
+    my ($self, $key, $remainder, $previous) = @_;
+
+    # $previous is defined, presumably by an earlier line for the same key
+    # NOTE(review): what the caller does with undef is not visible here
+    return undef
+      if defined $previous;
+
+    # the remainder carries the title and the URL, in that order
+    my @fields = split($self->separator, $remainder, $TWO_PARTS);
+
+    my %entry = (title => $fields[0], url => $fields[1]);
+    return \%entry;
+}
+
+=item markdown_citation
+
+=cut
+
+# Build the Markdown citation for one section of this manual. Croaks when
+# given the volume key; unknown sections pass on an undefined title and URL.
+sub markdown_citation {
+    my ($self, $section_key) = @_;
+
+    # the volume key denotes the manual as a whole, never a section
+    croak "Invalid section $section_key"
+      if $section_key eq $VOLUME_KEY;
+
+    # every citation names the overall manual first
+    my $manual = $self->value($VOLUME_KEY);
+    my $manual_title = $manual->{title};
+    my $manual_url = $manual->{url};
+
+    # an empty record stands in for sections absent from the data
+    my $section = {};
+    $section = $self->value($section_key)
+      if $self->recognizes($section_key);
+
+    my $title = $section->{title};
+    my $url = $section->{url};
+
+    # arguments: manual title and URL, section key, section title and URL
+    return markdown_authority(
+        $manual_title, $manual_url, $section_key,
+        $title, $url
+    );
+}
+
+=item write_line
+
+=cut
+
+# Print one data line to $data_fd: key, title and destination joined by
+# the field separator and encoded as UTF-8.
+sub write_line {
+    my ($data_fd, $section_key, $section_title, $destination) = @_;
+
+    my @fields = ($section_key, $section_title, $destination);
+
+    # the key loses trailing dots; whitespace runs in the title collapse
+    $fields[0] =~ s{ [.]+ $}{}x;
+    $fields[1] =~ s{ \s+ }{ }gx;
+
+    say {$data_fd} encode_utf8(join($SEPARATOR, @fields));
+
+    return;
+}
+
+=item write_data_file
+
+=cut
+
+# Store the generated lines under $basedir at this authority's location,
+# preceded by a comment header that explains the file format.
+sub write_data_file {
+    my ($self, $basedir, $generated) = @_;
+
+    # the folder for the data file may not exist yet
+    my $target = path("$basedir/" . $self->location);
+    my $folder = $target->parent->stringify;
+    path($folder)->mkpath
+      unless -e $folder;
+
+    my $preamble =<<"HEADER";
+# Data about titles, sections, and URLs of manuals, used to expand references
+# in tag descriptions and add links for HTML output.  Each line of this file
+# has three fields separated by double colons:
+#
+#     <section> :: <title> :: <url>
+#
+# If <section> is an underscore, that line specifies the title and URL for the
+# whole manual.
+
+HEADER
+    $target->spew(encode_utf8($preamble) . $generated);
+    return;
+}
+
+=item extract_sections_from_links
+
+=cut
+
+sub extract_sections_from_links {
+    my ($self, $data_fd, $base_url)= @_;
+    # writes one data line for the page itself and one per numbered link
+    my $mechanize = WWW::Mechanize->new();
+    $mechanize->get($base_url);
+
+    my $page_title = $mechanize->title;
+
+    # strip explanatory remark that follows an em dash
+    $page_title =~ s{ \s* \N{EM DASH} .* $}{}x;
+
+    # underscore is a token for the whole page
+    write_line($data_fd, $VOLUME_KEY, $page_title, $base_url);
+
+    my %by_section_key;
+    my $in_appendix = 0;
+
+    # https://stackoverflow.com/a/254687
+    for my $link ($mechanize->links) {
+
+        next
+          unless length $link->text;
+        # only link texts shaped like "<dotted number> <title>" qualify
+        next
+          if $link->text !~ qr{^ \s* ([.\d]+) \s+ (.+) $}x;
+        # $1 and $2 still hold the captures of the match just above
+        my $section_key = $1;
+        my $section_title = $2;
+
+        # drop final dots
+        $section_key =~ s{ [.]+ $}{}x;
+
+        # reduce consecutive whitespace
+        $section_title =~ s{ \s+ }{ }gx;
+        # the link target is appended to the base URL exactly as found
+        my $destination = $base_url . $link->url;
+        # skip a key already recorded with this title or this destination
+        next
+          if exists $by_section_key{$section_key}
+          && ( $by_section_key{$section_key}{title} eq $section_title
+            || $by_section_key{$section_key}{destination} eq $destination);
+
+        # Some manuals reuse section numbers for different references,
+        # e.g. the Debian Policy's normal and appendix sections are
+        # numbers that clash with each other. Track if we've already
+        # seen a section pointing to some other URL than the current one,
+        # and prepend it with an indicator
+        $in_appendix = 1
+          if exists $by_section_key{$section_key}
+          && $by_section_key{$section_key}{destination} ne $destination;
+        # the flag is never reset, so every later key gets the prefix too
+        $section_key = "appendix-$section_key"
+          if $in_appendix;
+
+        # do not collect the upgrading checklists in appendix 10 of policy
+        # the numbering changes all the time
+        next
+          if $section_key =~ m{^ appendix-10 [.] }x;
+
+        $by_section_key{$section_key}{title} = $section_title;
+        $by_section_key{$section_key}{destination} = $destination;
+
+        write_line($data_fd, $section_key, $section_title, $destination);
+    }
+
+    return;
+}
+
+=item refresh
+
+=cut
+
+# Rebuild the data file from the online Debian Policy pages.
+sub refresh {
+    my ($self, $archive, $basedir) = @_;
+
+    # the links are collected in memory first; $archive is not used here
+    my $collected;
+    open(my $collector, '>', \$collected)
+      or die encode_utf8('Cannot open scalar');
+
+    my $manual_url = 'https://www.debian.org/doc/debian-policy/';
+    $self->extract_sections_from_links($collector, $manual_url);
+    close $collector;
+
+    $self->write_data_file($basedir, $collected);
+
+    return;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Authority/DeveloperReference.pm b/lib/Lintian/Data/Authority/DeveloperReference.pm
new file mode 100644
index 0000000..676cbf4
--- /dev/null
+++ b/lib/Lintian/Data/Authority/DeveloperReference.pm
@@ -0,0 +1,319 @@
+# -*- perl -*-
+#
+# Copyright (C) 1998 Christian Schwarz and Richard Braakman
+# Copyright (C) 2001 Colin Watson
+# Copyright (C) 2008 Jorda Polo
+# Copyright (C) 2009 Russ Allbery
+# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
+# Copyright (C) 2020-2021 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program.  If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Data::Authority::DeveloperReference;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Carp qw(croak);
+use Const::Fast;
+use Path::Tiny;
+use Unicode::UTF8 qw(encode_utf8);
+use WWW::Mechanize ();
+
+use Lintian::Output::Markdown qw(markdown_authority);
+
+const my $EMPTY => q{};
+const my $SPACE => q{ };
+const my $SLASH => q{/};
+const my $COLON => q{:};
+const my $UNDERSCORE => q{_};
+const my $LEFT_PARENTHESIS => q{(};
+const my $RIGHT_PARENTHESIS => q{)};
+
+const my $TWO_PARTS => 2;    # split limit in consumer: title and URL
+
+const my $VOLUME_KEY => $UNDERSCORE;    # key of the line for the whole manual
+const my $SEPARATOR => $COLON x 2;      # joins the fields of a written line
+
+use Moo;
+use namespace::clean;
+
+with 'Lintian::Data::JoinedLines';
+
+=head1 NAME
+
+Lintian::Data::Authority::DeveloperReference - Lintian interface for manual references
+
+=head1 SYNOPSIS
+
+    use Lintian::Data::Authority::DeveloperReference;
+
+=head1 DESCRIPTION
+
+Lintian::Data::Authority::DeveloperReference provides a way to load data files for
+manual references.
+
+=head1 CLASS METHODS
+
+=over 4
+
+=item title
+
+=item shorthand
+
+=item location
+
+=item separator
+
+=cut
+
+has title => (
+    is => 'rw',
+    default => q{Developer's Reference}    # human-readable name
+);
+
+has shorthand => (
+    is => 'rw',
+    default => 'developer-reference'    # also names the data file
+);
+
+has location => (
+    is => 'rw',
+    lazy => 1,    # computed on first read, from the shorthand at that time
+    default => sub {
+        my ($self) = @_;
+        # path of the data file, relative to a base directory
+        return 'authority/' . $self->shorthand;
+    }
+);
+
+has separator => (
+    is => 'rw',
+    default => sub { qr/::/ }    # divides the fields of a data line
+);
+
+=item consumer
+
+=cut
+
+sub consumer {
+    my ($self, $key, $remainder, $previous) = @_;
+
+    # $previous is defined, presumably by an earlier line for the same key
+    # NOTE(review): what the caller does with undef is not visible here
+    return undef
+      if defined $previous;
+
+    # the remainder carries the title and the URL, in that order
+    my @fields = split($self->separator, $remainder, $TWO_PARTS);
+
+    my %entry = (title => $fields[0], url => $fields[1]);
+    return \%entry;
+}
+
+=item markdown_citation
+
+=cut
+
+# Build the Markdown citation for one section of this manual. Croaks when
+# given the volume key; unknown sections pass on an undefined title and URL.
+sub markdown_citation {
+    my ($self, $section_key) = @_;
+
+    # the volume key denotes the manual as a whole, never a section
+    croak "Invalid section $section_key"
+      if $section_key eq $VOLUME_KEY;
+
+    # every citation names the overall manual first
+    my $manual = $self->value($VOLUME_KEY);
+    my $manual_title = $manual->{title};
+    my $manual_url = $manual->{url};
+
+    # an empty record stands in for sections absent from the data
+    my $section = {};
+    $section = $self->value($section_key)
+      if $self->recognizes($section_key);
+
+    my $title = $section->{title};
+    my $url = $section->{url};
+
+    # arguments: manual title and URL, section key, section title and URL
+    return markdown_authority(
+        $manual_title, $manual_url, $section_key,
+        $title, $url
+    );
+}
+
+=item write_line
+
+=cut
+
+# Print one data line to $data_fd: key, title and destination joined by
+# the field separator and encoded as UTF-8.
+sub write_line {
+    my ($data_fd, $section_key, $section_title, $destination) = @_;
+
+    my @fields = ($section_key, $section_title, $destination);
+
+    # the key loses trailing dots; whitespace runs in the title collapse
+    $fields[0] =~ s{ [.]+ $}{}x;
+    $fields[1] =~ s{ \s+ }{ }gx;
+
+    say {$data_fd} encode_utf8(join($SEPARATOR, @fields));
+
+    return;
+}
+
+=item write_data_file
+
+=cut
+
+# Store the generated lines under $basedir at this authority's location,
+# preceded by a comment header that explains the file format.
+sub write_data_file {
+    my ($self, $basedir, $generated) = @_;
+
+    # the folder for the data file may not exist yet
+    my $target = path("$basedir/" . $self->location);
+    my $folder = $target->parent->stringify;
+    path($folder)->mkpath
+      unless -e $folder;
+
+    my $preamble =<<"HEADER";
+# Data about titles, sections, and URLs of manuals, used to expand references
+# in tag descriptions and add links for HTML output.  Each line of this file
+# has three fields separated by double colons:
+#
+#     <section> :: <title> :: <url>
+#
+# If <section> is an underscore, that line specifies the title and URL for the
+# whole manual.
+
+HEADER
+    $target->spew(encode_utf8($preamble) . $generated);
+    return;
+}
+
+=item extract_sections_from_links
+
+=cut
+
+sub extract_sections_from_links {
+    my ($self, $data_fd, $base_url)= @_;
+    # writes one data line for the page itself and one per numbered link
+    my $mechanize = WWW::Mechanize->new();
+    $mechanize->get($base_url);
+
+    my $page_title = $mechanize->title;
+
+    # strip explanatory remark that follows an em dash
+    $page_title =~ s{ \s* \N{EM DASH} .* $}{}x;
+
+    # underscore is a token for the whole page
+    write_line($data_fd, $VOLUME_KEY, $page_title, $base_url);
+
+    my %by_section_key;
+    my $in_appendix = 0;
+
+    # https://stackoverflow.com/a/254687
+    for my $link ($mechanize->links) {
+
+        next
+          unless length $link->text;
+        # only link texts shaped like "<dotted number> <title>" qualify
+        next
+          if $link->text !~ qr{^ \s* ([.\d]+) \s+ (.+) $}x;
+        # $1 and $2 still hold the captures of the match just above
+        my $section_key = $1;
+        my $section_title = $2;
+
+        # drop final dots
+        $section_key =~ s{ [.]+ $}{}x;
+
+        # reduce consecutive whitespace
+        $section_title =~ s{ \s+ }{ }gx;
+        # the link target is appended to the base URL exactly as found
+        my $destination = $base_url . $link->url;
+
+        # developers reference likes to return locale specific pages
+        $destination =~ s{ [.]\w{2}[.]html }{.html}x;
+        # skip a key already recorded with this title or this destination
+        next
+          if exists $by_section_key{$section_key}
+          && ( $by_section_key{$section_key}{title} eq $section_title
+            || $by_section_key{$section_key}{destination} eq $destination);
+
+        # Some manuals reuse section numbers for different references,
+        # e.g. the Debian Policy's normal and appendix sections are
+        # numbers that clash with each other. Track if we've already
+        # seen a section pointing to some other URL than the current one,
+        # and prepend it with an indicator
+        $in_appendix = 1
+          if exists $by_section_key{$section_key}
+          && $by_section_key{$section_key}{destination} ne $destination;
+        # the flag is never reset, so every later key gets the prefix too
+        $section_key = "appendix-$section_key"
+          if $in_appendix;
+
+        $by_section_key{$section_key}{title} = $section_title;
+        $by_section_key{$section_key}{destination} = $destination;
+
+        write_line($data_fd, $section_key, $section_title, $destination);
+    }
+
+    return;
+}
+
+=item refresh
+
+=cut
+
+# Rebuild the data file from the online Developer's Reference pages.
+sub refresh {
+    my ($self, $archive, $basedir) = @_;
+
+    # the links are collected in memory first; $archive is not used here
+    my $collected;
+    open(my $collector, '>', \$collected)
+      or die encode_utf8('Cannot open scalar');
+
+    my $manual_url = 'https://www.debian.org/doc/developers-reference/';
+    $self->extract_sections_from_links($collector, $manual_url);
+    close $collector;
+
+    $self->write_data_file($basedir, $collected);
+
+    return;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Authority/DocBaseManual.pm b/lib/Lintian/Data/Authority/DocBaseManual.pm
new file mode 100644
index 0000000..53cfbcb
--- /dev/null
+++ b/lib/Lintian/Data/Authority/DocBaseManual.pm
@@ -0,0 +1,431 @@
+# -*- perl -*-
+#
+# Copyright (C) 1998 Christian Schwarz and Richard Braakman
+# Copyright (C) 2001 Colin Watson
+# Copyright (C) 2008 Jorda Polo
+# Copyright (C) 2009 Russ Allbery
+# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
+# Copyright (C) 2020-2021 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program.  If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Data::Authority::DocBaseManual;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Carp qw(croak);
+use Const::Fast;
+use File::Basename qw(dirname basename);
+use IPC::Run3;
+use Path::Tiny;
+use Unicode::UTF8 qw(encode_utf8);
+use WWW::Mechanize ();
+
+use Lintian::Output::Markdown qw(markdown_authority);
+
+const my $EMPTY => q{};
+const my $SPACE => q{ };
+const my $SLASH => q{/};
+const my $COLON => q{:};
+const my $INDENT => $SPACE x 4;    # prefix for list items in warnings
+const my $UNDERSCORE => q{_};
+const my $LEFT_PARENTHESIS => q{(};
+const my $RIGHT_PARENTHESIS => q{)};
+
+const my $TWO_PARTS => 2;    # split limit in consumer: title and URL
+
+const my $VOLUME_KEY => $UNDERSCORE;    # key of the line for the whole manual
+const my $SEPARATOR => $COLON x 2;      # joins the fields of a written line
+
+const my $WAIT_STATUS_SHIFT => 8;    # $? is shifted right by this many bits
+
+use Moo;
+use namespace::clean;
+
+with 'Lintian::Data::JoinedLines';
+
+=head1 NAME
+
+Lintian::Data::Authority::DocBaseManual - Lintian interface for manual references
+
+=head1 SYNOPSIS
+
+    use Lintian::Data::Authority::DocBaseManual;
+
+=head1 DESCRIPTION
+
+Lintian::Data::Authority::DocBaseManual provides a way to load data files for
+manual references.
+
+=head1 CLASS METHODS
+
+=over 4
+
+=item title
+
+=item shorthand
+
+=item location
+
+=item separator
+
+=cut
+
+has title => (
+    is => 'rw',
+    default => 'Doc-Base Manual'    # human-readable name
+);
+
+has shorthand => (
+    is => 'rw',
+    default => 'doc-base-manual'    # also names the data file
+);
+
+has location => (
+    is => 'rw',
+    lazy => 1,    # computed on first read, from the shorthand at that time
+    default => sub {
+        my ($self) = @_;
+        # path of the data file, relative to a base directory
+        return 'authority/' . $self->shorthand;
+    }
+);
+
+has separator => (
+    is => 'rw',
+    default => sub { qr/::/ }    # divides the fields of a data line
+);
+
+=item consumer
+
+=cut
+
+sub consumer {
+    my ($self, $key, $remainder, $previous) = @_;
+
+    # $previous is defined, presumably by an earlier line for the same key
+    # NOTE(review): what the caller does with undef is not visible here
+    return undef
+      if defined $previous;
+
+    # the remainder carries the title and the URL, in that order
+    my @fields = split($self->separator, $remainder, $TWO_PARTS);
+
+    my %entry = (title => $fields[0], url => $fields[1]);
+    return \%entry;
+}
+
+=item markdown_citation
+
+=cut
+
+# Build the Markdown citation for one section of this manual. Croaks when
+# given the volume key; unknown sections pass on an undefined title and URL.
+sub markdown_citation {
+    my ($self, $section_key) = @_;
+
+    # the volume key denotes the manual as a whole, never a section
+    croak "Invalid section $section_key"
+      if $section_key eq $VOLUME_KEY;
+
+    # every citation names the overall manual first
+    my $manual = $self->value($VOLUME_KEY);
+    my $manual_title = $manual->{title};
+    my $manual_url = $manual->{url};
+
+    # an empty record stands in for sections absent from the data
+    my $section = {};
+    $section = $self->value($section_key)
+      if $self->recognizes($section_key);
+
+    my $title = $section->{title};
+    my $url = $section->{url};
+
+    # arguments: manual title and URL, section key, section title and URL
+    return markdown_authority(
+        $manual_title, $manual_url, $section_key,
+        $title, $url
+    );
+}
+
+=item write_line
+
+=cut
+
+# Print one data line to $data_fd: key, title and destination joined by
+# the field separator and encoded as UTF-8.
+sub write_line {
+    my ($data_fd, $section_key, $section_title, $destination) = @_;
+
+    my @fields = ($section_key, $section_title, $destination);
+
+    # the key loses trailing dots; whitespace runs in the title collapse
+    $fields[0] =~ s{ [.]+ $}{}x;
+    $fields[1] =~ s{ \s+ }{ }gx;
+
+    say {$data_fd} encode_utf8(join($SEPARATOR, @fields));
+
+    return;
+}
+
+=item write_data_file
+
+=cut
+
+# Store the generated lines under $basedir at this authority's location,
+# preceded by a comment header that explains the file format.
+sub write_data_file {
+    my ($self, $basedir, $generated) = @_;
+
+    # the folder for the data file may not exist yet
+    my $target = path("$basedir/" . $self->location);
+    my $folder = $target->parent->stringify;
+    path($folder)->mkpath
+      unless -e $folder;
+
+    my $preamble =<<"HEADER";
+# Data about titles, sections, and URLs of manuals, used to expand references
+# in tag descriptions and add links for HTML output.  Each line of this file
+# has three fields separated by double colons:
+#
+#     <section> :: <title> :: <url>
+#
+# If <section> is an underscore, that line specifies the title and URL for the
+# whole manual.
+
+HEADER
+    $target->spew(encode_utf8($preamble) . $generated);
+    return;
+}
+
+=item find_installable_name
+
+=cut
+
+# Look up which installable ships $requested_path in sid/main, reading the
+# Contents indices for 'all' and for $port. Dies when no installable ships
+# the path; warns and returns the first one found when several do.
+sub find_installable_name {
+    my ($self, $archive, $port, $requested_path) = @_;
+
+    my @installed_by;
+
+    # find installable package
+    for my $installable_architecture ('all', $port) {
+
+        my $local_path
+          = $archive->contents_gz('sid', 'main', $installable_architecture);
+
+        open(my $fd, '<:gzip', $local_path)
+          or die encode_utf8("Cannot open $local_path.");
+
+        while (my $line = <$fd>) {
+
+            chomp $line;
+
+            # each line holds a path, whitespace and a list of locations
+            my ($path, $finder) = split($SPACE, $line, 2);
+            next
+              unless length $path
+              && length $finder;
+
+            next
+              unless $path eq $requested_path;
+
+            # locations are comma-separated and qualified, as in
+            # "section/name"; the installable is the final component
+            for my $location (split(m{,}, $finder)) {
+
+                my $installable = $location =~ s{^ .* / }{}xr;
+                push(@installed_by, $installable)
+                  if length $installable;
+            }
+        }
+
+        close $fd;
+    }
+
+    die encode_utf8(
+        "The path $requested_path is not installed by any package.")
+      if @installed_by < 1;
+
+    if (@installed_by > 1) {
+        warn encode_utf8(
+            "The path $requested_path is installed by multiple packages:\n");
+        warn encode_utf8($INDENT . "- $_\n") for @installed_by;
+    }
+
+    # the first installable found wins
+    return shift @installed_by;
+}
+
+=item refresh
+
+=cut
+
+# Regenerate the data file from the manual that ships inside a package: the
+# package is located, fetched and unpacked before its index page is parsed.
+sub refresh {
+    my ($self, $archive, $basedir) = @_;
+
+    # new_abs is called below, but nothing else in this module loads URI::file
+    require URI::file;
+
+    # shipped as part of the doc-base installable
+    my $shipped_base = 'usr/share/doc/doc-base/doc-base.html/';
+    my $index_name = 'index.html';
+
+    my $shipped_path = $shipped_base . $index_name;
+    my $stored_uri = "file:///$shipped_path";
+
+    # neutral sort order
+    local $ENV{LC_ALL} = 'C';
+
+    my $port = 'amd64';
+    my $installable_name
+      = $self->find_installable_name($archive, $port, $shipped_path);
+
+    my $deb822_by_installable_name
+      = $archive->deb822_packages_by_installable_name('sid', 'main', $port);
+
+    my $work_folder
+      = Path::Tiny->tempdir(TEMPLATE => 'refresh-doc-base-manual-XXXXXXXXXX');
+
+    die encode_utf8("Installable $installable_name not shipped in port $port")
+      unless exists $deb822_by_installable_name->{$installable_name};
+
+    my $deb822 = $deb822_by_installable_name->{$installable_name};
+    my $pool_path = $deb822->value('Filename');
+    my $deb_filename = basename($pool_path);
+    my $deb_local_path = "$work_folder/$deb_filename";
+    my $deb_url = $archive->mirror_base . $SLASH . $pool_path;
+
+    # run3 takes stdin, stdout and stderr in that order; stdout is dropped
+    my $stderr;
+    run3([qw{wget --quiet}, "--output-document=$deb_local_path", $deb_url],
+        undef, \undef, \$stderr);
+    my $status = ($? >> $WAIT_STATUS_SHIFT);
+
+    # stderr already in UTF-8; fall back to a message when it is empty
+    die(length($stderr) ? $stderr : encode_utf8("Cannot fetch $deb_url"))
+      if $status;
+
+    my $extract_folder = "$work_folder/unpacked/$pool_path";
+    path($extract_folder)->mkpath;
+
+    run3([qw{dpkg-deb --extract}, $deb_local_path, $extract_folder],
+        undef, \undef, \$stderr);
+    $status = ($? >> $WAIT_STATUS_SHIFT);
+
+    # stderr already in UTF-8; fall back to a message when it is empty
+    die(length($stderr) ? $stderr : encode_utf8("Cannot unpack $deb_filename"))
+      if $status;
+
+    unlink($deb_local_path)
+      or die encode_utf8("Cannot delete $deb_local_path");
+
+    my $generated;
+    open(my $memory_fd, '>', \$generated)
+      or die encode_utf8('Cannot open scalar');
+
+    my $mechanize = WWW::Mechanize->new();
+    my $fresh_uri = URI::file->new_abs("/$extract_folder/$shipped_path");
+    $mechanize->get($fresh_uri);
+
+    my $page_title = $mechanize->title;
+    # strip explanatory remark
+    $page_title =~ s{ \s* \N{EM DASH} .* $}{}x;
+
+    # underscore is a token for the whole page
+    write_line($memory_fd, $VOLUME_KEY, $page_title, $stored_uri);
+
+    my %by_section_key;
+    my $in_appendix = 0;
+
+    # https://stackoverflow.com/a/254687
+    for my $link ($mechanize->links) {
+        next
+          unless length $link->text;
+
+        next
+          if $link->text !~ qr{^ \s* ([.\d]+) \s+ (.+) $}x;
+
+        my $section_key = $1;
+        my $section_title = $2;
+
+        # drop final dots
+        $section_key =~ s{ [.]+ $}{}x;
+
+        # reduce consecutive whitespace
+        $section_title =~ s{ \s+ }{ }gx;
+
+        # destinations point at the installed location, not the unpacked copy
+        my $relative_destination = $link->url;
+        my $destination_base = $stored_uri;
+        $destination_base = dirname($stored_uri) . $SLASH
+          unless $destination_base =~ m{ / $}x
+          || $relative_destination =~ m{^ [#] }x;
+
+        my $full_destination = $destination_base . $relative_destination;
+
+        # skip a key already recorded with this title or this destination
+        next
+          if exists $by_section_key{$section_key}
+          && ( $by_section_key{$section_key}{title} eq $section_title
+            || $by_section_key{$section_key}{destination} eq $full_destination);
+
+        # Some manuals reuse section numbers for different references,
+        # e.g. the Debian Policy's normal and appendix sections are
+        # numbers that clash with each other. Track if we've already
+        # seen a section pointing to some other URL than the current one,
+        # and prepend it with an indicator
+        $in_appendix = 1
+          if exists $by_section_key{$section_key}
+          && $by_section_key{$section_key}{destination} ne $full_destination;
+
+        $section_key = "appendix-$section_key"
+          if $in_appendix;
+
+        $by_section_key{$section_key}{title} = $section_title;
+        $by_section_key{$section_key}{destination} = $full_destination;
+
+        write_line($memory_fd, $section_key, $section_title,$full_destination);
+    }
+
+    close $memory_fd;
+
+    $self->write_data_file($basedir, $generated);
+
+    return;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Authority/FilesystemHierarchy.pm b/lib/Lintian/Data/Authority/FilesystemHierarchy.pm
new file mode 100644
index 0000000..89fb677
--- /dev/null
+++ b/lib/Lintian/Data/Authority/FilesystemHierarchy.pm
@@ -0,0 +1,333 @@
+# -*- perl -*-
+#
+# Copyright (C) 1998 Christian Schwarz and Richard Braakman
+# Copyright (C) 2001 Colin Watson
+# Copyright (C) 2008 Jorda Polo
+# Copyright (C) 2009 Russ Allbery
+# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
+# Copyright (C) 2020-2021 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program.  If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Data::Authority::FilesystemHierarchy;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Carp qw(croak);
+use Const::Fast;
+use File::Basename qw(dirname);
+use Path::Tiny;
+use Unicode::UTF8 qw(encode_utf8);
+use WWW::Mechanize ();
+
+use Lintian::Output::Markdown qw(markdown_authority);
+
+const my $EMPTY => q{};
+const my $SPACE => q{ };
+const my $SLASH => q{/};
+const my $COLON => q{:};
+const my $UNDERSCORE => q{_};
+const my $LEFT_PARENTHESIS => q{(};
+const my $RIGHT_PARENTHESIS => q{)};
+
+const my $THREE_PARTS => 3;
+
+const my $VOLUME_KEY => $UNDERSCORE;
+const my $SEPARATOR => $COLON x 2;
+
+use Moo;
+use namespace::clean;
+
+with 'Lintian::Data::JoinedLines';
+
+=head1 NAME
+
+Lintian::Data::Authority::FilesystemHierarchy - Lintian interface for manual references
+
+=head1 SYNOPSIS
+
+    use Lintian::Data::Authority::FilesystemHierarchy;
+
+=head1 DESCRIPTION
+
+Lintian::Data::Authority::FilesystemHierarchy provides a way to load data files for
+manual references.
+
+=head1 CLASS METHODS
+
+=over 4
+
+=item title
+
+=item shorthand
+
+=item location
+
+=item separator
+
+=cut
+
+# Title of the manual; writable, with a fixed default.
+has 'title' => (
+    default => 'Filesystem Hierarchy Standard',
+    is => 'rw',
+);
+
+# Short identifier; also the last component of the data file location.
+has 'shorthand' => (
+    default => 'filesystem-hierarchy',
+    is => 'rw',
+);
+
+# Data file location, derived lazily from the shorthand on first read.
+has 'location' => (
+    is => 'rw',
+    lazy => 1,
+    default => sub { return 'authority/' . $_[0]->shorthand; },
+);
+
+# Pattern that splits the fields of a data line (double colon).
+has 'separator' => (
+    default => sub { return qr/::/; },
+    is => 'rw',
+);
+
+=item consumer
+
+=cut
+
+sub consumer {
+    my ($self, $key, $remainder, $previous) = @_;
+
+    # Parses the remainder of a data line into number, title and url.
+    # A defined $previous yields undef instead of a new entry.
+    if (defined $previous) {
+        return undef;
+    }
+
+    # fields are stored in the order number, title, url
+    my @fields = split($self->separator, $remainder, $THREE_PARTS);
+    my ($number, $title, $url) = @fields;
+
+    my %entry = (title => $title, number => $number, url => $url);
+    return \%entry;
+}
+
+=item markdown_citation
+
+=cut
+
+sub markdown_citation {
+    my ($self, $section_key) = @_;
+
+    # Builds the citation for one section via markdown_authority. The
+    # volume key denotes the whole manual and cannot be cited here.
+    if ($section_key eq $VOLUME_KEY) {
+        croak "Invalid section $section_key";
+    }
+
+    # the volume entry supplies title and link of the overall manual
+    my $manual = $self->value($VOLUME_KEY);
+    my $manual_title = $manual->{title};
+    my $manual_url = $manual->{url};
+
+    # all three remain undefined when the section is not recognized
+    my ($number, $heading, $link);
+
+    if ($self->recognizes($section_key)) {
+
+        my $found = $self->value($section_key);
+
+        $heading = $found->{title};
+        $number = $found->{number};
+        $link = $found->{url};
+    }
+
+    # argument order: manual title and url, then number, title, url
+    return markdown_authority($manual_title, $manual_url,
+        $number, $heading, $link);
+}
+
+=item write_line
+
+=cut
+
+sub write_line {
+    my ($data_fd, $section_key, $section_number, $section_title, $destination)
+      = @_;
+
+    # Emits one record to $data_fd: the four fields joined by the
+    # separator and encoded as UTF-8, with the newline added by say.
+
+    # the key loses trailing dots; whitespace runs in the title collapse
+    my $key = $section_key =~ s{ [.]+ $}{}xr;
+    my $title = $section_title =~ s{ \s+ }{ }gxr;
+
+    my @fields = ($key, $section_number, $title, $destination);
+
+    say {$data_fd} encode_utf8(join($SEPARATOR, @fields));
+
+    return;
+}
+
+=item write_data_file
+
+=cut
+
+sub write_data_file {
+    my ($self, $basedir, $generated) = @_;
+
+    # Stores the header followed by $generated in the data file below
+    # $basedir. The header names all four fields that write_line emits.
+    my $header =<<"HEADER";
+# Data about titles, sections, and URLs of manuals, used to expand references
+# in tag descriptions and add links for HTML output.  Each line of this file
+# has four fields separated by double colons:
+#
+#     <section> :: <number> :: <title> :: <url>
+#
+# If <section> is an underscore, that line specifies the title and URL for the
+# whole manual.
+
+HEADER
+
+    my $data_path = "$basedir/" . $self->location;
+    my $parent_dir = path($data_path)->parent->stringify;
+    path($parent_dir)->mkpath unless -e $parent_dir;
+
+    path($data_path)->spew(encode_utf8($header) . $generated);
+
+    return;
+}
+
+=item extract_sections_from_links
+
+=cut
+
+sub extract_sections_from_links {
+    my ($self, $data_fd, $base_url, $page_name)= @_;
+
+    # Fetches $base_url . $page_name and writes one data line for the
+    # page itself, then one for each link whose text starts with a
+    # section number. Only the first link per section key is written.
+    my $page_url = $base_url . $page_name;
+
+    my $mechanize = WWW::Mechanize->new();
+    $mechanize->get($page_url);
+
+    my $page_title = $mechanize->title;
+
+    # strip explanatory remark
+    $page_title =~ s{ \s* \N{EM DASH} .* $}{}x;
+
+    # underscore is a token for the whole page
+    write_line($data_fd, $VOLUME_KEY, $EMPTY, $page_title, $page_url);
+
+    # section keys that were already written
+    my %seen;
+
+    # https://stackoverflow.com/a/254687
+    for my $link ($mechanize->links) {
+
+        next
+          unless length $link->url;
+
+        # make lowercase; strip hash, it's a fragment
+        my $section_key = lc($link->url);
+        $section_key =~ s{^ [#] }{}x;
+
+        next
+          unless length $link->text;
+
+        next
+          if $link->text !~ qr{^ \s* ([.\d]+) \s+ (.+) $}x;
+
+        my $section_number = $1;
+        my $section_title = $2;
+
+        # later links to a known key are ignored
+        next
+          if $seen{$section_key}++;
+
+        # drop final dot
+        $section_number =~ s{ [.]+ $}{}x;
+
+        # reduce consecutive whitespace
+        $section_title =~ s{ \s+ }{ }gx;
+
+        # includes hash
+        my $relative_destination = $link->url;
+
+        # NOTE(review): plain concatenation assumes a relative target;
+        # an absolute link url would yield a malformed destination
+        my $destination_base = $page_url;
+        $destination_base = dirname($page_url) . $SLASH
+          unless $destination_base =~ m{ / $}x
+          || $relative_destination =~ m{^ [#] }x;
+
+        my $full_destination = $destination_base . $relative_destination;
+
+        write_line($data_fd, $section_key, $section_number,
+            $section_title, $full_destination);
+    }
+
+    return;
+}
+
+=item refresh
+
+=cut
+
+sub refresh {
+    my ($self, $archive, $basedir) = @_;
+
+    # Regenerates the data file below $basedir; $archive is not used.
+    # The single page version is fetched because the plain directory
+    # shows a file list.
+    my $base_url = 'https://refspecs.linuxfoundation.org/FHS_3.0/';
+    my $index_name = 'fhs-3.0.html';
+
+    my $generated;
+    open(my $memory_fd, '>', \$generated)
+      or die encode_utf8("Cannot open scalar: $!");
+
+    $self->extract_sections_from_links($memory_fd, $base_url, $index_name);
+    close $memory_fd;
+
+    $self->write_data_file($basedir, $generated);
+
+    return;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Authority/JavaPolicy.pm b/lib/Lintian/Data/Authority/JavaPolicy.pm
new file mode 100644
index 0000000..eaa6704
--- /dev/null
+++ b/lib/Lintian/Data/Authority/JavaPolicy.pm
@@ -0,0 +1,290 @@
+# -*- perl -*-
+#
+# Copyright (C) 1998 Christian Schwarz and Richard Braakman
+# Copyright (C) 2001 Colin Watson
+# Copyright (C) 2008 Jorda Polo
+# Copyright (C) 2009 Russ Allbery
+# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
+# Copyright (C) 2020-2021 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program.  If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Data::Authority::JavaPolicy;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Carp qw(croak);
+use Const::Fast;
+use List::SomeUtils qw(any first_value);
+use Path::Tiny;
+use Unicode::UTF8 qw(encode_utf8);
+use WWW::Mechanize ();
+
+use Lintian::Output::Markdown qw(markdown_authority);
+
+const my $SLASH => q{/};
+const my $UNDERSCORE => q{_};
+
+const my $VOLUME_KEY => $UNDERSCORE;
+const my $SECTIONS => 'sections';
+
+use Moo;
+use namespace::clean;
+
+with 'Lintian::Data::PreambledJSON';
+
+=head1 NAME
+
+Lintian::Data::Authority::JavaPolicy - Lintian interface for manual references
+
+=head1 SYNOPSIS
+
+    use Lintian::Data::Authority::JavaPolicy;
+
+=head1 DESCRIPTION
+
+Lintian::Data::Authority::JavaPolicy provides a way to load data files for
+manual references.
+
+=head1 CLASS METHODS
+
+=over 4
+
+=item title
+
+=item shorthand
+
+=item location
+
+=item by_section_key
+
+=cut
+
+# Title of the manual; writable, with a fixed default.
+has 'title' => (
+    default => 'Java Policy',
+    is => 'rw',
+);
+
+# Short identifier; also part of the data file location.
+has 'shorthand' => (
+    default => 'java-policy',
+    is => 'rw',
+);
+
+# JSON data file location, derived lazily from the shorthand.
+has 'location' => (
+    is => 'rw',
+    lazy => 1,
+    default => sub { return 'authority/' . $_[0]->shorthand . '.json'; },
+);
+
+# Section entries indexed by key; starts out as an empty hash.
+has 'by_section_key' => (is => 'rw', default => sub { return {}; });
+
+=item markdown_citation
+
+=cut
+
+sub markdown_citation {
+    my ($self, $section_key) = @_;
+
+    # Builds the citation for one section via markdown_authority. The
+    # volume key denotes the whole manual and cannot be cited here.
+    if ($section_key eq $VOLUME_KEY) {
+        croak "Invalid section $section_key";
+    }
+
+    # the volume entry supplies title and link of the overall manual
+    my $manual = $self->value($VOLUME_KEY);
+    my $manual_title = $manual->{title};
+    my $manual_url = $manual->{destination};
+
+    # both remain undefined when the section is not recognized
+    my ($heading, $link);
+
+    if ($self->recognizes($section_key)) {
+
+        my $found = $self->value($section_key);
+
+        $heading = $found->{title};
+        $link = $found->{destination};
+    }
+
+    return markdown_authority($manual_title, $manual_url,
+        $section_key, $heading, $link);
+}
+
+=item recognizes (KEY)
+
+Returns true if KEY is known, and false otherwise.
+
+=cut
+
+sub recognizes {
+    my ($self, $key) = @_;
+
+    # an undefined or empty key is never known
+    if (!length $key) {
+        return 0;
+    }
+
+    # yields 1 for a stored key and 0 otherwise
+    return exists($self->by_section_key->{$key}) ? 1 : 0;
+}
+
+=item value (KEY)
+
+Returns the value attached to KEY if it was listed in the data
+file represented by this Lintian::Data instance and the undefined value
+otherwise.
+
+=cut
+
+sub value {
+    my ($self, $key) = @_;
+
+    # An undefined or empty key has no entry. In that case an explicit
+    # undef is returned; otherwise the stored entry for the key, which
+    # is itself undef when the key is unknown.
+    return length($key) ? $self->by_section_key->{$key} : undef;
+}
+
+=item load
+
+=cut
+
+sub load {
+    my ($self, $search_space, $our_vendor) = @_;
+
+    # Reads the first existing data file found below the directories
+    # in $search_space. Returns 0 when read_file fails, and 1 after
+    # the sections were stored. $our_vendor is not used here.
+    my $path = first_value { -e }
+      map { $_ . $SLASH . $self->location } @{$search_space};
+
+    my $reference;
+    if (!$self->read_file($path, \$reference)) {
+        return 0;
+    }
+
+    my @sections = @{$reference // []};
+
+    # only store first value for duplicates
+    # silently ignore later values
+    for my $section (@sections) {
+        $self->by_section_key->{$section->{key}} //= $section;
+    }
+
+    return 1;
+}
+
+=item refresh
+
+=cut
+
+sub refresh {
+    my ($self, $archive, $basedir) = @_;
+
+    # Rebuilds the JSON data file below $basedir from the links on the
+    # online manual's front page, and returns the status of write_file.
+    # $archive is accepted but not used here.
+    my $base_url = 'https://www.debian.org/doc/packaging-manuals/java-policy/';
+
+    my $mechanize = WWW::Mechanize->new();
+    $mechanize->get($base_url);
+
+    my $page_title = $mechanize->title;
+
+    # underscore is a token for the whole page
+    my %volume = (
+        key => $VOLUME_KEY,
+        title => $page_title,
+        destination => $base_url,
+    );
+
+    # store array to resemble web layout
+    # may contain duplicates
+    my @sections = (\%volume);
+    my $in_appendix = 0;
+
+    # https://stackoverflow.com/a/254687
+    for my $link ($mechanize->links) {
+
+        next
+          unless length $link->text;
+
+        next
+          unless $link->text =~ qr{^ \s* ([.\d]+) \s+ (.+) $}x;
+
+        my ($section_key, $section_title) = ($1, $2);
+        # drop final dots; reduce consecutive whitespace
+        $section_key =~ s{ [.]+ $}{}x;
+        $section_title =~ s{ \s+ }{ }gx;
+
+        my $destination = $base_url . $link->url;
+
+        # earlier entries recorded under the same, unprefixed key
+        my @similar = grep { $_->{key} eq $section_key } @sections;
+
+        # skip links that repeat a known title or destination
+        next
+          if any {
+            $_->{title} eq $section_title || $_->{destination} eq $destination
+          } @similar;
+
+        # Some manuals reuse section numbers, e.g. for appendices. A
+        # clashing number that points to another URL starts the range
+        # of keys that receive an indicator prefix.
+        $in_appendix = 1
+          if any { $_->{destination} ne $destination } @similar;
+
+        $section_key = "appendix-$section_key"
+          if $in_appendix;
+
+        my %section = (
+            key => $section_key,
+            title => $section_title,
+            destination => $destination,
+        );
+        push(@sections, \%section);
+    }
+
+    my $data_path = "$basedir/" . $self->location;
+    my $status = $self->write_file($SECTIONS, \@sections, $data_path);
+
+    return $status;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Authority/LintianManual.pm b/lib/Lintian/Data/Authority/LintianManual.pm
new file mode 100644
index 0000000..3fc7bd0
--- /dev/null
+++ b/lib/Lintian/Data/Authority/LintianManual.pm
@@ -0,0 +1,324 @@
+# -*- perl -*-
+#
+# Copyright (C) 1998 Christian Schwarz and Richard Braakman
+# Copyright (C) 2001 Colin Watson
+# Copyright (C) 2008 Jorda Polo
+# Copyright (C) 2009 Russ Allbery
+# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
+# Copyright (C) 2020-2021 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program.  If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Data::Authority::LintianManual;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Carp qw(croak);
+use Const::Fast;
+use IPC::Run3;
+use Path::Tiny;
+use Unicode::UTF8 qw(encode_utf8);
+use URI::file;
+use WWW::Mechanize ();
+
+use Lintian::Output::Markdown qw(markdown_authority);
+
+const my $EMPTY => q{};
+const my $SPACE => q{ };
+const my $SLASH => q{/};
+const my $COLON => q{:};
+const my $UNDERSCORE => q{_};
+const my $LEFT_PARENTHESIS => q{(};
+const my $RIGHT_PARENTHESIS => q{)};
+
+const my $TWO_PARTS => 2;
+
+const my $VOLUME_KEY => $UNDERSCORE;
+const my $SEPARATOR => $COLON x 2;
+
+const my $WAIT_STATUS_SHIFT => 8;
+
+use Moo;
+use namespace::clean;
+
+with 'Lintian::Data::JoinedLines';
+
+=head1 NAME
+
+Lintian::Data::Authority::LintianManual - Lintian interface for manual references
+
+=head1 SYNOPSIS
+
+    use Lintian::Data::Authority::LintianManual;
+
+=head1 DESCRIPTION
+
+Lintian::Data::Authority::LintianManual provides a way to load data files for
+manual references.
+
+=head1 CLASS METHODS
+
+=over 4
+
+=item title
+
+=item shorthand
+
+=item location
+
+=item separator
+
+=cut
+
+# Title of the manual; writable, with a fixed default.
+has 'title' => (
+    default => 'Lintian Manual',
+    is => 'rw',
+);
+
+# Short identifier; also the last component of the data file location.
+has 'shorthand' => (
+    default => 'lintian-manual',
+    is => 'rw',
+);
+
+# Data file location, derived lazily from the shorthand on first read.
+has 'location' => (
+    is => 'rw',
+    lazy => 1,
+    default => sub { return 'authority/' . $_[0]->shorthand; },
+);
+
+# Pattern that splits the fields of a data line (double colon).
+has 'separator' => (
+    default => sub { return qr/::/; },
+    is => 'rw',
+);
+
+=item consumer
+
+=cut
+
+sub consumer {
+    my ($self, $key, $remainder, $previous) = @_;
+
+    # Parses the remainder of a data line into title and url. A
+    # defined $previous yields undef instead of a new entry.
+    if (defined $previous) {
+        return undef;
+    }
+
+    my @fields = split($self->separator, $remainder, $TWO_PARTS);
+
+    my %entry = (title => $fields[0], url => $fields[1]);
+    return \%entry;
+}
+
+=item markdown_citation
+
+=cut
+
+sub markdown_citation {
+    my ($self, $section_key) = @_;
+
+    # Builds the citation for one section via markdown_authority. The
+    # volume key denotes the whole manual and cannot be cited here.
+    if ($section_key eq $VOLUME_KEY) {
+        croak "Invalid section $section_key";
+    }
+
+    # the volume entry supplies title and link of the overall manual
+    my $manual = $self->value($VOLUME_KEY);
+    my $manual_title = $manual->{title};
+    my $manual_url = $manual->{url};
+
+    # both remain undefined when the section is not recognized
+    my ($heading, $link);
+
+    if ($self->recognizes($section_key)) {
+
+        my $found = $self->value($section_key);
+
+        $heading = $found->{title};
+        $link = $found->{url};
+    }
+
+    return markdown_authority($manual_title, $manual_url,
+        $section_key, $heading, $link);
+}
+
+=item write_line
+
+=cut
+
+sub write_line {
+    my ($data_fd, $section_key, $section_title, $destination) = @_;
+
+    # Emits one record to $data_fd: the three fields joined by the
+    # separator and encoded as UTF-8, with the newline added by say.
+
+    # the key loses trailing dots; whitespace runs in the title collapse
+    my $key = $section_key =~ s{ [.]+ $}{}xr;
+    my $title = $section_title =~ s{ \s+ }{ }gxr;
+
+    my @fields = ($key, $title, $destination);
+    say {$data_fd} encode_utf8(join($SEPARATOR, @fields));
+
+    return;
+}
+
+=item write_data_file
+
+=cut
+
+sub write_data_file {
+    my ($self, $basedir, $generated) = @_;
+
+    # Stores the explanatory header followed by $generated in the
+    # data file below $basedir, creating its directory when missing.
+    my $header = <<'HEADER';
+# Data about titles, sections, and URLs of manuals, used to expand references
+# in tag descriptions and add links for HTML output.  Each line of this file
+# has three fields separated by double colons:
+#
+#     <section> :: <title> :: <url>
+#
+# If <section> is an underscore, that line specifies the title and URL for the
+# whole manual.
+
+HEADER
+
+    my $target = path("$basedir/" . $self->location);
+    my $parent_dir = $target->parent->stringify;
+    path($parent_dir)->mkpath unless -e $parent_dir;
+
+    $target->spew(encode_utf8($header) . $generated);
+
+    return;
+}
+
+=item refresh
+
+=cut
+
+sub refresh {
+    my ($self, $archive, $basedir) = @_;
+
+    # Renders doc/lintian.rst below LINTIAN_BASE with rst2html, scans
+    # the result for numbered section links, and writes the data file
+    # below $basedir. Dies with rst2html's stderr on a nonzero exit.
+
+    # WWW::Mechanize will not parse page title without the suffix
+    my $temp_tiny = Path::Tiny->tempfile(
+        TEMPLATE => 'lintian-manual-XXXXXXXX',
+        SUFFIX => '.html'
+    );
+    my $local_uri = URI::file->new_abs($temp_tiny->stringify);
+
+    # for rst2html
+    local $ENV{LC_ALL} = 'en_US.UTF-8';
+
+    my $stderr;
+    my @command = ('rst2html', "$ENV{LINTIAN_BASE}/doc/lintian.rst");
+    run3(\@command, undef, $local_uri->file, \$stderr);
+
+    # stderr already in UTF-8
+    if ($? >> $WAIT_STATUS_SHIFT) {
+        die $stderr;
+    }
+
+    my $generated;
+    open(my $memory_fd, '>', \$generated)
+      or die encode_utf8("Cannot open scalar: $!");
+
+    my $page_url = 'https://lintian.debian.org/manual/index.html';
+
+    my $mechanize = WWW::Mechanize->new();
+    $mechanize->get($local_uri);
+
+    my $page_title = $mechanize->title;
+
+    # underscore is a token for the whole page
+    write_line($memory_fd, $VOLUME_KEY, $page_title, $page_url);
+
+    my %by_section_key;
+    my $in_appendix = 0;
+
+    # https://stackoverflow.com/a/254687
+    for my $link ($mechanize->links) {
+
+        next
+          unless length $link->text;
+
+        next
+          unless $link->text =~ qr{^ \s* ([.\d]+) \s+ (.+) $}x;
+
+        my ($section_key, $section_title) = ($1, $2);
+
+        # drop final dots; reduce consecutive whitespace
+        $section_key =~ s{ [.]+ $}{}x;
+        $section_title =~ s{ \s+ }{ }gx;
+
+        my $destination = $page_url . $link->url;
+        my $known = $by_section_key{$section_key};
+
+        # skip links that repeat the known title or destination
+        next
+          if defined($known)
+          && ( $known->{title} eq $section_title
+            || $known->{destination} eq $destination);
+
+        # Some manuals reuse section numbers, e.g. for appendices. A
+        # clashing number that points to another URL starts the range
+        # of keys that receive an indicator prefix.
+        $in_appendix = 1
+          if defined($known) && $known->{destination} ne $destination;
+
+        $section_key = "appendix-$section_key"
+          if $in_appendix;
+
+        $by_section_key{$section_key}
+          = {title => $section_title, destination => $destination};
+
+        write_line($memory_fd, $section_key, $section_title, $destination);
+    }
+
+    close $memory_fd;
+
+    $self->write_data_file($basedir, $generated);
+
+    return;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Authority/MenuManual.pm b/lib/Lintian/Data/Authority/MenuManual.pm
new file mode 100644
index 0000000..c8a2878
--- /dev/null
+++ b/lib/Lintian/Data/Authority/MenuManual.pm
@@ -0,0 +1,316 @@
+# -*- perl -*-
+#
+# Copyright (C) 1998 Christian Schwarz and Richard Braakman
+# Copyright (C) 2001 Colin Watson
+# Copyright (C) 2008 Jorda Polo
+# Copyright (C) 2009 Russ Allbery
+# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
+# Copyright (C) 2020-2021 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program.  If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Data::Authority::MenuManual;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Carp qw(croak);
+use Const::Fast;
+use Path::Tiny;
+use Unicode::UTF8 qw(encode_utf8);
+use WWW::Mechanize ();
+
+use Lintian::Output::Markdown qw(markdown_authority);
+
+const my $EMPTY => q{};
+const my $SPACE => q{ };
+const my $SLASH => q{/};
+const my $COLON => q{:};
+const my $UNDERSCORE => q{_};
+const my $LEFT_PARENTHESIS => q{(};
+const my $RIGHT_PARENTHESIS => q{)};
+
+const my $TWO_PARTS => 2;
+
+const my $VOLUME_KEY => $UNDERSCORE;
+const my $SEPARATOR => $COLON x 2;
+
+use Moo;
+use namespace::clean;
+
+with 'Lintian::Data::JoinedLines';
+
+=head1 NAME
+
+Lintian::Data::Authority::MenuManual - Lintian interface for manual references
+
+=head1 SYNOPSIS
+
+    use Lintian::Data::Authority::MenuManual;
+
+=head1 DESCRIPTION
+
+Lintian::Data::Authority::MenuManual provides a way to load data files for
+manual references.
+
+=head1 CLASS METHODS
+
+=over 4
+
+=item title
+
+=item shorthand
+
+=item location
+
+=item separator
+
+=cut
+
+# Title of the manual; writable, with a fixed default.
+has 'title' => (
+    default => 'Menu Manual',
+    is => 'rw',
+);
+
+# Short identifier; also the last component of the data file location.
+has 'shorthand' => (
+    default => 'menu-manual',
+    is => 'rw',
+);
+
+# Data file location, derived lazily from the shorthand on first read.
+has 'location' => (
+    is => 'rw',
+    lazy => 1,
+    default => sub { return 'authority/' . $_[0]->shorthand; },
+);
+
+# Pattern that splits the fields of a data line (double colon).
+has 'separator' => (
+    default => sub { return qr/::/; },
+    is => 'rw',
+);
+
+=item consumer
+
+=cut
+
+sub consumer {
+    my ($self, $key, $remainder, $previous) = @_;
+
+    # Parses the remainder of a data line into title and url. A
+    # defined $previous yields undef instead of a new entry.
+    if (defined $previous) {
+        return undef;
+    }
+
+    my @fields = split($self->separator, $remainder, $TWO_PARTS);
+
+    my %entry = (title => $fields[0], url => $fields[1]);
+    return \%entry;
+}
+
+=item markdown_citation
+
+=cut
+
+sub markdown_citation {
+    my ($self, $section_key) = @_;
+
+    # Builds the citation for one section via markdown_authority. The
+    # volume key denotes the whole manual and cannot be cited here.
+    if ($section_key eq $VOLUME_KEY) {
+        croak "Invalid section $section_key";
+    }
+
+    # the volume entry supplies title and link of the overall manual
+    my $manual = $self->value($VOLUME_KEY);
+    my $manual_title = $manual->{title};
+    my $manual_url = $manual->{url};
+
+    # both remain undefined when the section is not recognized
+    my ($heading, $link);
+
+    if ($self->recognizes($section_key)) {
+
+        my $found = $self->value($section_key);
+
+        $heading = $found->{title};
+        $link = $found->{url};
+    }
+
+    return markdown_authority($manual_title, $manual_url,
+        $section_key, $heading, $link);
+}
+
+=item write_line
+
+=cut
+
+sub write_line {
+    my ($data_fd, $section_key, $section_title, $destination) = @_;
+
+    # Emits one record to $data_fd: the three fields joined by the
+    # separator and encoded as UTF-8, with the newline added by say.
+
+    # the key loses trailing dots; whitespace runs in the title collapse
+    my $key = $section_key =~ s{ [.]+ $}{}xr;
+    my $title = $section_title =~ s{ \s+ }{ }gxr;
+
+    my @fields = ($key, $title, $destination);
+    say {$data_fd} encode_utf8(join($SEPARATOR, @fields));
+
+    return;
+}
+
+=item write_data_file
+
+=cut
+
+sub write_data_file {
+    my ($self, $basedir, $generated) = @_;
+
+    # Stores the explanatory header followed by $generated in the
+    # data file below $basedir, creating its directory when missing.
+    my $header = <<'HEADER';
+# Data about titles, sections, and URLs of manuals, used to expand references
+# in tag descriptions and add links for HTML output.  Each line of this file
+# has three fields separated by double colons:
+#
+#     <section> :: <title> :: <url>
+#
+# If <section> is an underscore, that line specifies the title and URL for the
+# whole manual.
+
+HEADER
+
+    my $target = path("$basedir/" . $self->location);
+    my $parent_dir = $target->parent->stringify;
+    path($parent_dir)->mkpath unless -e $parent_dir;
+
+    $target->spew(encode_utf8($header) . $generated);
+
+    return;
+}
+
+=item extract_sections_from_links
+
+=cut
+
+sub extract_sections_from_links {
+    my ($self, $data_fd, $base_url)= @_;
+
+    # Fetches $base_url and writes one data line for the page itself,
+    # then one for each link whose text starts with a section number.
+    my $mechanize = WWW::Mechanize->new();
+    $mechanize->get($base_url);
+
+    my $page_title = $mechanize->title;
+
+    # strip explanatory remark
+    $page_title =~ s{ \s* \N{EM DASH} .* $}{}x;
+
+    # underscore is a token for the whole page
+    write_line($data_fd, $VOLUME_KEY, $page_title, $base_url);
+
+    my %by_section_key;
+    my $in_appendix = 0;
+
+    # https://stackoverflow.com/a/254687
+    for my $link ($mechanize->links) {
+
+        next
+          unless length $link->text;
+
+        next
+          unless $link->text =~ qr{^ \s* ([.\d]+) \s+ (.+) $}x;
+
+        my ($section_key, $section_title) = ($1, $2);
+
+        # drop final dots; reduce consecutive whitespace
+        $section_key =~ s{ [.]+ $}{}x;
+        $section_title =~ s{ \s+ }{ }gx;
+
+        my $destination = $base_url . $link->url;
+
+        # entry stored earlier under the same, unprefixed key, if any
+        my $known = $by_section_key{$section_key};
+
+        # skip links that repeat the known title or destination
+        next
+          if defined($known)
+          && ( $known->{title} eq $section_title
+            || $known->{destination} eq $destination);
+
+        # Some manuals reuse section numbers, e.g. for appendices. A
+        # clashing number that points to another URL starts the range
+        # of keys that receive an indicator prefix.
+        $in_appendix = 1
+          if defined($known) && $known->{destination} ne $destination;
+
+        $section_key = "appendix-$section_key"
+          if $in_appendix;
+
+        $by_section_key{$section_key}
+          = {title => $section_title, destination => $destination};
+
+        write_line($data_fd, $section_key, $section_title, $destination);
+    }
+
+    return;
+}
+
+=item refresh
+
+=cut
+
+sub refresh {
+    my ($self, $archive, $basedir) = @_;
+
+    # Regenerates the data file below $basedir; $archive is not used.
+    my $base_url = 'https://www.debian.org/doc/packaging-manuals/menu.html/';
+
+    my $generated;
+    open(my $memory_fd, '>', \$generated)
+      or die encode_utf8("Cannot open scalar: $!");
+
+    $self->extract_sections_from_links($memory_fd, $base_url);
+    close $memory_fd;
+
+    $self->write_data_file($basedir, $generated);
+
+    return;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Authority/MenuPolicy.pm b/lib/Lintian/Data/Authority/MenuPolicy.pm
new file mode 100644
index 0000000..e0f710a
--- /dev/null
+++ b/lib/Lintian/Data/Authority/MenuPolicy.pm
@@ -0,0 +1,316 @@
+# -*- perl -*-
+#
+# Copyright (C) 1998 Christian Schwarz and Richard Braakman
+# Copyright (C) 2001 Colin Watson
+# Copyright (C) 2008 Jorda Polo
+# Copyright (C) 2009 Russ Allbery
+# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
+# Copyright (C) 2020-2021 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program.  If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Data::Authority::MenuPolicy;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Carp qw(croak);
+use Const::Fast;
+use Path::Tiny;
+use Unicode::UTF8 qw(encode_utf8);
+use WWW::Mechanize ();
+
+use Lintian::Output::Markdown qw(markdown_authority);
+
+const my $EMPTY => q{};    # not referenced in this module
+const my $SPACE => q{ };    # not referenced in this module
+const my $SLASH => q{/};    # not referenced in this module
+const my $COLON => q{:};    # building block for $SEPARATOR
+const my $UNDERSCORE => q{_};    # building block for $VOLUME_KEY
+const my $LEFT_PARENTHESIS => q{(};    # not referenced in this module
+const my $RIGHT_PARENTHESIS => q{)};    # not referenced in this module
+
+const my $TWO_PARTS => 2;    # split limit in consumer: title and URL
+
+const my $VOLUME_KEY => $UNDERSCORE;    # section key for the whole manual
+const my $SEPARATOR => $COLON x 2;    # '::' between the fields write_line joins
+
+use Moo;
+use namespace::clean;
+
+with 'Lintian::Data::JoinedLines';
+
+=head1 NAME
+
+Lintian::Data::Authority::MenuPolicy - Lintian interface for manual references
+
+=head1 SYNOPSIS
+
+    use Lintian::Data::Authority::MenuPolicy;
+
+=head1 DESCRIPTION
+
+Lintian::Data::Authority::MenuPolicy provides a way to load data files for
+manual references.
+
+=head1 CLASS METHODS
+
+=over 4
+
+=item title
+
+=item shorthand
+
+=item location
+
+=item separator
+
+=cut
+
+# human-readable name of the manual
+has title => (is => 'rw', default => sub { return 'Menu Policy'; });
+
+# short identifier; the last component of the data file location
+has shorthand => (is => 'rw', default => sub { return 'menu-policy'; });
+
+# relative path of the data file, derived from the shorthand on first use
+has location => (
+    is => 'rw',
+    lazy => 1,
+    default => sub {
+        my ($self) = @_;
+
+        my $shorthand = $self->shorthand;
+
+        return "authority/$shorthand";
+    }
+);
+
+# pattern consumer uses to split the fields of a data line
+has separator => (is => 'rw', default => sub { return qr/::/; });
+
+=item consumer
+
+=cut
+
+# Turn the remainder of a data line into the entry for a key.
+#
+# Returns undef when a previous value is already defined for the key.
+# Otherwise the remainder is split at the separator into at most two
+# parts, which are returned in a hash reference as 'title' and 'url'.
+sub consumer {
+    my ($self, $key, $remainder, $previous) = @_;
+
+    # explicit undef on purpose: a single scalar in every calling context
+    return undef
+      if defined $previous;
+
+    my @fields = split($self->separator, $remainder, $TWO_PARTS);
+
+    my %entry = (
+        title => $fields[0],
+        url => $fields[1],
+    );
+
+    return \%entry;
+}
+
+=item markdown_citation
+
+=cut
+
+# Build a Markdown citation for a section of this manual.
+#
+# Croaks when the section key is the one reserved for the whole manual.
+# Title and URL of the manual always go into the citation; those of the
+# section stay undefined when the key is not recognized.  Returns the
+# result of markdown_authority.
+sub markdown_citation {
+    my ($self, $section_key) = @_;
+
+    croak "Invalid section $section_key"
+      if $section_key eq $VOLUME_KEY;
+
+    # the overall manual comes first
+    my $volume = $self->value($VOLUME_KEY);
+    my $volume_title = $volume->{title};
+    my $volume_url = $volume->{url};
+
+    # an empty entry stands in for a section without data
+    my $section = {};
+    $section = $self->value($section_key)
+      if $self->recognizes($section_key);
+
+    my $section_title = $section->{title};
+    my $section_url = $section->{url};
+
+    return markdown_authority(
+        $volume_title, $volume_url,$section_key,
+        $section_title, $section_url
+    );
+}
+
+=item write_line
+
+=cut
+
+# Write one data line to the given handle.
+#
+# Plain function, not a method.  The line consists of section key,
+# section title and destination joined by the separator constant; it is
+# UTF-8 encoded before printing.  Returns nothing.
+sub write_line {
+    my ($data_fd, $section_key, $section_title, $destination) = @_;
+
+    # a key such as '1.2.' is stored without its trailing dots
+    s{ [.]+ $}{}x for $section_key;
+
+    # every run of whitespace in the title becomes a single space
+    s{ \s+ }{ }gx for $section_title;
+
+    my @fields = ($section_key, $section_title, $destination);
+
+    say {$data_fd} encode_utf8(join($SEPARATOR, @fields));
+
+    return;
+}
+
+=item write_data_file
+
+=cut
+
+# Write the data file below $basedir.
+#
+# The file is placed at the location of this authority; its parent
+# directory is created when missing.  The content is a fixed explanatory
+# header, UTF-8 encoded here, followed by $generated as passed in.
+# Returns nothing.
+sub write_data_file {
+    my ($self, $basedir, $generated) = @_;
+
+    my $target = path("$basedir/" . $self->location);
+
+    my $folder = $target->parent;
+    $folder->mkpath
+      unless -e $folder->stringify;
+
+    my $preamble =<<"HEADER";
+# Data about titles, sections, and URLs of manuals, used to expand references
+# in tag descriptions and add links for HTML output.  Each line of this file
+# has three fields separated by double colons:
+#
+#     <section> :: <title> :: <url>
+#
+# If <section> is an underscore, that line specifies the title and URL for the
+# whole manual.
+
+HEADER
+
+    $target->spew(encode_utf8($preamble) . $generated);
+
+    return;
+}
+
+=item extract_sections_from_links
+
+=cut
+
+# Fetch the page at $base_url and write one data line per numbered
+# section link to $data_fd via write_line.
+#
+# The first line written carries the page title under the volume key.
+# Only links whose text starts with a section number followed by a title
+# are considered.  A link that repeats the title or the destination of an
+# entry already stored under the same key is skipped; this also holds for
+# keys that received the 'appendix-' prefix.
+#
+# Returns nothing.
+sub extract_sections_from_links {
+    my ($self, $data_fd, $base_url)= @_;
+
+    my $mechanize = WWW::Mechanize->new();
+    $mechanize->get($base_url);
+
+    my $page_title = $mechanize->title;
+
+    # strip explanatory remark
+    $page_title =~ s{ \s* \N{EM DASH} .* $}{}x;
+
+    # underscore is a token for the whole page
+    write_line($data_fd, $VOLUME_KEY, $page_title, $base_url);
+
+    my %by_section_key;
+    my $in_appendix = 0;
+
+    # true when the entry stored under a key repeats title or destination
+    my $is_repeat = sub {
+        my ($key, $title, $destination) = @_;
+
+        return 0
+          unless exists $by_section_key{$key};
+
+        my $known = $by_section_key{$key};
+
+        return 1
+          if $known->{title} eq $title
+          || $known->{destination} eq $destination;
+
+        return 0;
+    };
+
+    # https://stackoverflow.com/a/254687
+    for my $link ($mechanize->links) {
+
+        next
+          unless length $link->text;
+
+        next
+          if $link->text !~ qr{^ \s* ([.\d]+) \s+ (.+) $}x;
+
+        my $section_key = $1;
+        my $section_title = $2;
+
+        # drop final dots
+        $section_key =~ s{ [.]+ $}{}x;
+
+        # reduce consecutive whitespace
+        $section_title =~ s{ \s+ }{ }gx;
+
+        # NOTE(review): plain concatenation assumes the href is relative
+        # to $base_url - confirm
+        my $destination = $base_url . $link->url;
+
+        next
+          if $is_repeat->($section_key, $section_title, $destination);
+
+        # Some manuals reuse section numbers for different references,
+        # e.g. the Debian Policy's normal and appendix sections are
+        # numbers that clash with each other. Track if we've already
+        # seen a section pointing to some other URL than the current one,
+        # and prepend it with an indicator
+        $in_appendix = 1
+          if exists $by_section_key{$section_key}
+          && $by_section_key{$section_key}{destination} ne $destination;
+
+        if ($in_appendix) {
+
+            $section_key = "appendix-$section_key";
+
+            # appendix entries are stored under the prefixed key; without
+            # this second lookup a repeated appendix link is written again
+            next
+              if $is_repeat->($section_key, $section_title, $destination);
+        }
+
+        $by_section_key{$section_key}{title} = $section_title;
+        $by_section_key{$section_key}{destination} = $destination;
+
+        write_line($data_fd, $section_key, $section_title, $destination);
+    }
+
+    return;
+}
+
+=item refresh
+
+=cut
+
+# Regenerate the data file of this authority from the online manual.
+#
+# The section lines are collected in an in-memory buffer by
+# extract_sections_from_links and then handed to write_data_file, which
+# stores them below $basedir.  $archive is not used by this method.
+#
+# Dies when the in-memory handle cannot be opened or closed.  Returns
+# nothing.
+sub refresh {
+    my ($self, $archive, $basedir) = @_;
+
+    my $base_url = 'https://www.debian.org/doc/packaging-manuals/menu-policy/';
+
+    my $generated;
+    open(my $memory_fd, '>', \$generated)
+      or die encode_utf8("Cannot open scalar: $!");
+
+    $self->extract_sections_from_links($memory_fd, $base_url);
+
+    # errors from buffered writes only surface when the handle is closed
+    close($memory_fd)
+      or die encode_utf8("Cannot close scalar: $!");
+
+    $self->write_data_file($basedir, $generated);
+
+    return;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Authority/NewMaintainer.pm b/lib/Lintian/Data/Authority/NewMaintainer.pm
new file mode 100644
index 0000000..bd8c933
--- /dev/null
+++ b/lib/Lintian/Data/Authority/NewMaintainer.pm
@@ -0,0 +1,290 @@
+# -*- perl -*-
+#
+# Copyright (C) 1998 Christian Schwarz and Richard Braakman
+# Copyright (C) 2001 Colin Watson
+# Copyright (C) 2008 Jorda Polo
+# Copyright (C) 2009 Russ Allbery
+# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
+# Copyright (C) 2020-2021 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program.  If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Data::Authority::NewMaintainer;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Carp qw(croak);
+use Const::Fast;
+use List::SomeUtils qw(any first_value);
+use Path::Tiny;
+use Unicode::UTF8 qw(encode_utf8);
+use WWW::Mechanize ();
+
+use Lintian::Output::Markdown qw(markdown_authority);
+
+const my $SLASH => q{/};    # joins a search directory and the location in load
+const my $UNDERSCORE => q{_};    # building block for $VOLUME_KEY
+
+const my $VOLUME_KEY => $UNDERSCORE;    # section key for the whole manual
+const my $SECTIONS => 'sections';    # first argument to write_file in refresh
+
+use Moo;
+use namespace::clean;
+
+with 'Lintian::Data::PreambledJSON';
+
+=head1 NAME
+
+Lintian::Data::Authority::NewMaintainer - Lintian interface for manual references
+
+=head1 SYNOPSIS
+
+    use Lintian::Data::Authority::NewMaintainer;
+
+=head1 DESCRIPTION
+
+Lintian::Data::Authority::NewMaintainer provides a way to load data files for
+manual references.
+
+=head1 CLASS METHODS
+
+=over 4
+
+=item title
+
+=item shorthand
+
+=item location
+
+=item by_section_key
+
+=cut
+
+# human-readable name of the manual
+has title => (
+    is => 'rw',
+    default => sub { return 'New Maintainer\'s Guide'; }
+);
+
+# short identifier; the stem of the data file name
+has shorthand => (is => 'rw', default => sub { return 'new-maintainer'; });
+
+# relative path of the JSON data file, derived from the shorthand on
+# first use
+has location => (
+    is => 'rw',
+    lazy => 1,
+    default => sub {
+        my ($self) = @_;
+
+        my $shorthand = $self->shorthand;
+
+        return "authority/$shorthand.json";
+    }
+);
+
+# section entries indexed by their key; filled by load
+has by_section_key => (
+    is => 'rw',
+    default => sub { return {}; }
+);
+
+=item markdown_citation
+
+=cut
+
+# Build a Markdown citation for a section of this manual.
+#
+# Croaks when the section key is the one reserved for the whole manual.
+# Title and destination of the manual always go into the citation; those
+# of the section stay undefined when the key is not recognized.  Returns
+# the result of markdown_authority.
+sub markdown_citation {
+    my ($self, $section_key) = @_;
+
+    croak "Invalid section $section_key"
+      if $section_key eq $VOLUME_KEY;
+
+    # the overall manual comes first
+    my $volume = $self->value($VOLUME_KEY);
+    my $volume_title = $volume->{title};
+    my $volume_url = $volume->{destination};
+
+    # an empty entry stands in for a section without data
+    my $section = {};
+    $section = $self->value($section_key)
+      if $self->recognizes($section_key);
+
+    my $section_title = $section->{title};
+    my $section_url = $section->{destination};
+
+    return markdown_authority(
+        $volume_title, $volume_url,$section_key,
+        $section_title, $section_url
+    );
+}
+
+=item recognizes (KEY)
+
+Returns true if KEY is known, and false otherwise.
+
+=cut
+
+# Report whether there is an entry for the given key.
+#
+# Returns 1 when the key has non-zero length and exists in
+# by_section_key, and 0 otherwise.
+sub recognizes {
+    my ($self, $key) = @_;
+
+    # an undefined or empty key is never known
+    my $known = length($key) && exists $self->by_section_key->{$key};
+
+    return $known ? 1 : 0;
+}
+
+=item value (KEY)
+
+Returns the value attached to KEY if it was listed in the data
+file represented by this Lintian::Data instance and the undefined value
+otherwise.
+
+=cut
+
+# Look up the entry stored for the given key.
+#
+# Returns undef for an undefined or empty key, and otherwise whatever
+# by_section_key holds for the key, which is undef when nothing is stored.
+sub value {
+    my ($self, $key) = @_;
+
+    if (length $key) {
+        return $self->by_section_key->{$key};
+    }
+
+    # explicit undef on purpose: a single scalar in every calling context
+    return undef;
+}
+
+=item load
+
+=cut
+
+# Load the section entries from the first existing data file.
+#
+# Each directory in $search_space is combined with the location of this
+# authority; the first candidate that exists is passed to read_file.
+# $our_vendor is not used by this method.
+#
+# Returns 0 when read_file reports failure and 1 otherwise.
+sub load {
+    my ($self, $search_space, $our_vendor) = @_;
+
+    my $path = first_value { -e }
+    map { $_ . $SLASH . $self->location } @{$search_space};
+
+    my $reference;
+    my $was_read = $self->read_file($path, \$reference);
+
+    return 0
+      unless $was_read;
+
+    for my $section (@{$reference // []}) {
+
+        my $key = $section->{key};
+
+        # the first entry for a key wins; later ones are silently ignored
+        $self->by_section_key->{$key} = $section
+          unless defined $self->by_section_key->{$key};
+    }
+
+    return 1;
+}
+
+=item refresh
+
+=cut
+
+# Regenerate the JSON data file from the online table of contents.
+#
+# Fetches the index page of the manual, records the page itself under the
+# volume key and then adds one entry per link whose text starts with a
+# section number followed by a title.  The entries are handed to
+# write_file together with the path below $basedir.  $archive is not used
+# by this method.
+#
+# Returns the status reported by write_file.
+sub refresh {
+    my ($self, $archive, $basedir) = @_;
+
+    my $base_url = 'https://www.debian.org/doc/manuals/maint-guide/index.html';
+
+    my $mechanize = WWW::Mechanize->new();
+    $mechanize->get($base_url);
+
+    my $page_title = $mechanize->title;
+
+    my @sections;
+
+    # underscore is a token for the whole page
+    my %volume;
+    $volume{key} = $VOLUME_KEY;
+    $volume{title} = $page_title;
+    $volume{destination} = $base_url;
+
+    # store array to resemble web layout
+    # may contain duplicates
+    push(@sections, \%volume);
+
+    my $in_appendix = 0;
+
+    # https://stackoverflow.com/a/254687
+    for my $link ($mechanize->links) {
+
+        next
+          unless length $link->text;
+
+        next
+          if $link->text !~ qr{^ \s* ([.\d[:upper:]]+) \s+ (.+) $}x;
+
+        my $section_key = $1;
+        my $section_title = $2;
+
+        # drop final dots
+        $section_key =~ s{ [.]+ $}{}x;
+
+        # reduce consecutive whitespace
+        $section_title =~ s{ \s+ }{ }gx;
+
+        # $base_url names a file, so appending the href to it glues the
+        # two file names together; resolve the href against the base of
+        # the fetched page instead and keep a plain string for the output
+        my $destination = $link->url_abs->as_string;
+
+        my @similar = grep { $_->{key} eq $section_key } @sections;
+        next
+          if (any { $_->{title} eq $section_title } @similar)
+          || (any { $_->{destination} eq $destination } @similar);
+
+        # Some manuals reuse section numbers for different references,
+        # e.g. the Debian Policy's normal and appendix sections are
+        # numbers that clash with each other. Track if we've already
+        # seen a section pointing to some other URL than the current one,
+        # and prepend it with an indicator
+        $in_appendix = 1
+          if any { $_->{destination} ne $destination } @similar;
+
+        $section_key = "appendix-$section_key"
+          if $in_appendix;
+
+        my %section;
+        $section{key} = $section_key;
+        $section{title} = $section_title;
+        $section{destination} = $destination;
+        push(@sections, \%section);
+    }
+
+    my $data_path = "$basedir/" . $self->location;
+    my $status = $self->write_file($SECTIONS, \@sections, $data_path);
+
+    return $status;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Authority/PerlPolicy.pm b/lib/Lintian/Data/Authority/PerlPolicy.pm
new file mode 100644
index 0000000..92dc31a
--- /dev/null
+++ b/lib/Lintian/Data/Authority/PerlPolicy.pm
@@ -0,0 +1,316 @@
+# -*- perl -*-
+#
+# Copyright (C) 1998 Christian Schwarz and Richard Braakman
+# Copyright (C) 2001 Colin Watson
+# Copyright (C) 2008 Jorda Polo
+# Copyright (C) 2009 Russ Allbery
+# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
+# Copyright (C) 2020-2021 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program.  If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Data::Authority::PerlPolicy;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Carp qw(croak);
+use Const::Fast;
+use Path::Tiny;
+use Unicode::UTF8 qw(encode_utf8);
+use WWW::Mechanize ();
+
+use Lintian::Output::Markdown qw(markdown_authority);
+
+const my $EMPTY => q{};    # not referenced in this module
+const my $SPACE => q{ };    # not referenced in this module
+const my $SLASH => q{/};    # not referenced in this module
+const my $COLON => q{:};    # building block for $SEPARATOR
+const my $UNDERSCORE => q{_};    # building block for $VOLUME_KEY
+const my $LEFT_PARENTHESIS => q{(};    # not referenced in this module
+const my $RIGHT_PARENTHESIS => q{)};    # not referenced in this module
+
+const my $TWO_PARTS => 2;    # split limit in consumer: title and URL
+
+const my $VOLUME_KEY => $UNDERSCORE;    # section key for the whole manual
+const my $SEPARATOR => $COLON x 2;    # '::' between the fields write_line joins
+
+use Moo;
+use namespace::clean;
+
+with 'Lintian::Data::JoinedLines';
+
+=head1 NAME
+
+Lintian::Data::Authority::PerlPolicy - Lintian interface for manual references
+
+=head1 SYNOPSIS
+
+    use Lintian::Data::Authority::PerlPolicy;
+
+=head1 DESCRIPTION
+
+Lintian::Data::Authority::PerlPolicy provides a way to load data files for
+manual references.
+
+=head1 CLASS METHODS
+
+=over 4
+
+=item title
+
+=item shorthand
+
+=item location
+
+=item separator
+
+=cut
+
+# human-readable name of the manual
+has title => (is => 'rw', default => sub { return 'Perl Policy'; });
+
+# short identifier; the last component of the data file location
+has shorthand => (is => 'rw', default => sub { return 'perl-policy'; });
+
+# relative path of the data file, derived from the shorthand on first use
+has location => (
+    is => 'rw',
+    lazy => 1,
+    default => sub {
+        my ($self) = @_;
+
+        my $shorthand = $self->shorthand;
+
+        return "authority/$shorthand";
+    }
+);
+
+# pattern consumer uses to split the fields of a data line
+has separator => (is => 'rw', default => sub { return qr/::/; });
+
+=item consumer
+
+=cut
+
+# Turn the remainder of a data line into the entry for a key.
+#
+# Returns undef when a previous value is already defined for the key.
+# Otherwise the remainder is split at the separator into at most two
+# parts, which are returned in a hash reference as 'title' and 'url'.
+sub consumer {
+    my ($self, $key, $remainder, $previous) = @_;
+
+    # explicit undef on purpose: a single scalar in every calling context
+    return undef
+      if defined $previous;
+
+    my @fields = split($self->separator, $remainder, $TWO_PARTS);
+
+    my %entry = (
+        title => $fields[0],
+        url => $fields[1],
+    );
+
+    return \%entry;
+}
+
+=item markdown_citation
+
+=cut
+
+# Build a Markdown citation for a section of this manual.
+#
+# Croaks when the section key is the one reserved for the whole manual.
+# Title and URL of the manual always go into the citation; those of the
+# section stay undefined when the key is not recognized.  Returns the
+# result of markdown_authority.
+sub markdown_citation {
+    my ($self, $section_key) = @_;
+
+    croak "Invalid section $section_key"
+      if $section_key eq $VOLUME_KEY;
+
+    # the overall manual comes first
+    my $volume = $self->value($VOLUME_KEY);
+    my $volume_title = $volume->{title};
+    my $volume_url = $volume->{url};
+
+    # an empty entry stands in for a section without data
+    my $section = {};
+    $section = $self->value($section_key)
+      if $self->recognizes($section_key);
+
+    my $section_title = $section->{title};
+    my $section_url = $section->{url};
+
+    return markdown_authority(
+        $volume_title, $volume_url,$section_key,
+        $section_title, $section_url
+    );
+}
+
+=item write_line
+
+=cut
+
+# Write one data line to the given handle.
+#
+# Plain function, not a method.  The line consists of section key,
+# section title and destination joined by the separator constant; it is
+# UTF-8 encoded before printing.  Returns nothing.
+sub write_line {
+    my ($data_fd, $section_key, $section_title, $destination) = @_;
+
+    # a key such as '1.2.' is stored without its trailing dots
+    s{ [.]+ $}{}x for $section_key;
+
+    # every run of whitespace in the title becomes a single space
+    s{ \s+ }{ }gx for $section_title;
+
+    my @fields = ($section_key, $section_title, $destination);
+
+    say {$data_fd} encode_utf8(join($SEPARATOR, @fields));
+
+    return;
+}
+
+=item write_data_file
+
+=cut
+
+# Write the data file below $basedir.
+#
+# The file is placed at the location of this authority; its parent
+# directory is created when missing.  The content is a fixed explanatory
+# header, UTF-8 encoded here, followed by $generated as passed in.
+# Returns nothing.
+sub write_data_file {
+    my ($self, $basedir, $generated) = @_;
+
+    my $target = path("$basedir/" . $self->location);
+
+    my $folder = $target->parent;
+    $folder->mkpath
+      unless -e $folder->stringify;
+
+    my $preamble =<<"HEADER";
+# Data about titles, sections, and URLs of manuals, used to expand references
+# in tag descriptions and add links for HTML output.  Each line of this file
+# has three fields separated by double colons:
+#
+#     <section> :: <title> :: <url>
+#
+# If <section> is an underscore, that line specifies the title and URL for the
+# whole manual.
+
+HEADER
+
+    $target->spew(encode_utf8($preamble) . $generated);
+
+    return;
+}
+
+=item extract_sections_from_links
+
+=cut
+
+# Fetch the page at $base_url and write one data line per section link
+# to $data_fd via write_line.
+#
+# The first line written carries the page title under the volume key.
+# Only links whose text starts with a section key (a single capital
+# letter, or digits and dots with an optional leading capital letter)
+# followed by a title are considered.  A link that repeats the title or
+# the destination of an entry already stored under the same key is
+# skipped; this also holds for keys that received the 'appendix-' prefix.
+#
+# Returns nothing.
+sub extract_sections_from_links {
+    my ($self, $data_fd, $base_url)= @_;
+
+    my $mechanize = WWW::Mechanize->new();
+    $mechanize->get($base_url);
+
+    my $page_title = $mechanize->title;
+
+    # strip explanatory remark
+    $page_title =~ s{ \s* \N{EM DASH} .* $}{}x;
+
+    # underscore is a token for the whole page
+    write_line($data_fd, $VOLUME_KEY, $page_title, $base_url);
+
+    my %by_section_key;
+    my $in_appendix = 0;
+
+    # true when the entry stored under a key repeats title or destination
+    my $is_repeat = sub {
+        my ($key, $title, $destination) = @_;
+
+        return 0
+          unless exists $by_section_key{$key};
+
+        my $known = $by_section_key{$key};
+
+        return 1
+          if $known->{title} eq $title
+          || $known->{destination} eq $destination;
+
+        return 0;
+    };
+
+    # https://stackoverflow.com/a/254687
+    for my $link ($mechanize->links) {
+
+        next
+          unless length $link->text;
+
+        next
+          if $link->text !~ qr{^ \s* ([A-Z]|[A-Z]?[.\d]+) \s+ (.+) $}x;
+
+        my $section_key = $1;
+        my $section_title = $2;
+
+        # drop final dots
+        $section_key =~ s{ [.]+ $}{}x;
+
+        # reduce consecutive whitespace
+        $section_title =~ s{ \s+ }{ }gx;
+
+        # NOTE(review): plain concatenation assumes the href is relative
+        # to $base_url - confirm
+        my $destination = $base_url . $link->url;
+
+        next
+          if $is_repeat->($section_key, $section_title, $destination);
+
+        # Some manuals reuse section numbers for different references,
+        # e.g. the Debian Policy's normal and appendix sections are
+        # numbers that clash with each other. Track if we've already
+        # seen a section pointing to some other URL than the current one,
+        # and prepend it with an indicator
+        $in_appendix = 1
+          if exists $by_section_key{$section_key}
+          && $by_section_key{$section_key}{destination} ne $destination;
+
+        if ($in_appendix) {
+
+            $section_key = "appendix-$section_key";
+
+            # appendix entries are stored under the prefixed key; without
+            # this second lookup a repeated appendix link is written again
+            next
+              if $is_repeat->($section_key, $section_title, $destination);
+        }
+
+        $by_section_key{$section_key}{title} = $section_title;
+        $by_section_key{$section_key}{destination} = $destination;
+
+        write_line($data_fd, $section_key, $section_title, $destination);
+    }
+
+    return;
+}
+
+=item refresh
+
+=cut
+
+# Regenerate the data file of this authority from the online manual.
+#
+# The section lines are collected in an in-memory buffer by
+# extract_sections_from_links and then handed to write_data_file, which
+# stores them below $basedir.  $archive is not used by this method.
+#
+# Dies when the in-memory handle cannot be opened or closed.  Returns
+# nothing.
+sub refresh {
+    my ($self, $archive, $basedir) = @_;
+
+    my $base_url = 'https://www.debian.org/doc/packaging-manuals/perl-policy/';
+
+    my $generated;
+    open(my $memory_fd, '>', \$generated)
+      or die encode_utf8("Cannot open scalar: $!");
+
+    $self->extract_sections_from_links($memory_fd, $base_url);
+
+    # errors from buffered writes only surface when the handle is closed
+    close($memory_fd)
+      or die encode_utf8("Cannot close scalar: $!");
+
+    $self->write_data_file($basedir, $generated);
+
+    return;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Authority/PythonPolicy.pm b/lib/Lintian/Data/Authority/PythonPolicy.pm
new file mode 100644
index 0000000..ebeda04
--- /dev/null
+++ b/lib/Lintian/Data/Authority/PythonPolicy.pm
@@ -0,0 +1,317 @@
+# -*- perl -*-
+#
+# Copyright (C) 1998 Christian Schwarz and Richard Braakman
+# Copyright (C) 2001 Colin Watson
+# Copyright (C) 2008 Jorda Polo
+# Copyright (C) 2009 Russ Allbery
+# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
+# Copyright (C) 2020-2021 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program.  If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Data::Authority::PythonPolicy;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Carp qw(croak);
+use Const::Fast;
+use Path::Tiny;
+use Unicode::UTF8 qw(encode_utf8);
+use WWW::Mechanize ();
+
+use Lintian::Output::Markdown qw(markdown_authority);
+
+const my $EMPTY => q{};    # not referenced in this module
+const my $SPACE => q{ };    # not referenced in this module
+const my $SLASH => q{/};    # not referenced in this module
+const my $COLON => q{:};    # building block for $SEPARATOR
+const my $UNDERSCORE => q{_};    # building block for $VOLUME_KEY
+const my $LEFT_PARENTHESIS => q{(};    # not referenced in this module
+const my $RIGHT_PARENTHESIS => q{)};    # not referenced in this module
+
+const my $TWO_PARTS => 2;    # split limit in consumer: title and URL
+
+const my $VOLUME_KEY => $UNDERSCORE;    # section key for the whole manual
+const my $SEPARATOR => $COLON x 2;    # '::' between the fields write_line joins
+
+use Moo;
+use namespace::clean;
+
+with 'Lintian::Data::JoinedLines';
+
+=head1 NAME
+
+Lintian::Data::Authority::PythonPolicy - Lintian interface for manual references
+
+=head1 SYNOPSIS
+
+    use Lintian::Data::Authority::PythonPolicy;
+
+=head1 DESCRIPTION
+
+Lintian::Data::Authority::PythonPolicy provides a way to load data files for
+manual references.
+
+=head1 CLASS METHODS
+
+=over 4
+
+=item title
+
+=item shorthand
+
+=item location
+
+=item separator
+
+=cut
+
+# human-readable name of the manual
+has title => (is => 'rw', default => sub { return 'Python Policy'; });
+
+# short identifier; the last component of the data file location
+has shorthand => (is => 'rw', default => sub { return 'python-policy'; });
+
+# relative path of the data file, derived from the shorthand on first use
+has location => (
+    is => 'rw',
+    lazy => 1,
+    default => sub {
+        my ($self) = @_;
+
+        my $shorthand = $self->shorthand;
+
+        return "authority/$shorthand";
+    }
+);
+
+# pattern consumer uses to split the fields of a data line
+has separator => (is => 'rw', default => sub { return qr/::/; });
+
+=item consumer
+
+=cut
+
+# Turn the remainder of a data line into the entry for a key.
+#
+# Returns undef when a previous value is already defined for the key.
+# Otherwise the remainder is split at the separator into at most two
+# parts, which are returned in a hash reference as 'title' and 'url'.
+sub consumer {
+    my ($self, $key, $remainder, $previous) = @_;
+
+    # explicit undef on purpose: a single scalar in every calling context
+    return undef
+      if defined $previous;
+
+    my @fields = split($self->separator, $remainder, $TWO_PARTS);
+
+    my %entry = (
+        title => $fields[0],
+        url => $fields[1],
+    );
+
+    return \%entry;
+}
+
+=item markdown_citation
+
+=cut
+
+# Build a Markdown citation for a section of this manual.
+#
+# Croaks when the section key is the one reserved for the whole manual.
+# Title and URL of the manual always go into the citation; those of the
+# section stay undefined when the key is not recognized.  Returns the
+# result of markdown_authority.
+sub markdown_citation {
+    my ($self, $section_key) = @_;
+
+    croak "Invalid section $section_key"
+      if $section_key eq $VOLUME_KEY;
+
+    # the overall manual comes first
+    my $volume = $self->value($VOLUME_KEY);
+    my $volume_title = $volume->{title};
+    my $volume_url = $volume->{url};
+
+    # an empty entry stands in for a section without data
+    my $section = {};
+    $section = $self->value($section_key)
+      if $self->recognizes($section_key);
+
+    my $section_title = $section->{title};
+    my $section_url = $section->{url};
+
+    return markdown_authority(
+        $volume_title, $volume_url,$section_key,
+        $section_title, $section_url
+    );
+}
+
+=item write_line
+
+=cut
+
+# Write one data line to the given handle.
+#
+# Plain function, not a method.  The line consists of section key,
+# section title and destination joined by the separator constant; it is
+# UTF-8 encoded before printing.  Returns nothing.
+sub write_line {
+    my ($data_fd, $section_key, $section_title, $destination) = @_;
+
+    # a key such as '1.2.' is stored without its trailing dots
+    s{ [.]+ $}{}x for $section_key;
+
+    # every run of whitespace in the title becomes a single space
+    s{ \s+ }{ }gx for $section_title;
+
+    my @fields = ($section_key, $section_title, $destination);
+
+    say {$data_fd} encode_utf8(join($SEPARATOR, @fields));
+
+    return;
+}
+
+=item write_data_file
+
+=cut
+
+# Write the data file below $basedir.
+#
+# The file is placed at the location of this authority; its parent
+# directory is created when missing.  The content is a fixed explanatory
+# header, UTF-8 encoded here, followed by $generated as passed in.
+# Returns nothing.
+sub write_data_file {
+    my ($self, $basedir, $generated) = @_;
+
+    my $target = path("$basedir/" . $self->location);
+
+    my $folder = $target->parent;
+    $folder->mkpath
+      unless -e $folder->stringify;
+
+    my $preamble =<<"HEADER";
+# Data about titles, sections, and URLs of manuals, used to expand references
+# in tag descriptions and add links for HTML output.  Each line of this file
+# has three fields separated by double colons:
+#
+#     <section> :: <title> :: <url>
+#
+# If <section> is an underscore, that line specifies the title and URL for the
+# whole manual.
+
+HEADER
+
+    $target->spew(encode_utf8($preamble) . $generated);
+
+    return;
+}
+
+=item extract_sections_from_links
+
+=cut
+
+# Fetch the page at $base_url and write one data line per numbered
+# section link to $data_fd via write_line.
+#
+# The first line written carries the page title under the volume key.
+# Only links whose text starts with a section number followed by a title
+# are considered.  A link that repeats the title or the destination of an
+# entry already stored under the same key is skipped; this also holds for
+# keys that received the 'appendix-' prefix.
+#
+# Returns nothing.
+sub extract_sections_from_links {
+    my ($self, $data_fd, $base_url)= @_;
+
+    my $mechanize = WWW::Mechanize->new();
+    $mechanize->get($base_url);
+
+    my $page_title = $mechanize->title;
+
+    # strip explanatory remark
+    $page_title =~ s{ \s* \N{EM DASH} .* $}{}x;
+
+    # underscore is a token for the whole page
+    write_line($data_fd, $VOLUME_KEY, $page_title, $base_url);
+
+    my %by_section_key;
+    my $in_appendix = 0;
+
+    # true when the entry stored under a key repeats title or destination
+    my $is_repeat = sub {
+        my ($key, $title, $destination) = @_;
+
+        return 0
+          unless exists $by_section_key{$key};
+
+        my $known = $by_section_key{$key};
+
+        return 1
+          if $known->{title} eq $title
+          || $known->{destination} eq $destination;
+
+        return 0;
+    };
+
+    # https://stackoverflow.com/a/254687
+    for my $link ($mechanize->links) {
+
+        next
+          unless length $link->text;
+
+        next
+          if $link->text !~ qr{^ \s* ([.\d]+) \s+ (.+) $}x;
+
+        my $section_key = $1;
+        my $section_title = $2;
+
+        # drop final dots
+        $section_key =~ s{ [.]+ $}{}x;
+
+        # reduce consecutive whitespace
+        $section_title =~ s{ \s+ }{ }gx;
+
+        # NOTE(review): plain concatenation assumes the href is relative
+        # to $base_url - confirm
+        my $destination = $base_url . $link->url;
+
+        next
+          if $is_repeat->($section_key, $section_title, $destination);
+
+        # Some manuals reuse section numbers for different references,
+        # e.g. the Debian Policy's normal and appendix sections are
+        # numbers that clash with each other. Track if we've already
+        # seen a section pointing to some other URL than the current one,
+        # and prepend it with an indicator
+        $in_appendix = 1
+          if exists $by_section_key{$section_key}
+          && $by_section_key{$section_key}{destination} ne $destination;
+
+        if ($in_appendix) {
+
+            $section_key = "appendix-$section_key";
+
+            # appendix entries are stored under the prefixed key; without
+            # this second lookup a repeated appendix link is written again
+            next
+              if $is_repeat->($section_key, $section_title, $destination);
+        }
+
+        $by_section_key{$section_key}{title} = $section_title;
+        $by_section_key{$section_key}{destination} = $destination;
+
+        write_line($data_fd, $section_key, $section_title, $destination);
+    }
+
+    return;
+}
+
+=item refresh
+
+=cut
+
+# Regenerate the data file of this authority from the online manual.
+#
+# The section lines are collected in an in-memory buffer by
+# extract_sections_from_links and then handed to write_data_file, which
+# stores them below $basedir.  $archive is not used by this method.
+#
+# Dies when the in-memory handle cannot be opened or closed.  Returns
+# nothing.
+sub refresh {
+    my ($self, $archive, $basedir) = @_;
+
+    my $base_url
+      = 'https://www.debian.org/doc/packaging-manuals/python-policy/';
+
+    my $generated;
+    open(my $memory_fd, '>', \$generated)
+      or die encode_utf8("Cannot open scalar: $!");
+
+    $self->extract_sections_from_links($memory_fd, $base_url);
+
+    # errors from buffered writes only surface when the handle is closed
+    close($memory_fd)
+      or die encode_utf8("Cannot close scalar: $!");
+
+    $self->write_data_file($basedir, $generated);
+
+    return;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Data/Authority/VimPolicy.pm b/lib/Lintian/Data/Authority/VimPolicy.pm
new file mode 100644
index 0000000..6ffbe91
--- /dev/null
+++ b/lib/Lintian/Data/Authority/VimPolicy.pm
@@ -0,0 +1,459 @@
+# -*- perl -*-
+#
+# Copyright (C) 1998 Christian Schwarz and Richard Braakman
+# Copyright (C) 2001 Colin Watson
+# Copyright (C) 2008 Jorda Polo
+# Copyright (C) 2009 Russ Allbery
+# Copyright (C) 2017-2019 Chris Lamb <lamby@debian.org>
+# Copyright (C) 2020-2021 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program.  If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Data::Authority::VimPolicy;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Carp qw(croak);
+use Const::Fast;
+use File::Basename qw(basename);
+use IPC::Run3;
+use HTML::TokeParser::Simple;
+use Path::Tiny;
+use Unicode::UTF8 qw(encode_utf8);
+
+use Lintian::Output::Markdown qw(markdown_authority);
+
+const my $EMPTY => q{};
+const my $SPACE => q{ };
+const my $SLASH => q{/};
+const my $COLON => q{:};
+const my $INDENT => $SPACE x 4;
+const my $UNDERSCORE => q{_};
+const my $LEFT_PARENTHESIS => q{(};
+const my $RIGHT_PARENTHESIS => q{)};
+
+const my $TWO_PARTS => 2;
+
+const my $VOLUME_KEY => $UNDERSCORE;
+const my $SEPARATOR => $COLON x 2;
+
+const my $WAIT_STATUS_SHIFT => 8;
+
+use Moo;
+use namespace::clean;
+
+with 'Lintian::Data::JoinedLines';
+
+=head1 NAME
+
+Lintian::Data::Authority::VimPolicy - Lintian interface for manual references
+
+=head1 SYNOPSIS
+
+    use Lintian::Data::Authority::VimPolicy;
+
+=head1 DESCRIPTION
+
+Lintian::Data::Authority::VimPolicy provides a way to load data files for
+manual references.
+
+=head1 CLASS METHODS
+
+=over 4
+
+=item title
+
+=item shorthand
+
+=item location
+
+=item separator
+
+=cut
+
+has title => (    # name of the manual; read-write with a fixed default
+    is => 'rw',
+    default => 'Vim Policy'
+);
+
+has shorthand => (    # short identifier, reused in the default location
+    is => 'rw',
+    default => 'vim-policy'
+);
+
+has location => (    # data file path; write_data_file prefixes $basedir
+    is => 'rw',
+    lazy => 1,    # built on first read, from the shorthand set by then
+    default => sub {
+        my ($self) = @_;
+
+        return 'authority/' . $self->shorthand;
+    }
+);
+
+has separator => (    # pattern consumer uses to split a data line
+    is => 'rw',
+    default => sub { qr/::/ }
+);
+
+=item consumer
+
+=cut
+
+sub consumer {
+    # Turns the remainder of a data line into a record with title and url.
+    my ($self, $key, $remainder, $previous) = @_;
+
+    # yields undef when $previous is defined (presumably a duplicate key)
+    if (defined $previous) {
+        return undef;
+    }
+
+    my @fields = split($self->separator, $remainder, $TWO_PARTS);
+
+    # a field missing from the line is stored as undef
+    return { title => $fields[0], url => $fields[1] };
+}
+
+=item markdown_citation
+
+=cut
+
+sub markdown_citation {
+    # Builds the Markdown citation for one section of this manual by
+    # handing the volume and section details to markdown_authority.
+    my ($self, $section_key) = @_;
+
+    # the volume key denotes the manual as a whole, never a section
+    if ($section_key eq $VOLUME_KEY) {
+        croak "Invalid section $section_key";
+    }
+
+    # every citation starts from the entry for the overall manual
+    my $volume = $self->value($VOLUME_KEY);
+    my $volume_title = $volume->{title};
+    my $volume_url = $volume->{url};
+
+    # both stay undefined when the section key is not recognized
+    my ($section_title, $section_url);
+
+    if ($self->recognizes($section_key)) {
+        my $section = $self->value($section_key);
+
+        $section_title = $section->{title};
+        $section_url = $section->{url};
+    }
+
+    return markdown_authority($volume_title, $volume_url,
+        $section_key, $section_title, $section_url);
+}
+
+=item write_line
+
+=cut
+
+sub write_line {
+    # Prints one record (key, title, destination joined by '::') to $data_fd.
+    my ($data_fd, $section_key, $section_title, $destination) = @_;
+
+    # trailing dots are not part of the section key
+    my $bare_key = $section_key =~ s{ [.]+ $}{}xr;
+
+    # any run of whitespace in the title collapses to one space
+    my $tidy_title = $section_title =~ s{ \s+ }{ }gxr;
+
+    my @fields = ($bare_key, $tidy_title, $destination);
+    say {$data_fd} encode_utf8(join($SEPARATOR, @fields));
+
+    return;
+}
+
+=item write_data_file
+
+=cut
+
+sub write_data_file {
+    # Stores the explanatory header followed by $generated below $basedir.
+    my ($self, $basedir, $generated) = @_;
+
+    my $target = path("$basedir/" . $self->location);
+    # the folder that holds the data file may not exist yet
+    my $folder = $target->parent->stringify;
+    path($folder)->mkpath
+      unless -e $folder;
+
+    my $header =<<"HEADER";
+# Data about titles, sections, and URLs of manuals, used to expand references
+# in tag descriptions and add links for HTML output.  Each line of this file
+# has three fields separated by double colons:
+#
+#     <section> :: <title> :: <url>
+#
+# If <section> is an underscore, that line specifies the title and URL for the
+# whole manual.
+
+HEADER
+
+    $target->spew(encode_utf8($header) . $generated);
+    return;
+}
+
+=item find_installable_name
+
+=cut
+
+sub find_installable_name {
+    # Returns the name of the installable shipping $requested_path, going
+    # by the Contents files for 'all' and $port. Dies if nothing ships the
+    # path; warns if several packages do and returns the first one found.
+    my ($self, $archive, $release, $liberty, $port, $requested_path) = @_;
+
+    my @installed_by;
+    my %seen;
+
+    for my $installable_architecture ('all', $port) {
+
+        my $local_path
+          = $archive->contents_gz($release, $liberty,
+            $installable_architecture);
+
+        open(my $fd, '<:gzip', $local_path)
+          or die encode_utf8("Cannot open $local_path: $!");
+
+        while (my $line = <$fd>) {
+            chomp $line;
+
+            # a single-space pattern splits on runs of whitespace
+            my ($path, $finder) = split($SPACE, $line, $TWO_PARTS);
+            next
+              unless length $path
+              && length $finder;
+
+            next
+              unless $path eq $requested_path;
+
+            # a location reads [[area/]section/]name; keep only the name
+            for my $location (split(m{,}, $finder)) {
+                my $installable = (split(m{/}, $location))[-1];
+                next
+                  unless length $installable;
+
+                # 'all' and $port may both list the same package
+                push(@installed_by, $installable)
+                  unless $seen{$installable}++;
+            }
+        }
+
+        close $fd;
+    }
+
+    die encode_utf8(
+        "The path $requested_path is not installed by any package.")
+      if @installed_by < 1;
+
+    if (@installed_by > 1) {
+        warn encode_utf8(
+            "The path $requested_path is installed by multiple packages:\n");
+        warn encode_utf8($INDENT . "- $_\n")for @installed_by;
+    }
+
+    return shift @installed_by;
+}
+
+=item refresh
+
+=cut
+
+sub refresh {
+    # Regenerates the data file below $basedir from the Vim Policy HTML page
+    # inside the installable that ships it, fetched from the archive mirror.
+    my ($self, $archive, $basedir) = @_;
+
+    # shipped as part of the vim installable
+    my $shipped_base = 'usr/share/doc/vim/vim-policy.html/';
+    my $index_name = 'index.html';
+    my $shipped_path = $shipped_base . $index_name;
+    my $stored_uri = "file:///$shipped_base";
+
+    # neutral sort order
+    local $ENV{LC_ALL} = 'C';
+
+    my $release = 'stable';
+    my $port = 'amd64';
+
+    my $installable_name
+      = $self->find_installable_name($archive, $release, 'main', $port,
+        $shipped_path);
+
+    my $deb822_by_installable_name
+      = $archive->deb822_packages_by_installable_name($release, 'main', $port);
+
+    # NOTE(review): the template name looks copied from another authority
+    my $work_folder
+      = Path::Tiny->tempdir(
+        TEMPLATE => 'refresh-doc-base-specification-XXXXXXXXXX');
+
+    die encode_utf8("Installable $installable_name not shipped in port $port")
+      unless exists $deb822_by_installable_name->{$installable_name};
+
+    my $deb822 = $deb822_by_installable_name->{$installable_name};
+    my $pool_path = $deb822->value('Filename');
+    my $deb_filename = basename($pool_path);
+    my $deb_local_path = "$work_folder/$deb_filename";
+    my $deb_url = $archive->mirror_base . $SLASH . $pool_path;
+
+    # run3 takes stdin, stdout, stderr in that order; capture only stderr
+    my $stderr;
+    run3([qw{wget --quiet}, "--output-document=$deb_local_path", $deb_url],
+        undef, undef, \$stderr);
+    my $status = ($? >> $WAIT_STATUS_SHIFT);
+    # stderr already in UTF-8; it may be empty because wget runs quietly
+    die encode_utf8("Cannot download $deb_url (exit status $status)\n")
+      . ($stderr // $EMPTY)
+      if $status;
+
+    my $extract_folder = "$work_folder/unpacked/$pool_path";
+    path($extract_folder)->mkpath;
+
+    run3([qw{dpkg-deb --extract}, $deb_local_path, $extract_folder],
+        undef, undef, \$stderr);
+    $status = ($? >> $WAIT_STATUS_SHIFT);
+    die encode_utf8("Cannot unpack $deb_local_path (exit status $status)\n")
+      . ($stderr // $EMPTY)
+      if $status;
+
+    unlink($deb_local_path)
+      or die encode_utf8("Cannot delete $deb_local_path");
+
+    my $generated;
+    open(my $memory_fd, '>', \$generated)
+      or die encode_utf8("Cannot open scalar: $!");
+
+    # this file has no use line for URI::file, so load it here
+    require URI::file;
+    my $fresh_uri = URI::file->new_abs("/$extract_folder/$shipped_path");
+
+    my $parser = HTML::TokeParser::Simple->new(url => $fresh_uri);
+    my $in_title = 0;
+    my $in_dt_tag = 0;
+    my $after_a_tag = 0;
+    my $page_title = $EMPTY;
+    my $section_key = $EMPTY;
+    my $section_title = $EMPTY;
+    my $relative_destination = $EMPTY;
+
+    while (my $token = $parser->get_token) {
+        if (length $token->get_tag) {
+
+            if ($token->get_tag eq 'h1') {
+
+                # an h1 without a class attribute is not the title
+                $in_title = ($token->is_start_tag
+                      && ($token->get_attr('class') // $EMPTY) eq 'title');
+
+                # not yet leaving title
+                next
+                  if $in_title;
+
+                # trim both ends
+                $page_title =~ s/^\s+|\s+$//g;
+
+                # underscore is a token for the whole page
+                write_line($memory_fd, $VOLUME_KEY, $page_title,
+                    $stored_uri . $index_name)
+                  if length $page_title;
+
+                $page_title = $EMPTY;
+            }
+
+            if ($token->get_tag eq 'dt') {
+
+                $in_dt_tag = $token->is_start_tag;
+
+                # not yet leaving dt tag
+                next
+                  if $in_dt_tag;
+
+                # trim both ends
+                $section_key =~ s/^\s+|\s+$//g;
+                $section_title =~ s/^\s+|\s+$//g;
+
+                my $full_destination = $stored_uri . $relative_destination;
+
+                write_line(
+                    $memory_fd, $section_key,
+                    $section_title,$full_destination
+                )if length $section_title;
+
+                $section_key = $EMPTY;
+                $section_title = $EMPTY;
+                $relative_destination = $EMPTY;
+            }
+
+            if ($token->get_tag eq 'a') {
+
+                $after_a_tag = $token->is_start_tag;
+
+                # an anchor without href must not leave undef behind
+                $relative_destination = $token->get_attr('href') // $EMPTY
+                  if $token->is_start_tag;
+            }
+
+        } else {
+
+            # concatenate span objects
+            $page_title .= $token->as_is
+              if length $token->as_is
+              && $in_title
+              && $after_a_tag;
+
+            $section_key = $token->as_is
+              if length $token->as_is
+              && $in_dt_tag
+              && !$after_a_tag;
+
+            # concatenate span objects
+            $section_title .= $token->as_is
+              if length $token->as_is
+              && $in_dt_tag
+              && $after_a_tag;
+        }
+    }
+
+    close $memory_fd;
+    $self->write_data_file($basedir, $generated);
+    return;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et