7 files changed, 3113 insertions, 0 deletions
diff --git a/lib/Lintian/Index/Ar.pm b/lib/Lintian/Index/Ar.pm
new file mode 100644
index 0000000..01f3e6b
--- /dev/null
+++ b/lib/Lintian/Index/Ar.pm
@@ -0,0 +1,128 @@
+# -*- perl -*- Lintian::Index::Ar
+#
+# Copyright (C) 2020 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program.  If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Index::Ar;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Const::Fast;
+use Cwd;
+use Path::Tiny;
+use Unicode::UTF8 qw(decode_utf8 encode_utf8);
+
+use Lintian::IPC::Run3 qw(safe_qx);
+
+use Moo::Role;
+use namespace::clean;
+
+const my $EMPTY => q{};
+const my $NEWLINE => qq{\n};
+
+=head1 NAME
+
+Lintian::Index::Ar - binary symbol information.
+
+=head1 SYNOPSIS
+
+    use Lintian::Index;
+
+=head1 DESCRIPTION
+
+Lintian::Index::Ar binary symbol information.
+
+=head1 INSTANCE METHODS
+
+=over 4
+
+=item add_ar
+
+=cut
+
+sub add_ar {
+    my ($self) = @_;
+
+    my $savedir = getcwd;
+    chdir($self->basedir)
+      or die encode_utf8(
+        $self->identifier . ': Cannot change to directory ' . $self->basedir);
+
+    my $errors = $EMPTY;
+
+    my @archives
+      = grep { $_->name =~ / [.]a $/msx && $_->is_regular_file }
+      @{$self->sorted_list};
+
+    for my $archive (@archives) {
+
+        # skip empty archives to avoid ar error message; happens in tests
+        next
+          unless $archive->size;
+
+        my %ar_info;
+
+    # fails silently for non-ar files (#934899); probably creates empty entries
+        my $bytes = safe_qx(qw{ar t}, $archive);
+        if ($?) {
+            $errors .= "ar failed for $archive" . $NEWLINE;
+            next;
+        }
+
+        my $output = decode_utf8($bytes);
+        my @members = split(/\n/, $output);
+
+        my $count = 1;
+        for my $member (@members) {
+
+            # more info could be added with -v above
+            $ar_info{$count}{name} = $member;
+
+        } continue {
+            $count++;
+        }
+
+        $archive->ar_info(\%ar_info);
+    }
+
+    chdir($savedir)
+      or die encode_utf8(
+        $self->identifier . ": Cannot change to directory $savedir");
+
+    return $errors;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for
+Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Index/Elf.pm b/lib/Lintian/Index/Elf.pm
new file mode 100644
index 0000000..1fe4d7a
--- /dev/null
+++ b/lib/Lintian/Index/Elf.pm
@@ -0,0 +1,739 @@
+# -*- perl -*- Lintian::Index::Elf
+#
+# Copyright (C) 1998 Christian Schwarz
+# Copyright (C) 2008 Adam D. Barratt
+# Copyright (C) 2017-2018 Chris Lamb <lamby@debian.org>
+# Copyright (C) 2020-2022 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program.  If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Index::Elf;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use bignum qw(hex);
+
+use Const::Fast;
+use Cwd;
+use IPC::Run3;
+use Unicode::UTF8 qw(encode_utf8 valid_utf8 decode_utf8);
+
+use Lintian::Elf::Section;
+use Lintian::Elf::Symbol;
+use Lintian::Storage::MLDBM;
+
+use Moo::Role;
+use namespace::clean;
+
+const my $EMPTY => q{};
+const my $SPACE => q{ };
+const my $HYPHEN => q{-};
+const my $NEWLINE => qq{\n};
+
+const my $LINES_PER_FILE => 3;
+
+=head1 NAME
+
+Lintian::Index::Elf - binary symbol information.
+
+=head1 SYNOPSIS
+
+    use Lintian::Index;
+
+=head1 DESCRIPTION
+
+Lintian::Index::Elf binary symbol information.
+
+=head1 INSTANCE METHODS
+
+=over 4
+
+=item elf_storage
+
+=cut
+
+has elf_storage => (
+    is => 'rw',
+    lazy => 1,
+    default => sub {
+        my ($self) = @_;
+
+        my $storage = Lintian::Storage::MLDBM->new;
+        $storage->create('elf');
+
+        return $storage;
+    }
+);
+
+=item elf_storage_by_member
+
+=cut
+
+has elf_storage_by_member => (
+    is => 'rw',
+    lazy => 1,
+    default => sub {
+        my ($self) = @_;
+
+        my $storage = Lintian::Storage::MLDBM->new;
+        $storage->create('elf-by-member');
+
+        return $storage;
+    }
+);
+
+=item add_elf
+
+=cut
+
+sub add_elf {
+    my ($self) = @_;
+
+    my $savedir = getcwd;
+    chdir($self->basedir)
+      or die encode_utf8(
+        $self->identifier . ': Cannot change to directory ' . $self->basedir);
+
+    my $errors = $EMPTY;
+
+    my @files = grep { $_->is_file } @{$self->sorted_list};
+
+    # must be ELF or static library
+    my @with_objects = grep {
+        $_->file_type =~ /\bELF\b/
+          || ( $_->file_type =~ /\bcurrent ar archive\b/
+            && $_->name =~ /\.a$/)
+    } @files;
+
+    for my $file (@with_objects) {
+
+        local $SIG{__WARN__}= sub {
+            warn encode_utf8($self->identifier
+                  . ': Warning while running readelf on'
+                  . $file->name
+                  . ": $_[0]");
+        };
+
+        my @command = (qw{readelf --all --wide}, $file->name);
+        my $combined_bytes;
+
+        run3(\@command, \undef, \$combined_bytes, \$combined_bytes);
+
+        next
+          unless length $combined_bytes;
+
+        my $combined_output;
+
+        if (valid_utf8($combined_bytes)) {
+            $combined_output = decode_utf8($combined_bytes);
+
+        } else {
+            $combined_output = $combined_bytes;
+            $errors .= "Output from '@command' is not valid UTF-8" . $NEWLINE;
+        }
+
+        # each object file in an archive gets its own File section
+        my @per_files = split(/^(File): (.*)$/m, $combined_output);
+        shift @per_files while @per_files && $per_files[0] ne 'File';
+
+        @per_files = ($combined_output)
+          unless @per_files;
+
+        # Special case - readelf will not prefix the output with "File:
+        # $name" if it only gets one ELF file argument, so act as if it did...
+        # (but it does "the right thing" if passed a static lib >.>)
+        #
+        # - In fact, if readelf always emitted that File: header, we could
+        #   simply use xargs directly on readelf and just parse its output
+        #   in the loop below.
+        if (@per_files == 1) {
+            unshift(@per_files, $file->name);
+            unshift(@per_files, 'File');
+        }
+
+        unless (@per_files % $LINES_PER_FILE == 0) {
+
+            $errors
+              .= "Parsed data from readelf is not a multiple of $LINES_PER_FILE for $file"
+              . $NEWLINE;
+            next;
+        }
+
+        while (defined(my $fixed = shift @per_files)) {
+
+            my $recorded_name = shift @per_files;
+            my $per_file = shift @per_files;
+
+            unless ($fixed eq 'File') {
+                $errors .= "Unknown output from readelf for $file" . $NEWLINE;
+                next;
+            }
+
+            unless (length $recorded_name) {
+                $errors .= "No file name from readelf for $file" . $NEWLINE;
+                next;
+            }
+
+            my ($container, $member) = ($recorded_name =~ /^(.*)\(([^)]+)\)$/);
+
+            $container = $recorded_name
+              unless defined $container && defined $member;
+
+            unless ($container eq $file->name) {
+                $errors
+                  .= "Container not same as file name ($container vs $file)"
+                  . $NEWLINE;
+                next;
+            }
+
+            # ignore empty archives, such as in musl-dev_1.2.1-1_amd64.deb
+            next
+              unless length $per_file;
+
+            my $object_name;
+            if ($recorded_name =~ m{^(?:.+)\(([^/\)]+)\)$}){
+
+                # object file in a static lib.
+                $object_name = $1;
+            }
+
+            parse_per_file($file, $object_name, $per_file);
+        }
+    }
+
+    chdir($savedir)
+      or die encode_utf8(
+        $self->identifier . ": Cannot change to directory $savedir");
+
+    return $errors;
+}
+
+=item parse_per_file
+
+=cut
+
+sub parse_per_file {
+    my ($file, $object_name, $from_readelf) = @_;
+
+    my %by_object;
+
+    $by_object{READELF} = $from_readelf;
+
+    # sometimes there are three blank lines; seen on armhf
+    my @paragraphs = split(/\n{2,}/, $from_readelf);
+
+    for my $paragraph (@paragraphs) {
+
+        my ($first, $bulk) = split(m{\n}, $paragraph, 2);
+
+        if ($first =~ /^ELF Header:/) {
+            elf_header($bulk, \%by_object);
+            next;
+        }
+
+        if ($first =~ /^Program Headers:/) {
+            program_headers($bulk, \%by_object);
+            next;
+        }
+
+        if ($first =~ /^Dynamic section at offset .*:/) {
+            dynamic_section($bulk, \%by_object);
+            next;
+        }
+
+        if ($first =~ /^Section Headers:/) {
+            section_headers($bulk, \%by_object);
+            next;
+        }
+
+        if ($first =~ /^Symbol table '.dynsym'/) {
+            symbol_table($bulk, \%by_object);
+            next;
+        }
+
+        if ($first =~ /^Version symbols section /) {
+            version_symbols($bulk, \%by_object);
+            next;
+        }
+
+        if ($first =~ /^There is no dynamic section in this file/) {
+            # a dynamic section was declared but it's empty.
+            $by_object{'BAD-DYNAMIC-TABLE'} = 1
+              if exists $by_object{PH}{DYNAMIC};
+            next;
+        }
+    }
+
+    my %section_name_by_number;
+    for my $name (keys %{$by_object{SH} // {}}) {
+
+        my $number = $by_object{SH}{$name}{number};
+        $section_name_by_number{$number} = $name;
+    }
+
+    for my $symbol_number (keys %{$by_object{'DYNAMIC-SYMBOLS'}}) {
+
+        my $symbol_name
+          = $by_object{'DYNAMIC-SYMBOLS'}{$symbol_number}{symbol_name};
+        my $section_number
+          = $by_object{'DYNAMIC-SYMBOLS'}{$symbol_number}{section_number};
+
+        my $symbol_version;
+
+        if ($symbol_name =~ m{^ (.*) @ (.*) \s [(] .* [)] $}x) {
+
+            $symbol_name = $1;
+            $symbol_version = $2;
+
+        } else {
+            $symbol_version = $by_object{'SYMBOL-VERSIONS'}{$symbol_number}
+              // $EMPTY;
+
+            if (   $symbol_version eq '*local*'
+                || $symbol_version eq '*global*'){
+
+                if ($section_number eq 'UND') {
+                    $symbol_version = $EMPTY;
+
+                } else {
+                    $symbol_version = 'Base';
+                }
+
+            } elsif ($symbol_version eq '()') {
+                $symbol_version = '(Base)';
+            }
+        }
+
+        # happens once or twice for regular binaries
+        next
+          unless length $symbol_name;
+
+        # look up numbered section
+        my $section_name = $section_name_by_number{$section_number}
+          // $section_number;
+
+        my $symbol = Lintian::Elf::Symbol->new;
+        $symbol->section($section_name);
+        $symbol->version($symbol_version);
+        $symbol->name($symbol_name);
+
+        push(@{ $by_object{SYMBOLS} }, $symbol);
+    }
+
+    if (length $object_name) {
+
+        # object file in a static lib.
+        $file->elf_by_member($object_name, \%by_object);
+
+    } else {
+        $file->elf(\%by_object);
+    }
+
+    return;
+}
+
+=item elf_header
+
+=cut
+
+sub elf_header {
+    my ($text, $by_object) = @_;
+
+    my @lines = split(m{\n}, $text);
+
+    for my $line (@lines) {
+
+        next
+          if divert_error('ELF header', $line, $by_object);
+
+        my ($field, $value) = split(/:/, $line, 2);
+
+        # trim both ends
+        $field =~ s/^\s+|\s+$//g;
+        $value =~ s/^\s+|\s+$//g;
+
+        next
+          unless length $field && length $value;
+
+        $by_object->{'ELF-HEADER'}{$field} = $value;
+    }
+
+    return;
+}
+
+=item program_headers
+
+=cut
+
+sub program_headers {
+    my ($text, $by_object) = @_;
+
+    my @lines = split(m{\n}, $text);
+
+    while (defined(my $line = shift @lines)) {
+
+        next
+          if divert_error('program headers', $line, $by_object);
+
+        if ($line =~ m{^ \s* (\S+) \s* (?:(?:\S+\s+){4}) \S+ \s (...) }x) {
+
+            my $header = $1;
+            my $flags = $2;
+
+            $header =~ s/^GNU_//g;
+
+            next
+              if $header eq 'Type';
+
+            my $newflags = $EMPTY;
+            $newflags .= ($flags =~ /R/) ? 'r' : $HYPHEN;
+            $newflags .= ($flags =~ /W/) ? 'w' : $HYPHEN;
+            $newflags .= ($flags =~ /E/) ? 'x' : $HYPHEN;
+
+            $by_object->{PH}{$header}{flags} = $newflags;
+
+            if ($header eq 'INTERP' && @lines) {
+                # Check if the next line is the "requesting an interpreter"
+                # (readelf appears to always emit on the next line if at all)
+                my $next_line = $lines[0];
+
+                if ($next_line
+                    =~ m{ [[] Requesting \s program \s interpreter: \s ([^\]]+) []] }x
+                ){
+
+                    my $interpreter = $1;
+
+                    $by_object->{INTERP} = $interpreter;
+
+                    # discard line
+                    shift @lines;
+                }
+            }
+        }
+    }
+
+    return;
+}
+
+=item dynamic_section
+
+=cut
+
+sub dynamic_section {
+    my ($text, $by_object) = @_;
+
+    my @lines = split(m{\n}, $text);
+
+    while (defined(my $line = shift @lines)) {
+
+        next
+          if divert_error('dynamic section', $line, $by_object);
+
+        if ($line
+            =~ m{^ \s* 0x (?:[0-9A-F]+) \s+ [(] (.*?) [)] \s+ ([\x21-\x7f][\x20-\x7f]*) \Z}ix
+        ) {
+
+            my $type = $1;
+            my $remainder = $2;
+
+            my $keep = 0;
+
+            if ($type eq 'RPATH' || $type eq 'RUNPATH') {
+                $remainder =~ s{^ .* [[] }{}x;
+                $remainder =~ s{ []] \s* $}{}x;
+                $keep = 1;
+
+            } elsif ($type eq 'TEXTREL' || $type eq 'DEBUG') {
+                $keep = 1;
+
+            } elsif ($type eq 'FLAGS_1') {
+                # Will contain "NOW" if the binary was built with -Wl,-z,now
+                $remainder =~ s/^Flags:\s*//i;
+                $keep = 1;
+
+            } elsif (($type eq 'FLAGS' && $remainder =~ m/\bBIND_NOW\b/)
+                || $type eq 'BIND_NOW') {
+
+                # Variants of bindnow
+                $type = 'FLAGS_1';
+                $remainder = 'NOW';
+                $keep = 1;
+            }
+
+            $keep = 1
+              if $remainder
+              =~ s{^ (?: Shared \s library | Library \s soname ) : \s [[] (.*) []] }{$1}x;
+
+            next
+              unless $keep;
+
+            # Here we just need RPATH and NEEDS, so ignore the rest for now
+            if ($type eq 'RPATH' || $type eq 'RUNPATH') {
+
+                # RPATH is like PATH
+                my @components = split(/:/, $remainder);
+                $by_object->{$type}{$_} = 1 for @components;
+
+            } elsif ($type eq 'NEEDED' || $type eq 'SONAME') {
+                push(@{ $by_object->{$type} }, $remainder);
+
+            } elsif ($type eq 'TEXTREL' || $type eq 'DEBUG') {
+                $by_object->{$type} = 1;
+
+            } elsif ($type eq 'FLAGS_1') {
+
+                my @flags = split(/\s+/, $remainder);
+                $by_object->{$type}{$_} = 1 for @flags;
+            }
+        }
+    }
+
+    return;
+}
+
+=item section_headers
+
+=cut
+
+sub section_headers {
+    my ($text, $by_object) = @_;
+
+    const my $TOTAL_FIELDS => 11;
+
+    my @lines = split(m{\n}, $text);
+
+    die 'No column labels.'
+      unless @lines;
+
+    my $first = shift @lines;
+
+    my %labels_by_column;
+
+    my $column = 1;
+    for my $label (split($SPACE, $first)) {
+
+        $label =~ s{^ [[] }{}x;
+        $label =~ s{ []] $}{}x;
+
+        $labels_by_column{$column} = $label;
+
+    } continue {
+        ++$column;
+    }
+
+    die 'Not enough column labels.'
+      if keys %labels_by_column != $TOTAL_FIELDS;
+
+    my $row = 1;
+    while (defined(my $line = shift @lines)) {
+
+        next
+          if divert_error('section headers', $line, $by_object);
+
+        last
+          if $line =~ /^Key to Flags:/;
+
+        my %section_header;
+
+        my @matches = (
+            $line =~ m{^ \s*
+                         [[] \s* (\S+) []] \s    # Nr
+                         (\S+)?      \s+         # Name
+                         (\S+)       \s+         # Type
+                         ([0-9a-f]+) \s          # Address/Addr
+                         ([0-9a-f]+) \s          # Off
+                         ([0-9a-f]+) \s          # Size
+                         (\S+)       \s+         # ES
+                         (\S+)?      \s+         # Flg
+                         (\S+)       \s+         # Lk
+                         (\S+)       \s+         # Inf
+                         (\S+)                   # Al
+                       $}x
+        );
+
+        if (@matches != $TOTAL_FIELDS) {
+
+            warn "Parse error in readelf section headers [row $row]: $line";
+            next;
+        }
+
+        for my $column (keys %labels_by_column) {
+
+            my $label = $labels_by_column{$column};
+            my $value = $matches[$column -1] // $EMPTY;
+
+            $section_header{$label} = $value;
+        }
+
+        # http://sco.com/developers/gabi/latest/ch4.sheader.html
+        my $section = Lintian::Elf::Section->new;
+        $section->number($section_header{Nr});
+        $section->name($section_header{Name});
+        $section->type($section_header{Type});
+
+        # readelf uses both
+        $section->address(
+            hex($section_header{Address} // $section_header{Addr}));
+        $section->offset(hex($section_header{Off}));
+        $section->size(hex($section_header{Size}));
+        $section->entry_size(hex($section_header{ES}));
+        $section->flags($section_header{Flg});
+        $section->index_link(hex($section_header{Lk}));
+        $section->index_info(hex($section_header{Inf}));
+        $section->alignment(hex($section_header{Al}));
+
+        die 'No section number.'
+          unless length $section->number;
+
+        $by_object->{'SECTION-HEADERS'}{$section->number} = $section;
+
+    } continue {
+        ++$row;
+    }
+
+    return;
+}
+
+=item symbol_table
+
+=cut
+
+sub symbol_table {
+    my ($text, $by_object) = @_;
+
+    # We (sometimes) need to read the "Version symbols section" first to
+    # use this data and readelf tends to print after this section, so
+    # save for later.
+
+    my @lines = split(m{\n}, $text);
+
+    while (defined(my $line = shift @lines)) {
+
+        next
+          if divert_error('symbol table', $line, $by_object);
+
+        if ($line
+            =~ m{^ \s* (\d+) : \s* [0-9a-f]+ \s+ \d+ \s+ (?:(?:\S+\s+){3}) (?: [[] .* []] \s+)? (\S+) \s+ (.*) \Z}x
+        ) {
+
+            my $symbol_number = $1;
+            my $section_number = $2;
+            my $symbol_name = $3;
+
+            $by_object->{'DYNAMIC-SYMBOLS'}{$symbol_number}{section_number}
+              = $section_number;
+            $by_object->{'DYNAMIC-SYMBOLS'}{$symbol_number}{symbol_name}
+              = $symbol_name;
+        }
+    }
+
+    return;
+}
+
+=item version_symbols
+
+=cut
+
+sub version_symbols {
+    my ($text, $by_object) = @_;
+
+    my @lines = split(m{\n}, $text);
+
+    while (defined(my $line = shift @lines)) {
+
+        next
+          if divert_error('version symbols', $line, $by_object);
+
+        if ($line
+            =~ m{^ \s* [0-9a-f]+ : \s* \S+ \s* (?: [(] \S+ [)] )? (?: \s | \Z ) }xi
+        ){
+
+            while ($line
+                =~ m{ ([0-9a-f]+ h?) \s* (?: [(] (\S+) [)] )? (?: \s | \Z ) }cgix
+            ) {
+
+                my $symbol_number = $1;
+                my $symbol_version = $2;
+
+                # for libfuse2_2.9.9-3_amd64.deb
+                next
+                  unless length $symbol_version;
+
+                $symbol_version = "($symbol_version)"
+                  if $symbol_number =~ m{ h $}x;
+
+                $by_object->{'SYMBOL-VERSIONS'}{$symbol_number}
+                  = $symbol_version;
+            }
+        }
+    }
+
+    return;
+}
+
+=item divert_error
+
+=cut
+
+sub divert_error {
+    my ($section, $line, $by_object) = @_;
+
+    return 0
+      unless $line =~ s{^ readelf: \s+ }{}x;
+
+    if ($line =~ s{^ Error: \s+ }{}x) {
+
+        my $message = "In $section: $line";
+
+        $by_object->{ERRORS} //= [];
+        push(@{$by_object->{ERRORS}}, $message);
+
+        return 1;
+    }
+
+    if ($line =~ s{^ Warning: \s+ }{}x) {
+
+        my $message = "In $section: $line";
+
+        $by_object->{WARNINGS} //= [];
+        push(@{$by_object->{WARNINGS}}, $message);
+
+        return 1;
+    }
+
+    return 0;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for
+Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Index/FileTypes.pm b/lib/Lintian/Index/FileTypes.pm
new file mode 100644
index 0000000..a71efb3
--- /dev/null
+++ b/lib/Lintian/Index/FileTypes.pm
@@ -0,0 +1,195 @@
+# -*- perl -*- Lintian::Index::FileTypes
+#
+# Copyright (C) 2020-2021 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program.  If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Index::FileTypes;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Const::Fast;
+use Cwd;
+use Unicode::UTF8 qw(encode_utf8 decode_utf8);
+
+use Lintian::IPC::Run3 qw(xargs);
+
+use Moo::Role;
+use namespace::clean;
+
+const my $EMPTY => q{};
+const my $SPACE => q{ };
+const my $COMMA => q{,};
+const my $NEWLINE => qq{\n};
+
+const my $KEEP_EMPTY_FIELDS => -1;
+const my $GZIP_MAGIC_SIZE => 9;
+const my $GZIP_MAGIC_BYTES => 0x1f8b;
+
+=head1 NAME
+
+Lintian::Index::FileTypes - determine file type via magic.
+
+=head1 SYNOPSIS
+
+    use Lintian::Index;
+
+=head1 DESCRIPTION
+
+Lintian::Index::FileTypes determine file type via magic.
+
+=head1 INSTANCE METHODS
+
+=over 4
+
+=item add_file_types
+
+=cut
+
+sub add_file_types {
+    my ($self) = @_;
+
+    my $savedir = getcwd;
+    chdir $self->basedir
+      or die encode_utf8(
+        $self->identifier . ': Cannot change to directory ' . $self->basedir);
+
+    my $errors = $EMPTY;
+
+    my @files = grep { $_->is_file } @{$self->sorted_list};
+    my @names = map { $_->name } @files;
+
+    my @command = qw(file --raw --no-pad --print0 --print0 --);
+
+    my %file_types;
+
+    xargs(
+        \@command,
+        \@names,
+        sub {
+            my ($stdout, $stderr, $status, @partial) = @_;
+
+            # ignore failures if possible; file returns non-zero and
+            # "ERROR" on parse errors but output is still usable
+
+            # undecoded split allows names with non UTF-8 bytes
+            $stdout =~ s{ \0 $}{}x;
+
+            my @lines = split(m{\0}, $stdout, $KEEP_EMPTY_FIELDS);
+
+            unless (@lines % 2 == 0) {
+                $errors
+                  .= 'Did not get an even number lines from file command.'
+                  . $NEWLINE;
+                return;
+            }
+
+            while (defined(my $path = shift @lines)) {
+
+                my $type = shift @lines;
+
+                unless (length $path && length $type) {
+                    $errors
+                      .= "syntax error in file-info output: '$path' '$type'"
+                      . $NEWLINE;
+                    next;
+                }
+
+                # drop relative prefix, if present
+                $path =~ s{^ [.]/ }{}x;
+
+                $file_types{$path} = $self->adjust_type($path, $type);
+            }
+
+            return;
+        }
+    );
+
+    $_->file_type($file_types{$_->name}) for @files;
+
+    chdir $savedir
+      or die encode_utf8(
+        $self->identifier . ": Cannot change to directory $savedir");
+
+    return $errors;
+}
+
+=item adjust_type
+
+=cut
+
+# some files need to be corrected
+sub adjust_type {
+    my ($self, $name, $file_type) = @_;
+
+    if ($name =~ m{ [.]gz $}ix && $file_type !~ /compressed/) {
+
+        my $item = $self->lookup($name);
+
+        die encode_utf8("Cannot find file $name in index")
+          unless $item;
+
+        my $buffer = $item->magic($GZIP_MAGIC_SIZE);
+        if (length $buffer) {
+
+            # translation of the unpack
+            #  nn nn ,  NN NN NN NN, nn nn, cc     - bytes read
+            #  $magic,  __ __ __ __, __ __, $comp  - variables
+            my ($magic, undef, undef, $compression) = unpack('nNnc', $buffer);
+
+            # gzip file magic
+            if ($magic == $GZIP_MAGIC_BYTES) {
+
+                my $augment = 'gzip compressed data';
+
+                # 2 for max compression; RFC1952 suggests this is a
+                # flag and not a value, hence bit operation
+                $augment .= $COMMA . $SPACE . 'max compression'
+                  if $compression & 2;
+
+                return $file_type . $COMMA . $SPACE . $augment;
+            }
+        }
+    }
+
+    # some TFMs are categorized as gzip, see Bug#963589
+    return 'data'
+      if $name =~ m{ [.]tfm $}ix
+      && $file_type =~ /gzip compressed data/;
+
+    return $file_type;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for
+Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Index/Item.pm b/lib/Lintian/Index/Item.pm
new file mode 100644
index 0000000..7ef6c09
--- /dev/null
+++ b/lib/Lintian/Index/Item.pm
@@ -0,0 +1,1567 @@
+# -*- perl -*-
+# Lintian::Index::Item -- Representation of path entry in a package
+#
+# Copyright (C) 2011 Niels Thykier
+# Copyright (C) 2020 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program.  If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Index::Item;
+
+use v5.20;
+use warnings;
+use utf8;
+use autodie qw(open);
+
+use Carp qw(croak confess);
+use Const::Fast;
+use Date::Parse qw(str2time);
+use List::SomeUtils qw(all);
+use Path::Tiny;
+use Syntax::Keyword::Try;
+use Text::Balanced qw(extract_delimited);
+use Unicode::UTF8 qw(valid_utf8 decode_utf8 encode_utf8);
+
+use Lintian::Pointer::Item;
+use Lintian::SlidingWindow;
+use Lintian::Util qw(normalize_link_target);
+
+use Moo;
+use namespace::clean;
+
+use constant {
+    TYPE_FILE      => 0x00_01_00_00,
+    TYPE_HARDLINK  => 0x00_02_00_00,
+    TYPE_DIR       => 0x00_04_00_00,
+    TYPE_SYMLINK   => 0x00_08_00_00,
+    TYPE_BLOCK_DEV => 0x00_10_00_00,
+    TYPE_CHAR_DEV  => 0x00_20_00_00,
+    TYPE_PIPE      => 0x00_40_00_00,
+    TYPE_OTHER     => 0x00_80_00_00,
+    TYPE_MASK      => 0x00_ff_00_00,
+
+    UNSAFE_PATH    => 0x01_00_00_00,
+    FS_PATH_IS_OK  => 0x02_00_00_00,
+    OPEN_IS_OK     => 0x06_00_00_00, # Implies FS_PATH_IS_OK
+    ACCESS_INFO    => 0x07_00_00_00,
+    # 0o6777 == 0xdff, which covers set[ug]id + sticky bit.  Accordingly,
+    # 0xffff should be more than sufficient for the foreseeable future.
+    OPERM_MASK     => 0x00_00_ff_ff,
+};
+
+use overload (
+    q{""} => \&_as_string,
+    'qr' => \&_as_regex_ref,
+    'bool' => \&_bool,
+    q{!} => \&_bool_not,
+    q{.} => \&_str_concat,
+    'cmp' => \&_str_cmp,
+    'eq' => \&_str_eq,
+    'ne' => \&_str_ne,
+    'fallback' => 0,
+);
+
+const my $EMPTY => q{};
+const my $SPACE => q{ };
+const my $SLASH => q{/};
+const my $DOT => q{.};
+const my $DOUBLE_DOT => q{..};
+const my $DOUBLE_QUOTE => q{"};
+const my $BACKSLASH => q{\\};
+const my $HASHBANG => q{#!};
+
+const my $MAXIMUM_LINK_DEPTH => 18;
+
+const my $BYTE_MAXIMUM => 255;
+const my $SINGLE_OCTAL_MASK => oct(7);
+const my $DUAL_OCTAL_MASK => oct(77);
+
+const my $ELF_MAGIC_SIZE => 4;
+const my $LINCITY_MAGIC_SIZE => 6;
+const my $SHELL_SCRIPT_MAGIC_SIZE => 2;
+
+const my $READ_BITS => oct(444);
+const my $WRITE_BITS => oct(222);
+const my $EXECUTABLE_BITS => oct(111);
+
+const my $SETUID => oct(4000);
+const my $SETGID => oct(2000);
+
+=head1 NAME
+
+Lintian::Index::Item - Lintian representation of a path entry in a package
+
+=head1 SYNOPSIS
+
+    my ($name, $type, $dir) = ('lintian', 'source', '/path/to/entry');
+
+=head1 INSTANCE METHODS
+
+=over 4
+
+=item init_from_tar_output
+
+=item get_quoted_filename
+
+=item unescape_c_style
+
+=cut
+
+my $datepattern = qr/\d{4}-\d{2}-\d{2}/;
+my $timepattern = qr/\d{2}\:\d{2}(?:\:\d{2}(?:\.\d+)?)?/;
+my $symlinkpattern = qr/\s+->\s+/;
+my $hardlinkpattern = qr/\s+link\s+to\s+/;
+
+# adapted from https://www.perlmonks.org/?node_id=1056606
+my %T = (
+    (map {chr() => chr} 0..$BYTE_MAXIMUM),
+    (map {sprintf('%o',$_) => chr} 0..($BYTE_MAXIMUM & $SINGLE_OCTAL_MASK)),
+    (map {sprintf('%02o',$_) => chr} 0..($BYTE_MAXIMUM & $DUAL_OCTAL_MASK)),
+    (map {sprintf('%03o',$_) => chr} 0..$BYTE_MAXIMUM),
+    (split //, "r\rn\nb\ba\af\ft\tv\013")
+);
+
+sub unescape_c_style {
+    my ($escaped) = @_;
+
+    (my $result = $escaped) =~ s/\\([0-7]{1,3}|.)/$T{$1}/g;
+
+    return $result;
+}
+
+sub get_quoted_filename {
+    my ($unknown, $skip) = @_;
+
+    # extract quoted file name
+    my ($delimited, $extra)
+      = extract_delimited($unknown, $DOUBLE_QUOTE, $skip, $BACKSLASH);
+
+    return (undef, undef)
+      unless defined $delimited;
+
+    # drop quotes
+    my $cstylename = substr($delimited, 1, (length $delimited) - 2);
+
+    # convert c-style escapes
+    my $name = unescape_c_style($cstylename);
+
+    return ($name, $extra);
+}
+
+sub init_from_tar_output {
+    my ($self, $line) = @_;
+
+    chomp $line;
+
+    # allow spaces in ownership and filenames (#895175 and #950589)
+
+    my ($initial, $size, $date, $time, $remainder)
+      = split(/\s+(\d+)\s+($datepattern)\s+($timepattern)\s+/, $line,2);
+
+    die encode_utf8(
+        $self->index->identifier . ": Cannot parse tar output: $line")
+      unless all { defined } ($initial, $size, $date, $time, $remainder);
+
+    $self->size($size);
+    $self->date($date);
+    $self->time($time);
+
+    my ($permissions, $ownership) = split(/\s+/, $initial, 2);
+    die encode_utf8($self->index->identifier
+          .": Cannot parse permissions and ownership in tar output: $line")
+      unless all { defined } ($permissions, $ownership);
+
+    $self->perm($permissions);
+
+    my ($owner, $group) = split(qr{/}, $ownership, 2);
+    die encode_utf8($self->index->identifier
+          . ": Cannot parse owner and group in tar output: $line")
+      unless all { defined } ($owner, $group);
+
+    $self->owner($owner);
+    $self->group($group);
+
+    my ($name, $extra) = get_quoted_filename($remainder, $EMPTY);
+    die encode_utf8($self->index->identifier
+          . ": Cannot parse file name in tar output: $line")
+      unless all { defined } ($name, $extra);
+
+    # strip relative prefix
+    $name =~ s{^\./+}{}s;
+
+    # slashes cannot appear in names but are sometimes doubled
+    # as in emboss-explorer_2.2.0-10.dsc
+    # better implemented in a Moo trigger on the attribute
+    $name =~ s{/+}{/}g;
+
+    # make sure directories end with a slash, except root
+    $name .= $SLASH
+      if length $name
+      && $self->perm =~ / ^d /msx
+      && $name !~ m{ /$ }msx;
+
+    $self->name($name);
+
+    # look for symbolic link target
+    if ($self->perm =~ /^l/) {
+
+        my ($linktarget, undef) = get_quoted_filename($extra, $symlinkpattern);
+        die encode_utf8($self->index->identifier
+              .": Cannot parse symbolic link target in tar output: $line")
+          unless defined $linktarget;
+
+        # do not remove multiple slashes from symlink targets
+        # caught by symlink-has-double-slash, which is tested
+        # leaves resolution of these links unsolved
+
+        # do not strip relative prefix for symbolic links
+        $self->link($linktarget);
+    }
+
+    # look for hard link target
+    if ($self->perm =~ /^h/) {
+
+        my ($linktarget, undef)= get_quoted_filename($extra, $hardlinkpattern);
+        die encode_utf8($self->index->identifier
+              . ": Cannot parse hard link target in tar output: $line")
+          unless defined $linktarget;
+
+        # strip relative prefix
+        $linktarget =~ s{^\./+}{}s;
+
+        # slashes cannot appear in names but are sometimes doubled
+        # as in emboss-explorer_2.2.0-10.dsc
+        # better implemented in a Moo trigger on the attribute, but requires
+        # separate attributes for hard and symbolic link targets
+        $linktarget =~ s{/+}{/}g;
+
+        $self->link($linktarget);
+    }
+
+    return;
+}
+
+=item bytes_match(REGEX)
+
+Returns the matched string if REGEX matches the file's byte contents,
+or $EMPTY otherwise.
+
+=cut
+
+sub bytes_match {
+    my ($self, $regex) = @_;
+
+    return $EMPTY
+      unless $self->is_file;
+
+    return $EMPTY
+      unless $self->is_open_ok;
+
+    return $EMPTY
+      unless length $regex;
+
+    open(my $fd, '<:raw', $self->unpacked_path);
+    my $sfd = Lintian::SlidingWindow->new;
+    $sfd->handle($fd);
+
+    my $match;
+    while (my $block = $sfd->readwindow) {
+
+        if ($block =~ /($regex)/) {
+
+            $match = $1;
+            last;
+        }
+    }
+
+    close $fd;
+
+    return $match // $EMPTY;
+}
+
+=item mentions_in_operation(REGEX)
+
+Returns the matched string if REGEX matches in a file location
+that is likely an operation (vs text), or $EMPTY otherwise.
+
+=cut
+
+sub mentions_in_operation {
+    my ($self, $regex) = @_;
+
+    # prefer strings(1) output (eg. for ELF) if we have it
+    # may not work as expected on ELF due to ld's SHF_MERGE
+    my $match;
+    if (length $self->strings && $self->strings =~ /($regex)/) {
+        $match = $1;
+
+    } elsif ($self->is_script) {
+        $match = $self->bytes_match($regex);
+    }
+
+    return $match // $EMPTY;
+}
+
+=item magic(COUNT)
+
+Returns the specified COUNT of magic bytes for the file.
+
+=cut
+
+sub magic {
+    my ($self, $count) = @_;
+
+    return $EMPTY
+      if length $self->link;
+
+    return $EMPTY
+      if $self->size < $count;
+
+    return $EMPTY
+      unless $self->is_open_ok;
+
+    my $magic;
+
+    open(my $fd, '<', $self->unpacked_path);
+    die encode_utf8($self->index->identifier
+          . ": Could not read $count bytes from "
+          . $self->name)
+      unless read($fd, $magic, $count) == $count;
+    close $fd;
+
+    return $magic;
+}
+
+=item C<hashbang>
+
+Returns the C<hashbang> for the file if it is a script.
+
+=cut
+
+has hashbang => (
+    is => 'rw',
+    lazy => 1,
+    default => sub {
+        my ($self) = @_;
+
+        return $EMPTY
+          unless $self->is_script;
+
+        my $trimmed_bytes = $EMPTY;
+        my $magic;
+
+        open(my $fd, '<', $self->unpacked_path);
+        if (read($fd, $magic, 2) && $magic eq $HASHBANG && !eof($fd)) {
+            $trimmed_bytes = <$fd>;
+        }
+        close $fd;
+
+        # decoding UTF-8 fails on magyarispell_1.6.1-2.dsc and ldc_1.24.0-1.dsc
+
+        # remove comment, if any
+        $trimmed_bytes =~ s/^([^#]*)/$1/;
+
+        # trim both ends
+        $trimmed_bytes =~ s/^\s+|\s+$//g;
+
+        return $trimmed_bytes;
+    }
+);
+
+=item interpreter_with_options
+
+Returns the interpreter requested by a script with options
+after stripping C<env>.
+
+=cut
+
+has interpreter_with_options => (
+    is => 'rw',
+    lazy => 1,
+    default => sub {
+        my ($self) = @_;
+
+        my $with_options = $self->hashbang;
+
+        $with_options =~ s{^/usr/bin/env\s+}{};
+
+        return $with_options;
+    }
+);
+
+=item interpreter
+
+Returns the interpreter requested by a script but strips C<env>.
+
+=cut
+
+has interpreter => (
+    is => 'rw',
+    lazy => 1,
+    default => sub {
+        my ($self) = @_;
+
+        my $interpreter = $self->interpreter_with_options;
+
+        # keep base command without options
+        $interpreter =~ s/^(\S+).*/$1/;
+
+        return $interpreter;
+    }
+);
+
+=item C<calls_env>
+
+Returns true if file is a script that calls C<env>.
+
+=cut
+
+has calls_env => (
+    is => 'rw',
+    lazy => 1,
+    default => sub {
+        my ($self) = @_;
+
+        # must return a boolean success value #943724
+        return 1
+          if $self->hashbang =~ m{^/usr/bin/env\s+};
+
+        return 0;
+    }
+);
+
+=item C<is_shell_script>
+
+Returns true if file is a script requesting a recognized shell
+interpreter.
+
+=cut
+
+has is_shell_script => (
+    is => 'rw',
+    lazy => 1,
+    default => sub {
+        my ($self) = @_;
+
+        my $interpreter = $self->interpreter;
+
+        # keep basename
+        my ($basename) = ($interpreter =~ m{([^/]*)/?$}s);
+
+        return 1
+          if $basename =~ /^(?:[bd]?a|t?c|(?:pd|m)?k|z)?sh$/;
+
+        return 0;
+    }
+);
+
+=item is_elf
+
+Returns true if file is an ELF executable, and false otherwise.
+
+=cut
+
+has is_elf => (
+    is => 'rw',
+    lazy => 1,
+    default => sub {
+        my ($self) = @_;
+
+        return 1
+          if $self->magic($ELF_MAGIC_SIZE) eq "\x7FELF";
+
+        return 0;
+    }
+);
+
+=item is_script
+
+Returns true if file is a script and false otherwise.
+
+=cut
+
+has is_script => (
+    is => 'rw',
+    lazy => 1,
+    default => sub {
+        my ($self) = @_;
+
+        # skip lincity data files; magic: #!#!#!
+        return 0
+          if $self->magic($LINCITY_MAGIC_SIZE) eq '#!#!#!';
+
+        return 0
+          unless $self->magic($SHELL_SCRIPT_MAGIC_SIZE) eq $HASHBANG;
+
+        return 1;
+    }
+);
+
+=item is_maintainer_script
+
+Returns true if file is a maintainer script and false otherwise.
+
+=cut
+
+has is_maintainer_script => (
+    is => 'rw',
+    lazy => 1,
+    default => sub {
+        my ($self) = @_;
+
+        return 1
+          if $self->name =~ /^ config | (?:pre|post)(?:inst|rm) $/x
+          && $self->is_open_ok;
+
+        return 0;
+    }
+);
+
+=item identity
+
+Returns the owner and group of the path, separated by a slash.
+
+NB: If only numerical owner information is available in the package,
+this may return a numerical owner (except uid 0 is always mapped to
+"root")
+
+=cut
+
+sub identity {
+    my ($self) = @_;
+
+    return $self->owner . $SLASH . $self->group;
+}
+
+=item operm
+
+Returns the file permissions of this object in octal (e.g. 0644).
+
+NB: This is only well defined for file entries that are subject to
+permissions (e.g. files).  Particularly, the value is not well defined
+for symlinks.
+
+=cut
+
+sub operm {
+    my ($self) = @_;
+
+    return $self->path_info & OPERM_MASK;
+}
+
+=item octal_permissions
+
+=cut
+
+sub octal_permissions {
+    my ($self) = @_;
+
+    return sprintf('%04o', $self->operm);
+}
+
+=item children
+
+Returns a list of children (as Lintian::File::Path objects) of this entry.
+The list and its contents should not be modified.
+
+Only returns direct children of this directory.  The entries are sorted by name.
+
+NB: Returns the empty list for non-dir entries.
+
+=cut
+
+sub children {
+    my ($self) = @_;
+
+    croak encode_utf8('No index in ' . $self->name)
+      unless defined $self->index;
+
+    my @names = values %{$self->childnames};
+
+    return map { $self->index->lookup($_) } @names;
+}
+
+=item descendants
+
+Returns a list of children (as Lintian::File::Path objects) of this entry.
+The list and its contents should not be modified.
+
+Descends recursively into subdirectories and return the descendants in
+breadth-first order.  Children of a given directory will be sorted by
+name.
+
+NB: Returns the empty list for non-dir entries.
+
+=cut
+
+sub descendants {
+    my ($self) = @_;
+
+    my @descendants = $self->children;
+
+    my @directories = grep { $_->is_dir } @descendants;
+    push(@descendants, $_->descendants) for @directories;
+
+    return @descendants;
+}
+
+=item timestamp
+
+Returns a Unix timestamp for the given path. This is a number of
+seconds since the start of Unix epoch in UTC.
+
+=cut
+
+sub timestamp {
+    my ($self) = @_;
+
+    my $timestamp = $self->date . $SPACE . $self->time;
+
+    return str2time($timestamp, 'GMT');
+}
+
+=item child(BASENAME)
+
+Returns the child named BASENAME if it is a child of this directory.
+Otherwise, this method returns C<undef>.
+
+Even for directories, BASENAME should not end with a slash.
+
+When invoked on non-dirs, this method always returns C<undef>.
+
+Example:
+
+  $dir_entry->child('foo') => $entry OR undef
+
+=cut
+
+sub child {
+    my ($self, $basename) = @_;
+
+    croak encode_utf8('No index in ' . $self->name)
+      unless defined $self->index;
+
+    croak encode_utf8($self->index->identifier . ': Basename is required')
+      unless length $basename;
+
+    my $childname = $self->childnames->{$basename};
+    return undef
+      unless $childname;
+
+    return $self->index->lookup($childname);
+}
+
+=item is_symlink
+
+Returns a truth value if this entry is a symlink.
+
+=item is_hardlink
+
+Returns a truth value if this entry is a hardlink to a regular file.
+
+NB: The target of a hardlink is always a regular file (and not a dir etc.).
+
+=item is_dir
+
+Returns a truth value if this entry is a dir.
+
+NB: Unlike the "-d $dir" operator this will never return true for
+symlinks, even if the symlink points to a dir.
+
+=item is_file
+
+Returns a truth value if this entry is a regular file (or a hardlink to one).
+
+NB: Unlike the "-f $file" operator this will never return true for
+symlinks, even if the symlink points to a file (or hardlink).
+
+=item is_regular_file
+
+Returns a truth value if this entry is a regular file.
+
+This is eqv. to $path->is_file and not $path->is_hardlink.
+
+NB: Unlike the "-f $file" operator this will never return true for
+symlinks, even if the symlink points to a file.
+
+=cut
+
+sub is_symlink {
+    my ($self) = @_;
+
+    return $self->path_info & TYPE_SYMLINK ? 1 : 0;
+}
+
+sub is_hardlink {
+    my ($self) = @_;
+
+    return $self->path_info & TYPE_HARDLINK ? 1 : 0;
+}
+
+sub is_dir {
+    my ($self) = @_;
+
+    return $self->path_info & TYPE_DIR ? 1 : 0;
+}
+
+sub is_file {
+    my ($self) = @_;
+
+    return $self->path_info & (TYPE_FILE | TYPE_HARDLINK) ? 1 : 0;
+}
+
+sub is_regular_file {
+    my ($self) = @_;
+
+    return $self->path_info & TYPE_FILE ? 1 : 0;
+}
+
+=item link_normalized
+
+Returns the target of the link normalized against it's directory name.
+If the link cannot be normalized or normalized path might escape the
+package root, this method returns C<undef>.
+
+NB: This method will return the empty string for links pointing to the
+root dir of the package.
+
+Only available on "links" (i.e. symlinks or hardlinks).  On non-links
+this will croak.
+
+I<Symlinks only>: If you want the symlink target as a L<Lintian::File::Path>
+object, use the L<resolve_path|/resolve_path([PATH])> method with no
+arguments instead.
+
+=cut
+
+has link_normalized => (
+    is => 'rw',
+    lazy => 1,
+    default => sub {
+        my ($self) = @_;
+
+        my $name = $self->name;
+        my $link = $self->link;
+
+        croak encode_utf8($self->index->identifier . ": $name is not a link")
+          unless length $link;
+
+        my $dir = $self->dirname;
+
+        # hardlinks are always relative to the package root
+        $dir = $SLASH
+          if $self->is_hardlink;
+
+        my $target = normalize_link_target($dir, $link);
+
+        return $target;
+    }
+);
+
+=item is_readable
+
+Returns a truth value if the permission bits of this entry have
+at least one bit denoting readability set (bitmask 0444).
+
+=item is_writable
+
+Returns a truth value if the permission bits of this entry have
+at least one bit denoting writability set (bitmask 0222).
+
+=item is_executable
+
+Returns a truth value if the permission bits of this entry have
+at least one bit denoting executability set (bitmask 0111).
+
+=cut
+
+sub is_readable   {
+    my ($self) = @_;
+
+    return $self->path_info & $READ_BITS;
+}
+
+sub is_writable   {
+    my ($self) = @_;
+
+    return $self->path_info & $WRITE_BITS;
+}
+
+sub is_executable {
+    my ($self) = @_;
+
+    return $self->path_info & $EXECUTABLE_BITS;
+}
+
+=item all_bits_set
+
+=cut
+
+sub all_bits_set {
+    my ($self, $bits) = @_;
+
+    return ($self->operm & $bits) == $bits;
+}
+
+=item is_setuid
+
+=cut
+
+sub is_setuid {
+    my ($self) = @_;
+
+    return $self->operm & $SETUID;
+}
+
+=item is_setgid
+
+=cut
+
+sub is_setgid {
+    my ($self) = @_;
+
+    return $self->operm & $SETGID;
+}
+
+=item unpacked_path
+
+Returns the path to this object on the file system, which must be a
+regular file, a hardlink or a directory.
+
+This method may fail if:
+
+=over 4
+
+=item * The object is neither a directory or a file-like object (e.g. a
+named pipe).
+
+=item * If the object is dangling symlink or the path traverses a symlink
+outside the package root.
+
+=back
+
+To test if this is safe to call, if the target is (supposed) to be a:
+
+=over 4
+
+=item * file or hardlink then test with L</is_open_ok>.
+
+=item * dir then assert L<resolve_path|/resolve_path([PATH])> returns a
+defined entry, for which L</is_dir> returns a truth value.
+
+=back
+
+=cut
+
+sub unpacked_path {
+    my ($self) = @_;
+
+    $self->_check_access;
+
+    croak encode_utf8('No index in ' . $self->name)
+      unless defined $self->index;
+
+    my $basedir = $self->index->basedir;
+
+    croak encode_utf8($self->index->identifier . ': No base directory')
+      unless length $basedir;
+
+    my $unpacked = path($basedir)->child($self->name)->stringify;
+
+    # bug in perl, file operator should not care but does
+    # https://github.com/Perl/perl5/issues/10550
+    # also, https://github.com/Perl/perl5/issues/9674
+    utf8::downgrade $unpacked;
+
+    return $unpacked;
+}
+
+=item is_open_ok
+
+Returns a truth value if it is safe to attempt open a read handle to
+the underlying file object.
+
+Returns a truth value if the path may be opened.
+
+=cut
+
+sub is_open_ok {
+    my ($self) = @_;
+
+    my $path_info = $self->path_info;
+
+    return 1
+      if ($path_info & OPEN_IS_OK) == OPEN_IS_OK;
+
+    return 0
+      if $path_info & ACCESS_INFO;
+
+    try {
+        $self->_check_open;
+
+    } catch {
+        return 0;
+
+        # perlcritic 1.140-1 requires the semicolon on the next line
+    };
+
+    return 1;
+}
+
+sub _check_access {
+    my ($self) = @_;
+
+    my $path_info = $self->path_info;
+
+    return 1
+      if ($path_info & FS_PATH_IS_OK) == FS_PATH_IS_OK;
+
+    return 0
+      if $path_info & ACCESS_INFO;
+
+    my $resolvable = $self->resolve_path;
+    unless ($resolvable) {
+        $self->path_info($self->path_info | UNSAFE_PATH);
+        # NB: We are deliberately vague here to avoid suggesting
+        # whether $path exists.  In some cases (e.g. lintian.d.o)
+        # the output is readily available to wider public.
+        confess encode_utf8($self->index->identifier
+              .': Attempt to access through broken or unsafe symlink: '
+              . $self->name);
+    }
+
+    $self->path_info($self->path_info | FS_PATH_IS_OK);
+
+    return 1;
+}
+
+sub _check_open {
+    my ($self) = @_;
+
+    $self->_check_access;
+
+    # Symlinks can point to a "non-file" object inside the
+    # package root
+    # Leave "_path_access" here as _check_access marks it either as
+    # "UNSAFE_PATH" or "FS_PATH_IS_OK"
+
+    confess encode_utf8($self->index->identifier
+          .': Opening of irregular file not supported: '
+          . $self->name)
+      unless $self->is_file || ($self->is_symlink && -e $self->unpacked_path);
+
+    $self->path_info($self->path_info | OPEN_IS_OK);
+
+    return 1;
+}
+
+=item follow
+
+Return dereferenced link if applicable
+
+=cut
+
+sub follow {
+    my ($self, $maxlinks) = @_;
+
+    return $self
+      unless length $self->link;
+
+    return $self->dereferenced
+      if defined $self->dereferenced;
+
+    # set limit
+    $maxlinks //= $MAXIMUM_LINK_DEPTH;
+
+    # catch recursive links
+    return undef
+      if $maxlinks <= 0;
+
+    # reduce counter
+    $maxlinks--;
+
+    my $reference;
+
+    croak encode_utf8('No index in ' . $self->name)
+      unless defined $self->index;
+
+    if ($self->is_hardlink) {
+        # hard links are resolved against package root
+        $reference = $self->index->lookup;
+
+    } else {
+        # otherwise resolve against the parent
+        $reference = $self->parent_dir;
+    }
+
+    croak encode_utf8($self->index->identifier
+          . ': No parent reference for link in '
+          . $self->name)
+      unless defined $reference;
+
+    # follow link
+    my $dereferenced = $reference->resolve_path($self->link, $maxlinks);
+    $self->dereferenced($dereferenced);
+
+    return $self->dereferenced;
+}
+
+=item resolve_path([PATH])
+
+Resolve PATH relative to this path entry.
+
+If PATH starts with a slash and the file hierarchy has a well-defined
+root directory, then PATH will instead be resolved relatively to the
+root dir.  If the file hierarchy does not have a well-defined root dir
+(e.g. for source packages), this method will return C<undef>.
+
+If PATH is omitted, then the entry is resolved and the target is
+returned if it is valid.  Except for symlinks, all entries always
+resolve to themselves.  NB: hardlinks also resolve as themselves.
+
+It is an error to attempt to resolve a PATH against a non-directory
+and non-symlink entry - as such resolution would always fail
+(i.e. foo/../bar is an invalid path unless foo is a directory or a
+symlink to a dir).
+
+
+The resolution takes symlinks into account and following them provided
+that the target path is valid (and can be followed safely).  If the
+path is invalid or circular (symlinks), escapes the root directory or
+follows an unsafe symlink, the method returns C<undef>.  Otherwise, it
+returns the path entry that denotes the target path.
+
+
+If PATH contains at least one path segment and ends with a slash, then
+the resolved path will end in a directory (or fail).  Otherwise, the
+resolved PATH can end in any entry I<except> a symlink.
+
+Examples:
+
+  $symlink_entry->resolve_path => $nonsymlink_entry OR undef
+
+  $x->resolve_path => $x
+
+  For directory or symlink entries (dol), you can also resolve a path:
+
+  $dol_entry->resolve_path('some/../where') => $nonsymlink_entry OR undef
+
+  # Note the trailing slash
+  $dol_entry->resolve_path('some/../where/') => $dir_entry OR undef
+
+=cut
+
+sub resolve_path {
+    my ($self, $request, $maxlinks) = @_;
+
+    croak encode_utf8('No index in ' . $self->name)
+      unless defined $self->index;
+
+    croak encode_utf8(
+        $self->index->identifier . ': Can only resolve string arguments')
+      if defined $request && ref($request) ne $EMPTY;
+
+    $request //= $EMPTY;
+
+    if (length $self->link) {
+        # follow the link
+        my $dereferenced = $self->follow($maxlinks);
+        return undef
+          unless defined $dereferenced;
+
+        # and use that to resolve the request
+        return $dereferenced->resolve_path($request, $maxlinks);
+    }
+
+    my $reference;
+
+    # check for absolute reference; remove slash
+    if ($request =~ s{^/+}{}s) {
+
+        # require anchoring for absolute references
+        return undef
+          unless $self->index->anchored;
+
+        # get root entry
+        $reference = $self->index->lookup;
+
+    } elsif ($self->is_dir) {
+        # directories are their own starting point
+        $reference = $self;
+
+    } else {
+        # otherwise, use parent directory
+        $reference = $self->parent_dir;
+    }
+
+    return undef
+      unless defined $reference;
+
+    # read first segment; strip all trailing slashes for recursive use
+    if ($request =~ s{^([^/]+)/*}{}) {
+
+        my $segment = $1;
+
+        # single dot, or two slashes in a row
+        return $reference->resolve_path($request, $maxlinks)
+          if $segment eq $DOT || !length $segment;
+
+        # for double dot, go up a level
+        if ($segment eq $DOUBLE_DOT) {
+            my $parent = $reference->parent_dir;
+            return undef
+              unless defined $parent;
+
+            return $parent->resolve_path($request, $maxlinks);
+        }
+
+        # look for child otherwise
+        my $child = $reference->child($segment);
+        return undef
+          unless defined $child;
+
+        return $child->resolve_path($request, $maxlinks);
+    }
+
+    croak encode_utf8($self->index->identifier
+          . ": Cannot parse path resolution request: $request")
+      if length $request;
+
+    # nothing else to resolve
+    return $self;
+}
+
+=item name
+
+Returns the name of the file (relative to the package root).
+
+NB: It will never have any leading "./" (or "/") in it.
+
+=item basename
+
+Returns the "filename" part of the name, similar basename(1) or
+File::Basename::basename (without passing a suffix to strip in either
+case).
+
+NB: Returns the empty string for the "root" dir.
+
+=item dirname
+
+Returns the "directory" part of the name, similar to dirname(1) or
+File::Basename::dirname.  The dirname will end with a trailing slash
+(except the "root" dir - see below).
+
+NB: Returns the empty string for the "root" dir.
+
+=item link
+
+If this is a link (i.e. is_symlink or is_hardlink returns a truth
+value), this method returns the target of the link.
+
+If this is not a link, then this returns undef.
+
+If the path is a symlink this method can be used to determine if the
+symlink is relative or absolute.  This is I<not> true for hardlinks,
+where the link target is always relative to the root.
+
+NB: Even for symlinks, a leading "./" will be stripped.
+
+=item normalized
+
+=item faux
+
+Returns a truth value if this entry absent in the package.  This can
+happen if a package does not include all intermediate directories.
+
+=item size
+
+Returns the size of the path in bytes.
+
+NB: Only regular files can have a non-zero file size.
+
+=item date
+
+Return the modification date as YYYY-MM-DD.
+
+=item time
+
+=item perm
+
+=item path_info
+
+=item owner
+
+Returns the owner of the path entry as a username.
+
+NB: If only numerical owner information is available in the package,
+this may return a numerical owner (except uid 0 is always mapped to
+"root")
+
+=item group
+
+Returns the group of the path entry as a username.
+
+NB: If only numerical owner information is available in the package,
+this may return a numerical group (except gid 0 is always mapped to
+"root")
+
+=item uid
+
+Returns the uid of the owner of the path entry.
+
+NB: If the uid is not available, 0 will be returned.
+This usually happens if the numerical data is not collected (e.g. in
+source packages)
+
+=item gid
+
+Returns the gid of the owner of the path entry.
+
+NB: If the gid is not available, 0 will be returned.
+This usually happens if the numerical data is not collected (e.g. in
+source packages)
+
+=item file_type
+
+Return the data from L<file(1)> if it has been collected.
+
+Note this is only defined for files as Lintian only runs L<file(1)> on
+files.
+
+=item java_info
+
+=item strings
+
+=item C<basedir>
+
+=item index
+
+=item parent_dir
+
+=item child_table
+
+=item sorted_children
+
+Returns the parent directory entry of this entry as a
+L<Lintian::File::Path>.
+
+NB: Returns C<undef> for the "root" dir.
+
+=item C<childnames>
+
+=item parent_dir
+
+Return the parent dir entry of this the path entry.
+
+=item dereferenced
+
+=cut
+
+has name => (
+    is => 'rw',
+    lazy => 1,
+    coerce => sub { my ($string) = @_; return $string // $EMPTY;},
+    trigger => sub {
+        my ($self, $name) = @_;
+
+        my ($basename) = ($name =~ m{([^/]*)/?$}s);
+        $self->basename($basename);
+
+        # allow newline in names; need /s for dot matching (#929729)
+        my ($dirname) = ($name =~ m{^(.+/)?(?:[^/]+/?)$}s);
+        $self->dirname($dirname);
+    },
+    default => $EMPTY
+);
+has basename => (
+    is => 'rw',
+    lazy => 1,
+    coerce => sub { my ($string) = @_; return $string // $EMPTY;},
+    default => $EMPTY
+);
+has dirname => (
+    is => 'rw',
+    lazy => 1,
+    coerce => sub { my ($string) = @_; return $string // $EMPTY;},
+    default => $EMPTY
+);
+
+has link => (
+    is => 'rw',
+    coerce => sub { my ($string) = @_; return $string // $EMPTY;},
+    default => $EMPTY
+);
+has normalized => (
+    is => 'rw',
+    coerce => sub { my ($string) = @_; return $string // $EMPTY;},
+    default => $EMPTY
+);
+has faux => (is => 'rw', default => 0);
+
+has size => (is => 'rw', default => 0);
+has date => (
+    is => 'rw',
+    default => sub {
+        my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = gmtime;
+        return sprintf('%04d-%02d-%02d', $year, $mon, $mday);
+    }
+);
+has time => (
+    is => 'rw',
+    default => sub {
+        my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = gmtime;
+        return sprintf('%02d:%02d:%02d', $hour, $min, $sec);
+    }
+);
+
+has perm => (is => 'rw');
+has path_info => (is => 'rw');
+
+has owner => (
+    is => 'rw',
+    coerce => sub { my ($string) = @_; return $string // 'root'; },
+    default => 'root'
+);
+has group => (
+    is => 'rw',
+    coerce => sub { my ($string) = @_; return $string // 'root'; },
+    default => 'root'
+);
+has uid => (
+    is => 'rw',
+    coerce => sub { my ($value) = @_; return int($value // 0); },
+    default => 0
+);
+has gid => (
+    is => 'rw',
+    coerce => sub { my ($value) = @_; return int($value // 0); },
+    default => 0
+);
+
+has md5sum => (
+    is => 'rw',
+    coerce => sub { my ($checksum) = @_; return ($checksum // 0); },
+    default => 0
+);
+has file_type => (
+    is => 'rw',
+    coerce => sub { my ($text) = @_; return ($text // $EMPTY); },
+    default => $EMPTY
+);
+has java_info => (
+    is => 'rw',
+    coerce => sub { my ($hashref) = @_; return ($hashref // {}); },
+    default => sub { {} }
+);
+has strings => (
+    is => 'rw',
+    coerce => sub { my ($text) = @_; return ($text // $EMPTY); },
+    default => $EMPTY
+);
+has ar_info => (
+    is => 'rw',
+    coerce => sub { my ($hashref) = @_; return ($hashref // {}); },
+    default => sub { {} }
+);
+
+has index => (is => 'rw');
+has childnames => (is => 'rw', default => sub { {} });
+has parent_dir => (
+    is => 'rw',
+    lazy => 1,
+    default => sub {
+        my ($self) = @_;
+
+        # do not return root as its own parent
+        return
+          if $self->name eq $EMPTY;
+
+        croak encode_utf8('No index in ' . $self->name)
+          unless defined $self->index;
+
+        # returns root by default
+        return $self->index->lookup($self->dirname);
+    }
+);
+has dereferenced => (is => 'rw');
+
+=item elf
+
+=cut
+
+sub elf {
+    my ($self, @args) = @_;
+
+    if (@args) {
+
+        $self->index->elf_storage->{$self->name} = $args[0];
+
+        return ();
+    }
+
+    my %copy = %{$self->index->elf_storage->{$self->name} // {} };
+
+    return \%copy;
+}
+
+=item elf_by_member
+
+=cut
+
+sub elf_by_member {
+    my ($self, @args) = @_;
+
+    if (@args) {
+
+        my $object_name = $args[0];
+        my $by_object = $args[1];
+
+        my $tmp = $self->index->elf_storage_by_member->{$self->name} // {};
+        $tmp->{$object_name} = $by_object;
+        $self->index->elf_storage_by_member->{$self->name} = $tmp;
+
+        return ();
+    }
+
+    my %copy = %{$self->index->elf_storage_by_member->{$self->name} // {} };
+
+    return \%copy;
+}
+
+=item pointer
+
+=cut
+
+sub pointer {
+    my ($self, $position) = @_;
+
+    my $pointer = Lintian::Pointer::Item->new;
+    $pointer->item($self);
+    $pointer->position($position);
+
+    return $pointer;
+}
+
+=item bytes
+
+Returns verbatim file contents as a scalar.
+
+=item is_valid_utf8
+
+Boolean true if file contents are valid UTF-8.
+
+=item decoded_utf8
+
+Returns a decoded, wide-character string if file contents are valid UTF-8.
+
+=cut
+
+sub bytes {
+    my ($self) = @_;
+
+    return $EMPTY
+      unless $self->is_open_ok;
+
+    my $bytes = path($self->unpacked_path)->slurp;
+
+    return $bytes;
+}
+
+sub is_valid_utf8 {
+    my ($self) = @_;
+
+    my $bytes = $self->bytes;
+    return 0
+      unless defined $bytes;
+
+    return valid_utf8($bytes);
+}
+
+sub decoded_utf8 {
+    my ($self) = @_;
+
+    return $EMPTY
+      unless $self->is_valid_utf8;
+
+    return decode_utf8($self->bytes);
+}
+
+### OVERLOADED OPERATORS ###
+
+# overload apparently does not like the mk_ro_accessor, so use a level
+# of indirection
+
+sub _as_regex_ref {
+    my ($self) = @_;
+    my $name = $self->name;
+    return qr{ \Q$name\E }xsm;
+}
+
+sub _as_string {
+    my ($self) = @_;
+    return $self->name;
+}
+
+sub _bool {
+    # Always true (used in "if ($info->index('some/path')) {...}")
+    return 1;
+}
+
+sub _bool_not {
+    my ($self) = @_;
+    return !$self->_bool;
+}
+
+sub _str_cmp {
+    my ($self, $str, $swap) = @_;
+    return $str cmp $self->name if $swap;
+    return $self->name cmp $str;
+}
+
+sub _str_concat {
+    my ($self, $str, $swap) = @_;
+    return $str . $self->name if $swap;
+    return $self->name . $str;
+}
+
+sub _str_eq {
+    my ($self, $str) = @_;
+    return $self->name eq $str;
+}
+
+sub _str_ne {
+    my ($self, $str) = @_;
+    return $self->name ne $str;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Niels Thykier <niels@thykier.net> for Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
+
diff --git a/lib/Lintian/Index/Java.pm b/lib/Lintian/Index/Java.pm
new file mode 100644
index 0000000..4b33bec
--- /dev/null
+++ b/lib/Lintian/Index/Java.pm
@@ -0,0 +1,258 @@
+# -*- perl -*- Lintian::Index::Java
+#
+# Copyright (C) 2020 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program.  If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Index::Java;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Archive::Zip qw( :ERROR_CODES :CONSTANTS );
+use Const::Fast;
+use Cwd;
+use Path::Tiny;
+use Unicode::UTF8 qw(encode_utf8);
+
+use Moo::Role;
+use namespace::clean;
+
+const my $EMPTY => q{};
+const my $NEWLINE => qq{\n};
+const my $SPACE => q{ };
+const my $DASH => q{-};
+
+const my $JAVA_MAGIC_SIZE => 8;
+const my $JAVA_MAGIC_BYTES => 0xCAFEBABE;
+
+=head1 NAME
+
+Lintian::Index::Java - java information.
+
+=head1 SYNOPSIS
+
+    use Lintian::Index;
+
+=head1 DESCRIPTION
+
+Lintian::Index::Java java information.
+
+=head1 INSTANCE METHODS
+
+=over 4
+
+=item add_java
+
+=cut
+
+sub add_java {
+    my ($self) = @_;
+
+    my $savedir = getcwd;
+    chdir($self->basedir)
+      or die encode_utf8(
+        $self->identifier . ': Cannot change to directory ' . $self->basedir);
+
+    my $errors = $EMPTY;
+
+    my @files = grep { $_->is_file } @{$self->sorted_list};
+
+    # Wheezy's version of file calls "jar files" for "Zip archive".
+    # Newer versions seem to call them "Java Jar file".
+    # Jessie also introduced "Java archive data (JAR)"...
+    my @java_files = grep {
+        $_->file_type=~ m{
+            Java [ ] (?:Jar [ ] file|archive [ ] data)
+            | Zip [ ] archive
+            | JAR }x;
+    } @files;
+
+    my @lines;
+    for my $file (@java_files) {
+
+        push(@lines, parse_jar($file->name))
+          if $file->name =~ /\S+\.jar$/i;
+    }
+
+    my $file;
+    my $file_list;
+    my $manifest = 0;
+    local $_ = undef;
+
+    my %java_info;
+
+    for my $line (@lines) {
+        chomp $line;
+        next if $line =~ /^\s*$/;
+
+        if ($line =~ /^-- ERROR:\s*(\S.+)$/) {
+            $java_info{$file}{error} = $1;
+
+        } elsif ($line =~ m{^-- MANIFEST: (?:\./)?(?:.+)$}) {
+            # TODO: check $file == $1 ?
+            $java_info{$file}{manifest} = {};
+            $manifest = $java_info{$file}{manifest};
+            $file_list = 0;
+
+        } elsif ($line =~ m{^-- (?:\./)?(.+)$}) {
+            $file = $1;
+            $java_info{$file}{files} = {};
+            $file_list = $java_info{$file}{files};
+            $manifest = 0;
+        } else {
+            if ($manifest && $line =~ m{^  (\S+):\s(.*)$}) {
+                $manifest->{$1} = $2;
+            } elsif ($file_list) {
+                my ($fname, $clmajor) = ($line =~ m{^([^-].*):\s*([-\d]+)$});
+                $file_list->{$fname} = $clmajor;
+            }
+        }
+    }
+
+    $_->java_info($java_info{$_->name}) for @java_files;
+
+    chdir($savedir)
+      or die encode_utf8(
+        $self->identifier . ": Cannot change to directory $savedir");
+
+    return $errors;
+}
+
+=item parse_jar
+
+=cut
+
+sub parse_jar {
+    my ($path) = @_;
+
+    my @lines;
+
+    # This script needs unzip, there's no way around.
+    push(@lines, "-- $path");
+
+    # Without this Archive::Zip will emit errors to standard error for
+    # faulty zip files - but that is not what we want.  AFAICT, it is
+    # the only way to get a textual error as well, so (ab)use it for
+    # this purpose while we are at it.
+    my $errorhandler = sub {
+        my ($err) = @_;
+        $err =~ s/\r?\n/ /g;
+
+        # trim right
+        $err =~ s/\s+$//;
+
+        push(@lines, "-- ERROR: $err");
+    };
+    my $oldhandler = Archive::Zip::setErrorHandler($errorhandler);
+
+    my $azip = Archive::Zip->new;
+    if($azip->read($path) == AZ_OK) {
+
+        # save manifest for the end
+        my $manifest;
+
+        # file list comes first
+        foreach my $member ($azip->members) {
+            my $name = $member->fileName;
+
+            next
+              if $member->isDirectory;
+
+            # store for later processing
+            $manifest = $member
+              if $name =~ m{^META-INF/MANIFEST.MF$}i;
+
+            # add version if we can find it
+            my $jversion;
+            if ($name =~ /\.class$/) {
+                # Collect the Major version of the class file.
+                my ($contents, $zerr) = $member->contents;
+
+       # bug in Archive::Zip; seen in android-platform-libcore_10.0.0+r36-1.dsc
+                last
+                  unless defined $zerr;
+
+                last
+                  unless $zerr == AZ_OK;
+
+                # Ensure we can read at least 8 bytes for the unpack.
+                next
+                  if length $contents < $JAVA_MAGIC_SIZE;
+
+                # translation of the unpack
+                #  NN NN NN NN, nn nn, nn nn   - bytes read
+                #     $magic  , __ __, $major  - variables
+                my ($magic, undef, $major) = unpack('Nnn', $contents);
+                $jversion = $major
+                  if $magic == $JAVA_MAGIC_BYTES;
+            }
+            push(@lines, "$name: " . ($jversion // $DASH));
+        }
+
+        if ($manifest) {
+            push(@lines, "-- MANIFEST: $path");
+
+            my ($contents, $zerr) = $manifest->contents;
+
+       # bug in Archive::Zip; seen in android-platform-libcore_10.0.0+r36-1.dsc
+            return ()
+              unless defined $zerr;
+
+            if ($zerr == AZ_OK) {
+                my $partial = $EMPTY;
+                my $first = 1;
+                my @list = split($NEWLINE, $contents);
+                foreach my $line (@list) {
+
+                    # remove DOS type line feeds
+                    $line =~ s/\r//g;
+
+                    if ($line =~ /^(\S+:)\s*(.*)/) {
+                        push(@lines, $SPACE . $SPACE . "$1 $2");
+                    }
+                    if ($line =~ /^ (.*)/) {
+                        push(@lines, $1);
+                    }
+                }
+            }
+        }
+    }
+
+    Archive::Zip::setErrorHandler($oldhandler);
+
+    return @lines;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for
+Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Index/Md5sums.pm b/lib/Lintian/Index/Md5sums.pm
new file mode 100644
index 0000000..c1d0583
--- /dev/null
+++ b/lib/Lintian/Index/Md5sums.pm
@@ -0,0 +1,127 @@
+# -*- perl -*- Lintian::Index::Md5sums
+#
+# Copyright (C) 2020 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program.  If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Index::Md5sums;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Const::Fast;
+use Cwd;
+use Unicode::UTF8 qw(encode_utf8 decode_utf8);
+
+use Lintian::IPC::Run3 qw(xargs);
+use Lintian::Util qw(read_md5sums);
+
+use Moo::Role;
+use namespace::clean;
+
+const my $EMPTY => q{};
+const my $NEWLINE => qq{\n};
+
+const my $WAIT_STATUS_SHIFT => 8;
+const my $NULL => qq{\0};
+
+=head1 NAME
+
+Lintian::Index::Md5sums - calculate checksums for index.
+
+=head1 SYNOPSIS
+
+    use Lintian::Index;
+
+=head1 DESCRIPTION
+
+Lintian::Index::Md5sums calculates checksums for an index.
+
+=head1 INSTANCE METHODS
+
+=over 4
+
+=item add_md5sums
+
+=cut
+
+sub add_md5sums {
+    my ($self) = @_;
+
+    my $savedir = getcwd;
+    chdir($self->basedir)
+      or die encode_utf8(
+        $self->identifier . ': Cannot change to directory ' . $self->basedir);
+
+    my $errors = $EMPTY;
+
+    # get the regular files in the index
+    my @files = grep { $_->is_file } @{$self->sorted_list};
+    my @names = map { $_->name } @files;
+
+    my @command = qw(md5sum --);
+
+    my %md5sums;
+
+    xargs(
+        \@command,
+        \@names,
+        sub {
+            my ($stdout, $stderr, $status, @partial) = @_;
+
+            $stderr = decode_utf8($stderr)
+              if length $stderr;
+
+            if ($status) {
+                $errors .= "Cannot run @command: $stderr" . $NEWLINE;
+                return;
+            }
+
+            # undecoded split allows names with non UTF-8 bytes
+            my ($partial_sums, undef) = read_md5sums($stdout);
+
+            $md5sums{$_} = $partial_sums->{$_}for @partial;
+        }
+    );
+
+    $_->md5sum($md5sums{$_->name}) for @files;
+
+    chdir($savedir)
+      or die encode_utf8(
+        $self->identifier . ": Cannot change to directory $savedir");
+
+    return $errors;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for
+Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Index/Strings.pm b/lib/Lintian/Index/Strings.pm
new file mode 100644
index 0000000..f0e4fa1
--- /dev/null
+++ b/lib/Lintian/Index/Strings.pm
@@ -0,0 +1,99 @@
+# -*- perl -*- Lintian::Index::Strings
+#
+# Copyright (C) 2020 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program.  If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Index::Strings;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Const::Fast;
+use Path::Tiny;
+use Unicode::UTF8 qw(decode_utf8);
+
+use Lintian::IPC::Run3 qw(safe_qx);
+
+use Moo::Role;
+use namespace::clean;
+
+const my $EMPTY => q{};
+
+=head1 NAME
+
+Lintian::Index::Strings - strings in binary files.
+
+=head1 SYNOPSIS
+
+    use Lintian::Index;
+
+=head1 DESCRIPTION
+
+Lintian::Index::Strings strings in binary files.
+
+=head1 INSTANCE METHODS
+
+=over 4
+
+=item add_strings
+
+=cut
+
+sub add_strings {
+    my ($self) = @_;
+
+    my $errors = $EMPTY;
+
+    my @files = grep { $_->is_file } @{$self->sorted_list};
+    for my $file (@files) {
+
+        next
+          if $file->name =~ m{^usr/lib/debug/};
+
+        # skip non-binaries
+        next
+          unless $file->file_type =~ /\bELF\b/;
+
+        # prior implementations sometimes made the list unique
+        my $allstrings
+          = decode_utf8(safe_qx(qw{strings --all --}, $file->unpacked_path));
+
+        $file->strings($allstrings);
+    }
+
+    return $errors;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for
+Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et