summaryrefslogtreecommitdiffstats
path: root/lib/Lintian/Index
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Lintian/Index')
-rw-r--r--lib/Lintian/Index/Ar.pm128
-rw-r--r--lib/Lintian/Index/Elf.pm739
-rw-r--r--lib/Lintian/Index/FileTypes.pm195
-rw-r--r--lib/Lintian/Index/Item.pm1567
-rw-r--r--lib/Lintian/Index/Java.pm258
-rw-r--r--lib/Lintian/Index/Md5sums.pm127
-rw-r--r--lib/Lintian/Index/Strings.pm99
7 files changed, 3113 insertions, 0 deletions
diff --git a/lib/Lintian/Index/Ar.pm b/lib/Lintian/Index/Ar.pm
new file mode 100644
index 0000000..01f3e6b
--- /dev/null
+++ b/lib/Lintian/Index/Ar.pm
@@ -0,0 +1,128 @@
+# -*- perl -*- Lintian::Index::Ar
+#
+# Copyright (C) 2020 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Index::Ar;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Const::Fast;
+use Cwd;
+use Path::Tiny;
+use Unicode::UTF8 qw(decode_utf8 encode_utf8);
+
+use Lintian::IPC::Run3 qw(safe_qx);
+
+use Moo::Role;
+use namespace::clean;
+
+const my $EMPTY => q{};
+const my $NEWLINE => qq{\n};
+
+=head1 NAME
+
+Lintian::Index::Ar - binary symbol information.
+
+=head1 SYNOPSIS
+
+ use Lintian::Index;
+
+=head1 DESCRIPTION
+
+Lintian::Index::Ar binary symbol information.
+
+=head1 INSTANCE METHODS
+
+=over 4
+
+=item add_ar
+
+=cut
+
+sub add_ar {
+ my ($self) = @_;
+
+ my $savedir = getcwd;
+ chdir($self->basedir)
+ or die encode_utf8(
+ $self->identifier . ': Cannot change to directory ' . $self->basedir);
+
+ my $errors = $EMPTY;
+
+ my @archives
+ = grep { $_->name =~ / [.]a $/msx && $_->is_regular_file }
+ @{$self->sorted_list};
+
+ for my $archive (@archives) {
+
+ # skip empty archives to avoid ar error message; happens in tests
+ next
+ unless $archive->size;
+
+ my %ar_info;
+
+ # fails silently for non-ar files (#934899); probably creates empty entries
+ my $bytes = safe_qx(qw{ar t}, $archive);
+ if ($?) {
+ $errors .= "ar failed for $archive" . $NEWLINE;
+ next;
+ }
+
+ my $output = decode_utf8($bytes);
+ my @members = split(/\n/, $output);
+
+ my $count = 1;
+ for my $member (@members) {
+
+ # more info could be added with -v above
+ $ar_info{$count}{name} = $member;
+
+ } continue {
+ $count++;
+ }
+
+ $archive->ar_info(\%ar_info);
+ }
+
+ chdir($savedir)
+ or die encode_utf8(
+ $self->identifier . ": Cannot change to directory $savedir");
+
+ return $errors;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for
+Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Index/Elf.pm b/lib/Lintian/Index/Elf.pm
new file mode 100644
index 0000000..1fe4d7a
--- /dev/null
+++ b/lib/Lintian/Index/Elf.pm
@@ -0,0 +1,739 @@
+# -*- perl -*- Lintian::Index::Elf
+#
+# Copyright (C) 1998 Christian Schwarz
+# Copyright (C) 2008 Adam D. Barratt
+# Copyright (C) 2017-2018 Chris Lamb <lamby@debian.org>
+# Copyright (C) 2020-2022 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Index::Elf;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use bignum qw(hex);
+
+use Const::Fast;
+use Cwd;
+use IPC::Run3;
+use Unicode::UTF8 qw(encode_utf8 valid_utf8 decode_utf8);
+
+use Lintian::Elf::Section;
+use Lintian::Elf::Symbol;
+use Lintian::Storage::MLDBM;
+
+use Moo::Role;
+use namespace::clean;
+
+const my $EMPTY => q{};
+const my $SPACE => q{ };
+const my $HYPHEN => q{-};
+const my $NEWLINE => qq{\n};
+
+const my $LINES_PER_FILE => 3;
+
+=head1 NAME
+
+Lintian::Index::Elf - binary symbol information.
+
+=head1 SYNOPSIS
+
+ use Lintian::Index;
+
+=head1 DESCRIPTION
+
+Lintian::Index::Elf binary symbol information.
+
+=head1 INSTANCE METHODS
+
+=over 4
+
+=item elf_storage
+
+=cut
+
+has elf_storage => (
+ is => 'rw',
+ lazy => 1,
+ default => sub {
+ my ($self) = @_;
+
+ my $storage = Lintian::Storage::MLDBM->new;
+ $storage->create('elf');
+
+ return $storage;
+ }
+);
+
+=item elf_storage_by_member
+
+=cut
+
+has elf_storage_by_member => (
+ is => 'rw',
+ lazy => 1,
+ default => sub {
+ my ($self) = @_;
+
+ my $storage = Lintian::Storage::MLDBM->new;
+ $storage->create('elf-by-member');
+
+ return $storage;
+ }
+);
+
+=item add_elf
+
+=cut
+
+sub add_elf {
+ my ($self) = @_;
+
+ my $savedir = getcwd;
+ chdir($self->basedir)
+ or die encode_utf8(
+ $self->identifier . ': Cannot change to directory ' . $self->basedir);
+
+ my $errors = $EMPTY;
+
+ my @files = grep { $_->is_file } @{$self->sorted_list};
+
+ # must be ELF or static library
+ my @with_objects = grep {
+ $_->file_type =~ /\bELF\b/
+ || ( $_->file_type =~ /\bcurrent ar archive\b/
+ && $_->name =~ /\.a$/)
+ } @files;
+
+ for my $file (@with_objects) {
+
+ local $SIG{__WARN__}= sub {
+ warn encode_utf8($self->identifier
+ . ': Warning while running readelf on'
+ . $file->name
+ . ": $_[0]");
+ };
+
+ my @command = (qw{readelf --all --wide}, $file->name);
+ my $combined_bytes;
+
+ run3(\@command, \undef, \$combined_bytes, \$combined_bytes);
+
+ next
+ unless length $combined_bytes;
+
+ my $combined_output;
+
+ if (valid_utf8($combined_bytes)) {
+ $combined_output = decode_utf8($combined_bytes);
+
+ } else {
+ $combined_output = $combined_bytes;
+ $errors .= "Output from '@command' is not valid UTF-8" . $NEWLINE;
+ }
+
+ # each object file in an archive gets its own File section
+ my @per_files = split(/^(File): (.*)$/m, $combined_output);
+ shift @per_files while @per_files && $per_files[0] ne 'File';
+
+ @per_files = ($combined_output)
+ unless @per_files;
+
+ # Special case - readelf will not prefix the output with "File:
+ # $name" if it only gets one ELF file argument, so act as if it did...
+ # (but it does "the right thing" if passed a static lib >.>)
+ #
+ # - In fact, if readelf always emitted that File: header, we could
+ # simply use xargs directly on readelf and just parse its output
+ # in the loop below.
+ if (@per_files == 1) {
+ unshift(@per_files, $file->name);
+ unshift(@per_files, 'File');
+ }
+
+ unless (@per_files % $LINES_PER_FILE == 0) {
+
+ $errors
+ .= "Parsed data from readelf is not a multiple of $LINES_PER_FILE for $file"
+ . $NEWLINE;
+ next;
+ }
+
+ while (defined(my $fixed = shift @per_files)) {
+
+ my $recorded_name = shift @per_files;
+ my $per_file = shift @per_files;
+
+ unless ($fixed eq 'File') {
+ $errors .= "Unknown output from readelf for $file" . $NEWLINE;
+ next;
+ }
+
+ unless (length $recorded_name) {
+ $errors .= "No file name from readelf for $file" . $NEWLINE;
+ next;
+ }
+
+ my ($container, $member) = ($recorded_name =~ /^(.*)\(([^)]+)\)$/);
+
+ $container = $recorded_name
+ unless defined $container && defined $member;
+
+ unless ($container eq $file->name) {
+ $errors
+ .= "Container not same as file name ($container vs $file)"
+ . $NEWLINE;
+ next;
+ }
+
+ # ignore empty archives, such as in musl-dev_1.2.1-1_amd64.deb
+ next
+ unless length $per_file;
+
+ my $object_name;
+ if ($recorded_name =~ m{^(?:.+)\(([^/\)]+)\)$}){
+
+ # object file in a static lib.
+ $object_name = $1;
+ }
+
+ parse_per_file($file, $object_name, $per_file);
+ }
+ }
+
+ chdir($savedir)
+ or die encode_utf8(
+ $self->identifier . ": Cannot change to directory $savedir");
+
+ return $errors;
+}
+
+=item parse_per_file
+
+=cut
+
+sub parse_per_file {
+ my ($file, $object_name, $from_readelf) = @_;
+
+ my %by_object;
+
+ $by_object{READELF} = $from_readelf;
+
+ # sometimes there are three blank lines; seen on armhf
+ my @paragraphs = split(/\n{2,}/, $from_readelf);
+
+ for my $paragraph (@paragraphs) {
+
+ my ($first, $bulk) = split(m{\n}, $paragraph, 2);
+
+ if ($first =~ /^ELF Header:/) {
+ elf_header($bulk, \%by_object);
+ next;
+ }
+
+ if ($first =~ /^Program Headers:/) {
+ program_headers($bulk, \%by_object);
+ next;
+ }
+
+ if ($first =~ /^Dynamic section at offset .*:/) {
+ dynamic_section($bulk, \%by_object);
+ next;
+ }
+
+ if ($first =~ /^Section Headers:/) {
+ section_headers($bulk, \%by_object);
+ next;
+ }
+
+ if ($first =~ /^Symbol table '.dynsym'/) {
+ symbol_table($bulk, \%by_object);
+ next;
+ }
+
+ if ($first =~ /^Version symbols section /) {
+ version_symbols($bulk, \%by_object);
+ next;
+ }
+
+ if ($first =~ /^There is no dynamic section in this file/) {
+ # a dynamic section was declared but it's empty.
+ $by_object{'BAD-DYNAMIC-TABLE'} = 1
+ if exists $by_object{PH}{DYNAMIC};
+ next;
+ }
+ }
+
+ my %section_name_by_number;
+ for my $name (keys %{$by_object{SH} // {}}) {
+
+ my $number = $by_object{SH}{$name}{number};
+ $section_name_by_number{$number} = $name;
+ }
+
+ for my $symbol_number (keys %{$by_object{'DYNAMIC-SYMBOLS'}}) {
+
+ my $symbol_name
+ = $by_object{'DYNAMIC-SYMBOLS'}{$symbol_number}{symbol_name};
+ my $section_number
+ = $by_object{'DYNAMIC-SYMBOLS'}{$symbol_number}{section_number};
+
+ my $symbol_version;
+
+ if ($symbol_name =~ m{^ (.*) @ (.*) \s [(] .* [)] $}x) {
+
+ $symbol_name = $1;
+ $symbol_version = $2;
+
+ } else {
+ $symbol_version = $by_object{'SYMBOL-VERSIONS'}{$symbol_number}
+ // $EMPTY;
+
+ if ( $symbol_version eq '*local*'
+ || $symbol_version eq '*global*'){
+
+ if ($section_number eq 'UND') {
+ $symbol_version = $EMPTY;
+
+ } else {
+ $symbol_version = 'Base';
+ }
+
+ } elsif ($symbol_version eq '()') {
+ $symbol_version = '(Base)';
+ }
+ }
+
+ # happens once or twice for regular binaries
+ next
+ unless length $symbol_name;
+
+ # look up numbered section
+ my $section_name = $section_name_by_number{$section_number}
+ // $section_number;
+
+ my $symbol = Lintian::Elf::Symbol->new;
+ $symbol->section($section_name);
+ $symbol->version($symbol_version);
+ $symbol->name($symbol_name);
+
+ push(@{ $by_object{SYMBOLS} }, $symbol);
+ }
+
+ if (length $object_name) {
+
+ # object file in a static lib.
+ $file->elf_by_member($object_name, \%by_object);
+
+ } else {
+ $file->elf(\%by_object);
+ }
+
+ return;
+}
+
+=item elf_header
+
+=cut
+
+sub elf_header {
+ my ($text, $by_object) = @_;
+
+ my @lines = split(m{\n}, $text);
+
+ for my $line (@lines) {
+
+ next
+ if divert_error('ELF header', $line, $by_object);
+
+ my ($field, $value) = split(/:/, $line, 2);
+
+ # trim both ends
+ $field =~ s/^\s+|\s+$//g;
+ $value =~ s/^\s+|\s+$//g;
+
+ next
+ unless length $field && length $value;
+
+ $by_object->{'ELF-HEADER'}{$field} = $value;
+ }
+
+ return;
+}
+
+=item program_headers
+
+=cut
+
+sub program_headers {
+ my ($text, $by_object) = @_;
+
+ my @lines = split(m{\n}, $text);
+
+ while (defined(my $line = shift @lines)) {
+
+ next
+ if divert_error('program headers', $line, $by_object);
+
+ if ($line =~ m{^ \s* (\S+) \s* (?:(?:\S+\s+){4}) \S+ \s (...) }x) {
+
+ my $header = $1;
+ my $flags = $2;
+
+ $header =~ s/^GNU_//g;
+
+ next
+ if $header eq 'Type';
+
+ my $newflags = $EMPTY;
+ $newflags .= ($flags =~ /R/) ? 'r' : $HYPHEN;
+ $newflags .= ($flags =~ /W/) ? 'w' : $HYPHEN;
+ $newflags .= ($flags =~ /E/) ? 'x' : $HYPHEN;
+
+ $by_object->{PH}{$header}{flags} = $newflags;
+
+ if ($header eq 'INTERP' && @lines) {
+ # Check if the next line is the "requesting an interpreter"
+ # (readelf appears to always emit on the next line if at all)
+ my $next_line = $lines[0];
+
+ if ($next_line
+ =~ m{ [[] Requesting \s program \s interpreter: \s ([^\]]+) []] }x
+ ){
+
+ my $interpreter = $1;
+
+ $by_object->{INTERP} = $interpreter;
+
+ # discard line
+ shift @lines;
+ }
+ }
+ }
+ }
+
+ return;
+}
+
+=item dynamic_section
+
+=cut
+
+sub dynamic_section {
+ my ($text, $by_object) = @_;
+
+ my @lines = split(m{\n}, $text);
+
+ while (defined(my $line = shift @lines)) {
+
+ next
+ if divert_error('dynamic section', $line, $by_object);
+
+ if ($line
+ =~ m{^ \s* 0x (?:[0-9A-F]+) \s+ [(] (.*?) [)] \s+ ([\x21-\x7f][\x20-\x7f]*) \Z}ix
+ ) {
+
+ my $type = $1;
+ my $remainder = $2;
+
+ my $keep = 0;
+
+ if ($type eq 'RPATH' || $type eq 'RUNPATH') {
+ $remainder =~ s{^ .* [[] }{}x;
+ $remainder =~ s{ []] \s* $}{}x;
+ $keep = 1;
+
+ } elsif ($type eq 'TEXTREL' || $type eq 'DEBUG') {
+ $keep = 1;
+
+ } elsif ($type eq 'FLAGS_1') {
+ # Will contain "NOW" if the binary was built with -Wl,-z,now
+ $remainder =~ s/^Flags:\s*//i;
+ $keep = 1;
+
+ } elsif (($type eq 'FLAGS' && $remainder =~ m/\bBIND_NOW\b/)
+ || $type eq 'BIND_NOW') {
+
+ # Variants of bindnow
+ $type = 'FLAGS_1';
+ $remainder = 'NOW';
+ $keep = 1;
+ }
+
+ $keep = 1
+ if $remainder
+ =~ s{^ (?: Shared \s library | Library \s soname ) : \s [[] (.*) []] }{$1}x;
+
+ next
+ unless $keep;
+
+ # Here we just need RPATH and NEEDS, so ignore the rest for now
+ if ($type eq 'RPATH' || $type eq 'RUNPATH') {
+
+ # RPATH is like PATH
+ my @components = split(/:/, $remainder);
+ $by_object->{$type}{$_} = 1 for @components;
+
+ } elsif ($type eq 'NEEDED' || $type eq 'SONAME') {
+ push(@{ $by_object->{$type} }, $remainder);
+
+ } elsif ($type eq 'TEXTREL' || $type eq 'DEBUG') {
+ $by_object->{$type} = 1;
+
+ } elsif ($type eq 'FLAGS_1') {
+
+ my @flags = split(/\s+/, $remainder);
+ $by_object->{$type}{$_} = 1 for @flags;
+ }
+ }
+ }
+
+ return;
+}
+
+=item section_headers
+
+=cut
+
+sub section_headers {
+ my ($text, $by_object) = @_;
+
+ const my $TOTAL_FIELDS => 11;
+
+ my @lines = split(m{\n}, $text);
+
+ die 'No column labels.'
+ unless @lines;
+
+ my $first = shift @lines;
+
+ my %labels_by_column;
+
+ my $column = 1;
+ for my $label (split($SPACE, $first)) {
+
+ $label =~ s{^ [[] }{}x;
+ $label =~ s{ []] $}{}x;
+
+ $labels_by_column{$column} = $label;
+
+ } continue {
+ ++$column;
+ }
+
+ die 'Not enough column labels.'
+ if keys %labels_by_column != $TOTAL_FIELDS;
+
+ my $row = 1;
+ while (defined(my $line = shift @lines)) {
+
+ next
+ if divert_error('section headers', $line, $by_object);
+
+ last
+ if $line =~ /^Key to Flags:/;
+
+ my %section_header;
+
+ my @matches = (
+ $line =~ m{^ \s*
+ [[] \s* (\S+) []] \s # Nr
+ (\S+)? \s+ # Name
+ (\S+) \s+ # Type
+ ([0-9a-f]+) \s # Address/Addr
+ ([0-9a-f]+) \s # Off
+ ([0-9a-f]+) \s # Size
+ (\S+) \s+ # ES
+ (\S+)? \s+ # Flg
+ (\S+) \s+ # Lk
+ (\S+) \s+ # Inf
+ (\S+) # Al
+ $}x
+ );
+
+ if (@matches != $TOTAL_FIELDS) {
+
+ warn "Parse error in readelf section headers [row $row]: $line";
+ next;
+ }
+
+ for my $column (keys %labels_by_column) {
+
+ my $label = $labels_by_column{$column};
+ my $value = $matches[$column -1] // $EMPTY;
+
+ $section_header{$label} = $value;
+ }
+
+ # http://sco.com/developers/gabi/latest/ch4.sheader.html
+ my $section = Lintian::Elf::Section->new;
+ $section->number($section_header{Nr});
+ $section->name($section_header{Name});
+ $section->type($section_header{Type});
+
+ # readelf uses both
+ $section->address(
+ hex($section_header{Address} // $section_header{Addr}));
+ $section->offset(hex($section_header{Off}));
+ $section->size(hex($section_header{Size}));
+ $section->entry_size(hex($section_header{ES}));
+ $section->flags($section_header{Flg});
+ $section->index_link(hex($section_header{Lk}));
+ $section->index_info(hex($section_header{Inf}));
+ $section->alignment(hex($section_header{Al}));
+
+ die 'No section number.'
+ unless length $section->number;
+
+ $by_object->{'SECTION-HEADERS'}{$section->number} = $section;
+
+ } continue {
+ ++$row;
+ }
+
+ return;
+}
+
+=item symbol_table
+
+=cut
+
+sub symbol_table {
+ my ($text, $by_object) = @_;
+
+ # We (sometimes) need to read the "Version symbols section" first to
+ # use this data and readelf tends to print after this section, so
+ # save for later.
+
+ my @lines = split(m{\n}, $text);
+
+ while (defined(my $line = shift @lines)) {
+
+ next
+ if divert_error('symbol table', $line, $by_object);
+
+ if ($line
+ =~ m{^ \s* (\d+) : \s* [0-9a-f]+ \s+ \d+ \s+ (?:(?:\S+\s+){3}) (?: [[] .* []] \s+)? (\S+) \s+ (.*) \Z}x
+ ) {
+
+ my $symbol_number = $1;
+ my $section_number = $2;
+ my $symbol_name = $3;
+
+ $by_object->{'DYNAMIC-SYMBOLS'}{$symbol_number}{section_number}
+ = $section_number;
+ $by_object->{'DYNAMIC-SYMBOLS'}{$symbol_number}{symbol_name}
+ = $symbol_name;
+ }
+ }
+
+ return;
+}
+
+=item version_symbols
+
+=cut
+
+sub version_symbols {
+ my ($text, $by_object) = @_;
+
+ my @lines = split(m{\n}, $text);
+
+ while (defined(my $line = shift @lines)) {
+
+ next
+ if divert_error('version symbols', $line, $by_object);
+
+ if ($line
+ =~ m{^ \s* [0-9a-f]+ : \s* \S+ \s* (?: [(] \S+ [)] )? (?: \s | \Z ) }xi
+ ){
+
+ while ($line
+ =~ m{ ([0-9a-f]+ h?) \s* (?: [(] (\S+) [)] )? (?: \s | \Z ) }cgix
+ ) {
+
+ my $symbol_number = $1;
+ my $symbol_version = $2;
+
+ # for libfuse2_2.9.9-3_amd64.deb
+ next
+ unless length $symbol_version;
+
+ $symbol_version = "($symbol_version)"
+ if $symbol_number =~ m{ h $}x;
+
+ $by_object->{'SYMBOL-VERSIONS'}{$symbol_number}
+ = $symbol_version;
+ }
+ }
+ }
+
+ return;
+}
+
+=item divert_error
+
+=cut
+
+sub divert_error {
+ my ($section, $line, $by_object) = @_;
+
+ return 0
+ unless $line =~ s{^ readelf: \s+ }{}x;
+
+ if ($line =~ s{^ Error: \s+ }{}x) {
+
+ my $message = "In $section: $line";
+
+ $by_object->{ERRORS} //= [];
+ push(@{$by_object->{ERRORS}}, $message);
+
+ return 1;
+ }
+
+ if ($line =~ s{^ Warning: \s+ }{}x) {
+
+ my $message = "In $section: $line";
+
+ $by_object->{WARNINGS} //= [];
+ push(@{$by_object->{WARNINGS}}, $message);
+
+ return 1;
+ }
+
+ return 0;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for
+Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Index/FileTypes.pm b/lib/Lintian/Index/FileTypes.pm
new file mode 100644
index 0000000..a71efb3
--- /dev/null
+++ b/lib/Lintian/Index/FileTypes.pm
@@ -0,0 +1,195 @@
+# -*- perl -*- Lintian::Index::FileTypes
+#
+# Copyright (C) 2020-2021 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Index::FileTypes;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Const::Fast;
+use Cwd;
+use Unicode::UTF8 qw(encode_utf8 decode_utf8);
+
+use Lintian::IPC::Run3 qw(xargs);
+
+use Moo::Role;
+use namespace::clean;
+
+const my $EMPTY => q{};
+const my $SPACE => q{ };
+const my $COMMA => q{,};
+const my $NEWLINE => qq{\n};
+
+const my $KEEP_EMPTY_FIELDS => -1;
+const my $GZIP_MAGIC_SIZE => 9;
+const my $GZIP_MAGIC_BYTES => 0x1f8b;
+
+=head1 NAME
+
+Lintian::Index::FileTypes - determine file type via magic.
+
+=head1 SYNOPSIS
+
+ use Lintian::Index;
+
+=head1 DESCRIPTION
+
+Lintian::Index::FileTypes determine file type via magic.
+
+=head1 INSTANCE METHODS
+
+=over 4
+
+=item add_file_types
+
+=cut
+
+sub add_file_types {
+ my ($self) = @_;
+
+ my $savedir = getcwd;
+ chdir $self->basedir
+ or die encode_utf8(
+ $self->identifier . ': Cannot change to directory ' . $self->basedir);
+
+ my $errors = $EMPTY;
+
+ my @files = grep { $_->is_file } @{$self->sorted_list};
+ my @names = map { $_->name } @files;
+
+ my @command = qw(file --raw --no-pad --print0 --print0 --);
+
+ my %file_types;
+
+ xargs(
+ \@command,
+ \@names,
+ sub {
+ my ($stdout, $stderr, $status, @partial) = @_;
+
+ # ignore failures if possible; file returns non-zero and
+ # "ERROR" on parse errors but output is still usable
+
+ # undecoded split allows names with non UTF-8 bytes
+ $stdout =~ s{ \0 $}{}x;
+
+ my @lines = split(m{\0}, $stdout, $KEEP_EMPTY_FIELDS);
+
+ unless (@lines % 2 == 0) {
+ $errors
+ .= 'Did not get an even number lines from file command.'
+ . $NEWLINE;
+ return;
+ }
+
+ while (defined(my $path = shift @lines)) {
+
+ my $type = shift @lines;
+
+ unless (length $path && length $type) {
+ $errors
+ .= "syntax error in file-info output: '$path' '$type'"
+ . $NEWLINE;
+ next;
+ }
+
+ # drop relative prefix, if present
+ $path =~ s{^ [.]/ }{}x;
+
+ $file_types{$path} = $self->adjust_type($path, $type);
+ }
+
+ return;
+ }
+ );
+
+ $_->file_type($file_types{$_->name}) for @files;
+
+ chdir $savedir
+ or die encode_utf8(
+ $self->identifier . ": Cannot change to directory $savedir");
+
+ return $errors;
+}
+
+=item adjust_type
+
+=cut
+
+# some files need to be corrected
+sub adjust_type {
+ my ($self, $name, $file_type) = @_;
+
+ if ($name =~ m{ [.]gz $}ix && $file_type !~ /compressed/) {
+
+ my $item = $self->lookup($name);
+
+ die encode_utf8("Cannot find file $name in index")
+ unless $item;
+
+ my $buffer = $item->magic($GZIP_MAGIC_SIZE);
+ if (length $buffer) {
+
+ # translation of the unpack
+ # nn nn , NN NN NN NN, nn nn, cc - bytes read
+ # $magic, __ __ __ __, __ __, $comp - variables
+ my ($magic, undef, undef, $compression) = unpack('nNnc', $buffer);
+
+ # gzip file magic
+ if ($magic == $GZIP_MAGIC_BYTES) {
+
+ my $augment = 'gzip compressed data';
+
+ # 2 for max compression; RFC1952 suggests this is a
+ # flag and not a value, hence bit operation
+ $augment .= $COMMA . $SPACE . 'max compression'
+ if $compression & 2;
+
+ return $file_type . $COMMA . $SPACE . $augment;
+ }
+ }
+ }
+
+ # some TFMs are categorized as gzip, see Bug#963589
+ return 'data'
+ if $name =~ m{ [.]tfm $}ix
+ && $file_type =~ /gzip compressed data/;
+
+ return $file_type;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for
+Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Index/Item.pm b/lib/Lintian/Index/Item.pm
new file mode 100644
index 0000000..7ef6c09
--- /dev/null
+++ b/lib/Lintian/Index/Item.pm
@@ -0,0 +1,1567 @@
+# -*- perl -*-
+# Lintian::Index::Item -- Representation of path entry in a package
+#
+# Copyright (C) 2011 Niels Thykier
+# Copyright (C) 2020 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Index::Item;
+
+use v5.20;
+use warnings;
+use utf8;
+use autodie qw(open);
+
+use Carp qw(croak confess);
+use Const::Fast;
+use Date::Parse qw(str2time);
+use List::SomeUtils qw(all);
+use Path::Tiny;
+use Syntax::Keyword::Try;
+use Text::Balanced qw(extract_delimited);
+use Unicode::UTF8 qw(valid_utf8 decode_utf8 encode_utf8);
+
+use Lintian::Pointer::Item;
+use Lintian::SlidingWindow;
+use Lintian::Util qw(normalize_link_target);
+
+use Moo;
+use namespace::clean;
+
+use constant {
+ TYPE_FILE => 0x00_01_00_00,
+ TYPE_HARDLINK => 0x00_02_00_00,
+ TYPE_DIR => 0x00_04_00_00,
+ TYPE_SYMLINK => 0x00_08_00_00,
+ TYPE_BLOCK_DEV => 0x00_10_00_00,
+ TYPE_CHAR_DEV => 0x00_20_00_00,
+ TYPE_PIPE => 0x00_40_00_00,
+ TYPE_OTHER => 0x00_80_00_00,
+ TYPE_MASK => 0x00_ff_00_00,
+
+ UNSAFE_PATH => 0x01_00_00_00,
+ FS_PATH_IS_OK => 0x02_00_00_00,
+ OPEN_IS_OK => 0x06_00_00_00, # Implies FS_PATH_IS_OK
+ ACCESS_INFO => 0x07_00_00_00,
+ # 0o6777 == 0xdff, which covers set[ug]id + sticky bit. Accordingly,
+ # 0xffff should be more than sufficient for the foreseeable future.
+ OPERM_MASK => 0x00_00_ff_ff,
+};
+
+use overload (
+ q{""} => \&_as_string,
+ 'qr' => \&_as_regex_ref,
+ 'bool' => \&_bool,
+ q{!} => \&_bool_not,
+ q{.} => \&_str_concat,
+ 'cmp' => \&_str_cmp,
+ 'eq' => \&_str_eq,
+ 'ne' => \&_str_ne,
+ 'fallback' => 0,
+);
+
+const my $EMPTY => q{};
+const my $SPACE => q{ };
+const my $SLASH => q{/};
+const my $DOT => q{.};
+const my $DOUBLE_DOT => q{..};
+const my $DOUBLE_QUOTE => q{"};
+const my $BACKSLASH => q{\\};
+const my $HASHBANG => q{#!};
+
+const my $MAXIMUM_LINK_DEPTH => 18;
+
+const my $BYTE_MAXIMUM => 255;
+const my $SINGLE_OCTAL_MASK => oct(7);
+const my $DUAL_OCTAL_MASK => oct(77);
+
+const my $ELF_MAGIC_SIZE => 4;
+const my $LINCITY_MAGIC_SIZE => 6;
+const my $SHELL_SCRIPT_MAGIC_SIZE => 2;
+
+const my $READ_BITS => oct(444);
+const my $WRITE_BITS => oct(222);
+const my $EXECUTABLE_BITS => oct(111);
+
+const my $SETUID => oct(4000);
+const my $SETGID => oct(2000);
+
+=head1 NAME
+
+Lintian::Index::Item - Lintian representation of a path entry in a package
+
+=head1 SYNOPSIS
+
+ my ($name, $type, $dir) = ('lintian', 'source', '/path/to/entry');
+
+=head1 INSTANCE METHODS
+
+=over 4
+
+=item init_from_tar_output
+
+=item get_quoted_filename
+
+=item unescape_c_style
+
+=cut
+
+my $datepattern = qr/\d{4}-\d{2}-\d{2}/;
+my $timepattern = qr/\d{2}\:\d{2}(?:\:\d{2}(?:\.\d+)?)?/;
+my $symlinkpattern = qr/\s+->\s+/;
+my $hardlinkpattern = qr/\s+link\s+to\s+/;
+
+# adapted from https://www.perlmonks.org/?node_id=1056606
+my %T = (
+ (map {chr() => chr} 0..$BYTE_MAXIMUM),
+ (map {sprintf('%o',$_) => chr} 0..($BYTE_MAXIMUM & $SINGLE_OCTAL_MASK)),
+ (map {sprintf('%02o',$_) => chr} 0..($BYTE_MAXIMUM & $DUAL_OCTAL_MASK)),
+ (map {sprintf('%03o',$_) => chr} 0..$BYTE_MAXIMUM),
+ (split //, "r\rn\nb\ba\af\ft\tv\013")
+);
+
+sub unescape_c_style {
+ my ($escaped) = @_;
+
+ (my $result = $escaped) =~ s/\\([0-7]{1,3}|.)/$T{$1}/g;
+
+ return $result;
+}
+
+sub get_quoted_filename {
+ my ($unknown, $skip) = @_;
+
+ # extract quoted file name
+ my ($delimited, $extra)
+ = extract_delimited($unknown, $DOUBLE_QUOTE, $skip, $BACKSLASH);
+
+ return (undef, undef)
+ unless defined $delimited;
+
+ # drop quotes
+ my $cstylename = substr($delimited, 1, (length $delimited) - 2);
+
+ # convert c-style escapes
+ my $name = unescape_c_style($cstylename);
+
+ return ($name, $extra);
+}
+
+sub init_from_tar_output {
+ my ($self, $line) = @_;
+
+ chomp $line;
+
+ # allow spaces in ownership and filenames (#895175 and #950589)
+
+ my ($initial, $size, $date, $time, $remainder)
+ = split(/\s+(\d+)\s+($datepattern)\s+($timepattern)\s+/, $line,2);
+
+ die encode_utf8(
+ $self->index->identifier . ": Cannot parse tar output: $line")
+ unless all { defined } ($initial, $size, $date, $time, $remainder);
+
+ $self->size($size);
+ $self->date($date);
+ $self->time($time);
+
+ my ($permissions, $ownership) = split(/\s+/, $initial, 2);
+ die encode_utf8($self->index->identifier
+ .": Cannot parse permissions and ownership in tar output: $line")
+ unless all { defined } ($permissions, $ownership);
+
+ $self->perm($permissions);
+
+ my ($owner, $group) = split(qr{/}, $ownership, 2);
+ die encode_utf8($self->index->identifier
+ . ": Cannot parse owner and group in tar output: $line")
+ unless all { defined } ($owner, $group);
+
+ $self->owner($owner);
+ $self->group($group);
+
+ my ($name, $extra) = get_quoted_filename($remainder, $EMPTY);
+ die encode_utf8($self->index->identifier
+ . ": Cannot parse file name in tar output: $line")
+ unless all { defined } ($name, $extra);
+
+ # strip relative prefix
+ $name =~ s{^\./+}{}s;
+
+ # slashes cannot appear in names but are sometimes doubled
+ # as in emboss-explorer_2.2.0-10.dsc
+ # better implemented in a Moo trigger on the attribute
+ $name =~ s{/+}{/}g;
+
+ # make sure directories end with a slash, except root
+ $name .= $SLASH
+ if length $name
+ && $self->perm =~ / ^d /msx
+ && $name !~ m{ /$ }msx;
+
+ $self->name($name);
+
+ # look for symbolic link target
+ if ($self->perm =~ /^l/) {
+
+ my ($linktarget, undef) = get_quoted_filename($extra, $symlinkpattern);
+ die encode_utf8($self->index->identifier
+ .": Cannot parse symbolic link target in tar output: $line")
+ unless defined $linktarget;
+
+ # do not remove multiple slashes from symlink targets
+ # caught by symlink-has-double-slash, which is tested
+ # leaves resolution of these links unsolved
+
+ # do not strip relative prefix for symbolic links
+ $self->link($linktarget);
+ }
+
+ # look for hard link target
+ if ($self->perm =~ /^h/) {
+
+ my ($linktarget, undef)= get_quoted_filename($extra, $hardlinkpattern);
+ die encode_utf8($self->index->identifier
+ . ": Cannot parse hard link target in tar output: $line")
+ unless defined $linktarget;
+
+ # strip relative prefix
+ $linktarget =~ s{^\./+}{}s;
+
+ # slashes cannot appear in names but are sometimes doubled
+ # as in emboss-explorer_2.2.0-10.dsc
+ # better implemented in a Moo trigger on the attribute, but requires
+ # separate attributes for hard and symbolic link targets
+ $linktarget =~ s{/+}{/}g;
+
+ $self->link($linktarget);
+ }
+
+ return;
+}
+
+=item bytes_match(REGEX)
+
+Returns the matched string if REGEX matches the file's byte contents,
+or $EMPTY otherwise.
+
+=cut
+
+sub bytes_match {
+ my ($self, $regex) = @_;
+
+ return $EMPTY
+ unless $self->is_file;
+
+ return $EMPTY
+ unless $self->is_open_ok;
+
+ return $EMPTY
+ unless length $regex;
+
+ open(my $fd, '<:raw', $self->unpacked_path);
+ my $sfd = Lintian::SlidingWindow->new;
+ $sfd->handle($fd);
+
+ my $match;
+ while (my $block = $sfd->readwindow) {
+
+ if ($block =~ /($regex)/) {
+
+ $match = $1;
+ last;
+ }
+ }
+
+ close $fd;
+
+ return $match // $EMPTY;
+}
+
+=item mentions_in_operation(REGEX)
+
+Returns the matched string if REGEX matches in a file location
+that is likely an operation (vs text), or $EMPTY otherwise.
+
+=cut
+
+sub mentions_in_operation {
+ my ($self, $regex) = @_;
+
+ # prefer strings(1) output (eg. for ELF) if we have it
+ # may not work as expected on ELF due to ld's SHF_MERGE
+ my $match;
+ if (length $self->strings && $self->strings =~ /($regex)/) {
+ $match = $1;
+
+ } elsif ($self->is_script) {
+ $match = $self->bytes_match($regex);
+ }
+
+ return $match // $EMPTY;
+}
+
+=item magic(COUNT)
+
+Returns the specified COUNT of magic bytes for the file.
+
+=cut
+
+sub magic {
+ my ($self, $count) = @_;
+
+ return $EMPTY
+ if length $self->link;
+
+ return $EMPTY
+ if $self->size < $count;
+
+ return $EMPTY
+ unless $self->is_open_ok;
+
+ my $magic;
+
+ open(my $fd, '<', $self->unpacked_path);
+ die encode_utf8($self->index->identifier
+ . ": Could not read $count bytes from "
+ . $self->name)
+ unless read($fd, $magic, $count) == $count;
+ close $fd;
+
+ return $magic;
+}
+
+=item C<hashbang>
+
+Returns the C<hashbang> for the file if it is a script.
+
+=cut
+
+has hashbang => (
+ is => 'rw',
+ lazy => 1,
+ default => sub {
+ my ($self) = @_;
+
+ return $EMPTY
+ unless $self->is_script;
+
+ my $trimmed_bytes = $EMPTY;
+ my $magic;
+
+ open(my $fd, '<', $self->unpacked_path);
+ if (read($fd, $magic, 2) && $magic eq $HASHBANG && !eof($fd)) {
+ $trimmed_bytes = <$fd>;
+ }
+ close $fd;
+
+ # decoding UTF-8 fails on magyarispell_1.6.1-2.dsc and ldc_1.24.0-1.dsc
+
+ # remove comment, if any
+ $trimmed_bytes =~ s/^([^#]*)/$1/;
+
+ # trim both ends
+ $trimmed_bytes =~ s/^\s+|\s+$//g;
+
+ return $trimmed_bytes;
+ }
+);
+
+=item interpreter_with_options
+
+Returns the interpreter requested by a script with options
+after stripping C<env>.
+
+=cut
+
+has interpreter_with_options => (
+ is => 'rw',
+ lazy => 1,
+ default => sub {
+ my ($self) = @_;
+
+ my $with_options = $self->hashbang;
+
+ $with_options =~ s{^/usr/bin/env\s+}{};
+
+ return $with_options;
+ }
+);
+
+=item interpreter
+
+Returns the interpreter requested by a script but strips C<env>.
+
+=cut
+
+has interpreter => (
+ is => 'rw',
+ lazy => 1,
+ default => sub {
+ my ($self) = @_;
+
+ my $interpreter = $self->interpreter_with_options;
+
+ # keep base command without options
+ $interpreter =~ s/^(\S+).*/$1/;
+
+ return $interpreter;
+ }
+);
+
+=item C<calls_env>
+
+Returns true if file is a script that calls C<env>.
+
+=cut
+
+has calls_env => (
+ is => 'rw',
+ lazy => 1,
+ default => sub {
+ my ($self) = @_;
+
+ # must return a boolean success value #943724
+ return 1
+ if $self->hashbang =~ m{^/usr/bin/env\s+};
+
+ return 0;
+ }
+);
+
+=item C<is_shell_script>
+
+Returns true if file is a script requesting a recognized shell
+interpreter.
+
+=cut
+
+has is_shell_script => (
+ is => 'rw',
+ lazy => 1,
+ default => sub {
+ my ($self) = @_;
+
+ my $interpreter = $self->interpreter;
+
+ # keep basename
+ my ($basename) = ($interpreter =~ m{([^/]*)/?$}s);
+
+ return 1
+ if $basename =~ /^(?:[bd]?a|t?c|(?:pd|m)?k|z)?sh$/;
+
+ return 0;
+ }
+);
+
+=item is_elf
+
+Returns true if file is an ELF executable, and false otherwise.
+
+=cut
+
+has is_elf => (
+ is => 'rw',
+ lazy => 1,
+ default => sub {
+ my ($self) = @_;
+
+ return 1
+ if $self->magic($ELF_MAGIC_SIZE) eq "\x7FELF";
+
+ return 0;
+ }
+);
+
+=item is_script
+
+Returns true if file is a script and false otherwise.
+
+=cut
+
+has is_script => (
+ is => 'rw',
+ lazy => 1,
+ default => sub {
+ my ($self) = @_;
+
+ # skip lincity data files; magic: #!#!#!
+ return 0
+ if $self->magic($LINCITY_MAGIC_SIZE) eq '#!#!#!';
+
+ return 0
+ unless $self->magic($SHELL_SCRIPT_MAGIC_SIZE) eq $HASHBANG;
+
+ return 1;
+ }
+);
+
+=item is_maintainer_script
+
+Returns true if file is a maintainer script and false otherwise.
+
+=cut
+
+has is_maintainer_script => (
+ is => 'rw',
+ lazy => 1,
+ default => sub {
+ my ($self) = @_;
+
+ return 1
+ if $self->name =~ /^ config | (?:pre|post)(?:inst|rm) $/x
+ && $self->is_open_ok;
+
+ return 0;
+ }
+);
+
+=item identity
+
+Returns the owner and group of the path, separated by a slash.
+
+NB: If only numerical owner information is available in the package,
+this may return a numerical owner (except uid 0 is always mapped to
+"root")
+
+=cut
+
+sub identity {
+ my ($self) = @_;
+
+ return $self->owner . $SLASH . $self->group;
+}
+
+=item operm
+
+Returns the file permissions of this object in octal (e.g. 0644).
+
+NB: This is only well defined for file entries that are subject to
+permissions (e.g. files). Particularly, the value is not well defined
+for symlinks.
+
+=cut
+
+sub operm {
+ my ($self) = @_;
+
+ return $self->path_info & OPERM_MASK;
+}
+
+=item octal_permissions
+
+=cut
+
+sub octal_permissions {
+ my ($self) = @_;
+
+ return sprintf('%04o', $self->operm);
+}
+
+=item children
+
+Returns a list of children (as Lintian::File::Path objects) of this entry.
+The list and its contents should not be modified.
+
+Only returns direct children of this directory. The entries are sorted by name.
+
+NB: Returns the empty list for non-dir entries.
+
+=cut
+
+sub children {
+ my ($self) = @_;
+
+ croak encode_utf8('No index in ' . $self->name)
+ unless defined $self->index;
+
+ my @names = values %{$self->childnames};
+
+ return map { $self->index->lookup($_) } @names;
+}
+
+=item descendants
+
+Returns a list of children (as Lintian::File::Path objects) of this entry.
+The list and its contents should not be modified.
+
+Descends recursively into subdirectories and return the descendants in
+breadth-first order. Children of a given directory will be sorted by
+name.
+
+NB: Returns the empty list for non-dir entries.
+
+=cut
+
+sub descendants {
+ my ($self) = @_;
+
+ my @descendants = $self->children;
+
+ my @directories = grep { $_->is_dir } @descendants;
+ push(@descendants, $_->descendants) for @directories;
+
+ return @descendants;
+}
+
+=item timestamp
+
+Returns a Unix timestamp for the given path. This is a number of
+seconds since the start of Unix epoch in UTC.
+
+=cut
+
+sub timestamp {
+ my ($self) = @_;
+
+ my $timestamp = $self->date . $SPACE . $self->time;
+
+ return str2time($timestamp, 'GMT');
+}
+
+=item child(BASENAME)
+
+Returns the child named BASENAME if it is a child of this directory.
+Otherwise, this method returns C<undef>.
+
+Even for directories, BASENAME should not end with a slash.
+
+When invoked on non-dirs, this method always returns C<undef>.
+
+Example:
+
+ $dir_entry->child('foo') => $entry OR undef
+
+=cut
+
+sub child {
+ my ($self, $basename) = @_;
+
+ croak encode_utf8('No index in ' . $self->name)
+ unless defined $self->index;
+
+ croak encode_utf8($self->index->identifier . ': Basename is required')
+ unless length $basename;
+
+ my $childname = $self->childnames->{$basename};
+ return undef
+ unless $childname;
+
+ return $self->index->lookup($childname);
+}
+
+=item is_symlink
+
+Returns a truth value if this entry is a symlink.
+
+=item is_hardlink
+
+Returns a truth value if this entry is a hardlink to a regular file.
+
+NB: The target of a hardlink is always a regular file (and not a dir etc.).
+
+=item is_dir
+
+Returns a truth value if this entry is a dir.
+
+NB: Unlike the "-d $dir" operator this will never return true for
+symlinks, even if the symlink points to a dir.
+
+=item is_file
+
+Returns a truth value if this entry is a regular file (or a hardlink to one).
+
+NB: Unlike the "-f $file" operator this will never return true for
+symlinks, even if the symlink points to a file (or hardlink).
+
+=item is_regular_file
+
+Returns a truth value if this entry is a regular file.
+
+This is eqv. to $path->is_file and not $path->is_hardlink.
+
+NB: Unlike the "-f $file" operator this will never return true for
+symlinks, even if the symlink points to a file.
+
+=cut
+
+sub is_symlink {
+ my ($self) = @_;
+
+ return $self->path_info & TYPE_SYMLINK ? 1 : 0;
+}
+
+sub is_hardlink {
+ my ($self) = @_;
+
+ return $self->path_info & TYPE_HARDLINK ? 1 : 0;
+}
+
+sub is_dir {
+ my ($self) = @_;
+
+ return $self->path_info & TYPE_DIR ? 1 : 0;
+}
+
+sub is_file {
+ my ($self) = @_;
+
+ return $self->path_info & (TYPE_FILE | TYPE_HARDLINK) ? 1 : 0;
+}
+
+sub is_regular_file {
+ my ($self) = @_;
+
+ return $self->path_info & TYPE_FILE ? 1 : 0;
+}
+
+=item link_normalized
+
+Returns the target of the link normalized against it's directory name.
+If the link cannot be normalized or normalized path might escape the
+package root, this method returns C<undef>.
+
+NB: This method will return the empty string for links pointing to the
+root dir of the package.
+
+Only available on "links" (i.e. symlinks or hardlinks). On non-links
+this will croak.
+
+I<Symlinks only>: If you want the symlink target as a L<Lintian::File::Path>
+object, use the L<resolve_path|/resolve_path([PATH])> method with no
+arguments instead.
+
+=cut
+
+has link_normalized => (
+ is => 'rw',
+ lazy => 1,
+ default => sub {
+ my ($self) = @_;
+
+ my $name = $self->name;
+ my $link = $self->link;
+
+ croak encode_utf8($self->index->identifier . ": $name is not a link")
+ unless length $link;
+
+ my $dir = $self->dirname;
+
+ # hardlinks are always relative to the package root
+ $dir = $SLASH
+ if $self->is_hardlink;
+
+ my $target = normalize_link_target($dir, $link);
+
+ return $target;
+ }
+);
+
+=item is_readable
+
+Returns a truth value if the permission bits of this entry have
+at least one bit denoting readability set (bitmask 0444).
+
+=item is_writable
+
+Returns a truth value if the permission bits of this entry have
+at least one bit denoting writability set (bitmask 0222).
+
+=item is_executable
+
+Returns a truth value if the permission bits of this entry have
+at least one bit denoting executability set (bitmask 0111).
+
+=cut
+
+sub is_readable {
+ my ($self) = @_;
+
+ return $self->path_info & $READ_BITS;
+}
+
+sub is_writable {
+ my ($self) = @_;
+
+ return $self->path_info & $WRITE_BITS;
+}
+
+sub is_executable {
+ my ($self) = @_;
+
+ return $self->path_info & $EXECUTABLE_BITS;
+}
+
+=item all_bits_set
+
+=cut
+
+sub all_bits_set {
+ my ($self, $bits) = @_;
+
+ return ($self->operm & $bits) == $bits;
+}
+
+=item is_setuid
+
+=cut
+
+sub is_setuid {
+ my ($self) = @_;
+
+ return $self->operm & $SETUID;
+}
+
+=item is_setgid
+
+=cut
+
+sub is_setgid {
+ my ($self) = @_;
+
+ return $self->operm & $SETGID;
+}
+
+=item unpacked_path
+
+Returns the path to this object on the file system, which must be a
+regular file, a hardlink or a directory.
+
+This method may fail if:
+
+=over 4
+
+=item * The object is neither a directory or a file-like object (e.g. a
+named pipe).
+
+=item * If the object is dangling symlink or the path traverses a symlink
+outside the package root.
+
+=back
+
+To test if this is safe to call, if the target is (supposed) to be a:
+
+=over 4
+
+=item * file or hardlink then test with L</is_open_ok>.
+
+=item * dir then assert L<resolve_path|/resolve_path([PATH])> returns a
+defined entry, for which L</is_dir> returns a truth value.
+
+=back
+
+=cut
+
+sub unpacked_path {
+ my ($self) = @_;
+
+ $self->_check_access;
+
+ croak encode_utf8('No index in ' . $self->name)
+ unless defined $self->index;
+
+ my $basedir = $self->index->basedir;
+
+ croak encode_utf8($self->index->identifier . ': No base directory')
+ unless length $basedir;
+
+ my $unpacked = path($basedir)->child($self->name)->stringify;
+
+ # bug in perl, file operator should not care but does
+ # https://github.com/Perl/perl5/issues/10550
+ # also, https://github.com/Perl/perl5/issues/9674
+ utf8::downgrade $unpacked;
+
+ return $unpacked;
+}
+
+=item is_open_ok
+
+Returns a truth value if it is safe to attempt open a read handle to
+the underlying file object.
+
+Returns a truth value if the path may be opened.
+
+=cut
+
+sub is_open_ok {
+ my ($self) = @_;
+
+ my $path_info = $self->path_info;
+
+ return 1
+ if ($path_info & OPEN_IS_OK) == OPEN_IS_OK;
+
+ return 0
+ if $path_info & ACCESS_INFO;
+
+ try {
+ $self->_check_open;
+
+ } catch {
+ return 0;
+
+ # perlcritic 1.140-1 requires the semicolon on the next line
+ };
+
+ return 1;
+}
+
+sub _check_access {
+ my ($self) = @_;
+
+ my $path_info = $self->path_info;
+
+ return 1
+ if ($path_info & FS_PATH_IS_OK) == FS_PATH_IS_OK;
+
+ return 0
+ if $path_info & ACCESS_INFO;
+
+ my $resolvable = $self->resolve_path;
+ unless ($resolvable) {
+ $self->path_info($self->path_info | UNSAFE_PATH);
+ # NB: We are deliberately vague here to avoid suggesting
+ # whether $path exists. In some cases (e.g. lintian.d.o)
+ # the output is readily available to wider public.
+ confess encode_utf8($self->index->identifier
+ .': Attempt to access through broken or unsafe symlink: '
+ . $self->name);
+ }
+
+ $self->path_info($self->path_info | FS_PATH_IS_OK);
+
+ return 1;
+}
+
+sub _check_open {
+ my ($self) = @_;
+
+ $self->_check_access;
+
+ # Symlinks can point to a "non-file" object inside the
+ # package root
+ # Leave "_path_access" here as _check_access marks it either as
+ # "UNSAFE_PATH" or "FS_PATH_IS_OK"
+
+ confess encode_utf8($self->index->identifier
+ .': Opening of irregular file not supported: '
+ . $self->name)
+ unless $self->is_file || ($self->is_symlink && -e $self->unpacked_path);
+
+ $self->path_info($self->path_info | OPEN_IS_OK);
+
+ return 1;
+}
+
+=item follow
+
+Return dereferenced link if applicable
+
+=cut
+
+sub follow {
+ my ($self, $maxlinks) = @_;
+
+ return $self
+ unless length $self->link;
+
+ return $self->dereferenced
+ if defined $self->dereferenced;
+
+ # set limit
+ $maxlinks //= $MAXIMUM_LINK_DEPTH;
+
+ # catch recursive links
+ return undef
+ if $maxlinks <= 0;
+
+ # reduce counter
+ $maxlinks--;
+
+ my $reference;
+
+ croak encode_utf8('No index in ' . $self->name)
+ unless defined $self->index;
+
+ if ($self->is_hardlink) {
+ # hard links are resolved against package root
+ $reference = $self->index->lookup;
+
+ } else {
+ # otherwise resolve against the parent
+ $reference = $self->parent_dir;
+ }
+
+ croak encode_utf8($self->index->identifier
+ . ': No parent reference for link in '
+ . $self->name)
+ unless defined $reference;
+
+ # follow link
+ my $dereferenced = $reference->resolve_path($self->link, $maxlinks);
+ $self->dereferenced($dereferenced);
+
+ return $self->dereferenced;
+}
+
+=item resolve_path([PATH])
+
+Resolve PATH relative to this path entry.
+
+If PATH starts with a slash and the file hierarchy has a well-defined
+root directory, then PATH will instead be resolved relatively to the
+root dir. If the file hierarchy does not have a well-defined root dir
+(e.g. for source packages), this method will return C<undef>.
+
+If PATH is omitted, then the entry is resolved and the target is
+returned if it is valid. Except for symlinks, all entries always
+resolve to themselves. NB: hardlinks also resolve as themselves.
+
+It is an error to attempt to resolve a PATH against a non-directory
+and non-symlink entry - as such resolution would always fail
+(i.e. foo/../bar is an invalid path unless foo is a directory or a
+symlink to a dir).
+
+
+The resolution takes symlinks into account and following them provided
+that the target path is valid (and can be followed safely). If the
+path is invalid or circular (symlinks), escapes the root directory or
+follows an unsafe symlink, the method returns C<undef>. Otherwise, it
+returns the path entry that denotes the target path.
+
+
+If PATH contains at least one path segment and ends with a slash, then
+the resolved path will end in a directory (or fail). Otherwise, the
+resolved PATH can end in any entry I<except> a symlink.
+
+Examples:
+
+ $symlink_entry->resolve_path => $nonsymlink_entry OR undef
+
+ $x->resolve_path => $x
+
+ For directory or symlink entries (dol), you can also resolve a path:
+
+ $dol_entry->resolve_path('some/../where') => $nonsymlink_entry OR undef
+
+ # Note the trailing slash
+ $dol_entry->resolve_path('some/../where/') => $dir_entry OR undef
+
+=cut
+
+sub resolve_path {
+ my ($self, $request, $maxlinks) = @_;
+
+ croak encode_utf8('No index in ' . $self->name)
+ unless defined $self->index;
+
+ croak encode_utf8(
+ $self->index->identifier . ': Can only resolve string arguments')
+ if defined $request && ref($request) ne $EMPTY;
+
+ $request //= $EMPTY;
+
+ if (length $self->link) {
+ # follow the link
+ my $dereferenced = $self->follow($maxlinks);
+ return undef
+ unless defined $dereferenced;
+
+ # and use that to resolve the request
+ return $dereferenced->resolve_path($request, $maxlinks);
+ }
+
+ my $reference;
+
+ # check for absolute reference; remove slash
+ if ($request =~ s{^/+}{}s) {
+
+ # require anchoring for absolute references
+ return undef
+ unless $self->index->anchored;
+
+ # get root entry
+ $reference = $self->index->lookup;
+
+ } elsif ($self->is_dir) {
+ # directories are their own starting point
+ $reference = $self;
+
+ } else {
+ # otherwise, use parent directory
+ $reference = $self->parent_dir;
+ }
+
+ return undef
+ unless defined $reference;
+
+ # read first segment; strip all trailing slashes for recursive use
+ if ($request =~ s{^([^/]+)/*}{}) {
+
+ my $segment = $1;
+
+ # single dot, or two slashes in a row
+ return $reference->resolve_path($request, $maxlinks)
+ if $segment eq $DOT || !length $segment;
+
+ # for double dot, go up a level
+ if ($segment eq $DOUBLE_DOT) {
+ my $parent = $reference->parent_dir;
+ return undef
+ unless defined $parent;
+
+ return $parent->resolve_path($request, $maxlinks);
+ }
+
+ # look for child otherwise
+ my $child = $reference->child($segment);
+ return undef
+ unless defined $child;
+
+ return $child->resolve_path($request, $maxlinks);
+ }
+
+ croak encode_utf8($self->index->identifier
+ . ": Cannot parse path resolution request: $request")
+ if length $request;
+
+ # nothing else to resolve
+ return $self;
+}
+
+=item name
+
+Returns the name of the file (relative to the package root).
+
+NB: It will never have any leading "./" (or "/") in it.
+
+=item basename
+
+Returns the "filename" part of the name, similar basename(1) or
+File::Basename::basename (without passing a suffix to strip in either
+case).
+
+NB: Returns the empty string for the "root" dir.
+
+=item dirname
+
+Returns the "directory" part of the name, similar to dirname(1) or
+File::Basename::dirname. The dirname will end with a trailing slash
+(except the "root" dir - see below).
+
+NB: Returns the empty string for the "root" dir.
+
+=item link
+
+If this is a link (i.e. is_symlink or is_hardlink returns a truth
+value), this method returns the target of the link.
+
+If this is not a link, then this returns undef.
+
+If the path is a symlink this method can be used to determine if the
+symlink is relative or absolute. This is I<not> true for hardlinks,
+where the link target is always relative to the root.
+
+NB: Even for symlinks, a leading "./" will be stripped.
+
+=item normalized
+
+=item faux
+
+Returns a truth value if this entry absent in the package. This can
+happen if a package does not include all intermediate directories.
+
+=item size
+
+Returns the size of the path in bytes.
+
+NB: Only regular files can have a non-zero file size.
+
+=item date
+
+Return the modification date as YYYY-MM-DD.
+
+=item time
+
+=item perm
+
+=item path_info
+
+=item owner
+
+Returns the owner of the path entry as a username.
+
+NB: If only numerical owner information is available in the package,
+this may return a numerical owner (except uid 0 is always mapped to
+"root")
+
+=item group
+
+Returns the group of the path entry as a username.
+
+NB: If only numerical owner information is available in the package,
+this may return a numerical group (except gid 0 is always mapped to
+"root")
+
+=item uid
+
+Returns the uid of the owner of the path entry.
+
+NB: If the uid is not available, 0 will be returned.
+This usually happens if the numerical data is not collected (e.g. in
+source packages)
+
+=item gid
+
+Returns the gid of the owner of the path entry.
+
+NB: If the gid is not available, 0 will be returned.
+This usually happens if the numerical data is not collected (e.g. in
+source packages)
+
+=item file_type
+
+Return the data from L<file(1)> if it has been collected.
+
+Note this is only defined for files as Lintian only runs L<file(1)> on
+files.
+
+=item java_info
+
+=item strings
+
+=item C<basedir>
+
+=item index
+
+=item parent_dir
+
+=item child_table
+
+=item sorted_children
+
+Returns the parent directory entry of this entry as a
+L<Lintian::File::Path>.
+
+NB: Returns C<undef> for the "root" dir.
+
+=item C<childnames>
+
+=item parent_dir
+
+Return the parent dir entry of this the path entry.
+
+=item dereferenced
+
+=cut
+
+has name => (
+ is => 'rw',
+ lazy => 1,
+ coerce => sub { my ($string) = @_; return $string // $EMPTY;},
+ trigger => sub {
+ my ($self, $name) = @_;
+
+ my ($basename) = ($name =~ m{([^/]*)/?$}s);
+ $self->basename($basename);
+
+ # allow newline in names; need /s for dot matching (#929729)
+ my ($dirname) = ($name =~ m{^(.+/)?(?:[^/]+/?)$}s);
+ $self->dirname($dirname);
+ },
+ default => $EMPTY
+);
+has basename => (
+ is => 'rw',
+ lazy => 1,
+ coerce => sub { my ($string) = @_; return $string // $EMPTY;},
+ default => $EMPTY
+);
+has dirname => (
+ is => 'rw',
+ lazy => 1,
+ coerce => sub { my ($string) = @_; return $string // $EMPTY;},
+ default => $EMPTY
+);
+
+has link => (
+ is => 'rw',
+ coerce => sub { my ($string) = @_; return $string // $EMPTY;},
+ default => $EMPTY
+);
+has normalized => (
+ is => 'rw',
+ coerce => sub { my ($string) = @_; return $string // $EMPTY;},
+ default => $EMPTY
+);
+has faux => (is => 'rw', default => 0);
+
+has size => (is => 'rw', default => 0);
+has date => (
+ is => 'rw',
+ default => sub {
+ my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = gmtime;
+ return sprintf('%04d-%02d-%02d', $year, $mon, $mday);
+ }
+);
+has time => (
+ is => 'rw',
+ default => sub {
+ my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = gmtime;
+ return sprintf('%02d:%02d:%02d', $hour, $min, $sec);
+ }
+);
+
+has perm => (is => 'rw');
+has path_info => (is => 'rw');
+
+has owner => (
+ is => 'rw',
+ coerce => sub { my ($string) = @_; return $string // 'root'; },
+ default => 'root'
+);
+has group => (
+ is => 'rw',
+ coerce => sub { my ($string) = @_; return $string // 'root'; },
+ default => 'root'
+);
+has uid => (
+ is => 'rw',
+ coerce => sub { my ($value) = @_; return int($value // 0); },
+ default => 0
+);
+has gid => (
+ is => 'rw',
+ coerce => sub { my ($value) = @_; return int($value // 0); },
+ default => 0
+);
+
+has md5sum => (
+ is => 'rw',
+ coerce => sub { my ($checksum) = @_; return ($checksum // 0); },
+ default => 0
+);
+has file_type => (
+ is => 'rw',
+ coerce => sub { my ($text) = @_; return ($text // $EMPTY); },
+ default => $EMPTY
+);
+has java_info => (
+ is => 'rw',
+ coerce => sub { my ($hashref) = @_; return ($hashref // {}); },
+ default => sub { {} }
+);
+has strings => (
+ is => 'rw',
+ coerce => sub { my ($text) = @_; return ($text // $EMPTY); },
+ default => $EMPTY
+);
+has ar_info => (
+ is => 'rw',
+ coerce => sub { my ($hashref) = @_; return ($hashref // {}); },
+ default => sub { {} }
+);
+
+has index => (is => 'rw');
+has childnames => (is => 'rw', default => sub { {} });
+has parent_dir => (
+ is => 'rw',
+ lazy => 1,
+ default => sub {
+ my ($self) = @_;
+
+ # do not return root as its own parent
+ return
+ if $self->name eq $EMPTY;
+
+ croak encode_utf8('No index in ' . $self->name)
+ unless defined $self->index;
+
+ # returns root by default
+ return $self->index->lookup($self->dirname);
+ }
+);
+has dereferenced => (is => 'rw');
+
+=item elf
+
+=cut
+
+sub elf {
+ my ($self, @args) = @_;
+
+ if (@args) {
+
+ $self->index->elf_storage->{$self->name} = $args[0];
+
+ return ();
+ }
+
+ my %copy = %{$self->index->elf_storage->{$self->name} // {} };
+
+ return \%copy;
+}
+
+=item elf_by_member
+
+=cut
+
+sub elf_by_member {
+ my ($self, @args) = @_;
+
+ if (@args) {
+
+ my $object_name = $args[0];
+ my $by_object = $args[1];
+
+ my $tmp = $self->index->elf_storage_by_member->{$self->name} // {};
+ $tmp->{$object_name} = $by_object;
+ $self->index->elf_storage_by_member->{$self->name} = $tmp;
+
+ return ();
+ }
+
+ my %copy = %{$self->index->elf_storage_by_member->{$self->name} // {} };
+
+ return \%copy;
+}
+
+=item pointer
+
+=cut
+
+sub pointer {
+ my ($self, $position) = @_;
+
+ my $pointer = Lintian::Pointer::Item->new;
+ $pointer->item($self);
+ $pointer->position($position);
+
+ return $pointer;
+}
+
+=item bytes
+
+Returns verbatim file contents as a scalar.
+
+=item is_valid_utf8
+
+Boolean true if file contents are valid UTF-8.
+
+=item decoded_utf8
+
+Returns a decoded, wide-character string if file contents are valid UTF-8.
+
+=cut
+
+sub bytes {
+ my ($self) = @_;
+
+ return $EMPTY
+ unless $self->is_open_ok;
+
+ my $bytes = path($self->unpacked_path)->slurp;
+
+ return $bytes;
+}
+
+sub is_valid_utf8 {
+ my ($self) = @_;
+
+ my $bytes = $self->bytes;
+ return 0
+ unless defined $bytes;
+
+ return valid_utf8($bytes);
+}
+
+sub decoded_utf8 {
+ my ($self) = @_;
+
+ return $EMPTY
+ unless $self->is_valid_utf8;
+
+ return decode_utf8($self->bytes);
+}
+
+### OVERLOADED OPERATORS ###
+
+# overload apparently does not like the mk_ro_accessor, so use a level
+# of indirection
+
+sub _as_regex_ref {
+ my ($self) = @_;
+ my $name = $self->name;
+ return qr{ \Q$name\E }xsm;
+}
+
+sub _as_string {
+ my ($self) = @_;
+ return $self->name;
+}
+
+sub _bool {
+ # Always true (used in "if ($info->index('some/path')) {...}")
+ return 1;
+}
+
+sub _bool_not {
+ my ($self) = @_;
+ return !$self->_bool;
+}
+
+sub _str_cmp {
+ my ($self, $str, $swap) = @_;
+ return $str cmp $self->name if $swap;
+ return $self->name cmp $str;
+}
+
+sub _str_concat {
+ my ($self, $str, $swap) = @_;
+ return $str . $self->name if $swap;
+ return $self->name . $str;
+}
+
+sub _str_eq {
+ my ($self, $str) = @_;
+ return $self->name eq $str;
+}
+
+sub _str_ne {
+ my ($self, $str) = @_;
+ return $self->name ne $str;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Niels Thykier <niels@thykier.net> for Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
+
diff --git a/lib/Lintian/Index/Java.pm b/lib/Lintian/Index/Java.pm
new file mode 100644
index 0000000..4b33bec
--- /dev/null
+++ b/lib/Lintian/Index/Java.pm
@@ -0,0 +1,258 @@
+# -*- perl -*- Lintian::Index::Java
+#
+# Copyright (C) 2020 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Index::Java;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Archive::Zip qw( :ERROR_CODES :CONSTANTS );
+use Const::Fast;
+use Cwd;
+use Path::Tiny;
+use Unicode::UTF8 qw(encode_utf8);
+
+use Moo::Role;
+use namespace::clean;
+
+const my $EMPTY => q{};
+const my $NEWLINE => qq{\n};
+const my $SPACE => q{ };
+const my $DASH => q{-};
+
+const my $JAVA_MAGIC_SIZE => 8;
+const my $JAVA_MAGIC_BYTES => 0xCAFEBABE;
+
+=head1 NAME
+
+Lintian::Index::Java - java information.
+
+=head1 SYNOPSIS
+
+ use Lintian::Index;
+
+=head1 DESCRIPTION
+
+Lintian::Index::Java java information.
+
+=head1 INSTANCE METHODS
+
+=over 4
+
+=item add_java
+
+=cut
+
+sub add_java {
+ my ($self) = @_;
+
+ my $savedir = getcwd;
+ chdir($self->basedir)
+ or die encode_utf8(
+ $self->identifier . ': Cannot change to directory ' . $self->basedir);
+
+ my $errors = $EMPTY;
+
+ my @files = grep { $_->is_file } @{$self->sorted_list};
+
+ # Wheezy's version of file calls "jar files" for "Zip archive".
+ # Newer versions seem to call them "Java Jar file".
+ # Jessie also introduced "Java archive data (JAR)"...
+ my @java_files = grep {
+ $_->file_type=~ m{
+ Java [ ] (?:Jar [ ] file|archive [ ] data)
+ | Zip [ ] archive
+ | JAR }x;
+ } @files;
+
+ my @lines;
+ for my $file (@java_files) {
+
+ push(@lines, parse_jar($file->name))
+ if $file->name =~ /\S+\.jar$/i;
+ }
+
+ my $file;
+ my $file_list;
+ my $manifest = 0;
+ local $_ = undef;
+
+ my %java_info;
+
+ for my $line (@lines) {
+ chomp $line;
+ next if $line =~ /^\s*$/;
+
+ if ($line =~ /^-- ERROR:\s*(\S.+)$/) {
+ $java_info{$file}{error} = $1;
+
+ } elsif ($line =~ m{^-- MANIFEST: (?:\./)?(?:.+)$}) {
+ # TODO: check $file == $1 ?
+ $java_info{$file}{manifest} = {};
+ $manifest = $java_info{$file}{manifest};
+ $file_list = 0;
+
+ } elsif ($line =~ m{^-- (?:\./)?(.+)$}) {
+ $file = $1;
+ $java_info{$file}{files} = {};
+ $file_list = $java_info{$file}{files};
+ $manifest = 0;
+ } else {
+ if ($manifest && $line =~ m{^ (\S+):\s(.*)$}) {
+ $manifest->{$1} = $2;
+ } elsif ($file_list) {
+ my ($fname, $clmajor) = ($line =~ m{^([^-].*):\s*([-\d]+)$});
+ $file_list->{$fname} = $clmajor;
+ }
+ }
+ }
+
+ $_->java_info($java_info{$_->name}) for @java_files;
+
+ chdir($savedir)
+ or die encode_utf8(
+ $self->identifier . ": Cannot change to directory $savedir");
+
+ return $errors;
+}
+
+=item parse_jar
+
+=cut
+
+sub parse_jar {
+ my ($path) = @_;
+
+ my @lines;
+
+ # This script needs unzip, there's no way around.
+ push(@lines, "-- $path");
+
+ # Without this Archive::Zip will emit errors to standard error for
+ # faulty zip files - but that is not what we want. AFAICT, it is
+ # the only way to get a textual error as well, so (ab)use it for
+ # this purpose while we are at it.
+ my $errorhandler = sub {
+ my ($err) = @_;
+ $err =~ s/\r?\n/ /g;
+
+ # trim right
+ $err =~ s/\s+$//;
+
+ push(@lines, "-- ERROR: $err");
+ };
+ my $oldhandler = Archive::Zip::setErrorHandler($errorhandler);
+
+ my $azip = Archive::Zip->new;
+ if($azip->read($path) == AZ_OK) {
+
+ # save manifest for the end
+ my $manifest;
+
+ # file list comes first
+ foreach my $member ($azip->members) {
+ my $name = $member->fileName;
+
+ next
+ if $member->isDirectory;
+
+ # store for later processing
+ $manifest = $member
+ if $name =~ m{^META-INF/MANIFEST.MF$}i;
+
+ # add version if we can find it
+ my $jversion;
+ if ($name =~ /\.class$/) {
+ # Collect the Major version of the class file.
+ my ($contents, $zerr) = $member->contents;
+
+ # bug in Archive::Zip; seen in android-platform-libcore_10.0.0+r36-1.dsc
+ last
+ unless defined $zerr;
+
+ last
+ unless $zerr == AZ_OK;
+
+ # Ensure we can read at least 8 bytes for the unpack.
+ next
+ if length $contents < $JAVA_MAGIC_SIZE;
+
+ # translation of the unpack
+ # NN NN NN NN, nn nn, nn nn - bytes read
+ # $magic , __ __, $major - variables
+ my ($magic, undef, $major) = unpack('Nnn', $contents);
+ $jversion = $major
+ if $magic == $JAVA_MAGIC_BYTES;
+ }
+ push(@lines, "$name: " . ($jversion // $DASH));
+ }
+
+ if ($manifest) {
+ push(@lines, "-- MANIFEST: $path");
+
+ my ($contents, $zerr) = $manifest->contents;
+
+ # bug in Archive::Zip; seen in android-platform-libcore_10.0.0+r36-1.dsc
+ return ()
+ unless defined $zerr;
+
+ if ($zerr == AZ_OK) {
+ my $partial = $EMPTY;
+ my $first = 1;
+ my @list = split($NEWLINE, $contents);
+ foreach my $line (@list) {
+
+ # remove DOS type line feeds
+ $line =~ s/\r//g;
+
+ if ($line =~ /^(\S+:)\s*(.*)/) {
+ push(@lines, $SPACE . $SPACE . "$1 $2");
+ }
+ if ($line =~ /^ (.*)/) {
+ push(@lines, $1);
+ }
+ }
+ }
+ }
+ }
+
+ Archive::Zip::setErrorHandler($oldhandler);
+
+ return @lines;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for
+Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Index/Md5sums.pm b/lib/Lintian/Index/Md5sums.pm
new file mode 100644
index 0000000..c1d0583
--- /dev/null
+++ b/lib/Lintian/Index/Md5sums.pm
@@ -0,0 +1,127 @@
+# -*- perl -*- Lintian::Index::Md5sums
+#
+# Copyright (C) 2020 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Index::Md5sums;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Const::Fast;
+use Cwd;
+use Unicode::UTF8 qw(encode_utf8 decode_utf8);
+
+use Lintian::IPC::Run3 qw(xargs);
+use Lintian::Util qw(read_md5sums);
+
+use Moo::Role;
+use namespace::clean;
+
+const my $EMPTY => q{};
+const my $NEWLINE => qq{\n};
+
+const my $WAIT_STATUS_SHIFT => 8;
+const my $NULL => qq{\0};
+
+=head1 NAME
+
+Lintian::Index::Md5sums - calculate checksums for index.
+
+=head1 SYNOPSIS
+
+ use Lintian::Index;
+
+=head1 DESCRIPTION
+
+Lintian::Index::Md5sums calculates checksums for an index.
+
+=head1 INSTANCE METHODS
+
+=over 4
+
+=item add_md5sums
+
+=cut
+
+sub add_md5sums {
+ my ($self) = @_;
+
+ my $savedir = getcwd;
+ chdir($self->basedir)
+ or die encode_utf8(
+ $self->identifier . ': Cannot change to directory ' . $self->basedir);
+
+ my $errors = $EMPTY;
+
+ # get the regular files in the index
+ my @files = grep { $_->is_file } @{$self->sorted_list};
+ my @names = map { $_->name } @files;
+
+ my @command = qw(md5sum --);
+
+ my %md5sums;
+
+ xargs(
+ \@command,
+ \@names,
+ sub {
+ my ($stdout, $stderr, $status, @partial) = @_;
+
+ $stderr = decode_utf8($stderr)
+ if length $stderr;
+
+ if ($status) {
+ $errors .= "Cannot run @command: $stderr" . $NEWLINE;
+ return;
+ }
+
+ # undecoded split allows names with non UTF-8 bytes
+ my ($partial_sums, undef) = read_md5sums($stdout);
+
+ $md5sums{$_} = $partial_sums->{$_}for @partial;
+ }
+ );
+
+ $_->md5sum($md5sums{$_->name}) for @files;
+
+ chdir($savedir)
+ or die encode_utf8(
+ $self->identifier . ": Cannot change to directory $savedir");
+
+ return $errors;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for
+Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
diff --git a/lib/Lintian/Index/Strings.pm b/lib/Lintian/Index/Strings.pm
new file mode 100644
index 0000000..f0e4fa1
--- /dev/null
+++ b/lib/Lintian/Index/Strings.pm
@@ -0,0 +1,99 @@
+# -*- perl -*- Lintian::Index::Strings
+#
+# Copyright (C) 2020 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Index::Strings;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Const::Fast;
+use Path::Tiny;
+use Unicode::UTF8 qw(decode_utf8);
+
+use Lintian::IPC::Run3 qw(safe_qx);
+
+use Moo::Role;
+use namespace::clean;
+
+const my $EMPTY => q{};
+
+=head1 NAME
+
+Lintian::Index::Strings - strings in binary files.
+
+=head1 SYNOPSIS
+
+ use Lintian::Index;
+
+=head1 DESCRIPTION
+
+Lintian::Index::Strings strings in binary files.
+
+=head1 INSTANCE METHODS
+
+=over 4
+
+=item add_strings
+
+=cut
+
+sub add_strings {
+ my ($self) = @_;
+
+ my $errors = $EMPTY;
+
+ my @files = grep { $_->is_file } @{$self->sorted_list};
+ for my $file (@files) {
+
+ next
+ if $file->name =~ m{^usr/lib/debug/};
+
+ # skip non-binaries
+ next
+ unless $file->file_type =~ /\bELF\b/;
+
+ # prior implementations sometimes made the list unique
+ my $allstrings
+ = decode_utf8(safe_qx(qw{strings --all --}, $file->unpacked_path));
+
+ $file->strings($allstrings);
+ }
+
+ return $errors;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for
+Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et