diff options
Diffstat (limited to 'lib/Lintian/Index/Elf.pm')
-rw-r--r-- | lib/Lintian/Index/Elf.pm | 739 |
1 files changed, 739 insertions, 0 deletions
diff --git a/lib/Lintian/Index/Elf.pm b/lib/Lintian/Index/Elf.pm new file mode 100644 index 0000000..1fe4d7a --- /dev/null +++ b/lib/Lintian/Index/Elf.pm @@ -0,0 +1,739 @@ +# -*- perl -*- Lintian::Index::Elf +# +# Copyright (C) 1998 Christian Schwarz +# Copyright (C) 2008 Adam D. Barratt +# Copyright (C) 2017-2018 Chris Lamb <lamby@debian.org> +# Copyright (C) 2020-2022 Felix Lechner +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# this program. If not, see <http://www.gnu.org/licenses/>. + +package Lintian::Index::Elf; + +use v5.20; +use warnings; +use utf8; + +use bignum qw(hex); + +use Const::Fast; +use Cwd; +use IPC::Run3; +use Unicode::UTF8 qw(encode_utf8 valid_utf8 decode_utf8); + +use Lintian::Elf::Section; +use Lintian::Elf::Symbol; +use Lintian::Storage::MLDBM; + +use Moo::Role; +use namespace::clean; + +const my $EMPTY => q{}; +const my $SPACE => q{ }; +const my $HYPHEN => q{-}; +const my $NEWLINE => qq{\n}; + +const my $LINES_PER_FILE => 3; + +=head1 NAME + +Lintian::Index::Elf - binary symbol information. + +=head1 SYNOPSIS + + use Lintian::Index; + +=head1 DESCRIPTION + +Lintian::Index::Elf binary symbol information. + +=head1 INSTANCE METHODS + +=over 4 + +=item elf_storage + +=cut + +has elf_storage => ( + is => 'rw', + lazy => 1, + default => sub { + my ($self) = @_; + + my $storage = Lintian::Storage::MLDBM->new; + $storage->create('elf'); + + return $storage; + } +); + +=item elf_storage_by_member + +=cut + +has elf_storage_by_member => ( + is => 'rw', + lazy => 1, + default => sub { + my ($self) = @_; + + my $storage = Lintian::Storage::MLDBM->new; + $storage->create('elf-by-member'); + + return $storage; + } +); + +=item add_elf + +=cut + +sub add_elf { + my ($self) = @_; + + my $savedir = getcwd; + chdir($self->basedir) + or die encode_utf8( + $self->identifier . ': Cannot change to directory ' . $self->basedir); + + my $errors = $EMPTY; + + my @files = grep { $_->is_file } @{$self->sorted_list}; + + # must be ELF or static library + my @with_objects = grep { + $_->file_type =~ /\bELF\b/ + || ( $_->file_type =~ /\bcurrent ar archive\b/ + && $_->name =~ /\.a$/) + } @files; + + for my $file (@with_objects) { + + local $SIG{__WARN__}= sub { + warn encode_utf8($self->identifier + . ': Warning while running readelf on' + . $file->name + . ": $_[0]"); + }; + + my @command = (qw{readelf --all --wide}, $file->name); + my $combined_bytes; + + run3(\@command, \undef, \$combined_bytes, \$combined_bytes); + + next + unless length $combined_bytes; + + my $combined_output; + + if (valid_utf8($combined_bytes)) { + $combined_output = decode_utf8($combined_bytes); + + } else { + $combined_output = $combined_bytes; + $errors .= "Output from '@command' is not valid UTF-8" . $NEWLINE; + } + + # each object file in an archive gets its own File section + my @per_files = split(/^(File): (.*)$/m, $combined_output); + shift @per_files while @per_files && $per_files[0] ne 'File'; + + @per_files = ($combined_output) + unless @per_files; + + # Special case - readelf will not prefix the output with "File: + # $name" if it only gets one ELF file argument, so act as if it did... + # (but it does "the right thing" if passed a static lib >.>) + # + # - In fact, if readelf always emitted that File: header, we could + # simply use xargs directly on readelf and just parse its output + # in the loop below. + if (@per_files == 1) { + unshift(@per_files, $file->name); + unshift(@per_files, 'File'); + } + + unless (@per_files % $LINES_PER_FILE == 0) { + + $errors + .= "Parsed data from readelf is not a multiple of $LINES_PER_FILE for $file" + . $NEWLINE; + next; + } + + while (defined(my $fixed = shift @per_files)) { + + my $recorded_name = shift @per_files; + my $per_file = shift @per_files; + + unless ($fixed eq 'File') { + $errors .= "Unknown output from readelf for $file" . $NEWLINE; + next; + } + + unless (length $recorded_name) { + $errors .= "No file name from readelf for $file" . $NEWLINE; + next; + } + + my ($container, $member) = ($recorded_name =~ /^(.*)\(([^)]+)\)$/); + + $container = $recorded_name + unless defined $container && defined $member; + + unless ($container eq $file->name) { + $errors + .= "Container not same as file name ($container vs $file)" + . $NEWLINE; + next; + } + + # ignore empty archives, such as in musl-dev_1.2.1-1_amd64.deb + next + unless length $per_file; + + my $object_name; + if ($recorded_name =~ m{^(?:.+)\(([^/\)]+)\)$}){ + + # object file in a static lib. + $object_name = $1; + } + + parse_per_file($file, $object_name, $per_file); + } + } + + chdir($savedir) + or die encode_utf8( + $self->identifier . ": Cannot change to directory $savedir"); + + return $errors; +} + +=item parse_per_file + +=cut + +sub parse_per_file { + my ($file, $object_name, $from_readelf) = @_; + + my %by_object; + + $by_object{READELF} = $from_readelf; + + # sometimes there are three blank lines; seen on armhf + my @paragraphs = split(/\n{2,}/, $from_readelf); + + for my $paragraph (@paragraphs) { + + my ($first, $bulk) = split(m{\n}, $paragraph, 2); + + if ($first =~ /^ELF Header:/) { + elf_header($bulk, \%by_object); + next; + } + + if ($first =~ /^Program Headers:/) { + program_headers($bulk, \%by_object); + next; + } + + if ($first =~ /^Dynamic section at offset .*:/) { + dynamic_section($bulk, \%by_object); + next; + } + + if ($first =~ /^Section Headers:/) { + section_headers($bulk, \%by_object); + next; + } + + if ($first =~ /^Symbol table '.dynsym'/) { + symbol_table($bulk, \%by_object); + next; + } + + if ($first =~ /^Version symbols section /) { + version_symbols($bulk, \%by_object); + next; + } + + if ($first =~ /^There is no dynamic section in this file/) { + # a dynamic section was declared but it's empty. + $by_object{'BAD-DYNAMIC-TABLE'} = 1 + if exists $by_object{PH}{DYNAMIC}; + next; + } + } + + my %section_name_by_number; + for my $name (keys %{$by_object{SH} // {}}) { + + my $number = $by_object{SH}{$name}{number}; + $section_name_by_number{$number} = $name; + } + + for my $symbol_number (keys %{$by_object{'DYNAMIC-SYMBOLS'}}) { + + my $symbol_name + = $by_object{'DYNAMIC-SYMBOLS'}{$symbol_number}{symbol_name}; + my $section_number + = $by_object{'DYNAMIC-SYMBOLS'}{$symbol_number}{section_number}; + + my $symbol_version; + + if ($symbol_name =~ m{^ (.*) @ (.*) \s [(] .* [)] $}x) { + + $symbol_name = $1; + $symbol_version = $2; + + } else { + $symbol_version = $by_object{'SYMBOL-VERSIONS'}{$symbol_number} + // $EMPTY; + + if ( $symbol_version eq '*local*' + || $symbol_version eq '*global*'){ + + if ($section_number eq 'UND') { + $symbol_version = $EMPTY; + + } else { + $symbol_version = 'Base'; + } + + } elsif ($symbol_version eq '()') { + $symbol_version = '(Base)'; + } + } + + # happens once or twice for regular binaries + next + unless length $symbol_name; + + # look up numbered section + my $section_name = $section_name_by_number{$section_number} + // $section_number; + + my $symbol = Lintian::Elf::Symbol->new; + $symbol->section($section_name); + $symbol->version($symbol_version); + $symbol->name($symbol_name); + + push(@{ $by_object{SYMBOLS} }, $symbol); + } + + if (length $object_name) { + + # object file in a static lib. + $file->elf_by_member($object_name, \%by_object); + + } else { + $file->elf(\%by_object); + } + + return; +} + +=item elf_header + +=cut + +sub elf_header { + my ($text, $by_object) = @_; + + my @lines = split(m{\n}, $text); + + for my $line (@lines) { + + next + if divert_error('ELF header', $line, $by_object); + + my ($field, $value) = split(/:/, $line, 2); + + # trim both ends + $field =~ s/^\s+|\s+$//g; + $value =~ s/^\s+|\s+$//g; + + next + unless length $field && length $value; + + $by_object->{'ELF-HEADER'}{$field} = $value; + } + + return; +} + +=item program_headers + +=cut + +sub program_headers { + my ($text, $by_object) = @_; + + my @lines = split(m{\n}, $text); + + while (defined(my $line = shift @lines)) { + + next + if divert_error('program headers', $line, $by_object); + + if ($line =~ m{^ \s* (\S+) \s* (?:(?:\S+\s+){4}) \S+ \s (...) }x) { + + my $header = $1; + my $flags = $2; + + $header =~ s/^GNU_//g; + + next + if $header eq 'Type'; + + my $newflags = $EMPTY; + $newflags .= ($flags =~ /R/) ? 'r' : $HYPHEN; + $newflags .= ($flags =~ /W/) ? 'w' : $HYPHEN; + $newflags .= ($flags =~ /E/) ? 'x' : $HYPHEN; + + $by_object->{PH}{$header}{flags} = $newflags; + + if ($header eq 'INTERP' && @lines) { + # Check if the next line is the "requesting an interpreter" + # (readelf appears to always emit on the next line if at all) + my $next_line = $lines[0]; + + if ($next_line + =~ m{ [[] Requesting \s program \s interpreter: \s ([^\]]+) []] }x + ){ + + my $interpreter = $1; + + $by_object->{INTERP} = $interpreter; + + # discard line + shift @lines; + } + } + } + } + + return; +} + +=item dynamic_section + +=cut + +sub dynamic_section { + my ($text, $by_object) = @_; + + my @lines = split(m{\n}, $text); + + while (defined(my $line = shift @lines)) { + + next + if divert_error('dynamic section', $line, $by_object); + + if ($line + =~ m{^ \s* 0x (?:[0-9A-F]+) \s+ [(] (.*?) [)] \s+ ([\x21-\x7f][\x20-\x7f]*) \Z}ix + ) { + + my $type = $1; + my $remainder = $2; + + my $keep = 0; + + if ($type eq 'RPATH' || $type eq 'RUNPATH') { + $remainder =~ s{^ .* [[] }{}x; + $remainder =~ s{ []] \s* $}{}x; + $keep = 1; + + } elsif ($type eq 'TEXTREL' || $type eq 'DEBUG') { + $keep = 1; + + } elsif ($type eq 'FLAGS_1') { + # Will contain "NOW" if the binary was built with -Wl,-z,now + $remainder =~ s/^Flags:\s*//i; + $keep = 1; + + } elsif (($type eq 'FLAGS' && $remainder =~ m/\bBIND_NOW\b/) + || $type eq 'BIND_NOW') { + + # Variants of bindnow + $type = 'FLAGS_1'; + $remainder = 'NOW'; + $keep = 1; + } + + $keep = 1 + if $remainder + =~ s{^ (?: Shared \s library | Library \s soname ) : \s [[] (.*) []] }{$1}x; + + next + unless $keep; + + # Here we just need RPATH and NEEDS, so ignore the rest for now + if ($type eq 'RPATH' || $type eq 'RUNPATH') { + + # RPATH is like PATH + my @components = split(/:/, $remainder); + $by_object->{$type}{$_} = 1 for @components; + + } elsif ($type eq 'NEEDED' || $type eq 'SONAME') { + push(@{ $by_object->{$type} }, $remainder); + + } elsif ($type eq 'TEXTREL' || $type eq 'DEBUG') { + $by_object->{$type} = 1; + + } elsif ($type eq 'FLAGS_1') { + + my @flags = split(/\s+/, $remainder); + $by_object->{$type}{$_} = 1 for @flags; + } + } + } + + return; +} + +=item section_headers + +=cut + +sub section_headers { + my ($text, $by_object) = @_; + + const my $TOTAL_FIELDS => 11; + + my @lines = split(m{\n}, $text); + + die 'No column labels.' + unless @lines; + + my $first = shift @lines; + + my %labels_by_column; + + my $column = 1; + for my $label (split($SPACE, $first)) { + + $label =~ s{^ [[] }{}x; + $label =~ s{ []] $}{}x; + + $labels_by_column{$column} = $label; + + } continue { + ++$column; + } + + die 'Not enough column labels.' + if keys %labels_by_column != $TOTAL_FIELDS; + + my $row = 1; + while (defined(my $line = shift @lines)) { + + next + if divert_error('section headers', $line, $by_object); + + last + if $line =~ /^Key to Flags:/; + + my %section_header; + + my @matches = ( + $line =~ m{^ \s* + [[] \s* (\S+) []] \s # Nr + (\S+)? \s+ # Name + (\S+) \s+ # Type + ([0-9a-f]+) \s # Address/Addr + ([0-9a-f]+) \s # Off + ([0-9a-f]+) \s # Size + (\S+) \s+ # ES + (\S+)? \s+ # Flg + (\S+) \s+ # Lk + (\S+) \s+ # Inf + (\S+) # Al + $}x + ); + + if (@matches != $TOTAL_FIELDS) { + + warn "Parse error in readelf section headers [row $row]: $line"; + next; + } + + for my $column (keys %labels_by_column) { + + my $label = $labels_by_column{$column}; + my $value = $matches[$column -1] // $EMPTY; + + $section_header{$label} = $value; + } + + # http://sco.com/developers/gabi/latest/ch4.sheader.html + my $section = Lintian::Elf::Section->new; + $section->number($section_header{Nr}); + $section->name($section_header{Name}); + $section->type($section_header{Type}); + + # readelf uses both + $section->address( + hex($section_header{Address} // $section_header{Addr})); + $section->offset(hex($section_header{Off})); + $section->size(hex($section_header{Size})); + $section->entry_size(hex($section_header{ES})); + $section->flags($section_header{Flg}); + $section->index_link(hex($section_header{Lk})); + $section->index_info(hex($section_header{Inf})); + $section->alignment(hex($section_header{Al})); + + die 'No section number.' + unless length $section->number; + + $by_object->{'SECTION-HEADERS'}{$section->number} = $section; + + } continue { + ++$row; + } + + return; +} + +=item symbol_table + +=cut + +sub symbol_table { + my ($text, $by_object) = @_; + + # We (sometimes) need to read the "Version symbols section" first to + # use this data and readelf tends to print after this section, so + # save for later. + + my @lines = split(m{\n}, $text); + + while (defined(my $line = shift @lines)) { + + next + if divert_error('symbol table', $line, $by_object); + + if ($line + =~ m{^ \s* (\d+) : \s* [0-9a-f]+ \s+ \d+ \s+ (?:(?:\S+\s+){3}) (?: [[] .* []] \s+)? (\S+) \s+ (.*) \Z}x + ) { + + my $symbol_number = $1; + my $section_number = $2; + my $symbol_name = $3; + + $by_object->{'DYNAMIC-SYMBOLS'}{$symbol_number}{section_number} + = $section_number; + $by_object->{'DYNAMIC-SYMBOLS'}{$symbol_number}{symbol_name} + = $symbol_name; + } + } + + return; +} + +=item version_symbols + +=cut + +sub version_symbols { + my ($text, $by_object) = @_; + + my @lines = split(m{\n}, $text); + + while (defined(my $line = shift @lines)) { + + next + if divert_error('version symbols', $line, $by_object); + + if ($line + =~ m{^ \s* [0-9a-f]+ : \s* \S+ \s* (?: [(] \S+ [)] )? (?: \s | \Z ) }xi + ){ + + while ($line + =~ m{ ([0-9a-f]+ h?) \s* (?: [(] (\S+) [)] )? (?: \s | \Z ) }cgix + ) { + + my $symbol_number = $1; + my $symbol_version = $2; + + # for libfuse2_2.9.9-3_amd64.deb + next + unless length $symbol_version; + + $symbol_version = "($symbol_version)" + if $symbol_number =~ m{ h $}x; + + $by_object->{'SYMBOL-VERSIONS'}{$symbol_number} + = $symbol_version; + } + } + } + + return; +} + +=item divert_error + +=cut + +sub divert_error { + my ($section, $line, $by_object) = @_; + + return 0 + unless $line =~ s{^ readelf: \s+ }{}x; + + if ($line =~ s{^ Error: \s+ }{}x) { + + my $message = "In $section: $line"; + + $by_object->{ERRORS} //= []; + push(@{$by_object->{ERRORS}}, $message); + + return 1; + } + + if ($line =~ s{^ Warning: \s+ }{}x) { + + my $message = "In $section: $line"; + + $by_object->{WARNINGS} //= []; + push(@{$by_object->{WARNINGS}}, $message); + + return 1; + } + + return 0; +} + +=back + +=head1 AUTHOR + +Originally written by Felix Lechner <felix.lechner@lease-up.com> for +Lintian. + +=head1 SEE ALSO + +lintian(1) + +=cut + +1; + +# Local Variables: +# indent-tabs-mode: nil +# cperl-indent-level: 4 +# End: +# vim: syntax=perl sw=4 sts=4 sr et |