summaryrefslogtreecommitdiffstats
path: root/lib/Lintian/Index/Elf.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Lintian/Index/Elf.pm')
-rw-r--r--lib/Lintian/Index/Elf.pm739
1 files changed, 739 insertions, 0 deletions
diff --git a/lib/Lintian/Index/Elf.pm b/lib/Lintian/Index/Elf.pm
new file mode 100644
index 0000000..1fe4d7a
--- /dev/null
+++ b/lib/Lintian/Index/Elf.pm
@@ -0,0 +1,739 @@
+# -*- perl -*- Lintian::Index::Elf
+#
+# Copyright (C) 1998 Christian Schwarz
+# Copyright (C) 2008 Adam D. Barratt
+# Copyright (C) 2017-2018 Chris Lamb <lamby@debian.org>
+# Copyright (C) 2020-2022 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Index::Elf;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use bignum qw(hex);
+
+use Const::Fast;
+use Cwd;
+use IPC::Run3;
+use Unicode::UTF8 qw(encode_utf8 valid_utf8 decode_utf8);
+
+use Lintian::Elf::Section;
+use Lintian::Elf::Symbol;
+use Lintian::Storage::MLDBM;
+
+use Moo::Role;
+use namespace::clean;
+
+const my $EMPTY => q{};
+const my $SPACE => q{ };
+const my $HYPHEN => q{-};
+const my $NEWLINE => qq{\n};
+
+const my $LINES_PER_FILE => 3;
+
+=head1 NAME
+
+Lintian::Index::Elf - binary symbol information.
+
+=head1 SYNOPSIS
+
+ use Lintian::Index;
+
+=head1 DESCRIPTION
+
+Lintian::Index::Elf binary symbol information.
+
+=head1 INSTANCE METHODS
+
+=over 4
+
+=item elf_storage
+
+=cut
+
+has elf_storage => (
+ is => 'rw',
+ lazy => 1,
+ default => sub {
+ my ($self) = @_;
+
+ my $storage = Lintian::Storage::MLDBM->new;
+ $storage->create('elf');
+
+ return $storage;
+ }
+);
+
+=item elf_storage_by_member
+
+=cut
+
+has elf_storage_by_member => (
+ is => 'rw',
+ lazy => 1,
+ default => sub {
+ my ($self) = @_;
+
+ my $storage = Lintian::Storage::MLDBM->new;
+ $storage->create('elf-by-member');
+
+ return $storage;
+ }
+);
+
+=item add_elf
+
+=cut
+
+sub add_elf {
+ my ($self) = @_;
+
+ my $savedir = getcwd;
+ chdir($self->basedir)
+ or die encode_utf8(
+ $self->identifier . ': Cannot change to directory ' . $self->basedir);
+
+ my $errors = $EMPTY;
+
+ my @files = grep { $_->is_file } @{$self->sorted_list};
+
+ # must be ELF or static library
+ my @with_objects = grep {
+ $_->file_type =~ /\bELF\b/
+ || ( $_->file_type =~ /\bcurrent ar archive\b/
+ && $_->name =~ /\.a$/)
+ } @files;
+
+ for my $file (@with_objects) {
+
+ local $SIG{__WARN__}= sub {
+ warn encode_utf8($self->identifier
+ . ': Warning while running readelf on'
+ . $file->name
+ . ": $_[0]");
+ };
+
+ my @command = (qw{readelf --all --wide}, $file->name);
+ my $combined_bytes;
+
+ run3(\@command, \undef, \$combined_bytes, \$combined_bytes);
+
+ next
+ unless length $combined_bytes;
+
+ my $combined_output;
+
+ if (valid_utf8($combined_bytes)) {
+ $combined_output = decode_utf8($combined_bytes);
+
+ } else {
+ $combined_output = $combined_bytes;
+ $errors .= "Output from '@command' is not valid UTF-8" . $NEWLINE;
+ }
+
+ # each object file in an archive gets its own File section
+ my @per_files = split(/^(File): (.*)$/m, $combined_output);
+ shift @per_files while @per_files && $per_files[0] ne 'File';
+
+ @per_files = ($combined_output)
+ unless @per_files;
+
+ # Special case - readelf will not prefix the output with "File:
+ # $name" if it only gets one ELF file argument, so act as if it did...
+ # (but it does "the right thing" if passed a static lib >.>)
+ #
+ # - In fact, if readelf always emitted that File: header, we could
+ # simply use xargs directly on readelf and just parse its output
+ # in the loop below.
+ if (@per_files == 1) {
+ unshift(@per_files, $file->name);
+ unshift(@per_files, 'File');
+ }
+
+ unless (@per_files % $LINES_PER_FILE == 0) {
+
+ $errors
+ .= "Parsed data from readelf is not a multiple of $LINES_PER_FILE for $file"
+ . $NEWLINE;
+ next;
+ }
+
+ while (defined(my $fixed = shift @per_files)) {
+
+ my $recorded_name = shift @per_files;
+ my $per_file = shift @per_files;
+
+ unless ($fixed eq 'File') {
+ $errors .= "Unknown output from readelf for $file" . $NEWLINE;
+ next;
+ }
+
+ unless (length $recorded_name) {
+ $errors .= "No file name from readelf for $file" . $NEWLINE;
+ next;
+ }
+
+ my ($container, $member) = ($recorded_name =~ /^(.*)\(([^)]+)\)$/);
+
+ $container = $recorded_name
+ unless defined $container && defined $member;
+
+ unless ($container eq $file->name) {
+ $errors
+ .= "Container not same as file name ($container vs $file)"
+ . $NEWLINE;
+ next;
+ }
+
+ # ignore empty archives, such as in musl-dev_1.2.1-1_amd64.deb
+ next
+ unless length $per_file;
+
+ my $object_name;
+ if ($recorded_name =~ m{^(?:.+)\(([^/\)]+)\)$}){
+
+ # object file in a static lib.
+ $object_name = $1;
+ }
+
+ parse_per_file($file, $object_name, $per_file);
+ }
+ }
+
+ chdir($savedir)
+ or die encode_utf8(
+ $self->identifier . ": Cannot change to directory $savedir");
+
+ return $errors;
+}
+
+=item parse_per_file
+
+=cut
+
+sub parse_per_file {
+ my ($file, $object_name, $from_readelf) = @_;
+
+ my %by_object;
+
+ $by_object{READELF} = $from_readelf;
+
+ # sometimes there are three blank lines; seen on armhf
+ my @paragraphs = split(/\n{2,}/, $from_readelf);
+
+ for my $paragraph (@paragraphs) {
+
+ my ($first, $bulk) = split(m{\n}, $paragraph, 2);
+
+ if ($first =~ /^ELF Header:/) {
+ elf_header($bulk, \%by_object);
+ next;
+ }
+
+ if ($first =~ /^Program Headers:/) {
+ program_headers($bulk, \%by_object);
+ next;
+ }
+
+ if ($first =~ /^Dynamic section at offset .*:/) {
+ dynamic_section($bulk, \%by_object);
+ next;
+ }
+
+ if ($first =~ /^Section Headers:/) {
+ section_headers($bulk, \%by_object);
+ next;
+ }
+
+ if ($first =~ /^Symbol table '.dynsym'/) {
+ symbol_table($bulk, \%by_object);
+ next;
+ }
+
+ if ($first =~ /^Version symbols section /) {
+ version_symbols($bulk, \%by_object);
+ next;
+ }
+
+ if ($first =~ /^There is no dynamic section in this file/) {
+ # a dynamic section was declared but it's empty.
+ $by_object{'BAD-DYNAMIC-TABLE'} = 1
+ if exists $by_object{PH}{DYNAMIC};
+ next;
+ }
+ }
+
+ my %section_name_by_number;
+ for my $name (keys %{$by_object{SH} // {}}) {
+
+ my $number = $by_object{SH}{$name}{number};
+ $section_name_by_number{$number} = $name;
+ }
+
+ for my $symbol_number (keys %{$by_object{'DYNAMIC-SYMBOLS'}}) {
+
+ my $symbol_name
+ = $by_object{'DYNAMIC-SYMBOLS'}{$symbol_number}{symbol_name};
+ my $section_number
+ = $by_object{'DYNAMIC-SYMBOLS'}{$symbol_number}{section_number};
+
+ my $symbol_version;
+
+ if ($symbol_name =~ m{^ (.*) @ (.*) \s [(] .* [)] $}x) {
+
+ $symbol_name = $1;
+ $symbol_version = $2;
+
+ } else {
+ $symbol_version = $by_object{'SYMBOL-VERSIONS'}{$symbol_number}
+ // $EMPTY;
+
+ if ( $symbol_version eq '*local*'
+ || $symbol_version eq '*global*'){
+
+ if ($section_number eq 'UND') {
+ $symbol_version = $EMPTY;
+
+ } else {
+ $symbol_version = 'Base';
+ }
+
+ } elsif ($symbol_version eq '()') {
+ $symbol_version = '(Base)';
+ }
+ }
+
+ # happens once or twice for regular binaries
+ next
+ unless length $symbol_name;
+
+ # look up numbered section
+ my $section_name = $section_name_by_number{$section_number}
+ // $section_number;
+
+ my $symbol = Lintian::Elf::Symbol->new;
+ $symbol->section($section_name);
+ $symbol->version($symbol_version);
+ $symbol->name($symbol_name);
+
+ push(@{ $by_object{SYMBOLS} }, $symbol);
+ }
+
+ if (length $object_name) {
+
+ # object file in a static lib.
+ $file->elf_by_member($object_name, \%by_object);
+
+ } else {
+ $file->elf(\%by_object);
+ }
+
+ return;
+}
+
+=item elf_header
+
+=cut
+
+sub elf_header {
+ my ($text, $by_object) = @_;
+
+ my @lines = split(m{\n}, $text);
+
+ for my $line (@lines) {
+
+ next
+ if divert_error('ELF header', $line, $by_object);
+
+ my ($field, $value) = split(/:/, $line, 2);
+
+ # trim both ends
+ $field =~ s/^\s+|\s+$//g;
+ $value =~ s/^\s+|\s+$//g;
+
+ next
+ unless length $field && length $value;
+
+ $by_object->{'ELF-HEADER'}{$field} = $value;
+ }
+
+ return;
+}
+
+=item program_headers
+
+=cut
+
+sub program_headers {
+ my ($text, $by_object) = @_;
+
+ my @lines = split(m{\n}, $text);
+
+ while (defined(my $line = shift @lines)) {
+
+ next
+ if divert_error('program headers', $line, $by_object);
+
+ if ($line =~ m{^ \s* (\S+) \s* (?:(?:\S+\s+){4}) \S+ \s (...) }x) {
+
+ my $header = $1;
+ my $flags = $2;
+
+ $header =~ s/^GNU_//g;
+
+ next
+ if $header eq 'Type';
+
+ my $newflags = $EMPTY;
+ $newflags .= ($flags =~ /R/) ? 'r' : $HYPHEN;
+ $newflags .= ($flags =~ /W/) ? 'w' : $HYPHEN;
+ $newflags .= ($flags =~ /E/) ? 'x' : $HYPHEN;
+
+ $by_object->{PH}{$header}{flags} = $newflags;
+
+ if ($header eq 'INTERP' && @lines) {
+ # Check if the next line is the "requesting an interpreter"
+ # (readelf appears to always emit on the next line if at all)
+ my $next_line = $lines[0];
+
+ if ($next_line
+ =~ m{ [[] Requesting \s program \s interpreter: \s ([^\]]+) []] }x
+ ){
+
+ my $interpreter = $1;
+
+ $by_object->{INTERP} = $interpreter;
+
+ # discard line
+ shift @lines;
+ }
+ }
+ }
+ }
+
+ return;
+}
+
+=item dynamic_section
+
+=cut
+
+sub dynamic_section {
+ my ($text, $by_object) = @_;
+
+ my @lines = split(m{\n}, $text);
+
+ while (defined(my $line = shift @lines)) {
+
+ next
+ if divert_error('dynamic section', $line, $by_object);
+
+ if ($line
+ =~ m{^ \s* 0x (?:[0-9A-F]+) \s+ [(] (.*?) [)] \s+ ([\x21-\x7f][\x20-\x7f]*) \Z}ix
+ ) {
+
+ my $type = $1;
+ my $remainder = $2;
+
+ my $keep = 0;
+
+ if ($type eq 'RPATH' || $type eq 'RUNPATH') {
+ $remainder =~ s{^ .* [[] }{}x;
+ $remainder =~ s{ []] \s* $}{}x;
+ $keep = 1;
+
+ } elsif ($type eq 'TEXTREL' || $type eq 'DEBUG') {
+ $keep = 1;
+
+ } elsif ($type eq 'FLAGS_1') {
+ # Will contain "NOW" if the binary was built with -Wl,-z,now
+ $remainder =~ s/^Flags:\s*//i;
+ $keep = 1;
+
+ } elsif (($type eq 'FLAGS' && $remainder =~ m/\bBIND_NOW\b/)
+ || $type eq 'BIND_NOW') {
+
+ # Variants of bindnow
+ $type = 'FLAGS_1';
+ $remainder = 'NOW';
+ $keep = 1;
+ }
+
+ $keep = 1
+ if $remainder
+ =~ s{^ (?: Shared \s library | Library \s soname ) : \s [[] (.*) []] }{$1}x;
+
+ next
+ unless $keep;
+
+ # Here we just need RPATH and NEEDS, so ignore the rest for now
+ if ($type eq 'RPATH' || $type eq 'RUNPATH') {
+
+ # RPATH is like PATH
+ my @components = split(/:/, $remainder);
+ $by_object->{$type}{$_} = 1 for @components;
+
+ } elsif ($type eq 'NEEDED' || $type eq 'SONAME') {
+ push(@{ $by_object->{$type} }, $remainder);
+
+ } elsif ($type eq 'TEXTREL' || $type eq 'DEBUG') {
+ $by_object->{$type} = 1;
+
+ } elsif ($type eq 'FLAGS_1') {
+
+ my @flags = split(/\s+/, $remainder);
+ $by_object->{$type}{$_} = 1 for @flags;
+ }
+ }
+ }
+
+ return;
+}
+
+=item section_headers
+
+=cut
+
+sub section_headers {
+ my ($text, $by_object) = @_;
+
+ const my $TOTAL_FIELDS => 11;
+
+ my @lines = split(m{\n}, $text);
+
+ die 'No column labels.'
+ unless @lines;
+
+ my $first = shift @lines;
+
+ my %labels_by_column;
+
+ my $column = 1;
+ for my $label (split($SPACE, $first)) {
+
+ $label =~ s{^ [[] }{}x;
+ $label =~ s{ []] $}{}x;
+
+ $labels_by_column{$column} = $label;
+
+ } continue {
+ ++$column;
+ }
+
+ die 'Not enough column labels.'
+ if keys %labels_by_column != $TOTAL_FIELDS;
+
+ my $row = 1;
+ while (defined(my $line = shift @lines)) {
+
+ next
+ if divert_error('section headers', $line, $by_object);
+
+ last
+ if $line =~ /^Key to Flags:/;
+
+ my %section_header;
+
+ my @matches = (
+ $line =~ m{^ \s*
+ [[] \s* (\S+) []] \s # Nr
+ (\S+)? \s+ # Name
+ (\S+) \s+ # Type
+ ([0-9a-f]+) \s # Address/Addr
+ ([0-9a-f]+) \s # Off
+ ([0-9a-f]+) \s # Size
+ (\S+) \s+ # ES
+ (\S+)? \s+ # Flg
+ (\S+) \s+ # Lk
+ (\S+) \s+ # Inf
+ (\S+) # Al
+ $}x
+ );
+
+ if (@matches != $TOTAL_FIELDS) {
+
+ warn "Parse error in readelf section headers [row $row]: $line";
+ next;
+ }
+
+ for my $column (keys %labels_by_column) {
+
+ my $label = $labels_by_column{$column};
+ my $value = $matches[$column -1] // $EMPTY;
+
+ $section_header{$label} = $value;
+ }
+
+ # http://sco.com/developers/gabi/latest/ch4.sheader.html
+ my $section = Lintian::Elf::Section->new;
+ $section->number($section_header{Nr});
+ $section->name($section_header{Name});
+ $section->type($section_header{Type});
+
+ # readelf uses both
+ $section->address(
+ hex($section_header{Address} // $section_header{Addr}));
+ $section->offset(hex($section_header{Off}));
+ $section->size(hex($section_header{Size}));
+ $section->entry_size(hex($section_header{ES}));
+ $section->flags($section_header{Flg});
+ $section->index_link(hex($section_header{Lk}));
+ $section->index_info(hex($section_header{Inf}));
+ $section->alignment(hex($section_header{Al}));
+
+ die 'No section number.'
+ unless length $section->number;
+
+ $by_object->{'SECTION-HEADERS'}{$section->number} = $section;
+
+ } continue {
+ ++$row;
+ }
+
+ return;
+}
+
+=item symbol_table
+
+=cut
+
+sub symbol_table {
+ my ($text, $by_object) = @_;
+
+ # We (sometimes) need to read the "Version symbols section" first to
+ # use this data and readelf tends to print after this section, so
+ # save for later.
+
+ my @lines = split(m{\n}, $text);
+
+ while (defined(my $line = shift @lines)) {
+
+ next
+ if divert_error('symbol table', $line, $by_object);
+
+ if ($line
+ =~ m{^ \s* (\d+) : \s* [0-9a-f]+ \s+ \d+ \s+ (?:(?:\S+\s+){3}) (?: [[] .* []] \s+)? (\S+) \s+ (.*) \Z}x
+ ) {
+
+ my $symbol_number = $1;
+ my $section_number = $2;
+ my $symbol_name = $3;
+
+ $by_object->{'DYNAMIC-SYMBOLS'}{$symbol_number}{section_number}
+ = $section_number;
+ $by_object->{'DYNAMIC-SYMBOLS'}{$symbol_number}{symbol_name}
+ = $symbol_name;
+ }
+ }
+
+ return;
+}
+
+=item version_symbols
+
+=cut
+
+sub version_symbols {
+ my ($text, $by_object) = @_;
+
+ my @lines = split(m{\n}, $text);
+
+ while (defined(my $line = shift @lines)) {
+
+ next
+ if divert_error('version symbols', $line, $by_object);
+
+ if ($line
+ =~ m{^ \s* [0-9a-f]+ : \s* \S+ \s* (?: [(] \S+ [)] )? (?: \s | \Z ) }xi
+ ){
+
+ while ($line
+ =~ m{ ([0-9a-f]+ h?) \s* (?: [(] (\S+) [)] )? (?: \s | \Z ) }cgix
+ ) {
+
+ my $symbol_number = $1;
+ my $symbol_version = $2;
+
+ # for libfuse2_2.9.9-3_amd64.deb
+ next
+ unless length $symbol_version;
+
+ $symbol_version = "($symbol_version)"
+ if $symbol_number =~ m{ h $}x;
+
+ $by_object->{'SYMBOL-VERSIONS'}{$symbol_number}
+ = $symbol_version;
+ }
+ }
+ }
+
+ return;
+}
+
+=item divert_error
+
+=cut
+
+sub divert_error {
+ my ($section, $line, $by_object) = @_;
+
+ return 0
+ unless $line =~ s{^ readelf: \s+ }{}x;
+
+ if ($line =~ s{^ Error: \s+ }{}x) {
+
+ my $message = "In $section: $line";
+
+ $by_object->{ERRORS} //= [];
+ push(@{$by_object->{ERRORS}}, $message);
+
+ return 1;
+ }
+
+ if ($line =~ s{^ Warning: \s+ }{}x) {
+
+ my $message = "In $section: $line";
+
+ $by_object->{WARNINGS} //= [];
+ push(@{$by_object->{WARNINGS}}, $message);
+
+ return 1;
+ }
+
+ return 0;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Felix Lechner <felix.lechner@lease-up.com> for
+Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et