# -*- perl -*- Lintian::Index::Elf # # Copyright (C) 1998 Christian Schwarz # Copyright (C) 2008 Adam D. Barratt # Copyright (C) 2017-2018 Chris Lamb # Copyright (C) 2020-2022 Felix Lechner # # This program is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the Free # Software Foundation; either version 2 of the License, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # more details. # # You should have received a copy of the GNU General Public License along with # this program. If not, see . package Lintian::Index::Elf; use v5.20; use warnings; use utf8; use bignum qw(hex); use Const::Fast; use Cwd; use IPC::Run3; use Unicode::UTF8 qw(encode_utf8 valid_utf8 decode_utf8); use Lintian::Elf::Section; use Lintian::Elf::Symbol; use Lintian::Storage::MLDBM; use Moo::Role; use namespace::clean; const my $EMPTY => q{}; const my $SPACE => q{ }; const my $HYPHEN => q{-}; const my $NEWLINE => qq{\n}; const my $LINES_PER_FILE => 3; =head1 NAME Lintian::Index::Elf - binary symbol information. =head1 SYNOPSIS use Lintian::Index; =head1 DESCRIPTION Lintian::Index::Elf binary symbol information. =head1 INSTANCE METHODS =over 4 =item elf_storage =cut has elf_storage => ( is => 'rw', lazy => 1, default => sub { my ($self) = @_; my $storage = Lintian::Storage::MLDBM->new; $storage->create('elf'); return $storage; } ); =item elf_storage_by_member =cut has elf_storage_by_member => ( is => 'rw', lazy => 1, default => sub { my ($self) = @_; my $storage = Lintian::Storage::MLDBM->new; $storage->create('elf-by-member'); return $storage; } ); =item add_elf =cut sub add_elf { my ($self) = @_; my $savedir = getcwd; chdir($self->basedir) or die encode_utf8( $self->identifier . ': Cannot change to directory ' . $self->basedir); my $errors = $EMPTY; my @files = grep { $_->is_file } @{$self->sorted_list}; # must be ELF or static library my @with_objects = grep { $_->file_type =~ /\bELF\b/ || ( $_->file_type =~ /\bcurrent ar archive\b/ && $_->name =~ /\.a$/) } @files; for my $file (@with_objects) { local $SIG{__WARN__}= sub { warn encode_utf8($self->identifier . ': Warning while running readelf on' . $file->name . ": $_[0]"); }; my @command = (qw{readelf --all --wide}, $file->name); my $combined_bytes; run3(\@command, \undef, \$combined_bytes, \$combined_bytes); next unless length $combined_bytes; my $combined_output; if (valid_utf8($combined_bytes)) { $combined_output = decode_utf8($combined_bytes); } else { $combined_output = $combined_bytes; $errors .= "Output from '@command' is not valid UTF-8" . $NEWLINE; } # each object file in an archive gets its own File section my @per_files = split(/^(File): (.*)$/m, $combined_output); shift @per_files while @per_files && $per_files[0] ne 'File'; @per_files = ($combined_output) unless @per_files; # Special case - readelf will not prefix the output with "File: # $name" if it only gets one ELF file argument, so act as if it did... # (but it does "the right thing" if passed a static lib >.>) # # - In fact, if readelf always emitted that File: header, we could # simply use xargs directly on readelf and just parse its output # in the loop below. if (@per_files == 1) { unshift(@per_files, $file->name); unshift(@per_files, 'File'); } unless (@per_files % $LINES_PER_FILE == 0) { $errors .= "Parsed data from readelf is not a multiple of $LINES_PER_FILE for $file" . $NEWLINE; next; } while (defined(my $fixed = shift @per_files)) { my $recorded_name = shift @per_files; my $per_file = shift @per_files; unless ($fixed eq 'File') { $errors .= "Unknown output from readelf for $file" . $NEWLINE; next; } unless (length $recorded_name) { $errors .= "No file name from readelf for $file" . $NEWLINE; next; } my ($container, $member) = ($recorded_name =~ /^(.*)\(([^)]+)\)$/); $container = $recorded_name unless defined $container && defined $member; unless ($container eq $file->name) { $errors .= "Container not same as file name ($container vs $file)" . $NEWLINE; next; } # ignore empty archives, such as in musl-dev_1.2.1-1_amd64.deb next unless length $per_file; my $object_name; if ($recorded_name =~ m{^(?:.+)\(([^/\)]+)\)$}){ # object file in a static lib. $object_name = $1; } parse_per_file($file, $object_name, $per_file); } } chdir($savedir) or die encode_utf8( $self->identifier . ": Cannot change to directory $savedir"); return $errors; } =item parse_per_file =cut sub parse_per_file { my ($file, $object_name, $from_readelf) = @_; my %by_object; $by_object{READELF} = $from_readelf; # sometimes there are three blank lines; seen on armhf my @paragraphs = split(/\n{2,}/, $from_readelf); for my $paragraph (@paragraphs) { my ($first, $bulk) = split(m{\n}, $paragraph, 2); if ($first =~ /^ELF Header:/) { elf_header($bulk, \%by_object); next; } if ($first =~ /^Program Headers:/) { program_headers($bulk, \%by_object); next; } if ($first =~ /^Dynamic section at offset .*:/) { dynamic_section($bulk, \%by_object); next; } if ($first =~ /^Section Headers:/) { section_headers($bulk, \%by_object); next; } if ($first =~ /^Symbol table '.dynsym'/) { symbol_table($bulk, \%by_object); next; } if ($first =~ /^Version symbols section /) { version_symbols($bulk, \%by_object); next; } if ($first =~ /^There is no dynamic section in this file/) { # a dynamic section was declared but it's empty. $by_object{'BAD-DYNAMIC-TABLE'} = 1 if exists $by_object{PH}{DYNAMIC}; next; } } my %section_name_by_number; for my $name (keys %{$by_object{SH} // {}}) { my $number = $by_object{SH}{$name}{number}; $section_name_by_number{$number} = $name; } for my $symbol_number (keys %{$by_object{'DYNAMIC-SYMBOLS'}}) { my $symbol_name = $by_object{'DYNAMIC-SYMBOLS'}{$symbol_number}{symbol_name}; my $section_number = $by_object{'DYNAMIC-SYMBOLS'}{$symbol_number}{section_number}; my $symbol_version; if ($symbol_name =~ m{^ (.*) @ (.*) \s [(] .* [)] $}x) { $symbol_name = $1; $symbol_version = $2; } else { $symbol_version = $by_object{'SYMBOL-VERSIONS'}{$symbol_number} // $EMPTY; if ( $symbol_version eq '*local*' || $symbol_version eq '*global*'){ if ($section_number eq 'UND') { $symbol_version = $EMPTY; } else { $symbol_version = 'Base'; } } elsif ($symbol_version eq '()') { $symbol_version = '(Base)'; } } # happens once or twice for regular binaries next unless length $symbol_name; # look up numbered section my $section_name = $section_name_by_number{$section_number} // $section_number; my $symbol = Lintian::Elf::Symbol->new; $symbol->section($section_name); $symbol->version($symbol_version); $symbol->name($symbol_name); push(@{ $by_object{SYMBOLS} }, $symbol); } if (length $object_name) { # object file in a static lib. $file->elf_by_member($object_name, \%by_object); } else { $file->elf(\%by_object); } return; } =item elf_header =cut sub elf_header { my ($text, $by_object) = @_; my @lines = split(m{\n}, $text); for my $line (@lines) { next if divert_error('ELF header', $line, $by_object); my ($field, $value) = split(/:/, $line, 2); # trim both ends $field =~ s/^\s+|\s+$//g; $value =~ s/^\s+|\s+$//g; next unless length $field && length $value; $by_object->{'ELF-HEADER'}{$field} = $value; } return; } =item program_headers =cut sub program_headers { my ($text, $by_object) = @_; my @lines = split(m{\n}, $text); while (defined(my $line = shift @lines)) { next if divert_error('program headers', $line, $by_object); if ($line =~ m{^ \s* (\S+) \s* (?:(?:\S+\s+){4}) \S+ \s (...) }x) { my $header = $1; my $flags = $2; $header =~ s/^GNU_//g; next if $header eq 'Type'; my $newflags = $EMPTY; $newflags .= ($flags =~ /R/) ? 'r' : $HYPHEN; $newflags .= ($flags =~ /W/) ? 'w' : $HYPHEN; $newflags .= ($flags =~ /E/) ? 'x' : $HYPHEN; $by_object->{PH}{$header}{flags} = $newflags; if ($header eq 'INTERP' && @lines) { # Check if the next line is the "requesting an interpreter" # (readelf appears to always emit on the next line if at all) my $next_line = $lines[0]; if ($next_line =~ m{ [[] Requesting \s program \s interpreter: \s ([^\]]+) []] }x ){ my $interpreter = $1; $by_object->{INTERP} = $interpreter; # discard line shift @lines; } } } } return; } =item dynamic_section =cut sub dynamic_section { my ($text, $by_object) = @_; my @lines = split(m{\n}, $text); while (defined(my $line = shift @lines)) { next if divert_error('dynamic section', $line, $by_object); if ($line =~ m{^ \s* 0x (?:[0-9A-F]+) \s+ [(] (.*?) [)] \s+ ([\x21-\x7f][\x20-\x7f]*) \Z}ix ) { my $type = $1; my $remainder = $2; my $keep = 0; if ($type eq 'RPATH' || $type eq 'RUNPATH') { $remainder =~ s{^ .* [[] }{}x; $remainder =~ s{ []] \s* $}{}x; $keep = 1; } elsif ($type eq 'TEXTREL' || $type eq 'DEBUG') { $keep = 1; } elsif ($type eq 'FLAGS_1') { # Will contain "NOW" if the binary was built with -Wl,-z,now $remainder =~ s/^Flags:\s*//i; $keep = 1; } elsif (($type eq 'FLAGS' && $remainder =~ m/\bBIND_NOW\b/) || $type eq 'BIND_NOW') { # Variants of bindnow $type = 'FLAGS_1'; $remainder = 'NOW'; $keep = 1; } $keep = 1 if $remainder =~ s{^ (?: Shared \s library | Library \s soname ) : \s [[] (.*) []] }{$1}x; next unless $keep; # Here we just need RPATH and NEEDS, so ignore the rest for now if ($type eq 'RPATH' || $type eq 'RUNPATH') { # RPATH is like PATH my @components = split(/:/, $remainder); $by_object->{$type}{$_} = 1 for @components; } elsif ($type eq 'NEEDED' || $type eq 'SONAME') { push(@{ $by_object->{$type} }, $remainder); } elsif ($type eq 'TEXTREL' || $type eq 'DEBUG') { $by_object->{$type} = 1; } elsif ($type eq 'FLAGS_1') { my @flags = split(/\s+/, $remainder); $by_object->{$type}{$_} = 1 for @flags; } } } return; } =item section_headers =cut sub section_headers { my ($text, $by_object) = @_; const my $TOTAL_FIELDS => 11; my @lines = split(m{\n}, $text); die 'No column labels.' unless @lines; my $first = shift @lines; my %labels_by_column; my $column = 1; for my $label (split($SPACE, $first)) { $label =~ s{^ [[] }{}x; $label =~ s{ []] $}{}x; $labels_by_column{$column} = $label; } continue { ++$column; } die 'Not enough column labels.' if keys %labels_by_column != $TOTAL_FIELDS; my $row = 1; while (defined(my $line = shift @lines)) { next if divert_error('section headers', $line, $by_object); last if $line =~ /^Key to Flags:/; my %section_header; my @matches = ( $line =~ m{^ \s* [[] \s* (\S+) []] \s # Nr (\S+)? \s+ # Name (\S+) \s+ # Type ([0-9a-f]+) \s # Address/Addr ([0-9a-f]+) \s # Off ([0-9a-f]+) \s # Size (\S+) \s+ # ES (\S+)? \s+ # Flg (\S+) \s+ # Lk (\S+) \s+ # Inf (\S+) # Al $}x ); if (@matches != $TOTAL_FIELDS) { warn "Parse error in readelf section headers [row $row]: $line"; next; } for my $column (keys %labels_by_column) { my $label = $labels_by_column{$column}; my $value = $matches[$column -1] // $EMPTY; $section_header{$label} = $value; } # http://sco.com/developers/gabi/latest/ch4.sheader.html my $section = Lintian::Elf::Section->new; $section->number($section_header{Nr}); $section->name($section_header{Name}); $section->type($section_header{Type}); # readelf uses both $section->address( hex($section_header{Address} // $section_header{Addr})); $section->offset(hex($section_header{Off})); $section->size(hex($section_header{Size})); $section->entry_size(hex($section_header{ES})); $section->flags($section_header{Flg}); $section->index_link(hex($section_header{Lk})); $section->index_info(hex($section_header{Inf})); $section->alignment(hex($section_header{Al})); die 'No section number.' unless length $section->number; $by_object->{'SECTION-HEADERS'}{$section->number} = $section; } continue { ++$row; } return; } =item symbol_table =cut sub symbol_table { my ($text, $by_object) = @_; # We (sometimes) need to read the "Version symbols section" first to # use this data and readelf tends to print after this section, so # save for later. my @lines = split(m{\n}, $text); while (defined(my $line = shift @lines)) { next if divert_error('symbol table', $line, $by_object); if ($line =~ m{^ \s* (\d+) : \s* [0-9a-f]+ \s+ \d+ \s+ (?:(?:\S+\s+){3}) (?: [[] .* []] \s+)? (\S+) \s+ (.*) \Z}x ) { my $symbol_number = $1; my $section_number = $2; my $symbol_name = $3; $by_object->{'DYNAMIC-SYMBOLS'}{$symbol_number}{section_number} = $section_number; $by_object->{'DYNAMIC-SYMBOLS'}{$symbol_number}{symbol_name} = $symbol_name; } } return; } =item version_symbols =cut sub version_symbols { my ($text, $by_object) = @_; my @lines = split(m{\n}, $text); while (defined(my $line = shift @lines)) { next if divert_error('version symbols', $line, $by_object); if ($line =~ m{^ \s* [0-9a-f]+ : \s* \S+ \s* (?: [(] \S+ [)] )? (?: \s | \Z ) }xi ){ while ($line =~ m{ ([0-9a-f]+ h?) \s* (?: [(] (\S+) [)] )? (?: \s | \Z ) }cgix ) { my $symbol_number = $1; my $symbol_version = $2; # for libfuse2_2.9.9-3_amd64.deb next unless length $symbol_version; $symbol_version = "($symbol_version)" if $symbol_number =~ m{ h $}x; $by_object->{'SYMBOL-VERSIONS'}{$symbol_number} = $symbol_version; } } } return; } =item divert_error =cut sub divert_error { my ($section, $line, $by_object) = @_; return 0 unless $line =~ s{^ readelf: \s+ }{}x; if ($line =~ s{^ Error: \s+ }{}x) { my $message = "In $section: $line"; $by_object->{ERRORS} //= []; push(@{$by_object->{ERRORS}}, $message); return 1; } if ($line =~ s{^ Warning: \s+ }{}x) { my $message = "In $section: $line"; $by_object->{WARNINGS} //= []; push(@{$by_object->{WARNINGS}}, $message); return 1; } return 0; } =back =head1 AUTHOR Originally written by Felix Lechner for Lintian. =head1 SEE ALSO lintian(1) =cut 1; # Local Variables: # indent-tabs-mode: nil # cperl-indent-level: 4 # End: # vim: syntax=perl sw=4 sts=4 sr et