diff options
Diffstat (limited to 'scripts/Dpkg/Shlibs/Objdump.pm')
-rw-r--r-- | scripts/Dpkg/Shlibs/Objdump.pm | 582 |
1 files changed, 582 insertions, 0 deletions
diff --git a/scripts/Dpkg/Shlibs/Objdump.pm b/scripts/Dpkg/Shlibs/Objdump.pm new file mode 100644 index 0000000..c9af965 --- /dev/null +++ b/scripts/Dpkg/Shlibs/Objdump.pm @@ -0,0 +1,582 @@ +# Copyright © 2007-2010 Raphaël Hertzog <hertzog@debian.org> +# Copyright © 2007-2009,2012-2015,2017-2018 Guillem Jover <guillem@debian.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +package Dpkg::Shlibs::Objdump; + +use strict; +use warnings; +use feature qw(state); + +our $VERSION = '0.01'; + +use Dpkg::Gettext; +use Dpkg::ErrorHandling; + +sub new { + my $this = shift; + my $class = ref($this) || $this; + my $self = { objects => {} }; + bless $self, $class; + return $self; +} + +sub add_object { + my ($self, $obj) = @_; + my $id = $obj->get_id; + if ($id) { + $self->{objects}{$id} = $obj; + } + return $id; +} + +sub analyze { + my ($self, $file) = @_; + my $obj = Dpkg::Shlibs::Objdump::Object->new($file); + + return $self->add_object($obj); +} + +sub locate_symbol { + my ($self, $name) = @_; + foreach my $obj (values %{$self->{objects}}) { + my $sym = $obj->get_symbol($name); + if (defined($sym) && $sym->{defined}) { + return $sym; + } + } + return; +} + +sub get_object { + my ($self, $objid) = @_; + if ($self->has_object($objid)) { + return $self->{objects}{$objid}; + } + return; +} + +sub has_object { + my ($self, $objid) = @_; + return exists $self->{objects}{$objid}; +} + +use constant { + # ELF Class. + ELF_BITS_NONE => 0, + ELF_BITS_32 => 1, + ELF_BITS_64 => 2, + + # ELF Data encoding. + ELF_ORDER_NONE => 0, + ELF_ORDER_2LSB => 1, + ELF_ORDER_2MSB => 2, + + # ELF Machine. + EM_SPARC => 2, + EM_MIPS => 8, + EM_SPARC64_OLD => 11, + EM_SPARC32PLUS => 18, + EM_PPC64 => 21, + EM_S390 => 22, + EM_ARM => 40, + EM_ALPHA_OLD => 41, + EM_SH => 42, + EM_SPARC64 => 43, + EM_IA64 => 50, + EM_AVR => 83, + EM_M32R => 88, + EM_MN10300 => 89, + EM_MN10200 => 90, + EM_OR1K => 92, + EM_XTENSA => 94, + EM_MICROBLAZE => 189, + EM_ARCV2 => 195, + EM_LOONGARCH => 258, + EM_AVR_OLD => 0x1057, + EM_OR1K_OLD => 0x8472, + EM_ALPHA => 0x9026, + EM_M32R_CYGNUS => 0x9041, + EM_S390_OLD => 0xa390, + EM_XTENSA_OLD => 0xabc7, + EM_MICROBLAZE_OLD => 0xbaab, + EM_MN10300_CYGNUS => 0xbeef, + EM_MN10200_CYGNUS => 0xdead, + + # ELF Version. + EV_NONE => 0, + EV_CURRENT => 1, + + # ELF Flags (might influence the ABI). + EF_ARM_ALIGN8 => 0x00000040, + EF_ARM_NEW_ABI => 0x00000080, + EF_ARM_OLD_ABI => 0x00000100, + EF_ARM_SOFT_FLOAT => 0x00000200, + EF_ARM_HARD_FLOAT => 0x00000400, + EF_ARM_EABI_MASK => 0xff000000, + + EF_IA64_ABI64 => 0x00000010, + + EF_LOONGARCH_SOFT_FLOAT => 0x00000001, + EF_LOONGARCH_SINGLE_FLOAT => 0x00000002, + EF_LOONGARCH_DOUBLE_FLOAT => 0x00000003, + EF_LOONGARCH_ABI_MASK => 0x00000007, + + EF_MIPS_ABI2 => 0x00000020, + EF_MIPS_32BIT => 0x00000100, + EF_MIPS_FP64 => 0x00000200, + EF_MIPS_NAN2008 => 0x00000400, + EF_MIPS_ABI_MASK => 0x0000f000, + EF_MIPS_ARCH_MASK => 0xf0000000, + + EF_PPC64_ABI64 => 0x00000003, + + EF_SH_MACH_MASK => 0x0000001f, +}; + +# These map alternative or old machine IDs to their canonical form. +my %elf_mach_map = ( + EM_ALPHA_OLD() => EM_ALPHA, + EM_AVR_OLD() => EM_AVR, + EM_M32R_CYGNUS() => EM_M32R, + EM_MICROBLAZE_OLD() => EM_MICROBLAZE, + EM_MN10200_CYGNUS() => EM_MN10200, + EM_MN10300_CYGNUS() => EM_MN10300, + EM_OR1K_OLD() => EM_OR1K, + EM_S390_OLD() => EM_S390, + EM_SPARC32PLUS() => EM_SPARC, + EM_SPARC64_OLD() => EM_SPARC64, + EM_XTENSA_OLD() => EM_XTENSA, +); + +# These masks will try to expose processor flags that are ABI incompatible, +# and as such are part of defining the architecture ABI. If uncertain it is +# always better to not mask a flag, because that preserves the historical +# behavior, and we do not drop dependencies. +my %elf_flags_mask = ( + EM_IA64() => EF_IA64_ABI64, + EM_LOONGARCH() => EF_LOONGARCH_ABI_MASK, + EM_MIPS() => EF_MIPS_ABI_MASK | EF_MIPS_ABI2, + EM_PPC64() => EF_PPC64_ABI64, +); + +sub get_format { + my ($file) = @_; + state %format; + + return $format{$file} if exists $format{$file}; + + my $header; + + open my $fh, '<', $file or syserr(g_('cannot read %s'), $file); + my $rc = read $fh, $header, 64; + if (not defined $rc) { + syserr(g_('cannot read %s'), $file); + } elsif ($rc != 64) { + return; + } + close $fh; + + my %elf; + + # Unpack the identifier field. + @elf{qw(magic bits endian vertype osabi verabi)} = unpack 'a4C5', $header; + + return unless $elf{magic} eq "\x7fELF"; + return unless $elf{vertype} == EV_CURRENT; + + my ($elf_word, $elf_endian); + if ($elf{bits} == ELF_BITS_32) { + $elf_word = 'L'; + } elsif ($elf{bits} == ELF_BITS_64) { + $elf_word = 'Q'; + } else { + return; + } + if ($elf{endian} == ELF_ORDER_2LSB) { + $elf_endian = '<'; + } elsif ($elf{endian} == ELF_ORDER_2MSB) { + $elf_endian = '>'; + } else { + return; + } + + # Unpack the endianness and size dependent fields. + my $tmpl = "x16(S2Lx[${elf_word}3]L)${elf_endian}"; + @elf{qw(type mach version flags)} = unpack $tmpl, $header; + + # Canonicalize the machine ID. + $elf{mach} = $elf_mach_map{$elf{mach}} // $elf{mach}; + + # Mask any processor flags that might not change the architecture ABI. + $elf{flags} &= $elf_flags_mask{$elf{mach}} // 0; + + # Repack for easy comparison, as a big-endian byte stream, so that + # unpacking for output gives meaningful results. + $format{$file} = pack 'C2(SL)>', @elf{qw(bits endian mach flags)}; + + return $format{$file}; +} + +sub is_elf { + my $file = shift; + open(my $file_fh, '<', $file) or syserr(g_('cannot read %s'), $file); + my ($header, $result) = ('', 0); + if (read($file_fh, $header, 4) == 4) { + $result = 1 if ($header =~ /^\177ELF$/); + } + close($file_fh); + return $result; +} + +package Dpkg::Shlibs::Objdump::Object; + +use strict; +use warnings; +use feature qw(state); + +use Dpkg::Gettext; +use Dpkg::ErrorHandling; +use Dpkg::Path qw(find_command); +use Dpkg::Arch qw(debarch_to_gnutriplet get_build_arch get_host_arch); + +sub new { + my $this = shift; + my $file = shift // ''; + my $class = ref($this) || $this; + my $self = {}; + bless $self, $class; + + $self->reset; + if ($file) { + $self->analyze($file); + } + + return $self; +} + +sub reset { + my $self = shift; + + $self->{file} = ''; + $self->{id} = ''; + $self->{HASH} = ''; + $self->{GNU_HASH} = ''; + $self->{INTERP} = 0; + $self->{SONAME} = ''; + $self->{NEEDED} = []; + $self->{RPATH} = []; + $self->{dynsyms} = {}; + $self->{flags} = {}; + $self->{dynrelocs} = {}; + + return $self; +} + +sub _select_objdump { + # Decide which objdump to call + if (get_build_arch() ne get_host_arch()) { + my $od = debarch_to_gnutriplet(get_host_arch()) . '-objdump'; + return $od if find_command($od); + } + return 'objdump'; +} + +sub analyze { + my ($self, $file) = @_; + + $file ||= $self->{file}; + return unless $file; + + $self->reset; + $self->{file} = $file; + + $self->{exec_abi} = Dpkg::Shlibs::Objdump::get_format($file); + + if (not defined $self->{exec_abi}) { + warning(g_("unknown executable format in file '%s'"), $file); + return; + } + + state $OBJDUMP = _select_objdump(); + local $ENV{LC_ALL} = 'C'; + open(my $objdump, '-|', $OBJDUMP, '-w', '-f', '-p', '-T', '-R', $file) + or syserr(g_('cannot fork for %s'), $OBJDUMP); + my $ret = $self->parse_objdump_output($objdump); + close($objdump); + return $ret; +} + +sub parse_objdump_output { + my ($self, $fh) = @_; + + my $section = 'none'; + while (<$fh>) { + s/\s*$//; + next if length == 0; + + if (/^DYNAMIC SYMBOL TABLE:/) { + $section = 'dynsym'; + next; + } elsif (/^DYNAMIC RELOCATION RECORDS/) { + $section = 'dynreloc'; + $_ = <$fh>; # Skip header + next; + } elsif (/^Dynamic Section:/) { + $section = 'dyninfo'; + next; + } elsif (/^Program Header:/) { + $section = 'program'; + next; + } elsif (/^Version definitions:/) { + $section = 'verdef'; + next; + } elsif (/^Version References:/) { + $section = 'verref'; + next; + } + + if ($section eq 'dynsym') { + $self->parse_dynamic_symbol($_); + } elsif ($section eq 'dynreloc') { + if (/^\S+\s+(\S+)\s+(.+)$/) { + $self->{dynrelocs}{$2} = $1; + } else { + warning(g_("couldn't parse dynamic relocation record: %s"), $_); + } + } elsif ($section eq 'dyninfo') { + if (/^\s*NEEDED\s+(\S+)/) { + push @{$self->{NEEDED}}, $1; + } elsif (/^\s*SONAME\s+(\S+)/) { + $self->{SONAME} = $1; + } elsif (/^\s*HASH\s+(\S+)/) { + $self->{HASH} = $1; + } elsif (/^\s*GNU_HASH\s+(\S+)/) { + $self->{GNU_HASH} = $1; + } elsif (/^\s*RUNPATH\s+(\S+)/) { + # RUNPATH takes precedence over RPATH but is + # considered after LD_LIBRARY_PATH while RPATH + # is considered before (if RUNPATH is not set). + my $runpath = $1; + $self->{RPATH} = [ split /:/, $runpath ]; + } elsif (/^\s*RPATH\s+(\S+)/) { + my $rpath = $1; + unless (scalar(@{$self->{RPATH}})) { + $self->{RPATH} = [ split /:/, $rpath ]; + } + } + } elsif ($section eq 'program') { + if (/^\s*INTERP\s+/) { + $self->{INTERP} = 1; + } + } elsif ($section eq 'none') { + if (/^\s*.+:\s*file\s+format\s+(\S+)$/) { + $self->{format} = $1; + } elsif (/^architecture:\s*\S+,\s*flags\s*\S+:$/) { + # Parse 2 lines of "-f" + # architecture: i386, flags 0x00000112: + # EXEC_P, HAS_SYMS, D_PAGED + # start address 0x08049b50 + $_ = <$fh>; + chomp; + $self->{flags}{$_} = 1 foreach (split(/,\s*/)); + } + } + } + # Update status of dynamic symbols given the relocations that have + # been parsed after the symbols... + $self->apply_relocations(); + + return $section ne 'none'; +} + +# Output format of objdump -w -T +# +# /lib/libc.so.6: file format elf32-i386 +# +# DYNAMIC SYMBOL TABLE: +# 00056ef0 g DF .text 000000db GLIBC_2.2 getwchar +# 00000000 g DO *ABS* 00000000 GCC_3.0 GCC_3.0 +# 00069960 w DF .text 0000001e GLIBC_2.0 bcmp +# 00000000 w D *UND* 00000000 _pthread_cleanup_pop_restore +# 0000b788 g DF .text 0000008e Base .protected xine_close +# 0000b788 g DF .text 0000008e .hidden IA__g_free +# | ||||||| | | | | +# | ||||||| | | Version str (.visibility) + Symbol name +# | ||||||| | Alignment +# | ||||||| Section name (or *UND* for an undefined symbol) +# | ||||||F=Function,f=file,O=object +# | |||||d=debugging,D=dynamic +# | ||||I=Indirect +# | |||W=warning +# | ||C=constructor +# | |w=weak +# | g=global,l=local,!=both global/local +# Size of the symbol +# +# GLIBC_2.2 is the version string associated to the symbol +# (GLIBC_2.2) is the same but the symbol is hidden, a newer version of the +# symbol exist + +my $vis_re = qr/(\.protected|\.hidden|\.internal|0x\S+)/; +my $dynsym_re = qr< + ^ + [0-9a-f]+ # Symbol size + \ (.{7}) # Flags + \s+(\S+) # Section name + \s+[0-9a-f]+ # Alignment + (?:\s+(\S+))? # Version string + (?:\s+$vis_re)? # Visibility + \s+(.+) # Symbol name +>x; + +sub parse_dynamic_symbol { + my ($self, $line) = @_; + if ($line =~ $dynsym_re) { + my ($flags, $sect, $ver, $vis, $name) = ($1, $2, $3, $4, $5); + + # Special case if version is missing but extra visibility + # attribute replaces it in the match + if (defined($ver) and $ver =~ /^$vis_re$/) { + $vis = $ver; + $ver = ''; + } + + # Cleanup visibility field + $vis =~ s/^\.// if defined($vis); + + my $symbol = { + name => $name, + version => $ver // '', + section => $sect, + dynamic => substr($flags, 5, 1) eq 'D', + debug => substr($flags, 5, 1) eq 'd', + type => substr($flags, 6, 1), + weak => substr($flags, 1, 1) eq 'w', + local => substr($flags, 0, 1) eq 'l', + global => substr($flags, 0, 1) eq 'g', + visibility => $vis // '', + hidden => '', + defined => $sect ne '*UND*' + }; + + # Handle hidden symbols + if (defined($ver) and $ver =~ /^\((.*)\)$/) { + $ver = $1; + $symbol->{version} = $1; + $symbol->{hidden} = 1; + } + + # Register symbol + $self->add_dynamic_symbol($symbol); + } elsif ($line =~ /^[0-9a-f]+ (.{7})\s+(\S+)\s+[0-9a-f]+/) { + # Same start but no version and no symbol ... just ignore + } elsif ($line =~ /^REG_G\d+\s+/) { + # Ignore some s390-specific output like + # REG_G6 g R *UND* 0000000000000000 #scratch + } else { + warning(g_("couldn't parse dynamic symbol definition: %s"), $line); + } +} + +sub apply_relocations { + my $self = shift; + foreach my $sym (values %{$self->{dynsyms}}) { + # We want to mark as undefined symbols those which are currently + # defined but that depend on a copy relocation + next if not $sym->{defined}; + + my @relocs; + + # When objdump qualifies the symbol with a version it will use @ when + # the symbol is in an undefined section (which we discarded above, or + # @@ otherwise. + push @relocs, $sym->{name} . '@@' . $sym->{version} if $sym->{version}; + + # Symbols that are not versioned, or versioned but shown with objdump + # from binutils < 2.26, do not have a version appended. + push @relocs, $sym->{name}; + + foreach my $reloc (@relocs) { + next if not exists $self->{dynrelocs}{$reloc}; + next if not $self->{dynrelocs}{$reloc} =~ /^R_.*_COPY$/; + + $sym->{defined} = 0; + last; + } + } +} + +sub add_dynamic_symbol { + my ($self, $symbol) = @_; + $symbol->{objid} = $symbol->{soname} = $self->get_id(); + $symbol->{soname} =~ s{^.*/}{} unless $self->{SONAME}; + if ($symbol->{version}) { + $self->{dynsyms}{$symbol->{name} . '@' . $symbol->{version}} = $symbol; + } else { + $self->{dynsyms}{$symbol->{name} . '@Base'} = $symbol; + } +} + +sub get_id { + my $self = shift; + return $self->{SONAME} || $self->{file}; +} + +sub get_symbol { + my ($self, $name) = @_; + if (exists $self->{dynsyms}{$name}) { + return $self->{dynsyms}{$name}; + } + if ($name !~ /@/) { + if (exists $self->{dynsyms}{$name . '@Base'}) { + return $self->{dynsyms}{$name . '@Base'}; + } + } + return; +} + +sub get_exported_dynamic_symbols { + my $self = shift; + return grep { + $_->{defined} && $_->{dynamic} && !$_->{local} + } values %{$self->{dynsyms}}; +} + +sub get_undefined_dynamic_symbols { + my $self = shift; + return grep { + (!$_->{defined}) && $_->{dynamic} + } values %{$self->{dynsyms}}; +} + +sub get_needed_libraries { + my $self = shift; + return @{$self->{NEEDED}}; +} + +sub is_executable { + my $self = shift; + return (exists $self->{flags}{EXEC_P} && $self->{flags}{EXEC_P}) || + (exists $self->{INTERP} && $self->{INTERP}); +} + +sub is_public_library { + my $self = shift; + return exists $self->{flags}{DYNAMIC} && $self->{flags}{DYNAMIC} + && exists $self->{SONAME} && $self->{SONAME}; +} + +1; |