summaryrefslogtreecommitdiffstats
path: root/lib/Lintian/Index/Item.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Lintian/Index/Item.pm')
-rw-r--r--lib/Lintian/Index/Item.pm1567
1 files changed, 1567 insertions, 0 deletions
diff --git a/lib/Lintian/Index/Item.pm b/lib/Lintian/Index/Item.pm
new file mode 100644
index 0000000..7ef6c09
--- /dev/null
+++ b/lib/Lintian/Index/Item.pm
@@ -0,0 +1,1567 @@
+# -*- perl -*-
+# Lintian::Index::Item -- Representation of path entry in a package
+#
+# Copyright (C) 2011 Niels Thykier
+# Copyright (C) 2020 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::Index::Item;
+
+use v5.20;
+use warnings;
+use utf8;
+use autodie qw(open);
+
+use Carp qw(croak confess);
+use Const::Fast;
+use Date::Parse qw(str2time);
+use List::SomeUtils qw(all);
+use Path::Tiny;
+use Syntax::Keyword::Try;
+use Text::Balanced qw(extract_delimited);
+use Unicode::UTF8 qw(valid_utf8 decode_utf8 encode_utf8);
+
+use Lintian::Pointer::Item;
+use Lintian::SlidingWindow;
+use Lintian::Util qw(normalize_link_target);
+
+use Moo;
+use namespace::clean;
+
+use constant {
+ TYPE_FILE => 0x00_01_00_00,
+ TYPE_HARDLINK => 0x00_02_00_00,
+ TYPE_DIR => 0x00_04_00_00,
+ TYPE_SYMLINK => 0x00_08_00_00,
+ TYPE_BLOCK_DEV => 0x00_10_00_00,
+ TYPE_CHAR_DEV => 0x00_20_00_00,
+ TYPE_PIPE => 0x00_40_00_00,
+ TYPE_OTHER => 0x00_80_00_00,
+ TYPE_MASK => 0x00_ff_00_00,
+
+ UNSAFE_PATH => 0x01_00_00_00,
+ FS_PATH_IS_OK => 0x02_00_00_00,
+ OPEN_IS_OK => 0x06_00_00_00, # Implies FS_PATH_IS_OK
+ ACCESS_INFO => 0x07_00_00_00,
+ # 0o6777 == 0xdff, which covers set[ug]id + sticky bit. Accordingly,
+ # 0xffff should be more than sufficient for the foreseeable future.
+ OPERM_MASK => 0x00_00_ff_ff,
+};
+
+use overload (
+ q{""} => \&_as_string,
+ 'qr' => \&_as_regex_ref,
+ 'bool' => \&_bool,
+ q{!} => \&_bool_not,
+ q{.} => \&_str_concat,
+ 'cmp' => \&_str_cmp,
+ 'eq' => \&_str_eq,
+ 'ne' => \&_str_ne,
+ 'fallback' => 0,
+);
+
+const my $EMPTY => q{};
+const my $SPACE => q{ };
+const my $SLASH => q{/};
+const my $DOT => q{.};
+const my $DOUBLE_DOT => q{..};
+const my $DOUBLE_QUOTE => q{"};
+const my $BACKSLASH => q{\\};
+const my $HASHBANG => q{#!};
+
+const my $MAXIMUM_LINK_DEPTH => 18;
+
+const my $BYTE_MAXIMUM => 255;
+const my $SINGLE_OCTAL_MASK => oct(7);
+const my $DUAL_OCTAL_MASK => oct(77);
+
+const my $ELF_MAGIC_SIZE => 4;
+const my $LINCITY_MAGIC_SIZE => 6;
+const my $SHELL_SCRIPT_MAGIC_SIZE => 2;
+
+const my $READ_BITS => oct(444);
+const my $WRITE_BITS => oct(222);
+const my $EXECUTABLE_BITS => oct(111);
+
+const my $SETUID => oct(4000);
+const my $SETGID => oct(2000);
+
+=head1 NAME
+
+Lintian::Index::Item - Lintian representation of a path entry in a package
+
+=head1 SYNOPSIS
+
+ my ($name, $type, $dir) = ('lintian', 'source', '/path/to/entry');
+
+=head1 INSTANCE METHODS
+
+=over 4
+
+=item init_from_tar_output
+
+=item get_quoted_filename
+
+=item unescape_c_style
+
+=cut
+
+my $datepattern = qr/\d{4}-\d{2}-\d{2}/;
+my $timepattern = qr/\d{2}\:\d{2}(?:\:\d{2}(?:\.\d+)?)?/;
+my $symlinkpattern = qr/\s+->\s+/;
+my $hardlinkpattern = qr/\s+link\s+to\s+/;
+
+# adapted from https://www.perlmonks.org/?node_id=1056606
+my %T = (
+ (map {chr() => chr} 0..$BYTE_MAXIMUM),
+ (map {sprintf('%o',$_) => chr} 0..($BYTE_MAXIMUM & $SINGLE_OCTAL_MASK)),
+ (map {sprintf('%02o',$_) => chr} 0..($BYTE_MAXIMUM & $DUAL_OCTAL_MASK)),
+ (map {sprintf('%03o',$_) => chr} 0..$BYTE_MAXIMUM),
+ (split //, "r\rn\nb\ba\af\ft\tv\013")
+);
+
+sub unescape_c_style {
+ my ($escaped) = @_;
+
+ (my $result = $escaped) =~ s/\\([0-7]{1,3}|.)/$T{$1}/g;
+
+ return $result;
+}
+
+sub get_quoted_filename {
+ my ($unknown, $skip) = @_;
+
+ # extract quoted file name
+ my ($delimited, $extra)
+ = extract_delimited($unknown, $DOUBLE_QUOTE, $skip, $BACKSLASH);
+
+ return (undef, undef)
+ unless defined $delimited;
+
+ # drop quotes
+ my $cstylename = substr($delimited, 1, (length $delimited) - 2);
+
+ # convert c-style escapes
+ my $name = unescape_c_style($cstylename);
+
+ return ($name, $extra);
+}
+
+sub init_from_tar_output {
+ my ($self, $line) = @_;
+
+ chomp $line;
+
+ # allow spaces in ownership and filenames (#895175 and #950589)
+
+ my ($initial, $size, $date, $time, $remainder)
+ = split(/\s+(\d+)\s+($datepattern)\s+($timepattern)\s+/, $line,2);
+
+ die encode_utf8(
+ $self->index->identifier . ": Cannot parse tar output: $line")
+ unless all { defined } ($initial, $size, $date, $time, $remainder);
+
+ $self->size($size);
+ $self->date($date);
+ $self->time($time);
+
+ my ($permissions, $ownership) = split(/\s+/, $initial, 2);
+ die encode_utf8($self->index->identifier
+ .": Cannot parse permissions and ownership in tar output: $line")
+ unless all { defined } ($permissions, $ownership);
+
+ $self->perm($permissions);
+
+ my ($owner, $group) = split(qr{/}, $ownership, 2);
+ die encode_utf8($self->index->identifier
+ . ": Cannot parse owner and group in tar output: $line")
+ unless all { defined } ($owner, $group);
+
+ $self->owner($owner);
+ $self->group($group);
+
+ my ($name, $extra) = get_quoted_filename($remainder, $EMPTY);
+ die encode_utf8($self->index->identifier
+ . ": Cannot parse file name in tar output: $line")
+ unless all { defined } ($name, $extra);
+
+ # strip relative prefix
+ $name =~ s{^\./+}{}s;
+
+ # slashes cannot appear in names but are sometimes doubled
+ # as in emboss-explorer_2.2.0-10.dsc
+ # better implemented in a Moo trigger on the attribute
+ $name =~ s{/+}{/}g;
+
+ # make sure directories end with a slash, except root
+ $name .= $SLASH
+ if length $name
+ && $self->perm =~ / ^d /msx
+ && $name !~ m{ /$ }msx;
+
+ $self->name($name);
+
+ # look for symbolic link target
+ if ($self->perm =~ /^l/) {
+
+ my ($linktarget, undef) = get_quoted_filename($extra, $symlinkpattern);
+ die encode_utf8($self->index->identifier
+ .": Cannot parse symbolic link target in tar output: $line")
+ unless defined $linktarget;
+
+ # do not remove multiple slashes from symlink targets
+ # caught by symlink-has-double-slash, which is tested
+ # leaves resolution of these links unsolved
+
+ # do not strip relative prefix for symbolic links
+ $self->link($linktarget);
+ }
+
+ # look for hard link target
+ if ($self->perm =~ /^h/) {
+
+ my ($linktarget, undef)= get_quoted_filename($extra, $hardlinkpattern);
+ die encode_utf8($self->index->identifier
+ . ": Cannot parse hard link target in tar output: $line")
+ unless defined $linktarget;
+
+ # strip relative prefix
+ $linktarget =~ s{^\./+}{}s;
+
+ # slashes cannot appear in names but are sometimes doubled
+ # as in emboss-explorer_2.2.0-10.dsc
+ # better implemented in a Moo trigger on the attribute, but requires
+ # separate attributes for hard and symbolic link targets
+ $linktarget =~ s{/+}{/}g;
+
+ $self->link($linktarget);
+ }
+
+ return;
+}
+
+=item bytes_match(REGEX)
+
+Returns the matched string if REGEX matches the file's byte contents,
+or $EMPTY otherwise.
+
+=cut
+
+sub bytes_match {
+ my ($self, $regex) = @_;
+
+ return $EMPTY
+ unless $self->is_file;
+
+ return $EMPTY
+ unless $self->is_open_ok;
+
+ return $EMPTY
+ unless length $regex;
+
+ open(my $fd, '<:raw', $self->unpacked_path);
+ my $sfd = Lintian::SlidingWindow->new;
+ $sfd->handle($fd);
+
+ my $match;
+ while (my $block = $sfd->readwindow) {
+
+ if ($block =~ /($regex)/) {
+
+ $match = $1;
+ last;
+ }
+ }
+
+ close $fd;
+
+ return $match // $EMPTY;
+}
+
+=item mentions_in_operation(REGEX)
+
+Returns the matched string if REGEX matches in a file location
+that is likely an operation (vs text), or $EMPTY otherwise.
+
+=cut
+
+sub mentions_in_operation {
+ my ($self, $regex) = @_;
+
+ # prefer strings(1) output (eg. for ELF) if we have it
+ # may not work as expected on ELF due to ld's SHF_MERGE
+ my $match;
+ if (length $self->strings && $self->strings =~ /($regex)/) {
+ $match = $1;
+
+ } elsif ($self->is_script) {
+ $match = $self->bytes_match($regex);
+ }
+
+ return $match // $EMPTY;
+}
+
+=item magic(COUNT)
+
+Returns the specified COUNT of magic bytes for the file.
+
+=cut
+
+sub magic {
+ my ($self, $count) = @_;
+
+ return $EMPTY
+ if length $self->link;
+
+ return $EMPTY
+ if $self->size < $count;
+
+ return $EMPTY
+ unless $self->is_open_ok;
+
+ my $magic;
+
+ open(my $fd, '<', $self->unpacked_path);
+ die encode_utf8($self->index->identifier
+ . ": Could not read $count bytes from "
+ . $self->name)
+ unless read($fd, $magic, $count) == $count;
+ close $fd;
+
+ return $magic;
+}
+
+=item C<hashbang>
+
+Returns the C<hashbang> for the file if it is a script.
+
+=cut
+
+has hashbang => (
+ is => 'rw',
+ lazy => 1,
+ default => sub {
+ my ($self) = @_;
+
+ return $EMPTY
+ unless $self->is_script;
+
+ my $trimmed_bytes = $EMPTY;
+ my $magic;
+
+ open(my $fd, '<', $self->unpacked_path);
+ if (read($fd, $magic, 2) && $magic eq $HASHBANG && !eof($fd)) {
+ $trimmed_bytes = <$fd>;
+ }
+ close $fd;
+
+ # decoding UTF-8 fails on magyarispell_1.6.1-2.dsc and ldc_1.24.0-1.dsc
+
+ # remove comment, if any
+ $trimmed_bytes =~ s/^([^#]*)/$1/;
+
+ # trim both ends
+ $trimmed_bytes =~ s/^\s+|\s+$//g;
+
+ return $trimmed_bytes;
+ }
+);
+
+=item interpreter_with_options
+
+Returns the interpreter requested by a script with options
+after stripping C<env>.
+
+=cut
+
+has interpreter_with_options => (
+ is => 'rw',
+ lazy => 1,
+ default => sub {
+ my ($self) = @_;
+
+ my $with_options = $self->hashbang;
+
+ $with_options =~ s{^/usr/bin/env\s+}{};
+
+ return $with_options;
+ }
+);
+
+=item interpreter
+
+Returns the interpreter requested by a script but strips C<env>.
+
+=cut
+
+has interpreter => (
+ is => 'rw',
+ lazy => 1,
+ default => sub {
+ my ($self) = @_;
+
+ my $interpreter = $self->interpreter_with_options;
+
+ # keep base command without options
+ $interpreter =~ s/^(\S+).*/$1/;
+
+ return $interpreter;
+ }
+);
+
+=item C<calls_env>
+
+Returns true if file is a script that calls C<env>.
+
+=cut
+
+has calls_env => (
+ is => 'rw',
+ lazy => 1,
+ default => sub {
+ my ($self) = @_;
+
+ # must return a boolean success value #943724
+ return 1
+ if $self->hashbang =~ m{^/usr/bin/env\s+};
+
+ return 0;
+ }
+);
+
+=item C<is_shell_script>
+
+Returns true if file is a script requesting a recognized shell
+interpreter.
+
+=cut
+
+has is_shell_script => (
+ is => 'rw',
+ lazy => 1,
+ default => sub {
+ my ($self) = @_;
+
+ my $interpreter = $self->interpreter;
+
+ # keep basename
+ my ($basename) = ($interpreter =~ m{([^/]*)/?$}s);
+
+ return 1
+ if $basename =~ /^(?:[bd]?a|t?c|(?:pd|m)?k|z)?sh$/;
+
+ return 0;
+ }
+);
+
+=item is_elf
+
+Returns true if file is an ELF executable, and false otherwise.
+
+=cut
+
+has is_elf => (
+ is => 'rw',
+ lazy => 1,
+ default => sub {
+ my ($self) = @_;
+
+ return 1
+ if $self->magic($ELF_MAGIC_SIZE) eq "\x7FELF";
+
+ return 0;
+ }
+);
+
+=item is_script
+
+Returns true if file is a script and false otherwise.
+
+=cut
+
+has is_script => (
+ is => 'rw',
+ lazy => 1,
+ default => sub {
+ my ($self) = @_;
+
+ # skip lincity data files; magic: #!#!#!
+ return 0
+ if $self->magic($LINCITY_MAGIC_SIZE) eq '#!#!#!';
+
+ return 0
+ unless $self->magic($SHELL_SCRIPT_MAGIC_SIZE) eq $HASHBANG;
+
+ return 1;
+ }
+);
+
+=item is_maintainer_script
+
+Returns true if file is a maintainer script and false otherwise.
+
+=cut
+
+has is_maintainer_script => (
+ is => 'rw',
+ lazy => 1,
+ default => sub {
+ my ($self) = @_;
+
+ return 1
+ if $self->name =~ /^ config | (?:pre|post)(?:inst|rm) $/x
+ && $self->is_open_ok;
+
+ return 0;
+ }
+);
+
+=item identity
+
+Returns the owner and group of the path, separated by a slash.
+
+NB: If only numerical owner information is available in the package,
+this may return a numerical owner (except uid 0 is always mapped to
+"root")
+
+=cut
+
+sub identity {
+ my ($self) = @_;
+
+ return $self->owner . $SLASH . $self->group;
+}
+
+=item operm
+
+Returns the file permissions of this object in octal (e.g. 0644).
+
+NB: This is only well defined for file entries that are subject to
+permissions (e.g. files). Particularly, the value is not well defined
+for symlinks.
+
+=cut
+
+sub operm {
+ my ($self) = @_;
+
+ return $self->path_info & OPERM_MASK;
+}
+
+=item octal_permissions
+
+=cut
+
+sub octal_permissions {
+ my ($self) = @_;
+
+ return sprintf('%04o', $self->operm);
+}
+
+=item children
+
+Returns a list of children (as Lintian::File::Path objects) of this entry.
+The list and its contents should not be modified.
+
+Only returns direct children of this directory. The entries are sorted by name.
+
+NB: Returns the empty list for non-dir entries.
+
+=cut
+
+sub children {
+ my ($self) = @_;
+
+ croak encode_utf8('No index in ' . $self->name)
+ unless defined $self->index;
+
+ my @names = values %{$self->childnames};
+
+ return map { $self->index->lookup($_) } @names;
+}
+
+=item descendants
+
+Returns a list of children (as Lintian::File::Path objects) of this entry.
+The list and its contents should not be modified.
+
+Descends recursively into subdirectories and return the descendants in
+breadth-first order. Children of a given directory will be sorted by
+name.
+
+NB: Returns the empty list for non-dir entries.
+
+=cut
+
+sub descendants {
+ my ($self) = @_;
+
+ my @descendants = $self->children;
+
+ my @directories = grep { $_->is_dir } @descendants;
+ push(@descendants, $_->descendants) for @directories;
+
+ return @descendants;
+}
+
+=item timestamp
+
+Returns a Unix timestamp for the given path. This is a number of
+seconds since the start of Unix epoch in UTC.
+
+=cut
+
+sub timestamp {
+ my ($self) = @_;
+
+ my $timestamp = $self->date . $SPACE . $self->time;
+
+ return str2time($timestamp, 'GMT');
+}
+
+=item child(BASENAME)
+
+Returns the child named BASENAME if it is a child of this directory.
+Otherwise, this method returns C<undef>.
+
+Even for directories, BASENAME should not end with a slash.
+
+When invoked on non-dirs, this method always returns C<undef>.
+
+Example:
+
+ $dir_entry->child('foo') => $entry OR undef
+
+=cut
+
+sub child {
+ my ($self, $basename) = @_;
+
+ croak encode_utf8('No index in ' . $self->name)
+ unless defined $self->index;
+
+ croak encode_utf8($self->index->identifier . ': Basename is required')
+ unless length $basename;
+
+ my $childname = $self->childnames->{$basename};
+ return undef
+ unless $childname;
+
+ return $self->index->lookup($childname);
+}
+
+=item is_symlink
+
+Returns a truth value if this entry is a symlink.
+
+=item is_hardlink
+
+Returns a truth value if this entry is a hardlink to a regular file.
+
+NB: The target of a hardlink is always a regular file (and not a dir etc.).
+
+=item is_dir
+
+Returns a truth value if this entry is a dir.
+
+NB: Unlike the "-d $dir" operator this will never return true for
+symlinks, even if the symlink points to a dir.
+
+=item is_file
+
+Returns a truth value if this entry is a regular file (or a hardlink to one).
+
+NB: Unlike the "-f $file" operator this will never return true for
+symlinks, even if the symlink points to a file (or hardlink).
+
+=item is_regular_file
+
+Returns a truth value if this entry is a regular file.
+
+This is eqv. to $path->is_file and not $path->is_hardlink.
+
+NB: Unlike the "-f $file" operator this will never return true for
+symlinks, even if the symlink points to a file.
+
+=cut
+
+sub is_symlink {
+ my ($self) = @_;
+
+ return $self->path_info & TYPE_SYMLINK ? 1 : 0;
+}
+
+sub is_hardlink {
+ my ($self) = @_;
+
+ return $self->path_info & TYPE_HARDLINK ? 1 : 0;
+}
+
+sub is_dir {
+ my ($self) = @_;
+
+ return $self->path_info & TYPE_DIR ? 1 : 0;
+}
+
+sub is_file {
+ my ($self) = @_;
+
+ return $self->path_info & (TYPE_FILE | TYPE_HARDLINK) ? 1 : 0;
+}
+
+sub is_regular_file {
+ my ($self) = @_;
+
+ return $self->path_info & TYPE_FILE ? 1 : 0;
+}
+
+=item link_normalized
+
+Returns the target of the link normalized against it's directory name.
+If the link cannot be normalized or normalized path might escape the
+package root, this method returns C<undef>.
+
+NB: This method will return the empty string for links pointing to the
+root dir of the package.
+
+Only available on "links" (i.e. symlinks or hardlinks). On non-links
+this will croak.
+
+I<Symlinks only>: If you want the symlink target as a L<Lintian::File::Path>
+object, use the L<resolve_path|/resolve_path([PATH])> method with no
+arguments instead.
+
+=cut
+
+has link_normalized => (
+ is => 'rw',
+ lazy => 1,
+ default => sub {
+ my ($self) = @_;
+
+ my $name = $self->name;
+ my $link = $self->link;
+
+ croak encode_utf8($self->index->identifier . ": $name is not a link")
+ unless length $link;
+
+ my $dir = $self->dirname;
+
+ # hardlinks are always relative to the package root
+ $dir = $SLASH
+ if $self->is_hardlink;
+
+ my $target = normalize_link_target($dir, $link);
+
+ return $target;
+ }
+);
+
+=item is_readable
+
+Returns a truth value if the permission bits of this entry have
+at least one bit denoting readability set (bitmask 0444).
+
+=item is_writable
+
+Returns a truth value if the permission bits of this entry have
+at least one bit denoting writability set (bitmask 0222).
+
+=item is_executable
+
+Returns a truth value if the permission bits of this entry have
+at least one bit denoting executability set (bitmask 0111).
+
+=cut
+
+sub is_readable {
+ my ($self) = @_;
+
+ return $self->path_info & $READ_BITS;
+}
+
+sub is_writable {
+ my ($self) = @_;
+
+ return $self->path_info & $WRITE_BITS;
+}
+
+sub is_executable {
+ my ($self) = @_;
+
+ return $self->path_info & $EXECUTABLE_BITS;
+}
+
+=item all_bits_set
+
+=cut
+
+sub all_bits_set {
+ my ($self, $bits) = @_;
+
+ return ($self->operm & $bits) == $bits;
+}
+
+=item is_setuid
+
+=cut
+
+sub is_setuid {
+ my ($self) = @_;
+
+ return $self->operm & $SETUID;
+}
+
+=item is_setgid
+
+=cut
+
+sub is_setgid {
+ my ($self) = @_;
+
+ return $self->operm & $SETGID;
+}
+
+=item unpacked_path
+
+Returns the path to this object on the file system, which must be a
+regular file, a hardlink or a directory.
+
+This method may fail if:
+
+=over 4
+
+=item * The object is neither a directory or a file-like object (e.g. a
+named pipe).
+
+=item * If the object is dangling symlink or the path traverses a symlink
+outside the package root.
+
+=back
+
+To test if this is safe to call, if the target is (supposed) to be a:
+
+=over 4
+
+=item * file or hardlink then test with L</is_open_ok>.
+
+=item * dir then assert L<resolve_path|/resolve_path([PATH])> returns a
+defined entry, for which L</is_dir> returns a truth value.
+
+=back
+
+=cut
+
+sub unpacked_path {
+ my ($self) = @_;
+
+ $self->_check_access;
+
+ croak encode_utf8('No index in ' . $self->name)
+ unless defined $self->index;
+
+ my $basedir = $self->index->basedir;
+
+ croak encode_utf8($self->index->identifier . ': No base directory')
+ unless length $basedir;
+
+ my $unpacked = path($basedir)->child($self->name)->stringify;
+
+ # bug in perl, file operator should not care but does
+ # https://github.com/Perl/perl5/issues/10550
+ # also, https://github.com/Perl/perl5/issues/9674
+ utf8::downgrade $unpacked;
+
+ return $unpacked;
+}
+
+=item is_open_ok
+
+Returns a truth value if it is safe to attempt open a read handle to
+the underlying file object.
+
+Returns a truth value if the path may be opened.
+
+=cut
+
+sub is_open_ok {
+ my ($self) = @_;
+
+ my $path_info = $self->path_info;
+
+ return 1
+ if ($path_info & OPEN_IS_OK) == OPEN_IS_OK;
+
+ return 0
+ if $path_info & ACCESS_INFO;
+
+ try {
+ $self->_check_open;
+
+ } catch {
+ return 0;
+
+ # perlcritic 1.140-1 requires the semicolon on the next line
+ };
+
+ return 1;
+}
+
+sub _check_access {
+ my ($self) = @_;
+
+ my $path_info = $self->path_info;
+
+ return 1
+ if ($path_info & FS_PATH_IS_OK) == FS_PATH_IS_OK;
+
+ return 0
+ if $path_info & ACCESS_INFO;
+
+ my $resolvable = $self->resolve_path;
+ unless ($resolvable) {
+ $self->path_info($self->path_info | UNSAFE_PATH);
+ # NB: We are deliberately vague here to avoid suggesting
+ # whether $path exists. In some cases (e.g. lintian.d.o)
+ # the output is readily available to wider public.
+ confess encode_utf8($self->index->identifier
+ .': Attempt to access through broken or unsafe symlink: '
+ . $self->name);
+ }
+
+ $self->path_info($self->path_info | FS_PATH_IS_OK);
+
+ return 1;
+}
+
+sub _check_open {
+ my ($self) = @_;
+
+ $self->_check_access;
+
+ # Symlinks can point to a "non-file" object inside the
+ # package root
+ # Leave "_path_access" here as _check_access marks it either as
+ # "UNSAFE_PATH" or "FS_PATH_IS_OK"
+
+ confess encode_utf8($self->index->identifier
+ .': Opening of irregular file not supported: '
+ . $self->name)
+ unless $self->is_file || ($self->is_symlink && -e $self->unpacked_path);
+
+ $self->path_info($self->path_info | OPEN_IS_OK);
+
+ return 1;
+}
+
+=item follow
+
+Return dereferenced link if applicable
+
+=cut
+
+sub follow {
+ my ($self, $maxlinks) = @_;
+
+ return $self
+ unless length $self->link;
+
+ return $self->dereferenced
+ if defined $self->dereferenced;
+
+ # set limit
+ $maxlinks //= $MAXIMUM_LINK_DEPTH;
+
+ # catch recursive links
+ return undef
+ if $maxlinks <= 0;
+
+ # reduce counter
+ $maxlinks--;
+
+ my $reference;
+
+ croak encode_utf8('No index in ' . $self->name)
+ unless defined $self->index;
+
+ if ($self->is_hardlink) {
+ # hard links are resolved against package root
+ $reference = $self->index->lookup;
+
+ } else {
+ # otherwise resolve against the parent
+ $reference = $self->parent_dir;
+ }
+
+ croak encode_utf8($self->index->identifier
+ . ': No parent reference for link in '
+ . $self->name)
+ unless defined $reference;
+
+ # follow link
+ my $dereferenced = $reference->resolve_path($self->link, $maxlinks);
+ $self->dereferenced($dereferenced);
+
+ return $self->dereferenced;
+}
+
+=item resolve_path([PATH])
+
+Resolve PATH relative to this path entry.
+
+If PATH starts with a slash and the file hierarchy has a well-defined
+root directory, then PATH will instead be resolved relatively to the
+root dir. If the file hierarchy does not have a well-defined root dir
+(e.g. for source packages), this method will return C<undef>.
+
+If PATH is omitted, then the entry is resolved and the target is
+returned if it is valid. Except for symlinks, all entries always
+resolve to themselves. NB: hardlinks also resolve as themselves.
+
+It is an error to attempt to resolve a PATH against a non-directory
+and non-symlink entry - as such resolution would always fail
+(i.e. foo/../bar is an invalid path unless foo is a directory or a
+symlink to a dir).
+
+
+The resolution takes symlinks into account and following them provided
+that the target path is valid (and can be followed safely). If the
+path is invalid or circular (symlinks), escapes the root directory or
+follows an unsafe symlink, the method returns C<undef>. Otherwise, it
+returns the path entry that denotes the target path.
+
+
+If PATH contains at least one path segment and ends with a slash, then
+the resolved path will end in a directory (or fail). Otherwise, the
+resolved PATH can end in any entry I<except> a symlink.
+
+Examples:
+
+ $symlink_entry->resolve_path => $nonsymlink_entry OR undef
+
+ $x->resolve_path => $x
+
+ For directory or symlink entries (dol), you can also resolve a path:
+
+ $dol_entry->resolve_path('some/../where') => $nonsymlink_entry OR undef
+
+ # Note the trailing slash
+ $dol_entry->resolve_path('some/../where/') => $dir_entry OR undef
+
+=cut
+
+sub resolve_path {
+ my ($self, $request, $maxlinks) = @_;
+
+ croak encode_utf8('No index in ' . $self->name)
+ unless defined $self->index;
+
+ croak encode_utf8(
+ $self->index->identifier . ': Can only resolve string arguments')
+ if defined $request && ref($request) ne $EMPTY;
+
+ $request //= $EMPTY;
+
+ if (length $self->link) {
+ # follow the link
+ my $dereferenced = $self->follow($maxlinks);
+ return undef
+ unless defined $dereferenced;
+
+ # and use that to resolve the request
+ return $dereferenced->resolve_path($request, $maxlinks);
+ }
+
+ my $reference;
+
+ # check for absolute reference; remove slash
+ if ($request =~ s{^/+}{}s) {
+
+ # require anchoring for absolute references
+ return undef
+ unless $self->index->anchored;
+
+ # get root entry
+ $reference = $self->index->lookup;
+
+ } elsif ($self->is_dir) {
+ # directories are their own starting point
+ $reference = $self;
+
+ } else {
+ # otherwise, use parent directory
+ $reference = $self->parent_dir;
+ }
+
+ return undef
+ unless defined $reference;
+
+ # read first segment; strip all trailing slashes for recursive use
+ if ($request =~ s{^([^/]+)/*}{}) {
+
+ my $segment = $1;
+
+ # single dot, or two slashes in a row
+ return $reference->resolve_path($request, $maxlinks)
+ if $segment eq $DOT || !length $segment;
+
+ # for double dot, go up a level
+ if ($segment eq $DOUBLE_DOT) {
+ my $parent = $reference->parent_dir;
+ return undef
+ unless defined $parent;
+
+ return $parent->resolve_path($request, $maxlinks);
+ }
+
+ # look for child otherwise
+ my $child = $reference->child($segment);
+ return undef
+ unless defined $child;
+
+ return $child->resolve_path($request, $maxlinks);
+ }
+
+ croak encode_utf8($self->index->identifier
+ . ": Cannot parse path resolution request: $request")
+ if length $request;
+
+ # nothing else to resolve
+ return $self;
+}
+
+=item name
+
+Returns the name of the file (relative to the package root).
+
+NB: It will never have any leading "./" (or "/") in it.
+
+=item basename
+
+Returns the "filename" part of the name, similar basename(1) or
+File::Basename::basename (without passing a suffix to strip in either
+case).
+
+NB: Returns the empty string for the "root" dir.
+
+=item dirname
+
+Returns the "directory" part of the name, similar to dirname(1) or
+File::Basename::dirname. The dirname will end with a trailing slash
+(except the "root" dir - see below).
+
+NB: Returns the empty string for the "root" dir.
+
+=item link
+
+If this is a link (i.e. is_symlink or is_hardlink returns a truth
+value), this method returns the target of the link.
+
+If this is not a link, then this returns undef.
+
+If the path is a symlink this method can be used to determine if the
+symlink is relative or absolute. This is I<not> true for hardlinks,
+where the link target is always relative to the root.
+
+NB: Even for symlinks, a leading "./" will be stripped.
+
+=item normalized
+
+=item faux
+
+Returns a truth value if this entry absent in the package. This can
+happen if a package does not include all intermediate directories.
+
+=item size
+
+Returns the size of the path in bytes.
+
+NB: Only regular files can have a non-zero file size.
+
+=item date
+
+Return the modification date as YYYY-MM-DD.
+
+=item time
+
+=item perm
+
+=item path_info
+
+=item owner
+
+Returns the owner of the path entry as a username.
+
+NB: If only numerical owner information is available in the package,
+this may return a numerical owner (except uid 0 is always mapped to
+"root")
+
+=item group
+
+Returns the group of the path entry as a username.
+
+NB: If only numerical owner information is available in the package,
+this may return a numerical group (except gid 0 is always mapped to
+"root")
+
+=item uid
+
+Returns the uid of the owner of the path entry.
+
+NB: If the uid is not available, 0 will be returned.
+This usually happens if the numerical data is not collected (e.g. in
+source packages)
+
+=item gid
+
+Returns the gid of the owner of the path entry.
+
+NB: If the gid is not available, 0 will be returned.
+This usually happens if the numerical data is not collected (e.g. in
+source packages)
+
+=item file_type
+
+Return the data from L<file(1)> if it has been collected.
+
+Note this is only defined for files as Lintian only runs L<file(1)> on
+files.
+
+=item java_info
+
+=item strings
+
+=item C<basedir>
+
+=item index
+
+=item parent_dir
+
+=item child_table
+
+=item sorted_children
+
+Returns the parent directory entry of this entry as a
+L<Lintian::File::Path>.
+
+NB: Returns C<undef> for the "root" dir.
+
+=item C<childnames>
+
+=item parent_dir
+
+Return the parent dir entry of this the path entry.
+
+=item dereferenced
+
+=cut
+
+has name => (
+ is => 'rw',
+ lazy => 1,
+ coerce => sub { my ($string) = @_; return $string // $EMPTY;},
+ trigger => sub {
+ my ($self, $name) = @_;
+
+ my ($basename) = ($name =~ m{([^/]*)/?$}s);
+ $self->basename($basename);
+
+ # allow newline in names; need /s for dot matching (#929729)
+ my ($dirname) = ($name =~ m{^(.+/)?(?:[^/]+/?)$}s);
+ $self->dirname($dirname);
+ },
+ default => $EMPTY
+);
+has basename => (
+ is => 'rw',
+ lazy => 1,
+ coerce => sub { my ($string) = @_; return $string // $EMPTY;},
+ default => $EMPTY
+);
+has dirname => (
+ is => 'rw',
+ lazy => 1,
+ coerce => sub { my ($string) = @_; return $string // $EMPTY;},
+ default => $EMPTY
+);
+
+has link => (
+ is => 'rw',
+ coerce => sub { my ($string) = @_; return $string // $EMPTY;},
+ default => $EMPTY
+);
+has normalized => (
+ is => 'rw',
+ coerce => sub { my ($string) = @_; return $string // $EMPTY;},
+ default => $EMPTY
+);
+has faux => (is => 'rw', default => 0);
+
+has size => (is => 'rw', default => 0);
+has date => (
+ is => 'rw',
+ default => sub {
+ my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = gmtime;
+ return sprintf('%04d-%02d-%02d', $year, $mon, $mday);
+ }
+);
+has time => (
+ is => 'rw',
+ default => sub {
+ my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = gmtime;
+ return sprintf('%02d:%02d:%02d', $hour, $min, $sec);
+ }
+);
+
+has perm => (is => 'rw');
+has path_info => (is => 'rw');
+
+has owner => (
+ is => 'rw',
+ coerce => sub { my ($string) = @_; return $string // 'root'; },
+ default => 'root'
+);
+has group => (
+ is => 'rw',
+ coerce => sub { my ($string) = @_; return $string // 'root'; },
+ default => 'root'
+);
+has uid => (
+ is => 'rw',
+ coerce => sub { my ($value) = @_; return int($value // 0); },
+ default => 0
+);
+has gid => (
+ is => 'rw',
+ coerce => sub { my ($value) = @_; return int($value // 0); },
+ default => 0
+);
+
+has md5sum => (
+ is => 'rw',
+ coerce => sub { my ($checksum) = @_; return ($checksum // 0); },
+ default => 0
+);
+has file_type => (
+ is => 'rw',
+ coerce => sub { my ($text) = @_; return ($text // $EMPTY); },
+ default => $EMPTY
+);
+has java_info => (
+ is => 'rw',
+ coerce => sub { my ($hashref) = @_; return ($hashref // {}); },
+ default => sub { {} }
+);
+has strings => (
+ is => 'rw',
+ coerce => sub { my ($text) = @_; return ($text // $EMPTY); },
+ default => $EMPTY
+);
+has ar_info => (
+ is => 'rw',
+ coerce => sub { my ($hashref) = @_; return ($hashref // {}); },
+ default => sub { {} }
+);
+
+has index => (is => 'rw');
+has childnames => (is => 'rw', default => sub { {} });
+has parent_dir => (
+ is => 'rw',
+ lazy => 1,
+ default => sub {
+ my ($self) = @_;
+
+ # do not return root as its own parent
+ return
+ if $self->name eq $EMPTY;
+
+ croak encode_utf8('No index in ' . $self->name)
+ unless defined $self->index;
+
+ # returns root by default
+ return $self->index->lookup($self->dirname);
+ }
+);
+has dereferenced => (is => 'rw');
+
+=item elf
+
+=cut
+
+sub elf {
+ my ($self, @args) = @_;
+
+ if (@args) {
+
+ $self->index->elf_storage->{$self->name} = $args[0];
+
+ return ();
+ }
+
+ my %copy = %{$self->index->elf_storage->{$self->name} // {} };
+
+ return \%copy;
+}
+
+=item elf_by_member
+
+=cut
+
+sub elf_by_member {
+ my ($self, @args) = @_;
+
+ if (@args) {
+
+ my $object_name = $args[0];
+ my $by_object = $args[1];
+
+ my $tmp = $self->index->elf_storage_by_member->{$self->name} // {};
+ $tmp->{$object_name} = $by_object;
+ $self->index->elf_storage_by_member->{$self->name} = $tmp;
+
+ return ();
+ }
+
+ my %copy = %{$self->index->elf_storage_by_member->{$self->name} // {} };
+
+ return \%copy;
+}
+
+=item pointer
+
+=cut
+
+sub pointer {
+ my ($self, $position) = @_;
+
+ my $pointer = Lintian::Pointer::Item->new;
+ $pointer->item($self);
+ $pointer->position($position);
+
+ return $pointer;
+}
+
+=item bytes
+
+Returns verbatim file contents as a scalar.
+
+=item is_valid_utf8
+
+Boolean true if file contents are valid UTF-8.
+
+=item decoded_utf8
+
+Returns a decoded, wide-character string if file contents are valid UTF-8.
+
+=cut
+
+sub bytes {
+ my ($self) = @_;
+
+ return $EMPTY
+ unless $self->is_open_ok;
+
+ my $bytes = path($self->unpacked_path)->slurp;
+
+ return $bytes;
+}
+
+sub is_valid_utf8 {
+ my ($self) = @_;
+
+ my $bytes = $self->bytes;
+ return 0
+ unless defined $bytes;
+
+ return valid_utf8($bytes);
+}
+
+sub decoded_utf8 {
+ my ($self) = @_;
+
+ return $EMPTY
+ unless $self->is_valid_utf8;
+
+ return decode_utf8($self->bytes);
+}
+
+### OVERLOADED OPERATORS ###
+
+# overload apparently does not like the mk_ro_accessor, so use a level
+# of indirection
+
+sub _as_regex_ref {
+ my ($self) = @_;
+ my $name = $self->name;
+ return qr{ \Q$name\E }xsm;
+}
+
+sub _as_string {
+ my ($self) = @_;
+ return $self->name;
+}
+
+sub _bool {
+ # Always true (used in "if ($info->index('some/path')) {...}")
+ return 1;
+}
+
+sub _bool_not {
+ my ($self) = @_;
+ return !$self->_bool;
+}
+
+sub _str_cmp {
+ my ($self, $str, $swap) = @_;
+ return $str cmp $self->name if $swap;
+ return $self->name cmp $str;
+}
+
+sub _str_concat {
+ my ($self, $str, $swap) = @_;
+ return $str . $self->name if $swap;
+ return $self->name . $str;
+}
+
+sub _str_eq {
+ my ($self, $str) = @_;
+ return $self->name eq $str;
+}
+
+sub _str_ne {
+ my ($self, $str) = @_;
+ return $self->name ne $str;
+}
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Niels Thykier <niels@thykier.net> for Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et
+