summaryrefslogtreecommitdiffstats
path: root/lib/Lintian/Check/Shell/NonPosix
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Lintian/Check/Shell/NonPosix')
-rw-r--r--lib/Lintian/Check/Shell/NonPosix/BashCentric.pm348
1 files changed, 348 insertions, 0 deletions
diff --git a/lib/Lintian/Check/Shell/NonPosix/BashCentric.pm b/lib/Lintian/Check/Shell/NonPosix/BashCentric.pm
new file mode 100644
index 0000000..024ea6a
--- /dev/null
+++ b/lib/Lintian/Check/Shell/NonPosix/BashCentric.pm
@@ -0,0 +1,348 @@
+# shell/non-posix/bash-centric -- lintian check script -*- perl -*-
+#
+# Copyright (C) 1998 Richard Braakman
+# Copyright (C) 2002 Josip Rodin
+# Copyright (C) 2016-2019 Chris Lamb <lamby@debian.org>
+# Copyright (C) 2021 Felix Lechner
+# Copyright (C) 2021 Rafael Laboissiere
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, you can find it on the World Wide
+# Web at https://www.gnu.org/copyleft/gpl.html, or write to the Free
+# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+# MA 02110-1301, USA.
+
+# bashism sounded too much like fascism
+package Lintian::Check::Shell::NonPosix::BashCentric;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Const::Fast;
+use File::Basename;
+use List::SomeUtils qw(uniq);
+use Unicode::UTF8 qw(encode_utf8);
+
+use Moo;
+use namespace::clean;
+
+with 'Lintian::Check';
+
+const my $EMPTY => q{};
+const my $SLASH => q{/};
+
+# When detecting commands inside shell scripts, use this regex to match the
+# beginning of the command rather than checking whether the command is at the
+# beginning of a line.
+const my $LEADING_PATTERN=>
+'(?:(?:^|[`&;(|{])\s*|(?:if|then|do|while|!)\s+|env(?:\s+[[:alnum:]_]+=(?:\S+|\"[^"]*\"|\'[^\']*\'))*\s+)';
+const my $LEADING_REGEX => qr/$LEADING_PATTERN/;
+
+my @bashism_single_quote_regexes = (
+ $LEADING_REGEX
+ . qr{echo\s+(?:-[^e\s]+\s+)?\'[^\']*(\\[abcEfnrtv0])+.*?[\']},
+ # unsafe echo with backslashes
+ $LEADING_REGEX . qr{source\s+[\"\']?(?:\.\/|[\/\$\w~.-])\S*},
+ # should be '.', not 'source'
+);
+
+my @bashism_string_regexes = (
+ qr/\$\[\w+\]/, # arith not allowed
+ qr/\$\{\w+\:\d+(?::\d+)?\}/, # ${foo:3[:1]}
+ qr/\$\{\w+(\/.+?){1,2}\}/, # ${parm/?/pat[/str]}
+ qr/\$\{\#?\w+\[[0-9\*\@]+\]\}/,# bash arrays, ${name[0|*|@]}
+ qr/\$\{!\w+[\@*]\}/, # ${!prefix[*|@]}
+ qr/\$\{!\w+\}/, # ${!name}
+ qr/(\$\(|\`)\s*\<\s*\S+\s*([\)\`])/, # $(\< foo) should be $(cat foo)
+ qr/\$\{?RANDOM\}?\b/, # $RANDOM
+ qr/\$\{?(OS|MACH)TYPE\}?\b/, # $(OS|MACH)TYPE
+ qr/\$\{?HOST(TYPE|NAME)\}?\b/, # $HOST(TYPE|NAME)
+ qr/\$\{?DIRSTACK\}?\b/, # $DIRSTACK
+ qr/\$\{?EUID\}?\b/, # $EUID should be "id -u"
+ qr/\$\{?UID\}?\b/, # $UID should be "id -ru"
+ qr/\$\{?SECONDS\}?\b/, # $SECONDS
+ qr/\$\{?BASH_[A-Z]+\}?\b/, # $BASH_SOMETHING
+ qr/\$\{?SHELLOPTS\}?\b/, # $SHELLOPTS
+ qr/\$\{?PIPESTATUS\}?\b/, # $PIPESTATUS
+ qr/\$\{?SHLVL\}?\b/, # $SHLVL
+ qr/<<</, # <<< here string
+ $LEADING_REGEX
+ . qr/echo\s+(?:-[^e\s]+\s+)?\"[^\"]*(\\[abcEfnrtv0])+.*?[\"]/,
+ # unsafe echo with backslashes
+);
+
+my @bashism_regexes = (
+ qr/(?:^|\s+)function \w+(\s|\(|\Z)/, # function is useless
+ qr/(test|-o|-a)\s*[^\s]+\s+==\s/, # should be 'b = a'
+ qr/\[\s+[^\]]+\s+==\s/, # should be 'b = a'
+ qr/\s(\|\&)/, # pipelining is not POSIX
+ qr/[^\\\$]\{(?:[^\s\\\}]*?,)+[^\\\}\s]*\}/, # brace expansion
+ qr/(?:^|\s+)\w+\[\d+\]=/, # bash arrays, H[0]
+ $LEADING_REGEX . qr/read\s+(?:-[a-qs-zA-Z\d-]+)/,
+ # read with option other than -r
+ $LEADING_REGEX . qr/read\s*(?:-\w+\s*)*(?:\".*?\"|[\'].*?[\'])?\s*(?:;|$)/,
+ # read without variable
+ qr/\&>/, # cshism
+ qr/(<\&|>\&)\s*((-|\d+)[^\s;|)`&\\\\]|[^-\d\s]+)/, # should be >word 2>&1
+ qr/\[\[(?!:)/, # alternative test command
+ $LEADING_REGEX . qr/select\s+\w+/, # 'select' is not POSIX
+ $LEADING_REGEX . qr/echo\s+(-n\s+)?-n?en?/, # echo -e
+ $LEADING_REGEX . qr/exec\s+-[acl]/, # exec -c/-l/-a name
+ qr/(?:^|\s+)let\s/, # let ...
+ qr/(?<![\$\(])\(\(.*\)\)/, # '((' should be '$(('
+ qr/\$\[[^][]+\]/, # '$[' should be '$(('
+ qr/(\[|test)\s+-a/, # test with unary -a (should be -e)
+ qr{/dev/(tcp|udp)}, # /dev/(tcp|udp)
+ $LEADING_REGEX . qr/\w+\+=/, # should be "VAR="${VAR}foo"
+ $LEADING_REGEX . qr/suspend\s/,
+ $LEADING_REGEX . qr/caller\s/,
+ $LEADING_REGEX . qr/complete\s/,
+ $LEADING_REGEX . qr/compgen\s/,
+ $LEADING_REGEX . qr/declare\s/,
+ $LEADING_REGEX . qr/typeset\s/,
+ $LEADING_REGEX . qr/disown\s/,
+ $LEADING_REGEX . qr/builtin\s/,
+ $LEADING_REGEX . qr/set\s+-[BHT]+/, # set -[BHT]
+ $LEADING_REGEX . qr/alias\s+-p/, # alias -p
+ $LEADING_REGEX . qr/unalias\s+-a/, # unalias -a
+ $LEADING_REGEX . qr/local\s+-[a-zA-Z]+/, # local -opt
+ qr/(?:^|\s+)\s*\(?\w*[^\(\w\s]+\S*?\s*\(\)\s*([\{|\(]|\Z)/,
+ # function names should only contain [a-z0-9_]
+ $LEADING_REGEX . qr/(push|pop)d(\s|\Z)/, # (push|pod)d
+ $LEADING_REGEX . qr/export\s+-[^p]/, # export only takes -p as an option
+ $LEADING_REGEX . qr/ulimit(\s|\Z)/,
+ $LEADING_REGEX . qr/shopt(\s|\Z)/,
+ $LEADING_REGEX . qr/time\s/,
+ $LEADING_REGEX . qr/dirs(\s|\Z)/,
+ qr/(?:^|\s+)[<>]\(.*?\)/, # <() process substitution
+ qr/(?:^|\s+)readonly\s+-[af]/, # readonly -[af]
+ $LEADING_REGEX . qr/(sh|\$\{?SHELL\}?) -[rD]/, # sh -[rD]
+ $LEADING_REGEX . qr/(sh|\$\{?SHELL\}?) --\w+/, # sh --long-option
+ $LEADING_REGEX . qr/(sh|\$\{?SHELL\}?) [-+]O/, # sh [-+]O
+);
+
+sub visit_installed_files {
+ my ($self, $item) = @_;
+
+ return
+ unless length $item->interpreter;
+
+ my $basename = basename($item->interpreter);
+
+ return
+ unless $basename eq 'sh';
+
+ $self->check_bash_centric($item, 'bash-term-in-posix-shell');
+
+ return;
+}
+
+sub visit_control_files {
+ my ($self, $item) = @_;
+
+ return
+ unless length $item->interpreter;
+
+ my $basename = basename($item->interpreter);
+
+ return
+ unless $basename eq 'sh';
+
+ $self->check_bash_centric($item, 'possible-bashism-in-maintainer-script');
+
+ return;
+}
+
+sub check_bash_centric {
+ my ($self, $item, $tag_name) = @_;
+
+ return
+ unless $item->is_open_ok;
+
+ open(my $fd, '<', $item->unpacked_path)
+ or die encode_utf8('Cannot open ' . $item->unpacked_path);
+
+ my $stashed = $EMPTY;
+
+ my $position = 1;
+ while (my $possible_continuation = <$fd>) {
+
+ chomp $possible_continuation;
+
+ # skip empty lines
+ next
+ if $possible_continuation =~ /^\s*$/;
+
+ # skip comment lines
+ next
+ if $possible_continuation =~ /^\s*\#/;
+
+ my $no_comment = remove_comments($possible_continuation);
+
+ # Concatenate lines containing continuation character (\)
+ # at the end
+ if ($no_comment =~ s{\\$}{}) {
+
+ $stashed .= $no_comment;
+
+ next;
+ }
+
+ my $line = $stashed . $no_comment;
+ $stashed = $EMPTY;
+
+ # see Bug#999756 and tclsh(1)
+ last
+ if $line =~ m{^ exec \s }x;
+
+ my $pointer = $item->pointer($position);
+
+ my @matches = uniq +$self->check_line($line);
+
+ for my $match (@matches) {
+
+ my $printable = "'$match'";
+ $printable = '{hex:' . sprintf('%vX', $match) . '}'
+ if $match =~ /\P{XPosixPrint}/;
+
+ $self->pointed_hint($tag_name, $pointer, $printable);
+ }
+
+ } continue {
+ ++$position;
+ }
+
+ close $fd;
+
+ return;
+}
+
+sub check_line {
+ my ($self, $line) = @_;
+
+ my @matches;
+
+ # since this test is ugly, I have to do it by itself
+ # detect source (.) trying to pass args to the command it runs
+ # The first expression weeds out '. "foo bar"'
+ if (
+ $line !~ m{\A \s*\.\s+
+ (?:\"[^\"]+\"|\'[^\']+\')\s*
+ (?:[\&\|<;]|\d?>|\Z)}xsm
+ && $line =~ /^\s*(\.\s+[^\s;\`:]+\s+([^\s;]+))/
+ ) {
+
+ my ($dot_command, $extra) = ($1, $2);
+
+ push(@matches, $dot_command)
+ if length $dot_command
+ && $extra !~ m{^ & | [|] | < | \d? > }x;
+ }
+
+ my $modified = $line;
+
+ for my $regex (@bashism_single_quote_regexes) {
+ if ($modified =~ $regex) {
+
+ # on unmodified line
+ my ($match) = ($line =~ /($regex)/);
+
+ push(@matches, $match)
+ if length $match;
+ }
+ }
+
+ # Ignore anything inside single quotes; it could be an
+ # argument to grep or the like.
+
+ # Remove "quoted quotes". They're likely to be
+ # inside another pair of quotes; we're not
+ # interested in them for their own sake and
+ # removing them makes finding the limits of
+ # the outer pair far easier.
+ $modified =~ s/(^|[^\\\'\"])\"\'\"/$1/g;
+ $modified =~ s/(^|[^\\\'\"])\'\"\'/$1/g;
+
+ $modified =~ s/(^|[^\\\"](?:\\\\)*)\'(?:\\.|[^\\\'])+\'/$1''/g;
+
+ for my $regex (@bashism_string_regexes) {
+ if ($modified =~ $regex) {
+
+ # on unmodified line
+ my ($match) = ($line =~ /($regex)/);
+
+ $match //= $EMPTY;
+
+ push(@matches, $match)
+ if length $match;
+ }
+ }
+
+ $modified =~ s/(^|[^\\\'](?:\\\\)*)\"(?:\\.|[^\\\"])+\"/$1""/g;
+
+ for my $regex (@bashism_regexes) {
+ if ($modified =~ $regex) {
+
+ # on unmodified line
+ my ($match) = ($line =~ /($regex)/);
+
+ $match //= $EMPTY;
+
+ push(@matches, $match)
+ if length $match;
+ }
+ }
+
+ # trim both ends of each element
+ s/^\s+|\s+$//g for @matches;
+
+ my @meaningful = grep { length } @matches;
+
+ return @meaningful;
+}
+
+sub remove_comments {
+ my ($line) = @_;
+
+ return $line
+ unless length $line;
+
+ my $simplified = $line;
+
+ # Remove quoted strings so we can more easily ignore comments
+ # inside them
+ $simplified =~ s/(^|[^\\](?:\\\\)*)\'(?:\\.|[^\\\'])+\'/$1''/g;
+ $simplified =~ s/(^|[^\\](?:\\\\)*)\"(?:\\.|[^\\\"])+\"/$1""/g;
+
+ # If the remaining string contains what looks like a comment,
+ # eat it. In either case, swap the unmodified script line
+ # back in for processing (if required) and return it.
+ if ($simplified =~ m/(?:^|[^[\\])[\s\&;\(\)](\#.*$)/) {
+
+ my $comment = $1;
+
+ # eat comment
+ $line =~ s/\Q$comment\E//;
+ }
+
+ return $line;
+}
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et