summaryrefslogtreecommitdiffstats
path: root/t/chainlint.pl
diff options
context:
space:
mode:
Diffstat (limited to '')
-rwxr-xr-xt/chainlint.pl823
1 files changed, 823 insertions, 0 deletions
diff --git a/t/chainlint.pl b/t/chainlint.pl
new file mode 100755
index 0000000..e966412
--- /dev/null
+++ b/t/chainlint.pl
@@ -0,0 +1,823 @@
+#!/usr/bin/env perl
+#
+# Copyright (c) 2021-2022 Eric Sunshine <sunshine@sunshineco.com>
+#
+# This tool scans shell scripts for test definitions and checks those tests for
+# problems, such as broken &&-chains, which might hide bugs in the tests
+# themselves or in behaviors being exercised by the tests.
+#
+# Input arguments are pathnames of shell scripts containing test definitions,
+# or globs referencing a collection of scripts. For each problem discovered,
+# the pathname of the script containing the test is printed along with the test
+# name and the test body with a `?!FOO?!` annotation at the location of each
+# detected problem, where "FOO" is a tag such as "AMP" which indicates a broken
+# &&-chain. Returns zero if no problems are discovered, otherwise non-zero.
+
+use warnings;
+use strict;
+use Config;
+use File::Glob;
+use Getopt::Long;
+
+my $jobs = -1;
+my $show_stats;
+my $emit_all;
+
+# Lexer tokenizes POSIX shell scripts. It is roughly modeled after section 2.3
+# "Token Recognition" of POSIX chapter 2 "Shell Command Language". Although
+# similar to lexical analyzers for other languages, this one differs in a few
+# substantial ways due to quirks of the shell command language.
+#
+# For instance, in many languages, newline is just whitespace like space or
+# TAB, but in shell a newline is a command separator, thus a distinct lexical
+# token. A newline is significant and returned as a distinct token even at the
+# end of a shell comment.
+#
+# In other languages, `1+2` would typically be scanned as three tokens
+# (`1`, `+`, and `2`), but in shell it is a single token. However, the similar
+# `1 + 2`, which embeds whitepace, is scanned as three token in shell, as well.
+# In shell, several characters with special meaning lose that meaning when not
+# surrounded by whitespace. For instance, the negation operator `!` is special
+# when standing alone surrounded by whitespace; whereas in `foo!uucp` it is
+# just a plain character in the longer token "foo!uucp". In many other
+# languages, `"string"/foo:'string'` might be scanned as five tokens ("string",
+# `/`, `foo`, `:`, and 'string'), but in shell, it is just a single token.
+#
+# The lexical analyzer for the shell command language is also somewhat unusual
+# in that it recursively invokes the parser to handle the body of `$(...)`
+# expressions which can contain arbitrary shell code. Such expressions may be
+# encountered both inside and outside of double-quoted strings.
+#
+# The lexical analyzer is responsible for consuming shell here-doc bodies which
+# extend from the line following a `<<TAG` operator until a line consisting
+# solely of `TAG`. Here-doc consumption begins when a newline is encountered.
+# It is legal for multiple here-doc `<<TAG` operators to be present on a single
+# line, in which case their bodies must be present one following the next, and
+# are consumed in the (left-to-right) order the `<<TAG` operators appear on the
+# line. A special complication is that the bodies of all here-docs must be
+# consumed when the newline is encountered even if the parse context depth has
+# changed. For instance, in `cat <<A && x=$(cat <<B &&\n`, bodies of here-docs
+# "A" and "B" must be consumed even though "A" was introduced outside the
+# recursive parse context in which "B" was introduced and in which the newline
+# is encountered.
+package Lexer;
+
+sub new {
+ my ($class, $parser, $s) = @_;
+ bless {
+ parser => $parser,
+ buff => $s,
+ lineno => 1,
+ heretags => []
+ } => $class;
+}
+
+sub scan_heredoc_tag {
+ my $self = shift @_;
+ ${$self->{buff}} =~ /\G(-?)/gc;
+ my $indented = $1;
+ my $token = $self->scan_token();
+ return "<<$indented" unless $token;
+ my $tag = $token->[0];
+ $tag =~ s/['"\\]//g;
+ push(@{$self->{heretags}}, $indented ? "\t$tag" : "$tag");
+ return "<<$indented$tag";
+}
+
+sub scan_op {
+ my ($self, $c) = @_;
+ my $b = $self->{buff};
+ return $c unless $$b =~ /\G(.)/sgc;
+ my $cc = $c . $1;
+ return scan_heredoc_tag($self) if $cc eq '<<';
+ return $cc if $cc =~ /^(?:&&|\|\||>>|;;|<&|>&|<>|>\|)$/;
+ pos($$b)--;
+ return $c;
+}
+
+sub scan_sqstring {
+ my $self = shift @_;
+ ${$self->{buff}} =~ /\G([^']*'|.*\z)/sgc;
+ my $s = $1;
+ $self->{lineno} += () = $s =~ /\n/sg;
+ return "'" . $s;
+}
+
+sub scan_dqstring {
+ my $self = shift @_;
+ my $b = $self->{buff};
+ my $s = '"';
+ while (1) {
+ # slurp up non-special characters
+ $s .= $1 if $$b =~ /\G([^"\$\\]+)/gc;
+ # handle special characters
+ last unless $$b =~ /\G(.)/sgc;
+ my $c = $1;
+ $s .= '"', last if $c eq '"';
+ $s .= '$' . $self->scan_dollar(), next if $c eq '$';
+ if ($c eq '\\') {
+ $s .= '\\', last unless $$b =~ /\G(.)/sgc;
+ $c = $1;
+ $self->{lineno}++, next if $c eq "\n"; # line splice
+ # backslash escapes only $, `, ", \ in dq-string
+ $s .= '\\' unless $c =~ /^[\$`"\\]$/;
+ $s .= $c;
+ next;
+ }
+ die("internal error scanning dq-string '$c'\n");
+ }
+ $self->{lineno} += () = $s =~ /\n/sg;
+ return $s;
+}
+
+sub scan_balanced {
+ my ($self, $c1, $c2) = @_;
+ my $b = $self->{buff};
+ my $depth = 1;
+ my $s = $c1;
+ while ($$b =~ /\G([^\Q$c1$c2\E]*(?:[\Q$c1$c2\E]|\z))/gc) {
+ $s .= $1;
+ $depth++, next if $s =~ /\Q$c1\E$/;
+ $depth--;
+ last if $depth == 0;
+ }
+ $self->{lineno} += () = $s =~ /\n/sg;
+ return $s;
+}
+
+sub scan_subst {
+ my $self = shift @_;
+ my @tokens = $self->{parser}->parse(qr/^\)$/);
+ $self->{parser}->next_token(); # closing ")"
+ return @tokens;
+}
+
+sub scan_dollar {
+ my $self = shift @_;
+ my $b = $self->{buff};
+ return $self->scan_balanced('(', ')') if $$b =~ /\G\((?=\()/gc; # $((...))
+ return '(' . join(' ', map {$_->[0]} $self->scan_subst()) . ')' if $$b =~ /\G\(/gc; # $(...)
+ return $self->scan_balanced('{', '}') if $$b =~ /\G\{/gc; # ${...}
+ return $1 if $$b =~ /\G(\w+)/gc; # $var
+ return $1 if $$b =~ /\G([@*#?$!0-9-])/gc; # $*, $1, $$, etc.
+ return '';
+}
+
+sub swallow_heredocs {
+ my $self = shift @_;
+ my $b = $self->{buff};
+ my $tags = $self->{heretags};
+ while (my $tag = shift @$tags) {
+ my $start = pos($$b);
+ my $indent = $tag =~ s/^\t// ? '\\s*' : '';
+ $$b =~ /(?:\G|\n)$indent\Q$tag\E(?:\n|\z)/gc;
+ my $body = substr($$b, $start, pos($$b) - $start);
+ $self->{lineno} += () = $body =~ /\n/sg;
+ }
+}
+
+sub scan_token {
+ my $self = shift @_;
+ my $b = $self->{buff};
+ my $token = '';
+ my ($start, $startln);
+RESTART:
+ $startln = $self->{lineno};
+ $$b =~ /\G[ \t]+/gc; # skip whitespace (but not newline)
+ $start = pos($$b) || 0;
+ $self->{lineno}++, return ["\n", $start, pos($$b), $startln, $startln] if $$b =~ /\G#[^\n]*(?:\n|\z)/gc; # comment
+ while (1) {
+ # slurp up non-special characters
+ $token .= $1 if $$b =~ /\G([^\\;&|<>(){}'"\$\s]+)/gc;
+ # handle special characters
+ last unless $$b =~ /\G(.)/sgc;
+ my $c = $1;
+ pos($$b)--, last if $c =~ /^[ \t]$/; # whitespace ends token
+ pos($$b)--, last if length($token) && $c =~ /^[;&|<>(){}\n]$/;
+ $token .= $self->scan_sqstring(), next if $c eq "'";
+ $token .= $self->scan_dqstring(), next if $c eq '"';
+ $token .= $c . $self->scan_dollar(), next if $c eq '$';
+ $self->{lineno}++, $self->swallow_heredocs(), $token = $c, last if $c eq "\n";
+ $token = $self->scan_op($c), last if $c =~ /^[;&|<>]$/;
+ $token = $c, last if $c =~ /^[(){}]$/;
+ if ($c eq '\\') {
+ $token .= '\\', last unless $$b =~ /\G(.)/sgc;
+ $c = $1;
+ $self->{lineno}++, next if $c eq "\n" && length($token); # line splice
+ $self->{lineno}++, goto RESTART if $c eq "\n"; # line splice
+ $token .= '\\' . $c;
+ next;
+ }
+ die("internal error scanning character '$c'\n");
+ }
+ return length($token) ? [$token, $start, pos($$b), $startln, $self->{lineno}] : undef;
+}
+
+# ShellParser parses POSIX shell scripts (with minor extensions for Bash). It
+# is a recursive descent parser very roughly modeled after section 2.10 "Shell
+# Grammar" of POSIX chapter 2 "Shell Command Language".
+package ShellParser;
+
+sub new {
+ my ($class, $s) = @_;
+ my $self = bless {
+ buff => [],
+ stop => [],
+ output => []
+ } => $class;
+ $self->{lexer} = Lexer->new($self, $s);
+ return $self;
+}
+
+sub next_token {
+ my $self = shift @_;
+ return pop(@{$self->{buff}}) if @{$self->{buff}};
+ return $self->{lexer}->scan_token();
+}
+
+sub untoken {
+ my $self = shift @_;
+ push(@{$self->{buff}}, @_);
+}
+
+sub peek {
+ my $self = shift @_;
+ my $token = $self->next_token();
+ return undef unless defined($token);
+ $self->untoken($token);
+ return $token;
+}
+
+sub stop_at {
+ my ($self, $token) = @_;
+ return 1 unless defined($token);
+ my $stop = ${$self->{stop}}[-1] if @{$self->{stop}};
+ return defined($stop) && $token->[0] =~ $stop;
+}
+
+sub expect {
+ my ($self, $expect) = @_;
+ my $token = $self->next_token();
+ return $token if defined($token) && $token->[0] eq $expect;
+ push(@{$self->{output}}, "?!ERR?! expected '$expect' but found '" . (defined($token) ? $token->[0] : "<end-of-input>") . "'\n");
+ $self->untoken($token) if defined($token);
+ return ();
+}
+
+sub optional_newlines {
+ my $self = shift @_;
+ my @tokens;
+ while (my $token = $self->peek()) {
+ last unless $token->[0] eq "\n";
+ push(@tokens, $self->next_token());
+ }
+ return @tokens;
+}
+
+sub parse_group {
+ my $self = shift @_;
+ return ($self->parse(qr/^}$/),
+ $self->expect('}'));
+}
+
+sub parse_subshell {
+ my $self = shift @_;
+ return ($self->parse(qr/^\)$/),
+ $self->expect(')'));
+}
+
+sub parse_case_pattern {
+ my $self = shift @_;
+ my @tokens;
+ while (defined(my $token = $self->next_token())) {
+ push(@tokens, $token);
+ last if $token->[0] eq ')';
+ }
+ return @tokens;
+}
+
+sub parse_case {
+ my $self = shift @_;
+ my @tokens;
+ push(@tokens,
+ $self->next_token(), # subject
+ $self->optional_newlines(),
+ $self->expect('in'),
+ $self->optional_newlines());
+ while (1) {
+ my $token = $self->peek();
+ last unless defined($token) && $token->[0] ne 'esac';
+ push(@tokens,
+ $self->parse_case_pattern(),
+ $self->optional_newlines(),
+ $self->parse(qr/^(?:;;|esac)$/)); # item body
+ $token = $self->peek();
+ last unless defined($token) && $token->[0] ne 'esac';
+ push(@tokens,
+ $self->expect(';;'),
+ $self->optional_newlines());
+ }
+ push(@tokens, $self->expect('esac'));
+ return @tokens;
+}
+
+sub parse_for {
+ my $self = shift @_;
+ my @tokens;
+ push(@tokens,
+ $self->next_token(), # variable
+ $self->optional_newlines());
+ my $token = $self->peek();
+ if (defined($token) && $token->[0] eq 'in') {
+ push(@tokens,
+ $self->expect('in'),
+ $self->optional_newlines());
+ }
+ push(@tokens,
+ $self->parse(qr/^do$/), # items
+ $self->expect('do'),
+ $self->optional_newlines(),
+ $self->parse_loop_body(),
+ $self->expect('done'));
+ return @tokens;
+}
+
+sub parse_if {
+ my $self = shift @_;
+ my @tokens;
+ while (1) {
+ push(@tokens,
+ $self->parse(qr/^then$/), # if/elif condition
+ $self->expect('then'),
+ $self->optional_newlines(),
+ $self->parse(qr/^(?:elif|else|fi)$/)); # if/elif body
+ my $token = $self->peek();
+ last unless defined($token) && $token->[0] eq 'elif';
+ push(@tokens, $self->expect('elif'));
+ }
+ my $token = $self->peek();
+ if (defined($token) && $token->[0] eq 'else') {
+ push(@tokens,
+ $self->expect('else'),
+ $self->optional_newlines(),
+ $self->parse(qr/^fi$/)); # else body
+ }
+ push(@tokens, $self->expect('fi'));
+ return @tokens;
+}
+
+sub parse_loop_body {
+ my $self = shift @_;
+ return $self->parse(qr/^done$/);
+}
+
+sub parse_loop {
+ my $self = shift @_;
+ return ($self->parse(qr/^do$/), # condition
+ $self->expect('do'),
+ $self->optional_newlines(),
+ $self->parse_loop_body(),
+ $self->expect('done'));
+}
+
+sub parse_func {
+ my $self = shift @_;
+ return ($self->expect('('),
+ $self->expect(')'),
+ $self->optional_newlines(),
+ $self->parse_cmd()); # body
+}
+
+sub parse_bash_array_assignment {
+ my $self = shift @_;
+ my @tokens = $self->expect('(');
+ while (defined(my $token = $self->next_token())) {
+ push(@tokens, $token);
+ last if $token->[0] eq ')';
+ }
+ return @tokens;
+}
+
+my %compound = (
+ '{' => \&parse_group,
+ '(' => \&parse_subshell,
+ 'case' => \&parse_case,
+ 'for' => \&parse_for,
+ 'if' => \&parse_if,
+ 'until' => \&parse_loop,
+ 'while' => \&parse_loop);
+
+sub parse_cmd {
+ my $self = shift @_;
+ my $cmd = $self->next_token();
+ return () unless defined($cmd);
+ return $cmd if $cmd->[0] eq "\n";
+
+ my $token;
+ my @tokens = $cmd;
+ if ($cmd->[0] eq '!') {
+ push(@tokens, $self->parse_cmd());
+ return @tokens;
+ } elsif (my $f = $compound{$cmd->[0]}) {
+ push(@tokens, $self->$f());
+ } elsif (defined($token = $self->peek()) && $token->[0] eq '(') {
+ if ($cmd->[0] !~ /\w=$/) {
+ push(@tokens, $self->parse_func());
+ return @tokens;
+ }
+ my @array = $self->parse_bash_array_assignment();
+ $tokens[-1]->[0] .= join(' ', map {$_->[0]} @array);
+ $tokens[-1]->[2] = $array[$#array][2] if @array;
+ }
+
+ while (defined(my $token = $self->next_token())) {
+ $self->untoken($token), last if $self->stop_at($token);
+ push(@tokens, $token);
+ last if $token->[0] =~ /^(?:[;&\n|]|&&|\|\|)$/;
+ }
+ push(@tokens, $self->next_token()) if $tokens[-1]->[0] ne "\n" && defined($token = $self->peek()) && $token->[0] eq "\n";
+ return @tokens;
+}
+
+sub accumulate {
+ my ($self, $tokens, $cmd) = @_;
+ push(@$tokens, @$cmd);
+}
+
+sub parse {
+ my ($self, $stop) = @_;
+ push(@{$self->{stop}}, $stop);
+ goto DONE if $self->stop_at($self->peek());
+ my @tokens;
+ while (my @cmd = $self->parse_cmd()) {
+ $self->accumulate(\@tokens, \@cmd);
+ last if $self->stop_at($self->peek());
+ }
+DONE:
+ pop(@{$self->{stop}});
+ return @tokens;
+}
+
+# TestParser is a subclass of ShellParser which, beyond parsing shell script
+# code, is also imbued with semantic knowledge of test construction, and checks
+# tests for common problems (such as broken &&-chains) which might hide bugs in
+# the tests themselves or in behaviors being exercised by the tests. As such,
+# TestParser is only called upon to parse test bodies, not the top-level
+# scripts in which the tests are defined.
+package TestParser;
+
+use base 'ShellParser';
+
+sub new {
+ my $class = shift @_;
+ my $self = $class->SUPER::new(@_);
+ $self->{problems} = [];
+ return $self;
+}
+
+sub find_non_nl {
+ my $tokens = shift @_;
+ my $n = shift @_;
+ $n = $#$tokens if !defined($n);
+ $n-- while $n >= 0 && $$tokens[$n]->[0] eq "\n";
+ return $n;
+}
+
+sub ends_with {
+ my ($tokens, $needles) = @_;
+ my $n = find_non_nl($tokens);
+ for my $needle (reverse(@$needles)) {
+ return undef if $n < 0;
+ $n = find_non_nl($tokens, $n), next if $needle eq "\n";
+ return undef if $$tokens[$n]->[0] !~ $needle;
+ $n--;
+ }
+ return 1;
+}
+
+sub match_ending {
+ my ($tokens, $endings) = @_;
+ for my $needles (@$endings) {
+ next if @$tokens < scalar(grep {$_ ne "\n"} @$needles);
+ return 1 if ends_with($tokens, $needles);
+ }
+ return undef;
+}
+
+sub parse_loop_body {
+ my $self = shift @_;
+ my @tokens = $self->SUPER::parse_loop_body(@_);
+ # did loop signal failure via "|| return" or "|| exit"?
+ return @tokens if !@tokens || grep {$_->[0] =~ /^(?:return|exit|\$\?)$/} @tokens;
+ # did loop upstream of a pipe signal failure via "|| echo 'impossible
+ # text'" as the final command in the loop body?
+ return @tokens if ends_with(\@tokens, [qr/^\|\|$/, "\n", qr/^echo$/, qr/^.+$/]);
+ # flag missing "return/exit" handling explicit failure in loop body
+ my $n = find_non_nl(\@tokens);
+ push(@{$self->{problems}}, ['LOOP', $tokens[$n]]);
+ return @tokens;
+}
+
+my @safe_endings = (
+ [qr/^(?:&&|\|\||\||&)$/],
+ [qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/],
+ [qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/, qr/^;$/],
+ [qr/^(?:exit|return|continue)$/],
+ [qr/^(?:exit|return|continue)$/, qr/^;$/]);
+
+sub accumulate {
+ my ($self, $tokens, $cmd) = @_;
+ my $problems = $self->{problems};
+
+ # no previous command to check for missing "&&"
+ goto DONE unless @$tokens;
+
+ # new command is empty line; can't yet check if previous is missing "&&"
+ goto DONE if @$cmd == 1 && $$cmd[0]->[0] eq "\n";
+
+ # did previous command end with "&&", "|", "|| return" or similar?
+ goto DONE if match_ending($tokens, \@safe_endings);
+
+ # if this command handles "$?" specially, then okay for previous
+ # command to be missing "&&"
+ for my $token (@$cmd) {
+ goto DONE if $token->[0] =~ /\$\?/;
+ }
+
+ # if this command is "false", "return 1", or "exit 1" (which signal
+ # failure explicitly), then okay for all preceding commands to be
+ # missing "&&"
+ if ($$cmd[0]->[0] =~ /^(?:false|return|exit)$/) {
+ @$problems = grep {$_->[0] ne 'AMP'} @$problems;
+ goto DONE;
+ }
+
+ # flag missing "&&" at end of previous command
+ my $n = find_non_nl($tokens);
+ push(@$problems, ['AMP', $tokens->[$n]]) unless $n < 0;
+
+DONE:
+ $self->SUPER::accumulate($tokens, $cmd);
+}
+
+# ScriptParser is a subclass of ShellParser which identifies individual test
+# definitions within test scripts, and passes each test body through TestParser
+# to identify possible problems. ShellParser detects test definitions not only
+# at the top-level of test scripts but also within compound commands such as
+# loops and function definitions.
+package ScriptParser;
+
+use base 'ShellParser';
+
+sub new {
+ my $class = shift @_;
+ my $self = $class->SUPER::new(@_);
+ $self->{ntests} = 0;
+ return $self;
+}
+
+# extract the raw content of a token, which may be a single string or a
+# composition of multiple strings and non-string character runs; for instance,
+# `"test body"` unwraps to `test body`; `word"a b"42'c d'` to `worda b42c d`
+sub unwrap {
+ my $token = (@_ ? shift @_ : $_)->[0];
+ # simple case: 'sqstring' or "dqstring"
+ return $token if $token =~ s/^'([^']*)'$/$1/;
+ return $token if $token =~ s/^"([^"]*)"$/$1/;
+
+ # composite case
+ my ($s, $q, $escaped);
+ while (1) {
+ # slurp up non-special characters
+ $s .= $1 if $token =~ /\G([^\\'"]*)/gc;
+ # handle special characters
+ last unless $token =~ /\G(.)/sgc;
+ my $c = $1;
+ $q = undef, next if defined($q) && $c eq $q;
+ $q = $c, next if !defined($q) && $c =~ /^['"]$/;
+ if ($c eq '\\') {
+ last unless $token =~ /\G(.)/sgc;
+ $c = $1;
+ $s .= '\\' if $c eq "\n"; # preserve line splice
+ }
+ $s .= $c;
+ }
+ return $s
+}
+
+sub check_test {
+ my $self = shift @_;
+ my ($title, $body) = map(unwrap, @_);
+ $self->{ntests}++;
+ my $parser = TestParser->new(\$body);
+ my @tokens = $parser->parse();
+ my $problems = $parser->{problems};
+ return unless $emit_all || @$problems;
+ my $c = main::fd_colors(1);
+ my $lineno = $_[1]->[3];
+ my $start = 0;
+ my $checked = '';
+ for (sort {$a->[1]->[2] <=> $b->[1]->[2]} @$problems) {
+ my ($label, $token) = @$_;
+ my $pos = $token->[2];
+ $checked .= substr($body, $start, $pos - $start) . " ?!$label?! ";
+ $start = $pos;
+ }
+ $checked .= substr($body, $start);
+ $checked =~ s/^/$lineno++ . ' '/mge;
+ $checked =~ s/^\d+ \n//;
+ $checked =~ s/(\s) \?!/$1?!/mg;
+ $checked =~ s/\?! (\s)/?!$1/mg;
+ $checked =~ s/(\?![^?]+\?!)/$c->{rev}$c->{red}$1$c->{reset}/mg;
+ $checked =~ s/^\d+/$c->{dim}$&$c->{reset}/mg;
+ $checked .= "\n" unless $checked =~ /\n$/;
+ push(@{$self->{output}}, "$c->{blue}# chainlint: $title$c->{reset}\n$checked");
+}
+
+sub parse_cmd {
+ my $self = shift @_;
+ my @tokens = $self->SUPER::parse_cmd();
+ return @tokens unless @tokens && $tokens[0]->[0] =~ /^test_expect_(?:success|failure)$/;
+ my $n = $#tokens;
+ $n-- while $n >= 0 && $tokens[$n]->[0] =~ /^(?:[;&\n|]|&&|\|\|)$/;
+ $self->check_test($tokens[1], $tokens[2]) if $n == 2; # title body
+ $self->check_test($tokens[2], $tokens[3]) if $n > 2; # prereq title body
+ return @tokens;
+}
+
+# main contains high-level functionality for processing command-line switches,
+# feeding input test scripts to ScriptParser, and reporting results.
+package main;
+
+my $getnow = sub { return time(); };
+my $interval = sub { return time() - shift; };
+if (eval {require Time::HiRes; Time::HiRes->import(); 1;}) {
+ $getnow = sub { return [Time::HiRes::gettimeofday()]; };
+ $interval = sub { return Time::HiRes::tv_interval(shift); };
+}
+
+# Restore TERM if test framework set it to "dumb" so 'tput' will work; do this
+# outside of get_colors() since under 'ithreads' all threads use %ENV of main
+# thread and ignore %ENV changes in subthreads.
+$ENV{TERM} = $ENV{USER_TERM} if $ENV{USER_TERM};
+
+my @NOCOLORS = (bold => '', rev => '', dim => '', reset => '', blue => '', green => '', red => '');
+my %COLORS = ();
+sub get_colors {
+ return \%COLORS if %COLORS;
+ if (exists($ENV{NO_COLOR})) {
+ %COLORS = @NOCOLORS;
+ return \%COLORS;
+ }
+ if ($ENV{TERM} =~ /xterm|xterm-\d+color|xterm-new|xterm-direct|nsterm|nsterm-\d+color|nsterm-direct/) {
+ %COLORS = (bold => "\e[1m",
+ rev => "\e[7m",
+ dim => "\e[2m",
+ reset => "\e[0m",
+ blue => "\e[34m",
+ green => "\e[32m",
+ red => "\e[31m");
+ return \%COLORS;
+ }
+ if (system("tput sgr0 >/dev/null 2>&1") == 0 &&
+ system("tput bold >/dev/null 2>&1") == 0 &&
+ system("tput rev >/dev/null 2>&1") == 0 &&
+ system("tput dim >/dev/null 2>&1") == 0 &&
+ system("tput setaf 1 >/dev/null 2>&1") == 0) {
+ %COLORS = (bold => `tput bold`,
+ rev => `tput rev`,
+ dim => `tput dim`,
+ reset => `tput sgr0`,
+ blue => `tput setaf 4`,
+ green => `tput setaf 2`,
+ red => `tput setaf 1`);
+ return \%COLORS;
+ }
+ %COLORS = @NOCOLORS;
+ return \%COLORS;
+}
+
+my %FD_COLORS = ();
+sub fd_colors {
+ my $fd = shift;
+ return $FD_COLORS{$fd} if exists($FD_COLORS{$fd});
+ $FD_COLORS{$fd} = -t $fd ? get_colors() : {@NOCOLORS};
+ return $FD_COLORS{$fd};
+}
+
+sub ncores {
+ # Windows
+ return $ENV{NUMBER_OF_PROCESSORS} if exists($ENV{NUMBER_OF_PROCESSORS});
+ # Linux / MSYS2 / Cygwin / WSL
+ do { local @ARGV='/proc/cpuinfo'; return scalar(grep(/^processor[\s\d]*:/, <>)); } if -r '/proc/cpuinfo';
+ # macOS & BSD
+ return qx/sysctl -n hw.ncpu/ if $^O =~ /(?:^darwin$|bsd)/;
+ return 1;
+}
+
+sub show_stats {
+ my ($start_time, $stats) = @_;
+ my $walltime = $interval->($start_time);
+ my ($usertime) = times();
+ my ($total_workers, $total_scripts, $total_tests, $total_errs) = (0, 0, 0, 0);
+ my $c = fd_colors(2);
+ print(STDERR $c->{green});
+ for (@$stats) {
+ my ($worker, $nscripts, $ntests, $nerrs) = @$_;
+ print(STDERR "worker $worker: $nscripts scripts, $ntests tests, $nerrs errors\n");
+ $total_workers++;
+ $total_scripts += $nscripts;
+ $total_tests += $ntests;
+ $total_errs += $nerrs;
+ }
+ printf(STDERR "total: %d workers, %d scripts, %d tests, %d errors, %.2fs/%.2fs (wall/user)$c->{reset}\n", $total_workers, $total_scripts, $total_tests, $total_errs, $walltime, $usertime);
+}
+
+sub check_script {
+ my ($id, $next_script, $emit) = @_;
+ my ($nscripts, $ntests, $nerrs) = (0, 0, 0);
+ while (my $path = $next_script->()) {
+ $nscripts++;
+ my $fh;
+ unless (open($fh, "<", $path)) {
+ $emit->("?!ERR?! $path: $!\n");
+ next;
+ }
+ my $s = do { local $/; <$fh> };
+ close($fh);
+ my $parser = ScriptParser->new(\$s);
+ 1 while $parser->parse_cmd();
+ if (@{$parser->{output}}) {
+ my $c = fd_colors(1);
+ my $s = join('', @{$parser->{output}});
+ $emit->("$c->{bold}$c->{blue}# chainlint: $path$c->{reset}\n" . $s);
+ $nerrs += () = $s =~ /\?![^?]+\?!/g;
+ }
+ $ntests += $parser->{ntests};
+ }
+ return [$id, $nscripts, $ntests, $nerrs];
+}
+
+sub exit_code {
+ my $stats = shift @_;
+ for (@$stats) {
+ my ($worker, $nscripts, $ntests, $nerrs) = @$_;
+ return 1 if $nerrs;
+ }
+ return 0;
+}
+
+Getopt::Long::Configure(qw{bundling});
+GetOptions(
+ "emit-all!" => \$emit_all,
+ "jobs|j=i" => \$jobs,
+ "stats|show-stats!" => \$show_stats) or die("option error\n");
+$jobs = ncores() if $jobs < 1;
+
+my $start_time = $getnow->();
+my @stats;
+
+my @scripts;
+push(@scripts, File::Glob::bsd_glob($_)) for (@ARGV);
+unless (@scripts) {
+ show_stats($start_time, \@stats) if $show_stats;
+ exit;
+}
+
+unless ($Config{useithreads} && eval {
+ require threads; threads->import();
+ require Thread::Queue; Thread::Queue->import();
+ 1;
+ }) {
+ push(@stats, check_script(1, sub { shift(@scripts); }, sub { print(@_); }));
+ show_stats($start_time, \@stats) if $show_stats;
+ exit(exit_code(\@stats));
+}
+
+my $script_queue = Thread::Queue->new();
+my $output_queue = Thread::Queue->new();
+
+sub next_script { return $script_queue->dequeue(); }
+sub emit { $output_queue->enqueue(@_); }
+
+sub monitor {
+ while (my $s = $output_queue->dequeue()) {
+ print($s);
+ }
+}
+
+my $mon = threads->create({'context' => 'void'}, \&monitor);
+threads->create({'context' => 'list'}, \&check_script, $_, \&next_script, \&emit) for 1..$jobs;
+
+$script_queue->enqueue(@scripts);
+$script_queue->end();
+
+for (threads->list()) {
+ push(@stats, $_->join()) unless $_ == $mon;
+}
+
+$output_queue->end();
+$mon->join();
+
+show_stats($start_time, \@stats) if $show_stats;
+exit(exit_code(\@stats));