summaryrefslogtreecommitdiffstats
path: root/t/scripts/spellintian.t
diff options
context:
space:
mode:
Diffstat (limited to 't/scripts/spellintian.t')
-rwxr-xr-xt/scripts/spellintian.t169
1 files changed, 169 insertions, 0 deletions
diff --git a/t/scripts/spellintian.t b/t/scripts/spellintian.t
new file mode 100755
index 0000000..719d65c
--- /dev/null
+++ b/t/scripts/spellintian.t
@@ -0,0 +1,169 @@
+#!/usr/bin/perl
+
+# Copyright (C) 2014-2016 Jakub Wilk <jwilk@jwilk.net>
+# Copyright (C) 2017-2023 Axel Beckert <abe@debian.org>
+#
+# This program is free software. It is distributed under the terms of
+# the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any
+# later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, you can find it on the World Wide
+# Web at https://www.gnu.org/copyleft/gpl.html, or write to the Free
+# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+# MA 02110-1301, USA.
+
+use strict;
+use warnings;
+
+use Const::Fast;
+use IPC::Run3;
+use List::SomeUtils qw(uniq);
+use Array::Utils qw(intersect);
+use Test::More tests => 8;
+
+const my $NEWLINE => qq{\n};
+const my $DOT => q{.};
+const my $WAIT_STATUS_SHIFT => 8;
+
+$ENV{'LINTIAN_BASE'} //= $DOT;
+
+my $cmd_path = "$ENV{LINTIAN_BASE}/bin/spellintian";
+my $spelling_data = "$ENV{LINTIAN_BASE}/data/spelling/corrections";
+my @word_lists
+ = qw(/usr/share/dict/american-english /usr/share/dict/british-english);
+
+# See #1019541 why some valid words are ignored and still ok to be
+# listed as a misspelled word.
+my @valid_but_very_seldom_words = qw(bellow singed want's);
+
+# See #865055 why "iff" is wrong. "publically" is a seldom, but valid
+# English word, is used in the OpenSSL license and hence causes quite
+# some false positives, when being added (again).
+my @valid_words = qw(iff publically);
+
+sub t {
+ my ($input, $expected, @options) = @_;
+
+ my @command = ($cmd_path, @options);
+ my $output;
+ run3(\@command, \$input, \$output);
+
+ my $status = ($? >> $WAIT_STATUS_SHIFT);
+ is($status, 0, 'exit status 0');
+ is($output, $expected, 'expected output');
+
+ return;
+}
+
+my $s = "A familar brown gnu allows\nto jump over the lazy dog.\n";
+
+t($s,
+ 'familar -> familiar'
+ . $NEWLINE
+ . '"allows to" -> "allows one to"'
+ . $NEWLINE);
+t(
+ $s,
+ 'familar -> familiar'
+ . $NEWLINE
+ . '"allows to" -> "allows one to"'
+ . $NEWLINE
+ . 'gnu -> GNU'
+ . $NEWLINE,
+ '--picky'
+);
+
+foreach my $word_list (@word_lists) {
+ open(my $wl_fh, '<', $word_list)
+ or die "Can't open $word_list for reading: $!";
+ local $/ = undef; # enable localized slurp mode
+ push(@valid_words, split(/\n/, <$wl_fh>));
+ close $wl_fh;
+}
+
+# Don't list identical words from American and British English twice.
+@valid_words = uniq(@valid_words);
+
+# Ignore words which are valid but very seldom and unlikely to show up
+# in Debian packages.
+foreach my $valid_but_very_seldom_word (@valid_but_very_seldom_words) {
+ @valid_words = grep { !/^$valid_but_very_seldom_word$/ } @valid_words;
+}
+
+my $iff = 0;
+my $publically = 0;
+my @case_sen;
+my @equal;
+my @valid_but_listed_words = qw();
+my @bad_spellings = qw();
+my @good_spellings = qw();
+
+open(my $sp_fh, '<', $spelling_data)
+ or die "Can't open $spelling_data for reading: $!";
+while (my $corr = <$sp_fh>) {
+ next if $corr =~ m{ ^\# | ^$ }x;
+ chomp($corr);
+
+ my ($wrong, $good) = split(/\|\|/, $corr);
+ # Check for corrections equal to original
+ if ($wrong eq $good) {
+ push @equal, $wrong;
+ # Check if case sensitive corrections have been added to the wrong
+ # file (data/spelling/corrections, not data/spelling/corrections-case).
+ # Bad example from #883041: german||German
+ } elsif ($wrong eq lc($good)) {
+ push @case_sen, $wrong;
+ }
+
+ # Needed later, e.g. for checking against lists of valid words.
+ push(@bad_spellings, $wrong);
+ push(@good_spellings, $good);
+}
+close($sp_fh);
+
+ok(
+ scalar(@equal) == 0,
+ "No no-op correction present in ${spelling_data} ("
+ . join(', ', @equal) . ')'
+);
+ok(
+ scalar(@case_sen) == 0,
+ "No case sensitive correction present in ${spelling_data} ("
+ . join(', ', @case_sen) . ')'
+);
+
+# Check if valid words have beeing has been added as correction.
+my %word_count = ();
+foreach my $word (@valid_words, @bad_spellings) {
+ $word_count{$word}++;
+}
+foreach my $word (keys %word_count) {
+ push(@valid_but_listed_words, $word) if $word_count{$word} > 1;
+}
+
+ok(
+ scalar(@valid_but_listed_words) == 0,
+ "No valid word is present in ${spelling_data} ("
+ . join(', ', sort @valid_but_listed_words) . ')'
+);
+
+my @good_bad_ugly = intersect(@bad_spellings, @good_spellings);
+
+ok(
+ scalar(@good_bad_ugly) == 0,
+ 'No bad spelling is listed as good spelling for another bad spelling ('
+ . join(', ', @good_bad_ugly) . ')'
+);
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et