summaryrefslogtreecommitdiffstats
path: root/lib/Lintian/Check/Cruft.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Lintian/Check/Cruft.pm')
-rw-r--r--lib/Lintian/Check/Cruft.pm836
1 files changed, 836 insertions, 0 deletions
diff --git a/lib/Lintian/Check/Cruft.pm b/lib/Lintian/Check/Cruft.pm
new file mode 100644
index 0000000..1a402c6
--- /dev/null
+++ b/lib/Lintian/Check/Cruft.pm
@@ -0,0 +1,836 @@
+# cruft -- lintian check script -*- perl -*-
+#
+# based on debhelper check,
+# Copyright (C) 1999 Joey Hess
+# Copyright (C) 2000 Sean 'Shaleh' Perry
+# Copyright (C) 2002 Josip Rodin
+# Copyright (C) 2007 Russ Allbery
+# Copyright (C) 2013-2018 Bastien ROUCARIES
+# Copyright (C) 2017-2020 Chris Lamb <lamby@debian.org>
+# Copyright (C) 2020-2021 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, you can find it on the World Wide
+# Web at https://www.gnu.org/copyleft/gpl.html, or write to the Free
+# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+# MA 02110-1301, USA.
+
+package Lintian::Check::Cruft;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Const::Fast;
+use List::SomeUtils qw(any none);
+
+const my $EMPTY => q{};
+const my $ASTERISK => q{*};
+const my $DOT => q{.};
+
+const my $ITEM_NOT_FOUND => -1;
+
+use Moo;
+use namespace::clean;
+
+with 'Lintian::Check';
+
+# Half of the size used in the "sliding window" for detecting bad
+# licenses like GFDL with invariant sections.
+# NB: Keep in sync cruft-gfdl-fp-sliding-win/pre_build.
+# not less than 8192 for source missing
+use constant BLOCKSIZE => 16_384;
+use Lintian::SlidingWindow;
+
+my %NVIDIA_LICENSE = (
+ keywords => [qw{license intellectual retain property}],
+ sentences =>[
+'retain all intellectual property and proprietary rights in and to this software and related documentation'
+ ]
+);
+
+my %NON_FREE_LICENSES = (
+# first field is tag
+# second field is a list of keywords in lower case
+# third field are lower case sentences to match the license. Notes that space are normalized before and formatting removed
+# fourth field is a regex to use to match the license, use lower case and [ ] for space.
+# 5th field is a function to call if the field 2th to 5th match.
+# (see dispatch table %LICENSE_CHECK_DISPATCH_TABLE
+
+ # json license
+ 'license-problem-json-evil' => {
+ keywords => [qw{software evil good}],
+ sentences => ['software shall be used for good'],
+ regex =>
+qr{software [ ] shall [ ] be [ ] used [ ] for [ ] good [ ]? ,? [ ]? not [ ] evil}msx
+ },
+ # non free RFC old version
+ 'license-problem-non-free-RFC' => {
+ keywords => [qw{document purpose translate language}],
+ sentences => ['this document itself may not be modified in any way'],
+ regex =>
+qr{this [ ] document [ ] itself [ ] may [ ] not [ ] be [ ] modified [ ] in [ ] any [ ] way [ ]?,
+ [ ]? such [ ] as [ ] by [ ] removing [ ] the [ ] copyright [ ] notice [ ] or [ ] references
+ [ ] to [ ] .{0,256} [ ]? except [ ] as [ ] needed [ ] for [ ] the [ ] purpose [ ] of [ ] developing
+ [ ] .{0,128} [ ]? in [ ] which [ ] case [ ] the [ ] procedures [ ] for [ ] copyrights [ ] defined
+ [ ] in [ ] the [ ] .{0,128} [ ]? process [ ] must [ ] be [ ] followed[ ]?,[ ]?
+ or [ ] as [ ] required [ ] to [ ] translate [ ] it [ ] into [ ] languages [ ]}msx,
+ callsub => 'rfc_whitelist_filename'
+ },
+ 'license-problem-non-free-RFC-BCP78' => {
+ keywords => [qw{license document bcp restriction}],
+ sentences => ['bcp 78'],
+ regex =>
+qr{this [ ] document [ ] is [ ] subject [ ] to [ ] (?:the [ ] rights [ ]?, [ ] licenses [ ] and [ ]restrictions [ ] contained [ ] in [ ])? bcp [ ] 78}msx,
+ callsub => 'rfc_whitelist_filename'
+ },
+# check GFDL block - The ".{0,1024}"-part in the regex
+# will contain the "no invariants etc." part if
+# it is a good use of the license. We include it
+# here to ensure that we do not emit a false positive
+# if the "redeeming" part is in the next block
+# keyword document is here in order to benefit for other license keyword and a shortcut for documentation
+ 'license-problem-gfdl-invariants' => {
+ keywords => [qw{license document gnu copy documentation}],
+ sentences => ['gnu free documentation license'],
+ regex =>
+qr{(?'rawcontextbefore'(?:(?:(?!a [ ] copy [ ] of [ ] the [ ] license [ ] is).){1024}|
+\A(?:(?!a [ ] copy [ ] of [ ] the [ ] license [ ] is).){0,1024}|
+(?:[ ] copy [ ] of [ ] the [ ] license [ ] is.{0,1024}?))) gnu [ ] free [ ]
+documentation [ ] license (?'rawgfdlsections'(?:(?!gnu [ ] free [ ] documentation
+[ ] license).){0,1024}?) (?:a [ ] copy [ ] of [ ] the [ ] license [ ] is|
+this [ ] document [ ] is [ ] distributed)}msx,
+ callsub => 'check_gfdl_license_problem'
+ },
+ # php license
+ 'license-problem-php-license' => {
+ keywords => [qw{www.php.net group\@php.net phpfoo conjunction php}],
+ sentences => ['this product includes php'],
+ regex => qr{php [ ] license [ ]?[,;][ ]? version [ ] 3(?:\.\d+)?}msx,
+ callsub => 'php_source_whitelist'
+ },
+ 'license-problem-bad-php-license' => {
+ keywords => [qw{www.php.net add-on conjunction}],
+ sentences => ['this product includes php'],
+ regex => qr{php [ ] license [ ]?[,;][ ]? version [ ] 2(?:\.\d+)?}msx,
+ callsub => 'php_source_whitelist'
+ },
+ # cc by nc sa note that " is replaced by [ ]
+ 'license-problem-cc-by-nc-sa' => {
+ keywords => [qw{license by-nc-sa creativecommons.org}],
+ sentences => [
+ '://creativecommons.org/licenses/by-nc-sa',
+ 'under attribution-noncommercial'
+ ],
+ regex =>
+qr{(?:license [ ] rdf:[^=:]+=[ ]* (?:ht|f)tps?://(?:[^/.]\.)??creativecommons\.org/licenses/by-nc-sa/\d+(?:\.\d+)?(?:/[[:alpha:]]+)?/? [ ]* >|available [ ] under [ ] attribution-noncommercial)}msx
+ },
+ # not really a license but warn it: visual c++ generated file
+ 'source-contains-autogenerated-visual-c++-file' => {
+ keywords => [qw{microsoft visual generated}],
+ sentences => ['microsoft visual c++ generated'],
+ regex =>
+qr{microsoft [ ] visual [ ] c[+][+] [ ] generated (?![ ] by [ ] freeze\.py)}msx
+ },
+ # not really a license but warn about it: gperf generated file
+ 'source-contains-autogenerated-gperf-data' => {
+ keywords => [qw{code produced gperf version}],
+ sentences => ['code produced by gperf version'],
+ regex =>
+ qr{code [ ] produced [ ] by [ ] gperf [ ] version [ ] \d+\.\d+}msx
+ },
+ # warn about copy of ieee-data
+ 'source-contains-data-from-ieee-data-oui-db' => {
+ keywords => [qw{struck scitex racore}],
+ sentences => ['dr. b. struck'],
+ regex => qr{dr. [ ] b. [ ] struck}msx
+ },
+ # warn about unicode license for utf for convert utf
+ 'license-problem-convert-utf-code' => {
+ keywords => [qw{fall-through bytestowrite utf-8}],
+ sentences => ['the fall-through switches in utf-8 reading'],
+ regex =>
+qr{the [ ] fall-through [ ] switches [ ] in [ ] utf-8 [ ] reading [ ] code [ ] save}msx
+ }
+);
+
+# get usual data about admissible/not admissible GFDL invariant part of license
+has GFDL_FRAGMENTS => (
+ is => 'rw',
+ lazy => 1,
+ default => sub {
+ my ($self) = @_;
+
+ my %gfdl_fragments;
+
+ my $data = $self->data->load('cruft/gfdl-license-fragments-checks',
+ qr/\s*\~\~\s*/);
+
+ for my $gfdlsectionsregex ($data->all) {
+
+ my $secondpart = $data->value($gfdlsectionsregex);
+
+ # allow empty parameters
+ $secondpart //= $EMPTY;
+ my ($acceptonlyinfile,$applytag)
+ = split(/\s*\~\~\s*/, $secondpart, 2);
+
+ $acceptonlyinfile //= $EMPTY;
+ $applytag //= $EMPTY;
+
+ # trim both ends
+ $acceptonlyinfile =~ s/^\s+|\s+$//g;
+ $applytag =~ s/^\s+|\s+$//g;
+
+ # accept all files if empty
+ $acceptonlyinfile ||= $DOT . $ASTERISK;
+
+ my %ret = (
+ 'gfdlsectionsregex' => qr/$gfdlsectionsregex/xis,
+ 'acceptonlyinfile' => qr/$acceptonlyinfile/xs,
+ );
+
+ $ret{'tag'} = $applytag
+ if length $applytag;
+
+ $gfdl_fragments{$gfdlsectionsregex} = \%ret;
+ }
+
+ return \%gfdl_fragments;
+ }
+);
+
+sub visit_patched_files {
+ my ($self, $item) = @_;
+
+ return
+ unless $item->is_file;
+
+ # license string in debian/changelog are probably just change
+ # Ignore these strings in d/README.{Debian,source}. If they
+ # appear there it is probably just "file XXX got removed
+ # because of license Y".
+ $self->full_text_check($item)
+ unless $item->name eq 'debian/changelog'
+ && $item->name eq 'debian/README.Debian'
+ && $item->name eq 'debian/README.source';
+
+ return;
+}
+
+# do basic license check against well known offender
+# note that it does not replace licensecheck(1)
+# and is only used for autoreject by ftp-master
+sub full_text_check {
+ my ($self, $item) = @_;
+
+ return undef
+ unless $item ->is_regular_file;
+
+ open(my $fd, '<:raw', $item->unpacked_path)
+ or die encode_utf8('Cannot open ' . $item->unpacked_path);
+
+ my $sfd = Lintian::SlidingWindow->new;
+ $sfd->handle($fd);
+ $sfd->blocksize(BLOCKSIZE);
+ $sfd->blocksub(sub { $_ = lc; });
+
+ unless (-T $fd) {
+ close($fd);
+ return undef;
+ }
+
+ # we try to read this file in block and use a sliding window
+ # for efficiency. We store two blocks in @queue and the whole
+ # string to match in $block. Please emit license tags only once
+ # per file
+ BLOCK:
+ while (my $lowercase = $sfd->readwindow()) {
+
+ my $blocknumber = $sfd->blocknumber();
+
+ my $clean = clean_text($lowercase);
+
+ # Check for non-distributable files - this
+ # applies even to non-free, as we still need
+ # permission to distribute those.
+ # nvdia opencv infamous license
+ last BLOCK
+ if $self->check_for_single_bad_license($item, $lowercase, $clean,
+ 'license-problem-nvidia-intellectual',
+ \%NVIDIA_LICENSE);
+
+ unless ($self->processable->is_non_free) {
+
+ for my $tag_name (keys %NON_FREE_LICENSES) {
+
+ last BLOCK
+ if $self->check_for_single_bad_license($item, $lowercase,
+ $clean,$tag_name, $NON_FREE_LICENSES{$tag_name});
+ }
+ }
+
+ # check javascript in html file
+ if ($item->basename =~ /\.(?:x?html?\d?|xht)$/i) {
+
+ my $blockscript = $lowercase;
+ my $indexscript;
+
+ while (($indexscript = index($blockscript, '<script'))
+ > $ITEM_NOT_FOUND){
+
+ $blockscript = substr($blockscript,$indexscript);
+
+ # sourced script ok
+ if ($blockscript =~ m{\A<script\s+[^>]*?src="[^"]+?"[^>]*?>}sm)
+ {
+
+ $blockscript = substr($blockscript,$+[0]);
+ next;
+ }
+
+ # extract script
+ if ($blockscript =~ m{<script[^>]*?>(.*?)</script>}sm) {
+
+ $blockscript = substr($blockscript,$+[0]);
+
+ my $lcscript = $1;
+
+ # check if js script is minified
+ my $firstline = $EMPTY;
+ for my $line (split /\n/, $lcscript) {
+
+ if ($line =~ /^\s*$/) {
+ next;
+
+ } else {
+ $firstline = $line;
+ last;
+ }
+ }
+
+ if ($firstline
+ =~ m/.{0,20}((?:\bcopyright\b|[\(]c[\)]\s*\w|\N{COPYRIGHT SIGN}).{0,50})/
+ ){
+
+ my $extract = $1;
+ $extract =~ s/^\s+|\s+$//g;
+
+ $self->pointed_hint(
+ 'embedded-script-includes-copyright-statement',
+ $item->pointer,
+ 'extract of copyright statement:',
+ $extract
+ );
+ }
+
+ # clean up jslint craps line
+ my $cleaned = $lcscript;
+ $cleaned =~ s{^\s*/[*][^\n]*[*]/\s*$}{}gm;
+ $cleaned =~ s{^\s*//[^\n]*$}{}gm;
+ $cleaned =~ s/^\s+//gm;
+
+ # strip indentation
+ $cleaned =~ s/^\s+//mg;
+ $cleaned = _strip_c_comments($cleaned);
+ # strip empty line
+ $cleaned =~ s/^\s*\n//mg;
+ # remove last \n
+ $cleaned =~ s/\n\Z//m;
+
+# detect browserified javascript (comment are removed here and code is stripped)
+ my $contiguous = $cleaned;
+ $contiguous =~ s/\n/ /msg;
+
+ # get browserified regexp
+ my $BROWSERIFY_REGEX
+ = $self->data->load('cruft/browserify-regex',
+ qr/\s*\~\~\s*/);
+
+ for my $condition ($BROWSERIFY_REGEX->all) {
+
+ my $pattern = $BROWSERIFY_REGEX->value($condition);
+ if ($contiguous =~ m{$pattern}msx) {
+
+ my $extra
+ = (defined $1) ? 'code fragment:'.$1 : $EMPTY;
+ $self->pointed_hint(
+ 'source-contains-browserified-javascript',
+ $item->pointer, $extra);
+
+ last;
+ }
+ }
+
+ next;
+ }
+
+ last;
+ }
+ }
+
+ # check if file is javascript but not minified
+ my $isjsfile = ($item->name =~ m/\.js$/) ? 1 : 0;
+ if ($isjsfile) {
+ my $minjsregexp
+ = qr/(?i)[-._](?:compiled|compressed|lite|min|pack(?:ed)?|prod|umd|yc)\.js$/;
+ $isjsfile = ($item->name =~ m{$minjsregexp}) ? 0 : 1;
+ }
+
+ if ($isjsfile) {
+ # exception sphinx documentation
+ if ($item->basename eq 'searchindex.js') {
+ if ($lowercase =~ m/\A\s*search\.setindex\s* \s* \(\s*\{/xms) {
+
+ $self->pointed_hint(
+ 'source-contains-prebuilt-sphinx-documentation',
+ $item->parent_dir->pointer);
+ last BLOCK;
+ }
+ }
+
+ if ($item->basename eq 'search_index.js') {
+ if ($lowercase =~ m/\A\s*var\s*search_index\s*=/xms) {
+
+ $self->pointed_hint(
+ 'source-contains-prebuilt-pandoc-documentation',
+ $item->parent_dir->pointer);
+ last BLOCK;
+ }
+ }
+ # false positive in dx package at least
+ elsif ($item->basename eq 'srchidx.js') {
+
+ last BLOCK
+ if $lowercase
+ =~ m/\A\s*profiles \s* = \s* new \s* Array\s*\(/xms;
+ }
+ # https://github.com/rafaelp/css_browser_selector is actually the
+ # original source. (#874381)
+ elsif ($lowercase =~ m/css_browser_selector\(/) {
+
+ last BLOCK;
+ }
+ # Avoid false-positives in Jush's syntax highlighting definition files.
+ elsif ($lowercase =~ m/jush\.tr\./) {
+
+ last BLOCK;
+ }
+
+ # now search hidden minified
+
+ # clean up jslint craps line
+ my $cleaned = $lowercase;
+ $cleaned =~ s{^\s*/[*][^\n]*[*]/\s*$}{}gm;
+ $cleaned =~ s{^\s*//[^\n]*$}{}gm;
+ $cleaned =~ s/^\s+//gm;
+
+ # strip indentation
+ $cleaned =~ s/^\s+//mg;
+ $cleaned = _strip_c_comments($cleaned);
+ # strip empty line
+ $cleaned =~ s/^\s*\n//mg;
+ # remove last \n
+ $cleaned =~ s/\n\Z//m;
+
+# detect browserified javascript (comment are removed here and code is stripped)
+ my $contiguous = $cleaned;
+ $contiguous =~ s/\n/ /msg;
+
+ # get browserified regexp
+ my $BROWSERIFY_REGEX
+ = $self->data->load('cruft/browserify-regex',qr/\s*\~\~\s*/);
+
+ for my $condition ($BROWSERIFY_REGEX->all) {
+
+ my $pattern = $BROWSERIFY_REGEX->value($condition);
+ if ($contiguous =~ m{$pattern}msx) {
+
+ my $extra = (defined $1) ? 'code fragment:'.$1 : $EMPTY;
+ $self->pointed_hint(
+ 'source-contains-browserified-javascript',
+ $item->pointer, $extra);
+
+ last;
+ }
+ }
+ }
+
+ # search link rel header
+ if ($lowercase =~ / \Q rel="copyright" \E /msx) {
+
+ my $href = $lowercase;
+ $href =~ m{<link \s+
+ rel="copyright" \s+
+ href="([^"]+)" \s*/? \s*>}xmsi;
+
+ my $url = $1 // $EMPTY;
+
+ $self->pointed_hint('license-problem-cc-by-nc-sa', $item->pointer)
+ if $url =~ m{^https?://creativecommons.org/licenses/by-nc-sa/};
+ }
+ last BLOCK;
+ }
+ return close($fd);
+}
+
+# strip C comment
+# warning block is at more 8192 char in order to be too slow
+# and in order to avoid regex recursion
+sub _strip_c_comments {
+ my ($lowercase) = @_;
+
+ # from perl faq strip comments
+ $lowercase =~ s{
+ # Strip /* */ comments
+ /\* [^*]*+ \*++ (?: [^/*][^*]*+\*++ ) */
+ # Strip // comments (C++ style)
+ | // (?: [^\\] | [^\n][\n]? )*? (?=\n)
+ | (
+ # Keep "/* */" (etc) as is
+ "(?: \\. | [^"\\]++)*"
+ # Keep '/**/' (etc) as is
+ | '(?: \\. | [^'\\]++)*'
+ # Keep anything else
+ | .[^/"'\\]*+
+ )
+ }{defined $1 ? $1 : ""}xgse;
+
+ return $lowercase;
+}
+
+# return True in case of license problem
+sub check_gfdl_license_problem {
+ my ($self, $item, $tag_name, %matchedhash) = @_;
+
+ my $rawgfdlsections = $matchedhash{rawgfdlsections} || $EMPTY;
+ my $rawcontextbefore = $matchedhash{rawcontextbefore} || $EMPTY;
+
+ # strip punctuation
+ my $gfdlsections = _strip_punct($rawgfdlsections);
+ my $contextbefore = _strip_punct($rawcontextbefore);
+
+ # remove line number at beginning of line
+ # see krusader/1:2.4.0~beta3-2/doc/en_US/advanced-functions.docbook/
+ $gfdlsections =~ s{[ ]\d+[ ]}{ }gxsmo;
+ $gfdlsections =~ s{^\d+[ ]}{ }xsmo;
+ $gfdlsections =~ s{[ ]\d+$}{ }xsmo;
+ $gfdlsections =~ s{[ ]+}{ }xsmo;
+
+ # remove classical and without meaning part of
+ # matched string
+ my $oldgfdlsections;
+ do {
+ $oldgfdlsections = $gfdlsections;
+ $gfdlsections =~ s{ \A \(?[ ]? g?fdl [ ]?\)?[ ]? [,\.;]?[ ]?}{}xsmo;
+ $gfdlsections =~ s{ \A (?:either[ ])?
+ version [ ] \d+(?:\.\d+)? [ ]?}{}xsmo;
+ $gfdlsections =~ s{ \A of [ ] the [ ] license [ ]?[,\.;][ ]?}{}xsmo;
+ $gfdlsections=~ s{ \A or (?:[ ]\(?[ ]? at [ ] your [ ] option [ ]?\)?)?
+ [ ] any [ ] later [ ] version[ ]?}{}xsmo;
+ $gfdlsections =~ s{ \A (as[ ])? published [ ] by [ ]
+ the [ ] free [ ] software [ ] foundation[ ]?}{}xsmo;
+ $gfdlsections =~ s{\(?[ ]? fsf [ ]?\)?[ ]?}{}xsmo;
+ $gfdlsections =~ s{\A [ ]? [,\.;]? [ ]?}{}xsmo;
+ $gfdlsections =~ s{[ ]? [,\.]? [ ]?\Z}{}xsmo;
+ } while ($oldgfdlsections ne $gfdlsections);
+
+ $contextbefore =~ s{
+ [ ]? (:?[,\.;]? [ ]?)?
+ permission [ ] is [ ] granted [ ] to [ ] copy [ ]?[,\.;]?[ ]?
+ distribute [ ]?[,\.;]?[ ]? and[ ]?/?[ ]?or [ ] modify [ ]
+ this [ ] document [ ] under [ ] the [ ] terms [ ] of [ ] the\Z}{}xsmo;
+
+ # Treat ambiguous empty text
+ if ($gfdlsections eq $EMPTY) {
+
+ # lie in order to check more part
+ $self->pointed_hint('license-problem-gfdl-invariants-empty',
+ $item->pointer);
+
+ return 0;
+ }
+
+ # official wording
+ if(
+ $gfdlsections =~ m{\A
+ with [ ] no [ ] invariant [ ] sections[ ]?,
+ [ ]? no [ ] front(?:[ ]?-[ ]?|[ ])cover [ ] texts[ ]?,?
+ [ ]? and [ ] no [ ] back(?:[ ]?-?[ ]?|[ ])cover [ ] texts
+ \Z}xs
+ ) {
+ return 0;
+ }
+
+ # example are ok
+ if (
+ $contextbefore =~ m{following [ ] is [ ] an [ ] example
+ (:?[ ] of [ ] the [ ] license [ ] notice [ ] to [ ] use
+ (?:[ ] after [ ] the [ ] copyright [ ] (?:line(?:\(s\)|s)?)?
+ (?:[ ] using [ ] all [ ] the [ ] features? [ ] of [ ] the [ ] gfdl)?
+ )?
+ )? [ ]? [,:]? \Z}xs
+ ){
+ return 0;
+ }
+
+ # GFDL license, assume it is bad unless it
+ # explicitly states it has no "bad sections".
+ for my $gfdl_fragment (keys %{$self->GFDL_FRAGMENTS}) {
+
+ my $gfdl_data = $self->GFDL_FRAGMENTS->{$gfdl_fragment};
+ my $gfdlsectionsregex = $gfdl_data->{'gfdlsectionsregex'};
+ if ($gfdlsections =~ m{$gfdlsectionsregex}) {
+
+ my $acceptonlyinfile = $gfdl_data->{'acceptonlyinfile'};
+ if ($item->name =~ m{$acceptonlyinfile}) {
+
+ my $applytag = $gfdl_data->{'tag'};
+
+ # lie will allow checking more blocks
+ $self->pointed_hint($applytag, $item->pointer,
+ 'invariant part is:',
+ $gfdlsections)
+ if defined $applytag;
+
+ return 0;
+
+ } else {
+ $self->pointed_hint(
+ 'license-problem-gfdl-invariants',
+ $item->pointer,'invariant part is:',
+ $gfdlsections
+ );
+ return 1;
+ }
+ }
+ }
+
+ # catch all
+ $self->pointed_hint(
+ 'license-problem-gfdl-invariants',
+ $item->pointer,'invariant part is:',
+ $gfdlsections
+ );
+
+ return 1;
+}
+
+sub rfc_whitelist_filename {
+ my ($self, $item, $tag_name, %matchedhash) = @_;
+
+ return 0
+ if $item->name eq 'debian/copyright';
+
+ my $lcname = lc($item->basename);
+
+ # prebuilt-file or forbidden file type
+ # specified separator protects against spaces in pattern
+ my $RFC_WHITELIST= $self->data->load('cruft/rfc-whitelist',qr/\s*\~\~\s*/);
+
+ my @patterns = $RFC_WHITELIST->all;
+
+ return 0
+ if any { $lcname =~ m/ $_ /xms } @patterns;
+
+ $self->pointed_hint($tag_name, $item->pointer);
+
+ return 1;
+}
+
+sub php_source_whitelist {
+ my ($self, $item, $tag_name, %matchedhash) = @_;
+
+ my $copyright_path
+ = $self->processable->patched->resolve_path('debian/copyright');
+
+ return 0
+ if defined $copyright_path
+ && $copyright_path->bytes
+ =~ m{^Source: https?://(pecl|pear).php.net/package/.*$}m;
+
+ return 0
+ if $self->processable->source_name =~ /^php\d*(?:\.\d+)?$/xms;
+
+ $self->pointed_hint($tag_name, $item->pointer);
+
+ return 1;
+}
+
+sub clean_text {
+ my ($text) = @_;
+
+ # be paranoiac replace gnu with texinfo by gnu
+ $text =~ s{
+ (?:@[[:alpha:]]*?\{)?\s*gnu\s*\} # Texinfo cmd
+ }{ gnu }gxms;
+
+ # pod2man formatting
+ $text =~ s{ \\ \* \( [LR] \" }{\"}gxsm;
+ $text =~ s{ \\ -}{-}gxsm;
+
+ # replace some shortcut (clisp)
+ $text =~ s{\(&fdl;\)}{ }gxsm;
+ $text =~ s{&fsf;}{free software foundation}gxsm;
+
+ # non breaking space
+ $text =~ s{&nbsp;}{ }gxsm;
+
+ # replace some common comment-marker/markup with space
+ $text =~ s{^\.\\\"}{ }gxms; # man comments
+
+ # po comment may include html tag
+ $text =~ s/\"\s?\v\#~\s?\"//gxms;
+
+ # strip .rtf paragraph marks (#892967)
+ $text =~ s/\\par\b//gxms;
+
+ $text =~ s/\\url[{][^}]*?[}]/ /gxms; # (la)?tex url
+ $text =~ s/\\emph[{]/ /gxms; # (la)?tex emph
+ $text =~ s<\\href[{][^}]*?[}]
+ [{]([^}]*?)[}]>< $1 >gxms;# (la)?tex href
+ $text =~ s<\\hyperlink
+ [{][^}]*?[}]
+ [{]([^}]*?)[}]>< $1 >gxms; # (la)?tex hyperlink
+ $text =~ s{-\\/}{-}gxms; # tex strange hyphen
+ $text =~ s/\\char/ /gxms; # tex char command
+
+ # Texinfo comment with end section
+ $text =~ s{\@c(?:omment)?\h+
+ end \h+ ifman\s+}{ }gxms;
+ $text =~ s{\@c(?:omment)?\s+
+ noman\s+}{ }gxms; # Texinfo comment no manual
+
+ $text =~ s/\@c(?:omment)?\s+/ /gxms; # Texinfo comment
+
+ # Texinfo bold,italic, roman, fixed width
+ $text =~ s/\@[birt][{]/ /gxms;
+ $text =~ s/\@sansserif[{]/ /gxms; # Texinfo sans serif
+ $text =~ s/\@slanted[{]/ /gxms; # Texinfo slanted
+ $text =~ s/\@var[{]/ /gxms; # Texinfo emphasis
+
+ $text =~ s/\@(?:small)?example\s+/ /gxms; # Texinfo example
+ $text =~ s{\@end \h+
+ (?:small)example\s+}{ }gxms; # Texinfo end example tag
+ $text =~ s/\@group\s+/ /gxms; # Texinfo group
+ $text =~ s/\@end\h+group\s+/ /gxms; # Texinfo end group
+
+ $text =~ s/<!--/ /gxms; # XML comments
+ $text =~ s/-->/ /gxms; # end XML comment
+
+ $text =~ s{</?a[^>]*?>}{ }gxms; # a link
+ $text =~ s{<br\s*/?>}{ }gxms; # (X)?HTML line
+ # breaks
+ $text =~ s{</?citetitle[^>]*?>}{ }gxms; # DocBook citation title
+ $text =~ s{</?div[^>]*?>}{ }gxms; # html style
+ $text =~ s{</?font[^>]*?>}{ }gxms; # bold
+ $text =~ s{</?b[^>]*?>}{ }gxms; # italic
+ $text =~ s{</?i[^>]*?>}{ }gxms; # italic
+ $text =~ s{</?link[^>]*?>}{ }gxms; # xml link
+ $text =~ s{</?p[^>]*?>}{ }gxms; # html paragraph
+ $text =~ s{</?quote[^>]*?>}{ }gxms; # xml quote
+ $text =~ s{</?span[^>]*?>}{ }gxms; # span tag
+ $text =~ s{</?ulink[^>]*?>}{ }gxms; # ulink DocBook
+ $text =~ s{</?var[^>]*?>}{ }gxms; # var used by texinfo2html
+
+ $text =~ s{\&[lr]dquo;}{ }gxms; # html rquote
+
+ $text =~ s{\(\*note.*?::\)}{ }gxms; # info file note
+
+ # String array (e.g. "line1",\n"line2")
+ $text =~ s/\"\s*,/ /gxms;
+ # String array (e.g. "line1"\n ,"line2"),
+ $text =~ s/,\s*\"/ /gxms;
+ $text =~ s/\\n/ /gxms; # Verbatim \n in string array
+
+ $text =~ s/\\&/ /gxms; # pod2man formatting
+ $text =~ s/\\s(?:0|-1)/ /gxms; # pod2man formatting
+
+ $text =~ s/(?:``|'')/ /gxms; # quote like
+
+ # diff/patch lines (should be after html tag)
+ $text =~ s/^[-\+!<>]/ /gxms;
+ $text =~ s{\@\@ \s*
+ [-+] \d+,\d+ \s+
+ [-+] \d+,\d+ \s*
+ \@\@}{ }gxms; # patch line
+
+ # Texinfo end tag (could be more clever but brute force is fast)
+ $text =~ s/}/ /gxms;
+ # Tex section titles
+ $text =~ s/^\s*\\(sub)*section\*?\{\s*\S+/ /gxms;
+ # single char at end
+ # String, C-style comment/javadoc indent,
+ # quotes for strings, pipe and backslash, tilde in some txt
+ $text =~ s/[%\*\"\|\\\#~]/ /gxms;
+ # delete double spacing now and normalize spacing
+ # to space character
+ $text =~ s{\s++}{ }gsm;
+
+ # trim both ends
+ $text =~ s/^\s+|\s+$//g;
+
+ return $text;
+}
+
+# do not use space around punctuation
+sub _strip_punct() {
+ my ($text) = @_;
+ # replace final punctuation
+ $text =~ s{(?:
+ \s*[,\.;]\s*\Z | # final punctuation
+ \A\s*[,\.;]\s* # punctuation at the beginning
+ )}{ }gxms;
+
+ # delete double spacing now and normalize spacing
+ # to space character
+ $text =~ s{\s++}{ }gsm;
+
+ # trim both ends
+ $text =~ s/^\s+|\s+$//g;
+
+ return $text;
+}
+
+sub check_for_single_bad_license {
+ my ($self, $item, $lowercase, $clean, $tag_name, $license_data) = @_;
+
+ # do fast keyword search
+ # could make more sense as 'return 1 unless all' but does not work
+ return 0
+ if none { $lowercase =~ / \Q$_\E /msx } @{$license_data->{keywords}};
+
+ return 0
+ if none { $clean =~ / \Q$_\E /msx }@{$license_data->{sentences}};
+
+ my $regex = $license_data->{regex};
+ return 0
+ if defined $regex && $clean !~ $regex;
+
+ my $callsub = $license_data->{callsub};
+ if (!defined $callsub) {
+
+ $self->pointed_hint($tag_name, $item->pointer);
+ return 1;
+ }
+
+ return $self->$callsub($item, $tag_name, %+);
+}
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et