summaryrefslogtreecommitdiffstats
path: root/lib/Lintian/SlidingWindow.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Lintian/SlidingWindow.pm')
-rw-r--r--lib/Lintian/SlidingWindow.pm171
1 files changed, 171 insertions, 0 deletions
diff --git a/lib/Lintian/SlidingWindow.pm b/lib/Lintian/SlidingWindow.pm
new file mode 100644
index 0000000..2274d78
--- /dev/null
+++ b/lib/Lintian/SlidingWindow.pm
@@ -0,0 +1,171 @@
+# -*- perl -*-
+
+# Copyright (C) 2013 Bastien ROUCARIES
+# Copyright (C) 2021 Felix Lechner
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <http://www.gnu.org/licenses/>.
+
+package Lintian::SlidingWindow;
+
+use v5.20;
+use warnings;
+use utf8;
+
+use Const::Fast;
+use Unicode::UTF8 qw(encode_utf8);
+
+use Moo;
+use namespace::clean;
+
+const my $DEFAULT_BLOCK_SIZE => 4096; # default for the blocksize attribute
+const my $EMPTY => q{}; # separator used when joining the queued blocks
+
+has handle => (is => 'rw'); # filehandle that readwindow reads from
+has blocksize => (is => 'rw', default => $DEFAULT_BLOCK_SIZE); # length requested per read (doubled for the first)
+has blocknumber => (is => 'rw', default => -1); # starts negative; incremented after each successful read
+has blocksub => (is => 'rw', default => undef); # optional callback; receives each block in $_ and may change it
+has _queue => (is => 'rw', default => sub {[q{}, q{}]}); # blocks that make up the current window
+
+sub readwindow {
+    my ($self) = @_;
+
+    # A negative block number means that nothing has been read so far.
+    my $is_first = ($self->blocknumber < 0);
+
+    # The very first read fetches two blocks at once so that both halves
+    # of the window can be seeded from it.  Otherwise a lone first block
+    # would be followed by a window whose first half merely repeats it.
+    my $wanted = $self->blocksize;
+    $wanted *= 2 if $is_first;
+
+    my $chunk;
+    {
+        # This path is too hot for autodie at its current performance
+        # (at the time of writing, that would be autodie/2.23).
+        # - Benchmark chromium-browser/32.0.1700.123-2/source
+        no autodie qw(read);
+        my $count = read($self->handle, $chunk, $wanted);
+        # undef signals a read error, while zero signals end of file.
+        die encode_utf8("read failed: $!\n") unless defined $count;
+        return unless $count;
+    }
+
+    # Let the optional callback rewrite the block by way of $_.
+    if (defined(my $transform = $self->blocksub)) {
+        local $_ = $chunk;
+        $transform->();
+        $chunk = $_;
+    }
+
+    $self->blocknumber($self->blocknumber + 1);
+    my $queue = $self->_queue;
+    my $blocksize = $self->blocksize;
+    if ($is_first && length($chunk) > $blocksize) {
+        # Seed both halves; the second is the shorter one for short files.
+        $queue->[0] = substr($chunk, 0, $blocksize);
+        $queue->[1] = substr($chunk, $blocksize);
+        return $chunk;
+    }
+
+    # Slide the window: drop the oldest block and append the new one.
+    shift @{$queue};
+    push @{$queue}, $chunk;
+    return join($EMPTY, @{$queue});
+}
+
+=head1 NAME
+
+Lintian::SlidingWindow - Lintian interface to sliding window match
+
+=head1 SYNOPSIS
+
+ use Lintian::SlidingWindow;
+
+ open(my $fd, '<', 'someevilfile.c') or die "Cannot open file: $!";
+ my $sfd = Lintian::SlidingWindow->new(handle => $fd, blocksub => sub { $_ = lc($_); });
+ while (my $window = $sfd->readwindow) {
+ if (index($window, 'evil') > -1) {
+ if($window =~
+ m/software \s++ shall \s++
+ be \s++ used \s++ for \s++ good \s*+ ,?+ \s*+
+ not \s++ evil/xsim) {
+ # do something like : tag 'license-problem-json-evil';
+ }
+ }
+ }
+
+=head1 DESCRIPTION
+
+Lintian::SlidingWindow provides a way of matching patterns,
+including multi-line patterns, without needing to load the whole
+file into memory.
+
+=head1 CLASS METHODS
+
+=over 4
+
+=item new(handle => HANDLE[, blocksub => BLOCKSUB][, blocksize => BLOCKSIZE])
+
+Create a new sliding window that reads from the given HANDLE, which must
+be open for reading. Optionally run BLOCKSUB against each block. Note
+that BLOCKSUB should transform the data byte by byte and must not depend
+on context.
+
+Each window consists of up to two blocks of BLOCKSIZE characters.
+
+=back
+
+=head1 INSTANCE METHODS
+
+=over 4
+
+=item readwindow
+
+Read more data and return the resulting window. Return undef at end of file.
+
+=item C<blocksize>
+
+=item blocknumber
+
+=item handle
+
+=item blocksub
+
+=back
+
+=head1 DIAGNOSTICS
+
+=over 4
+
+=item read failed: REASON
+
+=back
+
+=head1 AUTHOR
+
+Originally written by Bastien ROUCARIES for Lintian.
+
+=head1 SEE ALSO
+
+lintian(1)
+
+=cut
+
+1;
+
+# Local Variables:
+# indent-tabs-mode: nil
+# cperl-indent-level: 4
+# End:
+# vim: syntax=perl sw=4 sts=4 sr et