diff options
Diffstat (limited to 'lib/Lintian/SlidingWindow.pm')
-rw-r--r-- | lib/Lintian/SlidingWindow.pm | 171 |
1 files changed, 171 insertions, 0 deletions
# -*- perl -*-

# Copyright (C) 2013 Bastien ROUCARIES
# Copyright (C) 2021 Felix Lechner
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.

package Lintian::SlidingWindow;

use v5.20;
use warnings;
use utf8;

use Const::Fast;
use Unicode::UTF8 qw(encode_utf8);

use Moo;
use namespace::clean;

const my $DEFAULT_BLOCK_SIZE => 4096;
const my $EMPTY => q{};

# handle      - file handle the blocks are read from (via the read builtin).
# blocksize   - number of characters requested per read; the very first
#               read requests twice that amount.
# blocknumber - -1 until the first successful read, then incremented by one
#               for every window handed out by readwindow.
# blocksub    - optional code reference; called with the freshly read data
#               in $_ and expected to leave the transformed data in $_.
# _queue      - the (at most) two most recent blocks; joined to form the
#               window that readwindow returns.
has handle => (is => 'rw');
has blocksize => (is => 'rw', default => $DEFAULT_BLOCK_SIZE);
has blocknumber => (is => 'rw', default => -1);
has blocksub => (is => 'rw', default => undef);
has _queue => (is => 'rw', default => sub {[q{}, q{}]});

# Read the next block from the handle and return the current window, i.e.
# the previously read block followed by the new one.  A bare return (undef
# in scalar context) signals end of file; a failed read is fatal.
sub readwindow {
    my ($self) = @_;
    my $window;

    # No window has been handed out yet when the block number is negative.
    my $first = $self->blocknumber < 0;
    {
        # This path is too hot for autodie at its current performance
        # (at the time of writing, that would be autodie/2.23).
        #  - Benchmark chromium-browser/32.0.1700.123-2/source
        no autodie qw(read);
        my $blocksize = $self->blocksize;
        # Read twice the amount in the first window and split that
        # into "two parts".  That way we avoid half a block followed
        # by a full block with the first half being identical to the
        # previous one.
        $blocksize *= 2 if $first;
        my $res = read($self->handle, $window, $blocksize);
        if (not $res) {
            # read yields undef on error and 0 at end of file.
            die encode_utf8("read failed: $!\n") unless defined($res);
            return;
        }
    }

    if(defined($self->blocksub)) {
        # The callback communicates through $_; localize it so the
        # caller's $_ is left alone.
        local $_ = $window;
        $self->blocksub->();
        $window = $_;
    }

    $self->blocknumber($self->blocknumber + 1);

    if ($first && $self->blocksize < length($window)) {
        # Split the first block into two windows.  We assume here that
        # if the two halves are not of equal length, then it is
        # because the file is shorter than 2*blocksize.  In this case,
        # make the second half the shorter (it shouldn't matter and it
        # is easier to do this way).
        my $blocksize = $self->blocksize;
        $self->_queue->[0] = substr($window, 0, $blocksize);
        $self->_queue->[1] = substr($window, $blocksize);
        return $window;
    }

    # Regular case: drop the oldest block, append the new one and hand
    # out both of them as a single string.
    shift(@{$self->_queue});
    push(@{$self->_queue}, $window);
    return join($EMPTY, @{$self->_queue});
}

=head1 NAME

Lintian::SlidingWindow - Lintian interface to sliding window match

=head1 SYNOPSIS

    use Lintian::SlidingWindow;

    open(my $fd, '<', 'someevilfile.c')
      or die "Cannot open someevilfile.c: $!";

    my $sfd = Lintian::SlidingWindow->new(
        handle   => $fd,
        blocksub => sub { $_ = lc($_); },
    );

    while (defined(my $window = $sfd->readwindow)) {
        if (index($window, 'evil') > -1) {
            if($window =~
                  m/software \s++ shall \s++
                    be \s++ used \s++ for \s++ good \s*+ ,?+ \s*+
                    not \s++ evil/xsim) {
                # do something like : tag 'license-problem-json-evil';
            }
        }
    }

    close($fd);

=head1 DESCRIPTION

Lintian::SlidingWindow provides a way of matching some pattern,
including multi line pattern, without needing to fully load the
file in memory.

=head1 CLASS METHODS

=over 4

=item new(handle => HANDLE[, blocksub => BLOCKSUB[, blocksize => BLOCKSIZE]])

Create a new sliding window by reading from a given HANDLE, which must
be open for reading.  Optionally run BLOCKSUB against each block.  The
data is passed to BLOCKSUB in C<$_>, and whatever BLOCKSUB leaves in
C<$_> replaces it.  Note that BLOCKSUB should apply its transform byte
by byte and must not depend on context.

The arguments are the attributes described under L</INSTANCE METHODS>
and are passed by name.

Each window consists of up to two blocks of BLOCKSIZE characters.
BLOCKSIZE defaults to 4096.

=back

=head1 INSTANCE METHODS

=over 4

=item readwindow

Read the next block and return the current window, which is the
previously read block followed by the new one.  The first call reads
up to two blocks at once.

Returns undef at end of file (an empty list in list context).  Because
a window may be a false value such as C<"0">, test the result with
C<defined>.

=item blocksize

Number of characters requested per block.

=item blocknumber

Zero-based count of the windows returned so far; C<-1> before the first
successful call to C<readwindow>.

=item handle

The file handle the blocks are read from.

=item blocksub

Optional code reference run against every block, or undef.

=back

=head1 DIAGNOSTICS

=over 4

=item read failed: %s

Reading from the handle reported an error.  The message carries the
system error text.

=back

=head1 AUTHOR

Originally written by Bastien ROUCARIES for Lintian.

=head1 SEE ALSO

lintian(1)

=cut

1;

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et