# files/privacy-breach -- lintian check script -*- perl -*- # Copyright (C) 1998 Christian Schwarz and Richard Braakman # Copyright (C) 2021 Felix Lechner # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, you can find it on the World Wide # Web at https://www.gnu.org/copyleft/gpl.html, or write to the Free # Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, # MA 02110-1301, USA. package Lintian::Check::Files::PrivacyBreach; use v5.20; use warnings; use utf8; use Const::Fast; use Unicode::UTF8 qw(encode_utf8); use Lintian::SlidingWindow; const my $BLOCKSIZE => 16_384; const my $EMPTY => q{}; const my $PRIVACY_BREAKER_WEBSITES_FIELDS => 3; use Moo; use namespace::clean; with 'Lintian::Check'; has PRIVACY_BREAKER_WEBSITES => ( is => 'rw', lazy => 1, default => sub { my ($self) = @_; my %website; my $data = $self->data->load('files/privacy-breaker-websites',qr/\s*\~\~/); for my $key ($data->all) { my $value = $data->value($key); my ($pattern, $tag, $suggest) = split(/ \s* ~~ \s* /msx, $value,$PRIVACY_BREAKER_WEBSITES_FIELDS); $tag //= $EMPTY; # trim both ends $tag =~ s/^\s+|\s+$//g; $tag = $key unless length $tag; $website{$key} = { 'tag' => $tag, 'regexp' => qr/$pattern/xsm, }; $website{$key}{'suggest'} = $suggest if defined $suggest; } return \%website; } ); has PRIVACY_BREAKER_FRAGMENTS => ( is => 'rw', lazy => 1, default => sub { my ($self) = @_; my %fragment; my $data = $self->data->load('files/privacy-breaker-fragments',qr/\s*\~\~/); for my $key ($data->all) { my $value = $data->value($key); my ($pattern, $tag) = split(/\s*\~\~\s*/, $value, 2); $fragment{$key} = { 'keyword' => $key, 'regex' => qr/$pattern/xsm, 'tag' => $tag, }; } return \%fragment; } ); has PRIVACY_BREAKER_TAG_ATTR => ( is => 'rw', lazy => 1, default => sub { my ($self) = @_; my %attribute; my $data = $self->data->load('files/privacy-breaker-tag-attr',qr/\s*\~\~\s*/); for my $key ($data->all) { my $value = $data->value($key); my ($keywords,$pattern) = split(/\s*\~\~\s*/, $value, 2); $pattern =~ s/&URL/(?:(?:ht|f)tps?:)?\/\/[^"\r\n]*/g; my @keywordlist; my @keywordsorraw = split(/\s*\|\|\s*/,$keywords); for my $keywordor (@keywordsorraw) { my @keywordsandraw = split(/\s*&&\s*/,$keywordor); push(@keywordlist, \@keywordsandraw); } $attribute{$key} = { 'keywords' => \@keywordlist, 'regex' => qr/$pattern/xsm, }; } return \%attribute; } ); sub detect_privacy_breach { my ($self, $file) = @_; my %privacybreachhash; return unless $file->is_regular_file; open(my $fd, '<:raw', $file->unpacked_path) or die encode_utf8('Cannot open ' . $file->unpacked_path); my $sfd = Lintian::SlidingWindow->new; $sfd->handle($fd); $sfd->blocksize($BLOCKSIZE); $sfd->blocksub(sub { $_ = lc; }); while (my $lowercase = $sfd->readwindow) { # strip comments for my $x (qw(