475 lines
15 KiB
Perl
475 lines
15 KiB
Perl
package Devscripts::Uscan::Utils;
|
|
|
|
use strict;
|
|
use Devscripts::Uscan::Output;
|
|
use Devscripts::Utils;
|
|
use Exporter 'import';
|
|
|
|
our @EXPORT = (
|
|
qw(fix_href recursive_regex_dir newest_dir get_compression
|
|
get_suffix get_priority quoted_regex_parse safe_replace mangle
|
|
uscan_exec uscan_exec_no_fail)
|
|
);
|
|
|
|
#######################################################################
|
|
# {{{ code 5: utility functions (download)
|
|
#######################################################################
|
|
sub fix_href ($) {
|
|
my ($href) = @_;
|
|
|
|
# Remove newline (code moved from outside fix_href)
|
|
$href =~ s/\n//g;
|
|
|
|
# Remove whitespace from URLs:
|
|
# https://www.w3.org/TR/html5/links.html#links-created-by-a-and-area-elements
|
|
$href =~ s/^\s+//;
|
|
$href =~ s/\s+$//;
|
|
|
|
return $href;
|
|
}
|
|
|
|
sub recursive_regex_dir ($$$$$$) {
|
|
|
|
# If return '', parent code to cause return 1
|
|
my ($line, $base, $dirversionmangle, $watchfile, $lineptr,
|
|
$download_version)
|
|
= @_;
|
|
|
|
$base =~ m%^(\w+://[^/]+)/(.*)$%;
|
|
my $site = $1;
|
|
my @dirs = ();
|
|
if (defined $2) {
|
|
@dirs = split /(\/)/, $2;
|
|
}
|
|
my $dir = '/';
|
|
|
|
foreach my $dirpattern (@dirs) {
|
|
if ($dirpattern =~ /\(.*\)/) {
|
|
uscan_verbose "dir=>$dir dirpattern=>$dirpattern";
|
|
my $newest_dir = newest_dir($line, $site, $dir, $dirpattern,
|
|
$dirversionmangle, $watchfile, $lineptr, $download_version);
|
|
uscan_verbose "newest_dir => '$newest_dir'";
|
|
if ($newest_dir ne '') {
|
|
$dir .= "$newest_dir";
|
|
} else {
|
|
uscan_debug "No \$newest_dir";
|
|
return '';
|
|
}
|
|
} else {
|
|
$dir .= "$dirpattern";
|
|
}
|
|
}
|
|
return $site . $dir;
|
|
}
|
|
|
|
# very similar to code above
|
|
sub newest_dir ($$$$$$$$) {
|
|
|
|
# return string $newdir as success
|
|
# return string '' if error, to cause grand parent code to return 1
|
|
my ($line, $site, $dir, $pattern, $dirversionmangle, $watchfile,
|
|
$lineptr, $download_version)
|
|
= @_;
|
|
my ($newdir);
|
|
uscan_verbose "Requesting URL:\n $site$dir";
|
|
if ($site =~ m%^http(s)?://%) {
|
|
require Devscripts::Uscan::http;
|
|
$newdir = Devscripts::Uscan::http::http_newdir($1, @_);
|
|
} elsif ($site =~ m%^ftp://%) {
|
|
require Devscripts::Uscan::ftp;
|
|
$newdir = Devscripts::Uscan::ftp::ftp_newdir(@_);
|
|
} else {
|
|
# Neither HTTP nor FTP site
|
|
uscan_warn "neither HTTP nor FTP site, impossible case for newdir().";
|
|
$newdir = '';
|
|
}
|
|
return $newdir;
|
|
}
|
|
#######################################################################
|
|
# }}} code 5: utility functions (download)
|
|
#######################################################################
|
|
|
|
#######################################################################
|
|
# {{{ code 6: utility functions (compression)
|
|
#######################################################################
|
|
# Get legal values for compression
|
|
sub get_compression ($) {
|
|
my $compression = $_[0];
|
|
my $canonical_compression;
|
|
|
|
# be liberal in what you accept...
|
|
my %opt2comp = (
|
|
gz => 'gzip',
|
|
gzip => 'gzip',
|
|
bz2 => 'bzip2',
|
|
bzip2 => 'bzip2',
|
|
lzma => 'lzma',
|
|
xz => 'xz',
|
|
zip => 'zip',
|
|
zst => 'zst',
|
|
zstd => 'zst',
|
|
);
|
|
|
|
# Normalize compression methods to the names used by Dpkg::Compression
|
|
if (exists $opt2comp{$compression}) {
|
|
$canonical_compression = $opt2comp{$compression};
|
|
} else {
|
|
uscan_die "$progname: invalid compression, $compression given.";
|
|
}
|
|
return $canonical_compression;
|
|
}
|
|
|
|
# Get legal values for compression suffix
|
|
sub get_suffix ($) {
|
|
my $compression = $_[0];
|
|
my $canonical_suffix;
|
|
|
|
# be liberal in what you accept...
|
|
my %opt2suffix = (
|
|
gz => 'gz',
|
|
gzip => 'gz',
|
|
bz2 => 'bz2',
|
|
bzip2 => 'bz2',
|
|
lzma => 'lzma',
|
|
xz => 'xz',
|
|
zip => 'zip',
|
|
zst => 'zst',
|
|
zstd => 'zst',
|
|
);
|
|
|
|
# Normalize compression methods to the names used by Dpkg::Compression
|
|
if (exists $opt2suffix{$compression}) {
|
|
$canonical_suffix = $opt2suffix{$compression};
|
|
} elsif ($compression eq 'default') {
|
|
require Devscripts::MkOrigtargz::Config;
|
|
return &Devscripts::MkOrigtargz::Config::default_compression;
|
|
} else {
|
|
uscan_die "$progname: invalid suffix, $compression given.";
|
|
}
|
|
return $canonical_suffix;
|
|
}
|
|
|
|
# Get compression priority
|
|
sub get_priority ($) {
|
|
my $href = $_[0];
|
|
my $priority = 0;
|
|
if ($href =~ m/\.tar\.gz/i) {
|
|
$priority = 1;
|
|
}
|
|
if ($href =~ m/\.tar\.bz2/i) {
|
|
$priority = 2;
|
|
}
|
|
if ($href =~ m/\.tar\.lzma/i) {
|
|
$priority = 3;
|
|
}
|
|
#if ($href =~ m/\.tar\.zstd?/i) {
|
|
# $priority = 4;
|
|
#}
|
|
if ($href =~ m/\.tar\.xz/i) {
|
|
$priority = 4;
|
|
}
|
|
return $priority;
|
|
}
|
|
#######################################################################
|
|
# }}} code 6: utility functions (compression)
|
|
#######################################################################
|
|
|
|
#######################################################################
|
|
# {{{ code 7: utility functions (regex)
|
|
#######################################################################
|
|
sub quoted_regex_parse($) {
|
|
my $pattern = shift;
|
|
my %closers = ('{', '}', '[', ']', '(', ')', '<', '>');
|
|
|
|
$pattern =~ /^(s|tr|y)(.)(.*)$/;
|
|
my ($sep, $rest) = ($2, $3 || '');
|
|
my $closer = $closers{$sep};
|
|
|
|
my $parsed_ok = 1;
|
|
my $regexp = '';
|
|
my $replacement = '';
|
|
my $flags = '';
|
|
my $open = 1;
|
|
my $last_was_escape = 0;
|
|
my $in_replacement = 0;
|
|
|
|
for my $char (split //, $rest) {
|
|
if ($char eq $sep and !$last_was_escape) {
|
|
$open++;
|
|
if ($open == 1) {
|
|
if ($in_replacement) {
|
|
|
|
# Separator after end of replacement
|
|
uscan_warn "Extra \"$sep\" after end of replacement.";
|
|
$parsed_ok = 0;
|
|
last;
|
|
} else {
|
|
$in_replacement = 1;
|
|
}
|
|
} else {
|
|
if ($open > 1) {
|
|
if ($in_replacement) {
|
|
$replacement .= $char;
|
|
} else {
|
|
$regexp .= $char;
|
|
}
|
|
}
|
|
}
|
|
} elsif ($char eq $closer and !$last_was_escape) {
|
|
$open--;
|
|
if ($open > 0) {
|
|
if ($in_replacement) {
|
|
$replacement .= $char;
|
|
} else {
|
|
$regexp .= $char;
|
|
}
|
|
} elsif ($open < 0) {
|
|
uscan_warn "Extra \"$closer\" after end of replacement.";
|
|
$parsed_ok = 0;
|
|
last;
|
|
}
|
|
} else {
|
|
if ($in_replacement) {
|
|
if ($open) {
|
|
$replacement .= $char;
|
|
} else {
|
|
$flags .= $char;
|
|
}
|
|
} else {
|
|
if ($open) {
|
|
$regexp .= $char;
|
|
} elsif ($char !~ m/\s/) {
|
|
uscan_warn
|
|
"Non-whitespace between <...> and <...> (or similars).";
|
|
$parsed_ok = 0;
|
|
last;
|
|
}
|
|
|
|
# skip if blanks between <...> and <...> (or similars)
|
|
}
|
|
}
|
|
|
|
# Don't treat \\ as an escape
|
|
$last_was_escape = ($char eq '\\' and !$last_was_escape);
|
|
}
|
|
|
|
unless ($in_replacement and $open == 0) {
|
|
uscan_warn "Empty replacement string.";
|
|
$parsed_ok = 0;
|
|
}
|
|
|
|
return ($parsed_ok, $regexp, $replacement, $flags);
|
|
}
|
|
|
|
sub safe_replace($$) {
|
|
my ($in, $pat) = @_;
|
|
eval "uscan_debug \"safe_replace input=\\\"\$\$in\\\"\\n\"";
|
|
$pat =~ s/^\s*(.*?)\s*$/$1/;
|
|
|
|
$pat =~ /^(s|tr|y)(.)/;
|
|
my ($op, $sep) = ($1, $2 || '');
|
|
my $esc = "\Q$sep\E";
|
|
my ($parsed_ok, $regexp, $replacement, $flags);
|
|
|
|
if ($sep eq '{' or $sep eq '(' or $sep eq '[' or $sep eq '<') {
|
|
($parsed_ok, $regexp, $replacement, $flags) = quoted_regex_parse($pat);
|
|
|
|
unless ($parsed_ok) {
|
|
uscan_warn "stop mangling: rule=\"$pat\"\n"
|
|
. " mangling rule with <...>, (...), {...} failed.";
|
|
return 0;
|
|
}
|
|
} elsif ($pat
|
|
!~ /^(?:s|tr|y)$esc((?:\\.|[^\\$esc])*)$esc((?:\\.|[^\\$esc])*)$esc([a-z]*)$/
|
|
) {
|
|
$sep = "/" if $sep eq '';
|
|
uscan_warn "stop mangling: rule=\"$pat\"\n"
|
|
. " rule doesn't match \"(s|tr|y)$sep.*$sep.*$sep\[a-z\]*\" (or similar).";
|
|
return 0;
|
|
} else {
|
|
($regexp, $replacement, $flags) = ($1, $2, $3);
|
|
}
|
|
|
|
uscan_debug
|
|
"safe_replace with regexp=\"$regexp\", replacement=\"$replacement\", and flags=\"$flags\"";
|
|
my $safeflags = $flags;
|
|
if ($op eq 'tr' or $op eq 'y') {
|
|
$safeflags =~ tr/cds//cd;
|
|
if ($safeflags ne $flags) {
|
|
uscan_warn "stop mangling: rule=\"$pat\"\n"
|
|
. " flags must consist of \"cds\" only.";
|
|
return 0;
|
|
}
|
|
|
|
$regexp =~ s/\\(.)/$1/g;
|
|
$replacement =~ s/\\(.)/$1/g;
|
|
|
|
$regexp =~ s/([^-])/'\\x' . unpack 'H*', $1/ge;
|
|
$replacement =~ s/([^-])/'\\x' . unpack 'H*', $1/ge;
|
|
|
|
eval "\$\$in =~ tr<$regexp><$replacement>$flags;";
|
|
|
|
if ($@) {
|
|
uscan_warn "stop mangling: rule=\"$pat\"\n"
|
|
. " mangling \"tr\" or \"y\" rule execution failed.";
|
|
return 0;
|
|
} else {
|
|
return 1;
|
|
}
|
|
} else {
|
|
$safeflags =~ tr/gix//cd;
|
|
if ($safeflags ne $flags) {
|
|
uscan_warn "stop mangling: rule=\"$pat\"\n"
|
|
. " flags must consist of \"gix\" only.";
|
|
return 0;
|
|
}
|
|
|
|
my $global = ($flags =~ s/g//);
|
|
$flags = "(?$flags)" if length $flags;
|
|
|
|
my $slashg;
|
|
if ($regexp =~ /(?<!\\)(\\\\)*\\G/) {
|
|
$slashg = 1;
|
|
|
|
# if it's not initial, it is too dangerous
|
|
if ($regexp =~ /^.*[^\\](\\\\)*\\G/) {
|
|
uscan_warn "stop mangling: rule=\"$pat\"\n"
|
|
. " dangerous use of \\G with regexp=\"$regexp\".";
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
# Behave like Perl and treat e.g. "\." in replacement as "."
|
|
# We allow the case escape characters to remain and
|
|
# process them later
|
|
$replacement =~ s/(^|[^\\])\\([^luLUE])/$1$2/g;
|
|
|
|
# Unescape escaped separator characters
|
|
$replacement =~ s/\\\Q$sep\E/$sep/g;
|
|
|
|
# If bracketing quotes were used, also unescape the
|
|
# closing version
|
|
### {{ ### (FOOL EDITOR for non-quoted kets)
|
|
$replacement =~ s/\\\Q}\E/}/g if $sep eq '{';
|
|
$replacement =~ s/\\\Q]\E/]/g if $sep eq '[';
|
|
$replacement =~ s/\\\Q)\E/)/g if $sep eq '(';
|
|
$replacement =~ s/\\\Q>\E/>/g if $sep eq '<';
|
|
|
|
# The replacement below will modify $replacement so keep
|
|
# a copy. We'll need to restore it to the current value if
|
|
# the global flag was set on the input pattern.
|
|
my $orig_replacement = $replacement;
|
|
|
|
my ($first, $last, $pos, $zerowidth, $matched, @captures) = (0, -1, 0);
|
|
while (1) {
|
|
eval {
|
|
# handle errors due to unsafe constructs in $regexp
|
|
no re 'eval';
|
|
|
|
# restore position
|
|
pos($$in) = $pos if $pos;
|
|
|
|
if ($zerowidth) {
|
|
|
|
# previous match was a zero-width match, simulate it to set
|
|
# the internal flag that avoids the infinite loop
|
|
$$in =~ /()/g;
|
|
}
|
|
|
|
# Need to use /g to make it use and save pos()
|
|
$matched = ($$in =~ /$flags$regexp/g);
|
|
|
|
if ($matched) {
|
|
|
|
# save position and size of the match
|
|
my $oldpos = $pos;
|
|
$pos = pos($$in);
|
|
($first, $last) = ($-[0], $+[0]);
|
|
|
|
if ($slashg) {
|
|
|
|
# \G in the match, weird things can happen
|
|
$zerowidth = ($pos == $oldpos);
|
|
|
|
# For example, matching without a match
|
|
$matched = 0
|
|
if ( not defined $first
|
|
or not defined $last);
|
|
} else {
|
|
$zerowidth = ($last - $first == 0);
|
|
}
|
|
for my $i (0 .. $#-) {
|
|
$captures[$i] = substr $$in, $-[$i], $+[$i] - $-[$i];
|
|
}
|
|
}
|
|
};
|
|
if ($@) {
|
|
uscan_warn "stop mangling: rule=\"$pat\"\n"
|
|
. " mangling \"s\" rule execution failed.";
|
|
return 0;
|
|
}
|
|
|
|
# No match; leave the original string untouched but return
|
|
# success as there was nothing wrong with the pattern
|
|
return 1 unless $matched;
|
|
|
|
# Replace $X
|
|
$replacement
|
|
=~ s/[\$\\](\d)/defined $captures[$1] ? $captures[$1] : ''/ge;
|
|
$replacement
|
|
=~ s/\$\{(\d)\}/defined $captures[$1] ? $captures[$1] : ''/ge;
|
|
$replacement =~ s/\$&/$captures[0]/g;
|
|
|
|
# Make \l etc escapes work
|
|
$replacement =~ s/\\l(.)/lc $1/e;
|
|
$replacement =~ s/\\L(.*?)(\\E|\z)/lc $1/e;
|
|
$replacement =~ s/\\u(.)/uc $1/e;
|
|
$replacement =~ s/\\U(.*?)(\\E|\z)/uc $1/e;
|
|
|
|
# Actually do the replacement
|
|
substr $$in, $first, $last - $first, $replacement;
|
|
|
|
# Update position
|
|
$pos += length($replacement) - ($last - $first);
|
|
|
|
if ($global) {
|
|
$replacement = $orig_replacement;
|
|
} else {
|
|
last;
|
|
}
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
# call this as
|
|
# if mangle($watchfile, \$line, 'uversionmangle:',
|
|
# \@{$options{'uversionmangle'}}, \$version) {
|
|
# return 1;
|
|
# }
|
|
sub mangle($$$$$) {
|
|
my ($watchfile, $lineptr, $name, $rulesptr, $verptr) = @_;
|
|
foreach my $pat (@{$rulesptr}) {
|
|
if (!safe_replace($verptr, $pat)) {
|
|
uscan_warn "In $watchfile, potentially"
|
|
. " unsafe or malformed $name"
|
|
. " pattern:\n '$pat'"
|
|
. " found. Skipping watchline\n"
|
|
. " $$lineptr";
|
|
return 1;
|
|
}
|
|
uscan_debug "After $name $$verptr";
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
*uscan_exec_no_fail = \&ds_exec_no_fail;
|
|
|
|
*uscan_exec = \&ds_exec;
|
|
|
|
#######################################################################
|
|
# }}} code 7: utility functions (regex)
|
|
#######################################################################
|
|
|
|
1;
|