1
0
Fork 0
devscripts/lib/Devscripts/Uscan/Utils.pm
Daniel Baumann b543f2e88d
Adding upstream version 2.25.15.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
2025-06-21 11:04:07 +02:00

475 lines
15 KiB
Perl

package Devscripts::Uscan::Utils;
use strict;
use Devscripts::Uscan::Output;
use Devscripts::Utils;
use Exporter 'import';
our @EXPORT = (
qw(fix_href recursive_regex_dir newest_dir get_compression
get_suffix get_priority quoted_regex_parse safe_replace mangle
uscan_exec uscan_exec_no_fail)
);
#######################################################################
# {{{ code 5: utility functions (download)
#######################################################################
sub fix_href ($) {
my ($href) = @_;
# Remove newline (code moved from outside fix_href)
$href =~ s/\n//g;
# Remove whitespace from URLs:
# https://www.w3.org/TR/html5/links.html#links-created-by-a-and-area-elements
$href =~ s/^\s+//;
$href =~ s/\s+$//;
return $href;
}
sub recursive_regex_dir ($$$$$$) {
# If return '', parent code to cause return 1
my ($line, $base, $dirversionmangle, $watchfile, $lineptr,
$download_version)
= @_;
$base =~ m%^(\w+://[^/]+)/(.*)$%;
my $site = $1;
my @dirs = ();
if (defined $2) {
@dirs = split /(\/)/, $2;
}
my $dir = '/';
foreach my $dirpattern (@dirs) {
if ($dirpattern =~ /\(.*\)/) {
uscan_verbose "dir=>$dir dirpattern=>$dirpattern";
my $newest_dir = newest_dir($line, $site, $dir, $dirpattern,
$dirversionmangle, $watchfile, $lineptr, $download_version);
uscan_verbose "newest_dir => '$newest_dir'";
if ($newest_dir ne '') {
$dir .= "$newest_dir";
} else {
uscan_debug "No \$newest_dir";
return '';
}
} else {
$dir .= "$dirpattern";
}
}
return $site . $dir;
}
# very similar to code above
sub newest_dir ($$$$$$$$) {
# return string $newdir as success
# return string '' if error, to cause grand parent code to return 1
my ($line, $site, $dir, $pattern, $dirversionmangle, $watchfile,
$lineptr, $download_version)
= @_;
my ($newdir);
uscan_verbose "Requesting URL:\n $site$dir";
if ($site =~ m%^http(s)?://%) {
require Devscripts::Uscan::http;
$newdir = Devscripts::Uscan::http::http_newdir($1, @_);
} elsif ($site =~ m%^ftp://%) {
require Devscripts::Uscan::ftp;
$newdir = Devscripts::Uscan::ftp::ftp_newdir(@_);
} else {
# Neither HTTP nor FTP site
uscan_warn "neither HTTP nor FTP site, impossible case for newdir().";
$newdir = '';
}
return $newdir;
}
#######################################################################
# }}} code 5: utility functions (download)
#######################################################################
#######################################################################
# {{{ code 6: utility functions (compression)
#######################################################################
# Get legal values for compression
sub get_compression ($) {
my $compression = $_[0];
my $canonical_compression;
# be liberal in what you accept...
my %opt2comp = (
gz => 'gzip',
gzip => 'gzip',
bz2 => 'bzip2',
bzip2 => 'bzip2',
lzma => 'lzma',
xz => 'xz',
zip => 'zip',
zst => 'zst',
zstd => 'zst',
);
# Normalize compression methods to the names used by Dpkg::Compression
if (exists $opt2comp{$compression}) {
$canonical_compression = $opt2comp{$compression};
} else {
uscan_die "$progname: invalid compression, $compression given.";
}
return $canonical_compression;
}
# Get legal values for compression suffix
sub get_suffix ($) {
my $compression = $_[0];
my $canonical_suffix;
# be liberal in what you accept...
my %opt2suffix = (
gz => 'gz',
gzip => 'gz',
bz2 => 'bz2',
bzip2 => 'bz2',
lzma => 'lzma',
xz => 'xz',
zip => 'zip',
zst => 'zst',
zstd => 'zst',
);
# Normalize compression methods to the names used by Dpkg::Compression
if (exists $opt2suffix{$compression}) {
$canonical_suffix = $opt2suffix{$compression};
} elsif ($compression eq 'default') {
require Devscripts::MkOrigtargz::Config;
return &Devscripts::MkOrigtargz::Config::default_compression;
} else {
uscan_die "$progname: invalid suffix, $compression given.";
}
return $canonical_suffix;
}
# Get compression priority
sub get_priority ($) {
my $href = $_[0];
my $priority = 0;
if ($href =~ m/\.tar\.gz/i) {
$priority = 1;
}
if ($href =~ m/\.tar\.bz2/i) {
$priority = 2;
}
if ($href =~ m/\.tar\.lzma/i) {
$priority = 3;
}
#if ($href =~ m/\.tar\.zstd?/i) {
# $priority = 4;
#}
if ($href =~ m/\.tar\.xz/i) {
$priority = 4;
}
return $priority;
}
#######################################################################
# }}} code 6: utility functions (compression)
#######################################################################
#######################################################################
# {{{ code 7: utility functions (regex)
#######################################################################
sub quoted_regex_parse($) {
my $pattern = shift;
my %closers = ('{', '}', '[', ']', '(', ')', '<', '>');
$pattern =~ /^(s|tr|y)(.)(.*)$/;
my ($sep, $rest) = ($2, $3 || '');
my $closer = $closers{$sep};
my $parsed_ok = 1;
my $regexp = '';
my $replacement = '';
my $flags = '';
my $open = 1;
my $last_was_escape = 0;
my $in_replacement = 0;
for my $char (split //, $rest) {
if ($char eq $sep and !$last_was_escape) {
$open++;
if ($open == 1) {
if ($in_replacement) {
# Separator after end of replacement
uscan_warn "Extra \"$sep\" after end of replacement.";
$parsed_ok = 0;
last;
} else {
$in_replacement = 1;
}
} else {
if ($open > 1) {
if ($in_replacement) {
$replacement .= $char;
} else {
$regexp .= $char;
}
}
}
} elsif ($char eq $closer and !$last_was_escape) {
$open--;
if ($open > 0) {
if ($in_replacement) {
$replacement .= $char;
} else {
$regexp .= $char;
}
} elsif ($open < 0) {
uscan_warn "Extra \"$closer\" after end of replacement.";
$parsed_ok = 0;
last;
}
} else {
if ($in_replacement) {
if ($open) {
$replacement .= $char;
} else {
$flags .= $char;
}
} else {
if ($open) {
$regexp .= $char;
} elsif ($char !~ m/\s/) {
uscan_warn
"Non-whitespace between <...> and <...> (or similars).";
$parsed_ok = 0;
last;
}
# skip if blanks between <...> and <...> (or similars)
}
}
# Don't treat \\ as an escape
$last_was_escape = ($char eq '\\' and !$last_was_escape);
}
unless ($in_replacement and $open == 0) {
uscan_warn "Empty replacement string.";
$parsed_ok = 0;
}
return ($parsed_ok, $regexp, $replacement, $flags);
}
sub safe_replace($$) {
my ($in, $pat) = @_;
eval "uscan_debug \"safe_replace input=\\\"\$\$in\\\"\\n\"";
$pat =~ s/^\s*(.*?)\s*$/$1/;
$pat =~ /^(s|tr|y)(.)/;
my ($op, $sep) = ($1, $2 || '');
my $esc = "\Q$sep\E";
my ($parsed_ok, $regexp, $replacement, $flags);
if ($sep eq '{' or $sep eq '(' or $sep eq '[' or $sep eq '<') {
($parsed_ok, $regexp, $replacement, $flags) = quoted_regex_parse($pat);
unless ($parsed_ok) {
uscan_warn "stop mangling: rule=\"$pat\"\n"
. " mangling rule with <...>, (...), {...} failed.";
return 0;
}
} elsif ($pat
!~ /^(?:s|tr|y)$esc((?:\\.|[^\\$esc])*)$esc((?:\\.|[^\\$esc])*)$esc([a-z]*)$/
) {
$sep = "/" if $sep eq '';
uscan_warn "stop mangling: rule=\"$pat\"\n"
. " rule doesn't match \"(s|tr|y)$sep.*$sep.*$sep\[a-z\]*\" (or similar).";
return 0;
} else {
($regexp, $replacement, $flags) = ($1, $2, $3);
}
uscan_debug
"safe_replace with regexp=\"$regexp\", replacement=\"$replacement\", and flags=\"$flags\"";
my $safeflags = $flags;
if ($op eq 'tr' or $op eq 'y') {
$safeflags =~ tr/cds//cd;
if ($safeflags ne $flags) {
uscan_warn "stop mangling: rule=\"$pat\"\n"
. " flags must consist of \"cds\" only.";
return 0;
}
$regexp =~ s/\\(.)/$1/g;
$replacement =~ s/\\(.)/$1/g;
$regexp =~ s/([^-])/'\\x' . unpack 'H*', $1/ge;
$replacement =~ s/([^-])/'\\x' . unpack 'H*', $1/ge;
eval "\$\$in =~ tr<$regexp><$replacement>$flags;";
if ($@) {
uscan_warn "stop mangling: rule=\"$pat\"\n"
. " mangling \"tr\" or \"y\" rule execution failed.";
return 0;
} else {
return 1;
}
} else {
$safeflags =~ tr/gix//cd;
if ($safeflags ne $flags) {
uscan_warn "stop mangling: rule=\"$pat\"\n"
. " flags must consist of \"gix\" only.";
return 0;
}
my $global = ($flags =~ s/g//);
$flags = "(?$flags)" if length $flags;
my $slashg;
if ($regexp =~ /(?<!\\)(\\\\)*\\G/) {
$slashg = 1;
# if it's not initial, it is too dangerous
if ($regexp =~ /^.*[^\\](\\\\)*\\G/) {
uscan_warn "stop mangling: rule=\"$pat\"\n"
. " dangerous use of \\G with regexp=\"$regexp\".";
return 0;
}
}
# Behave like Perl and treat e.g. "\." in replacement as "."
# We allow the case escape characters to remain and
# process them later
$replacement =~ s/(^|[^\\])\\([^luLUE])/$1$2/g;
# Unescape escaped separator characters
$replacement =~ s/\\\Q$sep\E/$sep/g;
# If bracketing quotes were used, also unescape the
# closing version
### {{ ### (FOOL EDITOR for non-quoted kets)
$replacement =~ s/\\\Q}\E/}/g if $sep eq '{';
$replacement =~ s/\\\Q]\E/]/g if $sep eq '[';
$replacement =~ s/\\\Q)\E/)/g if $sep eq '(';
$replacement =~ s/\\\Q>\E/>/g if $sep eq '<';
# The replacement below will modify $replacement so keep
# a copy. We'll need to restore it to the current value if
# the global flag was set on the input pattern.
my $orig_replacement = $replacement;
my ($first, $last, $pos, $zerowidth, $matched, @captures) = (0, -1, 0);
while (1) {
eval {
# handle errors due to unsafe constructs in $regexp
no re 'eval';
# restore position
pos($$in) = $pos if $pos;
if ($zerowidth) {
# previous match was a zero-width match, simulate it to set
# the internal flag that avoids the infinite loop
$$in =~ /()/g;
}
# Need to use /g to make it use and save pos()
$matched = ($$in =~ /$flags$regexp/g);
if ($matched) {
# save position and size of the match
my $oldpos = $pos;
$pos = pos($$in);
($first, $last) = ($-[0], $+[0]);
if ($slashg) {
# \G in the match, weird things can happen
$zerowidth = ($pos == $oldpos);
# For example, matching without a match
$matched = 0
if ( not defined $first
or not defined $last);
} else {
$zerowidth = ($last - $first == 0);
}
for my $i (0 .. $#-) {
$captures[$i] = substr $$in, $-[$i], $+[$i] - $-[$i];
}
}
};
if ($@) {
uscan_warn "stop mangling: rule=\"$pat\"\n"
. " mangling \"s\" rule execution failed.";
return 0;
}
# No match; leave the original string untouched but return
# success as there was nothing wrong with the pattern
return 1 unless $matched;
# Replace $X
$replacement
=~ s/[\$\\](\d)/defined $captures[$1] ? $captures[$1] : ''/ge;
$replacement
=~ s/\$\{(\d)\}/defined $captures[$1] ? $captures[$1] : ''/ge;
$replacement =~ s/\$&/$captures[0]/g;
# Make \l etc escapes work
$replacement =~ s/\\l(.)/lc $1/e;
$replacement =~ s/\\L(.*?)(\\E|\z)/lc $1/e;
$replacement =~ s/\\u(.)/uc $1/e;
$replacement =~ s/\\U(.*?)(\\E|\z)/uc $1/e;
# Actually do the replacement
substr $$in, $first, $last - $first, $replacement;
# Update position
$pos += length($replacement) - ($last - $first);
if ($global) {
$replacement = $orig_replacement;
} else {
last;
}
}
return 1;
}
}
# call this as
# if mangle($watchfile, \$line, 'uversionmangle:',
# \@{$options{'uversionmangle'}}, \$version) {
# return 1;
# }
sub mangle($$$$$) {
my ($watchfile, $lineptr, $name, $rulesptr, $verptr) = @_;
foreach my $pat (@{$rulesptr}) {
if (!safe_replace($verptr, $pat)) {
uscan_warn "In $watchfile, potentially"
. " unsafe or malformed $name"
. " pattern:\n '$pat'"
. " found. Skipping watchline\n"
. " $$lineptr";
return 1;
}
uscan_debug "After $name $$verptr";
}
return 0;
}
*uscan_exec_no_fail = \&ds_exec_no_fail;
*uscan_exec = \&ds_exec;
#######################################################################
# }}} code 7: utility functions (regex)
#######################################################################
1;