diff options
Diffstat (limited to 'lib/Devscripts/Uscan/Utils.pm')
-rw-r--r-- | lib/Devscripts/Uscan/Utils.pm | 468 |
1 files changed, 468 insertions, 0 deletions
diff --git a/lib/Devscripts/Uscan/Utils.pm b/lib/Devscripts/Uscan/Utils.pm new file mode 100644 index 0000000..4a6dd8a --- /dev/null +++ b/lib/Devscripts/Uscan/Utils.pm @@ -0,0 +1,468 @@ +package Devscripts::Uscan::Utils; + +use strict; +use Devscripts::Uscan::Output; +use Devscripts::Utils; +use Exporter 'import'; + +our @EXPORT = ( + qw(fix_href recursive_regex_dir newest_dir get_compression + get_suffix get_priority quoted_regex_parse safe_replace mangle + uscan_exec uscan_exec_no_fail) +); + +####################################################################### +# {{{ code 5: utility functions (download) +####################################################################### +sub fix_href ($) { + my ($href) = @_; + + # Remove newline (code moved from outside fix_href) + $href =~ s/\n//g; + + # Remove whitespace from URLs: + # https://www.w3.org/TR/html5/links.html#links-created-by-a-and-area-elements + $href =~ s/^\s+//; + $href =~ s/\s+$//; + + return $href; +} + +sub recursive_regex_dir ($$$$$$) { + + # If return '', parent code to cause return 1 + my ($line, $base, $dirversionmangle, $watchfile, $lineptr, + $download_version) + = @_; + + $base =~ m%^(\w+://[^/]+)/(.*)$%; + my $site = $1; + my @dirs = (); + if (defined $2) { + @dirs = split /(\/)/, $2; + } + my $dir = '/'; + + foreach my $dirpattern (@dirs) { + if ($dirpattern =~ /\(.*\)/) { + uscan_verbose "dir=>$dir dirpattern=>$dirpattern"; + my $newest_dir = newest_dir($line, $site, $dir, $dirpattern, + $dirversionmangle, $watchfile, $lineptr, $download_version); + uscan_verbose "newest_dir => '$newest_dir'"; + if ($newest_dir ne '') { + $dir .= "$newest_dir"; + } else { + uscan_debug "No \$newest_dir"; + return ''; + } + } else { + $dir .= "$dirpattern"; + } + } + return $site . $dir; +} + +# very similar to code above +sub newest_dir ($$$$$$$$) { + + # return string $newdir as success + # return string '' if error, to cause grand parent code to return 1 + my ($line, $site, $dir, $pattern, $dirversionmangle, $watchfile, + $lineptr, $download_version) + = @_; + my ($newdir); + uscan_verbose "Requesting URL:\n $site$dir"; + if ($site =~ m%^http(s)?://%) { + require Devscripts::Uscan::http; + $newdir = Devscripts::Uscan::http::http_newdir($1, @_); + } elsif ($site =~ m%^ftp://%) { + require Devscripts::Uscan::ftp; + $newdir = Devscripts::Uscan::ftp::ftp_newdir(@_); + } else { + # Neither HTTP nor FTP site + uscan_warn "neither HTTP nor FTP site, impossible case for newdir()."; + $newdir = ''; + } + return $newdir; +} +####################################################################### +# }}} code 5: utility functions (download) +####################################################################### + +####################################################################### +# {{{ code 6: utility functions (compression) +####################################################################### +# Get legal values for compression +sub get_compression ($) { + my $compression = $_[0]; + my $canonical_compression; + + # be liberal in what you accept... + my %opt2comp = ( + gz => 'gzip', + gzip => 'gzip', + bz2 => 'bzip2', + bzip2 => 'bzip2', + lzma => 'lzma', + xz => 'xz', + zip => 'zip', + ); + + # Normalize compression methods to the names used by Dpkg::Compression + if (exists $opt2comp{$compression}) { + $canonical_compression = $opt2comp{$compression}; + } else { + uscan_die "$progname: invalid compression, $compression given."; + } + return $canonical_compression; +} + +# Get legal values for compression suffix +sub get_suffix ($) { + my $compression = $_[0]; + my $canonical_suffix; + + # be liberal in what you accept... + my %opt2suffix = ( + gz => 'gz', + gzip => 'gz', + bz2 => 'bz2', + bzip2 => 'bz2', + lzma => 'lzma', + xz => 'xz', + zip => 'zip', + ); + + # Normalize compression methods to the names used by Dpkg::Compression + if (exists $opt2suffix{$compression}) { + $canonical_suffix = $opt2suffix{$compression}; + } elsif ($compression eq 'default') { + require Devscripts::MkOrigtargz::Config; + return &Devscripts::MkOrigtargz::Config::default_compression; + } else { + uscan_die "$progname: invalid suffix, $compression given."; + } + return $canonical_suffix; +} + +# Get compression priority +sub get_priority ($) { + my $href = $_[0]; + my $priority = 0; + if ($href =~ m/\.tar\.gz/i) { + $priority = 1; + } + if ($href =~ m/\.tar\.bz2/i) { + $priority = 2; + } + if ($href =~ m/\.tar\.lzma/i) { + $priority = 3; + } + if ($href =~ m/\.tar\.xz/i) { + $priority = 4; + } + return $priority; +} +####################################################################### +# }}} code 6: utility functions (compression) +####################################################################### + +####################################################################### +# {{{ code 7: utility functions (regex) +####################################################################### +sub quoted_regex_parse($) { + my $pattern = shift; + my %closers = ('{', '}', '[', ']', '(', ')', '<', '>'); + + $pattern =~ /^(s|tr|y)(.)(.*)$/; + my ($sep, $rest) = ($2, $3 || ''); + my $closer = $closers{$sep}; + + my $parsed_ok = 1; + my $regexp = ''; + my $replacement = ''; + my $flags = ''; + my $open = 1; + my $last_was_escape = 0; + my $in_replacement = 0; + + for my $char (split //, $rest) { + if ($char eq $sep and !$last_was_escape) { + $open++; + if ($open == 1) { + if ($in_replacement) { + + # Separator after end of replacement + uscan_warn "Extra \"$sep\" after end of replacement."; + $parsed_ok = 0; + last; + } else { + $in_replacement = 1; + } + } else { + if ($open > 1) { + if ($in_replacement) { + $replacement .= $char; + } else { + $regexp .= $char; + } + } + } + } elsif ($char eq $closer and !$last_was_escape) { + $open--; + if ($open > 0) { + if ($in_replacement) { + $replacement .= $char; + } else { + $regexp .= $char; + } + } elsif ($open < 0) { + uscan_warn "Extra \"$closer\" after end of replacement."; + $parsed_ok = 0; + last; + } + } else { + if ($in_replacement) { + if ($open) { + $replacement .= $char; + } else { + $flags .= $char; + } + } else { + if ($open) { + $regexp .= $char; + } elsif ($char !~ m/\s/) { + uscan_warn + "Non-whitespace between <...> and <...> (or similars)."; + $parsed_ok = 0; + last; + } + + # skip if blanks between <...> and <...> (or similars) + } + } + + # Don't treat \\ as an escape + $last_was_escape = ($char eq '\\' and !$last_was_escape); + } + + unless ($in_replacement and $open == 0) { + uscan_warn "Empty replacement string."; + $parsed_ok = 0; + } + + return ($parsed_ok, $regexp, $replacement, $flags); +} + +sub safe_replace($$) { + my ($in, $pat) = @_; + eval "uscan_debug \"safe_replace input=\\\"\$\$in\\\"\\n\""; + $pat =~ s/^\s*(.*?)\s*$/$1/; + + $pat =~ /^(s|tr|y)(.)/; + my ($op, $sep) = ($1, $2 || ''); + my $esc = "\Q$sep\E"; + my ($parsed_ok, $regexp, $replacement, $flags); + + if ($sep eq '{' or $sep eq '(' or $sep eq '[' or $sep eq '<') { + ($parsed_ok, $regexp, $replacement, $flags) = quoted_regex_parse($pat); + + unless ($parsed_ok) { + uscan_warn "stop mangling: rule=\"$pat\"\n" + . " mangling rule with <...>, (...), {...} failed."; + return 0; + } + } elsif ($pat + !~ /^(?:s|tr|y)$esc((?:\\.|[^\\$esc])*)$esc((?:\\.|[^\\$esc])*)$esc([a-z]*)$/ + ) { + $sep = "/" if $sep eq ''; + uscan_warn "stop mangling: rule=\"$pat\"\n" + . " rule doesn't match \"(s|tr|y)$sep.*$sep.*$sep\[a-z\]*\" (or similar)."; + return 0; + } else { + ($regexp, $replacement, $flags) = ($1, $2, $3); + } + + uscan_debug +"safe_replace with regexp=\"$regexp\", replacement=\"$replacement\", and flags=\"$flags\""; + my $safeflags = $flags; + if ($op eq 'tr' or $op eq 'y') { + $safeflags =~ tr/cds//cd; + if ($safeflags ne $flags) { + uscan_warn "stop mangling: rule=\"$pat\"\n" + . " flags must consist of \"cds\" only."; + return 0; + } + + $regexp =~ s/\\(.)/$1/g; + $replacement =~ s/\\(.)/$1/g; + + $regexp =~ s/([^-])/'\\x' . unpack 'H*', $1/ge; + $replacement =~ s/([^-])/'\\x' . unpack 'H*', $1/ge; + + eval "\$\$in =~ tr<$regexp><$replacement>$flags;"; + + if ($@) { + uscan_warn "stop mangling: rule=\"$pat\"\n" + . " mangling \"tr\" or \"y\" rule execution failed."; + return 0; + } else { + return 1; + } + } else { + $safeflags =~ tr/gix//cd; + if ($safeflags ne $flags) { + uscan_warn "stop mangling: rule=\"$pat\"\n" + . " flags must consist of \"gix\" only."; + return 0; + } + + my $global = ($flags =~ s/g//); + $flags = "(?$flags)" if length $flags; + + my $slashg; + if ($regexp =~ /(?<!\\)(\\\\)*\\G/) { + $slashg = 1; + + # if it's not initial, it is too dangerous + if ($regexp =~ /^.*[^\\](\\\\)*\\G/) { + uscan_warn "stop mangling: rule=\"$pat\"\n" + . " dangerous use of \\G with regexp=\"$regexp\"."; + return 0; + } + } + + # Behave like Perl and treat e.g. "\." in replacement as "." + # We allow the case escape characters to remain and + # process them later + $replacement =~ s/(^|[^\\])\\([^luLUE])/$1$2/g; + + # Unescape escaped separator characters + $replacement =~ s/\\\Q$sep\E/$sep/g; + + # If bracketing quotes were used, also unescape the + # closing version + ### {{ ### (FOOL EDITOR for non-quoted kets) + $replacement =~ s/\\\Q}\E/}/g if $sep eq '{'; + $replacement =~ s/\\\Q]\E/]/g if $sep eq '['; + $replacement =~ s/\\\Q)\E/)/g if $sep eq '('; + $replacement =~ s/\\\Q>\E/>/g if $sep eq '<'; + + # The replacement below will modify $replacement so keep + # a copy. We'll need to restore it to the current value if + # the global flag was set on the input pattern. + my $orig_replacement = $replacement; + + my ($first, $last, $pos, $zerowidth, $matched, @captures) = (0, -1, 0); + while (1) { + eval { + # handle errors due to unsafe constructs in $regexp + no re 'eval'; + + # restore position + pos($$in) = $pos if $pos; + + if ($zerowidth) { + + # previous match was a zero-width match, simulate it to set + # the internal flag that avoids the infinite loop + $$in =~ /()/g; + } + + # Need to use /g to make it use and save pos() + $matched = ($$in =~ /$flags$regexp/g); + + if ($matched) { + + # save position and size of the match + my $oldpos = $pos; + $pos = pos($$in); + ($first, $last) = ($-[0], $+[0]); + + if ($slashg) { + + # \G in the match, weird things can happen + $zerowidth = ($pos == $oldpos); + + # For example, matching without a match + $matched = 0 + if ( not defined $first + or not defined $last); + } else { + $zerowidth = ($last - $first == 0); + } + for my $i (0 .. $#-) { + $captures[$i] = substr $$in, $-[$i], $+[$i] - $-[$i]; + } + } + }; + if ($@) { + uscan_warn "stop mangling: rule=\"$pat\"\n" + . " mangling \"s\" rule execution failed."; + return 0; + } + + # No match; leave the original string untouched but return + # success as there was nothing wrong with the pattern + return 1 unless $matched; + + # Replace $X + $replacement + =~ s/[\$\\](\d)/defined $captures[$1] ? $captures[$1] : ''/ge; + $replacement + =~ s/\$\{(\d)\}/defined $captures[$1] ? $captures[$1] : ''/ge; + $replacement =~ s/\$&/$captures[0]/g; + + # Make \l etc escapes work + $replacement =~ s/\\l(.)/lc $1/e; + $replacement =~ s/\\L(.*?)(\\E|\z)/lc $1/e; + $replacement =~ s/\\u(.)/uc $1/e; + $replacement =~ s/\\U(.*?)(\\E|\z)/uc $1/e; + + # Actually do the replacement + substr $$in, $first, $last - $first, $replacement; + + # Update position + $pos += length($replacement) - ($last - $first); + + if ($global) { + $replacement = $orig_replacement; + } else { + last; + } + } + + return 1; + } +} + +# call this as +# if mangle($watchfile, \$line, 'uversionmangle:', +# \@{$options{'uversionmangle'}}, \$version) { +# return 1; +# } +sub mangle($$$$$) { + my ($watchfile, $lineptr, $name, $rulesptr, $verptr) = @_; + foreach my $pat (@{$rulesptr}) { + if (!safe_replace($verptr, $pat)) { + uscan_warn "In $watchfile, potentially" + . " unsafe or malformed $name" + . " pattern:\n '$pat'" + . " found. Skipping watchline\n" + . " $$lineptr"; + return 1; + } + uscan_debug "After $name $$verptr"; + } + return 0; +} + +*uscan_exec_no_fail = \&ds_exec_no_fail; + +*uscan_exec = \&ds_exec; + +####################################################################### +# }}} code 7: utility functions (regex) +####################################################################### + +1; |