1 files changed, 468 insertions, 0 deletions
diff --git a/lib/Devscripts/Uscan/Utils.pm b/lib/Devscripts/Uscan/Utils.pm
new file mode 100644
index 0000000..e65c776
--- /dev/null
+++ b/lib/Devscripts/Uscan/Utils.pm
@@ -0,0 +1,468 @@
+package Devscripts::Uscan::Utils;
+
+use strict;
+use Devscripts::Uscan::Output;
+use Devscripts::Utils;
+use Exporter 'import';
+
+our @EXPORT = (
+    qw(fix_href recursive_regex_dir newest_dir get_compression
+      get_suffix get_priority quoted_regex_parse safe_replace mangle
+      uscan_exec uscan_exec_no_fail)
+);
+
+#######################################################################
+# {{{ code 5: utility functions (download)
+#######################################################################
+sub fix_href ($) {
+    my ($href) = @_;
+
+    # Remove newline (code moved from outside fix_href)
+    $href =~ s/\n//g;
+
+  # Remove whitespace from URLs:
+  # https://www.w3.org/TR/html5/links.html#links-created-by-a-and-area-elements
+    $href =~ s/^\s+//;
+    $href =~ s/\s+$//;
+
+    return $href;
+}
+
+sub recursive_regex_dir ($$$$$$) {
+
+    # If return '', parent code to cause return 1
+    my ($downloader, $base, $dirversionmangle, $watchfile, $lineptr,
+        $download_version)
+      = @_;
+
+    $base =~ m%^(\w+://[^/]+)/(.*)$%;
+    my $site = $1;
+    my @dirs = ();
+    if (defined $2) {
+        @dirs = split /(\/)/, $2;
+    }
+    my $dir = '/';
+
+    foreach my $dirpattern (@dirs) {
+        if ($dirpattern =~ /\(.*\)/) {
+            uscan_verbose "dir=>$dir  dirpattern=>$dirpattern";
+            my $newest_dir = newest_dir($downloader, $site, $dir, $dirpattern,
+                $dirversionmangle, $watchfile, $lineptr, $download_version);
+            uscan_verbose "newest_dir => '$newest_dir'";
+            if ($newest_dir ne '') {
+                $dir .= "$newest_dir";
+            } else {
+                uscan_debug "No \$newest_dir";
+                return '';
+            }
+        } else {
+            $dir .= "$dirpattern";
+        }
+    }
+    return $site . $dir;
+}
+
+# very similar to code above
+sub newest_dir ($$$$$$$$) {
+
+    # return string $newdir as success
+    # return string '' if error, to cause grand parent code to return 1
+    my ($downloader, $site, $dir, $pattern, $dirversionmangle, $watchfile,
+        $lineptr, $download_version)
+      = @_;
+    my ($newdir);
+    uscan_verbose "Requesting URL:\n   $site$dir";
+    if ($site =~ m%^http(s)?://%) {
+        require Devscripts::Uscan::http;
+        $newdir = Devscripts::Uscan::http::http_newdir($1, @_);
+    } elsif ($site =~ m%^ftp://%) {
+        require Devscripts::Uscan::ftp;
+        $newdir = Devscripts::Uscan::ftp::ftp_newdir(@_);
+    } else {
+        # Neither HTTP nor FTP site
+        uscan_warn "neither HTTP nor FTP site, impossible case for newdir().";
+        $newdir = '';
+    }
+    return $newdir;
+}
+#######################################################################
+# }}} code 5: utility functions (download)
+#######################################################################
+
+#######################################################################
+# {{{ code 6: utility functions (compression)
+#######################################################################
+# Get legal values for compression
+sub get_compression ($) {
+    my $compression = $_[0];
+    my $canonical_compression;
+
+    # be liberal in what you accept...
+    my %opt2comp = (
+        gz    => 'gzip',
+        gzip  => 'gzip',
+        bz2   => 'bzip2',
+        bzip2 => 'bzip2',
+        lzma  => 'lzma',
+        xz    => 'xz',
+        zip   => 'zip',
+    );
+
+    # Normalize compression methods to the names used by Dpkg::Compression
+    if (exists $opt2comp{$compression}) {
+        $canonical_compression = $opt2comp{$compression};
+    } else {
+        uscan_die "$progname: invalid compression, $compression given.";
+    }
+    return $canonical_compression;
+}
+
+# Get legal values for compression suffix
+sub get_suffix ($) {
+    my $compression = $_[0];
+    my $canonical_suffix;
+
+    # be liberal in what you accept...
+    my %opt2suffix = (
+        gz    => 'gz',
+        gzip  => 'gz',
+        bz2   => 'bz2',
+        bzip2 => 'bz2',
+        lzma  => 'lzma',
+        xz    => 'xz',
+        zip   => 'zip',
+    );
+
+    # Normalize compression methods to the names used by Dpkg::Compression
+    if (exists $opt2suffix{$compression}) {
+        $canonical_suffix = $opt2suffix{$compression};
+    } elsif ($compression eq 'default') {
+        require Devscripts::MkOrigtargz::Config;
+        return &Devscripts::MkOrigtargz::Config::default_compression;
+    } else {
+        uscan_die "$progname: invalid suffix, $compression given.";
+    }
+    return $canonical_suffix;
+}
+
+# Get compression priority
+sub get_priority ($) {
+    my $href     = $_[0];
+    my $priority = 0;
+    if ($href =~ m/\.tar\.gz/i) {
+        $priority = 1;
+    }
+    if ($href =~ m/\.tar\.bz2/i) {
+        $priority = 2;
+    }
+    if ($href =~ m/\.tar\.lzma/i) {
+        $priority = 3;
+    }
+    if ($href =~ m/\.tar\.xz/i) {
+        $priority = 4;
+    }
+    return $priority;
+}
+#######################################################################
+# }}} code 6: utility functions (compression)
+#######################################################################
+
+#######################################################################
+# {{{ code 7: utility functions (regex)
+#######################################################################
+sub quoted_regex_parse($) {
+    my $pattern = shift;
+    my %closers = ('{', '}', '[', ']', '(', ')', '<', '>');
+
+    $pattern =~ /^(s|tr|y)(.)(.*)$/;
+    my ($sep, $rest) = ($2, $3 || '');
+    my $closer = $closers{$sep};
+
+    my $parsed_ok       = 1;
+    my $regexp          = '';
+    my $replacement     = '';
+    my $flags           = '';
+    my $open            = 1;
+    my $last_was_escape = 0;
+    my $in_replacement  = 0;
+
+    for my $char (split //, $rest) {
+        if ($char eq $sep and !$last_was_escape) {
+            $open++;
+            if ($open == 1) {
+                if ($in_replacement) {
+
+                    # Separator after end of replacement
+                    uscan_warn "Extra \"$sep\" after end of replacement.";
+                    $parsed_ok = 0;
+                    last;
+                } else {
+                    $in_replacement = 1;
+                }
+            } else {
+                if ($open > 1) {
+                    if ($in_replacement) {
+                        $replacement .= $char;
+                    } else {
+                        $regexp .= $char;
+                    }
+                }
+            }
+        } elsif ($char eq $closer and !$last_was_escape) {
+            $open--;
+            if ($open > 0) {
+                if ($in_replacement) {
+                    $replacement .= $char;
+                } else {
+                    $regexp .= $char;
+                }
+            } elsif ($open < 0) {
+                uscan_warn "Extra \"$closer\" after end of replacement.";
+                $parsed_ok = 0;
+                last;
+            }
+        } else {
+            if ($in_replacement) {
+                if ($open) {
+                    $replacement .= $char;
+                } else {
+                    $flags .= $char;
+                }
+            } else {
+                if ($open) {
+                    $regexp .= $char;
+                } elsif ($char !~ m/\s/) {
+                    uscan_warn
+                      "Non-whitespace between <...> and <...> (or similars).";
+                    $parsed_ok = 0;
+                    last;
+                }
+
+                # skip if blanks between <...> and <...> (or similars)
+            }
+        }
+
+        # Don't treat \\ as an escape
+        $last_was_escape = ($char eq '\\' and !$last_was_escape);
+    }
+
+    unless ($in_replacement and $open == 0) {
+        uscan_warn "Empty replacement string.";
+        $parsed_ok = 0;
+    }
+
+    return ($parsed_ok, $regexp, $replacement, $flags);
+}
+
+sub safe_replace($$) {
+    my ($in, $pat) = @_;
+    eval "uscan_debug \"safe_replace input=\\\"\$\$in\\\"\\n\"";
+    $pat =~ s/^\s*(.*?)\s*$/$1/;
+
+    $pat =~ /^(s|tr|y)(.)/;
+    my ($op, $sep) = ($1, $2 || '');
+    my $esc = "\Q$sep\E";
+    my ($parsed_ok, $regexp, $replacement, $flags);
+
+    if ($sep eq '{' or $sep eq '(' or $sep eq '[' or $sep eq '<') {
+        ($parsed_ok, $regexp, $replacement, $flags) = quoted_regex_parse($pat);
+
+        unless ($parsed_ok) {
+            uscan_warn "stop mangling: rule=\"$pat\"\n"
+              . "  mangling rule with <...>, (...), {...} failed.";
+            return 0;
+        }
+    } elsif ($pat
+        !~ /^(?:s|tr|y)$esc((?:\\.|[^\\$esc])*)$esc((?:\\.|[^\\$esc])*)$esc([a-z]*)$/
+    ) {
+        $sep = "/" if $sep eq '';
+        uscan_warn "stop mangling: rule=\"$pat\"\n"
+          . "   rule doesn't match \"(s|tr|y)$sep.*$sep.*$sep\[a-z\]*\" (or similar).";
+        return 0;
+    } else {
+        ($regexp, $replacement, $flags) = ($1, $2, $3);
+    }
+
+    uscan_debug
+"safe_replace with regexp=\"$regexp\", replacement=\"$replacement\", and flags=\"$flags\"";
+    my $safeflags = $flags;
+    if ($op eq 'tr' or $op eq 'y') {
+        $safeflags =~ tr/cds//cd;
+        if ($safeflags ne $flags) {
+            uscan_warn "stop mangling: rule=\"$pat\"\n"
+              . "   flags must consist of \"cds\" only.";
+            return 0;
+        }
+
+        $regexp =~ s/\\(.)/$1/g;
+        $replacement =~ s/\\(.)/$1/g;
+
+        $regexp =~ s/([^-])/'\\x'  . unpack 'H*', $1/ge;
+        $replacement =~ s/([^-])/'\\x'  . unpack 'H*', $1/ge;
+
+        eval "\$\$in =~ tr<$regexp><$replacement>$flags;";
+
+        if ($@) {
+            uscan_warn "stop mangling: rule=\"$pat\"\n"
+              . "   mangling \"tr\" or \"y\" rule execution failed.";
+            return 0;
+        } else {
+            return 1;
+        }
+    } else {
+        $safeflags =~ tr/gix//cd;
+        if ($safeflags ne $flags) {
+            uscan_warn "stop mangling: rule=\"$pat\"\n"
+              . "   flags must consist of \"gix\" only.";
+            return 0;
+        }
+
+        my $global = ($flags =~ s/g//);
+        $flags = "(?$flags)" if length $flags;
+
+        my $slashg;
+        if ($regexp =~ /(?<!\\)(\\\\)*\\G/) {
+            $slashg = 1;
+
+            # if it's not initial, it is too dangerous
+            if ($regexp =~ /^.*[^\\](\\\\)*\\G/) {
+                uscan_warn "stop mangling: rule=\"$pat\"\n"
+                  . "   dangerous use of \\G with regexp=\"$regexp\".";
+                return 0;
+            }
+        }
+
+        # Behave like Perl and treat e.g. "\." in replacement as "."
+        # We allow the case escape characters to remain and
+        # process them later
+        $replacement =~ s/(^|[^\\])\\([^luLUE])/$1$2/g;
+
+        # Unescape escaped separator characters
+        $replacement =~ s/\\\Q$sep\E/$sep/g;
+
+        # If bracketing quotes were used, also unescape the
+        # closing version
+        ### {{ ### (FOOL EDITOR for non-quoted kets)
+        $replacement =~ s/\\\Q}\E/}/g if $sep eq '{';
+        $replacement =~ s/\\\Q]\E/]/g if $sep eq '[';
+        $replacement =~ s/\\\Q)\E/)/g if $sep eq '(';
+        $replacement =~ s/\\\Q>\E/>/g if $sep eq '<';
+
+        # The replacement below will modify $replacement so keep
+        # a copy. We'll need to restore it to the current value if
+        # the global flag was set on the input pattern.
+        my $orig_replacement = $replacement;
+
+        my ($first, $last, $pos, $zerowidth, $matched, @captures) = (0, -1, 0);
+        while (1) {
+            eval {
+                # handle errors due to unsafe constructs in $regexp
+                no re 'eval';
+
+                # restore position
+                pos($$in) = $pos if $pos;
+
+                if ($zerowidth) {
+
+                    # previous match was a zero-width match, simulate it to set
+                    # the internal flag that avoids the infinite loop
+                    $$in =~ /()/g;
+                }
+
+                # Need to use /g to make it use and save pos()
+                $matched = ($$in =~ /$flags$regexp/g);
+
+                if ($matched) {
+
+                    # save position and size of the match
+                    my $oldpos = $pos;
+                    $pos = pos($$in);
+                    ($first, $last) = ($-[0], $+[0]);
+
+                    if ($slashg) {
+
+                        # \G in the match, weird things can happen
+                        $zerowidth = ($pos == $oldpos);
+
+                        # For example, matching without a match
+                        $matched = 0
+                          if ( not defined $first
+                            or not defined $last);
+                    } else {
+                        $zerowidth = ($last - $first == 0);
+                    }
+                    for my $i (0 .. $#-) {
+                        $captures[$i] = substr $$in, $-[$i], $+[$i] - $-[$i];
+                    }
+                }
+            };
+            if ($@) {
+                uscan_warn "stop mangling: rule=\"$pat\"\n"
+                  . "   mangling \"s\" rule execution failed.";
+                return 0;
+            }
+
+            # No match; leave the original string  untouched but return
+            # success as there was nothing wrong with the pattern
+            return 1 unless $matched;
+
+            # Replace $X
+            $replacement
+              =~ s/[\$\\](\d)/defined $captures[$1] ? $captures[$1] : ''/ge;
+            $replacement
+              =~ s/\$\{(\d)\}/defined $captures[$1] ? $captures[$1] : ''/ge;
+            $replacement =~ s/\$&/$captures[0]/g;
+
+            # Make \l etc escapes work
+            $replacement =~ s/\\l(.)/lc $1/e;
+            $replacement =~ s/\\L(.*?)(\\E|\z)/lc $1/e;
+            $replacement =~ s/\\u(.)/uc $1/e;
+            $replacement =~ s/\\U(.*?)(\\E|\z)/uc $1/e;
+
+            # Actually do the replacement
+            substr $$in, $first, $last - $first, $replacement;
+
+            # Update position
+            $pos += length($replacement) - ($last - $first);
+
+            if ($global) {
+                $replacement = $orig_replacement;
+            } else {
+                last;
+            }
+        }
+
+        return 1;
+    }
+}
+
+# call this as
+#    if mangle($watchfile, \$line, 'uversionmangle:',
+#	    \@{$options{'uversionmangle'}}, \$version) {
+#	return 1;
+#    }
+sub mangle($$$$$) {
+    my ($watchfile, $lineptr, $name, $rulesptr, $verptr) = @_;
+    foreach my $pat (@{$rulesptr}) {
+        if (!safe_replace($verptr, $pat)) {
+            uscan_warn "In $watchfile, potentially"
+              . " unsafe or malformed $name"
+              . " pattern:\n  '$pat'"
+              . " found. Skipping watchline\n"
+              . "  $$lineptr";
+            return 1;
+        }
+        uscan_debug "After $name $$verptr";
+    }
+    return 0;
+}
+
+*uscan_exec_no_fail = \&ds_exec_no_fail;
+
+*uscan_exec = \&ds_exec;
+
+#######################################################################
+# }}} code 7: utility functions (regex)
+#######################################################################
+
+1;