#! @PERL@
#
# Written by Adam Byrtek <alpha@debian.org>, 2002
# Rewritten by David Sterba <dave@jikos.cz>, 2009
#
# Extfs to handle patches in context and unified diff format.
# Known issues: When name of file to patch is modified during editing, 
# hunk is duplicated on copyin. It is unavoidable.

use bytes;
use strict;
use warnings;
use POSIX;
use File::Temp 'tempfile';

# standard binaries
my $lzip = 'lzip';
my $lz4  = 'lz4';
my $lzma = 'lzma';
my $lzo  = 'lzop';
my $xz   = 'xz';
my $zstd = 'zstd';
my $bzip = 'bzip2';
my $gzip = 'gzip';
my $fileutil = 'file -b';

# date parsing requires Date::Parse from TimeDate module
my $parsedates = eval 'require Date::Parse';

# regular expressions
my $unified_header=qr/^--- .*\t.*\n\+\+\+ .*\t.*\n$/;
my $unified_extract=qr/^--- ([^\t]+).*\n\+\+\+ ([^\t]+)\s*(.*)\n/;
my $unified_header2=qr/^--- .*\n\+\+\+ .*\n$/;
my $unified_extract2=qr/^--- ([^\s]+).*\n\+\+\+ ([^\s]+)\s*(.*)\n/;
my $unified_contents=qr/^([+\-\\ \n]|@@ .* @@)/;
my $unified_hunk=qr/@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))? @@.*\n/;

my $context_header=qr/^\*\*\* .*\t.*\n--- .*\t.*\n$/;
my $context_extract=qr/^\*\*\* ([^\t]+).*\n--- ([^\t]+)\s*(.*)\n/;
my $context_header2=qr/^\*\*\* .*\n--- .*\n$/;
my $context_extract2=qr/^\*\*\* ([^\s]+).*\n--- ([^\s]+)\s*(.*)\n/;
my $context_contents=qr/^([!+\-\\ \n]|-{3} .* -{4}|\*{3} .* \*{4}|\*{15})/;

my $ls_extract_id=qr/^[^\s]+\s+[^\s]+\s+([^\s]+)\s+([^\s]+)/;
my $basename=qr|^(.*/)*([^/]+)$|;

sub patchfs_canonicalize_path ($) {
  my ($fname) = @_;
  $fname =~ s,/+,/,g;
  $fname =~ s,(^|/)(?:\.?\./)+,$1,;
  return $fname;
}

# output unix date in a mc-readable format
sub timef
{
    my @time=localtime($_[0]);
    return sprintf '%02d-%02d-%02d %02d:%02d', $time[4]+1, $time[3],
		   $time[5]+1900, $time[2], $time[1];
}

# parse given string as a date and return unix time
sub datetime
{
    # in case of problems fall back to 0 in unix time
    # note: str2time interprets some wrong values (eg. " ") as 'today'
    if ($parsedates && defined (my $t=str2time($_[0]))) {
	return timef($t);
    }
    return timef(time);
}

# print message on stderr and exit
sub error
{
    print STDERR $_[0], "\n";
    exit 1;
}

# (compressed) input
sub myin
{
    my ($qfname)=(quotemeta $_[0]);

    $_=`$fileutil $qfname`;
    if (/^'*lz4/) {
	return "$lz4 -dc $qfname";
    } elsif (/^'*lzip/) {
	return "$lzip -dc $qfname";
    } elsif (/^'*lzma/) {
	return "$lzma -dc $qfname";
    } elsif (/^'*lzo/) {
	return "$lzo -dc $qfname";
    } elsif (/^'*xz/) {
	return "$xz -dc $qfname";
    } elsif (/^'*zst/) {
	return "$zstd -dc $qfname";
    } elsif (/^'*bzip/) {
	return "$bzip -dc $qfname";
    } elsif (/^'*gzip/) {
	return "$gzip -dc $qfname";
    } else {
	return "cat $qfname";
    }
}

# (compressed) output
sub myout
{
    my ($qfname,$append)=(quotemeta $_[0],$_[1]);
    my ($sep) = $append ? '>>' : '>';

    $_=`$fileutil $qfname`;
    if (/^'*lz4/) {
	return "$lz4 -c $sep $qfname";
    } elsif (/^'*lzip/) {
	return "$lzip -c $sep $qfname";
    } elsif (/^'*lzma/) {
	return "$lzma -c $sep $qfname";
    } elsif (/^'*lzo/) {
	return "$lzo -c $sep $qfname";
    } elsif (/^'*xz/) {
	return "$xz -c $sep $qfname";
    } elsif (/^'*zst/) {
	return "$zstd -c $sep $qfname";
    } elsif (/^'*bzip/) {
	return "$bzip -c $sep $qfname";
    } elsif (/^'*gzip/) {
	return "$gzip -c $sep $qfname";
    } else {
	return "cat $sep $qfname";
    }
}

# select diff filename conforming with rules found in diff.info
sub diff_filename
{
    my ($fsrc,$fdst)= @_;
    # TODO: can remove these two calls later
    $fsrc = patchfs_canonicalize_path ($fsrc);
    $fdst = patchfs_canonicalize_path ($fdst);
    if (!$fdst && !$fsrc) {
	error 'Index: not yet implemented';
    } elsif (!$fsrc || $fsrc eq '/dev/null') {
	return ($fdst,'PATCH-CREATE/');
    } elsif (!$fdst || $fdst eq '/dev/null') {
	return ($fsrc,'PATCH-REMOVE/');
    } elsif (($fdst eq '/dev/null') && ($fsrc eq '/dev/null')) {
	error 'Malformed diff, missing a sane filename';
    } else {
	# fewest path name components
	if ($fdst=~s|/|/|g < $fsrc=~s|/|/|g) {
	    return ($fdst,'');
	} elsif ($fdst=~s|/|/|g > $fsrc=~s|/|/|g) {
	    return ($fsrc,'');
	} else {
	    # shorter base name
	    if (($fdst=~/$basename/o,length $2) < ($fsrc=~/$basename/o,length $2)) {
		return ($fdst,'');
	    } elsif (($fdst=~/$basename/o,length $2) > ($fsrc=~/$basename/o,length $2)) {
		return ($fsrc,'');
	    } else {
		# shortest names
		if (length $fdst < length $fsrc) {
		    return ($fdst,'');
		} else {
		    return ($fsrc,'');
		}
	    }
	}
    }
}

# IN: diff "archive" name
# IN: file handle for output; STDIN for list, tempfile else
# IN: filename to watch (for: copyout, rm), '' for: list
# IN: remove the file?
#     true  - ... and print out the rest
#     false - ie. copyout mode, print just the file
sub parse($$$$)
{
    my $archive=shift;
    my $fh=shift;
    my $file=shift;
    my $rmmod=shift;
    my ($state,$fsize,$time);
    my ($f,$fsrc,$fdst,$prefix);
    my ($unified,$context);
    my ($skipread, $filetoprint, $filefound);
    my ($h_add,$h_del,$h_ctx);	# hunk line counts
    my ($h_r1,$h_r2);		# hunk ranges
    my @outsrc;		# if desired ...
    my @outdst;
    my $line;
    my %fmap_size=();
    my %fmap_time=();

    import Date::Parse if ($parsedates && $file eq '');

    $line=1;
    $state=0; $fsize=0; $f='';
    $filefound=0;
    while ($skipread || ($line++,$_=<I>)) {
	$skipread=0;
	if($state == 0) {	# expecting comments
	    $unified=$context=0;
	    $unified=1 if (/^--- /);
	    $context=1 if (/^\*\*\* /);
	    if (!$unified && !$context) {
		$filefound=0 if($file ne '' && $filetoprint);
		# shortcut for rmmod xor filefound
		# - in rmmod we print if not found
		# - in copyout (!rmmod) we print if found
		print $fh $_ if($rmmod != $filefound);
		next;
	    }

	    if($file eq '' && $filetoprint) {
		$fmap_size{"$prefix$f"}+=$fsize;
		$fmap_time{"$prefix$f"}=$time;
	    }

	    # start of new file
	    $_ .=<I>;	# steal next line, both formats
	    $line++;
	    if($unified) {
		if(/$unified_header/o) {
		    ($fsrc,$fdst,$time) = /$unified_extract/o;
		} elsif(/$unified_header2/o) {
		    ($fsrc,$fdst,$time) = /$unified_extract2/o;
		} else {
		    error "Can't parse unified diff header";
		}
	    } elsif($context) {
		if(/$context_header/o) {
		    ($fsrc,$fdst,$time) = /$context_extract/o;
		} elsif(/$context_header2/o) {
		    ($fsrc,$fdst,$time) = /$context_extract2/o;
		} else {
		    error "Can't parse context diff header";
		}
	    } else {
		error "Unrecognized diff header";
	    }
	    $fsrc=patchfs_canonicalize_path($fsrc);
	    $fdst=patchfs_canonicalize_path($fdst);
	    if(wantarray) {
		push @outsrc,$fsrc;
		push @outdst,$fdst;
	    }
	    ($f,$prefix)=diff_filename($fsrc,$fdst);
	    $filefound=($f eq $file);

	    $f="$f.diff";
	    $filetoprint=1;
	    $fsize=length;
	    print $fh $_ if($rmmod != $filefound);

	    $state=1;
	} elsif($state == 1) { # expecting diff hunk headers, end of file or comments
	    if($unified) {
		my ($a,$b,$c,$d);
		($a,$b,$h_r1,$c,$d,$h_r2)=/$unified_hunk/o;
		if(!defined($a) || !defined($c)) {
		    # hunk header does not come, a comment inside
		    # or maybe a new file, state 0 will decide
		    $skipread=1;
		    $state=0;
		    next;
		}
		$fsize+=length;
		print $fh $_ if($rmmod != $filefound);
		$h_r1=1 if(!defined($b));
		$h_r2=1 if(!defined($d));
		$h_add=$h_del=$h_ctx=0;
		$state=2;
	    } elsif($context) {
		if(!/$context_contents/o) {
		    $skipread=1;
		    $state=0;
		    next;
		}
		print $fh $_ if($rmmod != $filefound);
		$fsize+=length;
	    }
	} elsif($state == 2) { # expecting hunk contents
	    if($h_del + $h_ctx == $h_r1 && $h_add + $h_ctx == $h_r2) {
		# hooray, end of hunk
		# we optimistically ended with a hunk before but
		# the line has been read already
		$skipread=1;
		$state=1;
		next;
	    }
	    print $fh $_ if($rmmod != $filefound);
	    $fsize+=length;
	    my ($first)= /^(.)/;
	    if(ord($first) == ord('+')) { $h_add++; }
	    elsif(ord($first) == ord('-')) { $h_del++; }
	    elsif(ord($first) == ord(' ')) { $h_ctx++; }
	    elsif(ord($first) == ord('\\')) { 0; }
	    elsif(ord($first) == ord('@')) { error "Malformed hunk, header came too early"; }
	    else { error "$archive:$line: Unrecognized character '$first' in hunk"; }
	}
    }
    if($file eq '' && $filetoprint) {
	$fmap_size{"$prefix$f"}+=$fsize;
	$fmap_time{"$prefix$f"}=$time;
    }

    # use uid and gid from file
    my $qarchive = quotemeta $archive;
    my ($uid,$gid)=(`ls -l $qarchive`=~/$ls_extract_id/o);

    # flush all file names with cumulative file size
    while(my ($fn, $fs) = each %fmap_size) {
	printf $fh "-rw-r--r-- 1 %s %s %d %s %s\n", $uid, $gid, $fs, datetime($fmap_time{$fn}), $fn;
    }

    close($fh) if($file ne '');
    return \(@outsrc, @outdst) if wantarray;
}

# list files affected by patch
sub list($) {
	parse($_[0], *STDOUT, '', 0);
	close(I);
}

# extract diff from patch
# IN: diff file to find
# IN: output file name
sub copyout($$) {
    my ($file,$out)=@_;

    $file=~s/^(PATCH-(CREATE|REMOVE)\/)?(.*)\.diff$/$3/;
    $file = patchfs_canonicalize_path ($file);

    open(FH, ">$out") or error("Cannot open output file");
    parse('', *FH, $file, 0);
}

# remove diff(s) from patch
# IN: archive
# IN: file to delete
sub rm($$) {
    my $archive=shift;
    my ($tmp,$tmpname)=tempfile();

    @_=map {scalar(s/^(PATCH-(CREATE|REMOVE)\/)?(.*)\.diff$/$3/,$_)} @_;

    # just the first file for now
    parse($archive, $tmp, $_[0], 1);
    close I;

    # replace archive
    system("cat \Q$tmpname\E | " . myout($archive,0))==0
      or error "Can't write to archive";
    system("rm -f -- \Q$tmpname\E");
}

# append diff to archive
# IN: diff archive name
# IN: newly created file name in archive
# IN: the real source file
sub copyin($$$) {
    # TODO: seems to be tricky. what to do?
    # copyin of file which is already there may:
    #  * delete the original and copy only the new
    #  * just append the new hunks to the same file
    #    problems: may not be a valid diff, unmerged hunks
    #  * try to merge the two together
    #    ... but we do not want write patchutils again, right?
    error "Copying files into diff not supported";
    return;

    my ($archive,$name,$src)=@_;

    # in case we are appending another diff, we have
    # to delete/merge all the files
    open(DEVNULL, ">/dev/null");
    open I, myin($src).'|';
    my ($srclist,$dstlist)=parse($archive, *DEVNULL, '', 0);
    close(I);
    close(DEVNULL);
    foreach(@$srclist) {
	print("SRC: del $_\n");
    }
    foreach(@$dstlist) {
	print("DST: del $_\n");
    }
    return;

    # remove overwritten file
    open I, myin($archive).'|';
    rm ($archive, $name);
    close I;

    my $cmd1=myin("$src.diff");
    my $cmd2=myout($archive,1);
    system("$cmd1 | $cmd2")==0
      or error "Can't write to archive";
}

my $fin = $ARGV[1];

# resolve symlink
while (-l $fin) {
    $fin = readlink $fin;
}

if ($ARGV[0] eq 'list') {
    open I, myin($fin).'|';
    list ($fin);
    exit 0;
} elsif ($ARGV[0] eq 'copyout') {
    open I, myin($fin)."|";
    copyout ($ARGV[2], $ARGV[3]);
    exit 0;
} elsif ($ARGV[0] eq 'rm') {
    open I, myin($fin)."|";
    rm ($fin, $ARGV[2]);
    exit 0;
} elsif ($ARGV[0] eq 'rmdir') {
    exit 0;
} elsif ($ARGV[0] eq 'mkdir') {
    exit 0;
} elsif ($ARGV[0] eq 'copyin') {
    copyin ($fin, $ARGV[2], $ARGV[3]);
    exit 0;
}
exit 1;