1 files changed, 476 insertions, 0 deletions
diff --git a/zdiff.cc b/zdiff.cc
new file mode 100644
index 0000000..d01f492
--- /dev/null
+++ b/zdiff.cc
@@ -0,0 +1,476 @@
+/*  Zdiff - decompress and compare two files line by line
+    Copyright (C) 2010 Antonio Diaz Diaz.
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <cctype>
+#include <cerrno>
+#include <climits>
+#include <csignal>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+#include <vector>
+#include <fcntl.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#if defined(__MSVCRT__) || defined(__OS2__)
+#include <io.h>
+#endif
+
+#include "arg_parser.h"
+#include "zutils.h"
+
+#if CHAR_BIT != 8
+#error "Environments where CHAR_BIT != 8 are not supported."
+#endif
+
+#ifndef LLONG_MAX
+#define LLONG_MAX  0x7FFFFFFFFFFFFFFFLL
+#endif
+#ifndef LLONG_MIN
+#define LLONG_MIN  (-LLONG_MAX - 1LL)
+#endif
+#ifndef ULLONG_MAX
+#define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL
+#endif
+
+
+namespace {
+
+std::string fifonames[2];	// names of the two fifos passed to diff
+// Flag OR-ed into the open() calls of this file: O_BINARY where defined, else 0.
+#ifdef O_BINARY
+const int o_binary = O_BINARY;
+#else
+const int o_binary = 0;
+#endif
+// Maps a compressed-file suffix (from) to the suffix of the uncompressed name (to).
+struct { const char * from; const char * to; } const known_extensions[] = {
+  { ".bz2",  ""     },
+  { ".tbz",  ".tar" },
+  { ".tbz2", ".tar" },
+  { ".gz",   ""     },
+  { ".tgz",  ".tar" },
+  { ".lz",   ""     },
+  { ".tlz",  ".tar" },
+  { ".xz",   ""     },
+  { ".txz",  ".tar" },
+  { 0,       0      } };	// a null `from' terminates the table
+
+
+// Print the usage text and the summary of the supported diff options to
+// standard output, then call show_help_addr().
+void show_help() throw()
+  {
+  static const char * const usage =
+    "Zdiff compares two files (\"-\" means standard input), and if they\n"
+    "differ, shows the differences line by line. If any given file is\n"
+    "compressed, its uncompressed content is used. Zdiff is a front end to\n"
+    "the diff program and has the limitation that messages from diff refer to\n"
+    "temporary filenames instead of those specified.\n"
+    "The supported compressors are bzip2, gzip, lzip and xz.\n"
+    "\nUsage: zdiff [options] file1 [file2]\n"
+    "\nCompares <file1> to <file2>. If <file2> is omitted zdiff tries the\n" "following:\n"
+    "If <file1> is compressed, compares <file1> to the file with the\n"
+    "corresponding decompressed file name (removes the extension from\n" "<file1>).\n"
+    "If <file1> is not compressed, compares <file1> to the uncompressed\n"
+    "contents of <file1>.[bz2|gz|lz|xz] (the first one that is found).\n"
+    "If no suitable file is found, compares <file1> to data read from\n" "standard input.\n"
+    "\nExit status is 0 if inputs are identical, 1 if different, 2 if trouble.\n" "\nOptions:\n"
+    "  -h, --help                    display this help and exit\n"
+    "  -V, --version                 output version information and exit\n"
+    "  -a, --text                    treat all files as text\n"
+    "  -b, --ignore-space-change     ignore changes in the amount of white space\n"
+    "  -B, --ignore-blank-lines      ignore changes whose lines are all blank\n"
+    "  -c                            use the context output format\n"
+    "  -C, --context=<n>             same as -c but use <n> lines of context\n"
+    "  -d, --minimal                 try hard to find a smaller set of changes\n"
+    "  -E, --ignore-tab-expansion    ignore changes due to tab expansion\n"
+    "  -i, --ignore-case             ignore case differences in file contents\n"
+    "  -p, --show-c-function         show which C function each change is in\n"
+    "  -q, --brief                   output only whether files differ\n"
+    "  -s, --report-identical-files  report when two files are identical\n"
+    "  -t, --expand-tabs             expand tabs to spaces in output\n"
+    "  -T, --initial-tab             make tabs line up by prepending a tab\n"
+    "  -u                            use the unified output format\n"
+    "  -U, --unified=<n>             same as -u but use <n> lines of context\n"
+    "  -w, --ignore-all-space        ignore all white space\n"
+    "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
+    "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n";
+  std::fputs( usage, stdout );
+  show_help_addr();
+  }
+
+
+int open_instream( const std::string & input_filename ) throw()
+  {			// returns the descriptor from open(); < 0 means it failed
+  const int fd = open( input_filename.c_str(), O_RDONLY | o_binary );
+  if( fd >= 0 || verbosity < 0 ) return fd;	// opened, or diagnostics are muted
+  std::fprintf( stderr, "%s: Can't open input file `%s': %s.\n",
+                util_name, input_filename.c_str(), std::strerror( errno ) );
+  return fd;
+  }
+
+
+// Open the counterpart of `name'. If `name' ends in a known compressed
+// extension, open the uncompressed name; else try `name' + each simple extension.
+int open_other_instream( std::string & name ) throw()
+  {		// `name' becomes the name tried or found; returns the fd or < 0
+  for( int i = 0; known_extensions[i].from; ++i )
+    {
+    const char * const ext = known_extensions[i].from;
+    const std::size_t len = std::strlen( ext );
+    if( name.size() <= len ) continue;		// too short to carry `ext'
+    if( name.compare( name.size() - len, len, ext ) != 0 ) continue;
+    name.replace( name.size() - len, len, known_extensions[i].to );
+    return open( name.c_str(), O_RDONLY | o_binary );
+    }
+  for( int k = 0; simple_extensions[k]; ++k )
+    {
+    std::string candidate( name ); candidate += simple_extensions[k];
+    const int fd = open( candidate.c_str(), O_RDONLY | o_binary );
+    if( fd >= 0 ) { name.swap( candidate ); return fd; }
+    }
+  return -1;
+  }
+
+
+bool check_identical( const char * const name1, const char * const name2 ) throw()
+  {		// true if the names are equal or stat() reports the same device and inode
+  if( std::strcmp( name1, name2 ) == 0 ) return true;
+  struct stat a, b;
+  const bool statted = ( stat( name1, &a ) == 0 && stat( name2, &b ) == 0 );
+  return ( statted && a.st_ino == b.st_ino && a.st_dev == b.st_dev );
+  }
+
+
+const char * my_basename( const char * filename ) throw()
+  {		// returns the part of `filename' that follows its last '/', if any
+  const char * const slash = std::strrchr( filename, '/' );
+  if( slash ) return slash + 1;
+  return filename;
+  }
+
+
+// Remove every FIFO whose name is still recorded in fifonames, then clear the name.
+void remove_fifos() throw()
+  {
+  for( int i = 0; i < 2; ++i )
+    if( !fifonames[i].empty() )
+      { std::remove( fifonames[i].c_str() ); fifonames[i].clear(); }
+  }
+
+
+// Set fifonames[i] to "<dir>/<code>_<basename(filenames[i])>", where <dir>
+// is ${TMPDIR} (or "/tmp" if unset) and <code> is 2*pid+i written in base 36,
+// then create both FIFOs. Returns false if a FIFO can't be created.
+bool set_fifonames( const std::string filenames[2] )
+  {
+  static const char digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
+  const int base = sizeof digits - 1;		// 36
+  const char * const tmpdir = std::getenv( "TMPDIR" );
+  const std::string dir( tmpdir ? tmpdir : "/tmp" );
+  const int pid = getpid();
+
+  for( int i = 0; i < 2; ++i )
+    {
+    std::string code;
+    int n = ( 2 * pid ) + i;
+    do
+      { code.insert( code.begin(), digits[n % base] ); n /= base; }
+    while( n );
+    fifonames[i] = dir + '/' + code + '_' + my_basename( filenames[i].c_str() );
+    }
+
+  for( int i = 0; i < 2; ++i )
+    {
+    const char * const fifo = fifonames[i].c_str();
+    const mode_t mode = S_IRUSR | S_IWUSR;
+    if( mkfifo( fifo, mode ) == 0 ) continue;
+    if( errno == EEXIST )		// name already taken: remove it and retry once
+      {
+      std::remove( fifo );
+      if( mkfifo( fifo, mode ) == 0 ) continue;
+      }
+    if( verbosity >= 0 )
+      std::fprintf( stderr, "%s: Can't create FIFO `%s': %s.\n",
+                    util_name, fifo, std::strerror( errno ) );
+    return false;
+    }
+  return true;
+  }
+
+
+bool set_data_feeder( const std::string & fifoname, const int infd,
+                      pid_t * const pidp )
+  {		// Fork the child that delivers the data of `infd' to the FIFO `fifoname'.
+  std::string file_type;		// passed to test_format; used below as the program to exec
+  const uint8_t * magic_data;
+  int magic_size;
+  const bool compressed =
+    test_format( infd, file_type, &magic_data, &magic_size );
+  // NOTE(review): magic_data/magic_size are presumably the bytes test_format consumed from infd -- confirm.
+  if( compressed )			// compressed with `file_type'
+    {
+    int fda[2];				// pipe from feeder to decompressor
+    if( pipe( fda ) < 0 )
+      { show_error( "Can't create pipe", errno ); return false; }
+    const pid_t pid = fork();
+    if( pid == 0 )			// child (decompressor feeder)
+      {
+      const pid_t pid2 = fork();
+      if( pid2 == 0 )			// grandchild (decompressor)
+        {
+        const int outfd = open( fifoname.c_str(), O_WRONLY | o_binary );
+        if( outfd < 0 )
+          {
+          if( verbosity >= 0 )
+            std::fprintf( stderr, "%s: Can't open FIFO `%s' for writing: %s.\n",
+                        util_name, fifoname.c_str(), std::strerror( errno ) );
+          _exit( 2 );
+          }
+        if( dup2( fda[0], STDIN_FILENO ) >= 0 &&	// stdin  <- read end of the pipe
+            dup2( outfd, STDOUT_FILENO ) >= 0 &&	// stdout -> the FIFO
+            close( fda[0] ) == 0 && close( fda[1] ) == 0 &&
+            close( outfd ) == 0 )
+          execlp( file_type.c_str(), file_type.c_str(), "-cdfq", (char *)0 );
+        if( verbosity >= 0 )		// reached if the redirection or the exec failed
+          std::fprintf( stderr, "%s: Can't exec `%s': %s.\n",
+                        util_name, file_type.c_str(), std::strerror( errno ) );
+        _exit( 2 );
+        }
+      if( pid2 < 0 )
+        {
+        if( verbosity >= 0 )
+          std::fprintf( stderr, "%s: Can't fork `%s': %s.\n",
+                        util_name, file_type.c_str(), std::strerror( errno ) );
+        _exit( 2 );
+        }
+      // feeder: close the read end, hand infd and the write end to feed_data, then exit with the decompressor's status
+      if( close( fda[0] ) != 0 ||
+          !feed_data( infd, fda[1], magic_data, magic_size ) )
+        _exit( 2 );
+      if( close( fda[1] ) != 0 )
+        { show_error( "Can't close output of feeder", errno ); _exit( 2 ); }
+      _exit( wait_for_child( pid2, file_type.c_str() ) );
+      }
+					// parent: closes both pipe ends, it does not use them
+    close( fda[0] ); close( fda[1] );
+    if( pid < 0 )			// NOTE(review): errno is read after the close() calls above
+      { show_error( "Can't fork decompressor feeder", errno ); return false; }
+    *pidp = pid;			// the feeder's pid, not the decompressor's
+    }
+  else					// not compressed
+    {
+    const pid_t pid = fork();
+    if( pid == 0 )			// child (feeder)
+      {
+      const int outfd = open( fifoname.c_str(), O_WRONLY | o_binary );
+      if( outfd < 0 )
+        {
+        if( verbosity >= 0 )
+          std::fprintf( stderr, "%s: Can't open FIFO `%s' for writing: %s.\n",
+                      util_name, fifoname.c_str(), std::strerror( errno ) );
+        _exit( 2 );
+        }
+      if( !feed_data( infd, outfd, magic_data, magic_size ) )
+        _exit( 2 );
+      if( close( outfd ) != 0 )
+        { show_error( "Can't close output of feeder", errno ); _exit( 2 ); }
+      _exit( 0 );
+      }
+					// parent
+    if( pid < 0 )
+      { show_error( "Can't fork data feeder", errno ); return false; }
+    *pidp = pid;
+    }
+  return true;		// false is returned only when pipe() or fork() failed in this process
+  }
+
+
+extern "C" void signal_handler( int sig ) throw()
+  {		// NOTE(review): remove_fifos() uses std::string; confirm it is safe inside a handler
+  remove_fifos();		// delete the FIFOs before the process terminates
+  std::signal( sig, SIG_DFL );	// restore the default action for `sig' ...
+  std::raise( sig );		// ... and raise it again so that the default action runs
+  }
+
+
+void set_signals() throw()
+  {		// install signal_handler for SIGHUP, SIGINT and SIGTERM
+  const int sigs[] = { SIGHUP, SIGINT, SIGTERM };
+  for( unsigned i = 0; i < sizeof sigs / sizeof sigs[0]; ++i )
+    std::signal( sigs[i], signal_handler );
+  }
+
+} // end namespace
+
+
+int main( const int argc, const char * const argv[] )
+  {		// parse options, open both inputs, run diff on two FIFOs fed by child processes
+  std::vector< const char * > diff_args;	// args to diff, maybe empty
+  invocation_name = argv[0];
+  util_name = "zdiff";
+
+  const Arg_parser::Option options[] =
+    {
+    { 'a', "text",                   Arg_parser::no  },
+    { 'b', "ignore-space-change",    Arg_parser::no  },
+    { 'B', "ignore-blank-lines",     Arg_parser::no  },
+    { 'c',  0,                       Arg_parser::no  },
+    { 'C', "context",                Arg_parser::yes },
+    { 'd', "minimal",                Arg_parser::no  },
+    { 'E', "ignore-tab-expansion",   Arg_parser::no  },
+    { 'h', "help",                   Arg_parser::no  },
+    { 'i', "ignore-case",            Arg_parser::no  },
+    { 'p', "show-c-function",        Arg_parser::no  },
+    { 'q', "brief",                  Arg_parser::no  },
+    { 's', "report-identical-files", Arg_parser::no  },
+    { 't', "expand-tabs",            Arg_parser::no  },
+    { 'T', "initial-tab",            Arg_parser::no  },
+    { 'u',  0,                       Arg_parser::no  },
+    { 'U', "unified",                Arg_parser::yes },
+    { 'V', "version",                Arg_parser::no  },
+    { 'w', "ignore-all-space",       Arg_parser::no  },
+    {  0 ,  0,                       Arg_parser::no  } };
+
+  const Arg_parser parser( argc, argv, options );
+  if( parser.error().size() )				// bad option
+    { show_error( parser.error().c_str(), 0, true ); return 2; }
+
+  int argind = 0;
+  for( ; argind < parser.arguments(); ++argind )
+    {
+    const int code = parser.code( argind );
+    if( !code ) break;					// no more options
+    const char * const arg = parser.argument( argind ).c_str();
+    switch( code )
+      {
+      case 'a': diff_args.push_back( "-a" ); break;
+      case 'b': diff_args.push_back( "-b" ); break;
+      case 'B': diff_args.push_back( "-B" ); break;
+      case 'c': diff_args.push_back( "-c" ); break;
+      case 'C': diff_args.push_back( "-C" ); diff_args.push_back( arg ); break;
+      case 'd': diff_args.push_back( "-d" ); break;
+      case 'E': diff_args.push_back( "-E" ); break;
+      case 'h': show_help(); return 0;
+      case 'i': diff_args.push_back( "-i" ); break;
+      case 'p': diff_args.push_back( "-p" ); break;
+      case 'q': diff_args.push_back( "-q" ); break;
+      case 's': diff_args.push_back( "-s" ); break;
+      case 't': diff_args.push_back( "-t" ); break;
+      case 'T': diff_args.push_back( "-T" ); break;
+      case 'u': diff_args.push_back( "-u" ); break;
+      case 'U': diff_args.push_back( "-U" ); diff_args.push_back( arg ); break;
+      case 'V': show_version( "Zdiff" ); return 0;
+      case 'w': diff_args.push_back( "-w" ); break;
+      default : internal_error( "uncaught option" );
+      }
+    } // end process options
+
+#if defined(__MSVCRT__) || defined(__OS2__)
+  _setmode( STDIN_FILENO, O_BINARY );
+  _setmode( STDOUT_FILENO, O_BINARY );
+#endif
+
+  if( argind >= parser.arguments() )
+    { show_error( "No files given.", 0, true ); return 2; }
+  if( argind + 2 < parser.arguments() )
+    { show_error( "Too many files.", 0, true ); return 2; }
+
+  const int files = parser.arguments() - argind;
+  std::string filenames[2];		// file names of the two input files
+  filenames[0] = parser.argument( argind );
+  if( files == 2 ) filenames[1] = parser.argument( argind + 1 );
+
+  int infd[2];				// file descriptors of the two files
+  infd[0] = ( filenames[0] == "-" ) ?
+    STDIN_FILENO : open_instream( filenames[0] );
+  if( infd[0] < 0 ) return 2;
+  // Nothing to compare: only standard input was given, or both names are the same file.
+  if( ( files == 1 && filenames[0] == "-" ) ||
+      ( files == 2 && check_identical( filenames[0].c_str(),
+                                       filenames[1].c_str() ) ) )
+    return 0;
+
+  if( files == 2 )
+    {
+    infd[1] = ( filenames[1] == "-" ) ?
+      STDIN_FILENO : open_instream( filenames[1] );
+    if( infd[1] < 0 ) return 2;
+    }
+  else
+    {
+    filenames[1] = filenames[0];
+    infd[1] = open_other_instream( filenames[1] );
+    if( infd[1] < 0 ) { infd[1] = STDIN_FILENO; filenames[1] = "-"; }
+    }
+  // diff gets the two FIFO names as its file arguments; remove the FIFOs on exit or signal.
+  std::atexit( remove_fifos );
+  set_signals();
+  if( !set_fifonames( filenames ) ) return 2;
+
+  const pid_t diff_pid = fork();
+  if( diff_pid == 0 )			// child (diff)
+    {
+    const char ** const argv = new const char *[diff_args.size()+5];
+    argv[0] = "diff";
+    for( unsigned int i = 0; i < diff_args.size(); ++i )
+      argv[i+1] = diff_args[i];
+    argv[diff_args.size()+1] = "--";
+    argv[diff_args.size()+2] = fifonames[0].c_str();
+    argv[diff_args.size()+3] = fifonames[1].c_str();
+    argv[diff_args.size()+4] = 0;
+    execvp( argv[0], (char **)argv );
+    show_error( "Can't exec `diff'." );
+    _exit( 2 );
+    }
+					// parent
+  if( diff_pid < 0 ) { show_error( "Can't fork `diff'", errno ); return 2; }
+
+  pid_t pid[2] = { 0, 0 };		// 0 means that the feeder has not been started
+  if( !set_data_feeder( fifonames[0], infd[0], &pid[0] ) ||
+      !set_data_feeder( fifonames[1], infd[1], &pid[1] ) )
+    {					// don't leave diff or a feeder waiting on a FIFO
+    kill( diff_pid, SIGTERM ); if( pid[0] ) kill( pid[0], SIGTERM );
+    return 2;
+    }
+  int retval = wait_for_child( diff_pid, "diff" );
+
+  if( retval != 0 )			// diff did not return 0: stop the feeders
+    {
+    for( int i = 0; i < 2; ++i ) if( pid[i] ) kill( pid[i], SIGTERM );
+    }
+  else
+    if( ( pid[0] && wait_for_child( pid[0], "data feeder" ) != 0 ) ||
+        ( pid[1] && wait_for_child( pid[1], "data feeder" ) != 0 ) )
+      retval = 2;
+
+  for( int i = 0; i < 2; ++i )
+    if( filenames[i] != "-" && close( infd[i] ) != 0 )
+      {
+      if( verbosity >= 0 )
+        std::fprintf( stderr, "%s: Can't close input file `%s': %s.\n",
+                      util_name, filenames[i].c_str(), std::strerror( errno ) );
+      retval = 2;
+      }
+
+  return retval;
+  }