1 files changed, 81 insertions, 136 deletions
diff --git a/zdiff.cc b/zdiff.cc
index 28425a3..7419929 100644
--- a/zdiff.cc
+++ b/zdiff.cc
@@ -1,5 +1,5 @@
 /*  Zdiff - decompress and compare two files line by line
-    Copyright (C) 2010, 2011 Antonio Diaz Diaz.
+    Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -17,6 +17,7 @@
 
 #define _FILE_OFFSET_BITS 64
 
+#include <algorithm>
 #include <cctype>
 #include <cerrno>
 #include <climits>
@@ -30,9 +31,6 @@
 #include <stdint.h>
 #include <unistd.h>
 #include <sys/stat.h>
-#if defined(__MSVCRT__) || defined(__OS2__)
-#include <io.h>
-#endif
 
 #include "arg_parser.h"
 #include "zutils.h"
@@ -41,126 +39,60 @@
 #error "Environments where CHAR_BIT != 8 are not supported."
 #endif
 
-#ifndef LLONG_MAX
-#define LLONG_MAX  0x7FFFFFFFFFFFFFFFLL
-#endif
-#ifndef LLONG_MIN
-#define LLONG_MIN  (-LLONG_MAX - 1LL)
-#endif
-#ifndef ULLONG_MAX
-#define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL
-#endif
-
 
 namespace {
 
 std::string fifonames[2];	// names of the two fifos passed to diff
 
-#ifdef O_BINARY
-const int o_binary = O_BINARY;
-#else
-const int o_binary = 0;
-#endif
+#include "zcmpdiff.cc"  // NOTE(review): .cc included textually inside this unnamed namespace; presumably supplies open_other_instream, get_format_types, etc. used in main -- confirm
 
-struct { const char * from; const char * to; } const known_extensions[] = {
-  { ".bz2",  ""     },
-  { ".tbz",  ".tar" },
-  { ".tbz2", ".tar" },
-  { ".gz",   ""     },
-  { ".tgz",  ".tar" },
-  { ".lz",   ""     },
-  { ".tlz",  ".tar" },
-  { ".xz",   ""     },
-  { ".txz",  ".tar" },
-  { 0,       0      } };
 
-
-void show_help() throw()
+void show_help()  // prints the usage text and option summary to stdout
   {
-  std::printf( "Zdiff compares two files (\"-\" means standard input), and if they\n" );
-  std::printf( "differ, shows the differences line by line. If any given file is\n" );
-  std::printf( "compressed, its uncompressed content is used. Zdiff is a front end to\n" );
-  std::printf( "the diff program and has the limitation that messages from diff refer to\n" );
-  std::printf( "temporary filenames instead of those specified.\n" );
-  std::printf( "The supported compressors are bzip2, gzip, lzip and xz.\n" );
-  std::printf( "\nUsage: zdiff [options] file1 [file2]\n" );
-  std::printf( "\nCompares <file1> to <file2>. If <file2> is omitted zdiff tries the\n" );
-  std::printf( "following:\n" );
-  std::printf( "If <file1> is compressed, compares <file1> to the file with the\n" );
-  std::printf( "corresponding decompressed file name (removes the extension from\n" );
-  std::printf( "<file1>).\n" );
-  std::printf( "If <file1> is not compressed, compares <file1> to the uncompressed\n" );
-  std::printf( "contents of <file1>.[bz2|gz|lz|xz] (the first one that is found).\n" );
-  std::printf( "If no suitable file is found, compares <file1> to data read from\n" );
-  std::printf( "standard input.\n" );
-  std::printf( "\nExit status is 0 if inputs are identical, 1 if different, 2 if trouble.\n" );
-  std::printf( "\nOptions:\n" );
-  std::printf( "  -h, --help                    display this help and exit\n" );
-  std::printf( "  -V, --version                 output version information and exit\n" );
-  std::printf( "  -a, --text                    treat all files as text\n" );
-  std::printf( "  -b, --ignore-space-change     ignore changes in the amount of white space\n" );
-  std::printf( "  -B, --ignore-blank-lines      ignore changes whose lines are all blank\n" );
-  std::printf( "  -c                            use the context output format\n" );
-  std::printf( "  -C, --context=<n>             same as -c but use <n> lines of context\n" );
-  std::printf( "  -d, --minimal                 try hard to find a smaller set of changes\n" );
-  std::printf( "  -E, --ignore-tab-expansion    ignore changes due to tab expansion\n" );
-  std::printf( "  -i, --ignore-case             ignore case differences in file contents\n" );
-  std::printf( "  -p, --show-c-function         show which C function each change is in\n" );
-  std::printf( "  -q, --brief                   output only whether files differ\n" );
-  std::printf( "  -s, --report-identical-files  report when two files are identical\n" );
-  std::printf( "  -t, --expand-tabs             expand tabs to spaces in output\n" );
-  std::printf( "  -T, --initial-tab             make tabs line up by prepending a tab\n" );
-  std::printf( "  -u                            use the unified output format\n" );
-  std::printf( "  -U, --unified=<n>             same as -u but use <n> lines of context\n" );
-  std::printf( "  -w, --ignore-all-space        ignore all white space\n" );
-  std::printf( "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" );
-  std::printf( "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" );
+  std::printf( "Zdiff compares two files (\"-\" means standard input), and if they\n"  // single call: the adjacent literals below form one string
+               "differ, shows the differences line by line. If any given file is\n"
+               "compressed, its uncompressed content is used. Zdiff is a front end to\n"
+               "the diff program and has the limitation that messages from diff refer to\n"
+               "temporary filenames instead of those specified.\n"
+               "\nThe supported formats are bzip2, gzip, lzip and xz.\n"
+               "\nUsage: zdiff [options] file1 [file2]\n"
+               "\nCompares <file1> to <file2>. If <file2> is omitted zdiff tries the\n"
+               "following:\n"
+               "If <file1> is compressed, compares <file1> to the file with the\n"
+               "corresponding decompressed file name (removes the extension from\n"
+               "<file1>).\n"
+               "If <file1> is not compressed, compares <file1> to the uncompressed\n"
+               "contents of <file1>.[bz2|gz|lz|xz] (the first one that is found).\n"
+               "If no suitable file is found, compares <file1> to data read from\n"
+               "standard input.\n"
+               "\nExit status is 0 if inputs are identical, 1 if different, 2 if trouble.\n"
+               "\nOptions:\n"
+               "  -h, --help                      display this help and exit\n"
+               "  -V, --version                   output version information and exit\n"
+               "  -a, --text                      treat all files as text\n"
+               "  -b, --ignore-space-change       ignore changes in the amount of white space\n"
+               "  -B, --ignore-blank-lines        ignore changes whose lines are all blank\n"
+               "  -c                              use the context output format\n"
+               "  -C, --context=<n>               same as -c but use <n> lines of context\n"
+               "  -d, --minimal                   try hard to find a smaller set of changes\n"
+               "  -E, --ignore-tab-expansion      ignore changes due to tab expansion\n"
+               "      --format=[<fmt1>][,<fmt2>]  force given formats (bz2, gz, lz, xz)\n"
+               "  -i, --ignore-case               ignore case differences in file contents\n"
+               "  -p, --show-c-function           show which C function each change is in\n"
+               "  -q, --brief                     output only whether files differ\n"
+               "  -s, --report-identical-files    report when two files are identical\n"
+               "  -t, --expand-tabs               expand tabs to spaces in output\n"
+               "  -T, --initial-tab               make tabs line up by prepending a tab\n"
+               "  -u                              use the unified output format\n"
+               "  -U, --unified=<n>               same as -u but use <n> lines of context\n"
+               "  -w, --ignore-all-space          ignore all white space\n"
+               "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
+               "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" );
   show_help_addr();
   }
 
 
-int open_instream( const std::string & input_filename ) throw()
-  {
-  int infd = open( input_filename.c_str(), O_RDONLY | o_binary );
-  if( infd < 0 )
-    show_error2( "Can't open input file", input_filename.c_str() );
-  return infd;
-  }
-
-
-int open_other_instream( std::string & name ) throw()
-  {
-  for( int i = 0; known_extensions[i].from; ++i )
-    {					// search uncompressed version
-    const std::string from( known_extensions[i].from );
-    if( name.size() > from.size() &&
-        name.compare( name.size() - from.size(), from.size(), from ) == 0 )
-      {
-      name.resize( name.size() - from.size() );
-      name += known_extensions[i].to;
-      return open( name.c_str(), O_RDONLY | o_binary );
-      }
-    }
-  for( int i = 0; simple_extensions[i]; ++i )
-    {					// search compressed version
-    const std::string s( name + simple_extensions[i] );
-    const int infd = open( s.c_str(), O_RDONLY | o_binary );
-    if( infd >= 0 ) { name = s; return infd; }
-    }
-  return -1;
-  }
-
-
-bool check_identical( const char * const name1, const char * const name2 ) throw()
-  {
-  if( !std::strcmp( name1, name2 ) ) return true;
-  struct stat stat1, stat2;
-  if( stat( name1, &stat1 ) || stat( name2, &stat2 ) ) return false;
-  return ( stat1.st_ino == stat2.st_ino && stat1.st_dev == stat2.st_dev );
-  }
-
-
-const char * my_basename( const char * filename ) throw()
+const char * my_basename( const char * filename )
   {
   const char * c = filename;
   while( *c ) { if( *c == '/' ) { filename = c + 1; } ++c; }
@@ -168,7 +100,7 @@ const char * my_basename( const char * filename ) throw()
   }
 
 
-extern "C" void remove_fifos() throw()
+extern "C" void remove_fifos()
   {
   if( fifonames[0].size() )
     { std::remove( fifonames[0].c_str() ); fifonames[0].clear(); }
@@ -191,7 +123,7 @@ bool set_fifonames( const std::string filenames[2] )
     if( p ) fifonames[i] = p; else fifonames[i] = "/tmp";
     fifonames[i] += '/';
     int n = ( 2 * pid ) + i;
-    const unsigned int pos = fifonames[i].size();
+    const unsigned pos = fifonames[i].size();
     do { fifonames[i].insert( pos, 1, codes[n % num_codes] );
          n /= num_codes; }
     while( n );
@@ -216,15 +148,15 @@ bool set_fifonames( const std::string filenames[2] )
 
 
 bool set_data_feeder( const std::string & fifoname, const int infd,
-                      pid_t * const pidp )
+                      pid_t * const pidp, const int format_type )
   {
-  std::string file_type;
-  const uint8_t * magic_data;
-  int magic_size;
-  const bool compressed =
-    test_format( infd, file_type, &magic_data, &magic_size );
+  const uint8_t * magic_data = 0;
+  int magic_size = 0;
+  const char * const decompressor_name = ( format_type >= 0 ) ?
+    decompressor_names[format_type] :
+    test_format( infd, &magic_data, &magic_size );
 
-  if( compressed )			// compressed with `file_type'
+  if( decompressor_name )		// compressed
     {
     int fda[2];				// pipe from feeder to decompressor
     if( pipe( fda ) < 0 )
@@ -239,7 +171,7 @@ bool set_data_feeder( const std::string & fifoname, const int infd,
         if( outfd < 0 )
           {
           if( verbosity >= 0 )
-            std::fprintf( stderr, "%s: Can't open FIFO `%s' for writing: %s.\n",
+            std::fprintf( stderr, "%s: Can't open FIFO '%s' for writing: %s.\n",
                         util_name, fifoname.c_str(), std::strerror( errno ) );
           _exit( 2 );
           }
@@ -247,19 +179,20 @@ bool set_data_feeder( const std::string & fifoname, const int infd,
             dup2( outfd, STDOUT_FILENO ) >= 0 &&
             close( fda[0] ) == 0 && close( fda[1] ) == 0 &&
             close( outfd ) == 0 )
-          execlp( file_type.c_str(), file_type.c_str(), "-cdfq", (char *)0 );
-        show_exec_error( file_type.c_str() );
+          execlp( decompressor_name, decompressor_name,
+                  (verbosity >= 0) ? "-d" : "-dq", (char *)0 );
+        show_exec_error( decompressor_name );
         _exit( 2 );
         }
       if( pid2 < 0 )
-        { show_fork_error( file_type.c_str() ); _exit( 2 ); }
+        { show_fork_error( decompressor_name ); _exit( 2 ); }
 
       if( close( fda[0] ) != 0 ||
           !feed_data( infd, fda[1], magic_data, magic_size ) )
         _exit( 2 );
       if( close( fda[1] ) != 0 )
         { show_close_error( "data feeder" ); _exit( 2 ); }
-      _exit( wait_for_child( pid2, file_type.c_str() ) );
+      _exit( wait_for_child( pid2, decompressor_name ) );
       }
 					// parent
     close( fda[0] ); close( fda[1] );
@@ -276,7 +209,7 @@ bool set_data_feeder( const std::string & fifoname, const int infd,
       if( outfd < 0 )
         {
         if( verbosity >= 0 )
-          std::fprintf( stderr, "%s: Can't open FIFO `%s' for writing: %s.\n",
+          std::fprintf( stderr, "%s: Can't open FIFO '%s' for writing: %s.\n",
                       util_name, fifoname.c_str(), std::strerror( errno ) );
         _exit( 2 );
         }
@@ -295,7 +228,7 @@ bool set_data_feeder( const std::string & fifoname, const int infd,
   }
 
 
-extern "C" void signal_handler( int sig ) throw()
+extern "C" void signal_handler( int sig )
   {
   remove_fifos();
   std::signal( sig, SIG_DFL );
@@ -303,7 +236,7 @@ extern "C" void signal_handler( int sig ) throw()
   }
 
 
-void set_signals() throw()
+void set_signals()
   {
   std::signal( SIGHUP, signal_handler );
   std::signal( SIGINT, signal_handler );
@@ -315,7 +248,9 @@ void set_signals() throw()
 
 int main( const int argc, const char * const argv[] )
   {
+  enum { format_opt = 256 };
   std::vector< const char * > diff_args;	// args to diff, maybe empty
+  int format_types[2] = { -1, -1 };
   invocation_name = argv[0];
   util_name = "zdiff";
 
@@ -339,6 +274,7 @@ int main( const int argc, const char * const argv[] )
     { 'U', "unified",                Arg_parser::yes },
     { 'V', "version",                Arg_parser::no  },
     { 'w', "ignore-all-space",       Arg_parser::no  },
+    { format_opt, "format",          Arg_parser::yes },
     {  0 ,  0,                       Arg_parser::no  } };
 
   const Arg_parser parser( argc, argv, options );
@@ -371,13 +307,14 @@ int main( const int argc, const char * const argv[] )
       case 'U': diff_args.push_back( "-U" ); diff_args.push_back( arg ); break;
       case 'V': show_version( "Zdiff" ); return 0;
       case 'w': diff_args.push_back( "-w" ); break;
+      case format_opt: get_format_types( arg, format_types ); break;
       default : internal_error( "uncaught option" );
       }
     } // end process options
 
 #if defined(__MSVCRT__) || defined(__OS2__)
-  _setmode( STDIN_FILENO, O_BINARY );
-  _setmode( STDOUT_FILENO, O_BINARY );
+  _fsetmode( stdin, "b" );
+  _fsetmode( stdout, "b" );
 #endif
 
   if( argind >= parser.arguments() )
@@ -408,6 +345,9 @@ int main( const int argc, const char * const argv[] )
     }
   else
     {
+    if( format_types[0] >= 0 || format_types[1] >= 0 )
+      { show_error( "Two files must be given when format is specified.", 0, true );
+        return 2; }
     filenames[1] = filenames[0];
     infd[1] = open_other_instream( filenames[1] );
     if( infd[1] < 0 ) { infd[1] = STDIN_FILENO; filenames[1] = "-"; }
@@ -422,7 +362,7 @@ int main( const int argc, const char * const argv[] )
     {
     const char ** const argv = new const char *[diff_args.size()+5];
     argv[0] = DIFF;
-    for( unsigned int i = 0; i < diff_args.size(); ++i )
+    for( unsigned i = 0; i < diff_args.size(); ++i )
       argv[i+1] = diff_args[i];
     argv[diff_args.size()+1] = "--";
     argv[diff_args.size()+2] = fifonames[0].c_str();
@@ -437,16 +377,21 @@ int main( const int argc, const char * const argv[] )
     { show_fork_error( DIFF ); return 2; }
 
   pid_t pid[2];
-  if( !set_data_feeder( fifonames[0], infd[0], &pid[0] ) ||
-      !set_data_feeder( fifonames[1], infd[1], &pid[1] ) )
+  if( !set_data_feeder( fifonames[0], infd[0], &pid[0], format_types[0] ) ||
+      !set_data_feeder( fifonames[1], infd[1], &pid[1], format_types[1] ) )
     return 2;
 
   int retval = wait_for_child( diff_pid, DIFF );
 
   if( retval != 0 )
     {
-    if( pid[0] ) kill( pid[0], SIGTERM );
-    if( pid[1] ) kill( pid[1], SIGTERM );
+    for( int i = 0; i < 2; ++i )
+      if( pid[i] )
+        {
+        const int tmp = child_status( pid[i], "data feeder" );
+        if( tmp < 0 ) kill( pid[i], SIGTERM );	// child not terminated
+        else if( tmp != 0 ) retval = 2;		// child status != 0
+        }
     }
   else
     if( ( pid[0] && wait_for_child( pid[0], "data feeder" ) != 0 ) ||