1 files changed, 446 insertions, 0 deletions
diff --git a/zdiff.cc b/zdiff.cc
new file mode 100644
index 0000000..a601459
--- /dev/null
+++ b/zdiff.cc
@@ -0,0 +1,446 @@
+/* Zdiff - decompress and compare two files line by line
+   Copyright (C) 2010-2024 Antonio Diaz Diaz.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <algorithm>
+#include <cctype>
+#include <cerrno>
+#include <climits>
+#include <csignal>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+#include <vector>
+#include <fcntl.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#if defined __MSVCRT__ || defined __OS2__
+#include <io.h>
+#endif
+
+#include "arg_parser.h"
+#include "rc.h"
+#include "zutils.h"
+
+
+namespace {
+
+std::string fifonames[2];	// names of the two fifos passed to diff ("" = not set)
+
+#include "zcmpdiff.cc"		// a .cc file textually included inside the unnamed namespace
+
+void show_help()		// print the usage text below to standard output
+  {
+  std::printf( "zdiff compares two files and, if they differ, writes to standard output the\n"
+               "differences line by line. A hyphen '-' used as a file argument means standard\n"
+               "input. If any file given is compressed, its decompressed content is used.\n"
+               "zdiff is a front end to the program diff and has the limitation that messages\n"
+               "from diff refer to temporary file names instead of those specified.\n"
+               "\n'zdiff -v -V' prints the version of the diff program used.\n"
+               "\nThe formats supported are bzip2, gzip, lzip, xz, and zstd.\n"
+               "\nUsage: zdiff [options] file1 [file2]\n"
+               "\nzdiff compares file1 to file2. The standard input is used only if file1 or\n"
+               "file2 refers to standard input. If file2 is omitted zdiff tries to compare\n"
+               "file1 with the corresponding uncompressed file (if file1 is compressed), and\n"
+               "then with the corresponding compressed files of the remaining formats until\n"
+               "one is found.\n"
+               "\nExit status is 0 if inputs are identical, 1 if different, 2 if trouble.\n"
+               "Some options only work if the diff program used supports them.\n"
+               "\nOptions:\n"
+               "  -h, --help                        display this help and exit\n"
+               "  -V, --version                     output version information and exit\n"
+               "  -a, --text                        treat all files as text\n"
+               "  -b, --ignore-space-change         ignore changes in the amount of white space\n"
+               "  -B, --ignore-blank-lines          ignore changes whose lines are all blank\n"
+               "  -c                                use the context output format\n"
+               "  -C, --context=<n>                 same as -c but use <n> lines of context\n"
+               "  -d, --minimal                     try hard to find a smaller set of changes\n"
+               "  -E, --ignore-tab-expansion        ignore changes due to tab expansion\n"
+               "  -i, --ignore-case                 ignore case differences\n"
+               "  -M, --format=<list>               process only the formats in <list>\n"
+               "  -N, --no-rcfile                   don't read runtime configuration file\n"
+               "  -O, --force-format=[<f1>][,<f2>]  force one or both input formats\n"
+               "  -p, --show-c-function             show which C function each change is in\n"
+               "  -q, --brief                       output only whether files differ\n"
+               "  -s, --report-identical-files      report when two files are identical\n"
+               "  -t, --expand-tabs                 expand tabs to spaces in output\n"
+               "  -T, --initial-tab                 make tabs line up by prepending a tab\n"
+               "  -u                                use the unified output format\n"
+               "  -U, --unified=<n>                 same as -u but use <n> lines of context\n"
+               "  -v, --verbose                     verbose mode (for --version)\n"
+               "  -w, --ignore-all-space            ignore all white space\n"
+               "  -W, --width=<n>                   output at most <n> print columns (for -y)\n"
+               "  -y, --side-by-side                output in two columns\n"
+               "      --bz2=<command>               set compressor and options for bzip2 format\n"
+               "      --gz=<command>                set compressor and options for gzip format\n"
+               "      --lz=<command>                set compressor and options for lzip format\n"
+               "      --xz=<command>                set compressor and options for xz format\n"
+               "      --zst=<command>               set compressor and options for zstd format\n"
+               "\nValid formats for options '-M' and '-O' are 'bz2', 'gz', 'lz', 'xz', 'zst',\n"
+               "and 'un' for uncompressed.\n" );
+  show_help_addr();		// not defined in this file; presumably prints contact info
+  }
+
+
+const char * my_basename( const char * filename )
+  {
+  // the base name starts right after the last '/', if there is one
+  const char * const last_slash = std::strrchr( filename, '/' );
+  return last_slash ? last_slash + 1 : filename;
+  }
+
+
+extern "C" void remove_fifos()
+  {
+  // delete every FIFO whose name is set; clearing the name makes a repeated call a no-op
+  for( int i = 0; i < 2; ++i )
+    if( fifonames[i].size() )
+      { std::remove( fifonames[i].c_str() ); fifonames[i].clear(); }
+  }
+
+
+/* Set fifonames[i] to "${TMPDIR}/<coded_pid>[_-]<basename(filenames[i])>"
+   ("/tmp/" is used if TMPDIR is not set) and create both FIFOs. The pid is coded
+   in base 36, little endian order. Return false if a FIFO can't be created. */
+bool set_fifonames( const std::string filenames[2] )
+  {
+  enum { num_codes = 36 };
+  const char * const codes = "0123456789abcdefghijklmnopqrstuvwxyz";
+  const char * p = std::getenv( "TMPDIR" );
+  // build the common prefix "<dir>/<coded_pid>" in fifonames[0]
+  if( p ) { fifonames[0] = p; fifonames[0] += '/'; }
+  else fifonames[0] = "/tmp/";
+  unsigned n = getpid();
+  do fifonames[0] += codes[n % num_codes]; while( n /= num_codes );
+  const unsigned pos = fifonames[0].size();	// index of the separator char
+  fifonames[0] += '_';
+  fifonames[1] = fifonames[0];
+  fifonames[0] += my_basename( filenames[0].c_str() );
+  fifonames[1] += my_basename( filenames[1].c_str() );
+  if( fifonames[1] == fifonames[0] ) fifonames[1][pos] = '-';	// keep names distinct
+  // create the FIFOs (owner read/write); an existing entry is removed and retried once
+  for( int i = 0; i < 2; ++i )
+    if( mkfifo( fifonames[i].c_str(), S_IRUSR | S_IWUSR ) != 0 )
+      {
+      if( errno == EEXIST )
+        {
+        std::remove( fifonames[i].c_str() );
+        if( mkfifo( fifonames[i].c_str(), S_IRUSR | S_IWUSR ) == 0 )
+          continue;
+        }
+      show_file_error( fifonames[i].c_str(), "Can't create FIFO", errno );
+      return false;
+      }
+  return true;
+  }
+
+// Fork the child process(es) that feed the (decompressed) data of 'infd' to 'fifoname'.
+bool set_data_feeder( const std::string & filename,
+                      const std::string & fifoname, const int infd,
+                      Children & children, int format_index )
+  {
+  uint8_t magic_data[magic_buf_size];
+  int magic_size = 0;
+  if( format_index < 0 )		// negative: no format given, ask test_format
+    format_index = test_format( infd, magic_data, &magic_size );
+  children.compressor_name = get_compressor_name( format_index );
+  // a null compressor_name selects the single-feeder (uncompressed) branch below
+  if( children.compressor_name )	// compressed
+    {
+    int fda[2];				// pipe from feeder to compressor
+    if( pipe( fda ) < 0 )
+      { show_error( "Can't create pipe", errno ); return false; }
+    const pid_t pid = fork();
+    if( pid == 0 )			// child 1 (compressor feeder)
+      {
+      if( close( fda[0] ) != 0 ||
+          !feed_data( filename, infd, fda[1], magic_data, magic_size ) )
+        _exit( 2 );
+      if( close( fda[1] ) != 0 )
+        { show_close_error(); _exit( 2 ); }
+      _exit( 0 );
+      }
+    if( pid < 0 )			// parent
+      { show_fork_error( "data feeder" ); return false; }
+    // the compressor reads the pipe on its stdin and writes the FIFO on its stdout
+    const pid_t pid2 = fork();
+    if( pid2 == 0 )			// child 2 (compressor)
+      {
+      const int outfd = open( fifoname.c_str(), O_WRONLY | O_BINARY );
+      if( outfd < 0 )
+        { show_file_error( fifoname.c_str(), "Can't open FIFO for writing",
+                           errno ); _exit( 2 ); }
+      if( dup2( fda[0], STDIN_FILENO ) >= 0 &&
+          dup2( outfd, STDOUT_FILENO ) >= 0 &&
+          close( fda[0] ) == 0 && close( fda[1] ) == 0 &&
+          close( outfd ) == 0 )
+        {
+        const std::vector< std::string > & compressor_args =
+          get_compressor_args( format_index );
+        const int size = compressor_args.size();
+        const char ** const argv = new const char *[size+3];	// name, args, "-d", 0
+        argv[0] = children.compressor_name;
+        for( int i = 0; i < size; ++i )
+          argv[i+1] = compressor_args[i].c_str();
+        argv[size+1] = ( verbosity >= 0 ) ? "-d" : "-dq";
+        argv[size+2] = 0;
+        execvp( argv[0], (char **)argv );
+        }
+      show_exec_error( children.compressor_name );	// dup2, close or execvp failed
+      _exit( 2 );
+      }
+    if( pid2 < 0 )			// parent
+      { show_fork_error( children.compressor_name ); return false; }
+    // the parent itself uses neither end of the pipe
+    close( fda[0] ); close( fda[1] );
+    children.pid[0] = pid;		// feeder
+    children.pid[1] = pid2;		// compressor
+    }
+  else					// uncompressed
+    {
+    const pid_t pid = fork();
+    if( pid == 0 )			// child (feeder)
+      {
+      const int outfd = open( fifoname.c_str(), O_WRONLY | O_BINARY );
+      if( outfd < 0 )
+        { show_file_error( fifoname.c_str(), "Can't open FIFO for writing",
+                           errno ); _exit( 2 ); }
+      if( !feed_data( filename, infd, outfd, magic_data, magic_size ) )
+        _exit( 2 );
+      if( close( outfd ) != 0 )
+        { show_close_error(); _exit( 2 ); }
+      _exit( 0 );
+      }
+    if( pid < 0 )			// parent
+      { show_fork_error( "data feeder" ); return false; }
+    children.pid[0] = pid;
+    children.pid[1] = 0;		// no second child in this branch
+    }
+  return true;
+  }
+
+// Remove the FIFOs, then restore the default action of 'sig' and raise it again.
+extern "C" void signal_handler( int sig )
+  {
+  remove_fifos();	// NOTE(review): uses std::string and std::remove; not guaranteed async-signal-safe
+  std::signal( sig, SIG_DFL );
+  std::raise( sig );
+  }
+
+// Catch HUP, INT and TERM, but leave alone signals inherited as ignored (e.g. nohup).
+void set_signals()
+  {
+  const int sigs[3] = { SIGHUP, SIGINT, SIGTERM };
+  for( int i = 0; i < 3; ++i ) if( std::signal( sigs[i], SIG_IGN ) != SIG_IGN )
+    std::signal( sigs[i], signal_handler );	// was not ignored: install handler
+  }
+
+} // end namespace
+
+// Parse the options, feed both inputs through FIFOs to DIFF, and return the exit status.
+int main( const int argc, const char * const argv[] )
+  {
+  enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt, zst_opt };
+  std::vector< const char * > diff_args;	// args to diff, maybe empty
+  int format_types[2] = { -1, -1 };		// < 0 means undefined
+  program_name = "zdiff";
+  invocation_name = ( argc > 0 ) ? argv[0] : program_name;
+
+  const Arg_parser::Option options[] =
+    {
+    { 'a', "text",                   Arg_parser::no  },
+    { 'b', "ignore-space-change",    Arg_parser::no  },
+    { 'B', "ignore-blank-lines",     Arg_parser::no  },
+    { 'c',  0,                       Arg_parser::no  },
+    { 'C', "context",                Arg_parser::yes },
+    { 'd', "minimal",                Arg_parser::no  },
+    { 'E', "ignore-tab-expansion",   Arg_parser::no  },
+    { 'h', "help",                   Arg_parser::no  },
+    { 'i', "ignore-case",            Arg_parser::no  },
+    { 'M', "format",                 Arg_parser::yes },
+    { 'N', "no-rcfile",              Arg_parser::no  },
+    { 'O', "force-format",           Arg_parser::yes },
+    { 'p', "show-c-function",        Arg_parser::no  },
+    { 'q', "brief",                  Arg_parser::no  },
+    { 's', "report-identical-files", Arg_parser::no  },
+    { 't', "expand-tabs",            Arg_parser::no  },
+    { 'T', "initial-tab",            Arg_parser::no  },
+    { 'u',  0,                       Arg_parser::no  },
+    { 'U', "unified",                Arg_parser::yes },
+    { 'v', "verbose",                Arg_parser::no  },
+    { 'V', "version",                Arg_parser::no  },
+    { 'w', "ignore-all-space",       Arg_parser::no  },
+    { 'W', "width",                  Arg_parser::yes },
+    { 'y', "side-by-side",           Arg_parser::no  },
+    { bz2_opt, "bz2",                Arg_parser::yes },
+    { gz_opt,  "gz",                 Arg_parser::yes },
+    { lz_opt,  "lz",                 Arg_parser::yes },
+    { xz_opt,  "xz",                 Arg_parser::yes },
+    { zst_opt, "zst",                Arg_parser::yes },
+    {  0,   0,                       Arg_parser::no  } };
+
+  const Arg_parser parser( argc, argv, options );
+  if( parser.error().size() )				// bad option
+    { show_error( parser.error().c_str(), 0, true ); return 2; }
+
+  maybe_process_config_file( parser );
+  // options meant for diff are queued in diff_args; the others configure zdiff itself
+  int argind = 0;
+  for( ; argind < parser.arguments(); ++argind )
+    {
+    const int code = parser.code( argind );
+    if( !code ) break;					// no more options
+    const char * const pn = parser.parsed_name( argind ).c_str();
+    const std::string & sarg = parser.argument( argind );
+    const char * const arg = sarg.c_str();
+    switch( code )
+      {
+      case 'a': diff_args.push_back( "-a" ); break;
+      case 'b': diff_args.push_back( "-b" ); break;
+      case 'B': diff_args.push_back( "-B" ); break;
+      case 'c': diff_args.push_back( "-c" ); break;
+      case 'C': diff_args.push_back( "-C" ); diff_args.push_back( arg ); break;
+      case 'd': diff_args.push_back( "-d" ); break;
+      case 'E': diff_args.push_back( "-E" ); break;
+      case 'h': show_help(); return 0;
+      case 'i': diff_args.push_back( "-i" ); break;
+      case 'M': parse_format_list( sarg, pn ); break;
+      case 'N': break;
+      case 'O': parse_format_types2( sarg, pn, format_types ); break;
+      case 'p': diff_args.push_back( "-p" ); break;
+      case 'q': diff_args.push_back( "-q" ); break;
+      case 's': diff_args.push_back( "-s" ); break;
+      case 't': diff_args.push_back( "-t" ); break;
+      case 'T': diff_args.push_back( "-T" ); break;
+      case 'u': diff_args.push_back( "-u" ); break;
+      case 'U': diff_args.push_back( "-U" ); diff_args.push_back( arg ); break;
+      case 'v': if( verbosity < 4 ) ++verbosity; break;
+      case 'V': show_version( DIFF " --version" ); return 0;
+      case 'w': diff_args.push_back( "-w" ); break;
+      case 'W': diff_args.push_back( "-W" ); diff_args.push_back( arg ); break;
+      case 'y': diff_args.push_back( "-y" ); break;
+      case bz2_opt: parse_compressor( sarg, pn, fmt_bz2 ); break;
+      case gz_opt: parse_compressor( sarg, pn, fmt_gz ); break;
+      case lz_opt: parse_compressor( sarg, pn, fmt_lz ); break;
+      case xz_opt: parse_compressor( sarg, pn, fmt_xz ); break;
+      case zst_opt: parse_compressor( sarg, pn, fmt_zst ); break;
+      default: internal_error( "uncaught option." );
+      }
+    } // end process options
+
+#if defined __MSVCRT__ || defined __OS2__
+  setmode( STDIN_FILENO, O_BINARY );
+  setmode( STDOUT_FILENO, O_BINARY );
+#endif
+
+  const int files = parser.arguments() - argind;
+  if( files < 1 ) { show_error( "No files given.", 0, true ); return 2; }
+  if( files > 2 ) { show_error( "Too many files.", 0, true ); return 2; }
+
+  std::string filenames[2];		// file names of the two input files
+  filenames[0] = parser.argument( argind );
+  if( files == 2 ) filenames[1] = parser.argument( argind + 1 );
+
+  int infd[2];				// file descriptors of the two files
+  infd[0] = ( filenames[0] == "-" ) ?
+    STDIN_FILENO : open_instream( filenames[0] );
+  if( infd[0] < 0 ) return 2;
+
+  if( files == 2 )
+    {
+    if( check_identical( filenames[0].c_str(), filenames[1].c_str() ) )
+      return 0;		// NOTE(review): presumably both names refer to the same file
+    infd[1] = ( filenames[1] == "-" ) ?
+      STDIN_FILENO : open_instream( filenames[1] );
+    if( infd[1] < 0 ) return 2;
+    }
+  else			// one file given: look for a counterpart to compare it with
+    {
+    if( filenames[0] == "-" )
+      { show_error( "Missing operand after '-'.", 0, true ); return 2; }
+    if( format_types[0] >= 0 || format_types[1] >= 0 )
+      { show_error( "Two files must be given when format is specified.", 0, true );
+        return 2; }
+    filenames[1] = filenames[0];
+    infd[1] = open_other_instream( filenames[1] );
+    if( infd[1] < 0 )
+      {
+      if( verbosity >= 0 )
+        std::fprintf( stderr, "%s: Can't find file to compare with '%s'.\n",
+                      program_name, filenames[0].c_str() );
+      show_error( 0, 0, true ); return 2;
+      }
+    }
+  // register the FIFO cleanup (at exit and on signals) before any FIFO is created
+  std::atexit( remove_fifos );
+  set_signals();
+  if( !set_fifonames( filenames ) ) return 2;
+
+  Children children[2];
+  if( !set_data_feeder( filenames[0], fifonames[0], infd[0], children[0],
+                        format_types[0] ) ||
+      !set_data_feeder( filenames[1], fifonames[1], infd[1], children[1],
+                        format_types[1] ) )
+    return 2;
+  // run DIFF with the queued options on the two FIFOs instead of the original files
+  const pid_t diff_pid = fork();
+  if( diff_pid == 0 )			// child (diff)
+    {
+    const char ** const argv = new const char *[diff_args.size()+5];
+    argv[0] = DIFF;
+    for( unsigned i = 0; i < diff_args.size(); ++i )
+      argv[i+1] = diff_args[i];
+    argv[diff_args.size()+1] = "--";
+    argv[diff_args.size()+2] = fifonames[0].c_str();
+    argv[diff_args.size()+3] = fifonames[1].c_str();
+    argv[diff_args.size()+4] = 0;
+    execvp( argv[0], (char **)argv );
+    show_exec_error( DIFF );		// reached only if execvp failed
+    _exit( 2 );
+    }
+  if( diff_pid < 0 )			// parent
+    { show_fork_error( DIFF ); return 2; }
+
+  int retval = wait_for_child( diff_pid, DIFF );
+  // probe each FIFO without blocking, then check the status of its feeder children
+  for( int i = 0; i < 2; ++i )
+    {
+    int infd;			// fifo from decompressor (shadows infd[2] above)
+    do infd = open( fifonames[i].c_str(), O_RDONLY | O_NONBLOCK | O_BINARY );
+    while( infd < 0 && errno == EINTR );
+    bool finished = false;	// set to true if fifo is empty and at EOF
+    if( infd >= 0 )
+      {
+      uint8_t b;
+      if( readblock( infd, &b, 1 ) <= 0 && errno == 0 ) finished = true;
+      close( infd );
+      }
+    if( !good_status( children[i], finished ) ) retval = 2;
+    }
+  // close the input files; standard input ("-") is left open
+  for( int i = 0; i < 2; ++i )
+    if( filenames[i] != "-" && close( infd[i] ) != 0 )
+      {
+      show_file_error( filenames[i].c_str(), "Error closing input file", errno );
+      retval = 2;
+      }
+
+  return retval;
+  }