/* Zdiff - decompress and compare two files line by line
Copyright (C) 2010-2023 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _FILE_OFFSET_BITS 64
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#if defined __MSVCRT__ || defined __OS2__
#include
#endif
#include "arg_parser.h"
#include "rc.h"
#include "zutils.h"
namespace {
// Names of the two fifos passed to diff. Filled in by set_fifonames();
// remove_fifos() deletes the named files and empties both strings.
std::string fifonames[2]; // names of the two fifos passed to diff
#include "zcmpdiff.cc"
/* Print the usage message of zdiff to standard output, followed by
   whatever show_help_addr() prints.
   Options taking an argument show a placeholder for it (<n>, <list>,
   <f1>, <f2>, <command>) so that each description reads as a complete
   sentence.
*/
void show_help()
  {
  std::printf( "zdiff compares two files and, if they differ, writes to standard output the\n"
               "differences line by line. A hyphen '-' used as a file argument means standard\n"
               "input. If any file given is compressed, its decompressed content is used.\n"
               "zdiff is a front end to the program diff and has the limitation that messages\n"
               "from diff refer to temporary file names instead of those specified.\n"
               "\n'zdiff -v -V' prints the version of the diff program used.\n"
               "\nThe formats supported are bzip2, gzip, lzip, xz, and zstd.\n"
               "\nUsage: zdiff [options] file1 [file2]\n"
               "\nzdiff compares file1 to file2. The standard input is used only if file1 or\n"
               "file2 refers to standard input. If file2 is omitted zdiff tries the\n"
               "following:\n"
               "\n - If file1 is compressed, compares its decompressed contents with\n"
               " the corresponding uncompressed file (the name of file1 with the\n"
               " extension removed).\n"
               "\n - If file1 is uncompressed, compares it with the decompressed\n"
               " contents of file1.[lz|bz2|gz|zst|xz] (the first one that is found).\n"
               "\nExit status is 0 if inputs are identical, 1 if different, 2 if trouble.\n"
               "Some options only work if the diff program used supports them.\n"
               "\nOptions:\n"
               " -h, --help display this help and exit\n"
               " -V, --version output version information and exit\n"
               " -a, --text treat all files as text\n"
               " -b, --ignore-space-change ignore changes in the amount of white space\n"
               " -B, --ignore-blank-lines ignore changes whose lines are all blank\n"
               " -c use the context output format\n"
               " -C, --context=<n> same as -c but use <n> lines of context\n"
               " -d, --minimal try hard to find a smaller set of changes\n"
               " -E, --ignore-tab-expansion ignore changes due to tab expansion\n"
               " -i, --ignore-case ignore case differences in file contents\n"
               " -M, --format=<list> process only the formats in <list>\n"
               " -N, --no-rcfile don't read runtime configuration file\n"
               " -O, --force-format=[<f1>][,<f2>] force one or both input formats\n"
               " -p, --show-c-function show which C function each change is in\n"
               " -q, --brief output only whether files differ\n"
               " -s, --report-identical-files report when two files are identical\n"
               " -t, --expand-tabs expand tabs to spaces in output\n"
               " -T, --initial-tab make tabs line up by prepending a tab\n"
               " -u use the unified output format\n"
               " -U, --unified=<n> same as -u but use <n> lines of context\n"
               " -v, --verbose verbose mode (for --version)\n"
               " -w, --ignore-all-space ignore all white space\n"
               " -W, --width=<n> output at most <n> print columns (for -y)\n"
               " -y, --side-by-side output in two columns\n"
               " --bz2=<command> set compressor and options for bzip2 format\n"
               " --gz=<command> set compressor and options for gzip format\n"
               " --lz=<command> set compressor and options for lzip format\n"
               " --xz=<command> set compressor and options for xz format\n"
               " --zst=<command> set compressor and options for zstd format\n"
               "\nValid formats for options '-M' and '-O' are 'bz2', 'gz', 'lz', 'xz', 'zst',\n"
               "and 'un' for uncompressed.\n" );
  show_help_addr();
  }
/* Return a pointer to the last component of 'filename': the character
   following the last '/', or 'filename' itself if it contains no '/'.
   The result points into 'filename' and is the empty string if
   'filename' ends with '/'.
*/
const char * my_basename( const char * filename )
  {
  const char * base = filename;
  for( const char * p = filename; *p != 0; ++p )
    if( *p == '/' ) base = p + 1;	// component starts after the slash
  return base;
  }
/* Delete the files named in 'fifonames' and empty each name, so that a
   second call does nothing. Names that are already empty are skipped.
   Registered with std::atexit and also called from signal_handler.
*/
extern "C" void remove_fifos()
  {
  for( int i = 0; i < 2; ++i )
    {
    std::string & name = fifonames[i];
    if( name.size() == 0 ) continue;		// nothing to remove
    std::remove( name.c_str() );
    name.clear();
    }
  }
/* Set fifonames[i] to "${TMPDIR}/<coded_pid>_<basename(filenames[i])>"
   and create both FIFOs with read/write permission for the owner only.
   "/tmp" is used if TMPDIR is not set. The pid is coded in base 36,
   least significant digit first. If both names come out equal, the '_'
   of the second one is replaced by '-'.
   If a name already exists, it is removed and creation is retried once.
   Returns true if both FIFOs were created; else reports the error and
   returns false.
*/
bool set_fifonames( const std::string filenames[2] )
  {
  static const char digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
  const unsigned base = sizeof digits - 1;		// 36
  const mode_t mode = S_IRUSR | S_IWUSR;

  const char * const tmpdir = std::getenv( "TMPDIR" );
  std::string prefix( tmpdir ? tmpdir : "/tmp" );
  prefix += '/';
  unsigned pid = getpid();
  do { prefix += digits[pid % base]; pid /= base; } while( pid != 0 );
  const unsigned sep_pos = prefix.size();	// position of the separator
  prefix += '_';

  for( int i = 0; i < 2; ++i )
    { fifonames[i] = prefix; fifonames[i] += my_basename( filenames[i].c_str() ); }
  if( fifonames[0] == fifonames[1] ) fifonames[1][sep_pos] = '-';

  for( int i = 0; i < 2; ++i )
    {
    const char * const name = fifonames[i].c_str();
    if( mkfifo( name, mode ) == 0 ) continue;
    if( errno == EEXIST )			// stale name; replace it
      {
      std::remove( name );
      if( mkfifo( name, mode ) == 0 ) continue;
      }
    show_file_error( name, "Can't create FIFO", errno );
    return false;
    }
  return true;
  }
/* Start the child process(es) that write the contents of 'infd' to the
   FIFO 'fifoname'.
   If 'format_index' is negative, the format is obtained from
   test_format(). If a compressor name exists for the format, two
   children are forked: a feeder that copies 'infd' into a pipe, and the
   compressor program, run with "-d" (or "-dq"), reading from the pipe
   and writing to the FIFO. Otherwise a single feeder child writes
   'infd' directly to the FIFO.
   'children' receives the compressor name (null if uncompressed) and
   the pids of the children (pid[1] is 0 if there is only one child).
   Returns false if the pipe can't be created or a fork fails.
*/
bool set_data_feeder( const std::string & filename,
const std::string & fifoname, const int infd,
Children & children, int format_index )
{
// NOTE(review): magic_data/magic_size presumably hold the bytes consumed
// by test_format so that feed_data can write them first -- confirm.
uint8_t magic_data[magic_buf_size];
int magic_size = 0;
if( format_index < 0 )
format_index = test_format( infd, magic_data, &magic_size );
children.compressor_name = get_compressor_name( format_index );
if( children.compressor_name ) // compressed
{
int fda[2]; // pipe from feeder to compressor
if( pipe( fda ) < 0 )
{ show_error( "Can't create pipe", errno ); return false; }
const pid_t pid = fork();
if( pid == 0 ) // child 1 (compressor feeder)
{
// the feeder only writes to the pipe; close the read end first
if( close( fda[0] ) != 0 ||
!feed_data( filename, infd, fda[1], magic_data, magic_size ) )
_exit( 2 );
if( close( fda[1] ) != 0 )
{ show_close_error(); _exit( 2 ); }
_exit( 0 );
}
if( pid < 0 ) // parent
{ show_fork_error( "data feeder" ); return false; }
const pid_t pid2 = fork();
if( pid2 == 0 ) // child 2 (compressor)
{
// NOTE(review): opening a FIFO write-only without O_NONBLOCK presumably
// blocks until a reader (diff) opens it -- confirm against POSIX open().
const int outfd = open( fifoname.c_str(), O_WRONLY | O_BINARY );
if( outfd < 0 )
{ show_file_error( fifoname.c_str(), "Can't open FIFO for writing",
errno ); _exit( 2 ); }
// make the pipe the standard input and the FIFO the standard output,
// then close the descriptors that are no longer needed
if( dup2( fda[0], STDIN_FILENO ) >= 0 &&
dup2( outfd, STDOUT_FILENO ) >= 0 &&
close( fda[0] ) == 0 && close( fda[1] ) == 0 &&
close( outfd ) == 0 )
{
// argv = compressor name, configured arguments, "-d" or "-dq", null
const std::vector< std::string > & compressor_args =
get_compressor_args( format_index );
const int size = compressor_args.size();
const char ** const argv = new const char *[size+3];
argv[0] = children.compressor_name;
for( int i = 0; i < size; ++i )
argv[i+1] = compressor_args[i].c_str();
// add 'q' to the decompress option when verbosity is negative
argv[size+1] = ( verbosity >= 0 ) ? "-d" : "-dq";
argv[size+2] = 0;
execvp( argv[0], (char **)argv );
}
// reached if execvp returns or if any dup2/close above failed
show_exec_error( children.compressor_name );
_exit( 2 );
}
if( pid2 < 0 ) // parent
{ show_fork_error( children.compressor_name ); return false; }
// the parent uses neither end of the pipe
close( fda[0] ); close( fda[1] );
children.pid[0] = pid;
children.pid[1] = pid2;
}
else // uncompressed
{
const pid_t pid = fork();
if( pid == 0 ) // child (feeder)
{
const int outfd = open( fifoname.c_str(), O_WRONLY | O_BINARY );
if( outfd < 0 )
{ show_file_error( fifoname.c_str(), "Can't open FIFO for writing",
errno ); _exit( 2 ); }
if( !feed_data( filename, infd, outfd, magic_data, magic_size ) )
_exit( 2 );
if( close( outfd ) != 0 )
{ show_close_error(); _exit( 2 ); }
_exit( 0 );
}
if( pid < 0 ) // parent
{ show_fork_error( "data feeder" ); return false; }
children.pid[0] = pid;
children.pid[1] = 0; // no second child
}
return true;
}
/* Handler installed by set_signals. Removes the FIFOs, restores the
   default action for 'sig' and raises 'sig' again so that the default
   action is then applied to this process.
*/
extern "C" void signal_handler( int sig )
  {
  remove_fifos();			// clean up the temporary FIFOs first
  std::signal( sig, SIG_DFL );		// stop handling 'sig' here
  std::raise( sig );			// re-deliver it with the default action
  }
/* Install signal_handler for SIGHUP, SIGINT and SIGTERM (in that order). */
void set_signals()
  {
  const int handled[] = { SIGHUP, SIGINT, SIGTERM };
  const unsigned count = sizeof handled / sizeof handled[0];
  for( unsigned i = 0; i < count; ++i )
    std::signal( handled[i], signal_handler );
  }
} // end namespace
/* Entry point of zdiff.
   Parses the command line, opens the one or two input files, creates two
   FIFOs, starts the children that feed each FIFO (see set_data_feeder),
   runs the diff program on the two FIFOs and waits for it.
   Returns the status obtained from wait_for_child for diff, or 2 if
   there is an error in the arguments, in opening the files, in creating
   the FIFOs or children, in the status of the feeder children, or in
   closing the input files. Returns 0 at once for '-h', for '-V', and
   when check_identical reports that both file arguments are identical.
*/
int main( const int argc, const char * const argv[] )
{
// codes for the long-only options; above the range of any char code
enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt, zst_opt };
std::vector< const char * > diff_args; // args to diff, maybe empty
int format_types[2] = { -1, -1 }; // < 0 means undefined
program_name = "zdiff";
invocation_name = ( argc > 0 ) ? argv[0] : program_name;
// option table: { code, long name, whether the option takes an argument }
const Arg_parser::Option options[] =
{
{ 'a', "text", Arg_parser::no },
{ 'b', "ignore-space-change", Arg_parser::no },
{ 'B', "ignore-blank-lines", Arg_parser::no },
{ 'c', 0, Arg_parser::no },
{ 'C', "context", Arg_parser::yes },
{ 'd', "minimal", Arg_parser::no },
{ 'E', "ignore-tab-expansion", Arg_parser::no },
{ 'h', "help", Arg_parser::no },
{ 'i', "ignore-case", Arg_parser::no },
{ 'M', "format", Arg_parser::yes },
{ 'N', "no-rcfile", Arg_parser::no },
{ 'O', "force-format", Arg_parser::yes },
{ 'p', "show-c-function", Arg_parser::no },
{ 'q', "brief", Arg_parser::no },
{ 's', "report-identical-files", Arg_parser::no },
{ 't', "expand-tabs", Arg_parser::no },
{ 'T', "initial-tab", Arg_parser::no },
{ 'u', 0, Arg_parser::no },
{ 'U', "unified", Arg_parser::yes },
{ 'v', "verbose", Arg_parser::no },
{ 'V', "version", Arg_parser::no },
{ 'w', "ignore-all-space", Arg_parser::no },
{ 'W', "width", Arg_parser::yes },
{ 'y', "side-by-side", Arg_parser::no },
{ bz2_opt, "bz2", Arg_parser::yes },
{ gz_opt, "gz", Arg_parser::yes },
{ lz_opt, "lz", Arg_parser::yes },
{ xz_opt, "xz", Arg_parser::yes },
{ zst_opt, "zst", Arg_parser::yes },
{ 0, 0, Arg_parser::no } };
const Arg_parser parser( argc, argv, options );
if( parser.error().size() ) // bad option
{ show_error( parser.error().c_str(), 0, true ); return 2; }
maybe_process_config_file( parser );
// Process the options. Most of them are just collected in diff_args to
// be passed to diff; the rest configure zdiff itself.
int argind = 0;
for( ; argind < parser.arguments(); ++argind )
{
const int code = parser.code( argind );
if( !code ) break; // no more options
const char * const pn = parser.parsed_name( argind ).c_str();
const std::string & sarg = parser.argument( argind );
// 'arg' points into a string owned by 'parser'; diff_args stores it
const char * const arg = sarg.c_str();
switch( code )
{
case 'a': diff_args.push_back( "-a" ); break;
case 'b': diff_args.push_back( "-b" ); break;
case 'B': diff_args.push_back( "-B" ); break;
case 'c': diff_args.push_back( "-c" ); break;
case 'C': diff_args.push_back( "-C" ); diff_args.push_back( arg ); break;
case 'd': diff_args.push_back( "-d" ); break;
case 'E': diff_args.push_back( "-E" ); break;
case 'h': show_help(); return 0;
case 'i': diff_args.push_back( "-i" ); break;
case 'M': parse_format_list( sarg, pn ); break;
// NOTE(review): '-N' does nothing here; presumably it is acted upon by
// maybe_process_config_file above -- confirm.
case 'N': break;
case 'O': parse_format_types2( sarg, pn, format_types ); break;
case 'p': diff_args.push_back( "-p" ); break;
case 'q': diff_args.push_back( "-q" ); break;
case 's': diff_args.push_back( "-s" ); break;
case 't': diff_args.push_back( "-t" ); break;
case 'T': diff_args.push_back( "-T" ); break;
case 'u': diff_args.push_back( "-u" ); break;
case 'U': diff_args.push_back( "-U" ); diff_args.push_back( arg ); break;
case 'v': if( verbosity < 4 ) ++verbosity; break;
case 'V': show_version( DIFF " --version" ); return 0;
case 'w': diff_args.push_back( "-w" ); break;
case 'W': diff_args.push_back( "-W" ); diff_args.push_back( arg ); break;
case 'y': diff_args.push_back( "-y" ); break;
case bz2_opt: parse_compressor( sarg, pn, fmt_bz2 ); break;
case gz_opt: parse_compressor( sarg, pn, fmt_gz ); break;
case lz_opt: parse_compressor( sarg, pn, fmt_lz ); break;
case xz_opt: parse_compressor( sarg, pn, fmt_xz ); break;
case zst_opt: parse_compressor( sarg, pn, fmt_zst ); break;
default : internal_error( "uncaught option." );
}
} // end process options
#if defined __MSVCRT__ || defined __OS2__
setmode( STDIN_FILENO, O_BINARY );
setmode( STDOUT_FILENO, O_BINARY );
#endif
// the remaining arguments are the file names; one or two are accepted
const int files = parser.arguments() - argind;
if( files < 1 ) { show_error( "No files given.", 0, true ); return 2; }
if( files > 2 ) { show_error( "Too many files.", 0, true ); return 2; }
std::string filenames[2]; // file names of the two input files
filenames[0] = parser.argument( argind );
if( files == 2 ) filenames[1] = parser.argument( argind + 1 );
int infd[2]; // file descriptors of the two files
infd[0] = ( filenames[0] == "-" ) ?
STDIN_FILENO : open_instream( filenames[0] );
if( infd[0] < 0 ) return 2;
if( files == 2 )
{
// nothing to compare if both arguments are reported as identical
if( check_identical( filenames[0].c_str(), filenames[1].c_str() ) )
return 0;
infd[1] = ( filenames[1] == "-" ) ?
STDIN_FILENO : open_instream( filenames[1] );
if( infd[1] < 0 ) return 2;
}
else
{
// only one file given: the second file is derived from its name
if( filenames[0] == "-" )
{ show_error( "Missing operand after '-'.", 0, true ); return 2; }
if( format_types[0] >= 0 || format_types[1] >= 0 )
{ show_error( "Two files must be given when format is specified.", 0, true );
return 2; }
// filenames[1] is passed by non-const reference; presumably
// open_other_instream replaces it with the name it opened -- confirm.
filenames[1] = filenames[0];
infd[1] = open_other_instream( filenames[1] );
if( infd[1] < 0 )
{
if( verbosity >= 0 )
std::fprintf( stderr, "%s: Can't find file to compare with '%s'.\n",
program_name, filenames[0].c_str() );
show_error( 0, 0, true ); return 2;
}
}
// arrange removal of the FIFOs at exit and on signals before creating them
std::atexit( remove_fifos );
set_signals();
if( !set_fifonames( filenames ) ) return 2;
Children children[2];
if( !set_data_feeder( filenames[0], fifonames[0], infd[0], children[0],
format_types[0] ) ||
!set_data_feeder( filenames[1], fifonames[1], infd[1], children[1],
format_types[1] ) )
return 2;
const pid_t diff_pid = fork();
if( diff_pid == 0 ) // child (diff)
{
// argv = DIFF, collected options, "--", the two FIFO names, null
const char ** const argv = new const char *[diff_args.size()+5];
argv[0] = DIFF;
for( unsigned i = 0; i < diff_args.size(); ++i )
argv[i+1] = diff_args[i];
argv[diff_args.size()+1] = "--";
argv[diff_args.size()+2] = fifonames[0].c_str();
argv[diff_args.size()+3] = fifonames[1].c_str();
argv[diff_args.size()+4] = 0;
execvp( argv[0], (char **)argv );
show_exec_error( DIFF );
_exit( 2 );
}
if( diff_pid < 0 ) // parent
{ show_fork_error( DIFF ); return 2; }
int retval = wait_for_child( diff_pid, DIFF );
// After diff has finished, probe each FIFO without blocking to find out
// whether its feeder has delivered all its data, then check the status
// of the corresponding children.
for( int i = 0; i < 2; ++i )
{
int infd; // fifo from decompressor
do infd = open( fifonames[i].c_str(), O_RDONLY | O_NONBLOCK | O_BINARY );
while( infd < 0 && errno == EINTR );
bool finished = false; // set to true if fifo is empty and at EOF
if( infd >= 0 )
{
uint8_t b;
// NOTE(review): relies on readblock leaving errno at 0 when it reads
// nothing because of EOF -- confirm in readblock.
if( readblock( infd, &b, 1 ) <= 0 && errno == 0 ) finished = true;
close( infd );
}
if( !good_status( children[i], finished ) ) retval = 2;
}
// close the input files; standard input ("-") is left open
for( int i = 0; i < 2; ++i )
if( filenames[i] != "-" && close( infd[i] ) != 0 )
{
show_file_error( filenames[i].c_str(), "Error closing input file", errno );
retval = 2;
}
return retval;
}