/* Zdiff - decompress and compare two files line by line
Copyright (C) 2010 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _FILE_OFFSET_BITS 64
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#if defined(__MSVCRT__) || defined(__OS2__)
#include
#endif
#include "arg_parser.h"
#include "zutils.h"
#if CHAR_BIT != 8
#error "Environments where CHAR_BIT != 8 are not supported."
#endif
#ifndef LLONG_MAX
#define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL
#endif
#ifndef LLONG_MIN
#define LLONG_MIN (-LLONG_MAX - 1LL)
#endif
#ifndef ULLONG_MAX
#define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL
#endif
namespace {
// Names of the two FIFOs handed to diff. remove_fifos() deletes every
// non-empty entry from the file system and then clears it.
std::string fifonames[2]; // names of the two fifos passed to diff
// Extra flag OR-ed into the open() calls of this file: O_BINARY where the
// platform defines it, 0 (no effect) everywhere else.
#ifdef O_BINARY
const int o_binary = O_BINARY;
#else
const int o_binary = 0;
#endif
// Suffix table used by open_other_instream(): when a file name ends in
// `from', that suffix is replaced by `to' to obtain the name of the
// corresponding uncompressed file. The { 0, 0 } entry terminates the table.
struct { const char * from; const char * to; } const known_extensions[] = {
{ ".bz2", "" },
{ ".tbz", ".tar" },
{ ".tbz2", ".tar" },
{ ".gz", "" },
{ ".tgz", ".tar" },
{ ".lz", "" },
{ ".tlz", ".tar" },
{ ".xz", "" },
{ ".txz", ".tar" },
{ 0, 0 } };
// Prints the usage message of zdiff to standard output, followed by the
// common trailer printed by show_help_addr().
// The <file1>, <file2> and <n> placeholders are part of the message; without
// them the description of the one-file mode and of -C/-U is unreadable.
void show_help() throw()
  {
  std::printf( "Zdiff compares two files (\"-\" means standard input), and if they\n" );
  std::printf( "differ, shows the differences line by line. If any given file is\n" );
  std::printf( "compressed, its uncompressed content is used. Zdiff is a front end to\n" );
  std::printf( "the diff program and has the limitation that messages from diff refer to\n" );
  std::printf( "temporary filenames instead of those specified.\n" );
  std::printf( "The supported compressors are bzip2, gzip, lzip and xz.\n" );
  std::printf( "\nUsage: zdiff [options] file1 [file2]\n" );
  std::printf( "\nCompares <file1> to <file2>. If <file2> is omitted zdiff tries the\n" );
  std::printf( "following:\n" );
  std::printf( "If <file1> is compressed, compares <file1> to the file with the\n" );
  std::printf( "corresponding decompressed file name (removes the extension from\n" );
  std::printf( "<file1>).\n" );
  std::printf( "If <file1> is not compressed, compares <file1> to the uncompressed\n" );
  std::printf( "contents of <file1>.[bz2|gz|lz|xz] (the first one that is found).\n" );
  std::printf( "If no suitable file is found, compares <file1> to data read from\n" );
  std::printf( "standard input.\n" );
  std::printf( "\nExit status is 0 if inputs are identical, 1 if different, 2 if trouble.\n" );
  std::printf( "\nOptions:\n" );
  // Option descriptions are aligned on a common column.
  std::printf( "  -h, --help                        display this help and exit\n" );
  std::printf( "  -V, --version                     output version information and exit\n" );
  std::printf( "  -a, --text                        treat all files as text\n" );
  std::printf( "  -b, --ignore-space-change         ignore changes in the amount of white space\n" );
  std::printf( "  -B, --ignore-blank-lines          ignore changes whose lines are all blank\n" );
  std::printf( "  -c                                use the context output format\n" );
  std::printf( "  -C, --context=<n>                 same as -c but use <n> lines of context\n" );
  std::printf( "  -d, --minimal                     try hard to find a smaller set of changes\n" );
  std::printf( "  -E, --ignore-tab-expansion        ignore changes due to tab expansion\n" );
  std::printf( "  -i, --ignore-case                 ignore case differences in file contents\n" );
  std::printf( "  -p, --show-c-function             show which C function each change is in\n" );
  std::printf( "  -q, --brief                       output only whether files differ\n" );
  std::printf( "  -s, --report-identical-files      report when two files are identical\n" );
  std::printf( "  -t, --expand-tabs                 expand tabs to spaces in output\n" );
  std::printf( "  -T, --initial-tab                 make tabs line up by prepending a tab\n" );
  std::printf( "  -u                                use the unified output format\n" );
  std::printf( "  -U, --unified=<n>                 same as -u but use <n> lines of context\n" );
  std::printf( "  -w, --ignore-all-space            ignore all white space\n" );
  std::printf( "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" );
  std::printf( "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" );
  show_help_addr();
  }
// Opens `input_filename' read-only (plus o_binary).
// Returns the file descriptor. On failure calls show_error2() with the file
// name and returns the negative value produced by open().
int open_instream( const std::string & input_filename ) throw()
  {
  const char * const path = input_filename.c_str();
  const int fd = open( path, O_RDONLY | o_binary );
  if( fd >= 0 ) return fd;
  show_error2( "Can't open input file", path );
  return fd;
  }
// Finds and opens the counterpart of `name' when only one file was given.
// If `name' ends in one of the known_extensions, the suffix is replaced by
// its uncompressed equivalent, `name' is updated, and the result of open()
// on the new name is returned as is (it may be negative).
// Otherwise each of the simple_extensions is appended in turn; the first
// candidate that can be opened is stored in `name' and its descriptor is
// returned.
// Returns -1 if no candidate could be opened.
int open_other_instream( std::string & name ) throw()
  {
  // `name' looks compressed: try the uncompressed version
  for( int i = 0; known_extensions[i].from; ++i )
    {
    const std::string suffix( known_extensions[i].from );
    if( name.size() <= suffix.size() ) continue;	// need a non-empty stem
    const std::string::size_type stem_len = name.size() - suffix.size();
    if( name.compare( stem_len, suffix.size(), suffix ) != 0 ) continue;
    name.erase( stem_len );
    name += known_extensions[i].to;
    return open( name.c_str(), O_RDONLY | o_binary );
    }

  // `name' does not look compressed: try the compressed versions
  for( int i = 0; simple_extensions[i]; ++i )
    {
    const std::string candidate( name + simple_extensions[i] );
    const int fd = open( candidate.c_str(), O_RDONLY | o_binary );
    if( fd < 0 ) continue;
    name = candidate;
    return fd;
    }
  return -1;
  }
// Returns true if `name1' and `name2' designate the same file: either the
// two strings are equal, or both names can be stat()ed and share inode and
// device numbers. Returns false if either stat() call fails.
bool check_identical( const char * const name1, const char * const name2 ) throw()
  {
  if( std::strcmp( name1, name2 ) == 0 ) return true;
  struct stat info1, info2;
  if( stat( name1, &info1 ) != 0 ) return false;
  if( stat( name2, &info2 ) != 0 ) return false;
  const bool same_inode = ( info1.st_ino == info2.st_ino );
  const bool same_device = ( info1.st_dev == info2.st_dev );
  return same_inode && same_device;
  }
// Returns a pointer to the part of `filename' that follows its last '/',
// or `filename' itself if it contains no '/'. The result points into the
// argument; nothing is copied. A name ending in '/' yields an empty string.
const char * my_basename( const char * filename ) throw()
  {
  const char * tail = filename;
  for( const char * p = filename; *p != 0; ++p )
    if( *p == '/' ) tail = p + 1;
  return tail;
  }
// Deletes from the file system every FIFO whose name is still recorded in
// fifonames[], then forgets the name so that a second call does nothing.
// Registered with std::atexit() in main() and called from signal_handler().
extern "C" void remove_fifos() throw()
  {
  for( int i = 0; i < 2; ++i )
    {
    std::string & fifo = fifonames[i];
    if( fifo.empty() ) continue;
    std::remove( fifo.c_str() );
    fifo.clear();
    }
  }
// Set fifonames[i] to "${TMPDIR}/<coded_pid>_<basename of filenames[i]>"
// ("/tmp" replaces ${TMPDIR} when the variable is not set) and create both
// FIFOs with mode S_IRUSR | S_IWUSR.
// <coded_pid> is ( 2 * pid ) + i written in base 36, so the two names differ
// even when both input files have the same basename.
// A name that already exists is removed and its creation is retried once.
// Returns false, after calling show_error2(), if a FIFO can't be created.
bool set_fifonames( const std::string filenames[2] )
  {
  enum { num_codes = 36 };
  const char * const codes = "0123456789abcdefghijklmnopqrstuvwxyz";
  const char * const tmpdir = std::getenv( "TMPDIR" );
  const int pid = getpid();

  // build the two names
  for( int i = 0; i < 2; ++i )
    {
    std::string code;			// base-36 digits, most significant first
    int n = ( 2 * pid ) + i;
    do { code.insert( code.begin(), codes[n % num_codes] ); n /= num_codes; }
    while( n );

    std::string & fifo = fifonames[i];
    fifo = tmpdir ? tmpdir : "/tmp";
    fifo += '/';
    fifo += code;
    fifo += '_';
    fifo += my_basename( filenames[i].c_str() );
    }

  // create the FIFOs
  for( int i = 0; i < 2; ++i )
    {
    const char * const path = fifonames[i].c_str();
    if( mkfifo( path, S_IRUSR | S_IWUSR ) == 0 ) continue;
    if( errno == EEXIST )		// stale entry: remove it and retry once
      {
      std::remove( path );
      if( mkfifo( path, S_IRUSR | S_IWUSR ) == 0 ) continue;
      }
    show_error2( "Can't create FIFO", path );
    return false;
    }
  return true;
  }
// Starts the process(es) that write the contents of `infd' to the FIFO
// named `fifoname'.
// If test_format() reports the input as compressed, a child (the feeder) is
// forked, which in turn forks a grandchild that execs the program named by
// `file_type' with the option "-cdfq", its standard input connected to a pipe
// and its standard output to the FIFO. The feeder passes the input to the
// pipe through feed_data() and exits with the value that wait_for_child()
// returns for the grandchild.
// If the input is not compressed, a single child is forked that passes the
// input to the FIFO through feed_data().
// On success the pid of the direct child is stored in `*pidp' and true is
// returned. Returns false if pipe() or fork() fails in the calling process;
// `*pidp' is left untouched in that case.
bool set_data_feeder( const std::string & fifoname, const int infd,
pid_t * const pidp )
{
std::string file_type;
const uint8_t * magic_data;
int magic_size;
// presumably test_format() stores the compressor name in file_type and the
// bytes it had to read from infd in magic_data/magic_size -- TODO confirm
const bool compressed =
test_format( infd, file_type, &magic_data, &magic_size );
if( compressed ) // compressed with `file_type'
{
int fda[2]; // pipe from feeder to decompressor
if( pipe( fda ) < 0 )
{ show_error( "Can't create pipe", errno ); return false; }
const pid_t pid = fork();
if( pid == 0 ) // child (decompressor feeder)
{
const pid_t pid2 = fork();
if( pid2 == 0 ) // grandchild (decompressor)
{
// the FIFO is opened here, in the process that writes to it
const int outfd = open( fifoname.c_str(), O_WRONLY | o_binary );
if( outfd < 0 )
{
if( verbosity >= 0 )
std::fprintf( stderr, "%s: Can't open FIFO `%s' for writing: %s.\n",
util_name, fifoname.c_str(), std::strerror( errno ) );
_exit( 2 );
}
// stdin <- read end of the pipe, stdout -> FIFO; the original descriptors
// are closed before exec. execlp is attempted only if every step succeeds.
if( dup2( fda[0], STDIN_FILENO ) >= 0 &&
dup2( outfd, STDOUT_FILENO ) >= 0 &&
close( fda[0] ) == 0 && close( fda[1] ) == 0 &&
close( outfd ) == 0 )
execlp( file_type.c_str(), file_type.c_str(), "-cdfq", (char *)0 );
// reached if a step above failed or if execlp returned
show_exec_error( file_type.c_str() );
_exit( 2 );
}
// feeder: second fork failed
if( pid2 < 0 )
{ show_fork_error( file_type.c_str() ); _exit( 2 ); }
// feeder: drop the unused read end, then write the input to the pipe
if( close( fda[0] ) != 0 ||
!feed_data( infd, fda[1], magic_data, magic_size ) )
_exit( 2 );
// closing the write end lets the decompressor see end of input
if( close( fda[1] ) != 0 )
{ show_close_error( "data feeder" ); _exit( 2 ); }
_exit( wait_for_child( pid2, file_type.c_str() ) );
}
// parent
// the pipe is used only by the feeder and the decompressor
close( fda[0] ); close( fda[1] );
if( pid < 0 )
{ show_fork_error( "decompressor feeder" ); return false; }
*pidp = pid;
}
else // not compressed
{
const pid_t pid = fork();
if( pid == 0 ) // child (feeder)
{
const int outfd = open( fifoname.c_str(), O_WRONLY | o_binary );
if( outfd < 0 )
{
if( verbosity >= 0 )
std::fprintf( stderr, "%s: Can't open FIFO `%s' for writing: %s.\n",
util_name, fifoname.c_str(), std::strerror( errno ) );
_exit( 2 );
}
// presumably feed_data() writes the magic bytes followed by the rest of
// infd -- TODO confirm against zutils
if( !feed_data( infd, outfd, magic_data, magic_size ) )
_exit( 2 );
if( close( outfd ) != 0 )
{ show_close_error( "data feeder" ); _exit( 2 ); }
_exit( 0 );
}
// parent
if( pid < 0 )
{ show_fork_error( "data feeder" ); return false; }
*pidp = pid;
}
return true;
}
// Handler installed by set_signals(). Removes the FIFOs, then restores the
// default action for `sig' and raises `sig' again, so that the default
// action for the signal is what finally runs.
// NOTE(review): remove_fifos() uses std::string members and std::remove(),
// which POSIX does not list as async-signal-safe -- confirm this is acceptable.
extern "C" void signal_handler( int sig ) throw()
{
remove_fifos();
std::signal( sig, SIG_DFL );
std::raise( sig );
}
// Installs signal_handler() for SIGHUP, SIGINT and SIGTERM, in that order.
void set_signals() throw()
  {
  const int handled[] = { SIGHUP, SIGINT, SIGTERM };
  for( unsigned int i = 0; i < sizeof handled / sizeof handled[0]; ++i )
    std::signal( handled[i], signal_handler );
  }
} // end namespace
// Entry point of zdiff.
// Parses the options (most of them are forwarded verbatim to diff), opens
// the one or two input files, creates the two FIFOs, starts diff on the
// FIFOs and starts one data feeder per input (see set_data_feeder).
// Returns the status obtained from wait_for_child() for diff, or 2 if a data
// feeder fails or an input file can't be closed. Returns 0 without running
// diff if both names designate the same file, and 2 on usage errors or if
// any set-up step fails.
int main( const int argc, const char * const argv[] )
  {
  std::vector< const char * > diff_args;	// args to diff, maybe empty
  invocation_name = argv[0];
  util_name = "zdiff";

  const Arg_parser::Option options[] =
    {
    { 'a', "text", Arg_parser::no },
    { 'b', "ignore-space-change", Arg_parser::no },
    { 'B', "ignore-blank-lines", Arg_parser::no },
    { 'c', 0, Arg_parser::no },
    { 'C', "context", Arg_parser::yes },
    { 'd', "minimal", Arg_parser::no },
    { 'E', "ignore-tab-expansion", Arg_parser::no },
    { 'h', "help", Arg_parser::no },
    { 'i', "ignore-case", Arg_parser::no },
    { 'p', "show-c-function", Arg_parser::no },
    { 'q', "brief", Arg_parser::no },
    { 's', "report-identical-files", Arg_parser::no },
    { 't', "expand-tabs", Arg_parser::no },
    { 'T', "initial-tab", Arg_parser::no },
    { 'u', 0, Arg_parser::no },
    { 'U', "unified", Arg_parser::yes },
    { 'V', "version", Arg_parser::no },
    { 'w', "ignore-all-space", Arg_parser::no },
    { 0 , 0, Arg_parser::no } };

  const Arg_parser parser( argc, argv, options );
  if( parser.error().size() )				// bad option
    { show_error( parser.error().c_str(), 0, true ); return 2; }

  int argind = 0;
  for( ; argind < parser.arguments(); ++argind )
    {
    const int code = parser.code( argind );
    if( !code ) break;					// no more options
    // `arg' is stored in diff_args and used after this loop; this assumes
    // parser.argument() returns a reference to storage owned by `parser'
    // -- TODO confirm against arg_parser.h
    const char * const arg = parser.argument( argind ).c_str();
    switch( code )
      {
      case 'a': diff_args.push_back( "-a" ); break;
      case 'b': diff_args.push_back( "-b" ); break;
      case 'B': diff_args.push_back( "-B" ); break;
      case 'c': diff_args.push_back( "-c" ); break;
      case 'C': diff_args.push_back( "-C" ); diff_args.push_back( arg ); break;
      case 'd': diff_args.push_back( "-d" ); break;
      case 'E': diff_args.push_back( "-E" ); break;
      case 'h': show_help(); return 0;
      case 'i': diff_args.push_back( "-i" ); break;
      case 'p': diff_args.push_back( "-p" ); break;
      case 'q': diff_args.push_back( "-q" ); break;
      case 's': diff_args.push_back( "-s" ); break;
      case 't': diff_args.push_back( "-t" ); break;
      case 'T': diff_args.push_back( "-T" ); break;
      case 'u': diff_args.push_back( "-u" ); break;
      case 'U': diff_args.push_back( "-U" ); diff_args.push_back( arg ); break;
      case 'V': show_version( "Zdiff" ); return 0;
      case 'w': diff_args.push_back( "-w" ); break;
      default : internal_error( "uncaught option" );
      }
    } // end process options

#if defined(__MSVCRT__) || defined(__OS2__)
  _setmode( STDIN_FILENO, O_BINARY );
  _setmode( STDOUT_FILENO, O_BINARY );
#endif

  if( argind >= parser.arguments() )
    { show_error( "No files given.", 0, true ); return 2; }
  if( argind + 2 < parser.arguments() )
    { show_error( "Too many files.", 0, true ); return 2; }

  const int files = parser.arguments() - argind;
  std::string filenames[2];		// file names of the two input files
  filenames[0] = parser.argument( argind );
  if( files == 2 ) filenames[1] = parser.argument( argind + 1 );

  int infd[2];				// file descriptors of the two files
  infd[0] = ( filenames[0] == "-" ) ?
            STDIN_FILENO : open_instream( filenames[0] );
  if( infd[0] < 0 ) return 2;

  // standard input compared to itself, or the same file given twice
  if( ( files == 1 && filenames[0] == "-" ) ||
      ( files == 2 && check_identical( filenames[0].c_str(),
                                       filenames[1].c_str() ) ) )
    return 0;

  if( files == 2 )
    {
    infd[1] = ( filenames[1] == "-" ) ?
              STDIN_FILENO : open_instream( filenames[1] );
    if( infd[1] < 0 ) return 2;
    }
  else
    {
    // only one file given: look for its (un)compressed counterpart and
    // fall back to standard input if none can be opened
    filenames[1] = filenames[0];
    infd[1] = open_other_instream( filenames[1] );
    if( infd[1] < 0 ) { infd[1] = STDIN_FILENO; filenames[1] = "-"; }
    }

  // from here on the FIFOs are removed at exit and on SIGHUP/SIGINT/SIGTERM
  std::atexit( remove_fifos );
  set_signals();
  if( !set_fifonames( filenames ) ) return 2;

  const pid_t diff_pid = fork();
  if( diff_pid == 0 )			// child (diff)
    {
    // DIFF + options + "--" + 2 FIFO names + terminating null pointer
    const char ** const diff_argv = new const char *[diff_args.size()+5];
    diff_argv[0] = DIFF;
    for( unsigned int i = 0; i < diff_args.size(); ++i )
      diff_argv[i+1] = diff_args[i];
    diff_argv[diff_args.size()+1] = "--";
    diff_argv[diff_args.size()+2] = fifonames[0].c_str();
    diff_argv[diff_args.size()+3] = fifonames[1].c_str();
    diff_argv[diff_args.size()+4] = 0;
    execvp( diff_argv[0], const_cast< char ** >( diff_argv ) );
    show_exec_error( DIFF );
    _exit( 2 );
    }
					// parent
  if( diff_pid < 0 )
    { show_fork_error( DIFF ); return 2; }

  pid_t pid[2] = { 0, 0 };		// 0 means "feeder not started"
  if( !set_data_feeder( fifonames[0], infd[0], &pid[0] ) ||
      !set_data_feeder( fifonames[1], infd[1], &pid[1] ) )
    {
    // diff is already running and would stay blocked forever opening a FIFO
    // that will never get a writer; stop it and any feeder already started
    kill( diff_pid, SIGTERM );
    if( pid[0] ) kill( pid[0], SIGTERM );
    return 2;
    }

  int retval = wait_for_child( diff_pid, DIFF );

  if( retval != 0 )
    {
    // diff is gone; the feeders may still be blocked writing to the FIFOs
    if( pid[0] ) kill( pid[0], SIGTERM );
    if( pid[1] ) kill( pid[1], SIGTERM );
    }
  else
    if( ( pid[0] && wait_for_child( pid[0], "data feeder" ) != 0 ) ||
        ( pid[1] && wait_for_child( pid[1], "data feeder" ) != 0 ) )
      retval = 2;

  // standard input ("-") is not closed
  for( int i = 0; i < 2; ++i )
    if( filenames[i] != "-" && close( infd[i] ) != 0 )
      {
      show_error2( "Can't close input file", filenames[i].c_str() );
      retval = 2;
      }

  return retval;
  }