diff options
Diffstat (limited to '')
-rw-r--r-- | zdiff.cc | 217 |
1 files changed, 81 insertions, 136 deletions
@@ -1,5 +1,5 @@ /* Zdiff - decompress and compare two files line by line - Copyright (C) 2010, 2011 Antonio Diaz Diaz. + Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,6 +17,7 @@ #define _FILE_OFFSET_BITS 64 +#include <algorithm> #include <cctype> #include <cerrno> #include <climits> @@ -30,9 +31,6 @@ #include <stdint.h> #include <unistd.h> #include <sys/stat.h> -#if defined(__MSVCRT__) || defined(__OS2__) -#include <io.h> -#endif #include "arg_parser.h" #include "zutils.h" @@ -41,126 +39,60 @@ #error "Environments where CHAR_BIT != 8 are not supported." #endif -#ifndef LLONG_MAX -#define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL -#endif -#ifndef LLONG_MIN -#define LLONG_MIN (-LLONG_MAX - 1LL) -#endif -#ifndef ULLONG_MAX -#define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL -#endif - namespace { std::string fifonames[2]; // names of the two fifos passed to diff -#ifdef O_BINARY -const int o_binary = O_BINARY; -#else -const int o_binary = 0; -#endif +#include "zcmpdiff.cc" -struct { const char * from; const char * to; } const known_extensions[] = { - { ".bz2", "" }, - { ".tbz", ".tar" }, - { ".tbz2", ".tar" }, - { ".gz", "" }, - { ".tgz", ".tar" }, - { ".lz", "" }, - { ".tlz", ".tar" }, - { ".xz", "" }, - { ".txz", ".tar" }, - { 0, 0 } }; - -void show_help() throw() +void show_help() { - std::printf( "Zdiff compares two files (\"-\" means standard input), and if they\n" ); - std::printf( "differ, shows the differences line by line. If any given file is\n" ); - std::printf( "compressed, its uncompressed content is used. Zdiff is a front end to\n" ); - std::printf( "the diff program and has the limitation that messages from diff refer to\n" ); - std::printf( "temporary filenames instead of those specified.\n" ); - std::printf( "The supported compressors are bzip2, gzip, lzip and xz.\n" ); - std::printf( "\nUsage: zdiff [options] file1 [file2]\n" ); - std::printf( "\nCompares <file1> to <file2>. If <file2> is omitted zdiff tries the\n" ); - std::printf( "following:\n" ); - std::printf( "If <file1> is compressed, compares <file1> to the file with the\n" ); - std::printf( "corresponding decompressed file name (removes the extension from\n" ); - std::printf( "<file1>).\n" ); - std::printf( "If <file1> is not compressed, compares <file1> to the uncompressed\n" ); - std::printf( "contents of <file1>.[bz2|gz|lz|xz] (the first one that is found).\n" ); - std::printf( "If no suitable file is found, compares <file1> to data read from\n" ); - std::printf( "standard input.\n" ); - std::printf( "\nExit status is 0 if inputs are identical, 1 if different, 2 if trouble.\n" ); - std::printf( "\nOptions:\n" ); - std::printf( " -h, --help display this help and exit\n" ); - std::printf( " -V, --version output version information and exit\n" ); - std::printf( " -a, --text treat all files as text\n" ); - std::printf( " -b, --ignore-space-change ignore changes in the amount of white space\n" ); - std::printf( " -B, --ignore-blank-lines ignore changes whose lines are all blank\n" ); - std::printf( " -c use the context output format\n" ); - std::printf( " -C, --context=<n> same as -c but use <n> lines of context\n" ); - std::printf( " -d, --minimal try hard to find a smaller set of changes\n" ); - std::printf( " -E, --ignore-tab-expansion ignore changes due to tab expansion\n" ); - std::printf( " -i, --ignore-case ignore case differences in file contents\n" ); - std::printf( " -p, --show-c-function show which C function each change is in\n" ); - std::printf( " -q, --brief output only whether files differ\n" ); - std::printf( " -s, --report-identical-files report when two files are identical\n" ); - std::printf( " -t, --expand-tabs expand tabs to spaces in output\n" ); - std::printf( " -T, --initial-tab make tabs line up by prepending a tab\n" ); - std::printf( " -u use the unified output format\n" ); - std::printf( " -U, --unified=<n> same as -u but use <n> lines of context\n" ); - std::printf( " -w, --ignore-all-space ignore all white space\n" ); - std::printf( "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" ); - std::printf( "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" ); + std::printf( "Zdiff compares two files (\"-\" means standard input), and if they\n" + "differ, shows the differences line by line. If any given file is\n" + "compressed, its uncompressed content is used. Zdiff is a front end to\n" + "the diff program and has the limitation that messages from diff refer to\n" + "temporary filenames instead of those specified.\n" + "\nThe supported formats are bzip2, gzip, lzip and xz.\n" + "\nUsage: zdiff [options] file1 [file2]\n" + "\nCompares <file1> to <file2>. If <file2> is omitted zdiff tries the\n" + "following:\n" + "If <file1> is compressed, compares <file1> to the file with the\n" + "corresponding decompressed file name (removes the extension from\n" + "<file1>).\n" + "If <file1> is not compressed, compares <file1> to the uncompressed\n" + "contents of <file1>.[bz2|gz|lz|xz] (the first one that is found).\n" + "If no suitable file is found, compares <file1> to data read from\n" + "standard input.\n" + "\nExit status is 0 if inputs are identical, 1 if different, 2 if trouble.\n" + "\nOptions:\n" + " -h, --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -a, --text treat all files as text\n" + " -b, --ignore-space-change ignore changes in the amount of white space\n" + " -B, --ignore-blank-lines ignore changes whose lines are all blank\n" + " -c use the context output format\n" + " -C, --context=<n> same as -c but use <n> lines of context\n" + " -d, --minimal try hard to find a smaller set of changes\n" + " -E, --ignore-tab-expansion ignore changes due to tab expansion\n" + " --format=[<fmt1>][,<fmt2>] force given formats (bz2, gz, lz, xz)\n" + " -i, --ignore-case ignore case differences in file contents\n" + " -p, --show-c-function show which C function each change is in\n" + " -q, --brief output only whether files differ\n" + " -s, --report-identical-files report when two files are identical\n" + " -t, --expand-tabs expand tabs to spaces in output\n" + " -T, --initial-tab make tabs line up by prepending a tab\n" + " -u use the unified output format\n" + " -U, --unified=<n> same as -u but use <n> lines of context\n" + " -w, --ignore-all-space ignore all white space\n" + "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" + "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" ); show_help_addr(); } -int open_instream( const std::string & input_filename ) throw() - { - int infd = open( input_filename.c_str(), O_RDONLY | o_binary ); - if( infd < 0 ) - show_error2( "Can't open input file", input_filename.c_str() ); - return infd; - } - - -int open_other_instream( std::string & name ) throw() - { - for( int i = 0; known_extensions[i].from; ++i ) - { // search uncompressed version - const std::string from( known_extensions[i].from ); - if( name.size() > from.size() && - name.compare( name.size() - from.size(), from.size(), from ) == 0 ) - { - name.resize( name.size() - from.size() ); - name += known_extensions[i].to; - return open( name.c_str(), O_RDONLY | o_binary ); - } - } - for( int i = 0; simple_extensions[i]; ++i ) - { // search compressed version - const std::string s( name + simple_extensions[i] ); - const int infd = open( s.c_str(), O_RDONLY | o_binary ); - if( infd >= 0 ) { name = s; return infd; } - } - return -1; - } - - -bool check_identical( const char * const name1, const char * const name2 ) throw() - { - if( !std::strcmp( name1, name2 ) ) return true; - struct stat stat1, stat2; - if( stat( name1, &stat1 ) || stat( name2, &stat2 ) ) return false; - return ( stat1.st_ino == stat2.st_ino && stat1.st_dev == stat2.st_dev ); - } - - -const char * my_basename( const char * filename ) throw() +const char * my_basename( const char * filename ) { const char * c = filename; while( *c ) { if( *c == '/' ) { filename = c + 1; } ++c; } @@ -168,7 +100,7 @@ const char * my_basename( const char * filename ) throw() } -extern "C" void remove_fifos() throw() +extern "C" void remove_fifos() { if( fifonames[0].size() ) { std::remove( fifonames[0].c_str() ); fifonames[0].clear(); } @@ -191,7 +123,7 @@ bool set_fifonames( const std::string filenames[2] ) if( p ) fifonames[i] = p; else fifonames[i] = "/tmp"; fifonames[i] += '/'; int n = ( 2 * pid ) + i; - const unsigned int pos = fifonames[i].size(); + const unsigned pos = fifonames[i].size(); do { fifonames[i].insert( pos, 1, codes[n % num_codes] ); n /= num_codes; } while( n ); @@ -216,15 +148,15 @@ bool set_fifonames( const std::string filenames[2] ) bool set_data_feeder( const std::string & fifoname, const int infd, - pid_t * const pidp ) + pid_t * const pidp, const int format_type ) { - std::string file_type; - const uint8_t * magic_data; - int magic_size; - const bool compressed = - test_format( infd, file_type, &magic_data, &magic_size ); + const uint8_t * magic_data = 0; + int magic_size = 0; + const char * const decompressor_name = ( format_type >= 0 ) ? + decompressor_names[format_type] : + test_format( infd, &magic_data, &magic_size ); - if( compressed ) // compressed with `file_type' + if( decompressor_name ) // compressed { int fda[2]; // pipe from feeder to decompressor if( pipe( fda ) < 0 ) @@ -239,7 +171,7 @@ bool set_data_feeder( const std::string & fifoname, const int infd, if( outfd < 0 ) { if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Can't open FIFO `%s' for writing: %s.\n", + std::fprintf( stderr, "%s: Can't open FIFO '%s' for writing: %s.\n", util_name, fifoname.c_str(), std::strerror( errno ) ); _exit( 2 ); } @@ -247,19 +179,20 @@ bool set_data_feeder( const std::string & fifoname, const int infd, dup2( outfd, STDOUT_FILENO ) >= 0 && close( fda[0] ) == 0 && close( fda[1] ) == 0 && close( outfd ) == 0 ) - execlp( file_type.c_str(), file_type.c_str(), "-cdfq", (char *)0 ); - show_exec_error( file_type.c_str() ); + execlp( decompressor_name, decompressor_name, + (verbosity >= 0) ? "-d" : "-dq", (char *)0 ); + show_exec_error( decompressor_name ); _exit( 2 ); } if( pid2 < 0 ) - { show_fork_error( file_type.c_str() ); _exit( 2 ); } + { show_fork_error( decompressor_name ); _exit( 2 ); } if( close( fda[0] ) != 0 || !feed_data( infd, fda[1], magic_data, magic_size ) ) _exit( 2 ); if( close( fda[1] ) != 0 ) { show_close_error( "data feeder" ); _exit( 2 ); } - _exit( wait_for_child( pid2, file_type.c_str() ) ); + _exit( wait_for_child( pid2, decompressor_name ) ); } // parent close( fda[0] ); close( fda[1] ); @@ -276,7 +209,7 @@ bool set_data_feeder( const std::string & fifoname, const int infd, if( outfd < 0 ) { if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Can't open FIFO `%s' for writing: %s.\n", + std::fprintf( stderr, "%s: Can't open FIFO '%s' for writing: %s.\n", util_name, fifoname.c_str(), std::strerror( errno ) ); _exit( 2 ); } @@ -295,7 +228,7 @@ bool set_data_feeder( const std::string & fifoname, const int infd, } -extern "C" void signal_handler( int sig ) throw() +extern "C" void signal_handler( int sig ) { remove_fifos(); std::signal( sig, SIG_DFL ); @@ -303,7 +236,7 @@ extern "C" void signal_handler( int sig ) throw() } -void set_signals() throw() +void set_signals() { std::signal( SIGHUP, signal_handler ); std::signal( SIGINT, signal_handler ); @@ -315,7 +248,9 @@ void set_signals() throw() int main( const int argc, const char * const argv[] ) { + enum { format_opt = 256 }; std::vector< const char * > diff_args; // args to diff, maybe empty + int format_types[2] = { -1, -1 }; invocation_name = argv[0]; util_name = "zdiff"; @@ -339,6 +274,7 @@ int main( const int argc, const char * const argv[] ) { 'U', "unified", Arg_parser::yes }, { 'V', "version", Arg_parser::no }, { 'w', "ignore-all-space", Arg_parser::no }, + { format_opt, "format", Arg_parser::yes }, { 0 , 0, Arg_parser::no } }; const Arg_parser parser( argc, argv, options ); @@ -371,13 +307,14 @@ int main( const int argc, const char * const argv[] ) case 'U': diff_args.push_back( "-U" ); diff_args.push_back( arg ); break; case 'V': show_version( "Zdiff" ); return 0; case 'w': diff_args.push_back( "-w" ); break; + case format_opt: get_format_types( arg, format_types ); break; default : internal_error( "uncaught option" ); } } // end process options #if defined(__MSVCRT__) || defined(__OS2__) - _setmode( STDIN_FILENO, O_BINARY ); - _setmode( STDOUT_FILENO, O_BINARY ); + _fsetmode( stdin, "b" ); + _fsetmode( stdout, "b" ); #endif if( argind >= parser.arguments() ) @@ -408,6 +345,9 @@ int main( const int argc, const char * const argv[] ) } else { + if( format_types[0] >= 0 || format_types[1] >= 0 ) + { show_error( "Two files must be given when format is specified.", 0, true ); + return 2; } filenames[1] = filenames[0]; infd[1] = open_other_instream( filenames[1] ); if( infd[1] < 0 ) { infd[1] = STDIN_FILENO; filenames[1] = "-"; } @@ -422,7 +362,7 @@ int main( const int argc, const char * const argv[] ) { const char ** const argv = new const char *[diff_args.size()+5]; argv[0] = DIFF; - for( unsigned int i = 0; i < diff_args.size(); ++i ) + for( unsigned i = 0; i < diff_args.size(); ++i ) argv[i+1] = diff_args[i]; argv[diff_args.size()+1] = "--"; argv[diff_args.size()+2] = fifonames[0].c_str(); @@ -437,16 +377,21 @@ int main( const int argc, const char * const argv[] ) { show_fork_error( DIFF ); return 2; } pid_t pid[2]; - if( !set_data_feeder( fifonames[0], infd[0], &pid[0] ) || - !set_data_feeder( fifonames[1], infd[1], &pid[1] ) ) + if( !set_data_feeder( fifonames[0], infd[0], &pid[0], format_types[0] ) || + !set_data_feeder( fifonames[1], infd[1], &pid[1], format_types[1] ) ) return 2; int retval = wait_for_child( diff_pid, DIFF ); if( retval != 0 ) { - if( pid[0] ) kill( pid[0], SIGTERM ); - if( pid[1] ) kill( pid[1], SIGTERM ); + for( int i = 0; i < 2; ++i ) + if( pid[i] ) + { + const int tmp = child_status( pid[i], "data feeder" ); + if( tmp < 0 ) kill( pid[i], SIGTERM ); // child not terminated + else if( tmp != 0 ) retval = 2; // child status != 0 + } } else if( ( pid[0] && wait_for_child( pid[0], "data feeder" ) != 0 ) || |