diff options
Diffstat (limited to 'zdiff.cc')
-rw-r--r-- | zdiff.cc | 476 |
1 files changed, 476 insertions, 0 deletions
diff --git a/zdiff.cc b/zdiff.cc new file mode 100644 index 0000000..d01f492 --- /dev/null +++ b/zdiff.cc @@ -0,0 +1,476 @@ +/* Zdiff - decompress and compare two files line by line + Copyright (C) 2010 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <cctype> +#include <cerrno> +#include <climits> +#include <csignal> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <string> +#include <vector> +#include <fcntl.h> +#include <stdint.h> +#include <unistd.h> +#include <sys/stat.h> +#if defined(__MSVCRT__) || defined(__OS2__) +#include <io.h> +#endif + +#include "arg_parser.h" +#include "zutils.h" + +#if CHAR_BIT != 8 +#error "Environments where CHAR_BIT != 8 are not supported." +#endif + +#ifndef LLONG_MAX +#define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL +#endif +#ifndef LLONG_MIN +#define LLONG_MIN (-LLONG_MAX - 1LL) +#endif +#ifndef ULLONG_MAX +#define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL +#endif + + +namespace { + +std::string fifonames[2]; // names of the two fifos passed to diff + +#ifdef O_BINARY +const int o_binary = O_BINARY; +#else +const int o_binary = 0; +#endif + +struct { const char * from; const char * to; } const known_extensions[] = { + { ".bz2", "" }, + { ".tbz", ".tar" }, + { ".tbz2", ".tar" }, + { ".gz", "" }, + { ".tgz", ".tar" }, + { ".lz", "" }, + { ".tlz", ".tar" }, + { ".xz", "" }, + { ".txz", ".tar" }, + { 0, 0 } }; + + +void show_help() throw() + { + std::printf( "Zdiff compares two files (\"-\" means standard input), and if they\n" ); + std::printf( "differ, shows the differences line by line. If any given file is\n" ); + std::printf( "compressed, its uncompressed content is used. Zdiff is a front end to\n" ); + std::printf( "the diff program and has the limitation that messages from diff refer to\n" ); + std::printf( "temporary filenames instead of those specified.\n" ); + std::printf( "The supported compressors are bzip2, gzip, lzip and xz.\n" ); + std::printf( "\nUsage: zdiff [options] file1 [file2]\n" ); + std::printf( "\nCompares <file1> to <file2>. If <file2> is omitted zdiff tries the\n" ); + std::printf( "following:\n" ); + std::printf( "If <file1> is compressed, compares <file1> to the file with the\n" ); + std::printf( "corresponding decompressed file name (removes the extension from\n" ); + std::printf( "<file1>).\n" ); + std::printf( "If <file1> is not compressed, compares <file1> to the uncompressed\n" ); + std::printf( "contents of <file1>.[bz2|gz|lz|xz] (the first one that is found).\n" ); + std::printf( "If no suitable file is found, compares <file1> to data read from\n" ); + std::printf( "standard input.\n" ); + std::printf( "\nExit status is 0 if inputs are identical, 1 if different, 2 if trouble.\n" ); + std::printf( "\nOptions:\n" ); + std::printf( " -h, --help display this help and exit\n" ); + std::printf( " -V, --version output version information and exit\n" ); + std::printf( " -a, --text treat all files as text\n" ); + std::printf( " -b, --ignore-space-change ignore changes in the amount of white space\n" ); + std::printf( " -B, --ignore-blank-lines ignore changes whose lines are all blank\n" ); + std::printf( " -c use the context output format\n" ); + std::printf( " -C, --context=<n> same as -c but use <n> lines of context\n" ); + std::printf( " -d, --minimal try hard to find a smaller set of changes\n" ); + std::printf( " -E, --ignore-tab-expansion ignore changes due to tab expansion\n" ); + std::printf( " -i, --ignore-case ignore case differences in file contents\n" ); + std::printf( " -p, --show-c-function show which C function each change is in\n" ); + std::printf( " -q, --brief output only whether files differ\n" ); + std::printf( " -s, --report-identical-files report when two files are identical\n" ); + std::printf( " -t, --expand-tabs expand tabs to spaces in output\n" ); + std::printf( " -T, --initial-tab make tabs line up by prepending a tab\n" ); + std::printf( " -u use the unified output format\n" ); + std::printf( " -U, --unified=<n> same as -u but use <n> lines of context\n" ); + std::printf( " -w, --ignore-all-space ignore all white space\n" ); + std::printf( "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" ); + std::printf( "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" ); + show_help_addr(); + } + + +int open_instream( const std::string & input_filename ) throw() + { + int infd = open( input_filename.c_str(), O_RDONLY | o_binary ); + if( infd < 0 && verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't open input file `%s': %s.\n", + util_name, input_filename.c_str(), std::strerror( errno ) ); + return infd; + } + + +int open_other_instream( std::string & name ) throw() + { + for( int i = 0; known_extensions[i].from; ++i ) + { // search uncompressed version + const std::string from( known_extensions[i].from ); + if( name.size() > from.size() && + name.compare( name.size() - from.size(), from.size(), from ) == 0 ) + { + name.resize( name.size() - from.size() ); + name += known_extensions[i].to; + return open( name.c_str(), O_RDONLY | o_binary ); + } + } + for( int i = 0; simple_extensions[i]; ++i ) + { // search compressed version + const std::string s( name + simple_extensions[i] ); + const int infd = open( s.c_str(), O_RDONLY | o_binary ); + if( infd >= 0 ) { name = s; return infd; } + } + return -1; + } + + +bool check_identical( const char * const name1, const char * const name2 ) throw() + { + if( !std::strcmp( name1, name2 ) ) return true; + struct stat stat1, stat2; + if( stat( name1, &stat1 ) || stat( name2, &stat2 ) ) return false; + return ( stat1.st_ino == stat2.st_ino && stat1.st_dev == stat2.st_dev ); + } + + +const char * my_basename( const char * filename ) throw() + { + const char * c = filename; + while( *c ) { if( *c == '/' ) { filename = c + 1; } ++c; } + return filename; + } + + +void remove_fifos() throw() + { + if( fifonames[0].size() ) + { std::remove( fifonames[0].c_str() ); fifonames[0].clear(); } + if( fifonames[1].size() ) + { std::remove( fifonames[1].c_str() ); fifonames[1].clear(); } + } + + +// Set fifonames[i] to "${TMPDIR}/<coded_pid><i>_<basename(filenames[i])>" +// and create FIFOs. +bool set_fifonames( const std::string filenames[2] ) + { + enum { num_codes = 36 }; + const char * const codes = "0123456789abcdefghijklmnopqrstuvwxyz"; + const char * p = std::getenv( "TMPDIR" ); + const int pid = getpid(); + + for( int i = 0; i < 2; ++i ) + { + if( p ) fifonames[i] = p; else fifonames[i] = "/tmp"; + fifonames[i] += '/'; + int n = ( 2 * pid ) + i; + const unsigned int pos = fifonames[i].size(); + do { fifonames[i].insert( pos, 1, codes[n % num_codes] ); + n /= num_codes; } + while( n ); + fifonames[i] += '_'; + fifonames[i] += my_basename( filenames[i].c_str() ); + } + + for( int i = 0; i < 2; ++i ) + if( mkfifo( fifonames[i].c_str(), S_IRUSR | S_IWUSR ) != 0 ) + { + if( errno == EEXIST ) + { + std::remove( fifonames[i].c_str() ); + if( mkfifo( fifonames[i].c_str(), S_IRUSR | S_IWUSR ) == 0 ) + continue; + } + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't create FIFO `%s': %s.\n", + util_name, fifonames[i].c_str(), std::strerror( errno ) ); + return false; + } + return true; + } + + +bool set_data_feeder( const std::string & fifoname, const int infd, + pid_t * const pidp ) + { + std::string file_type; + const uint8_t * magic_data; + int magic_size; + const bool compressed = + test_format( infd, file_type, &magic_data, &magic_size ); + + if( compressed ) // compressed with `file_type' + { + int fda[2]; // pipe from feeder to decompressor + if( pipe( fda ) < 0 ) + { show_error( "Can't create pipe", errno ); return false; } + const pid_t pid = fork(); + if( pid == 0 ) // child (decompressor feeder) + { + const pid_t pid2 = fork(); + if( pid2 == 0 ) // grandchild (decompressor) + { + const int outfd = open( fifoname.c_str(), O_WRONLY | o_binary ); + if( outfd < 0 ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't open FIFO `%s' for writing: %s.\n", + util_name, fifoname.c_str(), std::strerror( errno ) ); + _exit( 2 ); + } + if( dup2( fda[0], STDIN_FILENO ) >= 0 && + dup2( outfd, STDOUT_FILENO ) >= 0 && + close( fda[0] ) == 0 && close( fda[1] ) == 0 && + close( outfd ) == 0 ) + execlp( file_type.c_str(), file_type.c_str(), "-cdfq", (char *)0 ); + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't exec `%s': %s.\n", + util_name, file_type.c_str(), std::strerror( errno ) ); + _exit( 2 ); + } + if( pid2 < 0 ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't fork `%s': %s.\n", + util_name, file_type.c_str(), std::strerror( errno ) ); + _exit( 2 ); + } + + if( close( fda[0] ) != 0 || + !feed_data( infd, fda[1], magic_data, magic_size ) ) + _exit( 2 ); + if( close( fda[1] ) != 0 ) + { show_error( "Can't close output of feeder", errno ); _exit( 2 ); } + _exit( wait_for_child( pid2, file_type.c_str() ) ); + } + // parent + close( fda[0] ); close( fda[1] ); + if( pid < 0 ) + { show_error( "Can't fork decompressor feeder", errno ); return false; } + *pidp = pid; + } + else // not compressed + { + const pid_t pid = fork(); + if( pid == 0 ) // child (feeder) + { + const int outfd = open( fifoname.c_str(), O_WRONLY | o_binary ); + if( outfd < 0 ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't open FIFO `%s' for writing: %s.\n", + util_name, fifoname.c_str(), std::strerror( errno ) ); + _exit( 2 ); + } + if( !feed_data( infd, outfd, magic_data, magic_size ) ) + _exit( 2 ); + if( close( outfd ) != 0 ) + { show_error( "Can't close output of feeder", errno ); _exit( 2 ); } + _exit( 0 ); + } + // parent + if( pid < 0 ) + { show_error( "Can't fork data feeder", errno ); return false; } + *pidp = pid; + } + return true; + } + + +extern "C" void signal_handler( int sig ) throw() + { + remove_fifos(); + std::signal( sig, SIG_DFL ); + std::raise( sig ); + } + + +void set_signals() throw() + { + std::signal( SIGHUP, signal_handler ); + std::signal( SIGINT, signal_handler ); + std::signal( SIGTERM, signal_handler ); + } + +} // end namespace + + +int main( const int argc, const char * const argv[] ) + { + std::vector< const char * > diff_args; // args to diff, maybe empty + invocation_name = argv[0]; + util_name = "zdiff"; + + const Arg_parser::Option options[] = + { + { 'a', "text", Arg_parser::no }, + { 'b', "ignore-space-change", Arg_parser::no }, + { 'B', "ignore-blank-lines", Arg_parser::no }, + { 'c', 0, Arg_parser::no }, + { 'C', "context", Arg_parser::yes }, + { 'd', "minimal", Arg_parser::no }, + { 'E', "ignore-tab-expansion", Arg_parser::no }, + { 'h', "help", Arg_parser::no }, + { 'i', "ignore-case", Arg_parser::no }, + { 'p', "show-c-function", Arg_parser::no }, + { 'q', "brief", Arg_parser::no }, + { 's', "report-identical-files", Arg_parser::no }, + { 't', "expand-tabs", Arg_parser::no }, + { 'T', "initial-tab", Arg_parser::no }, + { 'u', 0, Arg_parser::no }, + { 'U', "unified", Arg_parser::yes }, + { 'V', "version", Arg_parser::no }, + { 'w', "ignore-all-space", Arg_parser::no }, + { 0 , 0, Arg_parser::no } }; + + const Arg_parser parser( argc, argv, options ); + if( parser.error().size() ) // bad option + { show_error( parser.error().c_str(), 0, true ); return 2; } + + int argind = 0; + for( ; argind < parser.arguments(); ++argind ) + { + const int code = parser.code( argind ); + if( !code ) break; // no more options + const char * const arg = parser.argument( argind ).c_str(); + switch( code ) + { + case 'a': diff_args.push_back( "-a" ); break; + case 'b': diff_args.push_back( "-b" ); break; + case 'B': diff_args.push_back( "-B" ); break; + case 'c': diff_args.push_back( "-c" ); break; + case 'C': diff_args.push_back( "-C" ); diff_args.push_back( arg ); break; + case 'd': diff_args.push_back( "-d" ); break; + case 'E': diff_args.push_back( "-E" ); break; + case 'h': show_help(); return 0; + case 'i': diff_args.push_back( "-i" ); break; + case 'p': diff_args.push_back( "-p" ); break; + case 'q': diff_args.push_back( "-q" ); break; + case 's': diff_args.push_back( "-s" ); break; + case 't': diff_args.push_back( "-t" ); break; + case 'T': diff_args.push_back( "-T" ); break; + case 'u': diff_args.push_back( "-u" ); break; + case 'U': diff_args.push_back( "-U" ); diff_args.push_back( arg ); break; + case 'V': show_version( "Zdiff" ); return 0; + case 'w': diff_args.push_back( "-w" ); break; + default : internal_error( "uncaught option" ); + } + } // end process options + +#if defined(__MSVCRT__) || defined(__OS2__) + _setmode( STDIN_FILENO, O_BINARY ); + _setmode( STDOUT_FILENO, O_BINARY ); +#endif + + if( argind >= parser.arguments() ) + { show_error( "No files given.", 0, true ); return 2; } + if( argind + 2 < parser.arguments() ) + { show_error( "Too many files.", 0, true ); return 2; } + + const int files = parser.arguments() - argind; + std::string filenames[2]; // file names of the two input files + filenames[0] = parser.argument( argind ); + if( files == 2 ) filenames[1] = parser.argument( argind + 1 ); + + int infd[2]; // file descriptors of the two files + infd[0] = ( filenames[0] == "-" ) ? + STDIN_FILENO : open_instream( filenames[0] ); + if( infd[0] < 0 ) return 2; + + if( ( files == 1 && filenames[0] == "-" ) || + ( files == 2 && check_identical( filenames[0].c_str(), + filenames[1].c_str() ) ) ) + return 0; + + if( files == 2 ) + { + infd[1] = ( filenames[1] == "-" ) ? + STDIN_FILENO : open_instream( filenames[1] ); + if( infd[1] < 0 ) return 2; + } + else + { + filenames[1] = filenames[0]; + infd[1] = open_other_instream( filenames[1] ); + if( infd[1] < 0 ) { infd[1] = STDIN_FILENO; filenames[1] = "-"; } + } + + std::atexit( remove_fifos ); + set_signals(); + if( !set_fifonames( filenames ) ) return 2; + + const pid_t diff_pid = fork(); + if( diff_pid == 0 ) // child (diff) + { + const char ** const argv = new const char *[diff_args.size()+5]; + argv[0] = "diff"; + for( unsigned int i = 0; i < diff_args.size(); ++i ) + argv[i+1] = diff_args[i]; + argv[diff_args.size()+1] = "--"; + argv[diff_args.size()+2] = fifonames[0].c_str(); + argv[diff_args.size()+3] = fifonames[1].c_str(); + argv[diff_args.size()+4] = 0; + execvp( argv[0], (char **)argv ); + show_error( "Can't exec `diff'." ); + _exit( 2 ); + } + // parent + if( diff_pid < 0 ) + { show_error( "Can't fork `diff'", errno ); return 2; } + + pid_t pid[2]; + if( !set_data_feeder( fifonames[0], infd[0], &pid[0] ) || + !set_data_feeder( fifonames[1], infd[1], &pid[1] ) ) + return 2; + + int retval = wait_for_child( diff_pid, "diff" ); + + if( retval != 0 ) + { + if( pid[0] ) kill( pid[0], SIGTERM ); + if( pid[1] ) kill( pid[1], SIGTERM ); + } + else + if( ( pid[0] && wait_for_child( pid[0], "data feeder" ) != 0 ) || + ( pid[1] && wait_for_child( pid[1], "data feeder" ) != 0 ) ) + retval = 2; + + for( int i = 0; i < 2; ++i ) + if( filenames[i] != "-" && close( infd[i] ) != 0 ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't close input file `%s': %s.\n", + util_name, filenames[i].c_str(), std::strerror( errno ) ); + retval = 2; + } + + return retval; + } |