diff options
Diffstat (limited to 'zcmp.cc')
-rw-r--r-- | zcmp.cc | 197 |
1 files changed, 74 insertions, 123 deletions
@@ -1,5 +1,5 @@ /* Zcmp - decompress and compare two files byte by byte - Copyright (C) 2010, 2011 Antonio Diaz Diaz. + Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,6 +17,7 @@ #define _FILE_OFFSET_BITS 64 +#include <algorithm> #include <cctype> #include <cerrno> #include <climits> @@ -30,9 +31,6 @@ #include <stdint.h> #include <unistd.h> #include <sys/stat.h> -#if defined(__MSVCRT__) || defined(__OS2__) -#include <io.h> -#endif #include "arg_parser.h" #include "zutils.h" @@ -44,73 +42,52 @@ #ifndef LLONG_MAX #define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL #endif -#ifndef LLONG_MIN -#define LLONG_MIN (-LLONG_MAX - 1LL) -#endif -#ifndef ULLONG_MAX -#define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL -#endif namespace { -#ifdef O_BINARY -const int o_binary = O_BINARY; -#else -const int o_binary = 0; -#endif - -struct { const char * from; const char * to; } const known_extensions[] = { - { ".bz2", "" }, - { ".tbz", ".tar" }, - { ".tbz2", ".tar" }, - { ".gz", "" }, - { ".tgz", ".tar" }, - { ".lz", "" }, - { ".tlz", ".tar" }, - { ".xz", "" }, - { ".txz", ".tar" }, - { 0, 0 } }; +#include "zcmpdiff.cc" -void show_help() throw() +void show_help() { - std::printf( "Zcmp compares two files (\"-\" means standard input), and if they\n" ); - std::printf( "differ, tells the first byte and line number where they differ. Bytes\n" ); - std::printf( "and lines are numbered starting with 1. If any given file is compressed,\n" ); - std::printf( "its uncompressed content is used. Compressed files are uncompressed on\n" ); - std::printf( "the fly; no temporary files are created.\n" ); - std::printf( "The supported compressors are bzip2, gzip, lzip and xz.\n" ); - std::printf( "\nUsage: zcmp [options] file1 [file2]\n" ); - std::printf( "\nCompares <file1> to <file2>. If <file2> is omitted zcmp tries the\n" ); - std::printf( "following:\n" ); - std::printf( "If <file1> is compressed, compares <file1> to the file with the\n" ); - std::printf( "corresponding decompressed file name (removes the extension from\n" ); - std::printf( "<file1>).\n" ); - std::printf( "If <file1> is not compressed, compares <file1> to the uncompressed\n" ); - std::printf( "contents of <file1>.[bz2|gz|lz|xz] (the first one that is found).\n" ); - std::printf( "If no suitable file is found, compares <file1> to data read from\n" ); - std::printf( "standard input.\n" ); - std::printf( "\nExit status is 0 if inputs are identical, 1 if different, 2 if trouble.\n" ); - std::printf( "\nOptions:\n" ); - std::printf( " -h, --help display this help and exit\n" ); - std::printf( " -V, --version output version information and exit\n" ); - std::printf( " -b, --print-bytes print differing bytes\n" ); - std::printf( " -i, --ignore-initial=<n>[,<n2>] ignore differences in the first <n> bytes\n" ); - std::printf( " -l, --list list position, value of all differing bytes\n" ); - std::printf( " -n, --bytes=<n> compare at most <n> bytes\n" ); - std::printf( " -q, --quiet suppress all messages\n" ); - std::printf( " -s, --silent (same as --quiet)\n" ); - std::printf( " -v, --verbose verbose mode (same as --list)\n" ); - std::printf( "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" ); - std::printf( "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" ); + std::printf( "Zcmp compares two files (\"-\" means standard input), and if they\n" + "differ, tells the first byte and line number where they differ. Bytes\n" + "and lines are numbered starting with 1. If any given file is compressed,\n" + "its uncompressed content is used. Compressed files are uncompressed on\n" + "the fly; no temporary files are created.\n" + "\nThe supported formats are bzip2, gzip, lzip and xz.\n" + "\nUsage: zcmp [options] file1 [file2]\n" + "\nCompares <file1> to <file2>. If <file2> is omitted zcmp tries the\n" + "following:\n" + "If <file1> is compressed, compares <file1> to the file with the\n" + "corresponding decompressed file name (removes the extension from\n" + "<file1>).\n" + "If <file1> is not compressed, compares <file1> to the uncompressed\n" + "contents of <file1>.[bz2|gz|lz|xz] (the first one that is found).\n" + "If no suitable file is found, compares <file1> to data read from\n" + "standard input.\n" + "\nExit status is 0 if inputs are identical, 1 if different, 2 if trouble.\n" + "\nOptions:\n" + " -h, --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -b, --print-bytes print differing bytes\n" + " --format=[<fmt1>][,<fmt2>] force given formats (bz2, gz, lz, xz)\n" + " -i, --ignore-initial=<n>[,<n2>] ignore differences in the first <n> bytes\n" + " -l, --list list position, value of all differing bytes\n" + " -n, --bytes=<n> compare at most <n> bytes\n" + " -q, --quiet suppress all messages\n" + " -s, --silent (same as --quiet)\n" + " -v, --verbose verbose mode (same as --list)\n" + "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" + "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" ); show_help_addr(); } long long getnum( const char * const ptr, const char ** const tailp = 0, - const long long llimit = LLONG_MIN + 1, - const long long ulimit = LLONG_MAX ) throw() + const long long llimit = 0, + const long long ulimit = LLONG_MAX ) { errno = 0; char * tail; @@ -120,6 +97,7 @@ long long getnum( const char * const ptr, const char ** const tailp = 0, show_error( "Bad or missing numerical argument.", 0, true ); std::exit( 2 ); } + if( result < 0 ) errno = ERANGE; if( !errno && tail[0] && std::isalpha( tail[0] ) ) { @@ -148,7 +126,7 @@ long long getnum( const char * const ptr, const char ** const tailp = 0, } for( int i = 0; i < exponent; ++i ) { - if( LLONG_MAX / factor >= llabs( result ) ) result *= factor; + if( ulimit / factor >= result ) result *= factor; else { errno = ERANGE; break; } } } @@ -163,53 +141,12 @@ long long getnum( const char * const ptr, const char ** const tailp = 0, } -int open_instream( const std::string & input_filename ) throw() - { - int infd = open( input_filename.c_str(), O_RDONLY | o_binary ); - if( infd < 0 ) - show_error2( "Can't open input file", input_filename.c_str() ); - return infd; - } - - -int open_other_instream( std::string & name ) throw() - { - for( int i = 0; known_extensions[i].from; ++i ) - { // search uncompressed version - const std::string from( known_extensions[i].from ); - if( name.size() > from.size() && - name.compare( name.size() - from.size(), from.size(), from ) == 0 ) - { - name.resize( name.size() - from.size() ); - name += known_extensions[i].to; - return open( name.c_str(), O_RDONLY | o_binary ); - } - } - for( int i = 0; simple_extensions[i]; ++i ) - { // search compressed version - const std::string s( name + simple_extensions[i] ); - const int infd = open( s.c_str(), O_RDONLY | o_binary ); - if( infd >= 0 ) { name = s; return infd; } - } - return -1; - } - - -bool check_identical( const char * const name1, const char * const name2 ) throw() - { - if( !std::strcmp( name1, name2 ) ) return true; - struct stat stat1, stat2; - if( stat( name1, &stat1 ) || stat( name2, &stat2 ) ) return false; - return ( stat1.st_ino == stat2.st_ino && stat1.st_dev == stat2.st_dev ); - } - - void parse_ignore_initial( const char * const arg, long long ignore_initial[2] ) { const char * tail; - ignore_initial[0] = getnum( arg, &tail, 0 ); + ignore_initial[0] = getnum( arg, &tail ); if( *tail == ',' || *tail == ':' ) - ignore_initial[1] = getnum( ++tail, 0, 0 ); + ignore_initial[1] = getnum( ++tail ); else ignore_initial[1] = ignore_initial[0]; } @@ -253,10 +190,11 @@ void sprintc( char * const buf, unsigned char c ) int block_compare( const uint8_t * const buffer0, const uint8_t * const buffer1, - long long * line_numberp ) + unsigned long long * const line_numberp ) { const uint8_t * p0 = buffer0; const uint8_t * p1 = buffer1; + if( verbosity == 0 ) { int nl_count = 0; @@ -272,10 +210,11 @@ int block_compare( const uint8_t * const buffer0, int cmp( const long long max_size, const int infd[2], const std::string filenames[2], const bool print_bytes ) { - enum { buffer_size = 4096 }; - long long byte_number = 1; - long long line_number = 1; - long long rest = max_size; // remaining number of bytes to compare + const int buffer_size = 4096; + unsigned long long byte_number = 1; + unsigned long long line_number = 1; + // remaining number of bytes to compare + long long rest = ( ( max_size >= 0 ) ? max_size : buffer_size ); // buffers with space for sentinels at the end uint8_t * const buffer0 = new uint8_t[2*(buffer_size+1)]; uint8_t * const buffer1 = buffer0 + buffer_size + 1; @@ -285,7 +224,8 @@ int cmp( const long long max_size, const int infd[2], while( rest > 0 ) { - const int size = std::min( rest, (long long)buffer_size ); + const int size = std::min( (long long)buffer_size, rest ); + if( max_size >= 0 ) rest -= size; int rd[2]; // number of bytes read from each file for( int i = 0; i < 2; ++i ) { @@ -296,7 +236,6 @@ int cmp( const long long max_size, const int infd[2], return 2; } } - rest -= size; buffer0[rd[0]] = ~buffer1[rd[0]]; // sentinels for the block compare buffer1[rd[1]] = ~buffer0[rd[1]]; @@ -311,7 +250,7 @@ int cmp( const long long max_size, const int infd[2], if( verbosity == 0 ) // show first difference { if( !print_bytes ) - std::printf( "%s %s differ: byte %lld, line %lld\n", + std::printf( "%s %s differ: byte %llu, line %llu\n", filenames[0].c_str(), filenames[1].c_str(), byte_number, line_number ); else @@ -320,7 +259,7 @@ int cmp( const long long max_size, const int infd[2], const unsigned char c1 = buffer1[first_diff]; char buf0[5], buf1[5]; sprintc( buf0, c0 ); sprintc( buf1, c1 ); - std::printf( "%s %s differ: byte %lld, line %lld is %3o %s %3o %s\n", + std::printf( "%s %s differ: byte %llu, line %llu is %3o %s %3o %s\n", filenames[0].c_str(), filenames[1].c_str(), byte_number, line_number, c0, buf0, c1, buf1 ); } @@ -336,12 +275,12 @@ int cmp( const long long max_size, const int infd[2], if( c0 != c1 ) { if( !print_bytes ) - std::printf( "%lld %3o %3o\n", byte_number, c0, c1 ); + std::printf( "%llu %3o %3o\n", byte_number, c0, c1 ); else { char buf0[5], buf1[5]; sprintc( buf0, c0 ); sprintc( buf1, c1 ); - std::printf( "%lld %3o %-4s %3o %s\n", + std::printf( "%llu %3o %-4s %3o %s\n", byte_number, c0, buf0, c1, buf1 ); } } @@ -368,9 +307,11 @@ int cmp( const long long max_size, const int infd[2], int main( const int argc, const char * const argv[] ) { + enum { format_opt = 256 }; // number of initial bytes ignored for each file long long ignore_initial[2] = { 0, 0 }; - long long max_size = LLONG_MAX; + long long max_size = -1; // < 0 means unlimited size + int format_types[2] = { -1, -1 }; bool print_bytes = false; invocation_name = argv[0]; util_name = "zcmp"; @@ -386,6 +327,7 @@ int main( const int argc, const char * const argv[] ) { 's', "silent", Arg_parser::no }, { 'v', "verbose", Arg_parser::no }, { 'V', "version", Arg_parser::no }, + { format_opt, "format", Arg_parser::yes }, { 0 , 0, Arg_parser::no } }; const Arg_parser parser( argc, argv, options ); @@ -404,18 +346,19 @@ int main( const int argc, const char * const argv[] ) case 'h': show_help(); return 0; case 'i': parse_ignore_initial( arg, ignore_initial ); break; case 'l': verbosity = 1; break; - case 'n': max_size = getnum( arg, 0, 0 ); break; + case 'n': max_size = getnum( arg ); break; case 'q': case 's': verbosity = -1; break; case 'v': verbosity = 1; break; case 'V': show_version( "Zcmp" ); return 0; + case format_opt: get_format_types( arg, format_types ); break; default : internal_error( "uncaught option" ); } } // end process options #if defined(__MSVCRT__) || defined(__OS2__) - _setmode( STDIN_FILENO, O_BINARY ); - _setmode( STDOUT_FILENO, O_BINARY ); + _fsetmode( stdin, "b" ); + _fsetmode( stdout, "b" ); #endif if( argind >= parser.arguments() ) @@ -449,6 +392,9 @@ int main( const int argc, const char * const argv[] ) } else { + if( format_types[0] >= 0 || format_types[1] >= 0 ) + { show_error( "Two files must be given when format is specified.", 0, true ); + return 2; } filenames[1] = filenames[0]; infd[1] = open_other_instream( filenames[1] ); if( infd[1] < 0 ) { infd[1] = STDIN_FILENO; filenames[1] = "-"; } @@ -457,8 +403,8 @@ int main( const int argc, const char * const argv[] ) int old_infd[2]; // copy of file descriptors of the two files old_infd[0] = infd[0]; old_infd[1] = infd[1]; pid_t pid[2]; - if( !set_data_feeder( &infd[0], &pid[0] ) || - !set_data_feeder( &infd[1], &pid[1] ) ) + if( !set_data_feeder( &infd[0], &pid[0], format_types[0] ) || + !set_data_feeder( &infd[1], &pid[1], format_types[1] ) ) return 2; for( int i = 0; i < 2; ++i ) @@ -470,10 +416,15 @@ int main( const int argc, const char * const argv[] ) int retval = cmp( max_size, infd, filenames, print_bytes ); - if( retval != 0 ) + if( retval != 0 || max_size >= 0 ) { - if( pid[0] ) kill( pid[0], SIGTERM ); - if( pid[1] ) kill( pid[1], SIGTERM ); + for( int i = 0; i < 2; ++i ) + if( pid[i] ) + { + const int tmp = child_status( pid[i], "data feeder" ); + if( tmp < 0 ) kill( pid[i], SIGTERM ); // child not terminated + else if( tmp != 0 ) retval = 2; // child status != 0 + } } else if( ( pid[0] && wait_for_child( pid[0], "data feeder" ) != 0 ) || |