/* Zcmp - decompress and compare two files byte by byte Copyright (C) 2010, 2011 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #define _FILE_OFFSET_BITS 64 #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__MSVCRT__) || defined(__OS2__) #include #endif #include "arg_parser.h" #include "zutils.h" #if CHAR_BIT != 8 #error "Environments where CHAR_BIT != 8 are not supported." #endif #ifndef LLONG_MAX #define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL #endif #ifndef LLONG_MIN #define LLONG_MIN (-LLONG_MAX - 1LL) #endif #ifndef ULLONG_MAX #define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL #endif namespace { #ifdef O_BINARY const int o_binary = O_BINARY; #else const int o_binary = 0; #endif struct { const char * from; const char * to; } const known_extensions[] = { { ".bz2", "" }, { ".tbz", ".tar" }, { ".tbz2", ".tar" }, { ".gz", "" }, { ".tgz", ".tar" }, { ".lz", "" }, { ".tlz", ".tar" }, { ".xz", "" }, { ".txz", ".tar" }, { 0, 0 } }; void show_help() throw() { std::printf( "Zcmp compares two files (\"-\" means standard input), and if they\n" ); std::printf( "differ, tells the first byte and line number where they differ. Bytes\n" ); std::printf( "and lines are numbered starting with 1. If any given file is compressed,\n" ); std::printf( "its uncompressed content is used. Compressed files are uncompressed on\n" ); std::printf( "the fly; no temporary files are created.\n" ); std::printf( "The supported compressors are bzip2, gzip, lzip and xz.\n" ); std::printf( "\nUsage: zcmp [options] file1 [file2]\n" ); std::printf( "\nCompares to . If is omitted zcmp tries the\n" ); std::printf( "following:\n" ); std::printf( "If is compressed, compares to the file with the\n" ); std::printf( "corresponding decompressed file name (removes the extension from\n" ); std::printf( ").\n" ); std::printf( "If is not compressed, compares to the uncompressed\n" ); std::printf( "contents of .[bz2|gz|lz|xz] (the first one that is found).\n" ); std::printf( "If no suitable file is found, compares to data read from\n" ); std::printf( "standard input.\n" ); std::printf( "\nExit status is 0 if inputs are identical, 1 if different, 2 if trouble.\n" ); std::printf( "\nOptions:\n" ); std::printf( " -h, --help display this help and exit\n" ); std::printf( " -V, --version output version information and exit\n" ); std::printf( " -b, --print-bytes print differing bytes\n" ); std::printf( " -i, --ignore-initial=[,] ignore differences in the first bytes\n" ); std::printf( " -l, --list list position, value of all differing bytes\n" ); std::printf( " -n, --bytes= compare at most bytes\n" ); std::printf( " -q, --quiet suppress all messages\n" ); std::printf( " -s, --silent (same as --quiet)\n" ); std::printf( " -v, --verbose verbose mode (same as --list)\n" ); std::printf( "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" ); std::printf( "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" ); show_help_addr(); } long long getnum( const char * const ptr, const char ** const tailp = 0, const long long llimit = LLONG_MIN + 1, const long long ulimit = LLONG_MAX ) throw() { errno = 0; char * tail; long long result = strtoll( ptr, &tail, 0 ); if( tail == ptr ) { show_error( "Bad or missing numerical argument.", 0, true ); std::exit( 2 ); } if( !errno && tail[0] && std::isalpha( tail[0] ) ) { int factor = ( tail[1] == 'i' ) ? 1024 : 1000; int exponent = 0; bool bad_multiplier = false; switch( tail[0] ) { case 'Y': exponent = 8; break; case 'Z': exponent = 7; break; case 'E': exponent = 6; break; case 'P': exponent = 5; break; case 'T': exponent = 4; break; case 'G': exponent = 3; break; case 'M': exponent = 2; break; case 'K': if( factor == 1024 ) exponent = 1; else bad_multiplier = true; break; case 'k': if( factor == 1000 ) exponent = 1; else bad_multiplier = true; break; default : bad_multiplier = true; } if( bad_multiplier ) { show_error( "Bad multiplier in numerical argument.", 0, true ); std::exit( 2 ); } for( int i = 0; i < exponent; ++i ) { if( LLONG_MAX / factor >= llabs( result ) ) result *= factor; else { errno = ERANGE; break; } } } if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE; if( errno ) { show_error( "Numerical argument out of limits." ); std::exit( 2 ); } if( tailp ) *tailp = tail; return result; } int open_instream( const std::string & input_filename ) throw() { int infd = open( input_filename.c_str(), O_RDONLY | o_binary ); if( infd < 0 ) show_error2( "Can't open input file", input_filename.c_str() ); return infd; } int open_other_instream( std::string & name ) throw() { for( int i = 0; known_extensions[i].from; ++i ) { // search uncompressed version const std::string from( known_extensions[i].from ); if( name.size() > from.size() && name.compare( name.size() - from.size(), from.size(), from ) == 0 ) { name.resize( name.size() - from.size() ); name += known_extensions[i].to; return open( name.c_str(), O_RDONLY | o_binary ); } } for( int i = 0; simple_extensions[i]; ++i ) { // search compressed version const std::string s( name + simple_extensions[i] ); const int infd = open( s.c_str(), O_RDONLY | o_binary ); if( infd >= 0 ) { name = s; return infd; } } return -1; } bool check_identical( const char * const name1, const char * const name2 ) throw() { if( !std::strcmp( name1, name2 ) ) return true; struct stat stat1, stat2; if( stat( name1, &stat1 ) || stat( name2, &stat2 ) ) return false; return ( stat1.st_ino == stat2.st_ino && stat1.st_dev == stat2.st_dev ); } void parse_ignore_initial( const char * const arg, long long ignore_initial[2] ) { const char * tail; ignore_initial[0] = getnum( arg, &tail, 0 ); if( *tail == ',' || *tail == ':' ) ignore_initial[1] = getnum( ++tail, 0, 0 ); else ignore_initial[1] = ignore_initial[0]; } bool skip_ignore_initial( const long long ignore_initial, const int infd ) { if( ignore_initial > 0 ) { enum { buffer_size = 4096 }; long long rest = ignore_initial; uint8_t buffer[buffer_size]; while( rest > 0 ) { const int size = std::min( rest, (long long)buffer_size ); const int rd = readblock( infd, buffer, size ); if( rd != size && errno ) return false; if( rd < size ) break; rest -= rd; } } return true; } // Put into buf the unsigned char c, making unprintable bytes // visible by quoting like cat -t does. void sprintc( char * const buf, unsigned char c ) { int i = 0; if( c < 32 || c >= 127 ) { if( c >= 128 ) { c -= 128; buf[i++] = 'M'; buf[i++] = '-'; } if( c < 32 ) { c += 64; buf[i++] = '^'; } else if( c == 127 ) { c = '?'; buf[i++] = '^'; } } buf[i++] = c; buf[i++] = 0; } int block_compare( const uint8_t * const buffer0, const uint8_t * const buffer1, long long * line_numberp ) { const uint8_t * p0 = buffer0; const uint8_t * p1 = buffer1; if( verbosity == 0 ) { int nl_count = 0; while( *p0 == *p1 ) { if( *p0 == '\n' ) { ++nl_count; } ++p0; ++p1; } *line_numberp += nl_count; } else while( *p0 == *p1 ) { ++p0; ++p1; } return p0 - buffer0; } int cmp( const long long max_size, const int infd[2], const std::string filenames[2], const bool print_bytes ) { enum { buffer_size = 4096 }; long long byte_number = 1; long long line_number = 1; long long rest = max_size; // remaining number of bytes to compare // buffers with space for sentinels at the end uint8_t * const buffer0 = new uint8_t[2*(buffer_size+1)]; uint8_t * const buffer1 = buffer0 + buffer_size + 1; uint8_t * buffer[2]; buffer[0] = buffer0; buffer[1] = buffer1; int different = 0; while( rest > 0 ) { const int size = std::min( rest, (long long)buffer_size ); int rd[2]; // number of bytes read from each file for( int i = 0; i < 2; ++i ) { rd[i] = readblock( infd[i], buffer[i], size ); if( rd[i] != size && errno ) { show_error2( "Error reading file", filenames[i].c_str() ); return 2; } } rest -= size; buffer0[rd[0]] = ~buffer1[rd[0]]; // sentinels for the block compare buffer1[rd[1]] = ~buffer0[rd[1]]; int first_diff = block_compare( buffer0, buffer1, &line_number ); byte_number += first_diff; const int min_rd = std::min( rd[0], rd[1] ); if( first_diff < min_rd ) { if( verbosity < 0 ) return 1; // return status only if( verbosity == 0 ) // show first difference { if( !print_bytes ) std::printf( "%s %s differ: byte %lld, line %lld\n", filenames[0].c_str(), filenames[1].c_str(), byte_number, line_number ); else { const unsigned char c0 = buffer0[first_diff]; const unsigned char c1 = buffer1[first_diff]; char buf0[5], buf1[5]; sprintc( buf0, c0 ); sprintc( buf1, c1 ); std::printf( "%s %s differ: byte %lld, line %lld is %3o %s %3o %s\n", filenames[0].c_str(), filenames[1].c_str(), byte_number, line_number, c0, buf0, c1, buf1 ); } return 1; } else // verbosity > 0 ; show all differences { different = 1; for( ; first_diff < min_rd; ++byte_number, ++first_diff ) { const unsigned char c0 = buffer0[first_diff]; const unsigned char c1 = buffer1[first_diff]; if( c0 != c1 ) { if( !print_bytes ) std::printf( "%lld %3o %3o\n", byte_number, c0, c1 ); else { char buf0[5], buf1[5]; sprintc( buf0, c0 ); sprintc( buf1, c1 ); std::printf( "%lld %3o %-4s %3o %s\n", byte_number, c0, buf0, c1, buf1 ); } } } } } if( rd[0] != rd[1] ) { if( verbosity >= 0 ) std::fprintf( stderr, "%s: EOF on %s\n", util_name, filenames[rd[1]= parser.arguments() ) { show_error( "No files given.", 0, true ); return 2; } if( argind + 2 < parser.arguments() ) { show_error( "Too many files.", 0, true ); return 2; } const int files = parser.arguments() - argind; std::string filenames[2]; // file names of the two input files filenames[0] = parser.argument( argind ); if( files == 2 ) filenames[1] = parser.argument( argind + 1 ); int infd[2]; // file descriptors of the two files infd[0] = ( filenames[0] == "-" ) ? STDIN_FILENO : open_instream( filenames[0] ); if( infd[0] < 0 ) return 2; if( ( files == 1 && filenames[0] == "-" ) || ( files == 2 && check_identical( filenames[0].c_str(), filenames[1].c_str() ) ) ) { if( ignore_initial[0] == ignore_initial[1] ) return 0; else { show_error( "Can't compare parts of same file." ); return 2; } } if( files == 2 ) { infd[1] = ( filenames[1] == "-" ) ? STDIN_FILENO : open_instream( filenames[1] ); if( infd[1] < 0 ) return 2; } else { filenames[1] = filenames[0]; infd[1] = open_other_instream( filenames[1] ); if( infd[1] < 0 ) { infd[1] = STDIN_FILENO; filenames[1] = "-"; } } int old_infd[2]; // copy of file descriptors of the two files old_infd[0] = infd[0]; old_infd[1] = infd[1]; pid_t pid[2]; if( !set_data_feeder( &infd[0], &pid[0] ) || !set_data_feeder( &infd[1], &pid[1] ) ) return 2; for( int i = 0; i < 2; ++i ) if( !skip_ignore_initial( ignore_initial[i], infd[i] ) ) { show_error2( "Can't skip initial bytes from file", filenames[i].c_str() ); return 2; } int retval = cmp( max_size, infd, filenames, print_bytes ); if( retval != 0 ) { if( pid[0] ) kill( pid[0], SIGTERM ); if( pid[1] ) kill( pid[1], SIGTERM ); } else if( ( pid[0] && wait_for_child( pid[0], "data feeder" ) != 0 ) || ( pid[1] && wait_for_child( pid[1], "data feeder" ) != 0 ) ) retval = 2; for( int i = 0; i < 2; ++i ) { if( close( infd[i] ) != 0 ) { show_close_error( "data feeder" ); retval = 2; } if( filenames[i] != "-" && close( old_infd[i] ) != 0 ) { show_error2( "Can't close input file", filenames[i].c_str() ); retval = 2; } } if( std::fclose( stdout ) != 0 ) { show_error( "Can't close stdout", errno ); retval = 2; } return retval; }