/* Zcmp - decompress and compare two files byte by byte
Copyright (C) 2010 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _FILE_OFFSET_BITS 64
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#if defined(__MSVCRT__) || defined(__OS2__)
#include
#endif
#include "arg_parser.h"
#include "zutils.h"
#if CHAR_BIT != 8
#error "Environments where CHAR_BIT != 8 are not supported."
#endif
#ifndef LLONG_MAX
#define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL
#endif
#ifndef LLONG_MIN
#define LLONG_MIN (-LLONG_MAX - 1LL)
#endif
#ifndef ULLONG_MAX
#define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL
#endif
namespace {
// Extra flag OR-ed into every open() call in this file: O_BINARY where the
// platform defines it, 0 otherwise (so the OR has no effect there).
#ifdef O_BINARY
const int o_binary = O_BINARY;
#else
const int o_binary = 0;
#endif
// Table of recognized compressed-file suffixes. 'from' is the suffix looked
// for at the end of a file name; 'to' is the text that replaces it when the
// name of the corresponding uncompressed file is derived (see
// open_other_instream). The table is terminated by a { 0, 0 } entry.
struct { const char * from; const char * to; } const known_extensions[] = {
{ ".bz2", "" },
{ ".tbz", ".tar" },
{ ".tbz2", ".tar" },
{ ".gz", "" },
{ ".tgz", ".tar" },
{ ".lz", "" },
{ ".tlz", ".tar" },
{ ".xz", "" },
{ ".txz", ".tar" },
{ 0, 0 } };
// Prints the usage message of zcmp to standard output, followed by the
// common trailer emitted by show_help_addr().
// The <file1>, <file2> and <n> placeholders name the operands described by
// the "Usage:" line and by the -i / -n options; without them the sentences
// below are unreadable. Option descriptions are aligned in one column.
void show_help() throw()
  {
  std::printf( "Zcmp compares two files (\"-\" means standard input), and if they\n" );
  std::printf( "differ, tells the first byte and line number where they differ. Bytes\n" );
  std::printf( "and lines are numbered starting with 1. If any given file is compressed,\n" );
  std::printf( "its uncompressed content is used. Compressed files are uncompressed on\n" );
  std::printf( "the fly; no temporary files are created.\n" );
  std::printf( "The supported compressors are bzip2, gzip, lzip and xz.\n" );
  std::printf( "\nUsage: zcmp [options] file1 [file2]\n" );
  std::printf( "\nCompares <file1> to <file2>. If <file2> is omitted zcmp tries the\n" );
  std::printf( "following:\n" );
  std::printf( "If <file1> is compressed, compares <file1> to the file with the\n" );
  std::printf( "corresponding decompressed file name (removes the extension from\n" );
  std::printf( "<file1>).\n" );
  std::printf( "If <file1> is not compressed, compares <file1> to the uncompressed\n" );
  std::printf( "contents of <file1>.[bz2|gz|lz|xz] (the first one that is found).\n" );
  std::printf( "If no suitable file is found, compares <file1> to data read from\n" );
  std::printf( "standard input.\n" );
  std::printf( "\nExit status is 0 if inputs are identical, 1 if different, 2 if trouble.\n" );
  std::printf( "\nOptions:\n" );
  std::printf( "  -h, --help                       display this help and exit\n" );
  std::printf( "  -V, --version                    output version information and exit\n" );
  std::printf( "  -b, --print-bytes                print differing bytes\n" );
  std::printf( "  -i, --ignore-initial=<n>[,<n2>]  ignore differences in the first <n> bytes\n" );
  std::printf( "  -l, --list                       list position, value of all differing bytes\n" );
  std::printf( "  -n, --bytes=<n>                  compare at most <n> bytes\n" );
  std::printf( "  -q, --quiet                      suppress all messages\n" );
  std::printf( "  -s, --silent                     (same as --quiet)\n" );
  std::printf( "  -v, --verbose                    verbose mode (same as --list)\n" );
  std::printf( "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" );
  std::printf( "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" );
  show_help_addr();
  }
// Parses the number at the start of 'ptr' with strtoll (base 0, so decimal,
// octal and hexadecimal prefixes are accepted). The number may be followed
// by a multiplier: 'k' (1000), 'Ki' (1024), or one of M, G, T, P, E, Z, Y
// meaning a power of 1000, or of 1024 when followed by 'i'. An optional 'B'
// after the multiplier (as in "kB" or "KiB") is also consumed.
//
// ptr    - text to parse.
// tailp  - if not null, receives a pointer to the first character after the
//          number and its multiplier, so the caller can go on parsing.
// llimit - smallest value accepted.
// ulimit - largest value accepted.
//
// Returns the parsed value. On any error (no digits, unknown multiplier,
// overflow, value outside [llimit, ulimit]) a message is shown with
// show_error and the program exits with status 2.
long long getnum( const char * const ptr, const char ** const tailp = 0,
                  const long long llimit = LLONG_MIN + 1,
                  const long long ulimit = LLONG_MAX ) throw()
  {
  errno = 0;
  char * tail;
  long long result = strtoll( ptr, &tail, 0 );
  if( tail == ptr )
    {
    show_error( "Bad or missing numerical argument.", 0, true );
    std::exit( 2 );
    }

  // isalpha is only defined for values representable as unsigned char,
  // so a plain (possibly negative) char must be converted first.
  if( !errno && tail[0] &&
      std::isalpha( static_cast< unsigned char >( tail[0] ) ) )
    {
    const int factor = ( tail[1] == 'i' ) ? 1024 : 1000;
    int exponent = 0;
    bool bad_multiplier = false;
    switch( tail[0] )
      {
      case 'Y': exponent = 8; break;
      case 'Z': exponent = 7; break;
      case 'E': exponent = 6; break;
      case 'P': exponent = 5; break;
      case 'T': exponent = 4; break;
      case 'G': exponent = 3; break;
      case 'M': exponent = 2; break;
      case 'K': if( factor == 1024 ) exponent = 1; else bad_multiplier = true;
                break;
      case 'k': if( factor == 1000 ) exponent = 1; else bad_multiplier = true;
                break;
      default : bad_multiplier = true;
      }
    if( bad_multiplier )
      {
      show_error( "Bad multiplier in numerical argument.", 0, true );
      std::exit( 2 );
      }

    // Step over the multiplier so that *tailp points past it; otherwise a
    // caller looking for a separator (e.g. "1k,2k") would never find it.
    tail += ( factor == 1024 ) ? 2 : 1;
    if( tail[0] == 'B' ) ++tail;

    // Range test written without llabs, which is undefined for LLONG_MIN.
    const long long max_magnitude = LLONG_MAX / factor;
    for( int i = 0; i < exponent; ++i )
      {
      if( result <= max_magnitude && result >= -max_magnitude )
        result *= factor;
      else { errno = ERANGE; break; }
      }
    }

  if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE;
  if( errno )
    {
    show_error( "Numerical argument out of limits." );
    std::exit( 2 );
    }
  if( tailp ) *tailp = tail;
  return result;
  }
// Opens 'input_filename' read-only (plus o_binary) and returns the resulting
// descriptor. If open() fails, reports it through show_error2 and returns
// the negative value obtained from open().
int open_instream( const std::string & input_filename ) throw()
  {
  const char * const path = input_filename.c_str();
  const int fd = open( path, O_RDONLY | o_binary );
  if( fd >= 0 ) return fd;
  show_error2( "Can't open input file", path );
  return fd;
  }
// Finds and opens the counterpart of the file called 'name'.
// If 'name' ends in one of the suffixes of known_extensions (and is longer
// than that suffix), the suffix is replaced by its 'to' text, 'name' is
// updated in place, and the result of opening that file is returned (which
// may be negative; no other candidate is tried).
// Otherwise each entry of simple_extensions is appended to 'name' in turn;
// the first candidate that opens successfully is stored in 'name' and its
// descriptor is returned. Returns -1 if none can be opened.
int open_other_instream( std::string & name ) throw()
  {
  for( int k = 0; known_extensions[k].from; ++k )
    {
    const std::string suffix( known_extensions[k].from );
    if( name.size() <= suffix.size() ) continue;
    const std::string::size_type stem_len = name.size() - suffix.size();
    if( name.compare( stem_len, suffix.size(), suffix ) != 0 ) continue;
    name.erase( stem_len );			// drop the compressed suffix
    name.append( known_extensions[k].to );
    return open( name.c_str(), O_RDONLY | o_binary );
    }

  for( int k = 0; simple_extensions[k]; ++k )
    {
    std::string candidate( name );
    candidate += simple_extensions[k];
    const int fd = open( candidate.c_str(), O_RDONLY | o_binary );
    if( fd < 0 ) continue;
    name = candidate;
    return fd;
    }
  return -1;
  }
// Tells whether 'name1' and 'name2' designate the same file: true if the
// two strings are equal, or if stat() succeeds on both and reports the same
// device and inode numbers. Returns false if either stat() call fails.
bool check_identical( const char * const name1, const char * const name2 ) throw()
  {
  if( std::strcmp( name1, name2 ) == 0 ) return true;
  struct stat st1;
  struct stat st2;
  if( stat( name1, &st1 ) != 0 ) return false;
  if( stat( name2, &st2 ) != 0 ) return false;
  return st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino;
  }
// Decodes the argument of --ignore-initial into 'ignore_initial'.
// The first number (minimum 0) goes to element 0. If getnum stops at a ','
// or ':', a second number (minimum 0) is read after it into element 1;
// otherwise element 1 gets the same value as element 0.
void parse_ignore_initial( const char * const arg, long long ignore_initial[2] )
  {
  const char * rest = 0;
  const long long first = getnum( arg, &rest, 0 );
  ignore_initial[0] = first;
  const bool has_second = ( *rest == ',' || *rest == ':' );
  ignore_initial[1] = has_second ? getnum( rest + 1, 0, 0 ) : first;
  }
// Reads and discards up to 'ignore_initial' bytes from 'infd', in chunks of
// at most 4096 bytes obtained with readblock. Stops early, without error,
// when a chunk comes back short. Returns false only if a chunk comes back
// with a different size than requested while errno is set; true otherwise
// (including when 'ignore_initial' is not positive).
bool skip_ignore_initial( const long long ignore_initial, const int infd )
  {
  enum { chunk_size = 4096 };
  uint8_t scratch[chunk_size];

  for( long long pending = ignore_initial; pending > 0; )
    {
    const int wanted =
      ( pending < (long long)chunk_size ) ? (int)pending : (int)chunk_size;
    const int got = readblock( infd, scratch, wanted );
    if( got != wanted && errno ) return false;
    if( got < wanted ) break;			// short read without error
    pending -= got;
    }
  return true;
  }
// Writes into 'buf' a null-terminated, printable rendering of byte 'c',
// quoting unprintable bytes the way cat -t does: bytes >= 128 get an "M-"
// prefix and are then treated as (c - 128); values below 32 become '^'
// followed by (c + 64); 127 becomes "^?". The longest result is "M-^?",
// so 'buf' needs room for 5 chars.
void sprintc( char * const buf, unsigned char c )
  {
  char * out = buf;
  if( c >= 128 ) { *out++ = 'M'; *out++ = '-'; c -= 128; }
  if( c < 32 ) { *out++ = '^'; c += 64; }
  else if( c == 127 ) { *out++ = '^'; c = '?'; }
  *out++ = c;
  *out = 0;
  }
// Returns the offset of the first position at which 'buffer0' and 'buffer1'
// hold different bytes. The scan has no length limit: it relies on the
// buffers differing somewhere (cmp() stores a sentinel byte for this).
// When verbosity is 0, the number of newlines found in the matching prefix
// is added to '*line_numberp'; otherwise '*line_numberp' is left untouched.
int block_compare( const uint8_t * const buffer0,
                   const uint8_t * const buffer1,
                   long long * line_numberp )
  {
  int pos = 0;
  if( verbosity != 0 )
    while( buffer0[pos] == buffer1[pos] ) ++pos;
  else
    {
    int newlines = 0;
    for( ; buffer0[pos] == buffer1[pos]; ++pos )
      if( buffer0[pos] == '\n' ) ++newlines;
    *line_numberp += newlines;
    }
  return pos;
  }
int cmp( const long long max_size, const int infd[2],
const std::string filenames[2], const bool print_bytes )
{
enum { buffer_size = 4096 };
long long byte_number = 1;
long long line_number = 1;
long long rest = max_size; // remaining number of bytes to compare
// buffers with space for sentinels at the end
uint8_t * const buffer0 = new uint8_t[2*(buffer_size+1)];
uint8_t * const buffer1 = buffer0 + buffer_size + 1;
uint8_t * buffer[2];
buffer[0] = buffer0; buffer[1] = buffer1;
int different = 0;
while( rest > 0 )
{
const int size = std::min( rest, (long long)buffer_size );
int rd[2]; // number of bytes read from each file
for( int i = 0; i < 2; ++i )
{
rd[i] = readblock( infd[i], buffer[i], size );
if( rd[i] != size && errno )
{
show_error2( "Error reading file", filenames[i].c_str() );
return 2;
}
}
rest -= size;
buffer0[rd[0]] = ~buffer1[rd[0]]; // sentinels for the block compare
buffer1[rd[1]] = ~buffer0[rd[1]];
int first_diff = block_compare( buffer0, buffer1, &line_number );
byte_number += first_diff;
const int min_rd = std::min( rd[0], rd[1] );
if( first_diff < min_rd )
{
if( verbosity < 0 ) return 1; // return status only
if( verbosity == 0 ) // show first difference
{
if( !print_bytes )
std::printf( "%s %s differ: byte %lld, line %lld\n",
filenames[0].c_str(), filenames[1].c_str(),
byte_number, line_number );
else
{
const unsigned char c0 = buffer0[first_diff];
const unsigned char c1 = buffer1[first_diff];
char buf0[5], buf1[5];
sprintc( buf0, c0 ); sprintc( buf1, c1 );
std::printf( "%s %s differ: byte %lld, line %lld is %3o %s %3o %s\n",
filenames[0].c_str(), filenames[1].c_str(),
byte_number, line_number, c0, buf0, c1, buf1 );
}
return 1;
}
else // verbosity > 0 ; show all differences
{
different = 1;
for( ; first_diff < min_rd; ++byte_number, ++first_diff )
{
const unsigned char c0 = buffer0[first_diff];
const unsigned char c1 = buffer1[first_diff];
if( c0 != c1 )
{
if( !print_bytes )
std::printf( "%lld %3o %3o\n", byte_number, c0, c1 );
else
{
char buf0[5], buf1[5];
sprintc( buf0, c0 ); sprintc( buf1, c1 );
std::printf( "%lld %3o %-4s %3o %s\n",
byte_number, c0, buf0, c1, buf1 );
}
}
}
}
}
if( rd[0] != rd[1] )
{
if( verbosity >= 0 )
std::fprintf( stderr, "%s: EOF on %s\n",
util_name, filenames[rd[1]= parser.arguments() )
{ show_error( "No files given.", 0, true ); return 2; }
if( argind + 2 < parser.arguments() )
{ show_error( "Too many files.", 0, true ); return 2; }
const int files = parser.arguments() - argind;
std::string filenames[2]; // file names of the two input files
filenames[0] = parser.argument( argind );
if( files == 2 ) filenames[1] = parser.argument( argind + 1 );
int infd[2]; // file descriptors of the two files
infd[0] = ( filenames[0] == "-" ) ?
STDIN_FILENO : open_instream( filenames[0] );
if( infd[0] < 0 ) return 2;
if( ( files == 1 && filenames[0] == "-" ) ||
( files == 2 && check_identical( filenames[0].c_str(),
filenames[1].c_str() ) ) )
{
if( ignore_initial[0] == ignore_initial[1] ) return 0;
else { show_error( "Can't compare parts of same file." ); return 2; }
}
if( files == 2 )
{
infd[1] = ( filenames[1] == "-" ) ?
STDIN_FILENO : open_instream( filenames[1] );
if( infd[1] < 0 ) return 2;
}
else
{
filenames[1] = filenames[0];
infd[1] = open_other_instream( filenames[1] );
if( infd[1] < 0 ) { infd[1] = STDIN_FILENO; filenames[1] = "-"; }
}
int old_infd[2]; // copy of file descriptors of the two files
old_infd[0] = infd[0]; old_infd[1] = infd[1];
pid_t pid[2];
if( !set_data_feeder( &infd[0], &pid[0] ) ||
!set_data_feeder( &infd[1], &pid[1] ) )
return 2;
for( int i = 0; i < 2; ++i )
if( !skip_ignore_initial( ignore_initial[i], infd[i] ) )
{
show_error2( "Can't skip initial bytes from file", filenames[i].c_str() );
return 2;
}
int retval = cmp( max_size, infd, filenames, print_bytes );
if( retval != 0 )
{
if( pid[0] ) kill( pid[0], SIGTERM );
if( pid[1] ) kill( pid[1], SIGTERM );
}
else
if( ( pid[0] && wait_for_child( pid[0], "data feeder" ) != 0 ) ||
( pid[1] && wait_for_child( pid[1], "data feeder" ) != 0 ) )
retval = 2;
for( int i = 0; i < 2; ++i )
{
if( close( infd[i] ) != 0 )
{ show_close_error( "data feeder" ); retval = 2; }
if( filenames[i] != "-" && close( old_infd[i] ) != 0 )
{
show_error2( "Can't close input file", filenames[i].c_str() );
retval = 2;
}
}
if( std::fclose( stdout ) != 0 )
{
show_error( "Can't close stdout", errno );
retval = 2;
}
return retval;
}