/* Zcmp - decompress and compare two files byte by byte Copyright (C) 2010-2021 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #define _FILE_OFFSET_BITS 64 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__MSVCRT__) || defined(__OS2__) #include #endif #include "arg_parser.h" #include "rc.h" #include "zutils.h" #ifndef LLONG_MAX #define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL #endif namespace { #include "zcmpdiff.cc" void show_help() { std::printf( "zcmp compares two files and, if they differ, writes to standard output the\n" "first byte and line number where they differ. Bytes and lines are numbered\n" "starting with 1. A hyphen '-' used as a file argument means standard input.\n" "If any file given is compressed, its decompressed content is used. Compressed\n" "files are decompressed on the fly; no temporary files are created.\n" "\nThe formats supported are bzip2, gzip, lzip, and xz.\n" "\nUsage: zcmp [options] file1 [file2]\n" "\nzcmp compares file1 to file2. The standard input is used only if file1 or\n" "file2 refers to standard input. 
If file2 is omitted zcmp tries the\n" "following:\n" "\n - If file1 is compressed, compares its decompressed contents with\n" " the corresponding uncompressed file (the name of file1 with the\n" " extension removed).\n" "\n - If file1 is uncompressed, compares it with the decompressed\n" " contents of file1.[lz|bz2|gz|xz] (the first one that is found).\n" "\nExit status is 0 if inputs are identical, 1 if different, 2 if trouble.\n" "\nOptions:\n" " -h, --help display this help and exit\n" " -V, --version output version information and exit\n" " -b, --print-bytes print differing bytes\n" " -i, --ignore-initial=[:] ignore differences in the first bytes\n" " -l, --list list position, value of all differing bytes\n" " -M, --format= process only the formats in \n" " -n, --bytes= compare at most bytes\n" " -N, --no-rcfile don't read runtime configuration file\n" " -O, --force-format=[][,] force the formats given (bz2, gz, lz, xz)\n" " -q, --quiet suppress all messages\n" " -s, --silent (same as --quiet)\n" " -v, --verbose verbose mode (same as --list)\n" " --bz2= set compressor and options for bzip2 format\n" " --gz= set compressor and options for gzip format\n" " --lz= set compressor and options for lzip format\n" " --xz= set compressor and options for xz format\n" "\nNumbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" ); show_help_addr(); } long long getnum( const char * const ptr, const char ** const tailp = 0, const long long llimit = 0, const long long ulimit = LLONG_MAX ) { char * tail; errno = 0; long long result = strtoll( ptr, &tail, 0 ); if( tail == ptr ) { show_error( "Bad or missing numerical argument.", 0, true ); std::exit( 2 ); } if( result < 0 ) errno = ERANGE; if( !errno && tail[0] && std::isalpha( tail[0] ) ) { const unsigned char ch = *tail++; int factor; bool bsuf; // 'B' suffix is present if( tail[0] == 'i' ) { ++tail; factor = 1024; } else factor = 1000; if( 
tail[0] == 'B' ) { ++tail; bsuf = true; } else bsuf = false; int exponent = -1; // -1 = bad multiplier switch( ch ) { case 'Y': exponent = 8; break; case 'Z': exponent = 7; break; case 'E': exponent = 6; break; case 'P': exponent = 5; break; case 'T': exponent = 4; break; case 'G': exponent = 3; break; case 'M': exponent = 2; break; case 'K': if( factor == 1024 ) exponent = 1; break; case 'k': if( factor == 1000 ) exponent = 1; break; case 'B': if( factor == 1000 && !bsuf ) exponent = 0; break; } if( exponent < 0 ) { show_error( "Bad multiplier in numerical argument.", 0, true ); std::exit( 2 ); } for( int i = 0; i < exponent; ++i ) { if( ulimit / factor >= result ) result *= factor; else { errno = ERANGE; break; } } } if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE; if( errno ) { show_error( "Numerical argument out of limits." ); std::exit( 2 ); } if( tailp ) *tailp = tail; return result; } void parse_ignore_initial( const char * const arg, long long ignore_initial[2] ) { const char * tail; ignore_initial[0] = getnum( arg, &tail ); if( *tail == ':' || *tail == ',' ) ignore_initial[1] = getnum( ++tail ); else if( *tail == 0 ) ignore_initial[1] = ignore_initial[0]; else { show_error( "Bad separator in argument of '--ignore-initial'", 0, true ); std::exit( 2 ); } } bool skip_ignore_initial( const long long ignore_initial, const int infd ) { if( ignore_initial > 0 ) { enum { buffer_size = 4096 }; long long rest = ignore_initial; uint8_t buffer[buffer_size]; while( rest > 0 ) { const int size = std::min( rest, (long long)buffer_size ); const int rd = readblock( infd, buffer, size ); if( rd != size && errno ) return false; if( rd < size ) break; rest -= rd; } } return true; } // Put into buf the unsigned char c, making unprintable bytes // visible by quoting like cat -t does. 
// The longest output is "M-^?" (bytes 255 and 159..128 take the "M-" and
// "^" prefixes), so buf must have room for 5 chars including the final 0.
void sprintc( char * const buf, unsigned char c )
  {
  int i = 0;
  if( c < 32 || c >= 127 )
    {
    // high bit set: emit "M-" and continue with the low 7 bits
    if( c >= 128 ) { c -= 128; buf[i++] = 'M'; buf[i++] = '-'; }
    // control character: emit '^' followed by the matching letter
    if( c < 32 ) { c += 64; buf[i++] = '^'; }
    // DEL is shown as "^?"
    else if( c == 127 ) { c = '?'; buf[i++] = '^'; }
    }
  buf[i++] = c;
  buf[i++] = 0;
  }


// Return the offset of the first position at which buffer0 and buffer1
// differ. There is no length argument: the scan only stops at a mismatch,
// so the caller must have stored differing sentinel bytes after the valid
// data of both buffers (cmp does this before every call).
// When verbosity == 0 the newlines found in the equal prefix are added to
// *line_numberp; at other verbosity levels *line_numberp is not touched.
int block_compare( const uint8_t * const buffer0,
                   const uint8_t * const buffer1,
                   unsigned long long * const line_numberp )
  {
  const uint8_t * p0 = buffer0;
  const uint8_t * p1 = buffer1;

  if( verbosity == 0 )
    {
    int nl_count = 0;
    while( *p0 == *p1 )
      { if( *p0 == '\n' ) { ++nl_count; } ++p0; ++p1; }
    *line_numberp += nl_count;
    }
  else while( *p0 == *p1 ) { ++p0; ++p1; }
  return p0 - buffer0;
  }


// Compare the data readable from infd[0] and infd[1] block by block.
//   max_size     if >= 0, compare at most this many bytes; if < 0 the
//                counter 'rest' is never decreased in the code shown
//   infd         descriptors to read from
//   filenames    names used in the messages
//   print_bytes  also print the differing bytes, quoted by sprintc
// What is visible below returns 2 after a read error and 1 at the first
// difference when verbosity <= 0; with verbosity > 0 every difference of
// the block is listed and 'different' is set to 1.
// NOTE(review): the end of this function is missing from the reviewed text
// (see the note near the bottom), so the final return value and the loop
// exit condition cannot be documented from here.
// NOTE(review): buffer0 is allocated with new[] and no matching delete[]
// is visible; the early 'return 1' and 'return 2' paths below leave it
// allocated.
int cmp( const long long max_size, const int infd[2],
         const std::string filenames[2], const bool print_bytes )
  {
  const int buffer_size = 4096;
  unsigned long long byte_number = 1;
  unsigned long long line_number = 1;
  // remaining number of bytes to compare
  long long rest = ( max_size >= 0 ) ? max_size : buffer_size;
  // buffers with space for sentinels at the end
  uint8_t * const buffer0 = new uint8_t[2*(buffer_size+1)];
  uint8_t * const buffer1 = buffer0 + buffer_size + 1;
  uint8_t * buffer[2];
  buffer[0] = buffer0; buffer[1] = buffer1;
  int different = 0;

  while( rest > 0 )
    {
    const int size = std::min( (long long)buffer_size, rest );
    if( max_size >= 0 ) rest -= size;
    int rd[2];                  // number of bytes read from each file
    for( int i = 0; i < 2; ++i )
      {
      rd[i] = readblock( infd[i], buffer[i], size );
      // a short read counts as an error only if errno is set
      // (presumably readblock clears errno first; defined elsewhere)
      if( rd[i] != size && errno )
        { show_file_error( filenames[i].c_str(), "Read error", errno );
          return 2; }
      }

    const int min_rd = std::min( rd[0], rd[1] );
    buffer0[min_rd] = 0;        // sentinels for the block compare
    buffer1[min_rd] = 1;

    int first_diff = block_compare( buffer0, buffer1, &line_number );
    byte_number += first_diff;

    // first_diff == min_rd means that only the sentinels differ
    if( first_diff < min_rd )
      {
      if( verbosity < 0 ) return 1;     // return status only
      if( verbosity == 0 )              // show first difference
        {
        if( !print_bytes )
          std::printf( "%s %s differ: byte %llu, line %llu\n",
                       filenames[0].c_str(), filenames[1].c_str(),
                       byte_number, line_number );
        else
          {
          const unsigned char c0 = buffer0[first_diff];
          const unsigned char c1 = buffer1[first_diff];
          char buf0[5], buf1[5];        // 5 = longest sprintc output + 0
          sprintc( buf0, c0 ); sprintc( buf1, c1 );
          std::printf( "%s %s differ: byte %llu, line %llu is %3o %s %3o %s\n",
                       filenames[0].c_str(), filenames[1].c_str(),
                       byte_number, line_number, c0, buf0, c1, buf1 );
          }
        std::fflush( stdout );
        return 1;
        }
      else              // verbosity > 0 ; show all differences
        {
        different = 1;
        // walk the rest of the block; byte_number advances together with
        // first_diff, so it ends up at the start of the next block
        for( ; first_diff < min_rd; ++byte_number, ++first_diff )
          {
          const unsigned char c0 = buffer0[first_diff];
          const unsigned char c1 = buffer1[first_diff];
          if( c0 != c1 )
            {
            if( !print_bytes )
              std::printf( "%llu %3o %3o\n", byte_number, c0, c1 );
            else
              {
              char buf0[5], buf1[5];
              sprintc( buf0, c0 ); sprintc( buf1, c1 );
              std::printf( "%llu %3o %-4s %3o %s\n",
                           byte_number, c0, buf0, c1, buf1 );
              }
            }
          }
        std::fflush( stdout );
        }
      }

    // the two reads returned different amounts of data
    if( rd[0] != rd[1] )
      {
      if( verbosity >= 0 )
        // NOTE(review): the statement below is damaged. Everything from
        // just after 'filenames[rd[1]' up to the '0 ) ?' that follows is
        // missing from the reviewed text, as if a '<' ... '>' span had
        // been stripped as markup. The lost text must hold the rest of
        // this call, the end of cmp(), the close of the unnamed namespace
        // and the start of main(), including the declarations of
        // ignore_initial, max_size, format_types, print_bytes and the
        // *_opt option codes that main() uses further down. Restore it
        // from the upstream file; it is left untouched here.
        std::fprintf( stderr, "%s: EOF on %s\n", program_name, filenames[rd[1] 0 ) ? argv[0] : program_name;

  // Table of accepted options: short code (or numeric code for the
  // long-only options), long name, and whether an argument is required.
  const Arg_parser::Option options[] =
    {
    { 'b', "print-bytes",    Arg_parser::no  },
    { 'h', "help",           Arg_parser::no  },
    { 'i', "ignore-initial", Arg_parser::yes },
    { 'l', "list",           Arg_parser::no  },
    { 'M', "format",         Arg_parser::yes },
    { 'n', "bytes",          Arg_parser::yes },
    { 'N', "no-rcfile",      Arg_parser::no  },
    { 'O', "force-format",   Arg_parser::yes },
    { 'q', "quiet",          Arg_parser::no  },
    { 's', "silent",         Arg_parser::no  },
    { 'v', "verbose",        Arg_parser::no  },
    { 'V', "version",        Arg_parser::no  },
    { bz2_opt, "bz2",        Arg_parser::yes },
    { gz_opt, "gz",          Arg_parser::yes },
    { lz_opt, "lz",          Arg_parser::yes },
    { xz_opt, "xz",          Arg_parser::yes },
    { 0 , 0,                 Arg_parser::no  } };

  const Arg_parser parser( argc, argv, options );
  if( parser.error().size() )                           // bad option
    { show_error( parser.error().c_str(), 0, true ); return 2; }

  maybe_process_config_file( parser );

  int argind = 0;
  for( ; argind < parser.arguments(); ++argind )
    {
    const int code = parser.code( argind );
    if( !code ) break;                                  // no more options
    const std::string & arg = parser.argument( argind );
    switch( code )
      {
      case 'b': print_bytes = true; break;
      case 'h': show_help(); return 0;
      case 'i': parse_ignore_initial( arg.c_str(), ignore_initial ); break;
      case 'l': verbosity = 1; break;
      case 'M': parse_format_list( arg ); break;
      case 'n': max_size = getnum( arg.c_str() ); break;
      // nothing to do in this loop for -N; presumably it is acted on by
      // maybe_process_config_file above (defined elsewhere)
      case 'N': break;
      case 'O': parse_format_types2( arg, format_types ); break;
      case 'q':
      case 's': verbosity = -1; break;
      case 'v': verbosity = 1; break;
      case 'V': show_version(); return 0;
      case bz2_opt: parse_compressor( arg, fmt_bz2 ); break;
      case gz_opt: parse_compressor( arg, fmt_gz ); break;
      case lz_opt: parse_compressor( arg, fmt_lz ); break;
      case xz_opt: parse_compressor( arg, fmt_xz ); break;
      default : internal_error( "uncaught option." );
      }
    } // end process options

#if defined(__MSVCRT__) || defined(__OS2__)
  setmode( STDIN_FILENO, O_BINARY );
  setmode( STDOUT_FILENO, O_BINARY );
#endif

  // one or two file operands are required
  if( argind >= parser.arguments() )
    { show_error( "No files given.", 0, true ); return 2; }
  if( argind + 2 < parser.arguments() )
    { show_error( "Too many files.", 0, true ); return 2; }

  const int files = parser.arguments() - argind;
  std::string filenames[2];     // file names of the two input files
  filenames[0] = parser.argument( argind );
  if( files == 2 ) filenames[1] = parser.argument( argind + 1 );

  int infd[2];                  // file descriptors of the two files
  infd[0] = ( filenames[0] == "-" ) ?
    STDIN_FILENO : open_instream( filenames[0] );
  if( infd[0] < 0 ) return 2;

  if( files == 2 )
    {
    // check_identical is defined elsewhere; presumably it reports whether
    // both names refer to the same file. In that case the result is known
    // without reading, unless different initial offsets were requested.
    if( check_identical( filenames[0].c_str(), filenames[1].c_str() ) )
      {
      if( ignore_initial[0] == ignore_initial[1] ) return 0;
      else { show_error( "Can't compare parts of same file." ); return 2; }
      }
    infd[1] = ( filenames[1] == "-" ) ?
      STDIN_FILENO : open_instream( filenames[1] );
    if( infd[1] < 0 ) return 2;
    }
  else
    {
    // only one operand: the second file is found by open_other_instream,
    // which receives (and may presumably rewrite) a copy of the first name
    if( filenames[0] == "-" )
      { show_error( "Missing operand after '-'.", 0, true ); return 2; }
    if( format_types[0] >= 0 || format_types[1] >= 0 )
      { show_error( "Two files must be given when format is specified.", 0, true );
        return 2; }
    filenames[1] = filenames[0];
    infd[1] = open_other_instream( filenames[1] );
    if( infd[1] < 0 )
      {
      if( verbosity >= 0 )
        std::fprintf( stderr, "%s: Can't find file to compare with '%s'.\n",
                      program_name, filenames[0].c_str() );
      show_error( 0, 0, true );
      return 2;
      }
    }

  // set_data_feeder receives &infd[i] and so may replace the descriptor;
  // the originals are kept so that they can be closed at the end
  int old_infd[2];              // copy of file descriptors of the two files
  old_infd[0] = infd[0]; old_infd[1] = infd[1];
  Children children[2];
  if( !set_data_feeder( filenames[0], &infd[0], children[0], format_types[0] ) ||
      !set_data_feeder( filenames[1], &infd[1], children[1], format_types[1] ) )
    return 2;

  for( int i = 0; i < 2; ++i )
    if( !skip_ignore_initial( ignore_initial[i], infd[i] ) )
      {
      show_file_error( filenames[i].c_str(),
                       "Read error skipping initial bytes", errno );
      return 2;
      }

  int retval = cmp( max_size, infd, filenames, print_bytes );

  // the second argument is true only when the inputs compared equal and
  // no size limit was given; good_status is defined elsewhere
  for( int i = 0; i < 2; ++i )
    if( !good_status( children[i], retval == 0 && max_size < 0 ) )
      retval = 2;

  // close the descriptors that were read from and, except for standard
  // input, the ones that were opened first
  for( int i = 0; i < 2; ++i )
    {
    if( close( infd[i] ) != 0 )
      { show_close_error(); retval = 2; }
    if( filenames[i] != "-" && close( old_infd[i] ) != 0 )
      {
      show_file_error( filenames[i].c_str(), "Error closing input file",
                       errno );
      retval = 2;
      }
    }
  // closing stdout reports write errors that would otherwise go unnoticed
  if( std::fclose( stdout ) != 0 )
    {
    show_error( "Error closing stdout", errno );
    retval = 2;
    }
  return retval;
  }