diff options
Diffstat (limited to '')
-rw-r--r-- | zcmp.cc | 121 |
1 files changed, 86 insertions, 35 deletions
@@ -1,5 +1,5 @@ /* Zcmp - decompress and compare two files byte by byte - Copyright (C) 2010-2021 Antonio Diaz Diaz. + Copyright (C) 2010-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,7 +31,7 @@ #include <stdint.h> #include <unistd.h> #include <sys/stat.h> -#if defined(__MSVCRT__) || defined(__OS2__) +#if defined __MSVCRT__ || defined __OS2__ #include <io.h> #endif @@ -55,7 +55,7 @@ void show_help() "starting with 1. A hyphen '-' used as a file argument means standard input.\n" "If any file given is compressed, its decompressed content is used. Compressed\n" "files are decompressed on the fly; no temporary files are created.\n" - "\nThe formats supported are bzip2, gzip, lzip, and xz.\n" + "\nThe formats supported are bzip2, gzip, lzip, xz, and zstd.\n" "\nUsage: zcmp [options] file1 [file2]\n" "\nzcmp compares file1 to file2. The standard input is used only if file1 or\n" "file2 refers to standard input. If file2 is omitted zcmp tries the\n" @@ -64,7 +64,7 @@ void show_help() " the corresponding uncompressed file (the name of file1 with the\n" " extension removed).\n" "\n - If file1 is uncompressed, compares it with the decompressed\n" - " contents of file1.[lz|bz2|gz|xz] (the first one that is found).\n" + " contents of file1.[lz|bz2|gz|zst|xz] (the first one that is found).\n" "\nExit status is 0 if inputs are identical, 1 if different, 2 if trouble.\n" "\nOptions:\n" " -h, --help display this help and exit\n" @@ -75,7 +75,7 @@ void show_help() " -M, --format=<list> process only the formats in <list>\n" " -n, --bytes=<n> compare at most <n> bytes\n" " -N, --no-rcfile don't read runtime configuration file\n" - " -O, --force-format=[<f1>][,<f2>] force the formats given (bz2, gz, lz, xz)\n" + " -O, --force-format=[<f1>][,<f2>] force the formats given (bz2,gz,lz,xz,zst)\n" " -q, --quiet suppress all messages\n" " -s, --silent (same as --quiet)\n" " -v, --verbose verbose mode (same as --list)\n" @@ -83,22 +83,60 @@ void show_help() " --gz=<command> set compressor and options for gzip format\n" " --lz=<command> set compressor and options for lzip format\n" " --xz=<command> set compressor and options for xz format\n" + " --zst=<command> set compressor and options for zstd format\n" "\nNumbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" ); show_help_addr(); } -long long getnum( const char * const ptr, const char ** const tailp = 0, +// separate large numbers >= 100_000 in groups of 3 digits using '_' +const char * format_num3( long long num ) + { + const char * const si_prefix = "kMGTPEZY"; + const char * const binary_prefix = "KMGTPEZY"; + enum { buffers = 8, bufsize = 4 * sizeof (long long) }; + static char buffer[buffers][bufsize]; // circle of static buffers for printf + static int current = 0; + + char * const buf = buffer[current++]; current %= buffers; + char * p = buf + bufsize - 1; // fill the buffer backwards + *p = 0; // terminator + const bool negative = num < 0; + if( negative ) num = -num; + char prefix = 0; // try binary first, then si + for( int i = 0; i < 8 && num >= 1024 && num % 1024 == 0; ++i ) + { num /= 1024; prefix = binary_prefix[i]; } + if( prefix ) *(--p) = 'i'; + else + for( int i = 0; i < 8 && num >= 1000 && num % 1000 == 0; ++i ) + { num /= 1000; prefix = si_prefix[i]; } + if( prefix ) *(--p) = prefix; + const bool split = num >= 100000; + + for( int i = 0; ; ) + { + *(--p) = num % 10 + '0'; num /= 10; if( num == 0 ) break; + if( split && ++i >= 3 ) { i = 0; *(--p) = '_'; } + } + if( negative ) *(--p) = '-'; + return p; + } + + +long long getnum( const char * const arg, const char * const option_name, + const char ** const tailp = 0, const long long llimit = 0, const long long ulimit = LLONG_MAX ) { char * tail; errno = 0; - long long result = strtoll( ptr, &tail, 0 ); - if( tail == ptr ) + long long result = strtoll( arg, &tail, 0 ); + if( tail == arg ) { - show_error( "Bad or missing numerical argument.", 0, true ); + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Bad or missing numerical argument in " + "option '%s'.\n", program_name, option_name ); std::exit( 2 ); } if( result < 0 ) errno = ERANGE; @@ -126,7 +164,9 @@ long long getnum( const char * const ptr, const char ** const tailp = 0, } if( exponent < 0 ) { - show_error( "Bad multiplier in numerical argument.", 0, true ); + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Bad multiplier in numerical argument of " + "option '%s'.\n", program_name, option_name ); std::exit( 2 ); } for( int i = 0; i < exponent; ++i ) @@ -138,7 +178,10 @@ long long getnum( const char * const ptr, const char ** const tailp = 0, if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE; if( errno ) { - show_error( "Numerical argument out of limits." ); + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Numerical argument out of limits [%s,%s] " + "in option '%s'.\n", program_name, format_num3( llimit ), + format_num3( ulimit ), option_name ); std::exit( 2 ); } if( tailp ) *tailp = tail; @@ -146,16 +189,19 @@ long long getnum( const char * const ptr, const char ** const tailp = 0, } -void parse_ignore_initial( const char * const arg, long long ignore_initial[2] ) +void parse_ignore_initial( const char * const arg, const char * const pn, + long long ignore_initial[2] ) { const char * tail; - ignore_initial[0] = getnum( arg, &tail ); + ignore_initial[0] = getnum( arg, pn, &tail ); if( *tail == ':' || *tail == ',' ) - ignore_initial[1] = getnum( ++tail ); + ignore_initial[1] = getnum( ++tail, pn ); else if( *tail == 0 ) ignore_initial[1] = ignore_initial[0]; else { - show_error( "Bad separator in argument of '--ignore-initial'", 0, true ); + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Bad separator in argument of option '%s'.\n", + program_name, pn ); std::exit( 2 ); } } @@ -165,7 +211,7 @@ bool skip_ignore_initial( const long long ignore_initial, const int infd ) { if( ignore_initial > 0 ) { - enum { buffer_size = 4096 }; + const int buffer_size = 4096; long long rest = ignore_initial; uint8_t buffer[buffer_size]; while( rest > 0 ) @@ -218,7 +264,8 @@ int block_compare( const uint8_t * const buffer0, int cmp( const long long max_size, const int infd[2], - const std::string filenames[2], const bool print_bytes ) + const std::string filenames[2], bool finished[2], + const bool print_bytes ) { const int buffer_size = 4096; unsigned long long byte_number = 1; @@ -241,11 +288,11 @@ int cmp( const long long max_size, const int infd[2], { rd[i] = readblock( infd[i], buffer[i], size ); if( rd[i] != size && errno ) - { - show_file_error( filenames[i].c_str(), "Read error", errno ); - return 2; - } + { show_file_error( filenames[i].c_str(), "Read error", errno ); + return 2; } } + for( int i = 0; i < 2; ++i ) + if( rd[i] < size ) finished[i] = true; const int min_rd = std::min( rd[0], rd[1] ); buffer0[min_rd] = 0; // sentinels for the block compare @@ -319,7 +366,7 @@ int cmp( const long long max_size, const int infd[2], int main( const int argc, const char * const argv[] ) { - enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt }; + enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt, zst_opt }; // number of initial bytes ignored for each file long long ignore_initial[2] = { 0, 0 }; long long max_size = -1; // < 0 means unlimited size @@ -342,11 +389,12 @@ int main( const int argc, const char * const argv[] ) { 's', "silent", Arg_parser::no }, { 'v', "verbose", Arg_parser::no }, { 'V', "version", Arg_parser::no }, - { bz2_opt, "bz2", Arg_parser::yes }, - { gz_opt, "gz", Arg_parser::yes }, - { lz_opt, "lz", Arg_parser::yes }, - { xz_opt, "xz", Arg_parser::yes }, - { 0 , 0, Arg_parser::no } }; + { bz2_opt, "bz2", Arg_parser::yes }, + { gz_opt, "gz", Arg_parser::yes }, + { lz_opt, "lz", Arg_parser::yes }, + { xz_opt, "xz", Arg_parser::yes }, + { zst_opt, "zst", Arg_parser::yes }, + { 0, 0, Arg_parser::no } }; const Arg_parser parser( argc, argv, options ); if( parser.error().size() ) // bad option @@ -359,17 +407,18 @@ int main( const int argc, const char * const argv[] ) { const int code = parser.code( argind ); if( !code ) break; // no more options + const char * const pn = parser.parsed_name( argind ).c_str(); const std::string & arg = parser.argument( argind ); switch( code ) { case 'b': print_bytes = true; break; case 'h': show_help(); return 0; - case 'i': parse_ignore_initial( arg.c_str(), ignore_initial ); break; + case 'i': parse_ignore_initial( arg.c_str(), pn, ignore_initial ); break; case 'l': verbosity = 1; break; - case 'M': parse_format_list( arg ); break; - case 'n': max_size = getnum( arg.c_str() ); break; + case 'M': parse_format_list( arg, pn ); break; + case 'n': max_size = getnum( arg.c_str(), pn ); break; case 'N': break; - case 'O': parse_format_types2( arg, format_types ); break; + case 'O': parse_format_types2( arg, pn, format_types ); break; case 'q': case 's': verbosity = -1; break; case 'v': verbosity = 1; break; @@ -378,18 +427,19 @@ int main( const int argc, const char * const argv[] ) case gz_opt: parse_compressor( arg, fmt_gz ); break; case lz_opt: parse_compressor( arg, fmt_lz ); break; case xz_opt: parse_compressor( arg, fmt_xz ); break; + case zst_opt: parse_compressor( arg, fmt_zst ); break; default : internal_error( "uncaught option." ); } } // end process options -#if defined(__MSVCRT__) || defined(__OS2__) +#if defined __MSVCRT__ || defined __OS2__ setmode( STDIN_FILENO, O_BINARY ); setmode( STDOUT_FILENO, O_BINARY ); #endif if( argind >= parser.arguments() ) { show_error( "No files given.", 0, true ); return 2; } - if( argind + 2 < parser.arguments() ) + if( parser.arguments() - argind > 2 ) { show_error( "Too many files.", 0, true ); return 2; } const int files = parser.arguments() - argind; @@ -446,10 +496,11 @@ int main( const int argc, const char * const argv[] ) return 2; } - int retval = cmp( max_size, infd, filenames, print_bytes ); + bool finished[2] = { false, false }; + int retval = cmp( max_size, infd, filenames, finished, print_bytes ); for( int i = 0; i < 2; ++i ) - if( !good_status( children[i], retval == 0 && max_size < 0 ) ) retval = 2; + if( !good_status( children[i], finished[i] ) ) retval = 2; for( int i = 0; i < 2; ++i ) { |