From 6f69650067a884d986108e7465ea26948b3388d2 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Tue, 23 Jan 2024 06:44:38 +0100 Subject: Merging upstream version 1.11. Signed-off-by: Daniel Baumann --- main.cc | 244 +++++++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 140 insertions(+), 104 deletions(-) (limited to 'main.cc') diff --git a/main.cc b/main.cc index 51b3af4..548b58f 100644 --- a/main.cc +++ b/main.cc @@ -1,6 +1,6 @@ /* Plzip - Massively parallel implementation of lzip Copyright (C) 2009 Laszlo Ersek. - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2024 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,16 +17,16 @@ */ /* Exit status: 0 for a normal exit, 1 for environmental problems - (file not found, invalid flags, I/O errors, etc), 2 to indicate a - corrupt or invalid input file, 3 for an internal consistency error - (e.g., bug) which caused plzip to panic. + (file not found, invalid command-line options, I/O errors, etc), 2 to + indicate a corrupt or invalid input file, 3 for an internal consistency + error (e.g., bug) which caused plzip to panic. */ #define _FILE_OFFSET_BITS 64 #include #include -#include +#include // SSIZE_MAX #include #include #include @@ -34,7 +34,7 @@ #include #include #include -#include +#include // SIZE_MAX #include #include #include @@ -77,7 +77,7 @@ int verbosity = 0; namespace { const char * const program_name = "plzip"; -const char * const program_year = "2022"; +const char * const program_year = "2024"; const char * invocation_name = program_name; // default value const struct { const char * from; const char * to; } known_extensions[] = { @@ -102,18 +102,19 @@ bool delete_output_on_interrupt = false; void show_help( const long num_online ) { - std::printf( "Plzip is a massively parallel (multi-threaded) implementation of lzip, fully\n" + std::printf( "Plzip is a massively parallel (multi-threaded) implementation of lzip,\n" "compatible with lzip 1.4 or newer. Plzip uses the compression library lzlib.\n" "\nLzip is a lossless data compressor with a user interface similar to the one\n" "of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov\n" - "chain-Algorithm' (LZMA) stream format and provides a 3 factor integrity\n" - "checking to maximize interoperability and optimize safety. Lzip can compress\n" - "about as fast as gzip (lzip -0) or compress most files more than bzip2\n" - "(lzip -9). Decompression speed is intermediate between gzip and bzip2.\n" - "Lzip is better than gzip and bzip2 from a data recovery perspective. Lzip\n" - "has been designed, written, and tested with great care to replace gzip and\n" - "bzip2 as the standard general-purpose compressed format for unix-like\n" - "systems.\n" + "chain-Algorithm' (LZMA) stream format to maximize interoperability. The\n" + "maximum dictionary size is 512 MiB so that any lzip file can be decompressed\n" + "on 32-bit machines. Lzip provides accurate and robust 3-factor integrity\n" + "checking. Lzip can compress about as fast as gzip (lzip -0) or compress most\n" + "files more than bzip2 (lzip -9). Decompression speed is intermediate between\n" + "gzip and bzip2. Lzip is better than gzip and bzip2 from a data recovery\n" + "perspective. Lzip has been designed, written, and tested with great care to\n" + "replace gzip and bzip2 as the standard general-purpose compressed format for\n" + "Unix-like systems.\n" "\nPlzip can compress/decompress large files on multiprocessor machines much\n" "faster than lzip, at the cost of a slightly reduced compression ratio (0.4\n" "to 2 percent larger compressed files). Note that the number of usable\n" @@ -127,7 +128,7 @@ void show_help( const long num_online ) " -a, --trailing-error exit with error status if trailing data\n" " -B, --data-size= set size of input data blocks [2x8=16 MiB]\n" " -c, --stdout write to standard output, keep input files\n" - " -d, --decompress decompress\n" + " -d, --decompress decompress, test compressed file integrity\n" " -f, --force overwrite existing output files\n" " -F, --recompress force re-compression of compressed files\n" " -k, --keep keep (don't delete) input files\n" @@ -155,32 +156,46 @@ void show_help( const long num_online ) "decompresses from standard input to standard output.\n" "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" - "Dictionary sizes 12 to 29 are interpreted as powers of two, meaning 2^12\n" - "to 2^29 bytes.\n" - "\nThe bidimensional parameter space of LZMA can't be mapped to a linear\n" - "scale optimal for all files. If your files are large, very repetitive,\n" - "etc, you may need to use the options --dictionary-size and --match-length\n" - "directly to achieve optimal performance.\n" + "Dictionary sizes 12 to 29 are interpreted as powers of two, meaning 2^12 to\n" + "2^29 bytes.\n" + "\nThe bidimensional parameter space of LZMA can't be mapped to a linear scale\n" + "optimal for all files. If your files are large, very repetitive, etc, you\n" + "may need to use the options --dictionary-size and --match-length directly\n" + "to achieve optimal performance.\n" "\nTo extract all the files from archive 'foo.tar.lz', use the commands\n" "'tar -xf foo.tar.lz' or 'plzip -cd foo.tar.lz | tar -xf -'.\n" - "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" - "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" - "invalid input file, 3 for an internal consistency error (e.g., bug) which\n" - "caused plzip to panic.\n" + "\nExit status: 0 for a normal exit, 1 for environmental problems\n" + "(file not found, invalid command-line options, I/O errors, etc), 2 to\n" + "indicate a corrupt or invalid input file, 3 for an internal consistency\n" + "error (e.g., bug) which caused plzip to panic.\n" "\nReport bugs to lzip-bug@nongnu.org\n" "Plzip home page: http://www.nongnu.org/lzip/plzip.html\n" ); } +void show_lzlib_version() + { + std::printf( "Using lzlib %s\n", LZ_version() ); +#if !defined LZ_API_VERSION + std::fputs( "LZ_API_VERSION is not defined.\n", stdout ); +#elif LZ_API_VERSION >= 1012 + std::printf( "Using LZ_API_VERSION = %u\n", LZ_api_version() ); +#else + std::printf( "Compiled with LZ_API_VERSION = %u. " + "Using an unknown LZ_API_VERSION\n", LZ_API_VERSION ); +#endif + } + + void show_version() { std::printf( "%s %s\n", program_name, PROGVERSION ); std::printf( "Copyright (C) 2009 Laszlo Ersek.\n" ); std::printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year ); - std::printf( "Using lzlib %s\n", LZ_version() ); std::printf( "License GPLv2+: GNU GPL version 2 or later \n" "This is free software: you are free to change and redistribute it.\n" "There is NO WARRANTY, to the extent permitted by law.\n" ); + show_lzlib_version(); } @@ -226,18 +241,7 @@ int check_lib() std::printf( "warning: LZ_API_VERSION != LZ_api_version() (%u vs %u)\n", LZ_API_VERSION, LZ_api_version() ); } #endif - if( verbosity >= 1 ) - { - std::printf( "Using lzlib %s\n", LZ_version() ); -#if !defined LZ_API_VERSION - std::fputs( "LZ_API_VERSION is not defined.\n", stdout ); -#elif LZ_API_VERSION >= 1012 - std::printf( "Using LZ_API_VERSION = %u\n", LZ_api_version() ); -#else - std::printf( "Compiled with LZ_API_VERSION = %u. " - "Using an unknown LZ_API_VERSION\n", LZ_API_VERSION ); -#endif - } + if( verbosity >= 1 ) show_lzlib_version(); return retval; } @@ -267,16 +271,15 @@ const char * bad_version( const unsigned version ) const char * format_ds( const unsigned dictionary_size ) { - enum { bufsize = 16, factor = 1024 }; + enum { bufsize = 16, factor = 1024, n = 3 }; static char buf[bufsize]; - const char * const prefix[8] = - { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; + const char * const prefix[n] = { "Ki", "Mi", "Gi" }; const char * p = ""; const char * np = " "; unsigned num = dictionary_size; bool exact = ( num % factor == 0 ); - for( int i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) + for( int i = 0; i < n && ( num > 9999 || ( exact && num >= factor ) ); ++i ) { num /= factor; if( num % factor != 0 ) exact = false; p = prefix[i]; np = ""; } snprintf( buf, bufsize, "%s%4u %sB", np, num, p ); @@ -291,12 +294,12 @@ void show_header( const unsigned dictionary_size ) namespace { -// separate large numbers >= 100_000 in groups of 3 digits using '_' +// separate numbers of 5 or more digits in groups of 3 digits using '_' const char * format_num3( unsigned long long num ) { - const char * const si_prefix = "kMGTPEZY"; - const char * const binary_prefix = "KMGTPEZY"; - enum { buffers = 8, bufsize = 4 * sizeof (long long) }; + enum { buffers = 8, bufsize = 4 * sizeof num, n = 10 }; + const char * const si_prefix = "kMGTPEZYRQ"; + const char * const binary_prefix = "KMGTPEZYRQ"; static char buffer[buffers][bufsize]; // circle of static buffers for printf static int current = 0; @@ -306,15 +309,15 @@ const char * format_num3( unsigned long long num ) if( num > 1024 ) { char prefix = 0; // try binary first, then si - for( int i = 0; i < 8 && num >= 1024 && num % 1024 == 0; ++i ) + for( int i = 0; i < n && num != 0 && num % 1024 == 0; ++i ) { num /= 1024; prefix = binary_prefix[i]; } if( prefix ) *(--p) = 'i'; else - for( int i = 0; i < 8 && num >= 1000 && num % 1000 == 0; ++i ) + for( int i = 0; i < n && num != 0 && num % 1000 == 0; ++i ) { num /= 1000; prefix = si_prefix[i]; } if( prefix ) *(--p) = prefix; } - const bool split = num >= 100000; + const bool split = num >= 10000; for( int i = 0; ; ) { @@ -325,6 +328,16 @@ const char * format_num3( unsigned long long num ) } +void show_option_error( const char * const arg, const char * const msg, + const char * const option_name ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: '%s': %s option '%s'.\n", + program_name, arg, msg, option_name ); + } + + +// Recognized formats: k, Ki, [MGTPEZYRQ][i] unsigned long long getnum( const char * const arg, const char * const option_name, const unsigned long long llimit, @@ -334,12 +347,8 @@ unsigned long long getnum( const char * const arg, errno = 0; unsigned long long result = strtoull( arg, &tail, 0 ); if( tail == arg ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Bad or missing numerical argument in " - "option '%s'.\n", program_name, option_name ); - std::exit( 1 ); - } + { show_option_error( arg, "Bad or missing numerical argument in", + option_name ); std::exit( 1 ); } if( !errno && tail[0] ) { @@ -347,6 +356,8 @@ unsigned long long getnum( const char * const arg, int exponent = 0; // 0 = bad multiplier switch( tail[0] ) { + case 'Q': exponent = 10; break; + case 'R': exponent = 9; break; case 'Y': exponent = 8; break; case 'Z': exponent = 7; break; case 'E': exponent = 6; break; @@ -358,12 +369,8 @@ unsigned long long getnum( const char * const arg, case 'k': if( factor == 1000 ) exponent = 1; break; } if( exponent <= 0 ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Bad multiplier in numerical argument of " - "option '%s'.\n", program_name, option_name ); - std::exit( 1 ); - } + { show_option_error( arg, "Bad multiplier in numerical argument of", + option_name ); std::exit( 1 ); } for( int i = 0; i < exponent; ++i ) { if( ulimit / factor >= result ) result *= factor; @@ -374,8 +381,8 @@ unsigned long long getnum( const char * const arg, if( errno ) { if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Numerical argument out of limits [%s,%s] " - "in option '%s'.\n", program_name, format_num3( llimit ), + std::fprintf( stderr, "%s: '%s': Value out of limits [%s,%s] in " + "option '%s'.\n", program_name, arg, format_num3( llimit ), format_num3( ulimit ), option_name ); std::exit( 1 ); } @@ -447,7 +454,7 @@ void set_d_outname( const std::string & name, const int eindex ) } output_filename = name; output_filename += ".out"; if( verbosity >= 1 ) - std::fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'\n", + std::fprintf( stderr, "%s: %s: Can't guess original name -- using '%s'\n", program_name, name.c_str(), output_filename.c_str() ); } @@ -469,9 +476,9 @@ int open_instream( const char * const name, struct stat * const in_statsp, if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || one_to_one ) ) ) { if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n", + std::fprintf( stderr, "%s: %s: Input file is not a regular file%s.\n", program_name, name, ( can_read && one_to_one ) ? - ",\n and neither '-c' nor '-o' were specified" : "" ); + ",\n and neither '-c' nor '-o' were specified" : "" ); close( infd ); infd = -1; } @@ -488,7 +495,7 @@ int open_instream2( const char * const name, struct stat * const in_statsp, if( program_mode == m_compress && !recompress && eindex >= 0 ) { if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Input file '%s' already has '%s' suffix.\n", + std::fprintf( stderr, "%s: %s: Input file already has '%s' suffix.\n", program_name, name, known_extensions[eindex].from ); return -1; } @@ -496,6 +503,33 @@ int open_instream2( const char * const name, struct stat * const in_statsp, } +bool make_dirs( const std::string & name ) + { + int i = name.size(); + while( i > 0 && name[i-1] != '/' ) --i; // remove last component + while( i > 0 && name[i-1] == '/' ) --i; // remove slash(es) + const int dirsize = i; // size of dirname without trailing slash(es) + + for( i = 0; i < dirsize; ) // if dirsize == 0, dirname is '/' or empty + { + while( i < dirsize && name[i] == '/' ) ++i; + const int first = i; + while( i < dirsize && name[i] != '/' ) ++i; + if( first < i ) + { + const std::string partial( name, 0, i ); + const mode_t mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH; + struct stat st; + if( stat( partial.c_str(), &st ) == 0 ) + { if( !S_ISDIR( st.st_mode ) ) { errno = ENOTDIR; return false; } } + else if( mkdir( partial.c_str(), mode ) != 0 && errno != EEXIST ) + return false; // if EEXIST, another process created the dir + } + } + return true; + } + + bool open_outstream( const bool force, const bool protect ) { const mode_t usr_rw = S_IRUSR | S_IWUSR; @@ -504,18 +538,21 @@ bool open_outstream( const bool force, const bool protect ) int flags = O_CREAT | O_WRONLY | O_BINARY; if( force ) flags |= O_TRUNC; else flags |= O_EXCL; - outfd = open( output_filename.c_str(), flags, outfd_mode ); - if( outfd >= 0 ) delete_output_on_interrupt = true; - else if( verbosity >= 0 ) - { + outfd = -1; + if( output_filename.size() && + output_filename[output_filename.size()-1] == '/' ) errno = EISDIR; + else { + if( !protect && !make_dirs( output_filename ) ) + { show_file_error( output_filename.c_str(), + "Error creating intermediate directory", errno ); return false; } + outfd = open( output_filename.c_str(), flags, outfd_mode ); + if( outfd >= 0 ) { delete_output_on_interrupt = true; return true; } if( errno == EEXIST ) - std::fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n", - program_name, output_filename.c_str() ); - else - std::fprintf( stderr, "%s: Can't create output file '%s': %s\n", - program_name, output_filename.c_str(), std::strerror( errno ) ); + { show_file_error( output_filename.c_str(), + "Output file already exists, skipping." ); return false; } } - return ( outfd >= 0 ); + show_file_error( output_filename.c_str(), "Can't create output file", errno ); + return false; } @@ -545,13 +582,13 @@ void cleanup_and_fail( const int retval ) { delete_output_on_interrupt = false; if( saved_verbosity >= 0 ) - std::fprintf( stderr, "%s: Deleting output file '%s', if it exists.\n", + std::fprintf( stderr, "%s: %s: Deleting output file, if it exists.\n", program_name, output_filename.c_str() ); if( outfd >= 0 ) { close( outfd ); outfd = -1; } if( std::remove( output_filename.c_str() ) != 0 && errno != ENOENT && saved_verbosity >= 0 ) - std::fprintf( stderr, "%s: WARNING: deletion of output file " - "(apparently) failed.\n", program_name ); + std::fprintf( stderr, "%s: warning: deletion of output file failed: %s\n", + program_name, std::strerror( errno ) ); } std::exit( retval ); } @@ -596,7 +633,7 @@ void close_and_set_permissions( const struct stat * const in_statsp ) if( in_statsp ) { const mode_t mode = in_statsp->st_mode; - // fchown will in many cases return with EPERM, which can be safely ignored. + // fchown in many cases returns with EPERM, which can be safely ignored. if( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) == 0 ) { if( fchmod( outfd, mode ) != 0 ) warning = true; } else @@ -605,10 +642,8 @@ void close_and_set_permissions( const struct stat * const in_statsp ) warning = true; } if( close( outfd ) != 0 ) - { - show_error( "Error closing output file", errno ); - cleanup_and_fail( 1 ); - } + { show_file_error( output_filename.c_str(), "Error closing output file", + errno ); cleanup_and_fail( 1 ); } outfd = -1; delete_output_on_interrupt = false; if( in_statsp ) @@ -619,7 +654,8 @@ void close_and_set_permissions( const struct stat * const in_statsp ) if( utime( output_filename.c_str(), &t ) != 0 ) warning = true; } if( warning && verbosity >= 1 ) - show_error( "Can't change output file attributes." ); + show_file_error( output_filename.c_str(), + "warning: can't change output file attributes", errno ); } } // end namespace @@ -708,8 +744,8 @@ long sysconf( int flag ) int main( const int argc, const char * const argv[] ) { - /* Mapping from gzip/bzip2 style 1..9 compression modes - to the corresponding LZMA compression modes. */ + /* Mapping from gzip/bzip2 style 0..9 compression levels to the + corresponding LZMA compression parameters. */ const Lzma_options option_mapping[] = { { 65535, 16 }, // -0 (65535,16 chooses fast encoder) @@ -730,10 +766,9 @@ int main( const int argc, const char * const argv[] ) int in_slots = 4; int out_slots = 64; Mode program_mode = m_compress; + Cl_options cl_opts; // command-line options bool force = false; - bool ignore_trailing = true; bool keep_input_files = false; - bool loose_trailing = false; bool recompress = false; bool to_stdout = false; if( argc > 0 ) invocation_name = argv[0]; @@ -799,7 +834,7 @@ int main( const int argc, const char * const argv[] ) case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': encoder_options = option_mapping[code-'0']; break; - case 'a': ignore_trailing = false; break; + case 'a': cl_opts.ignore_trailing = false; break; case 'b': break; case 'B': data_size = getnum( arg, pn, 2 * LZ_min_dictionary_size(), 2 * LZ_max_dictionary_size() ); break; @@ -826,9 +861,9 @@ int main( const int argc, const char * const argv[] ) case opt_chk: return check_lib(); case opt_dbg: debug_level = getnum( arg, pn, 0, 3 ); break; case opt_in: in_slots = getnum( arg, pn, 1, 64 ); break; - case opt_lt: loose_trailing = true; break; + case opt_lt: cl_opts.loose_trailing = true; break; case opt_out: out_slots = getnum( arg, pn, 1, 1024 ); break; - default : internal_error( "uncaught option." ); + default: internal_error( "uncaught option." ); } } // end process options @@ -850,8 +885,7 @@ int main( const int argc, const char * const argv[] ) } if( filenames.empty() ) filenames.push_back("-"); - if( program_mode == m_list ) - return list_files( filenames, ignore_trailing, loose_trailing ); + if( program_mode == m_list ) return list_files( filenames, cl_opts ); const bool fast = encoder_options.dictionary_size == 65535 && encoder_options.match_len_limit == 16; @@ -896,11 +930,11 @@ int main( const int argc, const char * const argv[] ) int retval = 0; const bool one_to_one = !to_stdout && program_mode != m_test && !to_file; bool stdin_used = false; + struct stat in_stats; for( unsigned i = 0; i < filenames.size(); ++i ) { std::string input_filename; int infd; - struct stat in_stats; pp.set_name( filenames[i] ); if( filenames[i] == "-" ) @@ -917,7 +951,7 @@ int main( const int argc, const char * const argv[] ) eindex, one_to_one, recompress ); if( infd < 0 ) { set_retval( retval, 1 ); continue; } if( !check_tty_in( pp.name(), infd, program_mode, retval ) ) continue; - if( one_to_one ) // open outfd after verifying infd + if( one_to_one ) // open outfd after checking infd { if( program_mode == m_compress ) set_c_outname( input_filename, true, true ); @@ -930,7 +964,7 @@ int main( const int argc, const char * const argv[] ) if( one_to_one && !check_tty_out( program_mode ) ) { set_retval( retval, 1 ); return retval; } // don't delete a tty - if( to_file && outfd < 0 ) // open outfd after verifying infd + if( to_file && outfd < 0 ) // open outfd after checking infd { if( program_mode == m_compress ) set_c_outname( default_output_filename, filenames_given, false ); @@ -950,9 +984,9 @@ int main( const int argc, const char * const argv[] ) encoder_options.match_len_limit, num_workers, infd, outfd, pp, debug_level ); else - tmp = decompress( cfile_size, num_workers, infd, outfd, pp, - debug_level, in_slots, out_slots, ignore_trailing, - loose_trailing, infd_isreg, one_to_one ); + tmp = decompress( cfile_size, num_workers, infd, outfd, cl_opts, pp, + debug_level, in_slots, out_slots, infd_isreg, + one_to_one ); if( close( infd ) != 0 ) { show_file_error( pp.name(), "Error closing input file", errno ); set_retval( tmp, 1 ); } @@ -966,7 +1000,9 @@ int main( const int argc, const char * const argv[] ) if( input_filename.size() && !keep_input_files && one_to_one ) std::remove( input_filename.c_str() ); } - if( delete_output_on_interrupt ) close_and_set_permissions( 0 ); // -o + if( delete_output_on_interrupt ) // -o + close_and_set_permissions( ( retval == 0 && !stdin_used && + filenames_given && filenames.size() == 1 ) ? &in_stats : 0 ); else if( outfd >= 0 && close( outfd ) != 0 ) // -c { show_error( "Error closing stdout", errno ); -- cgit v1.2.3