diff options
Diffstat (limited to '')
-rw-r--r-- | main.cc | 355 |
1 files changed, 192 insertions, 163 deletions
@@ -1,25 +1,25 @@ -/* Plzip - Massively parallel implementation of lzip - Copyright (C) 2009 Laszlo Ersek. - Copyright (C) 2009-2019 Antonio Diaz Diaz. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. +/* Plzip - Massively parallel implementation of lzip + Copyright (C) 2009 Laszlo Ersek. + Copyright (C) 2009-2021 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* - Exit status: 0 for a normal exit, 1 for environmental problems - (file not found, invalid flags, I/O errors, etc), 2 to indicate a - corrupt or invalid input file, 3 for an internal consistency error - (eg, bug) which caused plzip to panic. + Exit status: 0 for a normal exit, 1 for environmental problems + (file not found, invalid flags, I/O errors, etc), 2 to indicate a + corrupt or invalid input file, 3 for an internal consistency error + (eg, bug) which caused plzip to panic. */ #define _FILE_OFFSET_BITS 64 @@ -34,7 +34,6 @@ #include <string> #include <vector> #include <fcntl.h> -#include <pthread.h> #include <stdint.h> #include <unistd.h> #include <utime.h> @@ -73,8 +72,8 @@ int verbosity = 0; namespace { const char * const program_name = "plzip"; -const char * const program_year = "2019"; -const char * invocation_name = 0; +const char * const program_year = "2021"; +const char * invocation_name = program_name; // default value const struct { const char * from; const char * to; } known_extensions[] = { { ".lz", "" }, @@ -99,20 +98,22 @@ bool delete_output_on_interrupt = false; void show_help( const long num_online ) { std::printf( "Plzip is a massively parallel (multi-threaded) implementation of lzip, fully\n" - "compatible with lzip 1.4 or newer. Plzip uses the lzlib compression library.\n" - "\nLzip is a lossless data compressor with a user interface similar to the\n" - "one of gzip or bzip2. Lzip can compress about as fast as gzip (lzip -0)\n" - "or compress most files more than bzip2 (lzip -9). Decompression speed is\n" - "intermediate between gzip and bzip2. Lzip is better than gzip and bzip2\n" - "from a data recovery perspective. Lzip has been designed, written and\n" - "tested with great care to replace gzip and bzip2 as the standard\n" - "general-purpose compressed format for unix-like systems.\n" - "\nPlzip can compress/decompress large files on multiprocessor machines\n" - "much faster than lzip, at the cost of a slightly reduced compression\n" - "ratio (0.4 to 2 percent larger compressed files). Note that the number\n" - "of usable threads is limited by file size; on files larger than a few GB\n" - "plzip can use hundreds of processors, but on files of only a few MB\n" - "plzip is no faster than lzip.\n" + "compatible with lzip 1.4 or newer. Plzip uses the compression library lzlib.\n" + "\nLzip is a lossless data compressor with a user interface similar to the one\n" + "of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov\n" + "chain-Algorithm' (LZMA) stream format, chosen to maximize safety and\n" + "interoperability. Lzip can compress about as fast as gzip (lzip -0) or\n" + "compress most files more than bzip2 (lzip -9). Decompression speed is\n" + "intermediate between gzip and bzip2. Lzip is better than gzip and bzip2 from\n" + "a data recovery perspective. Lzip has been designed, written, and tested\n" + "with great care to replace gzip and bzip2 as the standard general-purpose\n" + "compressed format for unix-like systems.\n" + "\nPlzip can compress/decompress large files on multiprocessor machines much\n" + "faster than lzip, at the cost of a slightly reduced compression ratio (0.4\n" + "to 2 percent larger compressed files). Note that the number of usable\n" + "threads is limited by file size; on files larger than a few GB plzip can use\n" + "hundreds of processors, but on files of only a few MB plzip is no faster\n" + "than lzip.\n" "\nUsage: %s [options] [files]\n", invocation_name ); std::printf( "\nOptions:\n" " -h, --help display this help and exit\n" @@ -127,7 +128,7 @@ void show_help( const long num_online ) " -l, --list print (un)compressed file sizes\n" " -m, --match-length=<bytes> set match length limit in bytes [36]\n" " -n, --threads=<n> set number of (de)compression threads [%ld]\n" - " -o, --output=<file> if reading standard input, write to <file>\n" + " -o, --output=<file> write to <file>, keep input files\n" " -q, --quiet suppress all messages\n" " -s, --dictionary-size=<bytes> set dictionary size limit in bytes [8 MiB]\n" " -t, --test test compressed file integrity\n" @@ -138,12 +139,13 @@ void show_help( const long num_online ) " --loose-trailing allow trailing data seeming corrupt header\n" " --in-slots=<n> number of 1 MiB input packets buffered [4]\n" " --out-slots=<n> number of 1 MiB output packets buffered [64]\n" - , num_online ); + " --check-lib compare version of lzlib.h with liblz.{a,so}\n", + num_online ); if( verbosity >= 1 ) { - std::printf( " --debug=<level> (0-1) print debug statistics to stderr\n" ); + std::printf( " --debug=<level> print mode(2), debug statistics(1) to stderr\n" ); } - std::printf( "If no file names are given, or if a file is '-', plzip compresses or\n" + std::printf( "\nIf no file names are given, or if a file is '-', plzip compresses or\n" "decompresses from standard input to standard output.\n" "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" @@ -151,8 +153,10 @@ void show_help( const long num_online ) "to 2^29 bytes.\n" "\nThe bidimensional parameter space of LZMA can't be mapped to a linear\n" "scale optimal for all files. If your files are large, very repetitive,\n" - "etc, you may need to use the --dictionary-size and --match-length\n" - "options directly to achieve optimal performance.\n" + "etc, you may need to use the options --dictionary-size and --match-length\n" + "directly to achieve optimal performance.\n" + "\nTo extract all the files from archive 'foo.tar.lz', use the commands\n" + "'tar -xf foo.tar.lz' or 'plzip -cd foo.tar.lz | tar -xf -'.\n" "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" "invalid input file, 3 for an internal consistency error (eg, bug) which\n" @@ -173,6 +177,37 @@ void show_version() "There is NO WARRANTY, to the extent permitted by law.\n" ); } + +int check_lib() + { + bool warning = false; + if( std::strcmp( LZ_version_string, LZ_version() ) != 0 ) + { warning = true; + if( verbosity >= 0 ) + std::printf( "warning: LZ_version_string != LZ_version() (%s vs %s)\n", + LZ_version_string, LZ_version() ); } +#if defined LZ_API_VERSION && LZ_API_VERSION >= 1012 + if( LZ_API_VERSION != LZ_api_version() ) + { warning = true; + if( verbosity >= 0 ) + std::printf( "warning: LZ_API_VERSION != LZ_api_version() (%u vs %u)\n", + LZ_API_VERSION, LZ_api_version() ); } +#endif + if( verbosity >= 1 ) + { + std::printf( "Using lzlib %s\n", LZ_version() ); +#if !defined LZ_API_VERSION + std::fputs( "LZ_API_VERSION is not defined.\n", stdout ); +#elif LZ_API_VERSION >= 1012 + std::printf( "Using LZ_API_VERSION = %u\n", LZ_api_version() ); +#else + std::printf( "Compiled with LZ_API_VERSION = %u. " + "Using an unknown LZ_API_VERSION\n", LZ_API_VERSION ); +#endif + } + return warning; + } + } // end namespace void Pretty_print::operator()( const char * const msg ) const @@ -220,7 +255,7 @@ const char * format_ds( const unsigned dictionary_size ) void show_header( const unsigned dictionary_size ) { - std::fprintf( stderr, "dictionary %s, ", format_ds( dictionary_size ) ); + std::fprintf( stderr, "dict %s, ", format_ds( dictionary_size ) ); } namespace { @@ -313,10 +348,14 @@ int extension_index( const std::string & name ) } -void set_c_outname( const std::string & name, const bool force_ext ) +void set_c_outname( const std::string & name, const bool filenames_given, + const bool force_ext ) { + /* zupdate < 1.9 depends on lzip adding the extension '.lz' to name when + reading from standard input. */ output_filename = name; - if( force_ext || extension_index( output_filename ) < 0 ) + if( force_ext || + ( !filenames_given && extension_index( output_filename ) < 0 ) ) output_filename += known_extensions[0].from; } @@ -342,7 +381,7 @@ void set_d_outname( const std::string & name, const int eindex ) } // end namespace int open_instream( const char * const name, struct stat * const in_statsp, - const bool no_ofile, const bool reg_only ) + const bool one_to_one, const bool reg_only ) { int infd = open( name, O_RDONLY | O_BINARY ); if( infd < 0 ) @@ -354,13 +393,12 @@ int open_instream( const char * const name, struct stat * const in_statsp, const bool can_read = ( i == 0 && !reg_only && ( S_ISBLK( mode ) || S_ISCHR( mode ) || S_ISFIFO( mode ) || S_ISSOCK( mode ) ) ); - if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || !no_ofile ) ) ) + if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || one_to_one ) ) ) { if( verbosity >= 0 ) std::fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n", - program_name, name, - ( can_read && !no_ofile ) ? - ",\n and '--stdout' was not specified" : "" ); + program_name, name, ( can_read && one_to_one ) ? + ",\n and neither '-c' nor '-o' were specified" : "" ); close( infd ); infd = -1; } @@ -372,7 +410,7 @@ namespace { int open_instream2( const char * const name, struct stat * const in_statsp, const Mode program_mode, const int eindex, - const bool recompress, const bool to_stdout ) + const bool one_to_one, const bool recompress ) { if( program_mode == m_compress && !recompress && eindex >= 0 ) { @@ -381,16 +419,15 @@ int open_instream2( const char * const name, struct stat * const in_statsp, program_name, name, known_extensions[eindex].from ); return -1; } - const bool no_ofile = ( to_stdout || program_mode == m_test ); - return open_instream( name, in_statsp, no_ofile, false ); + return open_instream( name, in_statsp, one_to_one, false ); } -bool open_outstream( const bool force, const bool from_stdin ) +bool open_outstream( const bool force, const bool protect ) { const mode_t usr_rw = S_IRUSR | S_IWUSR; const mode_t all_rw = usr_rw | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; - const mode_t outfd_mode = from_stdin ? all_rw : usr_rw; + const mode_t outfd_mode = protect ? usr_rw : all_rw; int flags = O_CREAT | O_WRONLY | O_BINARY; if( force ) flags |= O_TRUNC; else flags |= O_EXCL; @@ -409,25 +446,6 @@ bool open_outstream( const bool force, const bool from_stdin ) } -bool check_tty( const char * const input_filename, const int infd, - const Mode program_mode ) - { - if( program_mode == m_compress && isatty( outfd ) ) - { - show_error( "I won't write compressed data to a terminal.", 0, true ); - return false; - } - if( ( program_mode == m_decompress || program_mode == m_test ) && - isatty( infd ) ) - { - show_file_error( input_filename, - "I won't read compressed data from a terminal." ); - return false; - } - return true; - } - - void set_signals( void (*action)(int) ) { std::signal( SIGHUP, action ); @@ -437,10 +455,10 @@ void set_signals( void (*action)(int) ) } // end namespace -// This can be called from any thread, main thread or sub-threads alike, -// since they all call common helper functions that call cleanup_and_fail() -// in case of an error. -// +/* This can be called from any thread, main thread or sub-threads alike, + since they all call common helper functions like 'xlock' that call + cleanup_and_fail() in case of an error. +*/ void cleanup_and_fail( const int retval ) { // only one thread can delete and exit @@ -474,7 +492,31 @@ extern "C" void signal_handler( int ) } - // Set permissions, owner and times. +bool check_tty_in( const char * const input_filename, const int infd, + const Mode program_mode, int & retval ) + { + if( ( program_mode == m_decompress || program_mode == m_test ) && + isatty( infd ) ) // for example /dev/tty + { show_file_error( input_filename, + "I won't read compressed data from a terminal." ); + close( infd ); set_retval( retval, 1 ); + if( program_mode != m_test ) cleanup_and_fail( retval ); + return false; } + return true; + } + +bool check_tty_out( const Mode program_mode ) + { + if( program_mode == m_compress && isatty( outfd ) ) + { show_file_error( output_filename.size() ? + output_filename.c_str() : "(stdout)", + "I won't write compressed data to a terminal." ); + return false; } + return true; + } + + +// Set permissions, owner, and times. void close_and_set_permissions( const struct stat * const in_statsp ) { bool warning = false; @@ -622,24 +664,20 @@ int main( const int argc, const char * const argv[] ) bool loose_trailing = false; bool recompress = false; bool to_stdout = false; - invocation_name = argv[0]; - - if( LZ_version()[0] < '1' ) - { show_error( "Bad library version. At least lzlib 1.0 is required." ); - return 1; } + if( argc > 0 ) invocation_name = argv[0]; - enum { opt_dbg = 256, opt_in, opt_lt, opt_out }; + enum { opt_chk = 256, opt_dbg, opt_in, opt_lt, opt_out }; const Arg_parser::Option options[] = { { '0', "fast", Arg_parser::no }, - { '1', 0, Arg_parser::no }, - { '2', 0, Arg_parser::no }, - { '3', 0, Arg_parser::no }, - { '4', 0, Arg_parser::no }, - { '5', 0, Arg_parser::no }, - { '6', 0, Arg_parser::no }, - { '7', 0, Arg_parser::no }, - { '8', 0, Arg_parser::no }, + { '1', 0, Arg_parser::no }, + { '2', 0, Arg_parser::no }, + { '3', 0, Arg_parser::no }, + { '4', 0, Arg_parser::no }, + { '5', 0, Arg_parser::no }, + { '6', 0, Arg_parser::no }, + { '7', 0, Arg_parser::no }, + { '8', 0, Arg_parser::no }, { '9', "best", Arg_parser::no }, { 'a', "trailing-error", Arg_parser::no }, { 'b', "member-size", Arg_parser::yes }, @@ -660,11 +698,12 @@ int main( const int argc, const char * const argv[] ) { 't', "test", Arg_parser::no }, { 'v', "verbose", Arg_parser::no }, { 'V', "version", Arg_parser::no }, + { opt_chk, "check-lib", Arg_parser::no }, { opt_dbg, "debug", Arg_parser::yes }, { opt_in, "in-slots", Arg_parser::yes }, { opt_lt, "loose-trailing", Arg_parser::no }, { opt_out, "out-slots", Arg_parser::yes }, - { 0 , 0, Arg_parser::no } }; + { 0, 0, Arg_parser::no } }; const Arg_parser parser( argc, argv, options ); if( parser.error().size() ) // bad option @@ -702,7 +741,8 @@ int main( const int argc, const char * const argv[] ) getnum( arg, LZ_min_match_len_limit(), LZ_max_match_len_limit() ); break; case 'n': num_workers = getnum( arg, 1, max_workers ); break; - case 'o': default_output_filename = sarg; break; + case 'o': if( sarg == "-" ) to_stdout = true; + else { default_output_filename = sarg; } break; case 'q': verbosity = -1; break; case 's': encoder_options.dictionary_size = get_dict_size( arg ); break; @@ -710,6 +750,7 @@ int main( const int argc, const char * const argv[] ) case 't': set_mode( program_mode, m_test ); break; case 'v': if( verbosity < 4 ) ++verbosity; break; case 'V': show_version(); return 0; + case opt_chk: return check_lib(); case opt_dbg: debug_level = getnum( arg, 0, 3 ); break; case opt_in: in_slots = getnum( arg, 1, 64 ); break; case opt_lt: loose_trailing = true; break; @@ -718,6 +759,10 @@ int main( const int argc, const char * const argv[] ) } } // end process options + if( LZ_version()[0] < '1' ) + { show_error( "Wrong library version. At least lzlib 1.0 is required." ); + return 1; } + #if defined(__MSVCRT__) || defined(__OS2__) setmode( STDIN_FILENO, O_BINARY ); setmode( STDOUT_FILENO, O_BINARY ); @@ -734,9 +779,6 @@ int main( const int argc, const char * const argv[] ) if( program_mode == m_list ) return list_files( filenames, ignore_trailing, loose_trailing ); - if( program_mode == m_test ) - outfd = -1; - const bool fast = encoder_options.dictionary_size == 65535 && encoder_options.match_len_limit == 16; if( data_size <= 0 ) @@ -762,112 +804,99 @@ int main( const int argc, const char * const argv[] ) num_workers = std::min( num_online, max_workers ); } - if( !to_stdout && program_mode != m_test && - ( filenames_given || default_output_filename.size() ) ) + if( program_mode == m_test ) to_stdout = false; // apply overrides + if( program_mode == m_test || to_stdout ) default_output_filename.clear(); + + if( to_stdout && program_mode != m_test ) // check tty only once + { outfd = STDOUT_FILENO; if( !check_tty_out( program_mode ) ) return 1; } + else outfd = -1; + + const bool to_file = !to_stdout && program_mode != m_test && + default_output_filename.size(); + if( !to_stdout && program_mode != m_test && ( filenames_given || to_file ) ) set_signals( signal_handler ); Pretty_print pp( filenames ); int failed_tests = 0; int retval = 0; + const bool one_to_one = !to_stdout && program_mode != m_test && !to_file; bool stdin_used = false; for( unsigned i = 0; i < filenames.size(); ++i ) { std::string input_filename; int infd; struct stat in_stats; - output_filename.clear(); - if( filenames[i].empty() || filenames[i] == "-" ) + pp.set_name( filenames[i] ); + if( filenames[i] == "-" ) { if( stdin_used ) continue; else stdin_used = true; infd = STDIN_FILENO; - if( program_mode != m_test ) - { - if( to_stdout || default_output_filename.empty() ) - outfd = STDOUT_FILENO; - else - { - if( program_mode == m_compress ) - set_c_outname( default_output_filename, false ); - else output_filename = default_output_filename; - if( !open_outstream( force, true ) ) - { - if( retval < 1 ) retval = 1; - close( infd ); - continue; - } - } - } + if( !check_tty_in( pp.name(), infd, program_mode, retval ) ) continue; + if( one_to_one ) { outfd = STDOUT_FILENO; output_filename.clear(); } } else { const int eindex = extension_index( input_filename = filenames[i] ); infd = open_instream2( input_filename.c_str(), &in_stats, program_mode, - eindex, recompress, to_stdout ); - if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; } - if( program_mode != m_test ) + eindex, one_to_one, recompress ); + if( infd < 0 ) { set_retval( retval, 1 ); continue; } + if( !check_tty_in( pp.name(), infd, program_mode, retval ) ) continue; + if( one_to_one ) // open outfd after verifying infd { - if( to_stdout ) outfd = STDOUT_FILENO; - else - { - if( program_mode == m_compress ) - set_c_outname( input_filename, true ); - else set_d_outname( input_filename, eindex ); - if( !open_outstream( force, false ) ) - { - if( retval < 1 ) retval = 1; - close( infd ); - continue; - } - } + if( program_mode == m_compress ) + set_c_outname( input_filename, true, true ); + else set_d_outname( input_filename, eindex ); + if( !open_outstream( force, true ) ) + { close( infd ); set_retval( retval, 1 ); continue; } } } - pp.set_name( input_filename ); - if( !check_tty( pp.name(), infd, program_mode ) ) + if( one_to_one && !check_tty_out( program_mode ) ) + { set_retval( retval, 1 ); return retval; } // don't delete a tty + + if( to_file && outfd < 0 ) // open outfd after verifying infd { - if( retval < 1 ) retval = 1; - if( program_mode == m_test ) { close( infd ); continue; } - cleanup_and_fail( retval ); + if( program_mode == m_compress ) set_c_outname( default_output_filename, + filenames_given, false ); + else output_filename = default_output_filename; + if( !open_outstream( force, false ) || !check_tty_out( program_mode ) ) + return 1; // check tty only once and don't try to delete a tty } - const struct stat * const in_statsp = input_filename.size() ? &in_stats : 0; - const bool infd_isreg = in_statsp && S_ISREG( in_statsp->st_mode ); + const struct stat * const in_statsp = + ( input_filename.size() && one_to_one ) ? &in_stats : 0; + const bool infd_isreg = input_filename.size() && S_ISREG( in_stats.st_mode ); const unsigned long long cfile_size = - infd_isreg ? ( in_statsp->st_size + 99 ) / 100 : 0; + infd_isreg ? ( in_stats.st_size + 99 ) / 100 : 0; int tmp; if( program_mode == m_compress ) tmp = compress( cfile_size, data_size, encoder_options.dictionary_size, - encoder_options.match_len_limit, - num_workers, infd, outfd, pp, debug_level ); + encoder_options.match_len_limit, num_workers, + infd, outfd, pp, debug_level ); else - tmp = decompress( cfile_size, num_workers, infd, outfd, pp, debug_level, - in_slots, out_slots, ignore_trailing, loose_trailing, - infd_isreg ); + tmp = decompress( cfile_size, num_workers, infd, outfd, pp, + debug_level, in_slots, out_slots, ignore_trailing, + loose_trailing, infd_isreg, one_to_one ); if( close( infd ) != 0 ) - { - show_error( input_filename.size() ? "Error closing input file" : - "Error closing stdin", errno ); - if( tmp < 1 ) tmp = 1; - } - if( tmp > retval ) retval = tmp; + { show_file_error( pp.name(), "Error closing input file", errno ); + set_retval( tmp, 1 ); } + set_retval( retval, tmp ); if( tmp ) { if( program_mode != m_test ) cleanup_and_fail( retval ); else ++failed_tests; } - if( delete_output_on_interrupt ) + if( delete_output_on_interrupt && one_to_one ) close_and_set_permissions( in_statsp ); - if( input_filename.size() ) - { - if( !keep_input_files && !to_stdout && program_mode != m_test ) - std::remove( input_filename.c_str() ); - } + if( input_filename.size() && !keep_input_files && one_to_one ) + std::remove( input_filename.c_str() ); } - if( outfd >= 0 && close( outfd ) != 0 ) + if( delete_output_on_interrupt ) close_and_set_permissions( 0 ); // -o + else if( outfd >= 0 && close( outfd ) != 0 ) // -c { show_error( "Error closing stdout", errno ); - if( retval < 1 ) retval = 1; + set_retval( retval, 1 ); } if( failed_tests > 0 && verbosity >= 1 && filenames.size() > 1 ) std::fprintf( stderr, "%s: warning: %d %s failed the test.\n", |