diff options
Diffstat (limited to '')
-rw-r--r-- | main.cc | 980 |
1 files changed, 980 insertions, 0 deletions
@@ -0,0 +1,980 @@ +/* Plzip - Massively parallel implementation of lzip + Copyright (C) 2009 Laszlo Ersek. + Copyright (C) 2009-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ +/* + Exit status: 0 for a normal exit, 1 for environmental problems + (file not found, invalid flags, I/O errors, etc), 2 to indicate a + corrupt or invalid input file, 3 for an internal consistency error + (e.g., bug) which caused plzip to panic. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <algorithm> +#include <cerrno> +#include <climits> +#include <csignal> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <string> +#include <vector> +#include <fcntl.h> +#include <stdint.h> +#include <unistd.h> +#include <utime.h> +#include <sys/stat.h> +#include <lzlib.h> +#if defined __MSVCRT__ || defined __OS2__ +#include <io.h> +#if defined __MSVCRT__ +#define fchmod(x,y) 0 +#define fchown(x,y,z) 0 +#define strtoull std::strtoul +#define SIGHUP SIGTERM +#define S_ISSOCK(x) 0 +#ifndef S_IRGRP +#define S_IRGRP 0 +#define S_IWGRP 0 +#define S_IROTH 0 +#define S_IWOTH 0 +#endif +#endif +#endif + +#include "arg_parser.h" +#include "lzip.h" + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +#if CHAR_BIT != 8 +#error "Environments where CHAR_BIT != 8 are not supported." +#endif + +#if ( defined SIZE_MAX && SIZE_MAX < UINT_MAX ) || \ + ( defined SSIZE_MAX && SSIZE_MAX < INT_MAX ) +#error "Environments where 'size_t' is narrower than 'int' are not supported." +#endif + +int verbosity = 0; + +namespace { + +const char * const program_name = "plzip"; +const char * const program_year = "2022"; +const char * invocation_name = program_name; // default value + +const struct { const char * from; const char * to; } known_extensions[] = { + { ".lz", "" }, + { ".tlz", ".tar" }, + { 0, 0 } }; + +struct Lzma_options + { + int dictionary_size; // 4 KiB .. 512 MiB + int match_len_limit; // 5 .. 273 + }; + +enum Mode { m_compress, m_decompress, m_list, m_test }; + +/* Variables used in signal handler context. + They are not declared volatile because the handler never returns. */ +std::string output_filename; +int outfd = -1; +bool delete_output_on_interrupt = false; + + +void show_help( const long num_online ) + { + std::printf( "Plzip is a massively parallel (multi-threaded) implementation of lzip, fully\n" + "compatible with lzip 1.4 or newer. Plzip uses the compression library lzlib.\n" + "\nLzip is a lossless data compressor with a user interface similar to the one\n" + "of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov\n" + "chain-Algorithm' (LZMA) stream format and provides a 3 factor integrity\n" + "checking to maximize interoperability and optimize safety. Lzip can compress\n" + "about as fast as gzip (lzip -0) or compress most files more than bzip2\n" + "(lzip -9). Decompression speed is intermediate between gzip and bzip2.\n" + "Lzip is better than gzip and bzip2 from a data recovery perspective. Lzip\n" + "has been designed, written, and tested with great care to replace gzip and\n" + "bzip2 as the standard general-purpose compressed format for unix-like\n" + "systems.\n" + "\nPlzip can compress/decompress large files on multiprocessor machines much\n" + "faster than lzip, at the cost of a slightly reduced compression ratio (0.4\n" + "to 2 percent larger compressed files). Note that the number of usable\n" + "threads is limited by file size; on files larger than a few GB plzip can use\n" + "hundreds of processors, but on files of only a few MB plzip is no faster\n" + "than lzip.\n" + "\nUsage: %s [options] [files]\n", invocation_name ); + std::printf( "\nOptions:\n" + " -h, --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -a, --trailing-error exit with error status if trailing data\n" + " -B, --data-size=<bytes> set size of input data blocks [2x8=16 MiB]\n" + " -c, --stdout write to standard output, keep input files\n" + " -d, --decompress decompress\n" + " -f, --force overwrite existing output files\n" + " -F, --recompress force re-compression of compressed files\n" + " -k, --keep keep (don't delete) input files\n" + " -l, --list print (un)compressed file sizes\n" + " -m, --match-length=<bytes> set match length limit in bytes [36]\n" + " -n, --threads=<n> set number of (de)compression threads [%ld]\n" + " -o, --output=<file> write to <file>, keep input files\n" + " -q, --quiet suppress all messages\n" + " -s, --dictionary-size=<bytes> set dictionary size limit in bytes [8 MiB]\n" + " -t, --test test compressed file integrity\n" + " -v, --verbose be verbose (a 2nd -v gives more)\n" + " -0 .. -9 set compression level [default 6]\n" + " --fast alias for -0\n" + " --best alias for -9\n" + " --loose-trailing allow trailing data seeming corrupt header\n" + " --in-slots=<n> number of 1 MiB input packets buffered [4]\n" + " --out-slots=<n> number of 1 MiB output packets buffered [64]\n" + " --check-lib compare version of lzlib.h with liblz.{a,so}\n", + num_online ); + if( verbosity >= 1 ) + { + std::printf( " --debug=<level> print mode(2), debug statistics(1) to stderr\n" ); + } + std::printf( "\nIf no file names are given, or if a file is '-', plzip compresses or\n" + "decompresses from standard input to standard output.\n" + "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" + "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" + "Dictionary sizes 12 to 29 are interpreted as powers of two, meaning 2^12\n" + "to 2^29 bytes.\n" + "\nThe bidimensional parameter space of LZMA can't be mapped to a linear\n" + "scale optimal for all files. If your files are large, very repetitive,\n" + "etc, you may need to use the options --dictionary-size and --match-length\n" + "directly to achieve optimal performance.\n" + "\nTo extract all the files from archive 'foo.tar.lz', use the commands\n" + "'tar -xf foo.tar.lz' or 'plzip -cd foo.tar.lz | tar -xf -'.\n" + "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" + "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" + "invalid input file, 3 for an internal consistency error (e.g., bug) which\n" + "caused plzip to panic.\n" + "\nReport bugs to lzip-bug@nongnu.org\n" + "Plzip home page: http://www.nongnu.org/lzip/plzip.html\n" ); + } + + +void show_version() + { + std::printf( "%s %s\n", program_name, PROGVERSION ); + std::printf( "Copyright (C) 2009 Laszlo Ersek.\n" ); + std::printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year ); + std::printf( "Using lzlib %s\n", LZ_version() ); + std::printf( "License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n" + "This is free software: you are free to change and redistribute it.\n" + "There is NO WARRANTY, to the extent permitted by law.\n" ); + } + + +int check_lzlib_ver() // <major>.<minor> or <major>.<minor>[a-z.-]* + { +#if defined LZ_API_VERSION && LZ_API_VERSION >= 1012 + const unsigned char * p = (unsigned char *)LZ_version_string; + unsigned major = 0, minor = 0; + while( major < 100000 && isdigit( *p ) ) + { major *= 10; major += *p - '0'; ++p; } + if( *p == '.' ) ++p; + else +out: { show_error( "Invalid LZ_version_string in lzlib.h" ); return 2; } + while( minor < 100 && isdigit( *p ) ) + { minor *= 10; minor += *p - '0'; ++p; } + if( *p && *p != '-' && *p != '.' && !std::islower( *p ) ) goto out; + const unsigned version = major * 1000 + minor; + if( LZ_API_VERSION != version ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Version mismatch in lzlib.h: " + "LZ_API_VERSION = %u, should be %u.\n", + program_name, LZ_API_VERSION, version ); + return 2; + } +#endif + return 0; + } + + +int check_lib() + { + int retval = check_lzlib_ver(); + if( std::strcmp( LZ_version_string, LZ_version() ) != 0 ) + { set_retval( retval, 1 ); + if( verbosity >= 0 ) + std::printf( "warning: LZ_version_string != LZ_version() (%s vs %s)\n", + LZ_version_string, LZ_version() ); } +#if defined LZ_API_VERSION && LZ_API_VERSION >= 1012 + if( LZ_API_VERSION != LZ_api_version() ) + { set_retval( retval, 1 ); + if( verbosity >= 0 ) + std::printf( "warning: LZ_API_VERSION != LZ_api_version() (%u vs %u)\n", + LZ_API_VERSION, LZ_api_version() ); } +#endif + if( verbosity >= 1 ) + { + std::printf( "Using lzlib %s\n", LZ_version() ); +#if !defined LZ_API_VERSION + std::fputs( "LZ_API_VERSION is not defined.\n", stdout ); +#elif LZ_API_VERSION >= 1012 + std::printf( "Using LZ_API_VERSION = %u\n", LZ_api_version() ); +#else + std::printf( "Compiled with LZ_API_VERSION = %u. " + "Using an unknown LZ_API_VERSION\n", LZ_API_VERSION ); +#endif + } + return retval; + } + +} // end namespace + +void Pretty_print::operator()( const char * const msg ) const + { + if( verbosity < 0 ) return; + if( first_post ) + { + first_post = false; + std::fputs( padded_name.c_str(), stderr ); + if( !msg ) std::fflush( stderr ); + } + if( msg ) std::fprintf( stderr, "%s\n", msg ); + } + + +const char * bad_version( const unsigned version ) + { + static char buf[80]; + snprintf( buf, sizeof buf, "Version %u member format not supported.", + version ); + return buf; + } + + +const char * format_ds( const unsigned dictionary_size ) + { + enum { bufsize = 16, factor = 1024 }; + static char buf[bufsize]; + const char * const prefix[8] = + { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; + const char * p = ""; + const char * np = " "; + unsigned num = dictionary_size; + bool exact = ( num % factor == 0 ); + + for( int i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) + { num /= factor; if( num % factor != 0 ) exact = false; + p = prefix[i]; np = ""; } + snprintf( buf, bufsize, "%s%4u %sB", np, num, p ); + return buf; + } + + +void show_header( const unsigned dictionary_size ) + { + std::fprintf( stderr, "dict %s, ", format_ds( dictionary_size ) ); + } + +namespace { + +// separate large numbers >= 100_000 in groups of 3 digits using '_' +const char * format_num3( unsigned long long num ) + { + const char * const si_prefix = "kMGTPEZY"; + const char * const binary_prefix = "KMGTPEZY"; + enum { buffers = 8, bufsize = 4 * sizeof (long long) }; + static char buffer[buffers][bufsize]; // circle of static buffers for printf + static int current = 0; + + char * const buf = buffer[current++]; current %= buffers; + char * p = buf + bufsize - 1; // fill the buffer backwards + *p = 0; // terminator + if( num > 1024 ) + { + char prefix = 0; // try binary first, then si + for( int i = 0; i < 8 && num >= 1024 && num % 1024 == 0; ++i ) + { num /= 1024; prefix = binary_prefix[i]; } + if( prefix ) *(--p) = 'i'; + else + for( int i = 0; i < 8 && num >= 1000 && num % 1000 == 0; ++i ) + { num /= 1000; prefix = si_prefix[i]; } + if( prefix ) *(--p) = prefix; + } + const bool split = num >= 100000; + + for( int i = 0; ; ) + { + *(--p) = num % 10 + '0'; num /= 10; if( num == 0 ) break; + if( split && ++i >= 3 ) { i = 0; *(--p) = '_'; } + } + return p; + } + + +unsigned long long getnum( const char * const arg, + const char * const option_name, + const unsigned long long llimit, + const unsigned long long ulimit ) + { + char * tail; + errno = 0; + unsigned long long result = strtoull( arg, &tail, 0 ); + if( tail == arg ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Bad or missing numerical argument in " + "option '%s'.\n", program_name, option_name ); + std::exit( 1 ); + } + + if( !errno && tail[0] ) + { + const unsigned factor = ( tail[1] == 'i' ) ? 1024 : 1000; + int exponent = 0; // 0 = bad multiplier + switch( tail[0] ) + { + case 'Y': exponent = 8; break; + case 'Z': exponent = 7; break; + case 'E': exponent = 6; break; + case 'P': exponent = 5; break; + case 'T': exponent = 4; break; + case 'G': exponent = 3; break; + case 'M': exponent = 2; break; + case 'K': if( factor == 1024 ) exponent = 1; break; + case 'k': if( factor == 1000 ) exponent = 1; break; + } + if( exponent <= 0 ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Bad multiplier in numerical argument of " + "option '%s'.\n", program_name, option_name ); + std::exit( 1 ); + } + for( int i = 0; i < exponent; ++i ) + { + if( ulimit / factor >= result ) result *= factor; + else { errno = ERANGE; break; } + } + } + if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE; + if( errno ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Numerical argument out of limits [%s,%s] " + "in option '%s'.\n", program_name, format_num3( llimit ), + format_num3( ulimit ), option_name ); + std::exit( 1 ); + } + return result; + } + + +int get_dict_size( const char * const arg, const char * const option_name ) + { + char * tail; + const long bits = std::strtol( arg, &tail, 0 ); + if( bits >= LZ_min_dictionary_bits() && + bits <= LZ_max_dictionary_bits() && *tail == 0 ) + return 1 << bits; + int dictionary_size = getnum( arg, option_name, LZ_min_dictionary_size(), + LZ_max_dictionary_size() ); + if( dictionary_size == 65535 ) ++dictionary_size; // no fast encoder + return dictionary_size; + } + + +void set_mode( Mode & program_mode, const Mode new_mode ) + { + if( program_mode != m_compress && program_mode != new_mode ) + { + show_error( "Only one operation can be specified.", 0, true ); + std::exit( 1 ); + } + program_mode = new_mode; + } + + +int extension_index( const std::string & name ) + { + for( int eindex = 0; known_extensions[eindex].from; ++eindex ) + { + const std::string ext( known_extensions[eindex].from ); + if( name.size() > ext.size() && + name.compare( name.size() - ext.size(), ext.size(), ext ) == 0 ) + return eindex; + } + return -1; + } + + +void set_c_outname( const std::string & name, const bool filenames_given, + const bool force_ext ) + { + /* zupdate < 1.9 depends on lzip adding the extension '.lz' to name when + reading from standard input. */ + output_filename = name; + if( force_ext || + ( !filenames_given && extension_index( output_filename ) < 0 ) ) + output_filename += known_extensions[0].from; + } + + +void set_d_outname( const std::string & name, const int eindex ) + { + if( eindex >= 0 ) + { + const std::string from( known_extensions[eindex].from ); + if( name.size() > from.size() ) + { + output_filename.assign( name, 0, name.size() - from.size() ); + output_filename += known_extensions[eindex].to; + return; + } + } + output_filename = name; output_filename += ".out"; + if( verbosity >= 1 ) + std::fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'\n", + program_name, name.c_str(), output_filename.c_str() ); + } + +} // end namespace + +int open_instream( const char * const name, struct stat * const in_statsp, + const bool one_to_one, const bool reg_only ) + { + int infd = open( name, O_RDONLY | O_BINARY ); + if( infd < 0 ) + show_file_error( name, "Can't open input file", errno ); + else + { + const int i = fstat( infd, in_statsp ); + const mode_t mode = in_statsp->st_mode; + const bool can_read = ( i == 0 && !reg_only && + ( S_ISBLK( mode ) || S_ISCHR( mode ) || + S_ISFIFO( mode ) || S_ISSOCK( mode ) ) ); + if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || one_to_one ) ) ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n", + program_name, name, ( can_read && one_to_one ) ? + ",\n and neither '-c' nor '-o' were specified" : "" ); + close( infd ); + infd = -1; + } + } + return infd; + } + +namespace { + +int open_instream2( const char * const name, struct stat * const in_statsp, + const Mode program_mode, const int eindex, + const bool one_to_one, const bool recompress ) + { + if( program_mode == m_compress && !recompress && eindex >= 0 ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Input file '%s' already has '%s' suffix.\n", + program_name, name, known_extensions[eindex].from ); + return -1; + } + return open_instream( name, in_statsp, one_to_one, false ); + } + + +bool open_outstream( const bool force, const bool protect ) + { + const mode_t usr_rw = S_IRUSR | S_IWUSR; + const mode_t all_rw = usr_rw | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; + const mode_t outfd_mode = protect ? usr_rw : all_rw; + int flags = O_CREAT | O_WRONLY | O_BINARY; + if( force ) flags |= O_TRUNC; else flags |= O_EXCL; + + outfd = open( output_filename.c_str(), flags, outfd_mode ); + if( outfd >= 0 ) delete_output_on_interrupt = true; + else if( verbosity >= 0 ) + { + if( errno == EEXIST ) + std::fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n", + program_name, output_filename.c_str() ); + else + std::fprintf( stderr, "%s: Can't create output file '%s': %s\n", + program_name, output_filename.c_str(), std::strerror( errno ) ); + } + return ( outfd >= 0 ); + } + + +void set_signals( void (*action)(int) ) + { + std::signal( SIGHUP, action ); + std::signal( SIGINT, action ); + std::signal( SIGTERM, action ); + } + +} // end namespace + +/* This can be called from any thread, main thread or sub-threads alike, + since they all call common helper functions like 'xlock' that call + cleanup_and_fail() in case of an error. +*/ +void cleanup_and_fail( const int retval ) + { + // only one thread can delete and exit + static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + + set_signals( SIG_IGN ); // ignore signals + pthread_mutex_lock( &mutex ); // ignore errors to avoid loop + const int saved_verbosity = verbosity; + verbosity = -1; // suppress messages from other threads + if( delete_output_on_interrupt ) + { + delete_output_on_interrupt = false; + if( saved_verbosity >= 0 ) + std::fprintf( stderr, "%s: Deleting output file '%s', if it exists.\n", + program_name, output_filename.c_str() ); + if( outfd >= 0 ) { close( outfd ); outfd = -1; } + if( std::remove( output_filename.c_str() ) != 0 && errno != ENOENT && + saved_verbosity >= 0 ) + std::fprintf( stderr, "%s: WARNING: deletion of output file " + "(apparently) failed.\n", program_name ); + } + std::exit( retval ); + } + +namespace { + +extern "C" void signal_handler( int ) + { + show_error( "Control-C or similar caught, quitting." ); + cleanup_and_fail( 1 ); + } + + +bool check_tty_in( const char * const input_filename, const int infd, + const Mode program_mode, int & retval ) + { + if( ( program_mode == m_decompress || program_mode == m_test ) && + isatty( infd ) ) // for example /dev/tty + { show_file_error( input_filename, + "I won't read compressed data from a terminal." ); + close( infd ); set_retval( retval, 2 ); + if( program_mode != m_test ) cleanup_and_fail( retval ); + return false; } + return true; + } + +bool check_tty_out( const Mode program_mode ) + { + if( program_mode == m_compress && isatty( outfd ) ) + { show_file_error( output_filename.size() ? + output_filename.c_str() : "(stdout)", + "I won't write compressed data to a terminal." ); + return false; } + return true; + } + + +// Set permissions, owner, and times. +void close_and_set_permissions( const struct stat * const in_statsp ) + { + bool warning = false; + if( in_statsp ) + { + const mode_t mode = in_statsp->st_mode; + // fchown will in many cases return with EPERM, which can be safely ignored. + if( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) == 0 ) + { if( fchmod( outfd, mode ) != 0 ) warning = true; } + else + if( errno != EPERM || + fchmod( outfd, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 ) + warning = true; + } + if( close( outfd ) != 0 ) + { + show_error( "Error closing output file", errno ); + cleanup_and_fail( 1 ); + } + outfd = -1; + delete_output_on_interrupt = false; + if( in_statsp ) + { + struct utimbuf t; + t.actime = in_statsp->st_atime; + t.modtime = in_statsp->st_mtime; + if( utime( output_filename.c_str(), &t ) != 0 ) warning = true; + } + if( warning && verbosity >= 1 ) + show_error( "Can't change output file attributes." ); + } + +} // end namespace + + +void show_error( const char * const msg, const int errcode, const bool help ) + { + if( verbosity < 0 ) return; + if( msg && msg[0] ) + std::fprintf( stderr, "%s: %s%s%s\n", program_name, msg, + ( errcode > 0 ) ? ": " : "", + ( errcode > 0 ) ? std::strerror( errcode ) : "" ); + if( help ) + std::fprintf( stderr, "Try '%s --help' for more information.\n", + invocation_name ); + } + + +void show_file_error( const char * const filename, const char * const msg, + const int errcode ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: %s: %s%s%s\n", program_name, filename, msg, + ( errcode > 0 ) ? ": " : "", + ( errcode > 0 ) ? std::strerror( errcode ) : "" ); + } + + +void internal_error( const char * const msg ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: internal error: %s\n", program_name, msg ); + std::exit( 3 ); + } + + +void show_progress( const unsigned long long packet_size, + const unsigned long long cfile_size, + const Pretty_print * const p ) + { + static unsigned long long csize = 0; // file_size / 100 + static unsigned long long pos = 0; + static const Pretty_print * pp = 0; + static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + static bool enabled = true; + + if( !enabled ) return; + if( p ) // initialize static vars + { + if( verbosity < 2 || !isatty( STDERR_FILENO ) ) { enabled = false; return; } + csize = cfile_size; pos = 0; pp = p; + } + if( pp ) + { + xlock( &mutex ); + pos += packet_size; + if( csize > 0 ) + std::fprintf( stderr, "%4llu%% %.1f MB\r", pos / csize, pos / 1000000.0 ); + else + std::fprintf( stderr, " %.1f MB\r", pos / 1000000.0 ); + pp->reset(); (*pp)(); // restore cursor position + xunlock( &mutex ); + } + } + + +#if defined __MSVCRT__ +#include <windows.h> +#define _SC_NPROCESSORS_ONLN 1 +#define _SC_THREAD_THREADS_MAX 2 + +long sysconf( int flag ) + { + if( flag == _SC_NPROCESSORS_ONLN ) + { + SYSTEM_INFO si; + GetSystemInfo( &si ); + return si.dwNumberOfProcessors; + } + if( flag != _SC_THREAD_THREADS_MAX ) errno = EINVAL; + return -1; // unlimited threads or error + } + +#endif // __MSVCRT__ + + +int main( const int argc, const char * const argv[] ) + { + /* Mapping from gzip/bzip2 style 1..9 compression modes + to the corresponding LZMA compression modes. */ + const Lzma_options option_mapping[] = + { + { 65535, 16 }, // -0 (65535,16 chooses fast encoder) + { 1 << 20, 5 }, // -1 + { 3 << 19, 6 }, // -2 + { 1 << 21, 8 }, // -3 + { 3 << 20, 12 }, // -4 + { 1 << 22, 20 }, // -5 + { 1 << 23, 36 }, // -6 + { 1 << 24, 68 }, // -7 + { 3 << 23, 132 }, // -8 + { 1 << 25, 273 } }; // -9 + Lzma_options encoder_options = option_mapping[6]; // default = "-6" + std::string default_output_filename; + int data_size = 0; + int debug_level = 0; + int num_workers = 0; // start this many worker threads + int in_slots = 4; + int out_slots = 64; + Mode program_mode = m_compress; + bool force = false; + bool ignore_trailing = true; + bool keep_input_files = false; + bool loose_trailing = false; + bool recompress = false; + bool to_stdout = false; + if( argc > 0 ) invocation_name = argv[0]; + + enum { opt_chk = 256, opt_dbg, opt_in, opt_lt, opt_out }; + const Arg_parser::Option options[] = + { + { '0', "fast", Arg_parser::no }, + { '1', 0, Arg_parser::no }, + { '2', 0, Arg_parser::no }, + { '3', 0, Arg_parser::no }, + { '4', 0, Arg_parser::no }, + { '5', 0, Arg_parser::no }, + { '6', 0, Arg_parser::no }, + { '7', 0, Arg_parser::no }, + { '8', 0, Arg_parser::no }, + { '9', "best", Arg_parser::no }, + { 'a', "trailing-error", Arg_parser::no }, + { 'b', "member-size", Arg_parser::yes }, + { 'B', "data-size", Arg_parser::yes }, + { 'c', "stdout", Arg_parser::no }, + { 'd', "decompress", Arg_parser::no }, + { 'f', "force", Arg_parser::no }, + { 'F', "recompress", Arg_parser::no }, + { 'h', "help", Arg_parser::no }, + { 'k', "keep", Arg_parser::no }, + { 'l', "list", Arg_parser::no }, + { 'm', "match-length", Arg_parser::yes }, + { 'n', "threads", Arg_parser::yes }, + { 'o', "output", Arg_parser::yes }, + { 'q', "quiet", Arg_parser::no }, + { 's', "dictionary-size", Arg_parser::yes }, + { 'S', "volume-size", Arg_parser::yes }, + { 't', "test", Arg_parser::no }, + { 'v', "verbose", Arg_parser::no }, + { 'V', "version", Arg_parser::no }, + { opt_chk, "check-lib", Arg_parser::no }, + { opt_dbg, "debug", Arg_parser::yes }, + { opt_in, "in-slots", Arg_parser::yes }, + { opt_lt, "loose-trailing", Arg_parser::no }, + { opt_out, "out-slots", Arg_parser::yes }, + { 0, 0, Arg_parser::no } }; + + const Arg_parser parser( argc, argv, options ); + if( parser.error().size() ) // bad option + { show_error( parser.error().c_str(), 0, true ); return 1; } + + const long num_online = std::max( 1L, sysconf( _SC_NPROCESSORS_ONLN ) ); + long max_workers = sysconf( _SC_THREAD_THREADS_MAX ); + if( max_workers < 1 || max_workers > INT_MAX / (int)sizeof (pthread_t) ) + max_workers = INT_MAX / sizeof (pthread_t); + + int argind = 0; + for( ; argind < parser.arguments(); ++argind ) + { + const int code = parser.code( argind ); + if( !code ) break; // no more options + const char * const pn = parser.parsed_name( argind ).c_str(); + const std::string & sarg = parser.argument( argind ); + const char * const arg = sarg.c_str(); + switch( code ) + { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + encoder_options = option_mapping[code-'0']; break; + case 'a': ignore_trailing = false; break; + case 'b': break; + case 'B': data_size = getnum( arg, pn, 2 * LZ_min_dictionary_size(), + 2 * LZ_max_dictionary_size() ); break; + case 'c': to_stdout = true; break; + case 'd': set_mode( program_mode, m_decompress ); break; + case 'f': force = true; break; + case 'F': recompress = true; break; + case 'h': show_help( num_online ); return 0; + case 'k': keep_input_files = true; break; + case 'l': set_mode( program_mode, m_list ); break; + case 'm': encoder_options.match_len_limit = + getnum( arg, pn, LZ_min_match_len_limit(), + LZ_max_match_len_limit() ); break; + case 'n': num_workers = getnum( arg, pn, 1, max_workers ); break; + case 'o': if( sarg == "-" ) to_stdout = true; + else { default_output_filename = sarg; } break; + case 'q': verbosity = -1; break; + case 's': encoder_options.dictionary_size = get_dict_size( arg, pn ); + break; + case 'S': break; + case 't': set_mode( program_mode, m_test ); break; + case 'v': if( verbosity < 4 ) ++verbosity; break; + case 'V': show_version(); return 0; + case opt_chk: return check_lib(); + case opt_dbg: debug_level = getnum( arg, pn, 0, 3 ); break; + case opt_in: in_slots = getnum( arg, pn, 1, 64 ); break; + case opt_lt: loose_trailing = true; break; + case opt_out: out_slots = getnum( arg, pn, 1, 1024 ); break; + default : internal_error( "uncaught option." ); + } + } // end process options + + if( LZ_version()[0] < '1' ) + { show_error( "Wrong library version. At least lzlib 1.0 is required." ); + return 1; } + +#if defined __MSVCRT__ || defined __OS2__ + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); +#endif + + std::vector< std::string > filenames; + bool filenames_given = false; + for( ; argind < parser.arguments(); ++argind ) + { + filenames.push_back( parser.argument( argind ) ); + if( filenames.back() != "-" ) filenames_given = true; + } + if( filenames.empty() ) filenames.push_back("-"); + + if( program_mode == m_list ) + return list_files( filenames, ignore_trailing, loose_trailing ); + + const bool fast = encoder_options.dictionary_size == 65535 && + encoder_options.match_len_limit == 16; + if( data_size <= 0 ) + { + if( fast ) data_size = 1 << 20; + else data_size = 2 * std::max( 65536, encoder_options.dictionary_size ); + } + else if( !fast && data_size < encoder_options.dictionary_size ) + encoder_options.dictionary_size = + std::max( data_size, LZ_min_dictionary_size() ); + + if( num_workers <= 0 ) + { + if( program_mode == m_compress && sizeof (void *) <= 4 ) + { + // use less than 2.22 GiB on 32 bit systems + const long long limit = ( 27LL << 25 ) + ( 11LL << 27 ); // 4 * 568 MiB + const long long mem = ( 27LL * data_size ) / 8 + + ( fast ? 3LL << 19 : 11LL * encoder_options.dictionary_size ); + const int nmax32 = std::max( limit / mem, 1LL ); + if( max_workers > nmax32 ) max_workers = nmax32; + } + num_workers = std::min( num_online, max_workers ); + } + + if( program_mode == m_test ) to_stdout = false; // apply overrides + if( program_mode == m_test || to_stdout ) default_output_filename.clear(); + + if( to_stdout && program_mode != m_test ) // check tty only once + { outfd = STDOUT_FILENO; if( !check_tty_out( program_mode ) ) return 1; } + else outfd = -1; + + const bool to_file = !to_stdout && program_mode != m_test && + default_output_filename.size(); + if( !to_stdout && program_mode != m_test && ( filenames_given || to_file ) ) + set_signals( signal_handler ); + + Pretty_print pp( filenames ); + + int failed_tests = 0; + int retval = 0; + const bool one_to_one = !to_stdout && program_mode != m_test && !to_file; + bool stdin_used = false; + for( unsigned i = 0; i < filenames.size(); ++i ) + { + std::string input_filename; + int infd; + struct stat in_stats; + + pp.set_name( filenames[i] ); + if( filenames[i] == "-" ) + { + if( stdin_used ) continue; else stdin_used = true; + infd = STDIN_FILENO; + if( !check_tty_in( pp.name(), infd, program_mode, retval ) ) continue; + if( one_to_one ) { outfd = STDOUT_FILENO; output_filename.clear(); } + } + else + { + const int eindex = extension_index( input_filename = filenames[i] ); + infd = open_instream2( input_filename.c_str(), &in_stats, program_mode, + eindex, one_to_one, recompress ); + if( infd < 0 ) { set_retval( retval, 1 ); continue; } + if( !check_tty_in( pp.name(), infd, program_mode, retval ) ) continue; + if( one_to_one ) // open outfd after verifying infd + { + if( program_mode == m_compress ) + set_c_outname( input_filename, true, true ); + else set_d_outname( input_filename, eindex ); + if( !open_outstream( force, true ) ) + { close( infd ); set_retval( retval, 1 ); continue; } + } + } + + if( one_to_one && !check_tty_out( program_mode ) ) + { set_retval( retval, 1 ); return retval; } // don't delete a tty + + if( to_file && outfd < 0 ) // open outfd after verifying infd + { + if( program_mode == m_compress ) set_c_outname( default_output_filename, + filenames_given, false ); + else output_filename = default_output_filename; + if( !open_outstream( force, false ) || !check_tty_out( program_mode ) ) + return 1; // check tty only once and don't try to delete a tty + } + + const struct stat * const in_statsp = + ( input_filename.size() && one_to_one ) ? &in_stats : 0; + const bool infd_isreg = input_filename.size() && S_ISREG( in_stats.st_mode ); + const unsigned long long cfile_size = + infd_isreg ? ( in_stats.st_size + 99 ) / 100 : 0; + int tmp; + if( program_mode == m_compress ) + tmp = compress( cfile_size, data_size, encoder_options.dictionary_size, + encoder_options.match_len_limit, num_workers, + infd, outfd, pp, debug_level ); + else + tmp = decompress( cfile_size, num_workers, infd, outfd, pp, + debug_level, in_slots, out_slots, ignore_trailing, + loose_trailing, infd_isreg, one_to_one ); + if( close( infd ) != 0 ) + { show_file_error( pp.name(), "Error closing input file", errno ); + set_retval( tmp, 1 ); } + set_retval( retval, tmp ); + if( tmp ) + { if( program_mode != m_test ) cleanup_and_fail( retval ); + else ++failed_tests; } + + if( delete_output_on_interrupt && one_to_one ) + close_and_set_permissions( in_statsp ); + if( input_filename.size() && !keep_input_files && one_to_one ) + std::remove( input_filename.c_str() ); + } + if( delete_output_on_interrupt ) close_and_set_permissions( 0 ); // -o + else if( outfd >= 0 && close( outfd ) != 0 ) // -c + { + show_error( "Error closing stdout", errno ); + set_retval( retval, 1 ); + } + if( failed_tests > 0 && verbosity >= 1 && filenames.size() > 1 ) + std::fprintf( stderr, "%s: warning: %d %s failed the test.\n", + program_name, failed_tests, + ( failed_tests == 1 ) ? "file" : "files" ); + return retval; + } |