From ac32e8eabf1b97208c4ccdfe908aea863d09d1f3 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Tue, 13 Feb 2018 08:06:07 +0100 Subject: Adding upstream version 1.7. Signed-off-by: Daniel Baumann --- ChangeLog | 33 +++++-- INSTALL | 6 +- NEWS | 37 ++++++-- README | 6 +- arg_parser.cc | 2 +- arg_parser.h | 2 +- compress.cc | 103 ++++++++++---------- configure | 8 +- dec_stdout.cc | 30 +++--- dec_stream.cc | 65 +++++++++---- decompress.cc | 68 +++++++------- doc/plzip.1 | 9 +- doc/plzip.info | 268 ++++++++++++++++++++++++++++++++++++----------------- doc/plzip.texi | 235 +++++++++++++++++++++++++++++++++------------- file_index.cc | 37 ++++---- file_index.h | 8 +- list.cc | 11 ++- lzip.h | 61 ++++++------ main.cc | 231 ++++++++++++++++++++++++++------------------- testsuite/check.sh | 65 ++++++++++++- 20 files changed, 841 insertions(+), 444 deletions(-) diff --git a/ChangeLog b/ChangeLog index adc942b..20d8605 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,24 @@ +2018-02-07 Antonio Diaz Diaz + + * Version 1.7 released. + * compress.cc: Use 'LZ_compress_restart_member' and replace input + packet queue by a circular buffer to reduce memory fragmentation. + * compress.cc: Return one empty packet at a time to reduce mem use. + * main.cc: Reduce threads on 32 bit systems to use under 2.22 GiB. + * main.cc: Added new option '--loose-trailing'. + * Improved corrupt header detection to HD=3 on seekable files. + (On all files with lzlib 1.10 or newer). + * Replaced 'bits/byte' with inverse compression ratio in output. + * Show progress of decompression at verbosity level 2 (-vv). + * Show progress of (de)compression only if stderr is a terminal. + * main.cc: Do not add a second .lz extension to the arg of -o. + * Show dictionary size at verbosity level 4 (-vvvv). + * main.cc (cleanup_and_fail): Suppress messages from other threads. + * list.cc: Added missing '#include <pthread.h>'. + * plzip.texi: Added chapter 'Output'. + * plzip.texi (Memory requirements): Added table. 
+ * plzip.texi (Program design): Added a block diagram. + 2017-04-12 Antonio Diaz Diaz * Version 1.6 released. @@ -13,11 +34,11 @@ * main.cc: Added new option '-a, --trailing-error'. * main.cc (main): Delete '--output' file if infd is a terminal. * main.cc (main): Don't use stdin more than once. - * lzip.texi: Added chapters 'Trailing data' and 'Examples'. + * plzip.texi: Added chapters 'Trailing data' and 'Examples'. * configure: Avoid warning on some shells when testing for g++. * Makefile.in: Detect the existence of install-info. - * testsuite/check.sh: A POSIX shell is required to run the tests. - * testsuite/check.sh: Don't check error messages. + * check.sh: A POSIX shell is required to run the tests. + * check.sh: Don't check error messages. 2015-07-09 Antonio Diaz Diaz @@ -30,7 +51,7 @@ * dec_stream.cc: Don't use output packets or muxer when testing. * Make '-dvvv' and '-tvvv' show dictionary size like lzip. * lzip.h: Added missing 'const' to the declaration of 'compress'. - * lzip.texi: Added chapters 'Memory requirements' and + * plzip.texi: Added chapters 'Memory requirements' and 'Minimum file sizes'. * Makefile.in: Added new targets 'install*-compress'. @@ -99,7 +120,7 @@ * Version 0.6 released. * Small portability fixes. - * lzip.texinfo: Added chapter 'Program Design' and description + * plzip.texinfo: Added chapter 'Program Design' and description of option '--threads'. * Debug stats have been fixed. @@ -148,7 +169,7 @@ until something better appears on the net. -Copyright (C) 2009-2017 Antonio Diaz Diaz. +Copyright (C) 2009-2018 Antonio Diaz Diaz. This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute and diff --git a/INSTALL b/INSTALL index b0cc6ea..6f359ca 100644 --- a/INSTALL +++ b/INSTALL @@ -4,7 +4,9 @@ You will need a C++ compiler and the lzlib compression library installed. 
I use gcc 5.3.0 and 4.1.2, but the code should compile with any standards compliant compiler. Lzlib must be version 1.0 or newer, but the fast encoder is only -available in lzlib 1.7 or newer. +available in lzlib 1.7 or newer, and the HD = 3 detection of corrupt +headers on non-seekable multimember files is only available in lzlib +1.10 or newer. Gcc is available at http://gcc.gnu.org. Lzlib is available at http://www.nongnu.org/lzip/lzlib.html. @@ -65,7 +67,7 @@ After running 'configure', you can run 'make' and 'make install' as explained above. -Copyright (C) 2009-2017 Antonio Diaz Diaz. +Copyright (C) 2009-2018 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/NEWS b/NEWS index b7f1326..1916e07 100644 --- a/NEWS +++ b/NEWS @@ -1,9 +1,34 @@ -Changes in version 1.6: +Changes in version 1.7: -The option '-l, --list' has been ported from lziprecover. +When compressing on a 32 bit system, plzip now tries to limit the memory +use to under 2.22 GiB (4 worker threads at level -9) by reducing the +number of threads below the system's default. -It is now an error to specify two or more different operations in the -command line (--decompress, --list or --test). +The option '--loose-trailing', has been added. -In test mode, plzip now continues checking the rest of the files if any -input file is a terminal. +The test used by plzip to discriminate trailing data from a corrupt +header in multimember regular (seekable) files has been improved to a +Hamming distance (HD) of 3, and the 3 bit flips must happen in different +magic bytes for the test to fail. As a consequence some kinds of files +no longer can be appended to a lzip file as trailing data unless the +'--loose-trailing' option is used when decompressing. +Lzlib 1.10 or newer is required for this test to work on non-seekable +files. +Lziprecover can be used to remove conflicting trailing data from a file. 
+ +The 'bits/byte' ratio has been replaced with the inverse compression +ratio in the output. + +The progress of decompression is now shown at verbosity level 2 (-vv) or +higher. + +Progress of (de)compression is only shown if stderr is a terminal. + +A second '.lz' extension is no longer added to the argument of '-o' if +it already ends in '.lz' or '.tlz'. + +The dictionary size is now shown at verbosity level 4 (-vvvv) when +decompressing or testing. + +The new chapter "Meaning of plzip's output", and a block diagram of +plzip have been added to the manual. diff --git a/README b/README index 5385ec9..d6f4d4a 100644 --- a/README +++ b/README @@ -17,7 +17,7 @@ creating a multimember compressed file. When decompressing, plzip decompresses as many members simultaneously as worker threads are chosen. Files that were compressed with lzip will not -be decompressed faster than using lzip (unless the "-b" option was used) +be decompressed faster than using lzip (unless the '-b' option was used) because lzip usually produces single-member files, which can't be decompressed in parallel. @@ -84,11 +84,11 @@ incomprehensible and therefore pointless. Plzip will correctly decompress a file which is the concatenation of two or more compressed files. The result is the concatenation of the -corresponding uncompressed files. Integrity testing of concatenated +corresponding decompressed files. Integrity testing of concatenated compressed files is also supported. -Copyright (C) 2009-2017 Antonio Diaz Diaz. +Copyright (C) 2009-2018 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/arg_parser.cc b/arg_parser.cc index cc7d1e2..008ebc8 100644 --- a/arg_parser.cc +++ b/arg_parser.cc @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C++ version) - Copyright (C) 2006-2017 Antonio Diaz Diaz. + Copyright (C) 2006-2018 Antonio Diaz Diaz. This library is free software. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided diff --git a/arg_parser.h b/arg_parser.h index 95b0320..f015881 100644 --- a/arg_parser.h +++ b/arg_parser.h @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C++ version) - Copyright (C) 2006-2017 Antonio Diaz Diaz. + Copyright (C) 2006-2018 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided diff --git a/compress.cc b/compress.cc index 5bcd999..beae59e 100644 --- a/compress.cc +++ b/compress.cc @@ -1,6 +1,6 @@ /* Plzip - Parallel compressor compatible with lzip Copyright (C) 2009 Laszlo Ersek. - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -159,8 +159,9 @@ struct Packet // data block with a serial number uint8_t * data; int size; // number of bytes in data (if any) unsigned id; // serial number assigned as received - Packet( uint8_t * const d, const int s, const unsigned i ) - : data( d ), size( s ), id( i ) {} + Packet() : data( 0 ), size( 0 ), id( 0 ) {} + void init( uint8_t * const d, const int s, const unsigned i ) + { data = d; size = s; id = i; } }; @@ -173,10 +174,11 @@ public: unsigned owait_counter; private: unsigned receive_id; // id assigned to next packet received + unsigned distrib_id; // id of next packet to be distributed unsigned deliver_id; // id of next packet to be delivered Slot_tally slot_tally; // limits the number of input packets - std::queue< Packet * > packet_queue; - std::vector< const Packet * > circular_buffer; + std::vector< Packet > circular_ibuffer; + std::vector< const Packet * > circular_obuffer; int num_working; // number of workers still running const int num_slots; // max packets in circulation pthread_mutex_t imutex; @@ 
-192,8 +194,9 @@ public: Packet_courier( const int workers, const int slots ) : icheck_counter( 0 ), iwait_counter( 0 ), ocheck_counter( 0 ), owait_counter( 0 ), - receive_id( 0 ), deliver_id( 0 ), - slot_tally( slots ), circular_buffer( slots, (Packet *) 0 ), + receive_id( 0 ), distrib_id( 0 ), deliver_id( 0 ), + slot_tally( slots ), circular_ibuffer( slots ), + circular_obuffer( slots, (Packet *) 0 ), num_working( workers ), num_slots( slots ), eof( false ) { xinit_mutex( &imutex ); xinit_cond( &iav_or_eof ); @@ -206,13 +209,13 @@ public: xdestroy_cond( &iav_or_eof ); xdestroy_mutex( &imutex ); } - // make a packet with data received from splitter + // fill a packet with data received from splitter void receive_packet( uint8_t * const data, const int size ) { - Packet * const ipacket = new Packet( data, size, receive_id++ ); slot_tally.get_slot(); // wait for a free slot xlock( &imutex ); - packet_queue.push( ipacket ); + circular_ibuffer[receive_id % num_slots].init( data, size, receive_id ); + ++receive_id; xsignal( &iav_or_eof ); xunlock( &imutex ); } @@ -223,18 +226,15 @@ public: Packet * ipacket = 0; xlock( &imutex ); ++icheck_counter; - while( packet_queue.empty() && !eof ) + while( receive_id == distrib_id && !eof ) // no packets to distribute { ++iwait_counter; xwait( &iav_or_eof, &imutex ); } - if( !packet_queue.empty() ) - { - ipacket = packet_queue.front(); - packet_queue.pop(); - } + if( receive_id != distrib_id ) + { ipacket = &circular_ibuffer[distrib_id % num_slots]; ++distrib_id; } xunlock( &imutex ); - if( !ipacket ) + if( !ipacket ) // EOF { // notify muxer when last worker exits xlock( &omutex ); @@ -250,10 +250,10 @@ public: const int i = opacket->id % num_slots; xlock( &omutex ); // id collision shouldn't happen - if( circular_buffer[i] != 0 ) + if( circular_obuffer[i] != 0 ) internal_error( "id collision in collect_packet." 
); // merge packet into circular buffer - circular_buffer[i] = opacket; + circular_obuffer[i] = opacket; if( opacket->id == deliver_id ) xsignal( &oav_or_exit ); xunlock( &omutex ); } @@ -264,7 +264,7 @@ public: xlock( &omutex ); ++ocheck_counter; int i = deliver_id % num_slots; - while( circular_buffer[i] == 0 && num_working > 0 ) + while( circular_obuffer[i] == 0 && num_working > 0 ) { ++owait_counter; xwait( &oav_or_exit, &omutex ); @@ -272,18 +272,19 @@ public: packet_vector.clear(); while( true ) { - const Packet * const opacket = circular_buffer[i]; + const Packet * const opacket = circular_obuffer[i]; if( !opacket ) break; packet_vector.push_back( opacket ); - circular_buffer[i] = 0; + circular_obuffer[i] = 0; ++deliver_id; i = deliver_id % num_slots; } xunlock( &omutex ); - if( packet_vector.size() ) // return slots to the tally - slot_tally.leave_slots( packet_vector.size() ); } + void return_empty_packet() // return a slot to the tally + { slot_tally.leave_slot(); } + void finish() // splitter has no more packets to send { xlock( &imutex ); @@ -294,10 +295,10 @@ public: bool finished() // all packets delivered to muxer { - if( !slot_tally.all_free() || !eof || !packet_queue.empty() || + if( !slot_tally.all_free() || !eof || receive_id != distrib_id || num_working != 0 ) return false; for( int i = 0; i < num_slots; ++i ) - if( circular_buffer[i] != 0 ) return false; + if( circular_obuffer[i] != 0 ) return false; return true; } }; @@ -369,26 +370,32 @@ extern "C" void * cworker( void * arg ) const int dictionary_size = tmp.dictionary_size; const int match_len_limit = tmp.match_len_limit; const int offset = tmp.offset; + LZ_Encoder * encoder = 0; while( true ) { Packet * const packet = courier.distribute_packet(); if( !packet ) break; // no more packets to process - const bool fast = dictionary_size == 65535 && match_len_limit == 16; - const int dict_size = fast ? 
dictionary_size : - std::max( std::min( dictionary_size, packet->size ), - LZ_min_dictionary_size() ); - LZ_Encoder * const encoder = - LZ_compress_open( dict_size, match_len_limit, LLONG_MAX ); - if( !encoder || LZ_compress_errno( encoder ) != LZ_ok ) + if( !encoder ) { - if( !encoder || LZ_compress_errno( encoder ) == LZ_mem_error ) - pp( mem_msg ); - else - internal_error( "invalid argument to encoder." ); - cleanup_and_fail(); + const bool fast = dictionary_size == 65535 && match_len_limit == 16; + const int dict_size = fast ? dictionary_size : + std::max( std::min( dictionary_size, packet->size ), + LZ_min_dictionary_size() ); + encoder = LZ_compress_open( dict_size, match_len_limit, LLONG_MAX ); + if( !encoder || LZ_compress_errno( encoder ) != LZ_ok ) + { + if( !encoder || LZ_compress_errno( encoder ) == LZ_mem_error ) + pp( mem_msg ); + else + internal_error( "invalid argument to encoder." ); + cleanup_and_fail(); + } } + else + if( LZ_compress_restart_member( encoder, LLONG_MAX ) < 0 ) + { pp( "LZ_compress_restart_member failed." ); cleanup_and_fail(); } int written = 0; int new_pos = 0; @@ -422,13 +429,12 @@ extern "C" void * cworker( void * arg ) if( LZ_compress_finished( encoder ) == 1 ) break; } - if( LZ_compress_close( encoder ) < 0 ) - { pp( "LZ_compress_close failed." ); cleanup_and_fail(); } - if( packet->size > 0 ) show_progress( packet->size ); packet->size = new_pos; courier.collect_packet( packet ); } + if( encoder && LZ_compress_close( encoder ) < 0 ) + { pp( "LZ_compress_close failed." 
); cleanup_and_fail(); } return 0; } @@ -452,7 +458,7 @@ void muxer( Packet_courier & courier, const Pretty_print & pp, const int outfd ) if( wr != opacket->size ) { pp(); show_error( "Write error", errno ); cleanup_and_fail(); } delete[] opacket->data; - delete opacket; + courier.return_empty_packet(); } } } @@ -462,7 +468,8 @@ void muxer( Packet_courier & courier, const Pretty_print & pp, const int outfd ) // init the courier, then start the splitter and the workers and // call the muxer. -int compress( const int data_size, const int dictionary_size, +int compress( const unsigned long long cfile_size, + const int data_size, const int dictionary_size, const int match_len_limit, const int num_workers, const int infd, const int outfd, const Pretty_print & pp, const int debug_level ) @@ -486,6 +493,8 @@ int compress( const int data_size, const int dictionary_size, int errcode = pthread_create( &splitter_thread, 0, csplitter, &splitter_arg ); if( errcode ) { show_error( "Can't create splitter thread", errcode ); cleanup_and_fail(); } + if( verbosity >= 1 ) pp(); + show_progress( 0, cfile_size, &pp ); // init Worker_arg worker_arg; worker_arg.courier = &courier; @@ -522,11 +531,11 @@ int compress( const int data_size, const int dictionary_size, if( in_size == 0 || out_size == 0 ) std::fputs( " no data compressed.\n", stderr ); else - std::fprintf( stderr, "%6.3f:1, %6.3f bits/byte, " - "%5.2f%% saved, %llu in, %llu out.\n", + std::fprintf( stderr, "%6.3f:1, %5.2f%% ratio, %5.2f%% saved, " + "%llu in, %llu out.\n", (double)in_size / out_size, - ( 8.0 * out_size ) / in_size, - 100.0 * ( 1.0 - ( (double)out_size / in_size ) ), + ( 100.0 * out_size ) / in_size, + 100.0 - ( ( 100.0 * out_size ) / in_size ), in_size, out_size ); } diff --git a/configure b/configure index eba8e88..e31f675 100755 --- a/configure +++ b/configure @@ -1,12 +1,12 @@ #! /bin/sh # configure script for Plzip - Parallel compressor compatible with lzip -# Copyright (C) 2009-2017 Antonio Diaz Diaz. 
+# Copyright (C) 2009-2018 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. pkgname=plzip -pkgversion=1.6 +pkgversion=1.7 progname=plzip srctrigger=doc/${pkgname}.texi @@ -168,7 +168,7 @@ echo "LDFLAGS = ${LDFLAGS}" rm -f Makefile cat > Makefile << EOF # Makefile for Plzip - Parallel compressor compatible with lzip -# Copyright (C) 2009-2017 Antonio Diaz Diaz. +# Copyright (C) 2009-2018 Antonio Diaz Diaz. # This file was generated automatically by configure. Don't edit. # # This Makefile is free software: you have unlimited permission @@ -193,4 +193,4 @@ cat "${srcdir}/Makefile.in" >> Makefile echo "OK. Now you can run make." echo "If make fails, verify that the lzlib compression library is correctly" -echo "installed. (see INSTALL)" +echo "installed (see INSTALL)." diff --git a/dec_stdout.cc b/dec_stdout.cc index a1a9d61..27b9f31 100644 --- a/dec_stdout.cc +++ b/dec_stdout.cc @@ -1,6 +1,6 @@ /* Plzip - Parallel compressor compatible with lzip Copyright (C) 2009 Laszlo Ersek. - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -229,6 +229,7 @@ extern "C" void * dworker_o( void * arg ) if( rd == 0 ) break; } } + show_progress( file_index.mblock( i ).size() ); } delete[] ibuffer; delete[] new_data; @@ -298,17 +299,22 @@ int dec_stdout( const int num_workers, const int infd, const int outfd, delete[] worker_threads; delete[] worker_args; - const unsigned long long in_size = file_index.cdata_size(); - const unsigned long long out_size = file_index.udata_size(); - if( verbosity >= 2 && out_size > 0 && in_size > 0 ) - std::fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. 
", - (double)out_size / in_size, - ( 8.0 * in_size ) / out_size, - 100.0 * ( 1.0 - ( (double)in_size / out_size ) ) ); - if( verbosity >= 4 ) - std::fprintf( stderr, "decompressed %9llu, compressed %9llu. ", - out_size, in_size ); - + if( verbosity >= 2 ) + { + if( verbosity >= 4 ) show_header( file_index.dictionary_size( 0 ) ); + const unsigned long long in_size = file_index.cdata_size(); + const unsigned long long out_size = file_index.udata_size(); + if( out_size == 0 || in_size == 0 ) + std::fputs( "no data compressed. ", stderr ); + else + std::fprintf( stderr, "%6.3f:1, %5.2f%% ratio, %5.2f%% saved. ", + (double)out_size / in_size, + ( 100.0 * in_size ) / out_size, + 100.0 - ( ( 100.0 * in_size ) / out_size ) ); + if( verbosity >= 3 ) + std::fprintf( stderr, "decompressed %9llu, compressed %8llu. ", + out_size, in_size ); + } if( verbosity >= 1 ) std::fputs( "done\n", stderr ); if( debug_level & 1 ) diff --git a/dec_stream.cc b/dec_stream.cc index 5ec1ff7..36a0ec0 100644 --- a/dec_stream.cc +++ b/dec_stream.cc @@ -1,6 +1,6 @@ /* Plzip - Parallel compressor compatible with lzip Copyright (C) 2009 Laszlo Ersek. - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -101,7 +101,7 @@ public: } // make a packet with data received from splitter - // if data == 0, move to next queue + // if data == 0 (end of member token), move to next queue void receive_packet( uint8_t * const data, const int size ) { Packet * const ipacket = new Packet( data, size ); @@ -237,9 +237,11 @@ int find_magic( const uint8_t * const buffer, const int pos, const int size ) struct Splitter_arg { + unsigned long long cfile_size; Packet_courier * courier; const Pretty_print * pp; int infd; + unsigned dictionary_size; // returned by splitter to main thread }; @@ -247,7 +249,7 @@ struct Splitter_arg // courier for packaging and distribution to workers. extern "C" void * dsplitter_s( void * arg ) { - const Splitter_arg & tmp = *(Splitter_arg *)arg; + Splitter_arg & tmp = *(Splitter_arg *)arg; Packet_courier & courier = *tmp.courier; const Pretty_print & pp = *tmp.pp; const int infd = tmp.infd; @@ -264,16 +266,18 @@ extern "C" void * dsplitter_s( void * arg ) if( size != buffer_size && errno ) { pp(); show_error( "Read error", errno ); cleanup_and_fail(); } if( size + hsize < min_member_size ) - { pp( "Input file is too short." ); cleanup_and_fail( 2 ); } + { show_file_error( pp.name(), "Input file is too short." 
); + cleanup_and_fail( 2 ); } const File_header & header = *(File_header *)buffer; if( !header.verify_magic() ) - { pp( bad_magic_msg ); cleanup_and_fail( 2 ); } + { show_file_error( pp.name(), bad_magic_msg ); cleanup_and_fail( 2 ); } if( !header.verify_version() ) { pp( bad_version( header.version() ) ); cleanup_and_fail( 2 ); } - const unsigned dictionary_size = header.dictionary_size(); - if( !isvalid_ds( dictionary_size ) ) + tmp.dictionary_size = header.dictionary_size(); + if( !isvalid_ds( tmp.dictionary_size ) ) { pp( bad_dict_msg ); cleanup_and_fail( 2 ); } - show_header( dictionary_size ); + if( verbosity >= 1 ) pp(); + show_progress( 0, tmp.cfile_size, &pp ); // init unsigned long long partial_member_size = 0; while( true ) @@ -301,6 +305,7 @@ extern "C" void * dsplitter_s( void * arg ) courier.receive_packet( 0, 0 ); // end of member token partial_member_size = 0; pos = newpos; + show_progress( member_size ); } } } @@ -340,6 +345,7 @@ struct Worker_arg const Pretty_print * pp; int worker_id; bool ignore_trailing; + bool loose_trailing; bool testing; }; @@ -353,6 +359,7 @@ extern "C" void * dworker_s( void * arg ) const Pretty_print & pp = *tmp.pp; const int worker_id = tmp.worker_id; const bool ignore_trailing = tmp.ignore_trailing; + const bool loose_trailing = tmp.loose_trailing; const bool testing = tmp.testing; uint8_t * new_data = new( std::nothrow ) uint8_t[max_packet_size]; @@ -387,12 +394,22 @@ extern "C" void * dworker_s( void * arg ) max_packet_size - new_pos ); if( rd < 0 ) { - if( LZ_decompress_errno( decoder ) == LZ_header_error ) + const enum LZ_Errno lz_errno = LZ_decompress_errno( decoder ); + if( lz_errno == LZ_header_error ) { trailing_data_found = true; if( !ignore_trailing ) { pp( trailing_msg ); cleanup_and_fail( 2 ); } } + else if( lz_errno == LZ_data_error && + LZ_decompress_member_position( decoder ) == 0 ) + { + trailing_data_found = true; + if( !loose_trailing ) + { pp( corrupt_mm_msg ); cleanup_and_fail( 2 ); } + else if( 
!ignore_trailing ) + { pp( trailing_msg ); cleanup_and_fail( 2 ); } + } else cleanup_and_fail( decompress_read_error( decoder, pp, worker_id ) ); } @@ -459,9 +476,10 @@ void muxer( Packet_courier & courier, const Pretty_print & pp, const int outfd ) // init the courier, then start the splitter and the workers and, // if not testing, call the muxer. -int dec_stream( const int num_workers, const int infd, const int outfd, +int dec_stream( const unsigned long long cfile_size, + const int num_workers, const int infd, const int outfd, const Pretty_print & pp, const int debug_level, - const bool ignore_trailing ) + const bool ignore_trailing, const bool loose_trailing ) { const int in_slots_per_worker = 2; const int out_slots = 32; @@ -472,6 +490,7 @@ int dec_stream( const int num_workers, const int infd, const int outfd, Packet_courier courier( num_workers, in_slots, out_slots ); Splitter_arg splitter_arg; + splitter_arg.cfile_size = cfile_size; splitter_arg.courier = &courier; splitter_arg.pp = &pp; splitter_arg.infd = infd; @@ -491,6 +510,7 @@ int dec_stream( const int num_workers, const int infd, const int outfd, worker_args[i].pp = &pp; worker_args[i].worker_id = i; worker_args[i].ignore_trailing = ignore_trailing; + worker_args[i].loose_trailing = loose_trailing; worker_args[i].testing = ( outfd < 0 ); errcode = pthread_create( &worker_threads[i], 0, dworker_s, &worker_args[i] ); if( errcode ) @@ -512,15 +532,20 @@ int dec_stream( const int num_workers, const int infd, const int outfd, if( errcode ) { show_error( "Can't join splitter thread", errcode ); cleanup_and_fail(); } - if( verbosity >= 2 && out_size > 0 && in_size > 0 ) - std::fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ", - (double)out_size / in_size, - ( 8.0 * in_size ) / out_size, - 100.0 * ( 1.0 - ( (double)in_size / out_size ) ) ); - if( verbosity >= 4 ) - std::fprintf( stderr, "decompressed %9llu, compressed %9llu. 
", - out_size, in_size ); - + if( verbosity >= 2 ) + { + if( verbosity >= 4 ) show_header( splitter_arg.dictionary_size ); + if( out_size == 0 || in_size == 0 ) + std::fputs( "no data compressed. ", stderr ); + else + std::fprintf( stderr, "%6.3f:1, %5.2f%% ratio, %5.2f%% saved. ", + (double)out_size / in_size, + ( 100.0 * in_size ) / out_size, + 100.0 - ( ( 100.0 * in_size ) / out_size ) ); + if( verbosity >= 3 ) + std::fprintf( stderr, "decompressed %9llu, compressed %8llu. ", + out_size, in_size ); + } if( verbosity >= 1 ) std::fputs( (outfd < 0) ? "ok\n" : "done\n", stderr ); if( debug_level & 1 ) diff --git a/decompress.cc b/decompress.cc index f580bca..ed1ac21 100644 --- a/decompress.cc +++ b/decompress.cc @@ -1,6 +1,6 @@ /* Plzip - Parallel compressor compatible with lzip Copyright (C) 2009 Laszlo Ersek. - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -37,23 +37,6 @@ #include "file_index.h" -void Pretty_print::operator()( const char * const msg ) const - { - if( verbosity >= 0 ) - { - if( first_post ) - { - first_post = false; - std::fprintf( stderr, " %s: ", name_.c_str() ); - for( unsigned i = name_.size(); i < longest_name; ++i ) - std::fputc( ' ', stderr ); - if( !msg ) std::fflush( stderr ); - } - if( msg ) std::fprintf( stderr, "%s\n", msg ); - } - } - - // Returns the number of bytes really read. // If (returned value < size) and (errno == 0), means EOF was reached. // @@ -197,6 +180,7 @@ extern "C" void * dworker( void * arg ) if( rd == 0 ) break; } } + show_progress( file_index.mblock( i ).size() ); } delete[] obuffer; delete[] ibuffer; @@ -211,25 +195,30 @@ extern "C" void * dworker( void * arg ) // start the workers and wait for them to finish. 
-int decompress( int num_workers, const int infd, const int outfd, - const Pretty_print & pp, const int debug_level, - const bool ignore_trailing, const bool infd_isreg ) +int decompress( const unsigned long long cfile_size, int num_workers, + const int infd, const int outfd, const Pretty_print & pp, + const int debug_level, const bool ignore_trailing, + const bool loose_trailing, const bool infd_isreg ) { if( !infd_isreg ) - return dec_stream( num_workers, infd, outfd, pp, debug_level, ignore_trailing ); + return dec_stream( cfile_size, num_workers, infd, outfd, pp, + debug_level, ignore_trailing, loose_trailing ); - const File_index file_index( infd, ignore_trailing ); + const File_index file_index( infd, ignore_trailing, loose_trailing ); if( file_index.retval() == 1 ) { lseek( infd, 0, SEEK_SET ); - return dec_stream( num_workers, infd, outfd, pp, debug_level, ignore_trailing ); + return dec_stream( cfile_size, num_workers, infd, outfd, pp, + debug_level, ignore_trailing, loose_trailing ); } if( file_index.retval() != 0 ) - { pp( file_index.error().c_str() ); return file_index.retval(); } + { show_file_error( pp.name(), file_index.error().c_str() ); + return file_index.retval(); } - show_header( file_index.dictionary_size( 0 ) ); if( num_workers > file_index.members() ) num_workers = file_index.members(); + if( verbosity >= 1 ) pp(); + show_progress( 0, cfile_size, &pp ); // init if( outfd >= 0 ) { @@ -266,17 +255,22 @@ int decompress( int num_workers, const int infd, const int outfd, delete[] worker_threads; delete[] worker_args; - const unsigned long long in_size = file_index.cdata_size(); - const unsigned long long out_size = file_index.udata_size(); - if( verbosity >= 2 && out_size > 0 && in_size > 0 ) - std::fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. 
", - (double)out_size / in_size, - ( 8.0 * in_size ) / out_size, - 100.0 * ( 1.0 - ( (double)in_size / out_size ) ) ); - if( verbosity >= 4 ) - std::fprintf( stderr, "decompressed %9llu, compressed %9llu. ", - out_size, in_size ); - + if( verbosity >= 2 ) + { + if( verbosity >= 4 ) show_header( file_index.dictionary_size( 0 ) ); + const unsigned long long in_size = file_index.cdata_size(); + const unsigned long long out_size = file_index.udata_size(); + if( out_size == 0 || in_size == 0 ) + std::fputs( "no data compressed. ", stderr ); + else + std::fprintf( stderr, "%6.3f:1, %5.2f%% ratio, %5.2f%% saved. ", + (double)out_size / in_size, + ( 100.0 * in_size ) / out_size, + 100.0 - ( ( 100.0 * in_size ) / out_size ) ); + if( verbosity >= 3 ) + std::fprintf( stderr, "decompressed %9llu, compressed %8llu. ", + out_size, in_size ); + } if( verbosity >= 1 ) std::fputs( (outfd < 0) ? "ok\n" : "done\n", stderr ); return 0; diff --git a/doc/plzip.1 b/doc/plzip.1 index 5c47edd..99dfd8b 100644 --- a/doc/plzip.1 +++ b/doc/plzip.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1. -.TH PLZIP "1" "April 2017" "plzip 1.6" "User Commands" +.TH PLZIP "1" "February 2018" "plzip 1.7" "User Commands" .SH NAME plzip \- reduces the size of files .SH SYNOPSIS @@ -68,6 +68,9 @@ alias for \fB\-0\fR .TP \fB\-\-best\fR alias for \fB\-9\fR +.TP +\fB\-\-loose\-trailing\fR +allow trailing data seeming corrupt header .PP If no file names are given, or if a file is '\-', plzip compresses or decompresses from standard input to standard output. @@ -92,8 +95,8 @@ Plzip home page: http://www.nongnu.org/lzip/plzip.html .SH COPYRIGHT Copyright \(co 2009 Laszlo Ersek. .br -Copyright \(co 2017 Antonio Diaz Diaz. -Using lzlib 1.9 +Copyright \(co 2018 Antonio Diaz Diaz. +Using lzlib 1.10 License GPLv2+: GNU GPL version 2 or later .br This is free software: you are free to change and redistribute it. 
diff --git a/doc/plzip.info b/doc/plzip.info index cf53f13..c8d7387 100644 --- a/doc/plzip.info +++ b/doc/plzip.info @@ -11,11 +11,12 @@ File: plzip.info, Node: Top, Next: Introduction, Up: (dir) Plzip Manual ************ -This manual is for Plzip (version 1.6, 12 April 2017). +This manual is for Plzip (version 1.7, 7 February 2018). * Menu: * Introduction:: Purpose and features of plzip +* Output:: Meaning of plzip's output * Invoking plzip:: Command line interface * Program design:: Internal structure of plzip * File format:: Detailed format of the compressed file @@ -27,13 +28,13 @@ This manual is for Plzip (version 1.6, 12 April 2017). * Concept index:: Index of concepts - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute and modify it.  -File: plzip.info, Node: Introduction, Next: Invoking plzip, Prev: Top, Up: Top +File: plzip.info, Node: Introduction, Next: Output, Prev: Top, Up: Top 1 Introduction ************** @@ -58,7 +59,7 @@ archiving, taking into account both data integrity and decoder availability: * The lzip format provides very safe integrity checking and some data - recovery means. The lziprecover program can repair bit-flip errors + recovery means. The lziprecover program can repair bit flip errors (one of the most common forms of data corruption) in lzip files, and provides data recovery capabilities, including error-checked merging of damaged copies of a file. *Note Data safety: @@ -114,17 +115,60 @@ entirely incomprehensible and therefore pointless. Plzip will correctly decompress a file which is the concatenation of two or more compressed files. The result is the concatenation of the -corresponding uncompressed files. Integrity testing of concatenated +corresponding decompressed files. Integrity testing of concatenated compressed files is also supported. 
+ +File: plzip.info, Node: Output, Next: Invoking plzip, Prev: Introduction, Up: Top + +2 Meaning of plzip's output +*************************** + +The output of plzip looks like this: + + plzip -v foo + foo: 6.676:1, 14.98% ratio, 85.02% saved, 450560 in, 67493 out. + + plzip -tvv foo.lz + foo.lz: 6.676:1, 14.98% ratio, 85.02% saved. ok + + The meaning of each field is as follows: + +'N:1' + The compression ratio (uncompressed_size / compressed_size), shown + as N to 1. + +'ratio' + The inverse compression ratio + (compressed_size / uncompressed_size), shown as a percentage. A + decimal ratio is easily obtained by moving the decimal point two + places to the left; 14.98% = 0.1498. + +'saved' + The space saved by compression (1 - ratio), shown as a percentage. + +'in' + The size of the uncompressed data. When decompressing or testing, + it is shown as 'decompressed'. Note that plzip always prints the + uncompressed size before the compressed size when compressing, + decompressing, testing or listing. + +'out' + The size of the compressed data. When decompressing or testing, it + is shown as 'compressed'. + + + When decompressing or testing at verbosity level 4 (-vvvv), the +dictionary size used to compress the file is also shown. + LANGUAGE NOTE: Uncompressed = not compressed = plain data; it may never have been compressed. Decompressed is used to refer to data which have undergone the process of decompression.  -File: plzip.info, Node: Invoking plzip, Next: Program design, Prev: Introduction, Up: Top +File: plzip.info, Node: Invoking plzip, Next: Program design, Prev: Output, Up: Top -2 Invoking plzip +3 Invoking plzip **************** The format for running plzip is: @@ -135,7 +179,7 @@ The format for running plzip is: other FILES and is read just once, the first time it appears in the command line. - Plzip supports the following options: + plzip supports the following options: '-h' '--help' @@ -154,12 +198,12 @@ command line. 
'-B BYTES' '--data-size=BYTES' - Set the size of the input data blocks, in bytes. The input file - will be divided in chunks of this size before compression is - performed. Valid values range from 8 KiB to 1 GiB. Default value - is two times the dictionary size, except for option '-0' where it - defaults to 1 MiB. Plzip will reduce the dictionary size if it is - larger than the chosen data size. + When compressing, set the size of the input data blocks in bytes. + The input file will be divided in chunks of this size before + compression is performed. Valid values range from 8 KiB to 1 GiB. + Default value is two times the dictionary size, except for option + '-0' where it defaults to 1 MiB. Plzip will reduce the dictionary + size if it is larger than the chosen data size. '-c' '--stdout' @@ -170,10 +214,10 @@ command line. '-d' '--decompress' - Decompress the specified file(s). If a file does not exist or - can't be opened, plzip continues decompressing the rest of the - files. If a file fails to decompress, plzip exits immediately - without decompressing the rest of the files. + Decompress the specified files. If a file does not exist or can't + be opened, plzip continues decompressing the rest of the files. If + a file fails to decompress, or is a terminal, plzip exits + immediately without decompressing the rest of the files. '-f' '--force' @@ -181,8 +225,8 @@ command line. '-F' '--recompress' - Force re-compression of files whose name already has the '.lz' or - '.tlz' suffix. + When compressing, force re-compression of files whose name already + has the '.lz' or '.tlz' suffix. '-k' '--keep' @@ -192,7 +236,7 @@ command line. '-l' '--list' Print the uncompressed size, compressed size and percentage saved - of the specified file(s). Trailing data are ignored. The values + of the specified files. Trailing data are ignored. The values produced are correct even for multimember files. 
If more than one file is given, a final line containing the cumulative sizes is printed. With '-v', the dictionary size, the number of members in @@ -206,18 +250,21 @@ command line. '-m BYTES' '--match-length=BYTES' - Set the match length limit in bytes. After a match this long is - found, the search is finished. Valid values range from 5 to 273. - Larger values usually give better compression ratios but longer - compression times. + When compressing, set the match length limit in bytes. After a + match this long is found, the search is finished. Valid values + range from 5 to 273. Larger values usually give better compression + ratios but longer compression times. '-n N' '--threads=N' - Set the number of worker threads. Valid values range from 1 to "as - many as your system can support". If this option is not used, - plzip tries to detect the number of processors in the system and - use it as default value. 'plzip --help' shows the system's default - value. + Set the number of worker threads, overriding the system's default. + Valid values range from 1 to "as many as your system can support". + If this option is not used, plzip tries to detect the number of + processors in the system and use it as default value. When + compressing on a 32 bit system, plzip tries to limit the memory + use to under 2.22 GiB (4 worker threads at level -9) by reducing + the number of threads below the system's default. 'plzip --help' + shows the system's default value. Note that the number of usable threads is limited to ceil( file_size / data_size ) during compression (*note Minimum @@ -228,8 +275,9 @@ command line. '--output=FILE' When reading from standard input and '--stdout' has not been specified, use 'FILE' as the virtual name of the uncompressed - file. This produces a file named 'FILE' when decompressing, and a - file named 'FILE.lz' when compressing. + file. This produces a file named 'FILE' when decompressing, or a + file named 'FILE.lz' when compressing. 
A second '.lz' extension is + not added if 'FILE' already ends in '.lz' or '.tlz'. '-q' '--quiet' @@ -237,13 +285,13 @@ command line. '-s BYTES' '--dictionary-size=BYTES' - Set the dictionary size limit in bytes. Plzip will use the smallest - possible dictionary size for each file without exceeding this - limit. Valid values range from 4 KiB to 512 MiB. Values 12 to 29 - are interpreted as powers of two, meaning 2^12 to 2^29 bytes. Note - that dictionary sizes are quantized. If the specified size does - not match one of the valid sizes, it will be rounded upwards by - adding up to (BYTES / 8) to it. + When compressing, set the dictionary size limit in bytes. Plzip + will use the smallest possible dictionary size for each file + without exceeding this limit. Valid values range from 4 KiB to + 512 MiB. Values 12 to 29 are interpreted as powers of two, meaning + 2^12 to 2^29 bytes. Note that dictionary sizes are quantized. If + the specified size does not match one of the valid sizes, it will + be rounded upwards by adding up to (BYTES / 8) to it. For maximum compression you should use a dictionary size limit as large as possible, but keep in mind that the decompression memory @@ -252,10 +300,10 @@ command line. '-t' '--test' - Check integrity of the specified file(s), but don't decompress - them. This really performs a trial decompression and throws away - the result. Use it together with '-v' to see information about - the file(s). If a file does not exist, can't be opened, or is a + Check integrity of the specified files, but don't decompress them. + This really performs a trial decompression and throws away the + result. Use it together with '-v' to see information about the + files. If a file does not exist, can't be opened, or is a terminal, plzip continues checking the rest of the files. If a file fails the test, plzip may be unable to check the rest of the files. @@ -263,17 +311,19 @@ command line. '-v' '--verbose' Verbose mode. 
- When compressing, show the compression ratio for each file - processed. A second '-v' shows the progress of compression. + When compressing, show the compression ratio and size for each file + processed. When decompressing or testing, further -v's (up to 4) increase the verbosity level, showing status, compression ratio, dictionary size, decompressed size, and compressed size. + Two or more '-v' options show the progress of (de)compression, + except for single-member files. '-0 .. -9' Set the compression parameters (dictionary size and match length limit) as shown in the table below. The default compression level is '-6'. Note that '-9' can be much slower than '-0'. These - options have no effect when decompressing. + options have no effect when decompressing, testing or listing. The bidimensional parameter space of LZMA can't be mapped to a linear scale optimal for all files. If your files are large, very @@ -296,6 +346,13 @@ command line. '--best' Aliases for GNU gzip compatibility. +'--loose-trailing' + When decompressing, testing or listing, allow trailing data whose + first bytes are so similar to the magic bytes of a lzip header + that they can be confused with a corrupt header. Use this option + if a file triggers a "corrupt header" error and the cause is not + indeed a corrupt header. + Numbers given as arguments to options may be followed by a multiplier and an optional 'B' for "byte". @@ -321,7 +378,7 @@ caused plzip to panic.  File: plzip.info, Node: Program design, Next: File format, Prev: Invoking plzip, Up: Top -3 Program design +4 Program design **************** When compressing, plzip divides the input file into chunks and @@ -344,6 +401,17 @@ them to the workers. The workers (de)compress the blocks received from the splitter. The muxer collects processed packets from the workers, and writes them to the output file. 
+ ,------------, + ,-->| worker 0 |--, + | `------------' | +,-------, ,----------, | ,------------, | ,-------, ,--------, +| input |-->| splitter |-+-->| worker 1 |--+-->| muxer |-->| output | +| file | `----------' | `------------' | `-------' | file | +`-------' | ... | `--------' + | ,------------, | + `-->| worker N-1 |--' + `------------' + When decompressing from a regular file, the splitter is removed and the workers read directly from the input file. If the output file is also a regular file, the muxer is also removed and the workers write @@ -355,7 +423,7 @@ I/O speed.  File: plzip.info, Node: File format, Next: Memory requirements, Prev: Program design, Up: Top -4 File format +5 File format ************* Perfection is reached, not when there is no longer anything to add, but @@ -426,17 +494,11 @@ additional information before, between, or after them.  File: plzip.info, Node: Memory requirements, Next: Minimum file sizes, Prev: File format, Up: Top -5 Memory required to compress and decompress +6 Memory required to compress and decompress ******************************************** -The amount of memory required *per thread* is approximately the -following: - - * For compression at level -0; 1.5 MiB plus 3 times the data size - (*note --data-size::). Default is 4.5 MiB. - - * For compression at other levels; 11 times the dictionary size plus - 3 times the data size. Default is 136 MiB. +The amount of memory required *per thread* for decompression or testing +is approximately the following: * For decompression of a regular (seekable) file to another regular file, or for testing of a regular file; the dictionary size. @@ -450,10 +512,35 @@ following: * For decompression of a non-seekable file or of standard input; the dictionary size plus up to 35 MiB. +The amount of memory required *per thread* for compression is +approximately the following: + + * For compression at level -0; 1.5 MiB plus 3.375 times the data size + (*note --data-size::). 
Default is 4.875 MiB. + + * For compression at other levels; 11 times the dictionary size plus + 3.375 times the data size. Default is 142 MiB. + +The following table shows the memory required *per thread* for +compression at a given level, using the default data size for each +level: + +Level Memory required +-0 4.875 MiB +-1 17.75 MiB +-2 26.625 MiB +-3 35.5 MiB +-4 53.25 MiB +-5 71 MiB +-6 142 MiB +-7 284 MiB +-8 426 MiB +-9 568 MiB +  File: plzip.info, Node: Minimum file sizes, Next: Trailing data, Prev: Memory requirements, Up: Top -6 Minimum file sizes required for full compression speed +7 Minimum file sizes required for full compression speed ******************************************************** When compressing, plzip divides the input file into chunks and @@ -466,7 +553,8 @@ must be at least as large as the number of worker threads times the chunk size (*note --data-size::). Else some processors will not get any data to compress, and compression will be proportionally slower. The maximum speed increase achievable on a given file is limited by the -ratio (file_size / data_size). +ratio (file_size / data_size). For example, a tarball the size of gcc or +linux will scale up to 8 processors at level -9. The following table shows the minimum uncompressed file size needed for full use of N processors at a given compression level, using the @@ -489,7 +577,7 @@ Level  File: plzip.info, Node: Trailing data, Next: Examples, Prev: Minimum file sizes, Up: Top -7 Extra data appended to the file +8 Extra data appended to the file ********************************* Sometimes extra data are found appended to a lzip file after the last @@ -501,10 +589,11 @@ member. Such trailing data may be: * Useful data added by the user; a cryptographically secure hash, a description of file contents, etc. It is safe to append any amount - of text to a lzip file as long as the text does not begin with the - string "LZIP", and does not contain any zero bytes (null - characters). 
Nonzero bytes and zero bytes can't be safely mixed in - trailing data. + of text to a lzip file as long as none of the first four bytes of + the text match the corresponding byte in the string "LZIP", and + the text does not contain any zero bytes (null characters). + Nonzero bytes and zero bytes can't be safely mixed in trailing + data. * Garbage added by some not totally successful copy operation. @@ -512,12 +601,17 @@ member. Such trailing data may be: and hash value (for a chosen hash) coincide with those of another file. - * In very rare cases, trailing data could be the corrupt header of - another member. In multimember or concatenated files the - probability of corruption happening in the magic bytes is 5 times - smaller than the probability of getting a false positive caused by - the corruption of the integrity information itself. Therefore it - can be considered to be below the noise level. + * In rare cases, trailing data could be the corrupt header of another + member. In multimember or concatenated files the probability of + corruption happening in the magic bytes is 5 times smaller than the + probability of getting a false positive caused by the corruption + of the integrity information itself. Therefore it can be + considered to be below the noise level. Additionally, the test + used by plzip to discriminate trailing data from a corrupt header + has a Hamming distance (HD) of 3, and the 3 bit flips must happen + in different magic bytes for the test to fail. In any case, the + option '--trailing-error' guarantees that any corrupt header will + be detected. 
Trailing data are in no way part of the lzip file format, but tools reading lzip files are expected to behave as correctly and usefully as @@ -531,7 +625,7 @@ cases where a file containing trailing data must be rejected, the option  File: plzip.info, Node: Examples, Next: Problems, Prev: Trailing data, Up: Top -8 A small tutorial with examples +9 A small tutorial with examples ******************************** WARNING! Even if plzip is bug-free, other causes may result in a corrupt @@ -595,8 +689,8 @@ to decompressed byte 15000 (5000 bytes are produced).  File: plzip.info, Node: Problems, Next: Concept index, Prev: Examples, Up: Top -9 Reporting bugs -**************** +10 Reporting bugs +***************** There are probably bugs in plzip. There are certainly errors and omissions in this manual. If you report them, they will get fixed. If @@ -625,6 +719,7 @@ Concept index * memory requirements: Memory requirements. (line 6) * minimum file sizes: Minimum file sizes. (line 6) * options: Invoking plzip. (line 6) +* output: Output. (line 6) * program design: Program design. (line 6) * trailing data: Trailing data. (line 6) * usage: Invoking plzip. 
(line 6) @@ -634,19 +729,20 @@ Concept index  Tag Table: Node: Top221 -Node: Introduction1103 -Node: Invoking plzip5274 -Ref: --trailing-error5843 -Ref: --data-size6086 -Node: Program design12796 -Node: File format14383 -Node: Memory requirements16815 -Node: Minimum file sizes17815 -Node: Trailing data19741 -Node: Examples21648 -Ref: concat-example22813 -Node: Problems23388 -Node: Concept index23914 +Node: Introduction1158 +Node: Output5134 +Node: Invoking plzip6614 +Ref: --trailing-error7177 +Ref: --data-size7420 +Node: Program design14938 +Node: File format17090 +Node: Memory requirements19522 +Node: Minimum file sizes20985 +Node: Trailing data23002 +Node: Examples25285 +Ref: concat-example26450 +Node: Problems27025 +Node: Concept index27553  End Tag Table diff --git a/doc/plzip.texi b/doc/plzip.texi index 5f32f6e..44cff75 100644 --- a/doc/plzip.texi +++ b/doc/plzip.texi @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 12 April 2017 -@set VERSION 1.6 +@set UPDATED 7 February 2018 +@set VERSION 1.7 @dircategory Data Compression @direntry @@ -36,6 +36,7 @@ This manual is for Plzip (version @value{VERSION}, @value{UPDATED}). @menu * Introduction:: Purpose and features of plzip +* Output:: Meaning of plzip's output * Invoking plzip:: Command line interface * Program design:: Internal structure of plzip * File format:: Detailed format of the compressed file @@ -48,7 +49,7 @@ This manual is for Plzip (version @value{VERSION}, @value{UPDATED}). @end menu @sp 1 -Copyright @copyright{} 2009-2017 Antonio Diaz Diaz. +Copyright @copyright{} 2009-2018 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute and modify it. @@ -81,7 +82,7 @@ availability: The lzip format provides very safe integrity checking and some data recovery means. 
The @uref{http://www.nongnu.org/lzip/manual/lziprecover_manual.html#Data-safety,,lziprecover} -program can repair bit-flip errors (one of the most common forms of data +program can repair bit flip errors (one of the most common forms of data corruption) in lzip files, and provides data recovery capabilities, including error-checked merging of damaged copies of a file. @ifnothtml @@ -143,9 +144,54 @@ incomprehensible and therefore pointless. Plzip will correctly decompress a file which is the concatenation of two or more compressed files. The result is the concatenation of the -corresponding uncompressed files. Integrity testing of concatenated +corresponding decompressed files. Integrity testing of concatenated compressed files is also supported. + +@node Output +@chapter Meaning of plzip's output +@cindex output + +The output of plzip looks like this: + +@example +plzip -v foo + foo: 6.676:1, 14.98% ratio, 85.02% saved, 450560 in, 67493 out. + +plzip -tvv foo.lz + foo.lz: 6.676:1, 14.98% ratio, 85.02% saved. ok +@end example + +The meaning of each field is as follows: + +@table @code +@item N:1 +The compression ratio @w{(uncompressed_size / compressed_size)}, shown +as N to 1. + +@item ratio +The inverse compression ratio @w{(compressed_size / uncompressed_size)}, +shown as a percentage. A decimal ratio is easily obtained by moving the +decimal point two places to the left; @w{14.98% = 0.1498}. + +@item saved +The space saved by compression @w{(1 - ratio)}, shown as a percentage. + +@item in +The size of the uncompressed data. When decompressing or testing, it is +shown as @code{decompressed}. Note that plzip always prints the +uncompressed size before the compressed size when compressing, +decompressing, testing or listing. + +@item out +The size of the compressed data. When decompressing or testing, it is +shown as @code{compressed}. 
+ +@end table + +When decompressing or testing at verbosity level 4 (-vvvv), the +dictionary size used to compress the file is also shown. + LANGUAGE NOTE: Uncompressed = not compressed = plain data; it may never have been compressed. Decompressed is used to refer to data which have undergone the process of decompression. @@ -169,7 +215,7 @@ plzip [@var{options}] [@var{files}] mixed with other @var{files} and is read just once, the first time it appears in the command line. -Plzip supports the following options: +plzip supports the following options: @table @code @item -h @@ -190,12 +236,12 @@ garbage that can be safely ignored. @xref{concat-example}. @anchor{--data-size} @item -B @var{bytes} @itemx --data-size=@var{bytes} -Set the size of the input data blocks, in bytes. The input file will be -divided in chunks of this size before compression is performed. Valid -values range from 8 KiB to 1 GiB. Default value is two times the -dictionary size, except for option @samp{-0} where it defaults to 1 MiB. -Plzip will reduce the dictionary size if it is larger than the chosen -data size. +When compressing, set the size of the input data blocks in bytes. The +input file will be divided into chunks of this size before compression is +performed. Valid values range from @w{8 KiB} to @w{1 GiB}. Default value +is two times the dictionary size, except for option @samp{-0} where it +defaults to @w{1 MiB}. Plzip will reduce the dictionary size if it is +larger than the chosen data size. @item -c @itemx --stdout @@ -206,10 +252,10 @@ device. @item -d @itemx --decompress -Decompress the specified file(s). If a file does not exist or can't be +Decompress the specified files. If a file does not exist or can't be opened, plzip continues decompressing the rest of the files. If a file -fails to decompress, plzip exits immediately without decompressing the -rest of the files. +fails to decompress, or is a terminal, plzip exits immediately without +decompressing the rest of the files.
@item -f @itemx --force @@ -217,8 +263,8 @@ Force overwrite of output files. @item -F @itemx --recompress -Force re-compression of files whose name already has the @samp{.lz} or -@samp{.tlz} suffix. +When compressing, force re-compression of files whose name already has +the @samp{.lz} or @samp{.tlz} suffix. @item -k @itemx --keep @@ -227,7 +273,7 @@ Keep (don't delete) input files during compression or decompression. @item -l @itemx --list Print the uncompressed size, compressed size and percentage saved of the -specified file(s). Trailing data are ignored. The values produced are +specified files. Trailing data are ignored. The values produced are correct even for multimember files. If more than one file is given, a final line containing the cumulative sizes is printed. With @samp{-v}, the dictionary size, the number of members in the file, and the amount @@ -240,16 +286,21 @@ verifies that none of the specified files contain trailing data. @item -m @var{bytes} @itemx --match-length=@var{bytes} -Set the match length limit in bytes. After a match this long is found, -the search is finished. Valid values range from 5 to 273. Larger values -usually give better compression ratios but longer compression times. +When compressing, set the match length limit in bytes. After a match +this long is found, the search is finished. Valid values range from 5 to +273. Larger values usually give better compression ratios but longer +compression times. @item -n @var{n} @itemx --threads=@var{n} -Set the number of worker threads. Valid values range from 1 to "as many -as your system can support". If this option is not used, plzip tries to -detect the number of processors in the system and use it as default -value. @w{@samp{plzip --help}} shows the system's default value. +Set the number of worker threads, overriding the system's default. Valid +values range from 1 to "as many as your system can support". 
If this +option is not used, plzip tries to detect the number of processors in +the system and use it as default value. When compressing on a @w{32 bit} +system, plzip tries to limit the memory use to under @w{2.22 GiB} (4 +worker threads at level -9) by reducing the number of threads below the +system's default. @w{@samp{plzip --help}} shows the system's default +value. Note that the number of usable threads is limited to @w{ceil( file_size / data_size )} during compression (@pxref{Minimum file sizes}), and to @@ -260,7 +311,9 @@ the number of members in the input during decompression. When reading from standard input and @samp{--stdout} has not been specified, use @samp{@var{file}} as the virtual name of the uncompressed file. This produces a file named @samp{@var{file}} when decompressing, -and a file named @samp{@var{file}.lz} when compressing. +or a file named @samp{@var{file}.lz} when compressing. A second +@samp{.lz} extension is not added if @samp{@var{file}} already ends in +@samp{.lz} or @samp{.tlz}. @item -q @itemx --quiet @@ -268,12 +321,12 @@ Quiet operation. Suppress all messages. @item -s @var{bytes} @itemx --dictionary-size=@var{bytes} -Set the dictionary size limit in bytes. Plzip will use the smallest -possible dictionary size for each file without exceeding this limit. -Valid values range from 4 KiB to 512 MiB. Values 12 to 29 are -interpreted as powers of two, meaning 2^12 to 2^29 bytes. Note that -dictionary sizes are quantized. If the specified size does not match one -of the valid sizes, it will be rounded upwards by adding up to +When compressing, set the dictionary size limit in bytes. Plzip will use +the smallest possible dictionary size for each file without exceeding +this limit. Valid values range from @w{4 KiB} to @w{512 MiB}. Values 12 +to 29 are interpreted as powers of two, meaning 2^12 to 2^29 bytes. Note +that dictionary sizes are quantized. 
If the specified size does not +match one of the valid sizes, it will be rounded upwards by adding up to @w{(@var{bytes} / 8)} to it. For maximum compression you should use a dictionary size limit as large @@ -282,27 +335,29 @@ is affected at compression time by the choice of dictionary size limit. @item -t @itemx --test -Check integrity of the specified file(s), but don't decompress them. -This really performs a trial decompression and throws away the result. -Use it together with @samp{-v} to see information about the file(s). If -a file does not exist, can't be opened, or is a terminal, plzip -continues checking the rest of the files. If a file fails the test, -plzip may be unable to check the rest of the files. +Check integrity of the specified files, but don't decompress them. This +really performs a trial decompression and throws away the result. Use it +together with @samp{-v} to see information about the files. If a file +does not exist, can't be opened, or is a terminal, plzip continues +checking the rest of the files. If a file fails the test, plzip may be +unable to check the rest of the files. @item -v @itemx --verbose Verbose mode.@* -When compressing, show the compression ratio for each file processed. A -second @samp{-v} shows the progress of compression.@* +When compressing, show the compression ratio and size for each file +processed.@* When decompressing or testing, further -v's (up to 4) increase the verbosity level, showing status, compression ratio, dictionary size, -decompressed size, and compressed size. +decompressed size, and compressed size.@* +Two or more @samp{-v} options show the progress of (de)compression, +except for single-member files. @item -0 .. -9 Set the compression parameters (dictionary size and match length limit) as shown in the table below. The default compression level is @samp{-6}. Note that @samp{-9} can be much slower than @samp{-0}. These options -have no effect when decompressing. 
+have no effect when decompressing, testing or listing. The bidimensional parameter space of LZMA can't be mapped to a linear scale optimal for all files. If your files are large, very repetitive, @@ -327,6 +382,12 @@ etc, you may need to use the @samp{--dictionary-size} and @itemx --best Aliases for GNU gzip compatibility. +@item --loose-trailing +When decompressing, testing or listing, allow trailing data whose first +bytes are so similar to the magic bytes of a lzip header that they can +be confused with a corrupt header. Use this option if a file triggers a +"corrupt header" error and the cause is not actually a corrupt header. + @end table Numbers given as arguments to options may be followed by a multiplier @@ -363,8 +424,8 @@ creating a multimember compressed file. When decompressing, plzip decompresses as many members simultaneously as worker threads are chosen. Files that were compressed with lzip will not -be decompressed faster than using lzip (unless the @samp{-b} option was -used) because lzip usually produces single-member files, which can't be +be decompressed faster than using lzip (unless the @samp{-b} option was used) +because lzip usually produces single-member files, which can't be decompressed in parallel. For each input file, a splitter thread and several worker threads are @@ -377,6 +438,19 @@ to the workers. The workers (de)compress the blocks received from the splitter. The muxer collects processed packets from the workers, and writes them to the output file. +@verbatim + ,------------, + ,-->| worker 0 |--, + | `------------' | +,-------, ,----------, | ,------------, | ,-------, ,--------, +| input |-->| splitter |-+-->| worker 1 |--+-->| muxer |-->| output | +| file | `----------' | `------------' | `-------' | file | +`-------' | ... | `--------' + | ,------------, | + `-->| worker N-1 |--' + `------------' +@end verbatim + When decompressing from a regular file, the splitter is removed and the workers read directly from the input file.
If the output file is also a regular file, the muxer is also removed and the workers write directly @@ -472,35 +546,60 @@ facilitates safe recovery of undamaged members from multimember files. @chapter Memory required to compress and decompress @cindex memory requirements -The amount of memory required @strong{per thread} is approximately the -following: +The amount of memory required @strong{per thread} for decompression or +testing is approximately the following: @itemize @bullet -@item -For compression at level -0; 1.5 MiB plus 3 times the data size -(@pxref{--data-size}). Default is 4.5 MiB. - -@item -For compression at other levels; 11 times the dictionary size plus 3 -times the data size. Default is 136 MiB. - @item For decompression of a regular (seekable) file to another regular file, or for testing of a regular file; the dictionary size. @item For testing of a non-seekable file or of standard input; the dictionary -size plus up to 5 MiB. +size plus up to @w{5 MiB}. @item For decompression of a regular file to a non-seekable file or to -standard output; the dictionary size plus up to 32 MiB. +standard output; the dictionary size plus up to @w{32 MiB}. @item For decompression of a non-seekable file or of standard input; the -dictionary size plus up to 35 MiB. +dictionary size plus up to @w{35 MiB}. +@end itemize + +@noindent +The amount of memory required @strong{per thread} for compression is +approximately the following: + +@itemize @bullet +@item +For compression at level -0; @w{1.5 MiB} plus 3.375 times the data size +(@pxref{--data-size}). Default is @w{4.875 MiB}. + +@item +For compression at other levels; 11 times the dictionary size plus 3.375 +times the data size. Default is @w{142 MiB}. 
@end itemize +@noindent +The following table shows the memory required @strong{per thread} for +compression at a given level, using the default data size for each +level: + +@multitable {Level} {Memory required} +@item Level @tab Memory required +@item -0 @tab 4.875 MiB +@item -1 @tab 17.75 MiB +@item -2 @tab 26.625 MiB +@item -3 @tab 35.5 MiB +@item -4 @tab 53.25 MiB +@item -5 @tab 71 MiB +@item -6 @tab 142 MiB +@item -7 @tab 284 MiB +@item -8 @tab 426 MiB +@item -9 @tab 568 MiB +@end multitable + @node Minimum file sizes @chapter Minimum file sizes required for full compression speed @@ -516,7 +615,8 @@ least as large as the number of worker threads times the chunk size (@pxref{--data-size}). Else some processors will not get any data to compress, and compression will be proportionally slower. The maximum speed increase achievable on a given file is limited by the ratio -@w{(file_size / data_size)}. +@w{(file_size / data_size)}. For example, a tarball the size of gcc or +linux will scale up to 8 processors at level -9. The following table shows the minimum uncompressed file size needed for full use of N processors at a given compression level, using the default @@ -554,9 +654,10 @@ padding zero bytes to a lzip file. @item Useful data added by the user; a cryptographically secure hash, a description of file contents, etc. It is safe to append any amount of -text to a lzip file as long as the text does not begin with the string -"LZIP", and does not contain any zero bytes (null characters). Nonzero -bytes and zero bytes can't be safely mixed in trailing data. +text to a lzip file as long as none of the first four bytes of the text +match the corresponding byte in the string "LZIP", and the text does not +contain any zero bytes (null characters). Nonzero bytes and zero bytes +can't be safely mixed in trailing data. @item Garbage added by some not totally successful copy operation. 
@@ -566,12 +667,16 @@ Malicious data added to the file in order to make its total size and hash value (for a chosen hash) coincide with those of another file. @item -In very rare cases, trailing data could be the corrupt header of another +In rare cases, trailing data could be the corrupt header of another member. In multimember or concatenated files the probability of corruption happening in the magic bytes is 5 times smaller than the probability of getting a false positive caused by the corruption of the integrity information itself. Therefore it can be considered to be below -the noise level. +the noise level. Additionally, the test used by plzip to discriminate +trailing data from a corrupt header has a Hamming distance (HD) of 3, +and the 3 bit flips must happen in different magic bytes for the test to +fail. In any case, the option @samp{--trailing-error} guarantees that +any corrupt header will be detected. @end itemize Trailing data are in no way part of the lzip file format, but tools @@ -607,7 +712,7 @@ plzip -v file @sp 1 @noindent Example 2: Like example 1 but the created @samp{file.lz} has a block -size of 1 MiB. The compression ratio is not shown. +size of @w{1 MiB}. The compression ratio is not shown. @example plzip -B 1MiB file @@ -656,7 +761,7 @@ Do this instead @sp 1 @noindent -Example 7: Decompress @samp{file.lz} partially until 10 KiB of +Example 7: Decompress @samp{file.lz} partially until @w{10 KiB} of decompressed data are produced. @example diff --git a/file_index.cc b/file_index.cc index 581b516..8238054 100644 --- a/file_index.cc +++ b/file_index.cc @@ -1,5 +1,5 @@ /* Plzip - Parallel compressor compatible with lzip - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -60,7 +60,8 @@ void File_index::set_num_error( const char * const msg, unsigned long long num ) // If successful, push last member and set pos to member header. -bool File_index::skip_trailing_data( const int fd, long long & pos ) +bool File_index::skip_trailing_data( const int fd, long long & pos, + const bool ignore_trailing, const bool loose_trailing ) { enum { block_size = 16384, buffer_size = block_size + File_trailer::size - 1 + File_header::size }; @@ -95,10 +96,13 @@ bool File_index::skip_trailing_data( const int fd, long long & pos ) if( !header.verify_magic() || !header.verify_version() || !isvalid_ds( dictionary_size ) ) continue; if( (*(File_header *)( buffer + i )).verify_prefix( bsize - i ) ) - { - error_ = "Last member in input file is truncated or corrupt."; - retval_ = 2; return false; - } + { error_ = "Last member in input file is truncated or corrupt."; + retval_ = 2; return false; } + if( !loose_trailing && bsize - i >= File_header::size && + (*(File_header *)( buffer + i )).verify_corrupt() ) + { error_ = corrupt_mm_msg; retval_ = 2; return false; } + if( !ignore_trailing ) + { error_ = trailing_msg; retval_ = 2; return false; } pos = ipos + i - member_size; member_vector.push_back( Member( 0, trailer.data_size(), pos, member_size, dictionary_size ) ); @@ -116,7 +120,8 @@ bool File_index::skip_trailing_data( const int fd, long long & pos ) } -File_index::File_index( const int infd, const bool ignore_trailing ) +File_index::File_index( const int infd, const bool ignore_trailing, + const bool loose_trailing ) : isize( lseek( infd, 0, SEEK_END ) ), retval_( 0 ) { if( isize < 0 ) @@ -147,11 +152,10 @@ File_index::File_index( const int infd, const bool ignore_trailing ) const unsigned long long member_size = trailer.member_size(); if( member_size < min_member_size || member_size > (unsigned long long)pos ) { - 
if( !member_vector.empty() ) - set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 ); - else if( skip_trailing_data( infd, pos ) ) - { if( ignore_trailing ) continue; - error_ = trailing_msg; retval_ = 2; return; } + if( member_vector.empty() ) + { if( skip_trailing_data( infd, pos, ignore_trailing, loose_trailing ) ) + continue; else return; } + set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 ); break; } if( seek_read( infd, header.data, File_header::size, @@ -161,11 +165,10 @@ File_index::File_index( const int infd, const bool ignore_trailing ) if( !header.verify_magic() || !header.verify_version() || !isvalid_ds( dictionary_size ) ) { - if( !member_vector.empty() ) - set_num_error( "Bad header at pos ", pos - member_size ); - else if( skip_trailing_data( infd, pos ) ) - { if( ignore_trailing ) continue; - error_ = trailing_msg; retval_ = 2; return; } + if( member_vector.empty() ) + { if( skip_trailing_data( infd, pos, ignore_trailing, loose_trailing ) ) + continue; else return; } + set_num_error( "Bad header at pos ", pos - member_size ); break; } pos -= member_size; diff --git a/file_index.h b/file_index.h index 5b9813e..7962b99 100644 --- a/file_index.h +++ b/file_index.h @@ -1,5 +1,5 @@ /* Plzip - Parallel compressor compatible with lzip - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -55,10 +55,12 @@ class File_index void set_errno_error( const char * const msg ); void set_num_error( const char * const msg, unsigned long long num ); - bool skip_trailing_data( const int fd, long long & pos ); + bool skip_trailing_data( const int fd, long long & pos, + const bool ignore_trailing, const bool loose_trailing ); public: - File_index( const int infd, const bool ignore_trailing ); + File_index( const int infd, const bool ignore_trailing, + const bool loose_trailing ); long members() const { return member_vector.size(); } const std::string & error() const { return error_; } diff --git a/list.cc b/list.cc index f4169f8..eeef1c3 100644 --- a/list.cc +++ b/list.cc @@ -1,5 +1,5 @@ /* Plzip - Parallel compressor compatible with lzip - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,6 +21,7 @@ #include <cstring> #include <string> #include <vector> +#include <pthread.h> #include <stdint.h> #include <unistd.h> #include <sys/stat.h> @@ -37,7 +38,7 @@ void list_line( const unsigned long long uncomp_size, { if( uncomp_size > 0 ) std::printf( "%15llu %15llu %6.2f%% %s\n", uncomp_size, comp_size, - 100.0 * ( 1.0 - ( (double)comp_size / uncomp_size ) ), + 100.0 - ( ( 100.0 * comp_size ) / uncomp_size ), input_filename ); else std::printf( "%15llu %15llu -INF%% %s\n", uncomp_size, comp_size, @@ -48,7 +49,7 @@ void list_line( const unsigned long long uncomp_size, int list_files( const std::vector< std::string > & filenames, - const bool ignore_trailing ) + const bool ignore_trailing, const bool loose_trailing ) { unsigned long long total_comp = 0, total_uncomp = 0; int files = 0, retval = 0; @@ -61,11 +62,11 @@ int list_files( const std::vector< std::string > & filenames, const char * const input_filename = from_stdin ?
"(stdin)" : filenames[i].c_str(); struct stat in_stats; // not used - const int infd = from_stdin ? STDIN_FILENO : + const int infd = from_stdin ? STDIN_FILENO : open_instream( input_filename, &in_stats, true, true ); if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; } - const File_index file_index( infd, ignore_trailing ); + const File_index file_index( infd, ignore_trailing, loose_trailing ); close( infd ); if( file_index.retval() != 0 ) { diff --git a/lzip.h b/lzip.h index ee09d33..3587a8f 100644 --- a/lzip.h +++ b/lzip.h @@ -1,5 +1,5 @@ /* Plzip - Parallel compressor compatible with lzip - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -27,16 +27,19 @@ enum { min_member_size = 36 }; -class Pretty_print +// defined in main.cc +extern int verbosity; + +class Pretty_print // requires global var 'int verbosity' { std::string name_; + std::string padded_name; const char * const stdin_name; unsigned longest_name; mutable bool first_post; public: - Pretty_print( const std::vector< std::string > & filenames, - const int verbosity ) + Pretty_print( const std::vector< std::string > & filenames ) : stdin_name( "(stdin)" ), longest_name( 0 ), first_post( false ) { if( verbosity <= 0 ) return; @@ -54,6 +57,9 @@ public: { if( filename.size() && filename != "-" ) name_ = filename; else name_ = stdin_name; + padded_name = " "; padded_name += name_; padded_name += ": "; + if( name_.size() < longest_name ) + padded_name.append( longest_name - name_.size(), ' ' ); first_post = true; } @@ -88,11 +94,19 @@ struct File_header void set_magic() { std::memcpy( data, magic_string, 4 ); data[4] = 1; } bool verify_magic() const { return ( std::memcmp( data, magic_string, 4 ) == 0 ); } - bool verify_prefix( const int size ) const // detect truncated header + + bool verify_prefix( const int sz ) const // 
detect (truncated) header { - for( int i = 0; i < size && i < 4; ++i ) + for( int i = 0; i < sz && i < 4; ++i ) if( data[i] != magic_string[i] ) return false; - return ( size > 0 ); + return ( sz > 0 ); + } + bool verify_corrupt() const // detect corrupt header + { + int matches = 0; + for( int i = 0; i < 4; ++i ) + if( data[i] == magic_string[i] ) ++matches; + return ( matches > 1 && matches < 4 ); } uint8_t version() const { return data[4]; } @@ -165,6 +179,7 @@ struct File_trailer const char * const bad_magic_msg = "Bad magic number (file not in lzip format)."; const char * const bad_dict_msg = "Invalid dictionary size in member header."; +const char * const corrupt_mm_msg = "Corrupt header in multimember file."; const char * const trailing_msg = "Trailing data not allowed."; // defined in compress.cc @@ -179,7 +194,8 @@ void xunlock( pthread_mutex_t * const mutex ); void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex ); void xsignal( pthread_cond_t * const cond ); void xbroadcast( pthread_cond_t * const cond ); -int compress( const int data_size, const int dictionary_size, +int compress( const unsigned long long cfile_size, + const int data_size, const int dictionary_size, const int match_len_limit, const int num_workers, const int infd, const int outfd, const Pretty_print & pp, const int debug_level ); @@ -193,25 +209,26 @@ int dec_stdout( const int num_workers, const int infd, const int outfd, const File_index & file_index ); // defined in dec_stream.cc -int dec_stream( const int num_workers, const int infd, const int outfd, +int dec_stream( const unsigned long long cfile_size, + const int num_workers, const int infd, const int outfd, const Pretty_print & pp, const int debug_level, - const bool ignore_trailing ); + const bool ignore_trailing, const bool loose_trailing ); // defined in decompress.cc int preadblock( const int fd, uint8_t * const buf, const int size, const long long pos ); int decompress_read_error( struct LZ_Decoder * const 
decoder, const Pretty_print & pp, const int worker_id ); -int decompress( int num_workers, const int infd, const int outfd, - const Pretty_print & pp, const int debug_level, - const bool ignore_trailing, const bool infd_isreg ); +int decompress( const unsigned long long cfile_size, int num_workers, + const int infd, const int outfd, const Pretty_print & pp, + const int debug_level, const bool ignore_trailing, + const bool loose_trailing, const bool infd_isreg ); // defined in list.cc int list_files( const std::vector< std::string > & filenames, - const bool ignore_trailing ); + const bool ignore_trailing, const bool loose_trailing ); // defined in main.cc -extern int verbosity; struct stat; const char * bad_version( const unsigned version ); const char * format_ds( const unsigned dictionary_size ); @@ -224,9 +241,9 @@ void show_error( const char * const msg, const int errcode = 0, void show_file_error( const char * const filename, const char * const msg, const int errcode = 0 ); void internal_error( const char * const msg ); -void show_progress( const int packet_size, - const Pretty_print * const p = 0, - const unsigned long long cfile_size = 0 ); +void show_progress( const unsigned long long packet_size, + const unsigned long long cfile_size = 0, + const Pretty_print * const p = 0 ); class Slot_tally @@ -262,12 +279,4 @@ public: if( ++num_free == 1 ) xsignal( &slot_av ); // num_free was 0 xunlock( &mutex ); } - - void leave_slots( const int slots ) // return slots to the tally - { - xlock( &mutex ); - num_free += slots; - if( num_free == slots ) xsignal( &slot_av ); // num_free was 0 - xunlock( &mutex ); - } }; diff --git a/main.cc b/main.cc index 5e75690..d1f76bc 100644 --- a/main.cc +++ b/main.cc @@ -1,6 +1,6 @@ /* Plzip - Parallel compressor compatible with lzip Copyright (C) 2009 Laszlo Ersek. - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -73,7 +73,7 @@ namespace { const char * const Program_name = "Plzip"; const char * const program_name = "plzip"; -const char * const program_year = "2017"; +const char * const program_year = "2018"; const char * invocation_name = 0; const struct { const char * from; const char * to; } known_extensions[] = { @@ -118,7 +118,9 @@ void show_help( const long num_online ) " -v, --verbose be verbose (a 2nd -v gives more)\n" " -0 .. -9 set compression level [default 6]\n" " --fast alias for -0\n" - " --best alias for -9\n", num_online ); + " --best alias for -9\n" + " --loose-trailing allow trailing data seeming corrupt header\n" + , num_online ); if( verbosity >= 1 ) { std::printf( " --debug=<level> (0-1) print debug statistics to stderr\n" ); @@ -145,8 +147,8 @@ void show_help( const long num_online ) void show_version() { std::printf( "%s %s\n", program_name, PROGVERSION ); - std::printf( "Copyright (C) 2009 Laszlo Ersek.\n" - "Copyright (C) %s Antonio Diaz Diaz.\n", program_year ); + std::printf( "Copyright (C) 2009 Laszlo Ersek.\n" ); + std::printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year ); std::printf( "Using lzlib %s\n", LZ_version() ); std::printf( "License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n" "This is free software: you are free to change and redistribute it.\n" @@ -155,6 +157,21 @@ void show_version() } // end namespace +void Pretty_print::operator()( const char * const msg ) const + { + if( verbosity >= 0 ) + { + if( first_post ) + { + first_post = false; + std::fputs( padded_name.c_str(), stderr ); + if( !msg ) std::fflush( stderr ); + } + if( msg ) std::fprintf( stderr, "%s\n", msg ); + } + } + + const char * bad_version( const unsigned version ) { static char buf[80]; @@ -185,8 +202,7 @@ const char * format_ds( const unsigned dictionary_size ) void show_header( const unsigned dictionary_size ) { - if( verbosity
>= 3 ) - std::fprintf( stderr, "dictionary %s. ", format_ds( dictionary_size ) ); + std::fprintf( stderr, "dictionary %s, ", format_ds( dictionary_size ) ); } namespace { @@ -278,6 +294,33 @@ int extension_index( const std::string & name ) return -1; } + +void set_c_outname( const std::string & name, const bool force_ext ) + { + output_filename = name; + if( force_ext || extension_index( output_filename ) < 0 ) + output_filename += known_extensions[0].from; + } + + +void set_d_outname( const std::string & name, const int eindex ) + { + if( eindex >= 0 ) + { + const std::string from( known_extensions[eindex].from ); + if( name.size() > from.size() ) + { + output_filename.assign( name, 0, name.size() - from.size() ); + output_filename += known_extensions[eindex].to; + return; + } + } + output_filename = name; output_filename += ".out"; + if( verbosity >= 1 ) + std::fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'\n", + program_name, name.c_str(), output_filename.c_str() ); + } + } // end namespace int open_instream( const char * const name, struct stat * const in_statsp, @@ -325,32 +368,6 @@ int open_instream2( const char * const name, struct stat * const in_statsp, } -void set_c_outname( const std::string & name ) - { - output_filename = name; - output_filename += known_extensions[0].from; - } - - -void set_d_outname( const std::string & name, const int eindex ) - { - if( eindex >= 0 ) - { - const std::string from( known_extensions[eindex].from ); - if( name.size() > from.size() ) - { - output_filename.assign( name, 0, name.size() - from.size() ); - output_filename += known_extensions[eindex].to; - return; - } - } - output_filename = name; output_filename += ".out"; - if( verbosity >= 1 ) - std::fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'\n", - program_name, name.c_str(), output_filename.c_str() ); - } - - bool open_outstream( const bool force, const bool from_stdin ) { const mode_t usr_rw = S_IRUSR | S_IWUSR; @@ 
-404,15 +421,19 @@ void cleanup_and_fail( const int retval ) static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; pthread_mutex_lock( &mutex ); // ignore errors to avoid loop + const int saved_verbosity = verbosity; + verbosity = -1; // suppress messages from other threads if( delete_output_on_interrupt ) { delete_output_on_interrupt = false; - if( verbosity >= 0 ) + if( saved_verbosity >= 0 ) std::fprintf( stderr, "%s: Deleting output file '%s', if it exists.\n", program_name, output_filename.c_str() ); if( outfd >= 0 ) { close( outfd ); outfd = -1; } - if( std::remove( output_filename.c_str() ) != 0 && errno != ENOENT ) - show_error( "WARNING: deletion of output file (apparently) failed." ); + if( std::remove( output_filename.c_str() ) != 0 && errno != ENOENT && + saved_verbosity >= 0 ) + std::fprintf( stderr, "%s: WARNING: deletion of output file " + "(apparently) failed.\n", program_name ); } std::exit( retval ); } @@ -503,25 +524,30 @@ void internal_error( const char * const msg ) } -void show_progress( const int packet_size, - const Pretty_print * const p, - const unsigned long long cfile_size ) +void show_progress( const unsigned long long packet_size, + const unsigned long long cfile_size, + const Pretty_print * const p ) { static unsigned long long csize = 0; // file_size / 100 static unsigned long long pos = 0; static const Pretty_print * pp = 0; static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + static bool enabled = true; - if( verbosity < 2 ) return; + if( !enabled ) return; if( p ) // initialize static vars - { csize = cfile_size; pos = 0; pp = p; } + { + if( verbosity < 2 || !isatty( STDERR_FILENO ) ) { enabled = false; return; } + csize = cfile_size; pos = 0; pp = p; + } if( pp ) { xlock( &mutex ); pos += packet_size; if( csize > 0 ) - std::fprintf( stderr, "%4llu%%", pos / csize ); - std::fprintf( stderr, " %.1f MB\r", pos / 1000000.0 ); + std::fprintf( stderr, "%4llu%% %.1f MB\r", pos / csize, pos / 1000000.0 ); + else + 
std::fprintf( stderr, " %.1f MB\r", pos / 1000000.0 ); pp->reset(); (*pp)(); // restore cursor position xunlock( &mutex ); } @@ -549,12 +575,12 @@ int main( const int argc, const char * const argv[] ) std::vector< std::string > filenames; int data_size = 0; int debug_level = 0; - int infd = -1; int num_workers = 0; // start this many worker threads Mode program_mode = m_compress; bool force = false; bool ignore_trailing = true; bool keep_input_files = false; + bool loose_trailing = false; bool recompress = false; bool to_stdout = false; invocation_name = argv[0]; @@ -563,50 +589,51 @@ int main( const int argc, const char * const argv[] ) { show_error( "Bad library version. At least lzlib 1.0 is required." ); return 1; } - const long num_online = std::max( 1L, sysconf( _SC_NPROCESSORS_ONLN ) ); - long max_workers = sysconf( _SC_THREAD_THREADS_MAX ); - if( max_workers < 1 || max_workers > INT_MAX / (int)sizeof (pthread_t) ) - max_workers = INT_MAX / sizeof (pthread_t); - - enum Optcode { opt_dbg = 256 }; + enum { opt_dbg = 256, opt_lt }; const Arg_parser::Option options[] = { - { '0', "fast", Arg_parser::no }, - { '1', 0, Arg_parser::no }, - { '2', 0, Arg_parser::no }, - { '3', 0, Arg_parser::no }, - { '4', 0, Arg_parser::no }, - { '5', 0, Arg_parser::no }, - { '6', 0, Arg_parser::no }, - { '7', 0, Arg_parser::no }, - { '8', 0, Arg_parser::no }, - { '9', "best", Arg_parser::no }, - { 'a', "trailing-error", Arg_parser::no }, - { 'b', "member-size", Arg_parser::yes }, - { 'B', "data-size", Arg_parser::yes }, - { 'c', "stdout", Arg_parser::no }, - { 'd', "decompress", Arg_parser::no }, - { 'f', "force", Arg_parser::no }, - { 'F', "recompress", Arg_parser::no }, - { 'h', "help", Arg_parser::no }, - { 'k', "keep", Arg_parser::no }, - { 'l', "list", Arg_parser::no }, - { 'm', "match-length", Arg_parser::yes }, - { 'n', "threads", Arg_parser::yes }, - { 'o', "output", Arg_parser::yes }, - { 'q', "quiet", Arg_parser::no }, - { 's', "dictionary-size", Arg_parser::yes }, - { 
'S', "volume-size", Arg_parser::yes }, - { 't', "test", Arg_parser::no }, - { 'v', "verbose", Arg_parser::no }, - { 'V', "version", Arg_parser::no }, - { opt_dbg, "debug", Arg_parser::yes }, - { 0 , 0, Arg_parser::no } }; + { '0', "fast", Arg_parser::no }, + { '1', 0, Arg_parser::no }, + { '2', 0, Arg_parser::no }, + { '3', 0, Arg_parser::no }, + { '4', 0, Arg_parser::no }, + { '5', 0, Arg_parser::no }, + { '6', 0, Arg_parser::no }, + { '7', 0, Arg_parser::no }, + { '8', 0, Arg_parser::no }, + { '9', "best", Arg_parser::no }, + { 'a', "trailing-error", Arg_parser::no }, + { 'b', "member-size", Arg_parser::yes }, + { 'B', "data-size", Arg_parser::yes }, + { 'c', "stdout", Arg_parser::no }, + { 'd', "decompress", Arg_parser::no }, + { 'f', "force", Arg_parser::no }, + { 'F', "recompress", Arg_parser::no }, + { 'h', "help", Arg_parser::no }, + { 'k', "keep", Arg_parser::no }, + { 'l', "list", Arg_parser::no }, + { 'm', "match-length", Arg_parser::yes }, + { 'n', "threads", Arg_parser::yes }, + { 'o', "output", Arg_parser::yes }, + { 'q', "quiet", Arg_parser::no }, + { 's', "dictionary-size", Arg_parser::yes }, + { 'S', "volume-size", Arg_parser::yes }, + { 't', "test", Arg_parser::no }, + { 'v', "verbose", Arg_parser::no }, + { 'V', "version", Arg_parser::no }, + { opt_dbg, "debug", Arg_parser::yes }, + { opt_lt, "loose-trailing", Arg_parser::no }, + { 0 , 0, Arg_parser::no } }; const Arg_parser parser( argc, argv, options ); if( parser.error().size() ) // bad option { show_error( parser.error().c_str(), 0, true ); return 1; } + const long num_online = std::max( 1L, sysconf( _SC_NPROCESSORS_ONLN ) ); + long max_workers = sysconf( _SC_THREAD_THREADS_MAX ); + if( max_workers < 1 || max_workers > INT_MAX / (int)sizeof (pthread_t) ) + max_workers = INT_MAX / sizeof (pthread_t); + int argind = 0; for( ; argind < parser.arguments(); ++argind ) { @@ -643,6 +670,7 @@ int main( const int argc, const char * const argv[] ) case 'v': if( verbosity < 4 ) ++verbosity; break; case 
'V': show_version(); return 0; case opt_dbg: debug_level = getnum( arg, 0, 3 ); break; + case opt_lt: loose_trailing = true; break; default : internal_error( "uncaught option." ); } } // end process options @@ -661,7 +689,7 @@ int main( const int argc, const char * const argv[] ) if( filenames.empty() ) filenames.push_back("-"); if( program_mode == m_list ) - return list_files( filenames, ignore_trailing ); + return list_files( filenames, ignore_trailing, loose_trailing ); if( program_mode == m_test ) outfd = -1; @@ -678,19 +706,30 @@ int main( const int argc, const char * const argv[] ) std::max( data_size, LZ_min_dictionary_size() ); if( num_workers <= 0 ) + { + if( sizeof (void *) <= 4 ) // use less than 2.22 GiB on 32 bit systems + { + const long long limit = ( 27LL << 25 ) + ( 11LL << 27 ); // 4 * 568 MiB + const long long mem = ( 27LL * data_size ) / 8 + + ( fast ? 3LL << 19 : 11LL * encoder_options.dictionary_size ); + const int nmax32 = std::max( limit / mem, 1LL ); + if( max_workers > nmax32 ) max_workers = nmax32; + } num_workers = std::min( num_online, max_workers ); + } if( !to_stdout && program_mode != m_test && ( filenames_given || default_output_filename.size() ) ) set_signals(); - Pretty_print pp( filenames, verbosity ); + Pretty_print pp( filenames ); int retval = 0; bool stdin_used = false; for( unsigned i = 0; i < filenames.size(); ++i ) { std::string input_filename; + int infd; struct stat in_stats; output_filename.clear(); @@ -705,12 +744,12 @@ int main( const int argc, const char * const argv[] ) else { if( program_mode == m_compress ) - set_c_outname( default_output_filename ); + set_c_outname( default_output_filename, false ); else output_filename = default_output_filename; if( !open_outstream( force, true ) ) { if( retval < 1 ) retval = 1; - close( infd ); infd = -1; + close( infd ); continue; } } @@ -728,12 +767,12 @@ int main( const int argc, const char * const argv[] ) else { if( program_mode == m_compress ) - set_c_outname( 
input_filename ); + set_c_outname( input_filename, true ); else set_d_outname( input_filename, eindex ); if( !open_outstream( force, false ) ) { if( retval < 1 ) retval = 1; - close( infd ); infd = -1; + close( infd ); continue; } } @@ -744,24 +783,22 @@ int main( const int argc, const char * const argv[] ) if( !check_tty( pp.name(), infd, program_mode ) ) { if( retval < 1 ) retval = 1; - if( program_mode == m_test ) { close( infd ); infd = -1; continue; } + if( program_mode == m_test ) { close( infd ); continue; } cleanup_and_fail( retval ); } const struct stat * const in_statsp = input_filename.size() ? &in_stats : 0; const bool infd_isreg = in_statsp && S_ISREG( in_statsp->st_mode ); - if( verbosity >= 1 ) pp(); + const unsigned long long cfile_size = + infd_isreg ? ( in_statsp->st_size + 99 ) / 100 : 0; int tmp; if( program_mode == m_compress ) - { - show_progress( 0, &pp, infd_isreg ? in_statsp->st_size / 100 : 0 ); // init - tmp = compress( data_size, encoder_options.dictionary_size, + tmp = compress( cfile_size, data_size, encoder_options.dictionary_size, encoder_options.match_len_limit, num_workers, infd, outfd, pp, debug_level ); - } else - tmp = decompress( num_workers, infd, outfd, pp, debug_level, - ignore_trailing, infd_isreg ); + tmp = decompress( cfile_size, num_workers, infd, outfd, pp, debug_level, + ignore_trailing, loose_trailing, infd_isreg ); if( tmp > retval ) retval = tmp; if( tmp && program_mode != m_test ) cleanup_and_fail( retval ); @@ -769,14 +806,14 @@ int main( const int argc, const char * const argv[] ) close_and_set_permissions( in_statsp ); if( input_filename.size() ) { - close( infd ); infd = -1; + close( infd ); if( !keep_input_files && !to_stdout && program_mode != m_test ) std::remove( input_filename.c_str() ); } } if( outfd >= 0 && close( outfd ) != 0 ) { - show_error( "Can't close stdout", errno ); + show_error( "Error closing stdout", errno ); if( retval < 1 ) retval = 1; } return retval; diff --git a/testsuite/check.sh 
b/testsuite/check.sh index 4421fc4..a4113c3 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! /bin/sh # check script for Plzip - Parallel compressor compatible with lzip -# Copyright (C) 2009-2017 Antonio Diaz Diaz. +# Copyright (C) 2009-2018 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute and modify it. @@ -53,6 +53,8 @@ done [ $? = 2 ] || test_failed $LINENO "${LZIP}" -cdq < in [ $? = 2 ] || test_failed $LINENO +"${LZIP}" -dq -o in < "${in_lz}" +[ $? = 1 ] || test_failed $LINENO # these are for code coverage "${LZIP}" -lt "${in_lz}" 2> /dev/null [ $? = 1 ] || test_failed $LINENO @@ -145,7 +147,7 @@ cat "${in_lz}" "${in_lz}" > in2.lz || framework_failure "${LZIP}" -cd in2.lz > copy2 || test_failed $LINENO cmp in2 copy2 || test_failed $LINENO -"${LZIP}" --output=copy2 < in2 || test_failed $LINENO +"${LZIP}" --output=copy2.lz < in2 || test_failed $LINENO "${LZIP}" -lq copy2.lz || test_failed $LINENO "${LZIP}" -t copy2.lz || test_failed $LINENO "${LZIP}" -cd copy2.lz > copy2 || test_failed $LINENO @@ -226,6 +228,61 @@ cat in in in in in in in in | "${LZIP}" -1s4Ki | "${LZIP}" -t || printf "\ntesting bad input..." +headers='LZIp LZiP LZip LzIP LzIp LziP lZIP lZIp lZiP lzIP' +body='\001\014\000\203\377\373\377\377\300\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000$\000\000\000\000\000\000\000' +cat "${in_lz}" > in0.lz +printf "LZIP${body}" >> in0.lz +if "${LZIP}" -tq in0.lz ; then + for header in ${headers} ; do + printf "${header}${body}" > in0.lz # first member + "${LZIP}" -lq in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -tq in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -tq < in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -cdq in0.lz > /dev/null + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -lq --loose-trailing in0.lz + [ $? 
= 2 ] || test_failed $LINENO ${header} + "${LZIP}" -tq --loose-trailing in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -tq --loose-trailing < in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -cdq --loose-trailing in0.lz > /dev/null + [ $? = 2 ] || test_failed $LINENO ${header} + cat "${in_lz}" > in0.lz + printf "${header}${body}" >> in0.lz # trailing data + "${LZIP}" -lq in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -tq in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} +# "${LZIP}" -tq < in0.lz # requires lzlib-1.10 +# [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -cdq in0.lz > /dev/null + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -lq --loose-trailing in0.lz + [ $? = 0 ] || test_failed $LINENO ${header} + "${LZIP}" -t --loose-trailing in0.lz + [ $? = 0 ] || test_failed $LINENO ${header} + "${LZIP}" -t --loose-trailing < in0.lz + [ $? = 0 ] || test_failed $LINENO ${header} + "${LZIP}" -cd --loose-trailing in0.lz > /dev/null + [ $? = 0 ] || test_failed $LINENO ${header} + "${LZIP}" -lq --loose-trailing --trailing-error in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -tq --loose-trailing --trailing-error in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -tq --loose-trailing --trailing-error < in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -cdq --loose-trailing --trailing-error in0.lz > /dev/null + [ $? = 2 ] || test_failed $LINENO ${header} + done +else + printf "\nwarning: skipping header test: 'printf' does not work on your system." +fi +rm -f in0.lz + cat "${in_lz}" "${in_lz}" "${in_lz}" > in3.lz || framework_failure if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null && [ -e trunc.lz ] && cmp in2.lz trunc.lz > /dev/null 2>&1 ; then @@ -233,7 +290,7 @@ if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null && dd if=in3.lz of=trunc.lz bs=$i count=1 2> /dev/null "${LZIP}" -lq trunc.lz [ $? 
= 2 ] || test_failed $LINENO $i - "${LZIP}" -t trunc.lz 2> /dev/null + "${LZIP}" -tq trunc.lz [ $? = 2 ] || test_failed $LINENO $i "${LZIP}" -tq < trunc.lz [ $? = 2 ] || test_failed $LINENO $i @@ -245,6 +302,7 @@ if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null && else printf "\nwarning: skipping truncation test: 'dd' does not work on your system." fi +rm -f in3.lz trunc.lz cat "${in_lz}" > ingin.lz || framework_failure printf "g" >> ingin.lz || framework_failure @@ -258,6 +316,7 @@ cat "${in_lz}" >> ingin.lz || framework_failure "${LZIP}" -t < ingin.lz || test_failed $LINENO "${LZIP}" -d < ingin.lz > copy || test_failed $LINENO cmp in copy || test_failed $LINENO +rm -f ingin.lz echo if [ ${fail} = 0 ] ; then -- cgit v1.2.3