summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2018-02-13 07:06:07 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2018-02-13 07:06:07 +0000
commit: ac32e8eabf1b97208c4ccdfe908aea863d09d1f3 (patch)
tree: 97843c59827d6028ef778206eb2d194f772126dc
parent: Adding upstream version 1.6. (diff)
download: plzip-ac32e8eabf1b97208c4ccdfe908aea863d09d1f3.tar.xz
plzip-ac32e8eabf1b97208c4ccdfe908aea863d09d1f3.zip
Adding upstream version 1.7. (tag: upstream/1.7)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
-rw-r--r--ChangeLog33
-rw-r--r--INSTALL6
-rw-r--r--NEWS37
-rw-r--r--README6
-rw-r--r--arg_parser.cc2
-rw-r--r--arg_parser.h2
-rw-r--r--compress.cc103
-rwxr-xr-xconfigure8
-rw-r--r--dec_stdout.cc30
-rw-r--r--dec_stream.cc65
-rw-r--r--decompress.cc68
-rw-r--r--doc/plzip.19
-rw-r--r--doc/plzip.info268
-rw-r--r--doc/plzip.texi235
-rw-r--r--file_index.cc37
-rw-r--r--file_index.h8
-rw-r--r--list.cc11
-rw-r--r--lzip.h61
-rw-r--r--main.cc231
-rwxr-xr-xtestsuite/check.sh65
20 files changed, 841 insertions, 444 deletions
diff --git a/ChangeLog b/ChangeLog
index adc942b..20d8605 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,24 @@
+2018-02-07 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 1.7 released.
+ * compress.cc: Use 'LZ_compress_restart_member' and replace input
+ packet queue by a circular buffer to reduce memory fragmentation.
+ * compress.cc: Return one empty packet at a time to reduce mem use.
+ * main.cc: Reduce threads on 32 bit systems to use under 2.22 GiB.
+ * main.cc: Added new option '--loose-trailing'.
+ * Improved corrupt header detection to HD=3 on seekable files.
+ (On all files with lzlib 1.10 or newer).
+ * Replaced 'bits/byte' with inverse compression ratio in output.
+ * Show progress of decompression at verbosity level 2 (-vv).
+ * Show progress of (de)compression only if stderr is a terminal.
+ * main.cc: Do not add a second .lz extension to the arg of -o.
+ * Show dictionary size at verbosity level 4 (-vvvv).
+ * main.cc (cleanup_and_fail): Suppress messages from other threads.
+ * list.cc: Added missing '#include <pthread.h>'.
+ * plzip.texi: Added chapter 'Output'.
+ * plzip.texi (Memory requirements): Added table.
+ * plzip.texi (Program design): Added a block diagram.
+
2017-04-12 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.6 released.
@@ -13,11 +34,11 @@
* main.cc: Added new option '-a, --trailing-error'.
* main.cc (main): Delete '--output' file if infd is a terminal.
* main.cc (main): Don't use stdin more than once.
- * lzip.texi: Added chapters 'Trailing data' and 'Examples'.
+ * plzip.texi: Added chapters 'Trailing data' and 'Examples'.
* configure: Avoid warning on some shells when testing for g++.
* Makefile.in: Detect the existence of install-info.
- * testsuite/check.sh: A POSIX shell is required to run the tests.
- * testsuite/check.sh: Don't check error messages.
+ * check.sh: A POSIX shell is required to run the tests.
+ * check.sh: Don't check error messages.
2015-07-09 Antonio Diaz Diaz <antonio@gnu.org>
@@ -30,7 +51,7 @@
* dec_stream.cc: Don't use output packets or muxer when testing.
* Make '-dvvv' and '-tvvv' show dictionary size like lzip.
* lzip.h: Added missing 'const' to the declaration of 'compress'.
- * lzip.texi: Added chapters 'Memory requirements' and
+ * plzip.texi: Added chapters 'Memory requirements' and
'Minimum file sizes'.
* Makefile.in: Added new targets 'install*-compress'.
@@ -99,7 +120,7 @@
* Version 0.6 released.
* Small portability fixes.
- * lzip.texinfo: Added chapter 'Program Design' and description
+ * plzip.texinfo: Added chapter 'Program Design' and description
of option '--threads'.
* Debug stats have been fixed.
@@ -148,7 +169,7 @@
until something better appears on the net.
-Copyright (C) 2009-2017 Antonio Diaz Diaz.
+Copyright (C) 2009-2018 Antonio Diaz Diaz.
This file is a collection of facts, and thus it is not copyrightable,
but just in case, you have unlimited permission to copy, distribute and
diff --git a/INSTALL b/INSTALL
index b0cc6ea..6f359ca 100644
--- a/INSTALL
+++ b/INSTALL
@@ -4,7 +4,9 @@ You will need a C++ compiler and the lzlib compression library installed.
I use gcc 5.3.0 and 4.1.2, but the code should compile with any
standards compliant compiler.
Lzlib must be version 1.0 or newer, but the fast encoder is only
-available in lzlib 1.7 or newer.
+available in lzlib 1.7 or newer, and the HD = 3 detection of corrupt
+headers on non-seekable multimember files is only available in lzlib
+1.10 or newer.
Gcc is available at http://gcc.gnu.org.
Lzlib is available at http://www.nongnu.org/lzip/lzlib.html.
@@ -65,7 +67,7 @@ After running 'configure', you can run 'make' and 'make install' as
explained above.
-Copyright (C) 2009-2017 Antonio Diaz Diaz.
+Copyright (C) 2009-2018 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute and modify it.
diff --git a/NEWS b/NEWS
index b7f1326..1916e07 100644
--- a/NEWS
+++ b/NEWS
@@ -1,9 +1,34 @@
-Changes in version 1.6:
+Changes in version 1.7:
-The option '-l, --list' has been ported from lziprecover.
+When compressing on a 32 bit system, plzip now tries to limit the memory
+use to under 2.22 GiB (4 worker threads at level -9) by reducing the
+number of threads below the system's default.
-It is now an error to specify two or more different operations in the
-command line (--decompress, --list or --test).
+The option '--loose-trailing' has been added.
-In test mode, plzip now continues checking the rest of the files if any
-input file is a terminal.
+The test used by plzip to discriminate trailing data from a corrupt
+header in multimember regular (seekable) files has been improved to a
+Hamming distance (HD) of 3, and the 3 bit flips must happen in different
+magic bytes for the test to fail. As a consequence some kinds of files
+no longer can be appended to a lzip file as trailing data unless the
+'--loose-trailing' option is used when decompressing.
+Lzlib 1.10 or newer is required for this test to work on non-seekable
+files.
+Lziprecover can be used to remove conflicting trailing data from a file.
+
+The 'bits/byte' ratio has been replaced with the inverse compression
+ratio in the output.
+
+The progress of decompression is now shown at verbosity level 2 (-vv) or
+higher.
+
+Progress of (de)compression is only shown if stderr is a terminal.
+
+A second '.lz' extension is no longer added to the argument of '-o' if
+it already ends in '.lz' or '.tlz'.
+
+The dictionary size is now shown at verbosity level 4 (-vvvv) when
+decompressing or testing.
+
+The new chapter "Meaning of plzip's output" and a block diagram of
+plzip have been added to the manual.
diff --git a/README b/README
index 5385ec9..d6f4d4a 100644
--- a/README
+++ b/README
@@ -17,7 +17,7 @@ creating a multimember compressed file.
When decompressing, plzip decompresses as many members simultaneously as
worker threads are chosen. Files that were compressed with lzip will not
-be decompressed faster than using lzip (unless the "-b" option was used)
+be decompressed faster than using lzip (unless the '-b' option was used)
because lzip usually produces single-member files, which can't be
decompressed in parallel.
@@ -84,11 +84,11 @@ incomprehensible and therefore pointless.
Plzip will correctly decompress a file which is the concatenation of two
or more compressed files. The result is the concatenation of the
-corresponding uncompressed files. Integrity testing of concatenated
+corresponding decompressed files. Integrity testing of concatenated
compressed files is also supported.
-Copyright (C) 2009-2017 Antonio Diaz Diaz.
+Copyright (C) 2009-2018 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute and modify it.
diff --git a/arg_parser.cc b/arg_parser.cc
index cc7d1e2..008ebc8 100644
--- a/arg_parser.cc
+++ b/arg_parser.cc
@@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
- Copyright (C) 2006-2017 Antonio Diaz Diaz.
+ Copyright (C) 2006-2018 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
diff --git a/arg_parser.h b/arg_parser.h
index 95b0320..f015881 100644
--- a/arg_parser.h
+++ b/arg_parser.h
@@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
- Copyright (C) 2006-2017 Antonio Diaz Diaz.
+ Copyright (C) 2006-2018 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
diff --git a/compress.cc b/compress.cc
index 5bcd999..beae59e 100644
--- a/compress.cc
+++ b/compress.cc
@@ -1,6 +1,6 @@
/* Plzip - Parallel compressor compatible with lzip
Copyright (C) 2009 Laszlo Ersek.
- Copyright (C) 2009-2017 Antonio Diaz Diaz.
+ Copyright (C) 2009-2018 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -159,8 +159,9 @@ struct Packet // data block with a serial number
uint8_t * data;
int size; // number of bytes in data (if any)
unsigned id; // serial number assigned as received
- Packet( uint8_t * const d, const int s, const unsigned i )
- : data( d ), size( s ), id( i ) {}
+ Packet() : data( 0 ), size( 0 ), id( 0 ) {}
+ void init( uint8_t * const d, const int s, const unsigned i )
+ { data = d; size = s; id = i; }
};
@@ -173,10 +174,11 @@ public:
unsigned owait_counter;
private:
unsigned receive_id; // id assigned to next packet received
+ unsigned distrib_id; // id of next packet to be distributed
unsigned deliver_id; // id of next packet to be delivered
Slot_tally slot_tally; // limits the number of input packets
- std::queue< Packet * > packet_queue;
- std::vector< const Packet * > circular_buffer;
+ std::vector< Packet > circular_ibuffer;
+ std::vector< const Packet * > circular_obuffer;
int num_working; // number of workers still running
const int num_slots; // max packets in circulation
pthread_mutex_t imutex;
@@ -192,8 +194,9 @@ public:
Packet_courier( const int workers, const int slots )
: icheck_counter( 0 ), iwait_counter( 0 ),
ocheck_counter( 0 ), owait_counter( 0 ),
- receive_id( 0 ), deliver_id( 0 ),
- slot_tally( slots ), circular_buffer( slots, (Packet *) 0 ),
+ receive_id( 0 ), distrib_id( 0 ), deliver_id( 0 ),
+ slot_tally( slots ), circular_ibuffer( slots ),
+ circular_obuffer( slots, (Packet *) 0 ),
num_working( workers ), num_slots( slots ), eof( false )
{
xinit_mutex( &imutex ); xinit_cond( &iav_or_eof );
@@ -206,13 +209,13 @@ public:
xdestroy_cond( &iav_or_eof ); xdestroy_mutex( &imutex );
}
- // make a packet with data received from splitter
+ // fill a packet with data received from splitter
void receive_packet( uint8_t * const data, const int size )
{
- Packet * const ipacket = new Packet( data, size, receive_id++ );
slot_tally.get_slot(); // wait for a free slot
xlock( &imutex );
- packet_queue.push( ipacket );
+ circular_ibuffer[receive_id % num_slots].init( data, size, receive_id );
+ ++receive_id;
xsignal( &iav_or_eof );
xunlock( &imutex );
}
@@ -223,18 +226,15 @@ public:
Packet * ipacket = 0;
xlock( &imutex );
++icheck_counter;
- while( packet_queue.empty() && !eof )
+ while( receive_id == distrib_id && !eof ) // no packets to distribute
{
++iwait_counter;
xwait( &iav_or_eof, &imutex );
}
- if( !packet_queue.empty() )
- {
- ipacket = packet_queue.front();
- packet_queue.pop();
- }
+ if( receive_id != distrib_id )
+ { ipacket = &circular_ibuffer[distrib_id % num_slots]; ++distrib_id; }
xunlock( &imutex );
- if( !ipacket )
+ if( !ipacket ) // EOF
{
// notify muxer when last worker exits
xlock( &omutex );
@@ -250,10 +250,10 @@ public:
const int i = opacket->id % num_slots;
xlock( &omutex );
// id collision shouldn't happen
- if( circular_buffer[i] != 0 )
+ if( circular_obuffer[i] != 0 )
internal_error( "id collision in collect_packet." );
// merge packet into circular buffer
- circular_buffer[i] = opacket;
+ circular_obuffer[i] = opacket;
if( opacket->id == deliver_id ) xsignal( &oav_or_exit );
xunlock( &omutex );
}
@@ -264,7 +264,7 @@ public:
xlock( &omutex );
++ocheck_counter;
int i = deliver_id % num_slots;
- while( circular_buffer[i] == 0 && num_working > 0 )
+ while( circular_obuffer[i] == 0 && num_working > 0 )
{
++owait_counter;
xwait( &oav_or_exit, &omutex );
@@ -272,18 +272,19 @@ public:
packet_vector.clear();
while( true )
{
- const Packet * const opacket = circular_buffer[i];
+ const Packet * const opacket = circular_obuffer[i];
if( !opacket ) break;
packet_vector.push_back( opacket );
- circular_buffer[i] = 0;
+ circular_obuffer[i] = 0;
++deliver_id;
i = deliver_id % num_slots;
}
xunlock( &omutex );
- if( packet_vector.size() ) // return slots to the tally
- slot_tally.leave_slots( packet_vector.size() );
}
+ void return_empty_packet() // return a slot to the tally
+ { slot_tally.leave_slot(); }
+
void finish() // splitter has no more packets to send
{
xlock( &imutex );
@@ -294,10 +295,10 @@ public:
bool finished() // all packets delivered to muxer
{
- if( !slot_tally.all_free() || !eof || !packet_queue.empty() ||
+ if( !slot_tally.all_free() || !eof || receive_id != distrib_id ||
num_working != 0 ) return false;
for( int i = 0; i < num_slots; ++i )
- if( circular_buffer[i] != 0 ) return false;
+ if( circular_obuffer[i] != 0 ) return false;
return true;
}
};
@@ -369,26 +370,32 @@ extern "C" void * cworker( void * arg )
const int dictionary_size = tmp.dictionary_size;
const int match_len_limit = tmp.match_len_limit;
const int offset = tmp.offset;
+ LZ_Encoder * encoder = 0;
while( true )
{
Packet * const packet = courier.distribute_packet();
if( !packet ) break; // no more packets to process
- const bool fast = dictionary_size == 65535 && match_len_limit == 16;
- const int dict_size = fast ? dictionary_size :
- std::max( std::min( dictionary_size, packet->size ),
- LZ_min_dictionary_size() );
- LZ_Encoder * const encoder =
- LZ_compress_open( dict_size, match_len_limit, LLONG_MAX );
- if( !encoder || LZ_compress_errno( encoder ) != LZ_ok )
+ if( !encoder )
{
- if( !encoder || LZ_compress_errno( encoder ) == LZ_mem_error )
- pp( mem_msg );
- else
- internal_error( "invalid argument to encoder." );
- cleanup_and_fail();
+ const bool fast = dictionary_size == 65535 && match_len_limit == 16;
+ const int dict_size = fast ? dictionary_size :
+ std::max( std::min( dictionary_size, packet->size ),
+ LZ_min_dictionary_size() );
+ encoder = LZ_compress_open( dict_size, match_len_limit, LLONG_MAX );
+ if( !encoder || LZ_compress_errno( encoder ) != LZ_ok )
+ {
+ if( !encoder || LZ_compress_errno( encoder ) == LZ_mem_error )
+ pp( mem_msg );
+ else
+ internal_error( "invalid argument to encoder." );
+ cleanup_and_fail();
+ }
}
+ else
+ if( LZ_compress_restart_member( encoder, LLONG_MAX ) < 0 )
+ { pp( "LZ_compress_restart_member failed." ); cleanup_and_fail(); }
int written = 0;
int new_pos = 0;
@@ -422,13 +429,12 @@ extern "C" void * cworker( void * arg )
if( LZ_compress_finished( encoder ) == 1 ) break;
}
- if( LZ_compress_close( encoder ) < 0 )
- { pp( "LZ_compress_close failed." ); cleanup_and_fail(); }
-
if( packet->size > 0 ) show_progress( packet->size );
packet->size = new_pos;
courier.collect_packet( packet );
}
+ if( encoder && LZ_compress_close( encoder ) < 0 )
+ { pp( "LZ_compress_close failed." ); cleanup_and_fail(); }
return 0;
}
@@ -452,7 +458,7 @@ void muxer( Packet_courier & courier, const Pretty_print & pp, const int outfd )
if( wr != opacket->size )
{ pp(); show_error( "Write error", errno ); cleanup_and_fail(); }
delete[] opacket->data;
- delete opacket;
+ courier.return_empty_packet();
}
}
}
@@ -462,7 +468,8 @@ void muxer( Packet_courier & courier, const Pretty_print & pp, const int outfd )
// init the courier, then start the splitter and the workers and
// call the muxer.
-int compress( const int data_size, const int dictionary_size,
+int compress( const unsigned long long cfile_size,
+ const int data_size, const int dictionary_size,
const int match_len_limit, const int num_workers,
const int infd, const int outfd,
const Pretty_print & pp, const int debug_level )
@@ -486,6 +493,8 @@ int compress( const int data_size, const int dictionary_size,
int errcode = pthread_create( &splitter_thread, 0, csplitter, &splitter_arg );
if( errcode )
{ show_error( "Can't create splitter thread", errcode ); cleanup_and_fail(); }
+ if( verbosity >= 1 ) pp();
+ show_progress( 0, cfile_size, &pp ); // init
Worker_arg worker_arg;
worker_arg.courier = &courier;
@@ -522,11 +531,11 @@ int compress( const int data_size, const int dictionary_size,
if( in_size == 0 || out_size == 0 )
std::fputs( " no data compressed.\n", stderr );
else
- std::fprintf( stderr, "%6.3f:1, %6.3f bits/byte, "
- "%5.2f%% saved, %llu in, %llu out.\n",
+ std::fprintf( stderr, "%6.3f:1, %5.2f%% ratio, %5.2f%% saved, "
+ "%llu in, %llu out.\n",
(double)in_size / out_size,
- ( 8.0 * out_size ) / in_size,
- 100.0 * ( 1.0 - ( (double)out_size / in_size ) ),
+ ( 100.0 * out_size ) / in_size,
+ 100.0 - ( ( 100.0 * out_size ) / in_size ),
in_size, out_size );
}
diff --git a/configure b/configure
index eba8e88..e31f675 100755
--- a/configure
+++ b/configure
@@ -1,12 +1,12 @@
#! /bin/sh
# configure script for Plzip - Parallel compressor compatible with lzip
-# Copyright (C) 2009-2017 Antonio Diaz Diaz.
+# Copyright (C) 2009-2018 Antonio Diaz Diaz.
#
# This configure script is free software: you have unlimited permission
# to copy, distribute and modify it.
pkgname=plzip
-pkgversion=1.6
+pkgversion=1.7
progname=plzip
srctrigger=doc/${pkgname}.texi
@@ -168,7 +168,7 @@ echo "LDFLAGS = ${LDFLAGS}"
rm -f Makefile
cat > Makefile << EOF
# Makefile for Plzip - Parallel compressor compatible with lzip
-# Copyright (C) 2009-2017 Antonio Diaz Diaz.
+# Copyright (C) 2009-2018 Antonio Diaz Diaz.
# This file was generated automatically by configure. Don't edit.
#
# This Makefile is free software: you have unlimited permission
@@ -193,4 +193,4 @@ cat "${srcdir}/Makefile.in" >> Makefile
echo "OK. Now you can run make."
echo "If make fails, verify that the lzlib compression library is correctly"
-echo "installed. (see INSTALL)"
+echo "installed (see INSTALL)."
diff --git a/dec_stdout.cc b/dec_stdout.cc
index a1a9d61..27b9f31 100644
--- a/dec_stdout.cc
+++ b/dec_stdout.cc
@@ -1,6 +1,6 @@
/* Plzip - Parallel compressor compatible with lzip
Copyright (C) 2009 Laszlo Ersek.
- Copyright (C) 2009-2017 Antonio Diaz Diaz.
+ Copyright (C) 2009-2018 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -229,6 +229,7 @@ extern "C" void * dworker_o( void * arg )
if( rd == 0 ) break;
}
}
+ show_progress( file_index.mblock( i ).size() );
}
delete[] ibuffer; delete[] new_data;
@@ -298,17 +299,22 @@ int dec_stdout( const int num_workers, const int infd, const int outfd,
delete[] worker_threads;
delete[] worker_args;
- const unsigned long long in_size = file_index.cdata_size();
- const unsigned long long out_size = file_index.udata_size();
- if( verbosity >= 2 && out_size > 0 && in_size > 0 )
- std::fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ",
- (double)out_size / in_size,
- ( 8.0 * in_size ) / out_size,
- 100.0 * ( 1.0 - ( (double)in_size / out_size ) ) );
- if( verbosity >= 4 )
- std::fprintf( stderr, "decompressed %9llu, compressed %9llu. ",
- out_size, in_size );
-
+ if( verbosity >= 2 )
+ {
+ if( verbosity >= 4 ) show_header( file_index.dictionary_size( 0 ) );
+ const unsigned long long in_size = file_index.cdata_size();
+ const unsigned long long out_size = file_index.udata_size();
+ if( out_size == 0 || in_size == 0 )
+ std::fputs( "no data compressed. ", stderr );
+ else
+ std::fprintf( stderr, "%6.3f:1, %5.2f%% ratio, %5.2f%% saved. ",
+ (double)out_size / in_size,
+ ( 100.0 * in_size ) / out_size,
+ 100.0 - ( ( 100.0 * in_size ) / out_size ) );
+ if( verbosity >= 3 )
+ std::fprintf( stderr, "decompressed %9llu, compressed %8llu. ",
+ out_size, in_size );
+ }
if( verbosity >= 1 ) std::fputs( "done\n", stderr );
if( debug_level & 1 )
diff --git a/dec_stream.cc b/dec_stream.cc
index 5ec1ff7..36a0ec0 100644
--- a/dec_stream.cc
+++ b/dec_stream.cc
@@ -1,6 +1,6 @@
/* Plzip - Parallel compressor compatible with lzip
Copyright (C) 2009 Laszlo Ersek.
- Copyright (C) 2009-2017 Antonio Diaz Diaz.
+ Copyright (C) 2009-2018 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -101,7 +101,7 @@ public:
}
// make a packet with data received from splitter
- // if data == 0, move to next queue
+ // if data == 0 (end of member token), move to next queue
void receive_packet( uint8_t * const data, const int size )
{
Packet * const ipacket = new Packet( data, size );
@@ -237,9 +237,11 @@ int find_magic( const uint8_t * const buffer, const int pos, const int size )
struct Splitter_arg
{
+ unsigned long long cfile_size;
Packet_courier * courier;
const Pretty_print * pp;
int infd;
+ unsigned dictionary_size; // returned by splitter to main thread
};
@@ -247,7 +249,7 @@ struct Splitter_arg
// courier for packaging and distribution to workers.
extern "C" void * dsplitter_s( void * arg )
{
- const Splitter_arg & tmp = *(Splitter_arg *)arg;
+ Splitter_arg & tmp = *(Splitter_arg *)arg;
Packet_courier & courier = *tmp.courier;
const Pretty_print & pp = *tmp.pp;
const int infd = tmp.infd;
@@ -264,16 +266,18 @@ extern "C" void * dsplitter_s( void * arg )
if( size != buffer_size && errno )
{ pp(); show_error( "Read error", errno ); cleanup_and_fail(); }
if( size + hsize < min_member_size )
- { pp( "Input file is too short." ); cleanup_and_fail( 2 ); }
+ { show_file_error( pp.name(), "Input file is too short." );
+ cleanup_and_fail( 2 ); }
const File_header & header = *(File_header *)buffer;
if( !header.verify_magic() )
- { pp( bad_magic_msg ); cleanup_and_fail( 2 ); }
+ { show_file_error( pp.name(), bad_magic_msg ); cleanup_and_fail( 2 ); }
if( !header.verify_version() )
{ pp( bad_version( header.version() ) ); cleanup_and_fail( 2 ); }
- const unsigned dictionary_size = header.dictionary_size();
- if( !isvalid_ds( dictionary_size ) )
+ tmp.dictionary_size = header.dictionary_size();
+ if( !isvalid_ds( tmp.dictionary_size ) )
{ pp( bad_dict_msg ); cleanup_and_fail( 2 ); }
- show_header( dictionary_size );
+ if( verbosity >= 1 ) pp();
+ show_progress( 0, tmp.cfile_size, &pp ); // init
unsigned long long partial_member_size = 0;
while( true )
@@ -301,6 +305,7 @@ extern "C" void * dsplitter_s( void * arg )
courier.receive_packet( 0, 0 ); // end of member token
partial_member_size = 0;
pos = newpos;
+ show_progress( member_size );
}
}
}
@@ -340,6 +345,7 @@ struct Worker_arg
const Pretty_print * pp;
int worker_id;
bool ignore_trailing;
+ bool loose_trailing;
bool testing;
};
@@ -353,6 +359,7 @@ extern "C" void * dworker_s( void * arg )
const Pretty_print & pp = *tmp.pp;
const int worker_id = tmp.worker_id;
const bool ignore_trailing = tmp.ignore_trailing;
+ const bool loose_trailing = tmp.loose_trailing;
const bool testing = tmp.testing;
uint8_t * new_data = new( std::nothrow ) uint8_t[max_packet_size];
@@ -387,12 +394,22 @@ extern "C" void * dworker_s( void * arg )
max_packet_size - new_pos );
if( rd < 0 )
{
- if( LZ_decompress_errno( decoder ) == LZ_header_error )
+ const enum LZ_Errno lz_errno = LZ_decompress_errno( decoder );
+ if( lz_errno == LZ_header_error )
{
trailing_data_found = true;
if( !ignore_trailing )
{ pp( trailing_msg ); cleanup_and_fail( 2 ); }
}
+ else if( lz_errno == LZ_data_error &&
+ LZ_decompress_member_position( decoder ) == 0 )
+ {
+ trailing_data_found = true;
+ if( !loose_trailing )
+ { pp( corrupt_mm_msg ); cleanup_and_fail( 2 ); }
+ else if( !ignore_trailing )
+ { pp( trailing_msg ); cleanup_and_fail( 2 ); }
+ }
else
cleanup_and_fail( decompress_read_error( decoder, pp, worker_id ) );
}
@@ -459,9 +476,10 @@ void muxer( Packet_courier & courier, const Pretty_print & pp, const int outfd )
// init the courier, then start the splitter and the workers and,
// if not testing, call the muxer.
-int dec_stream( const int num_workers, const int infd, const int outfd,
+int dec_stream( const unsigned long long cfile_size,
+ const int num_workers, const int infd, const int outfd,
const Pretty_print & pp, const int debug_level,
- const bool ignore_trailing )
+ const bool ignore_trailing, const bool loose_trailing )
{
const int in_slots_per_worker = 2;
const int out_slots = 32;
@@ -472,6 +490,7 @@ int dec_stream( const int num_workers, const int infd, const int outfd,
Packet_courier courier( num_workers, in_slots, out_slots );
Splitter_arg splitter_arg;
+ splitter_arg.cfile_size = cfile_size;
splitter_arg.courier = &courier;
splitter_arg.pp = &pp;
splitter_arg.infd = infd;
@@ -491,6 +510,7 @@ int dec_stream( const int num_workers, const int infd, const int outfd,
worker_args[i].pp = &pp;
worker_args[i].worker_id = i;
worker_args[i].ignore_trailing = ignore_trailing;
+ worker_args[i].loose_trailing = loose_trailing;
worker_args[i].testing = ( outfd < 0 );
errcode = pthread_create( &worker_threads[i], 0, dworker_s, &worker_args[i] );
if( errcode )
@@ -512,15 +532,20 @@ int dec_stream( const int num_workers, const int infd, const int outfd,
if( errcode )
{ show_error( "Can't join splitter thread", errcode ); cleanup_and_fail(); }
- if( verbosity >= 2 && out_size > 0 && in_size > 0 )
- std::fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ",
- (double)out_size / in_size,
- ( 8.0 * in_size ) / out_size,
- 100.0 * ( 1.0 - ( (double)in_size / out_size ) ) );
- if( verbosity >= 4 )
- std::fprintf( stderr, "decompressed %9llu, compressed %9llu. ",
- out_size, in_size );
-
+ if( verbosity >= 2 )
+ {
+ if( verbosity >= 4 ) show_header( splitter_arg.dictionary_size );
+ if( out_size == 0 || in_size == 0 )
+ std::fputs( "no data compressed. ", stderr );
+ else
+ std::fprintf( stderr, "%6.3f:1, %5.2f%% ratio, %5.2f%% saved. ",
+ (double)out_size / in_size,
+ ( 100.0 * in_size ) / out_size,
+ 100.0 - ( ( 100.0 * in_size ) / out_size ) );
+ if( verbosity >= 3 )
+ std::fprintf( stderr, "decompressed %9llu, compressed %8llu. ",
+ out_size, in_size );
+ }
if( verbosity >= 1 ) std::fputs( (outfd < 0) ? "ok\n" : "done\n", stderr );
if( debug_level & 1 )
diff --git a/decompress.cc b/decompress.cc
index f580bca..ed1ac21 100644
--- a/decompress.cc
+++ b/decompress.cc
@@ -1,6 +1,6 @@
/* Plzip - Parallel compressor compatible with lzip
Copyright (C) 2009 Laszlo Ersek.
- Copyright (C) 2009-2017 Antonio Diaz Diaz.
+ Copyright (C) 2009-2018 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -37,23 +37,6 @@
#include "file_index.h"
-void Pretty_print::operator()( const char * const msg ) const
- {
- if( verbosity >= 0 )
- {
- if( first_post )
- {
- first_post = false;
- std::fprintf( stderr, " %s: ", name_.c_str() );
- for( unsigned i = name_.size(); i < longest_name; ++i )
- std::fputc( ' ', stderr );
- if( !msg ) std::fflush( stderr );
- }
- if( msg ) std::fprintf( stderr, "%s\n", msg );
- }
- }
-
-
// Returns the number of bytes really read.
// If (returned value < size) and (errno == 0), means EOF was reached.
//
@@ -197,6 +180,7 @@ extern "C" void * dworker( void * arg )
if( rd == 0 ) break;
}
}
+ show_progress( file_index.mblock( i ).size() );
}
delete[] obuffer; delete[] ibuffer;
@@ -211,25 +195,30 @@ extern "C" void * dworker( void * arg )
// start the workers and wait for them to finish.
-int decompress( int num_workers, const int infd, const int outfd,
- const Pretty_print & pp, const int debug_level,
- const bool ignore_trailing, const bool infd_isreg )
+int decompress( const unsigned long long cfile_size, int num_workers,
+ const int infd, const int outfd, const Pretty_print & pp,
+ const int debug_level, const bool ignore_trailing,
+ const bool loose_trailing, const bool infd_isreg )
{
if( !infd_isreg )
- return dec_stream( num_workers, infd, outfd, pp, debug_level, ignore_trailing );
+ return dec_stream( cfile_size, num_workers, infd, outfd, pp,
+ debug_level, ignore_trailing, loose_trailing );
- const File_index file_index( infd, ignore_trailing );
+ const File_index file_index( infd, ignore_trailing, loose_trailing );
if( file_index.retval() == 1 )
{
lseek( infd, 0, SEEK_SET );
- return dec_stream( num_workers, infd, outfd, pp, debug_level, ignore_trailing );
+ return dec_stream( cfile_size, num_workers, infd, outfd, pp,
+ debug_level, ignore_trailing, loose_trailing );
}
if( file_index.retval() != 0 )
- { pp( file_index.error().c_str() ); return file_index.retval(); }
+ { show_file_error( pp.name(), file_index.error().c_str() );
+ return file_index.retval(); }
- show_header( file_index.dictionary_size( 0 ) );
if( num_workers > file_index.members() )
num_workers = file_index.members();
+ if( verbosity >= 1 ) pp();
+ show_progress( 0, cfile_size, &pp ); // init
if( outfd >= 0 )
{
@@ -266,17 +255,22 @@ int decompress( int num_workers, const int infd, const int outfd,
delete[] worker_threads;
delete[] worker_args;
- const unsigned long long in_size = file_index.cdata_size();
- const unsigned long long out_size = file_index.udata_size();
- if( verbosity >= 2 && out_size > 0 && in_size > 0 )
- std::fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ",
- (double)out_size / in_size,
- ( 8.0 * in_size ) / out_size,
- 100.0 * ( 1.0 - ( (double)in_size / out_size ) ) );
- if( verbosity >= 4 )
- std::fprintf( stderr, "decompressed %9llu, compressed %9llu. ",
- out_size, in_size );
-
+ if( verbosity >= 2 )
+ {
+ if( verbosity >= 4 ) show_header( file_index.dictionary_size( 0 ) );
+ const unsigned long long in_size = file_index.cdata_size();
+ const unsigned long long out_size = file_index.udata_size();
+ if( out_size == 0 || in_size == 0 )
+ std::fputs( "no data compressed. ", stderr );
+ else
+ std::fprintf( stderr, "%6.3f:1, %5.2f%% ratio, %5.2f%% saved. ",
+ (double)out_size / in_size,
+ ( 100.0 * in_size ) / out_size,
+ 100.0 - ( ( 100.0 * in_size ) / out_size ) );
+ if( verbosity >= 3 )
+ std::fprintf( stderr, "decompressed %9llu, compressed %8llu. ",
+ out_size, in_size );
+ }
if( verbosity >= 1 ) std::fputs( (outfd < 0) ? "ok\n" : "done\n", stderr );
return 0;
diff --git a/doc/plzip.1 b/doc/plzip.1
index 5c47edd..99dfd8b 100644
--- a/doc/plzip.1
+++ b/doc/plzip.1
@@ -1,5 +1,5 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1.
-.TH PLZIP "1" "April 2017" "plzip 1.6" "User Commands"
+.TH PLZIP "1" "February 2018" "plzip 1.7" "User Commands"
.SH NAME
plzip \- reduces the size of files
.SH SYNOPSIS
@@ -68,6 +68,9 @@ alias for \fB\-0\fR
.TP
\fB\-\-best\fR
alias for \fB\-9\fR
+.TP
+\fB\-\-loose\-trailing\fR
+allow trailing data that looks like a corrupt header
.PP
If no file names are given, or if a file is '\-', plzip compresses or
decompresses from standard input to standard output.
@@ -92,8 +95,8 @@ Plzip home page: http://www.nongnu.org/lzip/plzip.html
.SH COPYRIGHT
Copyright \(co 2009 Laszlo Ersek.
.br
-Copyright \(co 2017 Antonio Diaz Diaz.
-Using lzlib 1.9
+Copyright \(co 2018 Antonio Diaz Diaz.
+Using lzlib 1.10
License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
.br
This is free software: you are free to change and redistribute it.
diff --git a/doc/plzip.info b/doc/plzip.info
index cf53f13..c8d7387 100644
--- a/doc/plzip.info
+++ b/doc/plzip.info
@@ -11,11 +11,12 @@ File: plzip.info, Node: Top, Next: Introduction, Up: (dir)
Plzip Manual
************
-This manual is for Plzip (version 1.6, 12 April 2017).
+This manual is for Plzip (version 1.7, 7 February 2018).
* Menu:
* Introduction:: Purpose and features of plzip
+* Output:: Meaning of plzip's output
* Invoking plzip:: Command line interface
* Program design:: Internal structure of plzip
* File format:: Detailed format of the compressed file
@@ -27,13 +28,13 @@ This manual is for Plzip (version 1.6, 12 April 2017).
* Concept index:: Index of concepts
- Copyright (C) 2009-2017 Antonio Diaz Diaz.
+ Copyright (C) 2009-2018 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission to
copy, distribute and modify it.

-File: plzip.info, Node: Introduction, Next: Invoking plzip, Prev: Top, Up: Top
+File: plzip.info, Node: Introduction, Next: Output, Prev: Top, Up: Top
1 Introduction
**************
@@ -58,7 +59,7 @@ archiving, taking into account both data integrity and decoder
availability:
* The lzip format provides very safe integrity checking and some data
- recovery means. The lziprecover program can repair bit-flip errors
+ recovery means. The lziprecover program can repair bit flip errors
(one of the most common forms of data corruption) in lzip files,
and provides data recovery capabilities, including error-checked
merging of damaged copies of a file. *Note Data safety:
@@ -114,17 +115,60 @@ entirely incomprehensible and therefore pointless.
Plzip will correctly decompress a file which is the concatenation of
two or more compressed files. The result is the concatenation of the
-corresponding uncompressed files. Integrity testing of concatenated
+corresponding decompressed files. Integrity testing of concatenated
compressed files is also supported.
+
+File: plzip.info, Node: Output, Next: Invoking plzip, Prev: Introduction, Up: Top
+
+2 Meaning of plzip's output
+***************************
+
+The output of plzip looks like this:
+
+ plzip -v foo
+ foo: 6.676:1, 14.98% ratio, 85.02% saved, 450560 in, 67493 out.
+
+ plzip -tvv foo.lz
+ foo.lz: 6.676:1, 14.98% ratio, 85.02% saved. ok
+
+ The meaning of each field is as follows:
+
+'N:1'
+ The compression ratio (uncompressed_size / compressed_size), shown
+ as N to 1.
+
+'ratio'
+ The inverse compression ratio
+ (compressed_size / uncompressed_size), shown as a percentage. A
+ decimal ratio is easily obtained by moving the decimal point two
+ places to the left; 14.98% = 0.1498.
+
+'saved'
+ The space saved by compression (1 - ratio), shown as a percentage.
+
+'in'
+ The size of the uncompressed data. When decompressing or testing,
+ it is shown as 'decompressed'. Note that plzip always prints the
+ uncompressed size before the compressed size when compressing,
+ decompressing, testing or listing.
+
+'out'
+ The size of the compressed data. When decompressing or testing, it
+ is shown as 'compressed'.
+
+
+ When decompressing or testing at verbosity level 4 (-vvvv), the
+dictionary size used to compress the file is also shown.
+
LANGUAGE NOTE: Uncompressed = not compressed = plain data; it may
never have been compressed. Decompressed is used to refer to data which
have undergone the process of decompression.

-File: plzip.info, Node: Invoking plzip, Next: Program design, Prev: Introduction, Up: Top
+File: plzip.info, Node: Invoking plzip, Next: Program design, Prev: Output, Up: Top
-2 Invoking plzip
+3 Invoking plzip
****************
The format for running plzip is:
@@ -135,7 +179,7 @@ The format for running plzip is:
other FILES and is read just once, the first time it appears in the
command line.
- Plzip supports the following options:
+ plzip supports the following options:
'-h'
'--help'
@@ -154,12 +198,12 @@ command line.
'-B BYTES'
'--data-size=BYTES'
- Set the size of the input data blocks, in bytes. The input file
- will be divided in chunks of this size before compression is
- performed. Valid values range from 8 KiB to 1 GiB. Default value
- is two times the dictionary size, except for option '-0' where it
- defaults to 1 MiB. Plzip will reduce the dictionary size if it is
- larger than the chosen data size.
+ When compressing, set the size of the input data blocks in bytes.
+ The input file will be divided into chunks of this size before
+ compression is performed. Valid values range from 8 KiB to 1 GiB.
+ Default value is two times the dictionary size, except for option
+ '-0' where it defaults to 1 MiB. Plzip will reduce the dictionary
+ size if it is larger than the chosen data size.
'-c'
'--stdout'
@@ -170,10 +214,10 @@ command line.
'-d'
'--decompress'
- Decompress the specified file(s). If a file does not exist or
- can't be opened, plzip continues decompressing the rest of the
- files. If a file fails to decompress, plzip exits immediately
- without decompressing the rest of the files.
+ Decompress the specified files. If a file does not exist or can't
+ be opened, plzip continues decompressing the rest of the files. If
+ a file fails to decompress, or is a terminal, plzip exits
+ immediately without decompressing the rest of the files.
'-f'
'--force'
@@ -181,8 +225,8 @@ command line.
'-F'
'--recompress'
- Force re-compression of files whose name already has the '.lz' or
- '.tlz' suffix.
+ When compressing, force re-compression of files whose name already
+ has the '.lz' or '.tlz' suffix.
'-k'
'--keep'
@@ -192,7 +236,7 @@ command line.
'-l'
'--list'
Print the uncompressed size, compressed size and percentage saved
- of the specified file(s). Trailing data are ignored. The values
+ of the specified files. Trailing data are ignored. The values
produced are correct even for multimember files. If more than one
file is given, a final line containing the cumulative sizes is
printed. With '-v', the dictionary size, the number of members in
@@ -206,18 +250,21 @@ command line.
'-m BYTES'
'--match-length=BYTES'
- Set the match length limit in bytes. After a match this long is
- found, the search is finished. Valid values range from 5 to 273.
- Larger values usually give better compression ratios but longer
- compression times.
+ When compressing, set the match length limit in bytes. After a
+ match this long is found, the search is finished. Valid values
+ range from 5 to 273. Larger values usually give better compression
+ ratios but longer compression times.
'-n N'
'--threads=N'
- Set the number of worker threads. Valid values range from 1 to "as
- many as your system can support". If this option is not used,
- plzip tries to detect the number of processors in the system and
- use it as default value. 'plzip --help' shows the system's default
- value.
+ Set the number of worker threads, overriding the system's default.
+ Valid values range from 1 to "as many as your system can support".
+ If this option is not used, plzip tries to detect the number of
+ processors in the system and use it as default value. When
+ compressing on a 32 bit system, plzip tries to limit the memory
+ use to under 2.22 GiB (4 worker threads at level -9) by reducing
+ the number of threads below the system's default. 'plzip --help'
+ shows the system's default value.
Note that the number of usable threads is limited to
ceil( file_size / data_size ) during compression (*note Minimum
@@ -228,8 +275,9 @@ command line.
'--output=FILE'
When reading from standard input and '--stdout' has not been
specified, use 'FILE' as the virtual name of the uncompressed
- file. This produces a file named 'FILE' when decompressing, and a
- file named 'FILE.lz' when compressing.
+ file. This produces a file named 'FILE' when decompressing, or a
+ file named 'FILE.lz' when compressing. A second '.lz' extension is
+ not added if 'FILE' already ends in '.lz' or '.tlz'.
'-q'
'--quiet'
@@ -237,13 +285,13 @@ command line.
'-s BYTES'
'--dictionary-size=BYTES'
- Set the dictionary size limit in bytes. Plzip will use the smallest
- possible dictionary size for each file without exceeding this
- limit. Valid values range from 4 KiB to 512 MiB. Values 12 to 29
- are interpreted as powers of two, meaning 2^12 to 2^29 bytes. Note
- that dictionary sizes are quantized. If the specified size does
- not match one of the valid sizes, it will be rounded upwards by
- adding up to (BYTES / 8) to it.
+ When compressing, set the dictionary size limit in bytes. Plzip
+ will use the smallest possible dictionary size for each file
+ without exceeding this limit. Valid values range from 4 KiB to
+ 512 MiB. Values 12 to 29 are interpreted as powers of two, meaning
+ 2^12 to 2^29 bytes. Note that dictionary sizes are quantized. If
+ the specified size does not match one of the valid sizes, it will
+ be rounded upwards by adding up to (BYTES / 8) to it.
For maximum compression you should use a dictionary size limit as
large as possible, but keep in mind that the decompression memory
@@ -252,10 +300,10 @@ command line.
'-t'
'--test'
- Check integrity of the specified file(s), but don't decompress
- them. This really performs a trial decompression and throws away
- the result. Use it together with '-v' to see information about
- the file(s). If a file does not exist, can't be opened, or is a
+ Check integrity of the specified files, but don't decompress them.
+ This really performs a trial decompression and throws away the
+ result. Use it together with '-v' to see information about the
+ files. If a file does not exist, can't be opened, or is a
terminal, plzip continues checking the rest of the files. If a
file fails the test, plzip may be unable to check the rest of the
files.
@@ -263,17 +311,19 @@ command line.
'-v'
'--verbose'
Verbose mode.
- When compressing, show the compression ratio for each file
- processed. A second '-v' shows the progress of compression.
+ When compressing, show the compression ratio and size for each file
+ processed.
When decompressing or testing, further -v's (up to 4) increase the
verbosity level, showing status, compression ratio, dictionary
size, decompressed size, and compressed size.
+ Two or more '-v' options show the progress of (de)compression,
+ except for single-member files.
'-0 .. -9'
Set the compression parameters (dictionary size and match length
limit) as shown in the table below. The default compression level
is '-6'. Note that '-9' can be much slower than '-0'. These
- options have no effect when decompressing.
+ options have no effect when decompressing, testing or listing.
The bidimensional parameter space of LZMA can't be mapped to a
linear scale optimal for all files. If your files are large, very
@@ -296,6 +346,13 @@ command line.
'--best'
Aliases for GNU gzip compatibility.
+'--loose-trailing'
+ When decompressing, testing or listing, allow trailing data whose
+ first bytes are so similar to the magic bytes of a lzip header
+ that they can be confused with a corrupt header. Use this option
+ if a file triggers a "corrupt header" error and the cause is not
+ actually a corrupt header.
+
Numbers given as arguments to options may be followed by a multiplier
and an optional 'B' for "byte".
@@ -321,7 +378,7 @@ caused plzip to panic.

File: plzip.info, Node: Program design, Next: File format, Prev: Invoking plzip, Up: Top
-3 Program design
+4 Program design
****************
When compressing, plzip divides the input file into chunks and
@@ -344,6 +401,17 @@ them to the workers. The workers (de)compress the blocks received from
the splitter. The muxer collects processed packets from the workers, and
writes them to the output file.
+ ,------------,
+ ,-->| worker 0 |--,
+ | `------------' |
+,-------, ,----------, | ,------------, | ,-------, ,--------,
+| input |-->| splitter |-+-->| worker 1 |--+-->| muxer |-->| output |
+| file | `----------' | `------------' | `-------' | file |
+`-------' | ... | `--------'
+ | ,------------, |
+ `-->| worker N-1 |--'
+ `------------'
+
When decompressing from a regular file, the splitter is removed and
the workers read directly from the input file. If the output file is
also a regular file, the muxer is also removed and the workers write
@@ -355,7 +423,7 @@ I/O speed.

File: plzip.info, Node: File format, Next: Memory requirements, Prev: Program design, Up: Top
-4 File format
+5 File format
*************
Perfection is reached, not when there is no longer anything to add, but
@@ -426,17 +494,11 @@ additional information before, between, or after them.

File: plzip.info, Node: Memory requirements, Next: Minimum file sizes, Prev: File format, Up: Top
-5 Memory required to compress and decompress
+6 Memory required to compress and decompress
********************************************
-The amount of memory required *per thread* is approximately the
-following:
-
- * For compression at level -0; 1.5 MiB plus 3 times the data size
- (*note --data-size::). Default is 4.5 MiB.
-
- * For compression at other levels; 11 times the dictionary size plus
- 3 times the data size. Default is 136 MiB.
+The amount of memory required *per thread* for decompression or testing
+is approximately the following:
* For decompression of a regular (seekable) file to another regular
file, or for testing of a regular file; the dictionary size.
@@ -450,10 +512,35 @@ following:
* For decompression of a non-seekable file or of standard input; the
dictionary size plus up to 35 MiB.
+The amount of memory required *per thread* for compression is
+approximately the following:
+
+ * For compression at level -0; 1.5 MiB plus 3.375 times the data size
+ (*note --data-size::). Default is 4.875 MiB.
+
+ * For compression at other levels; 11 times the dictionary size plus
+ 3.375 times the data size. Default is 142 MiB.
+
+The following table shows the memory required *per thread* for
+compression at a given level, using the default data size for each
+level:
+
+Level Memory required
+-0 4.875 MiB
+-1 17.75 MiB
+-2 26.625 MiB
+-3 35.5 MiB
+-4 53.25 MiB
+-5 71 MiB
+-6 142 MiB
+-7 284 MiB
+-8 426 MiB
+-9 568 MiB
+

File: plzip.info, Node: Minimum file sizes, Next: Trailing data, Prev: Memory requirements, Up: Top
-6 Minimum file sizes required for full compression speed
+7 Minimum file sizes required for full compression speed
********************************************************
When compressing, plzip divides the input file into chunks and
@@ -466,7 +553,8 @@ must be at least as large as the number of worker threads times the
chunk size (*note --data-size::). Else some processors will not get any
data to compress, and compression will be proportionally slower. The
maximum speed increase achievable on a given file is limited by the
-ratio (file_size / data_size).
+ratio (file_size / data_size). For example, a tarball the size of gcc or
+linux will scale up to 8 processors at level -9.
The following table shows the minimum uncompressed file size needed
for full use of N processors at a given compression level, using the
@@ -489,7 +577,7 @@ Level

File: plzip.info, Node: Trailing data, Next: Examples, Prev: Minimum file sizes, Up: Top
-7 Extra data appended to the file
+8 Extra data appended to the file
*********************************
Sometimes extra data are found appended to a lzip file after the last
@@ -501,10 +589,11 @@ member. Such trailing data may be:
* Useful data added by the user; a cryptographically secure hash, a
description of file contents, etc. It is safe to append any amount
- of text to a lzip file as long as the text does not begin with the
- string "LZIP", and does not contain any zero bytes (null
- characters). Nonzero bytes and zero bytes can't be safely mixed in
- trailing data.
+ of text to a lzip file as long as none of the first four bytes of
+ the text match the corresponding byte in the string "LZIP", and
+ the text does not contain any zero bytes (null characters).
+ Nonzero bytes and zero bytes can't be safely mixed in trailing
+ data.
* Garbage added by some not totally successful copy operation.
@@ -512,12 +601,17 @@ member. Such trailing data may be:
and hash value (for a chosen hash) coincide with those of another
file.
- * In very rare cases, trailing data could be the corrupt header of
- another member. In multimember or concatenated files the
- probability of corruption happening in the magic bytes is 5 times
- smaller than the probability of getting a false positive caused by
- the corruption of the integrity information itself. Therefore it
- can be considered to be below the noise level.
+ * In rare cases, trailing data could be the corrupt header of another
+ member. In multimember or concatenated files the probability of
+ corruption happening in the magic bytes is 5 times smaller than the
+ probability of getting a false positive caused by the corruption
+ of the integrity information itself. Therefore it can be
+ considered to be below the noise level. Additionally, the test
+ used by plzip to discriminate trailing data from a corrupt header
+ has a Hamming distance (HD) of 3, and the 3 bit flips must happen
+ in different magic bytes for the test to fail. In any case, the
+ option '--trailing-error' guarantees that any corrupt header will
+ be detected.
Trailing data are in no way part of the lzip file format, but tools
reading lzip files are expected to behave as correctly and usefully as
@@ -531,7 +625,7 @@ cases where a file containing trailing data must be rejected, the option

File: plzip.info, Node: Examples, Next: Problems, Prev: Trailing data, Up: Top
-8 A small tutorial with examples
+9 A small tutorial with examples
********************************
WARNING! Even if plzip is bug-free, other causes may result in a corrupt
@@ -595,8 +689,8 @@ to decompressed byte 15000 (5000 bytes are produced).

File: plzip.info, Node: Problems, Next: Concept index, Prev: Examples, Up: Top
-9 Reporting bugs
-****************
+10 Reporting bugs
+*****************
There are probably bugs in plzip. There are certainly errors and
omissions in this manual. If you report them, they will get fixed. If
@@ -625,6 +719,7 @@ Concept index
* memory requirements: Memory requirements. (line 6)
* minimum file sizes: Minimum file sizes. (line 6)
* options: Invoking plzip. (line 6)
+* output: Output. (line 6)
* program design: Program design. (line 6)
* trailing data: Trailing data. (line 6)
* usage: Invoking plzip. (line 6)
@@ -634,19 +729,20 @@ Concept index

Tag Table:
Node: Top221
-Node: Introduction1103
-Node: Invoking plzip5274
-Ref: --trailing-error5843
-Ref: --data-size6086
-Node: Program design12796
-Node: File format14383
-Node: Memory requirements16815
-Node: Minimum file sizes17815
-Node: Trailing data19741
-Node: Examples21648
-Ref: concat-example22813
-Node: Problems23388
-Node: Concept index23914
+Node: Introduction1158
+Node: Output5134
+Node: Invoking plzip6614
+Ref: --trailing-error7177
+Ref: --data-size7420
+Node: Program design14938
+Node: File format17090
+Node: Memory requirements19522
+Node: Minimum file sizes20985
+Node: Trailing data23002
+Node: Examples25285
+Ref: concat-example26450
+Node: Problems27025
+Node: Concept index27553

End Tag Table
diff --git a/doc/plzip.texi b/doc/plzip.texi
index 5f32f6e..44cff75 100644
--- a/doc/plzip.texi
+++ b/doc/plzip.texi
@@ -6,8 +6,8 @@
@finalout
@c %**end of header
-@set UPDATED 12 April 2017
-@set VERSION 1.6
+@set UPDATED 7 February 2018
+@set VERSION 1.7
@dircategory Data Compression
@direntry
@@ -36,6 +36,7 @@ This manual is for Plzip (version @value{VERSION}, @value{UPDATED}).
@menu
* Introduction:: Purpose and features of plzip
+* Output:: Meaning of plzip's output
* Invoking plzip:: Command line interface
* Program design:: Internal structure of plzip
* File format:: Detailed format of the compressed file
@@ -48,7 +49,7 @@ This manual is for Plzip (version @value{VERSION}, @value{UPDATED}).
@end menu
@sp 1
-Copyright @copyright{} 2009-2017 Antonio Diaz Diaz.
+Copyright @copyright{} 2009-2018 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission
to copy, distribute and modify it.
@@ -81,7 +82,7 @@ availability:
The lzip format provides very safe integrity checking and some data
recovery means. The
@uref{http://www.nongnu.org/lzip/manual/lziprecover_manual.html#Data-safety,,lziprecover}
-program can repair bit-flip errors (one of the most common forms of data
+program can repair bit flip errors (one of the most common forms of data
corruption) in lzip files, and provides data recovery capabilities,
including error-checked merging of damaged copies of a file.
@ifnothtml
@@ -143,9 +144,54 @@ incomprehensible and therefore pointless.
Plzip will correctly decompress a file which is the concatenation of two
or more compressed files. The result is the concatenation of the
-corresponding uncompressed files. Integrity testing of concatenated
+corresponding decompressed files. Integrity testing of concatenated
compressed files is also supported.
+
+@node Output
+@chapter Meaning of plzip's output
+@cindex output
+
+The output of plzip looks like this:
+
+@example
+plzip -v foo
+ foo: 6.676:1, 14.98% ratio, 85.02% saved, 450560 in, 67493 out.
+
+plzip -tvv foo.lz
+ foo.lz: 6.676:1, 14.98% ratio, 85.02% saved. ok
+@end example
+
+The meaning of each field is as follows:
+
+@table @code
+@item N:1
+The compression ratio @w{(uncompressed_size / compressed_size)}, shown
+as N to 1.
+
+@item ratio
+The inverse compression ratio @w{(compressed_size / uncompressed_size)},
+shown as a percentage. A decimal ratio is easily obtained by moving the
+decimal point two places to the left; @w{14.98% = 0.1498}.
+
+@item saved
+The space saved by compression @w{(1 - ratio)}, shown as a percentage.
+
+@item in
+The size of the uncompressed data. When decompressing or testing, it is
+shown as @code{decompressed}. Note that plzip always prints the
+uncompressed size before the compressed size when compressing,
+decompressing, testing or listing.
+
+@item out
+The size of the compressed data. When decompressing or testing, it is
+shown as @code{compressed}.
+
+@end table
+
+When decompressing or testing at verbosity level 4 (-vvvv), the
+dictionary size used to compress the file is also shown.
+
LANGUAGE NOTE: Uncompressed = not compressed = plain data; it may never
have been compressed. Decompressed is used to refer to data which have
undergone the process of decompression.
@@ -169,7 +215,7 @@ plzip [@var{options}] [@var{files}]
mixed with other @var{files} and is read just once, the first time it
appears in the command line.
-Plzip supports the following options:
+plzip supports the following options:
@table @code
@item -h
@@ -190,12 +236,12 @@ garbage that can be safely ignored. @xref{concat-example}.
@anchor{--data-size}
@item -B @var{bytes}
@itemx --data-size=@var{bytes}
-Set the size of the input data blocks, in bytes. The input file will be
-divided in chunks of this size before compression is performed. Valid
-values range from 8 KiB to 1 GiB. Default value is two times the
-dictionary size, except for option @samp{-0} where it defaults to 1 MiB.
-Plzip will reduce the dictionary size if it is larger than the chosen
-data size.
+When compressing, set the size of the input data blocks in bytes. The
+input file will be divided into chunks of this size before compression is
+performed. Valid values range from @w{8 KiB} to @w{1 GiB}. Default value
+is two times the dictionary size, except for option @samp{-0} where it
+defaults to @w{1 MiB}. Plzip will reduce the dictionary size if it is
+larger than the chosen data size.
@item -c
@itemx --stdout
@@ -206,10 +252,10 @@ device.
@item -d
@itemx --decompress
-Decompress the specified file(s). If a file does not exist or can't be
+Decompress the specified files. If a file does not exist or can't be
opened, plzip continues decompressing the rest of the files. If a file
-fails to decompress, plzip exits immediately without decompressing the
-rest of the files.
+fails to decompress, or is a terminal, plzip exits immediately without
+decompressing the rest of the files.
@item -f
@itemx --force
@@ -217,8 +263,8 @@ Force overwrite of output files.
@item -F
@itemx --recompress
-Force re-compression of files whose name already has the @samp{.lz} or
-@samp{.tlz} suffix.
+When compressing, force re-compression of files whose name already has
+the @samp{.lz} or @samp{.tlz} suffix.
@item -k
@itemx --keep
@@ -227,7 +273,7 @@ Keep (don't delete) input files during compression or decompression.
@item -l
@itemx --list
Print the uncompressed size, compressed size and percentage saved of the
-specified file(s). Trailing data are ignored. The values produced are
+specified files. Trailing data are ignored. The values produced are
correct even for multimember files. If more than one file is given, a
final line containing the cumulative sizes is printed. With @samp{-v},
the dictionary size, the number of members in the file, and the amount
@@ -240,16 +286,21 @@ verifies that none of the specified files contain trailing data.
@item -m @var{bytes}
@itemx --match-length=@var{bytes}
-Set the match length limit in bytes. After a match this long is found,
-the search is finished. Valid values range from 5 to 273. Larger values
-usually give better compression ratios but longer compression times.
+When compressing, set the match length limit in bytes. After a match
+this long is found, the search is finished. Valid values range from 5 to
+273. Larger values usually give better compression ratios but longer
+compression times.
@item -n @var{n}
@itemx --threads=@var{n}
-Set the number of worker threads. Valid values range from 1 to "as many
-as your system can support". If this option is not used, plzip tries to
-detect the number of processors in the system and use it as default
-value. @w{@samp{plzip --help}} shows the system's default value.
+Set the number of worker threads, overriding the system's default. Valid
+values range from 1 to "as many as your system can support". If this
+option is not used, plzip tries to detect the number of processors in
+the system and use it as default value. When compressing on a @w{32 bit}
+system, plzip tries to limit the memory use to under @w{2.22 GiB} (4
+worker threads at level -9) by reducing the number of threads below the
+system's default. @w{@samp{plzip --help}} shows the system's default
+value.
Note that the number of usable threads is limited to @w{ceil( file_size
/ data_size )} during compression (@pxref{Minimum file sizes}), and to
@@ -260,7 +311,9 @@ the number of members in the input during decompression.
When reading from standard input and @samp{--stdout} has not been
specified, use @samp{@var{file}} as the virtual name of the uncompressed
file. This produces a file named @samp{@var{file}} when decompressing,
-and a file named @samp{@var{file}.lz} when compressing.
+or a file named @samp{@var{file}.lz} when compressing. A second
+@samp{.lz} extension is not added if @samp{@var{file}} already ends in
+@samp{.lz} or @samp{.tlz}.
@item -q
@itemx --quiet
@@ -268,12 +321,12 @@ Quiet operation. Suppress all messages.
@item -s @var{bytes}
@itemx --dictionary-size=@var{bytes}
-Set the dictionary size limit in bytes. Plzip will use the smallest
-possible dictionary size for each file without exceeding this limit.
-Valid values range from 4 KiB to 512 MiB. Values 12 to 29 are
-interpreted as powers of two, meaning 2^12 to 2^29 bytes. Note that
-dictionary sizes are quantized. If the specified size does not match one
-of the valid sizes, it will be rounded upwards by adding up to
+When compressing, set the dictionary size limit in bytes. Plzip will use
+the smallest possible dictionary size for each file without exceeding
+this limit. Valid values range from @w{4 KiB} to @w{512 MiB}. Values 12
+to 29 are interpreted as powers of two, meaning 2^12 to 2^29 bytes. Note
+that dictionary sizes are quantized. If the specified size does not
+match one of the valid sizes, it will be rounded upwards by adding up to
@w{(@var{bytes} / 8)} to it.
For maximum compression you should use a dictionary size limit as large
@@ -282,27 +335,29 @@ is affected at compression time by the choice of dictionary size limit.
@item -t
@itemx --test
-Check integrity of the specified file(s), but don't decompress them.
-This really performs a trial decompression and throws away the result.
-Use it together with @samp{-v} to see information about the file(s). If
-a file does not exist, can't be opened, or is a terminal, plzip
-continues checking the rest of the files. If a file fails the test,
-plzip may be unable to check the rest of the files.
+Check integrity of the specified files, but don't decompress them. This
+really performs a trial decompression and throws away the result. Use it
+together with @samp{-v} to see information about the files. If a file
+does not exist, can't be opened, or is a terminal, plzip continues
+checking the rest of the files. If a file fails the test, plzip may be
+unable to check the rest of the files.
@item -v
@itemx --verbose
Verbose mode.@*
-When compressing, show the compression ratio for each file processed. A
-second @samp{-v} shows the progress of compression.@*
+When compressing, show the compression ratio and size for each file
+processed.@*
When decompressing or testing, further -v's (up to 4) increase the
verbosity level, showing status, compression ratio, dictionary size,
-decompressed size, and compressed size.
+decompressed size, and compressed size.@*
+Two or more @samp{-v} options show the progress of (de)compression,
+except for single-member files.
@item -0 .. -9
Set the compression parameters (dictionary size and match length limit)
as shown in the table below. The default compression level is @samp{-6}.
Note that @samp{-9} can be much slower than @samp{-0}. These options
-have no effect when decompressing.
+have no effect when decompressing, testing or listing.
The bidimensional parameter space of LZMA can't be mapped to a linear
scale optimal for all files. If your files are large, very repetitive,
@@ -327,6 +382,12 @@ etc, you may need to use the @samp{--dictionary-size} and
@itemx --best
Aliases for GNU gzip compatibility.
+@item --loose-trailing
+When decompressing, testing or listing, allow trailing data whose first
+bytes are so similar to the magic bytes of a lzip header that they can
+be confused with a corrupt header. Use this option if a file triggers a
+"corrupt header" error and the cause is not actually a corrupt header.
+
@end table
Numbers given as arguments to options may be followed by a multiplier
@@ -363,8 +424,8 @@ creating a multimember compressed file.
When decompressing, plzip decompresses as many members simultaneously as
worker threads are chosen. Files that were compressed with lzip will not
-be decompressed faster than using lzip (unless the @samp{-b} option was
-used) because lzip usually produces single-member files, which can't be
+be decompressed faster than using lzip (unless the @samp{-b} option was used)
+because lzip usually produces single-member files, which can't be
decompressed in parallel.
For each input file, a splitter thread and several worker threads are
@@ -377,6 +438,19 @@ to the workers. The workers (de)compress the blocks received from the
splitter. The muxer collects processed packets from the workers, and
writes them to the output file.
+@verbatim
+ ,------------,
+ ,-->| worker 0 |--,
+ | `------------' |
+,-------, ,----------, | ,------------, | ,-------, ,--------,
+| input |-->| splitter |-+-->| worker 1 |--+-->| muxer |-->| output |
+| file | `----------' | `------------' | `-------' | file |
+`-------' | ... | `--------'
+ | ,------------, |
+ `-->| worker N-1 |--'
+ `------------'
+@end verbatim
+
When decompressing from a regular file, the splitter is removed and the
workers read directly from the input file. If the output file is also a
regular file, the muxer is also removed and the workers write directly
@@ -472,35 +546,60 @@ facilitates safe recovery of undamaged members from multimember files.
@chapter Memory required to compress and decompress
@cindex memory requirements
-The amount of memory required @strong{per thread} is approximately the
-following:
+The amount of memory required @strong{per thread} for decompression or
+testing is approximately the following:
@itemize @bullet
@item
-For compression at level -0; 1.5 MiB plus 3 times the data size
-(@pxref{--data-size}). Default is 4.5 MiB.
-
-@item
-For compression at other levels; 11 times the dictionary size plus 3
-times the data size. Default is 136 MiB.
-
-@item
For decompression of a regular (seekable) file to another regular file,
or for testing of a regular file; the dictionary size.
@item
For testing of a non-seekable file or of standard input; the dictionary
-size plus up to 5 MiB.
+size plus up to @w{5 MiB}.
@item
For decompression of a regular file to a non-seekable file or to
-standard output; the dictionary size plus up to 32 MiB.
+standard output; the dictionary size plus up to @w{32 MiB}.
@item
For decompression of a non-seekable file or of standard input; the
-dictionary size plus up to 35 MiB.
+dictionary size plus up to @w{35 MiB}.
+@end itemize
+
+@noindent
+The amount of memory required @strong{per thread} for compression is
+approximately the following:
+
+@itemize @bullet
+@item
+For compression at level -0; @w{1.5 MiB} plus 3.375 times the data size
+(@pxref{--data-size}). Default is @w{4.875 MiB}.
+
+@item
+For compression at other levels; 11 times the dictionary size plus 3.375
+times the data size. Default is @w{142 MiB}.
@end itemize
+@noindent
+The following table shows the memory required @strong{per thread} for
+compression at a given level, using the default data size for each
+level:
+
+@multitable {Level} {Memory required}
+@item Level @tab Memory required
+@item -0 @tab 4.875 MiB
+@item -1 @tab 17.75 MiB
+@item -2 @tab 26.625 MiB
+@item -3 @tab 35.5 MiB
+@item -4 @tab 53.25 MiB
+@item -5 @tab 71 MiB
+@item -6 @tab 142 MiB
+@item -7 @tab 284 MiB
+@item -8 @tab 426 MiB
+@item -9 @tab 568 MiB
+@end multitable
+
@node Minimum file sizes
@chapter Minimum file sizes required for full compression speed
@@ -516,7 +615,8 @@ least as large as the number of worker threads times the chunk size
(@pxref{--data-size}). Else some processors will not get any data to
compress, and compression will be proportionally slower. The maximum
speed increase achievable on a given file is limited by the ratio
-@w{(file_size / data_size)}.
+@w{(file_size / data_size)}. For example, a tarball the size of gcc or
+linux will scale up to 8 processors at level -9.
The following table shows the minimum uncompressed file size needed for
full use of N processors at a given compression level, using the default
@@ -554,9 +654,10 @@ padding zero bytes to a lzip file.
@item
Useful data added by the user; a cryptographically secure hash, a
description of file contents, etc. It is safe to append any amount of
-text to a lzip file as long as the text does not begin with the string
-"LZIP", and does not contain any zero bytes (null characters). Nonzero
-bytes and zero bytes can't be safely mixed in trailing data.
+text to a lzip file as long as none of the first four bytes of the text
+match the corresponding byte in the string "LZIP", and the text does not
+contain any zero bytes (null characters). Nonzero bytes and zero bytes
+can't be safely mixed in trailing data.
@item
Garbage added by some not totally successful copy operation.
@@ -566,12 +667,16 @@ Malicious data added to the file in order to make its total size and
hash value (for a chosen hash) coincide with those of another file.
@item
-In very rare cases, trailing data could be the corrupt header of another
+In rare cases, trailing data could be the corrupt header of another
member. In multimember or concatenated files the probability of
corruption happening in the magic bytes is 5 times smaller than the
probability of getting a false positive caused by the corruption of the
integrity information itself. Therefore it can be considered to be below
-the noise level.
+the noise level. Additionally, the test used by plzip to discriminate
+trailing data from a corrupt header has a Hamming distance (HD) of 3,
+and the 3 bit flips must happen in different magic bytes for the test to
+fail. In any case, the option @samp{--trailing-error} guarantees that
+any corrupt header will be detected.
@end itemize
Trailing data are in no way part of the lzip file format, but tools
@@ -607,7 +712,7 @@ plzip -v file
@sp 1
@noindent
Example 2: Like example 1 but the created @samp{file.lz} has a block
-size of 1 MiB. The compression ratio is not shown.
+size of @w{1 MiB}. The compression ratio is not shown.
@example
plzip -B 1MiB file
@@ -656,7 +761,7 @@ Do this instead
@sp 1
@noindent
-Example 7: Decompress @samp{file.lz} partially until 10 KiB of
+Example 7: Decompress @samp{file.lz} partially until @w{10 KiB} of
decompressed data are produced.
@example
diff --git a/file_index.cc b/file_index.cc
index 581b516..8238054 100644
--- a/file_index.cc
+++ b/file_index.cc
@@ -1,5 +1,5 @@
/* Plzip - Parallel compressor compatible with lzip
- Copyright (C) 2009-2017 Antonio Diaz Diaz.
+ Copyright (C) 2009-2018 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -60,7 +60,8 @@ void File_index::set_num_error( const char * const msg, unsigned long long num )
// If successful, push last member and set pos to member header.
-bool File_index::skip_trailing_data( const int fd, long long & pos )
+bool File_index::skip_trailing_data( const int fd, long long & pos,
+ const bool ignore_trailing, const bool loose_trailing )
{
enum { block_size = 16384,
buffer_size = block_size + File_trailer::size - 1 + File_header::size };
@@ -95,10 +96,13 @@ bool File_index::skip_trailing_data( const int fd, long long & pos )
if( !header.verify_magic() || !header.verify_version() ||
!isvalid_ds( dictionary_size ) ) continue;
if( (*(File_header *)( buffer + i )).verify_prefix( bsize - i ) )
- {
- error_ = "Last member in input file is truncated or corrupt.";
- retval_ = 2; return false;
- }
+ { error_ = "Last member in input file is truncated or corrupt.";
+ retval_ = 2; return false; }
+ if( !loose_trailing && bsize - i >= File_header::size &&
+ (*(File_header *)( buffer + i )).verify_corrupt() )
+ { error_ = corrupt_mm_msg; retval_ = 2; return false; }
+ if( !ignore_trailing )
+ { error_ = trailing_msg; retval_ = 2; return false; }
pos = ipos + i - member_size;
member_vector.push_back( Member( 0, trailer.data_size(), pos,
member_size, dictionary_size ) );
@@ -116,7 +120,8 @@ bool File_index::skip_trailing_data( const int fd, long long & pos )
}
-File_index::File_index( const int infd, const bool ignore_trailing )
+File_index::File_index( const int infd, const bool ignore_trailing,
+ const bool loose_trailing )
: isize( lseek( infd, 0, SEEK_END ) ), retval_( 0 )
{
if( isize < 0 )
@@ -147,11 +152,10 @@ File_index::File_index( const int infd, const bool ignore_trailing )
const unsigned long long member_size = trailer.member_size();
if( member_size < min_member_size || member_size > (unsigned long long)pos )
{
- if( !member_vector.empty() )
- set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 );
- else if( skip_trailing_data( infd, pos ) )
- { if( ignore_trailing ) continue;
- error_ = trailing_msg; retval_ = 2; return; }
+ if( member_vector.empty() )
+ { if( skip_trailing_data( infd, pos, ignore_trailing, loose_trailing ) )
+ continue; else return; }
+ set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 );
break;
}
if( seek_read( infd, header.data, File_header::size,
@@ -161,11 +165,10 @@ File_index::File_index( const int infd, const bool ignore_trailing )
if( !header.verify_magic() || !header.verify_version() ||
!isvalid_ds( dictionary_size ) )
{
- if( !member_vector.empty() )
- set_num_error( "Bad header at pos ", pos - member_size );
- else if( skip_trailing_data( infd, pos ) )
- { if( ignore_trailing ) continue;
- error_ = trailing_msg; retval_ = 2; return; }
+ if( member_vector.empty() )
+ { if( skip_trailing_data( infd, pos, ignore_trailing, loose_trailing ) )
+ continue; else return; }
+ set_num_error( "Bad header at pos ", pos - member_size );
break;
}
pos -= member_size;
diff --git a/file_index.h b/file_index.h
index 5b9813e..7962b99 100644
--- a/file_index.h
+++ b/file_index.h
@@ -1,5 +1,5 @@
/* Plzip - Parallel compressor compatible with lzip
- Copyright (C) 2009-2017 Antonio Diaz Diaz.
+ Copyright (C) 2009-2018 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -55,10 +55,12 @@ class File_index
void set_errno_error( const char * const msg );
void set_num_error( const char * const msg, unsigned long long num );
- bool skip_trailing_data( const int fd, long long & pos );
+ bool skip_trailing_data( const int fd, long long & pos,
+ const bool ignore_trailing, const bool loose_trailing );
public:
- File_index( const int infd, const bool ignore_trailing );
+ File_index( const int infd, const bool ignore_trailing,
+ const bool loose_trailing );
long members() const { return member_vector.size(); }
const std::string & error() const { return error_; }
diff --git a/list.cc b/list.cc
index f4169f8..eeef1c3 100644
--- a/list.cc
+++ b/list.cc
@@ -1,5 +1,5 @@
/* Plzip - Parallel compressor compatible with lzip
- Copyright (C) 2009-2017 Antonio Diaz Diaz.
+ Copyright (C) 2009-2018 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -21,6 +21,7 @@
#include <cstring>
#include <string>
#include <vector>
+#include <pthread.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/stat.h>
@@ -37,7 +38,7 @@ void list_line( const unsigned long long uncomp_size,
{
if( uncomp_size > 0 )
std::printf( "%15llu %15llu %6.2f%% %s\n", uncomp_size, comp_size,
- 100.0 * ( 1.0 - ( (double)comp_size / uncomp_size ) ),
+ 100.0 - ( ( 100.0 * comp_size ) / uncomp_size ),
input_filename );
else
std::printf( "%15llu %15llu -INF%% %s\n", uncomp_size, comp_size,
@@ -48,7 +49,7 @@ void list_line( const unsigned long long uncomp_size,
int list_files( const std::vector< std::string > & filenames,
- const bool ignore_trailing )
+ const bool ignore_trailing, const bool loose_trailing )
{
unsigned long long total_comp = 0, total_uncomp = 0;
int files = 0, retval = 0;
@@ -61,11 +62,11 @@ int list_files( const std::vector< std::string > & filenames,
const char * const input_filename =
from_stdin ? "(stdin)" : filenames[i].c_str();
struct stat in_stats; // not used
- const int infd = from_stdin ? STDIN_FILENO :
+ const int infd = from_stdin ? STDIN_FILENO :
open_instream( input_filename, &in_stats, true, true );
if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; }
- const File_index file_index( infd, ignore_trailing );
+ const File_index file_index( infd, ignore_trailing, loose_trailing );
close( infd );
if( file_index.retval() != 0 )
{
diff --git a/lzip.h b/lzip.h
index ee09d33..3587a8f 100644
--- a/lzip.h
+++ b/lzip.h
@@ -1,5 +1,5 @@
/* Plzip - Parallel compressor compatible with lzip
- Copyright (C) 2009-2017 Antonio Diaz Diaz.
+ Copyright (C) 2009-2018 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -27,16 +27,19 @@ enum {
min_member_size = 36 };
-class Pretty_print
+// defined in main.cc
+extern int verbosity;
+
+class Pretty_print // requires global var 'int verbosity'
{
std::string name_;
+ std::string padded_name;
const char * const stdin_name;
unsigned longest_name;
mutable bool first_post;
public:
- Pretty_print( const std::vector< std::string > & filenames,
- const int verbosity )
+ Pretty_print( const std::vector< std::string > & filenames )
: stdin_name( "(stdin)" ), longest_name( 0 ), first_post( false )
{
if( verbosity <= 0 ) return;
@@ -54,6 +57,9 @@ public:
{
if( filename.size() && filename != "-" ) name_ = filename;
else name_ = stdin_name;
+ padded_name = " "; padded_name += name_; padded_name += ": ";
+ if( name_.size() < longest_name )
+ padded_name.append( longest_name - name_.size(), ' ' );
first_post = true;
}
@@ -88,11 +94,19 @@ struct File_header
void set_magic() { std::memcpy( data, magic_string, 4 ); data[4] = 1; }
bool verify_magic() const
{ return ( std::memcmp( data, magic_string, 4 ) == 0 ); }
- bool verify_prefix( const int size ) const // detect truncated header
+
+ bool verify_prefix( const int sz ) const // detect (truncated) header
{
- for( int i = 0; i < size && i < 4; ++i )
+ for( int i = 0; i < sz && i < 4; ++i )
if( data[i] != magic_string[i] ) return false;
- return ( size > 0 );
+ return ( sz > 0 );
+ }
+ bool verify_corrupt() const // detect corrupt header: some, but not all, magic bytes match
+ {
+ int matches = 0; // number of positions where data[] equals magic_string[]
+ for( int i = 0; i < 4; ++i )
+ if( data[i] == magic_string[i] ) ++matches;
+ return ( matches > 1 && matches < 4 ); // true only for 2 or 3 matching bytes; 4 matches is an intact magic
}
uint8_t version() const { return data[4]; }
@@ -165,6 +179,7 @@ struct File_trailer
const char * const bad_magic_msg = "Bad magic number (file not in lzip format).";
const char * const bad_dict_msg = "Invalid dictionary size in member header.";
+const char * const corrupt_mm_msg = "Corrupt header in multimember file.";
const char * const trailing_msg = "Trailing data not allowed.";
// defined in compress.cc
@@ -179,7 +194,8 @@ void xunlock( pthread_mutex_t * const mutex );
void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex );
void xsignal( pthread_cond_t * const cond );
void xbroadcast( pthread_cond_t * const cond );
-int compress( const int data_size, const int dictionary_size,
+int compress( const unsigned long long cfile_size,
+ const int data_size, const int dictionary_size,
const int match_len_limit, const int num_workers,
const int infd, const int outfd,
const Pretty_print & pp, const int debug_level );
@@ -193,25 +209,26 @@ int dec_stdout( const int num_workers, const int infd, const int outfd,
const File_index & file_index );
// defined in dec_stream.cc
-int dec_stream( const int num_workers, const int infd, const int outfd,
+int dec_stream( const unsigned long long cfile_size,
+ const int num_workers, const int infd, const int outfd,
const Pretty_print & pp, const int debug_level,
- const bool ignore_trailing );
+ const bool ignore_trailing, const bool loose_trailing );
// defined in decompress.cc
int preadblock( const int fd, uint8_t * const buf, const int size,
const long long pos );
int decompress_read_error( struct LZ_Decoder * const decoder,
const Pretty_print & pp, const int worker_id );
-int decompress( int num_workers, const int infd, const int outfd,
- const Pretty_print & pp, const int debug_level,
- const bool ignore_trailing, const bool infd_isreg );
+int decompress( const unsigned long long cfile_size, int num_workers,
+ const int infd, const int outfd, const Pretty_print & pp,
+ const int debug_level, const bool ignore_trailing,
+ const bool loose_trailing, const bool infd_isreg );
// defined in list.cc
int list_files( const std::vector< std::string > & filenames,
- const bool ignore_trailing );
+ const bool ignore_trailing, const bool loose_trailing );
// defined in main.cc
-extern int verbosity;
struct stat;
const char * bad_version( const unsigned version );
const char * format_ds( const unsigned dictionary_size );
@@ -224,9 +241,9 @@ void show_error( const char * const msg, const int errcode = 0,
void show_file_error( const char * const filename, const char * const msg,
const int errcode = 0 );
void internal_error( const char * const msg );
-void show_progress( const int packet_size,
- const Pretty_print * const p = 0,
- const unsigned long long cfile_size = 0 );
+void show_progress( const unsigned long long packet_size,
+ const unsigned long long cfile_size = 0,
+ const Pretty_print * const p = 0 );
class Slot_tally
@@ -262,12 +279,4 @@ public:
if( ++num_free == 1 ) xsignal( &slot_av ); // num_free was 0
xunlock( &mutex );
}
-
- void leave_slots( const int slots ) // return slots to the tally
- {
- xlock( &mutex );
- num_free += slots;
- if( num_free == slots ) xsignal( &slot_av ); // num_free was 0
- xunlock( &mutex );
- }
};
diff --git a/main.cc b/main.cc
index 5e75690..d1f76bc 100644
--- a/main.cc
+++ b/main.cc
@@ -1,6 +1,6 @@
/* Plzip - Parallel compressor compatible with lzip
Copyright (C) 2009 Laszlo Ersek.
- Copyright (C) 2009-2017 Antonio Diaz Diaz.
+ Copyright (C) 2009-2018 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -73,7 +73,7 @@ namespace {
const char * const Program_name = "Plzip";
const char * const program_name = "plzip";
-const char * const program_year = "2017";
+const char * const program_year = "2018";
const char * invocation_name = 0;
const struct { const char * from; const char * to; } known_extensions[] = {
@@ -118,7 +118,9 @@ void show_help( const long num_online )
" -v, --verbose be verbose (a 2nd -v gives more)\n"
" -0 .. -9 set compression level [default 6]\n"
" --fast alias for -0\n"
- " --best alias for -9\n", num_online );
+ " --best alias for -9\n"
+ " --loose-trailing allow trailing data seeming corrupt header\n"
+ , num_online );
if( verbosity >= 1 )
{
std::printf( " --debug=<level> (0-1) print debug statistics to stderr\n" );
@@ -145,8 +147,8 @@ void show_help( const long num_online )
void show_version()
{
std::printf( "%s %s\n", program_name, PROGVERSION );
- std::printf( "Copyright (C) 2009 Laszlo Ersek.\n"
- "Copyright (C) %s Antonio Diaz Diaz.\n", program_year );
+ std::printf( "Copyright (C) 2009 Laszlo Ersek.\n" );
+ std::printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year );
std::printf( "Using lzlib %s\n", LZ_version() );
std::printf( "License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n"
"This is free software: you are free to change and redistribute it.\n"
@@ -155,6 +157,21 @@ void show_version()
} // end namespace
+void Pretty_print::operator()( const char * const msg ) const // write padded_name (first call only), then msg if given, to stderr
+ {
+ if( verbosity >= 0 ) // print nothing when verbosity is negative
+ {
+ if( first_post ) // name not yet written since first_post was last set
+ {
+ first_post = false; // first_post is mutable, so it can be cleared in this const member
+ std::fputs( padded_name.c_str(), stderr );
+ if( !msg ) std::fflush( stderr ); // name-only call: nothing else is written, so flush explicitly
+ }
+ if( msg ) std::fprintf( stderr, "%s\n", msg ); // message is always followed by a newline
+ }
+ }
+
+
const char * bad_version( const unsigned version )
{
static char buf[80];
@@ -185,8 +202,7 @@ const char * format_ds( const unsigned dictionary_size )
void show_header( const unsigned dictionary_size )
{
- if( verbosity >= 3 )
- std::fprintf( stderr, "dictionary %s. ", format_ds( dictionary_size ) );
+ std::fprintf( stderr, "dictionary %s, ", format_ds( dictionary_size ) );
}
namespace {
@@ -278,6 +294,33 @@ int extension_index( const std::string & name )
return -1;
}
+
+void set_c_outname( const std::string & name, const bool force_ext ) // set global output_filename from name (called from main in compress mode)
+ {
+ output_filename = name;
+ if( force_ext || extension_index( output_filename ) < 0 ) // negative index presumably means "no known extension" -- confirm in extension_index
+ output_filename += known_extensions[0].from; // append the first known extension
+ }
+
+
+void set_d_outname( const std::string & name, const int eindex ) // set global output_filename from name (called from main when not compressing)
+ {
+ if( eindex >= 0 ) // eindex selects an entry of known_extensions; negative skips to the fallback
+ {
+ const std::string from( known_extensions[eindex].from );
+ if( name.size() > from.size() ) // name must be longer than the extension itself
+ {
+ output_filename.assign( name, 0, name.size() - from.size() ); // copy name minus its last from.size() chars
+ output_filename += known_extensions[eindex].to; // then append the replacement suffix
+ return;
+ }
+ }
+ output_filename = name; output_filename += ".out"; // fallback: keep the whole name and append ".out"
+ if( verbosity >= 1 ) // the notice is shown only when verbose
+ std::fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'\n",
+ program_name, name.c_str(), output_filename.c_str() );
+ }
+
} // end namespace
int open_instream( const char * const name, struct stat * const in_statsp,
@@ -325,32 +368,6 @@ int open_instream2( const char * const name, struct stat * const in_statsp,
}
-void set_c_outname( const std::string & name )
- {
- output_filename = name;
- output_filename += known_extensions[0].from;
- }
-
-
-void set_d_outname( const std::string & name, const int eindex )
- {
- if( eindex >= 0 )
- {
- const std::string from( known_extensions[eindex].from );
- if( name.size() > from.size() )
- {
- output_filename.assign( name, 0, name.size() - from.size() );
- output_filename += known_extensions[eindex].to;
- return;
- }
- }
- output_filename = name; output_filename += ".out";
- if( verbosity >= 1 )
- std::fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'\n",
- program_name, name.c_str(), output_filename.c_str() );
- }
-
-
bool open_outstream( const bool force, const bool from_stdin )
{
const mode_t usr_rw = S_IRUSR | S_IWUSR;
@@ -404,15 +421,19 @@ void cleanup_and_fail( const int retval )
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_lock( &mutex ); // ignore errors to avoid loop
+ const int saved_verbosity = verbosity;
+ verbosity = -1; // suppress messages from other threads
if( delete_output_on_interrupt )
{
delete_output_on_interrupt = false;
- if( verbosity >= 0 )
+ if( saved_verbosity >= 0 )
std::fprintf( stderr, "%s: Deleting output file '%s', if it exists.\n",
program_name, output_filename.c_str() );
if( outfd >= 0 ) { close( outfd ); outfd = -1; }
- if( std::remove( output_filename.c_str() ) != 0 && errno != ENOENT )
- show_error( "WARNING: deletion of output file (apparently) failed." );
+ if( std::remove( output_filename.c_str() ) != 0 && errno != ENOENT &&
+ saved_verbosity >= 0 )
+ std::fprintf( stderr, "%s: WARNING: deletion of output file "
+ "(apparently) failed.\n", program_name );
}
std::exit( retval );
}
@@ -503,25 +524,30 @@ void internal_error( const char * const msg )
}
-void show_progress( const int packet_size,
- const Pretty_print * const p,
- const unsigned long long cfile_size )
+void show_progress( const unsigned long long packet_size,
+ const unsigned long long cfile_size,
+ const Pretty_print * const p )
{
static unsigned long long csize = 0; // file_size / 100
static unsigned long long pos = 0;
static const Pretty_print * pp = 0;
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+ static bool enabled = true;
- if( verbosity < 2 ) return;
+ if( !enabled ) return;
if( p ) // initialize static vars
- { csize = cfile_size; pos = 0; pp = p; }
+ {
+ if( verbosity < 2 || !isatty( STDERR_FILENO ) ) { enabled = false; return; }
+ csize = cfile_size; pos = 0; pp = p;
+ }
if( pp )
{
xlock( &mutex );
pos += packet_size;
if( csize > 0 )
- std::fprintf( stderr, "%4llu%%", pos / csize );
- std::fprintf( stderr, " %.1f MB\r", pos / 1000000.0 );
+ std::fprintf( stderr, "%4llu%% %.1f MB\r", pos / csize, pos / 1000000.0 );
+ else
+ std::fprintf( stderr, " %.1f MB\r", pos / 1000000.0 );
pp->reset(); (*pp)(); // restore cursor position
xunlock( &mutex );
}
@@ -549,12 +575,12 @@ int main( const int argc, const char * const argv[] )
std::vector< std::string > filenames;
int data_size = 0;
int debug_level = 0;
- int infd = -1;
int num_workers = 0; // start this many worker threads
Mode program_mode = m_compress;
bool force = false;
bool ignore_trailing = true;
bool keep_input_files = false;
+ bool loose_trailing = false;
bool recompress = false;
bool to_stdout = false;
invocation_name = argv[0];
@@ -563,50 +589,51 @@ int main( const int argc, const char * const argv[] )
{ show_error( "Bad library version. At least lzlib 1.0 is required." );
return 1; }
- const long num_online = std::max( 1L, sysconf( _SC_NPROCESSORS_ONLN ) );
- long max_workers = sysconf( _SC_THREAD_THREADS_MAX );
- if( max_workers < 1 || max_workers > INT_MAX / (int)sizeof (pthread_t) )
- max_workers = INT_MAX / sizeof (pthread_t);
-
- enum Optcode { opt_dbg = 256 };
+ enum { opt_dbg = 256, opt_lt };
const Arg_parser::Option options[] =
{
- { '0', "fast", Arg_parser::no },
- { '1', 0, Arg_parser::no },
- { '2', 0, Arg_parser::no },
- { '3', 0, Arg_parser::no },
- { '4', 0, Arg_parser::no },
- { '5', 0, Arg_parser::no },
- { '6', 0, Arg_parser::no },
- { '7', 0, Arg_parser::no },
- { '8', 0, Arg_parser::no },
- { '9', "best", Arg_parser::no },
- { 'a', "trailing-error", Arg_parser::no },
- { 'b', "member-size", Arg_parser::yes },
- { 'B', "data-size", Arg_parser::yes },
- { 'c', "stdout", Arg_parser::no },
- { 'd', "decompress", Arg_parser::no },
- { 'f', "force", Arg_parser::no },
- { 'F', "recompress", Arg_parser::no },
- { 'h', "help", Arg_parser::no },
- { 'k', "keep", Arg_parser::no },
- { 'l', "list", Arg_parser::no },
- { 'm', "match-length", Arg_parser::yes },
- { 'n', "threads", Arg_parser::yes },
- { 'o', "output", Arg_parser::yes },
- { 'q', "quiet", Arg_parser::no },
- { 's', "dictionary-size", Arg_parser::yes },
- { 'S', "volume-size", Arg_parser::yes },
- { 't', "test", Arg_parser::no },
- { 'v', "verbose", Arg_parser::no },
- { 'V', "version", Arg_parser::no },
- { opt_dbg, "debug", Arg_parser::yes },
- { 0 , 0, Arg_parser::no } };
+ { '0', "fast", Arg_parser::no },
+ { '1', 0, Arg_parser::no },
+ { '2', 0, Arg_parser::no },
+ { '3', 0, Arg_parser::no },
+ { '4', 0, Arg_parser::no },
+ { '5', 0, Arg_parser::no },
+ { '6', 0, Arg_parser::no },
+ { '7', 0, Arg_parser::no },
+ { '8', 0, Arg_parser::no },
+ { '9', "best", Arg_parser::no },
+ { 'a', "trailing-error", Arg_parser::no },
+ { 'b', "member-size", Arg_parser::yes },
+ { 'B', "data-size", Arg_parser::yes },
+ { 'c', "stdout", Arg_parser::no },
+ { 'd', "decompress", Arg_parser::no },
+ { 'f', "force", Arg_parser::no },
+ { 'F', "recompress", Arg_parser::no },
+ { 'h', "help", Arg_parser::no },
+ { 'k', "keep", Arg_parser::no },
+ { 'l', "list", Arg_parser::no },
+ { 'm', "match-length", Arg_parser::yes },
+ { 'n', "threads", Arg_parser::yes },
+ { 'o', "output", Arg_parser::yes },
+ { 'q', "quiet", Arg_parser::no },
+ { 's', "dictionary-size", Arg_parser::yes },
+ { 'S', "volume-size", Arg_parser::yes },
+ { 't', "test", Arg_parser::no },
+ { 'v', "verbose", Arg_parser::no },
+ { 'V', "version", Arg_parser::no },
+ { opt_dbg, "debug", Arg_parser::yes },
+ { opt_lt, "loose-trailing", Arg_parser::no },
+ { 0 , 0, Arg_parser::no } };
const Arg_parser parser( argc, argv, options );
if( parser.error().size() ) // bad option
{ show_error( parser.error().c_str(), 0, true ); return 1; }
+ const long num_online = std::max( 1L, sysconf( _SC_NPROCESSORS_ONLN ) );
+ long max_workers = sysconf( _SC_THREAD_THREADS_MAX );
+ if( max_workers < 1 || max_workers > INT_MAX / (int)sizeof (pthread_t) )
+ max_workers = INT_MAX / sizeof (pthread_t);
+
int argind = 0;
for( ; argind < parser.arguments(); ++argind )
{
@@ -643,6 +670,7 @@ int main( const int argc, const char * const argv[] )
case 'v': if( verbosity < 4 ) ++verbosity; break;
case 'V': show_version(); return 0;
case opt_dbg: debug_level = getnum( arg, 0, 3 ); break;
+ case opt_lt: loose_trailing = true; break;
default : internal_error( "uncaught option." );
}
} // end process options
@@ -661,7 +689,7 @@ int main( const int argc, const char * const argv[] )
if( filenames.empty() ) filenames.push_back("-");
if( program_mode == m_list )
- return list_files( filenames, ignore_trailing );
+ return list_files( filenames, ignore_trailing, loose_trailing );
if( program_mode == m_test )
outfd = -1;
@@ -678,19 +706,30 @@ int main( const int argc, const char * const argv[] )
std::max( data_size, LZ_min_dictionary_size() );
if( num_workers <= 0 )
+ {
+ if( sizeof (void *) <= 4 ) // use less than 2.22 GiB on 32 bit systems
+ {
+ const long long limit = ( 27LL << 25 ) + ( 11LL << 27 ); // 4 * 568 MiB
+ const long long mem = ( 27LL * data_size ) / 8 +
+ ( fast ? 3LL << 19 : 11LL * encoder_options.dictionary_size );
+ const int nmax32 = std::max( limit / mem, 1LL );
+ if( max_workers > nmax32 ) max_workers = nmax32;
+ }
num_workers = std::min( num_online, max_workers );
+ }
if( !to_stdout && program_mode != m_test &&
( filenames_given || default_output_filename.size() ) )
set_signals();
- Pretty_print pp( filenames, verbosity );
+ Pretty_print pp( filenames );
int retval = 0;
bool stdin_used = false;
for( unsigned i = 0; i < filenames.size(); ++i )
{
std::string input_filename;
+ int infd;
struct stat in_stats;
output_filename.clear();
@@ -705,12 +744,12 @@ int main( const int argc, const char * const argv[] )
else
{
if( program_mode == m_compress )
- set_c_outname( default_output_filename );
+ set_c_outname( default_output_filename, false );
else output_filename = default_output_filename;
if( !open_outstream( force, true ) )
{
if( retval < 1 ) retval = 1;
- close( infd ); infd = -1;
+ close( infd );
continue;
}
}
@@ -728,12 +767,12 @@ int main( const int argc, const char * const argv[] )
else
{
if( program_mode == m_compress )
- set_c_outname( input_filename );
+ set_c_outname( input_filename, true );
else set_d_outname( input_filename, eindex );
if( !open_outstream( force, false ) )
{
if( retval < 1 ) retval = 1;
- close( infd ); infd = -1;
+ close( infd );
continue;
}
}
@@ -744,24 +783,22 @@ int main( const int argc, const char * const argv[] )
if( !check_tty( pp.name(), infd, program_mode ) )
{
if( retval < 1 ) retval = 1;
- if( program_mode == m_test ) { close( infd ); infd = -1; continue; }
+ if( program_mode == m_test ) { close( infd ); continue; }
cleanup_and_fail( retval );
}
const struct stat * const in_statsp = input_filename.size() ? &in_stats : 0;
const bool infd_isreg = in_statsp && S_ISREG( in_statsp->st_mode );
- if( verbosity >= 1 ) pp();
+ const unsigned long long cfile_size =
+ infd_isreg ? ( in_statsp->st_size + 99 ) / 100 : 0;
int tmp;
if( program_mode == m_compress )
- {
- show_progress( 0, &pp, infd_isreg ? in_statsp->st_size / 100 : 0 ); // init
- tmp = compress( data_size, encoder_options.dictionary_size,
+ tmp = compress( cfile_size, data_size, encoder_options.dictionary_size,
encoder_options.match_len_limit,
num_workers, infd, outfd, pp, debug_level );
- }
else
- tmp = decompress( num_workers, infd, outfd, pp, debug_level,
- ignore_trailing, infd_isreg );
+ tmp = decompress( cfile_size, num_workers, infd, outfd, pp, debug_level,
+ ignore_trailing, loose_trailing, infd_isreg );
if( tmp > retval ) retval = tmp;
if( tmp && program_mode != m_test ) cleanup_and_fail( retval );
@@ -769,14 +806,14 @@ int main( const int argc, const char * const argv[] )
close_and_set_permissions( in_statsp );
if( input_filename.size() )
{
- close( infd ); infd = -1;
+ close( infd );
if( !keep_input_files && !to_stdout && program_mode != m_test )
std::remove( input_filename.c_str() );
}
}
if( outfd >= 0 && close( outfd ) != 0 )
{
- show_error( "Can't close stdout", errno );
+ show_error( "Error closing stdout", errno );
if( retval < 1 ) retval = 1;
}
return retval;
diff --git a/testsuite/check.sh b/testsuite/check.sh
index 4421fc4..a4113c3 100755
--- a/testsuite/check.sh
+++ b/testsuite/check.sh
@@ -1,6 +1,6 @@
#! /bin/sh
# check script for Plzip - Parallel compressor compatible with lzip
-# Copyright (C) 2009-2017 Antonio Diaz Diaz.
+# Copyright (C) 2009-2018 Antonio Diaz Diaz.
#
# This script is free software: you have unlimited permission
# to copy, distribute and modify it.
@@ -53,6 +53,8 @@ done
[ $? = 2 ] || test_failed $LINENO
"${LZIP}" -cdq < in
[ $? = 2 ] || test_failed $LINENO
+"${LZIP}" -dq -o in < "${in_lz}"
+[ $? = 1 ] || test_failed $LINENO
# these are for code coverage
"${LZIP}" -lt "${in_lz}" 2> /dev/null
[ $? = 1 ] || test_failed $LINENO
@@ -145,7 +147,7 @@ cat "${in_lz}" "${in_lz}" > in2.lz || framework_failure
"${LZIP}" -cd in2.lz > copy2 || test_failed $LINENO
cmp in2 copy2 || test_failed $LINENO
-"${LZIP}" --output=copy2 < in2 || test_failed $LINENO
+"${LZIP}" --output=copy2.lz < in2 || test_failed $LINENO
"${LZIP}" -lq copy2.lz || test_failed $LINENO
"${LZIP}" -t copy2.lz || test_failed $LINENO
"${LZIP}" -cd copy2.lz > copy2 || test_failed $LINENO
@@ -226,6 +228,61 @@ cat in in in in in in in in | "${LZIP}" -1s4Ki | "${LZIP}" -t ||
printf "\ntesting bad input..."
+headers='LZIp LZiP LZip LzIP LzIp LziP lZIP lZIp lZiP lzIP'
+body='\001\014\000\203\377\373\377\377\300\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000$\000\000\000\000\000\000\000'
+cat "${in_lz}" > in0.lz
+printf "LZIP${body}" >> in0.lz
+if "${LZIP}" -tq in0.lz ; then
+ for header in ${headers} ; do
+ printf "${header}${body}" > in0.lz # first member
+ "${LZIP}" -lq in0.lz
+ [ $? = 2 ] || test_failed $LINENO ${header}
+ "${LZIP}" -tq in0.lz
+ [ $? = 2 ] || test_failed $LINENO ${header}
+ "${LZIP}" -tq < in0.lz
+ [ $? = 2 ] || test_failed $LINENO ${header}
+ "${LZIP}" -cdq in0.lz > /dev/null
+ [ $? = 2 ] || test_failed $LINENO ${header}
+ "${LZIP}" -lq --loose-trailing in0.lz
+ [ $? = 2 ] || test_failed $LINENO ${header}
+ "${LZIP}" -tq --loose-trailing in0.lz
+ [ $? = 2 ] || test_failed $LINENO ${header}
+ "${LZIP}" -tq --loose-trailing < in0.lz
+ [ $? = 2 ] || test_failed $LINENO ${header}
+ "${LZIP}" -cdq --loose-trailing in0.lz > /dev/null
+ [ $? = 2 ] || test_failed $LINENO ${header}
+ cat "${in_lz}" > in0.lz
+ printf "${header}${body}" >> in0.lz # trailing data
+ "${LZIP}" -lq in0.lz
+ [ $? = 2 ] || test_failed $LINENO ${header}
+ "${LZIP}" -tq in0.lz
+ [ $? = 2 ] || test_failed $LINENO ${header}
+# "${LZIP}" -tq < in0.lz # requires lzlib-1.10
+# [ $? = 2 ] || test_failed $LINENO ${header}
+ "${LZIP}" -cdq in0.lz > /dev/null
+ [ $? = 2 ] || test_failed $LINENO ${header}
+ "${LZIP}" -lq --loose-trailing in0.lz
+ [ $? = 0 ] || test_failed $LINENO ${header}
+ "${LZIP}" -t --loose-trailing in0.lz
+ [ $? = 0 ] || test_failed $LINENO ${header}
+ "${LZIP}" -t --loose-trailing < in0.lz
+ [ $? = 0 ] || test_failed $LINENO ${header}
+ "${LZIP}" -cd --loose-trailing in0.lz > /dev/null
+ [ $? = 0 ] || test_failed $LINENO ${header}
+ "${LZIP}" -lq --loose-trailing --trailing-error in0.lz
+ [ $? = 2 ] || test_failed $LINENO ${header}
+ "${LZIP}" -tq --loose-trailing --trailing-error in0.lz
+ [ $? = 2 ] || test_failed $LINENO ${header}
+ "${LZIP}" -tq --loose-trailing --trailing-error < in0.lz
+ [ $? = 2 ] || test_failed $LINENO ${header}
+ "${LZIP}" -cdq --loose-trailing --trailing-error in0.lz > /dev/null
+ [ $? = 2 ] || test_failed $LINENO ${header}
+ done
+else
+ printf "\nwarning: skipping header test: 'printf' does not work on your system."
+fi
+rm -f in0.lz
+
cat "${in_lz}" "${in_lz}" "${in_lz}" > in3.lz || framework_failure
if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null &&
[ -e trunc.lz ] && cmp in2.lz trunc.lz > /dev/null 2>&1 ; then
@@ -233,7 +290,7 @@ if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null &&
dd if=in3.lz of=trunc.lz bs=$i count=1 2> /dev/null
"${LZIP}" -lq trunc.lz
[ $? = 2 ] || test_failed $LINENO $i
- "${LZIP}" -t trunc.lz 2> /dev/null
+ "${LZIP}" -tq trunc.lz
[ $? = 2 ] || test_failed $LINENO $i
"${LZIP}" -tq < trunc.lz
[ $? = 2 ] || test_failed $LINENO $i
@@ -245,6 +302,7 @@ if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null &&
else
printf "\nwarning: skipping truncation test: 'dd' does not work on your system."
fi
+rm -f in3.lz trunc.lz
cat "${in_lz}" > ingin.lz || framework_failure
printf "g" >> ingin.lz || framework_failure
@@ -258,6 +316,7 @@ cat "${in_lz}" >> ingin.lz || framework_failure
"${LZIP}" -t < ingin.lz || test_failed $LINENO
"${LZIP}" -d < ingin.lz > copy || test_failed $LINENO
cmp in copy || test_failed $LINENO
+rm -f ingin.lz
echo
if [ ${fail} = 0 ] ; then