summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Daniel Baumann <mail@daniel-baumann.ch> 2015-11-07 15:28:38 +0000
committer Daniel Baumann <mail@daniel-baumann.ch> 2015-11-07 15:28:38 +0000
commit 891c72a3374b810770cc6d85cb703ae158497d6f (patch)
tree 8dcb364bfefef691ae2a6ad970b70cb7f7f760fb
parent Adding debian version 0.7-5. (diff)
download plzip-891c72a3374b810770cc6d85cb703ae158497d6f.tar.xz
plzip-891c72a3374b810770cc6d85cb703ae158497d6f.zip
Merging upstream version 0.8.
Signed-off-by: Daniel Baumann <mail@daniel-baumann.ch>
-rw-r--r-- ChangeLog 33
-rw-r--r-- INSTALL 26
-rw-r--r-- NEWS 21
-rw-r--r-- README 4
-rw-r--r-- arg_parser.cc 5
-rw-r--r-- arg_parser.h 7
-rw-r--r-- compress.cc 101
-rwxr-xr-x configure 8
-rw-r--r-- decompress.cc 70
-rw-r--r-- doc/plzip.1 19
-rw-r--r-- doc/plzip.info 86
-rw-r--r-- doc/plzip.texinfo 65
-rw-r--r-- main.cc 174
-rw-r--r-- plzip.h 39
-rwxr-xr-x testsuite/check.sh 7
15 files changed, 392 insertions, 273 deletions
diff --git a/ChangeLog b/ChangeLog
index 980d86e..15b3c82 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2012-01-17 Antonio Diaz Diaz <ant_diaz@teleline.es>
+
+ * Version 0.8 released.
+ * main.cc: Added new option '-F, --recompress'.
+ * decompress.cc (decompress): Show compression ratio.
+ * main.cc (close_and_set_permissions): Inability to change output
+ file attributes has been downgraded from error to warning.
+ * Small change in '--help' output and man page.
+ * Changed quote characters in messages as advised by GNU Standards.
+ * main.cc: Set stdin/stdout in binary mode on OS2.
+ * compress.cc: Reduce memory use of compressed packets.
+ * decompress.cc: Use Boyer-Moore algorithm to search for headers.
+
2010-12-03 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 0.7 released.
@@ -6,16 +19,16 @@
* decompress.cc: A limit has been set on the number of packets
produced by workers to limit the amount of memory used.
* main.cc (open_instream): Do not show the message
- " and `--stdout' was not specified" for directories, etc.
+ " and '--stdout' was not specified" for directories, etc.
* main.cc: Fixed warning about fchown return value being ignored.
- * testsuite: `test1' renamed to `test.txt'. Added new tests.
+ * testsuite: 'test1' renamed to 'test.txt'. Added new tests.
2010-03-20 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 0.6 released.
* Small portability fixes.
- * Added chapter `Program Design' and description of option
- `--threads' to manual.
+ * Added chapter 'Program Design' and description of option
+ '--threads' to manual.
* Debug stats have been fixed.
2010-02-10 Antonio Diaz Diaz <ant_diaz@teleline.es>
@@ -33,19 +46,19 @@
2010-01-24 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 0.3 released.
- * Implemented option `--data-size'.
+ * Implemented option '--data-size'.
* Output file is now removed if plzip is interrupted.
* This version automatically chooses the smallest possible
dictionary size for each member during compression, saving
memory during decompression.
- * main.cc: New constant `o_binary'.
+ * main.cc: New constant 'o_binary'.
2010-01-17 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 0.2 released.
- * Implemented option `--dictionary-size'.
- * Implemented option `--match-length'.
- * `lacos_rbtree' has been replaced with a circular buffer.
+ * Implemented option '--dictionary-size'.
+ * Implemented option '--match-length'.
+ * 'lacos_rbtree' has been replaced with a circular buffer.
2009-12-05 Antonio Diaz Diaz <ant_diaz@teleline.es>
@@ -63,7 +76,7 @@
until something better appears on the net.
-Copyright (C) 2009, 2010 Antonio Diaz Diaz.
+Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz.
This file is a collection of facts, and thus it is not copyrightable,
but just in case, you have unlimited permission to copy, distribute and
diff --git a/INSTALL b/INSTALL
index 4fb240d..a2998a5 100644
--- a/INSTALL
+++ b/INSTALL
@@ -20,7 +20,7 @@ This creates the directory ./plzip[version] containing the source from
the main archive.
2. Change to plzip directory and run configure.
- (Try `configure --help' for usage instructions).
+ (Try 'configure --help' for usage instructions).
cd plzip[version]
./configure
@@ -29,30 +29,30 @@ the main archive.
make
-4. Optionally, type `make check' to run the tests that come with plzip.
+4. Optionally, type 'make check' to run the tests that come with plzip.
-5. Type `make install' to install the program and any data files and
+5. Type 'make install' to install the program and any data files and
documentation.
Another way
-----------
You can also compile plzip into a separate directory. To do this, you
-must use a version of `make' that supports the `VPATH' variable, such
-as GNU `make'. `cd' to the directory where you want the object files
-and executables to go and run the `configure' script. `configure'
-automatically checks for the source code in `.', in `..' and in the
-directory that `configure' is in.
-
-`configure' recognizes the option `--srcdir=DIR' to control where to
-look for the sources. Usually `configure' can determine that directory
+must use a version of 'make' that supports the 'VPATH' variable, such
+as GNU 'make'. 'cd' to the directory where you want the object files
+and executables to go and run the 'configure' script. 'configure'
+automatically checks for the source code in '.', in '..' and in the
+directory that 'configure' is in.
+
+'configure' recognizes the option '--srcdir=DIR' to control where to
+look for the sources. Usually 'configure' can determine that directory
automatically.
-After running `configure', you can run `make' and `make install' as
+After running 'configure', you can run 'make' and 'make install' as
explained above.
-Copyright (C) 2009, 2010 Antonio Diaz Diaz.
+Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute and modify it.
diff --git a/NEWS b/NEWS
index dda5e9f..9cfeae0 100644
--- a/NEWS
+++ b/NEWS
@@ -1,12 +1,17 @@
-Changes in version 0.7:
+Changes in version 0.8:
-Match length limits set by options -1 to -9 have been changed to match
-those of lzip 1.11.
+The option "-F, --recompress", which forces recompression of files whose
+name already has the ".lz" or ".tlz" suffix, has been added.
-A limit has been set on the number of packets produced by decompresor
-worker threads to limit the amount of memory used.
+The options "-d, --decompress" and "-t, --test" now also show
+compression ratio.
-Do not show the message "and `--stdout' was not specified" for file
-types that can't be read (directories, etc).
+Inability to change output file attributes has been downgraded from
+error to warning.
-A warning about fchown's return value being ignored has been fixed.
+A small change has been made in the "--help" output and man page.
+
+Quote characters in messages have been changed as advised by GNU Coding
+Standards.
+
+Stdin and stdout are now set in binary mode on OS2.
diff --git a/README b/README
index 1270724..4db6172 100644
--- a/README
+++ b/README
@@ -4,7 +4,7 @@ Plzip is a massively parallel (multi-threaded), lossless data compressor
based on the lzlib compression library, with very safe integrity
checking and a user interface similar to the one of bzip2, gzip or lzip.
Plzip uses the lzip file format; the files produced by plzip are fully
-compatible with lzip-1.4 or newer.
+compatible with lzip-1.4 or newer, and can be rescued with lziprecover.
Plzip is intended for faster compression/decompression of big files on
multiprocessor machines, which makes it specially well suited for
@@ -12,7 +12,7 @@ distribution of big software files and large scale data archiving. On
files big enough, plzip can use hundreds of processors.
-Copyright (C) 2009, 2010 Antonio Diaz Diaz.
+Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute and modify it.
diff --git a/arg_parser.cc b/arg_parser.cc
index cc9f87d..27137a1 100644
--- a/arg_parser.cc
+++ b/arg_parser.cc
@@ -1,5 +1,6 @@
-/* Arg_parser - A POSIX/GNU command line argument parser. (C++ version)
- Copyright (C) 2006, 2007, 2008, 2009, 2010 Antonio Diaz Diaz.
+/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
+ Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
+ Antonio Diaz Diaz.
This library is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/arg_parser.h b/arg_parser.h
index da1cc94..5d036ab 100644
--- a/arg_parser.h
+++ b/arg_parser.h
@@ -1,5 +1,6 @@
-/* Arg_parser - A POSIX/GNU command line argument parser. (C++ version)
- Copyright (C) 2006, 2007, 2008, 2009, 2010 Antonio Diaz Diaz.
+/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
+ Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
+ Antonio Diaz Diaz.
This library is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -65,7 +66,7 @@ private:
{
int code;
std::string argument;
- Record( const int c = 0 ) : code( c ) {}
+ explicit Record( const int c = 0 ) : code( c ) {}
};
std::string error_;
diff --git a/compress.cc b/compress.cc
index 7945cf0..cf0135a 100644
--- a/compress.cc
+++ b/compress.cc
@@ -1,6 +1,6 @@
/* Plzip - A parallel compressor compatible with lzip
Copyright (C) 2009 Laszlo Ersek.
- Copyright (C) 2009, 2010 Antonio Diaz Diaz.
+ Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -46,63 +46,63 @@
#endif
-void xinit( pthread_cond_t * cond, pthread_mutex_t * mutex )
+void xinit( pthread_mutex_t * const mutex )
{
- int errcode = pthread_cond_init( cond, 0 );
- if( errcode ) { show_error( "pthread_cond_init", errcode ); fatal(); }
+ const int errcode = pthread_mutex_init( mutex, 0 );
+ if( errcode ) { show_error( "pthread_mutex_init", errcode ); fatal(); }
+ }
- if( mutex )
- {
- errcode = pthread_mutex_init( mutex, 0 );
- if( errcode ) { show_error( "pthread_mutex_init", errcode ); fatal(); }
- }
+void xinit( pthread_cond_t * const cond )
+ {
+ const int errcode = pthread_cond_init( cond, 0 );
+ if( errcode ) { show_error( "pthread_cond_init", errcode ); fatal(); }
}
-void xdestroy( pthread_cond_t * cond, pthread_mutex_t * mutex )
+void xdestroy( pthread_mutex_t * const mutex )
{
- int errcode = pthread_cond_destroy( cond );
- if( errcode ) { show_error( "pthread_cond_destroy", errcode ); fatal(); }
+ const int errcode = pthread_mutex_destroy( mutex );
+ if( errcode ) { show_error( "pthread_mutex_destroy", errcode ); fatal(); }
+ }
- if( mutex )
- {
- errcode = pthread_mutex_destroy( mutex );
- if( errcode ) { show_error( "pthread_mutex_destroy", errcode ); fatal(); }
- }
+void xdestroy( pthread_cond_t * const cond )
+ {
+ const int errcode = pthread_cond_destroy( cond );
+ if( errcode ) { show_error( "pthread_cond_destroy", errcode ); fatal(); }
}
-void xlock( pthread_mutex_t * mutex )
+void xlock( pthread_mutex_t * const mutex )
{
- int errcode = pthread_mutex_lock( mutex );
+ const int errcode = pthread_mutex_lock( mutex );
if( errcode ) { show_error( "pthread_mutex_lock", errcode ); fatal(); }
}
-void xunlock( pthread_mutex_t * mutex )
+void xunlock( pthread_mutex_t * const mutex )
{
- int errcode = pthread_mutex_unlock( mutex );
+ const int errcode = pthread_mutex_unlock( mutex );
if( errcode ) { show_error( "pthread_mutex_unlock", errcode ); fatal(); }
}
-void xwait( pthread_cond_t * cond, pthread_mutex_t * mutex )
+void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex )
{
- int errcode = pthread_cond_wait( cond, mutex );
+ const int errcode = pthread_cond_wait( cond, mutex );
if( errcode ) { show_error( "pthread_cond_wait", errcode ); fatal(); }
}
-void xsignal( pthread_cond_t * cond )
+void xsignal( pthread_cond_t * const cond )
{
- int errcode = pthread_cond_signal( cond );
+ const int errcode = pthread_cond_signal( cond );
if( errcode ) { show_error( "pthread_cond_signal", errcode ); fatal(); }
}
-void xbroadcast( pthread_cond_t * cond )
+void xbroadcast( pthread_cond_t * const cond )
{
- int errcode = pthread_cond_broadcast( cond );
+ const int errcode = pthread_cond_broadcast( cond );
if( errcode ) { show_error( "pthread_cond_broadcast", errcode ); fatal(); }
}
@@ -142,17 +142,26 @@ private:
pthread_cond_t oav_or_exit; // output packet available or all workers exited
bool eof; // splitter done
+ Packet_courier( const Packet_courier & ); // declared as private
+ void operator=( const Packet_courier & ); // declared as private
+
public:
- Packet_courier( const int num_workers, const int slots )
+ Packet_courier( const int workers, const int slots )
: icheck_counter( 0 ), iwait_counter( 0 ),
ocheck_counter( 0 ), owait_counter( 0 ),
receive_id( 0 ), deliver_id( 0 ),
slot_tally( slots ), circular_buffer( slots, (Packet *) 0 ),
- num_working( num_workers ), num_slots( slots ), eof( false )
- { xinit( &iav_or_eof, &imutex ); xinit( &oav_or_exit, &omutex ); }
+ num_working( workers ), num_slots( slots ), eof( false )
+ {
+ xinit( &imutex ); xinit( &iav_or_eof );
+ xinit( &omutex ); xinit( &oav_or_exit );
+ }
~Packet_courier()
- { xdestroy( &iav_or_eof, &imutex ); xdestroy( &oav_or_exit, &omutex ); }
+ {
+ xdestroy( &oav_or_exit ); xdestroy( &omutex );
+ xdestroy( &iav_or_eof ); xdestroy( &imutex );
+ }
const Slot_tally & tally() const { return slot_tally; }
@@ -271,7 +280,7 @@ extern "C" void * csplitter( void * arg )
for( bool first_post = true; ; first_post = false )
{
- uint8_t * data = new( std::nothrow ) uint8_t[data_size];
+ uint8_t * const data = new( std::nothrow ) uint8_t[data_size];
if( data == 0 ) { pp( "Not enough memory" ); fatal(); }
const int size = readblock( infd, data, data_size );
if( size != data_size && errno )
@@ -281,14 +290,15 @@ extern "C" void * csplitter( void * arg )
{
in_size += size;
courier.receive_packet( data, size );
+ if( size < data_size ) break; // EOF
}
else
{
delete[] data;
- courier.finish(); // no more packets to send
break;
}
}
+ courier.finish(); // no more packets to send
return 0;
}
@@ -314,11 +324,11 @@ extern "C" void * cworker( void * arg )
while( true )
{
- Packet * packet = courier.distribute_packet();
+ Packet * const packet = courier.distribute_packet();
if( packet == 0 ) break; // no more packets to process
- const int compr_size = 42 + packet->size + ( ( packet->size + 7 ) / 8 );
- uint8_t * const new_data = new( std::nothrow ) uint8_t[compr_size];
+ const int max_compr_size = 42 + packet->size + ( ( packet->size + 7 ) / 8 );
+ uint8_t * const new_data = new( std::nothrow ) uint8_t[max_compr_size];
if( new_data == 0 ) { pp( "Not enough memory" ); fatal(); }
const int dict_size = std::max( LZ_min_dictionary_size(),
std::min( dictionary_size, packet->size ) );
@@ -346,10 +356,11 @@ extern "C" void * cworker( void * arg )
if( wr < 0 ) internal_error( "library error (LZ_compress_write)" );
written += wr;
}
- if( written >= packet->size ) LZ_compress_finish( encoder );
+ if( written >= packet->size )
+ { delete[] packet->data; LZ_compress_finish( encoder ); }
}
const int rd = LZ_compress_read( encoder, new_data + new_size,
- compr_size - new_size );
+ max_compr_size - new_size );
if( rd < 0 )
{
pp();
@@ -359,7 +370,7 @@ extern "C" void * cworker( void * arg )
fatal();
}
new_size += rd;
- if( new_size > compr_size )
+ if( new_size > max_compr_size )
internal_error( "packet size exceeded in worker" );
if( LZ_compress_finished( encoder ) == 1 ) break;
}
@@ -367,7 +378,6 @@ extern "C" void * cworker( void * arg )
if( LZ_compress_close( encoder ) < 0 )
{ pp( "LZ_compress_close failed" ); fatal(); }
- delete[] packet->data;
packet->data = new_data;
packet->size = new_size;
courier.collect_packet( packet );
@@ -405,12 +415,15 @@ void muxer( Packet_courier & courier, const Pretty_print & pp, const int outfd )
// call the muxer.
int compress( const int data_size, const int dictionary_size,
const int match_len_limit, const int num_workers,
- const int num_slots, const int infd, const int outfd,
+ const int infd, const int outfd,
const Pretty_print & pp, const int debug_level )
{
+ const int slots_per_worker = 2;
+ const int num_slots = ( ( INT_MAX / num_workers >= slots_per_worker ) ?
+ num_workers * slots_per_worker : INT_MAX );
in_size = 0;
out_size = 0;
- Packet_courier courier( num_workers, num_slots );
+ Packet_courier courier( num_workers, num_slots - 1 );
Splitter_arg splitter_arg;
splitter_arg.courier = &courier;
@@ -434,7 +447,7 @@ int compress( const int data_size, const int dictionary_size,
{ pp( "Not enough memory" ); fatal(); }
for( int i = 0; i < num_workers; ++i )
{
- errcode = pthread_create( &worker_threads[i], 0, cworker, &worker_arg );
+ errcode = pthread_create( worker_threads + i, 0, cworker, &worker_arg );
if( errcode )
{ show_error( "Can't create worker threads", errcode ); fatal(); }
}
@@ -456,7 +469,7 @@ int compress( const int data_size, const int dictionary_size,
if( verbosity >= 1 )
{
if( in_size <= 0 || out_size <= 0 )
- std::fprintf( stderr, "no data compressed.\n" );
+ std::fprintf( stderr, " no data compressed.\n" );
else
std::fprintf( stderr, "%6.3f:1, %6.3f bits/byte, "
"%5.2f%% saved, %lld in, %lld out.\n",
diff --git a/configure b/configure
index 17609d1..57497d9 100755
--- a/configure
+++ b/configure
@@ -1,16 +1,14 @@
#! /bin/sh
# configure script for Plzip - A parallel compressor compatible with lzip
-# Copyright (C) 2009, 2010 Antonio Diaz Diaz.
+# Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz.
#
# This configure script is free software: you have unlimited permission
# to copy, distribute and modify it.
-#
-# Date of this version: 2010-12-03
args=
no_create=
pkgname=plzip
-pkgversion=0.7
+pkgversion=0.8
progname=plzip
srctrigger=plzip.h
@@ -167,7 +165,7 @@ echo "LDFLAGS = ${LDFLAGS}"
rm -f Makefile
cat > Makefile << EOF
# Makefile for Plzip - A parallel compressor compatible with lzip
-# Copyright (C) 2009, 2010 Antonio Diaz Diaz.
+# Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz.
# This file was generated automatically by configure. Do not edit.
#
# This Makefile is free software: you have unlimited permission
diff --git a/decompress.cc b/decompress.cc
index df8d88e..ef098ae 100644
--- a/decompress.cc
+++ b/decompress.cc
@@ -1,6 +1,6 @@
/* Plzip - A parallel compressor compatible with lzip
Copyright (C) 2009 Laszlo Ersek.
- Copyright (C) 2009, 2010 Antonio Diaz Diaz.
+ Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -73,6 +73,9 @@ private:
pthread_cond_t slot_av; // free output slot available
bool eof; // splitter done
+ Packet_courier( const Packet_courier & ); // declared as private
+ void operator=( const Packet_courier & ); // declared as private
+
public:
Packet_courier( const int workers, const int slots )
: icheck_counter( 0 ), iwait_counter( 0 ),
@@ -82,14 +85,14 @@ public:
opacket_queues( workers ), num_working( workers ),
num_workers( workers ), num_free( 8 * slots ), eof( false )
{
- xinit( &iav_or_eof, &imutex );
- xinit( &oav_or_exit, &omutex ); xinit( &slot_av, 0 );
+ xinit( &imutex ); xinit( &iav_or_eof );
+ xinit( &omutex ); xinit( &oav_or_exit ); xinit( &slot_av );
}
~Packet_courier()
{
- xdestroy( &iav_or_eof, &imutex );
- xdestroy( &oav_or_exit, &omutex ); xdestroy( &slot_av, 0 );
+ xdestroy( &slot_av ); xdestroy( &oav_or_exit ); xdestroy( &omutex );
+ xdestroy( &iav_or_eof ); xdestroy( &imutex );
}
const Slot_tally & tally() const { return slot_tally; }
@@ -207,6 +210,29 @@ public:
};
+// Search forward from 'pos' for "LZIP" (Boyer-Moore algorithm)
+// Return pos of found string or 'pos+size' if not found.
+//
+int find_magic( const uint8_t * const buffer, const int pos, const int size ) throw()
+ {
+ const uint8_t table[256] = {
+ 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+ 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+ 4,4,4,4,4,4,4,4,4,1,4,4,3,4,4,4,4,4,4,4,4,4,4,4,4,4,2,4,4,4,4,4,
+ 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+ 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+ 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+ 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+ 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4 };
+
+ for( int i = pos; i <= pos + size - 4; i += table[buffer[i+3]] )
+ if( buffer[i] == 'L' && buffer[i+1] == 'Z' &&
+ buffer[i+2] == 'I' && buffer[i+3] == 'P' )
+ return i; // magic string found
+ return pos + size;
+ }
+
+
struct Splitter_arg
{
Packet_courier * courier;
@@ -235,8 +261,7 @@ extern "C" void * dsplitter( void * arg )
bool at_stream_end = ( size < buffer_size );
if( size != buffer_size && errno )
{ pp(); show_error( "Read error", errno ); fatal(); }
- if( size <= tsize || buffer[0] != 'L' || buffer[1] != 'Z' ||
- buffer[2] != 'I' || buffer[3] != 'P' )
+ if( size <= tsize || find_magic( buffer, 0, 4 ) != 0 )
{ pp( "Bad magic number (file not in lzip format)" ); fatal(); }
long long partial_member_size = 0;
@@ -244,15 +269,16 @@ extern "C" void * dsplitter( void * arg )
{
int pos = 0;
for( int newpos = 1; newpos <= size; ++newpos )
- if( buffer[newpos] == 'L' && buffer[newpos+1] == 'Z' &&
- buffer[newpos+2] == 'I' && buffer[newpos+3] == 'P' )
+ {
+ newpos = find_magic( buffer, newpos, size + 4 - newpos );
+ if( newpos <= size )
{
long long member_size = 0;
for( int i = 1; i <= 8; ++i )
{ member_size <<= 8; member_size += base_buffer[tsize+newpos-i]; }
if( partial_member_size + newpos - pos == member_size )
{ // header found
- uint8_t * data = new( std::nothrow ) uint8_t[newpos - pos];
+ uint8_t * const data = new( std::nothrow ) uint8_t[newpos - pos];
if( data == 0 ) { pp( "Not enough memory" ); fatal(); }
std::memcpy( data, buffer + pos, newpos - pos );
courier.receive_packet( data, newpos - pos );
@@ -261,6 +287,7 @@ extern "C" void * dsplitter( void * arg )
pos = newpos;
}
}
+ }
if( at_stream_end )
{
@@ -317,7 +344,7 @@ extern "C" void * dworker( void * arg )
while( true )
{
- Packet * ipacket = courier.distribute_packet( worker_id );
+ const Packet * const ipacket = courier.distribute_packet( worker_id );
if( ipacket == 0 ) break; // no more packets to process
if( ipacket->data == 0 ) LZ_decompress_finish( decoder );
@@ -362,7 +389,7 @@ extern "C" void * dworker( void * arg )
}
if( LZ_decompress_finished( decoder ) == 1 )
{
- LZ_decompress_reset( decoder );
+ LZ_decompress_reset( decoder ); // prepare for new ipacket
Packet * opacket = new Packet; // end of member token
opacket->data = 0;
opacket->size = 0;
@@ -379,9 +406,10 @@ extern "C" void * dworker( void * arg )
}
delete[] new_data;
- if( LZ_decompress_total_in_size( decoder ) != 0 )
+ if( LZ_decompress_member_position( decoder ) != 0 )
{ pp( "Error, some data remains in decoder" ); fatal(); }
- LZ_decompress_close( decoder );
+ if( LZ_decompress_close( decoder ) < 0 )
+ { pp( "LZ_decompress_close failed" ); fatal(); }
return 0;
}
@@ -413,10 +441,13 @@ void muxer( Packet_courier & courier, const Pretty_print & pp, const int outfd )
// init the courier, then start the splitter and the workers and
// call the muxer.
-int decompress( const int num_workers, const int num_slots,
- const int infd, const int outfd, const Pretty_print & pp,
- const int debug_level, const bool testing )
+int decompress( const int num_workers, const int infd, const int outfd,
+ const Pretty_print & pp, const int debug_level,
+ const bool testing )
{
+ const int slots_per_worker = 2;
+ const int num_slots = ( ( INT_MAX / num_workers >= slots_per_worker ) ?
+ num_workers * slots_per_worker : INT_MAX );
in_size = 0;
out_size = 0;
Packet_courier courier( num_workers, num_slots );
@@ -460,6 +491,11 @@ int decompress( const int num_workers, const int num_slots,
if( errcode )
{ show_error( "Can't join splitter thread", errcode ); fatal(); }
+ if( verbosity >= 3 && out_size > 0 && in_size > 0 )
+ std::fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ",
+ (double)out_size / in_size,
+ ( 8.0 * in_size ) / out_size,
+ 100.0 * ( 1.0 - ( (double)in_size / out_size ) ) );
if( verbosity >= 2 )
std::fprintf( stderr, "decompressed size %9lld, size %9lld. ",
out_size, in_size );
diff --git a/doc/plzip.1 b/doc/plzip.1
index 5fe6e3b..ec3fc36 100644
--- a/doc/plzip.1
+++ b/doc/plzip.1
@@ -1,5 +1,5 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1.
-.TH PLZIP "1" "December 2010" "Plzip 0.7" "User Commands"
+.TH PLZIP "1" "January 2012" "Plzip 0.8" "User Commands"
.SH NAME
Plzip \- reduces the size of files
.SH SYNOPSIS
@@ -15,7 +15,7 @@ display this help and exit
\fB\-V\fR, \fB\-\-version\fR
output version information and exit
.TP
-\fB\-B\fR, \fB\-\-data\-size=\fR<n>
+\fB\-B\fR, \fB\-\-data\-size=\fR<bytes>
set input data block size in bytes
.TP
\fB\-c\fR, \fB\-\-stdout\fR
@@ -27,10 +27,13 @@ decompress
\fB\-f\fR, \fB\-\-force\fR
overwrite existing output files
.TP
+\fB\-F\fR, \fB\-\-recompress\fR
+force recompression of compressed files
+.TP
\fB\-k\fR, \fB\-\-keep\fR
keep (don't delete) input files
.TP
-\fB\-m\fR, \fB\-\-match\-length=\fR<n>
+\fB\-m\fR, \fB\-\-match\-length=\fR<bytes>
set match length limit in bytes [36]
.TP
\fB\-n\fR, \fB\-\-threads=\fR<n>
@@ -42,7 +45,7 @@ if reading stdin, place the output into <file>
\fB\-q\fR, \fB\-\-quiet\fR
suppress all messages
.TP
-\fB\-s\fR, \fB\-\-dictionary\-size=\fR<n>
+\fB\-s\fR, \fB\-\-dictionary\-size=\fR<bytes>
set dictionary size limit in bytes [8MiB]
.TP
\fB\-t\fR, \fB\-\-test\fR
@@ -64,6 +67,10 @@ If no file names are given, plzip compresses or decompresses
from standard input to standard output.
Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,
Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...
+The bidimensional parameter space of LZMA can't be mapped to a linear
+scale optimal for all files. If your files are large, very repetitive,
+etc, you may need to use the \fB\-\-match\-length\fR and \fB\-\-dictionary\-size\fR
+options directly to achieve optimal performance.
.SH "REPORTING BUGS"
Report bugs to lzip\-bug@nongnu.org
.br
@@ -71,8 +78,8 @@ Plzip home page: http://www.nongnu.org/lzip/plzip.html
.SH COPYRIGHT
Copyright \(co 2009 Laszlo Ersek.
.br
-Copyright \(co 2010 Antonio Diaz Diaz.
-Using Lzlib 1.1\-rc1
+Copyright \(co 2012 Antonio Diaz Diaz.
+Using Lzlib 1.3\-rc1
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
.br
This is free software: you are free to change and redistribute it.
diff --git a/doc/plzip.info b/doc/plzip.info
index 22f0052..3a12bef 100644
--- a/doc/plzip.info
+++ b/doc/plzip.info
@@ -12,7 +12,7 @@ File: plzip.info, Node: Top, Next: Introduction, Up: (dir)
Plzip Manual
************
-This manual is for Plzip (version 0.7, 3 December 2010).
+This manual is for Plzip (version 0.8, 17 January 2012).
* Menu:
@@ -24,7 +24,7 @@ This manual is for Plzip (version 0.7, 3 December 2010).
* Concept Index:: Index of concepts
- Copyright (C) 2009, 2010 Antonio Diaz Diaz.
+ Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission to
copy, distribute and modify it.
@@ -39,7 +39,7 @@ Plzip is a massively parallel (multi-threaded), lossless data compressor
based on the lzlib compression library, with very safe integrity
checking and a user interface similar to the one of bzip2, gzip or lzip.
Plzip uses the lzip file format; the files produced by plzip are fully
-compatible with lzip-1.4 or newer.
+compatible with lzip-1.4 or newer, and can be rescued with lziprecover.
Plzip is intended for faster compression/decompression of big files
on multiprocessor machines, which makes it specially well suited for
@@ -106,8 +106,8 @@ The format for running plzip is:
`--version'
Print the version number of plzip on the standard output and exit.
-`-B'
-`--data-size=SIZE'
+`-B BYTES'
+`--data-size=BYTES'
Set the input data block size in bytes. The input file will be
divided in chunks of this size before compression is performed.
Valid values range from 8KiB to 1GiB. Default value is two times
@@ -125,21 +125,27 @@ The format for running plzip is:
`-f'
`--force'
- Force overwrite of output file.
+ Force overwrite of output files.
+
+`-F'
+`--recompress'
+ Force recompression of files whose name already has the `.lz' or
+ `.tlz' suffix.
`-k'
`--keep'
Keep (don't delete) input files during compression or
decompression.
-`-m LENGTH'
-`--match-length=LENGTH'
- Set the match length limit in bytes. Valid values range from 5 to
- 273. Larger values usually give better compression ratios but
- longer compression times.
+`-m BYTES'
+`--match-length=BYTES'
+ Set the match length limit in bytes. After a match this long is
+ found, the search is finished. Valid values range from 5 to 273.
+ Larger values usually give better compression ratios but longer
+ compression times.
-`-n THREADS'
-`--threads=THREADS'
+`-n N'
+`--threads=N'
Set the number of worker threads. Valid values range from 1 to "as
many as your system can support". If this option is not used,
plzip tries to detect the number of processors in the system and
@@ -156,24 +162,35 @@ The format for running plzip is:
`--quiet'
Quiet operation. Suppress all messages.
-`-s SIZE'
-`--dictionary-size=SIZE'
+`-s BYTES'
+`--dictionary-size=BYTES'
Set the dictionary size limit in bytes. Valid values range from
- 4KiB to 512MiB. Note that dictionary sizes are quantized. If the
- specified size does not match one of the valid sizes, it will be
- rounded upwards.
+ 4KiB to 512MiB. Plzip will use the smallest possible dictionary
+ size for each member without exceeding this limit. Note that
+ dictionary sizes are quantized. If the specified size does not
+ match one of the valid sizes, it will be rounded upwards by adding
+ up to (BYTES / 16) to it.
+
+ For maximum compression you should use a dictionary size limit as
+ large as possible, but keep in mind that the decompression memory
+ requirement is affected at compression time by the choice of
+ dictionary size limit.
`-t'
`--test'
Check integrity of the specified file(s), but don't decompress
them. This really performs a trial decompression and throws away
- the result. Use `-tvv' or `-tvvv' to see information about the
- file.
+ the result. Use it together with `-v' to see information about
+ the file.
`-v'
`--verbose'
- Verbose mode. Show the compression ratio for each file processed.
- Further -v's increase the verbosity level.
+ Verbose mode.
+ When compressing, show the compression ratio for each file
+ processed.
+ When decompressing or testing, further -v's (up to 4) increase the
+ verbosity level, showing status, compression ratio, decompressed
+ size, and compressed size.
`-1 .. -9'
Set the compression parameters (dictionary size and match length
@@ -265,13 +282,13 @@ additional information before, between, or after them.
All multibyte values are stored in little endian order.
`ID string'
- A four byte string, identifying the member type, with the value
+ A four byte string, identifying the lzip format, with the value
"LZIP".
`VN (version number, 1 byte)'
Just in case something needs to be modified in the future. Valid
- values are 0 and 1. Version 0 files have only one member and lack
- `Member size'.
+ values are 0 and 1. Version 0 files are deprecated. They can
+ contain only one member and lack the `Member size' field.
`DS (coded dictionary size, 1 byte)'
Bits 4-0 contain the base 2 logarithm of the base dictionary size.
@@ -292,7 +309,7 @@ additional information before, between, or after them.
`Member size (8 bytes)'
Total size of the member, including header and trailer. This
- facilitates safe recovery of undamaged members from multimember
+ facilitates safe recovery of undamaged members from multi-member
files.
@@ -334,11 +351,16 @@ Concept Index

Tag Table:
Node: Top223
-Node: Introduction833
-Node: Invoking Plzip3592
-Node: Program Design7840
-Node: File Format8502
-Node: Problems10458
-Node: Concept Index10987
+Node: Introduction845
+Node: Invoking Plzip3641
+Node: Program Design8597
+Node: File Format9259
+Node: Problems11254
+Node: Concept Index11783

End Tag Table
+
+
+Local Variables:
+coding: iso-8859-15
+End:
diff --git a/doc/plzip.texinfo b/doc/plzip.texinfo
index 517dc11..c83d5a5 100644
--- a/doc/plzip.texinfo
+++ b/doc/plzip.texinfo
@@ -1,12 +1,13 @@
\input texinfo @c -*-texinfo-*-
@c %**start of header
@setfilename plzip.info
+@documentencoding ISO-8859-15
@settitle Plzip Manual
@finalout
@c %**end of header
-@set UPDATED 3 December 2010
-@set VERSION 0.7
+@set UPDATED 17 January 2012
+@set VERSION 0.8
@dircategory Data Compression
@direntry
@@ -43,7 +44,7 @@ This manual is for Plzip (version @value{VERSION}, @value{UPDATED}).
@end menu
@sp 1
-Copyright @copyright{} 2009, 2010 Antonio Diaz Diaz.
+Copyright @copyright{} 2009, 2010, 2011, 2012 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission
to copy, distribute and modify it.
@@ -57,7 +58,7 @@ Plzip is a massively parallel (multi-threaded), lossless data compressor
based on the lzlib compression library, with very safe integrity
checking and a user interface similar to the one of bzip2, gzip or lzip.
Plzip uses the lzip file format; the files produced by plzip are fully
-compatible with lzip-1.4 or newer.
+compatible with lzip-1.4 or newer, and can be rescued with lziprecover.
Plzip is intended for faster compression/decompression of big files on
multiprocessor machines, which makes it specially well suited for
@@ -131,8 +132,8 @@ Print an informative help message describing the options and exit.
@itemx --version
Print the version number of plzip on the standard output and exit.
-@item -B
-@itemx --data-size=@var{size}
+@item -B @var{bytes}
+@itemx --data-size=@var{bytes}
Set the input data block size in bytes. The input file will be divided
in chunks of this size before compression is performed. Valid values
range from 8KiB to 1GiB. Default value is two times the dictionary size.
@@ -150,20 +151,25 @@ Decompress.
@item -f
@itemx --force
-Force overwrite of output file.
+Force overwrite of output files.
+
+@item -F
+@itemx --recompress
+Force recompression of files whose name already has the @samp{.lz} or
+@samp{.tlz} suffix.
@item -k
@itemx --keep
Keep (don't delete) input files during compression or decompression.
-@item -m @var{length}
-@itemx --match-length=@var{length}
-Set the match length limit in bytes. Valid values range from 5 to 273.
-Larger values usually give better compression ratios but longer
-compression times.
+@item -m @var{bytes}
+@itemx --match-length=@var{bytes}
+Set the match length limit in bytes. After a match this long is found,
+the search is finished. Valid values range from 5 to 273. Larger values
+usually give better compression ratios but longer compression times.
-@item -n @var{threads}
-@itemx --threads=@var{threads}
+@item -n @var{n}
+@itemx --threads=@var{n}
Set the number of worker threads. Valid values range from 1 to "as many
as your system can support". If this option is not used, plzip tries to
detect the number of processors in the system and use it as default
@@ -180,22 +186,31 @@ and a file named @samp{@var{file}.lz} when compressing.
@itemx --quiet
Quiet operation. Suppress all messages.
-@item -s @var{size}
-@itemx --dictionary-size=@var{size}
+@item -s @var{bytes}
+@itemx --dictionary-size=@var{bytes}
Set the dictionary size limit in bytes. Valid values range from 4KiB to
-512MiB. Note that dictionary sizes are quantized. If the specified size
-does not match one of the valid sizes, it will be rounded upwards.
+512MiB. Plzip will use the smallest possible dictionary size for each
+member without exceeding this limit. Note that dictionary sizes are
+quantized. If the specified size does not match one of the valid sizes,
+it will be rounded upwards by adding up to (@var{bytes} / 16) to it.
+
+For maximum compression you should use a dictionary size limit as large
+as possible, but keep in mind that the decompression memory requirement
+is affected at compression time by the choice of dictionary size limit.
@item -t
@itemx --test
Check integrity of the specified file(s), but don't decompress them.
This really performs a trial decompression and throws away the result.
-Use @samp{-tvv} or @samp{-tvvv} to see information about the file.
+Use it together with @samp{-v} to see information about the file.
@item -v
@itemx --verbose
-Verbose mode. Show the compression ratio for each file processed.
-Further -v's increase the verbosity level.
+Verbose mode.@*
+When compressing, show the compression ratio for each file processed.@*
+When decompressing or testing, further -v's (up to 4) increase the
+verbosity level, showing status, compression ratio, decompressed size,
+and compressed size.
@item -1 .. -9
Set the compression parameters (dictionary size and match length limit)
@@ -297,12 +312,12 @@ All multibyte values are stored in little endian order.
@table @samp
@item ID string
-A four byte string, identifying the member type, with the value "LZIP".
+A four byte string, identifying the lzip format, with the value "LZIP".
@item VN (version number, 1 byte)
Just in case something needs to be modified in the future. Valid values
-are 0 and 1. Version 0 files have only one member and lack @samp{Member
-size}.
+are 0 and 1. Version 0 files are deprecated. They can contain only one
+member and lack the @samp{Member size} field.
@item DS (coded dictionary size, 1 byte)
Bits 4-0 contain the base 2 logarithm of the base dictionary size.@*
@@ -323,7 +338,7 @@ Size of the uncompressed original data.
@item Member size (8 bytes)
Total size of the member, including header and trailer. This facilitates
-safe recovery of undamaged members from multimember files.
+safe recovery of undamaged members from multi-member files.
@end table
diff --git a/main.cc b/main.cc
index f8fdb93..d96fdb7 100644
--- a/main.cc
+++ b/main.cc
@@ -1,6 +1,6 @@
/* Plzip - A parallel compressor compatible with lzip
Copyright (C) 2009 Laszlo Ersek.
- Copyright (C) 2009, 2010 Antonio Diaz Diaz.
+ Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -63,7 +63,7 @@ namespace {
const char * const Program_name = "Plzip";
const char * const program_name = "plzip";
-const char * const program_year = "2010";
+const char * const program_year = "2012";
const char * invocation_name = 0;
#ifdef O_BINARY
@@ -97,50 +97,55 @@ void show_help() throw()
{
std::printf( "%s - A parallel compressor compatible with lzip.\n", Program_name );
std::printf( "\nUsage: %s [options] [files]\n", invocation_name );
- std::printf( "\nOptions:\n" );
- std::printf( " -h, --help display this help and exit\n" );
- std::printf( " -V, --version output version information and exit\n" );
- std::printf( " -B, --data-size=<n> set input data block size in bytes\n" );
- std::printf( " -c, --stdout send output to standard output\n" );
- std::printf( " -d, --decompress decompress\n" );
- std::printf( " -f, --force overwrite existing output files\n" );
- std::printf( " -k, --keep keep (don't delete) input files\n" );
- std::printf( " -m, --match-length=<n> set match length limit in bytes [36]\n" );
- std::printf( " -n, --threads=<n> set the number of (de)compression threads\n" );
- std::printf( " -o, --output=<file> if reading stdin, place the output into <file>\n" );
- std::printf( " -q, --quiet suppress all messages\n" );
- std::printf( " -s, --dictionary-size=<n> set dictionary size limit in bytes [8MiB]\n" );
- std::printf( " -t, --test test compressed file integrity\n" );
- std::printf( " -v, --verbose be verbose (a 2nd -v gives more)\n" );
- std::printf( " -1 .. -9 set compression level [default 6]\n" );
- std::printf( " --fast alias for -1\n" );
- std::printf( " --best alias for -9\n" );
+ std::printf( "\nOptions:\n"
+ " -h, --help display this help and exit\n"
+ " -V, --version output version information and exit\n"
+ " -B, --data-size=<bytes> set input data block size in bytes\n"
+ " -c, --stdout send output to standard output\n"
+ " -d, --decompress decompress\n"
+ " -f, --force overwrite existing output files\n"
+ " -F, --recompress force recompression of compressed files\n"
+ " -k, --keep keep (don't delete) input files\n"
+ " -m, --match-length=<bytes> set match length limit in bytes [36]\n"
+ " -n, --threads=<n> set the number of (de)compression threads\n"
+ " -o, --output=<file> if reading stdin, place the output into <file>\n"
+ " -q, --quiet suppress all messages\n"
+ " -s, --dictionary-size=<bytes> set dictionary size limit in bytes [8MiB]\n"
+ " -t, --test test compressed file integrity\n"
+ " -v, --verbose be verbose (a 2nd -v gives more)\n"
+ " -1 .. -9 set compression level [default 6]\n"
+ " --fast alias for -1\n"
+ " --best alias for -9\n" );
if( verbosity > 0 )
{
std::printf( " -D, --debug=<level> (0-1) print debug statistics to stderr\n" );
}
- std::printf( "If no file names are given, %s compresses or decompresses\n", program_name );
- std::printf( "from standard input to standard output.\n" );
- std::printf( "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" );
- std::printf( "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" );
- std::printf( "\nReport bugs to lzip-bug@nongnu.org\n" );
- std::printf( "Plzip home page: http://www.nongnu.org/lzip/plzip.html\n" );
+ std::printf( "If no file names are given, plzip compresses or decompresses\n"
+ "from standard input to standard output.\n"
+ "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
+ "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n"
+ "The bidimensional parameter space of LZMA can't be mapped to a linear\n"
+ "scale optimal for all files. If your files are large, very repetitive,\n"
+ "etc, you may need to use the --match-length and --dictionary-size\n"
+ "options directly to achieve optimal performance.\n"
+ "\nReport bugs to lzip-bug@nongnu.org\n"
+ "Plzip home page: http://www.nongnu.org/lzip/plzip.html\n" );
}
void show_version() throw()
{
std::printf( "%s %s\n", Program_name, PROGVERSION );
- std::printf( "Copyright (C) 2009 Laszlo Ersek.\n" );
- std::printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year );
+ std::printf( "Copyright (C) 2009 Laszlo Ersek.\n"
+ "Copyright (C) %s Antonio Diaz Diaz.\n", program_year );
std::printf( "Using Lzlib %s\n", LZ_version() );
- std::printf( "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n" );
- std::printf( "This is free software: you are free to change and redistribute it.\n" );
- std::printf( "There is NO WARRANTY, to the extent permitted by law.\n" );
+ std::printf( "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n"
+ "This is free software: you are free to change and redistribute it.\n"
+ "There is NO WARRANTY, to the extent permitted by law.\n" );
}
-long long getnum( const char * const ptr, const int bs = 0,
+long long getnum( const char * const ptr,
const long long llimit = LLONG_MIN + 1,
const long long ulimit = LLONG_MAX ) throw()
{
@@ -161,9 +166,6 @@ long long getnum( const char * const ptr, const int bs = 0,
switch( tail[0] )
{
case ' ': break;
- case 'b': if( bs > 0 ) { factor = bs; exponent = 1; }
- else bad_multiplier = true;
- break;
case 'Y': exponent = 8; break;
case 'Z': exponent = 7; break;
case 'E': exponent = 6; break;
@@ -205,7 +207,7 @@ int get_dict_size( const char * const arg ) throw()
if( bits >= LZ_min_dictionary_bits() &&
bits <= LZ_max_dictionary_bits() && *tail == 0 )
return ( 1 << bits );
- return getnum( arg, 0, LZ_min_dictionary_size(), LZ_max_dictionary_size() );
+ return getnum( arg, LZ_min_dictionary_size(), LZ_max_dictionary_size() );
}
@@ -224,13 +226,13 @@ int extension_index( const std::string & name ) throw()
int open_instream( const std::string & name, struct stat * const in_statsp,
const Mode program_mode, const int eindex,
- const bool force, const bool to_stdout ) throw()
+ const bool recompress, const bool to_stdout ) throw()
{
int infd = -1;
- if( program_mode == m_compress && !force && eindex >= 0 )
+ if( program_mode == m_compress && !recompress && eindex >= 0 )
{
if( verbosity >= 0 )
- std::fprintf( stderr, "%s: Input file `%s' already has `%s' suffix.\n",
+ std::fprintf( stderr, "%s: Input file '%s' already has '%s' suffix.\n",
program_name, name.c_str(),
known_extensions[eindex].from );
}
@@ -240,7 +242,7 @@ int open_instream( const std::string & name, struct stat * const in_statsp,
if( infd < 0 )
{
if( verbosity >= 0 )
- std::fprintf( stderr, "%s: Can't open input file `%s': %s.\n",
+ std::fprintf( stderr, "%s: Can't open input file '%s': %s.\n",
program_name, name.c_str(), std::strerror( errno ) );
}
else
@@ -253,10 +255,10 @@ int open_instream( const std::string & name, struct stat * const in_statsp,
if( i != 0 || ( !S_ISREG( mode ) && ( !to_stdout || !can_read ) ) )
{
if( verbosity >= 0 )
- std::fprintf( stderr, "%s: Input file `%s' is not a regular file%s.\n",
+ std::fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n",
program_name, name.c_str(),
( can_read && !to_stdout ) ?
- " and `--stdout' was not specified" : "" );
+ " and '--stdout' was not specified" : "" );
close( infd );
infd = -1;
}
@@ -286,8 +288,8 @@ void set_d_outname( const std::string & name, const int i ) throw()
}
}
output_filename = name; output_filename += ".out";
- if( verbosity >= 0 )
- std::fprintf( stderr, "%s: Can't guess original name for `%s' -- using `%s'.\n",
+ if( verbosity >= 1 )
+ std::fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'.\n",
program_name, name.c_str(), output_filename.c_str() );
}
@@ -301,10 +303,10 @@ bool open_outstream( const bool force ) throw()
if( outfd < 0 && verbosity >= 0 )
{
if( errno == EEXIST )
- std::fprintf( stderr, "%s: Output file `%s' already exists, skipping.\n",
+ std::fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n",
program_name, output_filename.c_str() );
else
- std::fprintf( stderr, "%s: Can't create output file `%s': %s.\n",
+ std::fprintf( stderr, "%s: Can't create output file '%s': %s.\n",
program_name, output_filename.c_str(), std::strerror( errno ) );
}
return ( outfd >= 0 );
@@ -334,10 +336,10 @@ void cleanup_and_fail( const int retval ) throw()
{
delete_output_on_interrupt = false;
if( verbosity >= 0 )
- std::fprintf( stderr, "%s: Deleting output file `%s', if it exists.\n",
- program_name, output_filename.c_str() );
+ std::fprintf( stderr, "%s: Deleting output file '%s', if it exists.\n",
+ program_name, output_filename.c_str() );
if( outfd >= 0 ) { close( outfd ); outfd = -1; }
- if( std::remove( output_filename.c_str() ) != 0 )
+ if( std::remove( output_filename.c_str() ) != 0 && errno != ENOENT )
show_error( "WARNING: deletion of output file (apparently) failed." );
}
std::exit( retval );
@@ -347,30 +349,26 @@ void cleanup_and_fail( const int retval ) throw()
// Set permissions, owner and times.
void close_and_set_permissions( const struct stat * const in_statsp )
{
- bool error = false;
+ bool warning = false;
if( in_statsp )
{
- if( fchmod( outfd, in_statsp->st_mode ) != 0 ||
- ( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) != 0 &&
- errno != EPERM ) ) error = true;
// fchown will in many cases return with EPERM, which can be safely ignored.
+ if( ( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) != 0 &&
+ errno != EPERM ) ||
+ fchmod( outfd, in_statsp->st_mode ) != 0 ) warning = true;
}
- if( close( outfd ) == 0 ) outfd = -1;
- else cleanup_and_fail( 1 );
+ if( close( outfd ) != 0 ) cleanup_and_fail( 1 );
+ outfd = -1;
delete_output_on_interrupt = false;
- if( !in_statsp ) return;
- if( !error )
+ if( in_statsp )
{
struct utimbuf t;
t.actime = in_statsp->st_atime;
t.modtime = in_statsp->st_mtime;
- if( utime( output_filename.c_str(), &t ) != 0 ) error = true;
+ if( utime( output_filename.c_str(), &t ) != 0 ) warning = true;
}
- if( error )
- {
+ if( warning && verbosity >= 1 )
show_error( "Can't change output file attributes." );
- cleanup_and_fail( 1 );
- }
}
@@ -397,8 +395,9 @@ void set_signals() throw()
int verbosity = 0;
-// This can be called from any thread, main thread or sub-threads alike, since
-// they all call common helper functions that call fatal() in case of an error.
+// This can be called from any thread, main thread or sub-threads alike,
+// since they all call common helper functions that call fatal() in case
+// of an error.
//
void fatal() { signal_handler( SIGUSR1 ); }
@@ -432,13 +431,13 @@ void show_error( const char * const msg, const int errcode, const bool help ) th
std::fprintf( stderr, "\n" );
}
if( help && invocation_name && invocation_name[0] )
- std::fprintf( stderr, "Try `%s --help' for more information.\n",
+ std::fprintf( stderr, "Try '%s --help' for more information.\n",
invocation_name );
}
}
-void internal_error( const char * const msg )
+void internal_error( const char * const msg ) throw()
{
if( verbosity >= 0 )
std::fprintf( stderr, "%s: internal error: %s.\n", program_name, msg );
@@ -477,7 +476,7 @@ int writeblock( const int fd, const uint8_t * const buf, const int size ) throw(
errno = 0;
const int n = write( fd, buf + size - rest, rest );
if( n > 0 ) rest -= n;
- else if( errno && errno != EINTR && errno != EAGAIN ) break;
+ else if( n < 0 && errno != EINTR && errno != EAGAIN ) break;
}
return ( rest > 0 ) ? size - rest : size;
}
@@ -507,6 +506,7 @@ int main( const int argc, const char * const argv[] )
Mode program_mode = m_compress;
bool force = false;
bool keep_input_files = false;
+ bool recompress = false;
bool to_stdout = false;
std::string input_filename;
std::string default_output_filename;
@@ -518,11 +518,8 @@ int main( const int argc, const char * const argv[] )
if( LZ_version()[0] != LZ_version_string[0] )
internal_error( "bad library version" );
- const int slots_per_worker = 2;
long max_workers = sysconf( _SC_THREAD_THREADS_MAX );
- if( max_workers < 1 || max_workers > INT_MAX / slots_per_worker )
- max_workers = INT_MAX / slots_per_worker;
- if( max_workers > INT_MAX / (int)sizeof (pthread_t) )
+ if( max_workers < 1 || max_workers > INT_MAX / (int)sizeof (pthread_t) )
max_workers = INT_MAX / sizeof (pthread_t);
const Arg_parser::Option options[] =
@@ -541,9 +538,9 @@ int main( const int argc, const char * const argv[] )
{ 'B', "data-size", Arg_parser::yes },
{ 'c', "stdout", Arg_parser::no },
{ 'd', "decompress", Arg_parser::no },
- { 'e', "extreme", Arg_parser::no },
{ 'D', "debug", Arg_parser::yes },
{ 'f', "force", Arg_parser::no },
+ { 'F', "recompress", Arg_parser::no },
{ 'h', "help", Arg_parser::no },
{ 'k', "keep", Arg_parser::no },
{ 'm', "match-length", Arg_parser::yes },
@@ -573,20 +570,20 @@ int main( const int argc, const char * const argv[] )
case '5': case '6': case '7': case '8': case '9':
encoder_options = option_mapping[code-'0']; break;
case 'b': break;
- case 'B': data_size = getnum( arg, 0, 2 * LZ_min_dictionary_size(),
+ case 'B': data_size = getnum( arg, 2 * LZ_min_dictionary_size(),
2 * LZ_max_dictionary_size() ); break;
case 'c': to_stdout = true; break;
case 'd': program_mode = m_decompress; break;
- case 'D': debug_level = getnum( arg, 0, 0, 3 ); break;
- case 'e': break; // ignored by now
+ case 'D': debug_level = getnum( arg, 0, 3 ); break;
case 'f': force = true; break;
+ case 'F': recompress = true; break;
case 'h': show_help(); return 0;
case 'k': keep_input_files = true; break;
case 'm': encoder_options.match_len_limit =
- getnum( arg, 0, LZ_min_match_len_limit(),
- LZ_max_match_len_limit() ); break;
+ getnum( arg, LZ_min_match_len_limit(),
+ LZ_max_match_len_limit() ); break;
case 'o': default_output_filename = arg; break;
- case 'n': num_workers = getnum( arg, 0, 1, max_workers ); break;
+ case 'n': num_workers = getnum( arg, 1, max_workers ); break;
case 'q': verbosity = -1; break;
case 's': encoder_options.dictionary_size = get_dict_size( arg );
break;
@@ -598,6 +595,14 @@ int main( const int argc, const char * const argv[] )
}
} // end process options
+#if defined(__OS2__)
+ _fsetmode( stdin, "b" );
+ _fsetmode( stdout, "b" );
+#endif
+
+ if( program_mode == m_test )
+ outfd = -1;
+
if( data_size <= 0 )
data_size = 2 * std::max( 65536, encoder_options.dictionary_size );
else if( data_size < encoder_options.dictionary_size )
@@ -609,7 +614,6 @@ int main( const int argc, const char * const argv[] )
if( num_online <= 0 ) num_online = 1;
num_workers = std::min( num_online, max_workers );
}
- const int num_slots = num_workers * slots_per_worker;
bool filenames_given = false;
for( ; argind < parser.arguments(); ++argind )
@@ -625,8 +629,6 @@ int main( const int argc, const char * const argv[] )
std::signal( SIGUSR1, signal_handler );
Pretty_print pp( filenames );
- if( program_mode == m_test )
- outfd = -1;
int retval = 0;
for( unsigned int i = 0; i < filenames.size(); ++i )
@@ -662,7 +664,7 @@ int main( const int argc, const char * const argv[] )
input_filename = filenames[i];
const int eindex = extension_index( input_filename );
infd = open_instream( input_filename, &in_stats, program_mode,
- eindex, force, to_stdout );
+ eindex, recompress, to_stdout );
if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; }
if( program_mode != m_test )
{
@@ -693,11 +695,11 @@ int main( const int argc, const char * const argv[] )
int tmp = 0;
if( program_mode == m_compress )
tmp = compress( data_size, encoder_options.dictionary_size,
- encoder_options.match_len_limit, num_workers,
- num_slots, infd, outfd, pp, debug_level );
+ encoder_options.match_len_limit,
+ num_workers, infd, outfd, pp, debug_level );
else
- tmp = decompress( num_workers, num_slots, infd, outfd, pp,
- debug_level, program_mode == m_test );
+ tmp = decompress( num_workers, infd, outfd, pp, debug_level,
+ program_mode == m_test );
if( tmp > retval ) retval = tmp;
if( tmp && program_mode != m_test ) cleanup_and_fail( retval );
diff --git a/plzip.h b/plzip.h
index 572e814..b12bbbd 100644
--- a/plzip.h
+++ b/plzip.h
@@ -1,6 +1,6 @@
/* Plzip - A parallel compressor compatible with lzip
Copyright (C) 2009 Laszlo Ersek.
- Copyright (C) 2009, 2010 Antonio Diaz Diaz.
+ Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -24,7 +24,7 @@ class Pretty_print
mutable bool first_post;
public:
- Pretty_print( const std::vector< std::string > & filenames )
+ explicit Pretty_print( const std::vector< std::string > & filenames )
: stdin_name( "(stdin)" ), longest_name( 0 ), first_post( false )
{
const unsigned int stdin_name_len = std::strlen( stdin_name );
@@ -52,13 +52,15 @@ public:
/*--------------------- Defined in compress.cc ---------------------*/
-void xinit( pthread_cond_t * cond, pthread_mutex_t * mutex );
-void xdestroy( pthread_cond_t * cond, pthread_mutex_t * mutex );
-void xlock( pthread_mutex_t * mutex );
-void xunlock( pthread_mutex_t * mutex );
-void xwait( pthread_cond_t * cond, pthread_mutex_t * mutex );
-void xsignal( pthread_cond_t * cond );
-void xbroadcast( pthread_cond_t * cond );
+void xinit( pthread_mutex_t * const mutex );
+void xinit( pthread_cond_t * const cond );
+void xdestroy( pthread_mutex_t * const mutex );
+void xdestroy( pthread_cond_t * const cond );
+void xlock( pthread_mutex_t * const mutex );
+void xunlock( pthread_mutex_t * const mutex );
+void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex );
+void xsignal( pthread_cond_t * const cond );
+void xbroadcast( pthread_cond_t * const cond );
class Slot_tally
@@ -72,13 +74,16 @@ private:
pthread_mutex_t mutex;
pthread_cond_t slot_av; // free slot available
+ Slot_tally( const Slot_tally & ); // declared as private
+ void operator=( const Slot_tally & ); // declared as private
+
public:
- Slot_tally( const int slots )
+ explicit Slot_tally( const int slots )
: check_counter( 0 ), wait_counter( 0 ),
num_slots( slots ), num_free( slots )
- { xinit( &slot_av, &mutex ); }
+ { xinit( &mutex ); xinit( &slot_av ); }
- ~Slot_tally() { xdestroy( &slot_av, &mutex ); }
+ ~Slot_tally() { xdestroy( &slot_av ); xdestroy( &mutex ); }
bool all_free() { return ( num_free == num_slots ); }
@@ -103,15 +108,15 @@ public:
int compress( const int data_size, const int dictionary_size,
const int match_len_limit, const int num_workers,
- const int num_slots, const int infd, const int outfd,
+ const int infd, const int outfd,
const Pretty_print & pp, const int debug_level );
/*-------------------- Defined in decompress.cc --------------------*/
-int decompress( const int num_workers, const int num_slots,
- const int infd, const int outfd, const Pretty_print & pp,
- const int debug_level, const bool testing );
+int decompress( const int num_workers, const int infd, const int outfd,
+ const Pretty_print & pp, const int debug_level,
+ const bool testing );
/*----------------------- Defined in main.cc -----------------------*/
@@ -121,6 +126,6 @@ extern int verbosity;
void fatal(); // terminate the program
void show_error( const char * const msg, const int errcode = 0, const bool help = false ) throw();
-void internal_error( const char * const msg );
+void internal_error( const char * const msg ) throw();
int readblock( const int fd, uint8_t * const buf, const int size ) throw();
int writeblock( const int fd, const uint8_t * const buf, const int size ) throw();
diff --git a/testsuite/check.sh b/testsuite/check.sh
index c640b3b..d4f919a 100755
--- a/testsuite/check.sh
+++ b/testsuite/check.sh
@@ -19,13 +19,14 @@ fi
if [ -d tmp ] ; then rm -rf tmp ; fi
mkdir tmp
-printf "testing plzip-%s..." "$2"
cd "${objdir}"/tmp
cat "${testdir}"/test.txt > in || framework_failure
cat in in in in > in4 || framework_failure
fail=0
+printf "testing plzip-%s..." "$2"
+
"${LZIP}" -t "${testdir}"/test_v0.lz || fail=1
printf .
"${LZIP}" -cd "${testdir}"/test_v0.lz > copy || fail=1
@@ -69,8 +70,8 @@ for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
printf .
done
-"${LZIP}" -$i < in > anyothername || fail=1
-"${LZIP}" -dq anyothername || fail=1
+"${LZIP}" < in > anyothername || fail=1
+"${LZIP}" -d anyothername || fail=1
cmp in anyothername.out || fail=1
printf .