diff options
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | INSTALL | 7 | ||||
-rw-r--r-- | NEWS | 2 | ||||
-rw-r--r-- | README | 30 | ||||
-rwxr-xr-x | configure | 38 | ||||
-rw-r--r-- | doc/plzip.1 | 9 | ||||
-rw-r--r-- | doc/plzip.info | 76 | ||||
-rw-r--r-- | doc/plzip.texinfo | 63 | ||||
-rw-r--r-- | main.cc | 23 | ||||
-rwxr-xr-x | testsuite/check.sh | 18 |
10 files changed, 162 insertions, 109 deletions
@@ -1,11 +1,12 @@ -2013-03-08 Antonio Diaz Diaz <ant_diaz@teleline.es> +2013-05-29 Antonio Diaz Diaz <antonio@gnu.org> - * Version 1.0-rc1 released. + * Version 1.0 released. * compress.cc: 'deliver_packet' changed to 'deliver_packets'. * Scalability of decompression from/to regular files has been increased by removing splitter and muxer when not needed. * The number of worker threads is now limited to the number of members when decompressing from a regular file. + * configure: Options now accept a separate argument. * Makefile.in: Added new target 'install-as-lzip'. * Makefile.in: Added new target 'install-bin'. * main.cc: Use 'setmode' instead of '_setmode' on Windows and OS/2. @@ -1,7 +1,7 @@ Requirements ------------ You will need a C++ compiler and the lzlib compression library installed. -I use gcc 4.7.2 and 3.3.6, but the code should compile with any +I use gcc 4.8.0 and 3.3.6, but the code should compile with any standards compliant compiler. Lzlib must be version 1.0 or newer. Gcc is available at http://gcc.gnu.org. @@ -38,8 +38,9 @@ the main archive. typing 'make install-bin', 'make install-info' or 'make install-man' respectively. -5a. Type 'make install-as-lzip' to install the program and any data - files and documentation, and link the program to the name 'lzip'. + Instead of 'make install', you can type 'make install-as-lzip' to + install the program and any data files and documentation, and link + the program to the name 'lzip'. Another way @@ -8,6 +8,8 @@ Scalability when decompressing from/to regular files has been increased. The number of worker threads is now limited to the number of members in the input file when decompressing from a regular file. +"configure" now accepts options with a separate argument. + The target "install-as-lzip" has been added to the Makefile. The target "install-bin" has been added to the Makefile. @@ -3,13 +3,18 @@ Description Plzip is a massively parallel (multi-threaded), lossless data compressor based on the lzlib compression library, with very safe integrity checking and a user interface similar to the one of bzip2, gzip or lzip. -Plzip uses the lzip file format; the files produced by plzip are fully -compatible with lzip-1.4 or newer, and can be rescued with lziprecover. Plzip is intended for faster compression/decompression of big files on multiprocessor machines, which makes it specially well suited for distribution of big software files and large scale data archiving. On -files big enough, plzip can use hundreds of processors. +files big enough (several GB), plzip can use hundreds of processors. + +Plzip uses the lzip file format; the files produced by plzip are fully +compatible with lzip-1.4 or newer, and can be rescued with lziprecover. + +Plzip uses the same well-defined exit status values used by lzip and +bzip2, which makes it safer when used in pipes or scripts than +compressors returning ambiguous warning values, like gzip. Plzip replaces every file given in the command line with a compressed version of itself, with the name "original_name.lz". Each compressed @@ -29,15 +34,16 @@ corresponding uncompressed files. Integrity testing of concatenated compressed files is also supported. As a self-check for your protection, plzip stores in the member trailer -the 32-bit CRC of the original data and the size of the original data, -to make sure that the decompressed version of the data is identical to -the original. This guards against corruption of the compressed data, and -against undetected bugs in plzip (hopefully very unlikely). The chances -of data corruption going undetected are microscopic, less than one -chance in 4000 million for each member processed. Be aware, though, that -the check occurs upon decompression, so it can only tell you that -something is wrong. It can't help you recover the original uncompressed -data. +the 32-bit CRC of the original data, the size of the original data and +the size of the member. These values, together with the value remaining +in the range decoder and the end-of-stream marker, provide a very safe 4 +factor integrity checking which guarantees that the decompressed version +of the data is identical to the original. This guards against corruption +of the compressed data, and against undetected bugs in plzip (hopefully +very unlikely). The chances of data corruption going undetected are +microscopic. Be aware, though, that the check occurs upon decompression, +so it can only tell you that something is wrong. It can't help you +recover the original uncompressed data. Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. @@ -5,10 +5,8 @@ # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. -args= -no_create= pkgname=plzip -pkgversion=1.0-rc1 +pkgversion=1.0 progname=plzip srctrigger=doc/plzip.texinfo @@ -28,18 +26,19 @@ CXXFLAGS='-Wall -W -O2' LDFLAGS= # checking whether we are using GNU C++. -if [ ! -x /bin/g++ ] && - [ ! -x /usr/bin/g++ ] && - [ ! -x /usr/local/bin/g++ ] ; then +${CXX} --version > /dev/null 2>&1 +if [ $? != 0 ] ; then CXX=c++ CXXFLAGS='-W -O2' fi # Loop over all args -while [ -n "$1" ] ; do +args= +no_create= +while [ $# != 0 ] ; do # Get the first arg, and shuffle - option=$1 + option=$1 ; arg2=no shift # Add the argument quoted to args @@ -74,6 +73,14 @@ while [ -n "$1" ] ; do --version | -V) echo "Configure script for ${pkgname} version ${pkgversion}" exit 0 ;; + --srcdir) srcdir=$1 ; arg2=yes ;; + --prefix) prefix=$1 ; arg2=yes ;; + --exec-prefix) exec_prefix=$1 ; arg2=yes ;; + --bindir) bindir=$1 ; arg2=yes ;; + --datarootdir) datarootdir=$1 ; arg2=yes ;; + --infodir) infodir=$1 ; arg2=yes ;; + --mandir) mandir=$1 ; arg2=yes ;; + --srcdir=*) srcdir=${optarg} ;; --prefix=*) prefix=${optarg} ;; --exec-prefix=*) exec_prefix=${optarg} ;; @@ -88,11 +95,22 @@ while [ -n "$1" ] ; do CXXFLAGS=*) CXXFLAGS=${optarg} ;; LDFLAGS=*) LDFLAGS=${optarg} ;; - --* | *=* | *-*-*) ;; + --*) + echo "configure: WARNING: unrecognized option: '${option}'" 1>&2 ;; + *=* | *-*-*) ;; *) - echo "configure: Unrecognized option: \"${option}\"; use --help for usage." 1>&2 + echo "configure: unrecognized option: '${option}'" 1>&2 + echo "Try 'configure --help' for more information." exit 1 ;; esac + + # Check if the option took a separate argument + if [ "${arg2}" = yes ] ; then + if [ $# != 0 ] ; then args="${args} \"$1\"" ; shift + else echo "configure: Missing argument to \"${option}\"" 1>&2 + exit 1 + fi + fi done # Find the source files, if location was not specified. diff --git a/doc/plzip.1 b/doc/plzip.1 index 2b1261b..5b91105 100644 --- a/doc/plzip.1 +++ b/doc/plzip.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1. -.TH PLZIP "1" "March 2013" "Plzip 1.0-rc1" "User Commands" +.TH PLZIP "1" "May 2013" "Plzip 1.0" "User Commands" .SH NAME Plzip \- reduces the size of files .SH SYNOPSIS @@ -71,6 +71,11 @@ The bidimensional parameter space of LZMA can't be mapped to a linear scale optimal for all files. If your files are large, very repetitive, etc, you may need to use the \fB\-\-match\-length\fR and \fB\-\-dictionary\-size\fR options directly to achieve optimal performance. +.PP +Exit status: 0 for a normal exit, 1 for environmental problems (file +not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or +invalid input file, 3 for an internal consistency error (eg, bug) which +caused plzip to panic. .SH "REPORTING BUGS" Report bugs to lzip\-bug@nongnu.org .br @@ -79,7 +84,7 @@ Plzip home page: http://www.nongnu.org/lzip/plzip.html Copyright \(co 2009 Laszlo Ersek. .br Copyright \(co 2013 Antonio Diaz Diaz. -Using Lzlib 1.4\-rc2 +Using Lzlib 1.4 License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html> .br This is free software: you are free to change and redistribute it. diff --git a/doc/plzip.info b/doc/plzip.info index bf22e32..2070f4f 100644 --- a/doc/plzip.info +++ b/doc/plzip.info @@ -12,16 +12,16 @@ File: plzip.info, Node: Top, Next: Introduction, Up: (dir) Plzip Manual ************ -This manual is for Plzip (version 1.0-rc1, 8 March 2013). +This manual is for Plzip (version 1.0, 29 May 2013). * Menu: -* Introduction:: Purpose and features of plzip -* Program Design:: Internal structure of plzip -* Invoking Plzip:: Command line interface -* File Format:: Detailed format of the compressed file -* Problems:: Reporting bugs -* Concept Index:: Index of concepts +* Introduction:: Purpose and features of plzip +* Program Design:: Internal structure of plzip +* Invoking Plzip:: Command line interface +* File Format:: Detailed format of the compressed file +* Problems:: Reporting bugs +* Concept Index:: Index of concepts Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. @@ -38,13 +38,19 @@ File: plzip.info, Node: Introduction, Next: Program Design, Prev: Top, Up: T Plzip is a massively parallel (multi-threaded), lossless data compressor based on the lzlib compression library, with very safe integrity checking and a user interface similar to the one of bzip2, gzip or lzip. -Plzip uses the lzip file format; the files produced by plzip are fully -compatible with lzip-1.4 or newer, and can be rescued with lziprecover. Plzip is intended for faster compression/decompression of big files on multiprocessor machines, which makes it specially well suited for distribution of big software files and large scale data archiving. On -files big enough, plzip can use hundreds of processors. +files big enough (several GB), plzip can use hundreds of processors. + + Plzip uses the lzip file format; the files produced by plzip are +fully compatible with lzip-1.4 or newer, and can be rescued with +lziprecover. + + Plzip uses the same well-defined exit status values used by lzip and +bzip2, which makes it safer when used in pipes or scripts than +compressors returning ambiguous warning values, like gzip. Plzip replaces every file given in the command line with a compressed version of itself, with the name "original_name.lz". Each compressed @@ -71,15 +77,16 @@ filename.tlz becomes filename.tar anyothername becomes anyothername.out As a self-check for your protection, plzip stores in the member -trailer the 32-bit CRC of the original data and the size of the -original data, to make sure that the decompressed version of the data -is identical to the original. This guards against corruption of the -compressed data, and against undetected bugs in plzip (hopefully very -unlikely). The chances of data corruption going undetected are -microscopic, less than one chance in 4000 million for each member -processed. Be aware, though, that the check occurs upon decompression, -so it can only tell you that something is wrong. It can't help you -recover the original uncompressed data. +trailer the 32-bit CRC of the original data, the size of the original +data and the size of the member. These values, together with the value +remaining in the range decoder and the end-of-stream marker, provide a +very safe 4 factor integrity checking which guarantees that the +decompressed version of the data is identical to the original. This +guards against corruption of the compressed data, and against +undetected bugs in plzip (hopefully very unlikely). The chances of data +corruption going undetected are microscopic. Be aware, though, that the +check occurs upon decompression, so it can only tell you that something +is wrong. It can't help you recover the original uncompressed data. WARNING! Even if plzip is bug-free, other causes may result in a corrupt compressed file (bugs in the system libraries, memory errors, @@ -88,11 +95,6 @@ give the `--keep' option to plzip and do not remove the original file until you verify the compressed file with a command like `plzip -cd file.lz | cmp file -'. - Return values: 0 for a normal exit, 1 for environmental problems -(file not found, invalid flags, I/O errors, etc), 2 to indicate a -corrupt or invalid input file, 3 for an internal consistency error (eg, -bug) which caused plzip to panic. - File: plzip.info, Node: Program Design, Next: Invoking Plzip, Prev: Introduction, Up: Top @@ -266,6 +268,12 @@ E exabyte (10^18) | Ei exbibyte (2^60) Z zettabyte (10^21) | Zi zebibyte (2^70) Y yottabyte (10^24) | Yi yobibyte (2^80) + + Exit status: 0 for a normal exit, 1 for environmental problems (file +not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or +invalid input file, 3 for an internal consistency error (eg, bug) which +caused plzip to panic. + File: plzip.info, Node: File Format, Next: Problems, Prev: Invoking Plzip, Up: Top @@ -323,7 +331,8 @@ additional information before, between, or after them. `Lzma stream' The lzma stream, finished by an end of stream marker. Uses default - values for encoder properties. + values for encoder properties. See the lzip manual for a full + description. `CRC32 (4 bytes)' CRC of the uncompressed original data. @@ -333,8 +342,9 @@ additional information before, between, or after them. `Member size (8 bytes)' Total size of the member, including header and trailer. This field - acts as a distributed index, and facilitates safe recovery of - undamaged members from multi-member files. + acts as a distributed index, allows the verification of stream + integrity, and facilitates safe recovery of undamaged members from + multi-member files. @@ -375,12 +385,12 @@ Concept Index Tag Table: Node: Top223 -Node: Introduction864 -Node: Program Design4030 -Node: Invoking Plzip5084 -Node: File Format10093 -Node: Problems12473 -Node: Concept Index13002 +Node: Introduction865 +Node: Program Design4113 +Node: Invoking Plzip5167 +Node: File Format10416 +Node: Problems12895 +Node: Concept Index13424 End Tag Table diff --git a/doc/plzip.texinfo b/doc/plzip.texinfo index 5e62234..b832884 100644 --- a/doc/plzip.texinfo +++ b/doc/plzip.texinfo @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 8 March 2013 -@set VERSION 1.0-rc1 +@set UPDATED 29 May 2013 +@set VERSION 1.0 @dircategory Data Compression @direntry @@ -35,12 +35,12 @@ This manual is for Plzip (version @value{VERSION}, @value{UPDATED}). @menu -* Introduction:: Purpose and features of plzip -* Program Design:: Internal structure of plzip -* Invoking Plzip:: Command line interface -* File Format:: Detailed format of the compressed file -* Problems:: Reporting bugs -* Concept Index:: Index of concepts +* Introduction:: Purpose and features of plzip +* Program Design:: Internal structure of plzip +* Invoking Plzip:: Command line interface +* File Format:: Detailed format of the compressed file +* Problems:: Reporting bugs +* Concept Index:: Index of concepts @end menu @sp 1 @@ -57,13 +57,18 @@ to copy, distribute and modify it. Plzip is a massively parallel (multi-threaded), lossless data compressor based on the lzlib compression library, with very safe integrity checking and a user interface similar to the one of bzip2, gzip or lzip. -Plzip uses the lzip file format; the files produced by plzip are fully -compatible with lzip-1.4 or newer, and can be rescued with lziprecover. Plzip is intended for faster compression/decompression of big files on multiprocessor machines, which makes it specially well suited for distribution of big software files and large scale data archiving. On -files big enough, plzip can use hundreds of processors. +files big enough (several GB), plzip can use hundreds of processors. + +Plzip uses the lzip file format; the files produced by plzip are fully +compatible with lzip-1.4 or newer, and can be rescued with lziprecover. + +Plzip uses the same well-defined exit status values used by lzip and +bzip2, which makes it safer when used in pipes or scripts than +compressors returning ambiguous warning values, like gzip. Plzip replaces every file given in the command line with a compressed version of itself, with the name "original_name.lz". Each compressed @@ -92,15 +97,16 @@ file from that of the compressed file as follows: @end multitable As a self-check for your protection, plzip stores in the member trailer -the 32-bit CRC of the original data and the size of the original data, -to make sure that the decompressed version of the data is identical to -the original. This guards against corruption of the compressed data, and -against undetected bugs in plzip (hopefully very unlikely). The chances -of data corruption going undetected are microscopic, less than one -chance in 4000 million for each member processed. Be aware, though, that -the check occurs upon decompression, so it can only tell you that -something is wrong. It can't help you recover the original uncompressed -data. +the 32-bit CRC of the original data, the size of the original data and +the size of the member. These values, together with the value remaining +in the range decoder and the end-of-stream marker, provide a very safe 4 +factor integrity checking which guarantees that the decompressed version +of the data is identical to the original. This guards against corruption +of the compressed data, and against undetected bugs in plzip (hopefully +very unlikely). The chances of data corruption going undetected are +microscopic. Be aware, though, that the check occurs upon decompression, +so it can only tell you that something is wrong. It can't help you +recover the original uncompressed data. WARNING! Even if plzip is bug-free, other causes may result in a corrupt compressed file (bugs in the system libraries, memory errors, etc). @@ -109,11 +115,6 @@ Therefore, if the data you are going to compress is important, give the you verify the compressed file with a command like @w{@samp{plzip -cd file.lz | cmp file -}}. -Return values: 0 for a normal exit, 1 for environmental problems (file -not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or -invalid input file, 3 for an internal consistency error (eg, bug) which -caused plzip to panic. - @node Program Design @chapter Program Design @@ -289,6 +290,12 @@ Table of SI and binary prefixes (unit multipliers): @item Y @tab yottabyte (10^24) @tab | @tab Yi @tab yobibyte (2^80) @end multitable +@sp 1 +Exit status: 0 for a normal exit, 1 for environmental problems (file not +found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or +invalid input file, 3 for an internal consistency error (eg, bug) which +caused plzip to panic. + @node File Format @chapter File Format @@ -349,7 +356,7 @@ Valid values for dictionary size range from 4KiB to 512MiB. @item Lzma stream The lzma stream, finished by an end of stream marker. Uses default values -for encoder properties. +for encoder properties. See the lzip manual for a full description. @item CRC32 (4 bytes) CRC of the uncompressed original data. @@ -359,8 +366,8 @@ Size of the uncompressed original data. @item Member size (8 bytes) Total size of the member, including header and trailer. This field acts -as a distributed index, and facilitates safe recovery of undamaged -members from multi-member files. +as a distributed index, allows the verification of stream integrity, and +facilitates safe recovery of undamaged members from multi-member files. @end table @@ -16,7 +16,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* - Return values: 0 for a normal exit, 1 for environmental problems + Exit status: 0 for a normal exit, 1 for environmental problems (file not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid input file, 3 for an internal consistency error (eg, bug) which caused plzip to panic. @@ -135,6 +135,10 @@ void show_help( const long num_online ) "scale optimal for all files. If your files are large, very repetitive,\n" "etc, you may need to use the --match-length and --dictionary-size\n" "options directly to achieve optimal performance.\n" + "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" + "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" + "invalid input file, 3 for an internal consistency error (eg, bug) which\n" + "caused plzip to panic.\n" "\nReport bugs to lzip-bug@nongnu.org\n" "Plzip home page: http://www.nongnu.org/lzip/plzip.html\n" ); } @@ -156,8 +160,8 @@ unsigned long long getnum( const char * const ptr, const unsigned long long llimit, const unsigned long long ulimit ) { - errno = 0; char * tail; + errno = 0; unsigned long long result = strtoull( ptr, &tail, 0 ); if( tail == ptr ) { @@ -231,7 +235,7 @@ int extension_index( const std::string & name ) } -int open_instream( const std::string & name, struct stat * const in_statsp, +int open_instream( const char * const name, struct stat * const in_statsp, const Mode program_mode, const int eindex, const bool recompress, const bool to_stdout ) { @@ -240,17 +244,16 @@ int open_instream( const std::string & name, struct stat * const in_statsp, { if( verbosity >= 0 ) std::fprintf( stderr, "%s: Input file '%s' already has '%s' suffix.\n", - program_name, name.c_str(), - known_extensions[eindex].from ); + program_name, name, known_extensions[eindex].from ); } else { - infd = open( name.c_str(), O_RDONLY | o_binary ); + infd = open( name, O_RDONLY | o_binary ); if( infd < 0 ) { if( verbosity >= 0 ) std::fprintf( stderr, "%s: Can't open input file '%s': %s.\n", - program_name, name.c_str(), std::strerror( errno ) ); + program_name, name, std::strerror( errno ) ); } else { @@ -263,7 +266,7 @@ int open_instream( const std::string & name, struct stat * const in_statsp, { if( verbosity >= 0 ) std::fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n", - program_name, name.c_str(), + program_name, name, ( can_read && !to_stdout ) ? " and '--stdout' was not specified" : "" ); close( infd ); @@ -555,8 +558,8 @@ int main( const int argc, const char * const argv[] ) case 'm': encoder_options.match_len_limit = getnum( arg, LZ_min_match_len_limit(), LZ_max_match_len_limit() ); break; - case 'o': default_output_filename = arg; break; case 'n': num_workers = getnum( arg, 1, max_workers ); break; + case 'o': default_output_filename = arg; break; case 'q': verbosity = -1; break; case 's': encoder_options.dictionary_size = get_dict_size( arg ); break; @@ -633,7 +636,7 @@ int main( const int argc, const char * const argv[] ) { input_filename = filenames[i]; const int eindex = extension_index( input_filename ); - infd = open_instream( input_filename, &in_stats, program_mode, + infd = open_instream( input_filename.c_str(), &in_stats, program_mode, eindex, recompress, to_stdout ); if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; } if( program_mode != m_test ) diff --git a/testsuite/check.sh b/testsuite/check.sh index a044738..031fe00 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -27,6 +27,15 @@ fail=0 printf "testing plzip-%s..." "$2" +"${LZIP}" -cqs-1 in > /dev/null +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +"${LZIP}" -cqs0 in > /dev/null +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +"${LZIP}" -cqs4095 in > /dev/null +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +"${LZIP}" -cqm274 in > /dev/null +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi + "${LZIP}" -t "${in_lz}" || fail=1 "${LZIP}" -cd "${in_lz}" > copy || fail=1 cmp in copy || fail=1 @@ -39,15 +48,6 @@ if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi cmp in copy || fail=1 printf . -"${LZIP}" -cqs-1 in > out -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi -"${LZIP}" -cqs0 in > out -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi -"${LZIP}" -cqs4095 in > out -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi -"${LZIP}" -cqm274 in > out -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi - for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do "${LZIP}" -k -$i in || fail=1 mv -f in.lz copy.lz || fail=1 |