diff options
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | INSTALL | 2 | ||||
-rw-r--r-- | NEWS | 2 | ||||
-rw-r--r-- | README | 23 | ||||
-rwxr-xr-x | configure | 26 | ||||
-rw-r--r-- | decoder.cc | 14 | ||||
-rw-r--r-- | decoder.h | 4 | ||||
-rw-r--r-- | doc/lzip.1 | 7 | ||||
-rw-r--r-- | doc/lzip.info | 66 | ||||
-rw-r--r-- | doc/lzip.texinfo | 49 | ||||
-rw-r--r-- | encoder.cc | 3 | ||||
-rw-r--r-- | main.cc | 29 | ||||
-rwxr-xr-x | testsuite/check.sh | 18 |
13 files changed, 147 insertions, 101 deletions
@@ -1,3 +1,8 @@ +2013-05-11 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.15-pre2 released. + * configure: Options now accept a separate argument. + 2013-03-21 Antonio Diaz Diaz <ant_diaz@teleline.es> * Version 1.15-pre1 released. @@ -1,7 +1,7 @@ Requirements ------------ You will need a C++ compiler. -I use gcc 4.7.2 and 3.3.6, but the code should compile with any +I use gcc 4.8.0 and 3.3.6, but the code should compile with any standards compliant compiler. Gcc is available at http://gcc.gnu.org. @@ -7,5 +7,7 @@ File version is now shown only if verbosity >= 4. Option "-n, --threads" is now accepted and ignored for compatibility with plzip. +"configure" now accepts options with a separate argument. + The chapter "Stream Format" and the appendix "Reference source code" have been added to the manual. @@ -6,6 +6,10 @@ gzip or bzip2. Lzip decompresses almost as fast as gzip and compresses better than bzip2, which makes it well suited for software distribution and data archiving. +Lzip uses the same well-defined exit status values used by bzip2, which +makes it safer when used in pipes or scripts than compressors returning +ambiguous warning values, like gzip. + If you ever need to recover data from a damaged lzip file, try the lziprecover program. @@ -42,15 +46,16 @@ memory requirement is affected at compression time by the choice of dictionary size limit. As a self-check for your protection, lzip stores in the member trailer -the 32-bit CRC of the original data and the size of the original data, -to make sure that the decompressed version of the data is identical to -the original. This guards against corruption of the compressed data, and -against undetected bugs in lzip (hopefully very unlikely). The chances -of data corruption going undetected are microscopic, less than one -chance in 4000 million for each member processed. Be aware, though, that -the check occurs upon decompression, so it can only tell you that -something is wrong. It can't help you recover the original uncompressed -data. +the 32-bit CRC of the original data, the size of the original data and +the size of the member. These values, together with the value remaining +in the range decoder and the end-of-stream marker, provide a very safe 4 +factor integrity checking which guarantees that the decompressed version +of the data is identical to the original. This guards against corruption +of the compressed data, and against undetected bugs in lzip (hopefully +very unlikely). The chances of data corruption going undetected are +microscopic. Be aware, though, that the check occurs upon decompression, +so it can only tell you that something is wrong. It can't help you +recover the original uncompressed data. Lzip implements a simplified version of the LZMA (Lempel-Ziv-Markov chain-Algorithm) algorithm. The high compression of LZMA comes from @@ -5,10 +5,8 @@ # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. -args= -no_create= pkgname=lzip -pkgversion=1.15-pre1 +pkgversion=1.15-pre2 progname=lzip srctrigger=doc/lzip.texinfo @@ -36,10 +34,12 @@ if [ ! -x /bin/g++ ] && fi # Loop over all args -while [ -n "$1" ] ; do +args= +no_create= +while [ $# != 0 ] ; do # Get the first arg, and shuffle - option=$1 + option=$1 ; arg2=no shift # Add the argument quoted to args @@ -74,6 +74,14 @@ while [ -n "$1" ] ; do --version | -V) echo "Configure script for ${pkgname} version ${pkgversion}" exit 0 ;; + --srcdir) srcdir=$1 ; arg2=yes ;; + --prefix) prefix=$1 ; arg2=yes ;; + --exec-prefix) exec_prefix=$1 ; arg2=yes ;; + --bindir) bindir=$1 ; arg2=yes ;; + --datarootdir) datarootdir=$1 ; arg2=yes ;; + --infodir) infodir=$1 ; arg2=yes ;; + --mandir) mandir=$1 ; arg2=yes ;; + --srcdir=*) srcdir=${optarg} ;; --prefix=*) prefix=${optarg} ;; --exec-prefix=*) exec_prefix=${optarg} ;; @@ -93,6 +101,14 @@ while [ -n "$1" ] ; do echo "configure: Unrecognized option: \"${option}\"; use --help for usage." 1>&2 exit 1 ;; esac + + # Check if the option took a separate argument + if [ "${arg2}" = yes ] ; then + if [ $# != 0 ] ; then args="${args} \"$1\"" ; shift + else echo "configure: Missing argument to \"${option}\"" 1>&2 + exit 1 + fi + fi done # Find the source files, if location was not specified. @@ -175,7 +175,7 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const trailer.member_size(), member_size, member_size ); } } - if( !error && pp.verbosity() >= 3 && data_position() > 0 && member_size > 0 ) + if( !error && pp.verbosity() >= 2 && data_position() > 0 && member_size > 0 ) std::fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ", (double)data_position() / member_size, ( 8.0 * member_size ) / data_position(), @@ -214,7 +214,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) while( !rdec.finished() ) { const int pos_state = data_position() & pos_state_mask; - if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) + if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit { const uint8_t prev_byte = get_prev_byte(); if( state.is_char() ) @@ -227,21 +227,21 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) else { int len; - if( rdec.decode_bit( bm_rep[state()] ) == 1 ) + if( rdec.decode_bit( bm_rep[state()] ) == 1 ) // 2nd bit { - if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) + if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit { - if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) + if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit { state.set_short_rep(); put_byte( get_byte( rep0 ) ); continue; } } else { unsigned distance; - if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) + if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit distance = rep1; else { - if( rdec.decode_bit( bm_rep2[state()] ) == 0 ) + if( rdec.decode_bit( bm_rep2[state()] ) == 0 ) // 5th bit distance = rep2; else { distance = rep3; rep3 = rep2; } @@ -176,11 +176,11 @@ public: match_byte <<= 1; const int match_bit = match_byte & 0x100; const int bit = decode_bit( bm1[match_bit+symbol] ); - symbol = ( symbol << 1 ) + bit; + symbol = ( symbol << 1 ) | bit; if( match_bit != bit << 8 ) { while( symbol < 0x100 ) - symbol = ( symbol << 1 ) + decode_bit( bm[symbol] ); + symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); break; } } @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1. -.TH LZIP "1" "March 2013" "Lzip 1.15-pre1" "User Commands" +.TH LZIP "1" "May 2013" "Lzip 1.15-pre2" "User Commands" .SH NAME Lzip \- reduces the size of files .SH SYNOPSIS @@ -71,6 +71,11 @@ The bidimensional parameter space of LZMA can't be mapped to a linear scale optimal for all files. If your files are large, very repetitive, etc, you may need to use the \fB\-\-match\-length\fR and \fB\-\-dictionary\-size\fR options directly to achieve optimal performance. +.PP +Exit status: 0 for a normal exit, 1 for environmental problems (file +not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or +invalid input file, 3 for an internal consistency error (eg, bug) which +caused lzip to panic. .SH "REPORTING BUGS" Report bugs to lzip\-bug@nongnu.org .br diff --git a/doc/lzip.info b/doc/lzip.info index 5c4cf81..9c4e874 100644 --- a/doc/lzip.info +++ b/doc/lzip.info @@ -11,7 +11,7 @@ File: lzip.info, Node: Top, Next: Introduction, Up: (dir) Lzip Manual *********** -This manual is for Lzip (version 1.15-pre1, 21 March 2013). +This manual is for Lzip (version 1.15-pre2, 11 May 2013). * Menu: @@ -43,6 +43,10 @@ gzip or bzip2. Lzip decompresses almost as fast as gzip and compresses better than bzip2, which makes it well suited for software distribution and data archiving. + Lzip uses the same well-defined exit status values used by bzip2, +which makes it safer when used in pipes or scripts than compressors +returning ambiguous warning values, like gzip. + If you ever need to recover data from a damaged lzip file, try the lziprecover program. @@ -93,20 +97,16 @@ filename.tlz becomes filename.tar anyothername becomes anyothername.out As a self-check for your protection, lzip stores in the member -trailer the 32-bit CRC of the original data and the size of the -original data, to make sure that the decompressed version of the data -is identical to the original. This guards against corruption of the -compressed data, and against undetected bugs in lzip (hopefully very -unlikely). The chances of data corruption going undetected are -microscopic, less than one chance in 4000 million for each member -processed. Be aware, though, that the check occurs upon decompression, -so it can only tell you that something is wrong. It can't help you -recover the original uncompressed data. - - Return values: 0 for a normal exit, 1 for environmental problems -(file not found, invalid flags, I/O errors, etc), 2 to indicate a -corrupt or invalid input file, 3 for an internal consistency error (eg, -bug) which caused lzip to panic. +trailer the 32-bit CRC of the original data, the size of the original +data and the size of the member. These values, together with the value +remaining in the range decoder and the end-of-stream marker, provide a +very safe 4 factor integrity checking which guarantees that the +decompressed version of the data is identical to the original. This +guards against corruption of the compressed data, and against +undetected bugs in lzip (hopefully very unlikely). The chances of data +corruption going undetected are microscopic. Be aware, though, that the +check occurs upon decompression, so it can only tell you that something +is wrong. It can't help you recover the original uncompressed data. File: lzip.info, Node: Algorithm, Next: Invoking Lzip, Prev: Introduction, Up: Top @@ -325,6 +325,12 @@ E exabyte (10^18) | Ei exbibyte (2^60) Z zettabyte (10^21) | Zi zebibyte (2^70) Y yottabyte (10^24) | Yi yobibyte (2^80) + + Exit status: 0 for a normal exit, 1 for environmental problems (file +not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or +invalid input file, 3 for an internal consistency error (eg, bug) which +caused lzip to panic. + File: lzip.info, Node: File Format, Next: Stream Format, Prev: Invoking Lzip, Up: Top @@ -392,8 +398,9 @@ additional information before, between, or after them. `Member size (8 bytes)' Total size of the member, including header and trailer. This field - acts as a distributed index, and facilitates safe recovery of - undamaged members from multi-member files. + acts as a distributed index, allows the verification of stream + integrity, and facilitates safe recovery of undamaged members from + multi-member files. @@ -535,8 +542,8 @@ bm_align reverse bit tree for distances >= 128, after fixed probability bits - There are two separated sets of length contexts (`Len_model' in the -source). One for normal matches, the other for repeated matches. The + There are two separate sets of contexts for lengths (`Len_model' in +the source). One for normal matches, the other for repeated matches. The contexts in each Len_model are (see `decode_len' in the source): Name Indices Used when @@ -747,7 +754,6 @@ struct Bit_model Bit_model() : probability( bit_model_total / 2 ) {} }; - struct Len_model { Bit_model choice1; @@ -1057,7 +1063,7 @@ typedef uint8_t File_trailer[20]; // 12-19 member size including header and trailer -/* Return values: 0 for a normal exit, 1 for environmental problems +/* Exit status: 0 for a normal exit, 1 for environmental problems (file not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid input file. */ @@ -1156,15 +1162,15 @@ Concept Index Tag Table: Node: Top224 -Node: Introduction1067 -Node: Algorithm4732 -Node: Invoking Lzip7250 -Node: File Format12602 -Node: Stream Format14985 -Node: Examples23695 -Node: Problems25644 -Node: Reference source code26174 -Node: Concept Index39424 +Node: Introduction1065 +Node: Algorithm4786 +Node: Invoking Lzip7304 +Node: File Format12895 +Node: Stream Format15328 +Node: Examples24042 +Node: Problems25991 +Node: Reference source code26521 +Node: Concept Index39768 End Tag Table diff --git a/doc/lzip.texinfo b/doc/lzip.texinfo index 632abc5..484b5ac 100644 --- a/doc/lzip.texinfo +++ b/doc/lzip.texinfo @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 21 March 2013 -@set VERSION 1.15-pre1 +@set UPDATED 11 May 2013 +@set VERSION 1.15-pre2 @dircategory Data Compression @direntry @@ -64,6 +64,10 @@ gzip or bzip2. Lzip decompresses almost as fast as gzip and compresses better than bzip2, which makes it well suited for software distribution and data archiving. +Lzip uses the same well-defined exit status values used by bzip2, which +makes it safer when used in pipes or scripts than compressors returning +ambiguous warning values, like gzip. + If you ever need to recover data from a damaged lzip file, try the lziprecover program. @@ -116,20 +120,16 @@ file from that of the compressed file as follows: @end multitable As a self-check for your protection, lzip stores in the member trailer -the 32-bit CRC of the original data and the size of the original data, -to make sure that the decompressed version of the data is identical to -the original. This guards against corruption of the compressed data, and -against undetected bugs in lzip (hopefully very unlikely). The chances -of data corruption going undetected are microscopic, less than one -chance in 4000 million for each member processed. Be aware, though, that -the check occurs upon decompression, so it can only tell you that -something is wrong. It can't help you recover the original uncompressed -data. - -Return values: 0 for a normal exit, 1 for environmental problems (file -not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or -invalid input file, 3 for an internal consistency error (eg, bug) which -caused lzip to panic. +the 32-bit CRC of the original data, the size of the original data and +the size of the member. These values, together with the value remaining +in the range decoder and the end-of-stream marker, provide a very safe 4 +factor integrity checking which guarantees that the decompressed version +of the data is identical to the original. This guards against corruption +of the compressed data, and against undetected bugs in lzip (hopefully +very unlikely). The chances of data corruption going undetected are +microscopic. Be aware, though, that the check occurs upon decompression, +so it can only tell you that something is wrong. It can't help you +recover the original uncompressed data. @node Algorithm @@ -350,6 +350,12 @@ Table of SI and binary prefixes (unit multipliers): @item Y @tab yottabyte (10^24) @tab | @tab Yi @tab yobibyte (2^80) @end multitable +@sp 1 +Exit status: 0 for a normal exit, 1 for environmental problems (file not +found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or +invalid input file, 3 for an internal consistency error (eg, bug) which +caused lzip to panic. + @node File Format @chapter File Format @@ -420,8 +426,8 @@ Size of the uncompressed original data. @item Member size (8 bytes) Total size of the member, including header and trailer. This field acts -as a distributed index, and facilitates safe recovery of undamaged -members from multi-member files. +as a distributed index, allows the verification of stream integrity, and +facilitates safe recovery of undamaged members from multi-member files. @end table @@ -572,8 +578,8 @@ fixed probability bits @end multitable @sp 1 -There are two separated sets of length contexts (@samp{Len_model} in the -source). One for normal matches, the other for repeated matches. The +There are two separate sets of contexts for lengths (@samp{Len_model} in +the source). One for normal matches, the other for repeated matches. The contexts in each Len_model are (see @samp{decode_len} in the source): @multitable @columnfractions .2 .4 .4 @@ -814,7 +820,6 @@ struct Bit_model Bit_model() : probability( bit_model_total / 2 ) {} }; - struct Len_model { Bit_model choice1; @@ -1124,7 +1129,7 @@ typedef uint8_t File_trailer[20]; // 12-19 member size including header and trailer -/* Return values: 0 for a normal exit, 1 for environmental problems +/* Exit status: 0 for a normal exit, 1 for environmental problems (file not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid input file. */ @@ -404,7 +404,8 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances], return 1; } - for( int i = 0; i < num_rep_distances; ++i ) trials[0].reps[i] = reps[i]; + for( int i = 0; i < num_rep_distances; ++i ) + trials[0].reps[i] = reps[i]; trials[1].prev_index = 0; trials[1].prev_index2 = single_step_trial; @@ -15,7 +15,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* - Return values: 0 for a normal exit, 1 for environmental problems + Exit status: 0 for a normal exit, 1 for environmental problems (file not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid input file, 3 for an internal consistency error (eg, bug) which caused lzip to panic. @@ -132,6 +132,10 @@ void show_help() "scale optimal for all files. If your files are large, very repetitive,\n" "etc, you may need to use the --match-length and --dictionary-size\n" "options directly to achieve optimal performance.\n" + "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" + "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" + "invalid input file, 3 for an internal consistency error (eg, bug) which\n" + "caused lzip to panic.\n" "\nReport bugs to lzip-bug@nongnu.org\n" "Lzip home page: http://www.nongnu.org/lzip/lzip.html\n" ); } @@ -170,8 +174,8 @@ unsigned long long getnum( const char * const ptr, const unsigned long long llimit, const unsigned long long ulimit ) { - errno = 0; char * tail; + errno = 0; unsigned long long result = strtoull( ptr, &tail, 0 ); if( tail == ptr ) { @@ -245,7 +249,7 @@ int extension_index( const std::string & name ) } -int open_instream( const std::string & name, struct stat * const in_statsp, +int open_instream( const char * const name, struct stat * const in_statsp, const Mode program_mode, const int eindex, const bool recompress, const bool to_stdout ) { @@ -254,17 +258,16 @@ int open_instream( const std::string & name, struct stat * const in_statsp, { if( verbosity >= 0 ) std::fprintf( stderr, "%s: Input file '%s' already has '%s' suffix.\n", - program_name, name.c_str(), - known_extensions[eindex].from ); + program_name, name, known_extensions[eindex].from ); } else { - infd = open( name.c_str(), O_RDONLY | o_binary ); + infd = open( name, O_RDONLY | o_binary ); if( infd < 0 ) { if( verbosity >= 0 ) std::fprintf( stderr, "%s: Can't open input file '%s': %s.\n", - program_name, name.c_str(), std::strerror( errno ) ); + program_name, name, std::strerror( errno ) ); } else { @@ -277,7 +280,7 @@ int open_instream( const std::string & name, struct stat * const in_statsp, { if( verbosity >= 0 ) std::fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n", - program_name, name.c_str(), + program_name, name, ( can_read && !to_stdout ) ? " and '--stdout' was not specified" : "" ); close( infd ); @@ -624,7 +627,7 @@ int decompress( const int infd, const Pretty_print & pp, const bool testing ) { pp( "Invalid dictionary size in member header" ); retval = 2; break; } if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) - { pp(); if( verbosity >= 2 ) show_header( header ); } + { pp(); if( verbosity >= 3 ) show_header( header ); } LZ_decoder decoder( header, rdec, outfd ); const int result = decoder.decode_member( pp ); @@ -644,8 +647,7 @@ int decompress( const int infd, const Pretty_print & pp, const bool testing ) retval = 2; break; } if( verbosity >= 2 ) - { if( testing ) std::fprintf( stderr, "ok\n" ); - else std::fprintf( stderr, "done\n" ); pp.reset(); } + { std::fprintf( stderr, testing ? "ok\n" : "done\n" ); pp.reset(); } } } catch( std::bad_alloc ) @@ -655,8 +657,7 @@ int decompress( const int infd, const Pretty_print & pp, const bool testing ) } catch( Error e ) { pp(); show_error( e.msg, errno ); retval = 1; } if( verbosity == 1 && retval == 0 ) - { if( testing ) std::fprintf( stderr, "ok\n" ); - else std::fprintf( stderr, "done\n" ); } + std::fprintf( stderr, testing ? "ok\n" : "done\n" ); return retval; } @@ -865,7 +866,7 @@ int main( const int argc, const char * const argv[] ) { input_filename = filenames[i]; const int eindex = extension_index( input_filename ); - infd = open_instream( input_filename, &in_stats, program_mode, + infd = open_instream( input_filename.c_str(), &in_stats, program_mode, eindex, recompress, to_stdout ); if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; } if( program_mode != m_test ) diff --git a/testsuite/check.sh b/testsuite/check.sh index ff3f574..64f481c 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -26,6 +26,15 @@ fail=0 printf "testing lzip-%s..." "$2" +"${LZIP}" -cqs-1 in > /dev/null +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +"${LZIP}" -cqs0 in > /dev/null +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +"${LZIP}" -cqs4095 in > /dev/null +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +"${LZIP}" -cqm274 in > /dev/null +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi + "${LZIP}" -t "${testdir}"/test.txt.lz || fail=1 "${LZIP}" -cd "${testdir}"/test.txt.lz > copy || fail=1 cmp in copy || fail=1 @@ -38,15 +47,6 @@ if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi cmp in copy || fail=1 printf . -"${LZIP}" -cqs-1 in > out -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi -"${LZIP}" -cqs0 in > out -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi -"${LZIP}" -cqs4095 in > out -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi -"${LZIP}" -cqm274 in > out -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi - for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do "${LZIP}" -k -$i in || fail=1 mv -f in.lz copy.lz || fail=1 |