diff options
Diffstat (limited to '')
-rw-r--r-- | ChangeLog | 8 | ||||
-rw-r--r-- | INSTALL | 7 | ||||
-rw-r--r-- | Makefile.in | 6 | ||||
-rw-r--r-- | NEWS | 15 | ||||
-rw-r--r-- | README | 23 | ||||
-rw-r--r-- | carg_parser.c | 10 | ||||
-rw-r--r-- | carg_parser.h | 2 | ||||
-rwxr-xr-x | configure | 28 | ||||
-rw-r--r-- | decoder.c | 68 | ||||
-rw-r--r-- | decoder.h | 71 | ||||
-rw-r--r-- | doc/clzip.1 | 7 | ||||
-rw-r--r-- | doc/clzip.info | 54 | ||||
-rw-r--r-- | doc/clzip.texinfo | 43 | ||||
-rw-r--r-- | encoder.c | 28 | ||||
-rw-r--r-- | encoder.h | 38 | ||||
-rw-r--r-- | lzip.h (renamed from clzip.h) | 18 | ||||
-rw-r--r-- | main.c | 23 | ||||
-rwxr-xr-x | testsuite/check.sh | 18 |
18 files changed, 253 insertions, 214 deletions
@@ -1,3 +1,11 @@ +2013-05-13 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.5-pre1 released. + * Decompression time has been reduced by 1%. + * main.c (show_header): Show header version if verbosity >= 4. + * Ignore option '-n, --threads' for compatibility with plzip. + * configure: Options now accept a separate argument. + 2013-02-18 Antonio Diaz Diaz <ant_diaz@teleline.es> * Version 1.4 released. @@ -1,7 +1,7 @@ Requirements ------------ You will need a C compiler. -I use gcc 4.7.2 and 3.3.6, but the code should compile with any +I use gcc 4.8.0 and 3.3.6, but the code should compile with any standards compliant compiler. Gcc is available at http://gcc.gnu.org. @@ -36,8 +36,9 @@ the main archive. typing 'make install-bin', 'make install-info' or 'make install-man' respectively. -5a. Type 'make install-as-lzip' to install the program and any data - files and documentation, and link the program to the name 'lzip'. + Instead of 'make install', you can type 'make install-as-lzip' to + install the program and any data files and documentation, and link + the program to the name 'lzip'. Another way diff --git a/Makefile.in b/Makefile.in index a27a481..bc932b7 100644 --- a/Makefile.in +++ b/Makefile.in @@ -29,9 +29,9 @@ main.o : main.c $(objs) : Makefile carg_parser.o : carg_parser.h -decoder.o : clzip.h decoder.h -encoder.o : clzip.h encoder.h -main.o : carg_parser.h clzip.h decoder.h encoder.h +decoder.o : lzip.h decoder.h +encoder.o : lzip.h encoder.h +main.o : carg_parser.h lzip.h decoder.h encoder.h doc : info man @@ -1,13 +1,10 @@ -Changes in version 1.4: +Changes in version 1.5: -Multi-step trials have been implemented. +Decompression time has been reduced by 1%. -Compression ratio has been slightly increased. +File version is now shown only if verbosity >= 4. -Compression time has been reduced by 10%. +Option "-n, --threads" is now accepted and ignored for compatibility +with plzip. -Decompression time has been reduced by 8%. - -The target "install-as-lzip" has been added to the Makefile. - -The target "install-bin" has been added to the Makefile. +"configure" now accepts options with a separate argument. @@ -6,6 +6,10 @@ gzip or bzip2. Clzip decompresses almost as fast as gzip and compresses better than bzip2, which makes it well suited for software distribution and data archiving. +Clzip uses the same well-defined exit status values used by bzip2, which +makes it safer when used in pipes or scripts than compressors returning +ambiguous warning values, like gzip. + Clzip uses the lzip file format; the files produced by clzip are fully compatible with lzip-1.4 or newer. Clzip is in fact a C language version of lzip, intended for embedded devices or systems lacking a C++ @@ -47,15 +51,16 @@ memory requirement is affected at compression time by the choice of dictionary size limit. As a self-check for your protection, clzip stores in the member trailer -the 32-bit CRC of the original data and the size of the original data, -to make sure that the decompressed version of the data is identical to -the original. This guards against corruption of the compressed data, and -against undetected bugs in clzip (hopefully very unlikely). The chances -of data corruption going undetected are microscopic, less than one -chance in 4000 million for each member processed. Be aware, though, that -the check occurs upon decompression, so it can only tell you that -something is wrong. It can't help you recover the original uncompressed -data. +the 32-bit CRC of the original data, the size of the original data and +the size of the member. These values, together with the value remaining +in the range decoder and the end-of-stream marker, provide a very safe 4 +factor integrity checking which guarantees that the decompressed version +of the data is identical to the original. This guards against corruption +of the compressed data, and against undetected bugs in clzip (hopefully +very unlikely). The chances of data corruption going undetected are +microscopic. Be aware, though, that the check occurs upon decompression, +so it can only tell you that something is wrong. It can't help you +recover the original uncompressed data. Clzip implements a simplified version of the LZMA (Lempel-Ziv-Markov chain-Algorithm) algorithm. The high compression of LZMA comes from diff --git a/carg_parser.c b/carg_parser.c index 973bb7e..a86f76f 100644 --- a/carg_parser.c +++ b/carg_parser.c @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 + Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify @@ -89,15 +89,14 @@ static char parse_long_option( struct Arg_parser * const ap, int * const argindp ) { unsigned len; - int index = -1; - int i; + int index = -1, i; char exact = 0, ambig = 0; for( len = 0; opt[len+2] && opt[len+2] != '='; ++len ) ; /* Test all long options for either exact match or abbreviated matches. */ for( i = 0; options[i].code != 0; ++i ) - if( options[i].name && !strncmp( options[i].name, &opt[2], len ) ) + if( options[i].name && strncmp( options[i].name, &opt[2], len ) == 0 ) { if( strlen( options[i].name ) == len ) /* Exact match found */ { index = i; exact = 1; break; } @@ -165,8 +164,7 @@ static char parse_short_option( struct Arg_parser * const ap, while( cind > 0 ) { - int index = -1; - int i; + int index = -1, i; const unsigned char code = opt[cind]; char code_str[2]; code_str[0] = code; code_str[1] = 0; diff --git a/carg_parser.h b/carg_parser.h index 3575dd7..41aa7b3 100644 --- a/carg_parser.h +++ b/carg_parser.h @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 + Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify @@ -5,12 +5,10 @@ # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. -args= -no_create= pkgname=clzip -pkgversion=1.4 +pkgversion=1.5-pre1 progname=clzip -srctrigger=clzip.h +srctrigger=doc/clzip.texinfo # clear some things potentially inherited from environment. LC_ALL=C @@ -36,10 +34,12 @@ if [ ! -x /bin/gcc ] && fi # Loop over all args -while [ -n "$1" ] ; do +args= +no_create= +while [ $# != 0 ] ; do # Get the first arg, and shuffle - option=$1 + option=$1 ; arg2=no shift # Add the argument quoted to args @@ -74,6 +74,14 @@ while [ -n "$1" ] ; do --version | -V) echo "Configure script for ${pkgname} version ${pkgversion}" exit 0 ;; + --srcdir) srcdir=$1 ; arg2=yes ;; + --prefix) prefix=$1 ; arg2=yes ;; + --exec-prefix) exec_prefix=$1 ; arg2=yes ;; + --bindir) bindir=$1 ; arg2=yes ;; + --datarootdir) datarootdir=$1 ; arg2=yes ;; + --infodir) infodir=$1 ; arg2=yes ;; + --mandir) mandir=$1 ; arg2=yes ;; + --srcdir=*) srcdir=${optarg} ;; --prefix=*) prefix=${optarg} ;; --exec-prefix=*) exec_prefix=${optarg} ;; @@ -93,6 +101,14 @@ while [ -n "$1" ] ; do echo "configure: Unrecognized option: \"${option}\"; use --help for usage." 1>&2 exit 1 ;; esac + + # Check if the option took a separate argument + if [ "${arg2}" = yes ] ; then + if [ $# != 0 ] ; then args="${args} \"$1\"" ; shift + else echo "configure: Missing argument to \"${option}\"" 1>&2 + exit 1 + fi + fi done # Find the source files, if location was not specified. @@ -25,7 +25,7 @@ #include <string.h> #include <unistd.h> -#include "clzip.h" +#include "lzip.h" #include "decoder.h" @@ -124,10 +124,10 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder, File_trailer trailer; const int trailer_size = Ft_versioned_size( decoder->member_version ); const unsigned long long member_size = - Rd_member_position( decoder->range_decoder ) + trailer_size; + Rd_member_position( decoder->rdec ) + trailer_size; bool error = false; - int size = Rd_read_data( decoder->range_decoder, trailer, trailer_size ); + int size = Rd_read_data( decoder->rdec, trailer, trailer_size ); if( size < trailer_size ) { error = true; @@ -142,7 +142,7 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder, if( decoder->member_version == 0 ) Ft_set_member_size( trailer, member_size ); - if( decoder->range_decoder->code != 0 ) + if( decoder->rdec->code != 0 ) { error = true; Pp_show_msg( pp, "Range decoder final code is not zero" ); @@ -177,7 +177,7 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder, Ft_get_member_size( trailer ), member_size, member_size ); } } - if( !error && pp->verbosity >= 3 && LZd_data_position( decoder ) > 0 && member_size > 0 ) + if( !error && pp->verbosity >= 2 && LZd_data_position( decoder ) > 0 && member_size > 0 ) fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ", (double)LZd_data_position( decoder ) / member_size, ( 8.0 * member_size ) / LZd_data_position( decoder ), @@ -199,84 +199,82 @@ int LZd_decode_member( struct LZ_decoder * const decoder, unsigned rep1 = 0; /* used for efficient coding of */ unsigned rep2 = 0; /* repeated distances */ unsigned rep3 = 0; - State state = 0; - Rd_load( decoder->range_decoder ); - while( !Rd_finished( decoder->range_decoder ) ) + Rd_load( decoder->rdec ); + while( !Rd_finished( decoder->rdec ) ) { const int pos_state = LZd_data_position( decoder ) & pos_state_mask; - if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_match[state][pos_state] ) == 0 ) + if( Rd_decode_bit( decoder->rdec, &decoder->bm_match[state][pos_state] ) == 0 ) /* 1st bit */ { const uint8_t prev_byte = LZd_get_prev_byte( decoder ); if( St_is_char( state ) ) { state -= ( state < 4 ) ? state : 3; - LZd_put_byte( decoder, Rd_decode_tree( decoder->range_decoder, + LZd_put_byte( decoder, Rd_decode_tree( decoder->rdec, decoder->bm_literal[get_lit_state(prev_byte)], 8 ) ); } else { state -= ( state < 10 ) ? 3 : 6; - LZd_put_byte( decoder, Rd_decode_matched( decoder->range_decoder, - decoder->bm_literal[get_lit_state(prev_byte)], LZd_get_byte( decoder, rep0 ) ) ); + LZd_put_byte( decoder, Rd_decode_matched( decoder->rdec, + decoder->bm_literal[get_lit_state(prev_byte)], + LZd_get_byte( decoder, rep0 ) ) ); } } else { int len; - if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep[state] ) == 1 ) + if( Rd_decode_bit( decoder->rdec, &decoder->bm_rep[state] ) == 1 ) /* 2nd bit */ { - len = 0; - if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep0[state] ) == 1 ) + if( Rd_decode_bit( decoder->rdec, &decoder->bm_rep0[state] ) == 0 ) /* 3rd bit */ + { + if( Rd_decode_bit( decoder->rdec, &decoder->bm_len[state][pos_state] ) == 0 ) /* 4th bit */ + { state = St_set_short_rep( state ); + LZd_put_byte( decoder, LZd_get_byte( decoder, rep0 ) ); continue; } + } + else { unsigned distance; - if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep1[state] ) == 0 ) + if( Rd_decode_bit( decoder->rdec, &decoder->bm_rep1[state] ) == 0 ) /* 4th bit */ distance = rep1; else { - if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep2[state] ) == 0 ) + if( Rd_decode_bit( decoder->rdec, &decoder->bm_rep2[state] ) == 0 ) /* 5th bit */ distance = rep2; - else { distance = rep3; rep3 = rep2; } + else + { distance = rep3; rep3 = rep2; } rep2 = rep1; } rep1 = rep0; rep0 = distance; } - else - { - if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_len[state][pos_state] ) == 0 ) - { state = St_set_short_rep( state ); len = 1; } - } - if( len == 0 ) - { - state = St_set_rep( state ); - len = min_match_len + Led_decode( &decoder->rep_match_len_decoder, decoder->range_decoder, pos_state ); - } + state = St_set_rep( state ); + len = min_match_len + Rd_decode_len( decoder->rdec, &decoder->rep_len_model, pos_state ); } else { int dis_slot; const unsigned rep0_saved = rep0; - len = min_match_len + Led_decode( &decoder->len_decoder, decoder->range_decoder, pos_state ); - dis_slot = Rd_decode_tree6( decoder->range_decoder, decoder->bm_dis_slot[get_dis_state(len)] ); + len = min_match_len + Rd_decode_len( decoder->rdec, &decoder->match_len_model, pos_state ); + dis_slot = Rd_decode_tree6( decoder->rdec, decoder->bm_dis_slot[get_dis_state(len)] ); if( dis_slot < start_dis_model ) rep0 = dis_slot; else { const int direct_bits = ( dis_slot >> 1 ) - 1; rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits; if( dis_slot < end_dis_model ) - rep0 += Rd_decode_tree_reversed( decoder->range_decoder, + rep0 += Rd_decode_tree_reversed( decoder->rdec, decoder->bm_dis + rep0 - dis_slot - 1, direct_bits ); else { - rep0 += Rd_decode( decoder->range_decoder, direct_bits - dis_align_bits ) << dis_align_bits; - rep0 += Rd_decode_tree_reversed4( decoder->range_decoder, decoder->bm_align ); + rep0 += Rd_decode( decoder->rdec, direct_bits - dis_align_bits ) << dis_align_bits; + rep0 += Rd_decode_tree_reversed4( decoder->rdec, decoder->bm_align ); if( rep0 == 0xFFFFFFFFU ) /* Marker found */ { rep0 = rep0_saved; - Rd_normalize( decoder->range_decoder ); + Rd_normalize( decoder->rdec ); LZd_flush_data( decoder ); if( len == min_match_len ) /* End Of Stream marker */ { @@ -284,7 +282,7 @@ int LZd_decode_member( struct LZ_decoder * const decoder, } if( len == min_match_len + 1 ) /* Sync Flush marker */ { - Rd_load( decoder->range_decoder ); continue; + Rd_load( decoder->rdec ); continue; } if( pp->verbosity >= 0 ) { @@ -140,24 +140,24 @@ static inline int Rd_decode_bit( struct Range_decoder * const rdec, static inline int Rd_decode_tree( struct Range_decoder * const rdec, Bit_model bm[], const int num_bits ) { - int model = 1; + int symbol = 1; int i; for( i = num_bits; i > 0; --i ) - model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); - return model - (1 << num_bits); + symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + return symbol - (1 << num_bits); } static inline int Rd_decode_tree6( struct Range_decoder * const rdec, Bit_model bm[] ) { - int model = 1; - model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); - model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); - model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); - model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); - model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); - model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); - return model - (1 << 6); + int symbol = 1; + symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + return symbol - (1 << 6); } static inline int Rd_decode_tree_reversed( struct Range_decoder * const rdec, @@ -213,36 +213,17 @@ static inline int Rd_decode_matched( struct Range_decoder * const rdec, return symbol - 0x100; } - -struct Len_decoder - { - Bit_model choice1; - Bit_model choice2; - Bit_model bm_low[pos_states][len_low_symbols]; - Bit_model bm_mid[pos_states][len_mid_symbols]; - Bit_model bm_high[len_high_symbols]; - }; - -static inline void Led_init( struct Len_decoder * const len_decoder ) - { - Bm_init( &len_decoder->choice1 ); - Bm_init( &len_decoder->choice2 ); - Bm_array_init( len_decoder->bm_low[0], pos_states * len_low_symbols ); - Bm_array_init( len_decoder->bm_mid[0], pos_states * len_mid_symbols ); - Bm_array_init( len_decoder->bm_high, len_high_symbols ); - } - -static inline int Led_decode( struct Len_decoder * const len_decoder, - struct Range_decoder * const rdec, - const int pos_state ) +static inline int Rd_decode_len( struct Range_decoder * const rdec, + struct Len_model * const lm, + const int pos_state ) { - if( Rd_decode_bit( rdec, &len_decoder->choice1 ) == 0 ) - return Rd_decode_tree( rdec, len_decoder->bm_low[pos_state], len_low_bits ); - if( Rd_decode_bit( rdec, &len_decoder->choice2 ) == 0 ) + if( Rd_decode_bit( rdec, &lm->choice1 ) == 0 ) + return Rd_decode_tree( rdec, lm->bm_low[pos_state], len_low_bits ); + if( Rd_decode_bit( rdec, &lm->choice2 ) == 0 ) return len_low_symbols + - Rd_decode_tree( rdec, len_decoder->bm_mid[pos_state], len_mid_bits ); + Rd_decode_tree( rdec, lm->bm_mid[pos_state], len_mid_bits ); return len_low_symbols + len_mid_symbols + - Rd_decode_tree( rdec, len_decoder->bm_high, len_high_bits ); + Rd_decode_tree( rdec, lm->bm_high, len_high_bits ); } @@ -269,9 +250,9 @@ struct LZ_decoder Bit_model bm_dis[modeled_distances-end_dis_model]; Bit_model bm_align[dis_align_size]; - struct Range_decoder * range_decoder; - struct Len_decoder len_decoder; - struct Len_decoder rep_match_len_decoder; + struct Range_decoder * rdec; + struct Len_model match_len_model; + struct Len_model rep_len_model; }; void LZd_flush_data( struct LZ_decoder * const decoder ); @@ -322,7 +303,7 @@ static inline void LZd_copy_block( struct LZ_decoder * const decoder, static inline bool LZd_init( struct LZ_decoder * const decoder, const File_header header, - struct Range_decoder * const rdec, const int ofd ) + struct Range_decoder * const rde, const int ofd ) { decoder->partial_data_pos = 0; decoder->dictionary_size = Fh_get_dictionary_size( header ); @@ -346,9 +327,9 @@ static inline bool LZd_init( struct LZ_decoder * const decoder, Bm_array_init( decoder->bm_dis, modeled_distances - end_dis_model ); Bm_array_init( decoder->bm_align, dis_align_size ); - decoder->range_decoder = rdec; - Led_init( &decoder->len_decoder ); - Led_init( &decoder->rep_match_len_decoder ); + decoder->rdec = rde; + Lm_init( &decoder->match_len_model ); + Lm_init( &decoder->rep_len_model ); decoder->buffer[decoder->buffer_size-1] = 0; /* prev_byte of first_byte */ return true; } diff --git a/doc/clzip.1 b/doc/clzip.1 index 02181a7..4fc2a26 100644 --- a/doc/clzip.1 +++ b/doc/clzip.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1. -.TH CLZIP "1" "February 2013" "Clzip 1.4" "User Commands" +.TH CLZIP "1" "May 2013" "Clzip 1.5-pre1" "User Commands" .SH NAME Clzip \- reduces the size of files .SH SYNOPSIS @@ -71,6 +71,11 @@ The bidimensional parameter space of LZMA can't be mapped to a linear scale optimal for all files. If your files are large, very repetitive, etc, you may need to use the \fB\-\-match\-length\fR and \fB\-\-dictionary\-size\fR options directly to achieve optimal performance. +.PP +Exit status: 0 for a normal exit, 1 for environmental problems (file +not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or +invalid input file, 3 for an internal consistency error (eg, bug) which +caused clzip to panic. .SH "REPORTING BUGS" Report bugs to lzip\-bug@nongnu.org .br diff --git a/doc/clzip.info b/doc/clzip.info index ccec058..41723f3 100644 --- a/doc/clzip.info +++ b/doc/clzip.info @@ -12,7 +12,7 @@ File: clzip.info, Node: Top, Next: Introduction, Up: (dir) Clzip Manual ************ -This manual is for Clzip (version 1.4, 18 February 2013). +This manual is for Clzip (version 1.5-pre1, 13 May 2013). * Menu: @@ -42,6 +42,10 @@ gzip or bzip2. Clzip decompresses almost as fast as gzip and compresses better than bzip2, which makes it well suited for software distribution and data archiving. + Clzip uses the same well-defined exit status values used by bzip2, +which makes it safer when used in pipes or scripts than compressors +returning ambiguous warning values, like gzip. + Clzip uses the lzip file format; the files produced by clzip are fully compatible with lzip-1.4 or newer. Clzip is in fact a C language version of lzip, intended for embedded devices or systems lacking a C++ @@ -96,20 +100,16 @@ filename.tlz becomes filename.tar anyothername becomes anyothername.out As a self-check for your protection, clzip stores in the member -trailer the 32-bit CRC of the original data and the size of the -original data, to make sure that the decompressed version of the data -is identical to the original. This guards against corruption of the -compressed data, and against undetected bugs in clzip (hopefully very -unlikely). The chances of data corruption going undetected are -microscopic, less than one chance in 4000 million for each member -processed. Be aware, though, that the check occurs upon decompression, -so it can only tell you that something is wrong. It can't help you -recover the original uncompressed data. - - Return values: 0 for a normal exit, 1 for environmental problems -(file not found, invalid flags, I/O errors, etc), 2 to indicate a -corrupt or invalid input file, 3 for an internal consistency error (eg, -bug) which caused clzip to panic. +trailer the 32-bit CRC of the original data, the size of the original +data and the size of the member. These values, together with the value +remaining in the range decoder and the end-of-stream marker, provide a +very safe 4 factor integrity checking which guarantees that the +decompressed version of the data is identical to the original. This +guards against corruption of the compressed data, and against +undetected bugs in clzip (hopefully very unlikely). The chances of data +corruption going undetected are microscopic. Be aware, though, that the +check occurs upon decompression, so it can only tell you that something +is wrong. It can't help you recover the original uncompressed data. File: clzip.info, Node: Algorithm, Next: Invoking Clzip, Prev: Introduction, Up: Top @@ -326,6 +326,12 @@ E exabyte (10^18) | Ei exbibyte (2^60) Z zettabyte (10^21) | Zi zebibyte (2^70) Y yottabyte (10^24) | Yi yobibyte (2^80) + + Exit status: 0 for a normal exit, 1 for environmental problems (file +not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or +invalid input file, 3 for an internal consistency error (eg, bug) which +caused clzip to panic. + File: clzip.info, Node: File Format, Next: Examples, Prev: Invoking Clzip, Up: Top @@ -378,6 +384,7 @@ additional information before, between, or after them. Bits 4-0 contain the base 2 logarithm of the base size (12 to 29). Bits 7-5 contain the number of wedges (0 to 7) to substract from the base size to obtain the dictionary size. + Example: 0xD3 = (2^19 - 6 * 2^15) = (512KiB - 6 * 32KiB) = 320KiB Valid values for dictionary size range from 4KiB to 512MiB. `Lzma stream' @@ -392,8 +399,9 @@ additional information before, between, or after them. `Member size (8 bytes)' Total size of the member, including header and trailer. This field - acts as a distributed index, and facilitates safe recovery of - undamaged members from multi-member files. + acts as a distributed index, allows the verification of stream + integrity, and facilitates safe recovery of undamaged members from + multi-member files. @@ -509,12 +517,12 @@ Concept Index Tag Table: Node: Top226 Node: Introduction920 -Node: Algorithm4755 -Node: Invoking Clzip7279 -Node: File Format12551 -Node: Examples14860 -Node: Problems16821 -Node: Concept Index17347 +Node: Algorithm4811 +Node: Invoking Clzip7335 +Node: File Format12847 +Node: Examples15277 +Node: Problems17238 +Node: Concept Index17764 End Tag Table diff --git a/doc/clzip.texinfo b/doc/clzip.texinfo index 1d0479f..e372d60 100644 --- a/doc/clzip.texinfo +++ b/doc/clzip.texinfo @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 18 February 2013 -@set VERSION 1.4 +@set UPDATED 13 May 2013 +@set VERSION 1.5-pre1 @dircategory Data Compression @direntry @@ -61,6 +61,10 @@ gzip or bzip2. Clzip decompresses almost as fast as gzip and compresses better than bzip2, which makes it well suited for software distribution and data archiving. +Clzip uses the same well-defined exit status values used by bzip2, which +makes it safer when used in pipes or scripts than compressors returning +ambiguous warning values, like gzip. + Clzip uses the lzip file format; the files produced by clzip are fully compatible with lzip-1.4 or newer. Clzip is in fact a C language version of lzip, intended for embedded devices or systems lacking a C++ @@ -117,20 +121,16 @@ file from that of the compressed file as follows: @end multitable As a self-check for your protection, clzip stores in the member trailer -the 32-bit CRC of the original data and the size of the original data, -to make sure that the decompressed version of the data is identical to -the original. This guards against corruption of the compressed data, and -against undetected bugs in clzip (hopefully very unlikely). The chances -of data corruption going undetected are microscopic, less than one -chance in 4000 million for each member processed. Be aware, though, that -the check occurs upon decompression, so it can only tell you that -something is wrong. It can't help you recover the original uncompressed -data. - -Return values: 0 for a normal exit, 1 for environmental problems (file -not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or -invalid input file, 3 for an internal consistency error (eg, bug) which -caused clzip to panic. +the 32-bit CRC of the original data, the size of the original data and +the size of the member. These values, together with the value remaining +in the range decoder and the end-of-stream marker, provide a very safe 4 +factor integrity checking which guarantees that the decompressed version +of the data is identical to the original. This guards against corruption +of the compressed data, and against undetected bugs in clzip (hopefully +very unlikely). The chances of data corruption going undetected are +microscopic. Be aware, though, that the check occurs upon decompression, +so it can only tell you that something is wrong. It can't help you +recover the original uncompressed data. @node Algorithm @@ -349,6 +349,12 @@ Table of SI and binary prefixes (unit multipliers): @item Y @tab yottabyte (10^24) @tab | @tab Yi @tab yobibyte (2^80) @end multitable +@sp 1 +Exit status: 0 for a normal exit, 1 for environmental problems (file not +found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or +invalid input file, 3 for an internal consistency error (eg, bug) which +caused clzip to panic. + @node File Format @chapter File Format @@ -404,6 +410,7 @@ wedges between 0 and 7. The size of a wedge is (base_size / 16).@* Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).@* Bits 7-5 contain the number of wedges (0 to 7) to substract from the base size to obtain the dictionary size.@* +Example: 0xD3 = (2^19 - 6 * 2^15) = (512KiB - 6 * 32KiB) = 320KiB@* Valid values for dictionary size range from 4KiB to 512MiB. @item Lzma stream @@ -418,8 +425,8 @@ Size of the uncompressed original data. @item Member size (8 bytes) Total size of the member, including header and trailer. This field acts -as a distributed index, and facilitates safe recovery of undamaged -members from multi-member files. +as a distributed index, allows the verification of stream integrity, and +facilitates safe recovery of undamaged members from multi-member files. @end table @@ -23,7 +23,7 @@ #include <stdlib.h> #include <string.h> -#include "clzip.h" +#include "lzip.h" #include "encoder.h" @@ -259,22 +259,22 @@ void Lee_encode( struct Len_encoder * const len_encoder, symbol -= min_match_len; if( symbol < len_low_symbols ) { - Re_encode_bit( renc, &len_encoder->choice1, 0 ); - Re_encode_tree( renc, len_encoder->bm_low[pos_state], symbol, len_low_bits ); + Re_encode_bit( renc, &len_encoder->lm.choice1, 0 ); + Re_encode_tree( renc, len_encoder->lm.bm_low[pos_state], symbol, len_low_bits ); } else { - Re_encode_bit( renc, &len_encoder->choice1, 1 ); + Re_encode_bit( renc, &len_encoder->lm.choice1, 1 ); if( symbol < len_low_symbols + len_mid_symbols ) { - Re_encode_bit( renc, &len_encoder->choice2, 0 ); - Re_encode_tree( renc, len_encoder->bm_mid[pos_state], + Re_encode_bit( renc, &len_encoder->lm.choice2, 0 ); + Re_encode_tree( renc, len_encoder->lm.bm_mid[pos_state], symbol - len_low_symbols, len_mid_bits ); } else { - Re_encode_bit( renc, &len_encoder->choice2, 1 ); - Re_encode_tree( renc, len_encoder->bm_high, + Re_encode_bit( renc, &len_encoder->lm.choice2, 1 ); + Re_encode_tree( renc, len_encoder->lm.bm_high, symbol - len_low_symbols - len_mid_symbols, len_high_bits ); } } @@ -369,8 +369,8 @@ bool LZe_init( struct LZ_encoder * const encoder, encoder->matchfinder = mf; if( !Re_init( &encoder->range_encoder, outfd ) ) return false; - Lee_init( &encoder->len_encoder, encoder->matchfinder->match_len_limit ); - Lee_init( &encoder->rep_match_len_encoder, encoder->matchfinder->match_len_limit ); + Lee_init( &encoder->match_len_encoder, encoder->matchfinder->match_len_limit ); + Lee_init( &encoder->rep_len_encoder, encoder->matchfinder->match_len_limit ); encoder->num_dis_slots = 2 * real_bits( encoder->matchfinder->dictionary_size - 1 ); @@ -473,7 +473,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, for( len = min_match_len; len <= replens[rep]; ++len ) Tr_update( &encoder->trials[len], price + - Lee_price( &encoder->rep_match_len_encoder, len, pos_state ), + Lee_price( &encoder->rep_len_encoder, len, pos_state ), rep, 0 ); } @@ -654,7 +654,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, LZe_price_rep( encoder, rep, cur_state, pos_state ); for( i = min_match_len; i <= len; ++i ) Tr_update( &encoder->trials[cur+i], price + - Lee_price( &encoder->rep_match_len_encoder, i, pos_state ), + Lee_price( &encoder->rep_len_encoder, i, pos_state ), rep, cur ); if( rep == 0 ) start_len = len + 1; /* discard shorter matches */ @@ -671,7 +671,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, pos_state2 = ( pos_state + len ) & pos_state_mask; state2 = St_set_rep( cur_state ); - price += Lee_price( &encoder->rep_match_len_encoder, len, pos_state ) + + price += Lee_price( &encoder->rep_len_encoder, len, pos_state ) + price0( encoder->bm_match[state2][pos_state2] ) + LZe_price_matched( encoder, data[len-1], data[len], data[len-dis] ); pos_state2 = ( pos_state2 + 1 ) & pos_state_mask; @@ -829,7 +829,7 @@ bool LZe_encode_member( struct LZ_encoder * const encoder, if( len == 1 ) state = St_set_short_rep( state ); else { - Lee_encode( &encoder->rep_match_len_encoder, &encoder->range_encoder, len, pos_state ); + Lee_encode( &encoder->rep_len_encoder, &encoder->range_encoder, len, pos_state ); state = St_set_rep( state ); } } @@ -107,9 +107,9 @@ static inline int price_symbol_reversed( const Bit_model bm[], int symbol, for( i = num_bits; i > 0; --i ) { const int bit = symbol & 1; - symbol >>= 1; price += price_bit( bm[model], bit ); model = ( model << 1 ) | bit; + symbol >>= 1; } return price; } @@ -376,11 +376,7 @@ static inline void Re_encode_matched( struct Range_encoder * const renc, struct Len_encoder { - Bit_model choice1; - Bit_model choice2; - Bit_model bm_low[pos_states][len_low_symbols]; - Bit_model bm_mid[pos_states][len_mid_symbols]; - Bit_model bm_high[len_high_symbols]; + struct Len_model lm; int prices[pos_states][max_len_symbols]; int len_symbols; int counters[pos_states]; @@ -390,21 +386,21 @@ static inline void Lee_update_prices( struct Len_encoder * const len_encoder, const int pos_state ) { int * const pps = len_encoder->prices[pos_state]; - int tmp = price0( len_encoder->choice1 ); + int tmp = price0( len_encoder->lm.choice1 ); int len = 0; for( ; len < len_low_symbols && len < len_encoder->len_symbols; ++len ) pps[len] = tmp + - price_symbol( len_encoder->bm_low[pos_state], len, len_low_bits ); - tmp = price1( len_encoder->choice1 ); + price_symbol( len_encoder->lm.bm_low[pos_state], len, len_low_bits ); + tmp = price1( len_encoder->lm.choice1 ); for( ; len < len_low_symbols + len_mid_symbols && len < len_encoder->len_symbols; ++len ) - pps[len] = tmp + price0( len_encoder->choice2 ) + - price_symbol( len_encoder->bm_mid[pos_state], len - len_low_symbols, len_mid_bits ); + pps[len] = tmp + price0( len_encoder->lm.choice2 ) + + price_symbol( len_encoder->lm.bm_mid[pos_state], len - len_low_symbols, len_mid_bits ); for( ; len < len_encoder->len_symbols; ++len ) /* using 4 slots per value makes "Lee_price" faster */ len_encoder->prices[3][len] = len_encoder->prices[2][len] = len_encoder->prices[1][len] = len_encoder->prices[0][len] = - tmp + price1( len_encoder->choice2 ) + - price_symbol( len_encoder->bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits ); + tmp + price1( len_encoder->lm.choice2 ) + + price_symbol( len_encoder->lm.bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits ); len_encoder->counters[pos_state] = len_encoder->len_symbols; } @@ -412,11 +408,7 @@ static inline void Lee_init( struct Len_encoder * const len_encoder, const int match_len_limit ) { int i; - Bm_init( &len_encoder->choice1 ); - Bm_init( &len_encoder->choice2 ); - Bm_array_init( len_encoder->bm_low[0], pos_states * len_low_symbols ); - Bm_array_init( len_encoder->bm_mid[0], pos_states * len_mid_symbols ); - Bm_array_init( len_encoder->bm_high, len_high_symbols ); + Lm_init( &len_encoder->lm ); len_encoder->len_symbols = match_len_limit + 1 - min_match_len; for( i = 0; i < pos_states; ++i ) Lee_update_prices( len_encoder, i ); } @@ -502,8 +494,8 @@ struct LZ_encoder struct Matchfinder * matchfinder; struct Range_encoder range_encoder; - struct Len_encoder len_encoder; - struct Len_encoder rep_match_len_encoder; + struct Len_encoder match_len_encoder; + struct Len_encoder rep_len_encoder; int num_dis_slots; struct Pair pairs[max_match_len+1]; @@ -572,7 +564,7 @@ static inline int LZe_price_rep0_len( const struct LZ_encoder * const encoder, const State state, const int pos_state ) { return LZe_price_rep( encoder, 0, state, pos_state ) + - Lee_price( &encoder->rep_match_len_encoder, len, pos_state ); + Lee_price( &encoder->rep_len_encoder, len, pos_state ); } static inline int LZe_price_dis( const struct LZ_encoder * const encoder, @@ -589,7 +581,7 @@ static inline int LZe_price_pair( const struct LZ_encoder * const encoder, const int dis, const int len, const int pos_state ) { - return Lee_price( &encoder->len_encoder, len, pos_state ) + + return Lee_price( &encoder->match_len_encoder, len, pos_state ) + LZe_price_dis( encoder, dis, get_dis_state( len ) ); } @@ -620,7 +612,7 @@ static inline void LZe_encode_pair( struct LZ_encoder * const encoder, const int pos_state ) { const int dis_slot = get_slot( dis ); - Lee_encode( &encoder->len_encoder, &encoder->range_encoder, len, pos_state ); + Lee_encode( &encoder->match_len_encoder, &encoder->range_encoder, len, pos_state ); Re_encode_tree( &encoder->range_encoder, encoder->bm_dis_slot[get_dis_state(len)], dis_slot, dis_slot_bits ); @@ -94,6 +94,24 @@ static inline void Bm_init( Bit_model * const probability ) static inline void Bm_array_init( Bit_model * const p, const int size ) { int i = 0; while( i < size ) p[i++] = bit_model_total / 2; } +struct Len_model + { + Bit_model choice1; + Bit_model choice2; + Bit_model bm_low[pos_states][len_low_symbols]; + Bit_model bm_mid[pos_states][len_mid_symbols]; + Bit_model bm_high[len_high_symbols]; + }; + +static inline void Lm_init( struct Len_model * const lm ) + { + Bm_init( &lm->choice1 ); + Bm_init( &lm->choice2 ); + Bm_array_init( lm->bm_low[0], pos_states * len_low_symbols ); + Bm_array_init( lm->bm_mid[0], pos_states * len_mid_symbols ); + Bm_array_init( lm->bm_high, len_high_symbols ); + } + struct Pretty_print { @@ -15,7 +15,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* - Return values: 0 for a normal exit, 1 for environmental problems + Exit status: 0 for a normal exit, 1 for environmental problems (file not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid input file, 3 for an internal consistency error (eg, bug) which caused clzip to panic. @@ -52,7 +52,7 @@ #endif #include "carg_parser.h" -#include "clzip.h" +#include "lzip.h" #include "decoder.h" #include "encoder.h" @@ -127,6 +127,10 @@ static void show_help( void ) "scale optimal for all files. If your files are large, very repetitive,\n" "etc, you may need to use the --match-length and --dictionary-size\n" "options directly to achieve optimal performance.\n" + "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" + "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" + "invalid input file, 3 for an internal consistency error (eg, bug) which\n" + "caused clzip to panic.\n" "\nReport bugs to lzip-bug@nongnu.org\n" "Clzip home page: http://www.nongnu.org/lzip/clzip.html\n" ); } @@ -155,8 +159,9 @@ void show_header( const File_header header ) for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) { num /= factor; if( num % factor != 0 ) exact = false; p = prefix[i]; np = ""; } - fprintf( stderr, "version %d, dictionary size %s%4u %sB. ", - Fh_version( header ), np, num, p ); + if( verbosity >= 4 ) + fprintf( stderr, "version %d, ", Fh_version( header ) ); + fprintf( stderr, "dictionary size %s%4u %sB. ", np, num, p ); } @@ -549,7 +554,7 @@ static int decompress( const int infd, struct Pretty_print * const pp, retval = 2; break; } if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) - { Pp_show_msg( pp, 0 ); if( verbosity >= 2 ) show_header( header ); } + { Pp_show_msg( pp, 0 ); if( verbosity >= 3 ) show_header( header ); } if( !LZd_init( &decoder, header, &rdec, outfd ) ) { @@ -573,13 +578,11 @@ static int decompress( const int infd, struct Pretty_print * const pp, retval = 2; break; } if( verbosity >= 2 ) - { if( testing ) fprintf( stderr, "ok\n" ); - else fprintf( stderr, "done\n" ); Pp_reset( pp ); } + { fprintf( stderr, testing ? "ok\n" : "done\n" ); Pp_reset( pp ); } } Rd_free( &rdec ); if( verbosity == 1 && retval == 0 ) - { if( testing ) fprintf( stderr, "ok\n" ); - else fprintf( stderr, "done\n" ); } + fprintf( stderr, testing ? "ok\n" : "done\n" ); return retval; } @@ -702,6 +705,7 @@ int main( const int argc, const char * const argv[] ) { 'h', "help", ap_no }, { 'k', "keep", ap_no }, { 'm', "match-length", ap_yes }, + { 'n', "threads", ap_yes }, { 'o', "output", ap_yes }, { 'q', "quiet", ap_no }, { 's', "dictionary-size", ap_yes }, @@ -741,6 +745,7 @@ int main( const int argc, const char * const argv[] ) case 'k': keep_input_files = true; break; case 'm': encoder_options.match_len_limit = getnum( arg, min_match_len_limit, max_match_len ); break; + case 'n': break; case 'o': default_output_filename = arg; break; case 'q': verbosity = -1; break; case 's': encoder_options.dictionary_size = get_dict_size( arg ); diff --git a/testsuite/check.sh b/testsuite/check.sh index ed0ca50..d38ebb0 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -26,6 +26,15 @@ fail=0 printf "testing clzip-%s..." "$2" +"${LZIP}" -cqs-1 in > /dev/null +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +"${LZIP}" -cqs0 in > /dev/null +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +"${LZIP}" -cqs4095 in > /dev/null +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +"${LZIP}" -cqm274 in > /dev/null +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi + "${LZIP}" -t "${testdir}"/test.txt.lz || fail=1 "${LZIP}" -cd "${testdir}"/test.txt.lz > copy || fail=1 cmp in copy || fail=1 @@ -38,15 +47,6 @@ if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi cmp in copy || fail=1 printf . -"${LZIP}" -cqs-1 in > out -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi -"${LZIP}" -cqs0 in > out -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi -"${LZIP}" -cqs4095 in > out -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi -"${LZIP}" -cqm274 in > out -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi - for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do "${LZIP}" -k -$i in || fail=1 mv -f in.lz copy.lz || fail=1 |