summary refs log tree commit diff stats
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- ChangeLog 8
-rw-r--r-- INSTALL 7
-rw-r--r-- Makefile.in 6
-rw-r--r-- NEWS 15
-rw-r--r-- README 23
-rw-r--r-- carg_parser.c 10
-rw-r--r-- carg_parser.h 2
-rwxr-xr-x configure 28
-rw-r--r-- decoder.c 68
-rw-r--r-- decoder.h 71
-rw-r--r-- doc/clzip.1 7
-rw-r--r-- doc/clzip.info 54
-rw-r--r-- doc/clzip.texinfo 43
-rw-r--r-- encoder.c 28
-rw-r--r-- encoder.h 38
-rw-r--r-- lzip.h (renamed from clzip.h) 18
-rw-r--r-- main.c 23
-rwxr-xr-x testsuite/check.sh 18
18 files changed, 253 insertions, 214 deletions
diff --git a/ChangeLog b/ChangeLog
index 0f365bb..f1ce217 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2013-05-13 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 1.5-pre1 released.
+ * Decompression time has been reduced by 1%.
+ * main.c (show_header): Show header version if verbosity >= 4.
+ * Ignore option '-n, --threads' for compatibility with plzip.
+ * configure: Options now accept a separate argument.
+
2013-02-18 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 1.4 released.
diff --git a/INSTALL b/INSTALL
index 4466443..7670406 100644
--- a/INSTALL
+++ b/INSTALL
@@ -1,7 +1,7 @@
Requirements
------------
You will need a C compiler.
-I use gcc 4.7.2 and 3.3.6, but the code should compile with any
+I use gcc 4.8.0 and 3.3.6, but the code should compile with any
standards compliant compiler.
Gcc is available at http://gcc.gnu.org.
@@ -36,8 +36,9 @@ the main archive.
typing 'make install-bin', 'make install-info' or 'make install-man'
respectively.
-5a. Type 'make install-as-lzip' to install the program and any data
- files and documentation, and link the program to the name 'lzip'.
+ Instead of 'make install', you can type 'make install-as-lzip' to
+ install the program and any data files and documentation, and link
+ the program to the name 'lzip'.
Another way
diff --git a/Makefile.in b/Makefile.in
index a27a481..bc932b7 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -29,9 +29,9 @@ main.o : main.c
$(objs) : Makefile
carg_parser.o : carg_parser.h
-decoder.o : clzip.h decoder.h
-encoder.o : clzip.h encoder.h
-main.o : carg_parser.h clzip.h decoder.h encoder.h
+decoder.o : lzip.h decoder.h
+encoder.o : lzip.h encoder.h
+main.o : carg_parser.h lzip.h decoder.h encoder.h
doc : info man
diff --git a/NEWS b/NEWS
index e854c8a..ec9961a 100644
--- a/NEWS
+++ b/NEWS
@@ -1,13 +1,10 @@
-Changes in version 1.4:
+Changes in version 1.5:
-Multi-step trials have been implemented.
+Decompression time has been reduced by 1%.
-Compression ratio has been slightly increased.
+File version is now shown only if verbosity >= 4.
-Compression time has been reduced by 10%.
+Option "-n, --threads" is now accepted and ignored for compatibility
+with plzip.
-Decompression time has been reduced by 8%.
-
-The target "install-as-lzip" has been added to the Makefile.
-
-The target "install-bin" has been added to the Makefile.
+"configure" now accepts options with a separate argument.
diff --git a/README b/README
index 72d434b..26d527d 100644
--- a/README
+++ b/README
@@ -6,6 +6,10 @@ gzip or bzip2. Clzip decompresses almost as fast as gzip and compresses
better than bzip2, which makes it well suited for software distribution
and data archiving.
+Clzip uses the same well-defined exit status values used by bzip2, which
+makes it safer when used in pipes or scripts than compressors returning
+ambiguous warning values, like gzip.
+
Clzip uses the lzip file format; the files produced by clzip are fully
compatible with lzip-1.4 or newer. Clzip is in fact a C language version
of lzip, intended for embedded devices or systems lacking a C++
@@ -47,15 +51,16 @@ memory requirement is affected at compression time by the choice of
dictionary size limit.
As a self-check for your protection, clzip stores in the member trailer
-the 32-bit CRC of the original data and the size of the original data,
-to make sure that the decompressed version of the data is identical to
-the original. This guards against corruption of the compressed data, and
-against undetected bugs in clzip (hopefully very unlikely). The chances
-of data corruption going undetected are microscopic, less than one
-chance in 4000 million for each member processed. Be aware, though, that
-the check occurs upon decompression, so it can only tell you that
-something is wrong. It can't help you recover the original uncompressed
-data.
+the 32-bit CRC of the original data, the size of the original data and
+the size of the member. These values, together with the value remaining
+in the range decoder and the end-of-stream marker, provide a very safe 4
+factor integrity checking which guarantees that the decompressed version
+of the data is identical to the original. This guards against corruption
+of the compressed data, and against undetected bugs in clzip (hopefully
+very unlikely). The chances of data corruption going undetected are
+microscopic. Be aware, though, that the check occurs upon decompression,
+so it can only tell you that something is wrong. It can't help you
+recover the original uncompressed data.
Clzip implements a simplified version of the LZMA (Lempel-Ziv-Markov
chain-Algorithm) algorithm. The high compression of LZMA comes from
diff --git a/carg_parser.c b/carg_parser.c
index 973bb7e..a86f76f 100644
--- a/carg_parser.c
+++ b/carg_parser.c
@@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C version)
- Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
+ Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013
Antonio Diaz Diaz.
This library is free software: you can redistribute it and/or modify
@@ -89,15 +89,14 @@ static char parse_long_option( struct Arg_parser * const ap,
int * const argindp )
{
unsigned len;
- int index = -1;
- int i;
+ int index = -1, i;
char exact = 0, ambig = 0;
for( len = 0; opt[len+2] && opt[len+2] != '='; ++len ) ;
/* Test all long options for either exact match or abbreviated matches. */
for( i = 0; options[i].code != 0; ++i )
- if( options[i].name && !strncmp( options[i].name, &opt[2], len ) )
+ if( options[i].name && strncmp( options[i].name, &opt[2], len ) == 0 )
{
if( strlen( options[i].name ) == len ) /* Exact match found */
{ index = i; exact = 1; break; }
@@ -165,8 +164,7 @@ static char parse_short_option( struct Arg_parser * const ap,
while( cind > 0 )
{
- int index = -1;
- int i;
+ int index = -1, i;
const unsigned char code = opt[cind];
char code_str[2];
code_str[0] = code; code_str[1] = 0;
diff --git a/carg_parser.h b/carg_parser.h
index 3575dd7..41aa7b3 100644
--- a/carg_parser.h
+++ b/carg_parser.h
@@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C version)
- Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
+ Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013
Antonio Diaz Diaz.
This library is free software: you can redistribute it and/or modify
diff --git a/configure b/configure
index a234bb3..81068f8 100755
--- a/configure
+++ b/configure
@@ -5,12 +5,10 @@
# This configure script is free software: you have unlimited permission
# to copy, distribute and modify it.
-args=
-no_create=
pkgname=clzip
-pkgversion=1.4
+pkgversion=1.5-pre1
progname=clzip
-srctrigger=clzip.h
+srctrigger=doc/clzip.texinfo
# clear some things potentially inherited from environment.
LC_ALL=C
@@ -36,10 +34,12 @@ if [ ! -x /bin/gcc ] &&
fi
# Loop over all args
-while [ -n "$1" ] ; do
+args=
+no_create=
+while [ $# != 0 ] ; do
# Get the first arg, and shuffle
- option=$1
+ option=$1 ; arg2=no
shift
# Add the argument quoted to args
@@ -74,6 +74,14 @@ while [ -n "$1" ] ; do
--version | -V)
echo "Configure script for ${pkgname} version ${pkgversion}"
exit 0 ;;
+ --srcdir) srcdir=$1 ; arg2=yes ;;
+ --prefix) prefix=$1 ; arg2=yes ;;
+ --exec-prefix) exec_prefix=$1 ; arg2=yes ;;
+ --bindir) bindir=$1 ; arg2=yes ;;
+ --datarootdir) datarootdir=$1 ; arg2=yes ;;
+ --infodir) infodir=$1 ; arg2=yes ;;
+ --mandir) mandir=$1 ; arg2=yes ;;
+
--srcdir=*) srcdir=${optarg} ;;
--prefix=*) prefix=${optarg} ;;
--exec-prefix=*) exec_prefix=${optarg} ;;
@@ -93,6 +101,14 @@ while [ -n "$1" ] ; do
echo "configure: Unrecognized option: \"${option}\"; use --help for usage." 1>&2
exit 1 ;;
esac
+
+ # Check if the option took a separate argument
+ if [ "${arg2}" = yes ] ; then
+ if [ $# != 0 ] ; then args="${args} \"$1\"" ; shift
+ else echo "configure: Missing argument to \"${option}\"" 1>&2
+ exit 1
+ fi
+ fi
done
# Find the source files, if location was not specified.
diff --git a/decoder.c b/decoder.c
index b40dafd..d3f2bf0 100644
--- a/decoder.c
+++ b/decoder.c
@@ -25,7 +25,7 @@
#include <string.h>
#include <unistd.h>
-#include "clzip.h"
+#include "lzip.h"
#include "decoder.h"
@@ -124,10 +124,10 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder,
File_trailer trailer;
const int trailer_size = Ft_versioned_size( decoder->member_version );
const unsigned long long member_size =
- Rd_member_position( decoder->range_decoder ) + trailer_size;
+ Rd_member_position( decoder->rdec ) + trailer_size;
bool error = false;
- int size = Rd_read_data( decoder->range_decoder, trailer, trailer_size );
+ int size = Rd_read_data( decoder->rdec, trailer, trailer_size );
if( size < trailer_size )
{
error = true;
@@ -142,7 +142,7 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder,
if( decoder->member_version == 0 ) Ft_set_member_size( trailer, member_size );
- if( decoder->range_decoder->code != 0 )
+ if( decoder->rdec->code != 0 )
{
error = true;
Pp_show_msg( pp, "Range decoder final code is not zero" );
@@ -177,7 +177,7 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder,
Ft_get_member_size( trailer ), member_size, member_size );
}
}
- if( !error && pp->verbosity >= 3 && LZd_data_position( decoder ) > 0 && member_size > 0 )
+ if( !error && pp->verbosity >= 2 && LZd_data_position( decoder ) > 0 && member_size > 0 )
fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ",
(double)LZd_data_position( decoder ) / member_size,
( 8.0 * member_size ) / LZd_data_position( decoder ),
@@ -199,84 +199,82 @@ int LZd_decode_member( struct LZ_decoder * const decoder,
unsigned rep1 = 0; /* used for efficient coding of */
unsigned rep2 = 0; /* repeated distances */
unsigned rep3 = 0;
-
State state = 0;
- Rd_load( decoder->range_decoder );
- while( !Rd_finished( decoder->range_decoder ) )
+ Rd_load( decoder->rdec );
+ while( !Rd_finished( decoder->rdec ) )
{
const int pos_state = LZd_data_position( decoder ) & pos_state_mask;
- if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_match[state][pos_state] ) == 0 )
+ if( Rd_decode_bit( decoder->rdec, &decoder->bm_match[state][pos_state] ) == 0 ) /* 1st bit */
{
const uint8_t prev_byte = LZd_get_prev_byte( decoder );
if( St_is_char( state ) )
{
state -= ( state < 4 ) ? state : 3;
- LZd_put_byte( decoder, Rd_decode_tree( decoder->range_decoder,
+ LZd_put_byte( decoder, Rd_decode_tree( decoder->rdec,
decoder->bm_literal[get_lit_state(prev_byte)], 8 ) );
}
else
{
state -= ( state < 10 ) ? 3 : 6;
- LZd_put_byte( decoder, Rd_decode_matched( decoder->range_decoder,
- decoder->bm_literal[get_lit_state(prev_byte)], LZd_get_byte( decoder, rep0 ) ) );
+ LZd_put_byte( decoder, Rd_decode_matched( decoder->rdec,
+ decoder->bm_literal[get_lit_state(prev_byte)],
+ LZd_get_byte( decoder, rep0 ) ) );
}
}
else
{
int len;
- if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep[state] ) == 1 )
+ if( Rd_decode_bit( decoder->rdec, &decoder->bm_rep[state] ) == 1 ) /* 2nd bit */
{
- len = 0;
- if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep0[state] ) == 1 )
+ if( Rd_decode_bit( decoder->rdec, &decoder->bm_rep0[state] ) == 0 ) /* 3rd bit */
+ {
+ if( Rd_decode_bit( decoder->rdec, &decoder->bm_len[state][pos_state] ) == 0 ) /* 4th bit */
+ { state = St_set_short_rep( state );
+ LZd_put_byte( decoder, LZd_get_byte( decoder, rep0 ) ); continue; }
+ }
+ else
{
unsigned distance;
- if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep1[state] ) == 0 )
+ if( Rd_decode_bit( decoder->rdec, &decoder->bm_rep1[state] ) == 0 ) /* 4th bit */
distance = rep1;
else
{
- if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep2[state] ) == 0 )
+ if( Rd_decode_bit( decoder->rdec, &decoder->bm_rep2[state] ) == 0 ) /* 5th bit */
distance = rep2;
- else { distance = rep3; rep3 = rep2; }
+ else
+ { distance = rep3; rep3 = rep2; }
rep2 = rep1;
}
rep1 = rep0;
rep0 = distance;
}
- else
- {
- if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_len[state][pos_state] ) == 0 )
- { state = St_set_short_rep( state ); len = 1; }
- }
- if( len == 0 )
- {
- state = St_set_rep( state );
- len = min_match_len + Led_decode( &decoder->rep_match_len_decoder, decoder->range_decoder, pos_state );
- }
+ state = St_set_rep( state );
+ len = min_match_len + Rd_decode_len( decoder->rdec, &decoder->rep_len_model, pos_state );
}
else
{
int dis_slot;
const unsigned rep0_saved = rep0;
- len = min_match_len + Led_decode( &decoder->len_decoder, decoder->range_decoder, pos_state );
- dis_slot = Rd_decode_tree6( decoder->range_decoder, decoder->bm_dis_slot[get_dis_state(len)] );
+ len = min_match_len + Rd_decode_len( decoder->rdec, &decoder->match_len_model, pos_state );
+ dis_slot = Rd_decode_tree6( decoder->rdec, decoder->bm_dis_slot[get_dis_state(len)] );
if( dis_slot < start_dis_model ) rep0 = dis_slot;
else
{
const int direct_bits = ( dis_slot >> 1 ) - 1;
rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
if( dis_slot < end_dis_model )
- rep0 += Rd_decode_tree_reversed( decoder->range_decoder,
+ rep0 += Rd_decode_tree_reversed( decoder->rdec,
decoder->bm_dis + rep0 - dis_slot - 1,
direct_bits );
else
{
- rep0 += Rd_decode( decoder->range_decoder, direct_bits - dis_align_bits ) << dis_align_bits;
- rep0 += Rd_decode_tree_reversed4( decoder->range_decoder, decoder->bm_align );
+ rep0 += Rd_decode( decoder->rdec, direct_bits - dis_align_bits ) << dis_align_bits;
+ rep0 += Rd_decode_tree_reversed4( decoder->rdec, decoder->bm_align );
if( rep0 == 0xFFFFFFFFU ) /* Marker found */
{
rep0 = rep0_saved;
- Rd_normalize( decoder->range_decoder );
+ Rd_normalize( decoder->rdec );
LZd_flush_data( decoder );
if( len == min_match_len ) /* End Of Stream marker */
{
@@ -284,7 +282,7 @@ int LZd_decode_member( struct LZ_decoder * const decoder,
}
if( len == min_match_len + 1 ) /* Sync Flush marker */
{
- Rd_load( decoder->range_decoder ); continue;
+ Rd_load( decoder->rdec ); continue;
}
if( pp->verbosity >= 0 )
{
diff --git a/decoder.h b/decoder.h
index c18ccbe..1c6ed3d 100644
--- a/decoder.h
+++ b/decoder.h
@@ -140,24 +140,24 @@ static inline int Rd_decode_bit( struct Range_decoder * const rdec,
static inline int Rd_decode_tree( struct Range_decoder * const rdec,
Bit_model bm[], const int num_bits )
{
- int model = 1;
+ int symbol = 1;
int i;
for( i = num_bits; i > 0; --i )
- model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
- return model - (1 << num_bits);
+ symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
+ return symbol - (1 << num_bits);
}
static inline int Rd_decode_tree6( struct Range_decoder * const rdec,
Bit_model bm[] )
{
- int model = 1;
- model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
- model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
- model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
- model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
- model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
- model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
- return model - (1 << 6);
+ int symbol = 1;
+ symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
+ symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
+ symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
+ symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
+ symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
+ symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
+ return symbol - (1 << 6);
}
static inline int Rd_decode_tree_reversed( struct Range_decoder * const rdec,
@@ -213,36 +213,17 @@ static inline int Rd_decode_matched( struct Range_decoder * const rdec,
return symbol - 0x100;
}
-
-struct Len_decoder
- {
- Bit_model choice1;
- Bit_model choice2;
- Bit_model bm_low[pos_states][len_low_symbols];
- Bit_model bm_mid[pos_states][len_mid_symbols];
- Bit_model bm_high[len_high_symbols];
- };
-
-static inline void Led_init( struct Len_decoder * const len_decoder )
- {
- Bm_init( &len_decoder->choice1 );
- Bm_init( &len_decoder->choice2 );
- Bm_array_init( len_decoder->bm_low[0], pos_states * len_low_symbols );
- Bm_array_init( len_decoder->bm_mid[0], pos_states * len_mid_symbols );
- Bm_array_init( len_decoder->bm_high, len_high_symbols );
- }
-
-static inline int Led_decode( struct Len_decoder * const len_decoder,
- struct Range_decoder * const rdec,
- const int pos_state )
+static inline int Rd_decode_len( struct Range_decoder * const rdec,
+ struct Len_model * const lm,
+ const int pos_state )
{
- if( Rd_decode_bit( rdec, &len_decoder->choice1 ) == 0 )
- return Rd_decode_tree( rdec, len_decoder->bm_low[pos_state], len_low_bits );
- if( Rd_decode_bit( rdec, &len_decoder->choice2 ) == 0 )
+ if( Rd_decode_bit( rdec, &lm->choice1 ) == 0 )
+ return Rd_decode_tree( rdec, lm->bm_low[pos_state], len_low_bits );
+ if( Rd_decode_bit( rdec, &lm->choice2 ) == 0 )
return len_low_symbols +
- Rd_decode_tree( rdec, len_decoder->bm_mid[pos_state], len_mid_bits );
+ Rd_decode_tree( rdec, lm->bm_mid[pos_state], len_mid_bits );
return len_low_symbols + len_mid_symbols +
- Rd_decode_tree( rdec, len_decoder->bm_high, len_high_bits );
+ Rd_decode_tree( rdec, lm->bm_high, len_high_bits );
}
@@ -269,9 +250,9 @@ struct LZ_decoder
Bit_model bm_dis[modeled_distances-end_dis_model];
Bit_model bm_align[dis_align_size];
- struct Range_decoder * range_decoder;
- struct Len_decoder len_decoder;
- struct Len_decoder rep_match_len_decoder;
+ struct Range_decoder * rdec;
+ struct Len_model match_len_model;
+ struct Len_model rep_len_model;
};
void LZd_flush_data( struct LZ_decoder * const decoder );
@@ -322,7 +303,7 @@ static inline void LZd_copy_block( struct LZ_decoder * const decoder,
static inline bool LZd_init( struct LZ_decoder * const decoder,
const File_header header,
- struct Range_decoder * const rdec, const int ofd )
+ struct Range_decoder * const rde, const int ofd )
{
decoder->partial_data_pos = 0;
decoder->dictionary_size = Fh_get_dictionary_size( header );
@@ -346,9 +327,9 @@ static inline bool LZd_init( struct LZ_decoder * const decoder,
Bm_array_init( decoder->bm_dis, modeled_distances - end_dis_model );
Bm_array_init( decoder->bm_align, dis_align_size );
- decoder->range_decoder = rdec;
- Led_init( &decoder->len_decoder );
- Led_init( &decoder->rep_match_len_decoder );
+ decoder->rdec = rde;
+ Lm_init( &decoder->match_len_model );
+ Lm_init( &decoder->rep_len_model );
decoder->buffer[decoder->buffer_size-1] = 0; /* prev_byte of first_byte */
return true;
}
diff --git a/doc/clzip.1 b/doc/clzip.1
index 02181a7..4fc2a26 100644
--- a/doc/clzip.1
+++ b/doc/clzip.1
@@ -1,5 +1,5 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1.
-.TH CLZIP "1" "February 2013" "Clzip 1.4" "User Commands"
+.TH CLZIP "1" "May 2013" "Clzip 1.5-pre1" "User Commands"
.SH NAME
Clzip \- reduces the size of files
.SH SYNOPSIS
@@ -71,6 +71,11 @@ The bidimensional parameter space of LZMA can't be mapped to a linear
scale optimal for all files. If your files are large, very repetitive,
etc, you may need to use the \fB\-\-match\-length\fR and \fB\-\-dictionary\-size\fR
options directly to achieve optimal performance.
+.PP
+Exit status: 0 for a normal exit, 1 for environmental problems (file
+not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
+invalid input file, 3 for an internal consistency error (eg, bug) which
+caused clzip to panic.
.SH "REPORTING BUGS"
Report bugs to lzip\-bug@nongnu.org
.br
diff --git a/doc/clzip.info b/doc/clzip.info
index ccec058..41723f3 100644
--- a/doc/clzip.info
+++ b/doc/clzip.info
@@ -12,7 +12,7 @@ File: clzip.info, Node: Top, Next: Introduction, Up: (dir)
Clzip Manual
************
-This manual is for Clzip (version 1.4, 18 February 2013).
+This manual is for Clzip (version 1.5-pre1, 13 May 2013).
* Menu:
@@ -42,6 +42,10 @@ gzip or bzip2. Clzip decompresses almost as fast as gzip and compresses
better than bzip2, which makes it well suited for software distribution
and data archiving.
+ Clzip uses the same well-defined exit status values used by bzip2,
+which makes it safer when used in pipes or scripts than compressors
+returning ambiguous warning values, like gzip.
+
Clzip uses the lzip file format; the files produced by clzip are
fully compatible with lzip-1.4 or newer. Clzip is in fact a C language
version of lzip, intended for embedded devices or systems lacking a C++
@@ -96,20 +100,16 @@ filename.tlz becomes filename.tar
anyothername becomes anyothername.out
As a self-check for your protection, clzip stores in the member
-trailer the 32-bit CRC of the original data and the size of the
-original data, to make sure that the decompressed version of the data
-is identical to the original. This guards against corruption of the
-compressed data, and against undetected bugs in clzip (hopefully very
-unlikely). The chances of data corruption going undetected are
-microscopic, less than one chance in 4000 million for each member
-processed. Be aware, though, that the check occurs upon decompression,
-so it can only tell you that something is wrong. It can't help you
-recover the original uncompressed data.
-
- Return values: 0 for a normal exit, 1 for environmental problems
-(file not found, invalid flags, I/O errors, etc), 2 to indicate a
-corrupt or invalid input file, 3 for an internal consistency error (eg,
-bug) which caused clzip to panic.
+trailer the 32-bit CRC of the original data, the size of the original
+data and the size of the member. These values, together with the value
+remaining in the range decoder and the end-of-stream marker, provide a
+very safe 4 factor integrity checking which guarantees that the
+decompressed version of the data is identical to the original. This
+guards against corruption of the compressed data, and against
+undetected bugs in clzip (hopefully very unlikely). The chances of data
+corruption going undetected are microscopic. Be aware, though, that the
+check occurs upon decompression, so it can only tell you that something
+is wrong. It can't help you recover the original uncompressed data.

File: clzip.info, Node: Algorithm, Next: Invoking Clzip, Prev: Introduction, Up: Top
@@ -326,6 +326,12 @@ E exabyte (10^18) | Ei exbibyte (2^60)
Z zettabyte (10^21) | Zi zebibyte (2^70)
Y yottabyte (10^24) | Yi yobibyte (2^80)
+
+ Exit status: 0 for a normal exit, 1 for environmental problems (file
+not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
+invalid input file, 3 for an internal consistency error (eg, bug) which
+caused clzip to panic.
+

File: clzip.info, Node: File Format, Next: Examples, Prev: Invoking Clzip, Up: Top
@@ -378,6 +384,7 @@ additional information before, between, or after them.
Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).
Bits 7-5 contain the number of wedges (0 to 7) to subtract from
the base size to obtain the dictionary size.
+ Example: 0xD3 = (2^19 - 6 * 2^15) = (512KiB - 6 * 32KiB) = 320KiB
Valid values for dictionary size range from 4KiB to 512MiB.
`Lzma stream'
@@ -392,8 +399,9 @@ additional information before, between, or after them.
`Member size (8 bytes)'
Total size of the member, including header and trailer. This field
- acts as a distributed index, and facilitates safe recovery of
- undamaged members from multi-member files.
+ acts as a distributed index, allows the verification of stream
+ integrity, and facilitates safe recovery of undamaged members from
+ multi-member files.

@@ -509,12 +517,12 @@ Concept Index
Tag Table:
Node: Top226
Node: Introduction920
-Node: Algorithm4755
-Node: Invoking Clzip7279
-Node: File Format12551
-Node: Examples14860
-Node: Problems16821
-Node: Concept Index17347
+Node: Algorithm4811
+Node: Invoking Clzip7335
+Node: File Format12847
+Node: Examples15277
+Node: Problems17238
+Node: Concept Index17764

End Tag Table
diff --git a/doc/clzip.texinfo b/doc/clzip.texinfo
index 1d0479f..e372d60 100644
--- a/doc/clzip.texinfo
+++ b/doc/clzip.texinfo
@@ -6,8 +6,8 @@
@finalout
@c %**end of header
-@set UPDATED 18 February 2013
-@set VERSION 1.4
+@set UPDATED 13 May 2013
+@set VERSION 1.5-pre1
@dircategory Data Compression
@direntry
@@ -61,6 +61,10 @@ gzip or bzip2. Clzip decompresses almost as fast as gzip and compresses
better than bzip2, which makes it well suited for software distribution
and data archiving.
+Clzip uses the same well-defined exit status values used by bzip2, which
+makes it safer when used in pipes or scripts than compressors returning
+ambiguous warning values, like gzip.
+
Clzip uses the lzip file format; the files produced by clzip are fully
compatible with lzip-1.4 or newer. Clzip is in fact a C language version
of lzip, intended for embedded devices or systems lacking a C++
@@ -117,20 +121,16 @@ file from that of the compressed file as follows:
@end multitable
As a self-check for your protection, clzip stores in the member trailer
-the 32-bit CRC of the original data and the size of the original data,
-to make sure that the decompressed version of the data is identical to
-the original. This guards against corruption of the compressed data, and
-against undetected bugs in clzip (hopefully very unlikely). The chances
-of data corruption going undetected are microscopic, less than one
-chance in 4000 million for each member processed. Be aware, though, that
-the check occurs upon decompression, so it can only tell you that
-something is wrong. It can't help you recover the original uncompressed
-data.
-
-Return values: 0 for a normal exit, 1 for environmental problems (file
-not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
-invalid input file, 3 for an internal consistency error (eg, bug) which
-caused clzip to panic.
+the 32-bit CRC of the original data, the size of the original data and
+the size of the member. These values, together with the value remaining
+in the range decoder and the end-of-stream marker, provide a very safe 4
+factor integrity checking which guarantees that the decompressed version
+of the data is identical to the original. This guards against corruption
+of the compressed data, and against undetected bugs in clzip (hopefully
+very unlikely). The chances of data corruption going undetected are
+microscopic. Be aware, though, that the check occurs upon decompression,
+so it can only tell you that something is wrong. It can't help you
+recover the original uncompressed data.
@node Algorithm
@@ -349,6 +349,12 @@ Table of SI and binary prefixes (unit multipliers):
@item Y @tab yottabyte (10^24) @tab | @tab Yi @tab yobibyte (2^80)
@end multitable
+@sp 1
+Exit status: 0 for a normal exit, 1 for environmental problems (file not
+found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
+invalid input file, 3 for an internal consistency error (eg, bug) which
+caused clzip to panic.
+
@node File Format
@chapter File Format
@@ -404,6 +410,7 @@ wedges between 0 and 7. The size of a wedge is (base_size / 16).@*
Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).@*
Bits 7-5 contain the number of wedges (0 to 7) to subtract from the
base size to obtain the dictionary size.@*
+Example: 0xD3 = (2^19 - 6 * 2^15) = (512KiB - 6 * 32KiB) = 320KiB@*
Valid values for dictionary size range from 4KiB to 512MiB.
@item Lzma stream
@@ -418,8 +425,8 @@ Size of the uncompressed original data.
@item Member size (8 bytes)
Total size of the member, including header and trailer. This field acts
-as a distributed index, and facilitates safe recovery of undamaged
-members from multi-member files.
+as a distributed index, allows the verification of stream integrity, and
+facilitates safe recovery of undamaged members from multi-member files.
@end table
diff --git a/encoder.c b/encoder.c
index cb6c8d6..5b005b0 100644
--- a/encoder.c
+++ b/encoder.c
@@ -23,7 +23,7 @@
#include <stdlib.h>
#include <string.h>
-#include "clzip.h"
+#include "lzip.h"
#include "encoder.h"
@@ -259,22 +259,22 @@ void Lee_encode( struct Len_encoder * const len_encoder,
symbol -= min_match_len;
if( symbol < len_low_symbols )
{
- Re_encode_bit( renc, &len_encoder->choice1, 0 );
- Re_encode_tree( renc, len_encoder->bm_low[pos_state], symbol, len_low_bits );
+ Re_encode_bit( renc, &len_encoder->lm.choice1, 0 );
+ Re_encode_tree( renc, len_encoder->lm.bm_low[pos_state], symbol, len_low_bits );
}
else
{
- Re_encode_bit( renc, &len_encoder->choice1, 1 );
+ Re_encode_bit( renc, &len_encoder->lm.choice1, 1 );
if( symbol < len_low_symbols + len_mid_symbols )
{
- Re_encode_bit( renc, &len_encoder->choice2, 0 );
- Re_encode_tree( renc, len_encoder->bm_mid[pos_state],
+ Re_encode_bit( renc, &len_encoder->lm.choice2, 0 );
+ Re_encode_tree( renc, len_encoder->lm.bm_mid[pos_state],
symbol - len_low_symbols, len_mid_bits );
}
else
{
- Re_encode_bit( renc, &len_encoder->choice2, 1 );
- Re_encode_tree( renc, len_encoder->bm_high,
+ Re_encode_bit( renc, &len_encoder->lm.choice2, 1 );
+ Re_encode_tree( renc, len_encoder->lm.bm_high,
symbol - len_low_symbols - len_mid_symbols, len_high_bits );
}
}
@@ -369,8 +369,8 @@ bool LZe_init( struct LZ_encoder * const encoder,
encoder->matchfinder = mf;
if( !Re_init( &encoder->range_encoder, outfd ) ) return false;
- Lee_init( &encoder->len_encoder, encoder->matchfinder->match_len_limit );
- Lee_init( &encoder->rep_match_len_encoder, encoder->matchfinder->match_len_limit );
+ Lee_init( &encoder->match_len_encoder, encoder->matchfinder->match_len_limit );
+ Lee_init( &encoder->rep_len_encoder, encoder->matchfinder->match_len_limit );
encoder->num_dis_slots =
2 * real_bits( encoder->matchfinder->dictionary_size - 1 );
@@ -473,7 +473,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
for( len = min_match_len; len <= replens[rep]; ++len )
Tr_update( &encoder->trials[len], price +
- Lee_price( &encoder->rep_match_len_encoder, len, pos_state ),
+ Lee_price( &encoder->rep_len_encoder, len, pos_state ),
rep, 0 );
}
@@ -654,7 +654,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
LZe_price_rep( encoder, rep, cur_state, pos_state );
for( i = min_match_len; i <= len; ++i )
Tr_update( &encoder->trials[cur+i], price +
- Lee_price( &encoder->rep_match_len_encoder, i, pos_state ),
+ Lee_price( &encoder->rep_len_encoder, i, pos_state ),
rep, cur );
if( rep == 0 ) start_len = len + 1; /* discard shorter matches */
@@ -671,7 +671,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
pos_state2 = ( pos_state + len ) & pos_state_mask;
state2 = St_set_rep( cur_state );
- price += Lee_price( &encoder->rep_match_len_encoder, len, pos_state ) +
+ price += Lee_price( &encoder->rep_len_encoder, len, pos_state ) +
price0( encoder->bm_match[state2][pos_state2] ) +
LZe_price_matched( encoder, data[len-1], data[len], data[len-dis] );
pos_state2 = ( pos_state2 + 1 ) & pos_state_mask;
@@ -829,7 +829,7 @@ bool LZe_encode_member( struct LZ_encoder * const encoder,
if( len == 1 ) state = St_set_short_rep( state );
else
{
- Lee_encode( &encoder->rep_match_len_encoder, &encoder->range_encoder, len, pos_state );
+ Lee_encode( &encoder->rep_len_encoder, &encoder->range_encoder, len, pos_state );
state = St_set_rep( state );
}
}
diff --git a/encoder.h b/encoder.h
index e39d7c4..a69f552 100644
--- a/encoder.h
+++ b/encoder.h
@@ -107,9 +107,9 @@ static inline int price_symbol_reversed( const Bit_model bm[], int symbol,
for( i = num_bits; i > 0; --i )
{
const int bit = symbol & 1;
- symbol >>= 1;
price += price_bit( bm[model], bit );
model = ( model << 1 ) | bit;
+ symbol >>= 1;
}
return price;
}
@@ -376,11 +376,7 @@ static inline void Re_encode_matched( struct Range_encoder * const renc,
struct Len_encoder
{
- Bit_model choice1;
- Bit_model choice2;
- Bit_model bm_low[pos_states][len_low_symbols];
- Bit_model bm_mid[pos_states][len_mid_symbols];
- Bit_model bm_high[len_high_symbols];
+ struct Len_model lm;
int prices[pos_states][max_len_symbols];
int len_symbols;
int counters[pos_states];
@@ -390,21 +386,21 @@ static inline void Lee_update_prices( struct Len_encoder * const len_encoder,
const int pos_state )
{
int * const pps = len_encoder->prices[pos_state];
- int tmp = price0( len_encoder->choice1 );
+ int tmp = price0( len_encoder->lm.choice1 );
int len = 0;
for( ; len < len_low_symbols && len < len_encoder->len_symbols; ++len )
pps[len] = tmp +
- price_symbol( len_encoder->bm_low[pos_state], len, len_low_bits );
- tmp = price1( len_encoder->choice1 );
+ price_symbol( len_encoder->lm.bm_low[pos_state], len, len_low_bits );
+ tmp = price1( len_encoder->lm.choice1 );
for( ; len < len_low_symbols + len_mid_symbols && len < len_encoder->len_symbols; ++len )
- pps[len] = tmp + price0( len_encoder->choice2 ) +
- price_symbol( len_encoder->bm_mid[pos_state], len - len_low_symbols, len_mid_bits );
+ pps[len] = tmp + price0( len_encoder->lm.choice2 ) +
+ price_symbol( len_encoder->lm.bm_mid[pos_state], len - len_low_symbols, len_mid_bits );
for( ; len < len_encoder->len_symbols; ++len )
/* using 4 slots per value makes "Lee_price" faster */
len_encoder->prices[3][len] = len_encoder->prices[2][len] =
len_encoder->prices[1][len] = len_encoder->prices[0][len] =
- tmp + price1( len_encoder->choice2 ) +
- price_symbol( len_encoder->bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits );
+ tmp + price1( len_encoder->lm.choice2 ) +
+ price_symbol( len_encoder->lm.bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits );
len_encoder->counters[pos_state] = len_encoder->len_symbols;
}
@@ -412,11 +408,7 @@ static inline void Lee_init( struct Len_encoder * const len_encoder,
const int match_len_limit )
{
int i;
- Bm_init( &len_encoder->choice1 );
- Bm_init( &len_encoder->choice2 );
- Bm_array_init( len_encoder->bm_low[0], pos_states * len_low_symbols );
- Bm_array_init( len_encoder->bm_mid[0], pos_states * len_mid_symbols );
- Bm_array_init( len_encoder->bm_high, len_high_symbols );
+ Lm_init( &len_encoder->lm );
len_encoder->len_symbols = match_len_limit + 1 - min_match_len;
for( i = 0; i < pos_states; ++i ) Lee_update_prices( len_encoder, i );
}
@@ -502,8 +494,8 @@ struct LZ_encoder
struct Matchfinder * matchfinder;
struct Range_encoder range_encoder;
- struct Len_encoder len_encoder;
- struct Len_encoder rep_match_len_encoder;
+ struct Len_encoder match_len_encoder;
+ struct Len_encoder rep_len_encoder;
int num_dis_slots;
struct Pair pairs[max_match_len+1];
@@ -572,7 +564,7 @@ static inline int LZe_price_rep0_len( const struct LZ_encoder * const encoder,
const State state, const int pos_state )
{
return LZe_price_rep( encoder, 0, state, pos_state ) +
- Lee_price( &encoder->rep_match_len_encoder, len, pos_state );
+ Lee_price( &encoder->rep_len_encoder, len, pos_state );
}
static inline int LZe_price_dis( const struct LZ_encoder * const encoder,
@@ -589,7 +581,7 @@ static inline int LZe_price_pair( const struct LZ_encoder * const encoder,
const int dis, const int len,
const int pos_state )
{
- return Lee_price( &encoder->len_encoder, len, pos_state ) +
+ return Lee_price( &encoder->match_len_encoder, len, pos_state ) +
LZe_price_dis( encoder, dis, get_dis_state( len ) );
}
@@ -620,7 +612,7 @@ static inline void LZe_encode_pair( struct LZ_encoder * const encoder,
const int pos_state )
{
const int dis_slot = get_slot( dis );
- Lee_encode( &encoder->len_encoder, &encoder->range_encoder, len, pos_state );
+ Lee_encode( &encoder->match_len_encoder, &encoder->range_encoder, len, pos_state );
Re_encode_tree( &encoder->range_encoder,
encoder->bm_dis_slot[get_dis_state(len)],
dis_slot, dis_slot_bits );
diff --git a/clzip.h b/lzip.h
index dd63438..1996e97 100644
--- a/clzip.h
+++ b/lzip.h
@@ -94,6 +94,24 @@ static inline void Bm_init( Bit_model * const probability )
static inline void Bm_array_init( Bit_model * const p, const int size )
{ int i = 0; while( i < size ) p[i++] = bit_model_total / 2; }
+struct Len_model
+ {
+ Bit_model choice1;
+ Bit_model choice2;
+ Bit_model bm_low[pos_states][len_low_symbols];
+ Bit_model bm_mid[pos_states][len_mid_symbols];
+ Bit_model bm_high[len_high_symbols];
+ };
+
+static inline void Lm_init( struct Len_model * const lm )
+ {
+ Bm_init( &lm->choice1 );
+ Bm_init( &lm->choice2 );
+ Bm_array_init( lm->bm_low[0], pos_states * len_low_symbols );
+ Bm_array_init( lm->bm_mid[0], pos_states * len_mid_symbols );
+ Bm_array_init( lm->bm_high, len_high_symbols );
+ }
+
struct Pretty_print
{
diff --git a/main.c b/main.c
index aea4e18..9ca4f90 100644
--- a/main.c
+++ b/main.c
@@ -15,7 +15,7 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
- Return values: 0 for a normal exit, 1 for environmental problems
+ Exit status: 0 for a normal exit, 1 for environmental problems
(file not found, invalid flags, I/O errors, etc), 2 to indicate a
corrupt or invalid input file, 3 for an internal consistency error
(eg, bug) which caused clzip to panic.
@@ -52,7 +52,7 @@
#endif
#include "carg_parser.h"
-#include "clzip.h"
+#include "lzip.h"
#include "decoder.h"
#include "encoder.h"
@@ -127,6 +127,10 @@ static void show_help( void )
"scale optimal for all files. If your files are large, very repetitive,\n"
"etc, you may need to use the --match-length and --dictionary-size\n"
"options directly to achieve optimal performance.\n"
+ "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
+ "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
+ "invalid input file, 3 for an internal consistency error (eg, bug) which\n"
+ "caused clzip to panic.\n"
"\nReport bugs to lzip-bug@nongnu.org\n"
"Clzip home page: http://www.nongnu.org/lzip/clzip.html\n" );
}
@@ -155,8 +159,9 @@ void show_header( const File_header header )
for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i )
{ num /= factor; if( num % factor != 0 ) exact = false;
p = prefix[i]; np = ""; }
- fprintf( stderr, "version %d, dictionary size %s%4u %sB. ",
- Fh_version( header ), np, num, p );
+ if( verbosity >= 4 )
+ fprintf( stderr, "version %d, ", Fh_version( header ) );
+ fprintf( stderr, "dictionary size %s%4u %sB. ", np, num, p );
}
@@ -549,7 +554,7 @@ static int decompress( const int infd, struct Pretty_print * const pp,
retval = 2; break; }
if( verbosity >= 2 || ( verbosity == 1 && first_member ) )
- { Pp_show_msg( pp, 0 ); if( verbosity >= 2 ) show_header( header ); }
+ { Pp_show_msg( pp, 0 ); if( verbosity >= 3 ) show_header( header ); }
if( !LZd_init( &decoder, header, &rdec, outfd ) )
{
@@ -573,13 +578,11 @@ static int decompress( const int infd, struct Pretty_print * const pp,
retval = 2; break;
}
if( verbosity >= 2 )
- { if( testing ) fprintf( stderr, "ok\n" );
- else fprintf( stderr, "done\n" ); Pp_reset( pp ); }
+ { fprintf( stderr, testing ? "ok\n" : "done\n" ); Pp_reset( pp ); }
}
Rd_free( &rdec );
if( verbosity == 1 && retval == 0 )
- { if( testing ) fprintf( stderr, "ok\n" );
- else fprintf( stderr, "done\n" ); }
+ fprintf( stderr, testing ? "ok\n" : "done\n" );
return retval;
}
@@ -702,6 +705,7 @@ int main( const int argc, const char * const argv[] )
{ 'h', "help", ap_no },
{ 'k', "keep", ap_no },
{ 'm', "match-length", ap_yes },
+ { 'n', "threads", ap_yes },
{ 'o', "output", ap_yes },
{ 'q', "quiet", ap_no },
{ 's', "dictionary-size", ap_yes },
@@ -741,6 +745,7 @@ int main( const int argc, const char * const argv[] )
case 'k': keep_input_files = true; break;
case 'm': encoder_options.match_len_limit =
getnum( arg, min_match_len_limit, max_match_len ); break;
+ case 'n': break;
case 'o': default_output_filename = arg; break;
case 'q': verbosity = -1; break;
case 's': encoder_options.dictionary_size = get_dict_size( arg );
diff --git a/testsuite/check.sh b/testsuite/check.sh
index ed0ca50..d38ebb0 100755
--- a/testsuite/check.sh
+++ b/testsuite/check.sh
@@ -26,6 +26,15 @@ fail=0
printf "testing clzip-%s..." "$2"
+"${LZIP}" -cqs-1 in > /dev/null
+if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
+"${LZIP}" -cqs0 in > /dev/null
+if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
+"${LZIP}" -cqs4095 in > /dev/null
+if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
+"${LZIP}" -cqm274 in > /dev/null
+if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
+
"${LZIP}" -t "${testdir}"/test.txt.lz || fail=1
"${LZIP}" -cd "${testdir}"/test.txt.lz > copy || fail=1
cmp in copy || fail=1
@@ -38,15 +47,6 @@ if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
cmp in copy || fail=1
printf .
-"${LZIP}" -cqs-1 in > out
-if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
-"${LZIP}" -cqs0 in > out
-if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
-"${LZIP}" -cqs4095 in > out
-if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
-"${LZIP}" -cqm274 in > out
-if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
-
for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
"${LZIP}" -k -$i in || fail=1
mv -f in.lz copy.lz || fail=1