From ca285b91ed1b2f1cc91533f7e0b2cfab25c1712e Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 6 Nov 2015 12:37:46 +0100 Subject: Merging upstream version 1.5~pre1. Signed-off-by: Daniel Baumann --- ChangeLog | 8 ++ INSTALL | 7 +- Makefile.in | 6 +- NEWS | 15 ++- README | 23 +++-- carg_parser.c | 10 +- carg_parser.h | 2 +- clzip.h | 268 ------------------------------------------------- configure | 28 ++++-- decoder.c | 68 +++++++------ decoder.h | 71 +++++-------- doc/clzip.1 | 7 +- doc/clzip.info | 54 +++++----- doc/clzip.texinfo | 43 ++++---- encoder.c | 28 +++--- encoder.h | 38 +++---- lzip.h | 286 +++++++++++++++++++++++++++++++++++++++++++++++++++++ main.c | 23 +++-- testsuite/check.sh | 18 ++-- 19 files changed, 521 insertions(+), 482 deletions(-) delete mode 100644 clzip.h create mode 100644 lzip.h diff --git a/ChangeLog b/ChangeLog index 0f365bb..f1ce217 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2013-05-13 Antonio Diaz Diaz + + * Version 1.5-pre1 released. + * Decompression time has been reduced by 1%. + * main.c (show_header): Show header version if verbosity >= 4. + * Ignore option '-n, --threads' for compatibility with plzip. + * configure: Options now accept a separate argument. + 2013-02-18 Antonio Diaz Diaz * Version 1.4 released. diff --git a/INSTALL b/INSTALL index 4466443..7670406 100644 --- a/INSTALL +++ b/INSTALL @@ -1,7 +1,7 @@ Requirements ------------ You will need a C compiler. -I use gcc 4.7.2 and 3.3.6, but the code should compile with any +I use gcc 4.8.0 and 3.3.6, but the code should compile with any standards compliant compiler. Gcc is available at http://gcc.gnu.org. @@ -36,8 +36,9 @@ the main archive. typing 'make install-bin', 'make install-info' or 'make install-man' respectively. -5a. Type 'make install-as-lzip' to install the program and any data - files and documentation, and link the program to the name 'lzip'. + Instead of 'make install', you can type 'make install-as-lzip' to + install the program and any data files and documentation, and link + the program to the name 'lzip'. Another way diff --git a/Makefile.in b/Makefile.in index a27a481..bc932b7 100644 --- a/Makefile.in +++ b/Makefile.in @@ -29,9 +29,9 @@ main.o : main.c $(objs) : Makefile carg_parser.o : carg_parser.h -decoder.o : clzip.h decoder.h -encoder.o : clzip.h encoder.h -main.o : carg_parser.h clzip.h decoder.h encoder.h +decoder.o : lzip.h decoder.h +encoder.o : lzip.h encoder.h +main.o : carg_parser.h lzip.h decoder.h encoder.h doc : info man diff --git a/NEWS b/NEWS index e854c8a..ec9961a 100644 --- a/NEWS +++ b/NEWS @@ -1,13 +1,10 @@ -Changes in version 1.4: +Changes in version 1.5: -Multi-step trials have been implemented. +Decompression time has been reduced by 1%. -Compression ratio has been slightly increased. +File version is now shown only if verbosity >= 4. -Compression time has been reduced by 10%. +Option "-n, --threads" is now accepted and ignored for compatibility +with plzip. -Decompression time has been reduced by 8%. - -The target "install-as-lzip" has been added to the Makefile. - -The target "install-bin" has been added to the Makefile. +"configure" now accepts options with a separate argument. diff --git a/README b/README index 72d434b..26d527d 100644 --- a/README +++ b/README @@ -6,6 +6,10 @@ gzip or bzip2. Clzip decompresses almost as fast as gzip and compresses better than bzip2, which makes it well suited for software distribution and data archiving. +Clzip uses the same well-defined exit status values used by bzip2, which +makes it safer when used in pipes or scripts than compressors returning +ambiguous warning values, like gzip. + Clzip uses the lzip file format; the files produced by clzip are fully compatible with lzip-1.4 or newer. Clzip is in fact a C language version of lzip, intended for embedded devices or systems lacking a C++ @@ -47,15 +51,16 @@ memory requirement is affected at compression time by the choice of dictionary size limit. As a self-check for your protection, clzip stores in the member trailer -the 32-bit CRC of the original data and the size of the original data, -to make sure that the decompressed version of the data is identical to -the original. This guards against corruption of the compressed data, and -against undetected bugs in clzip (hopefully very unlikely). The chances -of data corruption going undetected are microscopic, less than one -chance in 4000 million for each member processed. Be aware, though, that -the check occurs upon decompression, so it can only tell you that -something is wrong. It can't help you recover the original uncompressed -data. +the 32-bit CRC of the original data, the size of the original data and +the size of the member. These values, together with the value remaining +in the range decoder and the end-of-stream marker, provide a very safe 4 +factor integrity checking which guarantees that the decompressed version +of the data is identical to the original. This guards against corruption +of the compressed data, and against undetected bugs in clzip (hopefully +very unlikely). The chances of data corruption going undetected are +microscopic. Be aware, though, that the check occurs upon decompression, +so it can only tell you that something is wrong. It can't help you +recover the original uncompressed data. Clzip implements a simplified version of the LZMA (Lempel-Ziv-Markov chain-Algorithm) algorithm. The high compression of LZMA comes from diff --git a/carg_parser.c b/carg_parser.c index 973bb7e..a86f76f 100644 --- a/carg_parser.c +++ b/carg_parser.c @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 + Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify @@ -89,15 +89,14 @@ static char parse_long_option( struct Arg_parser * const ap, int * const argindp ) { unsigned len; - int index = -1; - int i; + int index = -1, i; char exact = 0, ambig = 0; for( len = 0; opt[len+2] && opt[len+2] != '='; ++len ) ; /* Test all long options for either exact match or abbreviated matches. */ for( i = 0; options[i].code != 0; ++i ) - if( options[i].name && !strncmp( options[i].name, &opt[2], len ) ) + if( options[i].name && strncmp( options[i].name, &opt[2], len ) == 0 ) { if( strlen( options[i].name ) == len ) /* Exact match found */ { index = i; exact = 1; break; } @@ -165,8 +164,7 @@ static char parse_short_option( struct Arg_parser * const ap, while( cind > 0 ) { - int index = -1; - int i; + int index = -1, i; const unsigned char code = opt[cind]; char code_str[2]; code_str[0] = code; code_str[1] = 0; diff --git a/carg_parser.h b/carg_parser.h index 3575dd7..41aa7b3 100644 --- a/carg_parser.h +++ b/carg_parser.h @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 + Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify diff --git a/clzip.h b/clzip.h deleted file mode 100644 index dd63438..0000000 --- a/clzip.h +++ /dev/null @@ -1,268 +0,0 @@ -/* Clzip - Data compressor based on the LZMA algorithm - Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#ifndef max - #define max(x,y) ((x) >= (y) ? (x) : (y)) -#endif -#ifndef min - #define min(x,y) ((x) <= (y) ? (x) : (y)) -#endif - -typedef int State; - -enum { states = 12 }; - -static inline bool St_is_char( const State st ) { return st < 7; } - -static inline State St_set_char( const State st ) - { - static const State next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 }; - return next[st]; - } - -static inline State St_set_match( const State st ) - { return ( ( st < 7 ) ? 7 : 10 ); } - -static inline State St_set_rep( const State st ) - { return ( ( st < 7 ) ? 8 : 11 ); } - -static inline State St_set_short_rep( const State st ) - { return ( ( st < 7 ) ? 9 : 11 ); } - - -enum { - min_dictionary_bits = 12, - min_dictionary_size = 1 << min_dictionary_bits, - max_dictionary_bits = 29, - max_dictionary_size = 1 << max_dictionary_bits, - literal_context_bits = 3, - pos_state_bits = 2, - pos_states = 1 << pos_state_bits, - pos_state_mask = pos_states - 1, - - dis_slot_bits = 6, - start_dis_model = 4, - end_dis_model = 14, - modeled_distances = 1 << (end_dis_model / 2), /* 128 */ - dis_align_bits = 4, - dis_align_size = 1 << dis_align_bits, - - len_low_bits = 3, - len_mid_bits = 3, - len_high_bits = 8, - len_low_symbols = 1 << len_low_bits, - len_mid_symbols = 1 << len_mid_bits, - len_high_symbols = 1 << len_high_bits, - max_len_symbols = len_low_symbols + len_mid_symbols + len_high_symbols, - - min_match_len = 2, /* must be 2 */ - max_match_len = min_match_len + max_len_symbols - 1, /* 273 */ - min_match_len_limit = 5, - - max_dis_states = 4 }; - -static inline int get_dis_state( const int len ) - { return min( len - min_match_len, max_dis_states - 1 ); } - -static inline int get_lit_state( const uint8_t prev_byte ) - { return ( prev_byte >> ( 8 - literal_context_bits ) ); } - - -enum { bit_model_move_bits = 5, - bit_model_total_bits = 11, - bit_model_total = 1 << bit_model_total_bits }; - -typedef int Bit_model; - -static inline void Bm_init( Bit_model * const probability ) - { *probability = bit_model_total / 2; } - -static inline void Bm_array_init( Bit_model * const p, const int size ) - { int i = 0; while( i < size ) p[i++] = bit_model_total / 2; } - - -struct Pretty_print - { - const char * name; - const char * stdin_name; - int longest_name; - int verbosity; - bool first_post; - }; - -void Pp_init( struct Pretty_print * const pp, const char * const filenames[], - const int num_filenames, const int v ); - -static inline void Pp_set_name( struct Pretty_print * const pp, - const char * const filename ) - { - if( filename && filename[0] && strcmp( filename, "-" ) != 0 ) - pp->name = filename; - else pp->name = pp->stdin_name; - pp->first_post = true; - } - -static inline void Pp_reset( struct Pretty_print * const pp ) - { if( pp->name && pp->name[0] ) pp->first_post = true; } -void Pp_show_msg( struct Pretty_print * const pp, const char * const msg ); - - -typedef uint32_t CRC32[256]; /* Table of CRCs of all 8-bit messages. */ - -extern CRC32 crc32; - -static inline void CRC32_init( void ) - { - unsigned n; - for( n = 0; n < 256; ++n ) - { - unsigned c = n; - int k; - for( k = 0; k < 8; ++k ) - { if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; } - crc32[n] = c; - } - } - -static inline void CRC32_update_byte( uint32_t * const crc, const uint8_t byte ) - { *crc = crc32[(*crc^byte)&0xFF] ^ ( *crc >> 8 ); } - -static inline void CRC32_update_buf( uint32_t * const crc, - const uint8_t * const buffer, const int size ) - { - int i; - for( i = 0; i < size; ++i ) - *crc = crc32[(*crc^buffer[i])&0xFF] ^ ( *crc >> 8 ); - } - - -static inline int real_bits( unsigned value ) - { - int bits = 0; - while( value > 0 ) { value >>= 1; ++bits; } - return bits; - } - - -static const uint8_t magic_string[4] = { 0x4C, 0x5A, 0x49, 0x50 }; /* "LZIP" */ - -typedef uint8_t File_header[6]; /* 0-3 magic bytes */ - /* 4 version */ - /* 5 coded_dict_size */ -enum { Fh_size = 6 }; - -static inline void Fh_set_magic( File_header data ) - { memcpy( data, magic_string, 4 ); data[4] = 1; } - -static inline bool Fh_verify_magic( const File_header data ) - { return ( memcmp( data, magic_string, 4 ) == 0 ); } - -static inline uint8_t Fh_version( const File_header data ) - { return data[4]; } - -static inline bool Fh_verify_version( const File_header data ) - { return ( data[4] <= 1 ); } - -static inline unsigned Fh_get_dictionary_size( const File_header data ) - { - unsigned sz = ( 1 << ( data[5] & 0x1F ) ); - if( sz > min_dictionary_size ) - sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 7 ); - return sz; - } - -static inline bool Fh_set_dictionary_size( File_header data, const int sz ) - { - if( sz >= min_dictionary_size && sz <= max_dictionary_size ) - { - data[5] = real_bits( sz - 1 ); - if( sz > min_dictionary_size ) - { - const int base_size = 1 << data[5]; - const int wedge = base_size / 16; - int i; - for( i = 7; i >= 1; --i ) - if( base_size - ( i * wedge ) >= sz ) - { data[5] |= ( i << 5 ); break; } - } - return true; - } - return false; - } - - -typedef uint8_t File_trailer[20]; - /* 0-3 CRC32 of the uncompressed data */ - /* 4-11 size of the uncompressed data */ - /* 12-19 member size including header and trailer */ - -enum { Ft_size = 20 }; - -static inline int Ft_versioned_size( const int version ) - { return ( ( version >= 1 ) ? 20 : 12 ); } - -static inline unsigned Ft_get_data_crc( const File_trailer data ) - { - unsigned tmp = 0; - int i; - for( i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; } - return tmp; - } - -static inline void Ft_set_data_crc( File_trailer data, unsigned crc ) - { - int i; - for( i = 0; i <= 3; ++i ) { data[i] = (uint8_t)crc; crc >>= 8; } - } - -static inline unsigned long long Ft_get_data_size( const File_trailer data ) - { - unsigned long long tmp = 0; - int i; - for( i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; } - return tmp; - } - -static inline void Ft_set_data_size( File_trailer data, unsigned long long sz ) - { - int i; - for( i = 4; i <= 11; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } - } - -static inline unsigned long long Ft_get_member_size( const File_trailer data ) - { - unsigned long long tmp = 0; - int i; - for( i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; } - return tmp; - } - -static inline void Ft_set_member_size( File_trailer data, unsigned long long sz ) - { - int i; - for( i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } - } - - -/* defined in decoder.c */ -int readblock( const int fd, uint8_t * const buf, const int size ); -int writeblock( const int fd, const uint8_t * const buf, const int size ); - -/* defined in main.c */ -void cleanup_and_fail( const int retval ); -void show_error( const char * const msg, const int errcode, const bool help ); -void internal_error( const char * const msg ); diff --git a/configure b/configure index a234bb3..81068f8 100755 --- a/configure +++ b/configure @@ -5,12 +5,10 @@ # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. -args= -no_create= pkgname=clzip -pkgversion=1.4 +pkgversion=1.5-pre1 progname=clzip -srctrigger=clzip.h +srctrigger=doc/clzip.texinfo # clear some things potentially inherited from environment. LC_ALL=C @@ -36,10 +34,12 @@ if [ ! -x /bin/gcc ] && fi # Loop over all args -while [ -n "$1" ] ; do +args= +no_create= +while [ $# != 0 ] ; do # Get the first arg, and shuffle - option=$1 + option=$1 ; arg2=no shift # Add the argument quoted to args @@ -74,6 +74,14 @@ while [ -n "$1" ] ; do --version | -V) echo "Configure script for ${pkgname} version ${pkgversion}" exit 0 ;; + --srcdir) srcdir=$1 ; arg2=yes ;; + --prefix) prefix=$1 ; arg2=yes ;; + --exec-prefix) exec_prefix=$1 ; arg2=yes ;; + --bindir) bindir=$1 ; arg2=yes ;; + --datarootdir) datarootdir=$1 ; arg2=yes ;; + --infodir) infodir=$1 ; arg2=yes ;; + --mandir) mandir=$1 ; arg2=yes ;; + --srcdir=*) srcdir=${optarg} ;; --prefix=*) prefix=${optarg} ;; --exec-prefix=*) exec_prefix=${optarg} ;; @@ -93,6 +101,14 @@ while [ -n "$1" ] ; do echo "configure: Unrecognized option: \"${option}\"; use --help for usage." 1>&2 exit 1 ;; esac + + # Check if the option took a separate argument + if [ "${arg2}" = yes ] ; then + if [ $# != 0 ] ; then args="${args} \"$1\"" ; shift + else echo "configure: Missing argument to \"${option}\"" 1>&2 + exit 1 + fi + fi done # Find the source files, if location was not specified. diff --git a/decoder.c b/decoder.c index b40dafd..d3f2bf0 100644 --- a/decoder.c +++ b/decoder.c @@ -25,7 +25,7 @@ #include #include -#include "clzip.h" +#include "lzip.h" #include "decoder.h" @@ -124,10 +124,10 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder, File_trailer trailer; const int trailer_size = Ft_versioned_size( decoder->member_version ); const unsigned long long member_size = - Rd_member_position( decoder->range_decoder ) + trailer_size; + Rd_member_position( decoder->rdec ) + trailer_size; bool error = false; - int size = Rd_read_data( decoder->range_decoder, trailer, trailer_size ); + int size = Rd_read_data( decoder->rdec, trailer, trailer_size ); if( size < trailer_size ) { error = true; @@ -142,7 +142,7 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder, if( decoder->member_version == 0 ) Ft_set_member_size( trailer, member_size ); - if( decoder->range_decoder->code != 0 ) + if( decoder->rdec->code != 0 ) { error = true; Pp_show_msg( pp, "Range decoder final code is not zero" ); @@ -177,7 +177,7 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder, Ft_get_member_size( trailer ), member_size, member_size ); } } - if( !error && pp->verbosity >= 3 && LZd_data_position( decoder ) > 0 && member_size > 0 ) + if( !error && pp->verbosity >= 2 && LZd_data_position( decoder ) > 0 && member_size > 0 ) fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ", (double)LZd_data_position( decoder ) / member_size, ( 8.0 * member_size ) / LZd_data_position( decoder ), @@ -199,84 +199,82 @@ int LZd_decode_member( struct LZ_decoder * const decoder, unsigned rep1 = 0; /* used for efficient coding of */ unsigned rep2 = 0; /* repeated distances */ unsigned rep3 = 0; - State state = 0; - Rd_load( decoder->range_decoder ); - while( !Rd_finished( decoder->range_decoder ) ) + Rd_load( decoder->rdec ); + while( !Rd_finished( decoder->rdec ) ) { const int pos_state = LZd_data_position( decoder ) & pos_state_mask; - if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_match[state][pos_state] ) == 0 ) + if( Rd_decode_bit( decoder->rdec, &decoder->bm_match[state][pos_state] ) == 0 ) /* 1st bit */ { const uint8_t prev_byte = LZd_get_prev_byte( decoder ); if( St_is_char( state ) ) { state -= ( state < 4 ) ? state : 3; - LZd_put_byte( decoder, Rd_decode_tree( decoder->range_decoder, + LZd_put_byte( decoder, Rd_decode_tree( decoder->rdec, decoder->bm_literal[get_lit_state(prev_byte)], 8 ) ); } else { state -= ( state < 10 ) ? 3 : 6; - LZd_put_byte( decoder, Rd_decode_matched( decoder->range_decoder, - decoder->bm_literal[get_lit_state(prev_byte)], LZd_get_byte( decoder, rep0 ) ) ); + LZd_put_byte( decoder, Rd_decode_matched( decoder->rdec, + decoder->bm_literal[get_lit_state(prev_byte)], + LZd_get_byte( decoder, rep0 ) ) ); } } else { int len; - if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep[state] ) == 1 ) + if( Rd_decode_bit( decoder->rdec, &decoder->bm_rep[state] ) == 1 ) /* 2nd bit */ { - len = 0; - if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep0[state] ) == 1 ) + if( Rd_decode_bit( decoder->rdec, &decoder->bm_rep0[state] ) == 0 ) /* 3rd bit */ + { + if( Rd_decode_bit( decoder->rdec, &decoder->bm_len[state][pos_state] ) == 0 ) /* 4th bit */ + { state = St_set_short_rep( state ); + LZd_put_byte( decoder, LZd_get_byte( decoder, rep0 ) ); continue; } + } + else { unsigned distance; - if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep1[state] ) == 0 ) + if( Rd_decode_bit( decoder->rdec, &decoder->bm_rep1[state] ) == 0 ) /* 4th bit */ distance = rep1; else { - if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep2[state] ) == 0 ) + if( Rd_decode_bit( decoder->rdec, &decoder->bm_rep2[state] ) == 0 ) /* 5th bit */ distance = rep2; - else { distance = rep3; rep3 = rep2; } + else + { distance = rep3; rep3 = rep2; } rep2 = rep1; } rep1 = rep0; rep0 = distance; } - else - { - if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_len[state][pos_state] ) == 0 ) - { state = St_set_short_rep( state ); len = 1; } - } - if( len == 0 ) - { - state = St_set_rep( state ); - len = min_match_len + Led_decode( &decoder->rep_match_len_decoder, decoder->range_decoder, pos_state ); - } + state = St_set_rep( state ); + len = min_match_len + Rd_decode_len( decoder->rdec, &decoder->rep_len_model, pos_state ); } else { int dis_slot; const unsigned rep0_saved = rep0; - len = min_match_len + Led_decode( &decoder->len_decoder, decoder->range_decoder, pos_state ); - dis_slot = Rd_decode_tree6( decoder->range_decoder, decoder->bm_dis_slot[get_dis_state(len)] ); + len = min_match_len + Rd_decode_len( decoder->rdec, &decoder->match_len_model, pos_state ); + dis_slot = Rd_decode_tree6( decoder->rdec, decoder->bm_dis_slot[get_dis_state(len)] ); if( dis_slot < start_dis_model ) rep0 = dis_slot; else { const int direct_bits = ( dis_slot >> 1 ) - 1; rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits; if( dis_slot < end_dis_model ) - rep0 += Rd_decode_tree_reversed( decoder->range_decoder, + rep0 += Rd_decode_tree_reversed( decoder->rdec, decoder->bm_dis + rep0 - dis_slot - 1, direct_bits ); else { - rep0 += Rd_decode( decoder->range_decoder, direct_bits - dis_align_bits ) << dis_align_bits; - rep0 += Rd_decode_tree_reversed4( decoder->range_decoder, decoder->bm_align ); + rep0 += Rd_decode( decoder->rdec, direct_bits - dis_align_bits ) << dis_align_bits; + rep0 += Rd_decode_tree_reversed4( decoder->rdec, decoder->bm_align ); if( rep0 == 0xFFFFFFFFU ) /* Marker found */ { rep0 = rep0_saved; - Rd_normalize( decoder->range_decoder ); + Rd_normalize( decoder->rdec ); LZd_flush_data( decoder ); if( len == min_match_len ) /* End Of Stream marker */ { @@ -284,7 +282,7 @@ int LZd_decode_member( struct LZ_decoder * const decoder, } if( len == min_match_len + 1 ) /* Sync Flush marker */ { - Rd_load( decoder->range_decoder ); continue; + Rd_load( decoder->rdec ); continue; } if( pp->verbosity >= 0 ) { diff --git a/decoder.h b/decoder.h index c18ccbe..1c6ed3d 100644 --- a/decoder.h +++ b/decoder.h @@ -140,24 +140,24 @@ static inline int Rd_decode_bit( struct Range_decoder * const rdec, static inline int Rd_decode_tree( struct Range_decoder * const rdec, Bit_model bm[], const int num_bits ) { - int model = 1; + int symbol = 1; int i; for( i = num_bits; i > 0; --i ) - model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); - return model - (1 << num_bits); + symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + return symbol - (1 << num_bits); } static inline int Rd_decode_tree6( struct Range_decoder * const rdec, Bit_model bm[] ) { - int model = 1; - model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); - model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); - model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); - model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); - model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); - model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); - return model - (1 << 6); + int symbol = 1; + symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + return symbol - (1 << 6); } static inline int Rd_decode_tree_reversed( struct Range_decoder * const rdec, @@ -213,36 +213,17 @@ static inline int Rd_decode_matched( struct Range_decoder * const rdec, return symbol - 0x100; } - -struct Len_decoder - { - Bit_model choice1; - Bit_model choice2; - Bit_model bm_low[pos_states][len_low_symbols]; - Bit_model bm_mid[pos_states][len_mid_symbols]; - Bit_model bm_high[len_high_symbols]; - }; - -static inline void Led_init( struct Len_decoder * const len_decoder ) - { - Bm_init( &len_decoder->choice1 ); - Bm_init( &len_decoder->choice2 ); - Bm_array_init( len_decoder->bm_low[0], pos_states * len_low_symbols ); - Bm_array_init( len_decoder->bm_mid[0], pos_states * len_mid_symbols ); - Bm_array_init( len_decoder->bm_high, len_high_symbols ); - } - -static inline int Led_decode( struct Len_decoder * const len_decoder, - struct Range_decoder * const rdec, - const int pos_state ) +static inline int Rd_decode_len( struct Range_decoder * const rdec, + struct Len_model * const lm, + const int pos_state ) { - if( Rd_decode_bit( rdec, &len_decoder->choice1 ) == 0 ) - return Rd_decode_tree( rdec, len_decoder->bm_low[pos_state], len_low_bits ); - if( Rd_decode_bit( rdec, &len_decoder->choice2 ) == 0 ) + if( Rd_decode_bit( rdec, &lm->choice1 ) == 0 ) + return Rd_decode_tree( rdec, lm->bm_low[pos_state], len_low_bits ); + if( Rd_decode_bit( rdec, &lm->choice2 ) == 0 ) return len_low_symbols + - Rd_decode_tree( rdec, len_decoder->bm_mid[pos_state], len_mid_bits ); + Rd_decode_tree( rdec, lm->bm_mid[pos_state], len_mid_bits ); return len_low_symbols + len_mid_symbols + - Rd_decode_tree( rdec, len_decoder->bm_high, len_high_bits ); + Rd_decode_tree( rdec, lm->bm_high, len_high_bits ); } @@ -269,9 +250,9 @@ struct LZ_decoder Bit_model bm_dis[modeled_distances-end_dis_model]; Bit_model bm_align[dis_align_size]; - struct Range_decoder * range_decoder; - struct Len_decoder len_decoder; - struct Len_decoder rep_match_len_decoder; + struct Range_decoder * rdec; + struct Len_model match_len_model; + struct Len_model rep_len_model; }; void LZd_flush_data( struct LZ_decoder * const decoder ); @@ -322,7 +303,7 @@ static inline void LZd_copy_block( struct LZ_decoder * const decoder, static inline bool LZd_init( struct LZ_decoder * const decoder, const File_header header, - struct Range_decoder * const rdec, const int ofd ) + struct Range_decoder * const rde, const int ofd ) { decoder->partial_data_pos = 0; decoder->dictionary_size = Fh_get_dictionary_size( header ); @@ -346,9 +327,9 @@ static inline bool LZd_init( struct LZ_decoder * const decoder, Bm_array_init( decoder->bm_dis, modeled_distances - end_dis_model ); Bm_array_init( decoder->bm_align, dis_align_size ); - decoder->range_decoder = rdec; - Led_init( &decoder->len_decoder ); - Led_init( &decoder->rep_match_len_decoder ); + decoder->rdec = rde; + Lm_init( &decoder->match_len_model ); + Lm_init( &decoder->rep_len_model ); decoder->buffer[decoder->buffer_size-1] = 0; /* prev_byte of first_byte */ return true; } diff --git a/doc/clzip.1 b/doc/clzip.1 index 02181a7..4fc2a26 100644 --- a/doc/clzip.1 +++ b/doc/clzip.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1. -.TH CLZIP "1" "February 2013" "Clzip 1.4" "User Commands" +.TH CLZIP "1" "May 2013" "Clzip 1.5-pre1" "User Commands" .SH NAME Clzip \- reduces the size of files .SH SYNOPSIS @@ -71,6 +71,11 @@ The bidimensional parameter space of LZMA can't be mapped to a linear scale optimal for all files. If your files are large, very repetitive, etc, you may need to use the \fB\-\-match\-length\fR and \fB\-\-dictionary\-size\fR options directly to achieve optimal performance. +.PP +Exit status: 0 for a normal exit, 1 for environmental problems (file +not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or +invalid input file, 3 for an internal consistency error (eg, bug) which +caused clzip to panic. .SH "REPORTING BUGS" Report bugs to lzip\-bug@nongnu.org .br diff --git a/doc/clzip.info b/doc/clzip.info index ccec058..41723f3 100644 --- a/doc/clzip.info +++ b/doc/clzip.info @@ -12,7 +12,7 @@ File: clzip.info, Node: Top, Next: Introduction, Up: (dir) Clzip Manual ************ -This manual is for Clzip (version 1.4, 18 February 2013). +This manual is for Clzip (version 1.5-pre1, 13 May 2013). * Menu: @@ -42,6 +42,10 @@ gzip or bzip2. Clzip decompresses almost as fast as gzip and compresses better than bzip2, which makes it well suited for software distribution and data archiving. + Clzip uses the same well-defined exit status values used by bzip2, +which makes it safer when used in pipes or scripts than compressors +returning ambiguous warning values, like gzip. + Clzip uses the lzip file format; the files produced by clzip are fully compatible with lzip-1.4 or newer. Clzip is in fact a C language version of lzip, intended for embedded devices or systems lacking a C++ @@ -96,20 +100,16 @@ filename.tlz becomes filename.tar anyothername becomes anyothername.out As a self-check for your protection, clzip stores in the member -trailer the 32-bit CRC of the original data and the size of the -original data, to make sure that the decompressed version of the data -is identical to the original. This guards against corruption of the -compressed data, and against undetected bugs in clzip (hopefully very -unlikely). The chances of data corruption going undetected are -microscopic, less than one chance in 4000 million for each member -processed. Be aware, though, that the check occurs upon decompression, -so it can only tell you that something is wrong. It can't help you -recover the original uncompressed data. - - Return values: 0 for a normal exit, 1 for environmental problems -(file not found, invalid flags, I/O errors, etc), 2 to indicate a -corrupt or invalid input file, 3 for an internal consistency error (eg, -bug) which caused clzip to panic. +trailer the 32-bit CRC of the original data, the size of the original +data and the size of the member. These values, together with the value +remaining in the range decoder and the end-of-stream marker, provide a +very safe 4 factor integrity checking which guarantees that the +decompressed version of the data is identical to the original. This +guards against corruption of the compressed data, and against +undetected bugs in clzip (hopefully very unlikely). The chances of data +corruption going undetected are microscopic. Be aware, though, that the +check occurs upon decompression, so it can only tell you that something +is wrong. It can't help you recover the original uncompressed data.  File: clzip.info, Node: Algorithm, Next: Invoking Clzip, Prev: Introduction, Up: Top @@ -326,6 +326,12 @@ E exabyte (10^18) | Ei exbibyte (2^60) Z zettabyte (10^21) | Zi zebibyte (2^70) Y yottabyte (10^24) | Yi yobibyte (2^80) + + Exit status: 0 for a normal exit, 1 for environmental problems (file +not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or +invalid input file, 3 for an internal consistency error (eg, bug) which +caused clzip to panic. +  File: clzip.info, Node: File Format, Next: Examples, Prev: Invoking Clzip, Up: Top @@ -378,6 +384,7 @@ additional information before, between, or after them. Bits 4-0 contain the base 2 logarithm of the base size (12 to 29). Bits 7-5 contain the number of wedges (0 to 7) to substract from the base size to obtain the dictionary size. + Example: 0xD3 = (2^19 - 6 * 2^15) = (512KiB - 6 * 32KiB) = 320KiB Valid values for dictionary size range from 4KiB to 512MiB. `Lzma stream' @@ -392,8 +399,9 @@ additional information before, between, or after them. `Member size (8 bytes)' Total size of the member, including header and trailer. This field - acts as a distributed index, and facilitates safe recovery of - undamaged members from multi-member files. + acts as a distributed index, allows the verification of stream + integrity, and facilitates safe recovery of undamaged members from + multi-member files.  @@ -509,12 +517,12 @@ Concept Index Tag Table: Node: Top226 Node: Introduction920 -Node: Algorithm4755 -Node: Invoking Clzip7279 -Node: File Format12551 -Node: Examples14860 -Node: Problems16821 -Node: Concept Index17347 +Node: Algorithm4811 +Node: Invoking Clzip7335 +Node: File Format12847 +Node: Examples15277 +Node: Problems17238 +Node: Concept Index17764  End Tag Table diff --git a/doc/clzip.texinfo b/doc/clzip.texinfo index 1d0479f..e372d60 100644 --- a/doc/clzip.texinfo +++ b/doc/clzip.texinfo @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 18 February 2013 -@set VERSION 1.4 +@set UPDATED 13 May 2013 +@set VERSION 1.5-pre1 @dircategory Data Compression @direntry @@ -61,6 +61,10 @@ gzip or bzip2. Clzip decompresses almost as fast as gzip and compresses better than bzip2, which makes it well suited for software distribution and data archiving. +Clzip uses the same well-defined exit status values used by bzip2, which +makes it safer when used in pipes or scripts than compressors returning +ambiguous warning values, like gzip. + Clzip uses the lzip file format; the files produced by clzip are fully compatible with lzip-1.4 or newer. Clzip is in fact a C language version of lzip, intended for embedded devices or systems lacking a C++ @@ -117,20 +121,16 @@ file from that of the compressed file as follows: @end multitable As a self-check for your protection, clzip stores in the member trailer -the 32-bit CRC of the original data and the size of the original data, -to make sure that the decompressed version of the data is identical to -the original. This guards against corruption of the compressed data, and -against undetected bugs in clzip (hopefully very unlikely). The chances -of data corruption going undetected are microscopic, less than one -chance in 4000 million for each member processed. Be aware, though, that -the check occurs upon decompression, so it can only tell you that -something is wrong. It can't help you recover the original uncompressed -data. - -Return values: 0 for a normal exit, 1 for environmental problems (file -not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or -invalid input file, 3 for an internal consistency error (eg, bug) which -caused clzip to panic. +the 32-bit CRC of the original data, the size of the original data and +the size of the member. These values, together with the value remaining +in the range decoder and the end-of-stream marker, provide a very safe 4 +factor integrity checking which guarantees that the decompressed version +of the data is identical to the original. This guards against corruption +of the compressed data, and against undetected bugs in clzip (hopefully +very unlikely). The chances of data corruption going undetected are +microscopic. Be aware, though, that the check occurs upon decompression, +so it can only tell you that something is wrong. It can't help you +recover the original uncompressed data. @node Algorithm @@ -349,6 +349,12 @@ Table of SI and binary prefixes (unit multipliers): @item Y @tab yottabyte (10^24) @tab | @tab Yi @tab yobibyte (2^80) @end multitable +@sp 1 +Exit status: 0 for a normal exit, 1 for environmental problems (file not +found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or +invalid input file, 3 for an internal consistency error (eg, bug) which +caused clzip to panic. + @node File Format @chapter File Format @@ -404,6 +410,7 @@ wedges between 0 and 7. The size of a wedge is (base_size / 16).@* Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).@* Bits 7-5 contain the number of wedges (0 to 7) to substract from the base size to obtain the dictionary size.@* +Example: 0xD3 = (2^19 - 6 * 2^15) = (512KiB - 6 * 32KiB) = 320KiB@* Valid values for dictionary size range from 4KiB to 512MiB. @item Lzma stream @@ -418,8 +425,8 @@ Size of the uncompressed original data. @item Member size (8 bytes) Total size of the member, including header and trailer. This field acts -as a distributed index, and facilitates safe recovery of undamaged -members from multi-member files. +as a distributed index, allows the verification of stream integrity, and +facilitates safe recovery of undamaged members from multi-member files. @end table diff --git a/encoder.c b/encoder.c index cb6c8d6..5b005b0 100644 --- a/encoder.c +++ b/encoder.c @@ -23,7 +23,7 @@ #include #include -#include "clzip.h" +#include "lzip.h" #include "encoder.h" @@ -259,22 +259,22 @@ void Lee_encode( struct Len_encoder * const len_encoder, symbol -= min_match_len; if( symbol < len_low_symbols ) { - Re_encode_bit( renc, &len_encoder->choice1, 0 ); - Re_encode_tree( renc, len_encoder->bm_low[pos_state], symbol, len_low_bits ); + Re_encode_bit( renc, &len_encoder->lm.choice1, 0 ); + Re_encode_tree( renc, len_encoder->lm.bm_low[pos_state], symbol, len_low_bits ); } else { - Re_encode_bit( renc, &len_encoder->choice1, 1 ); + Re_encode_bit( renc, &len_encoder->lm.choice1, 1 ); if( symbol < len_low_symbols + len_mid_symbols ) { - Re_encode_bit( renc, &len_encoder->choice2, 0 ); - Re_encode_tree( renc, len_encoder->bm_mid[pos_state], + Re_encode_bit( renc, &len_encoder->lm.choice2, 0 ); + Re_encode_tree( renc, len_encoder->lm.bm_mid[pos_state], symbol - len_low_symbols, len_mid_bits ); } else { - Re_encode_bit( renc, &len_encoder->choice2, 1 ); - Re_encode_tree( renc, len_encoder->bm_high, + Re_encode_bit( renc, &len_encoder->lm.choice2, 1 ); + Re_encode_tree( renc, len_encoder->lm.bm_high, symbol - len_low_symbols - len_mid_symbols, len_high_bits ); } } @@ -369,8 +369,8 @@ bool LZe_init( struct LZ_encoder * const encoder, encoder->matchfinder = mf; if( !Re_init( &encoder->range_encoder, outfd ) ) return false; - Lee_init( &encoder->len_encoder, encoder->matchfinder->match_len_limit ); - Lee_init( &encoder->rep_match_len_encoder, encoder->matchfinder->match_len_limit ); + Lee_init( &encoder->match_len_encoder, encoder->matchfinder->match_len_limit ); + Lee_init( &encoder->rep_len_encoder, encoder->matchfinder->match_len_limit ); encoder->num_dis_slots = 2 * real_bits( encoder->matchfinder->dictionary_size - 1 ); @@ -473,7 +473,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, for( len = min_match_len; len <= replens[rep]; ++len ) Tr_update( &encoder->trials[len], price + - Lee_price( &encoder->rep_match_len_encoder, len, pos_state ), + Lee_price( &encoder->rep_len_encoder, len, pos_state ), rep, 0 ); } @@ -654,7 +654,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, LZe_price_rep( encoder, rep, cur_state, pos_state ); for( i = min_match_len; i <= len; ++i ) Tr_update( &encoder->trials[cur+i], price + - Lee_price( &encoder->rep_match_len_encoder, i, pos_state ), + Lee_price( &encoder->rep_len_encoder, i, pos_state ), rep, cur ); if( rep == 0 ) start_len = len + 1; /* discard shorter matches */ @@ -671,7 +671,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, pos_state2 = ( pos_state + len ) & pos_state_mask; state2 = St_set_rep( cur_state ); - price += Lee_price( &encoder->rep_match_len_encoder, len, pos_state ) + + price += Lee_price( &encoder->rep_len_encoder, len, pos_state ) + price0( encoder->bm_match[state2][pos_state2] ) + LZe_price_matched( encoder, data[len-1], data[len], data[len-dis] ); pos_state2 = ( pos_state2 + 1 ) & pos_state_mask; @@ -829,7 +829,7 @@ bool LZe_encode_member( struct LZ_encoder * const encoder, if( len == 1 ) state = St_set_short_rep( state ); else { - Lee_encode( &encoder->rep_match_len_encoder, &encoder->range_encoder, len, pos_state ); + Lee_encode( &encoder->rep_len_encoder, &encoder->range_encoder, len, pos_state ); state = St_set_rep( state ); } } diff --git a/encoder.h b/encoder.h index e39d7c4..a69f552 100644 --- a/encoder.h +++ b/encoder.h @@ -107,9 +107,9 @@ static inline int price_symbol_reversed( const Bit_model bm[], int symbol, for( i = num_bits; i > 0; --i ) { const int bit = symbol & 1; - symbol >>= 1; price += price_bit( bm[model], bit ); model = ( model << 1 ) | bit; + symbol >>= 1; } return price; } @@ -376,11 +376,7 @@ static inline void Re_encode_matched( struct Range_encoder * const renc, struct Len_encoder { - Bit_model choice1; - Bit_model choice2; - Bit_model bm_low[pos_states][len_low_symbols]; - Bit_model bm_mid[pos_states][len_mid_symbols]; - Bit_model bm_high[len_high_symbols]; + struct Len_model lm; int prices[pos_states][max_len_symbols]; int len_symbols; int counters[pos_states]; @@ -390,21 +386,21 @@ static inline void Lee_update_prices( struct Len_encoder * const len_encoder, const int pos_state ) { int * const pps = len_encoder->prices[pos_state]; - int tmp = price0( len_encoder->choice1 ); + int tmp = price0( len_encoder->lm.choice1 ); int len = 0; for( ; len < len_low_symbols && len < len_encoder->len_symbols; ++len ) pps[len] = tmp + - price_symbol( len_encoder->bm_low[pos_state], len, len_low_bits ); - tmp = price1( len_encoder->choice1 ); + price_symbol( len_encoder->lm.bm_low[pos_state], len, len_low_bits ); + tmp = price1( len_encoder->lm.choice1 ); for( ; len < len_low_symbols + len_mid_symbols && len < len_encoder->len_symbols; ++len ) - pps[len] = tmp + price0( len_encoder->choice2 ) + - price_symbol( len_encoder->bm_mid[pos_state], len - len_low_symbols, len_mid_bits ); + pps[len] = tmp + price0( len_encoder->lm.choice2 ) + + price_symbol( len_encoder->lm.bm_mid[pos_state], len - len_low_symbols, len_mid_bits ); for( ; len < len_encoder->len_symbols; ++len ) /* using 4 slots per value makes "Lee_price" faster */ len_encoder->prices[3][len] = len_encoder->prices[2][len] = len_encoder->prices[1][len] = len_encoder->prices[0][len] = - tmp + price1( len_encoder->choice2 ) + - price_symbol( len_encoder->bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits ); + tmp + price1( len_encoder->lm.choice2 ) + + price_symbol( len_encoder->lm.bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits ); len_encoder->counters[pos_state] = len_encoder->len_symbols; } @@ -412,11 +408,7 @@ static inline void Lee_init( struct Len_encoder * const len_encoder, const int match_len_limit ) { int i; - Bm_init( &len_encoder->choice1 ); - Bm_init( &len_encoder->choice2 ); - Bm_array_init( len_encoder->bm_low[0], pos_states * len_low_symbols ); - Bm_array_init( len_encoder->bm_mid[0], pos_states * len_mid_symbols ); - Bm_array_init( len_encoder->bm_high, len_high_symbols ); + Lm_init( &len_encoder->lm ); len_encoder->len_symbols = match_len_limit + 1 - min_match_len; for( i = 0; i < pos_states; ++i ) Lee_update_prices( len_encoder, i ); } @@ -502,8 +494,8 @@ struct LZ_encoder struct Matchfinder * matchfinder; struct Range_encoder range_encoder; - struct Len_encoder len_encoder; - struct Len_encoder rep_match_len_encoder; + struct Len_encoder match_len_encoder; + struct Len_encoder rep_len_encoder; int num_dis_slots; struct Pair pairs[max_match_len+1]; @@ -572,7 +564,7 @@ static inline int LZe_price_rep0_len( const struct LZ_encoder * const encoder, const State state, const int pos_state ) { return LZe_price_rep( encoder, 0, state, pos_state ) + - Lee_price( &encoder->rep_match_len_encoder, len, pos_state ); + Lee_price( &encoder->rep_len_encoder, len, pos_state ); } static inline int LZe_price_dis( const struct LZ_encoder * const encoder, @@ -589,7 +581,7 @@ static inline int LZe_price_pair( const struct LZ_encoder * const encoder, const int dis, const int len, const int pos_state ) { - return Lee_price( &encoder->len_encoder, len, pos_state ) + + return Lee_price( &encoder->match_len_encoder, len, pos_state ) + LZe_price_dis( encoder, dis, get_dis_state( len ) ); } @@ -620,7 +612,7 @@ static inline void LZe_encode_pair( struct LZ_encoder * const encoder, const int pos_state ) { const int dis_slot = get_slot( dis ); - Lee_encode( &encoder->len_encoder, &encoder->range_encoder, len, pos_state ); + Lee_encode( &encoder->match_len_encoder, &encoder->range_encoder, len, pos_state ); Re_encode_tree( &encoder->range_encoder, encoder->bm_dis_slot[get_dis_state(len)], dis_slot, dis_slot_bits ); diff --git a/lzip.h b/lzip.h new file mode 100644 index 0000000..1996e97 --- /dev/null +++ b/lzip.h @@ -0,0 +1,286 @@ +/* Clzip - Data compressor based on the LZMA algorithm + Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef max + #define max(x,y) ((x) >= (y) ? (x) : (y)) +#endif +#ifndef min + #define min(x,y) ((x) <= (y) ? (x) : (y)) +#endif + +typedef int State; + +enum { states = 12 }; + +static inline bool St_is_char( const State st ) { return st < 7; } + +static inline State St_set_char( const State st ) + { + static const State next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 }; + return next[st]; + } + +static inline State St_set_match( const State st ) + { return ( ( st < 7 ) ? 7 : 10 ); } + +static inline State St_set_rep( const State st ) + { return ( ( st < 7 ) ? 8 : 11 ); } + +static inline State St_set_short_rep( const State st ) + { return ( ( st < 7 ) ? 9 : 11 ); } + + +enum { + min_dictionary_bits = 12, + min_dictionary_size = 1 << min_dictionary_bits, + max_dictionary_bits = 29, + max_dictionary_size = 1 << max_dictionary_bits, + literal_context_bits = 3, + pos_state_bits = 2, + pos_states = 1 << pos_state_bits, + pos_state_mask = pos_states - 1, + + dis_slot_bits = 6, + start_dis_model = 4, + end_dis_model = 14, + modeled_distances = 1 << (end_dis_model / 2), /* 128 */ + dis_align_bits = 4, + dis_align_size = 1 << dis_align_bits, + + len_low_bits = 3, + len_mid_bits = 3, + len_high_bits = 8, + len_low_symbols = 1 << len_low_bits, + len_mid_symbols = 1 << len_mid_bits, + len_high_symbols = 1 << len_high_bits, + max_len_symbols = len_low_symbols + len_mid_symbols + len_high_symbols, + + min_match_len = 2, /* must be 2 */ + max_match_len = min_match_len + max_len_symbols - 1, /* 273 */ + min_match_len_limit = 5, + + max_dis_states = 4 }; + +static inline int get_dis_state( const int len ) + { return min( len - min_match_len, max_dis_states - 1 ); } + +static inline int get_lit_state( const uint8_t prev_byte ) + { return ( prev_byte >> ( 8 - literal_context_bits ) ); } + + +enum { bit_model_move_bits = 5, + bit_model_total_bits = 11, + bit_model_total = 1 << bit_model_total_bits }; + +typedef int Bit_model; + +static inline void Bm_init( Bit_model * const probability ) + { *probability = bit_model_total / 2; } + +static inline void Bm_array_init( Bit_model * const p, const int size ) + { int i = 0; while( i < size ) p[i++] = bit_model_total / 2; } + +struct Len_model + { + Bit_model choice1; + Bit_model choice2; + Bit_model bm_low[pos_states][len_low_symbols]; + Bit_model bm_mid[pos_states][len_mid_symbols]; + Bit_model bm_high[len_high_symbols]; + }; + +static inline void Lm_init( struct Len_model * const lm ) + { + Bm_init( &lm->choice1 ); + Bm_init( &lm->choice2 ); + Bm_array_init( lm->bm_low[0], pos_states * len_low_symbols ); + Bm_array_init( lm->bm_mid[0], pos_states * len_mid_symbols ); + Bm_array_init( lm->bm_high, len_high_symbols ); + } + + +struct Pretty_print + { + const char * name; + const char * stdin_name; + int longest_name; + int verbosity; + bool first_post; + }; + +void Pp_init( struct Pretty_print * const pp, const char * const filenames[], + const int num_filenames, const int v ); + +static inline void Pp_set_name( struct Pretty_print * const pp, + const char * const filename ) + { + if( filename && filename[0] && strcmp( filename, "-" ) != 0 ) + pp->name = filename; + else pp->name = pp->stdin_name; + pp->first_post = true; + } + +static inline void Pp_reset( struct Pretty_print * const pp ) + { if( pp->name && pp->name[0] ) pp->first_post = true; } +void Pp_show_msg( struct Pretty_print * const pp, const char * const msg ); + + +typedef uint32_t CRC32[256]; /* Table of CRCs of all 8-bit messages. */ + +extern CRC32 crc32; + +static inline void CRC32_init( void ) + { + unsigned n; + for( n = 0; n < 256; ++n ) + { + unsigned c = n; + int k; + for( k = 0; k < 8; ++k ) + { if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; } + crc32[n] = c; + } + } + +static inline void CRC32_update_byte( uint32_t * const crc, const uint8_t byte ) + { *crc = crc32[(*crc^byte)&0xFF] ^ ( *crc >> 8 ); } + +static inline void CRC32_update_buf( uint32_t * const crc, + const uint8_t * const buffer, const int size ) + { + int i; + for( i = 0; i < size; ++i ) + *crc = crc32[(*crc^buffer[i])&0xFF] ^ ( *crc >> 8 ); + } + + +static inline int real_bits( unsigned value ) + { + int bits = 0; + while( value > 0 ) { value >>= 1; ++bits; } + return bits; + } + + +static const uint8_t magic_string[4] = { 0x4C, 0x5A, 0x49, 0x50 }; /* "LZIP" */ + +typedef uint8_t File_header[6]; /* 0-3 magic bytes */ + /* 4 version */ + /* 5 coded_dict_size */ +enum { Fh_size = 6 }; + +static inline void Fh_set_magic( File_header data ) + { memcpy( data, magic_string, 4 ); data[4] = 1; } + +static inline bool Fh_verify_magic( const File_header data ) + { return ( memcmp( data, magic_string, 4 ) == 0 ); } + +static inline uint8_t Fh_version( const File_header data ) + { return data[4]; } + +static inline bool Fh_verify_version( const File_header data ) + { return ( data[4] <= 1 ); } + +static inline unsigned Fh_get_dictionary_size( const File_header data ) + { + unsigned sz = ( 1 << ( data[5] & 0x1F ) ); + if( sz > min_dictionary_size ) + sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 7 ); + return sz; + } + +static inline bool Fh_set_dictionary_size( File_header data, const int sz ) + { + if( sz >= min_dictionary_size && sz <= max_dictionary_size ) + { + data[5] = real_bits( sz - 1 ); + if( sz > min_dictionary_size ) + { + const int base_size = 1 << data[5]; + const int wedge = base_size / 16; + int i; + for( i = 7; i >= 1; --i ) + if( base_size - ( i * wedge ) >= sz ) + { data[5] |= ( i << 5 ); break; } + } + return true; + } + return false; + } + + +typedef uint8_t File_trailer[20]; + /* 0-3 CRC32 of the uncompressed data */ + /* 4-11 size of the uncompressed data */ + /* 12-19 member size including header and trailer */ + +enum { Ft_size = 20 }; + +static inline int Ft_versioned_size( const int version ) + { return ( ( version >= 1 ) ? 20 : 12 ); } + +static inline unsigned Ft_get_data_crc( const File_trailer data ) + { + unsigned tmp = 0; + int i; + for( i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; } + return tmp; + } + +static inline void Ft_set_data_crc( File_trailer data, unsigned crc ) + { + int i; + for( i = 0; i <= 3; ++i ) { data[i] = (uint8_t)crc; crc >>= 8; } + } + +static inline unsigned long long Ft_get_data_size( const File_trailer data ) + { + unsigned long long tmp = 0; + int i; + for( i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; } + return tmp; + } + +static inline void Ft_set_data_size( File_trailer data, unsigned long long sz ) + { + int i; + for( i = 4; i <= 11; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } + } + +static inline unsigned long long Ft_get_member_size( const File_trailer data ) + { + unsigned long long tmp = 0; + int i; + for( i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; } + return tmp; + } + +static inline void Ft_set_member_size( File_trailer data, unsigned long long sz ) + { + int i; + for( i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } + } + + +/* defined in decoder.c */ +int readblock( const int fd, uint8_t * const buf, const int size ); +int writeblock( const int fd, const uint8_t * const buf, const int size ); + +/* defined in main.c */ +void cleanup_and_fail( const int retval ); +void show_error( const char * const msg, const int errcode, const bool help ); +void internal_error( const char * const msg ); diff --git a/main.c b/main.c index aea4e18..9ca4f90 100644 --- a/main.c +++ b/main.c @@ -15,7 +15,7 @@ along with this program. If not, see . */ /* - Return values: 0 for a normal exit, 1 for environmental problems + Exit status: 0 for a normal exit, 1 for environmental problems (file not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid input file, 3 for an internal consistency error (eg, bug) which caused clzip to panic. @@ -52,7 +52,7 @@ #endif #include "carg_parser.h" -#include "clzip.h" +#include "lzip.h" #include "decoder.h" #include "encoder.h" @@ -127,6 +127,10 @@ static void show_help( void ) "scale optimal for all files. If your files are large, very repetitive,\n" "etc, you may need to use the --match-length and --dictionary-size\n" "options directly to achieve optimal performance.\n" + "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" + "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" + "invalid input file, 3 for an internal consistency error (eg, bug) which\n" + "caused clzip to panic.\n" "\nReport bugs to lzip-bug@nongnu.org\n" "Clzip home page: http://www.nongnu.org/lzip/clzip.html\n" ); } @@ -155,8 +159,9 @@ void show_header( const File_header header ) for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) { num /= factor; if( num % factor != 0 ) exact = false; p = prefix[i]; np = ""; } - fprintf( stderr, "version %d, dictionary size %s%4u %sB. ", - Fh_version( header ), np, num, p ); + if( verbosity >= 4 ) + fprintf( stderr, "version %d, ", Fh_version( header ) ); + fprintf( stderr, "dictionary size %s%4u %sB. ", np, num, p ); } @@ -549,7 +554,7 @@ static int decompress( const int infd, struct Pretty_print * const pp, retval = 2; break; } if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) - { Pp_show_msg( pp, 0 ); if( verbosity >= 2 ) show_header( header ); } + { Pp_show_msg( pp, 0 ); if( verbosity >= 3 ) show_header( header ); } if( !LZd_init( &decoder, header, &rdec, outfd ) ) { @@ -573,13 +578,11 @@ static int decompress( const int infd, struct Pretty_print * const pp, retval = 2; break; } if( verbosity >= 2 ) - { if( testing ) fprintf( stderr, "ok\n" ); - else fprintf( stderr, "done\n" ); Pp_reset( pp ); } + { fprintf( stderr, testing ? "ok\n" : "done\n" ); Pp_reset( pp ); } } Rd_free( &rdec ); if( verbosity == 1 && retval == 0 ) - { if( testing ) fprintf( stderr, "ok\n" ); - else fprintf( stderr, "done\n" ); } + fprintf( stderr, testing ? "ok\n" : "done\n" ); return retval; } @@ -702,6 +705,7 @@ int main( const int argc, const char * const argv[] ) { 'h', "help", ap_no }, { 'k', "keep", ap_no }, { 'm', "match-length", ap_yes }, + { 'n', "threads", ap_yes }, { 'o', "output", ap_yes }, { 'q', "quiet", ap_no }, { 's', "dictionary-size", ap_yes }, @@ -741,6 +745,7 @@ int main( const int argc, const char * const argv[] ) case 'k': keep_input_files = true; break; case 'm': encoder_options.match_len_limit = getnum( arg, min_match_len_limit, max_match_len ); break; + case 'n': break; case 'o': default_output_filename = arg; break; case 'q': verbosity = -1; break; case 's': encoder_options.dictionary_size = get_dict_size( arg ); diff --git a/testsuite/check.sh b/testsuite/check.sh index ed0ca50..d38ebb0 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -26,6 +26,15 @@ fail=0 printf "testing clzip-%s..." "$2" +"${LZIP}" -cqs-1 in > /dev/null +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +"${LZIP}" -cqs0 in > /dev/null +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +"${LZIP}" -cqs4095 in > /dev/null +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +"${LZIP}" -cqm274 in > /dev/null +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi + "${LZIP}" -t "${testdir}"/test.txt.lz || fail=1 "${LZIP}" -cd "${testdir}"/test.txt.lz > copy || fail=1 cmp in copy || fail=1 @@ -38,15 +47,6 @@ if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi cmp in copy || fail=1 printf . -"${LZIP}" -cqs-1 in > out -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi -"${LZIP}" -cqs0 in > out -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi -"${LZIP}" -cqs4095 in > out -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi -"${LZIP}" -cqm274 in > out -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi - for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do "${LZIP}" -k -$i in || fail=1 mv -f in.lz copy.lz || fail=1 -- cgit v1.2.3