diff options
-rw-r--r-- | ChangeLog | 7 | ||||
-rw-r--r-- | Makefile.in | 18 | ||||
-rw-r--r-- | NEWS | 5 | ||||
-rwxr-xr-x | configure | 9 | ||||
-rw-r--r-- | doc/lzlib.info | 26 | ||||
-rw-r--r-- | doc/lzlib.texinfo | 6 | ||||
-rw-r--r-- | encoder.cc | 26 | ||||
-rw-r--r-- | encoder.h | 30 | ||||
-rw-r--r-- | lzcheck.cc | 64 | ||||
-rw-r--r-- | lzlib.h | 2 | ||||
-rw-r--r-- | main.cc | 10 | ||||
-rwxr-xr-x | testsuite/check.sh | 14 |
12 files changed, 114 insertions, 103 deletions
@@ -1,3 +1,10 @@ +2009-10-20 Antonio Diaz Diaz <ant_diaz@teleline.es> + + * Version 0.7 released. + * Compression time has been reduced by 4%. + * testsuite/check.sh: Removed -9 to run in less than 256MiB of RAM. + * lzcheck.cc: Read files of any size up to 2^63 bytes. + 2009-09-02 Antonio Diaz Diaz <ant_diaz@teleline.es> * Version 0.6 released. diff --git a/Makefile.in b/Makefile.in index 86a9b18..3d51c30 100644 --- a/Makefile.in +++ b/Makefile.in @@ -74,7 +74,8 @@ $(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texinfo man : $(VPATH)/doc/$(progname).1 $(VPATH)/doc/$(progname).1 : $(progname) - help2man -o $(VPATH)/doc/$(progname).1 --no-info ./$(progname) + help2man -n 'test program for the lzlib library' \ + -o $(VPATH)/doc/$(progname).1 --no-info ./$(progname) Makefile : $(VPATH)/configure $(VPATH)/Makefile.in ./config.status @@ -83,27 +84,27 @@ check : all $(VPATH)/testsuite/check.sh lzcheck @$(VPATH)/testsuite/check.sh $(VPATH)/testsuite install : all install-info - if test ! -d $(DESTDIR)$(includedir) ; then $(INSTALL_DIR) $(DESTDIR)$(includedir) ; fi - if test ! -d $(DESTDIR)$(libdir) ; then $(INSTALL_DIR) $(DESTDIR)$(libdir) ; fi + if [ ! -d $(DESTDIR)$(includedir) ] ; then $(INSTALL_DIR) $(DESTDIR)$(includedir) ; fi + if [ ! -d $(DESTDIR)$(libdir) ] ; then $(INSTALL_DIR) $(DESTDIR)$(libdir) ; fi $(INSTALL_DATA) $(VPATH)/$(pkgname).h $(DESTDIR)$(includedir)/$(pkgname).h $(INSTALL_DATA) ./$(libname).a $(DESTDIR)$(libdir)/$(libname).a - if test -n "$(progname_shared)" ; then \ + if [ -n "$(progname_shared)" ] ; then \ $(INSTALL_PROGRAM) ./$(libname).so.$(pkgversion) $(DESTDIR)$(libdir)/$(libname).so.$(pkgversion) ; \ - if test -a $(DESTDIR)$(libdir)/$(libname).so.$(soversion) ; then \ + if [ -e $(DESTDIR)$(libdir)/$(libname).so.$(soversion) ] ; then \ run_ldconfig=no ; rm -f $(DESTDIR)$(libdir)/$(libname).so.$(soversion) ; \ else run_ldconfig=yes ; \ fi ; \ cd $(DESTDIR)$(libdir) && ln -s $(libname).so.$(pkgversion) $(libname).so.$(soversion) ; \ - if test $${run_ldconfig} = yes ; then $(LDCONFIG) $(DESTDIR)$(libdir) ; fi ; \ + if [ $${run_ldconfig} = yes ] ; then $(LDCONFIG) $(DESTDIR)$(libdir) ; fi ; \ fi install-info : - if test ! -d $(DESTDIR)$(infodir) ; then $(INSTALL_DIR) $(DESTDIR)$(infodir) ; fi + if [ ! -d $(DESTDIR)$(infodir) ] ; then $(INSTALL_DIR) $(DESTDIR)$(infodir) ; fi $(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info $(DESTDIR)$(infodir)/$(pkgname).info -install-info --info-dir=$(DESTDIR)$(infodir) $(DESTDIR)$(infodir)/$(pkgname).info install-man : - if test ! -d $(DESTDIR)$(mandir)/man1 ; then $(INSTALL_DIR) $(DESTDIR)$(mandir)/man1 ; fi + if [ ! -d $(DESTDIR)$(mandir)/man1 ] ; then $(INSTALL_DIR) $(DESTDIR)$(mandir)/man1 ; fi $(INSTALL_DATA) $(VPATH)/doc/$(progname).1 $(DESTDIR)$(mandir)/man1/$(progname).1 install-strip : all @@ -145,6 +146,7 @@ dist : clean : -rm -f $(progname) $(progname)_profiled $(objs) $(lib_objs) *.a -rm -f $(progname)_shared $(sh_lib_objs) *.so.$(pkgversion) + -rm -f lzcheck.o distclean : clean -rm -f Makefile config.status *.tar *.tar.lz @@ -1,4 +1,3 @@ -Changes in version 0.6: +Changes in version 0.7: -The LZ_compress_sync_flush mechanism has been fixed. (Last few bytes -weren't inmediately available). +Compression time has been reduced by 4%. @@ -5,13 +5,12 @@ # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. # -# Date of this version: 2009-09-02 +# Date of this version: 2009-10-20 -invocation_name=$0 args= no_create= pkgname=lzlib -pkgversion=0.6 +pkgversion=0.7 soversion=0 progname=minilzip progname_shared=${progname}_shared @@ -129,7 +128,7 @@ if [ x"${srcdir}" = x ] ; then if [ ! -r ${srcdir}/${srctrigger} ] ; then srcdir=.. ; fi if [ ! -r ${srcdir}/${srctrigger} ] ; then ## the sed command below emulates the dirname command - srcdir=`echo ${invocation_name} | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` + srcdir=`echo $0 | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` fi fi @@ -178,7 +177,7 @@ if [ x${no_create} = x ] ; then # This script is free software: you have unlimited permission # to copy, distribute and modify it. -exec /bin/sh ${invocation_name} ${args} --no-create +exec /bin/sh $0 ${args} --no-create EOF chmod +x config.status fi diff --git a/doc/lzlib.info b/doc/lzlib.info index b9ddf30..40862bd 100644 --- a/doc/lzlib.info +++ b/doc/lzlib.info @@ -9,10 +9,10 @@ END-INFO-DIR-ENTRY File: lzlib.info, Node: Top, Next: Introduction, Up: (dir) -Lzlib -***** +Lzlib Manual +************ -This manual is for Lzlib (version 0.6, 2 September 2009). +This manual is for Lzlib (version 0.7, 20 October 2009). * Menu: @@ -519,15 +519,15 @@ Concept Index Tag Table: Node: Top219 -Node: Introduction1015 -Node: Library Version2790 -Node: Buffering3435 -Node: Compression Functions4542 -Node: Decompression Functions10044 -Node: Error Codes13482 -Node: Data Format15418 -Node: Examples17385 -Node: Problems18819 -Node: Concept Index19389 +Node: Introduction1028 +Node: Library Version2803 +Node: Buffering3448 +Node: Compression Functions4555 +Node: Decompression Functions10057 +Node: Error Codes13495 +Node: Data Format15431 +Node: Examples17398 +Node: Problems18832 +Node: Concept Index19402 End Tag Table diff --git a/doc/lzlib.texinfo b/doc/lzlib.texinfo index 409b053..a824739 100644 --- a/doc/lzlib.texinfo +++ b/doc/lzlib.texinfo @@ -1,12 +1,12 @@ \input texinfo @c -*-texinfo-*- @c %**start of header @setfilename lzlib.info -@settitle Lzlib +@settitle Lzlib Manual @finalout @c %**end of header -@set UPDATED 2 September 2009 -@set VERSION 0.6 +@set UPDATED 20 October 2009 +@set VERSION 0.7 @dircategory Data Compression @direntry @@ -309,6 +309,7 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances], trials[0].state = state; for( int i = 0; i < num_rep_distances; ++i ) trials[0].reps[i] = reps[i]; + const uint8_t prev_byte = matchfinder[-1]; const uint8_t cur_byte = matchfinder[0]; const uint8_t match_byte = matchfinder[-reps[0]-1]; unsigned int position = matchfinder.data_position(); @@ -318,9 +319,9 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances], trials[1].prev_index = 0; trials[1].price = price0( bm_match[state()][pos_state] ); if( state.is_char() ) - trials[1].price += literal_encoder.price_symbol( matchfinder[-1], cur_byte ); + trials[1].price += literal_encoder.price_symbol( prev_byte, cur_byte ); else - trials[1].price += literal_encoder.price_matched( matchfinder[-1], cur_byte, match_byte ); + trials[1].price += literal_encoder.price_matched( prev_byte, cur_byte, match_byte ); const int match_price = price1( bm_match[state()][pos_state] ); const int rep_match_price = match_price + price1( bm_rep[state()] ); @@ -354,9 +355,13 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances], } for( int rep = 0; rep < num_rep_distances; ++rep ) + { + const int price = rep_match_price + + price_rep( rep, state, pos_state ); for( int len = min_match_len; len <= replens[rep]; ++len ) - trials[len].update( rep, 0, rep_match_price + - price_rep( rep, len, state, pos_state ) ); + trials[len].update( rep, 0, price + + rep_match_len_encoder.price( len, pos_state ) ); + } int cur = 0; int num_trials = main_len; @@ -396,14 +401,15 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances], mtf_reps( cur_trial.dis, cur_trial.reps ); } + const uint8_t prev_byte = matchfinder[-1]; const uint8_t cur_byte = matchfinder[0]; const uint8_t match_byte = matchfinder[-cur_trial.reps[0]-1]; const int pos_state = ++position & pos_state_mask; int next_price = cur_trial.price + price0( bm_match[cur_trial.state()][pos_state] ); if( cur_trial.state.is_char() ) - next_price += literal_encoder.price_symbol( matchfinder[-1], cur_byte ); + next_price += literal_encoder.price_symbol( prev_byte, cur_byte ); else - next_price += literal_encoder.price_matched( matchfinder[-1], cur_byte, match_byte ); + next_price += literal_encoder.price_matched( prev_byte, cur_byte, match_byte ); if( !matchfinder.move_pos() ) return 0; Trial & next_trial = trials[cur+1]; @@ -429,11 +435,13 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances], while( len < len_limit && data[len] == data[len-dis] ) ++len; if( len >= min_match_len ) { + const int price = rep_match_price + + price_rep( rep, cur_trial.state, pos_state ); while( num_trials < cur + len ) trials[++num_trials].price = infinite_price; for( ; len >= min_match_len; --len ) - trials[cur+len].update( rep, cur, rep_match_price + - price_rep( rep, len, cur_trial.state, pos_state ) ); + trials[cur+len].update( rep, cur, price + + rep_match_len_encoder.price( len, pos_state ) ); } } @@ -447,7 +455,7 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances], while( num_trials < cur + newlen ) trials[++num_trials].price = infinite_price; - for( int len = newlen; len >= min_match_len; --len ) + for( int len = min_match_len; len <= newlen; ++len ) trials[cur+len].update( match_distances[len] + num_rep_distances, cur, normal_match_price + price_pair( match_distances[len], len, pos_state ) ); @@ -55,6 +55,7 @@ public: extern const Dis_slots dis_slots; + class Prob_prices { int data[bit_model_total >> 2]; @@ -79,6 +80,7 @@ public: extern const Prob_prices prob_prices; + inline int price0( const Bit_model & bm ) throw() { return prob_prices[bm.probability]; } @@ -88,6 +90,7 @@ inline int price1( const Bit_model & bm ) throw() inline int price_bit( const Bit_model & bm, const int bit ) throw() { if( bit ) return price1( bm ); else return price0( bm ); } + inline int price_symbol( const Bit_model bm[], int symbol, const int num_bits ) throw() { symbol |= ( 1 << num_bits ); @@ -101,6 +104,7 @@ inline int price_symbol( const Bit_model bm[], int symbol, const int num_bits ) return price; } + inline int price_symbol_reversed( const Bit_model bm[], int symbol, const int num_bits ) throw() { @@ -116,6 +120,7 @@ inline int price_symbol_reversed( const Bit_model bm[], int symbol, return price; } + inline int price_matched( const Bit_model bm[], const int symbol, const int match_byte ) throw() { @@ -424,7 +429,7 @@ class LZ_encoder { State state; int dis; - int prev_index; + int prev_index; // index of prev trial in trials[] int price; // dual use var; cumulative price, match length int reps[num_rep_distances]; void update( const int d, const int p_i, const int pr ) throw() @@ -489,25 +494,18 @@ class LZ_encoder return price0( bm_rep0[state()] ) + price0( bm_len[state()][pos_state] ); } - int price_rep( const int rep, const int len, const State & state, + int price_rep( const int rep, const State & state, const int pos_state ) const throw() { - int price = rep_match_len_encoder.price( len, pos_state ); - if( rep == 0 ) - { - price += price0( bm_rep0[state()] ); - price += price1( bm_len[state()][pos_state] ); - } + if( rep == 0 ) return price0( bm_rep0[state()] ) + + price1( bm_len[state()][pos_state] ); + int price = price1( bm_rep0[state()] ); + if( rep == 1 ) + price += price0( bm_rep1[state()] ); else { - price += price1( bm_rep0[state()] ); - if( rep == 1 ) - price += price0( bm_rep1[state()] ); - else - { - price += price1( bm_rep1[state()] ); - price += price_bit( bm_rep2[state()], rep - 2 ); - } + price += price1( bm_rep1[state()] ); + price += price_bit( bm_rep2[state()], rep - 2 ); } return price; } @@ -29,6 +29,11 @@ #define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL #endif +const int buffer_size = 65536; +uint8_t in_buffer[buffer_size]; +uint8_t mid_buffer[buffer_size]; +uint8_t out_buffer[buffer_size]; + int main( const int argc, const char * argv[] ) { @@ -38,28 +43,15 @@ int main( const int argc, const char * argv[] ) return 1; } - FILE *f = std::fopen( argv[1], "rb" ); - if( !f ) + FILE *file = std::fopen( argv[1], "rb" ); + if( !file ) { std::fprintf( stderr, "Can't open file `%s' for reading\n", argv[1] ); return 1; } +// std::fprintf( stderr, "lzcheck: testing file `%s'\n", argv[1] ); - const int in_buffer_size = 1 << 20; - const int mid_buffer_size = 65536; - const int out_buffer_size = in_buffer_size; - uint8_t in_buffer[in_buffer_size]; - uint8_t mid_buffer[mid_buffer_size]; - uint8_t out_buffer[out_buffer_size]; - const int in_size = std::fread( in_buffer, 1, in_buffer_size, f ); - if( in_size >= in_buffer_size ) - { - std::fprintf( stderr, "input file `%s' too big.\n", argv[1] ); - return 1; - } - std::fclose( f ); - - const int dictionary_size = in_buffer_size; + const int dictionary_size = 1 << 20; const int match_len_limit = 80; const long long member_size = LLONG_MAX; void * encoder = LZ_compress_open( dictionary_size, match_len_limit, @@ -85,27 +77,35 @@ int main( const int argc, const char * argv[] ) return 1; } - for( int l = 0, r = 0; r < in_size; l = r ) + while( true ) { - while( ++r < in_size && in_buffer[r-1] != '\n' ) ; - LZ_compress_write( encoder, in_buffer + l, r - l ); - LZ_compress_sync_flush( encoder ); - int mid_size = LZ_compress_read( encoder, mid_buffer, mid_buffer_size ); - LZ_decompress_write( decoder, mid_buffer, mid_size ); - int out_size = LZ_decompress_read( decoder, out_buffer, out_buffer_size ); - - if( out_size != r - l || std::memcmp( in_buffer + l, out_buffer, out_size ) ) + const int read_size = std::fread( in_buffer, 1, buffer_size, file ); + if( read_size <= 0 ) break; + + for( int l = 0, r = 1; r <= read_size; l = r, ++r ) { - std::printf( "sync error at pos %d. in_size = %d, out_size = %d\n", - l, r - l, out_size ); - for( int i = l; i < r; ++i ) std::putchar( in_buffer[i] ); - if( in_buffer[r-1] != '\n' ) std::putchar( '\n' ); - for( int i = 0; i < out_size; ++i ) std::putchar( out_buffer[i] ); - std::putchar( '\n' ); + while( r < read_size && in_buffer[r-1] != '\n' ) ++r; + const int in_size = LZ_compress_write( encoder, in_buffer + l, r - l ); + if( in_size < r - l ) r = l + in_size; + LZ_compress_sync_flush( encoder ); + const int mid_size = LZ_compress_read( encoder, mid_buffer, buffer_size ); + LZ_decompress_write( decoder, mid_buffer, mid_size ); + const int out_size = LZ_decompress_read( decoder, out_buffer, buffer_size ); + + if( out_size != in_size || std::memcmp( in_buffer + l, out_buffer, out_size ) ) + { + std::printf( "sync error at pos %d. in_size = %d, out_size = %d\n", + l, in_size, out_size ); + for( int i = 0; i < in_size; ++i ) std::putchar( in_buffer[l+i] ); + if( in_buffer[l+in_size-1] != '\n' ) std::putchar( '\n' ); + for( int i = 0; i < out_size; ++i ) std::putchar( out_buffer[i] ); + std::putchar( '\n' ); + } } } LZ_decompress_close( decoder ); LZ_compress_close( encoder ); + std::fclose( file ); return 0; } @@ -29,7 +29,7 @@ extern "C" { #endif -const char * const LZ_version_string = "0.6"; +const char * const LZ_version_string = "0.7"; enum { min_dictionary_bits = 12, min_dictionary_size = 1 << min_dictionary_bits, @@ -183,7 +183,7 @@ const char * format_num( long long num, long long limit = 9999, } -long long getnum( const char * ptr, const int bs, +long long getnum( const char * ptr, const int bs = 0, const long long llimit = LLONG_MIN + 1, const long long ulimit = LLONG_MAX ) throw() { @@ -462,8 +462,6 @@ int compress( const long long member_size, const long long volume_size, if( in_size == 0 ) LZ_compress_finish( encoder ); else if( in_size != LZ_compress_write( encoder, in_buffer, in_size ) ) internal_error( "library error" ); -// for( int i = 0; i < 10000; ++i ) -// LZ_compress_sync_flush( encoder ); } int out_size = LZ_compress_read( encoder, out_buffer, out_buffer_size ); // std::fprintf( stderr, "%6d in_size, %5d out_size.\n", in_size, out_size ); @@ -601,15 +599,15 @@ int decompress( const int inhandle, const Pretty_print & pp, extern "C" void signal_handler( int ) throw() { show_error( "Control-C or similar caught, quitting." ); - cleanup_and_fail( 0 ); + cleanup_and_fail( 1 ); } void set_signals() throw() { - signal( SIGTERM, signal_handler ); signal( SIGHUP, signal_handler ); signal( SIGINT, signal_handler ); + signal( SIGTERM, signal_handler ); } } // end namespace @@ -838,7 +836,7 @@ int main( const int argc, const char * argv[] ) const int eindex = extension_index( input_filename ); inhandle = open_instream( input_filename, &in_stats, program_mode, eindex, force, to_stdout ); - if( inhandle < 0 ) continue; + if( inhandle < 0 ) { if( retval < 1 ) retval = 1; continue; } if( program_mode != m_test ) { if( to_stdout ) outhandle = STDOUT_FILENO; diff --git a/testsuite/check.sh b/testsuite/check.sh index 7b67c93..62f9e49 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -18,7 +18,7 @@ if [ ! -x "${LZIP}" ] ; then exit 1 fi -if [ -d tmp ] ; then rm -r tmp ; fi +if [ -d tmp ] ; then rm -rf tmp ; fi mkdir tmp echo -n "testing minilzip..." cd "${objdir}"/tmp @@ -29,7 +29,7 @@ fail=0 "${LZIP}" -cd "${testdir}"/COPYING.lz > copy || fail=1 cmp in copy || fail=1 -for i in s4096 1 2 3 4 5 6 7 8 9; do +for i in s4096 1 2 3 4 5 6 7 8; do "${LZIP}" -k -$i in || fail=1 mv -f in.lz copy.lz || fail=1 echo -n "garbage" >> copy.lz || fail=1 @@ -38,7 +38,7 @@ for i in s4096 1 2 3 4 5 6 7 8 9; do echo -n . done -for i in s4096 1 2 3 4 5 6 7 8 9; do +for i in s4096 1 2 3 4 5 6 7 8; do "${LZIP}" -c -$i in > out || fail=1 echo -n "g" >> out || fail=1 "${LZIP}" -cd out > copy || fail=1 @@ -46,25 +46,25 @@ for i in s4096 1 2 3 4 5 6 7 8 9; do echo -n . done -for i in s4096 1 2 3 4 5 6 7 8 9; do +for i in s4096 1 2 3 4 5 6 7 8; do "${LZIP}" -c -$i < in > out || fail=1 "${LZIP}" -d < out > copy || fail=1 cmp in copy || fail=1 echo -n . done -for i in s4096 1 2 3 4 5 6 7 8 9; do +for i in s4096 1 2 3 4 5 6 7 8; do "${LZIP}" -f -$i -o out < in || fail=1 "${LZIP}" -df -o copy < out.lz || fail=1 cmp in copy || fail=1 echo -n . done -"${LZCHECK}" in || fail=1 +"${LZCHECK}" in 2>/dev/null || fail=1 echo -n . echo -if [ ${fail} = 0 ]; then +if [ ${fail} = 0 ] ; then echo "tests completed successfully." cd "${objdir}" && rm -r tmp else |