diff options
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | INSTALL | 2 | ||||
-rw-r--r-- | Makefile.in | 5 | ||||
-rw-r--r-- | NEWS | 7 | ||||
-rwxr-xr-x | configure | 4 | ||||
-rw-r--r-- | decoder.cc | 13 | ||||
-rw-r--r-- | decoder.h | 7 | ||||
-rw-r--r-- | doc/lzlib.info | 34 | ||||
-rw-r--r-- | doc/lzlib.texinfo | 16 | ||||
-rw-r--r-- | encoder.cc | 2 | ||||
-rw-r--r-- | encoder.h | 4 | ||||
-rw-r--r-- | lzcheck.cc | 111 | ||||
-rw-r--r-- | lzip.h | 2 | ||||
-rw-r--r-- | lzlib.cc | 20 | ||||
-rw-r--r-- | lzlib.h | 6 | ||||
-rw-r--r-- | main.cc | 4 | ||||
-rwxr-xr-x | testsuite/check.sh | 8 |
17 files changed, 200 insertions, 50 deletions
@@ -1,3 +1,8 @@ +2009-09-02 Antonio Diaz Diaz <ant_diaz@teleline.es> + + * Version 0.6 released. + * The LZ_compress_sync_flush mechanism has been fixed. + 2009-07-03 Antonio Diaz Diaz <ant_diaz@teleline.es> * Version 0.5 released. @@ -1,7 +1,7 @@ Requirements ------------ You will need a C++ compiler. -I use gcc 4.3.3 and 3.3.6, but the code should compile with any +I use gcc 4.3.4 and 3.3.6, but the code should compile with any standards compliant compiler. Gcc is available at http://gcc.gnu.org. diff --git a/Makefile.in b/Makefile.in index 1d05051..86a9b18 100644 --- a/Makefile.in +++ b/Makefile.in @@ -34,6 +34,9 @@ $(progname)_shared : $(objs) $(libname).so.$(pkgversion) $(progname)_profiled : $(objs) $(libname).a $(CXX) $(LDFLAGS) -pg -o $(progname)_profiled $(objs) $(libname).a +lzcheck : lzcheck.o $(libname).a + $(CXX) $(LDFLAGS) -o lzcheck lzcheck.o $(libname).a + main.o : main.cc $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< @@ -76,7 +79,7 @@ $(VPATH)/doc/$(progname).1 : $(progname) Makefile : $(VPATH)/configure $(VPATH)/Makefile.in ./config.status -check : all $(VPATH)/testsuite/check.sh +check : all $(VPATH)/testsuite/check.sh lzcheck @$(VPATH)/testsuite/check.sh $(VPATH)/testsuite install : all install-info @@ -1,5 +1,4 @@ -Changes in version 0.5: +Changes in version 0.6: -Decompression speed has been improved. - -A build problem with the Sun C++ compiler has been fixed. +The LZ_compress_sync_flush mechanism has been fixed. (Last few bytes +weren't immediately available). @@ -5,13 +5,13 @@ # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. 
# -# Date of this version: 2009-07-03 +# Date of this version: 2009-09-02 invocation_name=$0 args= no_create= pkgname=lzlib -pkgversion=0.5 +pkgversion=0.6 soversion=0 progname=minilzip progname_shared=${progname}_shared @@ -70,7 +70,7 @@ int Circular_buffer::read_data( uint8_t * const out_buffer, const int out_size ) } -int Circular_buffer::write_data( uint8_t * const in_buffer, const int in_size ) throw() +int Circular_buffer::write_data( const uint8_t * const in_buffer, const int in_size ) throw() { int size = 0; if( put >= get ) @@ -109,6 +109,7 @@ bool LZ_decoder::verify_trailer() else error = true; } if( format_version == 0 ) trailer.member_size( member_position() ); + if( !range_decoder.code_is_zero() ) error = true; if( trailer.data_crc() != crc() ) error = true; if( trailer.data_size() != data_position() ) error = true; if( trailer.member_size() != member_position() ) error = true; @@ -122,6 +123,14 @@ int LZ_decoder::decode_member() { if( member_finished_ ) return 0; if( !range_decoder.try_reload() ) return 0; + if( verify_trailer_pending ) + { + if( range_decoder.available_bytes() < File_trailer::size( format_version ) ) + return 0; + verify_trailer_pending = false; + member_finished_ = true; + if( verify_trailer() ) return 0; else return 3; + } while( true ) { if( range_decoder.finished() ) return 2; @@ -192,6 +201,8 @@ int LZ_decoder::decode_member() range_decoder.normalize(); if( len == min_match_len ) // End Of Stream marker { + if( range_decoder.available_bytes() < File_trailer::size( format_version ) ) + { verify_trailer_pending = true; return 0; } member_finished_ = true; if( verify_trailer() ) return 0; else return 3; } @@ -27,7 +27,7 @@ class Input_buffer : public Circular_buffer { - enum { min_available_bytes = 8 + sizeof( File_trailer ) }; + enum { min_available_bytes = 8 }; bool at_stream_end_; public: @@ -47,7 +47,7 @@ public: ( at_stream_end_ || used_bytes() >= min_available_bytes ) ); } - int write_data( uint8_t * const in_buffer, const 
int in_size ) throw() + int write_data( const uint8_t * const in_buffer, const int in_size ) throw() { if( at_stream_end_ || in_size <= 0 ) return 0; return Circular_buffer::write_data( in_buffer, in_size ); @@ -75,6 +75,7 @@ public: bool at_stream_end() const throw() { return ibuf.at_stream_end(); } int available_bytes() const throw() { return ibuf.used_bytes(); } + bool code_is_zero() const throw() { return ( code == 0 ); } bool enough_available_bytes() const throw() { return ibuf.enough_available_bytes(); } bool finished() const throw() { return ibuf.finished(); } @@ -233,6 +234,7 @@ class LZ_decoder : public Circular_buffer const int dictionary_size; uint32_t crc_; bool member_finished_; + bool verify_trailer_pending; unsigned int rep0; // rep[0-3] latest four distances unsigned int rep1; // used for efficient coding of unsigned int rep2; // repeated distances @@ -298,6 +300,7 @@ public: dictionary_size( header.dictionary_size() ), crc_( 0xFFFFFFFF ), member_finished_( false ), + verify_trailer_pending( false ), rep0( 0 ), rep1( 0 ), rep2( 0 ), diff --git a/doc/lzlib.info b/doc/lzlib.info index e688ea6..b9ddf30 100644 --- a/doc/lzlib.info +++ b/doc/lzlib.info @@ -12,7 +12,7 @@ File: lzlib.info, Node: Top, Next: Introduction, Up: (dir) Lzlib ***** -This manual is for Lzlib (version 0.5, 3 July 2009). +This manual is for Lzlib (version 0.6, 2 September 2009). * Menu: @@ -53,6 +53,12 @@ read/write functions until all the data has been processed by the library. This interface is safer and less error prone than the traditional zlib interface. + Compression/decompression is done when the read function is called. +This means the value returned by the position functions will not be +updated until some data is read, even if you write a lot of data. If +you want the data to be compressed in advance, just call the read +function with a SIZE equal to 0. + Lzlib will correctly decompress a data stream which is the concatenation of two or more compressed data streams. 
The result is the concatenation of the corresponding uncompressed data streams. Integrity @@ -443,7 +449,7 @@ Example 1: Normal compression (MEMBER_SIZE > total output). 4) go back to step 2 until all input data has been written 5) LZ_compress_finish 6) LZ_compress_read - 7) go back to step 6 until LZ_compress_read returns 0 + 7) go back to step 6 until LZ_compress_finished returns 1 8) LZ_compress_close @@ -455,7 +461,7 @@ Example 2: Decompression. 4) go back to step 2 until all input data has been written 5) LZ_decompress_finish 6) LZ_decompress_read - 7) go back to step 6 until LZ_decompress_read returns 0 + 7) go back to step 6 until LZ_decompress_finished returns 1 8) LZ_decompress_close @@ -469,7 +475,7 @@ Example 3: Multimember compression (MEMBER_SIZE < total output). 6) go back to step 2 until all input data has been written 7) LZ_compress_finish 8) LZ_compress_read - 9) go back to step 8 until LZ_compress_read returns 0 + 9) go back to step 8 until LZ_compress_finished returns 1 10) LZ_compress_close @@ -513,15 +519,15 @@ Concept Index Tag Table: Node: Top219 -Node: Introduction1010 -Node: Library Version2477 -Node: Buffering3122 -Node: Compression Functions4229 -Node: Decompression Functions9731 -Node: Error Codes13169 -Node: Data Format15105 -Node: Examples17072 -Node: Problems18494 -Node: Concept Index19064 +Node: Introduction1015 +Node: Library Version2790 +Node: Buffering3435 +Node: Compression Functions4542 +Node: Decompression Functions10044 +Node: Error Codes13482 +Node: Data Format15418 +Node: Examples17385 +Node: Problems18819 +Node: Concept Index19389 End Tag Table diff --git a/doc/lzlib.texinfo b/doc/lzlib.texinfo index d0cdd2b..409b053 100644 --- a/doc/lzlib.texinfo +++ b/doc/lzlib.texinfo @@ -5,8 +5,8 @@ @finalout @c %**end of header -@set UPDATED 3 July 2009 -@set VERSION 0.5 +@set UPDATED 2 September 2009 +@set VERSION 0.6 @dircategory Data Compression @direntry @@ -69,6 +69,12 @@ read/write functions until all the data has been processed 
by the library. This interface is safer and less error prone than the traditional zlib interface. +Compression/decompression is done when the read function is called. This +means the value returned by the position functions will not be updated +until some data is read, even if you write a lot of data. If you want +the data to be compressed in advance, just call the read function with a +@var{size} equal to 0. + Lzlib will correctly decompress a data stream which is the concatenation of two or more compressed data streams. The result is the concatenation of the corresponding uncompressed data streams. Integrity testing of @@ -515,7 +521,7 @@ Example 1: Normal compression (@var{member_size} > total output). 4) go back to step 2 until all input data has been written 5) LZ_compress_finish 6) LZ_compress_read -7) go back to step 6 until LZ_compress_read returns 0 +7) go back to step 6 until LZ_compress_finished returns 1 8) LZ_compress_close @end example @@ -531,7 +537,7 @@ Example 2: Decompression. 4) go back to step 2 until all input data has been written 5) LZ_decompress_finish 6) LZ_decompress_read -7) go back to step 6 until LZ_decompress_read returns 0 +7) go back to step 6 until LZ_decompress_finished returns 1 8) LZ_decompress_close @end example @@ -549,7 +555,7 @@ Example 3: Multimember compression (@var{member_size} < total output). 
6) go back to step 2 until all input data has been written 7) LZ_compress_finish 8) LZ_compress_read - 9) go back to step 8 until LZ_compress_read returns 0 + 9) go back to step 8 until LZ_compress_finished returns 1 10) LZ_compress_close @end example @@ -44,7 +44,7 @@ const Dis_slots dis_slots; const Prob_prices prob_prices; -int Matchfinder::write_data( uint8_t * const in_buffer, const int in_size ) throw() +int Matchfinder::write_data( const uint8_t * const in_buffer, const int in_size ) throw() { if( at_stream_end_ ) return 0; const int size = std::min( buffer_size - stream_pos, in_size ); @@ -210,7 +210,7 @@ public: return i; } - int write_data( uint8_t * const in_buffer, const int in_size ) throw(); + int write_data( const uint8_t * const in_buffer, const int in_size ) throw(); void reset() throw(); bool move_pos() throw(); int longest_match_len( int * const distances = 0 ) throw(); @@ -417,7 +417,7 @@ class LZ_encoder { enum { dis_align_mask = dis_align_size - 1, infinite_price = 0x0FFFFFFF, - max_marker_size = 15, + max_marker_size = 16, num_rep_distances = 4 }; // must be 4 struct Trial diff --git a/lzcheck.cc b/lzcheck.cc new file mode 100644 index 0000000..b56081e --- /dev/null +++ b/lzcheck.cc @@ -0,0 +1,111 @@ +/* Lzcheck - A test program for the lzlib library + Copyright (C) 2009 Antonio Diaz Diaz. + + This program is free software: you have unlimited permission + to copy, distribute and modify it. + + Usage is: + lzcheck filename.txt + + This program reads the specified text file and then compresses it, + line by line, to test the flushing mechanism. 
+*/ + +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <stdint.h> +#include <unistd.h> + +#include "lzlib.h" + +#ifndef LLONG_MAX +#define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL +#endif +#ifndef LLONG_MIN +#define LLONG_MIN (-LLONG_MAX - 1LL) +#endif +#ifndef ULLONG_MAX +#define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL +#endif + + +int main( const int argc, const char * argv[] ) + { + if( argc < 2 ) + { + std::fprintf( stderr, "Usage: lzcheck filename.txt\n" ); + return 1; + } + + FILE *f = std::fopen( argv[1], "rb" ); + if( !f ) + { + std::fprintf( stderr, "Can't open file `%s' for reading\n", argv[1] ); + return 1; + } + + const int in_buffer_size = 1 << 20; + const int mid_buffer_size = 65536; + const int out_buffer_size = in_buffer_size; + uint8_t in_buffer[in_buffer_size]; + uint8_t mid_buffer[mid_buffer_size]; + uint8_t out_buffer[out_buffer_size]; + const int in_size = std::fread( in_buffer, 1, in_buffer_size, f ); + if( in_size >= in_buffer_size ) + { + std::fprintf( stderr, "input file `%s' too big.\n", argv[1] ); + return 1; + } + std::fclose( f ); + + const int dictionary_size = in_buffer_size; + const int match_len_limit = 80; + const long long member_size = LLONG_MAX; + void * encoder = LZ_compress_open( dictionary_size, match_len_limit, + member_size ); + if( !encoder || LZ_compress_errno( encoder ) != LZ_ok ) + { + const bool mem_error = ( LZ_compress_errno( encoder ) == LZ_mem_error ); + LZ_compress_close( encoder ); + if( mem_error ) + { + std::fprintf( stderr, "not enough memory.\n" ); + return 1; + } + std::fprintf( stderr, "internal error: invalid argument to encoder.\n" ); + return 3; + } + + void * decoder = LZ_decompress_open(); + if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok ) + { + LZ_decompress_close( decoder ); + std::fprintf( stderr, "not enough memory.\n" ); + return 1; + } + + for( int l = 0, r = 0; r < in_size; l = r ) + { + while( ++r < in_size && in_buffer[r-1] != '\n' ) ; + LZ_compress_write( encoder, in_buffer + l, r 
- l ); + LZ_compress_sync_flush( encoder ); + int mid_size = LZ_compress_read( encoder, mid_buffer, mid_buffer_size ); + LZ_decompress_write( decoder, mid_buffer, mid_size ); + int out_size = LZ_decompress_read( decoder, out_buffer, out_buffer_size ); + + if( out_size != r - l || std::memcmp( in_buffer + l, out_buffer, out_size ) ) + { + std::printf( "sync error at pos %d. in_size = %d, out_size = %d\n", + l, r - l, out_size ); + for( int i = l; i < r; ++i ) std::putchar( in_buffer[i] ); + if( in_buffer[r-1] != '\n' ) std::putchar( '\n' ); + for( int i = 0; i < out_size; ++i ) std::putchar( out_buffer[i] ); + std::putchar( '\n' ); + } + } + + LZ_decompress_close( decoder ); + LZ_compress_close( encoder ); + return 0; + } @@ -277,5 +277,5 @@ public: } int read_data( uint8_t * const out_buffer, const int out_size ) throw(); - int write_data( uint8_t * const in_buffer, const int in_size ) throw(); + int write_data( const uint8_t * const in_buffer, const int in_size ) throw(); }; @@ -45,7 +45,7 @@ struct Encoder Matchfinder * matchfinder; LZ_encoder * lz_encoder; LZ_errno lz_errno; - bool flush_pending; + int flush_pending; const File_header member_header; Encoder( const File_header & header ) throw() @@ -55,7 +55,7 @@ struct Encoder matchfinder( 0 ), lz_encoder( 0 ), lz_errno( LZ_ok ), - flush_pending( false ), + flush_pending( 0 ), member_header( header ) {} }; @@ -180,7 +180,7 @@ int LZ_compress_finish( void * const encoder ) if( !verify_encoder( encoder ) ) return -1; Encoder & e = *(Encoder *)encoder; e.matchfinder->flushing( true ); - e.flush_pending = false; + e.flush_pending = 0; return 0; } @@ -191,12 +191,12 @@ int LZ_compress_sync_flush( void * const encoder ) Encoder & e = *(Encoder *)encoder; if( !e.flush_pending && !e.matchfinder->at_stream_end() ) { - e.flush_pending = true; + e.flush_pending = 2; // 2 consecutive markers guarantee decoding e.matchfinder->flushing( true ); if( !e.lz_encoder->encode_member( false ) ) { e.lz_errno = LZ_library_error; 
return -1; } - if( e.lz_encoder->sync_flush() ) - { e.matchfinder->flushing( false ); e.flush_pending = false; } + while( e.flush_pending > 0 && e.lz_encoder->sync_flush() ) + { if( --e.flush_pending <= 0 ) e.matchfinder->flushing( false ); } } return 0; } @@ -209,13 +209,13 @@ int LZ_compress_read( void * const encoder, uint8_t * const buffer, Encoder & e = *(Encoder *)encoder; if( !e.lz_encoder->encode_member( !e.flush_pending ) ) { e.lz_errno = LZ_library_error; return -1; } - if( e.flush_pending && e.lz_encoder->sync_flush() ) - { e.matchfinder->flushing( false ); e.flush_pending = false; } + while( e.flush_pending > 0 && e.lz_encoder->sync_flush() ) + { if( --e.flush_pending <= 0 ) e.matchfinder->flushing( false ); } return e.lz_encoder->read_data( buffer, size ); } -int LZ_compress_write( void * const encoder, uint8_t * const buffer, +int LZ_compress_write( void * const encoder, const uint8_t * const buffer, const int size ) { if( !verify_encoder( encoder ) ) return -1; @@ -370,7 +370,7 @@ int LZ_decompress_read( void * const decoder, uint8_t * const buffer, } -int LZ_decompress_write( void * const decoder, uint8_t * const buffer, +int LZ_decompress_write( void * const decoder, const uint8_t * const buffer, const int size ) { if( !verify_decoder( decoder ) ) return -1; @@ -29,7 +29,7 @@ extern "C" { #endif -const char * const LZ_version_string = "0.5"; +const char * const LZ_version_string = "0.6"; enum { min_dictionary_bits = 12, min_dictionary_size = 1 << min_dictionary_bits, @@ -54,7 +54,7 @@ int LZ_compress_sync_flush( void * const encoder ); int LZ_compress_read( void * const encoder, uint8_t * const buffer, const int size ); -int LZ_compress_write( void * const encoder, uint8_t * const buffer, +int LZ_compress_write( void * const encoder, const uint8_t * const buffer, const int size ); int LZ_compress_write_size( void * const encoder ); @@ -74,7 +74,7 @@ int LZ_decompress_finish( void * const decoder ); int LZ_decompress_read( void * const decoder, 
uint8_t * const buffer, const int size ); -int LZ_decompress_write( void * const decoder, uint8_t * const buffer, +int LZ_decompress_write( void * const decoder, const uint8_t * const buffer, const int size ); enum LZ_errno LZ_decompress_errno( void * const decoder ); @@ -26,6 +26,7 @@ #include <algorithm> #include <cerrno> #include <climits> +#include <csignal> #include <cstdio> #include <cstdlib> #include <cstring> @@ -33,7 +34,6 @@ #include <vector> #include <fcntl.h> #include <stdint.h> -#include <signal.h> #include <unistd.h> #include <utime.h> #include <sys/stat.h> @@ -462,6 +462,8 @@ int compress( const long long member_size, const long long volume_size, if( in_size == 0 ) LZ_compress_finish( encoder ); else if( in_size != LZ_compress_write( encoder, in_buffer, in_size ) ) internal_error( "library error" ); +// for( int i = 0; i < 10000; ++i ) +// LZ_compress_sync_flush( encoder ); } int out_size = LZ_compress_read( encoder, out_buffer, out_buffer_size ); // std::fprintf( stderr, "%6d in_size, %5d out_size.\n", in_size, out_size ); diff --git a/testsuite/check.sh b/testsuite/check.sh index 2ce2ca5..7b67c93 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -10,6 +10,7 @@ export LC_ALL objdir=`pwd` testdir=`cd "$1" ; pwd` LZIP="${objdir}"/minilzip +LZCHECK="${objdir}"/lzcheck framework_failure() { echo 'failure in testing framework'; exit 1; } if [ ! -x "${LZIP}" ] ; then @@ -59,10 +60,13 @@ for i in s4096 1 2 3 4 5 6 7 8 9; do echo -n . done +"${LZCHECK}" in || fail=1 +echo -n . + echo -if test ${fail} = 0; then +if [ ${fail} = 0 ]; then echo "tests completed successfully." - if cd "${objdir}" ; then rm -r tmp ; fi + cd "${objdir}" && rm -r tmp else echo "tests failed." fi |