From 447d8df5e4c1f6c985791cb3fea2922fcc1cfbee Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 7 Nov 2015 14:33:30 +0100 Subject: Adding upstream version 0.4. Signed-off-by: Daniel Baumann --- ChangeLog | 14 ++++- Makefile.in | 15 +++-- NEWS | 11 +++- README | 4 +- configure | 6 +- decoder.cc | 36 +++++++----- decoder.h | 110 ++++++++++++++++++++++++------------ doc/lzlib.info | 104 +++++++++++++++++++++++----------- doc/lzlib.texinfo | 79 ++++++++++++++++++++------ encoder.cc | 132 ++++++++++++++++++++++++++++--------------- encoder.h | 77 ++++++++++++------------- lzip.h | 9 ++- lzlib.cc | 80 +++++++++++++++++--------- lzlib.h | 9 +-- main.cc | 163 ++++++++++++++++++++++++++--------------------------- testsuite/check.sh | 2 + 16 files changed, 535 insertions(+), 316 deletions(-) diff --git a/ChangeLog b/ChangeLog index f90fe5f..d8ac767 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,15 +1,23 @@ -2009-05-03 Antonio Diaz +2009-06-03 Antonio Diaz Diaz + + * Version 0.4 released. + * Added new function LZ_compress_sync_flush. + * Added new function LZ_compress_write_size. + * Decompression speed has been improved. + * Added chapter "Buffering" to the manual. + +2009-05-03 Antonio Diaz Diaz * Version 0.3 released. * Lzilib is now built as a shared library (in addition to static). -2009-04-26 Antonio Diaz +2009-04-26 Antonio Diaz Diaz * Version 0.2 released. * Fixed a segfault when decompressing trailing garbage. * Fixed a false positive in LZ_(de)compress_finished. -2009-04-21 Antonio Diaz +2009-04-21 Antonio Diaz Diaz * Version 0.1 released. 
diff --git a/Makefile.in b/Makefile.in index 2249365..8c5f93c 100644 --- a/Makefile.in +++ b/Makefile.in @@ -12,9 +12,9 @@ sh_lib_objs = sh_decoder.o sh_encoder.o sh_lzlib.o objs = arg_parser.o main.o -.PHONY : all doc check install install-info \ - uninstall uninstall-info \ - dist clean distclean +.PHONY : all install install-info install-man install-strip \ + uninstall uninstall-info uninstall-man \ + doc info man check dist clean distclean all : $(progname) $(progname_shared) @@ -60,15 +60,17 @@ arg_parser.o : Makefile arg_parser.h main.o : Makefile arg_parser.h lzlib.h $(libname).a -doc : info $(VPATH)/doc/$(progname).1 +doc : info man info : $(VPATH)/doc/$(pkgname).info $(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texinfo cd $(VPATH)/doc && makeinfo $(pkgname).texinfo +man : $(VPATH)/doc/$(progname).1 + $(VPATH)/doc/$(progname).1 : $(progname) - help2man -o $(VPATH)/doc/$(progname).1 ./$(progname) + help2man -o $(VPATH)/doc/$(progname).1 --no-info ./$(progname) Makefile : $(VPATH)/configure $(VPATH)/Makefile.in ./config.status @@ -96,6 +98,9 @@ install-info : $(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info $(DESTDIR)$(infodir)/$(pkgname).info -install-info --info-dir=$(DESTDIR)$(infodir) $(DESTDIR)$(infodir)/$(pkgname).info +install-strip : all + $(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install + uninstall : uninstall-info -rm -f $(DESTDIR)$(includedir)/$(pkgname).h -rm -f $(DESTDIR)$(libdir)/$(libname).a diff --git a/NEWS b/NEWS index 5586409..f97bcd8 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,10 @@ -Changes in version 0.3: +Changes in version 0.4: -Lzilib is now built as a shared library (in addition to static). +Partial flush of the compressed data has been implemented with the +function LZ_compress_sync_flush. + +The function LZ_compress_write_size has been added. + +Decompression speed has been improved. + +The chapter "Buffering" has been added to the manual. 
diff --git a/README b/README index 461e233..dabf521 100644 --- a/README +++ b/README @@ -1,7 +1,7 @@ Description -The lzlib compression library provides in-memory LZMA compression and -decompression functions, including integrity checking of the +Lzlib is a data compression library providing in-memory LZMA compression +and decompression functions, including integrity checking of the uncompressed data. The compressed data format used by the library is the lzip format. diff --git a/configure b/configure index 1301d47..ba2cad0 100755 --- a/configure +++ b/configure @@ -5,13 +5,13 @@ # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. # -# Date of this version: 2009-05-03 +# Date of this version: 2009-06-03 invocation_name=$0 args= no_create= pkgname=lzlib -pkgversion=0.3 +pkgversion=0.4 soversion=0 progname=minilzip progname_shared=${progname}_shared @@ -115,7 +115,7 @@ while [ x"$1" != x ] ; do CXXFLAGS=*) CXXFLAGS=${optarg} ;; LDFLAGS=*) LDFLAGS=${optarg} ;; - --build=* | --enable-* | --with-* | --*dir=* | *=* | *-*-*) ;; + --* | *=* | *-*-*) ;; *) echo "configure: Unrecognized option: \"${option}\"; use --help for usage." 
1>&2 exit 1 ;; diff --git a/decoder.cc b/decoder.cc index aa394de..f68edf9 100644 --- a/decoder.cc +++ b/decoder.cc @@ -51,7 +51,7 @@ int Circular_buffer::read_data( uint8_t * const out_buffer, const int out_size ) size = std::min( buffer_size - get, out_size ); if( size > 0 ) { - std::memmove( out_buffer, buffer + get, size ); + std::memcpy( out_buffer, buffer + get, size ); get += size; if( get >= buffer_size ) get = 0; } @@ -61,7 +61,7 @@ int Circular_buffer::read_data( uint8_t * const out_buffer, const int out_size ) const int size2 = std::min( put - get, out_size - size ); if( size2 > 0 ) { - std::memmove( out_buffer + size, buffer + get, size2 ); + std::memcpy( out_buffer + size, buffer + get, size2 ); get += size2; size += size2; } @@ -78,7 +78,7 @@ int Circular_buffer::write_data( uint8_t * const in_buffer, const int in_size ) size = std::min( buffer_size - put - (get == 0), in_size ); if( size > 0 ) { - std::memmove( buffer + put, in_buffer, size ); + std::memcpy( buffer + put, in_buffer, size ); put += size; if( put >= buffer_size ) put = 0; } @@ -88,7 +88,7 @@ int Circular_buffer::write_data( uint8_t * const in_buffer, const int in_size ) const int size2 = std::min( get - put - 1, in_size - size ); if( size2 > 0 ) { - std::memmove( buffer + put, in_buffer + size, size2 ); + std::memcpy( buffer + put, in_buffer + size, size2 ); put += size2; size += size2; } @@ -104,8 +104,9 @@ bool LZ_decoder::verify_trailer() const int trailer_size = trailer.size( format_version ); for( int i = 0; i < trailer_size && !error; ++i ) { - if( range_decoder.finished() ) error = true; - ((uint8_t *)&trailer)[i] = range_decoder.get_byte(); + if( !range_decoder.finished() ) + ((uint8_t *)&trailer)[i] = range_decoder.get_byte(); + else error = true; } if( format_version == 0 ) trailer.member_size( member_position() ); if( trailer.data_crc() != crc() ) error = true; @@ -120,14 +121,12 @@ bool LZ_decoder::verify_trailer() int LZ_decoder::decode_member() { if( member_finished_ ) 
return 0; + if( !range_decoder.try_reload() ) return 0; while( true ) { - if( range_decoder.available_bytes() <= 0 || - ( !range_decoder.at_stream_end() && - range_decoder.available_bytes() < min_available_bytes ) ) - return 0; // need more data - if( free_bytes() < max_match_len ) return 0; if( range_decoder.finished() ) return 2; + if( !range_decoder.enough_available_bytes() || !enough_free_bytes() ) + return 0; const int pos_state = data_position() & pos_state_mask; if( range_decoder.decode_bit( bm_match[state()][pos_state] ) == 0 ) { @@ -173,9 +172,8 @@ int LZ_decoder::decode_member() } else { - rep3 = rep2; rep2 = rep1; rep1 = rep0; + unsigned int rep0_saved = rep0; len = min_match_len + len_decoder.decode( range_decoder, pos_state ); - state.set_match(); const int dis_slot = range_decoder.decode_tree( bm_dis_slot[get_dis_state(len)], dis_slot_bits ); if( dis_slot < start_dis_model ) rep0 = dis_slot; else @@ -190,17 +188,27 @@ int LZ_decoder::decode_member() rep0 += range_decoder.decode_tree_reversed( bm_align, dis_align_bits ); if( rep0 == 0xFFFFFFFF ) // Marker found { + rep0 = rep0_saved; + range_decoder.normalize(); if( len == min_match_len ) // End Of Stream marker { member_finished_ = true; if( verify_trailer() ) return 0; else return 3; } + if( len == min_match_len + 1 ) // Sync Flush marker + { + if( range_decoder.try_reload( true ) ) continue; + else return 0; + } return 4; } + if( rep0 >= (unsigned int)dictionary_size ) return 1; } } + rep3 = rep2; rep2 = rep1; rep1 = rep0_saved; + state.set_match(); } - if( !copy_block( rep0, len ) ) return 1; + copy_block( rep0, len ); prev_byte = get_byte( 0 ); } } diff --git a/decoder.h b/decoder.h index 785f310..6484898 100644 --- a/decoder.h +++ b/decoder.h @@ -25,10 +25,9 @@ Public License. 
*/ -const int min_available_bytes = 8 + sizeof( File_trailer ); - class Input_buffer : public Circular_buffer { + enum { min_available_bytes = 8 + sizeof( File_trailer ) }; bool at_stream_end_; public: @@ -42,6 +41,12 @@ public: bool finished() const throw() { return at_stream_end_ && !used_bytes(); } void purge() throw() { at_stream_end_ = true; Circular_buffer::reset(); } + bool enough_available_bytes() const throw() + { + return ( used_bytes() > 0 && + ( at_stream_end_ || used_bytes() >= min_available_bytes ) ); + } + int write_data( uint8_t * const in_buffer, const int in_size ) throw() { if( at_stream_end_ || in_size <= 0 ) return 0; @@ -55,6 +60,7 @@ class Range_decoder mutable long long member_pos; uint32_t code; uint32_t range; + bool reload_pending; Input_buffer & ibuf; public: @@ -63,62 +69,86 @@ public: member_pos( header_size ), code( 0 ), range( 0xFFFFFFFF ), + reload_pending( false ), ibuf( buf ) { for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte(); } + bool at_stream_end() const throw() { return ibuf.at_stream_end(); } + int available_bytes() const throw() { return ibuf.used_bytes(); } + bool enough_available_bytes() const throw() + { return ibuf.enough_available_bytes(); } + bool finished() const throw() { return ibuf.finished(); } + long long member_position() const throw() { return member_pos; } + uint8_t get_byte() const { ++member_pos; return ibuf.get_byte(); } - bool at_stream_end() const throw() { return ibuf.at_stream_end(); } - int available_bytes() const throw() { return ibuf.used_bytes(); } - bool finished() const throw() { return ibuf.finished(); } - long long member_position() const throw() { return member_pos; } + bool try_reload( const bool force = false ) throw() + { + if( force ) reload_pending = true; + if( reload_pending && available_bytes() >= 5 ) + { + code = 0; + range = 0xFFFFFFFF; + reload_pending = false; + for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte(); + } + return !reload_pending; + } + + void 
normalize() + { + if( range <= 0x00FFFFFF ) + { range <<= 8; code = (code << 8) | get_byte(); } + } int decode( const int num_bits ) { int symbol = 0; - for( int i = num_bits - 1; i >= 0; --i ) + for( int i = num_bits; i > 0; --i ) { - range >>= 1; symbol <<= 1; - if( code >= range ) - { code -= range; symbol |= 1; } if( range <= 0x00FFFFFF ) - { range <<= 8; code = (code << 8) | get_byte(); } + { + range <<= 7; code = (code << 8) | get_byte(); + if( code >= range ) { code -= range; symbol |= 1; } + } + else + { + range >>= 1; + if( code >= range ) { code -= range; symbol |= 1; } + } } return symbol; } int decode_bit( Bit_model & bm ) { - int symbol; + normalize(); const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability; if( code < bound ) { range = bound; bm.probability += (bit_model_total - bm.probability) >> bit_model_move_bits; - symbol = 0; + return 0; } else { range -= bound; code -= bound; bm.probability -= bm.probability >> bit_model_move_bits; - symbol = 1; + return 1; } - if( range <= 0x00FFFFFF ) - { range <<= 8; code = (code << 8) | get_byte(); } - return symbol; } int decode_tree( Bit_model bm[], const int num_bits ) { int model = 1; for( int i = num_bits; i > 0; --i ) - model = ( model << 1 ) | decode_bit( bm[model-1] ); + model = ( model << 1 ) | decode_bit( bm[model] ); return model - (1 << num_bits); } @@ -126,27 +156,31 @@ public: { int model = 1; int symbol = 0; - for( int i = 1; i < (1 << num_bits); i <<= 1 ) + for( int i = 0; i < num_bits; ++i ) { - const int bit = decode_bit( bm[model-1] ); - model = ( model << 1 ) | bit; - if( bit ) symbol |= i; + const int bit = decode_bit( bm[model] ); + model <<= 1; + if( bit ) { model |= 1; symbol |= (1 << i); } } return symbol; } int decode_matched( Bit_model bm[], const int match_byte ) { + Bit_model *bm1 = bm + 0x100; int symbol = 1; - for( int i = 7; i >= 0; --i ) + for( int i = 1; i <= 8; ++i ) { - const int match_bit = ( match_byte >> i ) & 1; - const int bit = decode_bit( 
bm[(match_bit<<8)+symbol+0xFF] ); + const int match_bit = ( match_byte << i ) & 0x100; + const int bit = decode_bit( bm1[match_bit+symbol] ); symbol = ( symbol << 1 ) | bit; - if( match_bit != bit ) break; + if( ( match_bit && !bit ) || ( !match_bit && bit ) ) + { + while( ++i <= 8 ) + symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); + break; + } } - while( symbol < 0x100 ) - symbol = ( symbol << 1 ) | decode_bit( bm[symbol-1] ); return symbol & 0xFF; } }; @@ -193,6 +227,7 @@ public: class LZ_decoder : public Circular_buffer { + enum { min_free_bytes = max_match_len }; long long partial_data_pos; const int format_version; const int dictionary_size; @@ -220,7 +255,6 @@ class LZ_decoder : public Circular_buffer Len_decoder rep_match_len_decoder; Literal_decoder literal_decoder; -// using Circular_buffer::get_byte; uint8_t get_byte( const int distance ) const throw() { int i = put - distance - 1; @@ -235,20 +269,23 @@ class LZ_decoder : public Circular_buffer if( ++put >= buffer_size ) { partial_data_pos += put; put = 0; } } - bool copy_block( const int distance, int len ) + void copy_block( const int distance, int len ) { - if( distance < 0 || distance >= dictionary_size || - len <= 0 || len > max_match_len ) return false; int i = put - distance - 1; if( i < 0 ) i += buffer_size; - for( ; len > 0 ; --len ) + if( len < buffer_size - std::max( put, i ) && len <= distance ) + { + crc32.update( crc_, buffer + i, len ); + std::memcpy( buffer + put, buffer + i, len ); + put += len; + } + else for( ; len > 0 ; --len ) { crc32.update( crc_, buffer[i] ); buffer[put] = buffer[i]; if( ++put >= buffer_size ) { partial_data_pos += put; put = 0; } if( ++i >= buffer_size ) i = 0; } - return true; } bool verify_trailer(); @@ -256,7 +293,7 @@ class LZ_decoder : public Circular_buffer public: LZ_decoder( const File_header & header, Input_buffer & ibuf ) : - Circular_buffer( std::max( 65536, header.dictionary_size() ) + max_match_len ), + Circular_buffer( std::max( 65536, 
header.dictionary_size() ) + min_free_bytes ), partial_data_pos( 0 ), format_version( header.version ), dictionary_size( header.dictionary_size() ), @@ -270,6 +307,9 @@ public: range_decoder( sizeof header, ibuf ), literal_decoder() {} + bool enough_free_bytes() const throw() + { return free_bytes() >= min_free_bytes; } + uint32_t crc() const throw() { return crc_ ^ 0xFFFFFFFF; } int decode_member(); bool member_finished() const throw() diff --git a/doc/lzlib.info b/doc/lzlib.info index 28aea4d..fff59c2 100644 --- a/doc/lzlib.info +++ b/doc/lzlib.info @@ -12,12 +12,13 @@ File: lzlib.info, Node: Top, Next: Introduction, Up: (dir) Lzlib ***** -This manual is for Lzlib (version 0.3, 3 May 2009). +This manual is for Lzlib (version 0.4, 3 June 2009). * Menu: * Introduction:: Purpose and features of Lzlib * Library Version:: Checking library version +* Buffering:: Sizes of Lzlib's buffers * Compression Functions:: Descriptions of the compression functions * Decompression Functions:: Descriptions of the decompression functions * Error Codes:: Meaning of codes returned by functions @@ -38,8 +39,8 @@ File: lzlib.info, Node: Introduction, Next: Library Version, Prev: Top, Up: 1 Introduction ************** -The lzlib compression library provides in-memory LZMA compression and -decompression functions, including integrity checking of the +Lzlib is a data compression library providing in-memory LZMA compression +and decompression functions, including integrity checking of the uncompressed data. The compressed data format used by the library is the lzip format. @@ -68,7 +69,7 @@ Igor Pavlov. For a description of the LZMA algorithm, see the Lzip manual.  -File: lzlib.info, Node: Library Version, Next: Compression Functions, Prev: Introduction, Up: Top +File: lzlib.info, Node: Library Version, Next: Buffering, Prev: Introduction, Up: Top 2 Library Version ***************** @@ -88,9 +89,37 @@ application. 
error( "bad library version" );  -File: lzlib.info, Node: Compression Functions, Next: Decompression Functions, Prev: Library Version, Up: Top +File: lzlib.info, Node: Buffering, Next: Compression Functions, Prev: Library Version, Up: Top -3 Compression Functions +3 Buffering +*********** + +Lzlib internal functions need access to a memory chunk at least as large +as the dictionary size (sliding window). For efficiency reasons, the +input buffer for compression is twice as large as the dictionary size. +Finally, for security reasons, lzlib uses two more internal buffers. + + These are the four buffers used by lzlib, and their guaranteed +minimum sizes: + + * Input compression buffer. Written to by the `LZ_compress_write' + function. Its size is two times the dictionary size set with the + `LZ_compress_open' function or 128KiB, whichever is larger. + + * Output compression buffer. Read from by the `LZ_compress_read' + function. Its size is 64KiB. + + * Input decompression buffer. Written to by the + `LZ_decompress_write' function. Its size is 64KiB. + + * Output decompression buffer. Read from by the `LZ_decompress_read' + function. Its size is the dictionary size set with the + `LZ_decompress_open' function or 64KiB, whichever is larger. + + +File: lzlib.info, Node: Compression Functions, Next: Decompression Functions, Prev: Buffering, Up: Top + +4 Compression Functions *********************** These are the functions used to compress data. In case of error, all of @@ -123,6 +152,13 @@ verified by calling `LZ_compress_errno' before using it. stream, give MEMBER_SIZE a value larger than the amount of data to be produced, for example LLONG_MAX. + -- Function: int LZ_compress_restart_member ( void * const ENCODER, + const long long MEMBER_SIZE ) + Use this function to start a new member, in a multimember data + stream. 
Call this function only after + `LZ_compress_member_finished' indicates that the current member + has been fully read (with the `LZ_compress_read' function). + -- Function: int LZ_compress_close ( void * const ENCODER ) Frees all dynamically allocated data structures for this stream. This function discards any unprocessed input and does not flush @@ -133,17 +169,11 @@ verified by calling `LZ_compress_errno' before using it. Use this function to tell `lzlib' that all the data for this stream has already been written (with the `LZ_compress_write' function). - -- Function: int LZ_compress_finish_member ( void * const ENCODER ) - Use this function to tell `lzlib' that all the data for the current - member, in a multimember data stream, has already been written - (with the `LZ_compress_write' function). - - -- Function: int LZ_compress_restart_member ( void * const ENCODER, - const long long MEMBER_SIZE ) - Use this function to start a new member, in a multimember data - stream. Call this function only after - `LZ_compress_member_finished' indicates that the current member - has been fully read (with the `LZ_compress_read' function). + -- Function: int LZ_compress_sync_flush ( void * const ENCODER ) + Use this function to make available to `LZ_compress_read' all the + data already written with the `LZ_compress_write' function. + Repeated use of `LZ_compress_sync_flush' may degrade compression + ratio, so use it only when needed. -- Function: int LZ_compress_read ( void * const ENCODER, uint8_t * const BUFFER, const int SIZE ) @@ -165,6 +195,14 @@ verified by calling `LZ_compress_errno' before using it. might be less than SIZE. Note that writing less than SIZE bytes is not an error. + -- Function: int LZ_compress_write_size ( void * const ENCODER ) + The `LZ_compress_write_size' function returns the maximum number of + bytes that can be immediately written through the + `LZ_compress_write' function. 
+ + It is guaranteed that an immediate call to `LZ_compress_write' will + accept a SIZE up to the returned number of bytes. + -- Function: enum LZ_errno LZ_compress_errno ( void * const ENCODER ) Returns the current error code for ENCODER (*note Error Codes::) @@ -199,7 +237,7 @@ verified by calling `LZ_compress_errno' before using it.  File: lzlib.info, Node: Decompression Functions, Next: Error Codes, Prev: Compression Functions, Up: Top -4 Decompression Functions +5 Decompression Functions ************************* These are the functions used to decompress data. In case of error, all @@ -275,7 +313,7 @@ be verified by calling `LZ_decompress_errno' before using it.  File: lzlib.info, Node: Error Codes, Next: Data Format, Prev: Decompression Functions, Up: Top -5 Error Codes +6 Error Codes ************* Most library functions return -1 to indicate that they have failed. But @@ -286,7 +324,7 @@ what kind of error it was, you need to verify the error code by calling Library functions do not change the value returned by `LZ_(de)compress_errno' when they succeed; thus, the value returned by `LZ_(de)compress_errno' after a successful call is not necessarily -zero, and you should not use `LZ_(de)compress_errno' to determine +LZ_ok, and you should not use `LZ_(de)compress_errno' to determine whether a call failed. If the call failed, then you can examine `LZ_(de)compress_errno'. @@ -327,7 +365,7 @@ whether a call failed. If the call failed, then you can examine  File: lzlib.info, Node: Data Format, Next: Examples, Prev: Error Codes, Up: Top -6 Data Format +7 Data Format ************* In the diagram below, a box like this: @@ -389,7 +427,7 @@ with no additional information before, between, or after them.  
File: lzlib.info, Node: Examples, Next: Problems, Prev: Data Format, Up: Top -7 A small tutorial with examples +8 A small tutorial with examples ******************************** This chaper shows the order in which the library functions should be @@ -437,7 +475,7 @@ Example 3: Multimember compression (MEMBER_SIZE < total output).  File: lzlib.info, Node: Problems, Next: Concept Index, Prev: Examples, Up: Top -8 Reporting Bugs +9 Reporting Bugs **************** There are probably bugs in Lzlib. There are certainly errors and @@ -459,6 +497,7 @@ Concept Index [index] * Menu: +* buffering: Buffering. (line 6) * bugs: Problems. (line 6) * compression functions: Compression Functions. (line 6) * data format: Data Format. (line 6) @@ -474,14 +513,15 @@ Concept Index  Tag Table: Node: Top219 -Node: Introduction968 -Node: Library Version2428 -Node: Compression Functions3085 -Node: Decompression Functions8178 -Node: Error Codes11616 -Node: Data Format13551 -Node: Examples15518 -Node: Problems16940 -Node: Concept Index17510 +Node: Introduction1010 +Node: Library Version2477 +Node: Buffering3122 +Node: Compression Functions4229 +Node: Decompression Functions9731 +Node: Error Codes13169 +Node: Data Format15105 +Node: Examples17072 +Node: Problems18494 +Node: Concept Index19064  End Tag Table diff --git a/doc/lzlib.texinfo b/doc/lzlib.texinfo index 69d96d4..044bd04 100644 --- a/doc/lzlib.texinfo +++ b/doc/lzlib.texinfo @@ -5,8 +5,8 @@ @finalout @c %**end of header -@set UPDATED 3 May 2009 -@set VERSION 0.3 +@set UPDATED 3 June 2009 +@set VERSION 0.4 @dircategory Data Compression @direntry @@ -34,6 +34,7 @@ This manual is for Lzlib (version @value{VERSION}, @value{UPDATED}). 
@menu * Introduction:: Purpose and features of Lzlib * Library Version:: Checking library version +* Buffering:: Sizes of Lzlib's buffers * Compression Functions:: Descriptions of the compression functions * Decompression Functions:: Descriptions of the decompression functions * Error Codes:: Meaning of codes returned by functions @@ -54,8 +55,8 @@ to copy, distribute and modify it. @chapter Introduction @cindex introduction -The lzlib compression library provides in-memory LZMA compression and -decompression functions, including integrity checking of the +Lzlib is a data compression library providing in-memory LZMA compression +and decompression functions, including integrity checking of the uncompressed data. The compressed data format used by the library is the lzip format. @@ -106,6 +107,37 @@ if( LZ_version()[0] != LZ_version_string[0] ) @end example +@node Buffering +@chapter Buffering +@cindex buffering + +Lzlib internal functions need access to a memory chunk at least as large +as the dictionary size (sliding window). For efficiency reasons, the +input buffer for compression is twice as large as the dictionary size. +Finally, for security reasons, lzlib uses two more internal buffers. + +These are the four buffers used by lzlib, and their guaranteed minimum +sizes: + +@itemize @bullet +@item Input compression buffer. Written to by the +@samp{LZ_compress_write} function. Its size is two times the dictionary +size set with the @samp{LZ_compress_open} function or 128KiB, whichever +is larger. + +@item Output compression buffer. Read from by the +@samp{LZ_compress_read} function. Its size is 64KiB. + +@item Input decompression buffer. Written to by the +@samp{LZ_decompress_write} function. Its size is 64KiB. + +@item Output decompression buffer. Read from by the +@samp{LZ_decompress_read} function. Its size is the dictionary size set +with the @samp{LZ_decompress_open} function or 64KiB, whichever is +larger. 
+@end itemize + + @node Compression Functions @chapter Compression Functions @cindex compression functions @@ -142,6 +174,14 @@ for example LLONG_MAX. @end deftypefun +@deftypefun int LZ_compress_restart_member ( void * const @var{encoder}, const long long @var{member_size} ) +Use this function to start a new member, in a multimember data stream. +Call this function only after @samp{LZ_compress_member_finished} +indicates that the current member has been fully read (with the +@samp{LZ_compress_read} function). +@end deftypefun + + @deftypefun int LZ_compress_close ( void * const @var{encoder} ) Frees all dynamically allocated data structures for this stream. This function discards any unprocessed input and does not flush any pending @@ -156,18 +196,11 @@ has already been written (with the @samp{LZ_compress_write} function). @end deftypefun -@deftypefun int LZ_compress_finish_member ( void * const @var{encoder} ) -Use this function to tell @samp{lzlib} that all the data for the current -member, in a multimember data stream, has already been written (with the -@samp{LZ_compress_write} function). -@end deftypefun - - -@deftypefun int LZ_compress_restart_member ( void * const @var{encoder}, const long long @var{member_size} ) -Use this function to start a new member, in a multimember data stream. -Call this function only after @samp{LZ_compress_member_finished} -indicates that the current member has been fully read (with the -@samp{LZ_compress_read} function). +@deftypefun int LZ_compress_sync_flush ( void * const @var{encoder} ) +Use this function to make available to @samp{LZ_compress_read} all the +data already written with the @samp{LZ_compress_write} function. +Repeated use of @samp{LZ_compress_sync_flush} may degrade compression +ratio, so use it only when needed. @end deftypefun @@ -194,6 +227,16 @@ not an error. 
@end deftypefun +@deftypefun int LZ_compress_write_size ( void * const @var{encoder} ) +The @samp{LZ_compress_write_size} function returns the maximum number of +bytes that can be immediately written through the @samp{LZ_compress_write} +function. + +It is guaranteed that an immediate call to @samp{LZ_compress_write} will +accept a @var{size} up to the returned number of bytes. +@end deftypefun + + @deftypefun {enum LZ_errno} LZ_compress_errno ( void * const @var{encoder} ) Returns the current error code for @var{encoder} (@pxref{Error Codes}) @end deftypefun @@ -340,8 +383,8 @@ what kind of error it was, you need to verify the error code by calling Library functions do not change the value returned by @samp{LZ_(de)compress_errno} when they succeed; thus, the value returned by @samp{LZ_(de)compress_errno} after a successful call is not -necessarily zero, and you should not use @samp{LZ_(de)compress_errno} to -determine whether a call failed. If the call failed, then you can +necessarily LZ_ok, and you should not use @samp{LZ_(de)compress_errno} +to determine whether a call failed. If the call failed, then you can examine @samp{LZ_(de)compress_errno}. The error codes are defined in the header file @samp{lzlib.h}. 
diff --git a/encoder.cc b/encoder.cc index 3bc855b..cca154c 100644 --- a/encoder.cc +++ b/encoder.cc @@ -47,32 +47,45 @@ const Prob_prices prob_prices; int Matchfinder::write_data( uint8_t * const in_buffer, const int in_size ) throw() { if( at_stream_end_ ) return 0; - if( pos >= pos_limit ) - { - const int offset = pos - dictionary_size_ - max_num_trials; - const int size = stream_pos - offset; -// std::fprintf( stderr, "%6d offset, %5d size, %4d margin.\n", -// offset, size, after_size - ( pos - pos_limit ) ); - std::memmove( buffer, buffer + offset, size ); - partial_data_pos += offset; - pos -= offset; - stream_pos -= offset; - for( int i = 0; i < num_prev_positions; ++i ) - if( prev_positions[i] >= 0 ) prev_positions[i] -= offset; - for( int i = 0; i < 2 * dictionary_size_; ++i ) - if( prev_pos_tree[i] >= 0 ) prev_pos_tree[i] -= offset; - } const int size = std::min( buffer_size - stream_pos, in_size ); if( size > 0 ) { - std::memmove( buffer + stream_pos, in_buffer, size ); + std::memcpy( buffer + stream_pos, in_buffer, size ); stream_pos += size; } return size; } -bool Matchfinder::reset() throw() +Matchfinder::Matchfinder( const int dict_size, const int len_limit ) + : + partial_data_pos( 0 ), + dictionary_size_( dict_size ), + after_size( max_num_trials + max_match_len ), + buffer_size( ( 2 * std::max( 65536, dictionary_size_ ) ) + + max_num_trials + after_size ), + buffer( new( std::nothrow ) uint8_t[buffer_size] ), + pos( 0 ), + cyclic_pos( 0 ), + stream_pos( 0 ), + pos_limit( buffer_size - after_size ), + match_len_limit_( len_limit ), + prev_positions( new( std::nothrow ) int32_t[num_prev_positions] ), + at_stream_end_( false ) + { + prev_pos_tree = new( std::nothrow ) int32_t[2*dictionary_size_]; + if( !buffer || !prev_positions || !prev_pos_tree ) + { + if( prev_pos_tree ) delete[] prev_pos_tree; + if( prev_positions ) delete[] prev_positions; + if( buffer ) delete[] buffer; + throw std::bad_alloc(); + } + for( int i = 0; i < num_prev_positions; ++i 
) prev_positions[i] = -1; + } + + +void Matchfinder::reset() throw() { const int size = stream_pos - pos; std::memmove( buffer, buffer + pos, size ); @@ -81,25 +94,43 @@ bool Matchfinder::reset() throw() pos = 0; cyclic_pos = 0; for( int i = 0; i < num_prev_positions; ++i ) prev_positions[i] = -1; - return true; } bool Matchfinder::move_pos() throw() { if( ++cyclic_pos >= dictionary_size_ ) cyclic_pos = 0; - if( ++pos > stream_pos ) { pos = stream_pos; return false; } + if( ++pos >= pos_limit ) + { + if( pos > stream_pos ) { pos = stream_pos; return false; } + else + { + const int offset = pos - dictionary_size_ - max_num_trials; + const int size = stream_pos - offset; + std::memmove( buffer, buffer + offset, size ); + partial_data_pos += offset; + pos -= offset; + stream_pos -= offset; + for( int i = 0; i < num_prev_positions; ++i ) + if( prev_positions[i] >= 0 ) prev_positions[i] -= offset; + for( int i = 0; i < 2 * dictionary_size_; ++i ) + if( prev_pos_tree[i] >= 0 ) prev_pos_tree[i] -= offset; + } + } return true; } int Matchfinder::longest_match_len( int * const distances ) throw() { + int idx0 = cyclic_pos << 1; + int idx1 = idx0 + 1; int len_limit = match_len_limit_; if( len_limit > available_bytes() ) { len_limit = available_bytes(); - if( len_limit < 4 ) return 0; + if( len_limit < 4 ) + { prev_pos_tree[idx0] = prev_pos_tree[idx1] = -1; return 0; } } int maxlen = min_match_len - 1; @@ -131,16 +162,12 @@ int Matchfinder::longest_match_len( int * const distances ) throw() int newpos = prev_positions[key4]; prev_positions[key4] = pos; - int idx0 = cyclic_pos << 1; - int idx1 = idx0 + 1; - int len0 = 0, len1 = 0; - for( int count = 16 + ( match_len_limit_ / 2 ); ; ) { if( newpos < min_pos || --count < 0 ) { prev_pos_tree[idx0] = prev_pos_tree[idx1] = -1; break; } const uint8_t * const newdata = buffer + newpos; - int len = std::min( len0, len1 ); + int len = 0; while( len < len_limit && newdata[len] == data[len] ) ++len; const int delta = pos - newpos; @@ 
-156,14 +183,12 @@ int Matchfinder::longest_match_len( int * const distances ) throw() prev_pos_tree[idx0] = newpos; idx0 = newidx + 1; newpos = prev_pos_tree[idx0]; - len0 = len; } else { prev_pos_tree[idx1] = newpos; idx1 = newidx; newpos = prev_pos_tree[idx1]; - len1 = len; } } else @@ -432,9 +457,26 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances], } + // Sync Flush mark => (dis == 0xFFFFFFFF, len == min_match_len+1) +bool LZ_encoder::sync_flush() + { + if( member_finished_ || range_encoder.free_bytes() < max_marker_size ) + return false; + const int pos_state = ( matchfinder.data_position() ) & pos_state_mask; + range_encoder.encode_bit( bm_match[state()][pos_state], 1 ); + range_encoder.encode_bit( bm_rep[state()], 0 ); + encode_pair( 0xFFFFFFFF, min_match_len + 1, pos_state ); + range_encoder.flush(); + return true; + } + + // End Of Stream mark => (dis == 0xFFFFFFFF, len == min_match_len) -void LZ_encoder::flush( const State & state ) +bool LZ_encoder::full_flush() { + if( member_finished_ || + range_encoder.free_bytes() < (int)sizeof( File_trailer ) + max_marker_size ) + return false; const int pos_state = ( matchfinder.data_position() ) & pos_state_mask; range_encoder.encode_bit( bm_match[state()][pos_state], 1 ); range_encoder.encode_bit( bm_rep[state()], 0 ); @@ -445,14 +487,15 @@ void LZ_encoder::flush( const State & state ) trailer.data_size( matchfinder.data_position() ); trailer.member_size( range_encoder.member_position() + sizeof trailer ); for( unsigned int i = 0; i < sizeof trailer; ++i ) - range_encoder.put_byte( (( uint8_t *)&trailer)[i] ); + range_encoder.put_byte( ((uint8_t *)&trailer)[i] ); + return true; } LZ_encoder::LZ_encoder( Matchfinder & mf, const File_header & header, const long long member_size ) : - member_size_limit( member_size - sizeof( File_trailer ) - 15 ), + member_size_limit( member_size - sizeof( File_trailer ) - max_marker_size ), longest_match_found( 0 ), crc_( 0xFFFFFFFF ), matchfinder( mf ), @@ 
-469,19 +512,21 @@ LZ_encoder::LZ_encoder( Matchfinder & mf, const File_header & header, fill_align_prices(); for( unsigned int i = 0; i < sizeof header; ++i ) - range_encoder.put_byte( (( uint8_t *)&header)[i] ); + range_encoder.put_byte( ((uint8_t *)&header)[i] ); } -bool LZ_encoder::encode_member() +bool LZ_encoder::encode_member( const bool finish ) { if( member_finished_ ) return true; - if( !matchfinder.finished() && !matchfinder.available_bytes() ) - return true; // need at least 1 byte + if( range_encoder.member_position() >= member_size_limit ) + { if( full_flush() ) { member_finished_ = true; } return true; } - if( range_encoder.member_position() == sizeof( File_header ) && - !matchfinder.finished() ) // copy first byte + // copy first byte + if( matchfinder.data_position() == 0 && !matchfinder.finished() ) { + if( matchfinder.available_bytes() < 4 && !matchfinder.at_stream_end() ) + return true; range_encoder.encode_bit( bm_match[state()][0], 0 ); const uint8_t cur_byte = matchfinder[0]; literal_encoder.encode( range_encoder, prev_byte, cur_byte ); @@ -493,12 +538,12 @@ bool LZ_encoder::encode_member() while( true ) { if( matchfinder.finished() ) - { flush( state ); member_finished_ = true; return true; } - if( !matchfinder.available_bytes() || - ( !matchfinder.at_stream_end() && - matchfinder.available_bytes() < max_num_trials + max_match_len ) ) - return true; // need more data - if( range_encoder.free_bytes() < 2 * max_num_trials ) return true; + { + if( finish && full_flush() ) member_finished_ = true; + return true; + } + if( !matchfinder.enough_available_bytes() || + !range_encoder.enough_free_bytes() ) return true; if( fill_counter <= 0 ) { fill_distance_prices(); fill_counter = 512; } int ahead = best_pair_sequence( rep_distances, state ); @@ -563,8 +608,7 @@ bool LZ_encoder::encode_member() if( range_encoder.member_position() >= member_size_limit ) { if( !matchfinder.dec_pos( ahead ) ) return false; - flush( state ); - member_finished_ = true; + 
if( full_flush() ) member_finished_ = true; return true; } if( ahead <= 0 ) break; diff --git a/encoder.h b/encoder.h index af8c441..cdfd751 100644 --- a/encoder.h +++ b/encoder.h @@ -96,7 +96,7 @@ inline int price_symbol( const Bit_model bm[], int symbol, const int num_bits ) { const int bit = symbol & 1; symbol >>= 1; - price += price_bit( bm[symbol-1], bit ); + price += price_bit( bm[symbol], bit ); } return price; } @@ -110,7 +110,7 @@ inline int price_symbol_reversed( const Bit_model bm[], int symbol, { const int bit = symbol & 1; symbol >>= 1; - price += price_bit( bm[model-1], bit ); + price += price_bit( bm[model], bit ); model = ( model << 1 ) | bit; } return price; @@ -126,14 +126,14 @@ inline int price_matched( const Bit_model bm[], const int symbol, { const int match_bit = ( match_byte >> i ) & 1; const int bit = ( symbol >> i ) & 1; - price += price_bit( bm[(match_bit<<8)+model+0xFF], bit ); + price += price_bit( bm[(match_bit<<8)+model+0x100], bit ); model = ( model << 1 ) | bit; if( match_bit != bit ) { while( --i >= 0 ) { const int bit = ( symbol >> i ) & 1; - price += price_bit( bm[model-1], bit ); + price += price_bit( bm[model], bit ); model = ( model << 1 ) | bit; } break; @@ -166,32 +166,7 @@ class Matchfinder bool at_stream_end_; // stream_pos shows real end of file public: - Matchfinder( const int dict_size, const int len_limit ) - : - partial_data_pos( 0 ), - dictionary_size_( dict_size ), - after_size( max_num_trials + max_match_len ), - buffer_size( ( 2 * std::max( 65536, dictionary_size_ ) ) + - max_num_trials + after_size ), - buffer( new( std::nothrow ) uint8_t[buffer_size] ), - pos( 0 ), - cyclic_pos( 0 ), - stream_pos( 0 ), - pos_limit( buffer_size - after_size ), - match_len_limit_( len_limit ), - prev_positions( new( std::nothrow ) int32_t[num_prev_positions] ), - at_stream_end_( false ) - { - prev_pos_tree = new( std::nothrow ) int32_t[2*dictionary_size_]; - if( !buffer || !prev_positions || !prev_pos_tree ) - { - if( prev_pos_tree 
) delete[] prev_pos_tree; - if( prev_positions ) delete[] prev_positions; - if( buffer ) delete[] buffer; - throw std::bad_alloc(); - } - for( int i = 0; i < num_prev_positions; ++i ) prev_positions[i] = -1; - } + Matchfinder( const int dict_size, const int len_limit ); ~Matchfinder() { delete[] prev_pos_tree; delete[] prev_positions; delete[] buffer; } @@ -201,8 +176,9 @@ public: int available_bytes() const throw() { return stream_pos - pos; } long long data_position() const throw() { return partial_data_pos + pos; } int dictionary_size() const throw() { return dictionary_size_; } - void finish() throw() { at_stream_end_ = true; } + void flushing( const bool b ) throw() { at_stream_end_ = b; } bool finished() const throw() { return at_stream_end_ && pos >= stream_pos; } + int free_bytes() const throw() { return buffer_size - stream_pos; } int match_len_limit() const throw() { return match_len_limit_; } const uint8_t * ptr_to_current_pos() const throw() { return buffer + pos; } @@ -215,6 +191,12 @@ public: return true; } + bool enough_available_bytes() const throw() + { + return ( stream_pos > pos && + ( at_stream_end_ || stream_pos - pos >= after_size ) ); + } + int true_match_len( const int index, const int distance, int len_limit ) const throw() { if( index + len_limit > available_bytes() ) @@ -226,7 +208,7 @@ public: } int write_data( uint8_t * const in_buffer, const int in_size ) throw(); - bool reset() throw(); + void reset() throw(); bool move_pos() throw(); int longest_match_len( int * const distances = 0 ) throw(); }; @@ -234,6 +216,7 @@ public: class Range_encoder : public Circular_buffer { + enum { min_free_bytes = 2 * max_num_trials }; uint64_t low; long long partial_member_pos; uint32_t range; @@ -256,13 +239,16 @@ class Range_encoder : public Circular_buffer public: Range_encoder() : - Circular_buffer( 65536 + (2 * max_num_trials) ), + Circular_buffer( 65536 + min_free_bytes ), low( 0 ), partial_member_pos( 0 ), range( 0xFFFFFFFF ), ff_count( 0 ), 
cache( 0 ) {} + bool enough_free_bytes() const throw() + { return free_bytes() >= min_free_bytes; } + int read_data( uint8_t * const out_buffer, const int out_size ) throw() { const int size = Circular_buffer::read_data( out_buffer, out_size ); @@ -270,7 +256,14 @@ public: return size; } - void flush() { for( int i = 0; i < 5; ++i ) shift_low(); } + void flush() + { + for( int i = 0; i < 5; ++i ) shift_low(); + low = 0; + range = 0xFFFFFFFF; + ff_count = 0; + cache = 0; + } long long member_position() const throw() { return partial_member_pos + used_bytes() + ff_count; } @@ -309,7 +302,7 @@ public: for( int i = num_bits; i > 0; --i, mask >>= 1 ) { const int bit = ( symbol & mask ); - encode_bit( bm[model-1], bit ); + encode_bit( bm[model], bit ); model <<= 1; if( bit ) model |= 1; } @@ -321,7 +314,7 @@ public: for( int i = num_bits; i > 0; --i ) { const int bit = symbol & 1; - encode_bit( bm[model-1], bit ); + encode_bit( bm[model], bit ); model = ( model << 1 ) | bit; symbol >>= 1; } @@ -334,14 +327,14 @@ public: { const int bit = ( symbol >> i ) & 1; const int match_bit = ( match_byte >> i ) & 1; - encode_bit( bm[(match_bit<<8)+model+0xFF], bit ); + encode_bit( bm[(match_bit<<8)+model+0x100], bit ); model = ( model << 1 ) | bit; if( match_bit != bit ) { while( --i >= 0 ) { const int bit = ( symbol >> i ) & 1; - encode_bit( bm[model-1], bit ); + encode_bit( bm[model], bit ); model = ( model << 1 ) | bit; } break; @@ -421,6 +414,7 @@ class LZ_encoder { enum { dis_align_mask = dis_align_size - 1, infinite_price = 0x0FFFFFFF, + max_marker_size = 15, num_rep_distances = 4 }; // must be 4 struct Trial @@ -589,19 +583,18 @@ class LZ_encoder int best_pair_sequence( const int reps[num_rep_distances], const State & state ); - void flush( const State & state ); + bool full_flush(); public: LZ_encoder( Matchfinder & mf, const File_header & header, const long long member_size ); - bool encode_member(); - void finish_member() - { if( !member_finished_ ) { flush( state ); 
member_finished_ = true; } } + bool encode_member( const bool finish ); bool member_finished() const throw() { return member_finished_ && !range_encoder.used_bytes(); } int read_data( uint8_t * const buffer, const int size ) throw() { return range_encoder.read_data( buffer, size ); } + bool sync_flush(); long long member_position() const throw() { return range_encoder.member_position(); } diff --git a/lzip.h b/lzip.h index a568988..cf8e56e 100644 --- a/lzip.h +++ b/lzip.h @@ -121,16 +121,21 @@ public: uint32_t operator[]( const uint8_t byte ) const throw() { return data[byte]; } void update( uint32_t & crc, const uint8_t byte ) const throw() { crc = data[(crc^byte)&0xFF] ^ ( crc >> 8 ); } + void update( uint32_t & crc, const uint8_t * const buffer, const int size ) const throw() + { + for( int i = 0; i < size; ++i ) + crc = data[(crc^buffer[i])&0xFF] ^ ( crc >> 8 ); + } }; extern const CRC32 crc32; -const char * const magic_string = "LZIP"; +const uint8_t magic_string[4] = { 'L', 'Z', 'I', 'P' }; struct File_header { - char magic[4]; + uint8_t magic[4]; uint8_t version; uint8_t coded_dict_size; diff --git a/lzlib.cc b/lzlib.cc index d3f9777..c64ed43 100644 --- a/lzlib.cc +++ b/lzlib.cc @@ -45,6 +45,7 @@ struct Encoder Matchfinder * matchfinder; LZ_encoder * lz_encoder; LZ_errno lz_errno; + bool flush_pending; const File_header member_header; Encoder( const File_header & header ) throw() @@ -54,6 +55,7 @@ struct Encoder matchfinder( 0 ), lz_encoder( 0 ), lz_errno( LZ_ok ), + flush_pending( false ), member_header( header ) {} }; @@ -140,6 +142,28 @@ void * LZ_compress_open( const int dictionary_size, const int match_len_limit, } +int LZ_compress_restart_member( void * const encoder, + const long long member_size ) + { + if( !verify_encoder( encoder ) ) return -1; + Encoder & e = *(Encoder *)encoder; + if( !e.lz_encoder->member_finished() ) + { e.lz_errno = LZ_sequence_error; return -1; } + + e.partial_in_size += e.matchfinder->data_position(); + e.partial_out_size += 
e.lz_encoder->member_position(); + e.matchfinder->reset(); + + delete e.lz_encoder; + try { + e.lz_encoder = new LZ_encoder( *e.matchfinder, e.member_header, member_size ); + } + catch( std::bad_alloc ) + { e.lz_encoder = 0; e.lz_errno = LZ_mem_error; return -1; } + return 0; + } + + int LZ_compress_close( void * const encoder ) { if( !encoder ) return -1; @@ -154,38 +178,26 @@ int LZ_compress_close( void * const encoder ) int LZ_compress_finish( void * const encoder ) { if( !verify_encoder( encoder ) ) return -1; - ((Encoder *)encoder)->matchfinder->finish(); - return 0; - } - - -int LZ_compress_finish_member( void * const encoder ) - { - if( !verify_encoder( encoder ) ) return -1; - ((Encoder *)encoder)->lz_encoder->finish_member(); + Encoder & e = *(Encoder *)encoder; + e.matchfinder->flushing( true ); + e.flush_pending = false; return 0; } -int LZ_compress_restart_member( void * const encoder, - const long long member_size ) +int LZ_compress_sync_flush( void * const encoder ) { if( !verify_encoder( encoder ) ) return -1; Encoder & e = *(Encoder *)encoder; - if( !e.lz_encoder->member_finished() ) - { e.lz_errno = LZ_sequence_error; return -1; } - - e.partial_in_size += e.matchfinder->data_position(); - e.partial_out_size += e.lz_encoder->member_position(); - if( !e.matchfinder->reset() ) - { e.lz_errno = LZ_library_error; return -1; } - - delete e.lz_encoder; - try { - e.lz_encoder = new LZ_encoder( *e.matchfinder, e.member_header, member_size ); + if( !e.flush_pending && !e.matchfinder->at_stream_end() ) + { + e.flush_pending = true; + e.matchfinder->flushing( true ); + if( !e.lz_encoder->encode_member( false ) ) + { e.lz_errno = LZ_library_error; return -1; } + if( e.lz_encoder->sync_flush() ) + { e.matchfinder->flushing( false ); e.flush_pending = false; } } - catch( std::bad_alloc ) - { e.lz_encoder = 0; e.lz_errno = LZ_mem_error; return -1; } return 0; } @@ -195,8 +207,10 @@ int LZ_compress_read( void * const encoder, uint8_t * const buffer, { if( 
!verify_encoder( encoder ) ) return -1; Encoder & e = *(Encoder *)encoder; - if( !e.lz_encoder->encode_member() ) + if( !e.lz_encoder->encode_member( !e.flush_pending ) ) { e.lz_errno = LZ_library_error; return -1; } + if( e.flush_pending && e.lz_encoder->sync_flush() ) + { e.matchfinder->flushing( false ); e.flush_pending = false; } return e.lz_encoder->read_data( buffer, size ); } @@ -205,7 +219,18 @@ int LZ_compress_write( void * const encoder, uint8_t * const buffer, const int size ) { if( !verify_encoder( encoder ) ) return -1; - return ((Encoder *)encoder)->matchfinder->write_data( buffer, size ); + Encoder & e = *(Encoder *)encoder; + if( e.flush_pending ) return 0; + return e.matchfinder->write_data( buffer, size ); + } + + +int LZ_compress_write_size( void * const encoder ) + { + if( !verify_encoder( encoder ) ) return -1; + Encoder & e = *(Encoder *)encoder; + if( e.flush_pending ) return 0; + return e.matchfinder->free_bytes(); } @@ -220,7 +245,8 @@ int LZ_compress_finished( void * const encoder ) { if( !verify_encoder( encoder ) ) return -1; Encoder & e = *(Encoder *)encoder; - return ( e.matchfinder->finished() && e.lz_encoder->member_finished() ); + return ( !e.flush_pending && e.matchfinder->finished() && + e.lz_encoder->member_finished() ); } diff --git a/lzlib.h b/lzlib.h index c03f7e9..f2ef9d3 100644 --- a/lzlib.h +++ b/lzlib.h @@ -29,7 +29,7 @@ extern "C" { #endif -const char * const LZ_version_string = "0.3"; +const char * const LZ_version_string = "0.4"; enum { min_dictionary_bits = 12, min_dictionary_size = 1 << min_dictionary_bits, @@ -46,16 +46,17 @@ const char * LZ_version( void ); void * LZ_compress_open( const int dictionary_size, const int match_len_limit, const long long member_size ); -int LZ_compress_close( void * const encoder ); -int LZ_compress_finish( void * const encoder ); -int LZ_compress_finish_member( void * const encoder ); int LZ_compress_restart_member( void * const encoder, const long long member_size ); +int 
LZ_compress_close( void * const encoder ); +int LZ_compress_finish( void * const encoder ); +int LZ_compress_sync_flush( void * const encoder ); int LZ_compress_read( void * const encoder, uint8_t * const buffer, const int size ); int LZ_compress_write( void * const encoder, uint8_t * const buffer, const int size ); +int LZ_compress_write_size( void * const encoder ); enum LZ_errno LZ_compress_errno( void * const encoder ); int LZ_compress_finished( void * const encoder ); diff --git a/main.cc b/main.cc index ae57d92..d9cf922 100644 --- a/main.cc +++ b/main.cc @@ -52,6 +52,11 @@ #define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL #endif +void show_error( const char * msg, const int errcode = 0, const bool help = false ) throw(); +void internal_error( const char * msg ); +int readblock( const int fd, char * buf, const int size ) throw(); +int writeblock( const int fd, const char * buf, const int size ) throw(); + namespace { @@ -117,7 +122,7 @@ void show_help() throw() { std::printf( "%s - A test program for the lzlib library.\n", Program_name ); std::printf( "\nUsage: %s [options] [files]\n", invocation_name ); - std::printf( "Options:\n" ); + std::printf( "\nOptions:\n" ); std::printf( " -h, --help display this help and exit\n" ); std::printf( " -V, --version output version information and exit\n" ); std::printf( " -b, --member-size= set member size limit in bytes\n" ); @@ -125,7 +130,7 @@ void show_help() throw() std::printf( " -d, --decompress decompress\n" ); std::printf( " -f, --force overwrite existing output files\n" ); std::printf( " -k, --keep keep (don't delete) input files\n" ); - std::printf( " -m, --match-length= set match length limit in bytes [64]\n" ); + std::printf( " -m, --match-length= set match length limit in bytes [80]\n" ); std::printf( " -o, --output= if reading stdin, place the output into \n" ); std::printf( " -q, --quiet suppress all messages\n" ); std::printf( " -s, --dictionary-size= set dictionary size limit in bytes [8MiB]\n" ); @@ -154,30 
+159,6 @@ void show_version() throw() } -void show_error( const char * msg, const int errcode = 0, const bool help = false ) throw() - { - if( verbosity >= 0 ) - { - if( msg && msg[0] != 0 ) - { - std::fprintf( stderr, "%s: %s", program_name, msg ); - if( errcode > 0 ) std::fprintf( stderr, ": %s", std::strerror( errcode ) ); - std::fprintf( stderr, "\n" ); - } - if( help && invocation_name && invocation_name[0] != 0 ) - std::fprintf( stderr, "Try `%s --help' for more information.\n", invocation_name ); - } - } - - -void internal_error( const char * msg ) - { - std::string s( "internal error: " ); s += msg; - show_error( s.c_str() ); - std::exit( 3 ); - } - - const char * format_num( long long num, long long limit = 9999, const int set_prefix = 0 ) throw() { @@ -451,43 +432,6 @@ bool next_filename() } -// Returns the number of bytes really read. -// If (returned value < size) and (errno == 0), means EOF was reached. -// -int readblock( const int fd, char * buf, const int size ) throw() - { - int rest = size; - errno = 0; - while( rest > 0 ) - { - errno = 0; - const int n = read( fd, buf + size - rest, rest ); - if( n > 0 ) rest -= n; - else if( n == 0 ) break; - else if( errno != EINTR && errno != EAGAIN ) break; - } - return ( rest > 0 ) ? size - rest : size; - } - - -// Returns the number of bytes really written. -// If (returned value < size), it is always an error. -// -int writeblock( const int fd, const char * buf, const int size ) throw() - { - int rest = size; - errno = 0; - while( rest > 0 ) - { - errno = 0; - const int n = write( fd, buf + size - rest, rest ); - if( n > 0 ) rest -= n; - else if( errno && errno != EINTR && errno != EAGAIN ) break; - } - return ( rest > 0 ) ? 
size - rest : size; - } - - int compress( const long long member_size, const long long volume_size, lzma_options encoder_options, const int inhandle, const Pretty_print & pp, const struct stat * in_statsp, @@ -509,20 +453,15 @@ int compress( const long long member_size, const long long volume_size, long long partial_volume_size = 0; const int out_buffer_size = 65536, in_buffer_size = 8 * out_buffer_size; uint8_t in_buffer[in_buffer_size], out_buffer[out_buffer_size]; - int in_pos = 0, in_stream_pos = 0; while( true ) { - if( in_stream_pos == 0 ) - { - in_stream_pos = readblock( inhandle, (char *)in_buffer, in_buffer_size ); - if( in_stream_pos == 0 ) LZ_compress_finish( encoder ); - } - int in_size = 0; - if( in_pos < in_stream_pos ) + int in_size = std::min( LZ_compress_write_size( encoder ), in_buffer_size ); + if( in_size > 0 ) { - in_size = LZ_compress_write( encoder, in_buffer + in_pos, in_stream_pos - in_pos ); - in_pos += in_size; - if( in_pos >= in_stream_pos ) { in_stream_pos = 0; in_pos = 0; } + in_size = readblock( inhandle, (char *)in_buffer, in_size ); + if( in_size == 0 ) LZ_compress_finish( encoder ); + else if( in_size != LZ_compress_write( encoder, in_buffer, in_size ) ) + internal_error( "library error" ); } int out_size = LZ_compress_read( encoder, out_buffer, out_buffer_size ); // std::fprintf( stderr, "%6d in_size, %5d out_size.\n", in_size, out_size ); @@ -639,7 +578,7 @@ int decompress( const int inhandle, const Pretty_print & pp, } pp(); show_error( "read error", errno ); return 1; } - else if( out_size > 0 ) + else if( out_size > 0 && outhandle >= 0 ) { const int wr = writeblock( outhandle, (char *)out_buffer, out_size ); if( wr != out_size ) @@ -691,16 +630,77 @@ void Pretty_print::operator()( const char * const msg ) const throw() } +void show_error( const char * msg, const int errcode, const bool help ) throw() + { + if( verbosity >= 0 ) + { + if( msg && msg[0] != 0 ) + { + std::fprintf( stderr, "%s: %s", program_name, msg ); + if( 
errcode > 0 ) std::fprintf( stderr, ": %s", std::strerror( errcode ) ); + std::fprintf( stderr, "\n" ); + } + if( help && invocation_name && invocation_name[0] != 0 ) + std::fprintf( stderr, "Try `%s --help' for more information.\n", invocation_name ); + } + } + + +void internal_error( const char * msg ) + { + std::string s( "internal error: " ); s += msg; + show_error( s.c_str() ); + std::exit( 3 ); + } + + +// Returns the number of bytes really read. +// If (returned value < size) and (errno == 0), means EOF was reached. +// +int readblock( const int fd, char * buf, const int size ) throw() + { + int rest = size; + errno = 0; + while( rest > 0 ) + { + errno = 0; + const int n = read( fd, buf + size - rest, rest ); + if( n > 0 ) rest -= n; + else if( n == 0 ) break; + else if( errno != EINTR && errno != EAGAIN ) break; + } + return ( rest > 0 ) ? size - rest : size; + } + + +// Returns the number of bytes really written. +// If (returned value < size), it is always an error. +// +int writeblock( const int fd, const char * buf, const int size ) throw() + { + int rest = size; + errno = 0; + while( rest > 0 ) + { + errno = 0; + const int n = write( fd, buf + size - rest, rest ); + if( n > 0 ) rest -= n; + else if( errno && errno != EINTR && errno != EAGAIN ) break; + } + return ( rest > 0 ) ? size - rest : size; + } + + int main( const int argc, const char * argv[] ) { // Mapping from gzip/bzip2 style 1..9 compression modes // to the corresponding LZMA compression modes. 
const lzma_options option_mapping[] = { - { 1 << 22, 10 }, // -1 - { 1 << 22, 12 }, // -2 - { 1 << 22, 17 }, // -3 - { 1 << 22, 26 }, // -4 + { 1 << 20, 10 }, // -1 + { 1 << 20, 12 }, // -2 + { 1 << 20, 17 }, // -3 + { 1 << 21, 26 }, // -4 { 1 << 22, 44 }, // -5 { 1 << 23, 80 }, // -6 { 1 << 24, 108 }, // -7 @@ -800,10 +800,7 @@ int main( const int argc, const char * argv[] ) Pretty_print pp( filenames ); if( program_mode == m_test ) - { - output_filename = "/dev/null"; - if( !open_outstream( true ) ) return 1; - } + outhandle = -1; int retval = 0; for( unsigned int i = 0; i < filenames.size(); ++i ) diff --git a/testsuite/check.sh b/testsuite/check.sh index b4ee42f..4431864 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -5,6 +5,8 @@ # This script is free software: you have unlimited permission # to copy, distribute and modify it. +LC_ALL=C +export LC_ALL objdir=`pwd` testdir=`cd "$1" ; pwd` LZIP="${objdir}"/minilzip -- cgit v1.2.3