summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Daniel Baumann <mail@daniel-baumann.ch> 2015-11-07 13:33:30 +0000
committer Daniel Baumann <mail@daniel-baumann.ch> 2015-11-07 13:33:30 +0000
commit 447d8df5e4c1f6c985791cb3fea2922fcc1cfbee (patch)
tree ec719a03bcff2f05604f2ae3c9604c85501944ed
parent Adding upstream version 0.3. (diff)
download lzlib-447d8df5e4c1f6c985791cb3fea2922fcc1cfbee.tar.xz
lzlib-447d8df5e4c1f6c985791cb3fea2922fcc1cfbee.zip
Adding upstream version 0.4. upstream/0.4
Signed-off-by: Daniel Baumann <mail@daniel-baumann.ch>
-rw-r--r-- ChangeLog 14
-rw-r--r-- Makefile.in 15
-rw-r--r-- NEWS 11
-rw-r--r-- README 4
-rwxr-xr-x configure 6
-rw-r--r-- decoder.cc 36
-rw-r--r-- decoder.h 110
-rw-r--r-- doc/lzlib.info 104
-rw-r--r-- doc/lzlib.texinfo 79
-rw-r--r-- encoder.cc 132
-rw-r--r-- encoder.h 77
-rw-r--r-- lzip.h 9
-rw-r--r-- lzlib.cc 80
-rw-r--r-- lzlib.h 9
-rw-r--r-- main.cc 163
-rwxr-xr-x testsuite/check.sh 2
16 files changed, 535 insertions, 316 deletions
diff --git a/ChangeLog b/ChangeLog
index f90fe5f..d8ac767 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,15 +1,23 @@
-2009-05-03 Antonio Diaz <ant_diaz@teleline.es>
+2009-06-03 Antonio Diaz Diaz <ant_diaz@teleline.es>
+
+ * Version 0.4 released.
+ * Added new function LZ_compress_sync_flush.
+ * Added new function LZ_compress_write_size.
+ * Decompression speed has been improved.
+ * Added chapter "Buffering" to the manual.
+
+2009-05-03 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 0.3 released.
* Lzlib is now built as a shared library (in addition to static).
-2009-04-26 Antonio Diaz <ant_diaz@teleline.es>
+2009-04-26 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 0.2 released.
* Fixed a segfault when decompressing trailing garbage.
* Fixed a false positive in LZ_(de)compress_finished.
-2009-04-21 Antonio Diaz <ant_diaz@teleline.es>
+2009-04-21 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 0.1 released.
diff --git a/Makefile.in b/Makefile.in
index 2249365..8c5f93c 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -12,9 +12,9 @@ sh_lib_objs = sh_decoder.o sh_encoder.o sh_lzlib.o
objs = arg_parser.o main.o
-.PHONY : all doc check install install-info \
- uninstall uninstall-info \
- dist clean distclean
+.PHONY : all install install-info install-man install-strip \
+ uninstall uninstall-info uninstall-man \
+ doc info man check dist clean distclean
all : $(progname) $(progname_shared)
@@ -60,15 +60,17 @@ arg_parser.o : Makefile arg_parser.h
main.o : Makefile arg_parser.h lzlib.h $(libname).a
-doc : info $(VPATH)/doc/$(progname).1
+doc : info man
info : $(VPATH)/doc/$(pkgname).info
$(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texinfo
cd $(VPATH)/doc && makeinfo $(pkgname).texinfo
+man : $(VPATH)/doc/$(progname).1
+
$(VPATH)/doc/$(progname).1 : $(progname)
- help2man -o $(VPATH)/doc/$(progname).1 ./$(progname)
+ help2man -o $(VPATH)/doc/$(progname).1 --no-info ./$(progname)
Makefile : $(VPATH)/configure $(VPATH)/Makefile.in
./config.status
@@ -96,6 +98,9 @@ install-info :
$(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info $(DESTDIR)$(infodir)/$(pkgname).info
-install-info --info-dir=$(DESTDIR)$(infodir) $(DESTDIR)$(infodir)/$(pkgname).info
+install-strip : all
+ $(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install
+
uninstall : uninstall-info
-rm -f $(DESTDIR)$(includedir)/$(pkgname).h
-rm -f $(DESTDIR)$(libdir)/$(libname).a
diff --git a/NEWS b/NEWS
index 5586409..f97bcd8 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,10 @@
-Changes in version 0.3:
+Changes in version 0.4:
-Lzilib is now built as a shared library (in addition to static).
+Partial flush of the compressed data has been implemented with the
+function LZ_compress_sync_flush.
+
+The function LZ_compress_write_size has been added.
+
+Decompression speed has been improved.
+
+The chapter "Buffering" has been added to the manual.
diff --git a/README b/README
index 461e233..dabf521 100644
--- a/README
+++ b/README
@@ -1,7 +1,7 @@
Description
-The lzlib compression library provides in-memory LZMA compression and
-decompression functions, including integrity checking of the
+Lzlib is a data compression library providing in-memory LZMA compression
+and decompression functions, including integrity checking of the
uncompressed data. The compressed data format used by the library is the
lzip format.
diff --git a/configure b/configure
index 1301d47..ba2cad0 100755
--- a/configure
+++ b/configure
@@ -5,13 +5,13 @@
# This configure script is free software: you have unlimited permission
# to copy, distribute and modify it.
#
-# Date of this version: 2009-05-03
+# Date of this version: 2009-06-03
invocation_name=$0
args=
no_create=
pkgname=lzlib
-pkgversion=0.3
+pkgversion=0.4
soversion=0
progname=minilzip
progname_shared=${progname}_shared
@@ -115,7 +115,7 @@ while [ x"$1" != x ] ; do
CXXFLAGS=*) CXXFLAGS=${optarg} ;;
LDFLAGS=*) LDFLAGS=${optarg} ;;
- --build=* | --enable-* | --with-* | --*dir=* | *=* | *-*-*) ;;
+ --* | *=* | *-*-*) ;;
*)
echo "configure: Unrecognized option: \"${option}\"; use --help for usage." 1>&2
exit 1 ;;
diff --git a/decoder.cc b/decoder.cc
index aa394de..f68edf9 100644
--- a/decoder.cc
+++ b/decoder.cc
@@ -51,7 +51,7 @@ int Circular_buffer::read_data( uint8_t * const out_buffer, const int out_size )
size = std::min( buffer_size - get, out_size );
if( size > 0 )
{
- std::memmove( out_buffer, buffer + get, size );
+ std::memcpy( out_buffer, buffer + get, size );
get += size;
if( get >= buffer_size ) get = 0;
}
@@ -61,7 +61,7 @@ int Circular_buffer::read_data( uint8_t * const out_buffer, const int out_size )
const int size2 = std::min( put - get, out_size - size );
if( size2 > 0 )
{
- std::memmove( out_buffer + size, buffer + get, size2 );
+ std::memcpy( out_buffer + size, buffer + get, size2 );
get += size2;
size += size2;
}
@@ -78,7 +78,7 @@ int Circular_buffer::write_data( uint8_t * const in_buffer, const int in_size )
size = std::min( buffer_size - put - (get == 0), in_size );
if( size > 0 )
{
- std::memmove( buffer + put, in_buffer, size );
+ std::memcpy( buffer + put, in_buffer, size );
put += size;
if( put >= buffer_size ) put = 0;
}
@@ -88,7 +88,7 @@ int Circular_buffer::write_data( uint8_t * const in_buffer, const int in_size )
const int size2 = std::min( get - put - 1, in_size - size );
if( size2 > 0 )
{
- std::memmove( buffer + put, in_buffer + size, size2 );
+ std::memcpy( buffer + put, in_buffer + size, size2 );
put += size2;
size += size2;
}
@@ -104,8 +104,9 @@ bool LZ_decoder::verify_trailer()
const int trailer_size = trailer.size( format_version );
for( int i = 0; i < trailer_size && !error; ++i )
{
- if( range_decoder.finished() ) error = true;
- ((uint8_t *)&trailer)[i] = range_decoder.get_byte();
+ if( !range_decoder.finished() )
+ ((uint8_t *)&trailer)[i] = range_decoder.get_byte();
+ else error = true;
}
if( format_version == 0 ) trailer.member_size( member_position() );
if( trailer.data_crc() != crc() ) error = true;
@@ -120,14 +121,12 @@ bool LZ_decoder::verify_trailer()
int LZ_decoder::decode_member()
{
if( member_finished_ ) return 0;
+ if( !range_decoder.try_reload() ) return 0;
while( true )
{
- if( range_decoder.available_bytes() <= 0 ||
- ( !range_decoder.at_stream_end() &&
- range_decoder.available_bytes() < min_available_bytes ) )
- return 0; // need more data
- if( free_bytes() < max_match_len ) return 0;
if( range_decoder.finished() ) return 2;
+ if( !range_decoder.enough_available_bytes() || !enough_free_bytes() )
+ return 0;
const int pos_state = data_position() & pos_state_mask;
if( range_decoder.decode_bit( bm_match[state()][pos_state] ) == 0 )
{
@@ -173,9 +172,8 @@ int LZ_decoder::decode_member()
}
else
{
- rep3 = rep2; rep2 = rep1; rep1 = rep0;
+ unsigned int rep0_saved = rep0;
len = min_match_len + len_decoder.decode( range_decoder, pos_state );
- state.set_match();
const int dis_slot = range_decoder.decode_tree( bm_dis_slot[get_dis_state(len)], dis_slot_bits );
if( dis_slot < start_dis_model ) rep0 = dis_slot;
else
@@ -190,17 +188,27 @@ int LZ_decoder::decode_member()
rep0 += range_decoder.decode_tree_reversed( bm_align, dis_align_bits );
if( rep0 == 0xFFFFFFFF ) // Marker found
{
+ rep0 = rep0_saved;
+ range_decoder.normalize();
if( len == min_match_len ) // End Of Stream marker
{
member_finished_ = true;
if( verify_trailer() ) return 0; else return 3;
}
+ if( len == min_match_len + 1 ) // Sync Flush marker
+ {
+ if( range_decoder.try_reload( true ) ) continue;
+ else return 0;
+ }
return 4;
}
+ if( rep0 >= (unsigned int)dictionary_size ) return 1;
}
}
+ rep3 = rep2; rep2 = rep1; rep1 = rep0_saved;
+ state.set_match();
}
- if( !copy_block( rep0, len ) ) return 1;
+ copy_block( rep0, len );
prev_byte = get_byte( 0 );
}
}
diff --git a/decoder.h b/decoder.h
index 785f310..6484898 100644
--- a/decoder.h
+++ b/decoder.h
@@ -25,10 +25,9 @@
Public License.
*/
-const int min_available_bytes = 8 + sizeof( File_trailer );
-
class Input_buffer : public Circular_buffer
{
+ enum { min_available_bytes = 8 + sizeof( File_trailer ) };
bool at_stream_end_;
public:
@@ -42,6 +41,12 @@ public:
bool finished() const throw() { return at_stream_end_ && !used_bytes(); }
void purge() throw() { at_stream_end_ = true; Circular_buffer::reset(); }
+ bool enough_available_bytes() const throw()
+ {
+ return ( used_bytes() > 0 &&
+ ( at_stream_end_ || used_bytes() >= min_available_bytes ) );
+ }
+
int write_data( uint8_t * const in_buffer, const int in_size ) throw()
{
if( at_stream_end_ || in_size <= 0 ) return 0;
@@ -55,6 +60,7 @@ class Range_decoder
mutable long long member_pos;
uint32_t code;
uint32_t range;
+ bool reload_pending;
Input_buffer & ibuf;
public:
@@ -63,62 +69,86 @@ public:
member_pos( header_size ),
code( 0 ),
range( 0xFFFFFFFF ),
+ reload_pending( false ),
ibuf( buf )
{ for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte(); }
+ bool at_stream_end() const throw() { return ibuf.at_stream_end(); }
+ int available_bytes() const throw() { return ibuf.used_bytes(); }
+ bool enough_available_bytes() const throw()
+ { return ibuf.enough_available_bytes(); }
+ bool finished() const throw() { return ibuf.finished(); }
+ long long member_position() const throw() { return member_pos; }
+
uint8_t get_byte() const
{
++member_pos;
return ibuf.get_byte();
}
- bool at_stream_end() const throw() { return ibuf.at_stream_end(); }
- int available_bytes() const throw() { return ibuf.used_bytes(); }
- bool finished() const throw() { return ibuf.finished(); }
- long long member_position() const throw() { return member_pos; }
+ bool try_reload( const bool force = false ) throw()
+ {
+ if( force ) reload_pending = true;
+ if( reload_pending && available_bytes() >= 5 )
+ {
+ code = 0;
+ range = 0xFFFFFFFF;
+ reload_pending = false;
+ for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte();
+ }
+ return !reload_pending;
+ }
+
+ void normalize()
+ {
+ if( range <= 0x00FFFFFF )
+ { range <<= 8; code = (code << 8) | get_byte(); }
+ }
int decode( const int num_bits )
{
int symbol = 0;
- for( int i = num_bits - 1; i >= 0; --i )
+ for( int i = num_bits; i > 0; --i )
{
- range >>= 1;
symbol <<= 1;
- if( code >= range )
- { code -= range; symbol |= 1; }
if( range <= 0x00FFFFFF )
- { range <<= 8; code = (code << 8) | get_byte(); }
+ {
+ range <<= 7; code = (code << 8) | get_byte();
+ if( code >= range ) { code -= range; symbol |= 1; }
+ }
+ else
+ {
+ range >>= 1;
+ if( code >= range ) { code -= range; symbol |= 1; }
+ }
}
return symbol;
}
int decode_bit( Bit_model & bm )
{
- int symbol;
+ normalize();
const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
if( code < bound )
{
range = bound;
bm.probability += (bit_model_total - bm.probability) >> bit_model_move_bits;
- symbol = 0;
+ return 0;
}
else
{
range -= bound;
code -= bound;
bm.probability -= bm.probability >> bit_model_move_bits;
- symbol = 1;
+ return 1;
}
- if( range <= 0x00FFFFFF )
- { range <<= 8; code = (code << 8) | get_byte(); }
- return symbol;
}
int decode_tree( Bit_model bm[], const int num_bits )
{
int model = 1;
for( int i = num_bits; i > 0; --i )
- model = ( model << 1 ) | decode_bit( bm[model-1] );
+ model = ( model << 1 ) | decode_bit( bm[model] );
return model - (1 << num_bits);
}
@@ -126,27 +156,31 @@ public:
{
int model = 1;
int symbol = 0;
- for( int i = 1; i < (1 << num_bits); i <<= 1 )
+ for( int i = 0; i < num_bits; ++i )
{
- const int bit = decode_bit( bm[model-1] );
- model = ( model << 1 ) | bit;
- if( bit ) symbol |= i;
+ const int bit = decode_bit( bm[model] );
+ model <<= 1;
+ if( bit ) { model |= 1; symbol |= (1 << i); }
}
return symbol;
}
int decode_matched( Bit_model bm[], const int match_byte )
{
+ Bit_model *bm1 = bm + 0x100;
int symbol = 1;
- for( int i = 7; i >= 0; --i )
+ for( int i = 1; i <= 8; ++i )
{
- const int match_bit = ( match_byte >> i ) & 1;
- const int bit = decode_bit( bm[(match_bit<<8)+symbol+0xFF] );
+ const int match_bit = ( match_byte << i ) & 0x100;
+ const int bit = decode_bit( bm1[match_bit+symbol] );
symbol = ( symbol << 1 ) | bit;
- if( match_bit != bit ) break;
+ if( ( match_bit && !bit ) || ( !match_bit && bit ) )
+ {
+ while( ++i <= 8 )
+ symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
+ break;
+ }
}
- while( symbol < 0x100 )
- symbol = ( symbol << 1 ) | decode_bit( bm[symbol-1] );
return symbol & 0xFF;
}
};
@@ -193,6 +227,7 @@ public:
class LZ_decoder : public Circular_buffer
{
+ enum { min_free_bytes = max_match_len };
long long partial_data_pos;
const int format_version;
const int dictionary_size;
@@ -220,7 +255,6 @@ class LZ_decoder : public Circular_buffer
Len_decoder rep_match_len_decoder;
Literal_decoder literal_decoder;
-// using Circular_buffer::get_byte;
uint8_t get_byte( const int distance ) const throw()
{
int i = put - distance - 1;
@@ -235,20 +269,23 @@ class LZ_decoder : public Circular_buffer
if( ++put >= buffer_size ) { partial_data_pos += put; put = 0; }
}
- bool copy_block( const int distance, int len )
+ void copy_block( const int distance, int len )
{
- if( distance < 0 || distance >= dictionary_size ||
- len <= 0 || len > max_match_len ) return false;
int i = put - distance - 1;
if( i < 0 ) i += buffer_size;
- for( ; len > 0 ; --len )
+ if( len < buffer_size - std::max( put, i ) && len <= distance )
+ {
+ crc32.update( crc_, buffer + i, len );
+ std::memcpy( buffer + put, buffer + i, len );
+ put += len;
+ }
+ else for( ; len > 0 ; --len )
{
crc32.update( crc_, buffer[i] );
buffer[put] = buffer[i];
if( ++put >= buffer_size ) { partial_data_pos += put; put = 0; }
if( ++i >= buffer_size ) i = 0;
}
- return true;
}
bool verify_trailer();
@@ -256,7 +293,7 @@ class LZ_decoder : public Circular_buffer
public:
LZ_decoder( const File_header & header, Input_buffer & ibuf )
:
- Circular_buffer( std::max( 65536, header.dictionary_size() ) + max_match_len ),
+ Circular_buffer( std::max( 65536, header.dictionary_size() ) + min_free_bytes ),
partial_data_pos( 0 ),
format_version( header.version ),
dictionary_size( header.dictionary_size() ),
@@ -270,6 +307,9 @@ public:
range_decoder( sizeof header, ibuf ),
literal_decoder() {}
+ bool enough_free_bytes() const throw()
+ { return free_bytes() >= min_free_bytes; }
+
uint32_t crc() const throw() { return crc_ ^ 0xFFFFFFFF; }
int decode_member();
bool member_finished() const throw()
diff --git a/doc/lzlib.info b/doc/lzlib.info
index 28aea4d..fff59c2 100644
--- a/doc/lzlib.info
+++ b/doc/lzlib.info
@@ -12,12 +12,13 @@ File: lzlib.info, Node: Top, Next: Introduction, Up: (dir)
Lzlib
*****
-This manual is for Lzlib (version 0.3, 3 May 2009).
+This manual is for Lzlib (version 0.4, 3 June 2009).
* Menu:
* Introduction:: Purpose and features of Lzlib
* Library Version:: Checking library version
+* Buffering:: Sizes of Lzlib's buffers
* Compression Functions:: Descriptions of the compression functions
* Decompression Functions:: Descriptions of the decompression functions
* Error Codes:: Meaning of codes returned by functions
@@ -38,8 +39,8 @@ File: lzlib.info, Node: Introduction, Next: Library Version, Prev: Top, Up:
1 Introduction
**************
-The lzlib compression library provides in-memory LZMA compression and
-decompression functions, including integrity checking of the
+Lzlib is a data compression library providing in-memory LZMA compression
+and decompression functions, including integrity checking of the
uncompressed data. The compressed data format used by the library is the
lzip format.
@@ -68,7 +69,7 @@ Igor Pavlov. For a description of the LZMA algorithm, see the Lzip
manual.

-File: lzlib.info, Node: Library Version, Next: Compression Functions, Prev: Introduction, Up: Top
+File: lzlib.info, Node: Library Version, Next: Buffering, Prev: Introduction, Up: Top
2 Library Version
*****************
@@ -88,9 +89,37 @@ application.
error( "bad library version" );

-File: lzlib.info, Node: Compression Functions, Next: Decompression Functions, Prev: Library Version, Up: Top
+File: lzlib.info, Node: Buffering, Next: Compression Functions, Prev: Library Version, Up: Top
-3 Compression Functions
+3 Buffering
+***********
+
+Lzlib internal functions need access to a memory chunk at least as large
+as the dictionary size (sliding window). For efficiency reasons, the
+input buffer for compression is twice as large as the dictionary size.
+Finally, for security reasons, lzlib uses two more internal buffers.
+
+ These are the four buffers used by lzlib, and their guaranteed
+minimum sizes:
+
+ * Input compression buffer. Written to by the `LZ_compress_write'
+ function. Its size is two times the dictionary size set with the
+ `LZ_compress_open' function or 128KiB, whichever is larger.
+
+ * Output compression buffer. Read from by the `LZ_compress_read'
+ function. Its size is 64KiB.
+
+ * Input decompression buffer. Written to by the
+ `LZ_decompress_write' function. Its size is 64KiB.
+
+ * Output decompression buffer. Read from by the `LZ_decompress_read'
+ function. Its size is the dictionary size set with the
+ `LZ_decompress_open' function or 64KiB, whichever is larger.
+
+
+File: lzlib.info, Node: Compression Functions, Next: Decompression Functions, Prev: Buffering, Up: Top
+
+4 Compression Functions
***********************
These are the functions used to compress data. In case of error, all of
@@ -123,6 +152,13 @@ verified by calling `LZ_compress_errno' before using it.
stream, give MEMBER_SIZE a value larger than the amount of data to
be produced, for example LLONG_MAX.
+ -- Function: int LZ_compress_restart_member ( void * const ENCODER,
+ const long long MEMBER_SIZE )
+ Use this function to start a new member, in a multimember data
+ stream. Call this function only after
+ `LZ_compress_member_finished' indicates that the current member
+ has been fully read (with the `LZ_compress_read' function).
+
-- Function: int LZ_compress_close ( void * const ENCODER )
Frees all dynamically allocated data structures for this stream.
This function discards any unprocessed input and does not flush
@@ -133,17 +169,11 @@ verified by calling `LZ_compress_errno' before using it.
Use this function to tell `lzlib' that all the data for this stream
has already been written (with the `LZ_compress_write' function).
- -- Function: int LZ_compress_finish_member ( void * const ENCODER )
- Use this function to tell `lzlib' that all the data for the current
- member, in a multimember data stream, has already been written
- (with the `LZ_compress_write' function).
-
- -- Function: int LZ_compress_restart_member ( void * const ENCODER,
- const long long MEMBER_SIZE )
- Use this function to start a new member, in a multimember data
- stream. Call this function only after
- `LZ_compress_member_finished' indicates that the current member
- has been fully read (with the `LZ_compress_read' function).
+ -- Function: int LZ_compress_sync_flush ( void * const ENCODER )
+ Use this function to make available to `LZ_compress_read' all the
+ data already written with the `LZ_compress_write' function.
+ Repeated use of `LZ_compress_sync_flush' may degrade compression
+ ratio, so use it only when needed.
-- Function: int LZ_compress_read ( void * const ENCODER, uint8_t *
const BUFFER, const int SIZE )
@@ -165,6 +195,14 @@ verified by calling `LZ_compress_errno' before using it.
might be less than SIZE. Note that writing less than SIZE bytes is
not an error.
+ -- Function: int LZ_compress_write_size ( void * const ENCODER )
+ The `LZ_compress_write_size' function returns the maximum number of
+ bytes that can be immediately written through the
+ `LZ_compress_write' function.
+
+ It is guaranteed that an immediate call to `LZ_compress_write' will
+ accept a SIZE up to the returned number of bytes.
+
-- Function: enum LZ_errno LZ_compress_errno ( void * const ENCODER )
Returns the current error code for ENCODER (*note Error Codes::)
@@ -199,7 +237,7 @@ verified by calling `LZ_compress_errno' before using it.

File: lzlib.info, Node: Decompression Functions, Next: Error Codes, Prev: Compression Functions, Up: Top
-4 Decompression Functions
+5 Decompression Functions
*************************
These are the functions used to decompress data. In case of error, all
@@ -275,7 +313,7 @@ be verified by calling `LZ_decompress_errno' before using it.

File: lzlib.info, Node: Error Codes, Next: Data Format, Prev: Decompression Functions, Up: Top
-5 Error Codes
+6 Error Codes
*************
Most library functions return -1 to indicate that they have failed. But
@@ -286,7 +324,7 @@ what kind of error it was, you need to verify the error code by calling
Library functions do not change the value returned by
`LZ_(de)compress_errno' when they succeed; thus, the value returned by
`LZ_(de)compress_errno' after a successful call is not necessarily
-zero, and you should not use `LZ_(de)compress_errno' to determine
+LZ_ok, and you should not use `LZ_(de)compress_errno' to determine
whether a call failed. If the call failed, then you can examine
`LZ_(de)compress_errno'.
@@ -327,7 +365,7 @@ whether a call failed. If the call failed, then you can examine

File: lzlib.info, Node: Data Format, Next: Examples, Prev: Error Codes, Up: Top
-6 Data Format
+7 Data Format
*************
In the diagram below, a box like this:
@@ -389,7 +427,7 @@ with no additional information before, between, or after them.

File: lzlib.info, Node: Examples, Next: Problems, Prev: Data Format, Up: Top
-7 A small tutorial with examples
+8 A small tutorial with examples
********************************
This chapter shows the order in which the library functions should be
@@ -437,7 +475,7 @@ Example 3: Multimember compression (MEMBER_SIZE < total output).

File: lzlib.info, Node: Problems, Next: Concept Index, Prev: Examples, Up: Top
-8 Reporting Bugs
+9 Reporting Bugs
****************
There are probably bugs in Lzlib. There are certainly errors and
@@ -459,6 +497,7 @@ Concept Index
* Menu:
+* buffering: Buffering. (line 6)
* bugs: Problems. (line 6)
* compression functions: Compression Functions. (line 6)
* data format: Data Format. (line 6)
@@ -474,14 +513,15 @@ Concept Index

Tag Table:
Node: Top219
-Node: Introduction968
-Node: Library Version2428
-Node: Compression Functions3085
-Node: Decompression Functions8178
-Node: Error Codes11616
-Node: Data Format13551
-Node: Examples15518
-Node: Problems16940
-Node: Concept Index17510
+Node: Introduction1010
+Node: Library Version2477
+Node: Buffering3122
+Node: Compression Functions4229
+Node: Decompression Functions9731
+Node: Error Codes13169
+Node: Data Format15105
+Node: Examples17072
+Node: Problems18494
+Node: Concept Index19064

End Tag Table
diff --git a/doc/lzlib.texinfo b/doc/lzlib.texinfo
index 69d96d4..044bd04 100644
--- a/doc/lzlib.texinfo
+++ b/doc/lzlib.texinfo
@@ -5,8 +5,8 @@
@finalout
@c %**end of header
-@set UPDATED 3 May 2009
-@set VERSION 0.3
+@set UPDATED 3 June 2009
+@set VERSION 0.4
@dircategory Data Compression
@direntry
@@ -34,6 +34,7 @@ This manual is for Lzlib (version @value{VERSION}, @value{UPDATED}).
@menu
* Introduction:: Purpose and features of Lzlib
* Library Version:: Checking library version
+* Buffering:: Sizes of Lzlib's buffers
* Compression Functions:: Descriptions of the compression functions
* Decompression Functions:: Descriptions of the decompression functions
* Error Codes:: Meaning of codes returned by functions
@@ -54,8 +55,8 @@ to copy, distribute and modify it.
@chapter Introduction
@cindex introduction
-The lzlib compression library provides in-memory LZMA compression and
-decompression functions, including integrity checking of the
+Lzlib is a data compression library providing in-memory LZMA compression
+and decompression functions, including integrity checking of the
uncompressed data. The compressed data format used by the library is the
lzip format.
@@ -106,6 +107,37 @@ if( LZ_version()[0] != LZ_version_string[0] )
@end example
+@node Buffering
+@chapter Buffering
+@cindex buffering
+
+Lzlib internal functions need access to a memory chunk at least as large
+as the dictionary size (sliding window). For efficiency reasons, the
+input buffer for compression is twice as large as the dictionary size.
+Finally, for security reasons, lzlib uses two more internal buffers.
+
+These are the four buffers used by lzlib, and their guaranteed minimum
+sizes:
+
+@itemize @bullet
+@item Input compression buffer. Written to by the
+@samp{LZ_compress_write} function. Its size is two times the dictionary
+size set with the @samp{LZ_compress_open} function or 128KiB, whichever
+is larger.
+
+@item Output compression buffer. Read from by the
+@samp{LZ_compress_read} function. Its size is 64KiB.
+
+@item Input decompression buffer. Written to by the
+@samp{LZ_decompress_write} function. Its size is 64KiB.
+
+@item Output decompression buffer. Read from by the
+@samp{LZ_decompress_read} function. Its size is the dictionary size set
+with the @samp{LZ_decompress_open} function or 64KiB, whichever is
+larger.
+@end itemize
+
+
@node Compression Functions
@chapter Compression Functions
@cindex compression functions
@@ -142,6 +174,14 @@ for example LLONG_MAX.
@end deftypefun
+@deftypefun int LZ_compress_restart_member ( void * const @var{encoder}, const long long @var{member_size} )
+Use this function to start a new member, in a multimember data stream.
+Call this function only after @samp{LZ_compress_member_finished}
+indicates that the current member has been fully read (with the
+@samp{LZ_compress_read} function).
+@end deftypefun
+
+
@deftypefun int LZ_compress_close ( void * const @var{encoder} )
Frees all dynamically allocated data structures for this stream. This
function discards any unprocessed input and does not flush any pending
@@ -156,18 +196,11 @@ has already been written (with the @samp{LZ_compress_write} function).
@end deftypefun
-@deftypefun int LZ_compress_finish_member ( void * const @var{encoder} )
-Use this function to tell @samp{lzlib} that all the data for the current
-member, in a multimember data stream, has already been written (with the
-@samp{LZ_compress_write} function).
-@end deftypefun
-
-
-@deftypefun int LZ_compress_restart_member ( void * const @var{encoder}, const long long @var{member_size} )
-Use this function to start a new member, in a multimember data stream.
-Call this function only after @samp{LZ_compress_member_finished}
-indicates that the current member has been fully read (with the
-@samp{LZ_compress_read} function).
+@deftypefun int LZ_compress_sync_flush ( void * const @var{encoder} )
+Use this function to make available to @samp{LZ_compress_read} all the
+data already written with the @samp{LZ_compress_write} function.
+Repeated use of @samp{LZ_compress_sync_flush} may degrade compression
+ratio, so use it only when needed.
@end deftypefun
@@ -194,6 +227,16 @@ not an error.
@end deftypefun
+@deftypefun int LZ_compress_write_size ( void * const @var{encoder} )
+The @samp{LZ_compress_write_size} function returns the maximum number of
+bytes that can be immediately written through the @samp{LZ_compress_write}
+function.
+
+It is guaranteed that an immediate call to @samp{LZ_compress_write} will
+accept a @var{size} up to the returned number of bytes.
+@end deftypefun
+
+
@deftypefun {enum LZ_errno} LZ_compress_errno ( void * const @var{encoder} )
Returns the current error code for @var{encoder} (@pxref{Error Codes})
@end deftypefun
@@ -340,8 +383,8 @@ what kind of error it was, you need to verify the error code by calling
Library functions do not change the value returned by
@samp{LZ_(de)compress_errno} when they succeed; thus, the value returned
by @samp{LZ_(de)compress_errno} after a successful call is not
-necessarily zero, and you should not use @samp{LZ_(de)compress_errno} to
-determine whether a call failed. If the call failed, then you can
+necessarily LZ_ok, and you should not use @samp{LZ_(de)compress_errno}
+to determine whether a call failed. If the call failed, then you can
examine @samp{LZ_(de)compress_errno}.
The error codes are defined in the header file @samp{lzlib.h}.
diff --git a/encoder.cc b/encoder.cc
index 3bc855b..cca154c 100644
--- a/encoder.cc
+++ b/encoder.cc
@@ -47,32 +47,45 @@ const Prob_prices prob_prices;
int Matchfinder::write_data( uint8_t * const in_buffer, const int in_size ) throw()
{
if( at_stream_end_ ) return 0;
- if( pos >= pos_limit )
- {
- const int offset = pos - dictionary_size_ - max_num_trials;
- const int size = stream_pos - offset;
-// std::fprintf( stderr, "%6d offset, %5d size, %4d margin.\n",
-// offset, size, after_size - ( pos - pos_limit ) );
- std::memmove( buffer, buffer + offset, size );
- partial_data_pos += offset;
- pos -= offset;
- stream_pos -= offset;
- for( int i = 0; i < num_prev_positions; ++i )
- if( prev_positions[i] >= 0 ) prev_positions[i] -= offset;
- for( int i = 0; i < 2 * dictionary_size_; ++i )
- if( prev_pos_tree[i] >= 0 ) prev_pos_tree[i] -= offset;
- }
const int size = std::min( buffer_size - stream_pos, in_size );
if( size > 0 )
{
- std::memmove( buffer + stream_pos, in_buffer, size );
+ std::memcpy( buffer + stream_pos, in_buffer, size );
stream_pos += size;
}
return size;
}
-bool Matchfinder::reset() throw()
+Matchfinder::Matchfinder( const int dict_size, const int len_limit )
+ :
+ partial_data_pos( 0 ),
+ dictionary_size_( dict_size ),
+ after_size( max_num_trials + max_match_len ),
+ buffer_size( ( 2 * std::max( 65536, dictionary_size_ ) ) +
+ max_num_trials + after_size ),
+ buffer( new( std::nothrow ) uint8_t[buffer_size] ),
+ pos( 0 ),
+ cyclic_pos( 0 ),
+ stream_pos( 0 ),
+ pos_limit( buffer_size - after_size ),
+ match_len_limit_( len_limit ),
+ prev_positions( new( std::nothrow ) int32_t[num_prev_positions] ),
+ at_stream_end_( false )
+ {
+ prev_pos_tree = new( std::nothrow ) int32_t[2*dictionary_size_];
+ if( !buffer || !prev_positions || !prev_pos_tree )
+ {
+ if( prev_pos_tree ) delete[] prev_pos_tree;
+ if( prev_positions ) delete[] prev_positions;
+ if( buffer ) delete[] buffer;
+ throw std::bad_alloc();
+ }
+ for( int i = 0; i < num_prev_positions; ++i ) prev_positions[i] = -1;
+ }
+
+
+void Matchfinder::reset() throw()
{
const int size = stream_pos - pos;
std::memmove( buffer, buffer + pos, size );
@@ -81,25 +94,43 @@ bool Matchfinder::reset() throw()
pos = 0;
cyclic_pos = 0;
for( int i = 0; i < num_prev_positions; ++i ) prev_positions[i] = -1;
- return true;
}
bool Matchfinder::move_pos() throw()
{
if( ++cyclic_pos >= dictionary_size_ ) cyclic_pos = 0;
- if( ++pos > stream_pos ) { pos = stream_pos; return false; }
+ if( ++pos >= pos_limit )
+ {
+ if( pos > stream_pos ) { pos = stream_pos; return false; }
+ else
+ {
+ const int offset = pos - dictionary_size_ - max_num_trials;
+ const int size = stream_pos - offset;
+ std::memmove( buffer, buffer + offset, size );
+ partial_data_pos += offset;
+ pos -= offset;
+ stream_pos -= offset;
+ for( int i = 0; i < num_prev_positions; ++i )
+ if( prev_positions[i] >= 0 ) prev_positions[i] -= offset;
+ for( int i = 0; i < 2 * dictionary_size_; ++i )
+ if( prev_pos_tree[i] >= 0 ) prev_pos_tree[i] -= offset;
+ }
+ }
return true;
}
int Matchfinder::longest_match_len( int * const distances ) throw()
{
+ int idx0 = cyclic_pos << 1;
+ int idx1 = idx0 + 1;
int len_limit = match_len_limit_;
if( len_limit > available_bytes() )
{
len_limit = available_bytes();
- if( len_limit < 4 ) return 0;
+ if( len_limit < 4 )
+ { prev_pos_tree[idx0] = prev_pos_tree[idx1] = -1; return 0; }
}
int maxlen = min_match_len - 1;
@@ -131,16 +162,12 @@ int Matchfinder::longest_match_len( int * const distances ) throw()
int newpos = prev_positions[key4];
prev_positions[key4] = pos;
- int idx0 = cyclic_pos << 1;
- int idx1 = idx0 + 1;
- int len0 = 0, len1 = 0;
-
for( int count = 16 + ( match_len_limit_ / 2 ); ; )
{
if( newpos < min_pos || --count < 0 )
{ prev_pos_tree[idx0] = prev_pos_tree[idx1] = -1; break; }
const uint8_t * const newdata = buffer + newpos;
- int len = std::min( len0, len1 );
+ int len = 0;
while( len < len_limit && newdata[len] == data[len] ) ++len;
const int delta = pos - newpos;
@@ -156,14 +183,12 @@ int Matchfinder::longest_match_len( int * const distances ) throw()
prev_pos_tree[idx0] = newpos;
idx0 = newidx + 1;
newpos = prev_pos_tree[idx0];
- len0 = len;
}
else
{
prev_pos_tree[idx1] = newpos;
idx1 = newidx;
newpos = prev_pos_tree[idx1];
- len1 = len;
}
}
else
@@ -432,9 +457,26 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances],
}
+ // Sync Flush mark => (dis == 0xFFFFFFFF, len == min_match_len+1)
+bool LZ_encoder::sync_flush() // Writes the Sync Flush marker and flushes the range encoder; returns false, writing nothing, if the guard below fails.
+ {
+ if( member_finished_ || range_encoder.free_bytes() < max_marker_size ) // refuse if the member is finished or fewer than max_marker_size bytes are free
+ return false;
+ const int pos_state = ( matchfinder.data_position() ) & pos_state_mask;
+ range_encoder.encode_bit( bm_match[state()][pos_state], 1 ); // same two-bit prefix as the End Of Stream marker in full_flush()
+ range_encoder.encode_bit( bm_rep[state()], 0 );
+ encode_pair( 0xFFFFFFFF, min_match_len + 1, pos_state ); // marker pair: distance 0xFFFFFFFF, length min_match_len + 1
+ range_encoder.flush(); // Range_encoder::flush() also resets low, range, ff_count and cache
+ return true;
+ }
+
+
// End Of Stream mark => (dis == 0xFFFFFFFF, len == min_match_len)
-void LZ_encoder::flush( const State & state )
+bool LZ_encoder::full_flush()
{
+ if( member_finished_ ||
+ range_encoder.free_bytes() < (int)sizeof( File_trailer ) + max_marker_size )
+ return false;
const int pos_state = ( matchfinder.data_position() ) & pos_state_mask;
range_encoder.encode_bit( bm_match[state()][pos_state], 1 );
range_encoder.encode_bit( bm_rep[state()], 0 );
@@ -445,14 +487,15 @@ void LZ_encoder::flush( const State & state )
trailer.data_size( matchfinder.data_position() );
trailer.member_size( range_encoder.member_position() + sizeof trailer );
for( unsigned int i = 0; i < sizeof trailer; ++i )
- range_encoder.put_byte( (( uint8_t *)&trailer)[i] );
+ range_encoder.put_byte( ((uint8_t *)&trailer)[i] );
+ return true;
}
LZ_encoder::LZ_encoder( Matchfinder & mf, const File_header & header,
const long long member_size )
:
- member_size_limit( member_size - sizeof( File_trailer ) - 15 ),
+ member_size_limit( member_size - sizeof( File_trailer ) - max_marker_size ),
longest_match_found( 0 ),
crc_( 0xFFFFFFFF ),
matchfinder( mf ),
@@ -469,19 +512,21 @@ LZ_encoder::LZ_encoder( Matchfinder & mf, const File_header & header,
fill_align_prices();
for( unsigned int i = 0; i < sizeof header; ++i )
- range_encoder.put_byte( (( uint8_t *)&header)[i] );
+ range_encoder.put_byte( ((uint8_t *)&header)[i] );
}
-bool LZ_encoder::encode_member()
+bool LZ_encoder::encode_member( const bool finish )
{
if( member_finished_ ) return true;
- if( !matchfinder.finished() && !matchfinder.available_bytes() )
- return true; // need at least 1 byte
+ if( range_encoder.member_position() >= member_size_limit )
+ { if( full_flush() ) { member_finished_ = true; } return true; }
- if( range_encoder.member_position() == sizeof( File_header ) &&
- !matchfinder.finished() ) // copy first byte
+ // copy first byte
+ if( matchfinder.data_position() == 0 && !matchfinder.finished() )
{
+ if( matchfinder.available_bytes() < 4 && !matchfinder.at_stream_end() )
+ return true;
range_encoder.encode_bit( bm_match[state()][0], 0 );
const uint8_t cur_byte = matchfinder[0];
literal_encoder.encode( range_encoder, prev_byte, cur_byte );
@@ -493,12 +538,12 @@ bool LZ_encoder::encode_member()
while( true )
{
if( matchfinder.finished() )
- { flush( state ); member_finished_ = true; return true; }
- if( !matchfinder.available_bytes() ||
- ( !matchfinder.at_stream_end() &&
- matchfinder.available_bytes() < max_num_trials + max_match_len ) )
- return true; // need more data
- if( range_encoder.free_bytes() < 2 * max_num_trials ) return true;
+ {
+ if( finish && full_flush() ) member_finished_ = true;
+ return true;
+ }
+ if( !matchfinder.enough_available_bytes() ||
+ !range_encoder.enough_free_bytes() ) return true;
if( fill_counter <= 0 ) { fill_distance_prices(); fill_counter = 512; }
int ahead = best_pair_sequence( rep_distances, state );
@@ -563,8 +608,7 @@ bool LZ_encoder::encode_member()
if( range_encoder.member_position() >= member_size_limit )
{
if( !matchfinder.dec_pos( ahead ) ) return false;
- flush( state );
- member_finished_ = true;
+ if( full_flush() ) member_finished_ = true;
return true;
}
if( ahead <= 0 ) break;
diff --git a/encoder.h b/encoder.h
index af8c441..cdfd751 100644
--- a/encoder.h
+++ b/encoder.h
@@ -96,7 +96,7 @@ inline int price_symbol( const Bit_model bm[], int symbol, const int num_bits )
{
const int bit = symbol & 1;
symbol >>= 1;
- price += price_bit( bm[symbol-1], bit );
+ price += price_bit( bm[symbol], bit );
}
return price;
}
@@ -110,7 +110,7 @@ inline int price_symbol_reversed( const Bit_model bm[], int symbol,
{
const int bit = symbol & 1;
symbol >>= 1;
- price += price_bit( bm[model-1], bit );
+ price += price_bit( bm[model], bit );
model = ( model << 1 ) | bit;
}
return price;
@@ -126,14 +126,14 @@ inline int price_matched( const Bit_model bm[], const int symbol,
{
const int match_bit = ( match_byte >> i ) & 1;
const int bit = ( symbol >> i ) & 1;
- price += price_bit( bm[(match_bit<<8)+model+0xFF], bit );
+ price += price_bit( bm[(match_bit<<8)+model+0x100], bit );
model = ( model << 1 ) | bit;
if( match_bit != bit )
{
while( --i >= 0 )
{
const int bit = ( symbol >> i ) & 1;
- price += price_bit( bm[model-1], bit );
+ price += price_bit( bm[model], bit );
model = ( model << 1 ) | bit;
}
break;
@@ -166,32 +166,7 @@ class Matchfinder
bool at_stream_end_; // stream_pos shows real end of file
public:
- Matchfinder( const int dict_size, const int len_limit )
- :
- partial_data_pos( 0 ),
- dictionary_size_( dict_size ),
- after_size( max_num_trials + max_match_len ),
- buffer_size( ( 2 * std::max( 65536, dictionary_size_ ) ) +
- max_num_trials + after_size ),
- buffer( new( std::nothrow ) uint8_t[buffer_size] ),
- pos( 0 ),
- cyclic_pos( 0 ),
- stream_pos( 0 ),
- pos_limit( buffer_size - after_size ),
- match_len_limit_( len_limit ),
- prev_positions( new( std::nothrow ) int32_t[num_prev_positions] ),
- at_stream_end_( false )
- {
- prev_pos_tree = new( std::nothrow ) int32_t[2*dictionary_size_];
- if( !buffer || !prev_positions || !prev_pos_tree )
- {
- if( prev_pos_tree ) delete[] prev_pos_tree;
- if( prev_positions ) delete[] prev_positions;
- if( buffer ) delete[] buffer;
- throw std::bad_alloc();
- }
- for( int i = 0; i < num_prev_positions; ++i ) prev_positions[i] = -1;
- }
+ Matchfinder( const int dict_size, const int len_limit );
~Matchfinder()
{ delete[] prev_pos_tree; delete[] prev_positions; delete[] buffer; }
@@ -201,8 +176,9 @@ public:
int available_bytes() const throw() { return stream_pos - pos; }
long long data_position() const throw() { return partial_data_pos + pos; }
int dictionary_size() const throw() { return dictionary_size_; }
- void finish() throw() { at_stream_end_ = true; }
+ void flushing( const bool b ) throw() { at_stream_end_ = b; } // sets or clears the end-of-stream flag (the replaced finish() could only set it)
bool finished() const throw() { return at_stream_end_ && pos >= stream_pos; }
+ int free_bytes() const throw() { return buffer_size - stream_pos; } // space left between stream_pos and the end of the buffer
int match_len_limit() const throw() { return match_len_limit_; }
const uint8_t * ptr_to_current_pos() const throw() { return buffer + pos; }
@@ -215,6 +191,12 @@ public:
return true;
}
+ bool enough_available_bytes() const throw() // True when unread data exists and either the stream end is flagged or at least after_size bytes lie ahead of pos.
+ {
+ return ( stream_pos > pos &&
+ ( at_stream_end_ || stream_pos - pos >= after_size ) );
+ }
+
int true_match_len( const int index, const int distance, int len_limit ) const throw()
{
if( index + len_limit > available_bytes() )
@@ -226,7 +208,7 @@ public:
}
int write_data( uint8_t * const in_buffer, const int in_size ) throw();
- bool reset() throw();
+ void reset() throw();
bool move_pos() throw();
int longest_match_len( int * const distances = 0 ) throw();
};
@@ -234,6 +216,7 @@ public:
class Range_encoder : public Circular_buffer
{
+ enum { min_free_bytes = 2 * max_num_trials };
uint64_t low;
long long partial_member_pos;
uint32_t range;
@@ -256,13 +239,16 @@ class Range_encoder : public Circular_buffer
public:
Range_encoder()
:
- Circular_buffer( 65536 + (2 * max_num_trials) ),
+ Circular_buffer( 65536 + min_free_bytes ),
low( 0 ),
partial_member_pos( 0 ),
range( 0xFFFFFFFF ),
ff_count( 0 ),
cache( 0 ) {}
+ bool enough_free_bytes() const throw() // True when at least min_free_bytes (2 * max_num_trials) bytes are free in the buffer.
+ { return free_bytes() >= min_free_bytes; }
+
int read_data( uint8_t * const out_buffer, const int out_size ) throw()
{
const int size = Circular_buffer::read_data( out_buffer, out_size );
@@ -270,7 +256,14 @@ public:
return size;
}
- void flush() { for( int i = 0; i < 5; ++i ) shift_low(); }
+ void flush() // Calls shift_low() five times, then restores the coder state to the constructor's initial values.
+ {
+ for( int i = 0; i < 5; ++i ) shift_low();
+ low = 0; // the four assignments below match the constructor's initializers
+ range = 0xFFFFFFFF;
+ ff_count = 0;
+ cache = 0;
+ }
long long member_position() const throw()
{ return partial_member_pos + used_bytes() + ff_count; }
@@ -309,7 +302,7 @@ public:
for( int i = num_bits; i > 0; --i, mask >>= 1 )
{
const int bit = ( symbol & mask );
- encode_bit( bm[model-1], bit );
+ encode_bit( bm[model], bit );
model <<= 1;
if( bit ) model |= 1;
}
@@ -321,7 +314,7 @@ public:
for( int i = num_bits; i > 0; --i )
{
const int bit = symbol & 1;
- encode_bit( bm[model-1], bit );
+ encode_bit( bm[model], bit );
model = ( model << 1 ) | bit;
symbol >>= 1;
}
@@ -334,14 +327,14 @@ public:
{
const int bit = ( symbol >> i ) & 1;
const int match_bit = ( match_byte >> i ) & 1;
- encode_bit( bm[(match_bit<<8)+model+0xFF], bit );
+ encode_bit( bm[(match_bit<<8)+model+0x100], bit );
model = ( model << 1 ) | bit;
if( match_bit != bit )
{
while( --i >= 0 )
{
const int bit = ( symbol >> i ) & 1;
- encode_bit( bm[model-1], bit );
+ encode_bit( bm[model], bit );
model = ( model << 1 ) | bit;
}
break;
@@ -421,6 +414,7 @@ class LZ_encoder
{
enum { dis_align_mask = dis_align_size - 1,
infinite_price = 0x0FFFFFFF,
+ max_marker_size = 15,
num_rep_distances = 4 }; // must be 4
struct Trial
@@ -589,19 +583,18 @@ class LZ_encoder
int best_pair_sequence( const int reps[num_rep_distances],
const State & state );
- void flush( const State & state );
+ bool full_flush();
public:
LZ_encoder( Matchfinder & mf, const File_header & header,
const long long member_size );
- bool encode_member();
- void finish_member()
- { if( !member_finished_ ) { flush( state ); member_finished_ = true; } }
+ bool encode_member( const bool finish );
bool member_finished() const throw()
{ return member_finished_ && !range_encoder.used_bytes(); }
int read_data( uint8_t * const buffer, const int size ) throw()
{ return range_encoder.read_data( buffer, size ); }
+ bool sync_flush();
long long member_position() const throw()
{ return range_encoder.member_position(); }
diff --git a/lzip.h b/lzip.h
index a568988..cf8e56e 100644
--- a/lzip.h
+++ b/lzip.h
@@ -121,16 +121,21 @@ public:
uint32_t operator[]( const uint8_t byte ) const throw() { return data[byte]; }
void update( uint32_t & crc, const uint8_t byte ) const throw()
{ crc = data[(crc^byte)&0xFF] ^ ( crc >> 8 ); }
+ void update( uint32_t & crc, const uint8_t * const buffer, const int size ) const throw() // Folds the first 'size' bytes of 'buffer' into 'crc', in order.
+ {
+ for( int i = 0; i < size; ++i )
+ crc = data[(crc^buffer[i])&0xFF] ^ ( crc >> 8 ); // same step as the single-byte update()
+ }
};
extern const CRC32 crc32;
-const char * const magic_string = "LZIP";
+const uint8_t magic_string[4] = { 'L', 'Z', 'I', 'P' };
struct File_header
{
- char magic[4];
+ uint8_t magic[4];
uint8_t version;
uint8_t coded_dict_size;
diff --git a/lzlib.cc b/lzlib.cc
index d3f9777..c64ed43 100644
--- a/lzlib.cc
+++ b/lzlib.cc
@@ -45,6 +45,7 @@ struct Encoder
Matchfinder * matchfinder;
LZ_encoder * lz_encoder;
LZ_errno lz_errno;
+ bool flush_pending;
const File_header member_header;
Encoder( const File_header & header ) throw()
@@ -54,6 +55,7 @@ struct Encoder
matchfinder( 0 ),
lz_encoder( 0 ),
lz_errno( LZ_ok ),
+ flush_pending( false ),
member_header( header )
{}
};
@@ -140,6 +142,28 @@ void * LZ_compress_open( const int dictionary_size, const int match_len_limit,
}
+int LZ_compress_restart_member( void * const encoder, // Begins a new member with size limit member_size; returns 0 on success, -1 on error.
+ const long long member_size )
+ {
+ if( !verify_encoder( encoder ) ) return -1;
+ Encoder & e = *(Encoder *)encoder;
+ if( !e.lz_encoder->member_finished() ) // member_finished() also requires the range encoder to hold no bytes
+ { e.lz_errno = LZ_sequence_error; return -1; }
+
+ e.partial_in_size += e.matchfinder->data_position(); // add the finished member's sizes to the running totals before resetting
+ e.partial_out_size += e.lz_encoder->member_position();
+ e.matchfinder->reset();
+
+ delete e.lz_encoder; // the old coder is replaced by a fresh one bound to the same matchfinder
+ try {
+ e.lz_encoder = new LZ_encoder( *e.matchfinder, e.member_header, member_size );
+ }
+ catch( std::bad_alloc ) // allocation failure: leave lz_encoder null and record LZ_mem_error
+ { e.lz_encoder = 0; e.lz_errno = LZ_mem_error; return -1; }
+ return 0;
+ }
+
+
int LZ_compress_close( void * const encoder )
{
if( !encoder ) return -1;
@@ -154,38 +178,26 @@ int LZ_compress_close( void * const encoder )
int LZ_compress_finish( void * const encoder )
{
if( !verify_encoder( encoder ) ) return -1;
- ((Encoder *)encoder)->matchfinder->finish();
- return 0;
- }
-
-
-int LZ_compress_finish_member( void * const encoder )
- {
- if( !verify_encoder( encoder ) ) return -1;
- ((Encoder *)encoder)->lz_encoder->finish_member();
+ Encoder & e = *(Encoder *)encoder;
+ e.matchfinder->flushing( true ); // sets at_stream_end_, so finished() becomes true once pos reaches stream_pos
+ e.flush_pending = false; // clears any pending sync flush, so LZ_compress_read calls encode_member( true )
return 0;
}
-int LZ_compress_restart_member( void * const encoder,
- const long long member_size )
+int LZ_compress_sync_flush( void * const encoder ) // Requests a sync flush; returns 0, or -1 on an invalid encoder or an encode_member failure.
{
if( !verify_encoder( encoder ) ) return -1;
Encoder & e = *(Encoder *)encoder;
- if( !e.lz_encoder->member_finished() )
- { e.lz_errno = LZ_sequence_error; return -1; }
-
- e.partial_in_size += e.matchfinder->data_position();
- e.partial_out_size += e.lz_encoder->member_position();
- if( !e.matchfinder->reset() )
- { e.lz_errno = LZ_library_error; return -1; }
-
- delete e.lz_encoder;
- try {
- e.lz_encoder = new LZ_encoder( *e.matchfinder, e.member_header, member_size );
+ if( !e.flush_pending && !e.matchfinder->at_stream_end() ) // nothing to do if a flush is already pending or the stream end is already flagged
+ {
+ e.flush_pending = true; // while set, LZ_compress_write accepts no data
+ e.matchfinder->flushing( true ); // with at_stream_end_ set, enough_available_bytes() no longer requires after_size bytes of lookahead
+ if( !e.lz_encoder->encode_member( false ) ) // finish == false: encode_member does not call full_flush() when the input runs out
+ { e.lz_errno = LZ_library_error; return -1; }
+ if( e.lz_encoder->sync_flush() ) // marker written: undo the temporary flag; otherwise LZ_compress_read retries the marker
+ { e.matchfinder->flushing( false ); e.flush_pending = false; }
}
- catch( std::bad_alloc )
- { e.lz_encoder = 0; e.lz_errno = LZ_mem_error; return -1; }
return 0;
}
@@ -195,8 +207,10 @@ int LZ_compress_read( void * const encoder, uint8_t * const buffer,
{
if( !verify_encoder( encoder ) ) return -1;
Encoder & e = *(Encoder *)encoder;
- if( !e.lz_encoder->encode_member() )
+ if( !e.lz_encoder->encode_member( !e.flush_pending ) )
{ e.lz_errno = LZ_library_error; return -1; }
+ if( e.flush_pending && e.lz_encoder->sync_flush() )
+ { e.matchfinder->flushing( false ); e.flush_pending = false; }
return e.lz_encoder->read_data( buffer, size );
}
@@ -205,7 +219,18 @@ int LZ_compress_write( void * const encoder, uint8_t * const buffer,
const int size )
{
if( !verify_encoder( encoder ) ) return -1;
- return ((Encoder *)encoder)->matchfinder->write_data( buffer, size );
+ Encoder & e = *(Encoder *)encoder;
+ if( e.flush_pending ) return 0;
+ return e.matchfinder->write_data( buffer, size );
+ }
+
+
+int LZ_compress_write_size( void * const encoder ) // Reports the matchfinder's free input space (buffer_size - stream_pos); -1 if verify_encoder rejects the handle.
+ {
+ if( !verify_encoder( encoder ) ) return -1;
+ Encoder & e = *(Encoder *)encoder;
+ if( e.flush_pending ) return 0; // LZ_compress_write also accepts nothing while a sync flush is pending
+ return e.matchfinder->free_bytes();
+ }
@@ -220,7 +245,8 @@ int LZ_compress_finished( void * const encoder )
{
if( !verify_encoder( encoder ) ) return -1;
Encoder & e = *(Encoder *)encoder;
- return ( e.matchfinder->finished() && e.lz_encoder->member_finished() );
+ return ( !e.flush_pending && e.matchfinder->finished() &&
+ e.lz_encoder->member_finished() );
}
diff --git a/lzlib.h b/lzlib.h
index c03f7e9..f2ef9d3 100644
--- a/lzlib.h
+++ b/lzlib.h
@@ -29,7 +29,7 @@
extern "C" {
#endif
-const char * const LZ_version_string = "0.3";
+const char * const LZ_version_string = "0.4";
enum { min_dictionary_bits = 12,
min_dictionary_size = 1 << min_dictionary_bits,
@@ -46,16 +46,17 @@ const char * LZ_version( void );
void * LZ_compress_open( const int dictionary_size, const int match_len_limit,
const long long member_size );
-int LZ_compress_close( void * const encoder );
-int LZ_compress_finish( void * const encoder );
-int LZ_compress_finish_member( void * const encoder );
int LZ_compress_restart_member( void * const encoder,
const long long member_size );
+int LZ_compress_close( void * const encoder );
+int LZ_compress_finish( void * const encoder );
+int LZ_compress_sync_flush( void * const encoder );
int LZ_compress_read( void * const encoder, uint8_t * const buffer,
const int size );
int LZ_compress_write( void * const encoder, uint8_t * const buffer,
const int size );
+int LZ_compress_write_size( void * const encoder );
enum LZ_errno LZ_compress_errno( void * const encoder );
int LZ_compress_finished( void * const encoder );
diff --git a/main.cc b/main.cc
index ae57d92..d9cf922 100644
--- a/main.cc
+++ b/main.cc
@@ -52,6 +52,11 @@
#define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL
#endif
+void show_error( const char * msg, const int errcode = 0, const bool help = false ) throw();
+void internal_error( const char * msg );
+int readblock( const int fd, char * buf, const int size ) throw();
+int writeblock( const int fd, const char * buf, const int size ) throw();
+
namespace {
@@ -117,7 +122,7 @@ void show_help() throw()
{
std::printf( "%s - A test program for the lzlib library.\n", Program_name );
std::printf( "\nUsage: %s [options] [files]\n", invocation_name );
- std::printf( "Options:\n" );
+ std::printf( "\nOptions:\n" );
std::printf( " -h, --help display this help and exit\n" );
std::printf( " -V, --version output version information and exit\n" );
std::printf( " -b, --member-size=<n> set member size limit in bytes\n" );
@@ -125,7 +130,7 @@ void show_help() throw()
std::printf( " -d, --decompress decompress\n" );
std::printf( " -f, --force overwrite existing output files\n" );
std::printf( " -k, --keep keep (don't delete) input files\n" );
- std::printf( " -m, --match-length=<n> set match length limit in bytes [64]\n" );
+ std::printf( " -m, --match-length=<n> set match length limit in bytes [80]\n" );
std::printf( " -o, --output=<file> if reading stdin, place the output into <file>\n" );
std::printf( " -q, --quiet suppress all messages\n" );
std::printf( " -s, --dictionary-size=<n> set dictionary size limit in bytes [8MiB]\n" );
@@ -154,30 +159,6 @@ void show_version() throw()
}
-void show_error( const char * msg, const int errcode = 0, const bool help = false ) throw()
- {
- if( verbosity >= 0 )
- {
- if( msg && msg[0] != 0 )
- {
- std::fprintf( stderr, "%s: %s", program_name, msg );
- if( errcode > 0 ) std::fprintf( stderr, ": %s", std::strerror( errcode ) );
- std::fprintf( stderr, "\n" );
- }
- if( help && invocation_name && invocation_name[0] != 0 )
- std::fprintf( stderr, "Try `%s --help' for more information.\n", invocation_name );
- }
- }
-
-
-void internal_error( const char * msg )
- {
- std::string s( "internal error: " ); s += msg;
- show_error( s.c_str() );
- std::exit( 3 );
- }
-
-
const char * format_num( long long num, long long limit = 9999,
const int set_prefix = 0 ) throw()
{
@@ -451,43 +432,6 @@ bool next_filename()
}
-// Returns the number of bytes really read.
-// If (returned value < size) and (errno == 0), means EOF was reached.
-//
-int readblock( const int fd, char * buf, const int size ) throw()
- {
- int rest = size;
- errno = 0;
- while( rest > 0 )
- {
- errno = 0;
- const int n = read( fd, buf + size - rest, rest );
- if( n > 0 ) rest -= n;
- else if( n == 0 ) break;
- else if( errno != EINTR && errno != EAGAIN ) break;
- }
- return ( rest > 0 ) ? size - rest : size;
- }
-
-
-// Returns the number of bytes really written.
-// If (returned value < size), it is always an error.
-//
-int writeblock( const int fd, const char * buf, const int size ) throw()
- {
- int rest = size;
- errno = 0;
- while( rest > 0 )
- {
- errno = 0;
- const int n = write( fd, buf + size - rest, rest );
- if( n > 0 ) rest -= n;
- else if( errno && errno != EINTR && errno != EAGAIN ) break;
- }
- return ( rest > 0 ) ? size - rest : size;
- }
-
-
int compress( const long long member_size, const long long volume_size,
lzma_options encoder_options, const int inhandle,
const Pretty_print & pp, const struct stat * in_statsp,
@@ -509,20 +453,15 @@ int compress( const long long member_size, const long long volume_size,
long long partial_volume_size = 0;
const int out_buffer_size = 65536, in_buffer_size = 8 * out_buffer_size;
uint8_t in_buffer[in_buffer_size], out_buffer[out_buffer_size];
- int in_pos = 0, in_stream_pos = 0;
while( true )
{
- if( in_stream_pos == 0 )
- {
- in_stream_pos = readblock( inhandle, (char *)in_buffer, in_buffer_size );
- if( in_stream_pos == 0 ) LZ_compress_finish( encoder );
- }
- int in_size = 0;
- if( in_pos < in_stream_pos )
+ int in_size = std::min( LZ_compress_write_size( encoder ), in_buffer_size );
+ if( in_size > 0 )
{
- in_size = LZ_compress_write( encoder, in_buffer + in_pos, in_stream_pos - in_pos );
- in_pos += in_size;
- if( in_pos >= in_stream_pos ) { in_stream_pos = 0; in_pos = 0; }
+ in_size = readblock( inhandle, (char *)in_buffer, in_size );
+ if( in_size == 0 ) LZ_compress_finish( encoder );
+ else if( in_size != LZ_compress_write( encoder, in_buffer, in_size ) )
+ internal_error( "library error" );
}
int out_size = LZ_compress_read( encoder, out_buffer, out_buffer_size );
// std::fprintf( stderr, "%6d in_size, %5d out_size.\n", in_size, out_size );
@@ -639,7 +578,7 @@ int decompress( const int inhandle, const Pretty_print & pp,
}
pp(); show_error( "read error", errno ); return 1;
}
- else if( out_size > 0 )
+ else if( out_size > 0 && outhandle >= 0 )
{
const int wr = writeblock( outhandle, (char *)out_buffer, out_size );
if( wr != out_size )
@@ -691,16 +630,77 @@ void Pretty_print::operator()( const char * const msg ) const throw()
}
+void show_error( const char * msg, const int errcode, const bool help ) throw() // Prints "program_name: msg" (plus strerror text when errcode > 0) to stderr; default arguments come from the earlier declaration.
+ {
+ if( verbosity >= 0 ) // prints nothing when verbosity is negative
+ {
+ if( msg && msg[0] != 0 ) // skip the message line for a null or empty msg
+ {
+ std::fprintf( stderr, "%s: %s", program_name, msg );
+ if( errcode > 0 ) std::fprintf( stderr, ": %s", std::strerror( errcode ) );
+ std::fprintf( stderr, "\n" );
+ }
+ if( help && invocation_name && invocation_name[0] != 0 ) // optional hint pointing at --help
+ std::fprintf( stderr, "Try `%s --help' for more information.\n", invocation_name );
+ }
+ }
+
+
+void internal_error( const char * msg ) // Reports msg prefixed with "internal error: " via show_error, then exits with status 3; does not return.
+ {
+ std::string s( "internal error: " ); s += msg;
+ show_error( s.c_str() );
+ std::exit( 3 );
+ }
+
+
+// Reads up to 'size' bytes from 'fd' into 'buf'. Returns the number of bytes really read.
+// If (returned value < size) and (errno == 0), it means EOF was reached.
+// Retries on EINTR and EAGAIN; any other read error ends the loop with errno set.
+int readblock( const int fd, char * buf, const int size ) throw()
+ {
+ int rest = size; // bytes still to be read
+ errno = 0;
+ while( rest > 0 )
+ {
+ errno = 0;
+ const int n = read( fd, buf + size - rest, rest ); // continue where the previous read stopped
+ if( n > 0 ) rest -= n;
+ else if( n == 0 ) break; // end of file
+ else if( errno != EINTR && errno != EAGAIN ) break;
+ }
+ return ( rest > 0 ) ? size - rest : size;
+ }
+
+
+// Writes 'size' bytes from 'buf' to 'fd'. Returns the number of bytes really written.
+// If (returned value < size), it is always an error.
+// Retries on EINTR and EAGAIN, and also when write() returns <= 0 with errno left at 0.
+int writeblock( const int fd, const char * buf, const int size ) throw()
+ {
+ int rest = size; // bytes still to be written
+ errno = 0;
+ while( rest > 0 )
+ {
+ errno = 0;
+ const int n = write( fd, buf + size - rest, rest ); // continue where the previous write stopped
+ if( n > 0 ) rest -= n;
+ else if( errno && errno != EINTR && errno != EAGAIN ) break; // give up only on a real error
+ }
+ return ( rest > 0 ) ? size - rest : size;
+ }
+
+
int main( const int argc, const char * argv[] )
{
// Mapping from gzip/bzip2 style 1..9 compression modes
// to the corresponding LZMA compression modes.
const lzma_options option_mapping[] =
{
- { 1 << 22, 10 }, // -1
- { 1 << 22, 12 }, // -2
- { 1 << 22, 17 }, // -3
- { 1 << 22, 26 }, // -4
+ { 1 << 20, 10 }, // -1
+ { 1 << 20, 12 }, // -2
+ { 1 << 20, 17 }, // -3
+ { 1 << 21, 26 }, // -4
{ 1 << 22, 44 }, // -5
{ 1 << 23, 80 }, // -6
{ 1 << 24, 108 }, // -7
@@ -800,10 +800,7 @@ int main( const int argc, const char * argv[] )
Pretty_print pp( filenames );
if( program_mode == m_test )
- {
- output_filename = "/dev/null";
- if( !open_outstream( true ) ) return 1;
- }
+ outhandle = -1;
int retval = 0;
for( unsigned int i = 0; i < filenames.size(); ++i )
diff --git a/testsuite/check.sh b/testsuite/check.sh
index b4ee42f..4431864 100755
--- a/testsuite/check.sh
+++ b/testsuite/check.sh
@@ -5,6 +5,8 @@
# This script is free software: you have unlimited permission
# to copy, distribute and modify it.
+LC_ALL=C
+export LC_ALL
objdir=`pwd`
testdir=`cd "$1" ; pwd`
LZIP="${objdir}"/minilzip