17 files changed, 200 insertions, 50 deletions
diff --git a/ChangeLog b/ChangeLog
index b510dc0..91d1283 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2009-09-02  Antonio Diaz Diaz  <ant_diaz@teleline.es>
+
+	* Version 0.6 released.
+	* The LZ_compress_sync_flush mechanism has been fixed.
+
 2009-07-03  Antonio Diaz Diaz  <ant_diaz@teleline.es>
 
 	* Version 0.5 released.
diff --git a/INSTALL b/INSTALL
index be94132..e1d20f6 100644
--- a/INSTALL
+++ b/INSTALL
@@ -1,7 +1,7 @@
 Requirements
 ------------
 You will need a C++ compiler.
-I use gcc 4.3.3 and 3.3.6, but the code should compile with any
+I use gcc 4.3.4 and 3.3.6, but the code should compile with any
 standards compliant compiler.
 Gcc is available at http://gcc.gnu.org.
 
diff --git a/Makefile.in b/Makefile.in
index 1d05051..86a9b18 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -34,6 +34,9 @@ $(progname)_shared : $(objs) $(libname).so.$(pkgversion)
 $(progname)_profiled : $(objs) $(libname).a
 	$(CXX) $(LDFLAGS) -pg -o $(progname)_profiled $(objs) $(libname).a
 
+lzcheck : lzcheck.o $(libname).a
+	$(CXX) $(LDFLAGS) -o lzcheck lzcheck.o $(libname).a
+
 main.o : main.cc
 	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
 
@@ -76,7 +79,7 @@ $(VPATH)/doc/$(progname).1 : $(progname)
 Makefile : $(VPATH)/configure $(VPATH)/Makefile.in
 	./config.status
 
-check : all $(VPATH)/testsuite/check.sh
+check : all $(VPATH)/testsuite/check.sh lzcheck
 	@$(VPATH)/testsuite/check.sh $(VPATH)/testsuite
 
 install : all install-info
diff --git a/NEWS b/NEWS
index 7a7df99..bdd21cd 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,4 @@
-Changes in version 0.5:
+Changes in version 0.6:
 
-Decompression speed has been improved.
-
-A build problem with the Sun C++ compiler has been fixed.
+The LZ_compress_sync_flush mechanism has been fixed. (Last few bytes
+weren't immediately available.)
diff --git a/configure b/configure
index 4255d2f..4bb67f5 100755
--- a/configure
+++ b/configure
@@ -5,13 +5,13 @@
 # This configure script is free software: you have unlimited permission
 # to copy, distribute and modify it.
 #
-# Date of this version: 2009-07-03
+# Date of this version: 2009-09-02
 
 invocation_name=$0
 args=
 no_create=
 pkgname=lzlib
-pkgversion=0.5
+pkgversion=0.6
 soversion=0
 progname=minilzip
 progname_shared=${progname}_shared
diff --git a/decoder.cc b/decoder.cc
index b426b3e..5f44309 100644
--- a/decoder.cc
+++ b/decoder.cc
@@ -70,7 +70,7 @@ int Circular_buffer::read_data( uint8_t * const out_buffer, const int out_size )
   }
 
 
-int Circular_buffer::write_data( uint8_t * const in_buffer, const int in_size ) throw()
+int Circular_buffer::write_data( const uint8_t * const in_buffer, const int in_size ) throw()
   {
   int size = 0;
   if( put >= get )
@@ -109,6 +109,7 @@ bool LZ_decoder::verify_trailer()
     else error = true;
     }
   if( format_version == 0 ) trailer.member_size( member_position() );
+  if( !range_decoder.code_is_zero() ) error = true;
   if( trailer.data_crc() != crc() ) error = true;
   if( trailer.data_size() != data_position() ) error = true;
   if( trailer.member_size() != member_position() ) error = true;
@@ -122,6 +123,14 @@ int LZ_decoder::decode_member()
   {
   if( member_finished_ ) return 0;
   if( !range_decoder.try_reload() ) return 0;
+  if( verify_trailer_pending )
+    {
+    if( range_decoder.available_bytes() < File_trailer::size( format_version ) )
+      return 0;
+    verify_trailer_pending = false;
+    member_finished_ = true;
+    if( verify_trailer() ) return 0; else return 3;
+    }
   while( true )
     {
     if( range_decoder.finished() ) return 2;
@@ -192,6 +201,8 @@ int LZ_decoder::decode_member()
               range_decoder.normalize();
               if( len == min_match_len )	// End Of Stream marker
                 {
+                if( range_decoder.available_bytes() < File_trailer::size( format_version ) )
+                  { verify_trailer_pending = true; return 0; }
                 member_finished_ = true;
                 if( verify_trailer() ) return 0; else return 3;
                 }
diff --git a/decoder.h b/decoder.h
index bc9297a..e95a85e 100644
--- a/decoder.h
+++ b/decoder.h
@@ -27,7 +27,7 @@
 
 class Input_buffer : public Circular_buffer
   {
-  enum { min_available_bytes = 8 + sizeof( File_trailer ) };
+  enum { min_available_bytes = 8 };
   bool at_stream_end_;
 
 public:
@@ -47,7 +47,7 @@ public:
            ( at_stream_end_ || used_bytes() >= min_available_bytes ) );
     }
 
-  int write_data( uint8_t * const in_buffer, const int in_size ) throw()
+  int write_data( const uint8_t * const in_buffer, const int in_size ) throw()
     {
     if( at_stream_end_ || in_size <= 0 ) return 0;
     return Circular_buffer::write_data( in_buffer, in_size );
@@ -75,6 +75,7 @@ public:
 
   bool at_stream_end() const throw() { return ibuf.at_stream_end(); }
   int available_bytes() const throw() { return ibuf.used_bytes(); }
+  bool code_is_zero() const throw() { return ( code == 0 ); }
   bool enough_available_bytes() const throw()
     { return ibuf.enough_available_bytes(); }
   bool finished() const throw() { return ibuf.finished(); }
@@ -233,6 +234,7 @@ class LZ_decoder : public Circular_buffer
   const int dictionary_size;
   uint32_t crc_;
   bool member_finished_;
+  bool verify_trailer_pending;
   unsigned int rep0;		// rep[0-3] latest four distances
   unsigned int rep1;		// used for efficient coding of
   unsigned int rep2;		// repeated distances
@@ -298,6 +300,7 @@ public:
     dictionary_size( header.dictionary_size() ),
     crc_( 0xFFFFFFFF ),
     member_finished_( false ),
+    verify_trailer_pending( false ),
     rep0( 0 ),
     rep1( 0 ),
     rep2( 0 ),
diff --git a/doc/lzlib.info b/doc/lzlib.info
index e688ea6..b9ddf30 100644
--- a/doc/lzlib.info
+++ b/doc/lzlib.info
@@ -12,7 +12,7 @@ File: lzlib.info,  Node: Top,  Next: Introduction,  Up: (dir)
 Lzlib
 *****
 
-This manual is for Lzlib (version 0.5, 3 July 2009).
+This manual is for Lzlib (version 0.6, 2 September 2009).
 
 * Menu:
 
@@ -53,6 +53,12 @@ read/write functions until all the data has been processed by the
 library. This interface is safer and less error prone than the
 traditional zlib interface.
 
+   Compression/decompression is done when the read function is called.
+This means the value returned by the position functions will not be
+updated until some data is read, even if you write a lot of data. If
+you want the data to be compressed in advance, just call the read
+function with a SIZE equal to 0.
+
    Lzlib will correctly decompress a data stream which is the
 concatenation of two or more compressed data streams. The result is the
 concatenation of the corresponding uncompressed data streams. Integrity
@@ -443,7 +449,7 @@ Example 1: Normal compression (MEMBER_SIZE > total output).
      4) go back to step 2 until all input data has been written
      5) LZ_compress_finish
      6) LZ_compress_read
-     7) go back to step 6 until LZ_compress_read returns 0
+     7) go back to step 6 until LZ_compress_finished returns 1
      8) LZ_compress_close
 
 
@@ -455,7 +461,7 @@ Example 2: Decompression.
      4) go back to step 2 until all input data has been written
      5) LZ_decompress_finish
      6) LZ_decompress_read
-     7) go back to step 6 until LZ_decompress_read returns 0
+     7) go back to step 6 until LZ_decompress_finished returns 1
      8) LZ_decompress_close
 
 
@@ -469,7 +475,7 @@ Example 3: Multimember compression (MEMBER_SIZE < total output).
       6) go back to step 2 until all input data has been written
       7) LZ_compress_finish
       8) LZ_compress_read
-      9) go back to step 8 until LZ_compress_read returns 0
+      9) go back to step 8 until LZ_compress_finished returns 1
      10) LZ_compress_close
 
 
@@ -513,15 +519,15 @@ Concept Index
 
 Tag Table:
 Node: Top219
-Node: Introduction1010
-Node: Library Version2477
-Node: Buffering3122
-Node: Compression Functions4229
-Node: Decompression Functions9731
-Node: Error Codes13169
-Node: Data Format15105
-Node: Examples17072
-Node: Problems18494
-Node: Concept Index19064
+Node: Introduction1015
+Node: Library Version2790
+Node: Buffering3435
+Node: Compression Functions4542
+Node: Decompression Functions10044
+Node: Error Codes13482
+Node: Data Format15418
+Node: Examples17385
+Node: Problems18819
+Node: Concept Index19389
 
 End Tag Table
diff --git a/doc/lzlib.texinfo b/doc/lzlib.texinfo
index d0cdd2b..409b053 100644
--- a/doc/lzlib.texinfo
+++ b/doc/lzlib.texinfo
@@ -5,8 +5,8 @@
 @finalout
 @c %**end of header
 
-@set UPDATED 3 July 2009
-@set VERSION 0.5
+@set UPDATED 2 September 2009
+@set VERSION 0.6
 
 @dircategory Data Compression
 @direntry
@@ -69,6 +69,12 @@ read/write functions until all the data has been processed by the
 library. This interface is safer and less error prone than the
 traditional zlib interface.
 
+Compression/decompression is done when the read function is called. This
+means the value returned by the position functions will not be updated
+until some data is read, even if you write a lot of data. If you want
+the data to be compressed in advance, just call the read function with a
+@var{size} equal to 0.
+
 Lzlib will correctly decompress a data stream which is the concatenation
 of two or more compressed data streams. The result is the concatenation
 of the corresponding uncompressed data streams. Integrity testing of
@@ -515,7 +521,7 @@ Example 1: Normal compression (@var{member_size} > total output).
 4) go back to step 2 until all input data has been written
 5) LZ_compress_finish
 6) LZ_compress_read
-7) go back to step 6 until LZ_compress_read returns 0
+7) go back to step 6 until LZ_compress_finished returns 1
 8) LZ_compress_close
 @end example
 
@@ -531,7 +537,7 @@ Example 2: Decompression.
 4) go back to step 2 until all input data has been written
 5) LZ_decompress_finish
 6) LZ_decompress_read
-7) go back to step 6 until LZ_decompress_read returns 0
+7) go back to step 6 until LZ_decompress_finished returns 1
 8) LZ_decompress_close
 @end example
 
@@ -549,7 +555,7 @@ Example 3: Multimember compression (@var{member_size} < total output).
  6) go back to step 2 until all input data has been written
  7) LZ_compress_finish
  8) LZ_compress_read
- 9) go back to step 8 until LZ_compress_read returns 0
+ 9) go back to step 8 until LZ_compress_finished returns 1
 10) LZ_compress_close
 @end example
 
diff --git a/encoder.cc b/encoder.cc
index 51ffb95..6fa8f06 100644
--- a/encoder.cc
+++ b/encoder.cc
@@ -44,7 +44,7 @@ const Dis_slots dis_slots;
 const Prob_prices prob_prices;
 
 
-int Matchfinder::write_data( uint8_t * const in_buffer, const int in_size ) throw()
+int Matchfinder::write_data( const uint8_t * const in_buffer, const int in_size ) throw()
   {
   if( at_stream_end_ ) return 0;
   const int size = std::min( buffer_size - stream_pos, in_size );
diff --git a/encoder.h b/encoder.h
index 9583e4f..6384005 100644
--- a/encoder.h
+++ b/encoder.h
@@ -210,7 +210,7 @@ public:
     return i;
     }
 
-  int write_data( uint8_t * const in_buffer, const int in_size ) throw();
+  int write_data( const uint8_t * const in_buffer, const int in_size ) throw();
   void reset() throw();
   bool move_pos() throw();
   int longest_match_len( int * const distances = 0 ) throw();
@@ -417,7 +417,7 @@ class LZ_encoder
   {
   enum { dis_align_mask = dis_align_size - 1,
          infinite_price = 0x0FFFFFFF,
-         max_marker_size = 15,
+         max_marker_size = 16,
          num_rep_distances = 4 };	// must be 4
 
   struct Trial
diff --git a/lzcheck.cc b/lzcheck.cc
new file mode 100644
index 0000000..b56081e
--- /dev/null
+++ b/lzcheck.cc
@@ -0,0 +1,111 @@
+/*  Lzcheck - A test program for the lzlib library
+    Copyright (C) 2009 Antonio Diaz Diaz.
+
+    This program is free software: you have unlimited permission
+    to copy, distribute and modify it.
+
+    Usage is:
+      lzcheck filename.txt
+
+    This program reads the specified text file and then compresses it,
+    line by line, to test the flushing mechanism.
+*/
+
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <stdint.h>
+#include <unistd.h>
+
+#include "lzlib.h"
+
+#ifndef LLONG_MAX
+#define LLONG_MAX  0x7FFFFFFFFFFFFFFFLL
+#endif
+#ifndef LLONG_MIN
+#define LLONG_MIN  (-LLONG_MAX - 1LL)
+#endif
+#ifndef ULLONG_MAX
+#define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL
+#endif
+
+
+// Compresses argv[1] line by line with a sync flush after each line and
+// checks that every line decompresses at once and unchanged. Returns 0 on
+// success, 1 on usage, I/O, memory or sync errors, 3 on internal errors.
+int main( const int argc, const char * argv[] )
+  {
+  if( argc < 2 )
+    { std::fprintf( stderr, "Usage: lzcheck filename.txt\n" ); return 1; }
+
+  FILE *f = std::fopen( argv[1], "rb" );
+  if( !f )
+    {
+    std::fprintf( stderr, "Can't open file `%s' for reading\n", argv[1] );
+    return 1;
+    }
+
+  const int in_buffer_size = 1 << 20;
+  const int mid_buffer_size = 65536;
+  const int out_buffer_size = in_buffer_size;
+  uint8_t in_buffer[in_buffer_size];
+  uint8_t mid_buffer[mid_buffer_size];
+  uint8_t out_buffer[out_buffer_size];
+  const int in_size = std::fread( in_buffer, 1, in_buffer_size, f );
+  std::fclose( f );   // closed here so the early return below can't leak it
+  if( in_size >= in_buffer_size )   // a full buffer may hide more input
+    {
+    std::fprintf( stderr, "input file `%s' too big.\n", argv[1] );
+    return 1;
+    }
+
+  const int dictionary_size = in_buffer_size;
+  const int match_len_limit = 80;
+  const long long member_size = LLONG_MAX;
+  void * encoder = LZ_compress_open( dictionary_size, match_len_limit,
+                                     member_size );
+  if( !encoder || LZ_compress_errno( encoder ) != LZ_ok )
+    {
+    const bool mem_error = ( LZ_compress_errno( encoder ) == LZ_mem_error );
+    LZ_compress_close( encoder );
+    if( mem_error )
+      { std::fprintf( stderr, "not enough memory.\n" ); return 1; }
+    std::fprintf( stderr, "internal error: invalid argument to encoder.\n" );
+    return 3;
+    }
+
+  void * decoder = LZ_decompress_open();
+  if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok )
+    {
+    LZ_decompress_close( decoder );
+    std::fprintf( stderr, "not enough memory.\n" );
+    return 1;
+    }
+
+  // [l, r) delimits the current line (the last one may lack a newline)
+  int retval = 0;   // set to 1 as soon as any line fails to round-trip
+  for( int l = 0, r = 0; r < in_size; l = r )
+    {
+    while( ++r < in_size && in_buffer[r-1] != '\n' ) ;
+    LZ_compress_write( encoder, in_buffer + l, r - l );
+    LZ_compress_sync_flush( encoder );
+    int mid_size = LZ_compress_read( encoder, mid_buffer, mid_buffer_size );
+    LZ_decompress_write( decoder, mid_buffer, mid_size );
+    int out_size = LZ_decompress_read( decoder, out_buffer, out_buffer_size );
+
+    if( out_size != r - l || std::memcmp( in_buffer + l, out_buffer, out_size ) )
+      {
+      retval = 1;   // let the caller (the testsuite) see the failure
+      std::printf( "sync error at pos %d. in_size = %d, out_size = %d\n",
+                   l, r - l, out_size );
+      for( int i = l; i < r; ++i ) std::putchar( in_buffer[i] );
+      if( in_buffer[r-1] != '\n' ) std::putchar( '\n' );
+      for( int i = 0; i < out_size; ++i ) std::putchar( out_buffer[i] );
+      std::putchar( '\n' );
+      }
+    }
+
+  LZ_decompress_close( decoder );
+  LZ_compress_close( encoder );
+  return retval;
+  }
diff --git a/lzip.h b/lzip.h
index cf8e56e..60329a8 100644
--- a/lzip.h
+++ b/lzip.h
@@ -277,5 +277,5 @@ public:
     }
 
   int read_data( uint8_t * const out_buffer, const int out_size ) throw();
-  int write_data( uint8_t * const in_buffer, const int in_size ) throw();
+  int write_data( const uint8_t * const in_buffer, const int in_size ) throw();
   };
diff --git a/lzlib.cc b/lzlib.cc
index 76212db..1eb191a 100644
--- a/lzlib.cc
+++ b/lzlib.cc
@@ -45,7 +45,7 @@ struct Encoder
   Matchfinder * matchfinder;
   LZ_encoder * lz_encoder;
   LZ_errno lz_errno;
-  bool flush_pending;
+  int flush_pending;
   const File_header member_header;
 
   Encoder( const File_header & header ) throw()
@@ -55,7 +55,7 @@ struct Encoder
     matchfinder( 0 ),
     lz_encoder( 0 ),
     lz_errno( LZ_ok ),
-    flush_pending( false ),
+    flush_pending( 0 ),
     member_header( header )
     {}
   };
@@ -180,7 +180,7 @@ int LZ_compress_finish( void * const encoder )
   if( !verify_encoder( encoder ) ) return -1;
   Encoder & e = *(Encoder *)encoder;
   e.matchfinder->flushing( true );
-  e.flush_pending = false;
+  e.flush_pending = 0;
   return 0;
   }
 
@@ -191,12 +191,12 @@ int LZ_compress_sync_flush( void * const encoder )
   Encoder & e = *(Encoder *)encoder;
   if( !e.flush_pending && !e.matchfinder->at_stream_end() )
     {
-    e.flush_pending = true;
+    e.flush_pending = 2;	// 2 consecutive markers guarantee decoding
     e.matchfinder->flushing( true );
     if( !e.lz_encoder->encode_member( false ) )
       { e.lz_errno = LZ_library_error; return -1; }
-    if( e.lz_encoder->sync_flush() )
-      { e.matchfinder->flushing( false ); e.flush_pending = false; }
+    while( e.flush_pending > 0 && e.lz_encoder->sync_flush() )
+      { if( --e.flush_pending <= 0 ) e.matchfinder->flushing( false ); }
     }
   return 0;
   }
@@ -209,13 +209,13 @@ int LZ_compress_read( void * const encoder, uint8_t * const buffer,
   Encoder & e = *(Encoder *)encoder;
   if( !e.lz_encoder->encode_member( !e.flush_pending ) )
     { e.lz_errno = LZ_library_error; return -1; }
-  if( e.flush_pending && e.lz_encoder->sync_flush() )
-    { e.matchfinder->flushing( false ); e.flush_pending = false; }
+  while( e.flush_pending > 0 && e.lz_encoder->sync_flush() )
+    { if( --e.flush_pending <= 0 ) e.matchfinder->flushing( false ); }
   return e.lz_encoder->read_data( buffer, size );
   }
 
 
-int LZ_compress_write( void * const encoder, uint8_t * const buffer,
+int LZ_compress_write( void * const encoder, const uint8_t * const buffer,
                        const int size )
   {
   if( !verify_encoder( encoder ) ) return -1;
@@ -370,7 +370,7 @@ int LZ_decompress_read( void * const decoder, uint8_t * const buffer,
   }
 
 
-int LZ_decompress_write( void * const decoder, uint8_t * const buffer,
+int LZ_decompress_write( void * const decoder, const uint8_t * const buffer,
                          const int size )
   {
   if( !verify_decoder( decoder ) ) return -1;
diff --git a/lzlib.h b/lzlib.h
index 7dc9aab..53818bb 100644
--- a/lzlib.h
+++ b/lzlib.h
@@ -29,7 +29,7 @@
 extern "C" {
 #endif
 
-const char * const LZ_version_string = "0.5";
+const char * const LZ_version_string = "0.6";
 
 enum { min_dictionary_bits = 12,
        min_dictionary_size = 1 << min_dictionary_bits,
@@ -54,7 +54,7 @@ int LZ_compress_sync_flush( void * const encoder );
 
 int LZ_compress_read( void * const encoder, uint8_t * const buffer,
                       const int size );
-int LZ_compress_write( void * const encoder, uint8_t * const buffer,
+int LZ_compress_write( void * const encoder, const uint8_t * const buffer,
                        const int size );
 int LZ_compress_write_size( void * const encoder );
 
@@ -74,7 +74,7 @@ int LZ_decompress_finish( void * const decoder );
 
 int LZ_decompress_read( void * const decoder, uint8_t * const buffer,
                         const int size );
-int LZ_decompress_write( void * const decoder, uint8_t * const buffer,
+int LZ_decompress_write( void * const decoder, const uint8_t * const buffer,
                          const int size );
 
 enum LZ_errno LZ_decompress_errno( void * const decoder );
diff --git a/main.cc b/main.cc
index 11ee21f..ef7da7b 100644
--- a/main.cc
+++ b/main.cc
@@ -26,6 +26,7 @@
 #include <algorithm>
 #include <cerrno>
 #include <climits>
+#include <csignal>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
@@ -33,7 +34,6 @@
 #include <vector>
 #include <fcntl.h>
 #include <stdint.h>
-#include <signal.h>
 #include <unistd.h>
 #include <utime.h>
 #include <sys/stat.h>
@@ -462,6 +462,8 @@ int compress( const long long member_size, const long long volume_size,
       if( in_size == 0 ) LZ_compress_finish( encoder );
       else if( in_size != LZ_compress_write( encoder, in_buffer, in_size ) )
         internal_error( "library error" );
+//      for( int i = 0; i < 10000; ++i )
+//        LZ_compress_sync_flush( encoder );
       }
     int out_size = LZ_compress_read( encoder, out_buffer, out_buffer_size );
 //    std::fprintf( stderr, "%6d in_size, %5d out_size.\n", in_size, out_size );
diff --git a/testsuite/check.sh b/testsuite/check.sh
index 2ce2ca5..7b67c93 100755
--- a/testsuite/check.sh
+++ b/testsuite/check.sh
@@ -10,6 +10,7 @@ export LC_ALL
 objdir=`pwd`
 testdir=`cd "$1" ; pwd`
 LZIP="${objdir}"/minilzip
+LZCHECK="${objdir}"/lzcheck
 framework_failure() { echo 'failure in testing framework'; exit 1; }
 
 if [ ! -x "${LZIP}" ] ; then
@@ -59,10 +60,13 @@ for i in s4096 1 2 3 4 5 6 7 8 9; do
 	echo -n .
 done
 
+"${LZCHECK}" in || fail=1
+echo -n .
+
 echo
-if test ${fail} = 0; then
+if [ ${fail} = 0 ]; then
 	echo "tests completed successfully."
-	if cd "${objdir}" ; then rm -r tmp ; fi
+	cd "${objdir}" && rm -r tmp
 else
 	echo "tests failed."
 fi