17 files changed, 242 insertions, 147 deletions
diff --git a/ChangeLog b/ChangeLog
index daa565b..dfd0c86 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2009-05-21  Antonio Diaz  <ant_diaz@teleline.es>
+
+	* Version 1.6-pre2 released.
+	* Decompression time has been reduced by 17%.
+	* Added support for .tbz extension to lzdiff and lzgrep.
+	* Added man pages for lzdiff, lzgrep and lziprecover.
+	* encoder.cc (Matchfinder): Reduce memory use to 9x if input file
+	  is smaller than dictionary size limit.
+	* testsuite/check.sh: Test lziprecover.
+	* Export LC_ALL=C in all scripts.
+
 2009-04-27  Antonio Diaz  <ant_diaz@teleline.es>
 
 	* Version 1.6-pre1 released.
@@ -18,6 +29,7 @@
 	* Lzdiff and lzgrep now accept "-h" for "--help" and
 	  "-V" for "--version".
 	* Makefile.in: Man page is now installed by default.
+	* testsuite/check.sh: Verify that files are open in binary mode.
 
 2009-01-24  Antonio Diaz  <ant_diaz@teleline.es>
 
diff --git a/Makefile.in b/Makefile.in
index 644f141..1b9d4fb 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -9,17 +9,17 @@ SHELL = /bin/sh
 objs = arg_parser.o decoder.o encoder.o main.o
 
 
-.PHONY : all doc check install install-info install-man install-extra \
+.PHONY : all install install-info install-man install-extra install-strip \
          uninstall uninstall-info uninstall-man uninstall-extra \
-         install-strip dist clean distclean
+         doc info man check dist clean distclean
 
 all : $(progname) lziprecover
 
 $(progname) : $(objs)
 	$(CXX) $(LDFLAGS) -o $(progname) $(objs)
 
-profiled_$(progname) : $(objs)
-	$(CXX) $(LDFLAGS) -pg -o profiled_$(progname) $(objs)
+$(progname)_profiled : $(objs)
+	$(CXX) $(LDFLAGS) -pg -o $(progname)_profiled $(objs)
 
 lziprecover : arg_parser.o lziprecover.o
 	$(CXX) $(LDFLAGS) -o lziprecover arg_parser.o lziprecover.o
@@ -40,16 +40,28 @@ encoder.o     : lzip.h encoder.h
 main.o        : arg_parser.h lzip.h decoder.h encoder.h
 lziprecover.o : arg_parser.h lzip.h Makefile
 
-doc : info $(VPATH)/doc/$(progname).1
+doc : info man
 
 info : $(VPATH)/doc/$(progname).info
 
 $(VPATH)/doc/$(progname).info : $(VPATH)/doc/$(progname).texinfo
 	cd $(VPATH)/doc && makeinfo $(progname).texinfo
 
+man : $(VPATH)/doc/$(progname).1 $(VPATH)/doc/lziprecover.1 \
+      $(VPATH)/doc/lzdiff.1 $(VPATH)/doc/lzgrep.1
+
 $(VPATH)/doc/$(progname).1 : $(progname)
 	help2man -o $(VPATH)/doc/$(progname).1 ./$(progname)
 
+$(VPATH)/doc/lziprecover.1 : lziprecover
+	help2man -o $(VPATH)/doc/lziprecover.1 --no-info ./lziprecover
+
+$(VPATH)/doc/lzdiff.1 : $(VPATH)/lzdiff
+	help2man -o $(VPATH)/doc/lzdiff.1 --no-info $(VPATH)/lzdiff
+
+$(VPATH)/doc/lzgrep.1 : $(VPATH)/lzgrep
+	help2man -o $(VPATH)/doc/lzgrep.1 --no-info $(VPATH)/lzgrep
+
 Makefile : $(VPATH)/configure $(VPATH)/Makefile.in
 	./config.status
 
@@ -118,7 +130,7 @@ dist :
 	lzip -v -9 $(DISTNAME).tar
 
 clean :
-	-rm -f $(progname) profiled_$(progname) $(objs)
+	-rm -f $(progname) $(progname)_profiled $(objs)
 	-rm -f lziprecover lziprecover.o
 
 distclean : clean
diff --git a/NEWS b/NEWS
index e052c16..86ccf02 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,15 @@
 Changes in version 1.6:
 
+Decompression time has been reduced by 17%.
+
+Support for .tbz extension has been added to lzdiff and lzgrep.
+
+Man pages for lzdiff, lzgrep and lziprecover have been added to the
+distribution.
+
+Memory use has been reduced to 9x if the input file is smaller than
+the dictionary size limit.
+
 Flush calls have been added to decompressor to allow partial recovery
 of the uncompressed data when decompressing a corrupt file.
 
diff --git a/configure b/configure
index 71ff5f3..c31a1d8 100755
--- a/configure
+++ b/configure
@@ -5,13 +5,13 @@
 # This configure script is free software: you have unlimited permission
 # to copy, distribute and modify it.
 #
-# Date of this version: 2009-04-27
+# Date of this version: 2009-05-21
 
 invocation_name=$0
 args=
 no_create=
 progname=lzip
-progversion=1.6-pre1
+progversion=1.6-pre2
 srctrigger=lzip.h
 
 # clear some things potentially inherited from environment.
diff --git a/decoder.cc b/decoder.cc
index 341740b..aeac2cd 100644
--- a/decoder.cc
+++ b/decoder.cc
@@ -47,6 +47,7 @@ void LZ_decoder::flush_data()
   {
   if( !member_finished )
     {
+    crc32.update( crc_, buffer, pos );
     if( odes_ >= 0 )
       {
       const int wr = writeblock( odes_, (char *)buffer, pos );
@@ -65,7 +66,9 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const
   const int trailer_size = trailer.size( format_version );
   for( int i = 0; i < trailer_size && !error; ++i )
     {
-    if( range_decoder.finished() )
+    if( !range_decoder.finished() )
+      ((uint8_t *)&trailer)[i] = range_decoder.get_byte();
+    else
       {
       error = true;
       if( verbosity >= 0 )
@@ -75,7 +78,6 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const
                               " some checks may fail.\n", i );
         }
       }
-    ((uint8_t *)&trailer)[i] = range_decoder.read_byte();
     }
   if( format_version == 0 ) trailer.member_size( member_position() );
   if( trailer.data_crc() != crc() )
@@ -195,23 +197,22 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
             rep0 += range_decoder.decode_tree_reversed( bm_align, dis_align_bits );
             if( rep0 == 0xFFFFFFFF )		// Marker found
               {
+              range_decoder.normalize();
+              flush_data();
               if( len == min_match_len )	// End Of Stream marker
-                {
-                flush_data();
-                if( verify_trailer( pp ) ) return 0; else return 3;
-                }
+                { if( verify_trailer( pp ) ) return 0; else return 3; }
               if( verbosity >= 0 )
                 {
                 pp();
                 std::fprintf( stderr, "unsupported marker code `%d'.\n", len );
                 }
-              flush_data();
               return 4;
               }
+            if( rep0 >= (unsigned int)buffer_size ) { flush_data(); return 1; }
             }
           }
         }
-      if( !copy_block( rep0, len ) ) { flush_data(); return 1; }
+      copy_block( rep0, len );
       prev_byte = get_byte( 0 );
       }
     }
diff --git a/decoder.h b/decoder.h
index ab921dd..aaf85a6 100644
--- a/decoder.h
+++ b/decoder.h
@@ -40,7 +40,7 @@ public:
 
   bool finished() const throw() { return at_stream_end && pos >= stream_pos; }
 
-  uint8_t read_byte()
+  uint8_t get_byte()
     {
     if( pos >= stream_pos && !read_block() ) return 0;
     return buffer[pos++];
@@ -62,59 +62,68 @@ public:
     code( 0 ),
     range( 0xFFFFFFFF ),
     ibuf( buf )
-    { for( int i = 0; i < 5; ++i ) code = (code << 8) | read_byte(); }
+    { for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte(); }
 
-  uint8_t read_byte() const
+  bool finished() const throw() { return ibuf.finished(); }
+  long long member_position() const throw() { return member_pos; }
+
+  uint8_t get_byte() const
     {
     ++member_pos;
-    return ibuf.read_byte();
+    return ibuf.get_byte();
     }
 
-  long long member_position() const throw() { return member_pos; }
-  bool finished() const throw() { return ibuf.finished(); }
+  void normalize()
+    {
+    if( range <= 0x00FFFFFF )
+      { range <<= 8; code = (code << 8) | get_byte(); }
+    }
 
   int decode( const int num_bits )
     {
     int symbol = 0;
-    for( int i = num_bits - 1; i >= 0; --i )
+    for( int i = num_bits; i > 0; --i )
       {
-      range >>= 1;
       symbol <<= 1;
-      if( code >= range )
-        { code -= range; symbol |= 1; }
       if( range <= 0x00FFFFFF )
-        { range <<= 8; code = (code << 8) | read_byte(); }
+        {
+        range <<= 7; code = (code << 8) | get_byte();
+        if( code >= range ) { code -= range; symbol |= 1; }
+        }
+      else
+        {
+        range >>= 1;
+        if( code >= range ) { code -= range; symbol |= 1; }
+        }
       }
     return symbol;
     }
 
   int decode_bit( Bit_model & bm )
     {
-    int symbol;
+    if( range <= 0x00FFFFFF )
+      { range <<= 8; code = (code << 8) | get_byte(); }
     const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
     if( code < bound )
       {
       range = bound;
       bm.probability += (bit_model_total - bm.probability) >> bit_model_move_bits;
-      symbol = 0;
+      return 0;
       }
     else
       {
       range -= bound;
       code -= bound;
       bm.probability -= bm.probability >> bit_model_move_bits;
-      symbol = 1;
+      return 1;
       }
-    if( range <= 0x00FFFFFF )
-      { range <<= 8; code = (code << 8) | read_byte(); }
-    return symbol;
     }
 
   int decode_tree( Bit_model bm[], const int num_bits )
     {
     int model = 1;
     for( int i = num_bits; i > 0; --i )
-      model = ( model << 1 ) | decode_bit( bm[model-1] );
+      model = ( model << 1 ) | decode_bit( bm[model] );
     return model - (1 << num_bits);
     }
 
@@ -122,27 +131,31 @@ public:
     {
     int model = 1;
     int symbol = 0;
-    for( int i = 1; i < (1 << num_bits); i <<= 1 )
+    for( int i = 0; i < num_bits; ++i )
       {
-      const int bit = decode_bit( bm[model-1] );
-      model = ( model << 1 ) | bit;
-      if( bit ) symbol |= i;
+      const int bit = decode_bit( bm[model] );
+      model <<= 1;
+      if( bit ) { model |= 1; symbol |= (1 << i); }
       }
     return symbol;
     }
 
   int decode_matched( Bit_model bm[], const int match_byte )
     {
+    Bit_model *bm1 = bm + 0x100;
     int symbol = 1;
-    for( int i = 7; i >= 0; --i )
+    for( int i = 1; i <= 8; ++i )
       {
-      const int match_bit = ( match_byte >> i ) & 1;
-      const int bit = decode_bit( bm[(match_bit<<8)+symbol+0xFF] );
+      const int match_bit = ( match_byte << i ) & 0x100;
+      const int bit = decode_bit( bm1[match_bit+symbol] );
       symbol = ( symbol << 1 ) | bit;
-      if( match_bit != bit ) break;
+      if( ( match_bit && !bit ) || ( !match_bit && bit ) )
+        {
+        while( ++i <= 8 )
+          symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
+        break;
+        }
       }
-    while( symbol < 0x100 )
-      symbol = ( symbol << 1 ) | decode_bit( bm[symbol-1] );
     return symbol & 0xFF;
     }
   };
@@ -215,32 +228,32 @@ class LZ_decoder
 
   uint8_t get_byte( const int distance ) const throw()
     {
-    int newpos = pos - distance - 1;
-    if( newpos < 0 ) newpos += buffer_size;
-    return buffer[newpos];
+    int i = pos - distance - 1;
+    if( i < 0 ) i += buffer_size;
+    return buffer[i];
     }
 
   void put_byte( const uint8_t b )
     {
-    crc32.update( crc_, b );
     buffer[pos] = b;
     if( ++pos >= buffer_size ) flush_data();
     }
 
-  bool copy_block( const int distance, int len )
+  void copy_block( const int distance, int len )
     {
-    if( distance < 0 || distance >= buffer_size ||
-        len <= 0 || len > max_match_len ) return false;
-    int newpos = pos - distance - 1;
-    if( newpos < 0 ) newpos += buffer_size;
-    for( ; len > 0 ; --len )
+    int i = pos - distance - 1;
+    if( i < 0 ) i += buffer_size;
+    if( len < buffer_size - std::max( pos, i ) && len <= ( pos > i ? pos - i : i - pos ) )
+      {
+      std::memcpy( buffer + pos, buffer + i, len );
+      pos += len;
+      }
+    else for( ; len > 0 ; --len )
       {
-      crc32.update( crc_, buffer[newpos] );
-      buffer[pos] = buffer[newpos];
+      buffer[pos] = buffer[i];
       if( ++pos >= buffer_size ) flush_data();
-      if( ++newpos >= buffer_size ) newpos = 0;
+      if( ++i >= buffer_size ) i = 0;
       }
-    return true;
     }
 
   void flush_data();
diff --git a/doc/lzip.1 b/doc/lzip.1
index a1a7591..bcd4c5a 100644
--- a/doc/lzip.1
+++ b/doc/lzip.1
@@ -1,7 +1,7 @@
 .\" DO NOT MODIFY THIS FILE!  It was generated by help2man 1.36.
-.TH LZIP "1" "April 2009" "Lzip 1.6-pre1" "User Commands"
+.TH LZIP "1" "May 2009" "Lzip 1.6-pre2" "User Commands"
 .SH NAME
-Lzip \- manual page for Lzip 1.6-pre1
+Lzip \- manual page for Lzip 1.6-pre2
 .SH SYNOPSIS
 .B lzip
 [\fIoptions\fR] [\fIfiles\fR]
diff --git a/doc/lzip.info b/doc/lzip.info
index d40f84f..e1e5029 100644
--- a/doc/lzip.info
+++ b/doc/lzip.info
@@ -11,7 +11,7 @@ File: lzip.info,  Node: Top,  Next: Introduction,  Up: (dir)
 Lzip
 ****
 
-This manual is for Lzip (version 1.6-pre1, 27 April 2009).
+This manual is for Lzip (version 1.6-pre2, 21 May 2009).
 
 * Menu:
 
@@ -67,11 +67,12 @@ also split the compressed output in volumes of a given size, even when
 reading from standard input. This allows the direct creation of
 multivolume compressed tar archives.
 
-   The amount of memory required for compression is about 2 times the
-dictionary size limit plus 8 times the dictionary size really used. For
-decompression is a little more than the dictionary size really used.
-Lzip will automatically use the smallest possible dictionary size for
-each member without exceeding the given limit. It is important to
+   The amount of memory required for compression is about 5 MiB plus 1
+or 2 times the dictionary size limit (1 if input file size is less than
+dictionary size limit, else 2) plus 8 times the dictionary size really
+used. For decompression is a little more than the dictionary size really
+used. Lzip will automatically use the smallest possible dictionary size
+for each member without exceeding the given limit. It is important to
 appreciate that the decompression memory requirement is affected at
 compression time by the choice of dictionary size limit.
 
@@ -558,15 +559,15 @@ Concept Index
 
 Tag Table:
 Node: Top224
-Node: Introduction967
-Node: Algorithm4208
-Node: Invoking Lzip6434
-Node: File Format10781
-Node: Examples12735
-Node: Lzdiff14568
-Node: Lzgrep15887
-Node: Lziprecover16922
-Node: Problems17619
-Node: Concept Index18144
+Node: Introduction965
+Node: Algorithm4288
+Node: Invoking Lzip6514
+Node: File Format10861
+Node: Examples12815
+Node: Lzdiff14648
+Node: Lzgrep15967
+Node: Lziprecover17002
+Node: Problems17699
+Node: Concept Index18224
 
 End Tag Table
diff --git a/doc/lzip.texinfo b/doc/lzip.texinfo
index f29b29e..87e223f 100644
--- a/doc/lzip.texinfo
+++ b/doc/lzip.texinfo
@@ -5,8 +5,8 @@
 @finalout
 @c %**end of header
 
-@set UPDATED 27 April 2009
-@set VERSION 1.6-pre1
+@set UPDATED 21 May 2009
+@set VERSION 1.6-pre2
 
 @dircategory Data Compression
 @direntry
@@ -84,11 +84,12 @@ compressed output in volumes of a given size, even when reading from
 standard input. This allows the direct creation of multivolume
 compressed tar archives.
 
-The amount of memory required for compression is about 2 times the
-dictionary size limit plus 8 times the dictionary size really used. For
-decompression is a little more than the dictionary size really used.
-Lzip will automatically use the smallest possible dictionary size for
-each member without exceeding the given limit. It is important to
+The amount of memory required for compression is about 5 MiB plus 1 or 2
+times the dictionary size limit (1 if input file size is less than
+dictionary size limit, else 2) plus 8 times the dictionary size really
+used. For decompression is a little more than the dictionary size really
+used. Lzip will automatically use the smallest possible dictionary size
+for each member without exceeding the given limit. It is important to
 appreciate that the decompression memory requirement is affected at
 compression time by the choice of dictionary size limit.
 
diff --git a/encoder.cc b/encoder.cc
index f1a033b..291f1d1 100644
--- a/encoder.cc
+++ b/encoder.cc
@@ -43,6 +43,41 @@ bool Matchfinder::read_block() throw()
   }
 
 
+Matchfinder::Matchfinder( const int dict_size, const int len_limit,
+                          const int ides )
+  :
+  partial_data_pos( 0 ),
+  pos( 0 ),
+  cyclic_pos( 0 ),
+  stream_pos( 0 ),
+  ides_( ides ),
+  match_len_limit_( len_limit ),
+  prev_positions( new int32_t[num_prev_positions] ),
+  at_stream_end( false )
+  {
+  const int buffer_size_limit = ( 2 * dict_size ) +
+                                max_num_trials + max_match_len;
+  buffer_size = std::max( 65536, dict_size );
+  buffer = (uint8_t *)std::malloc( buffer_size );
+  if( !buffer ) throw std::bad_alloc();
+  if( !read_block() ) throw Error( "read error" );
+  if( !at_stream_end && buffer_size < buffer_size_limit )
+    {
+    buffer_size = buffer_size_limit;
+    buffer = (uint8_t *)std::realloc( buffer, buffer_size );
+    if( !buffer ) throw std::bad_alloc();
+    if( !read_block() ) throw Error( "read error" );
+    }
+  if( at_stream_end && stream_pos < dict_size )
+    dictionary_size_ = std::max( min_dictionary_size, stream_pos );
+  else dictionary_size_ = dict_size;
+  pos_limit = buffer_size;
+  if( !at_stream_end ) pos_limit -= max_match_len;
+  prev_pos_tree = new int32_t[2*dictionary_size_];
+  for( int i = 0; i < num_prev_positions; ++i ) prev_positions[i] = -1;
+  }
+
+
 bool Matchfinder::reset() throw()
   {
   const int size = stream_pos - pos;
@@ -433,7 +468,7 @@ void LZ_encoder::flush( const State & state )
   trailer.data_size( matchfinder.data_position() );
   trailer.member_size( range_encoder.member_position() + sizeof trailer );
   for( unsigned int i = 0; i < sizeof trailer; ++i )
-    range_encoder.put_byte( (( uint8_t *)&trailer)[i] );
+    range_encoder.put_byte( ((uint8_t *)&trailer)[i] );
   range_encoder.flush_data();
   }
 
@@ -453,7 +488,7 @@ LZ_encoder::LZ_encoder( Matchfinder & mf, const File_header & header,
   fill_align_prices();
 
   for( unsigned int i = 0; i < sizeof header; ++i )
-    range_encoder.put_byte( (( uint8_t *)&header)[i] );
+    range_encoder.put_byte( ((uint8_t *)&header)[i] );
   }
 
 
diff --git a/encoder.h b/encoder.h
index 59cebb8..41daa48 100644
--- a/encoder.h
+++ b/encoder.h
@@ -86,7 +86,7 @@ inline int price_symbol( const Bit_model bm[], int symbol, const int num_bits )
     {
     const int bit = symbol & 1;
     symbol >>= 1;
-    price += price_bit( bm[symbol-1], bit );
+    price += price_bit( bm[symbol], bit );
     }
   return price;
   }
@@ -100,7 +100,7 @@ inline int price_symbol_reversed( const Bit_model bm[], int symbol,
     {
     const int bit = symbol & 1;
     symbol >>= 1;
-    price += price_bit( bm[model-1], bit );
+    price += price_bit( bm[model], bit );
     model = ( model << 1 ) | bit;
     }
   return price;
@@ -116,14 +116,14 @@ inline int price_matched( const Bit_model bm[], const int symbol,
     {
     const int match_bit = ( match_byte >> i ) & 1;
     const int bit = ( symbol >> i ) & 1;
-    price += price_bit( bm[(match_bit<<8)+model+0xFF], bit );
+    price += price_bit( bm[(match_bit<<8)+model+0x100], bit );
     model = ( model << 1 ) | bit;
     if( match_bit != bit )
       {
       while( --i >= 0 )
         {
         const int bit = ( symbol >> i ) & 1;
-        price += price_bit( bm[model-1], bit );
+        price += price_bit( bm[model], bit );
         model = ( model << 1 ) | bit;
         }
       break;
@@ -143,13 +143,12 @@ class Matchfinder
 
   long long partial_data_pos;
   int dictionary_size_;		// bytes to keep in buffer before pos
-  const int after_size;		// bytes to keep in buffer after pos
-  const int buffer_size;
-  uint8_t * const buffer;
+  int buffer_size;
+  uint8_t * buffer;
   int pos;
   int cyclic_pos;
   int stream_pos;		// first byte not yet read from file
-  const int pos_limit;		// when reached, a new block must be read
+  int pos_limit;		// when reached, a new block must be read
   const int ides_;
   const int match_len_limit_;
   int32_t * const prev_positions;	// last seen position of key
@@ -159,32 +158,10 @@ class Matchfinder
   bool read_block() throw();
 
 public:
-  Matchfinder( const int dict_size, const int len_limit, const int ides )
-    :
-    partial_data_pos( 0 ),
-    dictionary_size_( dict_size ),
-    after_size( max_match_len ),
-    buffer_size( ( 2 * std::max( 65536, dictionary_size_ ) ) +
-                 max_num_trials + after_size ),
-    buffer( new uint8_t[buffer_size] ),
-    pos( 0 ),
-    cyclic_pos( 0 ),
-    stream_pos( 0 ),
-    pos_limit( buffer_size - after_size ),
-    ides_( ides ),
-    match_len_limit_( len_limit ),
-    prev_positions( new int32_t[num_prev_positions] ),
-    at_stream_end( false )
-    {
-    if( !read_block() ) throw Error( "read error" );
-    if( at_stream_end && stream_pos < dictionary_size_ )
-      dictionary_size_ = std::max( min_dictionary_size, stream_pos );
-    prev_pos_tree = new int32_t[2*dictionary_size_];
-    for( int i = 0; i < num_prev_positions; ++i ) prev_positions[i] = -1;
-    }
+  Matchfinder( const int dict_size, const int len_limit, const int ides );
 
   ~Matchfinder()
-    { delete[] prev_pos_tree; delete[] prev_positions; delete[] buffer; }
+    { delete[] prev_pos_tree; delete[] prev_positions; std::free( buffer ); }
 
   uint8_t operator[]( const int i ) const throw() { return buffer[pos+i]; }
   int available_bytes() const throw() { return stream_pos - pos; }
@@ -318,7 +295,7 @@ public:
     for( int i = num_bits; i > 0; --i, mask >>= 1 )
       {
       const int bit = ( symbol & mask );
-      encode_bit( bm[model-1], bit );
+      encode_bit( bm[model], bit );
       model <<= 1;
       if( bit ) model |= 1;
       }
@@ -330,7 +307,7 @@ public:
     for( int i = num_bits; i > 0; --i )
       {
       const int bit = symbol & 1;
-      encode_bit( bm[model-1], bit );
+      encode_bit( bm[model], bit );
       model = ( model << 1 ) | bit;
       symbol >>= 1;
       }
@@ -343,14 +320,14 @@ public:
       {
       const int bit = ( symbol >> i ) & 1;
       const int match_bit = ( match_byte >> i ) & 1;
-      encode_bit( bm[(match_bit<<8)+model+0xFF], bit );
+      encode_bit( bm[(match_bit<<8)+model+0x100], bit );
       model = ( model << 1 ) | bit;
       if( match_bit != bit )
         {
         while( --i >= 0 )
           {
           const int bit = ( symbol >> i ) & 1;
-          encode_bit( bm[model-1], bit );
+          encode_bit( bm[model], bit );
           model = ( model << 1 ) | bit;
           }
         break;
diff --git a/lzdiff b/lzdiff
index f289a95..c3c605d 100755
--- a/lzdiff
+++ b/lzdiff
@@ -5,6 +5,8 @@
 # This script is free software: you have unlimited permission
 # to copy, distribute and modify it.
 
+LC_ALL=C
+export LC_ALL
 invocation_name=$0
 args=
 default_ext=.lz
@@ -39,9 +41,12 @@ while [ x"$1" != x ] ; do
 		echo "      --lzip          use lzip as default decompressor (default)"
 		echo "      --diff          use diff to compare files (default)"
 		echo "      --cmp           use cmp to compare files"
+		echo
+		echo "Report bugs to lzip-bug@nongnu.org"
+		echo "Lzip home page: http://www.nongnu.org/lzip/lzip.html"
 		exit 0 ;;
 	--version | --ve* | -V)
-		echo "Lzdiff 0.3"
+		echo "Lzdiff 0.4"
 		echo "Copyright (C) 2009 Antonio Diaz Diaz."
 		echo "This script is free software: you have unlimited permission"
 		echo "to copy, distribute and modify it."
@@ -93,12 +98,14 @@ if test -z "${file2}"; then
 		file2=`echo "${file1}" | sed 's/tgz$/tar/'` ;;
 	*.bz2)
 		file2=`echo "${file1}" | sed 's/.bz2$//'` ;;
+	*.tbz)
+		file2=`echo "${file1}" | sed 's/tbz$/tar/'` ;;
 	*.tbz2)
 		file2=`echo "${file1}" | sed 's/tbz2$/tar/'` ;;
 	*.lz)
 		file2=`echo "${file1}" | sed 's/.lz$//'` ;;
 	*.tlz)
-		file2=`echo "${file1}" | sed 's/.tlz$/tar/'` ;;
+		file2=`echo "${file1}" | sed 's/tlz$/tar/'` ;;
 	*)
 		file2="${file1}"; file1="${file1}${default_ext}" ;;
 	esac
@@ -107,14 +114,14 @@ fi
 prog1=
 prog2=
 case "${file1}" in
-*.gz | *.tgz)	prog1=gzip ;;
-*.bz2 | *.tbz2)	prog1=bzip2 ;;
-*.lz | *.tlz)	prog1=lzip ;;
+*.gz | *.tgz)		prog1=gzip ;;
+*.bz2 | *.tbz | *.tbz2)	prog1=bzip2 ;;
+*.lz | *.tlz)		prog1=lzip ;;
 esac
 case "${file2}" in
-*.gz | *.tgz)	prog2=gzip ;;
-*.bz2 | *.tbz2)	prog2=bzip2 ;;
-*.lz | *.tlz)	prog2=lzip ;;
+*.gz | *.tgz)		prog2=gzip ;;
+*.bz2 | *.tbz | *.tbz2)	prog2=bzip2 ;;
+*.lz | *.tlz)		prog2=lzip ;;
 esac
 
 retval=0
diff --git a/lzgrep b/lzgrep
index 9d9eaeb..0991f58 100755
--- a/lzgrep
+++ b/lzgrep
@@ -5,6 +5,8 @@
 # This script is free software: you have unlimited permission
 # to copy, distribute and modify it.
 
+LC_ALL=C
+export LC_ALL
 invocation_name=$0
 args=
 default_prog=lzip
@@ -37,9 +39,12 @@ while [ x"$1" != x ] ; do
 		echo "      --gzip          use gzip as default decompressor"
 		echo "      --bzip2         use bzip2 as default decompressor"
 		echo "      --lzip          use lzip as default decompressor (default)"
+		echo
+		echo "Report bugs to lzip-bug@nongnu.org"
+		echo "Lzip home page: http://www.nongnu.org/lzip/lzip.html"
 		exit 0 ;;
 	--version | --ve* | -V)
-		echo "Lzgrep 0.3"
+		echo "Lzgrep 0.4"
 		echo "Copyright (C) 2009 Antonio Diaz Diaz."
 		echo "This script is free software: you have unlimited permission"
 		echo "to copy, distribute and modify it."
@@ -93,7 +98,7 @@ for i in "$@" ; do
 	case "$i" in
 	*.gz | *.tgz)
 		prog="gzip -cdfq" ;;
-	*.bz2 | *.tbz2)
+	*.bz2 | *.tbz | *.tbz2)
 		prog="bzip2 -cdfq" ;;
 	*.lz | *.tlz)
 		prog="lzip -cdfq" ;;
diff --git a/lzip.h b/lzip.h
index 89afe96..a50941f 100644
--- a/lzip.h
+++ b/lzip.h
@@ -150,14 +150,19 @@ public:
   uint32_t operator[]( const uint8_t byte ) const throw() { return data[byte]; }
   void update( uint32_t & crc, const uint8_t byte ) const throw()
     { crc = data[(crc^byte)&0xFF] ^ ( crc >> 8 ); }
+  void update( uint32_t & crc, const uint8_t * const buffer, const int size ) const throw()
+    {
+    for( int i = 0; i < size; ++i )
+      crc = data[(crc^buffer[i])&0xFF] ^ ( crc >> 8 );
+    }
   };
 
 
-const char * const magic_string = "LZIP";
+const uint8_t magic_string[4] = { 'L', 'Z', 'I', 'P' };
 
 struct File_header
   {
-  char magic[4];
+  uint8_t magic[4];
   uint8_t version;
   uint8_t coded_dict_size;
 
diff --git a/lziprecover.cc b/lziprecover.cc
index bca2744..deed53c 100644
--- a/lziprecover.cc
+++ b/lziprecover.cc
@@ -49,6 +49,9 @@ const char * const program_year    = "2009";
 void show_help() throw()
   {
   std::printf( "%s - Member recoverer program for lzip compressed files.\n", Program_name );
+  std::printf( "\nSearches for members in .lz files, and writes each member in its own .lz\n" );
+  std::printf( "file. You can then use `lzip -t' to test the integrity of the resulting\n" );
+  std::printf( "files, and decompress those which are undamaged.\n" );
   std::printf( "\nUsage: %s [options] file\n", invocation_name );
   std::printf( "Options:\n" );
   std::printf( "  -h, --help                 display this help and exit\n" );
@@ -56,6 +59,7 @@ void show_help() throw()
   std::printf( "  -q, --quiet                suppress all messages\n" );
   std::printf( "  -v, --verbose              be verbose (a 2nd -v gives more)\n" );
   std::printf( "\nReport bugs to lzip-bug@nongnu.org\n");
+  std::printf( "Lzip home page: http://www.nongnu.org/lzip/lzip.html\n" );
   }
 
 
@@ -119,7 +123,7 @@ bool next_filename( std::string & output_filename )
   }
 
 
-int search_header( const char * buffer, const int size, const int pos,
+int search_header( const uint8_t * buffer, const int size, const int pos,
                    const long long last_header_pos,
                    const long long partial_file_pos )
   {
@@ -129,7 +133,7 @@ int search_header( const char * buffer, const int size, const int pos,
       {
       File_trailer trailer;
       for( unsigned int j = 0; j < sizeof trailer; ++j )
-        ((char *)&trailer)[j] = buffer[i-(sizeof trailer)+j];
+        ((uint8_t *)&trailer)[j] = buffer[i-(sizeof trailer)+j];
       if( partial_file_pos + i - trailer.member_size() == last_header_pos )
         return i;
       }
@@ -137,11 +141,11 @@ int search_header( const char * buffer, const int size, const int pos,
   }
 
 
-bool verify_header( const char * buffer, const int pos )
+bool verify_header( const uint8_t * buffer, const int pos )
   {
   File_header header;
   for( unsigned int i = 0; i < sizeof header; ++i )
-    ((char *)&header)[i] = buffer[pos+i];
+    ((uint8_t *)&header)[i] = buffer[pos+i];
   if( !header.verify_magic() )
     {
     show_error( "bad magic number (file not created by lzip).\n" );
@@ -163,18 +167,18 @@ bool verify_header( const char * buffer, const int pos )
   }
 
 
-int process_file( const std::string & input_filename, char * & base_buffer )
+int process_file( const std::string & input_filename, uint8_t * & base_buffer )
   {
   const int hsize = sizeof( File_header );
   const int tsize = sizeof( File_trailer );
   const int buffer_size = 65536;
   const int base_buffer_size = tsize + buffer_size + hsize;
-  base_buffer = new char[base_buffer_size];
-  char * const buffer = base_buffer + tsize;
+  base_buffer = new uint8_t[base_buffer_size];
+  uint8_t * const buffer = base_buffer + tsize;
 
   const int inhandle = open_instream( input_filename );
   if( inhandle < 0 ) return 1;
-  int size = readblock( inhandle, buffer, buffer_size + hsize ) - hsize;
+  int size = readblock( inhandle, (char *)buffer, buffer_size + hsize ) - hsize;
   bool at_stream_end = ( size < buffer_size );
   if( size != buffer_size && errno )
     { show_error( "read error", errno ); return 1; }
@@ -195,7 +199,7 @@ int process_file( const std::string & input_filename, char * & base_buffer )
                                       last_header_pos, partial_file_pos );
     if( newpos > pos )
       {
-      const int wr = writeblock( outhandle, buffer + pos, newpos - pos );
+      const int wr = writeblock( outhandle, (char *)buffer + pos, newpos - pos );
       if( wr != newpos - pos )
         { show_error( "write error", errno ); return 1; }
       if( close( outhandle ) != 0 )
@@ -213,7 +217,7 @@ int process_file( const std::string & input_filename, char * & base_buffer )
       if( !at_stream_end )
         {
         partial_file_pos += buffer_size;
-        const int wr = writeblock( outhandle, buffer + pos, buffer_size - pos );
+        const int wr = writeblock( outhandle, (char *)buffer + pos, buffer_size - pos );
         if( wr != buffer_size - pos )
           { show_error( "write error", errno ); return 1; }
         std::memcpy( base_buffer, base_buffer + buffer_size, tsize + hsize );
@@ -221,13 +225,13 @@ int process_file( const std::string & input_filename, char * & base_buffer )
         }
       else
         {
-        const int wr = writeblock( outhandle, buffer + pos, size + hsize - pos );
+        const int wr = writeblock( outhandle, (char *)buffer + pos, size + hsize - pos );
         if( wr != size + hsize - pos )
           { show_error( "write error", errno ); return 1; }
         break;
         }
       }
-    size = readblock( inhandle, buffer + hsize, buffer_size );
+    size = readblock( inhandle, (char *)buffer + hsize, buffer_size );
     at_stream_end = ( size < buffer_size );
     if( size != buffer_size && errno )
       { show_error( "read error", errno ); return 1; }
@@ -339,7 +343,7 @@ int main( const int argc, const char * argv[] )
   if( argind + 1 != parser.arguments() )
     { show_error( "you must specify exactly 1 file", 0, true ); return 1; }
 
-  char * base_buffer;
+  uint8_t * base_buffer;
   const int retval = process_file( parser.argument( argind ), base_buffer );
 
   delete[] base_buffer;
diff --git a/main.cc b/main.cc
index fa91d9d..87d186a 100644
--- a/main.cc
+++ b/main.cc
@@ -473,7 +473,7 @@ int decompress( const int inhandle, const Pretty_print & pp,
       {
       File_header header;
       for( unsigned int i = 0; i < sizeof header; ++i )
-        ((uint8_t *)&header)[i] = ibuf.read_byte();
+        ((uint8_t *)&header)[i] = ibuf.get_byte();
       if( ibuf.finished() )
         {
         if( first_pass ) { pp( "error reading member header" ); return 1; }
diff --git a/testsuite/check.sh b/testsuite/check.sh
index 83ebc6f..f871f28 100755
--- a/testsuite/check.sh
+++ b/testsuite/check.sh
@@ -5,9 +5,12 @@
 # This script is free software: you have unlimited permission
 # to copy, distribute and modify it.
 
+LC_ALL=C
+export LC_ALL
 objdir=`pwd`
 testdir=`cd "$1" ; pwd`
 LZIP="${objdir}"/lzip
+LZIPRECOVER="${objdir}"/lziprecover
 framework_failure() { echo 'failure in testing framework'; exit 1; }
 
 if [ ! -x "${LZIP}" ] ; then
@@ -57,6 +60,15 @@ for i in 1 2 3 4 5 6 7 8 9; do
 	echo -n .
 done
 
+"${LZIP}" -c in in in > out || fail=1
+echo -n "garbage" >> out || fail=1
+"${LZIPRECOVER}" out || fail=1
+for i in 1 2 3; do
+	"${LZIP}" -cd rec0000${i}out > copy || fail=1
+	cmp in copy || fail=1
+	echo -n .
+done
+
 echo
 if test ${fail} = 0; then
 	echo "tests completed successfully."