21 files changed, 1013 insertions, 825 deletions
diff --git a/ChangeLog b/ChangeLog
index b8264e5..0f365bb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2013-02-18  Antonio Diaz Diaz  <ant_diaz@teleline.es>
+
+	* Version 1.4 released.
+	* Multi-step trials have been implemented.
+	* Compression ratio has been slightly increased.
+	* Compression time has been reduced by 10%.
+	* Decompression time has been reduced by 8%.
+	* Makefile.in: Added new target 'install-as-lzip'.
+	* Makefile.in: Added new target 'install-bin'.
+	* main.c: Use 'setmode' instead of '_setmode' on Windows and OS/2.
+	* main.c: Define 'strtoull' to 'strtoul' on Windows.
+
 2012-02-25  Antonio Diaz Diaz  <ant_diaz@teleline.es>
 
 	* Version 1.3 released.
@@ -45,7 +57,7 @@
 	* Translated to C from the C++ source of lzip 1.10.
 
 
-Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz.
+Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
 
 This file is a collection of facts, and thus it is not copyrightable,
 but just in case, you have unlimited permission to copy, distribute and
diff --git a/INSTALL b/INSTALL
index a0aad32..4466443 100644
--- a/INSTALL
+++ b/INSTALL
@@ -1,7 +1,7 @@
 Requirements
 ------------
 You will need a C compiler.
-I use gcc 4.3.5 and 3.3.6, but the code should compile with any
+I use gcc 4.7.2 and 3.3.6, but the code should compile with any
 standards compliant compiler.
 Gcc is available at http://gcc.gnu.org.
 
@@ -32,6 +32,13 @@ the main archive.
 5. Type 'make install' to install the program and any data files and
    documentation.
 
+   You can install only the program, the info manual or the man page
+   typing 'make install-bin', 'make install-info' or 'make install-man'
+   respectively.
+
+5a. Type 'make install-as-lzip' to install the program and any data
+    files and documentation, and link the program to the name 'lzip'.
+
 
 Another way
 -----------
@@ -50,7 +57,7 @@ After running 'configure', you can run 'make' and 'make install' as
 explained above.
 
 
-Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz.
+Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
 
 This file is free documentation: you have unlimited permission to copy,
 distribute and modify it.
diff --git a/Makefile.in b/Makefile.in
index 31524a2..a27a481 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -6,11 +6,11 @@ INSTALL_DATA = $(INSTALL) -p -m 644
 INSTALL_DIR = $(INSTALL) -d -m 755
 SHELL = /bin/sh
 
-objs = carg_parser.o decoder.o encoder.o main.o
+objs = carg_parser.o encoder.o decoder.o main.o
 
 
-.PHONY : all install install-info install-man install-strip \
-         uninstall uninstall-info uninstall-man \
+.PHONY : all install install-bin install-info install-man install-strip \
+         install-as-lzip uninstall uninstall-bin uninstall-info uninstall-man \
          doc info man check dist clean distclean
 
 all : $(progname)
@@ -53,14 +53,16 @@ Makefile : $(VPATH)/configure $(VPATH)/Makefile.in
 check : all
 	@$(VPATH)/testsuite/check.sh $(VPATH)/testsuite $(pkgversion)
 
-install : all install-info install-man
+install : install-bin install-info install-man
+
+install-bin : all
 	if [ ! -d "$(DESTDIR)$(bindir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(bindir)" ; fi
 	$(INSTALL_PROGRAM) ./$(progname) "$(DESTDIR)$(bindir)/$(progname)"
 
 install-info :
 	if [ ! -d "$(DESTDIR)$(infodir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(infodir)" ; fi
 	$(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info "$(DESTDIR)$(infodir)/$(pkgname).info"
-	-install-info --info-dir="$(DESTDIR)$(infodir)" $(DESTDIR)$(infodir)/$(pkgname).info
+	-install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info"
 
 install-man :
 	if [ ! -d "$(DESTDIR)$(mandir)/man1" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" ; fi
@@ -69,7 +71,13 @@ install-man :
 install-strip : all
 	$(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install
 
-uninstall : uninstall-info uninstall-man
+install-as-lzip : install
+	-rm -f "$(DESTDIR)$(bindir)/lzip"
+	cd "$(DESTDIR)$(bindir)" && ln -s $(progname) lzip
+
+uninstall : uninstall-bin uninstall-info uninstall-man
+
+uninstall-bin :
 	-rm -f "$(DESTDIR)$(bindir)/$(progname)"
 
 uninstall-info :
@@ -95,8 +103,7 @@ dist : doc
 	  $(DISTNAME)/doc/$(pkgname).texinfo \
 	  $(DISTNAME)/testsuite/check.sh \
 	  $(DISTNAME)/testsuite/test.txt \
-	  $(DISTNAME)/testsuite/test_sync.lz \
-	  $(DISTNAME)/testsuite/test_v[01].lz \
+	  $(DISTNAME)/testsuite/test.txt.lz \
 	  $(DISTNAME)/*.h \
 	  $(DISTNAME)/*.c
 	rm -f $(DISTNAME)
diff --git a/NEWS b/NEWS
index 36f565f..e854c8a 100644
--- a/NEWS
+++ b/NEWS
@@ -1,12 +1,13 @@
-Changes in version 1.3:
+Changes in version 1.4:
 
-Inability to change output file attributes has been downgraded from
-error to warning.
+Multi-step trials have been implemented.
 
-A small change has been made in the "--help" output and man page.
+Compression ratio has been slightly increased.
 
-Quote characters in messages have been changed as advised by GNU Coding
-Standards.
+Compression time has been reduced by 10%.
 
-Configure option "--datadir" has been renamed to "--datarootdir" to
-follow GNU Standards.
+Decompression time has been reduced by 8%.
+
+The target "install-as-lzip" has been added to the Makefile.
+
+The target "install-bin" has been added to the Makefile.
diff --git a/README b/README
index 71b1ad8..72d434b 100644
--- a/README
+++ b/README
@@ -37,6 +37,10 @@ also split the compressed output in volumes of a given size, even when
 reading from standard input. This allows the direct creation of
 multivolume compressed tar archives.
 
+Clzip is able to compress and decompress streams of unlimited size by
+automatically creating multi-member output. The members so created are
+large (about 2^60 bytes each).
+
 Clzip will automatically use the smallest possible dictionary size
 without exceeding the given limit. Keep in mind that the decompression
 memory requirement is affected at compression time by the choice of
@@ -68,7 +72,7 @@ range encoding), Igor Pavlov (for putting all the above together in
 LZMA), and Julian Seward (for bzip2's CLI and the idea of unzcrash).
 
 
-Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz.
+Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
 
 This file is free documentation: you have unlimited permission to copy,
 distribute and modify it.
diff --git a/carg_parser.c b/carg_parser.c
index 326bd41..973bb7e 100644
--- a/carg_parser.c
+++ b/carg_parser.c
@@ -88,7 +88,7 @@ static char parse_long_option( struct Arg_parser * const ap,
                                const struct ap_Option options[],
                                int * const argindp )
   {
-  unsigned int len;
+  unsigned len;
   int index = -1;
   int i;
   char exact = 0, ambig = 0;
diff --git a/clzip.h b/clzip.h
index 42c014e..dd63438 100644
--- a/clzip.h
+++ b/clzip.h
@@ -1,5 +1,5 @@
 /*  Clzip - Data compressor based on the LZMA algorithm
-    Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz.
+    Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -22,39 +22,26 @@
   #define min(x,y) ((x) <= (y) ? (x) : (y))
 #endif
 
-typedef unsigned char State;
+typedef int State;
 
 enum { states = 12 };
 
 static inline bool St_is_char( const State st ) { return st < 7; }
 
-static inline void St_set_char( State * const st )
+static inline State St_set_char( const State st )
   {
-  static const unsigned char next[states] =
-    { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 };
-  *st = next[*st];
+  static const State next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 };
+  return next[st];
   }
 
-static inline void St_set_match( State * const st )
-  {
-  static const unsigned char next[states] =
-    { 7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10 };
-  *st = next[*st];
-  }
+static inline State St_set_match( const State st )
+  { return ( ( st < 7 ) ? 7 : 10 ); }
 
-static inline void St_set_rep( State * const st )
-  {
-  static const unsigned char next[states] =
-    { 8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11 };
-  *st = next[*st];
-  }
+static inline State St_set_rep( const State st )
+  { return ( ( st < 7 ) ? 8 : 11 ); }
 
-static inline void St_set_short_rep( State * const st )
-  {
-  static const unsigned char next[states] =
-    { 9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11 };
-  *st = next[*st];
-  }
+static inline State St_set_short_rep( const State st )
+  { return ( ( st < 7 ) ? 9 : 11 ); }
 
 
 enum {
@@ -70,7 +57,7 @@ enum {
   dis_slot_bits = 6,
   start_dis_model = 4,
   end_dis_model = 14,
-  modeled_distances = 1 << (end_dis_model / 2),
+  modeled_distances = 1 << (end_dis_model / 2),		/* 128 */
   dis_align_bits = 4,
   dis_align_size = 1 << dis_align_bits,
 
@@ -88,23 +75,25 @@ enum {
 
   max_dis_states = 4 };
 
-static inline int get_dis_state( int len )
-  {
-  len -= min_match_len;
-  if( len >= max_dis_states ) len = max_dis_states - 1;
-  return len;
-  }
+static inline int get_dis_state( const int len )
+  { return min( len - min_match_len, max_dis_states - 1 ); }
+
+static inline int get_lit_state( const uint8_t prev_byte )
+  { return ( prev_byte >> ( 8 - literal_context_bits ) ); }
 
 
 enum { bit_model_move_bits = 5,
        bit_model_total_bits = 11,
        bit_model_total = 1 << bit_model_total_bits };
 
-typedef unsigned int Bit_model;
+typedef int Bit_model;
 
 static inline void Bm_init( Bit_model * const probability )
   { *probability = bit_model_total / 2; }
 
+static inline void Bm_array_init( Bit_model * const p, const int size )
+  { int i = 0; while( i < size ) p[i++] = bit_model_total / 2; }
+
 
 struct Pretty_print
   {
@@ -121,7 +110,7 @@ void Pp_init( struct Pretty_print * const pp, const char * const filenames[],
 static inline void Pp_set_name( struct Pretty_print * const pp,
                                 const char * const filename )
   {
-  if( filename && filename[0] && strcmp( filename, "-" ) )
+  if( filename && filename[0] && strcmp( filename, "-" ) != 0 )
     pp->name = filename;
   else pp->name = pp->stdin_name;
   pp->first_post = true;
@@ -136,12 +125,12 @@ typedef uint32_t CRC32[256];	/* Table of CRCs of all 8-bit messages. */
 
 extern CRC32 crc32;
 
-static inline void CRC32_init()
+static inline void CRC32_init( void )
   {
-  unsigned int n;
+  unsigned n;
   for( n = 0; n < 256; ++n )
     {
-    unsigned int c = n;
+    unsigned c = n;
     int k;
     for( k = 0; k < 8; ++k )
       { if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; }
@@ -149,10 +138,11 @@ static inline void CRC32_init()
     }
   }
 
-static inline void CRC32_update_byte( uint32_t * crc, const uint8_t byte )
+static inline void CRC32_update_byte( uint32_t * const crc, const uint8_t byte )
   { *crc = crc32[(*crc^byte)&0xFF] ^ ( *crc >> 8 ); }
-static inline void CRC32_update_buf( uint32_t * crc, const uint8_t * const buffer,
-                                     const int size )
+
+static inline void CRC32_update_buf( uint32_t * const crc,
+                                     const uint8_t * const buffer, const int size )
   {
   int i;
   for( i = 0; i < size; ++i )
@@ -160,16 +150,15 @@ static inline void CRC32_update_buf( uint32_t * crc, const uint8_t * const buffe
   }
 
 
-static inline int real_bits( const unsigned int value )
+static inline int real_bits( unsigned value )
   {
-  int bits = 0, i = 1;
-  unsigned int mask = 1;
-  for( ; mask > 0; ++i, mask <<= 1 ) if( value & mask ) bits = i;
+  int bits = 0;
+  while( value > 0 ) { value >>= 1; ++bits; }
   return bits;
   }
 
 
-static const uint8_t magic_string[4] = { 'L', 'Z', 'I', 'P' };
+static const uint8_t magic_string[4] = { 0x4C, 0x5A, 0x49, 0x50 }; /* "LZIP" */
 
 typedef uint8_t File_header[6];		/* 0-3 magic bytes */
 					/*   4 version */
@@ -188,11 +177,11 @@ static inline uint8_t Fh_version( const File_header data )
 static inline bool Fh_verify_version( const File_header data )
   { return ( data[4] <= 1 ); }
 
-static inline int Fh_get_dictionary_size( const File_header data )
+static inline unsigned Fh_get_dictionary_size( const File_header data )
   {
-  int sz = ( 1 << ( data[5] & 0x1F ) );
-  if( sz > min_dictionary_size && sz <= max_dictionary_size )
-    sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 0x07 );
+  unsigned sz = ( 1 << ( data[5] & 0x1F ) );
+  if( sz > min_dictionary_size )
+    sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 7 );
   return sz;
   }
 
@@ -226,54 +215,54 @@ enum { Ft_size = 20 };
 static inline int Ft_versioned_size( const int version )
   { return ( ( version >= 1 ) ? 20 : 12 ); }
 
-static inline uint32_t Ft_get_data_crc( const File_trailer data )
+static inline unsigned Ft_get_data_crc( const File_trailer data )
   {
-  uint32_t tmp = 0;
+  unsigned tmp = 0;
   int i;
   for( i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; }
   return tmp;
   }
 
-static inline void Ft_set_data_crc( File_trailer data, uint32_t crc )
+static inline void Ft_set_data_crc( File_trailer data, unsigned crc )
   {
   int i;
   for( i = 0; i <= 3; ++i ) { data[i] = (uint8_t)crc; crc >>= 8; }
   }
 
-static inline long long Ft_get_data_size( const File_trailer data )
+static inline unsigned long long Ft_get_data_size( const File_trailer data )
   {
-  long long tmp = 0;
+  unsigned long long tmp = 0;
   int i;
   for( i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; }
   return tmp;
   }
 
-static inline void Ft_set_data_size( File_trailer data, long long sz )
+static inline void Ft_set_data_size( File_trailer data, unsigned long long sz )
   {
   int i;
   for( i = 4; i <= 11; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; }
   }
 
-static inline long long Ft_get_member_size( const File_trailer data )
+static inline unsigned long long Ft_get_member_size( const File_trailer data )
   {
-  long long tmp = 0;
+  unsigned long long tmp = 0;
   int i;
   for( i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; }
   return tmp;
   }
 
-static inline void Ft_set_member_size( File_trailer data, long long sz )
+static inline void Ft_set_member_size( File_trailer data, unsigned long long sz )
   {
   int i;
   for( i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; }
   }
 
 
+/* defined in decoder.c */
+int readblock( const int fd, uint8_t * const buf, const int size );
+int writeblock( const int fd, const uint8_t * const buf, const int size );
+
 /* defined in main.c */
 void cleanup_and_fail( const int retval );
 void show_error( const char * const msg, const int errcode, const bool help );
 void internal_error( const char * const msg );
-
-/* defined in decoder.c */
-int readblock( const int fd, uint8_t * const buf, const int size );
-int writeblock( const int fd, const uint8_t * const buf, const int size );
diff --git a/configure b/configure
index 3ecf2bc..a234bb3 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # configure script for Clzip - Data compressor based on the LZMA algorithm
-# Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz.
+# Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
 #
 # This configure script is free software: you have unlimited permission
 # to copy, distribute and modify it.
@@ -8,7 +8,7 @@
 args=
 no_create=
 pkgname=clzip
-pkgversion=1.3
+pkgversion=1.4
 progname=clzip
 srctrigger=clzip.h
 
@@ -22,11 +22,19 @@ bindir='$(exec_prefix)/bin'
 datarootdir='$(prefix)/share'
 infodir='$(datarootdir)/info'
 mandir='$(datarootdir)/man'
-CC=
+CC=gcc
 CPPFLAGS=
 CFLAGS='-Wall -W -O2'
 LDFLAGS=
 
+# checking whether we are using GNU C.
+if [ ! -x /bin/gcc ] &&
+   [ ! -x /usr/bin/gcc ] &&
+   [ ! -x /usr/local/bin/gcc ] ; then
+	CC=cc
+	CFLAGS='-W -O2'
+fi
+
 # Loop over all args
 while [ -n "$1" ] ; do
 
@@ -91,14 +99,14 @@ done
 srcdirtext=
 if [ -z "${srcdir}" ] ; then
 	srcdirtext="or . or .." ; srcdir=.
-	if [ ! -r ${srcdir}/${srctrigger} ] ; then srcdir=.. ; fi
-	if [ ! -r ${srcdir}/${srctrigger} ] ; then
+	if [ ! -r "${srcdir}/${srctrigger}" ] ; then srcdir=.. ; fi
+	if [ ! -r "${srcdir}/${srctrigger}" ] ; then
 		## the sed command below emulates the dirname command
 		srcdir=`echo $0 | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
 	fi
 fi
 
-if [ ! -r ${srcdir}/${srctrigger} ] ; then
+if [ ! -r "${srcdir}/${srctrigger}" ] ; then
 	exec 1>&2
 	echo
 	echo "configure: Can't find sources in ${srcdir} ${srcdirtext}"
@@ -107,18 +115,7 @@ if [ ! -r ${srcdir}/${srctrigger} ] ; then
 fi
 
 # Set srcdir to . if that's what it is.
-if [ "`pwd`" = "`cd ${srcdir} ; pwd`" ] ; then srcdir=. ; fi
-
-# checking whether we are using GNU C.
-if [ -z "${CC}" ] ; then		# Let the user override the test.
-	if [ -x /bin/gcc ] ||
-	   [ -x /usr/bin/gcc ] ||
-	   [ -x /usr/local/bin/gcc ] ; then
-		CC="gcc"
-	else
-		CC="cc"
-	fi
-fi
+if [ "`pwd`" = "`cd "${srcdir}" ; pwd`" ] ; then srcdir=. ; fi
 
 echo
 if [ -z "${no_create}" ] ; then
@@ -152,7 +149,7 @@ echo "LDFLAGS = ${LDFLAGS}"
 rm -f Makefile
 cat > Makefile << EOF
 # Makefile for Clzip - Data compressor based on the LZMA algorithm
-# Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz.
+# Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
 # This file was generated automatically by configure. Do not edit.
 #
 # This Makefile is free software: you have unlimited permission
@@ -173,6 +170,6 @@ CPPFLAGS = ${CPPFLAGS}
 CFLAGS = ${CFLAGS}
 LDFLAGS = ${LDFLAGS}
 EOF
-cat ${srcdir}/Makefile.in >> Makefile
+cat "${srcdir}/Makefile.in" >> Makefile
 
 echo "OK. Now you can run make."
diff --git a/decoder.c b/decoder.c
index 02f7f0b..b40dafd 100644
--- a/decoder.c
+++ b/decoder.c
@@ -1,5 +1,5 @@
 /*  Clzip - Data compressor based on the LZMA algorithm
-    Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz.
+    Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -32,23 +32,40 @@
 CRC32 crc32;
 
 
+void Pp_show_msg( struct Pretty_print * const pp, const char * const msg )
+  {
+  if( pp->verbosity >= 0 )
+    {
+    if( pp->first_post )
+      {
+      int i, len;
+      pp->first_post = false;
+      fprintf( stderr, "  %s: ", pp->name );
+      len = pp->longest_name - strlen( pp->name );
+      for( i = 0; i < len; ++i ) fprintf( stderr, " " );
+      if( !msg ) fflush( stderr );
+      }
+    if( msg ) fprintf( stderr, "%s.\n", msg );
+    }
+  }
+
+
 /* Returns the number of bytes really read.
    If (returned value < size) and (errno == 0), means EOF was reached.
 */
 int readblock( const int fd, uint8_t * const buf, const int size )
   {
   int rest = size;
-  while( true )
+  errno = 0;
+  while( rest > 0 )
     {
-    int n;
-    errno = 0;
-    if( rest <= 0 ) break;
-    n = read( fd, buf + size - rest, rest );
+    const int n = read( fd, buf + size - rest, rest );
     if( n > 0 ) rest -= n;
-    else if( n == 0 ) break;
+    else if( n == 0 ) break;				/* EOF */
     else if( errno != EINTR && errno != EAGAIN ) break;
+    errno = 0;
     }
-  return ( rest > 0 ) ? size - rest : size;
+  return size - rest;
   }
 
 
@@ -58,16 +75,15 @@ int readblock( const int fd, uint8_t * const buf, const int size )
 int writeblock( const int fd, const uint8_t * const buf, const int size )
   {
   int rest = size;
-  while( true )
+  errno = 0;
+  while( rest > 0 )
     {
-    int n;
-    errno = 0;
-    if( rest <= 0 ) break;
-    n = write( fd, buf + size - rest, rest );
+    const int n = write( fd, buf + size - rest, rest );
     if( n > 0 ) rest -= n;
     else if( n < 0 && errno != EINTR && errno != EAGAIN ) break;
+    errno = 0;
     }
-  return ( rest > 0 ) ? size - rest : size;
+  return size - rest;
   }
 
 
@@ -107,7 +123,7 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder,
   {
   File_trailer trailer;
   const int trailer_size = Ft_versioned_size( decoder->member_version );
-  const long long member_size =
+  const unsigned long long member_size =
     Rd_member_position( decoder->range_decoder ) + trailer_size;
   bool error = false;
 
@@ -123,7 +139,9 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder,
       }
     while( size < trailer_size ) trailer[size++] = 0;
     }
+
   if( decoder->member_version == 0 ) Ft_set_member_size( trailer, member_size );
+
   if( decoder->range_decoder->code != 0 )
     {
     error = true;
@@ -136,8 +154,7 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder,
       {
       Pp_show_msg( pp, 0 );
       fprintf( stderr, "CRC mismatch; trailer says %08X, data CRC is %08X.\n",
-               (unsigned int)Ft_get_data_crc( trailer ),
-               (unsigned int)LZd_crc( decoder ) );
+               Ft_get_data_crc( trailer ), LZd_crc( decoder ) );
       }
     }
   if( Ft_get_data_size( trailer ) != LZd_data_position( decoder ) )
@@ -146,7 +163,7 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder,
     if( pp->verbosity >= 0 )
       {
       Pp_show_msg( pp, 0 );
-      fprintf( stderr, "Data size mismatch; trailer says %lld, data size is %lld (0x%llX).\n",
+      fprintf( stderr, "Data size mismatch; trailer says %llu, data size is %llu (0x%llX).\n",
                Ft_get_data_size( trailer ), LZd_data_position( decoder ), LZd_data_position( decoder ) );
       }
     }
@@ -156,7 +173,7 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder,
     if( pp->verbosity >= 0 )
       {
       Pp_show_msg( pp, 0 );
-      fprintf( stderr, "Member size mismatch; trailer says %lld, member size is %lld (0x%llX).\n",
+      fprintf( stderr, "Member size mismatch; trailer says %llu, member size is %llu (0x%llX).\n",
                Ft_get_member_size( trailer ), member_size, member_size );
       }
     }
@@ -166,8 +183,8 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder,
              ( 8.0 * member_size ) / LZd_data_position( decoder ),
              100.0 * ( 1.0 - ( (double)member_size / LZd_data_position( decoder ) ) ) );
   if( !error && pp->verbosity >= 4 )
-    fprintf( stderr, "data CRC %08X, data size %9lld, member size %8lld.  ",
-             (unsigned int)Ft_get_data_crc( trailer ),
+    fprintf( stderr, "data CRC %08X, data size %9llu, member size %8llu.  ",
+             Ft_get_data_crc( trailer ),
              Ft_get_data_size( trailer ), Ft_get_member_size( trailer ) );
   return !error;
   }
@@ -178,10 +195,11 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder,
 int LZd_decode_member( struct LZ_decoder * const decoder,
                        struct Pretty_print * const pp )
   {
-  unsigned int rep0 = 0;	/* rep[0-3] latest four distances */
-  unsigned int rep1 = 0;	/* used for efficient coding of */
-  unsigned int rep2 = 0;	/* repeated distances */
-  unsigned int rep3 = 0;
+  unsigned rep0 = 0;		/* rep[0-3] latest four distances */
+  unsigned rep1 = 0;		/* used for efficient coding of */
+  unsigned rep2 = 0;		/* repeated distances */
+  unsigned rep3 = 0;
+
   State state = 0;
   Rd_load( decoder->range_decoder );
 
@@ -192,12 +210,17 @@ int LZd_decode_member( struct LZ_decoder * const decoder,
       {
       const uint8_t prev_byte = LZd_get_prev_byte( decoder );
       if( St_is_char( state ) )
-        LZd_put_byte( decoder, Lid_decode( &decoder->literal_decoder,
-          decoder->range_decoder, prev_byte ) );
+        {
+        state -= ( state < 4 ) ? state : 3;
+        LZd_put_byte( decoder, Rd_decode_tree( decoder->range_decoder,
+                      decoder->bm_literal[get_lit_state(prev_byte)], 8 ) );
+        }
       else
-        LZd_put_byte( decoder, Lid_decode_matched( &decoder->literal_decoder,
-          decoder->range_decoder, prev_byte, LZd_get_byte( decoder, rep0 ) ) );
-      St_set_char( &state );
+        {
+        state -= ( state < 10 ) ? 3 : 6;
+        LZd_put_byte( decoder, Rd_decode_matched( decoder->range_decoder,
+          decoder->bm_literal[get_lit_state(prev_byte)], LZd_get_byte( decoder, rep0 ) ) );
+        }
       }
     else
       {
@@ -207,7 +230,7 @@ int LZd_decode_member( struct LZ_decoder * const decoder,
         len = 0;
         if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep0[state] ) == 1 )
           {
-          unsigned int distance;
+          unsigned distance;
           if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep1[state] ) == 0 )
             distance = rep1;
           else
@@ -223,31 +246,33 @@ int LZd_decode_member( struct LZ_decoder * const decoder,
         else
           {
           if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_len[state][pos_state] ) == 0 )
-            { St_set_short_rep( &state ); len = 1; }
+            { state = St_set_short_rep( state ); len = 1; }
           }
         if( len == 0 )
           {
-          St_set_rep( &state );
+          state = St_set_rep( state );
           len = min_match_len + Led_decode( &decoder->rep_match_len_decoder, decoder->range_decoder, pos_state );
           }
         }
       else
         {
         int dis_slot;
-        const unsigned int rep0_saved = rep0;
+        const unsigned rep0_saved = rep0;
         len = min_match_len + Led_decode( &decoder->len_decoder, decoder->range_decoder, pos_state );
-        dis_slot = Rd_decode_tree( decoder->range_decoder, decoder->bm_dis_slot[get_dis_state(len)], dis_slot_bits );
+        dis_slot = Rd_decode_tree6( decoder->range_decoder, decoder->bm_dis_slot[get_dis_state(len)] );
         if( dis_slot < start_dis_model ) rep0 = dis_slot;
         else
           {
           const int direct_bits = ( dis_slot >> 1 ) - 1;
           rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
           if( dis_slot < end_dis_model )
-            rep0 += Rd_decode_tree_reversed( decoder->range_decoder, decoder->bm_dis + rep0 - dis_slot, direct_bits );
+            rep0 += Rd_decode_tree_reversed( decoder->range_decoder,
+                                             decoder->bm_dis + rep0 - dis_slot - 1,
+                                             direct_bits );
           else
             {
             rep0 += Rd_decode( decoder->range_decoder, direct_bits - dis_align_bits ) << dis_align_bits;
-            rep0 += Rd_decode_tree_reversed( decoder->range_decoder, decoder->bm_align, dis_align_bits );
+            rep0 += Rd_decode_tree_reversed4( decoder->range_decoder, decoder->bm_align );
             if( rep0 == 0xFFFFFFFFU )		/* Marker found */
               {
               rep0 = rep0_saved;
@@ -271,9 +296,9 @@ int LZd_decode_member( struct LZ_decoder * const decoder,
             }
           }
         rep3 = rep2; rep2 = rep1; rep1 = rep0_saved;
-        St_set_match( &state );
-        if( rep0 >= (unsigned int)decoder->dictionary_size ||
-            ( rep0 >= (unsigned int)decoder->pos && !decoder->partial_data_pos ) )
+        state = St_set_match( state );
+        if( rep0 >= (unsigned)decoder->dictionary_size ||
+            ( rep0 >= (unsigned)decoder->pos && !decoder->partial_data_pos ) )
           { LZd_flush_data( decoder ); return 1; }
         }
       LZd_copy_block( decoder, rep0, len );
diff --git a/decoder.h b/decoder.h
index 6445d1b..c18ccbe 100644
--- a/decoder.h
+++ b/decoder.h
@@ -1,5 +1,5 @@
 /*  Clzip - Data compressor based on the LZMA algorithm
-    Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz.
+    Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ enum { rd_buffer_size = 16384 };
 
 struct Range_decoder
   {
-  long long partial_member_pos;
+  unsigned long long partial_member_pos;
   uint8_t * buffer;		/* input buffer */
   int pos;			/* current pos in buffer */
   int stream_pos;		/* when reached, a new block must be read */
@@ -51,7 +51,8 @@ static inline void Rd_free( struct Range_decoder * const rdec )
 static inline bool Rd_finished( struct Range_decoder * const rdec )
   { return rdec->pos >= rdec->stream_pos && !Rd_read_block( rdec ); }
 
-static inline long long Rd_member_position( const struct Range_decoder * const rdec )
+static inline unsigned long long
+Rd_member_position( const struct Range_decoder * const rdec )
   { return rdec->partial_member_pos + rdec->pos; }
 
 static inline void Rd_reset_member_position( struct Range_decoder * const rdec )
@@ -59,7 +60,7 @@ static inline void Rd_reset_member_position( struct Range_decoder * const rdec )
 
 static inline uint8_t Rd_get_byte( struct Range_decoder * const rdec )
   {
-  if( Rd_finished( rdec ) ) return 0x55;	/* make code != 0 */
+  if( Rd_finished( rdec ) ) return 0xAA;	/* make code != 0 */
   return rdec->buffer[rdec->pos++];
   }
 
@@ -74,16 +75,16 @@ static inline int Rd_read_data( struct Range_decoder * const rdec,
     rdec->pos += rd;
     rest -= rd;
     }
-  return ( rest > 0 ) ? size - rest : size;
+  return size - rest;
   }
 
 static inline void Rd_load( struct Range_decoder * const rdec )
   {
   int i;
   rdec->code = 0;
-  rdec->range = 0xFFFFFFFFU;
   for( i = 0; i < 5; ++i )
     rdec->code = (rdec->code << 8) | Rd_get_byte( rdec );
+  rdec->range = 0xFFFFFFFFU;
   }
 
 static inline void Rd_normalize( struct Range_decoder * const rdec )
@@ -102,20 +103,15 @@ static inline int Rd_decode( struct Range_decoder * const rdec,
   int i;
   for( i = num_bits; i > 0; --i )
     {
-    symbol <<= 1;
-    if( rdec->range <= 0x00FFFFFFU )
-      {
-      rdec->range <<= 7;
-      rdec->code = (rdec->code << 8) | Rd_get_byte( rdec );
-      if( rdec->code >= rdec->range )
-        { rdec->code -= rdec->range; symbol |= 1; }
-      }
-    else
-      {
-      rdec->range >>= 1;
-      if( rdec->code >= rdec->range )
-        { rdec->code -= rdec->range; symbol |= 1; }
-      }
+    uint32_t mask;
+    Rd_normalize( rdec );
+    rdec->range >>= 1;
+/*    symbol <<= 1; */
+/*    if( rdec->code >= rdec->range ) { rdec->code -= rdec->range; symbol |= 1; } */
+    mask = 0U - (rdec->code < rdec->range);
+    rdec->code -= rdec->range;
+    rdec->code += rdec->range & mask;
+    symbol = (symbol << 1) + (mask + 1);
     }
   return symbol;
   }
@@ -151,6 +147,19 @@ static inline int Rd_decode_tree( struct Range_decoder * const rdec,
   return model - (1 << num_bits);
   }
 
+static inline int Rd_decode_tree6( struct Range_decoder * const rdec,
+                                   Bit_model bm[] )
+  {
+  int model = 1;
+  model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
+  model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
+  model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
+  model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
+  model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
+  model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] );
+  return model - (1 << 6);
+  }
+
 static inline int Rd_decode_tree_reversed( struct Range_decoder * const rdec,
                                            Bit_model bm[], const int num_bits )
   {
@@ -159,32 +168,49 @@ static inline int Rd_decode_tree_reversed( struct Range_decoder * const rdec,
   int i;
   for( i = 0; i < num_bits; ++i )
     {
-    const int bit = Rd_decode_bit( rdec, &bm[model] );
+    const bool bit = Rd_decode_bit( rdec, &bm[model] );
     model <<= 1;
-    if( bit ) { model |= 1; symbol |= (1 << i); }
+    if( bit ) { ++model; symbol |= (1 << i); }
     }
   return symbol;
   }
 
+static inline int Rd_decode_tree_reversed4( struct Range_decoder * const rdec,
+                                            Bit_model bm[] )
+  {
+  int model = 1;
+  int symbol = 0;
+  int bit = Rd_decode_bit( rdec, &bm[model] );
+  model = (model << 1) + bit; symbol |= bit;
+  bit = Rd_decode_bit( rdec, &bm[model] );
+  model = (model << 1) + bit; symbol |= (bit << 1);
+  bit = Rd_decode_bit( rdec, &bm[model] );
+  model = (model << 1) + bit; symbol |= (bit << 2);
+  if( Rd_decode_bit( rdec, &bm[model] ) ) symbol |= 8;
+  return symbol;
+  }
+
 static inline int Rd_decode_matched( struct Range_decoder * const rdec,
-                                     Bit_model bm[], const int match_byte )
+                                     Bit_model bm[], int match_byte )
   {
   Bit_model * const bm1 = bm + 0x100;
   int symbol = 1;
   int i;
   for( i = 7; i >= 0; --i )
     {
-    const int match_bit = ( match_byte >> i ) & 1;
-    const int bit = Rd_decode_bit( rdec, &bm1[(match_bit<<8)+symbol] );
+    int match_bit, bit;
+    match_byte <<= 1;
+    match_bit = match_byte & 0x100;
+    bit = Rd_decode_bit( rdec, &bm1[match_bit+symbol] );
     symbol = ( symbol << 1 ) | bit;
-    if( match_bit != bit )
+    if( match_bit != bit << 8 )
       {
-      while( --i >= 0 )
+      while( symbol < 0x100 )
         symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
       break;
       }
     }
-  return symbol & 0xFF;
+  return symbol - 0x100;
   }
 
 
@@ -199,17 +225,11 @@ struct Len_decoder
 
 static inline void Led_init( struct Len_decoder * const len_decoder )
   {
-  int i, j;
   Bm_init( &len_decoder->choice1 );
   Bm_init( &len_decoder->choice2 );
-  for( i = 0; i < pos_states; ++i )
-    for( j = 0; j < len_low_symbols; ++j )
-      Bm_init( &len_decoder->bm_low[i][j] );
-  for( i = 0; i < pos_states; ++i )
-    for( j = 0; j < len_mid_symbols; ++j )
-      Bm_init( &len_decoder->bm_mid[i][j] );
-  for( i = 0; i < len_high_symbols; ++i )
-    Bm_init( &len_decoder->bm_high[i] );
+  Bm_array_init( len_decoder->bm_low[0], pos_states * len_low_symbols );
+  Bm_array_init( len_decoder->bm_mid[0], pos_states * len_mid_symbols );
+  Bm_array_init( len_decoder->bm_high, len_high_symbols );
   }
 
 static inline int Led_decode( struct Len_decoder * const len_decoder,
@@ -226,37 +246,9 @@ static inline int Led_decode( struct Len_decoder * const len_decoder,
   }
 
 
-struct Literal_decoder
-  {
-  Bit_model bm_literal[1<<literal_context_bits][0x300];
-  };
-
-static inline void Lid_init( struct Literal_decoder * const lidec )
-  {
-  int i, j;
-  for( i = 0; i < 1<<literal_context_bits; ++i )
-    for( j = 0; j < 0x300; ++j )
-      Bm_init( &lidec->bm_literal[i][j] );
-  }
-
-static inline int Lid_state( const uint8_t prev_byte )
-  { return ( prev_byte >> ( 8 - literal_context_bits ) ); }
-
-static inline uint8_t Lid_decode( struct Literal_decoder * const lidec,
-                                  struct Range_decoder * const rdec,
-                                  const uint8_t prev_byte )
-  { return Rd_decode_tree( rdec, lidec->bm_literal[Lid_state(prev_byte)], 8 ); }
-
-static inline uint8_t Lid_decode_matched( struct Literal_decoder * const lidec,
-                                          struct Range_decoder * const rdec,
-                                          const uint8_t prev_byte,
-                                          const uint8_t match_byte )
-  { return Rd_decode_matched( rdec, lidec->bm_literal[Lid_state(prev_byte)], match_byte ); }
-
-
 struct LZ_decoder
   {
-  long long partial_data_pos;
+  unsigned long long partial_data_pos;
   int dictionary_size;
   int buffer_size;
   uint8_t * buffer;		/* output buffer */
@@ -266,6 +258,7 @@ struct LZ_decoder
   int outfd;			/* output file descriptor */
   int member_version;
 
+  Bit_model bm_literal[1<<literal_context_bits][0x300];
   Bit_model bm_match[states][pos_states];
   Bit_model bm_rep[states];
   Bit_model bm_rep0[states];
@@ -273,13 +266,12 @@ struct LZ_decoder
   Bit_model bm_rep2[states];
   Bit_model bm_len[states][pos_states];
   Bit_model bm_dis_slot[max_dis_states][1<<dis_slot_bits];
-  Bit_model bm_dis[modeled_distances-end_dis_model+1];
+  Bit_model bm_dis[modeled_distances-end_dis_model];
   Bit_model bm_align[dis_align_size];
 
   struct Range_decoder * range_decoder;
   struct Len_decoder len_decoder;
   struct Len_decoder rep_match_len_decoder;
-  struct Literal_decoder literal_decoder;
   };
 
 void LZd_flush_data( struct LZ_decoder * const decoder );
@@ -315,7 +307,7 @@ static inline void LZd_copy_block( struct LZ_decoder * const decoder,
   int i = decoder->pos - distance - 1;
   if( i < 0 ) i += decoder->buffer_size;
   if( len < decoder->buffer_size - max( decoder->pos, i ) &&
-      len <= abs( decoder->pos - i ) )
+      len <= abs( decoder->pos - i ) )		/* no wrap, no overlap */
     {
     memcpy( decoder->buffer + decoder->pos, decoder->buffer + i, len );
     decoder->pos += len;
@@ -332,7 +324,6 @@ static inline bool LZd_init( struct LZ_decoder * const decoder,
                              const File_header header,
                              struct Range_decoder * const rdec, const int ofd )
   {
-  int i, j;
   decoder->partial_data_pos = 0;
   decoder->dictionary_size = Fh_get_dictionary_size( header );
   decoder->buffer_size = max( 65536, decoder->dictionary_size );
@@ -344,30 +335,20 @@ static inline bool LZd_init( struct LZ_decoder * const decoder,
   decoder->outfd = ofd;
   decoder->member_version = Fh_version( header );
 
-  for( i = 0; i < states; ++i )
-    {
-    for( j = 0; j < pos_states; ++j )
-      {
-      Bm_init( &decoder->bm_match[i][j] );
-      Bm_init( &decoder->bm_len[i][j] );
-      }
-    Bm_init( &decoder->bm_rep[i] );
-    Bm_init( &decoder->bm_rep0[i] );
-    Bm_init( &decoder->bm_rep1[i] );
-    Bm_init( &decoder->bm_rep2[i] );
-    }
-  for( i = 0; i < max_dis_states; ++i )
-    for( j = 0; j < 1<<dis_slot_bits; ++j )
-      Bm_init( &decoder->bm_dis_slot[i][j] );
-  for( i = 0; i < modeled_distances-end_dis_model+1; ++i )
-    Bm_init( &decoder->bm_dis[i] );
-  for( i = 0; i < dis_align_size; ++i )
-    Bm_init( &decoder->bm_align[i] );
+  Bm_array_init( decoder->bm_literal[0], (1 << literal_context_bits) * 0x300 );
+  Bm_array_init( decoder->bm_match[0], states * pos_states );
+  Bm_array_init( decoder->bm_rep, states );
+  Bm_array_init( decoder->bm_rep0, states );
+  Bm_array_init( decoder->bm_rep1, states );
+  Bm_array_init( decoder->bm_rep2, states );
+  Bm_array_init( decoder->bm_len[0], states * pos_states );
+  Bm_array_init( decoder->bm_dis_slot[0], max_dis_states * (1 << dis_slot_bits) );
+  Bm_array_init( decoder->bm_dis, modeled_distances - end_dis_model );
+  Bm_array_init( decoder->bm_align, dis_align_size );
 
   decoder->range_decoder = rdec;
   Led_init( &decoder->len_decoder );
   Led_init( &decoder->rep_match_len_decoder );
-  Lid_init( &decoder->literal_decoder );
   decoder->buffer[decoder->buffer_size-1] = 0;	/* prev_byte of first_byte */
   return true;
   }
@@ -375,10 +356,11 @@ static inline bool LZd_init( struct LZ_decoder * const decoder,
 static inline void LZd_free( struct LZ_decoder * const decoder )
   { free( decoder->buffer ); }
 
-static inline uint32_t LZd_crc( const struct LZ_decoder * const decoder )
+static inline unsigned LZd_crc( const struct LZ_decoder * const decoder )
   { return decoder->crc ^ 0xFFFFFFFFU; }
 
-static inline long long LZd_data_position( const struct LZ_decoder * const decoder )
+static inline unsigned long long
+LZd_data_position( const struct LZ_decoder * const decoder )
   { return decoder->partial_data_pos + decoder->pos; }
 
 int LZd_decode_member( struct LZ_decoder * const decoder,
diff --git a/doc/clzip.1 b/doc/clzip.1
index 48401e2..02181a7 100644
--- a/doc/clzip.1
+++ b/doc/clzip.1
@@ -1,5 +1,5 @@
 .\" DO NOT MODIFY THIS FILE!  It was generated by help2man 1.37.1.
-.TH CLZIP "1" "February 2012" "Clzip 1.3" "User Commands"
+.TH CLZIP "1" "February 2013" "Clzip 1.4" "User Commands"
 .SH NAME
 Clzip \- reduces the size of files
 .SH SYNOPSIS
@@ -76,7 +76,7 @@ Report bugs to lzip\-bug@nongnu.org
 .br
 Clzip home page: http://www.nongnu.org/lzip/clzip.html
 .SH COPYRIGHT
-Copyright \(co 2012 Antonio Diaz Diaz.
+Copyright \(co 2013 Antonio Diaz Diaz.
 License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
 .br
 This is free software: you are free to change and redistribute it.
diff --git a/doc/clzip.info b/doc/clzip.info
index e4b9d3e..ccec058 100644
--- a/doc/clzip.info
+++ b/doc/clzip.info
@@ -12,7 +12,7 @@ File: clzip.info,  Node: Top,  Next: Introduction,  Up: (dir)
 Clzip Manual
 ************
 
-This manual is for Clzip (version 1.3, 25 February 2012).
+This manual is for Clzip (version 1.4, 18 February 2013).
 
 * Menu:
 
@@ -25,7 +25,7 @@ This manual is for Clzip (version 1.3, 25 February 2012).
 * Concept Index::         Index of concepts
 
 
-   Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz.
+   Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
 
    This manual is free documentation: you have unlimited permission to
 copy, distribute and modify it.
@@ -73,11 +73,15 @@ also split the compressed output in volumes of a given size, even when
 reading from standard input. This allows the direct creation of
 multivolume compressed tar archives.
 
-   The amount of memory required for compression is about 5 MiB plus 1
-or 2 times the dictionary size limit (1 if input file size is less than
-dictionary size limit, else 2) plus 8 times the dictionary size really
-used. The amount of memory required for decompression is only a few tens
-of KiB larger than the dictionary size really used.
+   Clzip is able to compress and decompress streams of unlimited size by
+automatically creating multi-member output. The members so created are
+large (about 2^60 bytes each).
+
+   The amount of memory required for compression is about 1 or 2 times
+the dictionary size limit (1 if input file size is less than dictionary
+size limit, else 2) plus 9 times the dictionary size really used. The
+amount of memory required for decompression is only a few tens of KiB
+larger than the dictionary size really used.
 
    Clzip will automatically use the smallest possible dictionary size
 without exceeding the given limit. Keep in mind that the decompression
@@ -328,7 +332,12 @@ File: clzip.info,  Node: File Format,  Next: Examples,  Prev: Invoking Clzip,  U
 4 File Format
 *************
 
-In the diagram below, a box like this:
+Perfection is reached, not when there is no longer anything to add, but
+when there is no longer anything to take away.
+-- Antoine de Saint-Exupery
+
+
+   In the diagram below, a box like this:
 +---+
 |   | <-- the vertical bars might be missing
 +---+
@@ -357,15 +366,18 @@ additional information before, between, or after them.
      "LZIP".
 
 `VN (version number, 1 byte)'
-     Just in case something needs to be modified in the future. Valid
-     values are 0 and 1. Version 0 files are deprecated. They can
-     contain only one member and lack the `Member size' field.
+     Just in case something needs to be modified in the future. 1 for
+     now.
 
 `DS (coded dictionary size, 1 byte)'
-     Bits 4-0 contain the base 2 logarithm of the base dictionary size.
-     Bits 7-5 contain the number of "wedges" to substract from the base
-     dictionary size to obtain the dictionary size. The size of a wedge
-     is (base dictionary size / 16).
+     Lzip divides the distance between any two powers of 2 into 8
+     equally spaced intervals, named "wedges". The dictionary size is
+     calculated by taking a power of 2 (the base size) and substracting
+     from it a number of wedges between 0 and 7. The size of a wedge is
+     (base_size / 16).
+     Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).
+     Bits 7-5 contain the number of wedges (0 to 7) to substract from
+     the base size to obtain the dictionary size.
      Valid values for dictionary size range from 4KiB to 512MiB.
 
 `Lzma stream'
@@ -379,9 +391,9 @@ additional information before, between, or after them.
      Size of the uncompressed original data.
 
 `Member size (8 bytes)'
-     Total size of the member, including header and trailer. This
-     facilitates safe recovery of undamaged members from multi-member
-     files.
+     Total size of the member, including header and trailer. This field
+     acts as a distributed index, and facilitates safe recovery of
+     undamaged members from multi-member files.
 
 
 
@@ -496,13 +508,13 @@ Concept Index
 
 Tag Table:
 Node: Top226
-Node: Introduction914
-Node: Algorithm4584
-Node: Invoking Clzip7108
-Node: File Format12380
-Node: Examples14375
-Node: Problems16336
-Node: Concept Index16862
+Node: Introduction920
+Node: Algorithm4755
+Node: Invoking Clzip7279
+Node: File Format12551
+Node: Examples14860
+Node: Problems16821
+Node: Concept Index17347
 
 End Tag Table
 
diff --git a/doc/clzip.texinfo b/doc/clzip.texinfo
index 284ed3f..1d0479f 100644
--- a/doc/clzip.texinfo
+++ b/doc/clzip.texinfo
@@ -6,8 +6,8 @@
 @finalout
 @c %**end of header
 
-@set UPDATED 25 February 2012
-@set VERSION 1.3
+@set UPDATED 18 February 2013
+@set VERSION 1.4
 
 @dircategory Data Compression
 @direntry
@@ -45,7 +45,7 @@ This manual is for Clzip (version @value{VERSION}, @value{UPDATED}).
 @end menu
 
 @sp 1
-Copyright @copyright{} 2010, 2011, 2012 Antonio Diaz Diaz.
+Copyright @copyright{} 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
 
 This manual is free documentation: you have unlimited permission
 to copy, distribute and modify it.
@@ -92,11 +92,15 @@ also split the compressed output in volumes of a given size, even when
 reading from standard input. This allows the direct creation of
 multivolume compressed tar archives.
 
-The amount of memory required for compression is about 5 MiB plus 1 or 2
-times the dictionary size limit (1 if input file size is less than
-dictionary size limit, else 2) plus 8 times the dictionary size really
-used. The amount of memory required for decompression is only a few tens
-of KiB larger than the dictionary size really used.
+Clzip is able to compress and decompress streams of unlimited size by
+automatically creating multi-member output. The members so created are
+large (about 2^60 bytes each).
+
+The amount of memory required for compression is about 1 or 2 times the
+dictionary size limit (1 if input file size is less than dictionary size
+limit, else 2) plus 9 times the dictionary size really used. The amount
+of memory required for decompression is only a few tens of KiB larger
+than the dictionary size really used.
 
 Clzip will automatically use the smallest possible dictionary size
 without exceeding the given limit. Keep in mind that the decompression
@@ -350,6 +354,11 @@ Table of SI and binary prefixes (unit multipliers):
 @chapter File Format
 @cindex file format
 
+Perfection is reached, not when there is no longer anything to add, but
+when there is no longer anything to take away.@*
+--- Antoine de Saint-Exupery
+
+@sp 1
 In the diagram below, a box like this:
 @verbatim
 +---+
@@ -385,15 +394,16 @@ All multibyte values are stored in little endian order.
 A four byte string, identifying the lzip format, with the value "LZIP".
 
 @item VN (version number, 1 byte)
-Just in case something needs to be modified in the future. Valid values
-are 0 and 1. Version 0 files are deprecated. They can contain only one
-member and lack the @samp{Member size} field.
+Just in case something needs to be modified in the future. 1 for now.
 
 @item DS (coded dictionary size, 1 byte)
-Bits 4-0 contain the base 2 logarithm of the base dictionary size.@*
-Bits 7-5 contain the number of "wedges" to substract from the base
-dictionary size to obtain the dictionary size. The size of a wedge is
-(base dictionary size / 16).@*
+Lzip divides the distance between any two powers of 2 into 8 equally
+spaced intervals, named "wedges". The dictionary size is calculated by
+taking a power of 2 (the base size) and substracting from it a number of
+wedges between 0 and 7. The size of a wedge is (base_size / 16).@*
+Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).@*
+Bits 7-5 contain the number of wedges (0 to 7) to substract from the
+base size to obtain the dictionary size.@*
 Valid values for dictionary size range from 4KiB to 512MiB.
 
 @item Lzma stream
@@ -407,8 +417,9 @@ CRC of the uncompressed original data.
 Size of the uncompressed original data.
 
 @item Member size (8 bytes)
-Total size of the member, including header and trailer. This facilitates
-safe recovery of undamaged members from multi-member files.
+Total size of the member, including header and trailer. This field acts
+as a distributed index, and facilitates safe recovery of undamaged
+members from multi-member files.
 
 @end table
 
diff --git a/encoder.c b/encoder.c
index 84e2586..cb6c8d6 100644
--- a/encoder.c
+++ b/encoder.c
@@ -1,5 +1,5 @@
 /*  Clzip - Data compressor based on the LZMA algorithm
-    Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz.
+    Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -60,9 +60,9 @@ void Mf_normalize_pos( struct Matchfinder * const mf )
     mf->partial_data_pos += offset;
     mf->pos -= offset;
     mf->stream_pos -= offset;
-    for( i = 0; i < num_prev_positions; ++i )
+    for( i = 0; i < mf->num_prev_positions; ++i )
       if( mf->prev_positions[i] >= 0 ) mf->prev_positions[i] -= offset;
-    for( i = 0; i < 2 * mf->dictionary_size; ++i )
+    for( i = 0; i < 2 * ( mf->dictionary_size + 1 ); ++i )
       if( mf->prev_pos_tree[i] >= 0 ) mf->prev_pos_tree[i] -= offset;
     Mf_read_block( mf );
     }
@@ -70,44 +70,53 @@ void Mf_normalize_pos( struct Matchfinder * const mf )
 
 
 bool Mf_init( struct Matchfinder * const mf,
-              const int dict_size, const int len_limit, const int ifd )
+              const int dict_size, const int match_len_limit, const int ifd )
   {
   const int buffer_size_limit = ( 2 * dict_size ) + before_size + after_size;
-  int i;
+  int i, size;
+
   mf->partial_data_pos = 0;
-  mf->prev_positions = (int32_t *)malloc( num_prev_positions * sizeof (int32_t) );
-  if( !mf->prev_positions ) return false;
+  mf->match_len_limit = match_len_limit;
   mf->pos = 0;
   mf->cyclic_pos = 0;
   mf->stream_pos = 0;
-  mf->match_len_limit = len_limit;
-  mf->cycles = ( len_limit < max_match_len ) ? 16 + ( len_limit / 2 ) : 256;
+  mf->cycles = ( match_len_limit < max_match_len ) ?
+               16 + ( match_len_limit / 2 ) : 256;
   mf->infd = ifd;
   mf->at_stream_end = false;
 
-  for( i = 0; i < num_prev_positions; ++i ) mf->prev_positions[i] = -1;
   mf->buffer_size = max( 65536, dict_size );
   mf->buffer = (uint8_t *)malloc( mf->buffer_size );
-  if( !mf->buffer ) { free( mf->prev_positions ); return false; }
+  if( !mf->buffer ) return false;
   if( Mf_read_block( mf ) && !mf->at_stream_end &&
       mf->buffer_size < buffer_size_limit )
     {
+    uint8_t * tmp;
     mf->buffer_size = buffer_size_limit;
-    uint8_t * const tmp = (uint8_t *)realloc( mf->buffer, mf->buffer_size );
-    if( !tmp )
-      { free( mf->buffer ); free( mf->prev_positions ); return false; }
+    tmp = (uint8_t *)realloc( mf->buffer, mf->buffer_size );
+    if( !tmp ) { free( mf->buffer ); return false; }
     mf->buffer = tmp;
     Mf_read_block( mf );
     }
   if( mf->at_stream_end && mf->stream_pos < dict_size )
     mf->dictionary_size = max( min_dictionary_size, mf->stream_pos );
-  else mf->dictionary_size = dict_size;
+  else
+    mf->dictionary_size = dict_size;
   mf->pos_limit = mf->buffer_size;
   if( !mf->at_stream_end ) mf->pos_limit -= after_size;
-  mf->prev_pos_tree =
-    (int32_t *)malloc( 2 * mf->dictionary_size * sizeof (int32_t) );
-  if( !mf->prev_pos_tree )
-    { free( mf->buffer ); free( mf->prev_positions ); return false; }
+  size = 1 << max( 16, real_bits( mf->dictionary_size - 1 ) - 2 );
+  if( mf->dictionary_size > 1 << 26 )
+    size >>= 1;
+  mf->key4_mask = size - 1;
+  size += num_prev_positions2;
+  size += num_prev_positions3;
+
+  mf->num_prev_positions = size;
+  size += ( 2 * ( mf->dictionary_size + 1 ) );
+  mf->prev_positions = (int32_t *)malloc( size * sizeof (int32_t) );
+  if( !mf->prev_positions ) { free( mf->buffer ); return false; }
+  mf->prev_pos_tree = mf->prev_positions + mf->num_prev_positions;
+  for( i = 0; i < mf->num_prev_positions; ++i ) mf->prev_positions[i] = -1;
   return true;
   }
 
@@ -121,23 +130,24 @@ void Mf_reset( struct Matchfinder * const mf )
   mf->stream_pos -= mf->pos;
   mf->pos = 0;
   mf->cyclic_pos = 0;
-  for( i = 0; i < num_prev_positions; ++i ) mf->prev_positions[i] = -1;
+  for( i = 0; i < mf->num_prev_positions; ++i ) mf->prev_positions[i] = -1;
   Mf_read_block( mf );
   }
 
 
-int Mf_longest_match_len( struct Matchfinder * const mf, int * const distances )
+int Mf_get_match_pairs( struct Matchfinder * const mf, struct Pair * pairs )
   {
   int32_t * ptr0 = mf->prev_pos_tree + ( mf->cyclic_pos << 1 );
   int32_t * ptr1 = ptr0 + 1;
   int32_t * newptr;
-  const uint8_t * newdata;
   int len = 0, len0 = 0, len1 = 0;
   int maxlen = min_match_len - 1;
-  const int min_pos = (mf->pos >= mf->dictionary_size) ?
-                      (mf->pos - mf->dictionary_size + 1) : 0;
+  int num_pairs = 0;
+  const int min_pos = (mf->pos > mf->dictionary_size) ?
+                       mf->pos - mf->dictionary_size : 0;
   const uint8_t * const data = mf->buffer + mf->pos;
-  int count, delta, key2, key3, key4, newpos, tmp;
+  int count, delta, key2, key3, key4, newpos;
+  unsigned tmp;
   int len_limit = mf->match_len_limit;
 
   if( len_limit > Mf_available_bytes( mf ) )
@@ -146,24 +156,39 @@ int Mf_longest_match_len( struct Matchfinder * const mf, int * const distances )
     if( len_limit < 4 ) return 0;
     }
 
-  key2 = num_prev_positions4 + num_prev_positions3 +
-         ( ( (int)data[0] << 8 ) | data[1] );
-  tmp = crc32[data[0]] ^ data[1] ^ ( (uint32_t)data[2] << 8 );
-  key3 = num_prev_positions4 + (int)( tmp & ( num_prev_positions3 - 1 ) );
-  key4 = (int)( ( tmp ^ ( crc32[data[3]] << 5 ) ) &
-                ( num_prev_positions4 - 1 ) );
+  tmp = crc32[data[0]] ^ data[1];
+  key2 = tmp & ( num_prev_positions2 - 1 );
+  tmp ^= (uint32_t)data[2] << 8;
+  key3 = num_prev_positions2 + ( tmp & ( num_prev_positions3 - 1 ) );
+  key4 = num_prev_positions2 + num_prev_positions3 +
+         ( ( tmp ^ ( crc32[data[3]] << 5 ) ) & mf->key4_mask );
 
-  if( distances )
+  if( pairs )
     {
-    int np = mf->prev_positions[key2];
-    if( np >= min_pos )
-      { distances[2] = mf->pos - np - 1; maxlen = 2; }
-    else distances[2] = 0x7FFFFFFF;
-    np = mf->prev_positions[key3];
-    if( np >= min_pos && mf->buffer[np] == data[0] )
-      { distances[3] = mf->pos - np - 1; maxlen = 3; }
-    else distances[3] = 0x7FFFFFFF;
-    distances[4] = 0x7FFFFFFF;
+    int np2 = mf->prev_positions[key2];
+    int np3 = mf->prev_positions[key3];
+    if( np2 >= min_pos && mf->buffer[np2] == data[0] )
+      {
+      pairs[0].dis = mf->pos - np2 - 1;
+      pairs[0].len = maxlen = 2;
+      num_pairs = 1;
+      }
+    if( np2 != np3 && np3 >= min_pos && mf->buffer[np3] == data[0] )
+      {
+      maxlen = 3;
+      pairs[num_pairs].dis = mf->pos - np3 - 1;
+      ++num_pairs;
+      np2 = np3;
+      }
+    if( num_pairs > 0 )
+      {
+      delta = mf->pos - np2;
+      while( maxlen < len_limit && data[maxlen-delta] == data[maxlen] )
+        ++maxlen;
+      pairs[num_pairs-1].len = maxlen;
+      if( maxlen >= len_limit ) pairs = 0;
+      }
+    if( maxlen < 3 ) maxlen = 3;
     }
 
   mf->prev_positions[key2] = mf->pos;
@@ -174,46 +199,43 @@ int Mf_longest_match_len( struct Matchfinder * const mf, int * const distances )
   for( count = mf->cycles; ; )
     {
     if( newpos < min_pos || --count < 0 ) { *ptr0 = *ptr1 = -1; break; }
-    newdata = mf->buffer + newpos;
-    while( len < len_limit && newdata[len] == data[len] ) ++len;
 
     delta = mf->pos - newpos;
-    if( distances ) while( maxlen < len ) distances[++maxlen] = delta - 1;
-
     newptr = mf->prev_pos_tree +
       ( ( mf->cyclic_pos - delta +
-          ( ( mf->cyclic_pos >= delta ) ? 0 : mf->dictionary_size ) ) << 1 );
-
-    if( len < len_limit )
+          ( (mf->cyclic_pos >= delta) ? 0 : mf->dictionary_size + 1 ) ) << 1 );
+    if( data[len-delta] == data[len] )
       {
-      if( newdata[len] < data[len] )
+      while( ++len < len_limit && data[len-delta] == data[len] ) {}
+      if( pairs && maxlen < len )
         {
-        *ptr0 = newpos;
-        ptr0 = newptr + 1;
-        newpos = *ptr0;
-        len0 = len; if( len1 < len ) len = len1;
+        pairs[num_pairs].dis = delta - 1;
+        pairs[num_pairs].len = maxlen = len;
+        ++num_pairs;
         }
-      else
+      if( len >= len_limit )
         {
-        *ptr1 = newpos;
-        ptr1 = newptr;
-        newpos = *ptr1;
-        len1 = len; if( len0 < len ) len = len0;
+        *ptr0 = newptr[0];
+        *ptr1 = newptr[1];
+        break;
         }
       }
+    if( data[len-delta] < data[len] )
+      {
+      *ptr0 = newpos;
+      ptr0 = newptr + 1;
+      newpos = *ptr0;
+      len0 = len; if( len1 < len ) len = len1;
+      }
     else
       {
-      *ptr0 = newptr[0];
-      *ptr1 = newptr[1];
-      break;
+      *ptr1 = newpos;
+      ptr1 = newptr;
+      newpos = *ptr1;
+      len1 = len; if( len0 < len ) len = len0;
       }
     }
-  if( distances )
-    {
-    if( distances[3] > distances[4] ) distances[3] = distances[4];
-    if( distances[2] > distances[3] ) distances[2] = distances[3];
-    }
-  return maxlen;
+  return num_pairs;
   }
 
 
@@ -222,8 +244,7 @@ void Re_flush_data( struct Range_encoder * const renc )
   if( renc->pos > 0 )
     {
     if( renc->outfd >= 0 &&
-        writeblock( renc->outfd, renc->buffer,
-                    renc->pos ) != renc->pos )
+        writeblock( renc->outfd, renc->buffer, renc->pos ) != renc->pos )
       { show_error( "Write error", errno, false ); cleanup_and_fail( 1 ); }
     renc->partial_member_pos += renc->pos;
     renc->pos = 0;
@@ -263,7 +284,7 @@ void Lee_encode( struct Len_encoder * const len_encoder,
 
 
      /* End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len) */
-void LZe_full_flush( struct LZ_encoder * const encoder, const State state )
+static void LZe_full_flush( struct LZ_encoder * const encoder, const State state )
   {
   int i;
   const int pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask;
@@ -282,7 +303,7 @@ void LZe_full_flush( struct LZ_encoder * const encoder, const State state )
   }
 
 
-void LZe_fill_align_prices( struct LZ_encoder * const encoder )
+static void LZe_fill_align_prices( struct LZ_encoder * const encoder )
   {
   int i;
   for( i = 0; i < dis_align_size; ++i )
@@ -292,7 +313,7 @@ void LZe_fill_align_prices( struct LZ_encoder * const encoder )
   }
 
 
-void LZe_fill_distance_prices( struct LZ_encoder * const encoder )
+static void LZe_fill_distance_prices( struct LZ_encoder * const encoder )
   {
   int dis, dis_state;
   for( dis = start_dis_model; dis < modeled_distances; ++dis )
@@ -301,7 +322,7 @@ void LZe_fill_distance_prices( struct LZ_encoder * const encoder )
     const int direct_bits = ( dis_slot >> 1 ) - 1;
     const int base = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
     const int price =
-      price_symbol_reversed( encoder->bm_dis + base - dis_slot,
+      price_symbol_reversed( encoder->bm_dis + base - dis_slot - 1,
                              dis - base, direct_bits );
     for( dis_state = 0; dis_state < max_dis_states; ++dis_state )
       encoder->dis_prices[dis_state][dis] = price;
@@ -317,7 +338,7 @@ void LZe_fill_distance_prices( struct LZ_encoder * const encoder )
       dsp[slot] = price_symbol( bmds, slot, dis_slot_bits );
     for( ; slot < encoder->num_dis_slots; ++slot )
       dsp[slot] = price_symbol( bmds, slot, dis_slot_bits ) +
-                  (((( slot >> 1 ) - 1 ) - dis_align_bits ) << price_shift );
+                  (((( slot >> 1 ) - 1 ) - dis_align_bits ) << price_shift_bits );
 
     for( dis = 0; dis < start_dis_model; ++dis )
       dp[dis] = dsp[dis];
@@ -331,39 +352,29 @@ bool LZe_init( struct LZ_encoder * const encoder,
                struct Matchfinder * const mf,
                const File_header header, const int outfd )
   {
-  int i, j;
-  encoder->longest_match_found = 0;
+  int i;
+  encoder->pending_num_pairs = 0;
   encoder->crc = 0xFFFFFFFFU;
 
-  for( i = 0; i < states; ++i )
-    {
-    for( j = 0; j < pos_states; ++j )
-      {
-      Bm_init( &encoder->bm_match[i][j] );
-      Bm_init( &encoder->bm_len[i][j] );
-      }
-    Bm_init( &encoder->bm_rep[i] );
-    Bm_init( &encoder->bm_rep0[i] );
-    Bm_init( &encoder->bm_rep1[i] );
-    Bm_init( &encoder->bm_rep2[i] );
-    }
-  for( i = 0; i < max_dis_states; ++i )
-    for( j = 0; j < 1<<dis_slot_bits; ++j )
-      Bm_init( &encoder->bm_dis_slot[i][j] );
-  for( i = 0; i < modeled_distances-end_dis_model+1; ++i )
-    Bm_init( &encoder->bm_dis[i] );
-  for( i = 0; i < dis_align_size; ++i )
-    Bm_init( &encoder->bm_align[i] );
+  Bm_array_init( encoder->bm_literal[0], (1 << literal_context_bits) * 0x300 );
+  Bm_array_init( encoder->bm_match[0], states * pos_states );
+  Bm_array_init( encoder->bm_rep, states );
+  Bm_array_init( encoder->bm_rep0, states );
+  Bm_array_init( encoder->bm_rep1, states );
+  Bm_array_init( encoder->bm_rep2, states );
+  Bm_array_init( encoder->bm_len[0], states * pos_states );
+  Bm_array_init( encoder->bm_dis_slot[0], max_dis_states * (1 << dis_slot_bits) );
+  Bm_array_init( encoder->bm_dis, modeled_distances - end_dis_model );
+  Bm_array_init( encoder->bm_align, dis_align_size );
 
   encoder->matchfinder = mf;
   if( !Re_init( &encoder->range_encoder, outfd ) ) return false;
   Lee_init( &encoder->len_encoder, encoder->matchfinder->match_len_limit );
   Lee_init( &encoder->rep_match_len_encoder, encoder->matchfinder->match_len_limit );
-  Lie_init( &encoder->literal_encoder );
   encoder->num_dis_slots =
     2 * real_bits( encoder->matchfinder->dictionary_size - 1 );
 
-  LZe_fill_align_prices( encoder );
+  encoder->align_price_count = 0;
 
   for( i = 0; i < Fh_size; ++i )
     Re_put_byte( &encoder->range_encoder, header[i] );
@@ -375,24 +386,27 @@ bool LZe_init( struct LZ_encoder * const encoder,
    trials[0]..trials[retval-1] contain the steps to encode.
    ( trials[0].dis == -1 && trials[0].price == 1 ) means literal.
 */
-int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
-                            const int reps[num_rep_distances],
-                            const State state )
+static int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
+                                   const int reps[num_rep_distances],
+                                   const State state )
   {
-  int main_len, i, rep, cur = 0, num_trials;
+  int main_len, num_pairs, i, rep, cur = 0, num_trials, len;
   int replens[num_rep_distances];
   int rep_index = 0;
 
-  if( encoder->longest_match_found > 0 )	/* from previous call */
+  if( encoder->pending_num_pairs > 0 )		/* from previous call */
     {
-    main_len = encoder->longest_match_found;
-    encoder->longest_match_found = 0;
+    num_pairs = encoder->pending_num_pairs;
+    encoder->pending_num_pairs = 0;
     }
-  else main_len = LZe_read_match_distances( encoder );
+  else
+    num_pairs = LZe_read_match_distances( encoder );
+  main_len = ( num_pairs > 0 ) ? encoder->pairs[num_pairs-1].len : 0;
 
   for( i = 0; i < num_rep_distances; ++i )
     {
-    replens[i] = Mf_true_match_len( encoder->matchfinder, 0, reps[i] + 1, max_match_len );
+    replens[i] =
+      Mf_true_match_len( encoder->matchfinder, 0, reps[i] + 1, max_match_len );
     if( replens[i] > replens[rep_index] ) rep_index = i;
     }
   if( replens[rep_index] >= encoder->matchfinder->match_len_limit )
@@ -405,9 +419,7 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
 
   if( main_len >= encoder->matchfinder->match_len_limit )
     {
-    encoder->trials[0].dis =
-      encoder->match_distances[encoder->matchfinder->match_len_limit] +
-      num_rep_distances;
+    encoder->trials[0].dis = encoder->pairs[num_pairs-1].dis + num_rep_distances;
     encoder->trials[0].price = main_len;
     LZe_move_pos( encoder, main_len );
     return main_len;
@@ -422,23 +434,22 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
   const uint8_t match_byte = Mf_peek( encoder->matchfinder, -reps[0]-1 );
 
   encoder->trials[0].state = state;
-  for( i = 0; i < num_rep_distances; ++i )
-    encoder->trials[0].reps[i] = reps[i];
   encoder->trials[1].dis = -1;
-  encoder->trials[1].prev_index = 0;
   encoder->trials[1].price = price0( encoder->bm_match[state][pos_state] );
   if( St_is_char( state ) )
     encoder->trials[1].price +=
-      Lie_price_symbol( &encoder->literal_encoder, prev_byte, cur_byte );
+      LZe_price_literal( encoder, prev_byte, cur_byte );
   else
     encoder->trials[1].price +=
-      Lie_price_matched( &encoder->literal_encoder, prev_byte, cur_byte, match_byte );
+      LZe_price_matched( encoder, prev_byte, cur_byte, match_byte );
 
   if( match_byte == cur_byte )
-    Tr_update( &encoder->trials[1], 0, 0, rep_match_price +
-               LZe_price_rep_len1( encoder, state, pos_state ) );
+    Tr_update( &encoder->trials[1], rep_match_price +
+               LZe_price_rep_len1( encoder, state, pos_state ), 0, 0 );
+
+  num_trials = max( main_len, replens[rep_index] );
 
-  if( main_len < min_match_len )
+  if( num_trials < min_match_len )
     {
     encoder->trials[0].dis = encoder->trials[1].dis;
     encoder->trials[0].price = 1;
@@ -446,45 +457,51 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
     return 1;
     }
 
-  if( main_len <= replens[rep_index] )
+  for( i = 0; i < num_rep_distances; ++i )
+    encoder->trials[0].reps[i] = reps[i];
+  encoder->trials[1].prev_index = 0;
+  encoder->trials[1].prev_index2 = single_step_trial;
+
+  for( len = min_match_len; len <= num_trials; ++len )
+    encoder->trials[len].price = infinite_price;
+
+  for( rep = 0; rep < num_rep_distances; ++rep )
     {
-    int len;
-    main_len = replens[rep_index];
-    for( len = min_match_len; len <= main_len; ++len )
-      encoder->trials[len].price = infinite_price;
+    int price;
+    if( replens[rep] < min_match_len ) continue;
+    price = rep_match_price + LZe_price_rep( encoder, rep, state, pos_state );
+
+    for( len = min_match_len; len <= replens[rep]; ++len )
+      Tr_update( &encoder->trials[len], price +
+                 Lee_price( &encoder->rep_match_len_encoder, len, pos_state ),
+                 rep, 0 );
     }
-  else
+
+  if( main_len > replens[0] )
     {
-    int len;
     const int normal_match_price = match_price + price0( encoder->bm_rep[state] );
-    for( len = min_match_len; len <= main_len; ++len )
+    i = 0, len = max( replens[0] + 1, min_match_len );
+    while( len > encoder->pairs[i].len ) ++i;
+    while( true )
       {
-      encoder->trials[len].dis = encoder->match_distances[len] + num_rep_distances;
-      encoder->trials[len].prev_index = 0;
-      encoder->trials[len].price = normal_match_price +
-        LZe_price_pair( encoder, encoder->match_distances[len], len, pos_state );
+      const int dis = encoder->pairs[i].dis;
+      Tr_update( &encoder->trials[len], normal_match_price +
+                 LZe_price_pair( encoder, dis, len, pos_state ),
+                 dis + num_rep_distances, 0 );
+      if( ++len > encoder->pairs[i].len && ++i >= num_pairs ) break;
       }
     }
-
-  for( rep = 0; rep < num_rep_distances; ++rep )
-    {
-    const int price = rep_match_price +
-                      LZe_price_rep( encoder, rep, state, pos_state );
-    int len;
-    for( len = min_match_len; len <= replens[rep]; ++len )
-      Tr_update( &encoder->trials[len], rep, 0, price +
-                 Lee_price( &encoder->rep_match_len_encoder, len, pos_state ) );
-    }
   }
 
-  num_trials = main_len;
   Mf_move_pos( encoder->matchfinder );
 
-  while( true )
+  while( true )				/* price optimization loop */
     {
     struct Trial *cur_trial, *next_trial;
-    int newlen, pos_state, prev_index, len_limit;
+    int newlen, pos_state, prev_index, prev_index2, available_bytes, len_limit;
+    int start_len = min_match_len;
     int next_price, match_price, rep_match_price;
+    State cur_state;
     uint8_t prev_byte, cur_byte, match_byte;
 
     if( ++cur >= num_trials )		/* no more initialized trials */
@@ -492,33 +509,70 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
       LZe_backward( encoder, cur );
       return cur;
       }
-    newlen = LZe_read_match_distances( encoder );
+
+    num_pairs = LZe_read_match_distances( encoder );
+    newlen = ( num_pairs > 0 ) ? encoder->pairs[num_pairs-1].len : 0;
     if( newlen >= encoder->matchfinder->match_len_limit )
       {
-      encoder->longest_match_found = newlen;
+      encoder->pending_num_pairs = num_pairs;
       LZe_backward( encoder, cur );
       return cur;
       }
 
+    /* give final values to current trial */
     cur_trial = &encoder->trials[cur];
     prev_index = cur_trial->prev_index;
+    prev_index2 = cur_trial->prev_index2;
 
-    cur_trial->state = encoder->trials[prev_index].state;
-
-    for( i = 0; i < num_rep_distances; ++i )
-      cur_trial->reps[i] = encoder->trials[prev_index].reps[i];
+    if( prev_index2 != single_step_trial )
+      {
+      --prev_index;
+      if( prev_index2 >= 0 )
+        {
+        cur_state = encoder->trials[prev_index2].state;
+        if( cur_trial->dis2 < num_rep_distances )
+          cur_state = St_set_rep( cur_state );
+        else
+          cur_state = St_set_match( cur_state );
+        }
+      else
+        cur_state = encoder->trials[prev_index].state;
+      cur_state = St_set_char( cur_state );
+      }
+    else
+      cur_state = encoder->trials[prev_index].state;
 
     if( prev_index == cur - 1 )
       {
-      if( cur_trial->dis == 0 ) St_set_short_rep( &cur_trial->state );
-      else St_set_char( &cur_trial->state );
+      if( cur_trial->dis == 0 )
+        cur_state = St_set_short_rep( cur_state );
+      else
+        cur_state = St_set_char( cur_state );
+      for( i = 0; i < num_rep_distances; ++i )
+        cur_trial->reps[i] = encoder->trials[prev_index].reps[i];
       }
     else
       {
-      if( cur_trial->dis < num_rep_distances ) St_set_rep( &cur_trial->state );
-      else St_set_match( &cur_trial->state );
-      LZe_mtf_reps( cur_trial->dis, cur_trial->reps );
+      int dis;
+      if( prev_index2 >= 0 )
+        {
+        dis = cur_trial->dis2;
+        prev_index = prev_index2;
+        cur_state = St_set_rep( cur_state );
+        }
+      else
+        {
+        dis = cur_trial->dis;
+        if( dis < num_rep_distances )
+          cur_state = St_set_rep( cur_state );
+        else
+          cur_state = St_set_match( cur_state );
+        }
+      for( i = 0; i < num_rep_distances; ++i )
+        cur_trial->reps[i] = encoder->trials[prev_index].reps[i];
+      LZe_mtf_reps( dis, cur_trial->reps );
       }
+    cur_trial->state = cur_state;
 
     pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask;
     prev_byte = Mf_peek( encoder->matchfinder, -1 );
@@ -526,81 +580,162 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
     match_byte = Mf_peek( encoder->matchfinder, -cur_trial->reps[0]-1 );
 
     next_price = cur_trial->price +
-                 price0( encoder->bm_match[cur_trial->state][pos_state] );
-    if( St_is_char( cur_trial->state ) )
-      next_price += Lie_price_symbol( &encoder->literal_encoder,
-                                      prev_byte, cur_byte );
+                 price0( encoder->bm_match[cur_state][pos_state] );
+    if( St_is_char( cur_state ) )
+      next_price += LZe_price_literal( encoder, prev_byte, cur_byte );
     else
-      next_price += Lie_price_matched( &encoder->literal_encoder,
+      next_price += LZe_price_matched( encoder,
                                        prev_byte, cur_byte, match_byte );
     Mf_move_pos( encoder->matchfinder );
 
+    /* try last updates to next trial */
     next_trial = &encoder->trials[cur+1];
 
-    Tr_update( next_trial, -1, cur, next_price );
+    Tr_update( next_trial, next_price, -1, cur );
 
-    match_price = cur_trial->price + price1( encoder->bm_match[cur_trial->state][pos_state] );
-    rep_match_price = match_price + price1( encoder->bm_rep[cur_trial->state] );
+    match_price = cur_trial->price + price1( encoder->bm_match[cur_state][pos_state] );
+    rep_match_price = match_price + price1( encoder->bm_rep[cur_state] );
 
     if( match_byte == cur_byte && next_trial->dis != 0 )
-      Tr_update( next_trial, 0, cur, rep_match_price +
-                 LZe_price_rep_len1( encoder, cur_trial->state, pos_state ) );
+      {
+      const int price = rep_match_price +
+                        LZe_price_rep_len1( encoder, cur_state, pos_state );
+      if( price <= next_trial->price )
+        {
+        next_trial->price = price;
+        next_trial->dis = 0;
+        next_trial->prev_index = cur;
+        next_trial->prev_index2 = single_step_trial;
+        }
+      }
 
-    len_limit = min( min( max_num_trials - 1 - cur,
-                          Mf_available_bytes( encoder->matchfinder ) ),
-                     encoder->matchfinder->match_len_limit );
-    if( len_limit < min_match_len ) continue;
+    available_bytes = min( Mf_available_bytes( encoder->matchfinder ) + 1,
+                           max_num_trials - 1 - cur );
+    if( available_bytes < min_match_len ) continue;
 
-    for( rep = 0; rep < num_rep_distances; ++rep )
+    len_limit = min( encoder->matchfinder->match_len_limit, available_bytes );
+
+    /* try literal + rep0 */
+    if( match_byte != cur_byte && next_trial->prev_index != cur )
       {
-      const int dis = cur_trial->reps[rep] + 1;
-      int len = 0;
       const uint8_t * const data = Mf_ptr_to_current_pos( encoder->matchfinder ) - 1;
-      while( len < len_limit && data[len] == data[len-dis] ) ++len;
-      if( len >= min_match_len )
+      const int dis = cur_trial->reps[0] + 1;
+      const int limit = min( encoder->matchfinder->match_len_limit + 1,
+                             available_bytes );
+      len = 1;
+      while( len < limit && data[len-dis] == data[len] ) ++len;
+      if( --len >= min_match_len )
         {
-        const int price = rep_match_price +
-                          LZe_price_rep( encoder, rep, cur_trial->state, pos_state );
-        while( num_trials < cur + len )
+        const int pos_state2 = ( pos_state + 1 ) & pos_state_mask;
+        const State state2 = St_set_char( cur_state );
+        const int price = next_price +
+                  price1( encoder->bm_match[state2][pos_state2] ) +
+                  price1( encoder->bm_rep[state2] ) +
+                  LZe_price_rep0_len( encoder, len, state2, pos_state2 );
+        while( num_trials < cur + 1 + len )
           encoder->trials[++num_trials].price = infinite_price;
-        for( ; len >= min_match_len; --len )
-          Tr_update( &encoder->trials[cur+len], rep, cur, price +
-                     Lee_price( &encoder->rep_match_len_encoder, len, pos_state ) );
+        Tr_update2( &encoder->trials[cur+1+len], price, 0, cur + 1 );
         }
       }
 
-    if( newlen <= len_limit &&
-        ( newlen > min_match_len ||
-          ( newlen == min_match_len &&
-            encoder->match_distances[min_match_len] < modeled_distances ) ) )
+    /* try rep distances */
+    for( rep = 0; rep < num_rep_distances; ++rep )
       {
+      const uint8_t * const data = Mf_ptr_to_current_pos( encoder->matchfinder ) - 1;
+      int price;
+      const int dis = cur_trial->reps[rep] + 1;
+
+      if( data[-dis] != data[0] || data[1-dis] != data[1] ) continue;
+      for( len = min_match_len; len < len_limit; ++len )
+        if( data[len-dis] != data[len] ) break;
+      while( num_trials < cur + len )
+        encoder->trials[++num_trials].price = infinite_price;
+      price = rep_match_price +
+              LZe_price_rep( encoder, rep, cur_state, pos_state );
+      for( i = min_match_len; i <= len; ++i )
+        Tr_update( &encoder->trials[cur+i], price +
+                   Lee_price( &encoder->rep_match_len_encoder, i, pos_state ),
+                   rep, cur );
+
+      if( rep == 0 ) start_len = len + 1;	/* discard shorter matches */
+
+      /* try rep + literal + rep0 */
+      {
+      int len2 = len + 1, pos_state2;
+      const int limit = min( encoder->matchfinder->match_len_limit + len2,
+                             available_bytes );
+      State state2;
+      while( len2 < limit && data[len2-dis] == data[len2] ) ++len2;
+      len2 -= len + 1;
+      if( len2 < min_match_len ) continue;
+
+      pos_state2 = ( pos_state + len ) & pos_state_mask;
+      state2 = St_set_rep( cur_state );
+      price += Lee_price( &encoder->rep_match_len_encoder, len, pos_state ) +
+               price0( encoder->bm_match[state2][pos_state2] ) +
+               LZe_price_matched( encoder, data[len-1], data[len], data[len-dis] );
+      pos_state2 = ( pos_state2 + 1 ) & pos_state_mask;
+      state2 = St_set_char( state2 );
+      price += price1( encoder->bm_match[state2][pos_state2] ) +
+               price1( encoder->bm_rep[state2] ) +
+               LZe_price_rep0_len( encoder, len2, state2, pos_state2 );
+      while( num_trials < cur + len + 1 + len2 )
+        encoder->trials[++num_trials].price = infinite_price;
+      Tr_update3( &encoder->trials[cur+len+1+len2], price, 0, cur + len + 1,
+                  rep, cur );
+      }
+      }
+
+    /* try matches */
+    if( newlen >= start_len && newlen <= len_limit )
+      {
+      int dis;
       const int normal_match_price = match_price +
-                                     price0( encoder->bm_rep[cur_trial->state] );
-      int len;
-      int dis = encoder->match_distances[min_match_len];
-      int dis_state = get_dis_state( min_match_len );
-      int dis_price = infinite_price;
+                                     price0( encoder->bm_rep[cur_state] );
 
       while( num_trials < cur + newlen )
         encoder->trials[++num_trials].price = infinite_price;
 
-      if( dis < modeled_distances )
-        Tr_update( &encoder->trials[cur+min_match_len], dis + num_rep_distances,
-                   cur, normal_match_price + encoder->dis_prices[dis_state][dis] +
-                   Lee_price( &encoder->len_encoder, min_match_len, pos_state ) );
-
-      for( len = min_match_len + 1; len <= newlen; ++len )
+      i = 0;
+      while( start_len > encoder->pairs[i].len ) ++i;
+      dis = encoder->pairs[i].dis;
+      for( len = start_len; ; ++len )
         {
-        if( dis != encoder->match_distances[len] ||
-            dis_state < max_dis_states - 1 )
+        int price = normal_match_price +
+                    LZe_price_pair( encoder, dis, len, pos_state );
+
+        Tr_update( &encoder->trials[cur+len], price, dis + num_rep_distances, cur );
+
+        /* try match + literal + rep0 */
+        if( len == encoder->pairs[i].len )
           {
-          dis = encoder->match_distances[len];
-          dis_state = get_dis_state( len );
-          dis_price = LZe_price_dis( encoder, dis, dis_state );
+          const uint8_t * const data = Mf_ptr_to_current_pos( encoder->matchfinder ) - 1;
+          const int dis2 = dis + 1;
+          int len2 = len + 1;
+          const int limit = min( encoder->matchfinder->match_len_limit + len2,
+                                 available_bytes );
+          while( len2 < limit && data[len2-dis2] == data[len2] ) ++len2;
+          len2 -= len + 1;
+          if( len2 >= min_match_len )
+            {
+            int pos_state2 = ( pos_state + len ) & pos_state_mask;
+            State state2 = St_set_match( cur_state );
+            price += price0( encoder->bm_match[state2][pos_state2] ) +
+                     LZe_price_matched( encoder, data[len-1], data[len], data[len-dis2] );
+            pos_state2 = ( pos_state2 + 1 ) & pos_state_mask;
+            state2 = St_set_char( state2 );
+            price += price1( encoder->bm_match[state2][pos_state2] ) +
+                     price1( encoder->bm_rep[state2] ) +
+                     LZe_price_rep0_len( encoder, len2, state2, pos_state2 );
+
+            while( num_trials < cur + len + 1 + len2 )
+              encoder->trials[++num_trials].price = infinite_price;
+            Tr_update3( &encoder->trials[cur+len+1+len2], price, 0,
+                        cur + len + 1, dis + num_rep_distances, cur );
+            }
+          if( ++i >= num_pairs ) break;
+          dis = encoder->pairs[i].dis;
           }
-        Tr_update( &encoder->trials[cur+len], dis + num_rep_distances, cur,
-                   normal_match_price + dis_price +
-                   Lee_price( &encoder->len_encoder, len, pos_state ) );
         }
       }
     }
@@ -608,12 +743,12 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
 
 
 bool LZe_encode_member( struct LZ_encoder * const encoder,
-                        const long long member_size )
+                        const unsigned long long member_size )
   {
-  const long long member_size_limit =
+  const unsigned long long member_size_limit =
     member_size - Ft_size - max_marker_size;
   const int fill_count =
-    ( encoder->matchfinder->match_len_limit > 12 ) ? 512 : 2048;
+    ( encoder->matchfinder->match_len_limit > 12 ) ? 128 : 512;
   int fill_counter = 0;
   int ahead, i;
   int rep_distances[num_rep_distances];
@@ -629,20 +764,24 @@ bool LZe_encode_member( struct LZ_encoder * const encoder,
     const uint8_t prev_byte = 0;
     const uint8_t cur_byte = Mf_peek( encoder->matchfinder, 0 );
     Re_encode_bit( &encoder->range_encoder, &encoder->bm_match[state][0], 0 );
-    Lie_encode( &encoder->literal_encoder, &encoder->range_encoder, prev_byte, cur_byte );
+    LZe_encode_literal( encoder, prev_byte, cur_byte );
     CRC32_update_byte( &encoder->crc, cur_byte );
-    Mf_longest_match_len( encoder->matchfinder, 0 );
+    Mf_get_match_pairs( encoder->matchfinder, 0 );
     Mf_move_pos( encoder->matchfinder );
     }
 
   while( !Mf_finished( encoder->matchfinder ) )
     {
-    if( fill_counter <= 0 )
-      { LZe_fill_distance_prices( encoder ); fill_counter = fill_count; }
+    if( encoder->pending_num_pairs == 0 )
+      {
+      if( fill_counter <= 0 )
+        { LZe_fill_distance_prices( encoder ); fill_counter = fill_count; }
+      if( encoder->align_price_count <= 0 )
+        LZe_fill_align_prices( encoder );
+      }
 
     ahead = LZe_sequence_optimizer( encoder, rep_distances, state );
     if( ahead <= 0 ) return false;		/* can't happen */
-    fill_counter -= ahead;
 
     for( i = 0; ; )
       {
@@ -660,18 +799,16 @@ bool LZe_encode_member( struct LZ_encoder * const encoder,
         const uint8_t cur_byte = Mf_peek( encoder->matchfinder, -ahead );
         CRC32_update_byte( &encoder->crc, cur_byte );
         if( St_is_char( state ) )
-          Lie_encode( &encoder->literal_encoder, &encoder->range_encoder,
-                      prev_byte, cur_byte );
+          LZe_encode_literal( encoder, prev_byte, cur_byte );
         else
           {
           const uint8_t match_byte =
             Mf_peek( encoder->matchfinder, -ahead-rep_distances[0]-1 );
-          Lie_encode_matched( &encoder->literal_encoder, &encoder->range_encoder,
-                              prev_byte, cur_byte, match_byte );
+          LZe_encode_matched( encoder, prev_byte, cur_byte, match_byte );
           }
-        St_set_char( &state );
+        state = St_set_char( state );
         }
-      else				/* match or repeated match */
+      else					/* match or repeated match */
         {
         CRC32_update_buf( &encoder->crc, Mf_ptr_to_current_pos( encoder->matchfinder ) - ahead, len );
         LZe_mtf_reps( dis, rep_distances );
@@ -689,17 +826,18 @@ bool LZe_encode_member( struct LZ_encoder * const encoder,
             if( dis > 1 )
               Re_encode_bit( &encoder->range_encoder, &encoder->bm_rep2[state], dis > 2 );
             }
-          if( len == 1 ) St_set_short_rep( &state );
+          if( len == 1 ) state = St_set_short_rep( state );
           else
             {
             Lee_encode( &encoder->rep_match_len_encoder, &encoder->range_encoder, len, pos_state );
-            St_set_rep( &state );
+            state = St_set_rep( state );
             }
           }
         else
           {
           LZe_encode_pair( encoder, dis - num_rep_distances, len, pos_state );
-          St_set_match( &state );
+          --fill_counter;
+          state = St_set_match( state );
           }
         }
       ahead -= len; i += len;
diff --git a/encoder.h b/encoder.h
index 13df958..e39d7c4 100644
--- a/encoder.h
+++ b/encoder.h
@@ -1,5 +1,5 @@
 /*  Clzip - Data compressor based on the LZMA algorithm
-    Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz.
+    Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -16,17 +16,19 @@
 */
 
 enum { max_num_trials = 1 << 12,
-       price_shift = 6 };
+       price_shift_bits = 6,
+       price_step_bits = 2,
+       price_step = 1 << price_step_bits };
 
-typedef unsigned char Dis_slots[1<<12];
+typedef uint8_t Dis_slots[1<<10];
 
 extern Dis_slots dis_slots;
 
-static inline void Dis_slots_init()
+static inline void Dis_slots_init( void )
   {
   int i, size, slot;
   for( slot = 0; slot < 4; ++slot ) dis_slots[slot] = slot;
-  for( i = 4, size = 2, slot = 4; slot < 24; slot += 2 )
+  for( i = 4, size = 2, slot = 4; slot < 20; slot += 2 )
     {
     memset( &dis_slots[i], slot, size );
     memset( &dis_slots[i+size], slot + 1, size );
@@ -35,40 +37,47 @@ static inline void Dis_slots_init()
     }
   }
 
-static inline int get_slot( const uint32_t dis )
+static inline uint8_t get_slot( const uint32_t dis )
   {
-  if( dis < (1 << 12) ) return dis_slots[dis];
-  if( dis < (1 << 23) ) return dis_slots[dis>>11] + 22;
-  return dis_slots[dis>>22] + 44;
+  if( dis < (1 << 10) ) return dis_slots[dis];
+  if( dis < (1 << 19) ) return dis_slots[dis>> 9] + 18;
+  if( dis < (1 << 28) ) return dis_slots[dis>>18] + 36;
+  return dis_slots[dis>>27] + 54;
   }
 
 
-typedef int Prob_prices[bit_model_total >> 2];
+typedef short Prob_prices[bit_model_total >> price_step_bits];
 
 extern Prob_prices prob_prices;
 
-static inline void Prob_prices_init()
+static inline void Prob_prices_init( void )
   {
-  const int num_bits = ( bit_model_total_bits - 2 );
-  int i, j = 1, end = 2;
-  prob_prices[0] = bit_model_total_bits << price_shift;
-  for( i = num_bits - 1; i >= 0; --i, end <<= 1 )
+  int i, j;
+  for( i = price_step / 2; i < bit_model_total; i += price_step )
     {
-    for( ; j < end; ++j )
-      prob_prices[j] = ( i << price_shift ) +
-                       ( ((end - j) << price_shift) >> (num_bits - i - 1) );
+    unsigned val = i;
+    int bits = 0;			/* base 2 logarithm of val */
+    for( j = 0; j < price_shift_bits; ++j )
+      {
+      val = val * val;
+      bits <<= 1;
+      while( val >= 1 << 16 ) { val >>= 1; ++bits; }
+      }
+    bits += 15;				/* remaining bits in val */
+    prob_prices[i >> price_step_bits] =
+      ( bit_model_total_bits << price_shift_bits ) - bits;
     }
   }
 
 static inline int get_price( const int probability )
-  { return prob_prices[probability >> 2]; }
+  { return prob_prices[probability >> price_step_bits]; }
 
 
 static inline int price0( const Bit_model probability )
   { return get_price( probability ); }
 
 static inline int price1( const Bit_model probability )
-  { return get_price( bit_model_total-probability ); }
+  { return get_price( bit_model_total - probability ); }
 
 static inline int price_bit( const Bit_model bm, const int bit )
   { if( bit ) return price1( bm ); else return price0( bm ); }
@@ -106,58 +115,57 @@ static inline int price_symbol_reversed( const Bit_model bm[], int symbol,
   }
 
 
-static inline int price_matched( const Bit_model bm[], const int symbol,
-                                 const int match_byte )
+static inline int price_matched( const Bit_model bm[], unsigned symbol,
+                                 unsigned match_byte )
   {
   int price = 0;
-  int model = 1;
-  int i;
-
-  for( i = 7; i >= 0; --i )
-    {
-    const int match_bit = ( match_byte >> i ) & 1;
-    int bit = ( symbol >> i ) & 1;
-    price += price_bit( bm[(match_bit<<8)+model+0x100], bit );
-    model = ( model << 1 ) | bit;
-    if( match_bit != bit )
-      {
-      while( --i >= 0 )
-        {
-        bit = ( symbol >> i ) & 1;
-        price += price_bit( bm[model], bit );
-        model = ( model << 1 ) | bit;
-        }
-      break;
-      }
+  unsigned mask = 0x100;
+  symbol |= 0x100;
+
+  do {
+    unsigned bit, match_bit;
+    match_byte <<= 1;
+    match_bit = match_byte & mask;
+    symbol <<= 1;
+    bit = symbol & 0x100;
+    price += price_bit( bm[match_bit+(symbol>>9)+mask], bit );
+    mask &= ~(match_byte ^ symbol);	/* if( match_bit != bit ) mask = 0; */
     }
+  while( symbol < 0x10000 );
   return price;
   }
 
 
+struct Pair			/* distance-length pair */
+  {
+  int dis;
+  int len;
+  };
+
+
 enum { /* bytes to keep in buffer before dictionary */
        before_size = max_num_trials + 1,
        /* bytes to keep in buffer after pos */
-       after_size = max_match_len,
-       num_prev_positions4 = 1 << 20,
-       num_prev_positions3 = 1 << 18,
-       num_prev_positions2 = 1 << 16,
-       num_prev_positions = num_prev_positions4 + num_prev_positions3 +
-                            num_prev_positions2 };
+       after_size = ( 2 * max_match_len ) + 1,
+       num_prev_positions3 = 1 << 16,
+       num_prev_positions2 = 1 << 10 };
 
 struct Matchfinder
   {
-  long long partial_data_pos;
+  unsigned long long partial_data_pos;
   uint8_t * buffer;		/* input buffer */
   int32_t * prev_positions;	/* last seen position of key */
-  int32_t * prev_pos_tree;
-  int dictionary_size;		/* bytes to keep in buffer before pos */
+  int32_t * prev_pos_tree;	/* previous positions of key */
+  int match_len_limit;
   int buffer_size;
+  int dictionary_size;		/* bytes to keep in buffer before pos */
   int pos;			/* current pos in buffer */
-  int cyclic_pos;		/* current pos in dictionary */
-  int stream_pos;		/* first byte not yet read from file */
+  int cyclic_pos;		/* cycles through [0, dictionary_size] */
   int pos_limit;		/* when reached, a new block must be read */
-  int match_len_limit;
+  int stream_pos;		/* first byte not yet read from file */
   int cycles;
+  unsigned key4_mask;
+  int num_prev_positions;	/* size of prev_positions */
   int infd;			/* input file descriptor */
   bool at_stream_end;		/* stream_pos shows real end of file */
   };
@@ -166,13 +174,12 @@ bool Mf_read_block( struct Matchfinder * const mf );
 void Mf_normalize_pos( struct Matchfinder * const mf );
 
 bool Mf_init( struct Matchfinder * const mf,
-              const int dict_size, const int len_limit, const int ifd );
+              const int dict_size, const int match_len_limit, const int ifd );
 
 static inline void Mf_free( struct Matchfinder * const mf )
   {
-  free( mf->prev_pos_tree ); mf->prev_pos_tree = 0;
-  free( mf->buffer ); mf->buffer = 0;
-  free( mf->prev_positions ); mf->prev_positions = 0;
+  free( mf->prev_positions );
+  free( mf->buffer );
   }
 
 static inline uint8_t Mf_peek( const struct Matchfinder * const mf, const int i )
@@ -181,13 +188,15 @@ static inline uint8_t Mf_peek( const struct Matchfinder * const mf, const int i
 static inline int Mf_available_bytes( const struct Matchfinder * const mf )
   { return mf->stream_pos - mf->pos; }
 
-static inline long long Mf_data_position( const struct Matchfinder * const mf )
+static inline unsigned long long
+Mf_data_position( const struct Matchfinder * const mf )
   { return mf->partial_data_pos + mf->pos; }
 
 static inline bool Mf_finished( const struct Matchfinder * const mf )
   { return mf->at_stream_end && mf->pos >= mf->stream_pos; }
 
-static inline const uint8_t * Mf_ptr_to_current_pos( const struct Matchfinder * const mf )
+static inline const uint8_t *
+Mf_ptr_to_current_pos( const struct Matchfinder * const mf )
   { return mf->buffer + mf->pos; }
 
 static inline bool Mf_dec_pos( struct Matchfinder * const mf,
@@ -196,7 +205,7 @@ static inline bool Mf_dec_pos( struct Matchfinder * const mf,
   if( ahead < 0 || mf->pos < ahead ) return false;
   mf->pos -= ahead;
   mf->cyclic_pos -= ahead;
-  if( mf->cyclic_pos < 0 ) mf->cyclic_pos += mf->dictionary_size;
+  if( mf->cyclic_pos < 0 ) mf->cyclic_pos += mf->dictionary_size + 1;
   return true;
   }
 
@@ -215,12 +224,12 @@ static inline int Mf_true_match_len( const struct Matchfinder * const mf,
 
 static inline void Mf_move_pos( struct Matchfinder * const mf )
   {
-  if( ++mf->cyclic_pos >= mf->dictionary_size ) mf->cyclic_pos = 0;
+  if( ++mf->cyclic_pos > mf->dictionary_size ) mf->cyclic_pos = 0;
   if( ++mf->pos >= mf->pos_limit ) Mf_normalize_pos( mf );
   }
 
 void Mf_reset( struct Matchfinder * const mf );
-int Mf_longest_match_len( struct Matchfinder * const mf, int * const distances );
+int Mf_get_match_pairs( struct Matchfinder * const mf, struct Pair * pairs );
 
 
 enum { re_buffer_size = 65536 };
@@ -228,7 +237,7 @@ enum { re_buffer_size = 65536 };
 struct Range_encoder
   {
   uint64_t low;
-  long long partial_member_pos;
+  unsigned long long partial_member_pos;
   uint8_t * buffer;		/* output buffer */
   int pos;			/* current pos in buffer */
   uint32_t range;
@@ -277,7 +286,8 @@ static inline bool Re_init( struct Range_encoder * const renc, const int ofd )
 static inline void Re_free( struct Range_encoder * const renc )
   { free( renc->buffer ); }
 
-static inline long long Re_member_position( const struct Range_encoder * const renc )
+static inline unsigned long long
+Re_member_position( const struct Range_encoder * const renc )
   { return renc->partial_member_pos + renc->pos + renc->ff_count; }
 
 static inline void Re_flush( struct Range_encoder * const renc )
@@ -345,27 +355,22 @@ static inline void Re_encode_tree_reversed( struct Range_encoder * const renc,
   }
 
 static inline void Re_encode_matched( struct Range_encoder * const renc,
-                                      Bit_model bm[], int symbol, int match_byte )
-  {
-  int model = 1;
-  int i;
-  for( i = 7; i >= 0; --i )
-    {
-    const int match_bit = ( match_byte >> i ) & 1;
-    int bit = ( symbol >> i ) & 1;
-    Re_encode_bit( renc, &bm[(match_bit<<8)+model+0x100], bit );
-    model = ( model << 1 ) | bit;
-    if( match_bit != bit )
-      {
-      while( --i >= 0 )
-        {
-        bit = ( symbol >> i ) & 1;
-        Re_encode_bit( renc, &bm[model], bit );
-        model = ( model << 1 ) | bit;
-        }
-      break;
-      }
+                                      Bit_model bm[], unsigned symbol,
+                                      unsigned match_byte )
+  {
+  unsigned mask = 0x100;
+  symbol |= 0x100;
+
+  do {
+    unsigned bit, match_bit;
+    match_byte <<= 1;
+    match_bit = match_byte & mask;
+    symbol <<= 1;
+    bit = symbol & 0x100;
+    Re_encode_bit( renc, &bm[match_bit+(symbol>>9)+mask], bit );
+    mask &= ~(match_byte ^ symbol);	/* if( match_bit != bit ) mask = 0; */
     }
+  while( symbol < 0x10000 );
   }
 
 
@@ -404,20 +409,15 @@ static inline void Lee_update_prices( struct Len_encoder * const len_encoder,
   }
 
 static inline void Lee_init( struct Len_encoder * const len_encoder,
-                             const int len_limit )
+                             const int match_len_limit )
   {
-  int i, j;
+  int i;
   Bm_init( &len_encoder->choice1 );
   Bm_init( &len_encoder->choice2 );
-  for( i = 0; i < pos_states; ++i )
-    for( j = 0; j < len_low_symbols; ++j )
-      Bm_init( &len_encoder->bm_low[i][j] );
-  for( i = 0; i < pos_states; ++i )
-    for( j = 0; j < len_mid_symbols; ++j )
-      Bm_init( &len_encoder->bm_mid[i][j] );
-  for( i = 0; i < len_high_symbols; ++i )
-    Bm_init( &len_encoder->bm_high[i] );
-  len_encoder->len_symbols = len_limit + 1 - min_match_len;
+  Bm_array_init( len_encoder->bm_low[0], pos_states * len_low_symbols );
+  Bm_array_init( len_encoder->bm_mid[0], pos_states * len_mid_symbols );
+  Bm_array_init( len_encoder->bm_high, len_high_symbols );
+  len_encoder->len_symbols = match_len_limit + 1 - min_match_len;
   for( i = 0; i < pos_states; ++i ) Lee_update_prices( len_encoder, i );
   }
 
@@ -430,71 +430,66 @@ static inline int Lee_price( const struct Len_encoder * const len_encoder,
   { return len_encoder->prices[pos_state][symbol - min_match_len]; }
 
 
-struct Literal_encoder
-  {
-  Bit_model bm_literal[1<<literal_context_bits][0x300];
-  };
-
-static inline void Lie_init( struct Literal_encoder * const lienc )
-  {
-  int i, j;
-  for( i = 0; i < 1<<literal_context_bits; ++i )
-    for( j = 0; j < 0x300; ++j )
-      Bm_init( &lienc->bm_literal[i][j] );
-  }
-
-static inline int Lie_state( const uint8_t prev_byte )
-  { return ( prev_byte >> ( 8 - literal_context_bits ) ); }
-
-static inline void Lie_encode( struct Literal_encoder * const lienc,
-                               struct Range_encoder * const renc,
-                               uint8_t prev_byte, uint8_t symbol )
-  { Re_encode_tree( renc, lienc->bm_literal[Lie_state(prev_byte)], symbol, 8 ); }
-
-static inline void Lie_encode_matched( struct Literal_encoder * const lienc,
-                                       struct Range_encoder * const renc,
-                                       uint8_t prev_byte, uint8_t symbol,
-                                       uint8_t match_byte )
-  { Re_encode_matched( renc, lienc->bm_literal[Lie_state(prev_byte)],
-                       symbol, match_byte ); }
-
-static inline int Lie_price_symbol( const struct Literal_encoder * const lienc,
-                                    uint8_t prev_byte, uint8_t symbol )
-  { return price_symbol( lienc->bm_literal[Lie_state(prev_byte)], symbol, 8 ); }
-
-static inline int Lie_price_matched( const struct Literal_encoder * const lienc,
-                                     uint8_t prev_byte, uint8_t symbol,
-                                     uint8_t match_byte )
-  { return price_matched( lienc->bm_literal[Lie_state(prev_byte)],
-                          symbol, match_byte ); }
-
-
 enum { infinite_price = 0x0FFFFFFF,
        max_marker_size = 16,
-       num_rep_distances = 4 };		/* must be 4 */
+       num_rep_distances = 4,			/* must be 4 */
+       single_step_trial = -2,
+       dual_step_trial = -1 };
 
 struct Trial
   {
   State state;
-  int dis;
-  int prev_index;	/* index of prev trial in trials[] */
   int price;		/* dual use var; cumulative price, match length */
+  int dis;		/* rep index or match distance */
+  int prev_index;	/* index of prev trial in trials[] */
+  int dis2;
+  int prev_index2;	/*   -2  trial is single step */
+			/*   -1  literal + rep0 */
+			/* >= 0  rep or match + literal + rep0 */
   int reps[num_rep_distances];
   };
 
-static inline void Tr_update( struct Trial * const trial,
-                              const int d, const int p_i, const int pr )
+static inline void Tr_update( struct Trial * const trial, const int pr,
+                              const int d, const int p_i )
   {
   if( pr < trial->price )
-    { trial->dis = d; trial->prev_index = p_i; trial->price = pr; }
+    {
+    trial->price = pr;
+    trial->dis = d; trial->prev_index = p_i;
+    trial->prev_index2 = single_step_trial;
+    }
+  }
+
+static inline void Tr_update2( struct Trial * const trial, const int pr,
+                               const int d, const int p_i )
+  {
+  if( pr < trial->price )
+    {
+    trial->price = pr;
+    trial->dis = d; trial->prev_index = p_i;
+    trial->prev_index2 = dual_step_trial;
+    }
+  }
+
+static inline void Tr_update3( struct Trial * const trial, const int pr,
+                               const int d, const int p_i,
+                               const int d2, const int p_i2 )
+  {
+  if( pr < trial->price )
+    {
+    trial->price = pr;
+    trial->dis = d; trial->prev_index = p_i;
+    trial->dis2 = d2; trial->prev_index2 = p_i2;
+    }
   }
 
 
 struct LZ_encoder
   {
-  int longest_match_found;
+  int pending_num_pairs;
   uint32_t crc;
 
+  Bit_model bm_literal[1<<literal_context_bits][0x300];
   Bit_model bm_match[states][pos_states];
   Bit_model bm_rep[states];
   Bit_model bm_rep0[states];
@@ -502,17 +497,16 @@ struct LZ_encoder
   Bit_model bm_rep2[states];
   Bit_model bm_len[states][pos_states];
   Bit_model bm_dis_slot[max_dis_states][1<<dis_slot_bits];
-  Bit_model bm_dis[modeled_distances-end_dis_model+1];
+  Bit_model bm_dis[modeled_distances-end_dis_model];
   Bit_model bm_align[dis_align_size];
 
   struct Matchfinder * matchfinder;
   struct Range_encoder range_encoder;
   struct Len_encoder len_encoder;
   struct Len_encoder rep_match_len_encoder;
-  struct Literal_encoder literal_encoder;
 
   int num_dis_slots;
-  int match_distances[max_match_len+1];
+  struct Pair pairs[max_match_len+1];
   struct Trial trials[max_num_trials];
 
   int dis_slot_prices[max_dis_states][2*max_dictionary_bits];
@@ -521,11 +515,6 @@ struct LZ_encoder
   int align_price_count;
   };
 
-void LZe_full_flush( struct LZ_encoder * const encoder, const State state );
-
-void LZe_fill_align_prices( struct LZ_encoder * const encoder );
-void LZe_fill_distance_prices( struct LZ_encoder * const encoder );
-
 bool LZe_init( struct LZ_encoder * const encoder,
                struct Matchfinder * const mf,
                const File_header header, const int outfd );
@@ -533,7 +522,7 @@ bool LZe_init( struct LZ_encoder * const encoder,
 static inline void LZe_free( struct LZ_encoder * const encoder )
   { Re_free( &encoder->range_encoder ); }
 
-static inline uint32_t LZe_crc( const struct LZ_encoder * const encoder )
+static inline unsigned LZe_crc( const struct LZ_encoder * const encoder )
   { return encoder->crc ^ 0xFFFFFFFFU; }
 
        /* move-to-front dis in/into reps */
@@ -578,6 +567,14 @@ static inline int LZe_price_rep( const struct LZ_encoder * const encoder,
   return price;
   }
 
+static inline int LZe_price_rep0_len( const struct LZ_encoder * const encoder,
+                                      const int len,
+                                      const State state, const int pos_state )
+  {
+  return LZe_price_rep( encoder, 0, state, pos_state ) +
+         Lee_price( &encoder->rep_match_len_encoder, len, pos_state );
+  }
+
 static inline int LZe_price_dis( const struct LZ_encoder * const encoder,
                                  const int dis, const int dis_state )
   {
@@ -592,12 +589,32 @@ static inline int LZe_price_pair( const struct LZ_encoder * const encoder,
                                   const int dis, const int len,
                                   const int pos_state )
   {
-  if( len <= min_match_len && dis >= modeled_distances )
-    return infinite_price;
   return Lee_price( &encoder->len_encoder, len, pos_state ) +
          LZe_price_dis( encoder, dis, get_dis_state( len ) );
   }
 
+static inline int LZe_price_literal( const struct LZ_encoder * const encoder,
+                                    uint8_t prev_byte, uint8_t symbol )
+  { return price_symbol( encoder->bm_literal[get_lit_state(prev_byte)], symbol, 8 ); }
+
+static inline int LZe_price_matched( const struct LZ_encoder * const encoder,
+                                     uint8_t prev_byte, uint8_t symbol,
+                                     uint8_t match_byte )
+  { return price_matched( encoder->bm_literal[get_lit_state(prev_byte)],
+                          symbol, match_byte ); }
+
+static inline void LZe_encode_literal( struct LZ_encoder * const encoder,
+                                       uint8_t prev_byte, uint8_t symbol )
+  { Re_encode_tree( &encoder->range_encoder,
+                    encoder->bm_literal[get_lit_state(prev_byte)], symbol, 8 ); }
+
+static inline void LZe_encode_matched( struct LZ_encoder * const encoder,
+                                       uint8_t prev_byte, uint8_t symbol,
+                                       uint8_t match_byte )
+  { Re_encode_matched( &encoder->range_encoder,
+                       encoder->bm_literal[get_lit_state(prev_byte)],
+                       symbol, match_byte ); }
+
 static inline void LZe_encode_pair( struct LZ_encoder * const encoder,
                                     const uint32_t dis, const int len,
                                     const int pos_state )
@@ -616,7 +633,7 @@ static inline void LZe_encode_pair( struct LZ_encoder * const encoder,
 
     if( dis_slot < end_dis_model )
       Re_encode_tree_reversed( &encoder->range_encoder,
-                               encoder->bm_dis + base - dis_slot,
+                               encoder->bm_dis + base - dis_slot - 1,
                                direct_dis, direct_bits );
     else
       {
@@ -624,20 +641,27 @@ static inline void LZe_encode_pair( struct LZ_encoder * const encoder,
                  direct_bits - dis_align_bits );
       Re_encode_tree_reversed( &encoder->range_encoder, encoder->bm_align,
                                direct_dis, dis_align_bits );
-      if( --encoder->align_price_count <= 0 ) LZe_fill_align_prices( encoder );
+      --encoder->align_price_count;
       }
     }
   }
 
 static inline int LZe_read_match_distances( struct LZ_encoder * const encoder )
   {
-  int len = Mf_longest_match_len( encoder->matchfinder,
-                                  encoder->match_distances );
-  if( len == encoder->matchfinder->match_len_limit && len < max_match_len )
-    len += Mf_true_match_len( encoder->matchfinder, len,
-                              encoder->match_distances[len] + 1,
-                              max_match_len - len );
-  return len;
+  const int num_pairs =
+    Mf_get_match_pairs( encoder->matchfinder, encoder->pairs );
+  if( num_pairs > 0 )
+    {
+    int len = encoder->pairs[num_pairs-1].len;
+    if( len == encoder->matchfinder->match_len_limit && len < max_match_len )
+      {
+      len += Mf_true_match_len( encoder->matchfinder, len,
+                                encoder->pairs[num_pairs-1].dis + 1,
+                                max_match_len - len );
+      encoder->pairs[num_pairs-1].len = len;
+      }
+    }
+  return num_pairs;
   }
 
 static inline void LZe_move_pos( struct LZ_encoder * const encoder, int n )
@@ -645,7 +669,7 @@ static inline void LZe_move_pos( struct LZ_encoder * const encoder, int n )
   if( --n >= 0 ) Mf_move_pos( encoder->matchfinder );
   while( --n >= 0 )
     {
-    Mf_longest_match_len( encoder->matchfinder, 0 );
+    Mf_get_match_pairs( encoder->matchfinder, 0 );
     Mf_move_pos( encoder->matchfinder );
     }
   }
@@ -657,15 +681,25 @@ static inline void LZe_backward( struct LZ_encoder * const encoder, int cur )
     {
     const int prev_index = encoder->trials[cur].prev_index;
     struct Trial * const prev_trial = &encoder->trials[prev_index];
+
+    if( encoder->trials[cur].prev_index2 != single_step_trial )
+      {
+      prev_trial->dis = -1;
+      prev_trial->prev_index = prev_index - 1;
+      prev_trial->prev_index2 = single_step_trial;
+      if( encoder->trials[cur].prev_index2 >= 0 )
+        {
+        struct Trial * const prev_trial2 = &encoder->trials[prev_index-1];
+        prev_trial2->dis = encoder->trials[cur].dis2;
+        prev_trial2->prev_index = encoder->trials[cur].prev_index2;
+        prev_trial2->prev_index2 = single_step_trial;
+        }
+      }
     prev_trial->price = cur - prev_index;			/* len */
     cur = *dis; *dis = prev_trial->dis; prev_trial->dis = cur;
     cur = prev_index;
     }
   }
 
-int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
-                            const int reps[num_rep_distances],
-                            const State state );
-
 bool LZe_encode_member( struct LZ_encoder * const encoder,
-                        const long long member_size );
+                        const unsigned long long member_size );
diff --git a/main.c b/main.c
index 91215a9..aea4e18 100644
--- a/main.c
+++ b/main.c
@@ -1,5 +1,5 @@
 /*  Clzip - Data compressor based on the LZMA algorithm
-    Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz.
+    Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -39,6 +39,7 @@
 #include <io.h>
 #define fchmod(x,y) 0
 #define fchown(x,y,z) 0
+#define strtoull strtoul
 #define SIGHUP SIGTERM
 #define S_ISSOCK(x) 0
 #define S_IRGRP 0
@@ -59,22 +60,10 @@
 #error "Environments where CHAR_BIT != 8 are not supported."
 #endif
 
-#ifndef LLONG_MAX
-#define LLONG_MAX  0x7FFFFFFFFFFFFFFFLL
-#endif
-#ifndef LLONG_MIN
-#define LLONG_MIN  (-LLONG_MAX - 1LL)
-#endif
-#ifndef ULLONG_MAX
-#define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL
-#endif
-
-long long int llabs( long long int number );
-
 
 const char * const Program_name = "Clzip";
 const char * const program_name = "clzip";
-const char * const program_year = "2012";
+const char * const program_year = "2013";
 const char * invocation_name = 0;
 
 #ifdef O_BINARY
@@ -95,6 +84,8 @@ struct Lzma_options
   };
 
 enum Mode { m_compress, m_decompress, m_test };
+const unsigned long long max_member_size = 0x1000000000000000ULL;
+const unsigned long long max_volume_size = 0x7FFFFFFFFFFFFFFFULL;
 
 char * output_filename = 0;
 int outfd = -1;
@@ -105,21 +96,7 @@ mode_t outfd_mode = S_IRUSR | S_IWUSR;
 bool delete_output_on_interrupt = false;
 
 
-/* assure at least a minimum size for buffer 'buf' */
-static void * resize_buffer( void * buf, const int min_size )
-  {
-  if( buf ) buf = realloc( buf, min_size );
-  else buf = malloc( min_size );
-  if( !buf )
-    {
-    show_error( "Not enough memory.", 0, false );
-    cleanup_and_fail( 1 );
-    }
-  return buf;
-  }
-
-
-static void show_help()
+static void show_help( void )
   {
   printf( "%s - Data compressor based on the LZMA algorithm.\n", Program_name );
   printf( "\nUsage: %s [options] [files]\n", invocation_name );
@@ -155,7 +132,7 @@ static void show_help()
   }
 
 
-static void show_version()
+static void show_version( void )
   {
   printf( "%s %s\n", Program_name, PROGVERSION );
   printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year );
@@ -165,31 +142,32 @@ static void show_version()
   }
 
 
-static const char * format_num( long long num )
+void show_header( const File_header header )
   {
   const char * const prefix[8] =
     { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
-  enum { buf_size = 16, factor = 1024 };
-  static char buf[buf_size];
-  const char *p = "";
+  enum { factor = 1024 };
+  const char * p = "";
+  const char * np = "  ";
+  unsigned num = Fh_get_dictionary_size( header ), i;
   bool exact = ( num % factor == 0 );
-  int i;
 
-  for( i = 0; i < 8 && ( llabs( num ) > 9999 ||
-                         ( exact && llabs( num ) >= factor ) ); ++i )
-    { num /= factor; if( num % factor != 0 ) exact = false; p = prefix[i]; }
-  snprintf( buf, buf_size, "%lld %s", num, p );
-  return buf;
+  for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i )
+    { num /= factor; if( num % factor != 0 ) exact = false;
+      p = prefix[i]; np = ""; }
+  fprintf( stderr, "version %d, dictionary size %s%4u %sB.  ",
+           Fh_version( header ), np, num, p );
   }
 
 
-static long long getnum( const char * const ptr,
-                         const long long llimit, const long long ulimit )
+static unsigned long long getnum( const char * const ptr,
+                                  const unsigned long long llimit,
+                                  const unsigned long long ulimit )
   {
-  long long result;
-  char *tail;
+  unsigned long long result;
+  char * tail;
   errno = 0;
-  result = strtoll( ptr, &tail, 0 );
+  result = strtoull( ptr, &tail, 0 );
   if( tail == ptr )
     {
     show_error( "Bad or missing numerical argument.", 0, true );
@@ -199,8 +177,7 @@ static long long getnum( const char * const ptr,
   if( !errno && tail[0] )
     {
     int factor = ( tail[1] == 'i' ) ? 1024 : 1000;
-    int exponent = 0;
-    int i;
+    int exponent = 0, i;
     bool bad_multiplier = false;
     switch( tail[0] )
       {
@@ -225,7 +202,7 @@ static long long getnum( const char * const ptr,
       }
     for( i = 0; i < exponent; ++i )
       {
-      if( LLONG_MAX / factor >= llabs( result ) ) result *= factor;
+      if( ulimit / factor >= result ) result *= factor;
       else { errno = ERANGE; break; }
       }
     }
@@ -241,7 +218,7 @@ static long long getnum( const char * const ptr,
 
 static int get_dict_size( const char * const arg )
   {
-  char *tail;
+  char * tail;
   int bits = strtol( arg, &tail, 0 );
   if( bits >= min_dictionary_bits &&
       bits <= max_dictionary_bits && *tail == 0 )
@@ -250,6 +227,20 @@ static int get_dict_size( const char * const arg )
   }
 
 
+static int extension_index( const char * const name )
+  {
+  int i;
+  for( i = 0; known_extensions[i].from; ++i )
+    {
+    const char * const ext = known_extensions[i].from;
+    if( strlen( name ) > strlen( ext ) &&
+        strncmp( name + strlen( name ) - strlen( ext ), ext, strlen( ext ) ) == 0 )
+      return i;
+    }
+  return -1;
+  }
+
+
 static int open_instream( const char * const name, struct stat * const in_statsp,
                           const enum Mode program_mode, const int eindex,
                           const bool recompress, const bool to_stdout )
@@ -293,6 +284,20 @@ static int open_instream( const char * const name, struct stat * const in_statsp
   }
 
 
+/* assure at least a minimum size for buffer 'buf' */
+static void * resize_buffer( void * buf, const int min_size )
+  {
+  if( buf ) buf = realloc( buf, min_size );
+  else buf = malloc( min_size );
+  if( !buf )
+    {
+    show_error( "Not enough memory.", 0, false );
+    cleanup_and_fail( 1 );
+    }
+  return buf;
+  }
+
+
 static void set_c_outname( const char * const name, const bool multifile )
   {
   output_filename = resize_buffer( output_filename, strlen( name ) + 5 +
@@ -303,20 +308,6 @@ static void set_c_outname( const char * const name, const bool multifile )
   }
 
 
-static int extension_index( const char * const name )
-  {
-  int i;
-  for( i = 0; known_extensions[i].from; ++i )
-    {
-    const char * const ext = known_extensions[i].from;
-    if( strlen( name ) > strlen( ext ) &&
-        strncmp( name + strlen( name ) - strlen( ext ), ext, strlen( ext ) ) == 0 )
-      return i;
-    }
-  return -1;
-  }
-
-
 static void set_d_outname( const char * const name, const int i )
   {
   if( i >= 0 )
@@ -419,9 +410,9 @@ static void close_and_set_permissions( const struct stat * const in_statsp )
   }
 
 
-static bool next_filename()
+static bool next_filename( void )
   {
-  const unsigned int len = strlen( known_extensions[0].from );
+  const unsigned len = strlen( known_extensions[0].from );
   int i, j;
 
   if( strlen( output_filename ) >= len + 5 )		/* "*00001.lz" */
@@ -434,18 +425,19 @@ static bool next_filename()
   }
 
 
-static int compress( const long long member_size, const long long volume_size,
+static int compress( const unsigned long long member_size,
+                     const unsigned long long volume_size,
                      const struct Lzma_options * const encoder_options,
                      const int infd, struct Pretty_print * const pp,
                      const struct stat * const in_statsp )
   {
-  long long in_size = 0, out_size = 0, partial_volume_size = 0;
+  unsigned long long in_size = 0, out_size = 0, partial_volume_size = 0;
   int retval = 0;
-  File_header header;
   struct Matchfinder matchfinder;
+  File_header header;
+  Fh_set_magic( header );
 
   if( verbosity >= 1 ) Pp_show_msg( pp, 0 );
-  Fh_set_magic( header );
   if( !Fh_set_dictionary_size( header, encoder_options->dictionary_size ) ||
       encoder_options->match_len_limit < min_match_len_limit ||
       encoder_options->match_len_limit > max_match_len )
@@ -459,11 +451,11 @@ static int compress( const long long member_size, const long long volume_size,
     }
   Fh_set_dictionary_size( header, matchfinder.dictionary_size );
 
-  while( true )		/* encode one member per iteration */
+  while( true )			/* encode one member per iteration */
     {
     struct LZ_encoder encoder;
-    const long long size =
-      min( member_size, volume_size - partial_volume_size );
+    const unsigned long long size = ( ( volume_size > 0 ) ?
+      min( member_size, volume_size - partial_volume_size ) : member_size );
     if( !LZe_init( &encoder, &matchfinder, header, outfd ) )
       {
       show_error( "Not enough memory. Try a smaller dictionary size.", 0, false );
@@ -473,19 +465,22 @@ static int compress( const long long member_size, const long long volume_size,
       { Pp_show_msg( pp, "Encoder error" ); retval = 1; break; }
     in_size += Mf_data_position( &matchfinder );
     out_size += Re_member_position( &encoder.range_encoder );
-    partial_volume_size += Re_member_position( &encoder.range_encoder );
     LZe_free( &encoder );
     if( Mf_finished( &matchfinder ) ) break;
-    if( partial_volume_size >= volume_size - min_dictionary_size )
+    if( volume_size > 0 )
       {
-      partial_volume_size = 0;
-      if( delete_output_on_interrupt )
+      partial_volume_size += Re_member_position( &encoder.range_encoder );
+      if( partial_volume_size >= volume_size - min_dictionary_size )
         {
-        close_and_set_permissions( in_statsp );
-        if( !next_filename() )
-          { Pp_show_msg( pp, "Too many volume files" ); retval = 1; break; }
-        if( !open_outstream( true ) ) { retval = 1; break; }
-        delete_output_on_interrupt = true;
+        partial_volume_size = 0;
+        if( delete_output_on_interrupt )
+          {
+          close_and_set_permissions( in_statsp );
+          if( !next_filename() )
+            { Pp_show_msg( pp, "Too many volume files" ); retval = 1; break; }
+          if( !open_outstream( true ) ) { retval = 1; break; }
+          delete_output_on_interrupt = true;
+          }
         }
       }
     Mf_reset( &matchfinder );
@@ -493,11 +488,11 @@ static int compress( const long long member_size, const long long volume_size,
 
   if( retval == 0 && verbosity >= 1 )
     {
-    if( in_size <= 0 || out_size <= 0 )
+    if( in_size == 0 || out_size == 0 )
       fprintf( stderr, " no data compressed.\n" );
     else
       fprintf( stderr, "%6.3f:1, %6.3f bits/byte, "
-                       "%5.2f%% saved, %lld in, %lld out.\n",
+                       "%5.2f%% saved, %llu in, %llu out.\n",
                (double)in_size / out_size,
                ( 8.0 * out_size ) / in_size,
                100.0 * ( 1.0 - ( (double)out_size / in_size ) ),
@@ -511,9 +506,9 @@ static int compress( const long long member_size, const long long volume_size,
 static int decompress( const int infd, struct Pretty_print * const pp,
                        const bool testing )
   {
-  long long partial_file_pos = 0;
+  unsigned long long partial_file_pos = 0;
   struct Range_decoder rdec;
-  int retval = 0, result;
+  int retval = 0;
   bool first_member;
   if( !Rd_init( &rdec, infd ) )
     {
@@ -521,13 +516,14 @@ static int decompress( const int infd, struct Pretty_print * const pp,
     cleanup_and_fail( 1 );
     }
 
-  for( first_member = true; ; first_member = false, Pp_reset( pp ) )
+  for( first_member = true; ; first_member = false )
     {
+    int result;
     File_header header;
     struct LZ_decoder decoder;
     Rd_reset_member_position( &rdec );
     Rd_read_data( &rdec, header, Fh_size );
-    if( Rd_finished( &rdec ) )		/* End Of File */
+    if( Rd_finished( &rdec ) )			/* End Of File */
       {
       if( first_member )
         { Pp_show_msg( pp, "Error reading member header" ); retval = 1; }
@@ -553,13 +549,7 @@ static int decompress( const int infd, struct Pretty_print * const pp,
         retval = 2; break; }
 
     if( verbosity >= 2 || ( verbosity == 1 && first_member ) )
-      {
-      Pp_show_msg( pp, 0 );
-      if( verbosity >= 2 )
-        fprintf( stderr, "version %d, dictionary size %7sB.  ",
-                 Fh_version( header ),
-                 format_num( Fh_get_dictionary_size( header ) ) );
-      }
+      { Pp_show_msg( pp, 0 ); if( verbosity >= 2 ) show_header( header ); }
 
     if( !LZd_init( &decoder, header, &rdec, outfd ) )
       {
@@ -575,16 +565,16 @@ static int decompress( const int infd, struct Pretty_print * const pp,
         {
         Pp_show_msg( pp, 0 );
         if( result == 2 )
-          fprintf( stderr, "File ends unexpectedly at pos %lld\n",
+          fprintf( stderr, "File ends unexpectedly at pos %llu\n",
                    partial_file_pos );
         else
-          fprintf( stderr, "Decoder error at pos %lld\n", partial_file_pos );
+          fprintf( stderr, "Decoder error at pos %llu\n", partial_file_pos );
         }
       retval = 2; break;
       }
     if( verbosity >= 2 )
       { if( testing ) fprintf( stderr, "ok\n" );
-        else fprintf( stderr, "done\n" ); }
+        else fprintf( stderr, "done\n" ); Pp_reset( pp ); }
     }
   Rd_free( &rdec );
   if( verbosity == 1 && retval == 0 )
@@ -596,13 +586,13 @@ static int decompress( const int infd, struct Pretty_print * const pp,
 
 void signal_handler( int sig )
   {
-  sig = 0;			/* keep compiler happy */
+  if( sig ) {}				/* keep compiler happy */
   show_error( "Control-C or similar caught, quitting.", 0, false );
   cleanup_and_fail( 1 );
   }
 
 
-static void set_signals()
+static void set_signals( void )
   {
   signal( SIGHUP, signal_handler );
   signal( SIGINT, signal_handler );
@@ -613,7 +603,7 @@ static void set_signals()
 void Pp_init( struct Pretty_print * const pp, const char * const filenames[],
               const int num_filenames, const int v )
   {
-  unsigned int stdin_name_len;
+  unsigned stdin_name_len;
   int i;
   pp->name = 0;
   pp->stdin_name = "(stdin)";
@@ -625,31 +615,13 @@ void Pp_init( struct Pretty_print * const pp, const char * const filenames[],
   for( i = 0; i < num_filenames; ++i )
     {
     const char * const s = filenames[i];
-    const int len = ( !strcmp( s, "-" ) ? stdin_name_len : strlen( s ) );
+    const int len = ( (strcmp( s, "-" ) == 0) ? stdin_name_len : strlen( s ) );
     if( len > pp->longest_name ) pp->longest_name = len;
     }
   if( pp->longest_name == 0 ) pp->longest_name = stdin_name_len;
   }
 
 
-void Pp_show_msg( struct Pretty_print * const pp, const char * const msg )
-  {
-  if( verbosity >= 0 )
-    {
-    if( pp->first_post )
-      {
-      int i, len;
-      pp->first_post = false;
-      fprintf( stderr, "  %s: ", pp->name );
-      len = pp->longest_name - strlen( pp->name );
-      for( i = 0; i < len; ++i ) fprintf( stderr, " " );
-      if( !msg ) fflush( stderr );
-      }
-    if( msg ) fprintf( stderr, "%s.\n", msg );
-    }
-  }
-
-
 void show_error( const char * const msg, const int errcode, const bool help )
   {
   if( verbosity >= 0 )
@@ -660,7 +632,7 @@ void show_error( const char * const msg, const int errcode, const bool help )
       if( errcode > 0 ) fprintf( stderr, ": %s", strerror( errcode ) );
       fprintf( stderr, "\n" );
       }
-    if( help && invocation_name && invocation_name[0] )
+    if( help )
       fprintf( stderr, "Try '%s --help' for more information.\n",
                invocation_name );
     }
@@ -692,8 +664,8 @@ int main( const int argc, const char * const argv[] )
     { 3 << 23, 132 },		/* -8 */
     { 1 << 25, 273 } };		/* -9 */
   struct Lzma_options encoder_options = option_mapping[6];  /* default = "-6" */
-  long long member_size = LLONG_MAX;
-  long long volume_size = LLONG_MAX;
+  unsigned long long member_size = max_member_size;
+  unsigned long long volume_size = 0;
   const char * input_filename = "";
   const char * default_output_filename = "";
   const char ** filenames = 0;
@@ -753,14 +725,14 @@ int main( const int argc, const char * const argv[] )
     {
     const int code = ap_code( &parser, argind );
     const char * const arg = ap_argument( &parser, argind );
-    if( !code ) break;				/* no more options */
+    if( !code ) break;					/* no more options */
     switch( code )
       {
       case '0':
       case '1': case '2': case '3': case '4':
       case '5': case '6': case '7': case '8': case '9':
                 encoder_options = option_mapping[code-'0']; break;
-      case 'b': member_size = getnum( arg, 100000, LLONG_MAX / 2 ); break;
+      case 'b': member_size = getnum( arg, 100000, max_member_size ); break;
       case 'c': to_stdout = true; break;
       case 'd': program_mode = m_decompress; break;
       case 'f': force = true; break;
@@ -773,7 +745,7 @@ int main( const int argc, const char * const argv[] )
       case 'q': verbosity = -1; break;
       case 's': encoder_options.dictionary_size = get_dict_size( arg );
                 break;
-      case 'S': volume_size = getnum( arg, 100000, LLONG_MAX / 2 ); break;
+      case 'S': volume_size = getnum( arg, 100000, max_volume_size ); break;
       case 't': program_mode = m_test; break;
       case 'v': if( verbosity < 4 ) ++verbosity; break;
       case 'V': show_version(); return 0;
@@ -782,8 +754,8 @@ int main( const int argc, const char * const argv[] )
     } /* end process options */
 
 #if defined(__MSVCRT__) || defined(__OS2__)
-  _fsetmode( stdin, "b" );
-  _fsetmode( stdout, "b" );
+  setmode( STDIN_FILENO, O_BINARY );
+  setmode( STDOUT_FILENO, O_BINARY );
 #endif
 
   if( program_mode == m_test )
@@ -794,21 +766,16 @@ int main( const int argc, const char * const argv[] )
     Prob_prices_init();
     }
 
-  for( ; argind < ap_arguments( &parser ); ++argind )
-    {
-    if( strcmp( ap_argument( &parser, argind ), "-" ) )
-      filenames_given = true;
-    ++num_filenames;
-    filenames = resize_buffer( filenames, num_filenames * sizeof (char *) );
-    filenames[num_filenames-1] = ap_argument( &parser, argind );
-    }
+  num_filenames = max( 1, ap_arguments( &parser ) - argind );
+  filenames = resize_buffer( filenames, num_filenames * sizeof filenames[0] );
+  filenames[0] = "-";
 
-  if( num_filenames == 0 )
+  for( i = 0; argind + i < ap_arguments( &parser ); ++i )
     {
-    ++num_filenames;
-    filenames = resize_buffer( filenames, sizeof (char *) );
-    filenames[num_filenames-1] = "-";
+    filenames[i] = ap_argument( &parser, argind + i );
+    if( strcmp( filenames[i], "-" ) != 0 ) filenames_given = true;
     }
+
   if( !to_stdout && program_mode != m_test &&
       ( filenames_given || default_output_filename[0] ) )
     set_signals();
@@ -823,7 +790,7 @@ int main( const int argc, const char * const argv[] )
     const struct stat * in_statsp;
     output_filename[0] = 0;
 
-    if( !filenames[i][0] || !strcmp( filenames[i], "-" ) )
+    if( !filenames[i][0] || strcmp( filenames[i], "-" ) == 0 )
       {
       input_filename = "";
       infd = STDIN_FILENO;
@@ -834,7 +801,7 @@ int main( const int argc, const char * const argv[] )
         else
           {
           if( program_mode == m_compress )
-            set_c_outname( default_output_filename, volume_size != LLONG_MAX );
+            set_c_outname( default_output_filename, volume_size > 0 );
           else
             {
             output_filename = resize_buffer( output_filename,
@@ -844,7 +811,7 @@ int main( const int argc, const char * const argv[] )
           outfd_mode = all_rw;
           if( !open_outstream( force ) )
             {
-            if( outfd == -1 && retval < 1 ) retval = 1;
+            if( retval < 1 ) retval = 1;
             close( infd ); infd = -1;
             continue;
             }
@@ -864,12 +831,12 @@ int main( const int argc, const char * const argv[] )
         else
           {
           if( program_mode == m_compress )
-            set_c_outname( input_filename, volume_size != LLONG_MAX );
+            set_c_outname( input_filename, volume_size > 0 );
           else set_d_outname( input_filename, eindex );
           outfd_mode = usr_rw;
           if( !open_outstream( force ) )
             {
-            if( outfd == -1 && retval < 1 ) retval = 1;
+            if( retval < 1 ) retval = 1;
             close( infd ); infd = -1;
             continue;
             }
diff --git a/testsuite/check.sh b/testsuite/check.sh
index 9d07625..ed0ca50 100755
--- a/testsuite/check.sh
+++ b/testsuite/check.sh
@@ -1,6 +1,6 @@
 #! /bin/sh
 # check script for Clzip - Data compressor based on the LZMA algorithm
-# Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz.
+# Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
 #
 # This script is free software: you have unlimited permission
 # to copy, distribute and modify it.
@@ -26,31 +26,27 @@ fail=0
 
 printf "testing clzip-%s..." "$2"
 
-"${LZIP}" -t "${testdir}"/test_v0.lz || fail=1
-printf .
-"${LZIP}" -cd "${testdir}"/test_v0.lz > copy || fail=1
-cmp in copy || fail=1
-printf .
-
-"${LZIP}" -t "${testdir}"/test_v1.lz || fail=1
-printf .
-"${LZIP}" -cd "${testdir}"/test_v1.lz > copy || fail=1
-cmp in copy || fail=1
-printf .
-
-"${LZIP}" -t "${testdir}"/test_sync.lz || fail=1
-printf .
-"${LZIP}" -cd "${testdir}"/test_sync.lz > copy || fail=1
+"${LZIP}" -t "${testdir}"/test.txt.lz || fail=1
+"${LZIP}" -cd "${testdir}"/test.txt.lz > copy || fail=1
 cmp in copy || fail=1
 printf .
 
-"${LZIP}" -cfq "${testdir}"/test_v1.lz > out
+"${LZIP}" -cfq "${testdir}"/test.txt.lz > out
 if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
-"${LZIP}" -cF "${testdir}"/test_v1.lz > out || fail=1
+"${LZIP}" -cF "${testdir}"/test.txt.lz > out || fail=1
 "${LZIP}" -cd out | "${LZIP}" -d > copy || fail=1
 cmp in copy || fail=1
 printf .
 
+"${LZIP}" -cqs-1 in > out
+if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
+"${LZIP}" -cqs0 in > out
+if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
+"${LZIP}" -cqs4095 in > out
+if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
+"${LZIP}" -cqm274 in > out
+if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
+
 for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do
 	"${LZIP}" -k -$i in || fail=1
 	mv -f in.lz copy.lz || fail=1
@@ -87,6 +83,12 @@ done
 cmp in anyothername.out || fail=1
 printf .
 
+cat in in > in2 || framework_failure
+"${LZIP}" < in2 > out2 || fail=1
+"${LZIP}" -d < out2 > copy2 || fail=1
+cmp in2 copy2 || fail=1
+printf .
+
 echo
 if [ ${fail} = 0 ] ; then
 	echo "tests completed successfully."
diff --git a/testsuite/test.txt.lz b/testsuite/test.txt.lz
new file mode 100644
index 0000000..4db881a
--- /dev/null
+++ b/testsuite/test.txt.lz
diff --git a/testsuite/test_sync.lz b/testsuite/test_sync.lz
deleted file mode 100644
index 419fa97..0000000
--- a/testsuite/test_sync.lz
+++ /dev/null
diff --git a/testsuite/test_v0.lz b/testsuite/test_v0.lz
deleted file mode 100644
index a09b1e8..0000000
--- a/testsuite/test_v0.lz
+++ /dev/null
diff --git a/testsuite/test_v1.lz b/testsuite/test_v1.lz
deleted file mode 100644
index f1c79eb..0000000
--- a/testsuite/test_v1.lz
+++ /dev/null