Adding upstream version 1.17. (ref: upstream/1.17)

Signed-off-by: Daniel Baumann <mail@daniel-baumann.ch>
author: Daniel Baumann <mail@daniel-baumann.ch> 2015-11-07 10:08:36 +0000
committer: Daniel Baumann <mail@daniel-baumann.ch> 2015-11-07 10:08:36 +0000
commit: 62327bcaed81d8c02f11aec2c49f7d09c5edddb7 (patch)
tree: 7eb93beb78b7ee8ab3288c5b31a3f7c99c8f2572
parent: Adding upstream version 1.17~rc2. (diff)
download: lzip-62327bcaed81d8c02f11aec2c49f7d09c5edddb7.tar.xz
lzip-62327bcaed81d8c02f11aec2c49f7d09c5edddb7.zip
17 files changed, 643 insertions, 616 deletions
diff --git a/ChangeLog b/ChangeLog
index aa3faae..e3ebebe 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,17 +1,8 @@
-2015-05-25  Antonio Diaz Diaz  <antonio@gnu.org>
+2015-07-12  Antonio Diaz Diaz  <antonio@gnu.org>
 
-	* Version 1.17-rc2 released.
-	* lzip.texi: Added chapter 'Quality assurance'.
-
-2015-04-17  Antonio Diaz Diaz  <antonio@gnu.org>
-
-	* Version 1.17-rc1 released.
-	* main.cc (compress): Fixed spurious warning about uninitialized var.
-
-2015-03-26  Antonio Diaz Diaz  <antonio@gnu.org>
-
-	* Version 1.17-pre1 released.
+	* Version 1.17 released.
 	* Reorganization of the compression code.
+	* lzip.texi: Added chapter 'Quality assurance'.
 	* Makefile.in: Added new targets 'install*-compress'.
 
 2014-08-26  Antonio Diaz Diaz  <antonio@gnu.org>
diff --git a/README b/README
index 894b77a..8a31263 100644
--- a/README
+++ b/README
@@ -40,6 +40,13 @@ each file without exceeding the given limit. Keep in mind that the
 decompression memory requirement is affected at compression time by the
 choice of dictionary size limit.
 
+The amount of memory required for compression is about 1 or 2 times the
+dictionary size limit (1 if input file size is less than dictionary size
+limit, else 2) plus 9 times the dictionary size really used. The option
+'-0' is special and only requires about 1.5 MiB at most. The amount of
+memory required for decompression is about 46 kB larger than the
+dictionary size really used.
+
 When compressing, lzip replaces every file given in the command line
 with a compressed version of itself, with the name "original_name.lz".
 When decompressing, lzip attempts to guess the name for the decompressed
@@ -69,8 +76,8 @@ corresponding uncompressed files. Integrity testing of concatenated
 compressed files is also supported.
 
 Lzip can produce multi-member files and safely recover, with
-lziprecover, the undamaged members in case of file damage. Lzip can also
-split the compressed output in volumes of a given size, even when
+lziprecover, the undamaged members in case of file damage. Lzip can
+also split the compressed output in volumes of a given size, even when
 reading from standard input. This allows the direct creation of
 multivolume compressed tar archives.
 
@@ -88,7 +95,7 @@ used by lzip could be developed, and the resulting sequence could also
 be coded using the LZMA coding scheme.
 
 Lzip currently implements two variants of the LZMA algorithm; fast
-(used by option -0) and normal (used by all other compression levels).
+(used by option '-0') and normal (used by all other compression levels).
 
 The high compression of LZMA comes from combining two basic, well-proven
 compression ideas: sliding dictionaries (LZ77/78) and markov models (the
diff --git a/configure b/configure
index 2a09e4f..9845c11 100755
--- a/configure
+++ b/configure
@@ -6,7 +6,7 @@
 # to copy, distribute and modify it.
 
 pkgname=lzip
-pkgversion=1.17-rc2
+pkgversion=1.17
 progname=lzip
 srctrigger=doc/${pkgname}.texi
 
diff --git a/decoder.cc b/decoder.cc
index 5eb9221..113479a 100644
--- a/decoder.cc
+++ b/decoder.cc
@@ -43,7 +43,7 @@ void Pretty_print::operator()( const char * const msg ) const
       first_post = false;
       std::fprintf( stderr, "  %s: ", name_.c_str() );
       for( unsigned i = 0; i < longest_name - name_.size(); ++i )
-        std::fprintf( stderr, " " );
+        std::fputc( ' ', stderr );
       if( !msg ) std::fflush( stderr );
       }
     if( msg ) std::fprintf( stderr, "%s\n", msg );
@@ -62,7 +62,7 @@ int readblock( const int fd, uint8_t * const buf, const int size )
     {
     const int n = read( fd, buf + sz, size - sz );
     if( n > 0 ) sz += n;
-    else if( n == 0 ) break;				/* EOF */
+    else if( n == 0 ) break;				// EOF
     else if( errno != EINTR ) break;
     errno = 0;
     }
@@ -149,7 +149,7 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const
     if( verbosity >= 0 )
       {
       pp();
-      std::fprintf( stderr, "CRC mismatch; trailer says %08X, data CRC is %08X.\n",
+      std::fprintf( stderr, "CRC mismatch; trailer says %08X, data CRC is %08X\n",
                     trailer.data_crc(), crc() );
       }
     }
@@ -159,7 +159,7 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const
     if( verbosity >= 0 )
       {
       pp();
-      std::fprintf( stderr, "Data size mismatch; trailer says %llu, data size is %llu (0x%llX).\n",
+      std::fprintf( stderr, "Data size mismatch; trailer says %llu, data size is %llu (0x%llX)\n",
                     trailer.data_size(), data_position(), data_position() );
       }
     }
@@ -169,7 +169,7 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const
     if( verbosity >= 0 )
       {
       pp();
-      std::fprintf( stderr, "Member size mismatch; trailer says %llu, member size is %llu (0x%llX).\n",
+      std::fprintf( stderr, "Member size mismatch; trailer says %llu, member size is %llu (0x%llX)\n",
                     trailer.member_size(), member_size, member_size );
       }
     }
@@ -201,9 +201,9 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
   Bit_model bm_align[dis_align_size];
   Len_model match_len_model;
   Len_model rep_len_model;
-  unsigned rep0 = 0;		/* rep[0-3] latest four distances */
-  unsigned rep1 = 0;		/* used for efficient coding of */
-  unsigned rep2 = 0;		/* repeated distances */
+  unsigned rep0 = 0;		// rep[0-3] latest four distances
+  unsigned rep1 = 0;		// used for efficient coding of
+  unsigned rep2 = 0;		// repeated distances
   unsigned rep3 = 0;
   State state;
 
@@ -226,7 +226,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
                                        peek( rep0 ) ) );
         }
       }
-    else
+    else					// match or repeated match
       {
       int len;
       if( rdec.decode_bit( bm_rep[state()] ) != 0 )		// 2nd bit
@@ -255,7 +255,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
         state.set_rep();
         len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
         }
-      else
+      else					// match
         {
         const unsigned rep0_saved = rep0;
         len = min_match_len + rdec.decode_len( match_len_model, pos_state );
@@ -272,23 +272,23 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
             {
             rep0 += rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
             rep0 += rdec.decode_tree_reversed4( bm_align );
-            if( rep0 == 0xFFFFFFFFU )		/* marker found */
+            if( rep0 == 0xFFFFFFFFU )		// marker found
               {
               rep0 = rep0_saved;
               rdec.normalize();
               flush_data();
-              if( len == min_match_len )	/* End Of Stream marker */
+              if( len == min_match_len )	// End Of Stream marker
                 {
                 if( verify_trailer( pp ) ) return 0; else return 3;
                 }
-              if( len == min_match_len + 1 )	/* Sync Flush marker */
+              if( len == min_match_len + 1 )	// Sync Flush marker
                 {
                 rdec.load(); continue;
                 }
               if( verbosity >= 0 )
                 {
                 pp();
-                std::fprintf( stderr, "Unsupported marker code '%d'.\n", len );
+                std::fprintf( stderr, "Unsupported marker code '%d'\n", len );
                 }
               return 4;
               }
diff --git a/decoder.h b/decoder.h
index 9419669..98d42ce 100644
--- a/decoder.h
+++ b/decoder.h
@@ -19,12 +19,12 @@ class Range_decoder
   {
   enum { buffer_size = 16384 };
   unsigned long long partial_member_pos;
-  uint8_t * const buffer;	/* input buffer */
-  int pos;			/* current pos in buffer */
-  int stream_pos;		/* when reached, a new block must be read */
+  uint8_t * const buffer;	// input buffer
+  int pos;			// current pos in buffer
+  int stream_pos;		// when reached, a new block must be read
   uint32_t code;
   uint32_t range;
-  const int infd;		/* input file descriptor */
+  const int infd;		// input file descriptor
   bool at_stream_end;
 
   bool read_block();
@@ -213,11 +213,11 @@ class LZ_decoder
   Range_decoder & rdec;
   const unsigned dictionary_size;
   const int buffer_size;
-  uint8_t * const buffer;	/* output buffer */
-  int pos;			/* current pos in buffer */
-  int stream_pos;		/* first byte not yet written to file */
+  uint8_t * const buffer;	// output buffer
+  int pos;			// current pos in buffer
+  int stream_pos;		// first byte not yet written to file
   uint32_t crc_;
-  const int outfd;		/* output file descriptor */
+  const int outfd;		// output file descriptor
   const int member_version;
 
   void flush_data();
diff --git a/doc/lzip.1 b/doc/lzip.1
index 6b779f1..b6acca6 100644
--- a/doc/lzip.1
+++ b/doc/lzip.1
@@ -1,5 +1,5 @@
 .\" DO NOT MODIFY THIS FILE!  It was generated by help2man 1.46.1.
-.TH LZIP "1" "May 2015" "lzip 1.17-rc2" "User Commands"
+.TH LZIP "1" "July 2015" "lzip 1.17" "User Commands"
 .SH NAME
 lzip \- reduces the size of files
 .SH SYNOPSIS
@@ -28,7 +28,7 @@ decompress
 overwrite existing output files
 .TP
 \fB\-F\fR, \fB\-\-recompress\fR
-force recompression of compressed files
+force re\-compression of compressed files
 .TP
 \fB\-k\fR, \fB\-\-keep\fR
 keep (don't delete) input files
diff --git a/doc/lzip.info b/doc/lzip.info
index 6854503..f0aa011 100644
--- a/doc/lzip.info
+++ b/doc/lzip.info
@@ -11,16 +11,16 @@ File: lzip.info,  Node: Top,  Next: Introduction,  Up: (dir)
 Lzip Manual
 ***********
 
-This manual is for Lzip (version 1.17-rc2, 25 May 2015).
+This manual is for Lzip (version 1.17, 12 July 2015).
 
 * Menu:
 
 * Introduction::           Purpose and features of lzip
-* Algorithm::              How lzip compresses the data
 * Invoking lzip::          Command line interface
+* Quality assurance::      Design, development and testing of lzip
 * File format::            Detailed format of the compressed file
+* Algorithm::              How lzip compresses the data
 * Stream format::          Format of the LZMA stream in lzip files
-* Quality assurance::      Design, development and testing of lzip
 * Examples::               A small tutorial with examples
 * Problems::               Reporting bugs
 * Reference source code::  Source code illustrating stream format
@@ -33,7 +33,7 @@ This manual is for Lzip (version 1.17-rc2, 25 May 2015).
 copy, distribute and modify it.
 
 
-File: lzip.info,  Node: Introduction,  Next: Algorithm,  Prev: Top,  Up: Top
+File: lzip.info,  Node: Introduction,  Next: Invoking lzip,  Prev: Top,  Up: Top
 
 1 Introduction
 **************
@@ -51,7 +51,8 @@ availability:
      recovery means. The lziprecover program can repair bit-flip errors
      (one of the most common forms of data corruption) in lzip files,
      and provides data recovery capabilities, including error-checked
-     merging of damaged copies of a file.
+     merging of damaged copies of a file.  *note Data safety:
+     (lziprecover)Data safety.
 
    * The lzip format is as simple as possible (but not simpler). The
      lzip manual provides the code of a simple decompressor along with
@@ -85,6 +86,11 @@ which makes it safer than compressors returning ambiguous warning
 values (like gzip) when it is used as a back end for other programs
 like tar or zutils.
 
+   Lzip will automatically use the smallest possible dictionary size for
+each file without exceeding the given limit. Keep in mind that the
+decompression memory requirement is affected at compression time by the
+choice of dictionary size limit.
+
    The amount of memory required for compression is about 1 or 2 times
 the dictionary size limit (1 if input file size is less than dictionary
 size limit, else 2) plus 9 times the dictionary size really used. The
@@ -92,11 +98,6 @@ option '-0' is special and only requires about 1.5 MiB at most. The
 amount of memory required for decompression is about 46 kB larger than
 the dictionary size really used.
 
-   Lzip will automatically use the smallest possible dictionary size for
-each file without exceeding the given limit. Keep in mind that the
-decompression memory requirement is affected at compression time by the
-choice of dictionary size limit.
-
    When compressing, lzip replaces every file given in the command line
 with a compressed version of itself, with the name "original_name.lz".
 When decompressing, lzip attempts to guess the name for the decompressed
@@ -126,8 +127,8 @@ corresponding uncompressed files. Integrity testing of concatenated
 compressed files is also supported.
 
    Lzip can produce multi-member files and safely recover, with
-lziprecover, the undamaged members in case of file damage. Lzip can also
-split the compressed output in volumes of a given size, even when
+lziprecover, the undamaged members in case of file damage. Lzip can
+also split the compressed output in volumes of a given size, even when
 reading from standard input. This allows the direct creation of
 multivolume compressed tar archives.
 
@@ -136,75 +137,9 @@ automatically creating multi-member output. The members so created are
 large, about 2 PiB each.
 
 
-File: lzip.info,  Node: Algorithm,  Next: Invoking lzip,  Prev: Introduction,  Up: Top
-
-2 Algorithm
-***********
-
-In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a
-concrete algorithm; it is more like "any algorithm using the LZMA coding
-scheme". For example, the option '-0' of lzip uses the scheme in almost
-the simplest way possible; issuing the longest match it can find, or a
-literal byte if it can't find a match. Inversely, a much more elaborated
-way of finding coding sequences of minimum size than the one currently
-used by lzip could be developed, and the resulting sequence could also
-be coded using the LZMA coding scheme.
-
-   Lzip currently implements two variants of the LZMA algorithm; fast
-(used by option -0) and normal (used by all other compression levels).
-
-   The high compression of LZMA comes from combining two basic,
-well-proven compression ideas: sliding dictionaries (LZ77/78) and
-markov models (the thing used by every compression algorithm that uses
-a range encoder or similar order-0 entropy coder as its last stage)
-with segregation of contexts according to what the bits are used for.
-
-   Lzip is a two stage compressor. The first stage is a Lempel-Ziv
-coder, which reduces redundancy by translating chunks of data to their
-corresponding distance-length pairs. The second stage is a range encoder
-that uses a different probability model for each type of data;
-distances, lengths, literal bytes, etc.
-
-   Here is how it works, step by step:
-
-   1) The member header is written to the output stream.
-
-   2) The first byte is coded literally, because there are no previous
-bytes to which the match finder can refer to.
-
-   3) The main encoder advances to the next byte in the input data and
-calls the match finder.
-
-   4) The match finder fills an array with the minimum distances before
-the current byte where a match of a given length can be found.
-
-   5) Go back to step 3 until a sequence (formed of pairs, repeated
-distances and literal bytes) of minimum price has been formed. Where the
-price represents the number of output bits produced.
-
-   6) The range encoder encodes the sequence produced by the main
-encoder and sends the produced bytes to the output stream.
-
-   7) Go back to step 3 until the input data are finished or until the
-member or volume size limits are reached.
-
-   8) The range encoder is flushed.
-
-   9) The member trailer is written to the output stream.
-
-   10) If there are more data to compress, go back to step 1.
-
-
-The ideas embodied in lzip are due to (at least) the following people:
-Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for
-the definition of Markov chains), G.N.N. Martin (for the definition of
-range encoding), Igor Pavlov (for putting all the above together in
-LZMA), and Julian Seward (for bzip2's CLI).
-
-
-File: lzip.info,  Node: Invoking lzip,  Next: File format,  Prev: Algorithm,  Up: Top
+File: lzip.info,  Node: Invoking lzip,  Next: Quality assurance,  Prev: Introduction,  Up: Top
 
-3 Invoking lzip
+2 Invoking lzip
 ***************
 
 The format for running lzip is:
@@ -244,7 +179,7 @@ The format for running lzip is:
 
 '-F'
 '--recompress'
-     Force recompression of files whose name already has the '.lz' or
+     Force re-compression of files whose name already has the '.lz' or
      '.tlz' suffix.
 
 '-k'
@@ -362,7 +297,155 @@ invalid input file, 3 for an internal consistency error (eg, bug) which
 caused lzip to panic.
 
 
-File: lzip.info,  Node: File format,  Next: Stream format,  Prev: Invoking lzip,  Up: Top
+File: lzip.info,  Node: Quality assurance,  Next: File format,  Prev: Invoking lzip,  Up: Top
+
+3 Design, development and testing of lzip
+*****************************************
+
+There are two ways of constructing a software design. One way is to make
+it so simple that there are obviously no deficiencies and the other is
+to make it so complicated that there are no obvious deficiencies.
+-- C.A.R. Hoare
+
+   Lzip has been designed, written and tested with great care to be the
+standard general-purpose compressor for unix-like systems. This chapter
+describes the lessons learned from previous compressors (gzip and
+bzip2), and their application to the design of lzip.
+
+
+3.1 Format design
+=================
+
+When gzip was designed in 1992, computers and operating systems were
+much less capable than they are today. Gzip tried to work around some of
+those limitations, like 8.3 file names, with additional fields in its
+file format.
+
+   Today those limitations have mostly disappeared, and the format of
+gzip has proved to be unnecessarily complicated. It includes fields
+that were never used, others that have lost their usefulness, and finally
+others that have become too limited.
+
+   Bzip2 was designed 5 years later, and its format is simpler than the
+one of gzip.
+
+   Probably the worst defect of the gzip format from the point of view
+of data safety is the variable size of its header. If the byte at
+offset 3 (flags) of a gzip member gets corrupted, it may become very
+difficult to recover the data, even if the compressed blocks are
+intact, because it can't be known with certainty where the compressed
+blocks begin.
+
+   By contrast, the header of a lzip member has a fixed length of 6. The
+lzma stream in a lzip member always starts at offset 6, making it
+trivial to recover the data even if the whole header becomes corrupt.
+
+   Bzip2 also provides a header of fixed length and marks the begin and
+end of each compressed block with six magic bytes, making it possible to
+find the compressed blocks even in case of file damage. But bzip2 does
+not store the size of each compressed block, as lzip does.
+
+   Lzip provides better data recovery capabilities than any other
+gzip-like compressor because its format has been designed from the
+beginning to be simple and safe. It would be very difficult to write an
+automatic recovery tool like lziprecover for the gzip format. And, as
+far as I know, it has never been written.
+
+   The lzip format is designed for long-term archiving. Therefore it
+excludes any unneeded features that may interfere with the future
+extraction of the uncompressed data.
+
+
+3.1.1 Gzip format (mis)features not present in lzip
+---------------------------------------------------
+
+'Multiple algorithms'
+     Gzip provides a CM (Compression Method) field that has never been
+     used because it is a bad idea to begin with. New compression
+     methods may require additional fields, making it impossible to
+     implement new methods and, at the same time, keep the same format.
+     This field does not solve the problem of format proliferation; it
+     just makes the problem less obvious.
+
+'Optional fields in header'
+     Unless special precautions are taken, optional fields are
+     generally a bad idea because they produce a header of variable
+     size. The gzip header has 2 fields that, in addition to being
+     optional, are zero-terminated.  This means that if any byte inside
+     the field gets zeroed, or if the terminating zero gets altered,
+     gzip won't be able to find either the header CRC or the
+     compressed blocks.
+
+'Optional CRC for the header'
+     Using an optional checksum for the header is not only a bad idea,
+     it is an error; it may prevent the extraction of perfectly good
+     data. For example, if the checksum is used and the bit enabling it
+     is reset by a bit-flip, the header will appear to be intact (in
+     spite of being corrupt) while the compressed blocks will appear to
+     be totally unrecoverable (in spite of being intact). Very
+     misleading indeed.
+
+
+3.1.2 Lzip format improvements over gzip and bzip2
+--------------------------------------------------
+
+'64-bit size field'
+     Probably the most frequently reported shortcoming of the gzip
+     format is that it only stores the least significant 32 bits of the
+     uncompressed size. The size of any file larger than 4 GiB gets
+     truncated.
+
+     Bzip2 does not store the uncompressed size of the file.
+
+     The lzip format provides a 64-bit field for the uncompressed size.
+     Additionally, lzip produces multi-member output automatically when
+     the size is too large for a single member, allowing for an
+     unlimited uncompressed size.
+
+'Distributed index'
+     The lzip format provides a distributed index that, among other
+     things, helps plzip to decompress several times faster than pigz
+     and helps lziprecover do its job. Neither the gzip format nor the
+     bzip2 format provides an index.
+
+     A distributed index is safer and more scalable than a monolithic
+     index.  The monolithic index introduces a single point of failure
+     in the compressed file and may limit the number of members or the
+     total uncompressed size.
+
+
+3.2 Quality of implementation
+=============================
+
+'Multiple implementations'
+     Just like the lzip format provides 4 factor protection against
+     undetected data corruption, the development methodology of the lzip
+     family of compressors provides 3 factor protection against
+     undetected programming errors.
+
+     Three related but independent compressor implementations, lzip,
+     clzip and minilzip/lzlib, are developed concurrently. Every stable
+     release of any of them is subjected to a hundred hours of
+     intensive testing to verify that it produces identical output to
+     the other two. This guarantees that all three implement the same
+     algorithm, and makes it unlikely that any of them may contain
+     serious undiscovered errors. In fact, no errors have been
+     discovered in lzip since 2009.
+
+'Dictionary size'
+     Lzip automatically uses the smallest possible dictionary size for
+     each file. In addition to reducing the amount of memory required
+     for decompression, this feature also minimizes the probability of
+     being affected by RAM errors during compression.
+
+'Exit status'
+     Returning a warning status of 2 is a design flaw of compress that
+     leaked into the design of gzip. Both bzip2 and lzip are free from
+     this flaw.
+
+
+
+File: lzip.info,  Node: File format,  Next: Algorithm,  Prev: Quality assurance,  Up: Top
 
 4 File format
 *************
@@ -433,9 +516,75 @@ additional information before, between, or after them.
 
 
 
-File: lzip.info,  Node: Stream format,  Next: Quality assurance,  Prev: File format,  Up: Top
+File: lzip.info,  Node: Algorithm,  Next: Stream format,  Prev: File format,  Up: Top
+
+5 Algorithm
+***********
+
+In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a
+concrete algorithm; it is more like "any algorithm using the LZMA coding
+scheme". For example, the option '-0' of lzip uses the scheme in almost
+the simplest way possible; issuing the longest match it can find, or a
+literal byte if it can't find a match. Inversely, a much more elaborated
+way of finding coding sequences of minimum size than the one currently
+used by lzip could be developed, and the resulting sequence could also
+be coded using the LZMA coding scheme.
+
+   Lzip currently implements two variants of the LZMA algorithm; fast
+(used by option '-0') and normal (used by all other compression levels).
+
+   The high compression of LZMA comes from combining two basic,
+well-proven compression ideas: sliding dictionaries (LZ77/78) and
+markov models (the thing used by every compression algorithm that uses
+a range encoder or similar order-0 entropy coder as its last stage)
+with segregation of contexts according to what the bits are used for.
+
+   Lzip is a two stage compressor. The first stage is a Lempel-Ziv
+coder, which reduces redundancy by translating chunks of data to their
+corresponding distance-length pairs. The second stage is a range encoder
+that uses a different probability model for each type of data;
+distances, lengths, literal bytes, etc.
+
+   Here is how it works, step by step:
+
+   1) The member header is written to the output stream.
+
+   2) The first byte is coded literally, because there are no previous
+bytes to which the match finder can refer.
+
+   3) The main encoder advances to the next byte in the input data and
+calls the match finder.
+
+   4) The match finder fills an array with the minimum distances before
+the current byte where a match of a given length can be found.
+
+   5) Go back to step 3 until a sequence (formed of pairs, repeated
+distances and literal bytes) of minimum price has been formed. Where the
+price represents the number of output bits produced.
+
+   6) The range encoder encodes the sequence produced by the main
+encoder and sends the produced bytes to the output stream.
+
+   7) Go back to step 3 until the input data are finished or until the
+member or volume size limits are reached.
+
+   8) The range encoder is flushed.
+
+   9) The member trailer is written to the output stream.
+
+   10) If there are more data to compress, go back to step 1.
+
+
+The ideas embodied in lzip are due to (at least) the following people:
+Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for
+the definition of Markov chains), G.N.N. Martin (for the definition of
+range encoding), Igor Pavlov (for putting all the above together in
+LZMA), and Julian Seward (for bzip2's CLI).
+
+
+File: lzip.info,  Node: Stream format,  Next: Examples,  Prev: Algorithm,  Up: Top
 
-5 Format of the LZMA stream in lzip files
+6 Format of the LZMA stream in lzip files
 *****************************************
 
 The LZMA algorithm has three parameters, called "special LZMA
@@ -473,7 +622,7 @@ the lzip download directory. The source code of lzd is included in
 appendix A.  *note Reference source code::
 
 
-5.1 What is coded
+6.1 What is coded
 =================
 
 The LZMA stream includes literals, matches and repeated matches (matches
@@ -525,7 +674,7 @@ slot + direct_bits                   distances from 4 to 127
 slot + (direct_bits - 4) + 4 bits    distances from 128 to 2^32 - 1
 
 
-5.2 The coding contexts
+6.2 The coding contexts
 =======================
 
 These contexts ('Bit_model' in the source), are integers or arrays of
@@ -615,7 +764,7 @@ difference is found, the rest of the byte is decoded using the normal
 bit tree context. (See 'decode_matched' in the source).
 
 
-5.3 The range decoder
+6.3 The range decoder
 =====================
 
 The LZMA stream is consumed one byte at a time by the range decoder.
@@ -635,7 +784,7 @@ range decoder. This is done by shifting 5 bytes in the initialization of
 source).
 
 
-5.4 Decoding the LZMA stream
+6.4 Decoding the LZMA stream
 ============================
 
 After decoding the member header and obtaining the dictionary size, the
@@ -646,144 +795,7 @@ with the appropriate contexts to decode the different coding sequences
 Stream" marker is decoded.
 
 
-File: lzip.info,  Node: Quality assurance,  Next: Examples,  Prev: Stream format,  Up: Top
-
-6 Design, development and testing of lzip
-*****************************************
-
-There are two ways of constructing a software design. One way is to make
-it so simple that there are obviously no deficiencies and the other is
-to make it so complicated that there are no obvious deficiencies.
--- C.A.R. Hoare
-
-   Lzip has been designed, written and tested with great care to be the
-standard general-purpose compressor for unix-like systems. This chapter
-describes the lessons learned from previous compressors (gzip and
-bzip2), and their application to the design of lzip.
-
-
-6.1 Format design
-=================
-
-When gzip was designed in 1992, computers and operating systems were
-much less capable than they are today. Gzip tried to work around some of
-those limitations, like 8.3 file names, with additional fields in its
-file format.
-
-   Today those limitations have mostly disappeared, and the format of
-gzip has proved to be unnecessarily complicated. It includes fields
-that were never used, others that have lost its usefulness, and finally
-others that have become too limited.
-
-   Bzip2 was designed 5 years later, and its format is in some aspects
-simpler than the one of gzip. But bzip2 also shows complexities in its
-file format which slow down decompression and, in retrospect, are
-unnecessary.
-
-   Probably the worst defect of the gzip format from the point of view
-of data safety is the variable size of its header. If the byte at
-offset 3 (flags) of a gzip member gets corrupted, it mat become very
-difficult to recover the data, even if the compressed blocks are
-intact, because it can't be known with certainty where the compressed
-blocks begin.
-
-   By contrast, the lzma stream in a lzip member always starts at
-offset 6, making it trivial to recover the data even if the whole
-header becomes corrupt.
-
-   Lzip provides better data recovery capabilities than any other
-gzip-like compressor because its format has been designed from the
-beginning to be simple and safe. It would be very difficult to write an
-automatic recovery tool like lziprecover for the gzip format. And, as
-far as I know, it has never been writen.
-
-   The lzip format is designed for long-term archiving. Therefore it
-excludes any unneeded features that may interfere with the future
-extraction of the uncompressed data.
-
-
-6.1.1 Gzip format (mis)features not present in lzip
----------------------------------------------------
-
-'Multiple algorithms'
-     Gzip provides a CM (Compression Method) field that has never been
-     used because it is a bad idea to begin with. New compression
-     methods may require additional fields, making it impossible to
-     implement new methods and, at the same time, keep the same format.
-     This field does not solve the problem of format proliferation; it
-     just makes the problem less obvious.
-
-'Optional fields in header'
-     Unless special precautions are taken, optional fields are
-     generally a bad idea because they produce a header of variable
-     size. The gzip header has 2 fields that, in addition to being
-     optional, are zero-terminated.  This means that if any byte inside
-     the field gets zeroed, or if the terminating zero gets altered,
-     gzip won't be able to find neither the header CRC nor the
-     compressed blocks.
-
-     Using an optional checksum for the header is not only a bad idea,
-     it is an error; it may prevent the extraction of perfectly good
-     data. For example, if the checksum is used and the bit enabling it
-     is reset by a bit-flip, the header will appear to be intact (in
-     spite of being corrupt) while the compressed blocks will appear to
-     be totally unrecoverable (in spite of being intact). Very
-     misleading indeed.
-
-
-6.1.2 Lzip format improvements over gzip
-----------------------------------------
-
-'64-bit size field'
-     Probably the most frequently reported shortcoming of the gzip
-     format is that it only stores the least significant 32 bits of the
-     uncompressed size. The size of any file larger than 4 GiB gets
-     truncated.
-
-     The lzip format provides a 64-bit field for the uncompressed size.
-     Additionaly, lzip produces multi-member output automatically when
-     the size is too large for a single member, allowing an unlimited
-     uncompressed size.
-
-'Distributed index'
-     The lzip format provides a distributed index that, among other
-     things, helps plzip to decompress several times faster than pigz
-     and helps lziprecover do its job. The gzip format does not provide
-     an index.
-
-     A distributed index is safer and more scalable than a monolithic
-     index.  The monolithic index introduces a single point of failure
-     in the compressed file and may limit the number of members or the
-     total uncompressed size.
-
-
-6.2 Quality of implementation
-=============================
-
-Three related but independent compressor implementations, lzip, clzip
-and minilzip/lzlib, are developed concurrently. Every stable release of
-any of them is subjected to a hundred hours of intensive testing to
-verify that it produces identical output to the other two. This
-guarantees that all three implement the same algorithm, and makes it
-unlikely that any of them may contain serious undiscovered errors. In
-fact, no errors have been discovered in lzip since 2009.
-
-   Just like the lzip format provides 4 factor protection against
-undetected data corruption, the development methodology described above
-provides 3 factor protection against undetected programming errors in
-lzip.
-
-   Lzip automatically uses the smallest possible dictionary size for
-each file. In addition to reducing the amount of memory required for
-decompression, this feature also minimizes the probability of being
-affected by RAM errors during compression.
-
-   Returning a warning status of 2 is a design flaw of compress that
-leaked into the design of gzip. Both bzip2 and lzip are free form this
-flaw.
-
-
-File: lzip.info,  Node: Examples,  Next: Problems,  Prev: Quality assurance,  Up: Top
+File: lzip.info,  Node: Examples,  Next: Problems,  Prev: Stream format,  Up: Top
 
 7 A small tutorial with examples
 ********************************
@@ -876,7 +888,7 @@ File: lzip.info,  Node: Reference source code,  Next: Concept index,  Prev: Prob
 Appendix A Reference source code
 ********************************
 
-/*  Lzd - Educational decompressor for lzip files
+/*  Lzd - Educational decompressor for the lzip format
     Copyright (C) 2013-2015 Antonio Diaz Diaz.
 
     This program is free software: you have unlimited permission
@@ -1133,7 +1145,7 @@ class LZ_decoder
     }
 
 public:
-  LZ_decoder( const unsigned dict_size )
+  explicit LZ_decoder( const unsigned dict_size )
     :
     partial_data_pos( 0 ),
     dictionary_size( dict_size ),
@@ -1160,7 +1172,7 @@ void LZ_decoder::flush_data()
     crc32.update_buf( crc_, buffer + stream_pos, size );
     errno = 0;
     if( std::fwrite( buffer + stream_pos, 1, size, stdout ) != size )
-      { std::fprintf( stderr, "Write error: %s.\n", std::strerror( errno ) );
+      { std::fprintf( stderr, "Write error: %s\n", std::strerror( errno ) );
         std::exit( 1 ); }
     if( pos >= dictionary_size ) { partial_data_pos += pos; pos = 0; }
     stream_pos = pos;
@@ -1202,7 +1214,7 @@ bool LZ_decoder::decode_member()		// Returns false if error
         put_byte( rdec.decode_matched( bm, peek( rep0 ) ) );
       state.set_char();
       }
-    else
+    else					// match or repeated match
       {
       int len;
       if( rdec.decode_bit( bm_rep[state()] ) != 0 )		// 2nd bit
@@ -1231,7 +1243,7 @@ bool LZ_decoder::decode_member()		// Returns false if error
         state.set_rep();
         len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
         }
-      else
+      else					// match
         {
         rep3 = rep2; rep2 = rep1; rep1 = rep0;
         len = min_match_len + rdec.decode_len( match_len_model, pos_state );
@@ -1273,7 +1285,7 @@ int main( const int argc, const char * const argv[] )
   {
   if( argc > 1 )
     {
-    std::printf( "Lzd %s - Educational decompressor for lzip files.\n",
+    std::printf( "Lzd %s - Educational decompressor for the lzip format.\n",
                  PROGVERSION );
     std::printf( "Study the source to learn how a lzip decompressor works.\n"
                  "See the lzip manual for an explanation of the code.\n"
@@ -1300,19 +1312,19 @@ int main( const int argc, const char * const argv[] )
     if( std::feof( stdin ) || std::memcmp( header, "LZIP\x01", 5 ) != 0 )
       {
       if( first_member )
-        { std::fprintf( stderr, "Bad magic number (file not in lzip format)\n" );
+        { std::fputs( "Bad magic number (file not in lzip format).\n", stderr );
           return 2; }
       break;
       }
     unsigned dict_size = 1 << ( header[5] & 0x1F );
     dict_size -= ( dict_size / 16 ) * ( ( header[5] >> 5 ) & 7 );
     if( dict_size < min_dictionary_size || dict_size > max_dictionary_size )
-      { std::fprintf( stderr, "Invalid dictionary size in member header\n" );
+      { std::fputs( "Invalid dictionary size in member header.\n", stderr );
         return 2; }
 
     LZ_decoder decoder( dict_size );
     if( !decoder.decode_member() )
-      { std::fprintf( stderr, "Data error\n" ); return 2; }
+      { std::fputs( "Data error\n", stderr ); return 2; }
 
     File_trailer trailer;
     for( int i = 0; i < 20; ++i ) trailer[i] = std::getc( stdin );
@@ -1321,11 +1333,11 @@ int main( const int argc, const char * const argv[] )
     unsigned long long data_size = 0;
     for( int i = 11; i >= 4; --i ) { data_size <<= 8; data_size += trailer[i]; }
     if( crc != decoder.crc() || data_size != decoder.data_position() )
-      { std::fprintf( stderr, "CRC error\n" ); return 2; }
+      { std::fputs( "CRC error\n", stderr ); return 2; }
     }
 
   if( std::fclose( stdout ) != 0 )
-    { std::fprintf( stderr, "Can't close stdout: %s.\n", std::strerror( errno ) );
+    { std::fprintf( stderr, "Can't close stdout: %s\n", std::strerror( errno ) );
       return 1; }
   return 0;
   }
@@ -1357,16 +1369,16 @@ Concept index
 
 Tag Table:
 Node: Top208
-Node: Introduction1090
-Node: Algorithm6008
-Node: Invoking lzip8833
-Node: File format14421
-Node: Stream format16806
-Node: Quality assurance26247
-Node: Examples32269
-Node: Problems34230
-Node: Reference source code34760
-Node: Concept index48358
+Node: Introduction1087
+Node: Invoking lzip6060
+Node: Quality assurance11658
+Node: File format18171
+Node: Algorithm20556
+Node: Stream format23382
+Node: Examples32812
+Node: Problems34769
+Node: Reference source code35299
+Node: Concept index48952
 
 End Tag Table
 
diff --git a/doc/lzip.texi b/doc/lzip.texi
index ac44ee9..69f44ae 100644
--- a/doc/lzip.texi
+++ b/doc/lzip.texi
@@ -6,8 +6,8 @@
 @finalout
 @c %**end of header
 
-@set UPDATED 25 May 2015
-@set VERSION 1.17-rc2
+@set UPDATED 12 July 2015
+@set VERSION 1.17
 
 @dircategory Data Compression
 @direntry
@@ -36,11 +36,11 @@ This manual is for Lzip (version @value{VERSION}, @value{UPDATED}).
 
 @menu
 * Introduction::           Purpose and features of lzip
-* Algorithm::              How lzip compresses the data
 * Invoking lzip::          Command line interface
+* Quality assurance::      Design, development and testing of lzip
 * File format::            Detailed format of the compressed file
+* Algorithm::              How lzip compresses the data
 * Stream format::          Format of the LZMA stream in lzip files
-* Quality assurance::      Design, development and testing of lzip
 * Examples::               A small tutorial with examples
 * Problems::               Reporting bugs
 * Reference source code::  Source code illustrating stream format
@@ -70,10 +70,14 @@ availability:
 @itemize @bullet
 @item
 The lzip format provides very safe integrity checking and some data
-recovery means. The lziprecover program can repair bit-flip errors (one
-of the most common forms of data corruption) in lzip files, and provides
-data recovery capabilities, including error-checked merging of damaged
-copies of a file.
+recovery means. The
+@uref{http://www.nongnu.org/lzip/manual/lziprecover_manual.html#Data-safety,,lziprecover}
+program can repair bit-flip errors (one of the most common forms of data
+corruption) in lzip files, and provides data recovery capabilities,
+including error-checked merging of damaged copies of a file.
+@ifnothtml
+@ref{Data safety,,,lziprecover}.
+@end ifnothtml
 
 @item
 The lzip format is as simple as possible (but not simpler). The lzip
@@ -109,6 +113,11 @@ makes it safer than compressors returning ambiguous warning values (like
 gzip) when it is used as a back end for other programs like tar or
 zutils.
 
+Lzip will automatically use the smallest possible dictionary size for
+each file without exceeding the given limit. Keep in mind that the
+decompression memory requirement is affected at compression time by the
+choice of dictionary size limit.
+
 The amount of memory required for compression is about 1 or 2 times the
 dictionary size limit (1 if input file size is less than dictionary size
 limit, else 2) plus 9 times the dictionary size really used. The option
@@ -116,11 +125,6 @@ limit, else 2) plus 9 times the dictionary size really used. The option
 of memory required for decompression is about 46 kB larger than the
 dictionary size really used.
 
-Lzip will automatically use the smallest possible dictionary size for
-each file without exceeding the given limit. Keep in mind that the
-decompression memory requirement is affected at compression time by the
-choice of dictionary size limit.
-
 When compressing, lzip replaces every file given in the command line
 with a compressed version of itself, with the name "original_name.lz".
 When decompressing, lzip attempts to guess the name for the decompressed
@@ -152,8 +156,8 @@ corresponding uncompressed files. Integrity testing of concatenated
 compressed files is also supported.
 
 Lzip can produce multi-member files and safely recover, with
-lziprecover, the undamaged members in case of file damage. Lzip can also
-split the compressed output in volumes of a given size, even when
+lziprecover, the undamaged members in case of file damage. Lzip can
+also split the compressed output in volumes of a given size, even when
 reading from standard input. This allows the direct creation of
 multivolume compressed tar archives.
 
@@ -162,72 +166,6 @@ automatically creating multi-member output. The members so created are
 large, about 2 PiB each.
 
 
-@node Algorithm
-@chapter Algorithm
-@cindex algorithm
-
-In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a
-concrete algorithm; it is more like "any algorithm using the LZMA coding
-scheme". For example, the option '-0' of lzip uses the scheme in almost
-the simplest way possible; issuing the longest match it can find, or a
-literal byte if it can't find a match. Inversely, a much more elaborated
-way of finding coding sequences of minimum size than the one currently
-used by lzip could be developed, and the resulting sequence could also
-be coded using the LZMA coding scheme.
-
-Lzip currently implements two variants of the LZMA algorithm; fast
-(used by option -0) and normal (used by all other compression levels).
-
-The high compression of LZMA comes from combining two basic, well-proven
-compression ideas: sliding dictionaries (LZ77/78) and markov models (the
-thing used by every compression algorithm that uses a range encoder or
-similar order-0 entropy coder as its last stage) with segregation of
-contexts according to what the bits are used for.
-
-Lzip is a two stage compressor. The first stage is a Lempel-Ziv coder,
-which reduces redundancy by translating chunks of data to their
-corresponding distance-length pairs. The second stage is a range encoder
-that uses a different probability model for each type of data;
-distances, lengths, literal bytes, etc.
-
-Here is how it works, step by step:
-
-1) The member header is written to the output stream.
-
-2) The first byte is coded literally, because there are no previous
-bytes to which the match finder can refer to.
-
-3) The main encoder advances to the next byte in the input data and
-calls the match finder.
-
-4) The match finder fills an array with the minimum distances before the
-current byte where a match of a given length can be found.
-
-5) Go back to step 3 until a sequence (formed of pairs, repeated
-distances and literal bytes) of minimum price has been formed. Where the
-price represents the number of output bits produced.
-
-6) The range encoder encodes the sequence produced by the main encoder
-and sends the produced bytes to the output stream.
-
-7) Go back to step 3 until the input data are finished or until the
-member or volume size limits are reached.
-
-8) The range encoder is flushed.
-
-9) The member trailer is written to the output stream.
-
-10) If there are more data to compress, go back to step 1.
-
-@sp 1
-@noindent
-The ideas embodied in lzip are due to (at least) the following people:
-Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for
-the definition of Markov chains), G.N.N. Martin (for the definition of
-range encoding), Igor Pavlov (for putting all the above together in
-LZMA), and Julian Seward (for bzip2's CLI).
-
-
 @node Invoking lzip
 @chapter Invoking lzip
 @cindex invoking
@@ -274,7 +212,7 @@ Force overwrite of output files.
 
 @item -F
 @itemx --recompress
-Force recompression of files whose name already has the @samp{.lz} or
+Force re-compression of files whose name already has the @samp{.lz} or
 @samp{.tlz} suffix.
 
 @item -k
@@ -392,6 +330,157 @@ invalid input file, 3 for an internal consistency error (eg, bug) which
 caused lzip to panic.
 
 
+@node Quality assurance
+@chapter Design, development and testing of lzip
+@cindex quality assurance
+
+There are two ways of constructing a software design. One way is to make
+it so simple that there are obviously no deficiencies and the other is
+to make it so complicated that there are no obvious deficiencies.@*
+--- C.A.R. Hoare
+
+Lzip has been designed, written and tested with great care to be the
+standard general-purpose compressor for unix-like systems. This chapter
+describes the lessons learned from previous compressors (gzip and
+bzip2), and their application to the design of lzip.
+
+@sp 1
+@section Format design
+
+When gzip was designed in 1992, computers and operating systems were
+much less capable than they are today. Gzip tried to work around some of
+those limitations, like 8.3 file names, with additional fields in its
+file format.
+
+Today those limitations have mostly disappeared, and the format of gzip
+has proved to be unnecessarily complicated. It includes fields that were
+never used, others that have lost their usefulness, and finally others
+that have become too limited.
+
+Bzip2 was designed 5 years later, and its format is simpler than the one
+of gzip.
+
+Probably the worst defect of the gzip format from the point of view of
+data safety is the variable size of its header. If the byte at offset 3
+(flags) of a gzip member gets corrupted, it may become very difficult to
+recover the data, even if the compressed blocks are intact, because it
+can't be known with certainty where the compressed blocks begin.
+
+By contrast, the header of a lzip member has a fixed length of 6 bytes. The
+LZMA stream in a lzip member always starts at offset 6, making it
+trivial to recover the data even if the whole header becomes corrupt.
+
+Bzip2 also provides a header of fixed length and marks the beginning and end
+of each compressed block with six magic bytes, making it possible to
+find the compressed blocks even in case of file damage. But bzip2 does
+not store the size of each compressed block, as lzip does.
+
+Lzip provides better data recovery capabilities than any other gzip-like
+compressor because its format has been designed from the beginning to be
+simple and safe. It would be very difficult to write an automatic
+recovery tool like lziprecover for the gzip format. And, as far as I
+know, it has never been written.
+
+The lzip format is designed for long-term archiving. Therefore it
+excludes any unneeded features that may interfere with the future
+extraction of the uncompressed data.
+
+@sp 1
+@subsection Gzip format (mis)features not present in lzip
+
+@table @samp
+@item Multiple algorithms
+
+Gzip provides a CM (Compression Method) field that has never been used
+because it is a bad idea to begin with. New compression methods may
+require additional fields, making it impossible to implement new methods
+and, at the same time, keep the same format. This field does not solve
+the problem of format proliferation; it just makes the problem less
+obvious.
+
+@item Optional fields in header
+
+Unless special precautions are taken, optional fields are generally a
+bad idea because they produce a header of variable size. The gzip header
+has 2 fields that, in addition to being optional, are zero-terminated.
+This means that if any byte inside the field gets zeroed, or if the
+terminating zero gets altered, gzip will be able to find neither the
+header CRC nor the compressed blocks.
+
+@item Optional CRC for the header
+
+Using an optional checksum for the header is not only a bad idea, it is
+an error; it may prevent the extraction of perfectly good data. For
+example, if the checksum is used and the bit enabling it is reset by a
+bit-flip, the header will appear to be intact (in spite of being
+corrupt) while the compressed blocks will appear to be totally
+unrecoverable (in spite of being intact). Very misleading indeed.
+
+@end table
+
+@subsection Lzip format improvements over gzip and bzip2
+
+@table @samp
+@item 64-bit size field
+
+Probably the most frequently reported shortcoming of the gzip format is
+that it only stores the least significant 32 bits of the uncompressed
+size. The size of any file larger than 4 GiB gets truncated.
+
+Bzip2 does not store the uncompressed size of the file.
+
+The lzip format provides a 64-bit field for the uncompressed size.
+Additionally, lzip produces multi-member output automatically when the
+size is too large for a single member, allowing for an unlimited
+uncompressed size.
+
+@item Distributed index
+
+The lzip format provides a distributed index that, among other things,
+helps plzip to decompress several times faster than pigz and helps
+lziprecover do its job. Neither the gzip format nor the bzip2 format
+provides an index.
+
+A distributed index is safer and more scalable than a monolithic index.
+The monolithic index introduces a single point of failure in the
+compressed file and may limit the number of members or the total
+uncompressed size.
+
+@end table
+
+@section Quality of implementation
+
+@table @samp
+@item Multiple implementations
+
+Just like the lzip format provides 4 factor protection against
+undetected data corruption, the development methodology of the lzip
+family of compressors provides 3 factor protection against undetected
+programming errors.
+
+Three related but independent compressor implementations, lzip, clzip
+and minilzip/lzlib, are developed concurrently. Every stable release of
+any of them is subjected to a hundred hours of intensive testing to
+verify that it produces identical output to the other two. This
+guarantees that all three implement the same algorithm, and makes it
+unlikely that any of them may contain serious undiscovered errors. In
+fact, no errors have been discovered in lzip since 2009.
+
+@item Dictionary size
+
+Lzip automatically uses the smallest possible dictionary size for each
+file. In addition to reducing the amount of memory required for
+decompression, this feature also minimizes the probability of being
+affected by RAM errors during compression.
+
+@item Exit status
+
+Returning a warning status of 2 is a design flaw of compress that leaked
+into the design of gzip. Both bzip2 and lzip are free from this flaw.
+
+@end table
+
+
 @node File format
 @chapter File format
 @cindex file format
@@ -468,6 +557,72 @@ facilitates safe recovery of undamaged members from multi-member files.
 @end table
 
 
+@node Algorithm
+@chapter Algorithm
+@cindex algorithm
+
+In spite of its name (Lempel-Ziv-Markov chain-Algorithm), LZMA is not a
+concrete algorithm; it is more like "any algorithm using the LZMA coding
+scheme". For example, the option @samp{-0} of lzip uses the scheme in almost
+the simplest way possible; issuing the longest match it can find, or a
+literal byte if it can't find a match. Conversely, a much more elaborate
+way of finding coding sequences of minimum size than the one currently
+used by lzip could be developed, and the resulting sequence could also
+be coded using the LZMA coding scheme.
+
+Lzip currently implements two variants of the LZMA algorithm; fast
+(used by option @samp{-0}) and normal (used by all other compression levels).
+
+The high compression of LZMA comes from combining two basic, well-proven
+compression ideas: sliding dictionaries (LZ77/78) and Markov models (the
+thing used by every compression algorithm that uses a range encoder or
+similar order-0 entropy coder as its last stage) with segregation of
+contexts according to what the bits are used for.
+
+Lzip is a two stage compressor. The first stage is a Lempel-Ziv coder,
+which reduces redundancy by translating chunks of data to their
+corresponding distance-length pairs. The second stage is a range encoder
+that uses a different probability model for each type of data;
+distances, lengths, literal bytes, etc.
+
+Here is how it works, step by step:
+
+1) The member header is written to the output stream.
+
+2) The first byte is coded literally, because there are no previous
+bytes to which the match finder can refer.
+
+3) The main encoder advances to the next byte in the input data and
+calls the match finder.
+
+4) The match finder fills an array with the minimum distances before the
+current byte where a match of a given length can be found.
+
+5) Go back to step 3 until a sequence (formed of pairs, repeated
+distances and literal bytes) of minimum price has been formed, where the
+price represents the number of output bits produced.
+
+6) The range encoder encodes the sequence produced by the main encoder
+and sends the produced bytes to the output stream.
+
+7) Go back to step 3 until the input data are finished or until the
+member or volume size limits are reached.
+
+8) The range encoder is flushed.
+
+9) The member trailer is written to the output stream.
+
+10) If there are more data to compress, go back to step 1.
+
+@sp 1
+@noindent
+The ideas embodied in lzip are due to (at least) the following people:
+Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for
+the definition of Markov chains), G.N.N. Martin (for the definition of
+range encoding), Igor Pavlov (for putting all the above together in
+LZMA), and Julian Seward (for bzip2's CLI).
+
+
 @node Stream format
 @chapter Format of the LZMA stream in lzip files
 @cindex format of the LZMA stream
@@ -690,140 +845,6 @@ sequences (matches, repeated matches, and literal bytes), until the "End
 Of Stream" marker is decoded.
 
 
-@node Quality assurance
-@chapter Design, development and testing of lzip
-@cindex quality assurance
-
-There are two ways of constructing a software design. One way is to make
-it so simple that there are obviously no deficiencies and the other is
-to make it so complicated that there are no obvious deficiencies.@*
---- C.A.R. Hoare
-
-Lzip has been designed, written and tested with great care to be the
-standard general-purpose compressor for unix-like systems. This chapter
-describes the lessons learned from previous compressors (gzip and
-bzip2), and their application to the design of lzip.
-
-@sp 1
-@section Format design
-
-When gzip was designed in 1992, computers and operating systems were
-much less capable than they are today. Gzip tried to work around some of
-those limitations, like 8.3 file names, with additional fields in its
-file format.
-
-Today those limitations have mostly disappeared, and the format of gzip
-has proved to be unnecessarily complicated. It includes fields that were
-never used, others that have lost its usefulness, and finally others
-that have become too limited.
-
-Bzip2 was designed 5 years later, and its format is in some aspects
-simpler than the one of gzip. But bzip2 also shows complexities in its
-file format which slow down decompression and, in retrospect, are
-unnecessary.
-
-Probably the worst defect of the gzip format from the point of view of
-data safety is the variable size of its header. If the byte at offset 3
-(flags) of a gzip member gets corrupted, it mat become very difficult to
-recover the data, even if the compressed blocks are intact, because it
-can't be known with certainty where the compressed blocks begin.
-
-By contrast, the lzma stream in a lzip member always starts at offset 6,
-making it trivial to recover the data even if the whole header becomes
-corrupt.
-
-Lzip provides better data recovery capabilities than any other gzip-like
-compressor because its format has been designed from the beginning to be
-simple and safe. It would be very difficult to write an automatic
-recovery tool like lziprecover for the gzip format. And, as far as I
-know, it has never been writen.
-
-The lzip format is designed for long-term archiving. Therefore it
-excludes any unneeded features that may interfere with the future
-extraction of the uncompressed data.
-
-@sp 1
-@subsection Gzip format (mis)features not present in lzip
-
-@table @samp
-@item Multiple algorithms
-
-Gzip provides a CM (Compression Method) field that has never been used
-because it is a bad idea to begin with. New compression methods may
-require additional fields, making it impossible to implement new methods
-and, at the same time, keep the same format. This field does not solve
-the problem of format proliferation; it just makes the problem less
-obvious.
-
-@item Optional fields in header
-
-Unless special precautions are taken, optional fields are generally a
-bad idea because they produce a header of variable size. The gzip header
-has 2 fields that, in addition to being optional, are zero-terminated.
-This means that if any byte inside the field gets zeroed, or if the
-terminating zero gets altered, gzip won't be able to find neither the
-header CRC nor the compressed blocks.
-
-Using an optional checksum for the header is not only a bad idea, it is
-an error; it may prevent the extraction of perfectly good data. For
-example, if the checksum is used and the bit enabling it is reset by a
-bit-flip, the header will appear to be intact (in spite of being
-corrupt) while the compressed blocks will appear to be totally
-unrecoverable (in spite of being intact). Very misleading indeed.
-
-@end table
-
-@subsection Lzip format improvements over gzip
-
-@table @samp
-@item 64-bit size field
-
-Probably the most frequently reported shortcoming of the gzip format is
-that it only stores the least significant 32 bits of the uncompressed
-size. The size of any file larger than 4 GiB gets truncated.
-
-The lzip format provides a 64-bit field for the uncompressed size.
-Additionaly, lzip produces multi-member output automatically when the
-size is too large for a single member, allowing an unlimited
-uncompressed size.
-
-@item Distributed index
-
-The lzip format provides a distributed index that, among other things,
-helps plzip to decompress several times faster than pigz and helps
-lziprecover do its job. The gzip format does not provide an index.
-
-A distributed index is safer and more scalable than a monolithic index.
-The monolithic index introduces a single point of failure in the
-compressed file and may limit the number of members or the total
-uncompressed size.
-
-@end table
-
-@section Quality of implementation
-
-Three related but independent compressor implementations, lzip, clzip
-and minilzip/lzlib, are developed concurrently. Every stable release of
-any of them is subjected to a hundred hours of intensive testing to
-verify that it produces identical output to the other two. This
-guarantees that all three implement the same algorithm, and makes it
-unlikely that any of them may contain serious undiscovered errors. In
-fact, no errors have been discovered in lzip since 2009.
-
-Just like the lzip format provides 4 factor protection against
-undetected data corruption, the development methodology described above
-provides 3 factor protection against undetected programming errors in
-lzip.
-
-Lzip automatically uses the smallest possible dictionary size for each
-file. In addition to reducing the amount of memory required for
-decompression, this feature also minimizes the probability of being
-affected by RAM errors during compression.
-
-Returning a warning status of 2 is a design flaw of compress that leaked
-into the design of gzip. Both bzip2 and lzip are free form this flaw.
-
-
 @node Examples
 @chapter A small tutorial with examples
 @cindex examples
@@ -947,7 +968,7 @@ find by running @w{@code{lzip --version}}.
 @cindex reference source code
 
 @verbatim
-/*  Lzd - Educational decompressor for lzip files
+/*  Lzd - Educational decompressor for the lzip format
     Copyright (C) 2013-2015 Antonio Diaz Diaz.
 
     This program is free software: you have unlimited permission
@@ -1204,7 +1225,7 @@ class LZ_decoder
     }
 
 public:
-  LZ_decoder( const unsigned dict_size )
+  explicit LZ_decoder( const unsigned dict_size )
     :
     partial_data_pos( 0 ),
     dictionary_size( dict_size ),
@@ -1231,7 +1252,7 @@ void LZ_decoder::flush_data()
     crc32.update_buf( crc_, buffer + stream_pos, size );
     errno = 0;
     if( std::fwrite( buffer + stream_pos, 1, size, stdout ) != size )
-      { std::fprintf( stderr, "Write error: %s.\n", std::strerror( errno ) );
+      { std::fprintf( stderr, "Write error: %s\n", std::strerror( errno ) );
         std::exit( 1 ); }
     if( pos >= dictionary_size ) { partial_data_pos += pos; pos = 0; }
     stream_pos = pos;
@@ -1273,7 +1294,7 @@ bool LZ_decoder::decode_member()		// Returns false if error
         put_byte( rdec.decode_matched( bm, peek( rep0 ) ) );
       state.set_char();
       }
-    else
+    else					// match or repeated match
       {
       int len;
       if( rdec.decode_bit( bm_rep[state()] ) != 0 )		// 2nd bit
@@ -1302,7 +1323,7 @@ bool LZ_decoder::decode_member()		// Returns false if error
         state.set_rep();
         len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
         }
-      else
+      else					// match
         {
         rep3 = rep2; rep2 = rep1; rep1 = rep0;
         len = min_match_len + rdec.decode_len( match_len_model, pos_state );
@@ -1344,7 +1365,7 @@ int main( const int argc, const char * const argv[] )
   {
   if( argc > 1 )
     {
-    std::printf( "Lzd %s - Educational decompressor for lzip files.\n",
+    std::printf( "Lzd %s - Educational decompressor for the lzip format.\n",
                  PROGVERSION );
     std::printf( "Study the source to learn how a lzip decompressor works.\n"
                  "See the lzip manual for an explanation of the code.\n"
@@ -1371,19 +1392,19 @@ int main( const int argc, const char * const argv[] )
     if( std::feof( stdin ) || std::memcmp( header, "LZIP\x01", 5 ) != 0 )
       {
       if( first_member )
-        { std::fprintf( stderr, "Bad magic number (file not in lzip format)\n" );
+        { std::fputs( "Bad magic number (file not in lzip format).\n", stderr );
           return 2; }
       break;
       }
     unsigned dict_size = 1 << ( header[5] & 0x1F );
     dict_size -= ( dict_size / 16 ) * ( ( header[5] >> 5 ) & 7 );
     if( dict_size < min_dictionary_size || dict_size > max_dictionary_size )
-      { std::fprintf( stderr, "Invalid dictionary size in member header\n" );
+      { std::fputs( "Invalid dictionary size in member header.\n", stderr );
         return 2; }
 
     LZ_decoder decoder( dict_size );
     if( !decoder.decode_member() )
-      { std::fprintf( stderr, "Data error\n" ); return 2; }
+      { std::fputs( "Data error\n", stderr ); return 2; }
 
     File_trailer trailer;
     for( int i = 0; i < 20; ++i ) trailer[i] = std::getc( stdin );
@@ -1392,11 +1413,11 @@ int main( const int argc, const char * const argv[] )
     unsigned long long data_size = 0;
     for( int i = 11; i >= 4; --i ) { data_size <<= 8; data_size += trailer[i]; }
     if( crc != decoder.crc() || data_size != decoder.data_position() )
-      { std::fprintf( stderr, "CRC error\n" ); return 2; }
+      { std::fputs( "CRC error\n", stderr ); return 2; }
     }
 
   if( std::fclose( stdout ) != 0 )
-    { std::fprintf( stderr, "Can't close stdout: %s.\n", std::strerror( errno ) );
+    { std::fprintf( stderr, "Can't close stdout: %s\n", std::strerror( errno ) );
       return 1; }
   return 0;
   }
diff --git a/encoder.cc b/encoder.cc
index 3e707f3..51c0069 100644
--- a/encoder.cc
+++ b/encoder.cc
@@ -75,7 +75,7 @@ int LZ_encoder::get_match_pairs( Pair * pairs )
       while( maxlen < len_limit && data[maxlen-delta] == data[maxlen] )
         ++maxlen;
       pairs[num_pairs-1].len = maxlen;
-      if( maxlen >= len_limit ) pairs = 0;	/* done. now just skip */
+      if( maxlen >= len_limit ) pairs = 0;	// done. now just skip
       }
     if( maxlen < 3 ) maxlen = 3;
     }
@@ -269,10 +269,10 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
     }
 
   int cur = 0;
-  while( true )				/* price optimization loop */
+  while( true )				// price optimization loop
     {
     move_pos();
-    if( ++cur >= num_trials )		/* no more initialized trials */
+    if( ++cur >= num_trials )		// no more initialized trials
       {
       backward( cur );
       return cur;
@@ -287,7 +287,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
       return cur;
       }
 
-    /* give final values to current trial */
+    // give final values to current trial
     Trial & cur_trial = trials[cur];
     State cur_state;
     {
@@ -298,7 +298,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
     if( prev_index2 == single_step_trial )
       {
       cur_state = trials[prev_index].state;
-      if( prev_index + 1 == cur )			/* len == 1 */
+      if( prev_index + 1 == cur )			// len == 1
         {
         if( dis == 0 ) cur_state.set_short_rep();
         else cur_state.set_char();			// literal
@@ -306,14 +306,14 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
       else if( dis < num_rep_distances ) cur_state.set_rep();
       else cur_state.set_match();
       }
-    else if( prev_index2 == dual_step_trial )		/* dis == 0 */
+    else if( prev_index2 == dual_step_trial )		// dis == 0
       {
       --prev_index;
       cur_state = trials[prev_index].state;
       cur_state.set_char();
       cur_state.set_rep();
       }
-    else	/* if( prev_index2 >= 0 ) */
+    else	// if( prev_index2 >= 0 )
       {
       prev_index = prev_index2;
       cur_state = trials[prev_index].state;
@@ -340,7 +340,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
     else
       next_price += price_matched( prev_byte, cur_byte, match_byte );
 
-    /* try last updates to next trial */
+    // try last updates to next trial
     Trial & next_trial = trials[cur+1];
 
     next_trial.update( next_price, -1, cur );		// literal
@@ -366,7 +366,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
 
     const int len_limit = std::min( match_len_limit, triable_bytes );
 
-    /* try literal + rep0 */
+    // try literal + rep0
     if( match_byte != cur_byte && next_trial.prev_index != cur )
       {
       const uint8_t * const data = ptr_to_current_pos();
@@ -390,7 +390,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
 
     int start_len = min_match_len;
 
-    /* try rep distances */
+    // try rep distances
     for( int rep = 0; rep < num_rep_distances; ++rep )
       {
       const uint8_t * const data = ptr_to_current_pos();
@@ -407,9 +407,9 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
         trials[cur+i].update( price + rep_len_prices.price( i, pos_state ),
                               rep, cur );
 
-      if( rep == 0 ) start_len = len + 1;	/* discard shorter matches */
+      if( rep == 0 ) start_len = len + 1;	// discard shorter matches
 
-      /* try rep + literal + rep0 */
+      // try rep + literal + rep0
       int len2 = len + 1;
       const int limit = std::min( match_len_limit + len2, triable_bytes );
       while( len2 < limit && data[len2-dis] == data[len2] ) ++len2;
@@ -431,7 +431,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
       trials[cur+len+1+len2].update3( price, rep, cur + len + 1, cur );
       }
 
-    /* try matches */
+    // try matches
     if( newlen >= start_len && newlen <= len_limit )
       {
       const int normal_match_price = match_price +
@@ -449,7 +449,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
 
         trials[cur+len].update( price, dis + num_rep_distances, cur );
 
-        /* try match + literal + rep0 */
+        // try match + literal + rep0
         if( len == pairs[i].len )
           {
           const uint8_t * const data = ptr_to_current_pos();
@@ -500,7 +500,7 @@ bool LZ_encoder::encode_member( const unsigned long long member_size )
   for( int i = 0; i < num_rep_distances; ++i ) reps[i] = 0;
 
   if( data_position() != 0 || renc.member_position() != File_header::size )
-    return false;				/* can be called only once */
+    return false;				// can be called only once
 
   if( !data_finished() )			// encode first byte
     {
@@ -517,7 +517,7 @@ bool LZ_encoder::encode_member( const unsigned long long member_size )
     {
     if( price_counter <= 0 && pending_num_pairs == 0 )
       {
-      price_counter = price_count;	/* recalculate prices every these bytes */
+      price_counter = price_count;	// recalculate prices every these bytes
       if( dis_price_counter <= 0 )
         { dis_price_counter = dis_price_count; update_distance_prices(); }
       if( align_price_counter <= 0 )
@@ -531,7 +531,7 @@ bool LZ_encoder::encode_member( const unsigned long long member_size )
       }
 
     int ahead = sequence_optimizer( reps, state );
-    if( ahead <= 0 ) return false;		/* can't happen */
+    if( ahead <= 0 ) return false;		// can't happen
     price_counter -= ahead;
 
     for( int i = 0; ahead > 0; )
@@ -542,7 +542,7 @@ bool LZ_encoder::encode_member( const unsigned long long member_size )
 
       bool bit = ( dis < 0 );
       renc.encode_bit( bm_match[state()][pos_state], !bit );
-      if( bit )					/* literal byte */
+      if( bit )					// literal byte
         {
         const uint8_t prev_byte = peek( ahead + 1 );
         const uint8_t cur_byte = peek( ahead );
@@ -556,13 +556,13 @@ bool LZ_encoder::encode_member( const unsigned long long member_size )
           }
         state.set_char();
         }
-      else					/* match or repeated match */
+      else					// match or repeated match
         {
         crc32.update_buf( crc_, ptr_to_current_pos() - ahead, len );
         mtf_reps( dis, reps );
         bit = ( dis < num_rep_distances );
         renc.encode_bit( bm_rep[state()], bit );
-        if( bit )				/* repeated match */
+        if( bit )				// repeated match
           {
           bit = ( dis == 0 );
           renc.encode_bit( bm_rep0[state()], !bit );
@@ -582,7 +582,7 @@ bool LZ_encoder::encode_member( const unsigned long long member_size )
             state.set_rep();
             }
           }
-        else					/* match */
+        else					// match
           {
           encode_pair( dis - num_rep_distances, len, pos_state );
           if( get_slot( dis - num_rep_distances ) >= end_dis_model )
diff --git a/encoder.h b/encoder.h
index 81cc1e0..9579a85 100644
--- a/encoder.h
+++ b/encoder.h
@@ -76,7 +76,7 @@ public:
 
 class LZ_encoder : public LZ_encoder_base
   {
-  struct Pair			/* distance-length pair */
+  struct Pair			// distance-length pair
     {
     int dis;
     int len;
@@ -90,12 +90,12 @@ class LZ_encoder : public LZ_encoder_base
   struct Trial
     {
     State state;
-    int price;		/* dual use var; cumulative price, match length */
-    int dis;		/* rep index or match distance. (-1 for literal) */
-    int prev_index;	/* index of prev trial in trials[] */
-    int prev_index2;	/*   -2  trial is single step */
-			/*   -1  literal + rep0 */
-			/* >= 0  ( rep or match ) + literal + rep0 */
+    int price;		// dual use var; cumulative price, match length
+    int dis;		// rep index or match distance. (-1 for literal)
+    int prev_index;	// index of prev trial in trials[]
+    int prev_index2;	//   -2  trial is single step
+			//   -1  literal + rep0
+			// >= 0  ( rep or match ) + literal + rep0
     int reps[num_rep_distances];
 
     void update( const int pr, const int distance, const int p_i )
@@ -145,7 +145,7 @@ class LZ_encoder : public LZ_encoder_base
   int get_match_pairs( Pair * pairs = 0 );
   void update_distance_prices();
 
-       /* move-to-front dis in/into reps if( dis > 0 ) */
+       // move-to-front dis in/into reps if( dis > 0 )
   static void mtf_reps( const int dis, int reps[num_rep_distances] )
     {
     if( dis >= num_rep_distances )
@@ -255,7 +255,7 @@ class LZ_encoder : public LZ_encoder_base
                           const State state );
 
   enum { before = max_num_trials + 1,
-         /* bytes to keep in buffer after pos */
+         // bytes to keep in buffer after pos
          after_size = ( 2 * max_match_len ) + 1,
          dict_factor = 2,
          num_prev_positions3 = 1 << 16,
diff --git a/encoder_base.cc b/encoder_base.cc
index 982f12c..a8bbbd7 100644
--- a/encoder_base.cc
+++ b/encoder_base.cc
@@ -142,7 +142,7 @@ void Range_encoder::flush_data()
   }
 
 
-     /* End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len) */
+     // End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len)
 void LZ_encoder_base::full_flush( const State state )
   {
   const int pos_state = data_position() & pos_state_mask;
diff --git a/encoder_base.h b/encoder_base.h
index 27c7a90..b032fae 100644
--- a/encoder_base.h
+++ b/encoder_base.h
@@ -60,14 +60,14 @@ public:
     for( int i = 0; i < bit_model_total >> price_step_bits; ++i )
       {
       unsigned val = ( i * price_step ) + ( price_step / 2 );
-      int bits = 0;				/* base 2 logarithm of val */
+      int bits = 0;				// base 2 logarithm of val
       for( int j = 0; j < price_shift_bits; ++j )
         {
         val = val * val;
         bits <<= 1;
         while( val >= 1 << 16 ) { val >>= 1; ++bits; }
         }
-      bits += 15;				/* remaining bits in val */
+      bits += 15;				// remaining bits in val
       data[i] = ( bit_model_total_bits << price_shift_bits ) - bits;
       }
     }
@@ -155,7 +155,7 @@ inline int price_matched( const Bit_model bm[], int symbol, int match_byte )
     symbol <<= 1;
     const int bit = symbol & 0x100;
     price += price_bit( bm[match_bit+(symbol>>9)+mask], bit );
-    mask &= ~(match_byte ^ symbol);	/* if( match_bit != bit ) mask = 0; */
+    mask &= ~(match_byte ^ symbol);	// if( match_bit != bit ) mask = 0;
     }
   while( symbol < 0x10000 );
   return price;
@@ -172,21 +172,21 @@ class Matchfinder_base
 
 protected:
   unsigned long long partial_data_pos;
-  uint8_t * buffer;		/* input buffer */
-  int32_t * prev_positions;	/* 1 + last seen position of key. else 0 */
-  int32_t * pos_array;		/* may be tree or chain */
-  const int before_size;	/* bytes to keep in buffer before dictionary */
+  uint8_t * buffer;		// input buffer
+  int32_t * prev_positions;	// 1 + last seen position of key. else 0
+  int32_t * pos_array;		// may be tree or chain
+  const int before_size;	// bytes to keep in buffer before dictionary
   int buffer_size;
-  int dictionary_size;		/* bytes to keep in buffer before pos */
-  int pos;			/* current pos in buffer */
-  int cyclic_pos;		/* cycles through [0, dictionary_size] */
-  int stream_pos;		/* first byte not yet read from file */
-  int pos_limit;		/* when reached, a new block must be read */
+  int dictionary_size;		// bytes to keep in buffer before pos
+  int pos;			// current pos in buffer
+  int cyclic_pos;		// cycles through [0, dictionary_size]
+  int stream_pos;		// first byte not yet read from file
+  int pos_limit;		// when reached, a new block must be read
   int key4_mask;
-  int num_prev_positions;	/* size of prev_positions */
+  int num_prev_positions;	// size of prev_positions
   int pos_array_size;
-  const int infd;		/* input file descriptor */
-  bool at_stream_end;		/* stream_pos shows real end of file */
+  const int infd;		// input file descriptor
+  bool at_stream_end;		// stream_pos shows real end of file
 
   Matchfinder_base( const int before, const int dict_size,
                     const int after_size, const int dict_factor,
@@ -228,11 +228,11 @@ class Range_encoder
   enum { buffer_size = 65536 };
   uint64_t low;
   unsigned long long partial_member_pos;
-  uint8_t * const buffer;	/* output buffer */
-  int pos;			/* current pos in buffer */
+  uint8_t * const buffer;	// output buffer
+  int pos;			// current pos in buffer
   uint32_t range;
   unsigned ff_count;
-  const int outfd;		/* output file descriptor */
+  const int outfd;		// output file descriptor
   uint8_t cache;
   File_header header;
 
@@ -377,7 +377,7 @@ public:
       symbol <<= 1;
       const int bit = symbol & 0x100;
       encode_bit( bm[match_bit+(symbol>>9)+mask], bit );
-      mask &= ~(match_byte ^ symbol);	/* if( match_bit != bit ) mask = 0; */
+      mask &= ~(match_byte ^ symbol);	// if( match_bit != bit ) mask = 0;
       }
     while( symbol < 0x10000 );
     }
@@ -406,7 +406,7 @@ class LZ_encoder_base : public Matchfinder_base
   {
 protected:
   enum { max_marker_size = 16,
-         num_rep_distances = 4 };	/* must be 4 */
+         num_rep_distances = 4 };	// must be 4
 
   uint32_t crc_;
 
diff --git a/fast_encoder.cc b/fast_encoder.cc
index 1ecd169..90361e9 100644
--- a/fast_encoder.cc
+++ b/fast_encoder.cc
@@ -88,7 +88,7 @@ bool FLZ_encoder::encode_member( const unsigned long long member_size )
   for( int i = 0; i < num_rep_distances; ++i ) reps[i] = 0;
 
   if( data_position() != 0 || renc.member_position() != File_header::size )
-    return false;				/* can be called only once */
+    return false;				// can be called only once
 
   if( !data_finished() )			// encode first byte
     {
@@ -179,7 +179,7 @@ bool FLZ_encoder::encode_member( const unsigned long long member_size )
         }
       }
 
-    /* literal byte */
+    // literal byte
     renc.encode_bit( bm_match[state()][pos_state], 0 );
     if( state.is_char() )
       encode_literal( prev_byte, cur_byte );
diff --git a/fast_encoder.h b/fast_encoder.h
index b26e388..36177df 100644
--- a/fast_encoder.h
+++ b/fast_encoder.h
@@ -17,7 +17,7 @@
 
 class FLZ_encoder : public LZ_encoder_base
   {
-  int key4;			/* key made from latest 4 bytes */
+  int key4;			// key made from latest 4 bytes
 
   void reset_key4()
     {
@@ -45,7 +45,7 @@ class FLZ_encoder : public LZ_encoder_base
 
   enum { before = 0,
          dict_size = 65536,
-         /* bytes to keep in buffer after pos */
+         // bytes to keep in buffer after pos
          after_size = max_match_len,
          dict_factor = 16,
          num_prev_positions23 = 0,
diff --git a/lzip.h b/lzip.h
index 4a8bc98..9c16ef2 100644
--- a/lzip.h
+++ b/lzip.h
@@ -40,7 +40,7 @@ public:
 
 enum {
   min_dictionary_bits = 12,
-  min_dictionary_size = 1 << min_dictionary_bits,	/* >= modeled_distances */
+  min_dictionary_size = 1 << min_dictionary_bits,	// >= modeled_distances
   max_dictionary_bits = 29,
   max_dictionary_size = 1 << max_dictionary_bits,
   literal_context_bits = 3,
@@ -52,7 +52,7 @@ enum {
   dis_slot_bits = 6,
   start_dis_model = 4,
   end_dis_model = 14,
-  modeled_distances = 1 << (end_dis_model / 2),		/* 128 */
+  modeled_distances = 1 << (end_dis_model / 2),		// 128
   dis_align_bits = 4,
   dis_align_size = 1 << dis_align_bits,
 
@@ -64,8 +64,8 @@ enum {
   len_high_symbols = 1 << len_high_bits,
   max_len_symbols = len_low_symbols + len_mid_symbols + len_high_symbols,
 
-  min_match_len = 2,					/* must be 2 */
-  max_match_len = min_match_len + max_len_symbols - 1,	/* 273 */
+  min_match_len = 2,					// must be 2
+  max_match_len = min_match_len + max_len_symbols - 1,	// 273
   min_match_len_limit = 5 };
 
 inline int get_len_state( const int len )
@@ -185,9 +185,9 @@ const uint8_t magic_string[4] = { 0x4C, 0x5A, 0x49, 0x50 };	// "LZIP"
 
 struct File_header
   {
-  uint8_t data[6];			/* 0-3 magic bytes */
-					/*   4 version */
-					/*   5 coded_dict_size */
+  uint8_t data[6];			// 0-3 magic bytes
+					//   4 version
+					//   5 coded_dict_size
   enum { size = 6 };
 
   void set_magic() { std::memcpy( data, magic_string, 4 ); data[4] = 1; }
@@ -227,9 +227,9 @@ struct File_header
 
 struct File_trailer
   {
-  uint8_t data[20];	/*  0-3  CRC32 of the uncompressed data */
-			/*  4-11 size of the uncompressed data */
-			/* 12-19 member size including header and trailer */
+  uint8_t data[20];	//  0-3  CRC32 of the uncompressed data
+			//  4-11 size of the uncompressed data
+			// 12-19 member size including header and trailer
 
   static int size( const int version = 1 )
     { return ( ( version >= 1 ) ? 20 : 12 ); }
diff --git a/main.cc b/main.cc
index 27cc156..ac07852 100644
--- a/main.cc
+++ b/main.cc
@@ -83,8 +83,8 @@ struct { const char * from; const char * to; } const known_extensions[] = {
 
 struct Lzma_options
   {
-  int dictionary_size;		/* 4 KiB .. 512 MiB */
-  int match_len_limit;		/* 5 .. 273 */
+  int dictionary_size;		// 4 KiB .. 512 MiB
+  int match_len_limit;		// 5 .. 273
   };
 
 enum Mode { m_compress, m_decompress, m_test };
@@ -108,7 +108,7 @@ void show_help()
                "  -c, --stdout                   send output to standard output\n"
                "  -d, --decompress               decompress\n"
                "  -f, --force                    overwrite existing output files\n"
-               "  -F, --recompress               force recompression of compressed files\n"
+               "  -F, --recompress               force re-compression of compressed files\n"
                "  -k, --keep                     keep (don't delete) input files\n"
                "  -m, --match-length=<bytes>     set match length limit in bytes [36]\n"
                "  -o, --output=<file>            if reading stdin, place the output into <file>\n"
@@ -265,7 +265,7 @@ int open_instream( const char * const name, struct stat * const in_statsp,
     if( infd < 0 )
       {
       if( verbosity >= 0 )
-        std::fprintf( stderr, "%s: Can't open input file '%s': %s.\n",
+        std::fprintf( stderr, "%s: Can't open input file '%s': %s\n",
                       program_name, name, std::strerror( errno ) );
       }
     else
@@ -282,7 +282,7 @@ int open_instream( const char * const name, struct stat * const in_statsp,
           std::fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n",
                         program_name, name,
                         ( can_read && !no_ofile ) ?
-                        " and '--stdout' was not specified" : "" );
+                        ",\n      and '--stdout' was not specified" : "" );
         close( infd );
         infd = -1;
         }
@@ -314,7 +314,7 @@ void set_d_outname( const std::string & name, const int i )
     }
   output_filename = name; output_filename += ".out";
   if( verbosity >= 1 )
-    std::fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'.\n",
+    std::fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'\n",
                   program_name, name.c_str(), output_filename.c_str() );
   }
 
@@ -331,7 +331,7 @@ bool open_outstream( const bool force )
       std::fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n",
                     program_name, output_filename.c_str() );
     else
-      std::fprintf( stderr, "%s: Can't create output file '%s': %s.\n",
+      std::fprintf( stderr, "%s: Can't create output file '%s': %s\n",
                     program_name, output_filename.c_str(), std::strerror( errno ) );
     }
   return ( outfd >= 0 );
@@ -371,14 +371,14 @@ void cleanup_and_fail( const int retval )
   }
 
 
-     /* Set permissions, owner and times. */
+     // Set permissions, owner and times.
 void close_and_set_permissions( const struct stat * const in_statsp )
   {
   bool warning = false;
   if( in_statsp )
     {
     const mode_t mode = in_statsp->st_mode;
-    /* fchown will in many cases return with EPERM, which can be safely ignored. */
+    // fchown will in many cases return with EPERM, which can be safely ignored.
     if( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) == 0 )
       { if( fchmod( outfd, mode ) != 0 ) warning = true; }
     else
@@ -440,7 +440,7 @@ int compress( const unsigned long long member_size,
       }
 
     unsigned long long in_size = 0, out_size = 0, partial_volume_size = 0;
-    while( true )		/* encode one member per iteration */
+    while( true )		// encode one member per iteration
       {
       const unsigned long long size = ( volume_size > 0 ) ?
         std::min( member_size, volume_size - partial_volume_size ) : member_size;
@@ -472,7 +472,7 @@ int compress( const unsigned long long member_size,
     if( retval == 0 && verbosity >= 1 )
       {
       if( in_size == 0 || out_size == 0 )
-        std::fprintf( stderr, " no data compressed.\n" );
+        std::fputs( " no data compressed.\n", stderr );
       else
         std::fprintf( stderr, "%6.3f:1, %6.3f bits/byte, "
                               "%5.2f%% saved, %llu in, %llu out.\n",
@@ -582,23 +582,20 @@ int decompress( const int infd, const Pretty_print & pp, const bool testing )
         if( verbosity >= 0 && result <= 2 )
           {
           pp();
-          if( result == 2 )
-            std::fprintf( stderr, "File ends unexpectedly at pos %llu.\n",
-                          partial_file_pos );
-          else
-            std::fprintf( stderr, "Decoder error at pos %llu.\n",
-                          partial_file_pos );
+          std::fprintf( stderr, "%s at pos %llu\n", ( result == 2 ) ?
+                        "File ends unexpectedly" : "Decoder error",
+                        partial_file_pos );
           }
         retval = 2; break;
         }
       if( verbosity >= 2 )
-        { std::fprintf( stderr, testing ? "ok\n" : "done\n" ); pp.reset(); }
+        { std::fputs( testing ? "ok\n" : "done\n", stderr ); pp.reset(); }
       }
     }
   catch( std::bad_alloc ) { pp( "Not enough memory." ); retval = 1; }
   catch( Error e ) { pp(); show_error( e.msg, errno ); retval = 1; }
   if( verbosity == 1 && retval == 0 )
-    std::fprintf( stderr, testing ? "ok\n" : "done\n" );
+    std::fputs( testing ? "ok\n" : "done\n", stderr );
   return retval;
   }
 
@@ -631,8 +628,8 @@ void show_error( const char * const msg, const int errcode, const bool help )
       {
       std::fprintf( stderr, "%s: %s", program_name, msg );
       if( errcode > 0 )
-        std::fprintf( stderr, ": %s.", std::strerror( errcode ) );
-      std::fprintf( stderr, "\n" );
+        std::fprintf( stderr, ": %s", std::strerror( errcode ) );
+      std::fputc( '\n', stderr );
       }
     if( help )
       std::fprintf( stderr, "Try '%s --help' for more information.\n",
@@ -654,14 +651,14 @@ void show_progress( const unsigned long long partial_size,
                     const Pretty_print * const p,
                     const unsigned long long cfile_size )
   {
-  static unsigned long long csize = 0;		/* file_size / 100 */
+  static unsigned long long csize = 0;		// file_size / 100
   static unsigned long long psize = 0;
   static const Matchfinder_base * mb = 0;
   static const Pretty_print * pp = 0;
 
   if( verbosity >= 2 )
     {
-    if( m )					/* initialize static vars */
+    if( m )					// initialize static vars
       { csize = cfile_size; psize = partial_size; mb = m; pp = p; }
     if( mb && pp )
       {
@@ -681,16 +678,16 @@ int main( const int argc, const char * const argv[] )
      to the corresponding LZMA compression modes. */
   const Lzma_options option_mapping[] =
     {
-    { 1 << 16,  16 },		/* -0 entry values not used */
-    { 1 << 20,   5 },		/* -1 */
-    { 3 << 19,   6 },		/* -2 */
-    { 1 << 21,   8 },		/* -3 */
-    { 3 << 20,  12 },		/* -4 */
-    { 1 << 22,  20 },		/* -5 */
-    { 1 << 23,  36 },		/* -6 */
-    { 1 << 24,  68 },		/* -7 */
-    { 3 << 23, 132 },		/* -8 */
-    { 1 << 25, 273 } };		/* -9 */
+    { 1 << 16,  16 },		// -0 entry values not used
+    { 1 << 20,   5 },		// -1
+    { 3 << 19,   6 },		// -2
+    { 1 << 21,   8 },		// -3
+    { 3 << 20,  12 },		// -4
+    { 1 << 22,  20 },		// -5
+    { 1 << 23,  36 },		// -6
+    { 1 << 24,  68 },		// -7
+    { 3 << 23, 132 },		// -8
+    { 1 << 25, 273 } };		// -9
   Lzma_options encoder_options = option_mapping[6];	// default = "-6"
   const unsigned long long max_member_size = 0x0008000000000000ULL;
   const unsigned long long max_volume_size = 0x4000000000000000ULL;
@@ -746,7 +743,7 @@ int main( const int argc, const char * const argv[] )
   for( ; argind < parser.arguments(); ++argind )
     {
     const int code = parser.code( argind );
-    if( !code ) break;					/* no more options */
+    if( !code ) break;					// no more options
     const std::string & arg = parser.argument( argind );
     switch( code )
       {
@@ -777,7 +774,7 @@ int main( const int argc, const char * const argv[] )
       case 'V': show_version(); return 0;
       default : internal_error( "uncaught option." );
       }
-    } /* end process options */
+    } // end process options
 
 #if defined(__MSVCRT__) || defined(__OS2__)
   setmode( STDIN_FILENO, O_BINARY );
diff --git a/testsuite/check.sh b/testsuite/check.sh
index 1f9b9e4..ba77d0a 100755
--- a/testsuite/check.sh
+++ b/testsuite/check.sh
@@ -1,6 +1,6 @@
 #! /bin/sh
 # check script for Lzip - LZMA lossless data compressor
-# Copyright (C) 2008-2014 Antonio Diaz Diaz.
+# Copyright (C) 2008-2015 Antonio Diaz Diaz.
 #
 # This script is free software: you have unlimited permission
 # to copy, distribute and modify it.
@@ -79,7 +79,6 @@ printf .
 cat in in > in2 || framework_failure
 "${LZIP}" -o copy2 < in2 || fail=1
 "${LZIP}" -t copy2.lz || fail=1
-printf .
 "${LZIP}" -cd copy2.lz > copy2 || fail=1
 cmp in2 copy2 || fail=1
 printf .
author	Daniel Baumann <mail@daniel-baumann.ch>	2015-11-07 10:08:36 +0000
committer	Daniel Baumann <mail@daniel-baumann.ch>	2015-11-07 10:08:36 +0000
commit	62327bcaed81d8c02f11aec2c49f7d09c5edddb7 (patch)
tree	7eb93beb78b7ee8ab3288c5b31a3f7c99c8f2572
parent	Adding upstream version 1.17~rc2. (diff)
download	lzip-62327bcaed81d8c02f11aec2c49f7d09c5edddb7.tar.xz lzip-62327bcaed81d8c02f11aec2c49f7d09c5edddb7.zip