From def42230e27ddcb133e6778eefa2300cdbf3e95b Mon Sep 17 00:00:00 2001
From: Daniel Baumann <mail@daniel-baumann.ch>
Date: Sat, 7 Nov 2015 07:46:02 +0100
Subject: Adding upstream version 0.3.

Signed-off-by: Daniel Baumann <mail@daniel-baumann.ch>
---
 ChangeLog          |   5 +
 INSTALL            |   6 +-
 Makefile.in        |  10 +-
 NEWS               |   4 +-
 README             |   2 +-
 configure          |  46 ++++--
 decoder.cc         | 363 ------------------------------------------
 lzd.cc             | 459 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 main.cc            | 115 --------------
 testsuite/check.sh |   9 ++
 10 files changed, 513 insertions(+), 506 deletions(-)
 delete mode 100644 decoder.cc
 create mode 100644 lzd.cc
 delete mode 100644 main.cc

diff --git a/ChangeLog b/ChangeLog
index c3be740..0f516f1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2013-07-24  Antonio Diaz Diaz  <antonio@gnu.org>
+
+	* Version 0.3 released.
+	* decoder.cc and main.cc have been merged into lzd.cc.
+
 2013-05-06  Antonio Diaz Diaz  <antonio@gnu.org>
 
 	* Version 0.2 released.
diff --git a/INSTALL b/INSTALL
index 09ac834..61635f7 100644
--- a/INSTALL
+++ b/INSTALL
@@ -1,7 +1,7 @@
 Requirements
 ------------
 You will need a C++ compiler.
-I use gcc 4.8.0 and 3.3.6, but the code should compile with any
+I use gcc 4.8.1 and 3.3.6, but the code should compile with any
 standards compliant compiler.
 Gcc is available at http://gcc.gnu.org.
 
@@ -10,9 +10,9 @@ Procedure
 ---------
 1. Unpack the archive if you have not done so already:
 
-	lzip -cd lzd[version].tar.lz | tar -xf -
+	tar -xf lzd[version].tar.lz
 or
-	gzip -cd lzd[version].tar.gz | tar -xf -
+	lzip -cd lzd[version].tar.lz | tar -xf -
 
 This creates the directory ./lzd[version] containing the source from
 the main archive.
diff --git a/Makefile.in b/Makefile.in
index d313f51..9a4b5ec 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -6,7 +6,7 @@ INSTALL_DATA = $(INSTALL) -p -m 644
 INSTALL_DIR = $(INSTALL) -d -m 755
 SHELL = /bin/sh
 
-objs = main.o
+objs = lzd.o
 
 
 .PHONY : all install install-bin install-info install-man install-strip \
@@ -21,14 +21,10 @@ $(progname) : $(objs)
 $(progname)_profiled : $(objs)
 	$(CXX) $(LDFLAGS) -pg -o $@ $(objs)
 
-main.o : main.cc
-	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
-
 %.o : %.cc
-	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $<
+	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
 
-$(objs)        : Makefile
-main.o         : decoder.cc
+$(objs) : Makefile
 
 
 doc :
diff --git a/NEWS b/NEWS
index 4caf5f7..2c8be2e 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,3 @@
-Changes in version 0.2:
+Changes in version 0.3:
 
-Added a missing "#include" for OS/2.
+All the code is now contained in a single file (lzd.cc).
diff --git a/README b/README
index ed832ca..fcfe639 100644
--- a/README
+++ b/README
@@ -1,6 +1,6 @@
 Description
 
-Lzd is a very simplified decompressor for lzip files with an educational
+Lzd is a simplified decompressor for lzip files with an educational
 purpose. Studying its source is a good first step to understand how lzip
 works. It is not safe to use lzd for any real work.
 
diff --git a/configure b/configure
index 7b3f916..98acee7 100755
--- a/configure
+++ b/configure
@@ -5,12 +5,10 @@
 # This configure script is free software: you have unlimited permission
 # to copy, distribute and modify it.
 
-args=
-no_create=
 pkgname=lzd
-pkgversion=0.2
+pkgversion=0.3
 progname=lzd
-srctrigger=decoder.cc
+srctrigger=lzd.cc
 
 # clear some things potentially inherited from environment.
 LC_ALL=C
@@ -28,18 +26,19 @@ CXXFLAGS='-Wall -W -O2'
 LDFLAGS=
 
 # checking whether we are using GNU C++.
-if [ ! -x /bin/g++ ] &&
-   [ ! -x /usr/bin/g++ ] &&
-   [ ! -x /usr/local/bin/g++ ] ; then
+${CXX} --version > /dev/null 2>&1
+if [ $? != 0 ] ; then
 	CXX=c++
 	CXXFLAGS='-W -O2'
 fi
 
 # Loop over all args
-while [ -n "$1" ] ; do
+args=
+no_create=
+while [ $# != 0 ] ; do
 
 	# Get the first arg, and shuffle
-	option=$1
+	option=$1 ; arg2=no
 	shift
 
 	# Add the argument quoted to args
@@ -74,6 +73,14 @@ while [ -n "$1" ] ; do
 	--version | -V)
 		echo "Configure script for ${pkgname} version ${pkgversion}"
 		exit 0 ;;
+	--srcdir)            srcdir=$1 ; arg2=yes ;;
+	--prefix)            prefix=$1 ; arg2=yes ;;
+	--exec-prefix)  exec_prefix=$1 ; arg2=yes ;;
+	--bindir)            bindir=$1 ; arg2=yes ;;
+	--datarootdir)  datarootdir=$1 ; arg2=yes ;;
+	--infodir)          infodir=$1 ; arg2=yes ;;
+	--mandir)            mandir=$1 ; arg2=yes ;;
+
 	--srcdir=*)            srcdir=${optarg} ;;
 	--prefix=*)            prefix=${optarg} ;;
 	--exec-prefix=*)  exec_prefix=${optarg} ;;
@@ -88,11 +95,22 @@ while [ -n "$1" ] ; do
 	CXXFLAGS=*) CXXFLAGS=${optarg} ;;
 	LDFLAGS=*)   LDFLAGS=${optarg} ;;
 
-	--* | *=* | *-*-*) ;;
+	--*)
+		echo "configure: WARNING: unrecognized option: '${option}'" 1>&2 ;;
+	*=* | *-*-*) ;;
 	*)
-		echo "configure: Unrecognized option: \"${option}\"; use --help for usage." 1>&2
+		echo "configure: unrecognized option: '${option}'" 1>&2
+		echo "Try 'configure --help' for more information." 1>&2
 		exit 1 ;;
 	esac
+
+	# Check if the option took a separate argument
+	if [ "${arg2}" = yes ] ; then
+		if [ $# != 0 ] ; then args="${args} \"$1\"" ; shift
+		else echo "configure: Missing argument to '${option}'" 1>&2
+			exit 1
+		fi
+	fi
 done
 
 # Find the source files, if location was not specified.
@@ -107,10 +125,8 @@ if [ -z "${srcdir}" ] ; then
 fi
 
 if [ ! -r "${srcdir}/${srctrigger}" ] ; then
-	exec 1>&2
-	echo
-	echo "configure: Can't find sources in ${srcdir} ${srcdirtext}"
-	echo "configure: (At least ${srctrigger} is missing)."
+	echo "configure: Can't find sources in ${srcdir} ${srcdirtext}" 1>&2
+	echo "configure: (At least ${srctrigger} is missing)." 1>&2
 	exit 1
 fi
 
diff --git a/decoder.cc b/decoder.cc
deleted file mode 100644
index fbfcdb3..0000000
--- a/decoder.cc
+++ /dev/null
@@ -1,363 +0,0 @@
-/*  Lzd - Educational decompressor for lzip files
-    Copyright (C) 2013 Antonio Diaz Diaz.
-
-    This program is free software: you have unlimited permission
-    to copy, distribute and modify it.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-*/
-
-class State
-  {
-  int st;
-
-public:
-  enum { states = 12 };
-  State() : st( 0 ) {}
-  int operator()() const { return st; }
-  bool is_char() const { return st < 7; }
-
-  void set_char()
-    {
-    static const int next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 };
-    st = next[st];
-    }
-
-  void set_match()     { st = ( ( st < 7 ) ? 7 : 10 ); }
-  void set_rep()       { st = ( ( st < 7 ) ? 8 : 11 ); }
-  void set_short_rep() { st = ( ( st < 7 ) ? 9 : 11 ); }
-  };
-
-
-enum {
-  literal_context_bits = 3,
-  pos_state_bits = 2,
-  pos_states = 1 << pos_state_bits,
-  pos_state_mask = pos_states - 1,
-
-  max_dis_states = 4,
-  dis_slot_bits = 6,
-  start_dis_model = 4,
-  end_dis_model = 14,
-  modeled_distances = 1 << (end_dis_model / 2),		// 128
-  dis_align_bits = 4,
-  dis_align_size = 1 << dis_align_bits,
-
-  len_low_bits = 3,
-  len_mid_bits = 3,
-  len_high_bits = 8,
-  len_low_symbols = 1 << len_low_bits,
-  len_mid_symbols = 1 << len_mid_bits,
-  len_high_symbols = 1 << len_high_bits,
-  max_len_symbols = len_low_symbols + len_mid_symbols + len_high_symbols,
-  min_match_len = 2,					// must be 2
-
-  bit_model_move_bits = 5,
-  bit_model_total_bits = 11,
-  bit_model_total = 1 << bit_model_total_bits };
-
-
-struct Bit_model
-  {
-  int probability;
-  Bit_model() : probability( bit_model_total / 2 ) {}
-  };
-
-struct Len_model
-  {
-  Bit_model choice1;
-  Bit_model choice2;
-  Bit_model bm_low[pos_states][len_low_symbols];
-  Bit_model bm_mid[pos_states][len_mid_symbols];
-  Bit_model bm_high[len_high_symbols];
-  };
-
-
-class Range_decoder
-  {
-  uint32_t code;
-  uint32_t range;
-
-public:
-  Range_decoder() : code( 0 ), range( 0xFFFFFFFFU )
-    {
-    for( int i = 0; i < 5; ++i ) code = (code << 8) | std::getc( stdin );
-    }
-
-  int decode( const int num_bits )
-    {
-    int symbol = 0;
-    for( int i = 0; i < num_bits; ++i )
-      {
-      range >>= 1;
-      symbol <<= 1;
-      if( code >= range ) { code -= range; symbol |= 1; }
-      if( range <= 0x00FFFFFFU )			// normalize
-        { range <<= 8; code = (code << 8) | std::getc( stdin ); }
-      }
-    return symbol;
-    }
-
-  int decode_bit( Bit_model & bm )
-    {
-    int symbol;
-    const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
-    if( code < bound )
-      {
-      range = bound;
-      bm.probability += (bit_model_total - bm.probability) >> bit_model_move_bits;
-      symbol = 0;
-      }
-    else
-      {
-      range -= bound;
-      code -= bound;
-      bm.probability -= bm.probability >> bit_model_move_bits;
-      symbol = 1;
-      }
-    if( range <= 0x00FFFFFFU )				// normalize
-      { range <<= 8; code = (code << 8) | std::getc( stdin ); }
-    return symbol;
-    }
-
-  int decode_tree( Bit_model bm[], const int num_bits )
-    {
-    int symbol = 1;
-    for( int i = 0; i < num_bits; ++i )
-      symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
-    return symbol - (1 << num_bits);
-    }
-
-  int decode_tree_reversed( Bit_model bm[], const int num_bits )
-    {
-    int symbol = decode_tree( bm, num_bits );
-    int reversed_symbol = 0;
-    for( int i = 0; i < num_bits; ++i )
-      {
-      reversed_symbol = ( reversed_symbol << 1 ) | ( symbol & 1 );
-      symbol >>= 1;
-      }
-    return reversed_symbol;
-    }
-
-  int decode_matched( Bit_model bm[], const int match_byte )
-    {
-    Bit_model * const bm1 = bm + 0x100;
-    int symbol = 1;
-    for( int i = 7; i >= 0; --i )
-      {
-      const int match_bit = ( match_byte >> i ) & 1;
-      const int bit = decode_bit( bm1[(match_bit<<8)+symbol] );
-      symbol = ( symbol << 1 ) | bit;
-      if( match_bit != bit )
-        {
-        while( symbol < 0x100 )
-          symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
-        break;
-        }
-      }
-    return symbol - 0x100;
-    }
-
-  int decode_len( Len_model & lm, const int pos_state )
-    {
-    if( decode_bit( lm.choice1 ) == 0 )
-      return min_match_len +
-             decode_tree( lm.bm_low[pos_state], len_low_bits );
-    if( decode_bit( lm.choice2 ) == 0 )
-      return min_match_len + len_low_symbols +
-             decode_tree( lm.bm_mid[pos_state], len_mid_bits );
-    return min_match_len + len_low_symbols + len_mid_symbols +
-           decode_tree( lm.bm_high, len_high_bits );
-    }
-  };
-
-
-class LZ_decoder
-  {
-  unsigned long long partial_data_pos;
-  Range_decoder rdec;
-  const unsigned dictionary_size;
-  uint8_t * const buffer;	// output buffer
-  unsigned pos;			// current pos in buffer
-  unsigned stream_pos;		// first byte not yet written to stdout
-  uint32_t crc_;
-
-  void flush_data();
-
-  uint8_t get_byte( const unsigned distance ) const
-    {
-    int i = pos - distance - 1;
-    if( i < 0 ) i += dictionary_size;
-    return buffer[i];
-    }
-
-  void put_byte( const uint8_t b )
-    {
-    buffer[pos] = b;
-    if( ++pos >= dictionary_size ) flush_data();
-    }
-
-public:
-  LZ_decoder( const unsigned dict_size )
-    :
-    partial_data_pos( 0 ),
-    dictionary_size( dict_size ),
-    buffer( new uint8_t[dictionary_size] ),
-    pos( 0 ),
-    stream_pos( 0 ),
-    crc_( 0xFFFFFFFFU )
-    { buffer[dictionary_size-1] = 0; }		// prev_byte of first_byte
-
-  ~LZ_decoder() { delete[] buffer; }
-
-  unsigned crc() const { return crc_ ^ 0xFFFFFFFFU; }
-  unsigned long long data_position() const { return partial_data_pos + pos; }
-
-  bool decode_member();
-  };
-
-
-class CRC32
-  {
-  uint32_t data[256];		// Table of CRCs of all 8-bit messages.
-
-public:
-  CRC32()
-    {
-    for( unsigned n = 0; n < 256; ++n )
-      {
-      unsigned c = n;
-      for( int k = 0; k < 8; ++k )
-        { if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; }
-      data[n] = c;
-      }
-    }
-
-  void update( uint32_t & crc, const uint8_t * buffer, const int size ) const
-    {
-    for( int i = 0; i < size; ++i )
-      crc = data[(crc^buffer[i])&0xFF] ^ ( crc >> 8 );
-    }
-  };
-
-const CRC32 crc32;
-
-
-void LZ_decoder::flush_data()
-  {
-  if( pos > stream_pos )
-    {
-    const unsigned size = pos - stream_pos;
-    crc32.update( crc_, buffer + stream_pos, size );
-    errno = 0;
-    if( std::fwrite( buffer + stream_pos, 1, size, stdout ) != size )
-      { std::fprintf( stderr, "Write error: %s\n", std::strerror( errno ) );
-        std::exit( 1 ); }
-    if( pos >= dictionary_size ) { partial_data_pos += pos; pos = 0; }
-    stream_pos = pos;
-    }
-  }
-
-
-bool LZ_decoder::decode_member()		// Returns false if error
-  {
-  Bit_model bm_literal[1<<literal_context_bits][0x300];
-  Bit_model bm_match[State::states][pos_states];
-  Bit_model bm_rep[State::states];
-  Bit_model bm_rep0[State::states];
-  Bit_model bm_rep1[State::states];
-  Bit_model bm_rep2[State::states];
-  Bit_model bm_len[State::states][pos_states];
-  Bit_model bm_dis_slot[max_dis_states][1<<dis_slot_bits];
-  Bit_model bm_dis[modeled_distances-end_dis_model];
-  Bit_model bm_align[dis_align_size];
-  Len_model match_len_model;
-  Len_model rep_len_model;
-  unsigned rep0 = 0;			// rep[0-3] latest four distances
-  unsigned rep1 = 0;			// used for efficient coding of
-  unsigned rep2 = 0;			// repeated distances
-  unsigned rep3 = 0;
-  State state;
-
-  while( !std::feof( stdin ) && !std::ferror( stdin ) )
-    {
-    const int pos_state = data_position() & pos_state_mask;
-    if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 )	// 1st bit
-      {
-      const uint8_t prev_byte = get_byte( 0 );
-      const int literal_state = prev_byte >> ( 8 - literal_context_bits );
-      Bit_model * const bm = bm_literal[literal_state];
-      if( state.is_char() )
-        put_byte( rdec.decode_tree( bm, 8 ) );
-      else
-        put_byte( rdec.decode_matched( bm, get_byte( rep0 ) ) );
-      state.set_char();
-      }
-    else
-      {
-      int len;
-      if( rdec.decode_bit( bm_rep[state()] ) == 1 )		// 2nd bit
-        {
-        if( rdec.decode_bit( bm_rep0[state()] ) == 0 )		// 3rd bit
-          {
-          if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
-            { state.set_short_rep(); put_byte( get_byte( rep0 ) ); continue; }
-          }
-        else
-          {
-          unsigned distance;
-          if( rdec.decode_bit( bm_rep1[state()] ) == 0 )	// 4th bit
-            distance = rep1;
-          else
-            {
-            if( rdec.decode_bit( bm_rep2[state()] ) == 0 )	// 5th bit
-              distance = rep2;
-            else
-              { distance = rep3; rep3 = rep2; }
-            rep2 = rep1;
-            }
-          rep1 = rep0;
-          rep0 = distance;
-          }
-        len = rdec.decode_len( rep_len_model, pos_state );
-        state.set_rep();
-        }
-      else
-        {
-        rep3 = rep2; rep2 = rep1; rep1 = rep0;
-        len = rdec.decode_len( match_len_model, pos_state );
-        const int dis_state = std::min( len - min_match_len, max_dis_states - 1 );
-        const int dis_slot =
-          rdec.decode_tree( bm_dis_slot[dis_state], dis_slot_bits );
-        if( dis_slot < start_dis_model ) rep0 = dis_slot;
-        else
-          {
-          const int direct_bits = ( dis_slot >> 1 ) - 1;
-          rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
-          if( dis_slot < end_dis_model )
-            rep0 += rdec.decode_tree_reversed( bm_dis + rep0 - dis_slot - 1,
-                                               direct_bits );
-          else
-            {
-            rep0 += rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
-            rep0 += rdec.decode_tree_reversed( bm_align, dis_align_bits );
-            if( rep0 == 0xFFFFFFFFU )		// Marker found
-              {
-              flush_data();
-              return ( len == min_match_len );	// End Of Stream marker
-              }
-            }
-          }
-        state.set_match();
-        if( rep0 >= dictionary_size || ( rep0 >= pos && !partial_data_pos ) )
-          return false;
-        }
-      for( int i = 0; i < len; ++i )
-        put_byte( get_byte( rep0 ) );
-      }
-    }
-  return false;
-  }
diff --git a/lzd.cc b/lzd.cc
new file mode 100644
index 0000000..0ac7b64
--- /dev/null
+++ b/lzd.cc
@@ -0,0 +1,459 @@
+/*  Lzd - Educational decompressor for lzip files
+    Copyright (C) 2013 Antonio Diaz Diaz.
+
+    This program is free software: you have unlimited permission
+    to copy, distribute and modify it.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+*/
+/*
+    Exit status: 0 for a normal exit, 1 for environmental problems
+    (file not found, invalid flags, I/O errors, etc), 2 to indicate a
+    corrupt or invalid input file.
+*/
+
+#include <algorithm>
+#include <cerrno>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <stdint.h>
+#include <unistd.h>
+#if defined(__MSVCRT__) || defined(__OS2__)
+#include <fcntl.h>
+#include <io.h>
+#endif
+
+
+class State  // Remembers which kinds of packet were decoded most recently
+  {
+  int st;  // current state; every transition below keeps it in [0, states)
+
+public:
+  enum { states = 12 };
+  State() : st( 0 ) {}
+  int operator()() const { return st; }  // current state, used as a model index
+  bool is_char() const { return st < 7; }  // true initially and after set_char()
+
+  void set_char()  // called after a literal; the new state is always < 7
+    {
+    static const int next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 };
+    st = next[st];
+    }
+  void set_match()     { st = ( st < 7 ) ? 7 : 10; }  // after a match with a new distance
+  void set_rep()       { st = ( st < 7 ) ? 8 : 11; }  // after a match reusing a distance
+  void set_short_rep() { st = ( st < 7 ) ? 9 : 11; }  // after a one-byte copy at rep0
+  };
+
+
+enum {  // Compile-time parameters used by the decoder classes and by main
+  min_dictionary_size = 1 << 12,  // 4 KiB; main rejects smaller sizes
+  max_dictionary_size = 1 << 29,  // 512 MiB; main rejects larger sizes
+  literal_context_bits = 3,  // high bits of the previous byte selecting the literal models
+  pos_state_bits = 2,  // low bits of the data position used as context
+  pos_states = 1 << pos_state_bits,  // 4
+  pos_state_mask = pos_states - 1,  // 3
+
+  dis_slot_bits = 6,  // width of a distance slot
+  start_dis_model = 4,  // slots below this value are the distance itself
+  end_dis_model = 14,  // slots from this value up use unmodeled bits plus bm_align
+  modeled_distances = 1 << (end_dis_model / 2),		// 128
+  dis_align_bits = 4,  // low distance bits decoded with bm_align
+  dis_align_size = 1 << dis_align_bits,  // 16
+
+  len_low_bits = 3,
+  len_mid_bits = 3,
+  len_high_bits = 8,
+  len_low_symbols = 1 << len_low_bits,  // 8
+  len_mid_symbols = 1 << len_mid_bits,  // 8
+  len_high_symbols = 1 << len_high_bits,  // 256
+  max_len_symbols = len_low_symbols + len_mid_symbols + len_high_symbols,  // 272
+
+  min_match_len = 2,					// must be 2
+  max_dis_states = 4,  // number of bm_dis_slot contexts, chosen from the match length
+
+  bit_model_move_bits = 5,  // shift applied when a probability is adapted
+  bit_model_total_bits = 11,  // probabilities are scaled to 1 << 11
+  bit_model_total = 1 << bit_model_total_bits };  // 2048
+
+struct Bit_model  // Adaptive probability consumed by Range_decoder::decode_bit
+  {
+  int probability;  // share of the range given to a 0 bit, scaled by bit_model_total
+  Bit_model() : probability( bit_model_total / 2 ) {}  // start with 0 and 1 weighted equally
+  };
+
+struct Len_model  // Models read by Range_decoder::decode_len
+  {
+  Bit_model choice1;  // 0: the length is decoded with bm_low
+  Bit_model choice2;  // read when choice1 gave 1; 0: bm_mid, 1: bm_high
+  Bit_model bm_low[pos_states][len_low_symbols];  // one tree per pos_state
+  Bit_model bm_mid[pos_states][len_mid_symbols];  // one tree per pos_state
+  Bit_model bm_high[len_high_symbols];  // single tree, not indexed by pos_state
+  };
+
+
+class CRC32  // Table-driven CRC computed one byte at a time
+  {
+  uint32_t data[256];		// Table of CRCs of all 8-bit messages.
+
+public:
+  CRC32()  // fills the table using the constant 0xEDB88320
+    {
+    for( unsigned n = 0; n < 256; ++n )
+      {
+      unsigned c = n;
+      for( int k = 0; k < 8; ++k )  // one step per bit of n, lowest bit first
+        { if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; }
+      data[n] = c;
+      }
+    }
+
+  void update( uint32_t & crc, const uint8_t * const buffer, const int size ) const
+    {  // folds buffer[0, size) into crc in place; no initial or final inversion is done here
+    for( int i = 0; i < size; ++i )
+      crc = data[(crc^buffer[i])&0xFF] ^ ( crc >> 8 );
+    }
+  };
+
+const CRC32 crc32;  // the single table instance, used by LZ_decoder::flush_data
+
+
+typedef uint8_t File_header[6];	// 0-3 magic, 4 version, 5 coded_dict_size
+
+typedef uint8_t File_trailer[20];  // main reads the fields least significant byte first
+			//  0-3  CRC32 of the uncompressed data
+			//  4-11 size of the uncompressed data
+			// 12-19 member size including header and trailer
+
+class Range_decoder  // Range decoder that takes its input directly from stdin
+  {
+  uint32_t code;  // window of input bits, refilled one byte at a time
+  uint32_t range;  // width of the current interval; code is compared against parts of it
+
+public:
+  Range_decoder() : code( 0 ), range( 0xFFFFFFFFU )
+    {  // 5 bytes go into a 32-bit value, so the first byte read is shifted out
+    for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte();
+    }
+
+  uint8_t get_byte() { return std::getc( stdin ); }  // EOF is narrowed to a byte; callers test feof
+
+  int decode( const int num_bits )  // decodes num_bits bits without any Bit_model
+    {
+    int symbol = 0;
+    for( int i = 0; i < num_bits; ++i )
+      {
+      range >>= 1;  // both bit values get half of the range
+      symbol <<= 1;
+      if( code >= range ) { code -= range; symbol |= 1; }
+      if( range <= 0x00FFFFFFU )			// normalize
+        { range <<= 8; code = (code << 8) | get_byte(); }
+      }
+    return symbol;
+    }
+
+  int decode_bit( Bit_model & bm )  // decodes one bit and adapts bm toward the value seen
+    {
+    int symbol;
+    const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
+    if( code < bound )  // the part of the range below bound stands for a 0 bit
+      {
+      range = bound;
+      bm.probability += (bit_model_total - bm.probability) >> bit_model_move_bits;
+      symbol = 0;
+      }
+    else
+      {
+      range -= bound;
+      code -= bound;
+      bm.probability -= bm.probability >> bit_model_move_bits;
+      symbol = 1;
+      }
+    if( range <= 0x00FFFFFFU )				// normalize
+      { range <<= 8; code = (code << 8) | get_byte(); }
+    return symbol;
+    }
+
+  int decode_tree( Bit_model bm[], const int num_bits )  // most significant bit first
+    {
+    int symbol = 1;  // leading 1 so that the bits decoded so far select a distinct model
+    for( int i = 0; i < num_bits; ++i )
+      symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
+    return symbol - (1 << num_bits);  // remove the leading 1
+    }
+
+  int decode_tree_reversed( Bit_model bm[], const int num_bits )
+    {  // returns the decode_tree result with its num_bits bits in reverse order
+    int symbol = decode_tree( bm, num_bits );
+    int reversed_symbol = 0;
+    for( int i = 0; i < num_bits; ++i )
+      {
+      reversed_symbol = ( reversed_symbol << 1 ) | ( symbol & 1 );
+      symbol >>= 1;
+      }
+    return reversed_symbol;
+    }
+
+  int decode_matched( Bit_model bm[], const int match_byte )
+    {  // decodes a byte, using the bits of match_byte as context until they stop matching
+    Bit_model * const bm1 = bm + 0x100;  // models indexed by (match_bit << 8) + symbol
+    int symbol = 1;
+    for( int i = 7; i >= 0; --i )
+      {
+      const int match_bit = ( match_byte >> i ) & 1;
+      const int bit = decode_bit( bm1[(match_bit<<8)+symbol] );
+      symbol = ( symbol << 1 ) | bit;
+      if( match_bit != bit )  // mismatch: the remaining bits use the plain models in bm
+        {
+        while( symbol < 0x100 )
+          symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
+        break;
+        }
+      }
+    return symbol - 0x100;  // remove the leading 1
+    }
+
+  int decode_len( Len_model & lm, const int pos_state )
+    {  // returns a value in [0, max_len_symbols); the caller adds min_match_len
+    if( decode_bit( lm.choice1 ) == 0 )
+      return decode_tree( lm.bm_low[pos_state], len_low_bits );
+    if( decode_bit( lm.choice2 ) == 0 )
+      return len_low_symbols +
+             decode_tree( lm.bm_mid[pos_state], len_mid_bits );
+    return len_low_symbols + len_mid_symbols +
+           decode_tree( lm.bm_high, len_high_bits );
+    }
+  };
+
+
+class LZ_decoder  // Decodes one member into a circular buffer that is flushed to stdout
+  {
+  unsigned long long partial_data_pos;  // bytes produced before the buffer last wrapped
+  Range_decoder rdec;  // its constructor consumes 5 bytes from stdin
+  const unsigned dictionary_size;  // size of buffer in bytes
+  uint8_t * const buffer;	// output buffer
+  unsigned pos;			// current pos in buffer
+  unsigned stream_pos;		// first byte not yet written to stdout
+  uint32_t crc_;  // running CRC, kept inverted; see crc()
+
+  void flush_data();
+
+  uint8_t get_byte( const unsigned distance ) const  // byte distance + 1 places behind pos
+    {
+    unsigned i = pos - distance - 1;  // may wrap below zero; corrected on the next line
+    if( pos <= distance ) i += dictionary_size;
+    return buffer[i];
+    }
+
+  void put_byte( const uint8_t b )  // appends b and flushes when the buffer becomes full
+    {
+    buffer[pos] = b;
+    if( ++pos >= dictionary_size ) flush_data();
+    }
+
+public:
+  LZ_decoder( const unsigned dict_size )  // allocates dict_size bytes with new[]
+    :
+    partial_data_pos( 0 ),
+    dictionary_size( dict_size ),
+    buffer( new uint8_t[dictionary_size] ),
+    pos( 0 ),
+    stream_pos( 0 ),
+    crc_( 0xFFFFFFFFU )
+    { buffer[dictionary_size-1] = 0; }		// prev_byte of first_byte
+
+  ~LZ_decoder() { delete[] buffer; }
+
+  unsigned crc() const { return crc_ ^ 0xFFFFFFFFU; }  // CRC of the data flushed so far
+  unsigned long long data_position() const { return partial_data_pos + pos; }  // bytes decoded
+
+  bool decode_member();  // returns false on error
+  };
+
+
+void LZ_decoder::flush_data()  // Writes buffer[stream_pos, pos) to stdout and updates crc_
+  {
+  if( pos > stream_pos )  // nothing is done when no bytes are pending
+    {
+    const unsigned size = pos - stream_pos;
+    crc32.update( crc_, buffer + stream_pos, size );
+    errno = 0;  // cleared so that a stale value is not reported below
+    if( std::fwrite( buffer + stream_pos, 1, size, stdout ) != size )
+      { std::fprintf( stderr, "Write error: %s\n", std::strerror( errno ) );
+        std::exit( 1 ); }  // a short write ends the program with status 1
+    if( pos >= dictionary_size ) { partial_data_pos += pos; pos = 0; }  // buffer full: wrap
+    stream_pos = pos;
+    }
+  }
+
+
+bool LZ_decoder::decode_member()		// Returns false if error
+  {
+  Bit_model bm_literal[1<<literal_context_bits][0x300];  // one set of models per literal_state
+  Bit_model bm_match[State::states][pos_states];  // 0: literal, 1: match
+  Bit_model bm_rep[State::states];  // 0: new distance, 1: repeated distance
+  Bit_model bm_rep0[State::states];  // 0: rep0, 1: one of rep1-rep3
+  Bit_model bm_rep1[State::states];  // 0: rep1, 1: rep2 or rep3
+  Bit_model bm_rep2[State::states];  // 0: rep2, 1: rep3
+  Bit_model bm_len[State::states][pos_states];  // 0: single byte at rep0, 1: a length follows
+  Bit_model bm_dis_slot[max_dis_states][1<<dis_slot_bits];  // one tree per dis_state
+  Bit_model bm_dis[modeled_distances-end_dis_model];  // slots in [start_dis_model, end_dis_model)
+  Bit_model bm_align[dis_align_size];  // lowest dis_align_bits bits of long distances
+  Len_model match_len_model;  // lengths of matches with a new distance
+  Len_model rep_len_model;  // lengths of matches with a repeated distance
+  unsigned rep0 = 0;			// rep[0-3] latest four distances
+  unsigned rep1 = 0;			// used for efficient coding of
+  unsigned rep2 = 0;			// repeated distances
+  unsigned rep3 = 0;
+  State state;
+
+  while( !std::feof( stdin ) && !std::ferror( stdin ) )  // leaving the loop is an error
+    {
+    const int pos_state = data_position() & pos_state_mask;
+    if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 )	// 1st bit
+      {
+      const uint8_t prev_byte = get_byte( 0 );  // byte just before pos
+      const int literal_state = prev_byte >> ( 8 - literal_context_bits );
+      Bit_model * const bm = bm_literal[literal_state];
+      if( state.is_char() )  // previous packet was a literal too: plain tree
+        put_byte( rdec.decode_tree( bm, 8 ) );
+      else  // previous packet was a match: use the byte at rep0 as context
+        put_byte( rdec.decode_matched( bm, get_byte( rep0 ) ) );
+      state.set_char();
+      }
+    else
+      {
+      int len;
+      if( rdec.decode_bit( bm_rep[state()] ) == 1 )		// 2nd bit
+        {
+        if( rdec.decode_bit( bm_rep0[state()] ) == 0 )		// 3rd bit
+          {
+          if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
+            { state.set_short_rep(); put_byte( get_byte( rep0 ) ); continue; }
+          }
+        else
+          {  // pick rep1, rep2 or rep3 and move it to the front of the history
+          unsigned distance;
+          if( rdec.decode_bit( bm_rep1[state()] ) == 0 )	// 4th bit
+            distance = rep1;
+          else
+            {
+            if( rdec.decode_bit( bm_rep2[state()] ) == 0 )	// 5th bit
+              distance = rep2;
+            else
+              { distance = rep3; rep3 = rep2; }
+            rep2 = rep1;
+            }
+          rep1 = rep0;
+          rep0 = distance;
+          }
+        len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
+        state.set_rep();
+        }
+      else
+        {  // match with a new distance: shift the history, then decode rep0
+        rep3 = rep2; rep2 = rep1; rep1 = rep0;
+        len = min_match_len + rdec.decode_len( match_len_model, pos_state );
+        const int dis_state = std::min( len - min_match_len, max_dis_states - 1 );
+        const int dis_slot =
+          rdec.decode_tree( bm_dis_slot[dis_state], dis_slot_bits );
+        if( dis_slot < start_dis_model ) rep0 = dis_slot;  // small slots are the distance
+        else
+          {
+          const int direct_bits = ( dis_slot >> 1 ) - 1;
+          rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits;  // top two bits of the distance
+          if( dis_slot < end_dis_model )  // all remaining bits come from bm_dis
+            rep0 += rdec.decode_tree_reversed( bm_dis + rep0 - dis_slot - 1,
+                                               direct_bits );
+          else
+            {  // unmodeled high bits first, then the low bits from bm_align
+            rep0 += rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
+            rep0 += rdec.decode_tree_reversed( bm_align, dis_align_bits );
+            if( rep0 == 0xFFFFFFFFU )		// Marker found
+              {
+              flush_data();
+              return ( len == min_match_len );	// End Of Stream marker
+              }
+            }
+          }
+        state.set_match();
+        if( rep0 >= dictionary_size || ( rep0 >= pos && !partial_data_pos ) )
+          return false;  // distance reaches outside the dictionary or before the first byte
+        }
+      for( int i = 0; i < len; ++i )  // byte by byte, so a copy may overlap its own output
+        put_byte( get_byte( rep0 ) );
+      }
+    }
+  return false;  // input ended or failed before an End Of Stream marker
+  }
+
+
+int main( const int argc, const char * const argv[] )
+  {  // Decompresses every lzip member on stdin to stdout; exit status 0, 1 or 2
+  if( argc > 1 )  // any argument at all just prints the help text
+    {
+    std::printf( "Lzd %s - Educational decompressor for lzip files.\n",
+                 PROGVERSION );
+    std::printf( "Study the source to learn how a lzip decompressor works.\n"
+                 "See the lzip manual for an explanation of the code.\n"
+                 "It is not safe to use lzd for any real work.\n"
+                 "\nUsage: %s < file.lz > file\n", argv[0] );
+    std::printf( "Lzd decompresses from standard input to standard output.\n"
+                 "\nCopyright (C) 2013 Antonio Diaz Diaz.\n"
+                 "This is free software: you are free to change and redistribute it.\n"
+                 "There is NO WARRANTY, to the extent permitted by law.\n"
+                 "Report bugs to lzip-bug@nongnu.org\n"
+                 "Lzip home page: http://www.nongnu.org/lzip/lzip.html\n" );
+    return 0;
+    }
+  // Switch both streams to binary mode on the platforms that need it.
+#if defined(__MSVCRT__) || defined(__OS2__)
+  setmode( STDIN_FILENO, O_BINARY );
+  setmode( STDOUT_FILENO, O_BINARY );
+#endif
+  // One iteration per member; data after the last member that lacks the magic is ignored.
+  for( bool first_member = true; ; first_member = false )
+    {
+    File_header header;
+    for( int i = 0; i < 6; ++i )
+      header[i] = std::getc( stdin );
+    if( std::feof( stdin ) || std::memcmp( header, "LZIP", 4 ) != 0 )
+      {
+      if( first_member )  // input that does not start with a member is rejected
+        { std::fprintf( stderr, "Bad magic number (file not in lzip format)\n" );
+          return 2; }
+      break;
+      }
+    if( header[4] != 1 )
+      {
+      std::fprintf( stderr, "Version %d member format not supported.\n",
+                    header[4] );
+      return 2;
+      }
+    unsigned dict_size = 1U << ( header[5] & 0x1F );  // unsigned: the count may be 31
+    dict_size -= ( dict_size / 16 ) * ( ( header[5] >> 5 ) & 7 );  // minus 0-7 sixteenths
+    if( dict_size < min_dictionary_size || dict_size > max_dictionary_size )
+      { std::fprintf( stderr, "Invalid dictionary size in member header\n" );
+        return 2; }
+    // The decoder object reads the compressed data from stdin and writes to stdout.
+    LZ_decoder decoder( dict_size );
+    if( !decoder.decode_member() )
+      { std::fprintf( stderr, "Data error\n" ); return 2; }
+    // Trailer fields are assembled least significant byte first; bytes 12-19 are not checked.
+    File_trailer trailer;
+    for( int i = 0; i < 20; ++i ) trailer[i] = std::getc( stdin );
+    unsigned crc = 0;
+    for( int i = 3; i >= 0; --i ) { crc <<= 8; crc += trailer[i]; }
+    unsigned long long data_size = 0;
+    for( int i = 11; i >= 4; --i ) { data_size <<= 8; data_size += trailer[i]; }
+    if( crc != decoder.crc() || data_size != decoder.data_position() )
+      { std::fprintf( stderr, "CRC error\n" ); return 2; }
+    }
+  // Closing stdout flushes it, so a failure to write buffered output is detected here.
+  if( std::fclose( stdout ) != 0 )
+    { std::fprintf( stderr, "Can't close stdout: %s\n", std::strerror( errno ) );
+      return 1; }
+  return 0;
+  }
diff --git a/main.cc b/main.cc
deleted file mode 100644
index bba5c6a..0000000
--- a/main.cc
+++ /dev/null
@@ -1,115 +0,0 @@
-/*  Lzd - Educational decompressor for lzip files
-    Copyright (C) 2013 Antonio Diaz Diaz.
-
-    This program is free software: you have unlimited permission
-    to copy, distribute and modify it.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-*/
-/*
-    Exit status: 0 for a normal exit, 1 for environmental problems
-    (file not found, invalid flags, I/O errors, etc), 2 to indicate a
-    corrupt or invalid input file.
-*/
-
-#include <algorithm>
-#include <cerrno>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <stdint.h>
-#include <unistd.h>
-#if defined(__MSVCRT__) || defined(__OS2__)
-#include <fcntl.h>
-#include <io.h>
-#endif
-
-#include "decoder.cc"
-
-
-enum { min_dictionary_size = 1 << 12,
-       max_dictionary_size = 1 << 29 };
-
-typedef uint8_t File_header[6];	// 0-3 magic, 4 version, 5 coded_dict_size
-
-typedef uint8_t File_trailer[20];
-			//  0-3  CRC32 of the uncompressed data
-			//  4-11 size of the uncompressed data
-			// 12-19 member size including header and trailer
-
-
-int main( const int argc, const char * const argv[] )
-  {
-  if( argc > 1 )
-    {
-    std::printf( "Lzd %s - Educational decompressor for lzip files.\n",
-                 PROGVERSION );
-    std::printf( "Study the source to learn how a simple lzip decompressor works.\n"
-                 "It is not safe to use it for any real work.\n"
-                 "\nUsage: %s < file.lz > file\n", argv[0] );
-    std::printf( "Lzd decompresses from standard input to standard output.\n"
-                 "\nCopyright (C) 2013 Antonio Diaz Diaz.\n"
-                 "This is free software: you are free to change and redistribute it.\n"
-                 "There is NO WARRANTY, to the extent permitted by law.\n"
-                 "Report bugs to lzip-bug@nongnu.org\n"
-                 "Lzip home page: http://www.nongnu.org/lzip/lzip.html\n" );
-    return 0;
-    }
-
-#if defined(__MSVCRT__) || defined(__OS2__)
-  setmode( STDIN_FILENO, O_BINARY );
-  setmode( STDOUT_FILENO, O_BINARY );
-#endif
-
-  if( isatty( STDIN_FILENO ) )
-    {
-    std::fprintf( stderr, "I won't read compressed data from a terminal.\n"
-                          "Try '%s --help' for more information.\n", argv[0] );
-    return 1;
-    }
-
-  for( bool first_member = true; ; first_member = false )
-    {
-    File_header header;
-    for( int i = 0; i < 6; ++i )
-      header[i] = std::getc( stdin );
-    if( std::feof( stdin ) || std::memcmp( header, "LZIP", 4 ) != 0 )
-      {
-      if( first_member )
-        { std::fprintf( stderr, "Bad magic number (file not in lzip format)\n" );
-          return 2; }
-      break;
-      }
-    if( header[4] != 1 )
-      {
-      std::fprintf( stderr, "Version %d member format not supported.\n",
-                    header[4] );
-      return 2;
-      }
-    unsigned dict_size = 1 << ( header[5] & 0x1F );
-    dict_size -= ( dict_size / 16 ) * ( ( header[5] >> 5 ) & 7 );
-    if( dict_size < min_dictionary_size || dict_size > max_dictionary_size )
-      { std::fprintf( stderr, "Invalid dictionary size in member header\n" );
-        return 2; }
-
-    LZ_decoder decoder( dict_size );
-    if( !decoder.decode_member() )
-      { std::fprintf( stderr, "Data error\n" ); return 2; }
-
-    File_trailer trailer;
-    for( int i = 0; i < 20; ++i ) trailer[i] = std::getc( stdin );
-    unsigned crc = 0;
-    for( int i = 3; i >= 0; --i ) { crc <<= 8; crc += trailer[i]; }
-    unsigned long long data_size = 0;
-    for( int i = 11; i >= 4; --i ) { data_size <<= 8; data_size += trailer[i]; }
-    if( crc != decoder.crc() || data_size != decoder.data_position() )
-      { std::fprintf( stderr, "CRC error\n" ); return 2; }
-    }
-
-  if( std::fclose( stdout ) != 0 )
-    { std::fprintf( stderr, "Can't close stdout: %s\n", std::strerror( errno ) );
-      return 1; }
-  return 0;
-  }
diff --git a/testsuite/check.sh b/testsuite/check.sh
index 5fc6d18..a701bcb 100755
--- a/testsuite/check.sh
+++ b/testsuite/check.sh
@@ -27,6 +27,15 @@ fail=0
 
 printf "testing lzd-%s..." "$2"
 
+"${LZIP}" < "${in_lz}" > /dev/full 2> /dev/null
+if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
+"${LZIP}" < "${in}" 2> /dev/null
+if [ $? = 2 ] ; then printf . ; else fail=1 ; printf - ; fi
+dd if="${in_lz}" bs=1 count=6 2> /dev/null | "${LZIP}" 2> /dev/null
+if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
+dd if="${in_lz}" bs=1 count=20 2> /dev/null | "${LZIP}" 2> /dev/null
+if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
+
 "${LZIP}" < "${in_lz}" > copy || fail=1
 cmp "${in}" copy || fail=1
 printf .
-- 
cgit v1.2.3