summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r--  ChangeLog  5
-rw-r--r--  INSTALL  6
-rw-r--r--  Makefile.in  10
-rw-r--r--  NEWS  4
-rw-r--r--  README  2
-rwxr-xr-x  configure  46
-rw-r--r--  lzd.cc (renamed from decoder.cc)  182
-rw-r--r--  main.cc  115
-rwxr-xr-x  testsuite/check.sh  9
9 files changed, 193 insertions, 186 deletions
diff --git a/ChangeLog b/ChangeLog
index c3be740..0f516f1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2013-07-24 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 0.3 released.
+ * decoder.cc and main.cc have been merged into lzd.cc.
+
2013-05-06 Antonio Diaz Diaz <antonio@gnu.org>
* Version 0.2 released.
diff --git a/INSTALL b/INSTALL
index 09ac834..61635f7 100644
--- a/INSTALL
+++ b/INSTALL
@@ -1,7 +1,7 @@
Requirements
------------
You will need a C++ compiler.
-I use gcc 4.8.0 and 3.3.6, but the code should compile with any
+I use gcc 4.8.1 and 3.3.6, but the code should compile with any
standards compliant compiler.
Gcc is available at http://gcc.gnu.org.
@@ -10,9 +10,9 @@ Procedure
---------
1. Unpack the archive if you have not done so already:
- lzip -cd lzd[version].tar.lz | tar -xf -
+ tar -xf lzd[version].tar.lz
or
- gzip -cd lzd[version].tar.gz | tar -xf -
+ lzip -cd lzd[version].tar.lz | tar -xf -
This creates the directory ./lzd[version] containing the source from
the main archive.
diff --git a/Makefile.in b/Makefile.in
index d313f51..9a4b5ec 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -6,7 +6,7 @@ INSTALL_DATA = $(INSTALL) -p -m 644
INSTALL_DIR = $(INSTALL) -d -m 755
SHELL = /bin/sh
-objs = main.o
+objs = lzd.o
.PHONY : all install install-bin install-info install-man install-strip \
@@ -21,14 +21,10 @@ $(progname) : $(objs)
$(progname)_profiled : $(objs)
$(CXX) $(LDFLAGS) -pg -o $@ $(objs)
-main.o : main.cc
- $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
-
%.o : %.cc
- $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $<
+ $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
-$(objs) : Makefile
-main.o : decoder.cc
+$(objs) : Makefile
doc :
diff --git a/NEWS b/NEWS
index 4caf5f7..2c8be2e 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,3 @@
-Changes in version 0.2:
+Changes in version 0.3:
-Added a missing "#include" for OS/2.
+All the code is now contained in a single file (lzd.cc).
diff --git a/README b/README
index ed832ca..fcfe639 100644
--- a/README
+++ b/README
@@ -1,6 +1,6 @@
Description
-Lzd is a very simplified decompressor for lzip files with an educational
+Lzd is a simplified decompressor for lzip files with an educational
purpose. Studying its source is a good first step to understand how lzip
works. It is not safe to use lzd for any real work.
diff --git a/configure b/configure
index 7b3f916..98acee7 100755
--- a/configure
+++ b/configure
@@ -5,12 +5,10 @@
# This configure script is free software: you have unlimited permission
# to copy, distribute and modify it.
-args=
-no_create=
pkgname=lzd
-pkgversion=0.2
+pkgversion=0.3
progname=lzd
-srctrigger=decoder.cc
+srctrigger=lzd.cc
# clear some things potentially inherited from environment.
LC_ALL=C
@@ -28,18 +26,19 @@ CXXFLAGS='-Wall -W -O2'
LDFLAGS=
# checking whether we are using GNU C++.
-if [ ! -x /bin/g++ ] &&
- [ ! -x /usr/bin/g++ ] &&
- [ ! -x /usr/local/bin/g++ ] ; then
+${CXX} --version > /dev/null 2>&1
+if [ $? != 0 ] ; then
CXX=c++
CXXFLAGS='-W -O2'
fi
# Loop over all args
-while [ -n "$1" ] ; do
+args=
+no_create=
+while [ $# != 0 ] ; do
# Get the first arg, and shuffle
- option=$1
+ option=$1 ; arg2=no
shift
# Add the argument quoted to args
@@ -74,6 +73,14 @@ while [ -n "$1" ] ; do
--version | -V)
echo "Configure script for ${pkgname} version ${pkgversion}"
exit 0 ;;
+ --srcdir) srcdir=$1 ; arg2=yes ;;
+ --prefix) prefix=$1 ; arg2=yes ;;
+ --exec-prefix) exec_prefix=$1 ; arg2=yes ;;
+ --bindir) bindir=$1 ; arg2=yes ;;
+ --datarootdir) datarootdir=$1 ; arg2=yes ;;
+ --infodir) infodir=$1 ; arg2=yes ;;
+ --mandir) mandir=$1 ; arg2=yes ;;
+
--srcdir=*) srcdir=${optarg} ;;
--prefix=*) prefix=${optarg} ;;
--exec-prefix=*) exec_prefix=${optarg} ;;
@@ -88,11 +95,22 @@ while [ -n "$1" ] ; do
CXXFLAGS=*) CXXFLAGS=${optarg} ;;
LDFLAGS=*) LDFLAGS=${optarg} ;;
- --* | *=* | *-*-*) ;;
+ --*)
+ echo "configure: WARNING: unrecognized option: '${option}'" 1>&2 ;;
+ *=* | *-*-*) ;;
*)
- echo "configure: Unrecognized option: \"${option}\"; use --help for usage." 1>&2
+ echo "configure: unrecognized option: '${option}'" 1>&2
+ echo "Try 'configure --help' for more information." 1>&2
exit 1 ;;
esac
+
+ # Check if the option took a separate argument
+ if [ "${arg2}" = yes ] ; then
+ if [ $# != 0 ] ; then args="${args} \"$1\"" ; shift
+ else echo "configure: Missing argument to '${option}'" 1>&2
+ exit 1
+ fi
+ fi
done
# Find the source files, if location was not specified.
@@ -107,10 +125,8 @@ if [ -z "${srcdir}" ] ; then
fi
if [ ! -r "${srcdir}/${srctrigger}" ] ; then
- exec 1>&2
- echo
- echo "configure: Can't find sources in ${srcdir} ${srcdirtext}"
- echo "configure: (At least ${srctrigger} is missing)."
+ echo "configure: Can't find sources in ${srcdir} ${srcdirtext}" 1>&2
+ echo "configure: (At least ${srctrigger} is missing)." 1>&2
exit 1
fi
diff --git a/decoder.cc b/lzd.cc
index fbfcdb3..0ac7b64 100644
--- a/decoder.cc
+++ b/lzd.cc
@@ -8,6 +8,24 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/
+/*
+ Exit status: 0 for a normal exit, 1 for environmental problems
+ (file not found, invalid flags, I/O errors, etc), 2 to indicate a
+ corrupt or invalid input file.
+*/
+
+#include <algorithm>
+#include <cerrno>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <stdint.h>
+#include <unistd.h>
+#if defined(__MSVCRT__) || defined(__OS2__)
+#include <fcntl.h>
+#include <io.h>
+#endif
+
class State
{
@@ -24,20 +42,20 @@ public:
static const int next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 };
st = next[st];
}
-
- void set_match() { st = ( ( st < 7 ) ? 7 : 10 ); }
- void set_rep() { st = ( ( st < 7 ) ? 8 : 11 ); }
- void set_short_rep() { st = ( ( st < 7 ) ? 9 : 11 ); }
+ void set_match() { st = ( st < 7 ) ? 7 : 10; }
+ void set_rep() { st = ( st < 7 ) ? 8 : 11; }
+ void set_short_rep() { st = ( st < 7 ) ? 9 : 11; }
};
enum {
+ min_dictionary_size = 1 << 12,
+ max_dictionary_size = 1 << 29,
literal_context_bits = 3,
pos_state_bits = 2,
pos_states = 1 << pos_state_bits,
pos_state_mask = pos_states - 1,
- max_dis_states = 4,
dis_slot_bits = 6,
start_dis_model = 4,
end_dis_model = 14,
@@ -52,13 +70,14 @@ enum {
len_mid_symbols = 1 << len_mid_bits,
len_high_symbols = 1 << len_high_bits,
max_len_symbols = len_low_symbols + len_mid_symbols + len_high_symbols,
+
min_match_len = 2, // must be 2
+ max_dis_states = 4,
bit_model_move_bits = 5,
bit_model_total_bits = 11,
bit_model_total = 1 << bit_model_total_bits };
-
struct Bit_model
{
int probability;
@@ -75,6 +94,39 @@ struct Len_model
};
+class CRC32
+ {
+ uint32_t data[256]; // Table of CRCs of all 8-bit messages.
+
+public:
+ CRC32()
+ {
+ for( unsigned n = 0; n < 256; ++n )
+ {
+ unsigned c = n;
+ for( int k = 0; k < 8; ++k )
+ { if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; }
+ data[n] = c;
+ }
+ }
+
+ void update( uint32_t & crc, const uint8_t * const buffer, const int size ) const
+ {
+ for( int i = 0; i < size; ++i )
+ crc = data[(crc^buffer[i])&0xFF] ^ ( crc >> 8 );
+ }
+ };
+
+const CRC32 crc32;
+
+
+typedef uint8_t File_header[6]; // 0-3 magic, 4 version, 5 coded_dict_size
+
+typedef uint8_t File_trailer[20];
+ // 0-3 CRC32 of the uncompressed data
+ // 4-11 size of the uncompressed data
+ // 12-19 member size including header and trailer
+
class Range_decoder
{
uint32_t code;
@@ -83,9 +135,11 @@ class Range_decoder
public:
Range_decoder() : code( 0 ), range( 0xFFFFFFFFU )
{
- for( int i = 0; i < 5; ++i ) code = (code << 8) | std::getc( stdin );
+ for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte();
}
+ uint8_t get_byte() { return std::getc( stdin ); }
+
int decode( const int num_bits )
{
int symbol = 0;
@@ -95,7 +149,7 @@ public:
symbol <<= 1;
if( code >= range ) { code -= range; symbol |= 1; }
if( range <= 0x00FFFFFFU ) // normalize
- { range <<= 8; code = (code << 8) | std::getc( stdin ); }
+ { range <<= 8; code = (code << 8) | get_byte(); }
}
return symbol;
}
@@ -118,7 +172,7 @@ public:
symbol = 1;
}
if( range <= 0x00FFFFFFU ) // normalize
- { range <<= 8; code = (code << 8) | std::getc( stdin ); }
+ { range <<= 8; code = (code << 8) | get_byte(); }
return symbol;
}
@@ -164,12 +218,11 @@ public:
int decode_len( Len_model & lm, const int pos_state )
{
if( decode_bit( lm.choice1 ) == 0 )
- return min_match_len +
- decode_tree( lm.bm_low[pos_state], len_low_bits );
+ return decode_tree( lm.bm_low[pos_state], len_low_bits );
if( decode_bit( lm.choice2 ) == 0 )
- return min_match_len + len_low_symbols +
+ return len_low_symbols +
decode_tree( lm.bm_mid[pos_state], len_mid_bits );
- return min_match_len + len_low_symbols + len_mid_symbols +
+ return len_low_symbols + len_mid_symbols +
decode_tree( lm.bm_high, len_high_bits );
}
};
@@ -189,8 +242,8 @@ class LZ_decoder
uint8_t get_byte( const unsigned distance ) const
{
- int i = pos - distance - 1;
- if( i < 0 ) i += dictionary_size;
+ unsigned i = pos - distance - 1;
+ if( pos <= distance ) i += dictionary_size;
return buffer[i];
}
@@ -220,32 +273,6 @@ public:
};
-class CRC32
- {
- uint32_t data[256]; // Table of CRCs of all 8-bit messages.
-
-public:
- CRC32()
- {
- for( unsigned n = 0; n < 256; ++n )
- {
- unsigned c = n;
- for( int k = 0; k < 8; ++k )
- { if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; }
- data[n] = c;
- }
- }
-
- void update( uint32_t & crc, const uint8_t * buffer, const int size ) const
- {
- for( int i = 0; i < size; ++i )
- crc = data[(crc^buffer[i])&0xFF] ^ ( crc >> 8 );
- }
- };
-
-const CRC32 crc32;
-
-
void LZ_decoder::flush_data()
{
if( pos > stream_pos )
@@ -322,13 +349,13 @@ bool LZ_decoder::decode_member() // Returns false if error
rep1 = rep0;
rep0 = distance;
}
- len = rdec.decode_len( rep_len_model, pos_state );
+ len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
state.set_rep();
}
else
{
rep3 = rep2; rep2 = rep1; rep1 = rep0;
- len = rdec.decode_len( match_len_model, pos_state );
+ len = min_match_len + rdec.decode_len( match_len_model, pos_state );
const int dis_state = std::min( len - min_match_len, max_dis_states - 1 );
const int dis_slot =
rdec.decode_tree( bm_dis_slot[dis_state], dis_slot_bits );
@@ -361,3 +388,72 @@ bool LZ_decoder::decode_member() // Returns false if error
}
return false;
}
+
+
+int main( const int argc, const char * const argv[] )
+ {
+ if( argc > 1 )
+ {
+ std::printf( "Lzd %s - Educational decompressor for lzip files.\n",
+ PROGVERSION );
+ std::printf( "Study the source to learn how a lzip decompressor works.\n"
+ "See the lzip manual for an explanation of the code.\n"
+ "It is not safe to use lzd for any real work.\n"
+ "\nUsage: %s < file.lz > file\n", argv[0] );
+ std::printf( "Lzd decompresses from standard input to standard output.\n"
+ "\nCopyright (C) 2013 Antonio Diaz Diaz.\n"
+ "This is free software: you are free to change and redistribute it.\n"
+ "There is NO WARRANTY, to the extent permitted by law.\n"
+ "Report bugs to lzip-bug@nongnu.org\n"
+ "Lzip home page: http://www.nongnu.org/lzip/lzip.html\n" );
+ return 0;
+ }
+
+#if defined(__MSVCRT__) || defined(__OS2__)
+ setmode( STDIN_FILENO, O_BINARY );
+ setmode( STDOUT_FILENO, O_BINARY );
+#endif
+
+ for( bool first_member = true; ; first_member = false )
+ {
+ File_header header;
+ for( int i = 0; i < 6; ++i )
+ header[i] = std::getc( stdin );
+ if( std::feof( stdin ) || std::memcmp( header, "LZIP", 4 ) != 0 )
+ {
+ if( first_member )
+ { std::fprintf( stderr, "Bad magic number (file not in lzip format)\n" );
+ return 2; }
+ break;
+ }
+ if( header[4] != 1 )
+ {
+ std::fprintf( stderr, "Version %d member format not supported.\n",
+ header[4] );
+ return 2;
+ }
+ unsigned dict_size = 1 << ( header[5] & 0x1F );
+ dict_size -= ( dict_size / 16 ) * ( ( header[5] >> 5 ) & 7 );
+ if( dict_size < min_dictionary_size || dict_size > max_dictionary_size )
+ { std::fprintf( stderr, "Invalid dictionary size in member header\n" );
+ return 2; }
+
+ LZ_decoder decoder( dict_size );
+ if( !decoder.decode_member() )
+ { std::fprintf( stderr, "Data error\n" ); return 2; }
+
+ File_trailer trailer;
+ for( int i = 0; i < 20; ++i ) trailer[i] = std::getc( stdin );
+ unsigned crc = 0;
+ for( int i = 3; i >= 0; --i ) { crc <<= 8; crc += trailer[i]; }
+ unsigned long long data_size = 0;
+ for( int i = 11; i >= 4; --i ) { data_size <<= 8; data_size += trailer[i]; }
+ if( crc != decoder.crc() || data_size != decoder.data_position() )
+ { std::fprintf( stderr, "CRC error\n" ); return 2; }
+ }
+
+ if( std::fclose( stdout ) != 0 )
+ { std::fprintf( stderr, "Can't close stdout: %s\n", std::strerror( errno ) );
+ return 1; }
+ return 0;
+ }
diff --git a/main.cc b/main.cc
deleted file mode 100644
index bba5c6a..0000000
--- a/main.cc
+++ /dev/null
@@ -1,115 +0,0 @@
-/* Lzd - Educational decompressor for lzip files
- Copyright (C) 2013 Antonio Diaz Diaz.
-
- This program is free software: you have unlimited permission
- to copy, distribute and modify it.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-*/
-/*
- Exit status: 0 for a normal exit, 1 for environmental problems
- (file not found, invalid flags, I/O errors, etc), 2 to indicate a
- corrupt or invalid input file.
-*/
-
-#include <algorithm>
-#include <cerrno>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <stdint.h>
-#include <unistd.h>
-#if defined(__MSVCRT__) || defined(__OS2__)
-#include <fcntl.h>
-#include <io.h>
-#endif
-
-#include "decoder.cc"
-
-
-enum { min_dictionary_size = 1 << 12,
- max_dictionary_size = 1 << 29 };
-
-typedef uint8_t File_header[6]; // 0-3 magic, 4 version, 5 coded_dict_size
-
-typedef uint8_t File_trailer[20];
- // 0-3 CRC32 of the uncompressed data
- // 4-11 size of the uncompressed data
- // 12-19 member size including header and trailer
-
-
-int main( const int argc, const char * const argv[] )
- {
- if( argc > 1 )
- {
- std::printf( "Lzd %s - Educational decompressor for lzip files.\n",
- PROGVERSION );
- std::printf( "Study the source to learn how a simple lzip decompressor works.\n"
- "It is not safe to use it for any real work.\n"
- "\nUsage: %s < file.lz > file\n", argv[0] );
- std::printf( "Lzd decompresses from standard input to standard output.\n"
- "\nCopyright (C) 2013 Antonio Diaz Diaz.\n"
- "This is free software: you are free to change and redistribute it.\n"
- "There is NO WARRANTY, to the extent permitted by law.\n"
- "Report bugs to lzip-bug@nongnu.org\n"
- "Lzip home page: http://www.nongnu.org/lzip/lzip.html\n" );
- return 0;
- }
-
-#if defined(__MSVCRT__) || defined(__OS2__)
- setmode( STDIN_FILENO, O_BINARY );
- setmode( STDOUT_FILENO, O_BINARY );
-#endif
-
- if( isatty( STDIN_FILENO ) )
- {
- std::fprintf( stderr, "I won't read compressed data from a terminal.\n"
- "Try '%s --help' for more information.\n", argv[0] );
- return 1;
- }
-
- for( bool first_member = true; ; first_member = false )
- {
- File_header header;
- for( int i = 0; i < 6; ++i )
- header[i] = std::getc( stdin );
- if( std::feof( stdin ) || std::memcmp( header, "LZIP", 4 ) != 0 )
- {
- if( first_member )
- { std::fprintf( stderr, "Bad magic number (file not in lzip format)\n" );
- return 2; }
- break;
- }
- if( header[4] != 1 )
- {
- std::fprintf( stderr, "Version %d member format not supported.\n",
- header[4] );
- return 2;
- }
- unsigned dict_size = 1 << ( header[5] & 0x1F );
- dict_size -= ( dict_size / 16 ) * ( ( header[5] >> 5 ) & 7 );
- if( dict_size < min_dictionary_size || dict_size > max_dictionary_size )
- { std::fprintf( stderr, "Invalid dictionary size in member header\n" );
- return 2; }
-
- LZ_decoder decoder( dict_size );
- if( !decoder.decode_member() )
- { std::fprintf( stderr, "Data error\n" ); return 2; }
-
- File_trailer trailer;
- for( int i = 0; i < 20; ++i ) trailer[i] = std::getc( stdin );
- unsigned crc = 0;
- for( int i = 3; i >= 0; --i ) { crc <<= 8; crc += trailer[i]; }
- unsigned long long data_size = 0;
- for( int i = 11; i >= 4; --i ) { data_size <<= 8; data_size += trailer[i]; }
- if( crc != decoder.crc() || data_size != decoder.data_position() )
- { std::fprintf( stderr, "CRC error\n" ); return 2; }
- }
-
- if( std::fclose( stdout ) != 0 )
- { std::fprintf( stderr, "Can't close stdout: %s\n", std::strerror( errno ) );
- return 1; }
- return 0;
- }
diff --git a/testsuite/check.sh b/testsuite/check.sh
index 5fc6d18..a701bcb 100755
--- a/testsuite/check.sh
+++ b/testsuite/check.sh
@@ -27,6 +27,15 @@ fail=0
printf "testing lzd-%s..." "$2"
+"${LZIP}" < "${in_lz}" > /dev/full 2> /dev/null
+if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
+"${LZIP}" < "${in}" 2> /dev/null
+if [ $? = 2 ] ; then printf . ; else fail=1 ; printf - ; fi
+dd if="${in_lz}" bs=1 count=6 2> /dev/null | "${LZIP}" 2> /dev/null
+if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
+dd if="${in_lz}" bs=1 count=20 2> /dev/null | "${LZIP}" 2> /dev/null
+if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
+
"${LZIP}" < "${in_lz}" > copy || fail=1
cmp "${in}" copy || fail=1
printf .