diff options
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | INSTALL | 6 | ||||
-rw-r--r-- | Makefile.in | 10 | ||||
-rw-r--r-- | NEWS | 4 | ||||
-rw-r--r-- | README | 2 | ||||
-rwxr-xr-x | configure | 46 | ||||
-rw-r--r-- | lzd.cc (renamed from decoder.cc) | 182 | ||||
-rw-r--r-- | main.cc | 115 | ||||
-rwxr-xr-x | testsuite/check.sh | 9 |
9 files changed, 193 insertions, 186 deletions
@@ -1,3 +1,8 @@ +2013-07-24 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 0.3 released. + * decoder.cc and main.cc have been merged into lzd.cc. + 2013-05-06 Antonio Diaz Diaz <antonio@gnu.org> * Version 0.2 released. @@ -1,7 +1,7 @@ Requirements ------------ You will need a C++ compiler. -I use gcc 4.8.0 and 3.3.6, but the code should compile with any +I use gcc 4.8.1 and 3.3.6, but the code should compile with any standards compliant compiler. Gcc is available at http://gcc.gnu.org. @@ -10,9 +10,9 @@ Procedure --------- 1. Unpack the archive if you have not done so already: - lzip -cd lzd[version].tar.lz | tar -xf - + tar -xf lzd[version].tar.lz or - gzip -cd lzd[version].tar.gz | tar -xf - + lzip -cd lzd[version].tar.lz | tar -xf - This creates the directory ./lzd[version] containing the source from the main archive. diff --git a/Makefile.in b/Makefile.in index d313f51..9a4b5ec 100644 --- a/Makefile.in +++ b/Makefile.in @@ -6,7 +6,7 @@ INSTALL_DATA = $(INSTALL) -p -m 644 INSTALL_DIR = $(INSTALL) -d -m 755 SHELL = /bin/sh -objs = main.o +objs = lzd.o .PHONY : all install install-bin install-info install-man install-strip \ @@ -21,14 +21,10 @@ $(progname) : $(objs) $(progname)_profiled : $(objs) $(CXX) $(LDFLAGS) -pg -o $@ $(objs) -main.o : main.cc - $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< - %.o : %.cc - $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< + $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< -$(objs) : Makefile -main.o : decoder.cc +$(objs) : Makefile doc : @@ -1,3 +1,3 @@ -Changes in version 0.2: +Changes in version 0.3: -Added a missing "#include" for OS/2. +All the code is now contained in a single file (lzd.cc). @@ -1,6 +1,6 @@ Description -Lzd is a very simplified decompressor for lzip files with an educational +Lzd is a simplified decompressor for lzip files with an educational purpose. Studying its source is a good first step to understand how lzip works. It is not safe to use lzd for any real work. @@ -5,12 +5,10 @@ # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. -args= -no_create= pkgname=lzd -pkgversion=0.2 +pkgversion=0.3 progname=lzd -srctrigger=decoder.cc +srctrigger=lzd.cc # clear some things potentially inherited from environment. LC_ALL=C @@ -28,18 +26,19 @@ CXXFLAGS='-Wall -W -O2' LDFLAGS= # checking whether we are using GNU C++. -if [ ! -x /bin/g++ ] && - [ ! -x /usr/bin/g++ ] && - [ ! -x /usr/local/bin/g++ ] ; then +${CXX} --version > /dev/null 2>&1 +if [ $? != 0 ] ; then CXX=c++ CXXFLAGS='-W -O2' fi # Loop over all args -while [ -n "$1" ] ; do +args= +no_create= +while [ $# != 0 ] ; do # Get the first arg, and shuffle - option=$1 + option=$1 ; arg2=no shift # Add the argument quoted to args @@ -74,6 +73,14 @@ while [ -n "$1" ] ; do --version | -V) echo "Configure script for ${pkgname} version ${pkgversion}" exit 0 ;; + --srcdir) srcdir=$1 ; arg2=yes ;; + --prefix) prefix=$1 ; arg2=yes ;; + --exec-prefix) exec_prefix=$1 ; arg2=yes ;; + --bindir) bindir=$1 ; arg2=yes ;; + --datarootdir) datarootdir=$1 ; arg2=yes ;; + --infodir) infodir=$1 ; arg2=yes ;; + --mandir) mandir=$1 ; arg2=yes ;; + --srcdir=*) srcdir=${optarg} ;; --prefix=*) prefix=${optarg} ;; --exec-prefix=*) exec_prefix=${optarg} ;; @@ -88,11 +95,22 @@ while [ -n "$1" ] ; do CXXFLAGS=*) CXXFLAGS=${optarg} ;; LDFLAGS=*) LDFLAGS=${optarg} ;; - --* | *=* | *-*-*) ;; + --*) + echo "configure: WARNING: unrecognized option: '${option}'" 1>&2 ;; + *=* | *-*-*) ;; *) - echo "configure: Unrecognized option: \"${option}\"; use --help for usage." 1>&2 + echo "configure: unrecognized option: '${option}'" 1>&2 + echo "Try 'configure --help' for more information." 1>&2 exit 1 ;; esac + + # Check if the option took a separate argument + if [ "${arg2}" = yes ] ; then + if [ $# != 0 ] ; then args="${args} \"$1\"" ; shift + else echo "configure: Missing argument to '${option}'" 1>&2 + exit 1 + fi + fi done # Find the source files, if location was not specified. @@ -107,10 +125,8 @@ if [ -z "${srcdir}" ] ; then fi if [ ! -r "${srcdir}/${srctrigger}" ] ; then - exec 1>&2 - echo - echo "configure: Can't find sources in ${srcdir} ${srcdirtext}" - echo "configure: (At least ${srctrigger} is missing)." + echo "configure: Can't find sources in ${srcdir} ${srcdirtext}" 1>&2 + echo "configure: (At least ${srctrigger} is missing)." 1>&2 exit 1 fi @@ -8,6 +8,24 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ +/* + Exit status: 0 for a normal exit, 1 for environmental problems + (file not found, invalid flags, I/O errors, etc), 2 to indicate a + corrupt or invalid input file. +*/ + +#include <algorithm> +#include <cerrno> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <stdint.h> +#include <unistd.h> +#if defined(__MSVCRT__) || defined(__OS2__) +#include <fcntl.h> +#include <io.h> +#endif + class State { @@ -24,20 +42,20 @@ public: static const int next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 }; st = next[st]; } - - void set_match() { st = ( ( st < 7 ) ? 7 : 10 ); } - void set_rep() { st = ( ( st < 7 ) ? 8 : 11 ); } - void set_short_rep() { st = ( ( st < 7 ) ? 9 : 11 ); } + void set_match() { st = ( st < 7 ) ? 7 : 10; } + void set_rep() { st = ( st < 7 ) ? 8 : 11; } + void set_short_rep() { st = ( st < 7 ) ? 9 : 11; } }; enum { + min_dictionary_size = 1 << 12, + max_dictionary_size = 1 << 29, literal_context_bits = 3, pos_state_bits = 2, pos_states = 1 << pos_state_bits, pos_state_mask = pos_states - 1, - max_dis_states = 4, dis_slot_bits = 6, start_dis_model = 4, end_dis_model = 14, @@ -52,13 +70,14 @@ enum { len_mid_symbols = 1 << len_mid_bits, len_high_symbols = 1 << len_high_bits, max_len_symbols = len_low_symbols + len_mid_symbols + len_high_symbols, + min_match_len = 2, // must be 2 + max_dis_states = 4, bit_model_move_bits = 5, bit_model_total_bits = 11, bit_model_total = 1 << bit_model_total_bits }; - struct Bit_model { int probability; @@ -75,6 +94,39 @@ struct Len_model }; +class CRC32 + { + uint32_t data[256]; // Table of CRCs of all 8-bit messages. + +public: + CRC32() + { + for( unsigned n = 0; n < 256; ++n ) + { + unsigned c = n; + for( int k = 0; k < 8; ++k ) + { if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; } + data[n] = c; + } + } + + void update( uint32_t & crc, const uint8_t * const buffer, const int size ) const + { + for( int i = 0; i < size; ++i ) + crc = data[(crc^buffer[i])&0xFF] ^ ( crc >> 8 ); + } + }; + +const CRC32 crc32; + + +typedef uint8_t File_header[6]; // 0-3 magic, 4 version, 5 coded_dict_size + +typedef uint8_t File_trailer[20]; + // 0-3 CRC32 of the uncompressed data + // 4-11 size of the uncompressed data + // 12-19 member size including header and trailer + class Range_decoder { uint32_t code; @@ -83,9 +135,11 @@ class Range_decoder public: Range_decoder() : code( 0 ), range( 0xFFFFFFFFU ) { - for( int i = 0; i < 5; ++i ) code = (code << 8) | std::getc( stdin ); + for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte(); } + uint8_t get_byte() { return std::getc( stdin ); } + int decode( const int num_bits ) { int symbol = 0; @@ -95,7 +149,7 @@ public: symbol <<= 1; if( code >= range ) { code -= range; symbol |= 1; } if( range <= 0x00FFFFFFU ) // normalize - { range <<= 8; code = (code << 8) | std::getc( stdin ); } + { range <<= 8; code = (code << 8) | get_byte(); } } return symbol; } @@ -118,7 +172,7 @@ public: symbol = 1; } if( range <= 0x00FFFFFFU ) // normalize - { range <<= 8; code = (code << 8) | std::getc( stdin ); } + { range <<= 8; code = (code << 8) | get_byte(); } return symbol; } @@ -164,12 +218,11 @@ public: int decode_len( Len_model & lm, const int pos_state ) { if( decode_bit( lm.choice1 ) == 0 ) - return min_match_len + - decode_tree( lm.bm_low[pos_state], len_low_bits ); + return decode_tree( lm.bm_low[pos_state], len_low_bits ); if( decode_bit( lm.choice2 ) == 0 ) - return min_match_len + len_low_symbols + + return len_low_symbols + decode_tree( lm.bm_mid[pos_state], len_mid_bits ); - return min_match_len + len_low_symbols + len_mid_symbols + + return len_low_symbols + len_mid_symbols + decode_tree( lm.bm_high, len_high_bits ); } }; @@ -189,8 +242,8 @@ class LZ_decoder uint8_t get_byte( const unsigned distance ) const { - int i = pos - distance - 1; - if( i < 0 ) i += dictionary_size; + unsigned i = pos - distance - 1; + if( pos <= distance ) i += dictionary_size; return buffer[i]; } @@ -220,32 +273,6 @@ public: }; -class CRC32 - { - uint32_t data[256]; // Table of CRCs of all 8-bit messages. - -public: - CRC32() - { - for( unsigned n = 0; n < 256; ++n ) - { - unsigned c = n; - for( int k = 0; k < 8; ++k ) - { if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; } - data[n] = c; - } - } - - void update( uint32_t & crc, const uint8_t * buffer, const int size ) const - { - for( int i = 0; i < size; ++i ) - crc = data[(crc^buffer[i])&0xFF] ^ ( crc >> 8 ); - } - }; - -const CRC32 crc32; - - void LZ_decoder::flush_data() { if( pos > stream_pos ) @@ -322,13 +349,13 @@ bool LZ_decoder::decode_member() // Returns false if error rep1 = rep0; rep0 = distance; } - len = rdec.decode_len( rep_len_model, pos_state ); + len = min_match_len + rdec.decode_len( rep_len_model, pos_state ); state.set_rep(); } else { rep3 = rep2; rep2 = rep1; rep1 = rep0; - len = rdec.decode_len( match_len_model, pos_state ); + len = min_match_len + rdec.decode_len( match_len_model, pos_state ); const int dis_state = std::min( len - min_match_len, max_dis_states - 1 ); const int dis_slot = rdec.decode_tree( bm_dis_slot[dis_state], dis_slot_bits ); @@ -361,3 +388,72 @@ bool LZ_decoder::decode_member() // Returns false if error } return false; } + + +int main( const int argc, const char * const argv[] ) + { + if( argc > 1 ) + { + std::printf( "Lzd %s - Educational decompressor for lzip files.\n", + PROGVERSION ); + std::printf( "Study the source to learn how a lzip decompressor works.\n" + "See the lzip manual for an explanation of the code.\n" + "It is not safe to use lzd for any real work.\n" + "\nUsage: %s < file.lz > file\n", argv[0] ); + std::printf( "Lzd decompresses from standard input to standard output.\n" + "\nCopyright (C) 2013 Antonio Diaz Diaz.\n" + "This is free software: you are free to change and redistribute it.\n" + "There is NO WARRANTY, to the extent permitted by law.\n" + "Report bugs to lzip-bug@nongnu.org\n" + "Lzip home page: http://www.nongnu.org/lzip/lzip.html\n" ); + return 0; + } + +#if defined(__MSVCRT__) || defined(__OS2__) + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); +#endif + + for( bool first_member = true; ; first_member = false ) + { + File_header header; + for( int i = 0; i < 6; ++i ) + header[i] = std::getc( stdin ); + if( std::feof( stdin ) || std::memcmp( header, "LZIP", 4 ) != 0 ) + { + if( first_member ) + { std::fprintf( stderr, "Bad magic number (file not in lzip format)\n" ); + return 2; } + break; + } + if( header[4] != 1 ) + { + std::fprintf( stderr, "Version %d member format not supported.\n", + header[4] ); + return 2; + } + unsigned dict_size = 1 << ( header[5] & 0x1F ); + dict_size -= ( dict_size / 16 ) * ( ( header[5] >> 5 ) & 7 ); + if( dict_size < min_dictionary_size || dict_size > max_dictionary_size ) + { std::fprintf( stderr, "Invalid dictionary size in member header\n" ); + return 2; } + + LZ_decoder decoder( dict_size ); + if( !decoder.decode_member() ) + { std::fprintf( stderr, "Data error\n" ); return 2; } + + File_trailer trailer; + for( int i = 0; i < 20; ++i ) trailer[i] = std::getc( stdin ); + unsigned crc = 0; + for( int i = 3; i >= 0; --i ) { crc <<= 8; crc += trailer[i]; } + unsigned long long data_size = 0; + for( int i = 11; i >= 4; --i ) { data_size <<= 8; data_size += trailer[i]; } + if( crc != decoder.crc() || data_size != decoder.data_position() ) + { std::fprintf( stderr, "CRC error\n" ); return 2; } + } + + if( std::fclose( stdout ) != 0 ) + { std::fprintf( stderr, "Can't close stdout: %s\n", std::strerror( errno ) ); + return 1; } + return 0; + } diff --git a/main.cc b/main.cc deleted file mode 100644 index bba5c6a..0000000 --- a/main.cc +++ /dev/null @@ -1,115 +0,0 @@ -/* Lzd - Educational decompressor for lzip files - Copyright (C) 2013 Antonio Diaz Diaz. - - This program is free software: you have unlimited permission - to copy, distribute and modify it. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -*/ -/* - Exit status: 0 for a normal exit, 1 for environmental problems - (file not found, invalid flags, I/O errors, etc), 2 to indicate a - corrupt or invalid input file. -*/ - -#include <algorithm> -#include <cerrno> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include <stdint.h> -#include <unistd.h> -#if defined(__MSVCRT__) || defined(__OS2__) -#include <fcntl.h> -#include <io.h> -#endif - -#include "decoder.cc" - - -enum { min_dictionary_size = 1 << 12, - max_dictionary_size = 1 << 29 }; - -typedef uint8_t File_header[6]; // 0-3 magic, 4 version, 5 coded_dict_size - -typedef uint8_t File_trailer[20]; - // 0-3 CRC32 of the uncompressed data - // 4-11 size of the uncompressed data - // 12-19 member size including header and trailer - - -int main( const int argc, const char * const argv[] ) - { - if( argc > 1 ) - { - std::printf( "Lzd %s - Educational decompressor for lzip files.\n", - PROGVERSION ); - std::printf( "Study the source to learn how a simple lzip decompressor works.\n" - "It is not safe to use it for any real work.\n" - "\nUsage: %s < file.lz > file\n", argv[0] ); - std::printf( "Lzd decompresses from standard input to standard output.\n" - "\nCopyright (C) 2013 Antonio Diaz Diaz.\n" - "This is free software: you are free to change and redistribute it.\n" - "There is NO WARRANTY, to the extent permitted by law.\n" - "Report bugs to lzip-bug@nongnu.org\n" - "Lzip home page: http://www.nongnu.org/lzip/lzip.html\n" ); - return 0; - } - -#if defined(__MSVCRT__) || defined(__OS2__) - setmode( STDIN_FILENO, O_BINARY ); - setmode( STDOUT_FILENO, O_BINARY ); -#endif - - if( isatty( STDIN_FILENO ) ) - { - std::fprintf( stderr, "I won't read compressed data from a terminal.\n" - "Try '%s --help' for more information.\n", argv[0] ); - return 1; - } - - for( bool first_member = true; ; first_member = false ) - { - File_header header; - for( int i = 0; i < 6; ++i ) - header[i] = std::getc( stdin ); - if( std::feof( stdin ) || std::memcmp( header, "LZIP", 4 ) != 0 ) - { - if( first_member ) - { std::fprintf( stderr, "Bad magic number (file not in lzip format)\n" ); - return 2; } - break; - } - if( header[4] != 1 ) - { - std::fprintf( stderr, "Version %d member format not supported.\n", - header[4] ); - return 2; - } - unsigned dict_size = 1 << ( header[5] & 0x1F ); - dict_size -= ( dict_size / 16 ) * ( ( header[5] >> 5 ) & 7 ); - if( dict_size < min_dictionary_size || dict_size > max_dictionary_size ) - { std::fprintf( stderr, "Invalid dictionary size in member header\n" ); - return 2; } - - LZ_decoder decoder( dict_size ); - if( !decoder.decode_member() ) - { std::fprintf( stderr, "Data error\n" ); return 2; } - - File_trailer trailer; - for( int i = 0; i < 20; ++i ) trailer[i] = std::getc( stdin ); - unsigned crc = 0; - for( int i = 3; i >= 0; --i ) { crc <<= 8; crc += trailer[i]; } - unsigned long long data_size = 0; - for( int i = 11; i >= 4; --i ) { data_size <<= 8; data_size += trailer[i]; } - if( crc != decoder.crc() || data_size != decoder.data_position() ) - { std::fprintf( stderr, "CRC error\n" ); return 2; } - } - - if( std::fclose( stdout ) != 0 ) - { std::fprintf( stderr, "Can't close stdout: %s\n", std::strerror( errno ) ); - return 1; } - return 0; - } diff --git a/testsuite/check.sh b/testsuite/check.sh index 5fc6d18..a701bcb 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -27,6 +27,15 @@ fail=0 printf "testing lzd-%s..." "$2" +"${LZIP}" < "${in_lz}" > /dev/full 2> /dev/null +if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi +"${LZIP}" < "${in}" 2> /dev/null +if [ $? = 2 ] ; then printf . ; else fail=1 ; printf - ; fi +dd if="${in_lz}" bs=1 count=6 2> /dev/null | "${LZIP}" 2> /dev/null +if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi +dd if="${in_lz}" bs=1 count=20 2> /dev/null | "${LZIP}" 2> /dev/null +if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi + "${LZIP}" < "${in_lz}" > copy || fail=1 cmp "${in}" copy || fail=1 printf . |