From def42230e27ddcb133e6778eefa2300cdbf3e95b Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 7 Nov 2015 07:46:02 +0100 Subject: Adding upstream version 0.3. Signed-off-by: Daniel Baumann --- ChangeLog | 5 + INSTALL | 6 +- Makefile.in | 10 +- NEWS | 4 +- README | 2 +- configure | 46 ++++-- decoder.cc | 363 ------------------------------------------ lzd.cc | 459 +++++++++++++++++++++++++++++++++++++++++++++++++++++ main.cc | 115 -------------- testsuite/check.sh | 9 ++ 10 files changed, 513 insertions(+), 506 deletions(-) delete mode 100644 decoder.cc create mode 100644 lzd.cc delete mode 100644 main.cc diff --git a/ChangeLog b/ChangeLog index c3be740..0f516f1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2013-07-24 Antonio Diaz Diaz + + * Version 0.3 released. + * decoder.cc and main.cc have been merged into lzd.cc. + 2013-05-06 Antonio Diaz Diaz * Version 0.2 released. diff --git a/INSTALL b/INSTALL index 09ac834..61635f7 100644 --- a/INSTALL +++ b/INSTALL @@ -1,7 +1,7 @@ Requirements ------------ You will need a C++ compiler. -I use gcc 4.8.0 and 3.3.6, but the code should compile with any +I use gcc 4.8.1 and 3.3.6, but the code should compile with any standards compliant compiler. Gcc is available at http://gcc.gnu.org. @@ -10,9 +10,9 @@ Procedure --------- 1. Unpack the archive if you have not done so already: - lzip -cd lzd[version].tar.lz | tar -xf - + tar -xf lzd[version].tar.lz or - gzip -cd lzd[version].tar.gz | tar -xf - + lzip -cd lzd[version].tar.lz | tar -xf - This creates the directory ./lzd[version] containing the source from the main archive. diff --git a/Makefile.in b/Makefile.in index d313f51..9a4b5ec 100644 --- a/Makefile.in +++ b/Makefile.in @@ -6,7 +6,7 @@ INSTALL_DATA = $(INSTALL) -p -m 644 INSTALL_DIR = $(INSTALL) -d -m 755 SHELL = /bin/sh -objs = main.o +objs = lzd.o .PHONY : all install install-bin install-info install-man install-strip \ @@ -21,14 +21,10 @@ $(progname) : $(objs) $(progname)_profiled : $(objs) $(CXX) $(LDFLAGS) -pg -o $@ $(objs) -main.o : main.cc - $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< - %.o : %.cc - $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< + $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< -$(objs) : Makefile -main.o : decoder.cc +$(objs) : Makefile doc : diff --git a/NEWS b/NEWS index 4caf5f7..2c8be2e 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,3 @@ -Changes in version 0.2: +Changes in version 0.3: -Added a missing "#include" for OS/2. +All the code is now contained in a single file (lzd.cc). diff --git a/README b/README index ed832ca..fcfe639 100644 --- a/README +++ b/README @@ -1,6 +1,6 @@ Description -Lzd is a very simplified decompressor for lzip files with an educational +Lzd is a simplified decompressor for lzip files with an educational purpose. Studying its source is a good first step to understand how lzip works. It is not safe to use lzd for any real work. diff --git a/configure b/configure index 7b3f916..98acee7 100755 --- a/configure +++ b/configure @@ -5,12 +5,10 @@ # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. -args= -no_create= pkgname=lzd -pkgversion=0.2 +pkgversion=0.3 progname=lzd -srctrigger=decoder.cc +srctrigger=lzd.cc # clear some things potentially inherited from environment. LC_ALL=C @@ -28,18 +26,19 @@ CXXFLAGS='-Wall -W -O2' LDFLAGS= # checking whether we are using GNU C++. -if [ ! -x /bin/g++ ] && - [ ! -x /usr/bin/g++ ] && - [ ! -x /usr/local/bin/g++ ] ; then +${CXX} --version > /dev/null 2>&1 +if [ $? != 0 ] ; then CXX=c++ CXXFLAGS='-W -O2' fi # Loop over all args -while [ -n "$1" ] ; do +args= +no_create= +while [ $# != 0 ] ; do # Get the first arg, and shuffle - option=$1 + option=$1 ; arg2=no shift # Add the argument quoted to args @@ -74,6 +73,14 @@ while [ -n "$1" ] ; do --version | -V) echo "Configure script for ${pkgname} version ${pkgversion}" exit 0 ;; + --srcdir) srcdir=$1 ; arg2=yes ;; + --prefix) prefix=$1 ; arg2=yes ;; + --exec-prefix) exec_prefix=$1 ; arg2=yes ;; + --bindir) bindir=$1 ; arg2=yes ;; + --datarootdir) datarootdir=$1 ; arg2=yes ;; + --infodir) infodir=$1 ; arg2=yes ;; + --mandir) mandir=$1 ; arg2=yes ;; + --srcdir=*) srcdir=${optarg} ;; --prefix=*) prefix=${optarg} ;; --exec-prefix=*) exec_prefix=${optarg} ;; @@ -88,11 +95,22 @@ while [ -n "$1" ] ; do CXXFLAGS=*) CXXFLAGS=${optarg} ;; LDFLAGS=*) LDFLAGS=${optarg} ;; - --* | *=* | *-*-*) ;; + --*) + echo "configure: WARNING: unrecognized option: '${option}'" 1>&2 ;; + *=* | *-*-*) ;; *) - echo "configure: Unrecognized option: \"${option}\"; use --help for usage." 1>&2 + echo "configure: unrecognized option: '${option}'" 1>&2 + echo "Try 'configure --help' for more information." 1>&2 exit 1 ;; esac + + # Check if the option took a separate argument + if [ "${arg2}" = yes ] ; then + if [ $# != 0 ] ; then args="${args} \"$1\"" ; shift + else echo "configure: Missing argument to '${option}'" 1>&2 + exit 1 + fi + fi done # Find the source files, if location was not specified. @@ -107,10 +125,8 @@ if [ -z "${srcdir}" ] ; then fi if [ ! -r "${srcdir}/${srctrigger}" ] ; then - exec 1>&2 - echo - echo "configure: Can't find sources in ${srcdir} ${srcdirtext}" - echo "configure: (At least ${srctrigger} is missing)." + echo "configure: Can't find sources in ${srcdir} ${srcdirtext}" 1>&2 + echo "configure: (At least ${srctrigger} is missing)." 1>&2 exit 1 fi diff --git a/decoder.cc b/decoder.cc deleted file mode 100644 index fbfcdb3..0000000 --- a/decoder.cc +++ /dev/null @@ -1,363 +0,0 @@ -/* Lzd - Educational decompressor for lzip files - Copyright (C) 2013 Antonio Diaz Diaz. - - This program is free software: you have unlimited permission - to copy, distribute and modify it. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -*/ - -class State - { - int st; - -public: - enum { states = 12 }; - State() : st( 0 ) {} - int operator()() const { return st; } - bool is_char() const { return st < 7; } - - void set_char() - { - static const int next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 }; - st = next[st]; - } - - void set_match() { st = ( ( st < 7 ) ? 7 : 10 ); } - void set_rep() { st = ( ( st < 7 ) ? 8 : 11 ); } - void set_short_rep() { st = ( ( st < 7 ) ? 9 : 11 ); } - }; - - -enum { - literal_context_bits = 3, - pos_state_bits = 2, - pos_states = 1 << pos_state_bits, - pos_state_mask = pos_states - 1, - - max_dis_states = 4, - dis_slot_bits = 6, - start_dis_model = 4, - end_dis_model = 14, - modeled_distances = 1 << (end_dis_model / 2), // 128 - dis_align_bits = 4, - dis_align_size = 1 << dis_align_bits, - - len_low_bits = 3, - len_mid_bits = 3, - len_high_bits = 8, - len_low_symbols = 1 << len_low_bits, - len_mid_symbols = 1 << len_mid_bits, - len_high_symbols = 1 << len_high_bits, - max_len_symbols = len_low_symbols + len_mid_symbols + len_high_symbols, - min_match_len = 2, // must be 2 - - bit_model_move_bits = 5, - bit_model_total_bits = 11, - bit_model_total = 1 << bit_model_total_bits }; - - -struct Bit_model - { - int probability; - Bit_model() : probability( bit_model_total / 2 ) {} - }; - -struct Len_model - { - Bit_model choice1; - Bit_model choice2; - Bit_model bm_low[pos_states][len_low_symbols]; - Bit_model bm_mid[pos_states][len_mid_symbols]; - Bit_model bm_high[len_high_symbols]; - }; - - -class Range_decoder - { - uint32_t code; - uint32_t range; - -public: - Range_decoder() : code( 0 ), range( 0xFFFFFFFFU ) - { - for( int i = 0; i < 5; ++i ) code = (code << 8) | std::getc( stdin ); - } - - int decode( const int num_bits ) - { - int symbol = 0; - for( int i = 0; i < num_bits; ++i ) - { - range >>= 1; - symbol <<= 1; - if( code >= range ) { code -= range; symbol |= 1; } - if( range <= 0x00FFFFFFU ) // normalize - { range <<= 8; code = (code << 8) | std::getc( stdin ); } - } - return symbol; - } - - int decode_bit( Bit_model & bm ) - { - int symbol; - const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability; - if( code < bound ) - { - range = bound; - bm.probability += (bit_model_total - bm.probability) >> bit_model_move_bits; - symbol = 0; - } - else - { - range -= bound; - code -= bound; - bm.probability -= bm.probability >> bit_model_move_bits; - symbol = 1; - } - if( range <= 0x00FFFFFFU ) // normalize - { range <<= 8; code = (code << 8) | std::getc( stdin ); } - return symbol; - } - - int decode_tree( Bit_model bm[], const int num_bits ) - { - int symbol = 1; - for( int i = 0; i < num_bits; ++i ) - symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); - return symbol - (1 << num_bits); - } - - int decode_tree_reversed( Bit_model bm[], const int num_bits ) - { - int symbol = decode_tree( bm, num_bits ); - int reversed_symbol = 0; - for( int i = 0; i < num_bits; ++i ) - { - reversed_symbol = ( reversed_symbol << 1 ) | ( symbol & 1 ); - symbol >>= 1; - } - return reversed_symbol; - } - - int decode_matched( Bit_model bm[], const int match_byte ) - { - Bit_model * const bm1 = bm + 0x100; - int symbol = 1; - for( int i = 7; i >= 0; --i ) - { - const int match_bit = ( match_byte >> i ) & 1; - const int bit = decode_bit( bm1[(match_bit<<8)+symbol] ); - symbol = ( symbol << 1 ) | bit; - if( match_bit != bit ) - { - while( symbol < 0x100 ) - symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); - break; - } - } - return symbol - 0x100; - } - - int decode_len( Len_model & lm, const int pos_state ) - { - if( decode_bit( lm.choice1 ) == 0 ) - return min_match_len + - decode_tree( lm.bm_low[pos_state], len_low_bits ); - if( decode_bit( lm.choice2 ) == 0 ) - return min_match_len + len_low_symbols + - decode_tree( lm.bm_mid[pos_state], len_mid_bits ); - return min_match_len + len_low_symbols + len_mid_symbols + - decode_tree( lm.bm_high, len_high_bits ); - } - }; - - -class LZ_decoder - { - unsigned long long partial_data_pos; - Range_decoder rdec; - const unsigned dictionary_size; - uint8_t * const buffer; // output buffer - unsigned pos; // current pos in buffer - unsigned stream_pos; // first byte not yet written to stdout - uint32_t crc_; - - void flush_data(); - - uint8_t get_byte( const unsigned distance ) const - { - int i = pos - distance - 1; - if( i < 0 ) i += dictionary_size; - return buffer[i]; - } - - void put_byte( const uint8_t b ) - { - buffer[pos] = b; - if( ++pos >= dictionary_size ) flush_data(); - } - -public: - LZ_decoder( const unsigned dict_size ) - : - partial_data_pos( 0 ), - dictionary_size( dict_size ), - buffer( new uint8_t[dictionary_size] ), - pos( 0 ), - stream_pos( 0 ), - crc_( 0xFFFFFFFFU ) - { buffer[dictionary_size-1] = 0; } // prev_byte of first_byte - - ~LZ_decoder() { delete[] buffer; } - - unsigned crc() const { return crc_ ^ 0xFFFFFFFFU; } - unsigned long long data_position() const { return partial_data_pos + pos; } - - bool decode_member(); - }; - - -class CRC32 - { - uint32_t data[256]; // Table of CRCs of all 8-bit messages. - -public: - CRC32() - { - for( unsigned n = 0; n < 256; ++n ) - { - unsigned c = n; - for( int k = 0; k < 8; ++k ) - { if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; } - data[n] = c; - } - } - - void update( uint32_t & crc, const uint8_t * buffer, const int size ) const - { - for( int i = 0; i < size; ++i ) - crc = data[(crc^buffer[i])&0xFF] ^ ( crc >> 8 ); - } - }; - -const CRC32 crc32; - - -void LZ_decoder::flush_data() - { - if( pos > stream_pos ) - { - const unsigned size = pos - stream_pos; - crc32.update( crc_, buffer + stream_pos, size ); - errno = 0; - if( std::fwrite( buffer + stream_pos, 1, size, stdout ) != size ) - { std::fprintf( stderr, "Write error: %s\n", std::strerror( errno ) ); - std::exit( 1 ); } - if( pos >= dictionary_size ) { partial_data_pos += pos; pos = 0; } - stream_pos = pos; - } - } - - -bool LZ_decoder::decode_member() // Returns false if error - { - Bit_model bm_literal[1<> ( 8 - literal_context_bits ); - Bit_model * const bm = bm_literal[literal_state]; - if( state.is_char() ) - put_byte( rdec.decode_tree( bm, 8 ) ); - else - put_byte( rdec.decode_matched( bm, get_byte( rep0 ) ) ); - state.set_char(); - } - else - { - int len; - if( rdec.decode_bit( bm_rep[state()] ) == 1 ) // 2nd bit - { - if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit - { - if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit - { state.set_short_rep(); put_byte( get_byte( rep0 ) ); continue; } - } - else - { - unsigned distance; - if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit - distance = rep1; - else - { - if( rdec.decode_bit( bm_rep2[state()] ) == 0 ) // 5th bit - distance = rep2; - else - { distance = rep3; rep3 = rep2; } - rep2 = rep1; - } - rep1 = rep0; - rep0 = distance; - } - len = rdec.decode_len( rep_len_model, pos_state ); - state.set_rep(); - } - else - { - rep3 = rep2; rep2 = rep1; rep1 = rep0; - len = rdec.decode_len( match_len_model, pos_state ); - const int dis_state = std::min( len - min_match_len, max_dis_states - 1 ); - const int dis_slot = - rdec.decode_tree( bm_dis_slot[dis_state], dis_slot_bits ); - if( dis_slot < start_dis_model ) rep0 = dis_slot; - else - { - const int direct_bits = ( dis_slot >> 1 ) - 1; - rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits; - if( dis_slot < end_dis_model ) - rep0 += rdec.decode_tree_reversed( bm_dis + rep0 - dis_slot - 1, - direct_bits ); - else - { - rep0 += rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits; - rep0 += rdec.decode_tree_reversed( bm_align, dis_align_bits ); - if( rep0 == 0xFFFFFFFFU ) // Marker found - { - flush_data(); - return ( len == min_match_len ); // End Of Stream marker - } - } - } - state.set_match(); - if( rep0 >= dictionary_size || ( rep0 >= pos && !partial_data_pos ) ) - return false; - } - for( int i = 0; i < len; ++i ) - put_byte( get_byte( rep0 ) ); - } - } - return false; - } diff --git a/lzd.cc b/lzd.cc new file mode 100644 index 0000000..0ac7b64 --- /dev/null +++ b/lzd.cc @@ -0,0 +1,459 @@ +/* Lzd - Educational decompressor for lzip files + Copyright (C) 2013 Antonio Diaz Diaz. + + This program is free software: you have unlimited permission + to copy, distribute and modify it. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ +/* + Exit status: 0 for a normal exit, 1 for environmental problems + (file not found, invalid flags, I/O errors, etc), 2 to indicate a + corrupt or invalid input file. +*/ + +#include +#include +#include +#include +#include +#include +#include +#if defined(__MSVCRT__) || defined(__OS2__) +#include +#include +#endif + + +class State + { + int st; + +public: + enum { states = 12 }; + State() : st( 0 ) {} + int operator()() const { return st; } + bool is_char() const { return st < 7; } + + void set_char() + { + static const int next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 }; + st = next[st]; + } + void set_match() { st = ( st < 7 ) ? 7 : 10; } + void set_rep() { st = ( st < 7 ) ? 8 : 11; } + void set_short_rep() { st = ( st < 7 ) ? 9 : 11; } + }; + + +enum { + min_dictionary_size = 1 << 12, + max_dictionary_size = 1 << 29, + literal_context_bits = 3, + pos_state_bits = 2, + pos_states = 1 << pos_state_bits, + pos_state_mask = pos_states - 1, + + dis_slot_bits = 6, + start_dis_model = 4, + end_dis_model = 14, + modeled_distances = 1 << (end_dis_model / 2), // 128 + dis_align_bits = 4, + dis_align_size = 1 << dis_align_bits, + + len_low_bits = 3, + len_mid_bits = 3, + len_high_bits = 8, + len_low_symbols = 1 << len_low_bits, + len_mid_symbols = 1 << len_mid_bits, + len_high_symbols = 1 << len_high_bits, + max_len_symbols = len_low_symbols + len_mid_symbols + len_high_symbols, + + min_match_len = 2, // must be 2 + max_dis_states = 4, + + bit_model_move_bits = 5, + bit_model_total_bits = 11, + bit_model_total = 1 << bit_model_total_bits }; + +struct Bit_model + { + int probability; + Bit_model() : probability( bit_model_total / 2 ) {} + }; + +struct Len_model + { + Bit_model choice1; + Bit_model choice2; + Bit_model bm_low[pos_states][len_low_symbols]; + Bit_model bm_mid[pos_states][len_mid_symbols]; + Bit_model bm_high[len_high_symbols]; + }; + + +class CRC32 + { + uint32_t data[256]; // Table of CRCs of all 8-bit messages. + +public: + CRC32() + { + for( unsigned n = 0; n < 256; ++n ) + { + unsigned c = n; + for( int k = 0; k < 8; ++k ) + { if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; } + data[n] = c; + } + } + + void update( uint32_t & crc, const uint8_t * const buffer, const int size ) const + { + for( int i = 0; i < size; ++i ) + crc = data[(crc^buffer[i])&0xFF] ^ ( crc >> 8 ); + } + }; + +const CRC32 crc32; + + +typedef uint8_t File_header[6]; // 0-3 magic, 4 version, 5 coded_dict_size + +typedef uint8_t File_trailer[20]; + // 0-3 CRC32 of the uncompressed data + // 4-11 size of the uncompressed data + // 12-19 member size including header and trailer + +class Range_decoder + { + uint32_t code; + uint32_t range; + +public: + Range_decoder() : code( 0 ), range( 0xFFFFFFFFU ) + { + for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte(); + } + + uint8_t get_byte() { return std::getc( stdin ); } + + int decode( const int num_bits ) + { + int symbol = 0; + for( int i = 0; i < num_bits; ++i ) + { + range >>= 1; + symbol <<= 1; + if( code >= range ) { code -= range; symbol |= 1; } + if( range <= 0x00FFFFFFU ) // normalize + { range <<= 8; code = (code << 8) | get_byte(); } + } + return symbol; + } + + int decode_bit( Bit_model & bm ) + { + int symbol; + const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability; + if( code < bound ) + { + range = bound; + bm.probability += (bit_model_total - bm.probability) >> bit_model_move_bits; + symbol = 0; + } + else + { + range -= bound; + code -= bound; + bm.probability -= bm.probability >> bit_model_move_bits; + symbol = 1; + } + if( range <= 0x00FFFFFFU ) // normalize + { range <<= 8; code = (code << 8) | get_byte(); } + return symbol; + } + + int decode_tree( Bit_model bm[], const int num_bits ) + { + int symbol = 1; + for( int i = 0; i < num_bits; ++i ) + symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); + return symbol - (1 << num_bits); + } + + int decode_tree_reversed( Bit_model bm[], const int num_bits ) + { + int symbol = decode_tree( bm, num_bits ); + int reversed_symbol = 0; + for( int i = 0; i < num_bits; ++i ) + { + reversed_symbol = ( reversed_symbol << 1 ) | ( symbol & 1 ); + symbol >>= 1; + } + return reversed_symbol; + } + + int decode_matched( Bit_model bm[], const int match_byte ) + { + Bit_model * const bm1 = bm + 0x100; + int symbol = 1; + for( int i = 7; i >= 0; --i ) + { + const int match_bit = ( match_byte >> i ) & 1; + const int bit = decode_bit( bm1[(match_bit<<8)+symbol] ); + symbol = ( symbol << 1 ) | bit; + if( match_bit != bit ) + { + while( symbol < 0x100 ) + symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); + break; + } + } + return symbol - 0x100; + } + + int decode_len( Len_model & lm, const int pos_state ) + { + if( decode_bit( lm.choice1 ) == 0 ) + return decode_tree( lm.bm_low[pos_state], len_low_bits ); + if( decode_bit( lm.choice2 ) == 0 ) + return len_low_symbols + + decode_tree( lm.bm_mid[pos_state], len_mid_bits ); + return len_low_symbols + len_mid_symbols + + decode_tree( lm.bm_high, len_high_bits ); + } + }; + + +class LZ_decoder + { + unsigned long long partial_data_pos; + Range_decoder rdec; + const unsigned dictionary_size; + uint8_t * const buffer; // output buffer + unsigned pos; // current pos in buffer + unsigned stream_pos; // first byte not yet written to stdout + uint32_t crc_; + + void flush_data(); + + uint8_t get_byte( const unsigned distance ) const + { + unsigned i = pos - distance - 1; + if( pos <= distance ) i += dictionary_size; + return buffer[i]; + } + + void put_byte( const uint8_t b ) + { + buffer[pos] = b; + if( ++pos >= dictionary_size ) flush_data(); + } + +public: + LZ_decoder( const unsigned dict_size ) + : + partial_data_pos( 0 ), + dictionary_size( dict_size ), + buffer( new uint8_t[dictionary_size] ), + pos( 0 ), + stream_pos( 0 ), + crc_( 0xFFFFFFFFU ) + { buffer[dictionary_size-1] = 0; } // prev_byte of first_byte + + ~LZ_decoder() { delete[] buffer; } + + unsigned crc() const { return crc_ ^ 0xFFFFFFFFU; } + unsigned long long data_position() const { return partial_data_pos + pos; } + + bool decode_member(); + }; + + +void LZ_decoder::flush_data() + { + if( pos > stream_pos ) + { + const unsigned size = pos - stream_pos; + crc32.update( crc_, buffer + stream_pos, size ); + errno = 0; + if( std::fwrite( buffer + stream_pos, 1, size, stdout ) != size ) + { std::fprintf( stderr, "Write error: %s\n", std::strerror( errno ) ); + std::exit( 1 ); } + if( pos >= dictionary_size ) { partial_data_pos += pos; pos = 0; } + stream_pos = pos; + } + } + + +bool LZ_decoder::decode_member() // Returns false if error + { + Bit_model bm_literal[1<> ( 8 - literal_context_bits ); + Bit_model * const bm = bm_literal[literal_state]; + if( state.is_char() ) + put_byte( rdec.decode_tree( bm, 8 ) ); + else + put_byte( rdec.decode_matched( bm, get_byte( rep0 ) ) ); + state.set_char(); + } + else + { + int len; + if( rdec.decode_bit( bm_rep[state()] ) == 1 ) // 2nd bit + { + if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit + { + if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit + { state.set_short_rep(); put_byte( get_byte( rep0 ) ); continue; } + } + else + { + unsigned distance; + if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit + distance = rep1; + else + { + if( rdec.decode_bit( bm_rep2[state()] ) == 0 ) // 5th bit + distance = rep2; + else + { distance = rep3; rep3 = rep2; } + rep2 = rep1; + } + rep1 = rep0; + rep0 = distance; + } + len = min_match_len + rdec.decode_len( rep_len_model, pos_state ); + state.set_rep(); + } + else + { + rep3 = rep2; rep2 = rep1; rep1 = rep0; + len = min_match_len + rdec.decode_len( match_len_model, pos_state ); + const int dis_state = std::min( len - min_match_len, max_dis_states - 1 ); + const int dis_slot = + rdec.decode_tree( bm_dis_slot[dis_state], dis_slot_bits ); + if( dis_slot < start_dis_model ) rep0 = dis_slot; + else + { + const int direct_bits = ( dis_slot >> 1 ) - 1; + rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits; + if( dis_slot < end_dis_model ) + rep0 += rdec.decode_tree_reversed( bm_dis + rep0 - dis_slot - 1, + direct_bits ); + else + { + rep0 += rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits; + rep0 += rdec.decode_tree_reversed( bm_align, dis_align_bits ); + if( rep0 == 0xFFFFFFFFU ) // Marker found + { + flush_data(); + return ( len == min_match_len ); // End Of Stream marker + } + } + } + state.set_match(); + if( rep0 >= dictionary_size || ( rep0 >= pos && !partial_data_pos ) ) + return false; + } + for( int i = 0; i < len; ++i ) + put_byte( get_byte( rep0 ) ); + } + } + return false; + } + + +int main( const int argc, const char * const argv[] ) + { + if( argc > 1 ) + { + std::printf( "Lzd %s - Educational decompressor for lzip files.\n", + PROGVERSION ); + std::printf( "Study the source to learn how a lzip decompressor works.\n" + "See the lzip manual for an explanation of the code.\n" + "It is not safe to use lzd for any real work.\n" + "\nUsage: %s < file.lz > file\n", argv[0] ); + std::printf( "Lzd decompresses from standard input to standard output.\n" + "\nCopyright (C) 2013 Antonio Diaz Diaz.\n" + "This is free software: you are free to change and redistribute it.\n" + "There is NO WARRANTY, to the extent permitted by law.\n" + "Report bugs to lzip-bug@nongnu.org\n" + "Lzip home page: http://www.nongnu.org/lzip/lzip.html\n" ); + return 0; + } + +#if defined(__MSVCRT__) || defined(__OS2__) + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); +#endif + + for( bool first_member = true; ; first_member = false ) + { + File_header header; + for( int i = 0; i < 6; ++i ) + header[i] = std::getc( stdin ); + if( std::feof( stdin ) || std::memcmp( header, "LZIP", 4 ) != 0 ) + { + if( first_member ) + { std::fprintf( stderr, "Bad magic number (file not in lzip format)\n" ); + return 2; } + break; + } + if( header[4] != 1 ) + { + std::fprintf( stderr, "Version %d member format not supported.\n", + header[4] ); + return 2; + } + unsigned dict_size = 1 << ( header[5] & 0x1F ); + dict_size -= ( dict_size / 16 ) * ( ( header[5] >> 5 ) & 7 ); + if( dict_size < min_dictionary_size || dict_size > max_dictionary_size ) + { std::fprintf( stderr, "Invalid dictionary size in member header\n" ); + return 2; } + + LZ_decoder decoder( dict_size ); + if( !decoder.decode_member() ) + { std::fprintf( stderr, "Data error\n" ); return 2; } + + File_trailer trailer; + for( int i = 0; i < 20; ++i ) trailer[i] = std::getc( stdin ); + unsigned crc = 0; + for( int i = 3; i >= 0; --i ) { crc <<= 8; crc += trailer[i]; } + unsigned long long data_size = 0; + for( int i = 11; i >= 4; --i ) { data_size <<= 8; data_size += trailer[i]; } + if( crc != decoder.crc() || data_size != decoder.data_position() ) + { std::fprintf( stderr, "CRC error\n" ); return 2; } + } + + if( std::fclose( stdout ) != 0 ) + { std::fprintf( stderr, "Can't close stdout: %s\n", std::strerror( errno ) ); + return 1; } + return 0; + } diff --git a/main.cc b/main.cc deleted file mode 100644 index bba5c6a..0000000 --- a/main.cc +++ /dev/null @@ -1,115 +0,0 @@ -/* Lzd - Educational decompressor for lzip files - Copyright (C) 2013 Antonio Diaz Diaz. - - This program is free software: you have unlimited permission - to copy, distribute and modify it. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -*/ -/* - Exit status: 0 for a normal exit, 1 for environmental problems - (file not found, invalid flags, I/O errors, etc), 2 to indicate a - corrupt or invalid input file. -*/ - -#include -#include -#include -#include -#include -#include -#include -#if defined(__MSVCRT__) || defined(__OS2__) -#include -#include -#endif - -#include "decoder.cc" - - -enum { min_dictionary_size = 1 << 12, - max_dictionary_size = 1 << 29 }; - -typedef uint8_t File_header[6]; // 0-3 magic, 4 version, 5 coded_dict_size - -typedef uint8_t File_trailer[20]; - // 0-3 CRC32 of the uncompressed data - // 4-11 size of the uncompressed data - // 12-19 member size including header and trailer - - -int main( const int argc, const char * const argv[] ) - { - if( argc > 1 ) - { - std::printf( "Lzd %s - Educational decompressor for lzip files.\n", - PROGVERSION ); - std::printf( "Study the source to learn how a simple lzip decompressor works.\n" - "It is not safe to use it for any real work.\n" - "\nUsage: %s < file.lz > file\n", argv[0] ); - std::printf( "Lzd decompresses from standard input to standard output.\n" - "\nCopyright (C) 2013 Antonio Diaz Diaz.\n" - "This is free software: you are free to change and redistribute it.\n" - "There is NO WARRANTY, to the extent permitted by law.\n" - "Report bugs to lzip-bug@nongnu.org\n" - "Lzip home page: http://www.nongnu.org/lzip/lzip.html\n" ); - return 0; - } - -#if defined(__MSVCRT__) || defined(__OS2__) - setmode( STDIN_FILENO, O_BINARY ); - setmode( STDOUT_FILENO, O_BINARY ); -#endif - - if( isatty( STDIN_FILENO ) ) - { - std::fprintf( stderr, "I won't read compressed data from a terminal.\n" - "Try '%s --help' for more information.\n", argv[0] ); - return 1; - } - - for( bool first_member = true; ; first_member = false ) - { - File_header header; - for( int i = 0; i < 6; ++i ) - header[i] = std::getc( stdin ); - if( std::feof( stdin ) || std::memcmp( header, "LZIP", 4 ) != 0 ) - { - if( first_member ) - { std::fprintf( stderr, "Bad magic number (file not in lzip format)\n" ); - return 2; } - break; - } - if( header[4] != 1 ) - { - std::fprintf( stderr, "Version %d member format not supported.\n", - header[4] ); - return 2; - } - unsigned dict_size = 1 << ( header[5] & 0x1F ); - dict_size -= ( dict_size / 16 ) * ( ( header[5] >> 5 ) & 7 ); - if( dict_size < min_dictionary_size || dict_size > max_dictionary_size ) - { std::fprintf( stderr, "Invalid dictionary size in member header\n" ); - return 2; } - - LZ_decoder decoder( dict_size ); - if( !decoder.decode_member() ) - { std::fprintf( stderr, "Data error\n" ); return 2; } - - File_trailer trailer; - for( int i = 0; i < 20; ++i ) trailer[i] = std::getc( stdin ); - unsigned crc = 0; - for( int i = 3; i >= 0; --i ) { crc <<= 8; crc += trailer[i]; } - unsigned long long data_size = 0; - for( int i = 11; i >= 4; --i ) { data_size <<= 8; data_size += trailer[i]; } - if( crc != decoder.crc() || data_size != decoder.data_position() ) - { std::fprintf( stderr, "CRC error\n" ); return 2; } - } - - if( std::fclose( stdout ) != 0 ) - { std::fprintf( stderr, "Can't close stdout: %s\n", std::strerror( errno ) ); - return 1; } - return 0; - } diff --git a/testsuite/check.sh b/testsuite/check.sh index 5fc6d18..a701bcb 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -27,6 +27,15 @@ fail=0 printf "testing lzd-%s..." "$2" +"${LZIP}" < "${in_lz}" > /dev/full 2> /dev/null +if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi +"${LZIP}" < "${in}" 2> /dev/null +if [ $? = 2 ] ; then printf . ; else fail=1 ; printf - ; fi +dd if="${in_lz}" bs=1 count=6 2> /dev/null | "${LZIP}" 2> /dev/null +if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi +dd if="${in_lz}" bs=1 count=20 2> /dev/null | "${LZIP}" 2> /dev/null +if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi + "${LZIP}" < "${in_lz}" > copy || fail=1 cmp "${in}" copy || fail=1 printf . -- cgit v1.2.3