diff options
-rw-r--r-- | COPYING | 17 | ||||
-rw-r--r-- | ChangeLog | 8 | ||||
-rw-r--r-- | INSTALL | 2 | ||||
-rw-r--r-- | Makefile.in | 5 | ||||
-rw-r--r-- | NEWS | 7 | ||||
-rw-r--r-- | README | 12 | ||||
-rwxr-xr-x | configure | 21 | ||||
-rw-r--r-- | lzd.cc | 78 | ||||
-rwxr-xr-x | testsuite/check.sh | 48 | ||||
-rw-r--r-- | testsuite/test.txt.lz | bin | 7376 -> 7376 bytes |
10 files changed, 124 insertions, 74 deletions
@@ -0,0 +1,17 @@ + Lzd - Educational decompressor for the lzip format + Copyright (C) Antonio Diaz Diaz. + + This program is free software. Redistribution and use in source and + binary forms, with or without modification, are permitted provided + that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. @@ -1,3 +1,9 @@ +2017-05-02 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.0 released. + * lzd.cc: Minor code improvements. + * testsuite/check.sh: A POSIX shell is required to run the tests. + 2016-05-10 Antonio Diaz Diaz <antonio@gnu.org> * Version 0.9 released. @@ -43,7 +49,7 @@ * Version 0.1 released. -Copyright (C) 2013-2016 Antonio Diaz Diaz. +Copyright (C) 2013-2017 Antonio Diaz Diaz. This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute and @@ -50,7 +50,7 @@ After running 'configure', you can run 'make' and 'make install' as explained above. -Copyright (C) 2013-2016 Antonio Diaz Diaz. +Copyright (C) 2013-2017 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/Makefile.in b/Makefile.in index 42cdcc0..471ff4b 100644 --- a/Makefile.in +++ b/Makefile.in @@ -95,16 +95,17 @@ dist : doc ln -sf $(VPATH) $(DISTNAME) tar -Hustar --owner=root --group=root -cvf $(DISTNAME).tar \ $(DISTNAME)/AUTHORS \ + $(DISTNAME)/COPYING \ $(DISTNAME)/ChangeLog \ $(DISTNAME)/INSTALL \ $(DISTNAME)/Makefile.in \ $(DISTNAME)/NEWS \ $(DISTNAME)/README \ $(DISTNAME)/configure \ + $(DISTNAME)/*.cc \ $(DISTNAME)/testsuite/check.sh \ $(DISTNAME)/testsuite/test.txt \ - $(DISTNAME)/testsuite/test.txt.lz \ - $(DISTNAME)/*.cc + $(DISTNAME)/testsuite/test.txt.lz rm -f $(DISTNAME) lzip -v -9 $(DISTNAME).tar @@ -1,4 +1,5 @@ -Changes in version 0.9: +Changes in version 1.0: -A configure warning happening on some shells when testing for g++ has -been fixed. +Minor code improvements have been made. + +The tests have been improved. @@ -24,11 +24,11 @@ availability: merging of damaged copies of a file. * The lzip format is as simple as possible (but not simpler). The - lzip manual provides the code of a simple decompressor along with a - detailed explanation of how it works, so that with the only help of - the lzip manual it would be possible for a digital archaeologist to - extract the data from a lzip file long after quantum computers - eventually render LZMA obsolete. + lzip manual provides the source code of a simple decompressor along + with a detailed explanation of how it works, so that with the only + help of the lzip manual it would be possible for a digital + archaeologist to extract the data from a lzip file long after + quantum computers eventually render LZMA obsolete. * Additionally the lzip reference implementation is copylefted, which guarantees that it will remain free forever. @@ -45,7 +45,7 @@ range encoding), and Igor Pavlov (for putting all the above together in LZMA). -Copyright (C) 2013-2016 Antonio Diaz Diaz. +Copyright (C) 2013-2017 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. @@ -1,12 +1,12 @@ #! /bin/sh # configure script for Lzd - Educational decompressor for the lzip format -# Copyright (C) 2013-2016 Antonio Diaz Diaz. +# Copyright (C) 2013-2017 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. pkgname=lzd -pkgversion=0.9 +pkgversion=1.0 progname=lzd srctrigger=lzd.cc @@ -26,11 +26,11 @@ CXXFLAGS='-Wall -W -O2' LDFLAGS= # checking whether we are using GNU C++. -if /bin/sh -c "${CXX} --version" > /dev/null 2>&1 ; then true -else +/bin/sh -c "${CXX} --version" > /dev/null 2>&1 || + { CXX=c++ - CXXFLAGS='-W -O2' -fi + CXXFLAGS=-O2 + } # Loop over all args args= @@ -52,9 +52,12 @@ while [ $# != 0 ] ; do # Process the options case ${option} in --help | -h) - echo "Usage: configure [options]" + echo "Usage: $0 [OPTION]... [VAR=VALUE]..." + echo + echo "To assign makefile variables (e.g., CXX, CXXFLAGS...), specify them as" + echo "arguments to configure in the form VAR=VALUE." echo - echo "Options: [defaults in brackets]" + echo "Options and variables: [defaults in brackets]" echo " -h, --help display this help and exit" echo " -V, --version output version information and exit" echo " --srcdir=DIR find the sources in DIR [. or ..]" @@ -165,7 +168,7 @@ echo "LDFLAGS = ${LDFLAGS}" rm -f Makefile cat > Makefile << EOF # Makefile for Lzd - Educational decompressor for the lzip format -# Copyright (C) 2013-2016 Antonio Diaz Diaz. +# Copyright (C) 2013-2017 Antonio Diaz Diaz. # This file was generated automatically by configure. Don't edit. # # This Makefile is free software: you have unlimited permission @@ -1,5 +1,5 @@ /* Lzd - Educational decompressor for the lzip format - Copyright (C) 2013-2016 Antonio Diaz Diaz. + Copyright (C) 2013-2017 Antonio Diaz Diaz. This program is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -150,10 +150,10 @@ public: uint8_t get_byte() { return std::getc( stdin ); } - int decode( const int num_bits ) + unsigned decode( const int num_bits ) { - int symbol = 0; - for( int i = 0; i < num_bits; ++i ) + unsigned symbol = 0; + for( int i = num_bits; i > 0; --i ) { range >>= 1; symbol <<= 1; @@ -164,9 +164,9 @@ public: return symbol; } - int decode_bit( Bit_model & bm ) + unsigned decode_bit( Bit_model & bm ) { - int symbol; + unsigned symbol; const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability; if( code < bound ) { @@ -186,18 +186,18 @@ public: return symbol; } - int decode_tree( Bit_model bm[], const int num_bits ) + unsigned decode_tree( Bit_model bm[], const int num_bits ) { - int symbol = 1; + unsigned symbol = 1; for( int i = 0; i < num_bits; ++i ) symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); return symbol - (1 << num_bits); } - int decode_tree_reversed( Bit_model bm[], const int num_bits ) + unsigned decode_tree_reversed( Bit_model bm[], const int num_bits ) { - int symbol = decode_tree( bm, num_bits ); - int reversed_symbol = 0; + unsigned symbol = decode_tree( bm, num_bits ); + unsigned reversed_symbol = 0; for( int i = 0; i < num_bits; ++i ) { reversed_symbol = ( reversed_symbol << 1 ) | ( symbol & 1 ); @@ -206,14 +206,13 @@ public: return reversed_symbol; } - int decode_matched( Bit_model bm[], const int match_byte ) + unsigned decode_matched( Bit_model bm[], const unsigned match_byte ) { - Bit_model * const bm1 = bm + 0x100; - int symbol = 1; + unsigned symbol = 1; for( int i = 7; i >= 0; --i ) { - const int match_bit = ( match_byte >> i ) & 1; - const int bit = decode_bit( bm1[(match_bit<<8)+symbol] ); + const unsigned match_bit = ( match_byte >> i ) & 1; + const unsigned bit = decode_bit( bm[symbol+(match_bit<<8)+0x100] ); symbol = ( symbol << 1 ) | bit; if( match_bit != bit ) { @@ -225,7 +224,7 @@ public: return symbol & 0xFF; } - int decode_len( Len_model & lm, const int pos_state ) + unsigned decode_len( Len_model & lm, const int pos_state ) { if( decode_bit( lm.choice1 ) == 0 ) return decode_tree( lm.bm_low[pos_state], len_low_bits ); @@ -253,9 +252,9 @@ class LZ_decoder uint8_t peek( const unsigned distance ) const { - unsigned i = pos - distance - 1; - if( pos <= distance ) i += dictionary_size; - return buffer[i]; + if( pos > distance ) return buffer[pos - distance - 1]; + if( pos_wrapped ) return buffer[dictionary_size + pos - distance - 1]; + return 0; // prev_byte of first byte } void put_byte( const uint8_t b ) @@ -274,7 +273,7 @@ public: stream_pos( 0 ), crc_( 0xFFFFFFFFU ), pos_wrapped( false ) - { buffer[dictionary_size-1] = 0; } // prev_byte of first byte + {} ~LZ_decoder() { delete[] buffer; } @@ -312,13 +311,13 @@ bool LZ_decoder::decode_member() // Returns false if error Bit_model bm_rep2[State::states]; Bit_model bm_len[State::states][pos_states]; Bit_model bm_dis_slot[len_states][1<<dis_slot_bits]; - Bit_model bm_dis[modeled_distances-end_dis_model]; + Bit_model bm_dis[modeled_distances-end_dis_model+1]; Bit_model bm_align[dis_align_size]; Len_model match_len_model; Len_model rep_len_model; - unsigned rep0 = 0; // rep[0-3] latest four distances - unsigned rep1 = 0; // used for efficient coding of - unsigned rep2 = 0; // repeated distances + unsigned rep0 = 0; // rep[0-3] latest four distances + unsigned rep1 = 0; // used for efficient coding of + unsigned rep2 = 0; // repeated distances unsigned rep3 = 0; State state; @@ -341,7 +340,12 @@ bool LZ_decoder::decode_member() // Returns false if error int len; if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit { - if( rdec.decode_bit( bm_rep0[state()] ) != 0 ) // 3rd bit + if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit + { + if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit + { state.set_short_rep(); put_byte( peek( rep0 ) ); continue; } + } + else { unsigned distance; if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit @@ -357,11 +361,6 @@ bool LZ_decoder::decode_member() // Returns false if error rep1 = rep0; rep0 = distance; } - else - { - if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit - { state.set_short_rep(); put_byte( peek( rep0 ) ); continue; } - } state.set_rep(); len = min_match_len + rdec.decode_len( rep_len_model, pos_state ); } @@ -370,15 +369,14 @@ bool LZ_decoder::decode_member() // Returns false if error rep3 = rep2; rep2 = rep1; rep1 = rep0; len = min_match_len + rdec.decode_len( match_len_model, pos_state ); const int len_state = std::min( len - min_match_len, len_states - 1 ); - const int dis_slot = - rdec.decode_tree( bm_dis_slot[len_state], dis_slot_bits ); - if( dis_slot < start_dis_model ) rep0 = dis_slot; - else + rep0 = rdec.decode_tree( bm_dis_slot[len_state], dis_slot_bits ); + if( rep0 >= start_dis_model ) { + const unsigned dis_slot = rep0; const int direct_bits = ( dis_slot >> 1 ) - 1; rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits; if( dis_slot < end_dis_model ) - rep0 += rdec.decode_tree_reversed( bm_dis + rep0 - dis_slot - 1, + rep0 += rdec.decode_tree_reversed( bm_dis + ( rep0 - dis_slot ), direct_bits ); else { @@ -414,7 +412,7 @@ int main( const int argc, const char * const argv[] ) "It is not safe to use lzd for any real work.\n" "\nUsage: %s < file.lz > file\n", argv[0] ); std::printf( "Lzd decompresses from standard input to standard output.\n" - "\nCopyright (C) 2016 Antonio Diaz Diaz.\n" + "\nCopyright (C) 2017 Antonio Diaz Diaz.\n" "This is free software: you are free to change and redistribute it.\n" "There is NO WARRANTY, to the extent permitted by law.\n" "Report bugs to lzip-bug@nongnu.org\n" @@ -429,7 +427,7 @@ int main( const int argc, const char * const argv[] ) for( bool first_member = true; ; first_member = false ) { - File_header header; + File_header header; // verify header for( int i = 0; i < 6; ++i ) header[i] = std::getc( stdin ); if( std::feof( stdin ) || std::memcmp( header, "LZIP\x01", 5 ) != 0 ) { @@ -444,11 +442,11 @@ int main( const int argc, const char * const argv[] ) { std::fputs( "Invalid dictionary size in member header.\n", stderr ); return 2; } - LZ_decoder decoder( dict_size ); + LZ_decoder decoder( dict_size ); // decode LZMA stream if( !decoder.decode_member() ) { std::fputs( "Data error\n", stderr ); return 2; } - File_trailer trailer; + File_trailer trailer; // verify trailer for( int i = 0; i < 20; ++i ) trailer[i] = std::getc( stdin ); unsigned crc = 0; for( int i = 3; i >= 0; --i ) { crc <<= 8; crc += trailer[i]; } diff --git a/testsuite/check.sh b/testsuite/check.sh index f03377f..71cabc4 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! /bin/sh # check script for Lzd - Educational decompressor for lzip files -# Copyright (C) 2013-2016 Antonio Diaz Diaz. +# Copyright (C) 2013-2017 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute and modify it. @@ -17,6 +17,13 @@ if [ ! -f "${LZIP}" ] || [ ! -x "${LZIP}" ] ; then exit 1 fi +[ -e "${LZIP}" ] 2> /dev/null || + { + echo "$0: a POSIX shell is required to run the tests" + echo "Try bash -c \"$0 $1 $2\"" + exit 1 + } + if [ -d tmp ] ; then rm -rf tmp ; fi mkdir tmp cd "${objdir}"/tmp || framework_failure @@ -24,24 +31,41 @@ cd "${objdir}"/tmp || framework_failure in="${testdir}"/test.txt in_lz="${testdir}"/test.txt.lz fail=0 +test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; } printf "testing lzd-%s..." "$2" "${LZIP}" < "${in}" 2> /dev/null -if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi -dd if="${in_lz}" bs=1 count=6 2> /dev/null | "${LZIP}" 2> /dev/null -if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi -dd if="${in_lz}" bs=1 count=20 2> /dev/null | "${LZIP}" > /dev/null 2>&1 -if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi +[ $? = 2 ] || test_failed $LINENO -"${LZIP}" < "${in_lz}" > copy || fail=1 -cmp "${in}" copy || fail=1 -printf . +"${LZIP}" < "${in_lz}" > copy || test_failed $LINENO +cmp "${in}" copy || test_failed $LINENO cat "${in}" "${in}" > in2 || framework_failure -cat "${in_lz}" "${in_lz}" | "${LZIP}" > copy2 || fail=1 -cmp in2 copy2 || fail=1 -printf . +cat "${in_lz}" "${in_lz}" > in2.lz || framework_failure +"${LZIP}" < in2.lz > copy2 || test_failed $LINENO +cmp in2 copy2 || test_failed $LINENO + +printf "\ntesting bad input..." + +cat "${in_lz}" "${in_lz}" "${in_lz}" > in3.lz || framework_failure +if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null && + [ -e trunc.lz ] && cmp in2.lz trunc.lz > /dev/null 2>&1 ; then +# can't detect truncated header of non-first member + for i in 6 20 14734 14758 ; do + dd if=in3.lz of=trunc.lz bs=$i count=1 2> /dev/null + "${LZIP}" < trunc.lz > out 2> /dev/null + [ $? = 2 ] || test_failed $LINENO $i + done +else + printf "\nwarning: skipping truncation test: 'dd' does not work on your system." +fi + +cat "${in_lz}" > ingin.lz || framework_failure +printf "g" >> ingin.lz || framework_failure +cat "${in_lz}" >> ingin.lz || framework_failure +"${LZIP}" < ingin.lz > copy || test_failed $LINENO +cmp "${in}" copy || test_failed $LINENO echo if [ ${fail} = 0 ] ; then diff --git a/testsuite/test.txt.lz b/testsuite/test.txt.lz Binary files differindex 41d2e39..22cea6e 100644 --- a/testsuite/test.txt.lz +++ b/testsuite/test.txt.lz |