summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- COPYING 17
-rw-r--r-- ChangeLog 8
-rw-r--r-- INSTALL 2
-rw-r--r-- Makefile.in 5
-rw-r--r-- NEWS 7
-rw-r--r-- README 12
-rwxr-xr-x configure 21
-rw-r--r-- lzd.cc 78
-rwxr-xr-x testsuite/check.sh 48
-rw-r--r-- testsuite/test.txt.lz bin 7376 -> 7376 bytes
10 files changed, 124 insertions, 74 deletions
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..5b9d8b9
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,17 @@
+ Lzd - Educational decompressor for the lzip format
+ Copyright (C) Antonio Diaz Diaz.
+
+ This program is free software. Redistribution and use in source and
+ binary forms, with or without modification, are permitted provided
+ that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/ChangeLog b/ChangeLog
index c7c931f..380d736 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2017-05-02 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 1.0 released.
+ * lzd.cc: Minor code improvements.
+ * testsuite/check.sh: A POSIX shell is required to run the tests.
+
2016-05-10 Antonio Diaz Diaz <antonio@gnu.org>
* Version 0.9 released.
@@ -43,7 +49,7 @@
* Version 0.1 released.
-Copyright (C) 2013-2016 Antonio Diaz Diaz.
+Copyright (C) 2013-2017 Antonio Diaz Diaz.
This file is a collection of facts, and thus it is not copyrightable,
but just in case, you have unlimited permission to copy, distribute and
diff --git a/INSTALL b/INSTALL
index dc2e623..c245516 100644
--- a/INSTALL
+++ b/INSTALL
@@ -50,7 +50,7 @@ After running 'configure', you can run 'make' and 'make install' as
explained above.
-Copyright (C) 2013-2016 Antonio Diaz Diaz.
+Copyright (C) 2013-2017 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute and modify it.
diff --git a/Makefile.in b/Makefile.in
index 42cdcc0..471ff4b 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -95,16 +95,17 @@ dist : doc
ln -sf $(VPATH) $(DISTNAME)
tar -Hustar --owner=root --group=root -cvf $(DISTNAME).tar \
$(DISTNAME)/AUTHORS \
+ $(DISTNAME)/COPYING \
$(DISTNAME)/ChangeLog \
$(DISTNAME)/INSTALL \
$(DISTNAME)/Makefile.in \
$(DISTNAME)/NEWS \
$(DISTNAME)/README \
$(DISTNAME)/configure \
+ $(DISTNAME)/*.cc \
$(DISTNAME)/testsuite/check.sh \
$(DISTNAME)/testsuite/test.txt \
- $(DISTNAME)/testsuite/test.txt.lz \
- $(DISTNAME)/*.cc
+ $(DISTNAME)/testsuite/test.txt.lz
rm -f $(DISTNAME)
lzip -v -9 $(DISTNAME).tar
diff --git a/NEWS b/NEWS
index 085da72..2f713df 100644
--- a/NEWS
+++ b/NEWS
@@ -1,4 +1,5 @@
-Changes in version 0.9:
+Changes in version 1.0:
-A configure warning happening on some shells when testing for g++ has
-been fixed.
+Minor code improvements have been made.
+
+The tests have been improved.
diff --git a/README b/README
index ae73fc1..e16763e 100644
--- a/README
+++ b/README
@@ -24,11 +24,11 @@ availability:
merging of damaged copies of a file.
* The lzip format is as simple as possible (but not simpler). The
- lzip manual provides the code of a simple decompressor along with a
- detailed explanation of how it works, so that with the only help of
- the lzip manual it would be possible for a digital archaeologist to
- extract the data from a lzip file long after quantum computers
- eventually render LZMA obsolete.
+ lzip manual provides the source code of a simple decompressor along
+ with a detailed explanation of how it works, so that with the only
+ help of the lzip manual it would be possible for a digital
+ archaeologist to extract the data from a lzip file long after
+ quantum computers eventually render LZMA obsolete.
* Additionally the lzip reference implementation is copylefted, which
guarantees that it will remain free forever.
@@ -45,7 +45,7 @@ range encoding), and Igor Pavlov (for putting all the above together in
LZMA).
-Copyright (C) 2013-2016 Antonio Diaz Diaz.
+Copyright (C) 2013-2017 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute and modify it.
diff --git a/configure b/configure
index 226e838..7499b98 100755
--- a/configure
+++ b/configure
@@ -1,12 +1,12 @@
#! /bin/sh
# configure script for Lzd - Educational decompressor for the lzip format
-# Copyright (C) 2013-2016 Antonio Diaz Diaz.
+# Copyright (C) 2013-2017 Antonio Diaz Diaz.
#
# This configure script is free software: you have unlimited permission
# to copy, distribute and modify it.
pkgname=lzd
-pkgversion=0.9
+pkgversion=1.0
progname=lzd
srctrigger=lzd.cc
@@ -26,11 +26,11 @@ CXXFLAGS='-Wall -W -O2'
LDFLAGS=
# checking whether we are using GNU C++.
-if /bin/sh -c "${CXX} --version" > /dev/null 2>&1 ; then true
-else
+/bin/sh -c "${CXX} --version" > /dev/null 2>&1 ||
+ {
CXX=c++
- CXXFLAGS='-W -O2'
-fi
+ CXXFLAGS=-O2
+ }
# Loop over all args
args=
@@ -52,9 +52,12 @@ while [ $# != 0 ] ; do
# Process the options
case ${option} in
--help | -h)
- echo "Usage: configure [options]"
+ echo "Usage: $0 [OPTION]... [VAR=VALUE]..."
+ echo
+ echo "To assign makefile variables (e.g., CXX, CXXFLAGS...), specify them as"
+ echo "arguments to configure in the form VAR=VALUE."
echo
- echo "Options: [defaults in brackets]"
+ echo "Options and variables: [defaults in brackets]"
echo " -h, --help display this help and exit"
echo " -V, --version output version information and exit"
echo " --srcdir=DIR find the sources in DIR [. or ..]"
@@ -165,7 +168,7 @@ echo "LDFLAGS = ${LDFLAGS}"
rm -f Makefile
cat > Makefile << EOF
# Makefile for Lzd - Educational decompressor for the lzip format
-# Copyright (C) 2013-2016 Antonio Diaz Diaz.
+# Copyright (C) 2013-2017 Antonio Diaz Diaz.
# This file was generated automatically by configure. Don't edit.
#
# This Makefile is free software: you have unlimited permission
diff --git a/lzd.cc b/lzd.cc
index 56dce37..6fd536a 100644
--- a/lzd.cc
+++ b/lzd.cc
@@ -1,5 +1,5 @@
/* Lzd - Educational decompressor for the lzip format
- Copyright (C) 2013-2016 Antonio Diaz Diaz.
+ Copyright (C) 2013-2017 Antonio Diaz Diaz.
This program is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@@ -150,10 +150,10 @@ public:
uint8_t get_byte() { return std::getc( stdin ); }
- int decode( const int num_bits )
+ unsigned decode( const int num_bits )
{
- int symbol = 0;
- for( int i = 0; i < num_bits; ++i )
+ unsigned symbol = 0;
+ for( int i = num_bits; i > 0; --i )
{
range >>= 1;
symbol <<= 1;
@@ -164,9 +164,9 @@ public:
return symbol;
}
- int decode_bit( Bit_model & bm )
+ unsigned decode_bit( Bit_model & bm )
{
- int symbol;
+ unsigned symbol;
const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
if( code < bound )
{
@@ -186,18 +186,18 @@ public:
return symbol;
}
- int decode_tree( Bit_model bm[], const int num_bits )
+ unsigned decode_tree( Bit_model bm[], const int num_bits )
{
- int symbol = 1;
+ unsigned symbol = 1;
for( int i = 0; i < num_bits; ++i )
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
return symbol - (1 << num_bits);
}
- int decode_tree_reversed( Bit_model bm[], const int num_bits )
+ unsigned decode_tree_reversed( Bit_model bm[], const int num_bits )
{
- int symbol = decode_tree( bm, num_bits );
- int reversed_symbol = 0;
+ unsigned symbol = decode_tree( bm, num_bits );
+ unsigned reversed_symbol = 0;
for( int i = 0; i < num_bits; ++i )
{
reversed_symbol = ( reversed_symbol << 1 ) | ( symbol & 1 );
@@ -206,14 +206,13 @@ public:
return reversed_symbol;
}
- int decode_matched( Bit_model bm[], const int match_byte )
+ unsigned decode_matched( Bit_model bm[], const unsigned match_byte )
{
- Bit_model * const bm1 = bm + 0x100;
- int symbol = 1;
+ unsigned symbol = 1;
for( int i = 7; i >= 0; --i )
{
- const int match_bit = ( match_byte >> i ) & 1;
- const int bit = decode_bit( bm1[(match_bit<<8)+symbol] );
+ const unsigned match_bit = ( match_byte >> i ) & 1;
+ const unsigned bit = decode_bit( bm[symbol+(match_bit<<8)+0x100] );
symbol = ( symbol << 1 ) | bit;
if( match_bit != bit )
{
@@ -225,7 +224,7 @@ public:
return symbol & 0xFF;
}
- int decode_len( Len_model & lm, const int pos_state )
+ unsigned decode_len( Len_model & lm, const int pos_state )
{
if( decode_bit( lm.choice1 ) == 0 )
return decode_tree( lm.bm_low[pos_state], len_low_bits );
@@ -253,9 +252,9 @@ class LZ_decoder
uint8_t peek( const unsigned distance ) const
{
- unsigned i = pos - distance - 1;
- if( pos <= distance ) i += dictionary_size;
- return buffer[i];
+ if( pos > distance ) return buffer[pos - distance - 1];
+ if( pos_wrapped ) return buffer[dictionary_size + pos - distance - 1];
+ return 0; // prev_byte of first byte
}
void put_byte( const uint8_t b )
@@ -274,7 +273,7 @@ public:
stream_pos( 0 ),
crc_( 0xFFFFFFFFU ),
pos_wrapped( false )
- { buffer[dictionary_size-1] = 0; } // prev_byte of first byte
+ {}
~LZ_decoder() { delete[] buffer; }
@@ -312,13 +311,13 @@ bool LZ_decoder::decode_member() // Returns false if error
Bit_model bm_rep2[State::states];
Bit_model bm_len[State::states][pos_states];
Bit_model bm_dis_slot[len_states][1<<dis_slot_bits];
- Bit_model bm_dis[modeled_distances-end_dis_model];
+ Bit_model bm_dis[modeled_distances-end_dis_model+1];
Bit_model bm_align[dis_align_size];
Len_model match_len_model;
Len_model rep_len_model;
- unsigned rep0 = 0; // rep[0-3] latest four distances
- unsigned rep1 = 0; // used for efficient coding of
- unsigned rep2 = 0; // repeated distances
+ unsigned rep0 = 0; // rep[0-3] latest four distances
+ unsigned rep1 = 0; // used for efficient coding of
+ unsigned rep2 = 0; // repeated distances
unsigned rep3 = 0;
State state;
@@ -341,7 +340,12 @@ bool LZ_decoder::decode_member() // Returns false if error
int len;
if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit
{
- if( rdec.decode_bit( bm_rep0[state()] ) != 0 ) // 3rd bit
+ if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit
+ {
+ if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
+ { state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
+ }
+ else
{
unsigned distance;
if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit
@@ -357,11 +361,6 @@ bool LZ_decoder::decode_member() // Returns false if error
rep1 = rep0;
rep0 = distance;
}
- else
- {
- if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
- { state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
- }
state.set_rep();
len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
}
@@ -370,15 +369,14 @@ bool LZ_decoder::decode_member() // Returns false if error
rep3 = rep2; rep2 = rep1; rep1 = rep0;
len = min_match_len + rdec.decode_len( match_len_model, pos_state );
const int len_state = std::min( len - min_match_len, len_states - 1 );
- const int dis_slot =
- rdec.decode_tree( bm_dis_slot[len_state], dis_slot_bits );
- if( dis_slot < start_dis_model ) rep0 = dis_slot;
- else
+ rep0 = rdec.decode_tree( bm_dis_slot[len_state], dis_slot_bits );
+ if( rep0 >= start_dis_model )
{
+ const unsigned dis_slot = rep0;
const int direct_bits = ( dis_slot >> 1 ) - 1;
rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
if( dis_slot < end_dis_model )
- rep0 += rdec.decode_tree_reversed( bm_dis + rep0 - dis_slot - 1,
+ rep0 += rdec.decode_tree_reversed( bm_dis + ( rep0 - dis_slot ),
direct_bits );
else
{
@@ -414,7 +412,7 @@ int main( const int argc, const char * const argv[] )
"It is not safe to use lzd for any real work.\n"
"\nUsage: %s < file.lz > file\n", argv[0] );
std::printf( "Lzd decompresses from standard input to standard output.\n"
- "\nCopyright (C) 2016 Antonio Diaz Diaz.\n"
+ "\nCopyright (C) 2017 Antonio Diaz Diaz.\n"
"This is free software: you are free to change and redistribute it.\n"
"There is NO WARRANTY, to the extent permitted by law.\n"
"Report bugs to lzip-bug@nongnu.org\n"
@@ -429,7 +427,7 @@ int main( const int argc, const char * const argv[] )
for( bool first_member = true; ; first_member = false )
{
- File_header header;
+ File_header header; // verify header
for( int i = 0; i < 6; ++i ) header[i] = std::getc( stdin );
if( std::feof( stdin ) || std::memcmp( header, "LZIP\x01", 5 ) != 0 )
{
@@ -444,11 +442,11 @@ int main( const int argc, const char * const argv[] )
{ std::fputs( "Invalid dictionary size in member header.\n", stderr );
return 2; }
- LZ_decoder decoder( dict_size );
+ LZ_decoder decoder( dict_size ); // decode LZMA stream
if( !decoder.decode_member() )
{ std::fputs( "Data error\n", stderr ); return 2; }
- File_trailer trailer;
+ File_trailer trailer; // verify trailer
for( int i = 0; i < 20; ++i ) trailer[i] = std::getc( stdin );
unsigned crc = 0;
for( int i = 3; i >= 0; --i ) { crc <<= 8; crc += trailer[i]; }
diff --git a/testsuite/check.sh b/testsuite/check.sh
index f03377f..71cabc4 100755
--- a/testsuite/check.sh
+++ b/testsuite/check.sh
@@ -1,6 +1,6 @@
#! /bin/sh
# check script for Lzd - Educational decompressor for lzip files
-# Copyright (C) 2013-2016 Antonio Diaz Diaz.
+# Copyright (C) 2013-2017 Antonio Diaz Diaz.
#
# This script is free software: you have unlimited permission
# to copy, distribute and modify it.
@@ -17,6 +17,13 @@ if [ ! -f "${LZIP}" ] || [ ! -x "${LZIP}" ] ; then
exit 1
fi
+[ -e "${LZIP}" ] 2> /dev/null ||
+ {
+ echo "$0: a POSIX shell is required to run the tests"
+ echo "Try bash -c \"$0 $1 $2\""
+ exit 1
+ }
+
if [ -d tmp ] ; then rm -rf tmp ; fi
mkdir tmp
cd "${objdir}"/tmp || framework_failure
@@ -24,24 +31,41 @@ cd "${objdir}"/tmp || framework_failure
in="${testdir}"/test.txt
in_lz="${testdir}"/test.txt.lz
fail=0
+test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; }
printf "testing lzd-%s..." "$2"
"${LZIP}" < "${in}" 2> /dev/null
-if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
-dd if="${in_lz}" bs=1 count=6 2> /dev/null | "${LZIP}" 2> /dev/null
-if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
-dd if="${in_lz}" bs=1 count=20 2> /dev/null | "${LZIP}" > /dev/null 2>&1
-if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
+[ $? = 2 ] || test_failed $LINENO
-"${LZIP}" < "${in_lz}" > copy || fail=1
-cmp "${in}" copy || fail=1
-printf .
+"${LZIP}" < "${in_lz}" > copy || test_failed $LINENO
+cmp "${in}" copy || test_failed $LINENO
cat "${in}" "${in}" > in2 || framework_failure
-cat "${in_lz}" "${in_lz}" | "${LZIP}" > copy2 || fail=1
-cmp in2 copy2 || fail=1
-printf .
+cat "${in_lz}" "${in_lz}" > in2.lz || framework_failure
+"${LZIP}" < in2.lz > copy2 || test_failed $LINENO
+cmp in2 copy2 || test_failed $LINENO
+
+printf "\ntesting bad input..."
+
+cat "${in_lz}" "${in_lz}" "${in_lz}" > in3.lz || framework_failure
+if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null &&
+ [ -e trunc.lz ] && cmp in2.lz trunc.lz > /dev/null 2>&1 ; then
+# can't detect truncated header of non-first member
+ for i in 6 20 14734 14758 ; do
+ dd if=in3.lz of=trunc.lz bs=$i count=1 2> /dev/null
+ "${LZIP}" < trunc.lz > out 2> /dev/null
+ [ $? = 2 ] || test_failed $LINENO $i
+ done
+else
+ printf "\nwarning: skipping truncation test: 'dd' does not work on your system."
+fi
+
+cat "${in_lz}" > ingin.lz || framework_failure
+printf "g" >> ingin.lz || framework_failure
+cat "${in_lz}" >> ingin.lz || framework_failure
+"${LZIP}" < ingin.lz > copy || test_failed $LINENO
+cmp "${in}" copy || test_failed $LINENO
echo
if [ ${fail} = 0 ] ; then
diff --git a/testsuite/test.txt.lz b/testsuite/test.txt.lz
index 41d2e39..22cea6e 100644
--- a/testsuite/test.txt.lz
+++ b/testsuite/test.txt.lz
Binary files differ