From e3a2fd8499eb887ee794ff8f6d0ecfa39e160b05 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Tue, 23 Jan 2024 06:17:07 +0100 Subject: Adding upstream version 1.4. Signed-off-by: Daniel Baumann --- ChangeLog | 9 +++++++-- INSTALL | 5 +++-- Makefile.in | 7 +++++-- NEWS | 7 +++---- README | 37 ++++++------------------------------ configure | 19 ++++++++++++------- lzd.cc | 55 ++++++++++++++++++++++++++++-------------------------- testsuite/check.sh | 28 +++++++++++++-------------- 8 files changed, 79 insertions(+), 88 deletions(-) diff --git a/ChangeLog b/ChangeLog index 042b251..f417214 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2024-01-02 Antonio Diaz Diaz + + * Version 1.4 released. + * lzd.cc: Use header_size and trailer_size instead of 6 and 20. + 2022-10-24 Antonio Diaz Diaz * Version 1.3 released. @@ -6,7 +11,7 @@ 2021-01-04 Antonio Diaz Diaz * Version 1.2 released. - * lzd.cc (main): Verify also mismatches in member size. + * lzd.cc (main): Check also mismatches in member size. Accept and ignore the option '-d' for compatibility with zutils. Remove warning about "lzd not safe for real work". Print license notice. @@ -70,7 +75,7 @@ * Version 0.1 released. -Copyright (C) 2013-2022 Antonio Diaz Diaz. +Copyright (C) 2013-2024 Antonio Diaz Diaz. This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute, and diff --git a/INSTALL b/INSTALL index ecf03f1..2c82368 100644 --- a/INSTALL +++ b/INSTALL @@ -31,7 +31,8 @@ extracted from the archive. 4. Optionally, type 'make check' to run the tests that come with lzd. 5. Type 'make install' to install the program and any data files and - documentation. + documentation. You need root privileges to install into a prefix owned + by root. Another way @@ -51,7 +52,7 @@ After running 'configure', you can run 'make' and 'make install' as explained above. -Copyright (C) 2013-2022 Antonio Diaz Diaz. +Copyright (C) 2013-2024 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute, and modify it. diff --git a/Makefile.in b/Makefile.in index ced714d..120a41b 100644 --- a/Makefile.in +++ b/Makefile.in @@ -24,15 +24,18 @@ $(progname) : $(objs) %.o : %.cc $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< -$(objs) : Makefile +# prevent 'make' from trying to remake source files +$(VPATH)/configure $(VPATH)/Makefile.in $(VPATH)/doc/$(pkgname).texi : ; +%.h %.cc : ; +$(objs) : Makefile doc : info : $(VPATH)/doc/$(pkgname).info $(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texi - cd $(VPATH)/doc && makeinfo $(pkgname).texi + cd $(VPATH)/doc && $(MAKEINFO) $(pkgname).texi man : $(VPATH)/doc/$(progname).1 diff --git a/NEWS b/NEWS index 2416499..be2c30b 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,4 @@ -Changes in version 1.3: +Changes in version 1.4: -Lzd now discards the first byte of the LZMA stream explicity (instead of -shifting 1 extra byte in the initialization of 'code') to make the intent -clearer. +Define constants 'header_size' and 'trailer_size' instead of using 6 and 20. +(Lzd is supposed to be educational, and using numbers is bad practice). diff --git a/README b/README index fae23bc..03fb742 100644 --- a/README +++ b/README @@ -7,7 +7,7 @@ lzip works. Lzd is written in C++. The source code of lzd is used in the lzip manual as a reference decompressor in the description of the lzip file format. Reading the lzip manual will help you understand the source code. Lzd is compliant with the -lzip specification; it verifies the 3 integrity factors. +lzip specification; it checks the 3 integrity factors. The source code of lzd is also used as a reference in the description of the media type 'application/lzip'. @@ -18,33 +18,9 @@ ignores) the option '-d' for compatibility with other lzip tools. In particular, accepting the option '-d' allows lzd to be used as argument to the option '--lz' of the tools from the zutils package. -Lzd will correctly decompress the concatenation of two or more compressed +Lzd correctly decompresses the concatenation of two or more compressed files. The result is the concatenation of the corresponding decompressed -data. Integrity of such concatenated compressed input is also verified. - -The lzip file format is designed for data sharing and long-term archiving, -taking into account both data integrity and decoder availability: - - * The lzip format provides very safe integrity checking and some data - recovery means. The program lziprecover can repair bit flip errors - (one of the most common forms of data corruption) in lzip files, and - provides data recovery capabilities, including error-checked merging - of damaged copies of a file. - - * The lzip format is as simple as possible (but not simpler). The lzip - manual provides the source code of a simple decompressor along with a - detailed explanation of how it works, so that with the only help of the - lzip manual it would be possible for a digital archaeologist to extract - the data from a lzip file long after quantum computers eventually - render LZMA obsolete. - - * Additionally the lzip reference implementation is copylefted, which - guarantees that it will remain free forever. - -A nice feature of the lzip format is that a corrupt byte is easier to repair -the nearer it is from the beginning of the file. Therefore, with the help of -lziprecover, losing an entire archive just because of a corrupt byte near -the beginning is a thing of the past. +data. Integrity of such concatenated compressed input is also checked. The ideas embodied in lzd are due to (at least) the following people: Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrei Markov (for the @@ -52,11 +28,10 @@ definition of Markov chains), G.N.N. Martin (for the definition of range encoding), and Igor Pavlov (for putting all the above together in LZMA). -Copyright (C) 2013-2022 Antonio Diaz Diaz. +Copyright (C) 2013-2024 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute, and modify it. -The file Makefile.in is a data file used by configure to produce the -Makefile. It has the same copyright owner and permissions that configure -itself. +The file Makefile.in is a data file used by configure to produce the Makefile. +It has the same copyright owner and permissions that configure itself. diff --git a/configure b/configure index cf10b79..a84fda2 100755 --- a/configure +++ b/configure @@ -1,12 +1,12 @@ #! /bin/sh # configure script for Lzd - Educational decompressor for the lzip format -# Copyright (C) 2013-2022 Antonio Diaz Diaz. +# Copyright (C) 2013-2024 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute, and modify it. pkgname=lzd -pkgversion=1.3 +pkgversion=1.4 progname=lzd srctrigger=lzd.cc @@ -24,6 +24,7 @@ CXX=g++ CPPFLAGS= CXXFLAGS='-Wall -W -O2' LDFLAGS= +MAKEINFO=makeinfo # checking whether we are using GNU C++. /bin/sh -c "${CXX} --version" > /dev/null 2>&1 || { CXX=c++ ; CXXFLAGS=-O2 ; } @@ -65,10 +66,11 @@ while [ $# != 0 ] ; do echo " --infodir=DIR info files directory [${infodir}]" echo " --mandir=DIR man pages directory [${mandir}]" echo " CXX=COMPILER C++ compiler to use [${CXX}]" - echo " CPPFLAGS=OPTIONS command line options for the preprocessor [${CPPFLAGS}]" - echo " CXXFLAGS=OPTIONS command line options for the C++ compiler [${CXXFLAGS}]" + echo " CPPFLAGS=OPTIONS command-line options for the preprocessor [${CPPFLAGS}]" + echo " CXXFLAGS=OPTIONS command-line options for the C++ compiler [${CXXFLAGS}]" echo " CXXFLAGS+=OPTIONS append options to the current value of CXXFLAGS" - echo " LDFLAGS=OPTIONS command line options for the linker [${LDFLAGS}]" + echo " LDFLAGS=OPTIONS command-line options for the linker [${LDFLAGS}]" + echo " MAKEINFO=NAME makeinfo program to use [${MAKEINFO}]" echo exit 0 ;; --version | -V) @@ -96,6 +98,7 @@ while [ $# != 0 ] ; do CXXFLAGS=*) CXXFLAGS=${optarg} ;; CXXFLAGS+=*) CXXFLAGS="${CXXFLAGS} ${optarg}" ;; LDFLAGS=*) LDFLAGS=${optarg} ;; + MAKEINFO=*) MAKEINFO=${optarg} ;; --*) echo "configure: WARNING: unrecognized option: '${option}'" 1>&2 ;; @@ -147,7 +150,7 @@ if [ -z "${no_create}" ] ; then # This script is free software: you have unlimited permission # to copy, distribute, and modify it. -exec /bin/sh $0 ${args} --no-create +exec /bin/sh "$0" ${args} --no-create EOF chmod +x config.status fi @@ -164,10 +167,11 @@ echo "CXX = ${CXX}" echo "CPPFLAGS = ${CPPFLAGS}" echo "CXXFLAGS = ${CXXFLAGS}" echo "LDFLAGS = ${LDFLAGS}" +echo "MAKEINFO = ${MAKEINFO}" rm -f Makefile cat > Makefile << EOF # Makefile for Lzd - Educational decompressor for the lzip format -# Copyright (C) 2013-2022 Antonio Diaz Diaz. +# Copyright (C) 2013-2024 Antonio Diaz Diaz. # This file was generated automatically by configure. Don't edit. # # This Makefile is free software: you have unlimited permission @@ -187,6 +191,7 @@ CXX = ${CXX} CPPFLAGS = ${CPPFLAGS} CXXFLAGS = ${CXXFLAGS} LDFLAGS = ${LDFLAGS} +MAKEINFO = ${MAKEINFO} EOF cat "${srcdir}/Makefile.in" >> Makefile diff --git a/lzd.cc b/lzd.cc index 9e1c3b8..3cf3f13 100644 --- a/lzd.cc +++ b/lzd.cc @@ -1,5 +1,5 @@ /* Lzd - Educational decompressor for the lzip format - Copyright (C) 2013-2022 Antonio Diaz Diaz. + Copyright (C) 2013-2024 Antonio Diaz Diaz. This program is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -18,7 +18,7 @@ */ /* Exit status: 0 for a normal exit, 1 for environmental problems - (file not found, invalid command line options, I/O errors, etc), 2 to + (file not found, invalid command-line options, I/O errors, etc), 2 to indicate a corrupt or invalid input file. */ @@ -130,10 +130,11 @@ public: const CRC32 crc32; -typedef uint8_t Lzip_header[6]; // 0-3 magic bytes - // 4 version - // 5 coded dictionary size -typedef uint8_t Lzip_trailer[20]; +enum { header_size = 6, trailer_size = 20 }; +typedef uint8_t Lzip_header[header_size]; // 0-3 magic bytes + // 4 version + // 5 coded dictionary size +typedef uint8_t Lzip_trailer[trailer_size]; // 0-3 CRC32 of the uncompressed data // 4-11 size of the uncompressed data // 12-19 member size including header and trailer @@ -145,7 +146,8 @@ class Range_decoder uint32_t range; public: - Range_decoder() : member_pos( 6 ), code( 0 ), range( 0xFFFFFFFFU ) + Range_decoder() + : member_pos( header_size ), code( 0 ), range( 0xFFFFFFFFU ) { get_byte(); // discard first byte of the LZMA stream for( int i = 0; i < 4; ++i ) code = ( code << 8 ) | get_byte(); @@ -168,9 +170,9 @@ public: return symbol; } - unsigned decode_bit( Bit_model & bm ) + bool decode_bit( Bit_model & bm ) { - unsigned symbol; + bool symbol; const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability; if( code < bound ) { @@ -216,8 +218,8 @@ public: unsigned symbol = 1; for( int i = 7; i >= 0; --i ) { - const unsigned match_bit = ( match_byte >> i ) & 1; - const unsigned bit = decode_bit( bm[symbol+(match_bit<<8)+0x100] ); + const bool match_bit = ( match_byte >> i ) & 1; + const bool bit = decode_bit( bm[symbol+(match_bit<<8)+0x100] ); symbol = ( symbol << 1 ) | bit; if( match_bit != bit ) { @@ -232,11 +234,12 @@ public: unsigned decode_len( Len_model & lm, const int pos_state ) { if( decode_bit( lm.choice1 ) == 0 ) - return decode_tree( lm.bm_low[pos_state], len_low_bits ); + return min_match_len + + decode_tree( lm.bm_low[pos_state], len_low_bits ); if( decode_bit( lm.choice2 ) == 0 ) - return len_low_symbols + + return min_match_len + len_low_symbols + decode_tree( lm.bm_mid[pos_state], len_mid_bits ); - return len_low_symbols + len_mid_symbols + + return min_match_len + len_low_symbols + len_mid_symbols + decode_tree( lm.bm_high, len_high_bits ); } }; @@ -371,12 +374,12 @@ bool LZ_decoder::decode_member() // Return false if error rep0 = distance; } state.set_rep(); - len = min_match_len + rdec.decode_len( rep_len_model, pos_state ); + len = rdec.decode_len( rep_len_model, pos_state ); } else // match { rep3 = rep2; rep2 = rep1; rep1 = rep0; - len = min_match_len + rdec.decode_len( match_len_model, pos_state ); + len = rdec.decode_len( match_len_model, pos_state ); const int len_state = std::min( len - min_match_len, len_states - 1 ); rep0 = rdec.decode_tree( bm_dis_slot[len_state], dis_slot_bits ); if( rep0 >= start_dis_model ) @@ -395,7 +398,7 @@ bool LZ_decoder::decode_member() // Return false if error if( rep0 == 0xFFFFFFFFU ) // marker found { flush_data(); - return ( len == min_match_len ); // End Of Stream marker + return len == min_match_len; // End Of Stream marker } } } @@ -420,10 +423,10 @@ int main( const int argc, const char * const argv[] ) "See the lzip manual for an explanation of the code.\n" "\nUsage: %s [-d] < file.lz > file\n" "Lzd decompresses from standard input to standard output.\n" - "\nCopyright (C) 2022 Antonio Diaz Diaz.\n" + "\nCopyright (C) 2024 Antonio Diaz Diaz.\n" "License 2-clause BSD.\n" - "This is free software: you are free to change and redistribute it.\n" - "There is NO WARRANTY, to the extent permitted by law.\n" + "This is free software: you are free to change and redistribute " + "it.\nThere is NO WARRANTY, to the extent permitted by law.\n" "Report bugs to lzip-bug@nongnu.org\n" "Lzd home page: http://www.nongnu.org/lzip/lzd.html\n", PROGVERSION, argv[0] ); @@ -437,8 +440,8 @@ int main( const int argc, const char * const argv[] ) for( bool first_member = true; ; first_member = false ) { - Lzip_header header; // verify header - for( int i = 0; i < 6; ++i ) header[i] = std::getc( stdin ); + Lzip_header header; // check header + for( int i = 0; i < header_size; ++i ) header[i] = std::getc( stdin ); if( std::feof( stdin ) || std::memcmp( header, "LZIP\x01", 5 ) != 0 ) { if( first_member ) @@ -449,15 +452,15 @@ int main( const int argc, const char * const argv[] ) unsigned dict_size = 1 << ( header[5] & 0x1F ); dict_size -= ( dict_size / 16 ) * ( ( header[5] >> 5 ) & 7 ); if( dict_size < min_dictionary_size || dict_size > max_dictionary_size ) - { std::fputs( "Invalid dictionary size in member header.\n", stderr ); - return 2; } + { std::fputs( "Invalid dictionary size in member header.\n", + stderr ); return 2; } LZ_decoder decoder( dict_size ); // decode LZMA stream if( !decoder.decode_member() ) { std::fputs( "Data error\n", stderr ); return 2; } - Lzip_trailer trailer; // verify trailer - for( int i = 0; i < 20; ++i ) trailer[i] = decoder.get_byte(); + Lzip_trailer trailer; // check trailer + for( int i = 0; i < trailer_size; ++i ) trailer[i] = decoder.get_byte(); int retval = 0; unsigned crc = 0; for( int i = 3; i >= 0; --i ) crc = ( crc << 8 ) + trailer[i]; diff --git a/testsuite/check.sh b/testsuite/check.sh index 33ffb8a..b041fe8 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! /bin/sh # check script for Lzd - Educational decompressor for the lzip format -# Copyright (C) 2013-2022 Antonio Diaz Diaz. +# Copyright (C) 2013-2024 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute, and modify it. @@ -41,20 +41,20 @@ printf "testing lzd-%s..." "$2" [ $? = 2 ] || test_failed $LINENO for i in "${in_lz}" "${in_em}" ; do - "${LZIP}" < "$i" > copy || test_failed $LINENO "$i" - cmp "${in}" copy || test_failed $LINENO "$i" + "${LZIP}" < "$i" > out || test_failed $LINENO "$i" + cmp "${in}" out || test_failed $LINENO "$i" done cat "${in}" "${in}" > in2 || framework_failure -cat "${in_lz}" "${in_lz}" | "${LZIP}" > copy2 || test_failed $LINENO -cmp in2 copy2 || test_failed $LINENO -rm -f copy2 || framework_failure +cat "${in_lz}" "${in_lz}" | "${LZIP}" > out2 || test_failed $LINENO +cmp in2 out2 || test_failed $LINENO +rm -f out2 || framework_failure -cat "${in_lz}" "${in_lz}" > copy2.lz || framework_failure -printf "\ngarbage" >> copy2.lz || framework_failure -"${LZIP}" -d < copy2.lz > copy2 || test_failed $LINENO -cmp in2 copy2 || test_failed $LINENO -rm -f in2 copy2 copy2.lz || framework_failure +cat "${in_lz}" "${in_lz}" > out2.lz || framework_failure +printf "\ngarbage" >> out2.lz || framework_failure +"${LZIP}" -d < out2.lz > out2 || test_failed $LINENO +cmp in2 out2 || test_failed $LINENO +rm -f in2 out2 out2.lz || framework_failure printf "\ntesting bad input..." @@ -90,9 +90,9 @@ rm -f in2.lz in3.lz trunc.lz || framework_failure cat "${in_lz}" > ingin.lz || framework_failure printf "g" >> ingin.lz || framework_failure cat "${in_lz}" >> ingin.lz || framework_failure -"${LZIP}" -d < ingin.lz > copy || test_failed $LINENO -cmp "${in}" copy || test_failed $LINENO -rm -f copy ingin.lz || framework_failure +"${LZIP}" -d < ingin.lz > out || test_failed $LINENO +cmp "${in}" out || test_failed $LINENO +rm -f out ingin.lz || framework_failure echo if [ ${fail} = 0 ] ; then -- cgit v1.2.3