diff options
-rw-r--r-- | ChangeLog | 12 | ||||
-rw-r--r-- | Makefile.in | 24 | ||||
-rw-r--r-- | NEWS | 10 | ||||
-rwxr-xr-x | configure | 4 | ||||
-rw-r--r-- | decoder.cc | 17 | ||||
-rw-r--r-- | decoder.h | 97 | ||||
-rw-r--r-- | doc/lzip.1 | 4 | ||||
-rw-r--r-- | doc/lzip.info | 33 | ||||
-rw-r--r-- | doc/lzip.texinfo | 15 | ||||
-rw-r--r-- | encoder.cc | 39 | ||||
-rw-r--r-- | encoder.h | 49 | ||||
-rwxr-xr-x | lzdiff | 23 | ||||
-rwxr-xr-x | lzgrep | 9 | ||||
-rw-r--r-- | lzip.h | 9 | ||||
-rw-r--r-- | lziprecover.cc | 30 | ||||
-rw-r--r-- | main.cc | 2 | ||||
-rwxr-xr-x | testsuite/check.sh | 12 |
17 files changed, 242 insertions, 147 deletions
@@ -1,3 +1,14 @@ +2009-05-21 Antonio Diaz <ant_diaz@teleline.es> + + * Version 1.6-pre2 released. + * Decompression time has been reduced by 17%. + * Added support for .tbz extension to lzdiff and lzgrep. + * Added man pages for lzdiff, lzgrep and lziprecover. + * encoder.cc (Matchfinder): Reduce memory use to 9x if input file + is smaller than dictionary size limit. + * testsuite/check.sh: Test lziprecover. + * Export LC_ALL=C in all scripts. + 2009-04-27 Antonio Diaz <ant_diaz@teleline.es> * Version 1.6-pre1 released. @@ -18,6 +29,7 @@ * Lzdiff and lzgrep now accept "-h" for "--help" and "-V" for "--version". * Makefile.in: Man page is now installed by default. + * testsuite/check.sh: Verify that files are open in binary mode. 2009-01-24 Antonio Diaz <ant_diaz@teleline.es> diff --git a/Makefile.in b/Makefile.in index 644f141..1b9d4fb 100644 --- a/Makefile.in +++ b/Makefile.in @@ -9,17 +9,17 @@ SHELL = /bin/sh objs = arg_parser.o decoder.o encoder.o main.o -.PHONY : all doc check install install-info install-man install-extra \ +.PHONY : all install install-info install-man install-extra install-strip \ uninstall uninstall-info uninstall-man uninstall-extra \ - install-strip dist clean distclean + doc info man check dist clean distclean all : $(progname) lziprecover $(progname) : $(objs) $(CXX) $(LDFLAGS) -o $(progname) $(objs) -profiled_$(progname) : $(objs) - $(CXX) $(LDFLAGS) -pg -o profiled_$(progname) $(objs) +$(progname)_profiled : $(objs) + $(CXX) $(LDFLAGS) -pg -o $(progname)_profiled $(objs) lziprecover : arg_parser.o lziprecover.o $(CXX) $(LDFLAGS) -o lziprecover arg_parser.o lziprecover.o @@ -40,16 +40,28 @@ encoder.o : lzip.h encoder.h main.o : arg_parser.h lzip.h decoder.h encoder.h lziprecover.o : arg_parser.h lzip.h Makefile -doc : info $(VPATH)/doc/$(progname).1 +doc : info man info : $(VPATH)/doc/$(progname).info $(VPATH)/doc/$(progname).info : $(VPATH)/doc/$(progname).texinfo cd $(VPATH)/doc && makeinfo $(progname).texinfo +man : $(VPATH)/doc/$(progname).1 $(VPATH)/doc/lziprecover.1 \ + $(VPATH)/doc/lzdiff.1 $(VPATH)/doc/lzgrep.1 + $(VPATH)/doc/$(progname).1 : $(progname) help2man -o $(VPATH)/doc/$(progname).1 ./$(progname) +$(VPATH)/doc/lziprecover.1 : lziprecover + help2man -o $(VPATH)/doc/lziprecover.1 --no-info ./lziprecover + +$(VPATH)/doc/lzdiff.1 : $(VPATH)/lzdiff + help2man -o $(VPATH)/doc/lzdiff.1 --no-info $(VPATH)/lzdiff + +$(VPATH)/doc/lzgrep.1 : $(VPATH)/lzgrep + help2man -o $(VPATH)/doc/lzgrep.1 --no-info $(VPATH)/lzgrep + Makefile : $(VPATH)/configure $(VPATH)/Makefile.in ./config.status @@ -118,7 +130,7 @@ dist : lzip -v -9 $(DISTNAME).tar clean : - -rm -f $(progname) profiled_$(progname) $(objs) + -rm -f $(progname) $(progname)_profiled $(objs) -rm -f lziprecover lziprecover.o distclean : clean @@ -1,5 +1,15 @@ Changes in version 1.6: +Decompression time has been reduced by 17%. + +Support for .tbz extension has been added to lzdiff and lzgrep. + +Man pages for lzdiff, lzgrep and lziprecover have been added to the +distribution. + +Memory use has been reduced to 9x if the input file is smaller than +dictionary size limit. + Flush calls have been added to decompressor to allow partial recovery of the uncompressed data when decompressing a corrupt file. @@ -5,13 +5,13 @@ # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. # -# Date of this version: 2009-04-27 +# Date of this version: 2009-05-21 invocation_name=$0 args= no_create= progname=lzip -progversion=1.6-pre1 +progversion=1.6-pre2 srctrigger=lzip.h # clear some things potentially inherited from environment. @@ -47,6 +47,7 @@ void LZ_decoder::flush_data() { if( !member_finished ) { + crc32.update( crc_, buffer, pos ); if( odes_ >= 0 ) { const int wr = writeblock( odes_, (char *)buffer, pos ); @@ -65,7 +66,9 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const const int trailer_size = trailer.size( format_version ); for( int i = 0; i < trailer_size && !error; ++i ) { - if( range_decoder.finished() ) + if( !range_decoder.finished() ) + ((uint8_t *)&trailer)[i] = range_decoder.get_byte(); + else { error = true; if( verbosity >= 0 ) @@ -75,7 +78,6 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const " some checks may fail.\n", i ); } } - ((uint8_t *)&trailer)[i] = range_decoder.read_byte(); } if( format_version == 0 ) trailer.member_size( member_position() ); if( trailer.data_crc() != crc() ) @@ -195,23 +197,22 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) rep0 += range_decoder.decode_tree_reversed( bm_align, dis_align_bits ); if( rep0 == 0xFFFFFFFF ) // Marker found { + range_decoder.normalize(); + flush_data(); if( len == min_match_len ) // End Of Stream marker - { - flush_data(); - if( verify_trailer( pp ) ) return 0; else return 3; - } + { if( verify_trailer( pp ) ) return 0; else return 3; } if( verbosity >= 0 ) { pp(); std::fprintf( stderr, "unsupported marker code `%d'.\n", len ); } - flush_data(); return 4; } + if( rep0 >= (unsigned int)buffer_size ) { flush_data(); return 1; } } } } - if( !copy_block( rep0, len ) ) { flush_data(); return 1; } + copy_block( rep0, len ); prev_byte = get_byte( 0 ); } } @@ -40,7 +40,7 @@ public: bool finished() const throw() { return at_stream_end && pos >= stream_pos; } - uint8_t read_byte() + uint8_t get_byte() { if( pos >= stream_pos && !read_block() ) return 0; return buffer[pos++]; @@ -62,59 +62,68 @@ public: code( 0 ), range( 0xFFFFFFFF ), ibuf( buf ) - { for( int i = 0; i < 5; ++i ) code = (code << 8) | read_byte(); } + { for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte(); } - uint8_t read_byte() const + bool finished() const throw() { return ibuf.finished(); } + long long member_position() const throw() { return member_pos; } + + uint8_t get_byte() const { ++member_pos; - return ibuf.read_byte(); + return ibuf.get_byte(); } - long long member_position() const throw() { return member_pos; } - bool finished() const throw() { return ibuf.finished(); } + void normalize() + { + if( range <= 0x00FFFFFF ) + { range <<= 8; code = (code << 8) | get_byte(); } + } int decode( const int num_bits ) { int symbol = 0; - for( int i = num_bits - 1; i >= 0; --i ) + for( int i = num_bits; i > 0; --i ) { - range >>= 1; symbol <<= 1; - if( code >= range ) - { code -= range; symbol |= 1; } if( range <= 0x00FFFFFF ) - { range <<= 8; code = (code << 8) | read_byte(); } + { + range <<= 7; code = (code << 8) | get_byte(); + if( code >= range ) { code -= range; symbol |= 1; } + } + else + { + range >>= 1; + if( code >= range ) { code -= range; symbol |= 1; } + } } return symbol; } int decode_bit( Bit_model & bm ) { - int symbol; + if( range <= 0x00FFFFFF ) + { range <<= 8; code = (code << 8) | get_byte(); } const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability; if( code < bound ) { range = bound; bm.probability += (bit_model_total - bm.probability) >> bit_model_move_bits; - symbol = 0; + return 0; } else { range -= bound; code -= bound; bm.probability -= bm.probability >> bit_model_move_bits; - symbol = 1; + return 1; } - if( range <= 0x00FFFFFF ) - { range <<= 8; code = (code << 8) | read_byte(); } - return symbol; } int decode_tree( Bit_model bm[], const int num_bits ) { int model = 1; for( int i = num_bits; i > 0; --i ) - model = ( model << 1 ) | decode_bit( bm[model-1] ); + model = ( model << 1 ) | decode_bit( bm[model] ); return model - (1 << num_bits); } @@ -122,27 +131,31 @@ public: { int model = 1; int symbol = 0; - for( int i = 1; i < (1 << num_bits); i <<= 1 ) + for( int i = 0; i < num_bits; ++i ) { - const int bit = decode_bit( bm[model-1] ); - model = ( model << 1 ) | bit; - if( bit ) symbol |= i; + const int bit = decode_bit( bm[model] ); + model <<= 1; + if( bit ) { model |= 1; symbol |= (1 << i); } } return symbol; } int decode_matched( Bit_model bm[], const int match_byte ) { + Bit_model *bm1 = bm + 0x100; int symbol = 1; - for( int i = 7; i >= 0; --i ) + for( int i = 1; i <= 8; ++i ) { - const int match_bit = ( match_byte >> i ) & 1; - const int bit = decode_bit( bm[(match_bit<<8)+symbol+0xFF] ); + const int match_bit = ( match_byte << i ) & 0x100; + const int bit = decode_bit( bm1[match_bit+symbol] ); symbol = ( symbol << 1 ) | bit; - if( match_bit != bit ) break; + if( ( match_bit && !bit ) || ( !match_bit && bit ) ) + { + while( ++i <= 8 ) + symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); + break; + } } - while( symbol < 0x100 ) - symbol = ( symbol << 1 ) | decode_bit( bm[symbol-1] ); return symbol & 0xFF; } }; @@ -215,32 +228,32 @@ class LZ_decoder uint8_t get_byte( const int distance ) const throw() { - int newpos = pos - distance - 1; - if( newpos < 0 ) newpos += buffer_size; - return buffer[newpos]; + int i = pos - distance - 1; + if( i < 0 ) i += buffer_size; + return buffer[i]; } void put_byte( const uint8_t b ) { - crc32.update( crc_, b ); buffer[pos] = b; if( ++pos >= buffer_size ) flush_data(); } - bool copy_block( const int distance, int len ) + void copy_block( const int distance, int len ) { - if( distance < 0 || distance >= buffer_size || - len <= 0 || len > max_match_len ) return false; - int newpos = pos - distance - 1; - if( newpos < 0 ) newpos += buffer_size; - for( ; len > 0 ; --len ) + int i = pos - distance - 1; + if( i < 0 ) i += buffer_size; + if( len < buffer_size - std::max( pos, i ) && len <= distance ) + { + std::memcpy( buffer + pos, buffer + i, len ); + pos += len; + } + else for( ; len > 0 ; --len ) { - crc32.update( crc_, buffer[newpos] ); - buffer[pos] = buffer[newpos]; + buffer[pos] = buffer[i]; if( ++pos >= buffer_size ) flush_data(); - if( ++newpos >= buffer_size ) newpos = 0; + if( ++i >= buffer_size ) i = 0; } - return true; } void flush_data(); @@ -1,7 +1,7 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.36. -.TH LZIP "1" "April 2009" "Lzip 1.6-pre1" "User Commands" +.TH LZIP "1" "May 2009" "Lzip 1.6-pre2" "User Commands" .SH NAME -Lzip \- manual page for Lzip 1.6-pre1 +Lzip \- manual page for Lzip 1.6-pre2 .SH SYNOPSIS .B lzip [\fIoptions\fR] [\fIfiles\fR] diff --git a/doc/lzip.info b/doc/lzip.info index d40f84f..e1e5029 100644 --- a/doc/lzip.info +++ b/doc/lzip.info @@ -11,7 +11,7 @@ File: lzip.info, Node: Top, Next: Introduction, Up: (dir) Lzip **** -This manual is for Lzip (version 1.6-pre1, 27 April 2009). +This manual is for Lzip (version 1.6-pre2, 21 May 2009). * Menu: @@ -67,11 +67,12 @@ also split the compressed output in volumes of a given size, even when reading from standard input. This allows the direct creation of multivolume compressed tar archives. - The amount of memory required for compression is about 2 times the -dictionary size limit plus 8 times the dictionary size really used. For -decompression is a little more than the dictionary size really used. -Lzip will automatically use the smallest possible dictionary size for -each member without exceeding the given limit. It is important to + The amount of memory required for compression is about 5 MiB plus 1 +or 2 times the dictionary size limit (1 if input file size is less than +dictionary size limit, else 2) plus 8 times the dictionary size really +used. For decompression is a little more than the dictionary size really +used. Lzip will automatically use the smallest possible dictionary size +for each member without exceeding the given limit. It is important to appreciate that the decompression memory requirement is affected at compression time by the choice of dictionary size limit. @@ -558,15 +559,15 @@ Concept Index Tag Table: Node: Top224 -Node: Introduction967 -Node: Algorithm4208 -Node: Invoking Lzip6434 -Node: File Format10781 -Node: Examples12735 -Node: Lzdiff14568 -Node: Lzgrep15887 -Node: Lziprecover16922 -Node: Problems17619 -Node: Concept Index18144 +Node: Introduction965 +Node: Algorithm4288 +Node: Invoking Lzip6514 +Node: File Format10861 +Node: Examples12815 +Node: Lzdiff14648 +Node: Lzgrep15967 +Node: Lziprecover17002 +Node: Problems17699 +Node: Concept Index18224 End Tag Table diff --git a/doc/lzip.texinfo b/doc/lzip.texinfo index f29b29e..87e223f 100644 --- a/doc/lzip.texinfo +++ b/doc/lzip.texinfo @@ -5,8 +5,8 @@ @finalout @c %**end of header -@set UPDATED 27 April 2009 -@set VERSION 1.6-pre1 +@set UPDATED 21 May 2009 +@set VERSION 1.6-pre2 @dircategory Data Compression @direntry @@ -84,11 +84,12 @@ compressed output in volumes of a given size, even when reading from standard input. This allows the direct creation of multivolume compressed tar archives. -The amount of memory required for compression is about 2 times the -dictionary size limit plus 8 times the dictionary size really used. For -decompression is a little more than the dictionary size really used. -Lzip will automatically use the smallest possible dictionary size for -each member without exceeding the given limit. It is important to +The amount of memory required for compression is about 5 MiB plus 1 or 2 +times the dictionary size limit (1 if input file size is less than +dictionary size limit, else 2) plus 8 times the dictionary size really +used. For decompression is a little more than the dictionary size really +used. Lzip will automatically use the smallest possible dictionary size +for each member without exceeding the given limit. It is important to appreciate that the decompression memory requirement is affected at compression time by the choice of dictionary size limit. @@ -43,6 +43,41 @@ bool Matchfinder::read_block() throw() } +Matchfinder::Matchfinder( const int dict_size, const int len_limit, + const int ides ) + : + partial_data_pos( 0 ), + pos( 0 ), + cyclic_pos( 0 ), + stream_pos( 0 ), + ides_( ides ), + match_len_limit_( len_limit ), + prev_positions( new int32_t[num_prev_positions] ), + at_stream_end( false ) + { + const int buffer_size_limit = ( 2 * dict_size ) + + max_num_trials + max_match_len; + buffer_size = std::max( 65536, dict_size ); + buffer = (uint8_t *)std::malloc( buffer_size ); + if( !buffer ) throw std::bad_alloc(); + if( !read_block() ) throw Error( "read error" ); + if( !at_stream_end && buffer_size < buffer_size_limit ) + { + buffer_size = buffer_size_limit; + buffer = (uint8_t *)std::realloc( buffer, buffer_size ); + if( !buffer ) throw std::bad_alloc(); + if( !read_block() ) throw Error( "read error" ); + } + if( at_stream_end && stream_pos < dict_size ) + dictionary_size_ = std::max( min_dictionary_size, stream_pos ); + else dictionary_size_ = dict_size; + pos_limit = buffer_size; + if( !at_stream_end ) pos_limit -= max_match_len; + prev_pos_tree = new int32_t[2*dictionary_size_]; + for( int i = 0; i < num_prev_positions; ++i ) prev_positions[i] = -1; + } + + bool Matchfinder::reset() throw() { const int size = stream_pos - pos; @@ -433,7 +468,7 @@ void LZ_encoder::flush( const State & state ) trailer.data_size( matchfinder.data_position() ); trailer.member_size( range_encoder.member_position() + sizeof trailer ); for( unsigned int i = 0; i < sizeof trailer; ++i ) - range_encoder.put_byte( (( uint8_t *)&trailer)[i] ); + range_encoder.put_byte( ((uint8_t *)&trailer)[i] ); range_encoder.flush_data(); } @@ -453,7 +488,7 @@ LZ_encoder::LZ_encoder( Matchfinder & mf, const File_header & header, fill_align_prices(); for( unsigned int i = 0; i < sizeof header; ++i ) - range_encoder.put_byte( (( uint8_t *)&header)[i] ); + range_encoder.put_byte( ((uint8_t *)&header)[i] ); } @@ -86,7 +86,7 @@ inline int price_symbol( const Bit_model bm[], int symbol, const int num_bits ) { const int bit = symbol & 1; symbol >>= 1; - price += price_bit( bm[symbol-1], bit ); + price += price_bit( bm[symbol], bit ); } return price; } @@ -100,7 +100,7 @@ inline int price_symbol_reversed( const Bit_model bm[], int symbol, { const int bit = symbol & 1; symbol >>= 1; - price += price_bit( bm[model-1], bit ); + price += price_bit( bm[model], bit ); model = ( model << 1 ) | bit; } return price; @@ -116,14 +116,14 @@ inline int price_matched( const Bit_model bm[], const int symbol, { const int match_bit = ( match_byte >> i ) & 1; const int bit = ( symbol >> i ) & 1; - price += price_bit( bm[(match_bit<<8)+model+0xFF], bit ); + price += price_bit( bm[(match_bit<<8)+model+0x100], bit ); model = ( model << 1 ) | bit; if( match_bit != bit ) { while( --i >= 0 ) { const int bit = ( symbol >> i ) & 1; - price += price_bit( bm[model-1], bit ); + price += price_bit( bm[model], bit ); model = ( model << 1 ) | bit; } break; @@ -143,13 +143,12 @@ class Matchfinder long long partial_data_pos; int dictionary_size_; // bytes to keep in buffer before pos - const int after_size; // bytes to keep in buffer after pos - const int buffer_size; - uint8_t * const buffer; + int buffer_size; + uint8_t * buffer; int pos; int cyclic_pos; int stream_pos; // first byte not yet read from file - const int pos_limit; // when reached, a new block must be read + int pos_limit; // when reached, a new block must be read const int ides_; const int match_len_limit_; int32_t * const prev_positions; // last seen position of key @@ -159,32 +158,10 @@ class Matchfinder bool read_block() throw(); public: - Matchfinder( const int dict_size, const int len_limit, const int ides ) - : - partial_data_pos( 0 ), - dictionary_size_( dict_size ), - after_size( max_match_len ), - buffer_size( ( 2 * std::max( 65536, dictionary_size_ ) ) + - max_num_trials + after_size ), - buffer( new uint8_t[buffer_size] ), - pos( 0 ), - cyclic_pos( 0 ), - stream_pos( 0 ), - pos_limit( buffer_size - after_size ), - ides_( ides ), - match_len_limit_( len_limit ), - prev_positions( new int32_t[num_prev_positions] ), - at_stream_end( false ) - { - if( !read_block() ) throw Error( "read error" ); - if( at_stream_end && stream_pos < dictionary_size_ ) - dictionary_size_ = std::max( min_dictionary_size, stream_pos ); - prev_pos_tree = new int32_t[2*dictionary_size_]; - for( int i = 0; i < num_prev_positions; ++i ) prev_positions[i] = -1; - } + Matchfinder( const int dict_size, const int len_limit, const int ides ); ~Matchfinder() - { delete[] prev_pos_tree; delete[] prev_positions; delete[] buffer; } + { delete[] prev_pos_tree; delete[] prev_positions; std::free( buffer ); } uint8_t operator[]( const int i ) const throw() { return buffer[pos+i]; } int available_bytes() const throw() { return stream_pos - pos; } @@ -318,7 +295,7 @@ public: for( int i = num_bits; i > 0; --i, mask >>= 1 ) { const int bit = ( symbol & mask ); - encode_bit( bm[model-1], bit ); + encode_bit( bm[model], bit ); model <<= 1; if( bit ) model |= 1; } @@ -330,7 +307,7 @@ public: for( int i = num_bits; i > 0; --i ) { const int bit = symbol & 1; - encode_bit( bm[model-1], bit ); + encode_bit( bm[model], bit ); model = ( model << 1 ) | bit; symbol >>= 1; } @@ -343,14 +320,14 @@ public: { const int bit = ( symbol >> i ) & 1; const int match_bit = ( match_byte >> i ) & 1; - encode_bit( bm[(match_bit<<8)+model+0xFF], bit ); + encode_bit( bm[(match_bit<<8)+model+0x100], bit ); model = ( model << 1 ) | bit; if( match_bit != bit ) { while( --i >= 0 ) { const int bit = ( symbol >> i ) & 1; - encode_bit( bm[model-1], bit ); + encode_bit( bm[model], bit ); model = ( model << 1 ) | bit; } break; @@ -5,6 +5,8 @@ # This script is free software: you have unlimited permission # to copy, distribute and modify it. +LC_ALL=C +export LC_ALL invocation_name=$0 args= default_ext=.lz @@ -39,9 +41,12 @@ while [ x"$1" != x ] ; do echo " --lzip use lzip as default decompressor (default)" echo " --diff use diff to compare files (default)" echo " --cmp use cmp to compare files" + echo + echo "Report bugs to lzip-bug@nongnu.org" + echo "Lzip home page: http://www.nongnu.org/lzip/lzip.html" exit 0 ;; --version | --ve* | -V) - echo "Lzdiff 0.3" + echo "Lzdiff 0.4" echo "Copyright (C) 2009 Antonio Diaz Diaz." echo "This script is free software: you have unlimited permission" echo "to copy, distribute and modify it." @@ -93,12 +98,14 @@ if test -z "${file2}"; then file2=`echo "${file1}" | sed 's/tgz$/tar/'` ;; *.bz2) file2=`echo "${file1}" | sed 's/.bz2$//'` ;; + *.tbz) + file2=`echo "${file1}" | sed 's/tbz$/tar/'` ;; *.tbz2) file2=`echo "${file1}" | sed 's/tbz2$/tar/'` ;; *.lz) file2=`echo "${file1}" | sed 's/.lz$//'` ;; *.tlz) - file2=`echo "${file1}" | sed 's/.tlz$/tar/'` ;; + file2=`echo "${file1}" | sed 's/tlz$/tar/'` ;; *) file2="${file1}"; file1="${file1}${default_ext}" ;; esac @@ -107,14 +114,14 @@ fi prog1= prog2= case "${file1}" in -*.gz | *.tgz) prog1=gzip ;; -*.bz2 | *.tbz2) prog1=bzip2 ;; -*.lz | *.tlz) prog1=lzip ;; +*.gz | *.tgz) prog1=gzip ;; +*.bz2 | *.tbz | *.tbz2) prog1=bzip2 ;; +*.lz | *.tlz) prog1=lzip ;; esac case "${file2}" in -*.gz | *.tgz) prog2=gzip ;; -*.bz2 | *.tbz2) prog2=bzip2 ;; -*.lz | *.tlz) prog2=lzip ;; +*.gz | *.tgz) prog2=gzip ;; +*.bz2 | *.tbz | *.tbz2) prog2=bzip2 ;; +*.lz | *.tlz) prog2=lzip ;; esac retval=0 @@ -5,6 +5,8 @@ # This script is free software: you have unlimited permission # to copy, distribute and modify it. +LC_ALL=C +export LC_ALL invocation_name=$0 args= default_prog=lzip @@ -37,9 +39,12 @@ while [ x"$1" != x ] ; do echo " --gzip use gzip as default decompressor" echo " --bzip2 use bzip2 as default decompressor" echo " --lzip use lzip as default decompressor (default)" + echo + echo "Report bugs to lzip-bug@nongnu.org" + echo "Lzip home page: http://www.nongnu.org/lzip/lzip.html" exit 0 ;; --version | --ve* | -V) - echo "Lzgrep 0.3" + echo "Lzgrep 0.4" echo "Copyright (C) 2009 Antonio Diaz Diaz." echo "This script is free software: you have unlimited permission" echo "to copy, distribute and modify it." @@ -93,7 +98,7 @@ for i in "$@" ; do case "$i" in *.gz | *.tgz) prog="gzip -cdfq" ;; - *.bz2 | *.tbz2) + *.bz2 | *.tbz | *.tbz2) prog="bzip2 -cdfq" ;; *.lz | *.tlz) prog="lzip -cdfq" ;; @@ -150,14 +150,19 @@ public: uint32_t operator[]( const uint8_t byte ) const throw() { return data[byte]; } void update( uint32_t & crc, const uint8_t byte ) const throw() { crc = data[(crc^byte)&0xFF] ^ ( crc >> 8 ); } + void update( uint32_t & crc, const uint8_t * const buffer, const int size ) const throw() + { + for( int i = 0; i < size; ++i ) + crc = data[(crc^buffer[i])&0xFF] ^ ( crc >> 8 ); + } }; -const char * const magic_string = "LZIP"; +const uint8_t magic_string[4] = { 'L', 'Z', 'I', 'P' }; struct File_header { - char magic[4]; + uint8_t magic[4]; uint8_t version; uint8_t coded_dict_size; diff --git a/lziprecover.cc b/lziprecover.cc index bca2744..deed53c 100644 --- a/lziprecover.cc +++ b/lziprecover.cc @@ -49,6 +49,9 @@ const char * const program_year = "2009"; void show_help() throw() { std::printf( "%s - Member recoverer program for lzip compressed files.\n", Program_name ); + std::printf( "\nSearches for members in .lz files, and writes each member in its own .lz\n" ); + std::printf( "file. You can then use `lzip -t' to test the integrity of the resulting\n" ); + std::printf( "files, and decompress those which are undamaged.\n" ); std::printf( "\nUsage: %s [options] file\n", invocation_name ); std::printf( "Options:\n" ); std::printf( " -h, --help display this help and exit\n" ); @@ -56,6 +59,7 @@ void show_help() throw() std::printf( " -q, --quiet suppress all messages\n" ); std::printf( " -v, --verbose be verbose (a 2nd -v gives more)\n" ); std::printf( "\nReport bugs to lzip-bug@nongnu.org\n"); + std::printf( "Lzip home page: http://www.nongnu.org/lzip/lzip.html\n" ); } @@ -119,7 +123,7 @@ bool next_filename( std::string & output_filename ) } -int search_header( const char * buffer, const int size, const int pos, +int search_header( const uint8_t * buffer, const int size, const int pos, const long long last_header_pos, const long long partial_file_pos ) { @@ -129,7 +133,7 @@ int search_header( const char * buffer, const int size, const int pos, { File_trailer trailer; for( unsigned int j = 0; j < sizeof trailer; ++j ) - ((char *)&trailer)[j] = buffer[i-(sizeof trailer)+j]; + ((uint8_t *)&trailer)[j] = buffer[i-(sizeof trailer)+j]; if( partial_file_pos + i - trailer.member_size() == last_header_pos ) return i; } @@ -137,11 +141,11 @@ int search_header( const char * buffer, const int size, const int pos, } -bool verify_header( const char * buffer, const int pos ) +bool verify_header( const uint8_t * buffer, const int pos ) { File_header header; for( unsigned int i = 0; i < sizeof header; ++i ) - ((char *)&header)[i] = buffer[pos+i]; + ((uint8_t *)&header)[i] = buffer[pos+i]; if( !header.verify_magic() ) { show_error( "bad magic number (file not created by lzip).\n" ); @@ -163,18 +167,18 @@ bool verify_header( const char * buffer, const int pos ) } -int process_file( const std::string & input_filename, char * & base_buffer ) +int process_file( const std::string & input_filename, uint8_t * & base_buffer ) { const int hsize = sizeof( File_header ); const int tsize = sizeof( File_trailer ); const int buffer_size = 65536; const int base_buffer_size = tsize + buffer_size + hsize; - base_buffer = new char[base_buffer_size]; - char * const buffer = base_buffer + tsize; + base_buffer = new uint8_t[base_buffer_size]; + uint8_t * const buffer = base_buffer + tsize; const int inhandle = open_instream( input_filename ); if( inhandle < 0 ) return 1; - int size = readblock( inhandle, buffer, buffer_size + hsize ) - hsize; + int size = readblock( inhandle, (char *)buffer, buffer_size + hsize ) - hsize; bool at_stream_end = ( size < buffer_size ); if( size != buffer_size && errno ) { show_error( "read error", errno ); return 1; } @@ -195,7 +199,7 @@ int process_file( const std::string & input_filename, char * & base_buffer ) last_header_pos, partial_file_pos ); if( newpos > pos ) { - const int wr = writeblock( outhandle, buffer + pos, newpos - pos ); + const int wr = writeblock( outhandle, (char *)buffer + pos, newpos - pos ); if( wr != newpos - pos ) { show_error( "write error", errno ); return 1; } if( close( outhandle ) != 0 ) @@ -213,7 +217,7 @@ int process_file( const std::string & input_filename, char * & base_buffer ) if( !at_stream_end ) { partial_file_pos += buffer_size; - const int wr = writeblock( outhandle, buffer + pos, buffer_size - pos ); + const int wr = writeblock( outhandle, (char *)buffer + pos, buffer_size - pos ); if( wr != buffer_size - pos ) { show_error( "write error", errno ); return 1; } std::memcpy( base_buffer, base_buffer + buffer_size, tsize + hsize ); @@ -221,13 +225,13 @@ int process_file( const std::string & input_filename, char * & base_buffer ) } else { - const int wr = writeblock( outhandle, buffer + pos, size + hsize - pos ); + const int wr = writeblock( outhandle, (char *)buffer + pos, size + hsize - pos ); if( wr != size + hsize - pos ) { show_error( "write error", errno ); return 1; } break; } } - size = readblock( inhandle, buffer + hsize, buffer_size ); + size = readblock( inhandle, (char *)buffer + hsize, buffer_size ); at_stream_end = ( size < buffer_size ); if( size != buffer_size && errno ) { show_error( "read error", errno ); return 1; } @@ -339,7 +343,7 @@ int main( const int argc, const char * argv[] ) if( argind + 1 != parser.arguments() ) { show_error( "you must specify exactly 1 file", 0, true ); return 1; } - char * base_buffer; + uint8_t * base_buffer; const int retval = process_file( parser.argument( argind ), base_buffer ); delete[] base_buffer; @@ -473,7 +473,7 @@ int decompress( const int inhandle, const Pretty_print & pp, { File_header header; for( unsigned int i = 0; i < sizeof header; ++i ) - ((uint8_t *)&header)[i] = ibuf.read_byte(); + ((uint8_t *)&header)[i] = ibuf.get_byte(); if( ibuf.finished() ) { if( first_pass ) { pp( "error reading member header" ); return 1; } diff --git a/testsuite/check.sh b/testsuite/check.sh index 83ebc6f..f871f28 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -5,9 +5,12 @@ # This script is free software: you have unlimited permission # to copy, distribute and modify it. +LC_ALL=C +export LC_ALL objdir=`pwd` testdir=`cd "$1" ; pwd` LZIP="${objdir}"/lzip +LZIPRECOVER="${objdir}"/lziprecover framework_failure() { echo 'failure in testing framework'; exit 1; } if [ ! -x "${LZIP}" ] ; then @@ -57,6 +60,15 @@ for i in 1 2 3 4 5 6 7 8 9; do echo -n . done +"${LZIP}" -c in in in > out || fail=1 +echo -n "garbage" >> out || fail=1 +"${LZIPRECOVER}" out || fail=1 +for i in 1 2 3; do + "${LZIP}" -cd rec0000${i}out > copy || fail=1 + cmp in copy || fail=1 + echo -n . +done + echo if test ${fail} = 0; then echo "tests completed successfully." |