diff options
-rw-r--r-- | ChangeLog | 12 | ||||
-rw-r--r-- | Makefile.in | 30 | ||||
-rw-r--r-- | NEWS | 13 | ||||
-rwxr-xr-x | configure | 6 | ||||
-rw-r--r-- | decoder.cc | 53 | ||||
-rw-r--r-- | decoder.h | 98 | ||||
-rw-r--r-- | doc/lzip.1 | 5 | ||||
-rw-r--r-- | doc/lzip.info | 52 | ||||
-rw-r--r-- | doc/lzip.texinfo | 40 | ||||
-rw-r--r-- | doc/lziprecover.1 | 5 | ||||
-rw-r--r-- | encoder.cc | 89 | ||||
-rw-r--r-- | encoder.h | 64 | ||||
-rw-r--r-- | lzip.h | 93 | ||||
-rw-r--r-- | lziprecover.cc | 161 | ||||
-rw-r--r-- | main.cc | 223 | ||||
-rwxr-xr-x | testsuite/check.sh | 32 |
16 files changed, 502 insertions, 474 deletions
@@ -1,3 +1,15 @@ +2010-04-05 Antonio Diaz Diaz <ant_diaz@teleline.es> + + * Version 1.10 released. + * decoder.h: Input_buffer integrated in Range_decoder. + * main.cc: File specified with option "-o" is now created with + mode 0666 if umask allows it, deleted if interrupted by user. + * main.cc: New constant "o_binary". + * main.cc: Dictionary size for options -2, -3, -4 and -8 has + been changed to improve linearity of compressed sizes. + * lzip.h: Fixed warnings produced by over-optimization (-O3). + * Makefile.in: Added quotes to directory names. + 2010-01-17 Antonio Diaz Diaz <ant_diaz@teleline.es> * Version 1.9 released. diff --git a/Makefile.in b/Makefile.in index 09b0294..e88a1b6 100644 --- a/Makefile.in +++ b/Makefile.in @@ -64,34 +64,34 @@ check : all @$(VPATH)/testsuite/check.sh $(VPATH)/testsuite install : all install-info install-man - if [ ! -d $(DESTDIR)$(bindir) ] ; then $(INSTALL_DIR) $(DESTDIR)$(bindir) ; fi - $(INSTALL_PROGRAM) ./$(progname) $(DESTDIR)$(bindir)/$(progname) - $(INSTALL_PROGRAM) ./lziprecover $(DESTDIR)$(bindir)/lziprecover + if [ ! -d "$(DESTDIR)$(bindir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(bindir)" ; fi + $(INSTALL_PROGRAM) ./$(progname) "$(DESTDIR)$(bindir)/$(progname)" + $(INSTALL_PROGRAM) ./lziprecover "$(DESTDIR)$(bindir)/lziprecover" install-info : - if [ ! -d $(DESTDIR)$(infodir) ] ; then $(INSTALL_DIR) $(DESTDIR)$(infodir) ; fi - $(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info $(DESTDIR)$(infodir)/$(pkgname).info - -install-info --info-dir=$(DESTDIR)$(infodir) $(DESTDIR)$(infodir)/$(pkgname).info + if [ ! -d "$(DESTDIR)$(infodir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(infodir)" ; fi + $(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info "$(DESTDIR)$(infodir)/$(pkgname).info" + -install-info --info-dir="$(DESTDIR)$(infodir)" $(DESTDIR)$(infodir)/$(pkgname).info install-man : - if [ ! -d $(DESTDIR)$(mandir)/man1 ] ; then $(INSTALL_DIR) $(DESTDIR)$(mandir)/man1 ; fi - $(INSTALL_DATA) $(VPATH)/doc/$(progname).1 $(DESTDIR)$(mandir)/man1/$(progname).1 - $(INSTALL_DATA) $(VPATH)/doc/lziprecover.1 $(DESTDIR)$(mandir)/man1/lziprecover.1 + if [ ! -d "$(DESTDIR)$(mandir)/man1" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" ; fi + $(INSTALL_DATA) $(VPATH)/doc/$(progname).1 "$(DESTDIR)$(mandir)/man1/$(progname).1" + $(INSTALL_DATA) $(VPATH)/doc/lziprecover.1 "$(DESTDIR)$(mandir)/man1/lziprecover.1" install-strip : all $(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install uninstall : uninstall-info uninstall-man - -rm -f $(DESTDIR)$(bindir)/$(progname) - -rm -f $(DESTDIR)$(bindir)/lziprecover + -rm -f "$(DESTDIR)$(bindir)/$(progname)" + -rm -f "$(DESTDIR)$(bindir)/lziprecover" uninstall-info : - -install-info --info-dir=$(DESTDIR)$(infodir) --remove $(DESTDIR)$(infodir)/$(pkgname).info - -rm -f $(DESTDIR)$(infodir)/$(pkgname).info + -install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info" + -rm -f "$(DESTDIR)$(infodir)/$(pkgname).info" uninstall-man : - -rm -f $(DESTDIR)$(mandir)/man1/$(progname).1 - -rm -f $(DESTDIR)$(mandir)/man1/lziprecover.1 + -rm -f "$(DESTDIR)$(mandir)/man1/$(progname).1" + -rm -f "$(DESTDIR)$(mandir)/man1/lziprecover.1" dist : doc ln -sf $(VPATH) $(DISTNAME) @@ -1,7 +1,12 @@ -Changes in version 1.9: +Changes in version 1.10: -Lzip now returns at least 1 if closing stdout fails. +File specified with option "-o" is now created with write permission for +all if umask allows it, and deleted if lzip is interrupted by the user. -Lintian warnings in Debian about man pages have been fixed. +Regular files are now open in binary mode in non-POSIX platforms +defining the O_BINARY macro. -The "COPYING" file is no more used as data for testing. +Dictionary size for options -2, -3, -4 and -8 has been changed to +improve linearity of compressed sizes. + +Compiler warnings produced by over-optimization (-O3) have been fixed. @@ -5,12 +5,12 @@ # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. # -# Date of this version: 2010-01-17 +# Date of this version: 2010-04-05 args= no_create= pkgname=lzip -pkgversion=1.9 +pkgversion=1.10 progname=lzip srctrigger=lzip.h @@ -135,7 +135,7 @@ if [ -z "${CXX}" ] ; then # Let the user override the test. fi echo -if [ -z ${no_create} ] ; then +if [ -z "${no_create}" ] ; then echo "creating config.status" rm -f config.status cat > config.status << EOF @@ -33,13 +33,16 @@ const CRC32 crc32; -bool Input_buffer::read_block() +bool Range_decoder::read_block() { - if( at_stream_end ) return false; - stream_pos = readblock( ides_, (char *)buffer, buffer_size ); - if( stream_pos != buffer_size && errno ) throw Error( "read error" ); - pos = 0; - at_stream_end = ( stream_pos < buffer_size ); + if( !at_stream_end ) + { + stream_pos = readblock( infd_, buffer, buffer_size ); + if( stream_pos != buffer_size && errno ) throw Error( "read error" ); + at_stream_end = ( stream_pos < buffer_size ); + partial_member_pos += pos; + pos = 0; + } return !finished(); } @@ -50,8 +53,8 @@ void LZ_decoder::flush_data() if( size > 0 ) { crc32.update( crc_, buffer + stream_pos, size ); - if( odes_ >= 0 && - writeblock( odes_, (char *)buffer + stream_pos, size ) != size ) + if( outfd_ >= 0 && + writeblock( outfd_, buffer + stream_pos, size ) != size ) throw Error( "write error" ); if( pos >= buffer_size ) { partial_data_pos += pos; pos = 0; } stream_pos = pos; @@ -61,13 +64,15 @@ void LZ_decoder::flush_data() bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const { - bool error = false; File_trailer trailer; - const int trailer_size = trailer.size( format_version ); + const int trailer_size = File_trailer::size( member_version ); + const long long member_size = member_position() + trailer_size; + bool error = false; + for( int i = 0; i < trailer_size && !error; ++i ) { if( !range_decoder.finished() ) - ((uint8_t *)&trailer)[i] = range_decoder.get_byte(); + trailer.data[i] = range_decoder.get_byte(); else { error = true; @@ -77,9 +82,10 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const std::fprintf( stderr, "trailer truncated at trailer position %d;" " some checks may fail.\n", i ); } + for( ; i < trailer_size; ++i ) trailer.data[i] = 0; } } - if( format_version == 0 ) trailer.member_size( member_position() ); + if( member_version == 0 ) trailer.member_size( member_size ); if( !range_decoder.code_is_zero() ) { error = true; @@ -104,23 +110,19 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const error = true; if( verbosity >= 0 ) { - if( trailer.data_size() >= 0 ) - { pp(); - std::fprintf( stderr, "data size mismatch; trailer says %lld, data size is %lld.\n", - trailer.data_size(), data_position() ); } - else pp( "member trailer is corrupt" ); + pp(); + std::fprintf( stderr, "data size mismatch; trailer says %lld, data size is %lld (0x%llX).\n", + trailer.data_size(), data_position(), data_position() ); } } - if( trailer.member_size() != member_position() ) + if( trailer.member_size() != member_size ) { error = true; if( verbosity >= 0 ) { - if( trailer.member_size() >= 0 ) - { pp(); - std::fprintf( stderr, "member size mismatch; trailer says %lld, member size is %lld.\n", - trailer.member_size(), member_position() ); } - else pp( "member trailer is corrupt" ); + pp(); + std::fprintf( stderr, "member size mismatch; trailer says %lld, member size is %lld (0x%llX).\n", + trailer.member_size(), member_size, member_size ); } } if( !error && verbosity >= 3 ) @@ -140,6 +142,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) unsigned int rep2 = 0; // repeated distances unsigned int rep3 = 0; State state; + range_decoder.load(); while( true ) { @@ -203,7 +206,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) { rep0 += range_decoder.decode( direct_bits - dis_align_bits ) << dis_align_bits; rep0 += range_decoder.decode_tree_reversed( bm_align, dis_align_bits ); - if( rep0 == 0xFFFFFFFF ) // Marker found + if( rep0 == 0xFFFFFFFFU ) // Marker found { rep0 = rep0_saved; range_decoder.normalize(); @@ -214,7 +217,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) } if( len == min_match_len + 1 ) // Sync Flush marker { - range_decoder.reload(); continue; + range_decoder.load(); continue; } if( verbosity >= 0 ) { @@ -15,74 +15,57 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ -class Input_buffer +class Range_decoder { - enum { buffer_size = 65536 }; - uint8_t * const buffer; + enum { buffer_size = 16384 }; + long long partial_member_pos; + uint8_t * const buffer; // input buffer int pos; int stream_pos; // when reached, a new block must be read - const int ides_; // input file descriptor + uint32_t code; + uint32_t range; + const int infd_; // input file descriptor bool at_stream_end; bool read_block(); public: - Input_buffer( const int ides ) + Range_decoder( const int infd ) : + partial_member_pos( 0 ), buffer( new uint8_t[buffer_size] ), pos( 0 ), stream_pos( 0 ), - ides_( ides ), + code( 0 ), + range( 0xFFFFFFFFU ), + infd_( infd ), at_stream_end( false ) {} - ~Input_buffer() { delete[] buffer; } + ~Range_decoder() { delete[] buffer; } + bool code_is_zero() const throw() { return ( code == 0 ); } bool finished() const throw() { return at_stream_end && pos >= stream_pos; } + long long member_position() const throw() + { return partial_member_pos + pos; } + void reset_member_position() throw() + { partial_member_pos = -pos; } uint8_t get_byte() { if( pos >= stream_pos && !read_block() ) return 0; return buffer[pos++]; } - }; - - -class Range_decoder - { - mutable long long member_pos; - uint32_t code; - uint32_t range; - Input_buffer & ibuf; - -public: - Range_decoder( const int header_size, Input_buffer & buf ) - : - member_pos( header_size ), - code( 0 ), - range( 0xFFFFFFFF ), - ibuf( buf ) - { for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte(); } - - bool code_is_zero() const throw() { return ( code == 0 ); } - bool finished() const throw() { return ibuf.finished(); } - long long member_position() const throw() { return member_pos; } - - uint8_t get_byte() const - { - ++member_pos; - return ibuf.get_byte(); - } - void reload() throw() + void load() throw() { code = 0; - range = 0xFFFFFFFF; + range = 0xFFFFFFFFU; for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte(); } void normalize() { - if( range <= 0x00FFFFFF ) + if( range <= 0x00FFFFFFU ) { range <<= 8; code = (code << 8) | get_byte(); } } @@ -92,7 +75,7 @@ public: for( int i = num_bits; i > 0; --i ) { symbol <<= 1; - if( range <= 0x00FFFFFF ) + if( range <= 0x00FFFFFFU ) { range <<= 7; code = (code << 8) | get_byte(); if( code >= range ) { code -= range; symbol |= 1; } @@ -148,16 +131,16 @@ public: int decode_matched( Bit_model bm[], const int match_byte ) { - Bit_model *bm1 = bm + 0x100; + Bit_model * const bm1 = bm + 0x100; int symbol = 1; - for( int i = 1; i <= 8; ++i ) + for( int i = 7; i >= 0; --i ) { - const int match_bit = ( match_byte << i ) & 0x100; - const int bit = decode_bit( bm1[match_bit+symbol] ); + const int match_bit = ( match_byte >> i ) & 1; + const int bit = decode_bit( bm1[(match_bit<<8)+symbol] ); symbol = ( symbol << 1 ) | bit; - if( ( match_bit && !bit ) || ( !match_bit && bit ) ) + if( match_bit != bit ) { - while( ++i <= 8 ) + while( --i >= 0 ) symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); break; } @@ -193,30 +176,30 @@ class Literal_decoder { Bit_model bm_literal[1<<literal_context_bits][0x300]; - int state( const int prev_byte ) const throw() + int lstate( const int prev_byte ) const throw() { return ( prev_byte >> ( 8 - literal_context_bits ) ); } public: uint8_t decode( Range_decoder & range_decoder, const uint8_t prev_byte ) - { return range_decoder.decode_tree( bm_literal[state(prev_byte)], 8 ); } + { return range_decoder.decode_tree( bm_literal[lstate(prev_byte)], 8 ); } uint8_t decode_matched( Range_decoder & range_decoder, const uint8_t prev_byte, const uint8_t match_byte ) - { return range_decoder.decode_matched( bm_literal[state(prev_byte)], match_byte ); } + { return range_decoder.decode_matched( bm_literal[lstate(prev_byte)], match_byte ); } }; class LZ_decoder { long long partial_data_pos; - const int format_version; + const int member_version; const int dictionary_size; const int buffer_size; uint8_t * const buffer; int pos; int stream_pos; // first byte not yet written to file uint32_t crc_; - const int odes_; // output file descriptor + const int outfd_; // output file descriptor Bit_model bm_match[State::states][pos_states]; Bit_model bm_rep[State::states]; @@ -228,7 +211,7 @@ class LZ_decoder Bit_model bm_dis[modeled_distances-end_dis_model]; Bit_model bm_align[dis_align_size]; - Range_decoder range_decoder; + Range_decoder & range_decoder; Len_decoder len_decoder; Len_decoder rep_match_len_decoder; Literal_decoder literal_decoder; @@ -267,24 +250,23 @@ class LZ_decoder bool verify_trailer( const Pretty_print & pp ) const; public: - LZ_decoder( const File_header & header, Input_buffer & ibuf, const int odes ) + LZ_decoder( const File_header & header, Range_decoder & rdec, const int outfd ) : partial_data_pos( 0 ), - format_version( header.version ), + member_version( header.version() ), dictionary_size( header.dictionary_size() ), buffer_size( std::max( 65536, dictionary_size ) ), buffer( new uint8_t[buffer_size] ), pos( 0 ), stream_pos( 0 ), - crc_( 0xFFFFFFFF ), - odes_( odes ), - range_decoder( sizeof header, ibuf ), - literal_decoder() + crc_( 0xFFFFFFFFU ), + outfd_( outfd ), + range_decoder( rdec ) { buffer[buffer_size-1] = 0; } // prev_byte of first_byte ~LZ_decoder() { delete[] buffer; } - uint32_t crc() const throw() { return crc_ ^ 0xFFFFFFFF; } + uint32_t crc() const throw() { return crc_ ^ 0xFFFFFFFFU; } int decode_member( const Pretty_print & pp ); long long member_position() const throw() @@ -1,5 +1,5 @@ -.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.36. -.TH LZIP "1" "January 2010" "Lzip 1.9" "User Commands" +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1. +.TH LZIP "1" "April 2010" "Lzip 1.10" "User Commands" .SH NAME Lzip \- data compressor based on the LZMA algorithm .SH SYNOPSIS @@ -66,6 +66,7 @@ Numbers may be followed by a multiplier: k = kB = 10^3 = 1000, Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc... .SH "REPORTING BUGS" Report bugs to lzip\-bug@nongnu.org +.br Lzip home page: http://www.nongnu.org/lzip/lzip.html .SH COPYRIGHT Copyright \(co 2010 Antonio Diaz Diaz. diff --git a/doc/lzip.info b/doc/lzip.info index 5e2e530..7914b91 100644 --- a/doc/lzip.info +++ b/doc/lzip.info @@ -11,7 +11,7 @@ File: lzip.info, Node: Top, Next: Introduction, Up: (dir) Lzip Manual *********** -This manual is for Lzip (version 1.9, 17 January 2010). +This manual is for Lzip (version 1.10, 5 April 2010). * Menu: @@ -68,11 +68,11 @@ multivolume compressed tar archives. The amount of memory required for compression is about 5 MiB plus 1 or 2 times the dictionary size limit (1 if input file size is less than dictionary size limit, else 2) plus 8 times the dictionary size really -used. For decompression is a little more than the dictionary size really -used. Lzip will automatically use the smallest possible dictionary size -without exceeding the given limit. It is important to appreciate that -the decompression memory requirement is affected at compression time by -the choice of dictionary size limit. +used. For decompression it is a little more than the dictionary size +really used. Lzip will automatically use the smallest possible +dictionary size without exceeding the given limit. It is important to +appreciate that the decompression memory requirement is affected at +compression time by the choice of dictionary size limit. When decompressing, lzip attempts to guess the name for the decompressed file from that of the compressed file as follows: @@ -254,15 +254,15 @@ The format for running lzip is: slower than `-1'. These options have no effect when decompressing. Level Dictionary size Match length limit - -1 1MiB 10 bytes - -2 1MiB 12 bytes - -3 1MiB 17 bytes - -4 2MiB 26 bytes - -5 4MiB 44 bytes - -6 8MiB 80 bytes - -7 16MiB 108 bytes - -8 16MiB 163 bytes - -9 32MiB 273 bytes + -1 1 MiB 10 bytes + -2 1.5 MiB 12 bytes + -3 2 MiB 17 bytes + -4 3 MiB 26 bytes + -5 4 MiB 44 bytes + -6 8 MiB 80 bytes + -7 16 MiB 108 bytes + -8 24 MiB 163 bytes + -9 32 MiB 273 bytes `--fast' `--best' @@ -419,6 +419,12 @@ writes each member in its own .lz file. You can then use `lzip -t' to test the integrity of the resulting files, and decompress those which are undamaged. + Data from damaged members can be partially recovered writing it to +stdout as shown in the following example (the resulting file may contain +garbage data at the end): + + lzip -cd rec00001file.lz > rec00001file + Lziprecover takes a single argument, the name of the damaged file, and writes a number of files `rec00001file.lz', `rec00002file.lz', etc, containing the extracted members. The output filenames are designed so @@ -466,13 +472,13 @@ Concept Index Tag Table: Node: Top224 -Node: Introduction899 -Node: Algorithm4206 -Node: Invoking Lzip6432 -Node: File Format10779 -Node: Examples12733 -Node: Lziprecover14571 -Node: Problems15270 -Node: Concept Index15795 +Node: Introduction897 +Node: Algorithm4207 +Node: Invoking Lzip6433 +Node: File Format10780 +Node: Examples12734 +Node: Lziprecover14572 +Node: Problems15487 +Node: Concept Index16012 End Tag Table diff --git a/doc/lzip.texinfo b/doc/lzip.texinfo index a6d5d79..9cacd16 100644 --- a/doc/lzip.texinfo +++ b/doc/lzip.texinfo @@ -5,8 +5,8 @@ @finalout @c %**end of header -@set UPDATED 17 January 2010 -@set VERSION 1.9 +@set UPDATED 5 April 2010 +@set VERSION 1.10 @dircategory Data Compression @direntry @@ -85,11 +85,11 @@ compressed tar archives. The amount of memory required for compression is about 5 MiB plus 1 or 2 times the dictionary size limit (1 if input file size is less than dictionary size limit, else 2) plus 8 times the dictionary size really -used. For decompression is a little more than the dictionary size really -used. Lzip will automatically use the smallest possible dictionary size -without exceeding the given limit. It is important to appreciate that -the decompression memory requirement is affected at compression time by -the choice of dictionary size limit. +used. For decompression it is a little more than the dictionary size +really used. Lzip will automatically use the smallest possible +dictionary size without exceeding the given limit. It is important to +appreciate that the decompression memory requirement is affected at +compression time by the choice of dictionary size limit. When decompressing, lzip attempts to guess the name for the decompressed file from that of the compressed file as follows: @@ -274,15 +274,15 @@ as shown in the table below. Note that @samp{-9} can be much slower than @multitable {Level} {Dictionary size} {Match length limit} @item Level @tab Dictionary size @tab Match length limit -@item -1 @tab 1MiB @tab 10 bytes -@item -2 @tab 1MiB @tab 12 bytes -@item -3 @tab 1MiB @tab 17 bytes -@item -4 @tab 2MiB @tab 26 bytes -@item -5 @tab 4MiB @tab 44 bytes -@item -6 @tab 8MiB @tab 80 bytes -@item -7 @tab 16MiB @tab 108 bytes -@item -8 @tab 16MiB @tab 163 bytes -@item -9 @tab 32MiB @tab 273 bytes +@item -1 @tab 1 MiB @tab 10 bytes +@item -2 @tab 1.5 MiB @tab 12 bytes +@item -3 @tab 2 MiB @tab 17 bytes +@item -4 @tab 3 MiB @tab 26 bytes +@item -5 @tab 4 MiB @tab 44 bytes +@item -6 @tab 8 MiB @tab 80 bytes +@item -7 @tab 16 MiB @tab 108 bytes +@item -8 @tab 24 MiB @tab 163 bytes +@item -9 @tab 32 MiB @tab 273 bytes @end multitable @item --fast @@ -468,6 +468,14 @@ writes each member in its own .lz file. You can then use @w{@samp{lzip -t}} to test the integrity of the resulting files, and decompress those which are undamaged. +Data from damaged members can be partially recovered writing it to +stdout as shown in the following example (the resulting file may contain +garbage data at the end): + +@example +lzip -cd rec00001file.lz > rec00001file +@end example + Lziprecover takes a single argument, the name of the damaged file, and writes a number of files @samp{rec00001file.lz}, @samp{rec00002file.lz}, etc, containing the extracted members. The output filenames are designed diff --git a/doc/lziprecover.1 b/doc/lziprecover.1 index 95f5709..0a60e62 100644 --- a/doc/lziprecover.1 +++ b/doc/lziprecover.1 @@ -1,5 +1,5 @@ -.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.36. -.TH LZIPRECOVER "1" "January 2010" "Lziprecover 1.9" "User Commands" +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1. +.TH LZIPRECOVER "1" "April 2010" "Lziprecover 1.10" "User Commands" .SH NAME Lziprecover \- recover undamaged members from lzip files .SH SYNOPSIS @@ -26,6 +26,7 @@ suppress all messages be verbose (a 2nd \fB\-v\fR gives more) .SH "REPORTING BUGS" Report bugs to lzip\-bug@nongnu.org +.br Lzip home page: http://www.nongnu.org/lzip/lzip.html .SH COPYRIGHT Copyright \(co 2010 Antonio Diaz Diaz. @@ -36,7 +36,7 @@ Prob_prices prob_prices; bool Matchfinder::read_block() throw() { const int size = buffer_size - stream_pos; - const int rd = readblock( ides_, (char *)buffer + stream_pos, size ); + const int rd = readblock( infd_, buffer + stream_pos, size ); stream_pos += rd; if( rd < size ) at_stream_end = true; return ( rd == size || !errno ); @@ -44,13 +44,13 @@ bool Matchfinder::read_block() throw() Matchfinder::Matchfinder( const int dict_size, const int len_limit, - const int ides ) + const int infd ) : partial_data_pos( 0 ), pos( 0 ), cyclic_pos( 0 ), stream_pos( 0 ), - ides_( ides ), + infd_( infd ), match_len_limit_( len_limit ), prev_positions( new int32_t[num_prev_positions] ), at_stream_end( false ) @@ -80,7 +80,7 @@ Matchfinder::Matchfinder( const int dict_size, const int len_limit, bool Matchfinder::reset() throw() { const int size = stream_pos - pos; - std::memmove( buffer, buffer + pos, size ); + if( size > 0 ) std::memmove( buffer, buffer + pos, size ); partial_data_pos = 0; stream_pos -= pos; pos = 0; @@ -130,10 +130,11 @@ int Matchfinder::longest_match_len( int * const distances ) throw() const uint8_t * const data = buffer + pos; const int key2 = num_prev_positions4 + num_prev_positions3 + ( ( (int)data[0] << 8 ) | data[1] ); - const int tmp = crc32[data[0]] ^ data[1] ^ ( (int)data[2] << 8 ); - const int key3 = num_prev_positions4 + ( tmp & ( num_prev_positions3 - 1 ) ); - const int key4 = ( tmp ^ ( crc32[data[3]] << 5 ) ) & - ( num_prev_positions4 - 1 ); + const uint32_t tmp = crc32[data[0]] ^ data[1] ^ ( (uint32_t)data[2] << 8 ); + const int key3 = num_prev_positions4 + + (int)( tmp & ( num_prev_positions3 - 1 ) ); + const int key4 = (int)( ( tmp ^ ( crc32[data[3]] << 5 ) ) & + ( num_prev_positions4 - 1 ) ); if( distances ) { @@ -242,8 +243,8 @@ void LZ_encoder::fill_distance_prices() throw() { for( int dis_state = 0; dis_state < max_dis_states; ++dis_state ) { - int * dsp = dis_slot_prices[dis_state]; - const Bit_model * bmds = bm_dis_slot[dis_state]; + int * const dsp = dis_slot_prices[dis_state]; + const Bit_model * const bmds = bm_dis_slot[dis_state]; int slot = 0; for( ; slot < end_dis_model && slot < num_dis_slots; ++slot ) dsp[slot] = price_symbol( bmds, slot, dis_slot_bits ); @@ -251,7 +252,7 @@ void LZ_encoder::fill_distance_prices() throw() dsp[slot] = price_symbol( bmds, slot, dis_slot_bits ) + (((( slot >> 1 ) - 1 ) - dis_align_bits ) << price_shift ); - int * dp = dis_prices[dis_state]; + int * const dp = dis_prices[dis_state]; int dis = 0; for( ; dis < start_dis_model; ++dis ) dp[dis] = dsp[dis]; @@ -267,8 +268,10 @@ void LZ_encoder::fill_distance_prices() throw() } -// Return value: ( dis == -1 ) && ( len == 1 ) means literal -int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances], +// Return value == number of bytes advanced (ahead). +// trials[0]..trials[retval-1] contain the steps to encode. +// ( trials[0].dis == -1 && trials[0].price == 1 ) means literal. +int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances], const State & state ) { int main_len; @@ -303,15 +306,14 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances], return main_len; } - trials[0].state = state; - for( int i = 0; i < num_rep_distances; ++i ) trials[0].reps[i] = reps[i]; - + { + const int pos_state = matchfinder.data_position() & pos_state_mask; const uint8_t prev_byte = matchfinder[-1]; const uint8_t cur_byte = matchfinder[0]; const uint8_t match_byte = matchfinder[-reps[0]-1]; - unsigned int position = matchfinder.data_position(); - const int pos_state = position & pos_state_mask; + trials[0].state = state; + for( int i = 0; i < num_rep_distances; ++i ) trials[0].reps[i] = reps[i]; trials[1].dis = -1; trials[1].prev_index = 0; trials[1].price = price0( bm_match[state()][pos_state] ); @@ -359,6 +361,7 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances], trials[len].update( rep, 0, price + rep_match_len_encoder.price( len, pos_state ) ); } + } int cur = 0; int num_trials = main_len; @@ -366,7 +369,7 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances], while( true ) { - if( ++cur >= num_trials ) + if( ++cur >= num_trials ) // no more initialized trials { backward( cur ); return cur; @@ -398,10 +401,11 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances], mtf_reps( cur_trial.dis, cur_trial.reps ); } + const int pos_state = matchfinder.data_position() & pos_state_mask; const uint8_t prev_byte = matchfinder[-1]; const uint8_t cur_byte = matchfinder[0]; const uint8_t match_byte = matchfinder[-cur_trial.reps[0]-1]; - const int pos_state = ++position & pos_state_mask; + int next_price = cur_trial.price + price0( bm_match[cur_trial.state()][pos_state] ); if( cur_trial.state.is_char() ) next_price += literal_encoder.price_symbol( prev_byte, cur_byte ); @@ -445,7 +449,7 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances], if( newlen <= len_limit && ( newlen > min_match_len || ( newlen == min_match_len && - match_distances[newlen] < modeled_distances ) ) ) + match_distances[min_match_len] < modeled_distances ) ) ) { const int normal_match_price = match_price + price0( bm_rep[cur_trial.state()] ); @@ -461,54 +465,55 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances], } - // End Of Stream mark => (dis == 0xFFFFFFFF, len == min_match_len) + // End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len) void LZ_encoder::full_flush( const State & state ) { - const int pos_state = ( matchfinder.data_position() ) & pos_state_mask; + const int pos_state = matchfinder.data_position() & pos_state_mask; range_encoder.encode_bit( bm_match[state()][pos_state], 1 ); range_encoder.encode_bit( bm_rep[state()], 0 ); - encode_pair( 0xFFFFFFFF, min_match_len, pos_state ); + encode_pair( 0xFFFFFFFFU, min_match_len, pos_state ); range_encoder.flush(); File_trailer trailer; trailer.data_crc( crc() ); trailer.data_size( matchfinder.data_position() ); - trailer.member_size( range_encoder.member_position() + sizeof trailer ); - for( unsigned int i = 0; i < sizeof trailer; ++i ) - range_encoder.put_byte( ((uint8_t *)&trailer)[i] ); + trailer.member_size( range_encoder.member_position() + File_trailer::size() ); + for( int i = 0; i < File_trailer::size(); ++i ) + range_encoder.put_byte( trailer.data[i] ); range_encoder.flush_data(); } LZ_encoder::LZ_encoder( Matchfinder & mf, const File_header & header, - const int odes ) + const int outfd ) : longest_match_found( 0 ), - crc_( 0xFFFFFFFF ), + crc_( 0xFFFFFFFFU ), matchfinder( mf ), - range_encoder( odes ), + range_encoder( outfd ), len_encoder( matchfinder.match_len_limit() ), rep_match_len_encoder( matchfinder.match_len_limit() ), - literal_encoder(), num_dis_slots( 2 * File_header::real_bits( matchfinder.dictionary_size() - 1 ) ) { fill_align_prices(); - for( unsigned int i = 0; i < sizeof header; ++i ) - range_encoder.put_byte( ((uint8_t *)&header)[i] ); + for( int i = 0; i < File_header::size; ++i ) + range_encoder.put_byte( header.data[i] ); } bool LZ_encoder::encode_member( const long long member_size ) { - if( range_encoder.member_position() != sizeof( File_header ) ) + if( range_encoder.member_position() != File_header::size ) return false; // can be called only once - const long long member_size_limit = member_size - sizeof( File_trailer ) - 16; + const long long member_size_limit = + member_size - File_trailer::size() - max_marker_size; int fill_counter = 0; int rep_distances[num_rep_distances]; State state; for( int i = 0; i < num_rep_distances; ++i ) rep_distances[i] = 0; - if( !matchfinder.finished() ) // encode first byte + // encode first byte + if( matchfinder.data_position() == 0 && !matchfinder.finished() ) { range_encoder.encode_bit( bm_match[state()][0], 0 ); const uint8_t prev_byte = 0; @@ -523,22 +528,23 @@ bool LZ_encoder::encode_member( const long long member_size ) if( matchfinder.finished() ) { full_flush( state ); return true; } if( fill_counter <= 0 ) { fill_distance_prices(); fill_counter = 512; } - int ahead = best_pair_sequence( rep_distances, state ); + int ahead = sequence_optimizer( rep_distances, state ); if( ahead <= 0 ) return false; fill_counter -= ahead; for( int i = 0; ; ) { const int pos_state = ( matchfinder.data_position() - ahead ) & pos_state_mask; - int dis = trials[i].dis; + const int dis = trials[i].dis; const int len = trials[i].price; bool bit = ( dis < 0 && len == 1 ); range_encoder.encode_bit( bm_match[state()][pos_state], !bit ); - if( bit ) + if( bit ) // literal byte { const uint8_t prev_byte = matchfinder[-ahead-1]; const uint8_t cur_byte = matchfinder[-ahead]; + crc32.update( crc_, cur_byte ); if( state.is_char() ) literal_encoder.encode( range_encoder, prev_byte, cur_byte ); else @@ -548,8 +554,9 @@ bool LZ_encoder::encode_member( const long long member_size ) } state.set_char(); } - else + else // match or repeated match { + crc32.update( crc_, matchfinder.ptr_to_current_pos() - ahead, len ); mtf_reps( dis, rep_distances ); bit = ( dis < num_rep_distances ); range_encoder.encode_bit( bm_rep[state()], bit ); @@ -578,8 +585,6 @@ bool LZ_encoder::encode_member( const long long member_size ) state.set_match(); } } - for( int j = 0; j < len; ++j ) - crc32.update( crc_, matchfinder[j-ahead] ); ahead -= len; i += len; if( range_encoder.member_position() >= member_size_limit ) { @@ -64,8 +64,8 @@ public: } } - int operator[]( const int symbol ) const throw() - { return data[symbol >> 2]; } + int operator[]( const int probability ) const throw() + { return data[probability >> 2]; } }; extern Prob_prices prob_prices; @@ -120,14 +120,14 @@ inline int price_matched( const Bit_model bm[], const int symbol, for( int i = 7; i >= 0; --i ) { const int match_bit = ( match_byte >> i ) & 1; - const int bit = ( symbol >> i ) & 1; + int bit = ( symbol >> i ) & 1; price += price_bit( bm[(match_bit<<8)+model+0x100], bit ); model = ( model << 1 ) | bit; if( match_bit != bit ) { while( --i >= 0 ) { - const int bit = ( symbol >> i ) & 1; + bit = ( symbol >> i ) & 1; price += price_bit( bm[model], bit ); model = ( model << 1 ) | bit; } @@ -158,7 +158,7 @@ class Matchfinder int cyclic_pos; int stream_pos; // first byte not yet read from file int pos_limit; // when reached, a new block must be read - const int ides_; // input file descriptor + const int infd_; // input file descriptor const int match_len_limit_; int32_t * const prev_positions; // last seen position of key int32_t * prev_pos_tree; @@ -167,7 +167,7 @@ class Matchfinder bool read_block() throw(); public: - Matchfinder( const int dict_size, const int len_limit, const int ides ); + Matchfinder( const int dict_size, const int len_limit, const int infd ); ~Matchfinder() { delete[] prev_pos_tree; delete[] prev_positions; std::free( buffer ); } @@ -214,32 +214,32 @@ class Range_encoder int pos; uint32_t range; int ff_count; - const int odes_; // output file descriptor + const int outfd_; // output file descriptor uint8_t cache; void shift_low() { const uint32_t carry = low >> 32; - if( low < 0xFF000000LL || carry == 1 ) + if( low < 0xFF000000U || carry == 1 ) { put_byte( cache + carry ); for( ; ff_count > 0; --ff_count ) put_byte( 0xFF + carry ); cache = low >> 24; } else ++ff_count; - low = ( low & 0x00FFFFFFLL ) << 8; + low = ( low & 0x00FFFFFFU ) << 8; } public: - Range_encoder( const int odes ) + Range_encoder( const int outfd ) : low( 0 ), partial_member_pos( 0 ), buffer( new uint8_t[buffer_size] ), pos( 0 ), - range( 0xFFFFFFFF ), + range( 0xFFFFFFFFU ), ff_count( 0 ), - odes_( odes ), + outfd_( outfd ), cache( 0 ) {} ~Range_encoder() { delete[] buffer; } @@ -248,9 +248,9 @@ public: { if( pos > 0 ) { - if( odes_ >= 0 ) + if( outfd_ >= 0 ) { - const int wr = writeblock( odes_, (char *)buffer, pos ); + const int wr = writeblock( outfd_, buffer, pos ); if( wr != pos ) throw Error( "write error" ); } partial_member_pos += pos; @@ -275,7 +275,7 @@ public: { range >>= 1; if( (symbol >> i) & 1 ) low += range; - if( range <= 0x00FFFFFF ) { range <<= 8; shift_low(); } + if( range <= 0x00FFFFFFU ) { range <<= 8; shift_low(); } } } @@ -293,7 +293,7 @@ public: range -= bound; bm.probability -= bm.probability >> bit_model_move_bits; } - if( range <= 0x00FFFFFF ) { range <<= 8; shift_low(); } + if( range <= 0x00FFFFFFU ) { range <<= 8; shift_low(); } } void encode_tree( Bit_model bm[], const int symbol, const int num_bits ) @@ -326,15 +326,15 @@ public: int model = 1; for( int i = 7; i >= 0; --i ) { - const int bit = ( symbol >> i ) & 1; const int match_bit = ( match_byte >> i ) & 1; + int bit = ( symbol >> i ) & 1; encode_bit( bm[(match_bit<<8)+model+0x100], bit ); model = ( model << 1 ) | bit; if( match_bit != bit ) { while( --i >= 0 ) { - const int bit = ( symbol >> i ) & 1; + bit = ( symbol >> i ) & 1; encode_bit( bm[model], bit ); model = ( model << 1 ) | bit; } @@ -359,17 +359,17 @@ class Len_encoder void update_prices( const int pos_state ) throw() { int * const pps = prices[pos_state]; - int price = price0( choice1 ); + int tmp = price0( choice1 ); int len = 0; for( ; len < len_low_symbols && len < len_symbols; ++len ) - pps[len] = price + + pps[len] = tmp + price_symbol( bm_low[pos_state], len, len_low_bits ); - price = price1( choice1 ); + tmp = price1( choice1 ); for( ; len < len_low_symbols + len_mid_symbols && len < len_symbols; ++len ) - pps[len] = price + price0( choice2 ) + + pps[len] = tmp + price0( choice2 ) + price_symbol( bm_mid[pos_state], len - len_low_symbols, len_mid_bits ); for( ; len < len_symbols; ++len ) - pps[len] = price + price1( choice2 ) + + pps[len] = tmp + price1( choice2 ) + price_symbol( bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits ); counters[pos_state] = len_symbols; } @@ -393,21 +393,21 @@ class Literal_encoder { Bit_model bm_literal[1<<literal_context_bits][0x300]; - int state( const int prev_byte ) const throw() + int lstate( const int prev_byte ) const throw() { return ( prev_byte >> ( 8 - literal_context_bits ) ); } public: void encode( Range_encoder & range_encoder, uint8_t prev_byte, uint8_t symbol ) - { range_encoder.encode_tree( bm_literal[state(prev_byte)], symbol, 8 ); } + { range_encoder.encode_tree( bm_literal[lstate(prev_byte)], symbol, 8 ); } void encode_matched( Range_encoder & range_encoder, uint8_t prev_byte, uint8_t match_byte, uint8_t symbol ) - { range_encoder.encode_matched( bm_literal[state(prev_byte)], symbol, match_byte ); } + { range_encoder.encode_matched( bm_literal[lstate(prev_byte)], symbol, match_byte ); } int price_matched( uint8_t prev_byte, uint8_t symbol, uint8_t match_byte ) const throw() - { return ::price_matched( bm_literal[state(prev_byte)], symbol, match_byte ); } + { return ::price_matched( bm_literal[lstate(prev_byte)], symbol, match_byte ); } int price_symbol( uint8_t prev_byte, uint8_t symbol ) const throw() - { return ::price_symbol( bm_literal[state(prev_byte)], symbol, 8 ); } + { return ::price_symbol( bm_literal[lstate(prev_byte)], symbol, 8 ); } }; @@ -415,6 +415,7 @@ class LZ_encoder { enum { dis_align_mask = dis_align_size - 1, infinite_price = 0x0FFFFFFF, + max_marker_size = 16, num_rep_distances = 4 }; // must be 4 struct Trial @@ -459,8 +460,9 @@ class LZ_encoder void fill_align_prices() throw(); void fill_distance_prices() throw(); - uint32_t crc() const throw() { return crc_ ^ 0xFFFFFFFF; } + uint32_t crc() const throw() { return crc_ ^ 0xFFFFFFFFU; } + // move-to-front dis in/into reps void mtf_reps( const int dis, int reps[num_rep_distances] ) throw() { if( dis >= num_rep_distances ) @@ -567,13 +569,13 @@ class LZ_encoder } } - int best_pair_sequence( const int reps[num_rep_distances], + int sequence_optimizer( const int reps[num_rep_distances], const State & state ); void full_flush( const State & state ); public: - LZ_encoder( Matchfinder & mf, const File_header & header, const int odes ); + LZ_encoder( Matchfinder & mf, const File_header & header, const int outfd ); bool encode_member( const long long member_size ); @@ -22,7 +22,7 @@ class State public: enum { states = 12 }; State() throw() : st( 0 ) {} - int operator()() const throw() { return st; } + unsigned char operator()() const throw() { return st; } bool is_char() const throw() { return st < 7; } void set_char() throw() @@ -100,16 +100,15 @@ struct Bit_model class Pretty_print { const char * const stdin_name; - const unsigned int stdin_name_len; unsigned int longest_name; std::string name_; mutable bool first_post; public: Pretty_print( const std::vector< std::string > & filenames ) - : stdin_name( "(stdin)" ), stdin_name_len( std::strlen( stdin_name ) ), - longest_name( 0 ), first_post( false ) + : stdin_name( "(stdin)" ), longest_name( 0 ), first_post( false ) { + const unsigned int stdin_name_len = std::strlen( stdin_name ); for( unsigned int i = 0; i < filenames.size(); ++i ) { const std::string & s = filenames[i]; @@ -143,7 +142,7 @@ public: { unsigned int c = n; for( int k = 0; k < 8; ++k ) - { if( c & 1 ) c = 0xEDB88320 ^ ( c >> 1 ); else c >>= 1; } + { if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; } data[n] = c; } } @@ -165,22 +164,19 @@ const uint8_t magic_string[4] = { 'L', 'Z', 'I', 'P' }; struct File_header { - uint8_t magic[4]; - uint8_t version; - uint8_t coded_dict_size; + uint8_t data[6]; // 0-3 magic bytes + // 4 version + // 5 coded_dict_size + enum { size = 6 }; void set_magic() throw() - { std::memcpy( magic, magic_string, sizeof magic ); version = 1; } + { std::memcpy( data, magic_string, 4 ); data[4] = 1; } bool verify_magic() const throw() - { - return ( std::memcmp( magic, magic_string, sizeof magic ) == 0 ); - } + { return ( std::memcmp( data, magic_string, 4 ) == 0 ); } - bool verify_version() const throw() - { - return ( version <= 1 ); - } + uint8_t version() const throw() { return data[4]; } + bool verify_version() const throw() { return ( data[4] <= 1 ); } static int real_bits( const int value ) throw() { @@ -192,24 +188,24 @@ struct File_header int dictionary_size() const throw() { - int size = ( 1 << ( coded_dict_size & 0x1F ) ); - if( size > min_dictionary_size && size <= max_dictionary_size ) - size -= ( size / 16 ) * ( ( coded_dict_size >> 5 ) & 0x07 ); - return size; + int sz = ( 1 << ( data[5] & 0x1F ) ); + if( sz > min_dictionary_size && sz <= max_dictionary_size ) + sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 0x07 ); + return sz; } - bool dictionary_size( const int size ) throw() + bool dictionary_size( const int sz ) throw() { - if( size >= min_dictionary_size && size <= max_dictionary_size ) + if( sz >= min_dictionary_size && sz <= max_dictionary_size ) { - coded_dict_size = real_bits( size - 1 ); - if( size > min_dictionary_size ) + data[5] = real_bits( sz - 1 ); + if( sz > min_dictionary_size ) { - const int base_size = 1 << coded_dict_size; + const int base_size = 1 << data[5]; const int wedge = base_size / 16; for( int i = 7; i >= 1; --i ) - if( base_size - ( i * wedge ) >= size ) - { coded_dict_size |= ( i << 5 ); break; } + if( base_size - ( i * wedge ) >= sz ) + { data[5] |= ( i << 5 ); break; } } return true; } @@ -220,63 +216,58 @@ struct File_header struct File_trailer { - uint8_t data_crc_[4]; // CRC32 of the uncompressed data - uint8_t data_size_[8]; // size of the uncompressed data - uint8_t member_size_[8]; // member size including header and trailer + uint8_t data[20]; // 0-3 CRC32 of the uncompressed data + // 4-11 size of the uncompressed data + // 12-19 member size including header and trailer - static int size( const int version ) - { return sizeof( File_trailer ) - ( ( version >= 1 ) ? 0 : 8 ); } + static int size( const int version = 1 ) + { return ( ( version >= 1 ) ? 20 : 12 ); } uint32_t data_crc() const throw() { uint32_t tmp = 0; - for( int i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data_crc_[i]; } + for( int i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; } return tmp; } void data_crc( uint32_t crc ) throw() - { - for( int i = 0; i < 4; ++i ) - { data_crc_[i] = (uint8_t)crc; crc >>= 8; } - } + { for( int i = 0; i <= 3; ++i ) { data[i] = (uint8_t)crc; crc >>= 8; } } long long data_size() const throw() { long long tmp = 0; - for( int i = 7; i >= 0; --i ) { tmp <<= 8; tmp += data_size_[i]; } + for( int i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; } return tmp; } - void data_size( long long size ) throw() + void data_size( long long sz ) throw() { - for( int i = 0; i < 8; ++i ) - { data_size_[i] = (uint8_t)size; size >>= 8; } + for( int i = 4; i <= 11; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } } long long member_size() const throw() { long long tmp = 0; - for( int i = 7; i >= 0; --i ) { tmp <<= 8; tmp += member_size_[i]; } + for( int i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; } return tmp; } - void member_size( long long size ) throw() + void member_size( long long sz ) throw() { - for( int i = 0; i < 8; ++i ) - { member_size_[i] = (uint8_t)size; size >>= 8; } + for( int i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } } }; struct Error { - const char * s; - Error( const char * p ) throw() : s( p ) {} + const char * const s; + Error( const char * const p ) throw() : s( p ) {} }; extern int verbosity; -void show_error( const char * msg, const int errcode = 0, const bool help = false ) throw(); -void internal_error( const char * msg ); -int readblock( const int fd, char * buf, const int size ) throw(); -int writeblock( const int fd, const char * buf, const int size ) throw(); +void show_error( const char * const msg, const int errcode = 0, const bool help = false ) throw(); +void internal_error( const char * const msg ); +int readblock( const int fd, uint8_t * const buf, const int size ) throw(); +int writeblock( const int fd, const uint8_t * const buf, const int size ) throw(); diff --git a/lziprecover.cc b/lziprecover.cc index dda4b5e..3acd46c 100644 --- a/lziprecover.cc +++ b/lziprecover.cc @@ -45,6 +45,12 @@ const char * const Program_name = "Lziprecover"; const char * const program_name = "lziprecover"; const char * const program_year = "2010"; +#ifdef O_BINARY +const int o_binary = O_BINARY; +#else +const int o_binary = 0; +#endif + void show_help() throw() { @@ -75,8 +81,8 @@ void show_version() throw() int open_instream( const std::string & input_filename ) throw() { - int ides = open( input_filename.c_str(), O_RDONLY ); - if( ides < 0 ) + int infd = open( input_filename.c_str(), O_RDONLY | o_binary ); + if( infd < 0 ) { if( verbosity >= 0 ) std::fprintf( stderr, "%s: Can't open input file `%s': %s.\n", @@ -85,30 +91,32 @@ int open_instream( const std::string & input_filename ) throw() else { struct stat in_stats; - const int i = fstat( ides, &in_stats ); + const int i = fstat( infd, &in_stats ); if( i < 0 || !S_ISREG( in_stats.st_mode ) ) { if( verbosity >= 0 ) std::fprintf( stderr, "%s: input file `%s' is not a regular file.\n", program_name, input_filename.c_str() ); - close( ides ); - ides = -1; + close( infd ); + infd = -1; } } - return ides; + return infd; } int open_outstream( const std::string & output_filename ) throw() { - int odes = open( output_filename.c_str(), O_CREAT | O_TRUNC | O_WRONLY, S_IRUSR | S_IWUSR ); - if( odes < 0 ) + int outfd = open( output_filename.c_str(), + O_CREAT | O_TRUNC | O_WRONLY | o_binary, + S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH ); + if( outfd < 0 ) { if( verbosity >= 0 ) std::fprintf( stderr, "%s: Can't create output file `%s': %s.\n", program_name, output_filename.c_str(), std::strerror( errno ) ); } - return odes; + return outfd; } @@ -123,44 +131,26 @@ bool next_filename( std::string & output_filename ) } -int search_header( const uint8_t * buffer, const int size, const int pos, - const long long last_header_pos, - const long long partial_file_pos ) - { - for( int i = pos; i < size; ++i ) - if( buffer[i] == magic_string[0] && buffer[i+1] == magic_string[1] && - buffer[i+2] == magic_string[2] && buffer[i+3] == magic_string[3] ) - { - File_trailer trailer; - for( unsigned int j = 0; j < sizeof trailer; ++j ) - ((uint8_t *)&trailer)[j] = buffer[i-(sizeof trailer)+j]; - if( partial_file_pos + i - trailer.member_size() == last_header_pos ) - return i; - } - return -1; - } - - -bool verify_header( const uint8_t * buffer, const int pos ) +bool verify_header( const uint8_t * const buffer, const int pos ) { File_header header; - for( unsigned int i = 0; i < sizeof header; ++i ) - ((uint8_t *)&header)[i] = buffer[pos+i]; + for( int i = 0; i < File_header::size; ++i ) + header.data[i] = buffer[pos+i]; if( !header.verify_magic() ) { - show_error( "bad magic number (file not created by lzip).\n" ); + show_error( "bad magic number (file not in lzip format).\n" ); return false; } - if( header.version == 0 ) + if( header.version() == 0 ) { show_error( "version 0 member format can't be recovered.\n" ); return false; } - if( header.version != 1 ) + if( header.version() != 1 ) { if( verbosity >= 0 ) std::fprintf( stderr, "version %d member format not supported, newer %s needed.\n", - header.version, program_name ); + header.version(), program_name ); return false; } return true; @@ -169,16 +159,16 @@ bool verify_header( const uint8_t * buffer, const int pos ) int process_file( const std::string & input_filename, uint8_t * & base_buffer ) { - const int hsize = sizeof( File_header ); - const int tsize = sizeof( File_trailer ); + const int hsize = File_header::size; + const int tsize = File_trailer::size(); const int buffer_size = 65536; const int base_buffer_size = tsize + buffer_size + hsize; base_buffer = new uint8_t[base_buffer_size]; uint8_t * const buffer = base_buffer + tsize; - const int inhandle = open_instream( input_filename ); - if( inhandle < 0 ) return 1; - int size = readblock( inhandle, (char *)buffer, buffer_size + hsize ) - hsize; + const int infd = open_instream( input_filename ); + if( infd < 0 ) return 1; + int size = readblock( infd, buffer, buffer_size + hsize ) - hsize; bool at_stream_end = ( size < buffer_size ); if( size != buffer_size && errno ) { show_error( "read error", errno ); return 1; } @@ -187,57 +177,60 @@ int process_file( const std::string & input_filename, uint8_t * & base_buffer ) if( !verify_header( buffer, 0 ) ) return 2; std::string output_filename( "rec00001" ); output_filename += input_filename; - int outhandle = open_outstream( output_filename ); - if( outhandle < 0 ) { close( inhandle ); return 1; } + int outfd = open_outstream( output_filename ); + if( outfd < 0 ) { close( infd ); return 1; } - long long last_header_pos = 0; - long long partial_file_pos = 0; - int pos = 0; - while( size > 0 ) + long long partial_member_size = 0; + while( true ) { - const int newpos = search_header( buffer, size - hsize, pos + hsize, - last_header_pos, partial_file_pos ); - if( newpos > pos ) + int pos = 0; + for( int newpos = 1; newpos <= size; ++newpos ) + if( buffer[newpos] == magic_string[0] && + buffer[newpos+1] == magic_string[1] && + buffer[newpos+2] == magic_string[2] && + buffer[newpos+3] == magic_string[3] ) + { + long long member_size = 0; + for( int i = 1; i <= 8; ++i ) + { member_size <<= 8; member_size += base_buffer[tsize+newpos-i]; } + if( partial_member_size + newpos - pos == member_size ) + { // header found + const int wr = writeblock( outfd, buffer + pos, newpos - pos ); + if( wr != newpos - pos ) + { show_error( "write error", errno ); return 1; } + if( close( outfd ) != 0 ) + { show_error( "error closing output file", errno ); return 1; } + if( !next_filename( output_filename ) ) + { show_error( "too many members in file" ); close( infd ); return 1; } + outfd = open_outstream( output_filename ); + if( outfd < 0 ) { close( infd ); return 1; } + partial_member_size = 0; + pos = newpos; + } + } + + if( at_stream_end ) { - const int wr = writeblock( outhandle, (char *)buffer + pos, newpos - pos ); - if( wr != newpos - pos ) + const int wr = writeblock( outfd, buffer + pos, size + hsize - pos ); + if( wr != size + hsize - pos ) { show_error( "write error", errno ); return 1; } - if( close( outhandle ) != 0 ) - { show_error( "error closing output file", errno ); return 1; } - if( !next_filename( output_filename ) ) - { show_error( "too many members in file" ); close( inhandle ); return 1; } - outhandle = open_outstream( output_filename ); - if( outhandle < 0 ) { close( inhandle ); return 1; } - last_header_pos = partial_file_pos + newpos; - pos = newpos; - continue; + break; } - else + if( pos < buffer_size ) { - if( !at_stream_end ) - { - partial_file_pos += buffer_size; - const int wr = writeblock( outhandle, (char *)buffer + pos, buffer_size - pos ); - if( wr != buffer_size - pos ) - { show_error( "write error", errno ); return 1; } - std::memcpy( base_buffer, base_buffer + buffer_size, tsize + hsize ); - pos = 0; - } - else - { - const int wr = writeblock( outhandle, (char *)buffer + pos, size + hsize - pos ); - if( wr != size + hsize - pos ) - { show_error( "write error", errno ); return 1; } - break; - } + partial_member_size += buffer_size - pos; + const int wr = writeblock( outfd, buffer + pos, buffer_size - pos ); + if( wr != buffer_size - pos ) + { show_error( "write error", errno ); return 1; } } - size = readblock( inhandle, (char *)buffer + hsize, buffer_size ); + std::memcpy( base_buffer, base_buffer + buffer_size, tsize + hsize ); + size = readblock( infd, buffer + hsize, buffer_size ); at_stream_end = ( size < buffer_size ); if( size != buffer_size && errno ) { show_error( "read error", errno ); return 1; } } - close( inhandle ); - if( close( outhandle ) != 0 ) + close( infd ); + if( close( outfd ) != 0 ) { show_error( "error closing output file", errno ); return 1; } return 0; } @@ -248,7 +241,7 @@ int process_file( const std::string & input_filename, uint8_t * & base_buffer ) int verbosity = 0; -void show_error( const char * msg, const int errcode, const bool help ) throw() +void show_error( const char * const msg, const int errcode, const bool help ) throw() { if( verbosity >= 0 ) { @@ -264,7 +257,7 @@ void show_error( const char * msg, const int errcode, const bool help ) throw() } -void internal_error( const char * msg ) +void internal_error( const char * const msg ) { std::string s( "internal error: " ); s += msg; show_error( s.c_str() ); @@ -275,7 +268,7 @@ void internal_error( const char * msg ) // Returns the number of bytes really read. // If (returned value < size) and (errno == 0), means EOF was reached. // -int readblock( const int fd, char * buf, const int size ) throw() +int readblock( const int fd, uint8_t * const buf, const int size ) throw() { int rest = size; errno = 0; @@ -294,7 +287,7 @@ int readblock( const int fd, char * buf, const int size ) throw() // Returns the number of bytes really written. // If (returned value < size), it is always an error. // -int writeblock( const int fd, const char * buf, const int size ) throw() +int writeblock( const int fd, const uint8_t * const buf, const int size ) throw() { int rest = size; errno = 0; @@ -309,7 +302,7 @@ int writeblock( const int fd, const char * buf, const int size ) throw() } -int main( const int argc, const char * argv[] ) +int main( const int argc, const char * const argv[] ) { invocation_name = argv[0]; @@ -37,13 +37,16 @@ #include <unistd.h> #include <utime.h> #include <sys/stat.h> -#include <sys/time.h> #include "arg_parser.h" #include "lzip.h" #include "decoder.h" #include "encoder.h" +#if CHAR_BIT != 8 +#error "Environments where CHAR_BIT != 8 are not supported." +#endif + #ifndef LLONG_MAX #define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL #endif @@ -62,12 +65,18 @@ const char * const Program_name = "Lzip"; const char * const program_name = "lzip"; const char * const program_year = "2010"; +#ifdef O_BINARY +const int o_binary = O_BINARY; +#else +const int o_binary = 0; +#endif + struct { const char * from; const char * to; } const known_extensions[] = { { ".lz", "" }, { ".tlz", ".tar" }, { 0, 0 } }; -struct lzma_options +struct Lzma_options { int dictionary_size; // 4KiB..512MiB int match_len_limit; // 5..273 @@ -76,7 +85,8 @@ struct lzma_options enum Mode { m_compress = 0, m_decompress, m_test }; std::string output_filename; -int outhandle = -1; +int outfd = -1; +mode_t outfd_mode = S_IRUSR | S_IWUSR; bool delete_output_on_interrupt = false; @@ -145,7 +155,7 @@ const char * format_num( long long num, long long limit = 9999, } -long long getnum( const char * ptr, const int bs = 0, +long long getnum( const char * const ptr, const int bs = 0, const long long llimit = LLONG_MIN + 1, const long long ulimit = LLONG_MAX ) throw() { @@ -203,11 +213,12 @@ long long getnum( const char * ptr, const int bs = 0, } -int get_dict_size( const char * arg ) throw() +int get_dict_size( const char * const arg ) throw() { char *tail; int bits = std::strtol( arg, &tail, 0 ); - if( bits >= min_dictionary_bits && bits <= max_dictionary_bits && *tail == 0 ) + if( bits >= min_dictionary_bits && + bits <= max_dictionary_bits && *tail == 0 ) return ( 1 << bits ); return getnum( arg, 0, min_dictionary_size, max_dictionary_size ); } @@ -226,11 +237,11 @@ int extension_index( const std::string & name ) throw() } -int open_instream( const std::string & name, struct stat * in_statsp, +int open_instream( const std::string & name, struct stat * const in_statsp, const Mode program_mode, const int eindex, const bool force, const bool to_stdout ) throw() { - int inhandle = -1; + int infd = -1; if( program_mode == m_compress && !force && eindex >= 0 ) { if( verbosity >= 0 ) @@ -240,8 +251,8 @@ int open_instream( const std::string & name, struct stat * in_statsp, } else { - inhandle = open( name.c_str(), O_RDONLY ); - if( inhandle < 0 ) + infd = open( name.c_str(), O_RDONLY | o_binary ); + if( infd < 0 ) { if( verbosity >= 0 ) std::fprintf( stderr, "%s: Can't open input file `%s': %s.\n", @@ -249,7 +260,7 @@ int open_instream( const std::string & name, struct stat * in_statsp, } else { - const int i = fstat( inhandle, in_statsp ); + const int i = fstat( infd, in_statsp ); const mode_t & mode = in_statsp->st_mode; if( i < 0 || !( S_ISREG( mode ) || ( to_stdout && ( S_ISFIFO( mode ) || S_ISSOCK( mode ) || @@ -259,12 +270,12 @@ int open_instream( const std::string & name, struct stat * in_statsp, std::fprintf( stderr, "%s: input file `%s' is not a regular file%s.\n", program_name, name.c_str(), to_stdout ? "" : " and `--stdout' was not specified" ); - close( inhandle ); - inhandle = -1; + close( infd ); + infd = -1; } } } - return inhandle; + return infd; } @@ -297,17 +308,16 @@ void set_d_outname( const std::string & name, const int i ) throw() bool open_outstream( const bool force ) throw() { - if( force ) - outhandle = open( output_filename.c_str(), O_CREAT | O_TRUNC | O_WRONLY, - S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH ); - else outhandle = open( output_filename.c_str(), O_CREAT | O_EXCL | O_WRONLY, - S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH ); - if( outhandle < 0 ) + int flags = O_CREAT | O_WRONLY | o_binary; + if( force ) flags |= O_TRUNC; else flags |= O_EXCL; + + outfd = open( output_filename.c_str(), flags, outfd_mode ); + if( outfd < 0 ) { - if( errno == EEXIST ) outhandle = -2; else outhandle = -1; + if( errno == EEXIST ) outfd = -2; else outfd = -1; if( verbosity >= 0 ) { - if( outhandle == -2 ) + if( outfd == -2 ) std::fprintf( stderr, "%s: Output file %s already exists, skipping.\n", program_name, output_filename.c_str() ); else @@ -315,19 +325,19 @@ bool open_outstream( const bool force ) throw() program_name, output_filename.c_str(), std::strerror( errno ) ); } } - return ( outhandle >= 0 ); + return ( outfd >= 0 ); } -bool check_tty( const int inhandle, const Mode program_mode ) throw() +bool check_tty( const int infd, const Mode program_mode ) throw() { - if( program_mode == m_compress && isatty( outhandle ) ) + if( program_mode == m_compress && isatty( outfd ) ) { show_error( "I won't write compressed data to a terminal.", 0, true ); return false; } if( ( program_mode == m_decompress || program_mode == m_test ) && - isatty( inhandle ) ) + isatty( infd ) ) { show_error( "I won't read compressed data from a terminal.", 0, true ); return false; @@ -340,10 +350,11 @@ void cleanup_and_fail( const int retval ) throw() { if( delete_output_on_interrupt ) { + delete_output_on_interrupt = false; if( verbosity >= 0 ) std::fprintf( stderr, "%s: Deleting output file `%s', if it exists.\n", program_name, output_filename.c_str() ); - if( outhandle >= 0 ) { close( outhandle ); outhandle = -1; } + if( outfd >= 0 ) { close( outfd ); outfd = -1; } if( std::remove( output_filename.c_str() ) != 0 ) show_error( "WARNING: deletion of output file (apparently) failed." ); } @@ -357,11 +368,11 @@ void close_and_set_permissions( const struct stat * const in_statsp ) bool error = false; if( in_statsp ) { - if( fchmod( outhandle, in_statsp->st_mode ) != 0 ) error = true; - else (void)fchown( outhandle, in_statsp->st_uid, in_statsp->st_gid ); + if( fchmod( outfd, in_statsp->st_mode ) != 0 ) error = true; + else (void)fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ); // fchown will in many cases return with EPERM, which can be safely ignored. } - if( close( outhandle ) == 0 ) outhandle = -1; + if( close( outfd ) == 0 ) outfd = -1; else cleanup_and_fail( 1 ); delete_output_on_interrupt = false; if( !in_statsp ) return; @@ -394,7 +405,7 @@ bool next_filename() int compress( const long long member_size, const long long volume_size, - const lzma_options & encoder_options, const int inhandle, + const Lzma_options & encoder_options, const int infd, const Pretty_print & pp, const struct stat * const in_statsp ) { if( verbosity >= 1 ) pp(); @@ -404,20 +415,21 @@ int compress( const long long member_size, const long long volume_size, encoder_options.match_len_limit < min_match_len_limit || encoder_options.match_len_limit > max_match_len ) internal_error( "invalid argument to encoder" ); + int retval = 0; try { Matchfinder matchfinder( header.dictionary_size(), - encoder_options.match_len_limit, inhandle ); + encoder_options.match_len_limit, infd ); header.dictionary_size( matchfinder.dictionary_size() ); long long in_size = 0, out_size = 0, partial_volume_size = 0; while( true ) // encode one member per iteration { - LZ_encoder encoder( matchfinder, header, outhandle ); + LZ_encoder encoder( matchfinder, header, outfd ); const long long size = std::min( member_size, volume_size - partial_volume_size ); if( !encoder.encode_member( size ) ) - { pp(); show_error( "read error", errno ); return 1; } + { pp(); show_error( "read error", errno ); retval = 1; break; } in_size += matchfinder.data_position(); out_size += encoder.member_position(); if( matchfinder.finished() ) break; @@ -429,16 +441,17 @@ int compress( const long long member_size, const long long volume_size, { close_and_set_permissions( in_statsp ); if( !next_filename() ) - { pp(); show_error( "too many volume files" ); return 1; } - if( !open_outstream( true ) ) return 1; + { pp(); show_error( "too many volume files" ); retval = 1; break; } + if( !open_outstream( true ) ) { retval = 1; break; } delete_output_on_interrupt = true; } } if( !matchfinder.reset() ) - { pp(); show_error( "read error", errno ); return 1; } + { pp(); show_error( "can't reset matchfinder", errno ); + retval = 1; break; } } - if( verbosity >= 1 ) + if( retval == 0 && verbosity >= 1 ) { if( in_size <= 0 || out_size <= 0 ) std::fprintf( stderr, "no data compressed.\n" ); @@ -454,62 +467,61 @@ int compress( const long long member_size, const long long volume_size, catch( std::bad_alloc ) { pp( "not enough memory. Try a smaller dictionary size" ); - return 1; + retval = 1; } - catch( Error e ) { pp(); show_error( e.s, errno ); return 1; } - return 0; + catch( Error e ) { pp(); show_error( e.s, errno ); retval = 1; } + return retval; } -int decompress( const int inhandle, const Pretty_print & pp, - const bool testing ) +int decompress( const int infd, const Pretty_print & pp, const bool testing ) { + int retval = 0; + try { - Input_buffer ibuf( inhandle ); + Range_decoder rdec( infd ); long long partial_file_pos = 0; for( bool first_member = true; ; first_member = false, pp.reset() ) { File_header header; - for( unsigned int i = 0; i < sizeof header; ++i ) - ((uint8_t *)&header)[i] = ibuf.get_byte(); - if( ibuf.finished() ) // End Of File + rdec.reset_member_position(); + for( int i = 0; i < File_header::size; ++i ) + header.data[i] = rdec.get_byte(); + if( rdec.finished() ) // End Of File { - if( !first_member ) break; - pp( "error reading member header" ); return 1; + if( first_member ) { pp( "error reading member header" ); retval = 1; } + break; } if( !header.verify_magic() ) { if( !first_member ) break; // trailing garbage - if( verbosity >= 0 ) - { pp(); - std::fprintf( stderr, "bad magic number (file not created by %s).\n", - program_name ); } - return 2; + pp( "bad magic number (file not in lzip format)" ); + retval = 2; break; } if( !header.verify_version() ) { if( verbosity >= 0 ) { pp(); std::fprintf( stderr, "version %d member format not supported, newer %s needed.\n", - header.version, program_name ); } - return 2; + header.version(), program_name ); } + retval = 2; break; } if( header.dictionary_size() < min_dictionary_size || header.dictionary_size() > max_dictionary_size ) - { pp( "invalid dictionary size in member header" ); return 2; } + { pp( "invalid dictionary size in member header" ); retval = 2; break; } if( verbosity >= 1 ) { pp(); if( verbosity >= 2 ) std::fprintf( stderr, "version %d, dictionary size %7sB. ", - header.version, + header.version(), format_num( header.dictionary_size() ) ); } - LZ_decoder decoder( header, ibuf, outhandle ); + LZ_decoder decoder( header, rdec, outfd ); const int result = decoder.decode_member( pp ); - partial_file_pos += decoder.member_position(); + partial_file_pos += rdec.member_position(); if( result != 0 ) { if( verbosity >= 0 && result <= 2 ) @@ -522,7 +534,7 @@ int decompress( const int inhandle, const Pretty_print & pp, std::fprintf( stderr, "decoder error at pos %lld\n", partial_file_pos ); } - return 2; + retval = 2; break; } if( verbosity >= 1 ) { if( testing ) std::fprintf( stderr, "ok\n" ); @@ -532,10 +544,10 @@ int decompress( const int inhandle, const Pretty_print & pp, catch( std::bad_alloc ) { pp( "not enough memory. Find a machine with more memory" ); - return 1; + retval = 1; } - catch( Error e ) { pp(); show_error( e.s, errno ); return 1; } - return 0; + catch( Error e ) { pp(); show_error( e.s, errno ); retval = 1; } + return retval; } @@ -548,9 +560,9 @@ extern "C" void signal_handler( int ) throw() void set_signals() throw() { - signal( SIGHUP, signal_handler ); - signal( SIGINT, signal_handler ); - signal( SIGTERM, signal_handler ); + std::signal( SIGHUP, signal_handler ); + std::signal( SIGINT, signal_handler ); + std::signal( SIGTERM, signal_handler ); } } // end namespace @@ -576,7 +588,7 @@ void Pretty_print::operator()( const char * const msg ) const throw() } -void show_error( const char * msg, const int errcode, const bool help ) throw() +void show_error( const char * const msg, const int errcode, const bool help ) throw() { if( verbosity >= 0 ) { @@ -592,7 +604,7 @@ void show_error( const char * msg, const int errcode, const bool help ) throw() } -void internal_error( const char * msg ) +void internal_error( const char * const msg ) { std::string s( "internal error: " ); s += msg; show_error( s.c_str() ); @@ -603,7 +615,7 @@ void internal_error( const char * msg ) // Returns the number of bytes really read. // If (returned value < size) and (errno == 0), means EOF was reached. // -int readblock( const int fd, char * buf, const int size ) throw() +int readblock( const int fd, uint8_t * const buf, const int size ) throw() { int rest = size; errno = 0; @@ -622,7 +634,7 @@ int readblock( const int fd, char * buf, const int size ) throw() // Returns the number of bytes really written. // If (returned value < size), it is always an error. // -int writeblock( const int fd, const char * buf, const int size ) throw() +int writeblock( const int fd, const uint8_t * const buf, const int size ) throw() { int rest = size; errno = 0; @@ -637,25 +649,26 @@ int writeblock( const int fd, const char * buf, const int size ) throw() } -int main( const int argc, const char * argv[] ) +int main( const int argc, const char * const argv[] ) { // Mapping from gzip/bzip2 style 1..9 compression modes // to the corresponding LZMA compression modes. - const lzma_options option_mapping[] = + const Lzma_options option_mapping[] = { + { 1 << 16, 5 }, // -0 { 1 << 20, 10 }, // -1 - { 1 << 20, 12 }, // -2 - { 1 << 20, 17 }, // -3 - { 1 << 21, 26 }, // -4 + { 3 << 19, 12 }, // -2 + { 1 << 21, 17 }, // -3 + { 3 << 20, 26 }, // -4 { 1 << 22, 44 }, // -5 { 1 << 23, 80 }, // -6 { 1 << 24, 108 }, // -7 - { 1 << 24, 163 }, // -8 + { 3 << 23, 163 }, // -8 { 1 << 25, 273 } }; // -9 - lzma_options encoder_options = option_mapping[5]; // default = "-6" + Lzma_options encoder_options = option_mapping[6]; // default = "-6" long long member_size = LLONG_MAX; long long volume_size = LLONG_MAX; - int inhandle = -1; + int infd = -1; Mode program_mode = m_compress; bool force = false; bool keep_input_files = false; @@ -668,6 +681,7 @@ int main( const int argc, const char * argv[] ) const Arg_parser::Option options[] = { + { '0', 0, Arg_parser::no }, { '1', "fast", Arg_parser::no }, { '2', 0, Arg_parser::no }, { '3', 0, Arg_parser::no }, @@ -680,6 +694,7 @@ int main( const int argc, const char * argv[] ) { 'b', "member-size", Arg_parser::yes }, { 'c', "stdout", Arg_parser::no }, { 'd', "decompress", Arg_parser::no }, + { 'e', "extreme", Arg_parser::no }, { 'f', "force", Arg_parser::no }, { 'h', "help", Arg_parser::no }, { 'k', "keep", Arg_parser::no }, @@ -702,21 +717,21 @@ int main( const int argc, const char * argv[] ) { const int code = parser.code( argind ); if( !code ) break; // no more options - const char * arg = parser.argument( argind ).c_str(); + const char * const arg = parser.argument( argind ).c_str(); switch( code ) { - case '1': case '2': case '3': - case '4': case '5': case '6': - case '7': case '8': case '9': - encoder_options = option_mapping[code-'1']; break; + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + encoder_options = option_mapping[code-'0']; break; case 'b': member_size = getnum( arg, 0, 100000, LLONG_MAX / 2 ); break; case 'c': to_stdout = true; break; case 'd': program_mode = m_decompress; break; + case 'e': break; // ignored by now case 'f': force = true; break; case 'h': show_help(); return 0; case 'k': keep_input_files = true; break; case 'm': encoder_options.match_len_limit = - getnum( arg, 0, min_match_len_limit, max_match_len ); break; + getnum( arg, 0, min_match_len_limit, max_match_len ); break; case 'o': default_output_filename = arg; break; case 'q': verbosity = -1; break; case 's': encoder_options.dictionary_size = get_dict_size( arg ); @@ -737,11 +752,13 @@ int main( const int argc, const char * argv[] ) } if( filenames.empty() ) filenames.push_back("-"); - if( filenames_given ) set_signals(); + if( !to_stdout && program_mode != m_test && + ( filenames_given || default_output_filename.size() ) ) + set_signals(); Pretty_print pp( filenames ); if( program_mode == m_test ) - outhandle = -1; + outfd = -1; else if( program_mode == m_compress ) { dis_slots.init(); @@ -757,20 +774,21 @@ int main( const int argc, const char * argv[] ) if( !filenames[i].size() || filenames[i] == "-" ) { input_filename.clear(); - inhandle = STDIN_FILENO; + infd = STDIN_FILENO; if( program_mode != m_test ) { if( to_stdout || !default_output_filename.size() ) - outhandle = STDOUT_FILENO; + outfd = STDOUT_FILENO; else { if( program_mode == m_compress ) set_c_outname( default_output_filename, volume_size != LLONG_MAX ); else output_filename = default_output_filename; + outfd_mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; if( !open_outstream( force ) ) { - if( outhandle == -1 && retval < 1 ) retval = 1; - close( inhandle ); inhandle = -1; + if( outfd == -1 && retval < 1 ) retval = 1; + close( infd ); infd = -1; continue; } } @@ -780,28 +798,29 @@ int main( const int argc, const char * argv[] ) { input_filename = filenames[i]; const int eindex = extension_index( input_filename ); - inhandle = open_instream( input_filename, &in_stats, program_mode, - eindex, force, to_stdout ); - if( inhandle < 0 ) { if( retval < 1 ) retval = 1; continue; } + infd = open_instream( input_filename, &in_stats, program_mode, + eindex, force, to_stdout ); + if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; } if( program_mode != m_test ) { - if( to_stdout ) outhandle = STDOUT_FILENO; + if( to_stdout ) outfd = STDOUT_FILENO; else { if( program_mode == m_compress ) set_c_outname( input_filename, volume_size != LLONG_MAX ); else set_d_outname( input_filename, eindex ); + outfd_mode = S_IRUSR | S_IWUSR; if( !open_outstream( force ) ) { - if( outhandle == -1 && retval < 1 ) retval = 1; - close( inhandle ); inhandle = -1; + if( outfd == -1 && retval < 1 ) retval = 1; + close( infd ); infd = -1; continue; } } } } - if( !check_tty( inhandle, program_mode ) ) return 1; + if( !check_tty( infd, program_mode ) ) return 1; if( output_filename.size() && !to_stdout && program_mode != m_test ) delete_output_on_interrupt = true; @@ -809,10 +828,10 @@ int main( const int argc, const char * argv[] ) pp.set_name( input_filename ); int tmp = 0; if( program_mode == m_compress ) - tmp = compress( member_size, volume_size, encoder_options, inhandle, + tmp = compress( member_size, volume_size, encoder_options, infd, pp, in_statsp ); else - tmp = decompress( inhandle, pp, program_mode == m_test ); + tmp = decompress( infd, pp, program_mode == m_test ); if( tmp > retval ) retval = tmp; if( tmp && program_mode != m_test ) cleanup_and_fail( retval ); @@ -820,12 +839,12 @@ int main( const int argc, const char * argv[] ) close_and_set_permissions( in_statsp ); if( input_filename.size() ) { - close( inhandle ); inhandle = -1; + close( infd ); infd = -1; if( !keep_input_files && !to_stdout && program_mode != m_test ) std::remove( input_filename.c_str() ); } } - if( outhandle >= 0 && close( outhandle ) != 0 ) + if( outfd >= 0 && close( outfd ) != 0 ) { if( verbosity >= 0 ) std::fprintf( stderr, "%s: Can't close stdout: %s.\n", diff --git a/testsuite/check.sh b/testsuite/check.sh index ec61c89..56ebc2b 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -11,7 +11,7 @@ objdir=`pwd` testdir=`cd "$1" ; pwd` LZIP="${objdir}"/lzip LZIPRECOVER="${objdir}"/lziprecover -framework_failure() { echo 'failure in testing framework'; exit 1; } +framework_failure() { echo "failure in testing framework" ; exit 1 ; } if [ ! -x "${LZIP}" ] ; then echo "${LZIP}: cannot execute" @@ -20,7 +20,7 @@ fi if [ -d tmp ] ; then rm -rf tmp ; fi mkdir tmp -echo -n "testing lzip..." +printf "testing lzip..." cd "${objdir}"/tmp cat "${testdir}"/test1 > in || framework_failure @@ -29,44 +29,44 @@ fail=0 "${LZIP}" -cd "${testdir}"/test1.lz > copy || fail=1 cmp in copy || fail=1 -for i in s4096 1 2 3 4 5 6 7 8 9; do +for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do "${LZIP}" -k -$i in || fail=1 mv -f in.lz copy.lz || fail=1 - echo -n "garbage" >> copy.lz || fail=1 + printf "garbage" >> copy.lz || fail=1 "${LZIP}" -df copy.lz || fail=1 cmp in copy || fail=1 - echo -n . + printf . done -for i in s4096 1 2 3 4 5 6 7 8 9; do +for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do "${LZIP}" -c -$i in > out || fail=1 - echo -n "g" >> out || fail=1 + printf "g" >> out || fail=1 "${LZIP}" -cd out > copy || fail=1 cmp in copy || fail=1 - echo -n . + printf . done -for i in s4096 1 2 3 4 5 6 7 8 9; do +for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do "${LZIP}" -$i < in > out || fail=1 "${LZIP}" -d < out > copy || fail=1 cmp in copy || fail=1 - echo -n . + printf . done -for i in s4096 1 2 3 4 5 6 7 8 9; do +for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do "${LZIP}" -f -$i -o out < in || fail=1 "${LZIP}" -df -o copy < out.lz || fail=1 cmp in copy || fail=1 - echo -n . + printf . done -"${LZIP}" -c in in in > out || fail=1 -echo -n "garbage" >> out || fail=1 +"${LZIP}" -ce in in in > out || fail=1 +printf "garbage" >> out || fail=1 "${LZIPRECOVER}" out || fail=1 -for i in 1 2 3; do +for i in 1 2 3 ; do "${LZIP}" -cd rec0000${i}out > copy || fail=1 cmp in copy || fail=1 - echo -n . + printf . done echo |