From 585fba50b00b5716bbde7a1b05cbab114af8cdb0 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 7 Nov 2015 12:53:03 +0100 Subject: Adding upstream version 1.18~pre2. Signed-off-by: Daniel Baumann --- ChangeLog | 12 +++++ NEWS | 16 +++++++ configure | 2 +- decoder.cc | 22 ++++----- decoder.h | 51 ++++++++++---------- doc/lziprecover.1 | 5 +- doc/lziprecover.info | 129 +++++++++++++++++++++++++++++++++++++-------------- doc/lziprecover.texi | 101 ++++++++++++++++++++++++++++++++-------- file_index.cc | 11 +++-- file_index.h | 13 ++++-- lzip.h | 24 +++++----- main.cc | 86 +++++++++++++++++++--------------- mtester.cc | 12 ++--- mtester.h | 47 ++++++++++--------- range_dec.cc | 20 ++++---- repair.cc | 3 +- split.cc | 38 +++++++-------- testsuite/check.sh | 55 +++++++++++++++++----- 18 files changed, 427 insertions(+), 220 deletions(-) diff --git a/ChangeLog b/ChangeLog index 75abd30..835dec0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +2015-09-16 Antonio Diaz Diaz + + * Version 1.18-pre2 released. + * main.cc: Added new option '-a, --trailing-error'. + * Decompression time has been reduced by 2%. + * main.cc (decompress): Print up to 6 bytes of trailing data + when '-tvvvv' is specified. + * range_dec.cc (list_file): Show dictionary size and size of + trailing data (if any) with '-lv'. + * lzip.texi: Added chapter 'Trailing data'. + * testsuite/check.sh: Don't check error messages. + 2015-06-30 Antonio Diaz Diaz * Version 1.18-pre1 released. diff --git a/NEWS b/NEWS index c46157d..4e36544 100644 --- a/NEWS +++ b/NEWS @@ -7,5 +7,21 @@ attempting to repair it. errors in these bytes sometimes can't be detected until the end of the member. +The option "-a, --trailing-error", which makes lzip exit with error +status 2 if any remaining input is detected after decompressing the last +member, has been added. + The new option "-x, --show-packets", which shows the LZMA packets (coding sequences) coded in a given file, has been added. + +Decompression time has been reduced by 2%. + +Up to 6 bytes of trailing data are printed when "-tvvvv" is specified. + +Dictionary size and size of trailing data (if any) are printed when +"-lv" is specified. + +The new chapter "Trailing data" has been added to the manual. + +Fixed a harmless check failure on Windows caused by the failed +comparison of a message in text mode. diff --git a/configure b/configure index aad4fc1..a76d668 100755 --- a/configure +++ b/configure @@ -6,7 +6,7 @@ # to copy, distribute and modify it. pkgname=lziprecover -pkgversion=1.18-pre1 +pkgversion=1.18-pre2 progname=lziprecover srctrigger=doc/${pkgname}.texi diff --git a/decoder.cc b/decoder.cc index 5de3a6f..895bd9c 100644 --- a/decoder.cc +++ b/decoder.cc @@ -42,7 +42,7 @@ void Pretty_print::operator()( const char * const msg, FILE * const f ) const { first_post = false; std::fprintf( f, " %s: ", name_.c_str() ); - for( unsigned i = 0; i < longest_name - name_.size(); ++i ) + for( unsigned i = name_.size(); i < longest_name; ++i ) std::fputc( ' ', f ); if( !msg ) std::fflush( f ); } @@ -62,7 +62,7 @@ long readblock( const int fd, uint8_t * const buf, const long size ) { const int n = read( fd, buf + sz, std::min( 1L << 20, size - sz ) ); if( n > 0 ) sz += n; - else if( n == 0 ) break; /* EOF */ + else if( n == 0 ) break; // EOF else if( errno != EINTR ) break; errno = 0; } @@ -117,7 +117,7 @@ void LZ_decoder::flush_data() if( s > 0 && writeblock( outfd, buffer + stream_pos + i, s ) != s ) throw Error( "Write error" ); } - if( pos >= buffer_size ) { partial_data_pos += pos; pos = 0; } + if( pos >= dictionary_size ) { partial_data_pos += pos; pos = 0; } stream_pos = pos; } } @@ -206,9 +206,9 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) Bit_model bm_align[dis_align_size]; Len_model match_len_model; Len_model rep_len_model; - unsigned rep0 = 0; /* rep[0-3] latest four distances */ - unsigned rep1 = 0; /* used for efficient coding of */ - unsigned rep2 = 0; /* repeated distances */ + unsigned rep0 = 0; // rep[0-3] latest four distances + unsigned rep1 = 0; // used for efficient coding of + unsigned rep2 = 0; // repeated distances unsigned rep3 = 0; State state; @@ -231,7 +231,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) peek( rep0 ) ) ); } } - else /* match or repeated match */ + else // match or repeated match { int len; if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit @@ -260,7 +260,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) state.set_rep(); len = min_match_len + rdec.decode_len( rep_len_model, pos_state ); } - else /* match */ + else // match { const unsigned rep0_saved = rep0; len = min_match_len + rdec.decode_len( match_len_model, pos_state ); @@ -277,16 +277,16 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) { rep0 += rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits; rep0 += rdec.decode_tree_reversed4( bm_align ); - if( rep0 == 0xFFFFFFFFU ) /* marker found */ + if( rep0 == 0xFFFFFFFFU ) // marker found { rep0 = rep0_saved; rdec.normalize(); flush_data(); - if( len == min_match_len ) /* End Of Stream marker */ + if( len == min_match_len ) // End Of Stream marker { if( verify_trailer( pp ) ) return 0; else return 3; } - if( len == min_match_len + 1 ) /* Sync Flush marker */ + if( len == min_match_len + 1 ) // Sync Flush marker { rdec.load(); continue; } diff --git a/decoder.h b/decoder.h index 5e6e16c..5ffc0be 100644 --- a/decoder.h +++ b/decoder.h @@ -19,12 +19,12 @@ class Range_decoder { enum { buffer_size = 16384 }; unsigned long long partial_member_pos; - uint8_t * const buffer; /* input buffer */ - int pos; /* current pos in buffer */ - int stream_pos; /* when reached, a new block must be read */ + uint8_t * const buffer; // input buffer + int pos; // current pos in buffer + int stream_pos; // when reached, a new block must be read uint32_t code; uint32_t range; - const int infd; /* input file descriptor */ + const int infd; // input file descriptor bool at_stream_end; bool read_block(); @@ -214,12 +214,11 @@ class LZ_decoder unsigned long long partial_data_pos; Range_decoder & rdec; const unsigned dictionary_size; - const int buffer_size; - uint8_t * const buffer; /* output buffer */ - int pos; /* current pos in buffer */ - int stream_pos; /* first byte not yet written to file */ + uint8_t * const buffer; // output buffer + unsigned pos; // current pos in buffer + unsigned stream_pos; // first byte not yet written to file uint32_t crc_; - const int outfd; /* output file descriptor */ + const int outfd; // output file descriptor unsigned long long stream_position() const { return partial_data_pos + stream_pos; } @@ -228,37 +227,42 @@ class LZ_decoder uint8_t peek_prev() const { - const int i = ( ( pos > 0 ) ? pos : buffer_size ) - 1; + const unsigned i = ( ( pos > 0 ) ? pos : dictionary_size ) - 1; return buffer[i]; } - uint8_t peek( const int distance ) const + uint8_t peek( const unsigned distance ) const { - int i = pos - distance - 1; - if( i < 0 ) i += buffer_size; + unsigned i = pos - distance - 1; + if( pos <= distance ) i += dictionary_size; return buffer[i]; } void put_byte( const uint8_t b ) { buffer[pos] = b; - if( ++pos >= buffer_size ) flush_data(); + if( ++pos >= dictionary_size ) flush_data(); } - void copy_block( const int distance, int len ) + void copy_block( const unsigned distance, unsigned len ) { - int i = pos - distance - 1; - if( i < 0 ) i += buffer_size; - if( len < buffer_size - std::max( pos, i ) && len <= std::abs( pos - i ) ) + unsigned i = pos - distance - 1; + bool fast; + if( pos <= distance ) + { i += dictionary_size; + fast = ( len <= dictionary_size - i && len <= i - pos ); } + else + fast = ( len < dictionary_size - pos && len <= pos - i ); + if( fast ) // no wrap, no overlap { - std::memcpy( buffer + pos, buffer + i, len ); // no wrap, no overlap + std::memcpy( buffer + pos, buffer + i, len ); pos += len; } else for( ; len > 0; --len ) { buffer[pos] = buffer[i]; - if( ++pos >= buffer_size ) flush_data(); - if( ++i >= buffer_size ) i = 0; + if( ++pos >= dictionary_size ) flush_data(); + if( ++i >= dictionary_size ) i = 0; } } @@ -275,13 +279,12 @@ public: partial_data_pos( 0 ), rdec( rde ), dictionary_size( header.dictionary_size() ), - buffer_size( std::max( 65536U, dictionary_size ) ), - buffer( new uint8_t[buffer_size] ), + buffer( new uint8_t[dictionary_size] ), pos( 0 ), stream_pos( 0 ), crc_( 0xFFFFFFFFU ), outfd( ofd ) - { buffer[buffer_size-1] = 0; } // prev_byte of first byte + { buffer[dictionary_size-1] = 0; } // prev_byte of first byte ~LZ_decoder() { delete[] buffer; } diff --git a/doc/lziprecover.1 b/doc/lziprecover.1 index 99b61dd..87c0598 100644 --- a/doc/lziprecover.1 +++ b/doc/lziprecover.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1. -.TH LZIPRECOVER "1" "June 2015" "lziprecover 1.18-pre1" "User Commands" +.TH LZIPRECOVER "1" "September 2015" "lziprecover 1.18-pre2" "User Commands" .SH NAME lziprecover \- recovers data from damaged lzip files .SH SYNOPSIS @@ -23,6 +23,9 @@ display this help and exit \fB\-V\fR, \fB\-\-version\fR output version information and exit .TP +\fB\-a\fR, \fB\-\-trailing\-error\fR +exit with error status if trailing data +.TP \fB\-c\fR, \fB\-\-stdout\fR send decompressed output to standard output .TP diff --git a/doc/lziprecover.info b/doc/lziprecover.info index 654e60c..8d7bc66 100644 --- a/doc/lziprecover.info +++ b/doc/lziprecover.info @@ -12,7 +12,7 @@ File: lziprecover.info, Node: Top, Next: Introduction, Up: (dir) Lziprecover Manual ****************** -This manual is for Lziprecover (version 1.18-pre1, 30 June 2015). +This manual is for Lziprecover (version 1.18-pre2, 16 September 2015). * Menu: @@ -23,6 +23,7 @@ This manual is for Lziprecover (version 1.18-pre1, 30 June 2015). * Merging files:: Fixing several damaged copies * File names:: Names of the files produced by lziprecover * File format:: Detailed format of the compressed file +* Trailing data:: Extra data appended to the file * Examples:: A small tutorial with examples * Unzcrash:: Testing the robustness of decompressors * Problems:: Reporting bugs @@ -54,7 +55,7 @@ availability: recovery means. The lziprecover program can repair bit-flip errors (one of the most common forms of data corruption) in lzip files, and provides data recovery capabilities, including error-checked - merging of damaged copies of a file. + merging of damaged copies of a file. *Note Data safety::. * The lzip format is as simple as possible (but not simpler). The lzip manual provides the code of a simple decompressor along with @@ -125,6 +126,13 @@ The format for running lziprecover is: Print the version number of lziprecover on the standard output and exit. +'-a' +'--trailing-error' + Exit with error status 2 if any remaining input is detected after + decompressing the last member. Such remaining input is usually + trailing garbage that can be safely ignored. *Note + concat-example::. + '-c' '--stdout' Decompress to standard output. Needed when reading from a named @@ -133,7 +141,9 @@ The format for running lziprecover is: '-d' '--decompress' - Decompress. + Decompress the specified file(s). If a file fails to decompress, + lziprecover exits immediately without decompressing the rest of the + files. '-D RANGE' '--range-decompress=RANGE' @@ -220,7 +230,8 @@ The format for running lziprecover is: Check integrity of the specified file(s), but don't decompress them. This really performs a trial decompression and throws away the result. Use it together with '-v' to see information about - the file. + the file(s). If a file fails the test, lziprecover continues + checking the rest of the files. '-v' '--verbose' @@ -228,7 +239,7 @@ The format for running lziprecover is: When decompressing or testing, further -v's (up to 4) increase the verbosity level, showing status, compression ratio, dictionary size, trailer contents (CRC, data size, member size), and up to 6 - bytes of trailing garbage (if any). + bytes of trailing data (if any). Numbers given as arguments to options may be followed by a multiplier @@ -387,7 +398,7 @@ original file name ends with one of the extensions '.tar.lz', '.lz' or '.tlz', the string '_fixed' is inserted before the extension.  -File: lziprecover.info, Node: File format, Next: Examples, Prev: File names, Up: Top +File: lziprecover.info, Node: File format, Next: Trailing data, Prev: File names, Up: Top 7 File format ************* @@ -421,7 +432,7 @@ additional information before, between, or after them. All multibyte values are stored in little endian order. -'ID string' +'ID string (the "magic" bytes)' A four byte string, identifying the lzip format, with the value "LZIP" (0x4C, 0x5A, 0x49, 0x50). @@ -458,9 +469,42 @@ additional information before, between, or after them.  -File: lziprecover.info, Node: Examples, Next: Unzcrash, Prev: File format, Up: Top +File: lziprecover.info, Node: Trailing data, Next: Examples, Prev: File format, Up: Top + +8 Extra data appended to the file +********************************* + +Sometimes extra data is found appended to a lzip file after the last +member. Such trailing data may be: + + * Padding added to make the file size a multiple of some block size, + for example when writing to a tape. + + * Garbage added by some not totally successful copy operation. + + * Useful data added by the user; a cryptographically secure hash, a + description of file contents, etc. + + * Malicious data added to the file in order to make its total size + and hash value (for a chosen hash) coincide with those of another + file. + + * In very rare cases, trailing data could be the corrupt header of + another member. In multi-member or concatenated files the + probability of corruption happening in the magic bytes is 5 times + smaller than the probability of getting a false positive caused by + the corruption of the integrity information itself. Therefore it + can be considered to be below the noise level. + + Trailing data can be safely ignored in most cases. In some cases, +like user-added data, it is expected to be ignored. In those cases +where a file containing trailing data must be rejected, the option +'--trailing-error' can be used. *Note --trailing-error::. + + +File: lziprecover.info, Node: Examples, Next: Unzcrash, Prev: Trailing data, Up: Top -8 A small tutorial with examples +9 A small tutorial with examples ******************************** Example 1: Restore a regular file from its compressed version @@ -475,29 +519,38 @@ show status. lziprecover -tv file.lz -Example 3: Decompress 'file.lz' partially until 10 KiB of decompressed +Example 3: The right way of concatenating compressed files. *Note +Trailing data::. + + Don't do this + cat file1.lz file2.lz file3.lz | lziprecover -d + Do this instead + lziprecover -cd file1.lz file2.lz file3.lz + + +Example 4: Decompress 'file.lz' partially until 10 KiB of decompressed data are produced. lziprecover -D 0,10KiB file.lz -Example 4: Decompress 'file.lz' partially from decompressed byte 10000 +Example 5: Decompress 'file.lz' partially from decompressed byte 10000 to decompressed byte 15000 (5000 bytes are produced). lziprecover -D 10000-15000 file.lz -Example 5: Repair small errors in the file 'file.lz'. (Indented lines +Example 6: Repair small errors in the file 'file.lz'. (Indented lines are abridged diagnostic messages from lziprecover). lziprecover -v -R file.lz Copy of input file repaired successfully. lziprecover -tv file_fixed.lz - ok + file_fixed.lz: ok mv file_fixed.lz file.lz -Example 6: Split the multi-member file 'file.lz' and write each member +Example 7: Split the multi-member file 'file.lz' and write each member in its own 'recXXXfile.lz' file. Then use 'lziprecover -t' to test the integrity of the resulting files. @@ -505,26 +558,26 @@ integrity of the resulting files. lziprecover -tv rec*file.lz -Example 7: Recover a compressed backup from two copies on CD-ROM with -error-checked merging of copies (*Note GNU ddrescue manual: +Example 8: Recover a compressed backup from two copies on CD-ROM with +error-checked merging of copies. (*Note GNU ddrescue manual: (ddrescue)Top, for details about ddrescue). - ddrescue -d -r1 -b2048 /dev/cdrom cdimage1 logfile1 + ddrescue -d -r1 -b2048 /dev/cdrom cdimage1 mapfile1 mount -t iso9660 -o loop,ro cdimage1 /mnt/cdimage cp /mnt/cdimage/backup.tar.lz rescued1.tar.lz umount /mnt/cdimage (insert second copy in the CD drive) - ddrescue -d -r1 -b2048 /dev/cdrom cdimage2 logfile2 + ddrescue -d -r1 -b2048 /dev/cdrom cdimage2 mapfile2 mount -t iso9660 -o loop,ro cdimage2 /mnt/cdimage cp /mnt/cdimage/backup.tar.lz rescued2.tar.lz umount /mnt/cdimage lziprecover -m -v -o backup.tar.lz rescued1.tar.lz rescued2.tar.lz Input files merged successfully. lziprecover -tv backup.tar.lz - ok + backup.tar.lz: ok -Example 8: Recover the first volume of those created with the command +Example 9: Recover the first volume of those created with the command 'lzip -b 32MiB -S 650MB big_db' from two copies, 'big_db1_00001.lz' and 'big_db2_00001.lz', with member 07 damaged in the first copy, member 18 damaged in the second copy, and member 12 damaged in both copies. The @@ -533,13 +586,13 @@ correct file produced is saved in 'big_db_00001.lz'. lziprecover -m -v -o big_db_00001.lz big_db1_00001.lz big_db2_00001.lz Input files merged successfully. lziprecover -tv big_db_00001.lz - ok + big_db_00001.lz: ok  File: lziprecover.info, Node: Unzcrash, Next: Problems, Prev: Examples, Up: Top -9 Testing the robustness of decompressors -***************************************** +10 Testing the robustness of decompressors +****************************************** The lziprecover package also includes unzcrash, a program written to test robustness to decompression of corrupted data, inspired by @@ -615,7 +668,7 @@ caused unzcrash to panic.  File: lziprecover.info, Node: Problems, Next: Concept index, Prev: Unzcrash, Up: Top -10 Reporting bugs +11 Reporting bugs ***************** There are probably bugs in lziprecover. There are certainly errors and @@ -646,24 +699,28 @@ Concept index * invoking: Invoking lziprecover. (line 6) * merging files: Merging files. (line 6) * repairing files: Repairing files. (line 6) +* trailing data: Trailing data. (line 6) * unzcrash: Unzcrash. (line 6)  Tag Table: Node: Top231 -Node: Introduction1214 -Node: Invoking lziprecover4310 -Node: Data safety9743 -Node: Repairing files11667 -Node: Merging files13569 -Node: File names15410 -Node: File format15874 -Node: Examples18278 -Ref: ddrescue-example19524 -Node: Unzcrash20780 -Node: Problems23334 -Node: Concept index23886 +Node: Introduction1278 +Node: Invoking lziprecover4395 +Ref: --trailing-error4860 +Node: Data safety10294 +Node: Repairing files12218 +Node: Merging files14120 +Node: File names15961 +Node: File format16425 +Node: Trailing data18854 +Node: Examples20230 +Ref: concat-example20661 +Ref: ddrescue-example21725 +Node: Unzcrash23015 +Node: Problems25571 +Node: Concept index26123  End Tag Table diff --git a/doc/lziprecover.texi b/doc/lziprecover.texi index 29045e7..e29a59f 100644 --- a/doc/lziprecover.texi +++ b/doc/lziprecover.texi @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 30 June 2015 -@set VERSION 1.18-pre1 +@set UPDATED 16 September 2015 +@set VERSION 1.18-pre2 @dircategory Data Compression @direntry @@ -42,6 +42,7 @@ This manual is for Lziprecover (version @value{VERSION}, @value{UPDATED}). * Merging files:: Fixing several damaged copies * File names:: Names of the files produced by lziprecover * File format:: Detailed format of the compressed file +* Trailing data:: Extra data appended to the file * Examples:: A small tutorial with examples * Unzcrash:: Testing the robustness of decompressors * Problems:: Reporting bugs @@ -75,7 +76,7 @@ The lzip format provides very safe integrity checking and some data recovery means. The lziprecover program can repair bit-flip errors (one of the most common forms of data corruption) in lzip files, and provides data recovery capabilities, including error-checked merging of damaged -copies of a file. +copies of a file. @xref{Data safety}. @item The lzip format is as simple as possible (but not simpler). The lzip @@ -152,6 +153,13 @@ Print an informative help message describing the options and exit. @itemx --version Print the version number of lziprecover on the standard output and exit. +@anchor{--trailing-error} +@item -a +@itemx --trailing-error +Exit with error status 2 if any remaining input is detected after +decompressing the last member. Such remaining input is usually trailing +garbage that can be safely ignored. @xref{concat-example}. + @item -c @itemx --stdout Decompress to standard output. Needed when reading from a named pipe @@ -160,7 +168,9 @@ data as possible when decompressing a corrupt file. @item -d @itemx --decompress -Decompress. +Decompress the specified file(s). If a file fails to decompress, +lziprecover exits immediately without decompressing the rest of the +files. @item -D @var{range} @itemx --range-decompress=@var{range} @@ -246,7 +256,9 @@ on the number of members in @samp{@var{file}}. @itemx --test Check integrity of the specified file(s), but don't decompress them. This really performs a trial decompression and throws away the result. -Use it together with @samp{-v} to see information about the file. +Use it together with @samp{-v} to see information about the file(s). If +a file fails the test, lziprecover continues checking the rest of the +files. @item -v @itemx --verbose @@ -254,7 +266,7 @@ Verbose mode.@* When decompressing or testing, further -v's (up to 4) increase the verbosity level, showing status, compression ratio, dictionary size, trailer contents (CRC, data size, member size), and up to 6 bytes of -trailing garbage (if any). +trailing data (if any). @end table @@ -456,7 +468,7 @@ Each member has the following structure: All multibyte values are stored in little endian order. @table @samp -@item ID string +@item ID string (the "magic" bytes) A four byte string, identifying the lzip format, with the value "LZIP" (0x4C, 0x5A, 0x49, 0x50). @@ -499,6 +511,44 @@ facilitates safe recovery of undamaged members from multi-member files. @end table +@node Trailing data +@chapter Extra data appended to the file +@cindex trailing data + +Sometimes extra data is found appended to a lzip file after the last +member. Such trailing data may be: + +@itemize @bullet +@item +Padding added to make the file size a multiple of some block size, for +example when writing to a tape. + +@item +Garbage added by some not totally successful copy operation. + +@item +Useful data added by the user; a cryptographically secure hash, a +description of file contents, etc. + +@item +Malicious data added to the file in order to make its total size and +hash value (for a chosen hash) coincide with those of another file. + +@item +In very rare cases, trailing data could be the corrupt header of another +member. In multi-member or concatenated files the probability of +corruption happening in the magic bytes is 5 times smaller than the +probability of getting a false positive caused by the corruption of the +integrity information itself. Therefore it can be considered to be below +the noise level. +@end itemize + +Trailing data can be safely ignored in most cases. In some cases, like +user-added data, it is expected to be ignored. In those cases where a +file containing trailing data must be rejected, the option +@samp{--trailing-error} can be used. @xref{--trailing-error}. + + @node Examples @chapter A small tutorial with examples @cindex examples @@ -520,9 +570,22 @@ and show status. lziprecover -tv file.lz @end example +@sp 1 +@anchor{concat-example} +@noindent +Example 3: The right way of concatenating compressed files. +@xref{Trailing data}. + +@example +Don't do this + cat file1.lz file2.lz file3.lz | lziprecover -d +Do this instead + lziprecover -cd file1.lz file2.lz file3.lz +@end example + @sp 1 @noindent -Example 3: Decompress @samp{file.lz} partially until 10 KiB of +Example 4: Decompress @samp{file.lz} partially until 10 KiB of decompressed data are produced. @example @@ -531,7 +594,7 @@ lziprecover -D 0,10KiB file.lz @sp 1 @noindent -Example 4: Decompress @samp{file.lz} partially from decompressed byte +Example 5: Decompress @samp{file.lz} partially from decompressed byte 10000 to decompressed byte 15000 (5000 bytes are produced). @example @@ -540,20 +603,20 @@ lziprecover -D 10000-15000 file.lz @sp 1 @noindent -Example 5: Repair small errors in the file @samp{file.lz}. (Indented +Example 6: Repair small errors in the file @samp{file.lz}. (Indented lines are abridged diagnostic messages from lziprecover). @example lziprecover -v -R file.lz Copy of input file repaired successfully. lziprecover -tv file_fixed.lz - ok + file_fixed.lz: ok mv file_fixed.lz file.lz @end example @sp 1 @noindent -Example 6: Split the multi-member file @samp{file.lz} and write each +Example 7: Split the multi-member file @samp{file.lz} and write each member in its own @samp{recXXXfile.lz} file. Then use @w{@samp{lziprecover -t}} to test the integrity of the resulting files. @@ -565,8 +628,8 @@ lziprecover -tv rec*file.lz @sp 1 @anchor{ddrescue-example} @noindent -Example 7: Recover a compressed backup from two copies on CD-ROM with -error-checked merging of copies +Example 8: Recover a compressed backup from two copies on CD-ROM with +error-checked merging of copies. @ifnothtml (@xref{Top,GNU ddrescue manual,,ddrescue}, @end ifnothtml @@ -577,24 +640,24 @@ error-checked merging of copies for details about ddrescue). @example -ddrescue -d -r1 -b2048 /dev/cdrom cdimage1 logfile1 +ddrescue -d -r1 -b2048 /dev/cdrom cdimage1 mapfile1 mount -t iso9660 -o loop,ro cdimage1 /mnt/cdimage cp /mnt/cdimage/backup.tar.lz rescued1.tar.lz umount /mnt/cdimage (insert second copy in the CD drive) -ddrescue -d -r1 -b2048 /dev/cdrom cdimage2 logfile2 +ddrescue -d -r1 -b2048 /dev/cdrom cdimage2 mapfile2 mount -t iso9660 -o loop,ro cdimage2 /mnt/cdimage cp /mnt/cdimage/backup.tar.lz rescued2.tar.lz umount /mnt/cdimage lziprecover -m -v -o backup.tar.lz rescued1.tar.lz rescued2.tar.lz Input files merged successfully. lziprecover -tv backup.tar.lz - ok + backup.tar.lz: ok @end example @sp 1 @noindent -Example 8: Recover the first volume of those created with the command +Example 9: Recover the first volume of those created with the command @w{@samp{lzip -b 32MiB -S 650MB big_db}} from two copies, @samp{big_db1_00001.lz} and @samp{big_db2_00001.lz}, with member 07 damaged in the first copy, member 18 damaged in the second copy, and @@ -605,7 +668,7 @@ member 12 damaged in both copies. The correct file produced is saved in lziprecover -m -v -o big_db_00001.lz big_db1_00001.lz big_db2_00001.lz Input files merged successfully. lziprecover -tv big_db_00001.lz - ok + big_db_00001.lz: ok @end example diff --git a/file_index.cc b/file_index.cc index a1a0f30..af55417 100644 --- a/file_index.cc +++ b/file_index.cc @@ -86,7 +86,7 @@ File_index::File_index( const int infd ) if( member_size < min_member_size || member_size > pos ) { if( member_vector.empty() ) - { --pos; continue; } // maybe trailing garbage + { --pos; continue; } // maybe trailing data set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 ); break; } @@ -96,10 +96,11 @@ File_index::File_index( const int infd ) if( !header.verify_magic() || !header.verify_version() ) { if( member_vector.empty() ) - { --pos; continue; } // maybe trailing garbage + { --pos; continue; } // maybe trailing data set_num_error( "Bad header at pos ", pos - member_size ); break; } + const unsigned dictionary_size = header.dictionary_size(); if( member_vector.empty() && isize - pos > File_header::size && seek_read( infd, header.data, File_header::size, pos ) == File_header::size && header.verify_magic() && header.verify_version() ) @@ -109,7 +110,7 @@ File_index::File_index( const int infd ) } pos -= member_size; member_vector.push_back( Member( 0, trailer.data_size(), - pos, member_size ) ); + pos, member_size, dictionary_size ) ); } if( pos != 0 || member_vector.empty() ) { @@ -184,7 +185,7 @@ File_index::File_index( const std::vector< int > & infd_vector, } if( !done ) { - if( member_vector.empty() ) // maybe trailing garbage + if( member_vector.empty() ) // maybe trailing data { --pos; continue; } set_num_error( "Member size in trailer may be corrupt at pos ", pos - 8 ); break; @@ -202,7 +203,7 @@ File_index::File_index( const std::vector< int > & infd_vector, } pos -= member_size; member_vector.push_back( Member( 0, trailer.data_size(), - pos, member_size ) ); + pos, member_size, 0 ) ); } error: if( pos != 0 || member_vector.empty() ) diff --git a/file_index.h b/file_index.h index eff1157..5084fcb 100644 --- a/file_index.h +++ b/file_index.h @@ -20,10 +20,11 @@ class File_index struct Member { Block dblock, mblock; // data block, member block + unsigned dictionary_size; Member( const long long dp, const long long ds, - const long long mp, const long long ms ) - : dblock( dp, ds ), mblock( mp, ms ) {} + const long long mp, const long long ms, const unsigned dict_size ) + : dblock( dp, ds ), mblock( mp, ms ), dictionary_size( dict_size ) {} bool operator==( const Member & m ) const { return ( mblock == m.mblock ); } bool operator!=( const Member & m ) const { return ( mblock != m.mblock ); } @@ -36,10 +37,10 @@ class File_index void set_errno_error( const char * const msg ); void set_num_error( const char * const msg1, unsigned long long num, - const char * const msg2 = "." ); + const char * const msg2 = "" ); public: - File_index() : error_( "No index." ), isize( 0 ), retval_( 2 ) {} + File_index() : error_( "No index" ), isize( 0 ), retval_( 2 ) {} explicit File_index( const int infd ); File_index( const std::vector< int > & infd_vector, const long long fsize ); @@ -65,7 +66,7 @@ public: { if( member_vector.size() ) return member_vector.back().mblock.end(); else return 0; } - // total size including trailing garbage (if any) + // total size including trailing data (if any) long long file_size() const { if( isize >= 0 ) return isize; else return 0; } @@ -73,4 +74,6 @@ public: { return member_vector[i].dblock; } const Block & mblock( const long i ) const { return member_vector[i].mblock; } + unsigned dictionary_size( const long i ) const + { return member_vector[i].dictionary_size; } }; diff --git a/lzip.h b/lzip.h index e46b9b8..3bdc27c 100644 --- a/lzip.h +++ b/lzip.h @@ -40,7 +40,7 @@ public: enum { min_dictionary_bits = 12, - min_dictionary_size = 1 << min_dictionary_bits, /* >= modeled_distances */ + min_dictionary_size = 1 << min_dictionary_bits, // >= modeled_distances max_dictionary_bits = 29, max_dictionary_size = 1 << max_dictionary_bits, min_member_size = 36, @@ -53,7 +53,7 @@ enum { dis_slot_bits = 6, start_dis_model = 4, end_dis_model = 14, - modeled_distances = 1 << (end_dis_model / 2), /* 128 */ + modeled_distances = 1 << (end_dis_model / 2), // 128 dis_align_bits = 4, dis_align_size = 1 << dis_align_bits, @@ -65,8 +65,8 @@ enum { len_high_symbols = 1 << len_high_bits, max_len_symbols = len_low_symbols + len_mid_symbols + len_high_symbols, - min_match_len = 2, /* must be 2 */ - max_match_len = min_match_len + max_len_symbols - 1, /* 273 */ + min_match_len = 2, // must be 2 + max_match_len = min_match_len + max_len_symbols - 1, // 273 min_match_len_limit = 5 }; inline int get_len_state( const int len ) @@ -109,6 +109,7 @@ public: : stdin_name( "(stdin)" ), longest_name( 0 ), verbosity_( v ), first_post( false ) { + if( verbosity_ <= 0 ) return; const unsigned stdin_name_len = std::strlen( stdin_name ); for( unsigned i = 0; i < filenames.size(); ++i ) { @@ -186,9 +187,9 @@ const uint8_t magic_string[4] = { 0x4C, 0x5A, 0x49, 0x50 }; // "LZIP" struct File_header { - uint8_t data[6]; /* 0-3 magic bytes */ - /* 4 version */ - /* 5 coded_dict_size */ + uint8_t data[6]; // 0-3 magic bytes + // 4 version + // 5 coded_dict_size enum { size = 6 }; void set_magic() { std::memcpy( data, magic_string, 4 ); data[4] = 1; } @@ -228,9 +229,9 @@ struct File_header struct File_trailer { - uint8_t data[20]; /* 0-3 CRC32 of the uncompressed data */ - /* 4-11 size of the uncompressed data */ - /* 12-19 member size including header and trailer */ + uint8_t data[20]; // 0-3 CRC32 of the uncompressed data + // 4-11 size of the uncompressed data + // 12-19 member size including header and trailer enum { size = 20 }; @@ -291,7 +292,7 @@ int open_instream( const char * const name, struct stat * const in_statsp, const bool no_ofile, const bool reg_only = false ); bool file_exists( const std::string & filename ); int open_outstream_rw( const std::string & output_filename, const bool force ); -void show_header( const unsigned dictionary_size ); +void show_header( const unsigned dictionary_size, const int vlevel = 3 ); void show_error( const char * const msg, const int errcode = 0, const bool help = false ); void internal_error( const char * const msg ); @@ -328,6 +329,7 @@ int debug_show_packets( const std::string & input_filename, const uint8_t bad_value ); // defined in split.cc +bool verify_header( const File_header & header, const Pretty_print & pp ); int split_file( const std::string & input_filename, const std::string & default_output_filename, const int verbosity, const bool force ); diff --git a/main.cc b/main.cc index 9425858..7845d90 100644 --- a/main.cc +++ b/main.cc @@ -24,6 +24,7 @@ #define _FILE_OFFSET_BITS 64 #include +#include #include #include #include @@ -104,6 +105,7 @@ void show_help() std::printf( "\nOptions:\n" " -h, --help display this help and exit\n" " -V, --version output version information and exit\n" + " -a, --trailing-error exit with error status if trailing data\n" " -c, --stdout send decompressed output to standard output\n" " -d, --decompress decompress\n" " -D, --range-decompress= decompress a range of bytes (N-M) to stdout\n" @@ -146,9 +148,9 @@ void show_version() } // end namespace -void show_header( const unsigned dictionary_size ) +void show_header( const unsigned dictionary_size, const int vlevel ) { - if( verbosity >= 3 ) + if( verbosity >= vlevel ) { const char * const prefix[8] = { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; @@ -309,7 +311,7 @@ int open_instream( const char * const name, struct stat * const in_statsp, std::fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n", program_name, name, ( can_read && !no_ofile ) ? - " and '--stdout' was not specified" : "" ); + ",\n and '--stdout' was not specified" : "" ); close( infd ); infd = -1; } @@ -373,14 +375,14 @@ void cleanup_and_fail( const int retval ) } - /* Set permissions, owner and times. */ + // Set permissions, owner and times. void close_and_set_permissions( const struct stat * const in_statsp ) { bool warning = false; if( in_statsp ) { const mode_t mode = in_statsp->st_mode; - /* fchown will in many cases return with EPERM, which can be safely ignored. */ + // fchown will in many cases return with EPERM, which can be safely ignored. if( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) == 0 ) { if( fchmod( outfd, mode ) != 0 ) warning = true; } else @@ -424,36 +426,42 @@ unsigned char xdigit( const int value ) } -void show_trailing_garbage( const uint8_t * const data, const int size, - const Pretty_print & pp, const bool all ) +bool show_trailing_data( const uint8_t * const data, const int size, + const Pretty_print & pp, const bool all, + const bool ignore_trailing ) { - std::string garbage_msg; - if( !all ) garbage_msg = "first bytes of "; - garbage_msg += "trailing garbage found = "; - bool text = true; - for( int i = 0; i < size; ++i ) - if( !std::isprint( data[i] ) ) { text = false; break; } - if( text ) - { - garbage_msg += '\''; - garbage_msg.append( (const char *)data, size ); - garbage_msg += '\''; - } - else + if( verbosity >= 4 || !ignore_trailing ) { + std::string msg; + if( !all ) msg = "first bytes of "; + msg += "trailing data = "; + bool text = true; for( int i = 0; i < size; ++i ) + if( !std::isprint( data[i] ) ) { text = false; break; } + if( text ) + { + msg += '\''; + msg.append( (const char *)data, size ); + msg += '\''; + } + else { - if( i > 0 ) garbage_msg += ' '; - garbage_msg += xdigit( data[i] >> 4 ); - garbage_msg += xdigit( data[i] & 0x0F ); + for( int i = 0; i < size; ++i ) + { + if( i > 0 ) msg += ' '; + msg += xdigit( data[i] >> 4 ); + msg += xdigit( data[i] & 0x0F ); + } } + pp( msg.c_str() ); + if( !ignore_trailing ) show_error( "Trailing data not allowed." ); } - garbage_msg += '.'; - pp( garbage_msg.c_str() ); + return ignore_trailing; } -int decompress( const int infd, const Pretty_print & pp, const bool testing ) +int decompress( const int infd, const Pretty_print & pp, + const bool ignore_trailing, const bool testing ) { int retval = 0; @@ -469,16 +477,17 @@ int decompress( const int infd, const Pretty_print & pp, const bool testing ) { if( first_member ) { pp( "File ends unexpectedly at member header." ); retval = 2; } - else if( verbosity >= 4 && size > 0 ) - show_trailing_garbage( header.data, size, pp, true ); + else if( size > 0 && !show_trailing_data( header.data, size, pp, + true, ignore_trailing ) ) + retval = 2; break; } if( !header.verify_magic() ) { if( first_member ) { pp( "Bad magic number (file not in lzip format)." ); retval = 2; } - else if( verbosity >= 4 ) - show_trailing_garbage( header.data, size, pp, false ); + else if( !show_trailing_data( header.data, size, pp, false, ignore_trailing ) ) + retval = 2; break; } if( !header.verify_version() ) @@ -620,13 +629,15 @@ int main( const int argc, const char * const argv[] ) Mode program_mode = m_none; uint8_t bad_value = 0; bool force = false; - bool ignore = false; + bool ignore_errors = false; + bool ignore_trailing = true; bool keep_input_files = false; bool to_stdout = false; invocation_name = argv[0]; const Arg_parser::Option options[] = { + { 'a', "trailing-error", Arg_parser::no }, { 'c', "stdout", Arg_parser::no }, { 'd', "decompress", Arg_parser::no }, { 'D', "range-decompress", Arg_parser::yes }, @@ -657,17 +668,18 @@ int main( const int argc, const char * const argv[] ) for( ; argind < parser.arguments(); ++argind ) { const int code = parser.code( argind ); - if( !code ) break; /* no more options */ + if( !code ) break; // no more options const std::string & arg = parser.argument( argind ); switch( code ) { + case 'a': ignore_trailing = false; break; case 'c': to_stdout = true; break; case 'd': set_mode( program_mode, m_decompress ); break; case 'D': set_mode( program_mode, m_range_dec ); parse_range( arg.c_str(), range ); break; case 'f': force = true; break; case 'h': show_help(); return 0; - case 'i': ignore = true; break; + case 'i': ignore_errors = true; break; case 'k': keep_input_files = true; break; case 'l': set_mode( program_mode, m_list ); break; case 'm': set_mode( program_mode, m_merge ); break; @@ -688,7 +700,7 @@ int main( const int argc, const char * const argv[] ) parse_pos_value( arg.c_str(), bad_pos, bad_value ); break; default : internal_error( "uncaught option." ); } - } /* end process options */ + } // end process options #if defined(__MSVCRT__) || defined(__OS2__) setmode( STDIN_FILENO, O_BINARY ); @@ -731,8 +743,8 @@ int main( const int argc, const char * const argv[] ) return merge_files( filenames, default_output_filename, verbosity, force ); case m_range_dec: one_file( filenames.size() ); - return range_decompress( filenames[0], default_output_filename, - range, verbosity, force, ignore, to_stdout ); + return range_decompress( filenames[0], default_output_filename, range, + verbosity, force, ignore_errors, to_stdout ); case m_repair: one_file( filenames.size() ); if( default_output_filename.empty() ) @@ -823,7 +835,7 @@ int main( const int argc, const char * const argv[] ) delete_output_on_interrupt = true; const struct stat * const in_statsp = input_filename.size() ? &in_stats : 0; pp.set_name( input_filename ); - const int tmp = decompress( infd, pp, program_mode == m_test ); + const int tmp = decompress( infd, pp, ignore_trailing, program_mode == m_test ); if( tmp > retval ) retval = tmp; if( tmp && program_mode != m_test ) cleanup_and_fail( retval ); diff --git a/mtester.cc b/mtester.cc index a9ac06e..92de2e0 100644 --- a/mtester.cc +++ b/mtester.cc @@ -56,7 +56,7 @@ void LZ_mtester::flush_data() { const int size = pos - stream_pos; crc32.update_buf( crc_, buffer + stream_pos, size ); - if( pos >= buffer_size ) { partial_data_pos += pos; pos = 0; } + if( pos >= dictionary_size ) { partial_data_pos += pos; pos = 0; } stream_pos = pos; } } @@ -89,11 +89,11 @@ void LZ_mtester::print_block( const int len ) void LZ_mtester::duplicate_buffer() { - uint8_t * const tmp = new uint8_t[buffer_size]; + uint8_t * const tmp = new uint8_t[dictionary_size]; if( data_position() > 0 ) std::memcpy( tmp, buffer, std::min( data_position(), - (unsigned long long)buffer_size ) ); - else tmp[buffer_size-1] = 0; // prev_byte of first byte + (unsigned long long)dictionary_size ) ); + else tmp[dictionary_size-1] = 0; // prev_byte of first byte buffer = tmp; } @@ -232,7 +232,7 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos, format_byte( match_byte ) ); } } - else /* match or repeated match */ + else // match or repeated match { int len; if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit @@ -271,7 +271,7 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos, std::printf( "%6llu %6llu rep%c %6u,%3d (%6llu)", mp, dp, rep + '0', rep0 + 1, len, dp - rep0 - 1 ); } - else /* match */ + else // match { const unsigned rep0_saved = rep0; len = min_match_len + rdec.decode_len( match_len_model, pos_state ); diff --git a/mtester.h b/mtester.h index 795d8e4..71df64d 100644 --- a/mtester.h +++ b/mtester.h @@ -194,14 +194,13 @@ class LZ_mtester unsigned long long partial_data_pos; Range_mtester rdec; const unsigned dictionary_size; - const int buffer_size; - uint8_t * buffer; /* output buffer */ - int pos; /* current pos in buffer */ - int stream_pos; /* first byte not yet written to file */ + uint8_t * buffer; // output buffer + unsigned pos; // current pos in buffer + unsigned stream_pos; // first byte not yet written to file uint32_t crc_; - unsigned rep0; /* rep[0-3] latest four distances */ - unsigned rep1; /* used for efficient coding of */ - unsigned rep2; /* repeated distances */ + unsigned rep0; // rep[0-3] latest four distances + unsigned rep1; // used for efficient coding of + unsigned rep2; // repeated distances unsigned rep3; State state; @@ -225,37 +224,42 @@ class LZ_mtester uint8_t peek_prev() const { - const int i = ( ( pos > 0 ) ? pos : buffer_size ) - 1; + const unsigned i = ( ( pos > 0 ) ? pos : dictionary_size ) - 1; return buffer[i]; } - uint8_t peek( const int distance ) const + uint8_t peek( const unsigned distance ) const { - int i = pos - distance - 1; - if( i < 0 ) i += buffer_size; + unsigned i = pos - distance - 1; + if( pos <= distance ) i += dictionary_size; return buffer[i]; } void put_byte( const uint8_t b ) { buffer[pos] = b; - if( ++pos >= buffer_size ) flush_data(); + if( ++pos >= dictionary_size ) flush_data(); } - void copy_block( const int distance, int len ) + void copy_block( const unsigned distance, unsigned len ) { - int i = pos - distance - 1; - if( i < 0 ) i += buffer_size; - if( len < buffer_size - std::max( pos, i ) && len <= std::abs( pos - i ) ) + unsigned i = pos - distance - 1; + bool fast; + if( pos <= distance ) + { i += dictionary_size; + fast = ( len <= dictionary_size - i && len <= i - pos ); } + else + fast = ( len < dictionary_size - pos && len <= pos - i ); + if( fast ) // no wrap, no overlap { - std::memcpy( buffer + pos, buffer + i, len ); // no wrap, no overlap + std::memcpy( buffer + pos, buffer + i, len ); pos += len; } else for( ; len > 0; --len ) { buffer[pos] = buffer[i]; - if( ++pos >= buffer_size ) flush_data(); - if( ++i >= buffer_size ) i = 0; + if( ++pos >= dictionary_size ) flush_data(); + if( ++i >= dictionary_size ) i = 0; } } @@ -268,8 +272,7 @@ public: partial_data_pos( 0 ), rdec( ibuf, ibuf_size ), dictionary_size( dict_size ), - buffer_size( std::max( 65536U, dictionary_size ) ), - buffer( new uint8_t[buffer_size] ), + buffer( new uint8_t[dictionary_size] ), pos( 0 ), stream_pos( 0 ), crc_( 0xFFFFFFFFU ), @@ -277,7 +280,7 @@ public: rep1( 0 ), rep2( 0 ), rep3( 0 ) - { buffer[buffer_size-1] = 0; } // prev_byte of first byte + { buffer[dictionary_size-1] = 0; } // prev_byte of first byte ~LZ_mtester() { delete[] buffer; } diff --git a/range_dec.cc b/range_dec.cc index d4a2b2c..c6ccb7a 100644 --- a/range_dec.cc +++ b/range_dec.cc @@ -48,16 +48,7 @@ int decompress_member( const int infd, const int outfd, rdec.read_data( header.data, File_header::size ); if( rdec.finished() ) // End Of File { pp( "File ends unexpectedly at member header." ); return 2; } - if( !header.verify_magic() ) - { pp( "Bad magic number (file not in lzip format)." ); return 2; } - if( !header.verify_version() ) - { - if( pp.verbosity() >= 0 ) - { pp(); - std::fprintf( stderr, "Version %d member format not supported.\n", - header.version() ); } - return 2; - } + if( !verify_header( header, pp ) ) return 2; const unsigned dictionary_size = header.dictionary_size(); if( dictionary_size < min_dictionary_size || dictionary_size > max_dictionary_size ) @@ -101,7 +92,12 @@ int list_file( const char * const input_filename, const Pretty_print & pp ) { const unsigned long long data_size = file_index.data_end(); const unsigned long long file_size = file_index.file_end(); + unsigned dictionary_size = 0; + for( long i = 0; i < file_index.members(); ++i ) + if( dictionary_size < file_index.dictionary_size( i ) ) + dictionary_size = file_index.dictionary_size( i ); pp( 0, stdout ); + show_header( dictionary_size, 1 ); if( data_size > 0 && file_size > 0 ) std::printf( "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ", (double)data_size / file_size, @@ -123,6 +119,10 @@ int list_file( const char * const input_filename, const Pretty_print & pp ) db.pos(), db.size(), mb.pos(), mb.size() ); } } + const long long trailing_size = file_index.file_size() - file_index.file_end(); + if( pp.verbosity() >= 1 && trailing_size > 0 ) + std::printf( " %lld bytes of trailing data at end of file.\n", + trailing_size ); } return 0; } diff --git a/repair.cc b/repair.cc index e83b843..e8588e3 100644 --- a/repair.cc +++ b/repair.cc @@ -41,10 +41,9 @@ bool gross_damage( const long long msize, const uint8_t * const mbuffer ) enum { maxlen = 6 }; // max number of consecutive identical bytes long i = File_header::size; const long end = msize - File_trailer::size - maxlen; - uint8_t byte; while( i < end ) { - byte = mbuffer[i]; + const uint8_t byte = mbuffer[i]; int len = 0; // does not count the first byte while( mbuffer[++i] == byte && ++len < maxlen ) {} if( len >= maxlen ) return true; diff --git a/split.cc b/split.cc index 2ffb359..fc92ea4 100644 --- a/split.cc +++ b/split.cc @@ -62,25 +62,6 @@ bool next_filename( std::string & output_filename, const int max_digits ) } -bool verify_header( const File_header & header, const Pretty_print & pp ) - { - if( !header.verify_magic() ) - { - pp( "Bad magic number (file not in lzip format)." ); - return false; - } - if( !header.verify_version() ) - { - if( pp.verbosity() >= 0 ) - { pp(); - std::fprintf( stderr, "Version %d member format not supported.\n", - header.version() ); } - return false; - } - return true; - } - - // Search forward from 'pos' for "LZIP" (Boyer-Moore algorithm) // Returns pos of found string or 'pos+size' if not found. // @@ -205,6 +186,25 @@ int do_split_file( const std::string & input_filename, uint8_t * & base_buffer, } // end namespace +bool verify_header( const File_header & header, const Pretty_print & pp ) + { + if( !header.verify_magic() ) + { + pp( "Bad magic number (file not in lzip format)." ); + return false; + } + if( !header.verify_version() ) + { + if( pp.verbosity() >= 0 ) + { pp(); + std::fprintf( stderr, "Version %d member format not supported.\n", + header.version() ); } + return false; + } + return true; + } + + int split_file( const std::string & input_filename, const std::string & default_output_filename, const int verbosity, const bool force ) diff --git a/testsuite/check.sh b/testsuite/check.sh index 8a9c81d..537f5f0 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -57,13 +57,10 @@ fail=0 printf "testing lziprecover-%s..." "$2" -printf " in: Bad magic number (file not in lzip format).\n" > msg -"${LZIP}" -t in 2> out -if [ $? = 2 ] && cmp out msg ; then printf . ; else printf - ; fail=1 ; fi -printf " (stdin): Bad magic number (file not in lzip format).\n" > msg -"${LZIP}" -t < in 2> out -if [ $? = 2 ] && cmp out msg ; then printf . ; else printf - ; fail=1 ; fi -rm -f out msg +"${LZIP}" -tq in +if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIP}" -tq < in +if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi "${LZIP}" -cdq in if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi "${LZIP}" -cdq < in @@ -110,6 +107,15 @@ cmp in2 copy2 || fail=1 printf . printf "garbage" >> copy2.lz || framework_failure +rm -f copy2 +"${LZIP}" -atq copy2.lz +if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIP}" -atq < copy2.lz +if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIP}" -adkq copy2.lz +if [ $? = 2 ] && [ ! -e copy2 ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIP}" -adkq -o copy2 < copy2.lz +if [ $? = 2 ] && [ ! -e copy2 ] ; then printf . ; else printf - ; fail=1 ; fi printf "to be overwritten" > copy2 || framework_failure "${LZIP}" -df copy2.lz || fail=1 cmp in2 copy2 || fail=1 @@ -125,6 +131,8 @@ if [ $? = 2 ] && cmp "${f5b1}" copy ; then printf . ; else printf - ; fail=1 ; f "${LZIPRECOVER}" -D0 -iq "${f5b1_lz}" > copy if [ $? = 2 ] && cmp "${f5b1}" copy ; then printf . ; else printf - ; fail=1 ; fi +printf "\ntesting --merge ..." + rm -f copy.lz "${LZIPRECOVER}" -m -o copy.lz "${fox5_lz}" "${f5b1_lz}" if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi @@ -144,8 +152,8 @@ for i in "${f5b1_lz}" "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" ; do cmp "${fox5_lz}" copy.lz || fail=1 "${LZIPRECOVER}" -mf -o copy.lz "${i}" "${f5b2_lz}" || fail=1 cmp "${fox5_lz}" copy.lz || fail=1 + printf . done -printf . for i in "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" ; do "${LZIPRECOVER}" -mf -o copy.lz "${f5b1_lz}" "${f5b2_lz}" "${i}" || fail=1 @@ -160,21 +168,25 @@ for i in "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" ; do cmp "${fox5_lz}" copy.lz || fail=1 "${LZIPRECOVER}" -mf -o copy.lz "${i}" "${f5b2_lz}" "${f5b1_lz}" || fail=1 cmp "${fox5_lz}" copy.lz || fail=1 + printf . done -printf . "${LZIPRECOVER}" -mf -o copy.lz "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" || fail=1 cmp "${fox5_lz}" copy.lz || fail=1 +printf . "${LZIPRECOVER}" -mf -o copy.lz "${f5b1_lz}" "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" || fail=1 cmp "${fox5_lz}" copy.lz || fail=1 +printf . "${LZIPRECOVER}" -mf -o copy.lz "${f5b2_lz}" "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" || fail=1 cmp "${fox5_lz}" copy.lz || fail=1 +printf . "${LZIPRECOVER}" -mf -o copy.lz "${f5b1_lz}" "${f5b2_lz}" "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" || fail=1 cmp "${fox5_lz}" copy.lz || fail=1 printf . "${LZIPRECOVER}" -mf -o copy.lz "${bad1_lz}" "${bad2_lz}" || fail=1 cmp "${in_lz}" copy.lz || fail=1 +printf . "${LZIPRECOVER}" -mf -o copy.lz "${bad2_lz}" "${bad1_lz}" || fail=1 cmp "${in_lz}" copy.lz || fail=1 printf . @@ -185,14 +197,19 @@ cat "${bad2_lz}" "${in_lz}" "${bad2_lz}" "${bad2_lz}" > bad22.lz || framework_fa cat "${in_lz}" "${in_lz}" "${in_lz}" "${in_lz}" > copy4.lz || framework_failure "${LZIPRECOVER}" -mf -o out4.lz bad11.lz bad12.lz bad22.lz || fail=1 cmp out4.lz copy4.lz || fail=1 +printf . "${LZIPRECOVER}" -mf -o out4.lz bad11.lz bad22.lz bad12.lz || fail=1 cmp out4.lz copy4.lz || fail=1 +printf . "${LZIPRECOVER}" -mf -o out4.lz bad12.lz bad11.lz bad22.lz || fail=1 cmp out4.lz copy4.lz || fail=1 +printf . "${LZIPRECOVER}" -mf -o out4.lz bad12.lz bad22.lz bad11.lz || fail=1 cmp out4.lz copy4.lz || fail=1 +printf . "${LZIPRECOVER}" -mf -o out4.lz bad22.lz bad11.lz bad12.lz || fail=1 cmp out4.lz copy4.lz || fail=1 +printf . "${LZIPRECOVER}" -mf -o out4.lz bad22.lz bad12.lz bad11.lz || fail=1 cmp out4.lz copy4.lz || fail=1 printf . @@ -204,19 +221,24 @@ for i in "${bad1_lz}" "${bad2_lz}" ; do "${LZIPRECOVER}" -mf -o copy.lz "${j}" "${i}" || fail=1 cmp "${in_lz}" copy.lz || fail=1 done + printf . done -printf . "${LZIPRECOVER}" -mf -o copy.lz "${bad3_lz}" "${bad4_lz}" "${bad5_lz}" || fail=1 cmp "${in_lz}" copy.lz || fail=1 +printf . "${LZIPRECOVER}" -mf -o copy.lz "${bad3_lz}" "${bad5_lz}" "${bad4_lz}" || fail=1 cmp "${in_lz}" copy.lz || fail=1 +printf . "${LZIPRECOVER}" -mf -o copy.lz "${bad4_lz}" "${bad3_lz}" "${bad5_lz}" || fail=1 cmp "${in_lz}" copy.lz || fail=1 +printf . "${LZIPRECOVER}" -mf -o copy.lz "${bad4_lz}" "${bad5_lz}" "${bad3_lz}" || fail=1 cmp "${in_lz}" copy.lz || fail=1 +printf . "${LZIPRECOVER}" -mf -o copy.lz "${bad5_lz}" "${bad3_lz}" "${bad4_lz}" || fail=1 cmp "${in_lz}" copy.lz || fail=1 +printf . "${LZIPRECOVER}" -mf -o copy.lz "${bad5_lz}" "${bad4_lz}" "${bad3_lz}" || fail=1 cmp "${in_lz}" copy.lz || fail=1 printf . @@ -227,18 +249,25 @@ cat "${bad5_lz}" "${bad3_lz}" "${bad4_lz}" "${in_lz}" > bad534.lz || framework_f cat "${in_lz}" "${in_lz}" "${in_lz}" "${in_lz}" > copy4.lz || framework_failure "${LZIPRECOVER}" -mf -o out4.lz bad345.lz bad453.lz bad534.lz || fail=1 cmp out4.lz copy4.lz || fail=1 +printf . "${LZIPRECOVER}" -mf -o out4.lz bad345.lz bad534.lz bad453.lz || fail=1 cmp out4.lz copy4.lz || fail=1 +printf . "${LZIPRECOVER}" -mf -o out4.lz bad453.lz bad345.lz bad534.lz || fail=1 cmp out4.lz copy4.lz || fail=1 +printf . "${LZIPRECOVER}" -mf -o out4.lz bad453.lz bad534.lz bad345.lz || fail=1 cmp out4.lz copy4.lz || fail=1 +printf . "${LZIPRECOVER}" -mf -o out4.lz bad534.lz bad345.lz bad453.lz || fail=1 cmp out4.lz copy4.lz || fail=1 +printf . "${LZIPRECOVER}" -mf -o out4.lz bad534.lz bad453.lz bad345.lz || fail=1 cmp out4.lz copy4.lz || fail=1 printf . +printf "\ntesting --repair ..." + rm -f copy.lz "${LZIPRECOVER}" -R -o copy.lz "${fox5_lz}" || fail=1 if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi @@ -250,6 +279,7 @@ if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi "${LZIPRECOVER}" -Rf -o copy.lz "${f5b1_lz}" || fail=1 cmp "${fox5_lz}" copy.lz || fail=1 +printf . "${LZIPRECOVER}" -Rf -o copy.lz "${bad1_lz}" || fail=1 cmp "${in_lz}" copy.lz || fail=1 printf . @@ -264,14 +294,17 @@ mv copy.lz copy.tlz || framework_failure "${LZIPRECOVER}" -R copy.tlz || fail=1 if [ $? = 0 ] && [ -e copy_fixed.tlz ] ; then printf . ; else printf - ; fail=1 ; fi +printf "\ntesting --split ..." + cat "${in_lz}" "${in_lz}" "${in_lz}" > copy || framework_failure printf "garbage" >> copy || fail=1 "${LZIPRECOVER}" -s -o copy.lz copy || fail=1 +printf . for i in 1 2 3 ; do "${LZIPRECOVER}" -cd rec${i}copy.lz > copy || fail=1 cmp in copy || fail=1 + printf . done -printf . echo if [ ${fail} = 0 ] ; then -- cgit v1.2.3