diff options
-rw-r--r-- | ChangeLog | 8 | ||||
-rw-r--r-- | INSTALL | 2 | ||||
-rw-r--r-- | Makefile.in | 9 | ||||
-rw-r--r-- | NEWS | 27 | ||||
-rw-r--r-- | README | 11 | ||||
-rwxr-xr-x | configure | 10 | ||||
-rw-r--r-- | decoder.cc | 16 | ||||
-rw-r--r-- | decoder.h | 8 | ||||
-rw-r--r-- | doc/lziprecover.1 | 2 | ||||
-rw-r--r-- | doc/lziprecover.info | 167 | ||||
-rw-r--r-- | doc/lziprecover.texinfo | 133 | ||||
-rw-r--r-- | file_index.cc | 136 | ||||
-rw-r--r-- | file_index.h | 35 | ||||
-rw-r--r-- | lzip.h | 12 | ||||
-rw-r--r-- | main.cc | 1 | ||||
-rw-r--r-- | merge.cc | 378 | ||||
-rw-r--r-- | range_dec.cc | 19 | ||||
-rw-r--r-- | repair.cc | 115 | ||||
-rw-r--r-- | split.cc | 5 | ||||
-rwxr-xr-x | testsuite/check.sh | 166 | ||||
-rw-r--r-- | testsuite/fox5.lz (renamed from testsuite/fox5_bad.lz) | bin | 400 -> 400 bytes | |||
-rw-r--r-- | testsuite/fox5_bad1.lz | bin | 0 -> 400 bytes | |||
-rw-r--r-- | testsuite/fox5_bad1.txt (renamed from testsuite/fox5_bad.txt) | 2 | ||||
-rw-r--r-- | testsuite/fox5_bad2.lz | bin | 0 -> 400 bytes | |||
-rw-r--r-- | testsuite/fox5_bad3.lz | bin | 0 -> 400 bytes | |||
-rw-r--r-- | testsuite/fox5_bad4.lz | bin | 0 -> 400 bytes | |||
-rw-r--r-- | testsuite/fox5_bad5.lz | bin | 0 -> 400 bytes | |||
-rw-r--r-- | testsuite/test.txt.lz | bin | 0 -> 11518 bytes | |||
-rw-r--r-- | testsuite/test_bad1.lz | bin | 11548 -> 11518 bytes | |||
-rw-r--r-- | testsuite/test_bad2.lz | bin | 11548 -> 11518 bytes | |||
-rw-r--r-- | testsuite/test_bad3.lz | bin | 11548 -> 11518 bytes | |||
-rw-r--r-- | testsuite/test_bad4.lz | bin | 11548 -> 11518 bytes | |||
-rw-r--r-- | testsuite/test_bad5.lz | bin | 11548 -> 11518 bytes | |||
-rw-r--r-- | testsuite/test_v0.lz | bin | 11540 -> 0 bytes | |||
-rw-r--r-- | testsuite/test_v1.lz | bin | 11548 -> 0 bytes | |||
-rw-r--r-- | testsuite/unzcrash.cc | 10 |
36 files changed, 785 insertions, 487 deletions
@@ -1,3 +1,11 @@ +2013-06-17 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.15-pre1 released. + * repair.cc: Repair multi-member files with up to one byte error + per member. + * merge.cc: Merge multi-member files. + * Added chapters 'Repairing Files' and 'Merging Files' to the manual. + 2013-05-31 Antonio Diaz Diaz <antonio@gnu.org> * Version 1.14 released. @@ -1,7 +1,7 @@ Requirements ------------ You will need a C++ compiler. -I use gcc 4.8.0 and 3.3.6, but the code should compile with any +I use gcc 4.8.1 and 3.3.6, but the code should compile with any standards compliant compiler. Gcc is available at http://gcc.gnu.org. diff --git a/Makefile.in b/Makefile.in index e769333..bb69807 100644 --- a/Makefile.in +++ b/Makefile.in @@ -42,7 +42,7 @@ file_index.o : lzip.h file_index.h main.o : arg_parser.h lzip.h decoder.h merge.o : lzip.h decoder.h file_index.h range_dec.o : lzip.h decoder.h file_index.h -repair.o : lzip.h +repair.o : lzip.h file_index.h split.o : lzip.h unzcrash.o : arg_parser.h Makefile @@ -115,12 +115,13 @@ dist : doc $(DISTNAME)/doc/$(pkgname).info \ $(DISTNAME)/doc/$(pkgname).texinfo \ $(DISTNAME)/testsuite/check.sh \ - $(DISTNAME)/testsuite/fox5_bad.lz \ - $(DISTNAME)/testsuite/fox5_bad.txt \ + $(DISTNAME)/testsuite/fox5.lz \ + $(DISTNAME)/testsuite/fox5_bad[1-5].lz \ + $(DISTNAME)/testsuite/fox5_bad1.txt \ $(DISTNAME)/testsuite/test.txt \ + $(DISTNAME)/testsuite/test.txt.lz \ $(DISTNAME)/testsuite/test921-1921.txt \ $(DISTNAME)/testsuite/test_bad[1-5].lz \ - $(DISTNAME)/testsuite/test_v[01].lz \ $(DISTNAME)/testsuite/unzcrash.cc \ $(DISTNAME)/*.h \ $(DISTNAME)/*.cc @@ -1,23 +1,10 @@ -Changes in version 1.14: +Changes in version 1.15: -The new option "-i, --ignore-errors", which in conjunction with "-D" -decompresses all the recoverable data in all members of a file without -having to split it first, has been added. +Lziprecover can now repair multi-member files with up to one byte error +per member, without having to split them first. 
-Option "-l, --list" now accepts more than one file. +Lziprecover can now merge multi-member files without having to split +them first even if some copies have the header and the trailer damaged. -Decompression time has been reduced by 12%. - -"--split" now uses as few digits as possible in the names of the files -produced, depending on the number of members in the input file. - -"--split" in verbose mode now shows the names of files being created. - -When decompressing or testing, file version is now shown only if -verbosity >= 4. - -"configure" now accepts options with a separate argument. - -The target "install-as-lzip" has been added to the Makefile. - -The target "install-bin" has been added to the Makefile. +The chapters "Repairing Files" and "Merging Files" have been added to +the manual. @@ -2,14 +2,15 @@ Description Lziprecover is a data recovery tool and decompressor for files in the lzip compressed data format (.lz) able to repair slightly damaged files, -recover badly damaged files from two or more copies, extract undamaged -members from multi-member files, decompress files and test integrity of -files. +recover badly damaged files from two or more copies, extract data from +damaged files, decompress files and test integrity of files. Lziprecover is able to recover or decompress files produced by any of the compressors in the lzip family; lzip, plzip, minilzip/lzlib, clzip -and pdlzip. This recovery capability contributes to make the lzip format -one of the best options for long-term data archiving. +and pdlzip. It makes lzip files resistant to bit-flip, one of the most +common forms of data corruption, and its recovery capabilities +contribute to making the lzip format one of the best options for +long-term data archiving. Lziprecover is able to efficiently extract a range of bytes from a multi-member file, because it only decompresses the members containing @@ -6,7 +6,7 @@ # to copy, distribute and modify it. 
pkgname=lziprecover -pkgversion=1.14 +pkgversion=1.15-pre1 progname=lziprecover srctrigger=doc/lziprecover.texinfo @@ -100,7 +100,7 @@ while [ $# != 0 ] ; do *=* | *-*-*) ;; *) echo "configure: unrecognized option: '${option}'" 1>&2 - echo "Try 'configure --help' for more information." + echo "Try 'configure --help' for more information." 1>&2 exit 1 ;; esac @@ -125,10 +125,8 @@ if [ -z "${srcdir}" ] ; then fi if [ ! -r "${srcdir}/${srctrigger}" ] ; then - exec 1>&2 - echo - echo "configure: Can't find sources in ${srcdir} ${srcdirtext}" - echo "configure: (At least ${srctrigger} is missing)." + echo "configure: Can't find sources in ${srcdir} ${srcdirtext}" 1>&2 + echo "configure: (At least ${srctrigger} is missing)." 1>&2 exit 1 fi @@ -126,7 +126,7 @@ void LZ_decoder::flush_data() bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const { File_trailer trailer; - const int trailer_size = File_trailer::size( member_version ); + const int trailer_size = File_trailer::size; const unsigned long long member_size = rdec.member_position() + trailer_size; bool error = false; @@ -144,8 +144,6 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const while( size < trailer_size ) trailer.data[size++] = 0; } - if( member_version == 0 ) trailer.member_size( member_size ); - if( !rdec.code_is_zero() ) { error = true; @@ -220,7 +218,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) while( !rdec.finished() ) { const int pos_state = data_position() & pos_state_mask; - if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) + if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit { const uint8_t prev_byte = get_prev_byte(); if( state.is_char() ) @@ -233,21 +231,21 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) else { int len; - if( rdec.decode_bit( bm_rep[state()] ) == 1 ) + if( rdec.decode_bit( bm_rep[state()] ) == 1 ) // 2nd bit { - if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) + if( rdec.decode_bit( bm_rep0[state()] ) 
== 0 ) // 3rd bit { - if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) + if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit { state.set_short_rep(); put_byte( get_byte( rep0 ) ); continue; } } else { unsigned distance; - if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) + if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit distance = rep1; else { - if( rdec.decode_bit( bm_rep2[state()] ) == 0 ) + if( rdec.decode_bit( bm_rep2[state()] ) == 0 ) // 5th bit distance = rep2; else { distance = rep3; rep3 = rep2; } @@ -176,11 +176,11 @@ public: match_byte <<= 1; const int match_bit = match_byte & 0x100; const int bit = decode_bit( bm1[match_bit+symbol] ); - symbol = ( symbol << 1 ) + bit; + symbol = ( symbol << 1 ) | bit; if( match_bit != bit << 8 ) { while( symbol < 0x100 ) - symbol = ( symbol << 1 ) + decode_bit( bm[symbol] ); + symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); break; } } @@ -213,7 +213,6 @@ class LZ_decoder int stream_pos; // first byte not yet written to file uint32_t crc_; const int outfd; // output file descriptor - const int member_version; unsigned long long stream_position() const { return partial_data_pos + stream_pos; } void flush_data(); @@ -273,8 +272,7 @@ public: pos( 0 ), stream_pos( 0 ), crc_( 0xFFFFFFFFU ), - outfd( ofd ), - member_version( header.version() ) + outfd( ofd ) { buffer[buffer_size-1] = 0; } // prev_byte of first_byte ~LZ_decoder() { delete[] buffer; } diff --git a/doc/lziprecover.1 b/doc/lziprecover.1 index f62094a..fce7640 100644 --- a/doc/lziprecover.1 +++ b/doc/lziprecover.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1. 
-.TH LZIPRECOVER "1" "May 2013" "Lziprecover 1.14" "User Commands" +.TH LZIPRECOVER "1" "June 2013" "Lziprecover 1.15-pre1" "User Commands" .SH NAME Lziprecover \- recovers data from damaged lzip files .SH SYNOPSIS diff --git a/doc/lziprecover.info b/doc/lziprecover.info index 83267d5..0d39838 100644 --- a/doc/lziprecover.info +++ b/doc/lziprecover.info @@ -12,16 +12,18 @@ File: lziprecover.info, Node: Top, Next: Introduction, Up: (dir) Lziprecover Manual ****************** -This manual is for Lziprecover (version 1.14, 31 May 2013). +This manual is for Lziprecover (version 1.15-pre1, 17 June 2013). * Menu: -* Introduction:: Purpose and features of lziprecover -* Invoking Lziprecover:: Command line interface -* File Format:: Detailed format of the compressed file -* Examples:: A small tutorial with examples -* Problems:: Reporting bugs -* Concept Index:: Index of concepts +* Introduction:: Purpose and features of lziprecover +* Invoking Lziprecover:: Command line interface +* Repairing Files:: Fixing bit-flip and similar errors +* Merging Files:: Fixing several damaged copies +* File Format:: Detailed format of the compressed file +* Examples:: A small tutorial with examples +* Problems:: Reporting bugs +* Concept Index:: Index of concepts Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. @@ -37,14 +39,15 @@ File: lziprecover.info, Node: Introduction, Next: Invoking Lziprecover, Prev: Lziprecover is a data recovery tool and decompressor for files in the lzip compressed data format (.lz) able to repair slightly damaged files, -recover badly damaged files from two or more copies, extract undamaged -members from multi-member files, decompress files and test integrity of -files. +recover badly damaged files from two or more copies, extract data from +damaged files, decompress files and test integrity of files. 
Lziprecover is able to recover or decompress files produced by any of the compressors in the lzip family; lzip, plzip, minilzip/lzlib, clzip -and pdlzip. This recovery capability contributes to make the lzip format -one of the best options for long-term data archiving. +and pdlzip. It makes lzip files resistant to bit-flip, one of the most +common forms of data corruption, and its recovery capabilities +contribute to make of the lzip format one of the best options for +long-term data archiving. Lziprecover is able to efficiently extract a range of bytes from a multi-member file, because it only decompresses the members containing @@ -61,19 +64,22 @@ damaged files themselves are never modified. When decompressing or testing file integrity, lziprecover behaves like lzip or lunzip. - If the files are too damaged for lziprecover to repair them, data -from damaged members can be partially recovered writing it to stdout as -shown in the following example (the resulting file may contain some -garbage data at the end): + If a file is too damaged for lziprecover to repair it, all the +recoverable data in all members of the file can be extracted with the +following command (the resulting file may contain errors and some +garbage data may be produced at the end of each member): - lziprecover -cd rec01file.lz > rec01file + lziprecover -D0 -i -o file -q file.lz If the cause of file corruption is damaged media, the combination GNU ddrescue + lziprecover is the best option for recovering data from multiple damaged copies. *Note ddrescue-example::, for an example. + Lziprecover is not a replacement for regular backups, but a last +line of defense for the case where the backups are also damaged. 
+ -File: lziprecover.info, Node: Invoking Lziprecover, Next: File Format, Prev: Introduction, Up: Top +File: lziprecover.info, Node: Invoking Lziprecover, Next: Repairing Files, Prev: Introduction, Up: Top 2 Invoking Lziprecover ********************** @@ -141,18 +147,10 @@ The format for running lziprecover is: `-m' `--merge' Try to produce a correct file merging the good parts of two or more - damaged copies. The copies must be single-member files. The merge - will fail if the copies have too many damaged areas or if the same - byte is damaged in all copies. If successful, a repaired copy is - written to the file `FILE_fixed.lz'. The exit status is 0 if the - file could be repaired, 2 otherwise. - - To give you an idea of its possibilities, when merging two copies - each of them with one damaged area affecting 1 percent of the - copy, the probability of obtaining a correct file is about 98 - percent. With three such copies the probability rises to 99.97 - percent. For large files with small errors, the probability - approaches 100 percent even with only two copies. + damaged copies. If successful, a repaired copy is written to the + file `FILE_fixed.lz'. The exit status is 0 if a correct file could + be produced, 2 otherwise. See the chapter Merging Files (*note + Merging Files::) for a complete description of the merge mode. `-o FILE' `--output=FILE' @@ -168,10 +166,12 @@ The format for running lziprecover is: `-R' `--repair' - Try to repair a small error, affecting only one byte, in a - single-member FILE. If successful, a repaired copy is written to - the file `FILE_fixed.lz'. `FILE' is not modified at all. The exit - status is 0 if the file could be repaired, 2 otherwise. + Try to repair a file with small errors (up to one byte error per + member). If successful, a repaired copy is written to the file + `FILE_fixed.lz'. `FILE' is not modified at all. The exit status + is 0 if the file could be repaired, 2 otherwise. 
See the chapter + Repairing Files (*note Repairing Files::) for a complete + description of the repair mode. `-s' `--split' @@ -227,9 +227,52 @@ invalid input file, 3 for an internal consistency error (eg, bug) which caused lziprecover to panic. -File: lziprecover.info, Node: File Format, Next: Examples, Prev: Invoking Lziprecover, Up: Top +File: lziprecover.info, Node: Repairing Files, Next: Merging Files, Prev: Invoking Lziprecover, Up: Top + +3 Repairing Files +***************** + +Lziprecover is able to repair files with small errors (up to one byte +error per member). The error may be located anywhere in the file except +in the header (first 6 bytes of each member) or in the `Member size' +field of the trailer (last 8 bytes of each member). This makes lzip +files resistant to bit-flip, one of the most common forms of data +corruption. + + Bit-flip happens when one bit in the file is changed from 0 to 1 or +vice versa. It may be caused by bad RAM or even by natural radiation. I +have seen a case of bit-flip in a file stored in an USB flash drive. + + +File: lziprecover.info, Node: Merging Files, Next: File Format, Prev: Repairing Files, Up: Top + +4 Merging Files +*************** + +If you have several copies of a file but all of them are too damaged to +repair them (*note Repairing Files::), lziprecover can try to produce a +correct file merging the good parts of the damaged copies. + + The merge may succeed even if some copies of the file have all the +headers and trailers damaged, as long as there is at least one copy of +every header and trailer intact, even if they are in different copies of +the file. + + The merge will fail if the damaged areas overlap (at least one byte +is damaged in all copies), or are adjacent and the boundary can't be +determined, or if the copies have too many damaged areas. 
+ + To give you an idea of its possibilities, when merging two copies +each of them with one damaged area affecting 1 percent of the copy, the +probability of obtaining a correct file is about 98 percent. With three +such copies the probability rises to 99.97 percent. For large files with +small errors, the probability approaches 100 percent even with only two +copies. + + +File: lziprecover.info, Node: File Format, Next: Examples, Prev: Merging Files, Up: Top -3 File Format +5 File Format ************* Perfection is reached, not when there is no longer anything to add, but @@ -302,7 +345,7 @@ additional information before, between, or after them. File: lziprecover.info, Node: Examples, Next: Problems, Prev: File Format, Up: Top -4 A small tutorial with examples +6 A small tutorial with examples ******************************** Example 1: Restore a regular file from its compressed version @@ -329,9 +372,8 @@ to decompressed byte 15000 (5000 bytes are produced). lziprecover -D 10000-15000 file.lz -Example 5: Repair a one-byte corruption in the single-member file -`file.lz'. (Indented lines are abridged error messages from -lziprecover). +Example 5: Repair small errors in the file `file.lz'. (Indented lines +are abridged diagnostic messages from lziprecover). lziprecover -v -R file.lz Copy of input file repaired successfully. @@ -365,31 +407,16 @@ error-checked merging of copies (*Note GNU ddrescue manual: Example 8: Recover the first volume of those created with the command `lzip -b 32MiB -S 650MB big_db' from two copies, `big_db1_00001.lz' and `big_db2_00001.lz', with member 07 damaged in the first copy, member 18 -damaged in the second copy, and member 12 damaged in both copies. Two -correct copies are produced and compared. 
- - lziprecover -s big_db1_00001.lz - lziprecover -s big_db2_00001.lz - lziprecover -t rec*big_db1_00001.lz - rec07big_db1_00001.lz: crc mismatch - rec12big_db1_00001.lz: crc mismatch - lziprecover -t rec*big_db2_00001.lz - rec12big_db2_00001.lz: crc mismatch - rec18big_db2_00001.lz: crc mismatch - lziprecover -m -v rec12big_db1_00001.lz rec12big_db2_00001.lz +damaged in the second copy, and member 12 damaged in both copies. The +correct file produced is saved in `big_db_00001.lz'. + + lziprecover -m -v -o big_db_00001.lz big_db1_00001.lz big_db2_00001.lz Input files merged successfully - cp rec07big_db2_00001.lz rec07big_db1_00001.lz - cp rec12big_db1_00001_fixed.lz rec12big_db1_00001.lz - cp rec12big_db1_00001_fixed.lz rec12big_db2_00001.lz - cp rec18big_db1_00001.lz rec18big_db2_00001.lz - cat rec*big_db1_00001.lz > big_db3_00001.lz - cat rec*big_db2_00001.lz > big_db4_00001.lz - zcmp big_db3_00001.lz big_db4_00001.lz File: lziprecover.info, Node: Problems, Next: Concept Index, Prev: Examples, Up: Top -5 Reporting Bugs +7 Reporting Bugs **************** There are probably bugs in lziprecover. There are certainly errors and @@ -415,19 +442,23 @@ Concept Index * file format: File Format. (line 6) * getting help: Problems. (line 6) * introduction: Introduction. (line 6) -* invoking lziprecover: Invoking Lziprecover. (line 6) +* invoking: Invoking Lziprecover. (line 6) +* merging files: Merging Files. (line 6) +* repairing files: Repairing Files. 
(line 6) Tag Table: Node: Top231 -Node: Introduction901 -Node: Invoking Lziprecover2685 -Node: File Format8447 -Node: Examples10938 -Ref: ddrescue-example12158 -Node: Problems13938 -Node: Concept Index14488 +Node: Introduction1032 +Node: Invoking Lziprecover3052 +Node: Repairing Files8489 +Node: Merging Files9208 +Node: File Format10338 +Node: Examples12822 +Ref: ddrescue-example14024 +Node: Problems15134 +Node: Concept Index15684 End Tag Table diff --git a/doc/lziprecover.texinfo b/doc/lziprecover.texinfo index 142cdae..54834e9 100644 --- a/doc/lziprecover.texinfo +++ b/doc/lziprecover.texinfo @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 31 May 2013 -@set VERSION 1.14 +@set UPDATED 17 June 2013 +@set VERSION 1.15-pre1 @dircategory Data Compression @direntry @@ -35,12 +35,14 @@ This manual is for Lziprecover (version @value{VERSION}, @value{UPDATED}). @menu -* Introduction:: Purpose and features of lziprecover -* Invoking Lziprecover:: Command line interface -* File Format:: Detailed format of the compressed file -* Examples:: A small tutorial with examples -* Problems:: Reporting bugs -* Concept Index:: Index of concepts +* Introduction:: Purpose and features of lziprecover +* Invoking Lziprecover:: Command line interface +* Repairing Files:: Fixing bit-flip and similar errors +* Merging Files:: Fixing several damaged copies +* File Format:: Detailed format of the compressed file +* Examples:: A small tutorial with examples +* Problems:: Reporting bugs +* Concept Index:: Index of concepts @end menu @sp 1 @@ -56,14 +58,15 @@ to copy, distribute and modify it. Lziprecover is a data recovery tool and decompressor for files in the lzip compressed data format (.lz) able to repair slightly damaged files, -recover badly damaged files from two or more copies, extract undamaged -members from multi-member files, decompress files and test integrity of -files. 
+recover badly damaged files from two or more copies, extract data from +damaged files, decompress files and test integrity of files. Lziprecover is able to recover or decompress files produced by any of the compressors in the lzip family; lzip, plzip, minilzip/lzlib, clzip -and pdlzip. This recovery capability contributes to make the lzip format -one of the best options for long-term data archiving. +and pdlzip. It makes lzip files resistant to bit-flip, one of the most +common forms of data corruption, and its recovery capabilities +contribute to making the lzip format one of the best options for +long-term data archiving. Lziprecover is able to efficiently extract a range of bytes from a multi-member file, because it only decompresses the members containing @@ -80,23 +83,26 @@ damaged files themselves are never modified. When decompressing or testing file integrity, lziprecover behaves like lzip or lunzip. -If the files are too damaged for lziprecover to repair them, data from -damaged members can be partially recovered writing it to stdout as shown -in the following example (the resulting file may contain some garbage -data at the end): +If a file is too damaged for lziprecover to repair it, all the +recoverable data in all members of the file can be extracted with the +following command (the resulting file may contain errors and some +garbage data may be produced at the end of each member): @example -lziprecover -cd rec01file.lz > rec01file +lziprecover -D0 -i -o file -q file.lz @end example If the cause of file corruption is damaged media, the combination @w{GNU ddrescue + lziprecover} is the best option for recovering data from multiple damaged copies. @xref{ddrescue-example}, for an example. +Lziprecover is not a replacement for regular backups, but a last line of +defense for the case where the backups are also damaged. 
+ @node Invoking Lziprecover @chapter Invoking Lziprecover -@cindex invoking lziprecover +@cindex invoking The format for running lziprecover is: @@ -164,18 +170,10 @@ information about the members in the file. @item -m @itemx --merge Try to produce a correct file merging the good parts of two or more -damaged copies. The copies must be single-member files. The merge will -fail if the copies have too many damaged areas or if the same byte is -damaged in all copies. If successful, a repaired copy is written to the -file @samp{@var{file}_fixed.lz}. The exit status is 0 if the file could -be repaired, 2 otherwise. - -To give you an idea of its possibilities, when merging two copies each -of them with one damaged area affecting 1 percent of the copy, the -probability of obtaining a correct file is about 98 percent. With three -such copies the probability rises to 99.97 percent. For large files with -small errors, the probability approaches 100 percent even with only two -copies. +damaged copies. If successful, a repaired copy is written to the file +@samp{@var{file}_fixed.lz}. The exit status is 0 if a correct file could +be produced, 2 otherwise. See the chapter Merging Files (@pxref{Merging +Files}) for a complete description of the merge mode. @item -o @var{file} @itemx --output=@var{file} @@ -192,10 +190,12 @@ Quiet operation. Suppress all messages. @item -R @itemx --repair -Try to repair a small error, affecting only one byte, in a single-member -@var{file}. If successful, a repaired copy is written to the file +Try to repair a file with small errors (up to one byte error per member). +If successful, a repaired copy is written to the file @samp{@var{file}_fixed.lz}. @samp{@var{file}} is not modified at all. The exit status is 0 if the file could be repaired, 2 otherwise. +See the chapter Repairing Files (@pxref{Repairing Files}) for a complete +description of the repair mode. 
@item -s @itemx --split @@ -252,6 +252,47 @@ invalid input file, 3 for an internal consistency error (eg, bug) which caused lziprecover to panic. +@node Repairing Files +@chapter Repairing Files +@cindex repairing files + +Lziprecover is able to repair files with small errors (up to one byte +error per member). The error may be located anywhere in the file except +in the header (first 6 bytes of each member) or in the @samp{Member +size} field of the trailer (last 8 bytes of each member). This makes +lzip files resistant to bit-flip, one of the most common forms of data +corruption. + +Bit-flip happens when one bit in the file is changed from 0 to 1 or vice +versa. It may be caused by bad RAM or even by natural radiation. I have +seen a case of bit-flip in a file stored in a USB flash drive. + + +@node Merging Files +@chapter Merging Files +@cindex merging files + +If you have several copies of a file but all of them are too damaged to +be repaired (@pxref{Repairing Files}), lziprecover can try to produce a +correct file merging the good parts of the damaged copies. + +The merge may succeed even if some copies of the file have all the +headers and trailers damaged, as long as there is at least one copy of +every header and trailer intact, even if they are in different copies of +the file. + +The merge will fail if the damaged areas overlap (at least one byte is +damaged in all copies), or are adjacent and the boundary can't be +determined, or if the copies have too many damaged areas. + +To give you an idea of its possibilities, when merging two copies each +of them with one damaged area affecting 1 percent of the copy, the +probability of obtaining a correct file is about 98 percent. With three +such copies the probability rises to 99.97 percent. For large files with +small errors, the probability approaches 100 percent even with only two +copies. 
+ + @node File Format @chapter File Format @cindex file format @@ -368,9 +409,8 @@ lziprecover -D 10000-15000 file.lz @sp 1 @noindent -Example 5: Repair a one-byte corruption in the single-member file -@samp{file.lz}. (Indented lines are abridged error messages from -lziprecover). +Example 5: Repair small errors in the file @samp{file.lz}. (Indented +lines are abridged diagnostic messages from lziprecover). @example lziprecover -v -R file.lz @@ -422,27 +462,12 @@ Example 8: Recover the first volume of those created with the command @w{@code{lzip -b 32MiB -S 650MB big_db}} from two copies, @samp{big_db1_00001.lz} and @samp{big_db2_00001.lz}, with member 07 damaged in the first copy, member 18 damaged in the second copy, and -member 12 damaged in both copies. Two correct copies are produced and -compared. +member 12 damaged in both copies. The correct file produced is saved in +@samp{big_db_00001.lz}. @example -lziprecover -s big_db1_00001.lz -lziprecover -s big_db2_00001.lz -lziprecover -t rec*big_db1_00001.lz - rec07big_db1_00001.lz: crc mismatch - rec12big_db1_00001.lz: crc mismatch -lziprecover -t rec*big_db2_00001.lz - rec12big_db2_00001.lz: crc mismatch - rec18big_db2_00001.lz: crc mismatch -lziprecover -m -v rec12big_db1_00001.lz rec12big_db2_00001.lz +lziprecover -m -v -o big_db_00001.lz big_db1_00001.lz big_db2_00001.lz Input files merged successfully -cp rec07big_db2_00001.lz rec07big_db1_00001.lz -cp rec12big_db1_00001_fixed.lz rec12big_db1_00001.lz -cp rec12big_db1_00001_fixed.lz rec12big_db2_00001.lz -cp rec18big_db1_00001.lz rec18big_db2_00001.lz -cat rec*big_db1_00001.lz > big_db3_00001.lz -cat rec*big_db2_00001.lz > big_db4_00001.lz -zcmp big_db3_00001.lz big_db4_00001.lz @end example diff --git a/file_index.cc b/file_index.cc index 41bee41..997003a 100644 --- a/file_index.cc +++ b/file_index.cc @@ -52,21 +52,32 @@ const char * format_num( unsigned long long num, } -File_index::File_index( const int infd ) : retval_( 0 ) +Block Block::split( const 
long long pos ) + { + if( pos_ < pos && end() > pos ) + { + const Block b( pos_, pos - pos_ ); + pos_ = pos; size_ -= b.size_; + return b; + } + return Block( 0, 0 ); + } + + +File_index::File_index( const int infd ) + : + isize( lseek( infd, 0, SEEK_END ) ), retval_( 0 ) { - const long long isize = lseek( infd, 0, SEEK_END ); if( isize < 0 ) { error_ = "Input file is not seekable :"; error_ += std::strerror( errno ); retval_ = 1; return; } + if( isize < min_member_size ) + { error_ = "Input file is too short."; retval_ = 2; return; } if( isize > INT64_MAX ) { error_ = "Input file is too long (2^63 bytes or more)."; retval_ = 2; return; } - long long pos = isize; // always points to a header or EOF - File_header header; - File_trailer trailer; - if( isize < min_member_size ) - { error_ = "Input file is too short."; retval_ = 2; return; } + File_header header; if( seek_read( infd, header.data, File_header::size, 0 ) != File_header::size ) { error_ = "Error reading member header :"; error_ += std::strerror( errno ); retval_ = 1; return; } @@ -77,10 +88,12 @@ File_index::File_index( const int infd ) : retval_( 0 ) { error_ = "Version "; error_ += format_num( header.version() ); error_ += "member format not supported."; retval_ = 2; return; } + long long pos = isize; // always points to a header or to EOF while( pos >= min_member_size ) { - if( seek_read( infd, trailer.data, File_trailer::size(), - pos - File_trailer::size() ) != File_trailer::size() ) + File_trailer trailer; + if( seek_read( infd, trailer.data, File_trailer::size, + pos - File_trailer::size ) != File_trailer::size ) { error_ = "Error reading member trailer :"; error_ += std::strerror( errno ); retval_ = 1; break; } const long long member_size = trailer.member_size(); @@ -105,14 +118,113 @@ File_index::File_index( const int infd ) : retval_( 0 ) if( member_vector.size() == 0 && isize - pos > File_header::size && seek_read( infd, header.data, File_header::size, pos ) == File_header::size && 
header.verify_magic() && header.verify_version() ) - { // last trailer is corrupt - error_ = "Member size in trailer is corrupt at pos "; - error_ += format_num( isize - 8 ); retval_ = 2; break; + { + error_ = "Last member in input file is truncated or corrupt."; + retval_ = 2; break; + } + pos -= member_size; + member_vector.push_back( Member( 0, trailer.data_size(), + pos, member_size ) ); + } + if( pos != 0 || member_vector.size() == 0 ) + { + member_vector.clear(); + if( retval_ == 0 ) { error_ = "Can't create file index."; retval_ = 2; } + return; + } + std::reverse( member_vector.begin(), member_vector.end() ); + for( unsigned i = 0; i < member_vector.size() - 1; ++i ) + { + const long long end = member_vector[i].dblock.end(); + if( end < 0 || end > INT64_MAX ) + { + member_vector.clear(); + error_ = "Data in input file is too long (2^63 bytes or more)."; + retval_ = 2; return; + } + member_vector[i+1].dblock.pos( end ); + } + } + + +// All files in 'infd_vector' must be at least 'fsize' bytes long. 
+File_index::File_index( const std::vector< int > & infd_vector, + const long long fsize ) + : + isize( fsize ), retval_( 0 ) + { + if( isize < 0 ) + { error_ = "Input file is not seekable :"; + error_ += std::strerror( errno ); retval_ = 1; return; } + if( isize < min_member_size ) + { error_ = "Input file is too short."; retval_ = 2; return; } + if( isize > INT64_MAX ) + { error_ = "Input file is too long (2^63 bytes or more)."; + retval_ = 2; return; } + + const int files = infd_vector.size(); + File_header header; + bool done = false; + for( int i = 0; i < files && !done; ++i ) + { + const int infd = infd_vector[i]; + if( seek_read( infd, header.data, File_header::size, 0 ) != File_header::size ) + { error_ = "Error reading member header :"; + error_ += std::strerror( errno ); retval_ = 1; return; } + if( header.verify_magic() && header.verify_version() ) done = true; + } + if( !done ) + { error_ = "Bad magic number (file not in lzip format)."; + retval_ = 2; return; } + + long long pos = isize; // always points to a header or to EOF + while( pos >= min_member_size ) + { + long long member_size; + File_trailer trailer; + done = false; + for( int it = 0; it < files && !done; ++it ) + { + const int tfd = infd_vector[it]; + if( seek_read( tfd, trailer.data, File_trailer::size, + pos - File_trailer::size ) != File_trailer::size ) + { error_ = "Error reading member trailer :"; + error_ += std::strerror( errno ); retval_ = 1; goto error; } + member_size = trailer.member_size(); + if( member_size >= min_member_size && member_size <= pos ) + for( int ih = 0; ih < files && !done; ++ih ) + { + const int hfd = infd_vector[ih]; + if( seek_read( hfd, header.data, File_header::size, + pos - member_size ) != File_header::size ) + { error_ = "Error reading member header :"; + error_ += std::strerror( errno ); retval_ = 1; goto error; } + if( header.verify_magic() && header.verify_version() ) done = true; + } + } + if( !done ) + { + if( member_vector.size() == 0 ) // maybe 
trailing garbage + { --pos; continue; } + error_ = "Member size in trailer may be corrupt at pos "; + error_ += format_num( pos - 8 ); retval_ = 2; break; } + if( member_vector.size() == 0 && isize - pos > File_header::size ) + for( int i = 0; i < files; ++i ) + { + const int infd = infd_vector[i]; + if( seek_read( infd, header.data, File_header::size, pos ) == File_header::size && + header.verify_magic() && header.verify_version() ) + { + error_ = "Last member in input file is truncated or corrupt."; + retval_ = 2; goto error; + } + } pos -= member_size; member_vector.push_back( Member( 0, trailer.data_size(), pos, member_size ) ); } +error: if( pos != 0 || member_vector.size() == 0 ) { member_vector.clear(); diff --git a/file_index.h b/file_index.h index 2f055b1..92cf11c 100644 --- a/file_index.h +++ b/file_index.h @@ -25,7 +25,8 @@ class Block long long pos_, size_; // pos + size <= INT64_MAX public: - Block( const long long p, const long long s ) : pos_( p ), size_( s ) {} + Block( const long long p, const long long s ) + : pos_( p ), size_( s ) {} long long pos() const { return pos_; } long long size() const { return size_; } @@ -34,9 +35,17 @@ public: void pos( const long long p ) { pos_ = p; } void size( const long long s ) { size_ = s; } + bool operator==( const Block & b ) const + { return pos_ == b.pos_ && size_ == b.size_; } + bool operator!=( const Block & b ) const + { return pos_ != b.pos_ || size_ != b.size_; } + + bool operator<( const Block & b ) const { return pos_ < b.pos_; } + bool overlaps( const Block & b ) const { return ( pos_ < b.end() && b.pos_ < end() ); } void shift( Block & b ) { ++size_; ++b.pos_; --b.size_; } + Block split( const long long pos ); }; @@ -49,18 +58,35 @@ class File_index Member( const long long dp, const long long ds, const long long mp, const long long ms ) : dblock( dp, ds ), mblock( mp, ms ) {} + + bool operator==( const Member & m ) const { return ( mblock == m.mblock ); } + bool operator!=( const Member & m ) const 
{ return ( mblock != m.mblock ); } }; std::vector< Member > member_vector; std::string error_; + long long isize; int retval_; public: - File_index( const int infd ); + File_index() : error_( "No index." ), isize( 0 ), retval_( 2 ) {} + explicit File_index( const int infd ); + File_index( const std::vector< int > & infd_vector, const long long fsize ); + int members() const { return member_vector.size(); } const std::string & error() const { return error_; } int retval() const { return retval_; } + bool operator==( const File_index & fi ) const + { + if( retval_ || fi.retval_ || isize != fi.isize || + member_vector.size() != fi.member_vector.size() ) return false; + for( unsigned i = 0; i < member_vector.size(); ++i ) + if( member_vector[i] != fi.member_vector[i] ) return false; + return true; + } + bool operator!=( const File_index & fi ) const { return !( *this == fi ); } + long long data_end() const { if( member_vector.size() ) return member_vector.back().dblock.end(); else return 0; } @@ -69,11 +95,14 @@ public: { if( member_vector.size() ) return member_vector.back().mblock.end(); else return 0; } + // total size including trailing garbage (if any) + long long file_size() const + { if( isize >= 0 ) return isize; else return 0; } + const Block & dblock( const int i ) const { return member_vector[i].dblock; } const Block & mblock( const int i ) const { return member_vector[i].mblock; } - int members() const { return (int)member_vector.size(); } }; @@ -195,7 +195,7 @@ struct File_header { return ( std::memcmp( data, magic_string, 4 ) == 0 ); } uint8_t version() const { return data[4]; } - bool verify_version() const { return ( data[4] <= 1 ); } + bool verify_version() const { return ( data[4] == 1 ); } unsigned dictionary_size() const { @@ -231,8 +231,7 @@ struct File_trailer // 4-11 size of the uncompressed data // 12-19 member size including header and trailer - static int size( const int version = 1 ) - { return ( ( version >= 1 ) ? 
20 : 12 ); } + enum { size = 20 }; unsigned data_crc() const { @@ -301,16 +300,15 @@ void cleanup_and_fail( const std::string & output_filename, const int outfd, const int retval ); bool copy_file( const int infd, const int outfd, const long long max_size = -1 ); -bool try_decompress( const int fd, const unsigned long long file_size, - long long * failure_posp = 0 ); +bool try_decompress_member( const int fd, const unsigned long long msize, + long long * failure_posp = 0 ); bool verify_header( const File_header & header, const int verbosity ); -bool verify_single_member( const int fd, const long long file_size, - const int verbosity ); int merge_files( const std::vector< std::string > & filenames, const std::string & output_filename, const int verbosity, const bool force ); // defined in range_dec.cc +bool safe_seek( const int fd, const long long pos ); int list_files( const std::vector< std::string > & filenames, const int verbosity ); int range_decompress( const std::string & input_filename, @@ -113,7 +113,6 @@ void show_help() " -R, --repair try to repair a small error in file\n" " -s, --split split multi-member file in single-member files\n" " -t, --test test compressed file integrity\n" -// " -u, --update convert file from version 0 to version 1\n" " -v, --verbose be verbose (a 2nd -v gives more)\n" "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" @@ -35,70 +35,107 @@ namespace { -bool copy_and_diff_file( const std::vector< int > & infd_vector, - const int outfd, std::vector< Block > & block_vector ) +// Add 'bv' to 'block_vector' splitting blocks as needed to keep all the +// edges (pos and end of every block). +// 'block_vector' contains the result. 'bv' is destroyed. 
+void combine( std::vector< Block > & block_vector, std::vector< Block > & bv ) { + if( block_vector.empty() ) { block_vector.swap( bv ); return; } + unsigned i1 = 0, i2 = 0; + while( i1 < block_vector.size() && i2 < bv.size() ) + { + Block & b1 = block_vector[i1]; + Block & b2 = bv[i2]; + if( b1.overlaps( b2 ) ) + { + if( b1 < b2 ) + { + Block b = b1.split( b2.pos() ); + block_vector.insert( block_vector.begin() + i1, b ); ++i1; + } + else if( b2 < b1 ) + { + Block b( b2.pos(), b1.pos() - b2.pos() ); + b2.split( b1.pos() ); + block_vector.insert( block_vector.begin() + i1, b ); ++i1; + } + else if( b1.end() < b2.end() ) { b2.split( b1.end() ); ++i1; } + else if( b2.end() < b1.end() ) + { + Block b = b1.split( b2.end() ); + block_vector.insert( block_vector.begin() + i1, b ); ++i1; ++i2; + } + else { ++i1; ++i2; } // blocks are identical + } + else if( b1 < b2 ) ++i1; + else { block_vector.insert( block_vector.begin() + i1, b2 ); ++i1; ++i2; } + } + if( i2 < bv.size() ) // tail copy + block_vector.insert( block_vector.end(), bv.begin() + i2, bv.end() ); + } + + +bool diff_member( const long long mpos, const long long msize, + const std::vector< int > & infd_vector, + std::vector< Block > & block_vector ) + { + const int files = infd_vector.size(); const int buffer_size = 65536; - std::vector< uint8_t * > buffer_vector( infd_vector.size() ); - for( unsigned i = 0; i < infd_vector.size(); ++i ) - buffer_vector[i] = new uint8_t[buffer_size]; - Block b( 0, 0 ); - long long partial_pos = 0; - int equal_bytes = 0; - bool error = false; + uint8_t * const buffer1 = new uint8_t[buffer_size]; + uint8_t * const buffer2 = new uint8_t[buffer_size]; - while( true ) + bool error = false; + for( int i1 = 0; i1 + 1 < files && !error; ++i1 ) { - const int rd = readblock( infd_vector[0], buffer_vector[0], buffer_size ); - if( rd != buffer_size && errno ) - { show_error( "Error reading input file", errno ); error = true; break; } - if( rd > 0 ) + for( int i2 = i1 + 1; i2 < files && 
!error; ++i2 ) { - for( unsigned i = 1; i < infd_vector.size(); ++i ) - if( readblock( infd_vector[i], buffer_vector[i], rd ) != rd ) - { show_error( "Error reading input file", errno ); - error = true; break; } - if( error ) break; - const int wr = writeblock( outfd, buffer_vector[0], rd ); - if( wr != rd ) - { show_error( "Error writing output file", errno ); - error = true; break; } - for( int i = 0; i < rd; ++i ) + std::vector< Block > bv; + long long partial_pos = 0; + const int fd1 = infd_vector[i1], fd2 = infd_vector[i2]; + int begin = -1; // begin of block. -1 means no block + bool prev_equal = true; + if( !safe_seek( fd1, mpos ) || !safe_seek( fd2, mpos ) ) + { error = true; break; } + + while( msize > partial_pos ) { - while( i < rd && b.pos() == 0 ) - { - for( unsigned j = 1; j < infd_vector.size(); ++j ) - if( buffer_vector[0][i] != buffer_vector[j][i] ) - { b.pos( partial_pos + i ); break; } // begin block - ++i; - } - while( i < rd && b.pos() > 0 ) + const int size = std::min( (long long)buffer_size, msize - partial_pos ); + const int rd = readblock( fd1, buffer1, size ); + if( rd != size && errno ) + { show_error( "Error reading input file", errno ); error = true; break; } + if( rd > 0 ) { - ++equal_bytes; - for( unsigned j = 1; j < infd_vector.size(); ++j ) - if( buffer_vector[0][i] != buffer_vector[j][i] ) - { equal_bytes = 0; break; } - if( equal_bytes >= 2 ) // end block + if( readblock( fd2, buffer2, rd ) != rd ) + { show_error( "Error reading input file", errno ); + error = true; break; } + for( int i = 0; i < rd; ++i ) { - b.size( partial_pos + i - ( equal_bytes - 1 ) - b.pos() ); - block_vector.push_back( b ); - b.pos( 0 ); - equal_bytes = 0; + if( buffer1[i] != buffer2[i] ) + { + prev_equal = false; + if( begin < 0 ) begin = partial_pos + i; // begin block + } + else if( !prev_equal ) prev_equal = true; + else if( begin >= 0 ) // end block + { + Block b( mpos + begin, partial_pos + i - 1 - begin ); + begin = -1; + bv.push_back( b ); + } } - 
++i; + partial_pos += rd; } + if( rd < buffer_size ) break; // EOF } - partial_pos += rd; + if( begin >= 0 ) // finish last block + { + Block b( mpos + begin, partial_pos - prev_equal - begin ); + bv.push_back( b ); + } + combine( block_vector, bv ); } - if( rd < buffer_size ) break; // EOF - } - if( b.pos() > 0 ) // finish last block - { - b.size( partial_pos - b.pos() ); - block_vector.push_back( b ); } - for( unsigned i = 0; i < infd_vector.size(); ++i ) - delete[] buffer_vector[i]; + delete[] buffer2; delete[] buffer1; return !error; } @@ -116,15 +153,16 @@ int ipow( const unsigned base, const unsigned exponent ) int open_input_files( const std::vector< std::string > & filenames, - std::vector< int > & infd_vector, long long & isize, - const int verbosity ) + std::vector< int > & infd_vector, + File_index & file_index, const int verbosity ) { + const int files = filenames.size(); bool identical = false; - for( unsigned i = 1; i < filenames.size(); ++i ) + for( int i = 1; i < files; ++i ) if( filenames[0] == filenames[i] ) { identical = true; break; } if( !identical ) - for( unsigned i = 0; i < filenames.size(); ++i ) + for( int i = 0; i < files; ++i ) { struct stat in_stats; ino_t st_ino0 = 0; @@ -137,15 +175,27 @@ int open_input_files( const std::vector< std::string > & filenames, } if( identical ) { show_error( "Two input files are the same." ); return 2; } - isize = 0; - for( unsigned i = 0; i < filenames.size(); ++i ) + long long isize = 0; + for( int i = 0; i < files; ++i ) { - const long long tmp = lseek( infd_vector[i], 0, SEEK_END ); - if( tmp < 0 ) + long long tmp; + const File_index fi( infd_vector[i] ); + if( fi.retval() == 0 ) // file format is intact + { + if( file_index.retval() != 0 ) file_index = fi; + else if( file_index != fi ) + { show_error( "Input files are different." 
); return 2; } + tmp = file_index.file_size(); + } + else // file format is damaged { - if( verbosity >= 0 ) - std::fprintf( stderr, "File '%s' is not seekable.\n", filenames[i].c_str() ); - return 1; + tmp = lseek( infd_vector[i], 0, SEEK_END ); + if( tmp < 0 ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "File '%s' is not seekable.\n", filenames[i].c_str() ); + return 1; + } } if( i == 0 ) { @@ -157,23 +207,33 @@ int open_input_files( const std::vector< std::string > & filenames, { show_error( "Sizes of input files are different." ); return 2; } } - for( unsigned i = 0; i < filenames.size(); ++i ) - if( !verify_single_member( infd_vector[i], isize, verbosity ) ) - return 2; + if( file_index.retval() != 0 ) + { + const File_index fi( infd_vector, isize ); + if( fi.retval() == 0 ) // file format could be recovered + file_index = fi; + else + { show_error( "Format damaged in all input files." ); return 2; } + } - for( unsigned i = 0; i < filenames.size(); ++i ) + for( int i = 0; i < files; ++i ) { - if( lseek( infd_vector[i], 0, SEEK_SET ) < 0 ) - { show_error( "Seek error in input file", errno ); return 1; } - if( try_decompress( infd_vector[i], isize ) ) + const int infd = infd_vector[i]; + bool error = false; + for( int j = 0; j < file_index.members(); ++j ) + { + const long long mpos = file_index.mblock( j ).pos(); + const long long msize = file_index.mblock( j ).size(); + if( !safe_seek( infd, mpos ) ) return 1; + if( !try_decompress_member( infd, msize ) ) { error = true; break; } + } + if( !error ) { if( verbosity >= 1 ) std::printf( "File '%s' has no errors. 
Recovery is not needed.\n", filenames[i].c_str() ); return 0; } - if( lseek( infd_vector[i], 0, SEEK_SET ) < 0 ) - { show_error( "Seek error in input file", errno ); return 1; } } return -1; } @@ -221,16 +281,15 @@ bool copy_file( const int infd, const int outfd, const long long max_size ) } -bool try_decompress( const int fd, const unsigned long long file_size, - long long * failure_posp ) +bool try_decompress_member( const int fd, const unsigned long long msize, + long long * failure_posp ) { try { Range_decoder rdec( fd ); File_header header; rdec.read_data( header.data, File_header::size ); if( !rdec.finished() && // End Of File - header.verify_magic() && - header.version() == 1 && + header.verify_magic() && header.verify_version() && header.dictionary_size() >= min_dictionary_size && header.dictionary_size() <= max_dictionary_size ) { @@ -238,7 +297,7 @@ bool try_decompress( const int fd, const unsigned long long file_size, Pretty_print dummy( "", -1 ); if( decoder.decode_member( dummy ) == 0 && - rdec.member_position() == file_size ) return true; + rdec.member_position() == msize ) return true; if( failure_posp ) *failure_posp = rdec.member_position(); } } @@ -259,12 +318,7 @@ bool verify_header( const File_header & header, const int verbosity ) show_error( "Bad magic number (file not in lzip format)." ); return false; } - if( header.version() == 0 ) - { - show_error( "Version 0 member format can't be recovered." 
); - return false; - } - if( header.version() != 1 ) + if( !header.verify_version() ) { if( verbosity >= 0 ) std::fprintf( stderr, "Version %d member format not supported.\n", @@ -275,116 +329,106 @@ bool verify_header( const File_header & header, const int verbosity ) } -bool verify_single_member( const int fd, const long long file_size, - const int verbosity ) - { - File_header header; - if( lseek( fd, 0, SEEK_SET ) < 0 || - readblock( fd, header.data, File_header::size ) != File_header::size ) - { show_error( "Error reading member header", errno ); return false; } - if( !verify_header( header, verbosity ) ) return false; - - File_trailer trailer; - if( lseek( fd, -File_trailer::size(), SEEK_END ) < 0 || - readblock( fd, trailer.data, File_trailer::size() ) != File_trailer::size() ) - { show_error( "Error reading member trailer", errno ); return false; } - const long long member_size = trailer.member_size(); - if( member_size != file_size ) - { - if( member_size < file_size && - lseek( fd, -member_size, SEEK_END ) > 0 && - readblock( fd, header.data, File_header::size ) == File_header::size && - verify_header( header, verbosity ) ) - show_error( "Input file has more than 1 member. Split it first." ); - else - show_error( "Member size in input file trailer is corrupt." 
); - return false; - } - return true; - } - - int merge_files( const std::vector< std::string > & filenames, const std::string & output_filename, const int verbosity, const bool force ) { - std::vector< int > infd_vector( filenames.size() ); - long long isize = 0; - const int retval = open_input_files( filenames, infd_vector, isize, verbosity ); + const int files = filenames.size(); + std::vector< int > infd_vector( files ); + File_index file_index; + const int retval = + open_input_files( filenames, infd_vector, file_index, verbosity ); if( retval >= 0 ) return retval; + if( !safe_seek( infd_vector[0], 0 ) ) return 1; const int outfd = open_outstream_rw( output_filename, force ); if( outfd < 0 ) return 1; - - // vector of data blocks differing among the copies of the input file. - std::vector< Block > block_vector; - if( !copy_and_diff_file( infd_vector, outfd, block_vector ) ) + if( !copy_file( infd_vector[0], outfd ) ) // copy whole file cleanup_and_fail( output_filename, outfd, 1 ); - if( block_vector.size() == 0 ) - { show_error( "Input files are identical. Recovery is not possible." ); - cleanup_and_fail( output_filename, outfd, 2 ); } - - const bool single_block = ( block_vector.size() == 1 ); - if( single_block && block_vector[0].size() < 2 ) - { show_error( "Input files have the same byte damaged." - " Try repairing one of them." 
); - cleanup_and_fail( output_filename, outfd, 2 ); } + for( int j = 0; j < file_index.members(); ++j ) + { + const long long mpos = file_index.mblock( j ).pos(); + const long long msize = file_index.mblock( j ).size(); + // vector of data blocks differing among the copies of the current member + std::vector< Block > block_vector; + if( !diff_member( mpos, msize, infd_vector, block_vector ) || + !safe_seek( outfd, mpos ) ) + cleanup_and_fail( output_filename, outfd, 1 ); + + if( block_vector.size() == 0 ) + { + if( file_index.members() > 1 && try_decompress_member( outfd, msize ) ) + continue; + show_error( "Input files are (partially) identical. Recovery is not possible." ); + cleanup_and_fail( output_filename, outfd, 2 ); + } - if( ipow( filenames.size(), block_vector.size() ) >= INT_MAX || - ( single_block && - ipow( filenames.size(), 2 ) >= INT_MAX / block_vector[0].size() ) ) - { show_error( "Input files are too damaged. Recovery is not possible." ); - cleanup_and_fail( output_filename, outfd, 2 ); } + const int size0 = block_vector[0].size(); + const bool single_block = ( block_vector.size() == 1 ); + if( ipow( files, block_vector.size() ) >= INT_MAX || + ( single_block && ipow( files, 2 ) >= INT_MAX / size0 ) ) + { show_error( "Input files are too damaged. Recovery is not possible." ); + cleanup_and_fail( output_filename, outfd, 2 ); } - const int shifts = ( single_block ? block_vector[0].size() - 1 : 1 ); - if( single_block ) - { - Block b( block_vector[0].pos() + 1, block_vector[0].size() - 1 ); - block_vector[0].size( 1 ); - block_vector.push_back( b ); - } + const int shifts = ( single_block && size0 > 1 ) ? 
size0 - 1 : 1; + if( single_block && size0 > 1 ) + { + Block b( block_vector[0].pos() + 1, size0 - 1 ); + block_vector[0].size( 1 ); + block_vector.push_back( b ); + } - const int base_variations = ipow( filenames.size(), block_vector.size() ); - const int variations = ( base_variations * shifts ) - 2; - bool done = false; - for( int var = 1; var <= variations; ++var ) - { - if( verbosity >= 1 ) + if( verbosity >= 1 && file_index.members() > 1 ) { - std::printf( "Trying variation %d of %d \r", var, variations ); + std::printf( "Merging member %d\n", j + 1 ); std::fflush( stdout ); } - int tmp = var; - for( unsigned i = 0; i < block_vector.size(); ++i ) + const int base_variations = ipow( files, block_vector.size() ); + const int variations = base_variations * shifts; + bool done = false; + for( int var = 0; var < variations; ++var ) { - const int infd = infd_vector[tmp % filenames.size()]; - tmp /= filenames.size(); - if( lseek( infd, block_vector[i].pos(), SEEK_SET ) < 0 || - lseek( outfd, block_vector[i].pos(), SEEK_SET ) < 0 || - !copy_file( infd, outfd, block_vector[i].size() ) ) - { show_error( "Error reading output file", errno ); - cleanup_and_fail( output_filename, outfd, 1 ); } + if( verbosity >= 1 ) + { + std::printf( "Trying variation %d of %d \r", var + 1, variations ); + std::fflush( stdout ); + } + int tmp = var; + for( unsigned i = 0; i < block_vector.size(); ++i ) + { + const int infd = infd_vector[tmp % files]; + tmp /= files; + if( lseek( infd, block_vector[i].pos(), SEEK_SET ) < 0 || + lseek( outfd, block_vector[i].pos(), SEEK_SET ) < 0 || + !copy_file( infd, outfd, block_vector[i].size() ) ) + { show_error( "Error reading output file", errno ); + cleanup_and_fail( output_filename, outfd, 1 ); } + } + if( !safe_seek( outfd, mpos ) ) + cleanup_and_fail( output_filename, outfd, 1 ); + if( try_decompress_member( outfd, msize ) ) + { done = true; break; } + if( var > 0 && var % base_variations == 0 ) + block_vector[0].shift( block_vector[1] ); + } + 
if( verbosity >= 1 ) std::printf( "\n" ); + if( !done ) + { + if( verbosity >= 2 ) + for( unsigned i = 0; i < block_vector.size(); ++i ) + std::fprintf( stderr, "area %2d from offset %6lld to %6lld\n", i + 1, + block_vector[i].pos(), block_vector[i].end() - 1 ); + show_error( "Some error areas overlap. Can't recover input file." ); + cleanup_and_fail( output_filename, outfd, 2 ); } - if( lseek( outfd, 0, SEEK_SET ) < 0 ) - { show_error( "Seek error in output file", errno ); - cleanup_and_fail( output_filename, outfd, 1 ); } - if( try_decompress( outfd, isize ) ) - { done = true; break; } - if( var % base_variations == 0 ) block_vector[0].shift( block_vector[1] ); } - if( verbosity >= 1 ) std::printf( "\n" ); if( close( outfd ) != 0 ) { show_error( "Error closing output file", errno ); cleanup_and_fail( output_filename, -1, 1 ); } - if( !done ) - { - show_error( "Some error areas overlap. Can't recover input file." ); - cleanup_and_fail( output_filename, -1, 2 ); - } if( verbosity >= 1 ) std::printf( "Input files merged successfully.\n" ); return 0; diff --git a/range_dec.cc b/range_dec.cc index 59be01f..27ceba3 100644 --- a/range_dec.cc +++ b/range_dec.cc @@ -101,13 +101,6 @@ void parse_range( const char * const ptr, Block & range ) } -bool safe_seek( const int fd, const long long pos ) - { - if( lseek( fd, pos, SEEK_SET ) == pos ) return true; - show_error( "Seek error", errno ); return false; - } - - int decompress_member( const int infd, const int outfd, const Pretty_print & pp, const unsigned long long mpos, @@ -170,7 +163,7 @@ int list_file( const std::string & input_filename, const Pretty_print & pp ) const int infd = open_instream( input_filename, &in_stats, true, true ); if( infd < 0 ) return 1; - File_index file_index( infd ); + const File_index file_index( infd ); close( infd ); if( file_index.retval() != 0 ) { show_error( file_index.error().c_str() ); return file_index.retval(); } @@ -208,6 +201,13 @@ int list_file( const std::string & input_filename, 
const Pretty_print & pp ) } // end namespace +bool safe_seek( const int fd, const long long pos ) + { + if( lseek( fd, pos, SEEK_SET ) == pos ) return true; + show_error( "Seek error", errno ); return false; + } + + int list_files( const std::vector< std::string > & filenames, const int verbosity ) { @@ -234,7 +234,7 @@ int range_decompress( const std::string & input_filename, const int infd = open_instream( input_filename, &in_stats, true, true ); if( infd < 0 ) return 1; - File_index file_index( infd ); + const File_index file_index( infd ); if( file_index.retval() != 0 ) { show_error( file_index.error().c_str() ); return file_index.retval(); } @@ -259,6 +259,7 @@ int range_decompress( const std::string & input_filename, else { outfd = open_outstream_rw( output_filename, force ); if( outfd < 0 ) return 1; } + Pretty_print pp( input_filename, verbosity ); int retval = 0; for( int i = 0; i < file_index.members(); ++i ) @@ -28,6 +28,7 @@ #include <sys/stat.h> #include "lzip.h" +#include "file_index.h" int seek_read( const int fd, uint8_t * const buf, const int size, @@ -55,72 +56,84 @@ int repair_file( const std::string & input_filename, struct stat in_stats; const int infd = open_instream( input_filename, &in_stats, true, true ); if( infd < 0 ) return 1; - const long long isize = lseek( infd, 0, SEEK_END ); - if( isize < 0 ) - { show_error( "Input file is not seekable", errno ); return 1; } - if( isize < min_member_size ) - { show_error( "Input file is too short." ); return 2; } - if( !verify_single_member( infd, isize, verbosity ) ) return 2; - if( lseek( infd, 0, SEEK_SET ) < 0 ) - { show_error( "Seek error in input file", errno ); return 1; } - - long long failure_pos = 0; - if( try_decompress( infd, isize, &failure_pos ) ) - { - if( verbosity >= 1 ) - std::printf( "Input file has no errors. 
Recovery is not needed.\n" ); - return 0; - } - if( failure_pos >= isize - 8 ) failure_pos = isize - 8 - 1; - if( failure_pos < File_header::size ) - { show_error( "Can't repair error in input file." ); return 2; } - - if( lseek( infd, 0, SEEK_SET ) < 0 ) - { show_error( "Seek error in input file", errno ); return 1; } - - const int outfd = open_outstream_rw( output_filename, force ); - if( outfd < 0 ) { close( infd ); return 1; } - if( !copy_file( infd, outfd ) ) - cleanup_and_fail( output_filename, outfd, 1 ); - - const long long min_pos = - std::max( (long long)File_header::size, failure_pos - 1000 ); - bool done = false; - for( long long pos = failure_pos; pos >= min_pos && !done ; --pos ) + + const File_index file_index( infd ); + if( file_index.retval() != 0 ) + { show_error( file_index.error().c_str() ); return file_index.retval(); } + + int outfd = -1; + for( int i = 0; i < file_index.members(); ++i ) { + const long long mpos = file_index.mblock( i ).pos(); + const long long msize = file_index.mblock( i ).size(); + if( !safe_seek( infd, mpos ) ) + cleanup_and_fail( output_filename, outfd, 1 ); + long long failure_pos = 0; + if( try_decompress_member( infd, msize, &failure_pos ) ) continue; + if( failure_pos >= msize - 8 ) failure_pos = msize - 8 - 1; + if( failure_pos < File_header::size ) + { show_error( "Can't repair error in input file." 
); + cleanup_and_fail( output_filename, outfd, 2 ); } + + if( outfd < 0 ) // first damaged member found + { + if( !safe_seek( infd, 0 ) ) return 1; + outfd = open_outstream_rw( output_filename, force ); + if( outfd < 0 ) { close( infd ); return 1; } + if( !copy_file( infd, outfd ) ) // copy whole file + cleanup_and_fail( output_filename, outfd, 1 ); + } + if( verbosity >= 1 ) { - std::printf( "Trying position %llu \r", pos ); + std::printf( "Repairing member %d\n", i + 1 ); std::fflush( stdout ); } - uint8_t byte; - if( seek_read( outfd, &byte, 1, pos ) != 1 ) - { show_error( "Error reading output file", errno ); - cleanup_and_fail( output_filename, outfd, 1 ); } - for( int i = 0; i < 256; ++i ) + const long long min_pos = + std::max( (long long)File_header::size, failure_pos - 1000 ); + bool done = false; + for( long long pos = failure_pos; pos >= min_pos && !done ; --pos ) { - ++byte; - if( seek_write( outfd, &byte, 1, pos ) != 1 || - lseek( outfd, 0, SEEK_SET ) < 0 ) - { show_error( "Error writing output file", errno ); + if( verbosity >= 1 ) + { + std::printf( "Trying position %llu \r", mpos + pos ); + std::fflush( stdout ); + } + uint8_t byte; + if( seek_read( outfd, &byte, 1, mpos + pos ) != 1 ) + { show_error( "Error reading output file", errno ); cleanup_and_fail( output_filename, outfd, 1 ); } - if( i == 255 ) break; - if( try_decompress( outfd, isize ) ) - { done = true; break; } + for( int i = 0; i < 256; ++i ) + { + ++byte; + if( seek_write( outfd, &byte, 1, mpos + pos ) != 1 || + lseek( outfd, mpos, SEEK_SET ) < 0 ) + { show_error( "Error writing output file", errno ); + cleanup_and_fail( output_filename, outfd, 1 ); } + if( i == 255 ) break; + if( try_decompress_member( outfd, msize ) ) + { done = true; break; } + } + } + if( verbosity >= 1 ) std::printf( "\n" ); + if( !done ) + { + show_error( "Error is larger than 1 byte. Can't repair input file." 
); + cleanup_and_fail( output_filename, outfd, 2 ); } } - if( verbosity >= 1 ) std::printf( "\n" ); + if( outfd < 0 ) + { + if( verbosity >= 1 ) + std::printf( "Input file has no errors. Recovery is not needed.\n" ); + return 0; + } if( close( outfd ) != 0 ) { show_error( "Error closing output file", errno ); cleanup_and_fail( output_filename, -1, 1 ); } - if( !done ) - { - show_error( "Error is larger than 1 byte. Can't repair input file." ); - cleanup_and_fail( output_filename, -1, 2 ); - } if( verbosity >= 1 ) std::printf( "Copy of input file repaired successfully.\n" ); return 0; @@ -90,7 +90,7 @@ int do_split_file( const std::string & input_filename, uint8_t * & base_buffer, const int verbosity, const bool force ) { const int hsize = File_header::size; - const int tsize = File_trailer::size(); + const int tsize = File_trailer::size; const int buffer_size = 65536; const int base_buffer_size = tsize + buffer_size + hsize; base_buffer = new uint8_t[base_buffer_size]; @@ -99,7 +99,8 @@ int do_split_file( const std::string & input_filename, uint8_t * & base_buffer, struct stat in_stats; const int infd = open_instream( input_filename, &in_stats, true, true ); if( infd < 0 ) return 1; - File_index file_index( infd ); + const File_index file_index( infd ); + if( file_index.retval() != 0 ) show_error( file_index.error().c_str() ); const int max_members = ( file_index.retval() ? 
999999 : file_index.members() ); int max_digits = 1; for( int i = max_members; i >= 10; i /= 10 ) ++max_digits; diff --git a/testsuite/check.sh b/testsuite/check.sh index ea6e768..4ac7f59 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -23,10 +23,15 @@ mkdir tmp cd "${objdir}"/tmp in="${testdir}"/test.txt -in_lz="${testdir}"/test_v1.lz +in_lz="${testdir}"/test.txt.lz inD="${testdir}"/test921-1921.txt -fox5="${testdir}"/fox5_bad.txt -fox5_lz="${testdir}"/fox5_bad.lz +fox5_lz="${testdir}"/fox5.lz +f5b1="${testdir}"/fox5_bad1.txt +f5b1_lz="${testdir}"/fox5_bad1.lz +f5b2_lz="${testdir}"/fox5_bad2.lz +f5b3_lz="${testdir}"/fox5_bad3.lz +f5b4_lz="${testdir}"/fox5_bad4.lz +f5b5_lz="${testdir}"/fox5_bad5.lz bad1_lz="${testdir}"/test_bad1.lz bad2_lz="${testdir}"/test_bad2.lz bad3_lz="${testdir}"/test_bad3.lz @@ -35,91 +40,140 @@ bad5_lz="${testdir}"/test_bad5.lz fail=0 # Description of test files for lziprecover: -# fox5_bad.lz: byte at offset 188 changed from 0x34 to 0x33 +# fox5_bad1.lz: byte at offset 62 changed from 0x50 to 0x70 (CRC) +# byte at offset 144 changed from 0x2D to 0x2E (data_size) +# byte at offset 188 changed from 0x34 to 0x33 (mid stream) +# byte at offset 247 changed from 0x2A to 0x2B (first byte) +# byte at offset 378 changed from 0xA0 to 0x20 (EOS marker) +# fox5_bad2.lz: [ 30- 49] --> zeroed; +# fox5_bad3.lz: [100-299] --> zeroed; +# fox5_bad4.lz: [250-349] --> zeroed; +# fox5_bad5.lz: [300-399] --> zeroed; # test_bad1.lz: byte at offset 67 changed from 0xCC to 0x33 -# test_bad2.lz: [ 34- 66) --> copy of bytes [ 68- 100) -# test_bad3.lz: [ 512-1536) --> zeroed; [2560-3584) --> zeroed -# test_bad4.lz: [3072-4096) --> random data; [4608-5632) --> zeroed -# test_bad5.lz: [1024-2048) --> random data; [5120-6144) --> random data +# test_bad2.lz: [ 34- 65] --> copy of bytes [ 68- 99] +# test_bad3.lz: [ 512-1535] --> zeroed; [2560-3583] --> zeroed +# test_bad4.lz: [3072-4095] --> random data; [4608-5631] --> zeroed +# test_bad5.lz: [1024-2047] --> 
random data; [5120-6143] --> random data printf "testing lziprecover-%s..." "$2" "${LZIPRECOVER}" -lq -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi "${LZIPRECOVER}" -mq "${bad1_lz}" -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi "${LZIPRECOVER}" -Rq -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi "${LZIPRECOVER}" -sq -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi -"${LZIP}" -t "${testdir}"/test_v0.lz || fail=1 -printf . -"${LZIP}" -cd "${testdir}"/test_v0.lz > copy || fail=1 -cmp "${in}" copy || fail=1 -printf . - -"${LZIP}" -t "${testdir}"/test_v1.lz || fail=1 -printf . -"${LZIP}" -cd "${testdir}"/test_v1.lz > copy || fail=1 +"${LZIP}" -t "${testdir}"/test.txt.lz || fail=1 +"${LZIP}" -cd "${testdir}"/test.txt.lz > copy || fail=1 cmp "${in}" copy || fail=1 printf . "${LZIPRECOVER}" -D 921-1921 -fo copy "${in_lz}" || fail=1 cmp "${inD}" copy || fail=1 -printf . "${LZIPRECOVER}" -D 921,1000 "${in_lz}" > copy || fail=1 cmp "${inD}" copy || fail=1 printf . -"${LZIPRECOVER}" -D0 -iq -fo copy "${fox5_lz}" -if [ $? = 2 ] && cmp "${fox5}" copy ; then printf . ; else fail=1 ; printf - ; fi -"${LZIPRECOVER}" -D0 -iq "${fox5_lz}" > copy -if [ $? = 2 ] && cmp "${fox5}" copy ; then printf . ; else fail=1 ; printf - ; fi - +"${LZIPRECOVER}" -D0 -iq "${f5b1_lz}" -fo copy +if [ $? = 2 ] && cmp "${f5b1}" copy ; then printf . ; else fail=1 ; printf - ; fi +"${LZIPRECOVER}" -D0 -iq "${f5b1_lz}" > copy +if [ $? = 2 ] && cmp "${f5b1}" copy ; then printf . ; else fail=1 ; printf - ; fi + +rm -f copy.lz +"${LZIPRECOVER}" -m -o copy.lz "${fox5_lz}" "${f5b1_lz}" +if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . 
; else fail=1 ; printf - ; fi +"${LZIPRECOVER}" -m -o copy.lz "${f5b1_lz}" "${fox5_lz}" +if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . ; else fail=1 ; printf - ; fi "${LZIPRECOVER}" -m -o copy.lz "${bad1_lz}" "${bad2_lz}" "${bad1_lz}" -q -if [ $? != 2 ] ; then fail=1 ; printf - ; else printf . ; fi -"${LZIPRECOVER}" -m -o copy.lz "${bad1_lz}" "${bad2_lz}" || fail=1 -"${LZIPRECOVER}" -df copy.lz || fail=1 -cmp "${in}" copy || fail=1 +if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else fail=1 ; printf - ; fi +"${LZIPRECOVER}" -m -o copy.lz "${f5b1_lz}" "${f5b5_lz}" -q +if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else fail=1 ; printf - ; fi +"${LZIPRECOVER}" -m -o copy.lz "${f5b3_lz}" "${f5b5_lz}" -q +if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else fail=1 ; printf - ; fi +"${LZIPRECOVER}" -m -o copy.lz "${bad3_lz}" "${bad4_lz}" -q +if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else fail=1 ; printf - ; fi + +for i in "${f5b1_lz}" "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" ; do + "${LZIPRECOVER}" -mf -o copy.lz "${f5b2_lz}" "${i}" || fail=1 + cmp "${fox5_lz}" copy.lz || fail=1 + "${LZIPRECOVER}" -mf -o copy.lz "${i}" "${f5b2_lz}" || fail=1 + cmp "${fox5_lz}" copy.lz || fail=1 + printf . +done + +for i in "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" ; do + "${LZIPRECOVER}" -mf -o copy.lz "${f5b1_lz}" "${f5b2_lz}" "${i}" || fail=1 + cmp "${fox5_lz}" copy.lz || fail=1 + "${LZIPRECOVER}" -mf -o copy.lz "${f5b1_lz}" "${i}" "${f5b2_lz}" || fail=1 + cmp "${fox5_lz}" copy.lz || fail=1 + "${LZIPRECOVER}" -mf -o copy.lz "${f5b2_lz}" "${f5b1_lz}" "${i}" || fail=1 + cmp "${fox5_lz}" copy.lz || fail=1 + "${LZIPRECOVER}" -mf -o copy.lz "${f5b2_lz}" "${i}" "${f5b1_lz}" || fail=1 + cmp "${fox5_lz}" copy.lz || fail=1 + "${LZIPRECOVER}" -mf -o copy.lz "${i}" "${f5b1_lz}" "${f5b2_lz}" || fail=1 + cmp "${fox5_lz}" copy.lz || fail=1 + "${LZIPRECOVER}" -mf -o copy.lz "${i}" "${f5b2_lz}" "${f5b1_lz}" || fail=1 + cmp "${fox5_lz}" copy.lz || fail=1 + printf . 
+done + +"${LZIPRECOVER}" -mf -o copy.lz "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" || fail=1 +cmp "${fox5_lz}" copy.lz || fail=1 +"${LZIPRECOVER}" -mf -o copy.lz "${f5b1_lz}" "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" || fail=1 +cmp "${fox5_lz}" copy.lz || fail=1 +"${LZIPRECOVER}" -mf -o copy.lz "${f5b2_lz}" "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" || fail=1 +cmp "${fox5_lz}" copy.lz || fail=1 +"${LZIPRECOVER}" -mf -o copy.lz "${f5b1_lz}" "${f5b2_lz}" "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" || fail=1 +cmp "${fox5_lz}" copy.lz || fail=1 printf . -"${LZIPRECOVER}" -m -o copy.lz "${bad2_lz}" "${bad1_lz}" || fail=1 -"${LZIPRECOVER}" -df copy.lz || fail=1 -cmp "${in}" copy || fail=1 + +"${LZIPRECOVER}" -mf -o copy.lz "${bad1_lz}" "${bad2_lz}" || fail=1 +cmp "${in_lz}" copy.lz || fail=1 +"${LZIPRECOVER}" -mf -o copy.lz "${bad2_lz}" "${bad1_lz}" || fail=1 +cmp "${in_lz}" copy.lz || fail=1 printf . for i in "${bad1_lz}" "${bad2_lz}" ; do for j in "${bad3_lz}" "${bad4_lz}" "${bad5_lz}" ; do - "${LZIPRECOVER}" -m -o copy.lz "${i}" "${j}" || fail=1 - "${LZIPRECOVER}" -df copy.lz || fail=1 - cmp "${in}" copy || fail=1 - printf . - "${LZIPRECOVER}" -m -o copy.lz "${j}" "${i}" || fail=1 - "${LZIPRECOVER}" -df copy.lz || fail=1 - cmp "${in}" copy || fail=1 + "${LZIPRECOVER}" -mf -o copy.lz "${i}" "${j}" || fail=1 + cmp "${in_lz}" copy.lz || fail=1 + "${LZIPRECOVER}" -mf -o copy.lz "${j}" "${i}" || fail=1 + cmp "${in_lz}" copy.lz || fail=1 printf . done done -"${LZIPRECOVER}" -m -o copy.lz "${bad3_lz}" "${bad4_lz}" "${bad5_lz}" || fail=1 -"${LZIPRECOVER}" -df copy.lz || fail=1 -cmp "${in}" copy || fail=1 +"${LZIPRECOVER}" -mf -o copy.lz "${bad3_lz}" "${bad4_lz}" "${bad5_lz}" || fail=1 +cmp "${in_lz}" copy.lz || fail=1 printf . 
-"${LZIPRECOVER}" -m -o copy.lz "${bad4_lz}" "${bad5_lz}" "${bad3_lz}" || fail=1 -"${LZIPRECOVER}" -df copy.lz || fail=1 -cmp "${in}" copy || fail=1 +"${LZIPRECOVER}" -mf -o copy.lz "${bad3_lz}" "${bad5_lz}" "${bad4_lz}" || fail=1 +cmp "${in_lz}" copy.lz || fail=1 printf . -"${LZIPRECOVER}" -m -o copy.lz "${bad5_lz}" "${bad3_lz}" "${bad4_lz}" || fail=1 -"${LZIPRECOVER}" -df copy.lz || fail=1 -cmp "${in}" copy || fail=1 +"${LZIPRECOVER}" -mf -o copy.lz "${bad4_lz}" "${bad3_lz}" "${bad5_lz}" || fail=1 +cmp "${in_lz}" copy.lz || fail=1 printf . - -"${LZIPRECOVER}" -R "${in_lz}" || fail=1 +"${LZIPRECOVER}" -mf -o copy.lz "${bad4_lz}" "${bad5_lz}" "${bad3_lz}" || fail=1 +cmp "${in_lz}" copy.lz || fail=1 +printf . +"${LZIPRECOVER}" -mf -o copy.lz "${bad5_lz}" "${bad3_lz}" "${bad4_lz}" || fail=1 +cmp "${in_lz}" copy.lz || fail=1 +printf . +"${LZIPRECOVER}" -mf -o copy.lz "${bad5_lz}" "${bad4_lz}" "${bad3_lz}" || fail=1 +cmp "${in_lz}" copy.lz || fail=1 printf . + +rm -f copy.lz +"${LZIPRECOVER}" -R -o copy.lz "${fox5_lz}" || fail=1 +if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . ; else fail=1 ; printf - ; fi "${LZIPRECOVER}" -R -o copy.lz "${bad2_lz}" -q -if [ $? != 2 ] ; then fail=1 ; printf - ; else printf . ; fi -"${LZIPRECOVER}" -R -o copy.lz "${bad1_lz}" || fail=1 -"${LZIPRECOVER}" -df copy.lz || fail=1 -cmp "${in}" copy || fail=1 +if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else fail=1 ; printf - ; fi +"${LZIPRECOVER}" -Rf -o copy.lz "${f5b1_lz}" || fail=1 +cmp "${fox5_lz}" copy.lz || fail=1 +printf . +"${LZIPRECOVER}" -Rf -o copy.lz "${bad1_lz}" || fail=1 +cmp "${in_lz}" copy.lz || fail=1 printf . 
cat "${in_lz}" "${in_lz}" "${in_lz}" > copy || framework_failure diff --git a/testsuite/fox5_bad.lz b/testsuite/fox5.lz Binary files differindex 8bfd314..3472f64 100644 --- a/testsuite/fox5_bad.lz +++ b/testsuite/fox5.lz diff --git a/testsuite/fox5_bad1.lz b/testsuite/fox5_bad1.lz Binary files differnew file mode 100644 index 0000000..a3b5658 --- /dev/null +++ b/testsuite/fox5_bad1.lz diff --git a/testsuite/fox5_bad.txt b/testsuite/fox5_bad1.txt index 813a71f..14e5367 100644 --- a/testsuite/fox5_bad.txt +++ b/testsuite/fox5_bad1.txt @@ -1,4 +1,4 @@ The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. -The quick brown fox c††zzzzzzzzzzzzzzzzzzzzzzThe quick brown fox jumps over the lazy dog. +The quick brown fox c††zzzzzzzzzzzzzzzzzzzzzzVhe quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. diff --git a/testsuite/fox5_bad2.lz b/testsuite/fox5_bad2.lz Binary files differnew file mode 100644 index 0000000..9993ea7 --- /dev/null +++ b/testsuite/fox5_bad2.lz diff --git a/testsuite/fox5_bad3.lz b/testsuite/fox5_bad3.lz Binary files differnew file mode 100644 index 0000000..ef58e47 --- /dev/null +++ b/testsuite/fox5_bad3.lz diff --git a/testsuite/fox5_bad4.lz b/testsuite/fox5_bad4.lz Binary files differnew file mode 100644 index 0000000..0474bb9 --- /dev/null +++ b/testsuite/fox5_bad4.lz diff --git a/testsuite/fox5_bad5.lz b/testsuite/fox5_bad5.lz Binary files differnew file mode 100644 index 0000000..6ec2740 --- /dev/null +++ b/testsuite/fox5_bad5.lz diff --git a/testsuite/test.txt.lz b/testsuite/test.txt.lz Binary files differnew file mode 100644 index 0000000..4db881a --- /dev/null +++ b/testsuite/test.txt.lz diff --git a/testsuite/test_bad1.lz b/testsuite/test_bad1.lz Binary files differindex 0b84883..687c8a1 100644 --- a/testsuite/test_bad1.lz +++ b/testsuite/test_bad1.lz diff --git a/testsuite/test_bad2.lz b/testsuite/test_bad2.lz Binary files differindex cce6a3c..c8ac08c 100644 --- 
a/testsuite/test_bad2.lz +++ b/testsuite/test_bad2.lz diff --git a/testsuite/test_bad3.lz b/testsuite/test_bad3.lz Binary files differ index a1676bb..3ed8936 100644 --- a/testsuite/test_bad3.lz +++ b/testsuite/test_bad3.lz diff --git a/testsuite/test_bad4.lz b/testsuite/test_bad4.lz Binary files differ index a8f89a3..c912871 100644 --- a/testsuite/test_bad4.lz +++ b/testsuite/test_bad4.lz diff --git a/testsuite/test_bad5.lz b/testsuite/test_bad5.lz Binary files differ index 73e0142..7c70365 100644 --- a/testsuite/test_bad5.lz +++ b/testsuite/test_bad5.lz diff --git a/testsuite/test_v0.lz b/testsuite/test_v0.lz Binary files differ deleted file mode 100644 index a09b1e8..0000000 --- a/testsuite/test_v0.lz +++ /dev/null diff --git a/testsuite/test_v1.lz b/testsuite/test_v1.lz Binary files differ deleted file mode 100644 index f1c79eb..0000000 --- a/testsuite/test_v1.lz +++ /dev/null diff --git a/testsuite/unzcrash.cc b/testsuite/unzcrash.cc index abf61bb..24defa6 100644 --- a/testsuite/unzcrash.cc +++ b/testsuite/unzcrash.cc @@ -58,11 +58,11 @@ void show_help() " -h, --help display this help and exit\n" " -V, --version output version information and exit\n" " -b, --bits=<range> test N-bit errors instead of full byte\n" - " -p, --position=<bytes> first byte position to test\n" + " -p, --position=<bytes> first byte position to test [default 0]\n" " -q, --quiet suppress all messages\n" - " -s, --size=<bytes> number of byte positions to test\n" + " -s, --size=<bytes> number of byte positions to test [all]\n" " -v, --verbose be verbose (a 2nd -v gives more)\n" - "Examples of <range>: 1 1,2,3 1-4 1,3-5,8\n" + "Examples of <range>: 1 1,2,3 1-4 1,3-5,8 1-3,5-8\n" "\nReport bugs to lzip-bug@nongnu.org\n" "Lzip home page: http://www.nongnu.org/lzip/lzip.html\n" ); } @@ -109,8 +109,8 @@ unsigned long long getnum( const char * const ptr, const unsigned long long llimit, const unsigned long long ulimit ) { - errno = 0; char * tail; + errno = 0; unsigned long long result = strtoull( 
ptr, &tail, 0 ); if( tail == ptr ) { @@ -172,7 +172,7 @@ public: bool includes( const int i ) const { return ( i >= 1 && i <= 8 && data[i-1] ); } - // Recognized formats: 1 1,2,3 1-4 1,3-5,8 + // Recognized formats: 1 1,2,3 1-4 1,3-5,8 1-3,5-8 bool parse( const char * p ) { for( int i = 0; i < 8; ++i ) data[i] = false; |