From ea9b55f6d0c2eb30dffe1a4815d3696d77888b6a Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 7 Nov 2015 12:49:54 +0100 Subject: Adding upstream version 1.17~pre1. Signed-off-by: Daniel Baumann --- ChangeLog | 8 ++ INSTALL | 4 + Makefile.in | 33 +++-- NEWS | 21 ++- README | 17 ++- configure | 2 +- decoder.cc | 3 +- doc/lziprecover.1 | 9 +- doc/lziprecover.info | 123 ++++++++++------- doc/lziprecover.texi | 84 +++++++----- main.cc | 15 ++- merge.cc | 200 ++++++++++++++++++++-------- repair.cc | 12 +- split.cc | 5 +- testsuite/check.sh | 48 ++++++- testsuite/test_bad1.lz | Bin 7376 -> 7376 bytes testsuite/unzcrash.cc | 352 ------------------------------------------------ unzcrash.cc | 355 +++++++++++++++++++++++++++++++++++++++++++++++++ 18 files changed, 762 insertions(+), 529 deletions(-) delete mode 100644 testsuite/unzcrash.cc create mode 100644 unzcrash.cc diff --git a/ChangeLog b/ChangeLog index 102355f..98719e2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2014-10-16 Antonio Diaz Diaz + + * Version 1.17-pre1 released. + * merge.cc: New block selection algorithm makes merge much faster. + * Makefile.in: Added new targets 'install*-compress'. + * testsuite/unzcrash.cc: Moved to top directory. + * Added chapter 'File names' to the manual. + 2014-08-29 Antonio Diaz Diaz * Version 1.16 released. diff --git a/INSTALL b/INSTALL index 3ad9030..4f27d5c 100644 --- a/INSTALL +++ b/INSTALL @@ -32,6 +32,10 @@ the main archive. 5. Type 'make install' to install the program and any data files and documentation. + Or type 'make install-compress', which additionally compresses the + info manual and the man page after installation. (Installing + compressed docs may become the default in the future). + You can install only the program, the info manual or the man page by typing 'make install-bin', 'make install-info' or 'make install-man' respectively. diff --git a/Makefile.in b/Makefile.in index eed8920..ae25dd6 100644 --- a/Makefile.in +++ b/Makefile.in @@ -11,7 +11,9 @@ objs = arg_parser.o file_index.o merge.o mtester.o range_dec.o repair.o \ unzobjs = arg_parser.o unzcrash.o -.PHONY : all install install-bin install-info install-man install-strip \ +.PHONY : all install install-bin install-info install-man \ + install-strip install-compress install-strip-compress \ + install-bin-strip install-info-compress install-man-compress \ install-as-lzip uninstall uninstall-bin uninstall-info uninstall-man \ doc info man check dist clean distclean @@ -20,16 +22,13 @@ all : $(progname) $(progname) : $(objs) $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(objs) -$(progname)_profiled : $(objs) - $(CXX) $(CXXFLAGS) $(LDFLAGS) -pg -o $@ $(objs) - unzcrash : $(unzobjs) $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(unzobjs) main.o : main.cc $(CXX) $(CXXFLAGS) $(CPPFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< -unzcrash.o : testsuite/unzcrash.cc +unzcrash.o : unzcrash.cc $(CXX) $(CXXFLAGS) $(CPPFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< %.o : %.cc @@ -67,38 +66,49 @@ check : all @$(VPATH)/testsuite/check.sh $(VPATH)/testsuite $(pkgversion) install : install-bin install-info install-man +install-strip : install-bin-strip install-info install-man +install-compress : install-bin install-info-compress install-man-compress +install-strip-compress : install-bin-strip install-info-compress install-man-compress install-bin : all if [ ! -d "$(DESTDIR)$(bindir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(bindir)" ; fi $(INSTALL_PROGRAM) ./$(progname) "$(DESTDIR)$(bindir)/$(progname)" +install-bin-strip : all + $(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install-bin + install-info : if [ ! -d "$(DESTDIR)$(infodir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(infodir)" ; fi + -rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"* $(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info "$(DESTDIR)$(infodir)/$(pkgname).info" -install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info" +install-info-compress : install-info + lzip -v -9 "$(DESTDIR)$(infodir)/$(pkgname).info" + install-man : if [ ! -d "$(DESTDIR)$(mandir)/man1" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" ; fi + -rm -f "$(DESTDIR)$(mandir)/man1/$(progname).1"* $(INSTALL_DATA) $(VPATH)/doc/$(progname).1 "$(DESTDIR)$(mandir)/man1/$(progname).1" -install-strip : all - $(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install +install-man-compress : install-man + lzip -v -9 "$(DESTDIR)$(mandir)/man1/$(progname).1" install-as-lzip : install -rm -f "$(DESTDIR)$(bindir)/lzip" cd "$(DESTDIR)$(bindir)" && ln -s $(progname) lzip -uninstall : uninstall-bin uninstall-info uninstall-man +uninstall : uninstall-man uninstall-info uninstall-bin uninstall-bin : -rm -f "$(DESTDIR)$(bindir)/$(progname)" uninstall-info : -install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info" - -rm -f "$(DESTDIR)$(infodir)/$(pkgname).info" + -rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"* uninstall-man : - -rm -f "$(DESTDIR)$(mandir)/man1/$(progname).1" + -rm -f "$(DESTDIR)$(mandir)/man1/$(progname).1"* dist : doc ln -sf $(VPATH) $(DISTNAME) @@ -122,14 +132,13 @@ dist : doc $(DISTNAME)/testsuite/test.txt.lz \ $(DISTNAME)/testsuite/test21723.txt \ $(DISTNAME)/testsuite/test_bad[1-5].lz \ - $(DISTNAME)/testsuite/unzcrash.cc \ $(DISTNAME)/*.h \ $(DISTNAME)/*.cc rm -f $(DISTNAME) lzip -v -9 $(DISTNAME).tar clean : - -rm -f $(progname) $(progname)_profiled $(objs) + -rm -f $(progname) $(objs) -rm -f unzcrash unzcrash.o distclean : clean diff --git a/NEWS b/NEWS index 4813c44..da32c67 100644 --- a/NEWS +++ b/NEWS @@ -1,14 +1,13 @@ -Changes in version 1.16: +Changes in version 1.17: -Repairing of single-byte errors is now about 10 times faster depending -on file size and position of error. +Merging files now uses an algorithm similar to the ones used to solve +the "Master Mind" game, which makes it much faster. Up to 2 orders of +magnitude faster depending on number of files and number of errors. +Please, report as a bug any files correctly merged by lziprecover 1.16 +that this version can't merge. -Copying of file dates, permissions, and ownership now behaves like "cp -p". -(If the user ID or the group ID can't be duplicated, the file permission -bits S_ISUID and S_ISGID are cleared). +The targets "install-compress", "install-strip-compress", +"install-info-compress" and "install-man-compress" have been added to +the Makefile. -Some minor improvements have been made. - -"lziprecover.texinfo" has been renamed to "lziprecover.texi". - -The license has been changed to GPL version 2 or later. +The chapter "File names" has been added to the manual. diff --git a/README b/README index e533d4a..c457365 100644 --- a/README +++ b/README @@ -2,11 +2,13 @@ Description Lziprecover is a data recovery tool and decompressor for files in the lzip compressed data format (.lz), able to repair slightly damaged -files, recover badly damaged files from two or more copies, extract data -from damaged files, decompress files and test integrity of files. +files, produce a correct file by merging the good parts of two or more +damaged copies, extract data from damaged files, decompress files and +test integrity of files. -The lzip file format is designed for long-term data archiving, taking -into account both data integrity and decoder availability: +The lzip file format is designed for data sharing and long-term +archiving, taking into account both data integrity and decoder +availability: * The lzip format provides very safe integrity checking and some data recovery means. The lziprecover program can repair bit-flip errors @@ -21,8 +23,8 @@ into account both data integrity and decoder availability: extract the data from a lzip file long after quantum computers eventually render LZMA obsolete. - * Additionally lzip is copylefted, which guarantees that it will - remain free forever. + * Additionally the lzip reference implementation is copylefted, which + guarantees that it will remain free forever. A nice feature of the lzip format is that a corrupt byte is easier to repair the nearer it is from the beginning of the file. Therefore, with @@ -61,7 +63,8 @@ of them with one damaged area affecting 1 percent of the copy, the probability of obtaining a correct file is about 98 percent. With three such copies the probability rises to 99.97 percent. For large files (a few MB) with small errors (one sector damaged per copy), the probability -approaches 100 percent even with only two copies. +approaches 100 percent even with only two copies. (Supposing that the +errors are randomly located inside each copy). Lziprecover is not a replacement for regular backups, but a last line of defense for the case where the backups are also damaged. diff --git a/configure b/configure index 420c2ad..5cba27a 100755 --- a/configure +++ b/configure @@ -6,7 +6,7 @@ # to copy, distribute and modify it. pkgname=lziprecover -pkgversion=1.16 +pkgversion=1.17-pre1 progname=lziprecover srctrigger=doc/${pkgname}.texi diff --git a/decoder.cc b/decoder.cc index 3555a87..f7e8f54 100644 --- a/decoder.cc +++ b/decoder.cc @@ -60,8 +60,7 @@ long readblock( const int fd, uint8_t * const buf, const long size ) errno = 0; while( sz < size ) { - const int psz = std::min( 65536L, size - sz ); - const int n = read( fd, buf + sz, psz ); + const int n = read( fd, buf + sz, std::min( 1L << 20, size - sz ) ); if( n > 0 ) sz += n; else if( n == 0 ) break; // EOF else if( errno != EINTR ) break; diff --git a/doc/lziprecover.1 b/doc/lziprecover.1 index ffd4a6d..7eefb7a 100644 --- a/doc/lziprecover.1 +++ b/doc/lziprecover.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1. -.TH LZIPRECOVER "1" "August 2014" "lziprecover 1.16" "User Commands" +.TH LZIPRECOVER "1" "October 2014" "lziprecover 1.17-pre1" "User Commands" .SH NAME lziprecover \- recovers data from damaged lzip files .SH SYNOPSIS @@ -7,6 +7,13 @@ lziprecover \- recovers data from damaged lzip files [\fI\,options\/\fR] [\fI\,files\/\fR] .SH DESCRIPTION Lziprecover \- Data recovery tool and decompressor for the lzip format. +Lziprecover can repair perfectly most files with small errors (up to one +single\-byte error per member), without the need of any extra redundance +at all. Losing an entire archive just because of a corrupt byte near the +beginning is a thing of the past. +Lziprecover can also produce a correct file by merging the good parts of +two or more damaged copies, extract data from damaged files, decompress +files and test integrity of files. .SH OPTIONS .TP \fB\-h\fR, \fB\-\-help\fR diff --git a/doc/lziprecover.info b/doc/lziprecover.info index b97224a..6c636e8 100644 --- a/doc/lziprecover.info +++ b/doc/lziprecover.info @@ -12,7 +12,7 @@ File: lziprecover.info, Node: Top, Next: Introduction, Up: (dir) Lziprecover Manual ****************** -This manual is for Lziprecover (version 1.16, 29 August 2014). +This manual is for Lziprecover (version 1.17-pre1, 16 October 2014). * Menu: @@ -20,6 +20,7 @@ This manual is for Lziprecover (version 1.16, 29 August 2014). * Invoking lziprecover:: Command line interface * Repairing files:: Fixing bit-flip and similar errors * Merging files:: Fixing several damaged copies +* File names:: Names of the files produced by lziprecover * File format:: Detailed format of the compressed file * Examples:: A small tutorial with examples * Unzcrash:: Testing the robustness of decompressors @@ -40,11 +41,13 @@ File: lziprecover.info, Node: Introduction, Next: Invoking lziprecover, Prev: Lziprecover is a data recovery tool and decompressor for files in the lzip compressed data format (.lz), able to repair slightly damaged -files, recover badly damaged files from two or more copies, extract data -from damaged files, decompress files and test integrity of files. +files, produce a correct file by merging the good parts of two or more +damaged copies, extract data from damaged files, decompress files and +test integrity of files. - The lzip file format is designed for long-term data archiving, taking -into account both data integrity and decoder availability: + The lzip file format is designed for data sharing and long-term +archiving, taking into account both data integrity and decoder +availability: * The lzip format provides very safe integrity checking and some data recovery means. The lziprecover program can repair bit-flip errors @@ -59,8 +62,8 @@ into account both data integrity and decoder availability: archaeologist to extract the data from a lzip file long after quantum computers eventually render LZMA obsolete. - * Additionally lzip is copylefted, which guarantees that it will - remain free forever. + * Additionally the lzip reference implementation is copylefted, which + guarantees that it will remain free forever. A nice feature of the lzip format is that a corrupt byte is easier to repair the nearer it is from the beginning of the file. Therefore, with @@ -168,11 +171,12 @@ The format for running lziprecover is: '-m' '--merge' - Try to produce a correct file merging the good parts of two or more - damaged copies. If successful, a repaired copy is written to the - file 'FILE_fixed.lz'. The exit status is 0 if a correct file could - be produced, 2 otherwise. See the chapter 'Merging files' (*note - Merging files::) for a complete description of the merge mode. + Try to produce a correct file by merging the good parts of two or + more damaged copies. If successful, a repaired copy is written to + the file 'FILE_fixed.lz'. The exit status is 0 if a correct file + could be produced, 2 otherwise. See the chapter 'Merging files' + (*note Merging files::) for a complete description of the merge + mode. '-o FILE' '--output=FILE' @@ -203,11 +207,11 @@ The format for running lziprecover is: undamaged, and try to repair or partially decompress those which are damaged. - The names of the files produced are in the form 'rec01FILE.lz', - 'rec02FILE.lz', etc, and are designed so that the use of wildcards - in subsequent processing, for example, - 'lziprecover -cd rec*FILE.lz > recovered_data', processes the - files in the correct order. The number of digits used in the names + The names of the files produced are in the form 'rec01FILE', + 'rec02FILE', etc, and are designed so that the use of wildcards in + subsequent processing, for example, + 'lziprecover -cd rec*FILE > recovered_data', processes the files + in the correct order. The number of digits used in the names varies depending on the number of members in 'FILE'. '-t' @@ -253,17 +257,26 @@ File: lziprecover.info, Node: Repairing files, Next: Merging files, Prev: Inv 3 Repairing files ***************** -Lziprecover is usually able to repair files with small errors (up to one -byte error per member). The error may be located anywhere in the file -except in the header (first 6 bytes of each member) or in the 'Member -size' field of the trailer (last 8 bytes of each member). This makes -lzip files resistant to bit-flip, one of the most common forms of data -corruption. +Lziprecover can repair perfectly most files with small errors (up to one +single-byte error per member), without the need of any extra redundance +at all. If the reparation is successful, the repaired file will be +identical bit for bit to the original. + + The error may be located anywhere in the file except in the header +(first 6 bytes of each member) or in the 'Member size' field of the +trailer (last 8 bytes of each member). This makes lzip files resistant +to bit-flip, one of the most common forms of data corruption. Bit-flip happens when one bit in the file is changed from 0 to 1 or vice versa. It may be caused by bad RAM or even by natural radiation. I have seen a case of bit-flip in a file stored on an USB flash drive. + One byte may seem small, but most file corruptions not produced by +I/O errors just affect one byte, or even one bit, of the file. Also, +unlike magnetic media, where errors usually affect a whole sector, +solid-state storage devices tend to produce single-byte errors, making +of lzip the perfect format for data stored on such devices. + Repairing a file can take some time. Small files or files with the error located near the beginning can be repaired in a few seconds. But repairing a large file compressed with a large dictionary size and with @@ -274,14 +287,14 @@ cause much more loss of data than errors located near the end. So lziprecover repairs more efficiently the worst errors.  -File: lziprecover.info, Node: Merging files, Next: File format, Prev: Repairing files, Up: Top +File: lziprecover.info, Node: Merging files, Next: File names, Prev: Repairing files, Up: Top 4 Merging files *************** If you have several copies of a file but all of them are too damaged to repair them (*note Repairing files::), lziprecover can try to produce a -correct file merging the good parts of the damaged copies. +correct file by merging the good parts of the damaged copies. The merge may succeed even if some copies of the file have all the headers and trailers damaged, as long as there is at least one copy of @@ -293,14 +306,14 @@ is damaged in all copies), or are adjacent and the boundary can't be determined, or if the copies have too many damaged areas. All the copies must have the same size. If some of them have been -truncated and are therefore smaller than they should, you can extend -them to the correct size with the following command before merging them -with the other copies: +truncated and are therefore smaller than they should, they can be +extended to the correct size with the following command before merging +them with the other copies: ddrescue --extend-outfile= small_file.lz extended_file.lz If some of the copies have got garbage data at the end and are -therefore larger than they should, you can reduce their sizes to the +therefore larger than they should, their sizes can be reduced to the correct value with the following command before merging them with the other copies: @@ -311,12 +324,24 @@ each of them with one damaged area affecting 1 percent of the copy, the probability of obtaining a correct file is about 98 percent. With three such copies the probability rises to 99.97 percent. For large files (a few MB) with small errors (one sector damaged per copy), the probability -approaches 100 percent even with only two copies. +approaches 100 percent even with only two copies. (Supposing that the +errors are randomly located inside each copy). + + +File: lziprecover.info, Node: File names, Next: File format, Prev: Merging files, Up: Top + +5 Names of the files produced by lziprecover +******************************************** + +The name of the fixed file produced by '--merge' and '--repair' is made +by appending the string '_fixed.lz' to the original file name. If the +original file name ends with one of the extensions '.tar.lz', '.lz' or +'.tlz', the string '_fixed' is inserted before the extension.  -File: lziprecover.info, Node: File format, Next: Examples, Prev: Merging files, Up: Top +File: lziprecover.info, Node: File format, Next: Examples, Prev: File names, Up: Top -5 File format +6 File format ************* Perfection is reached, not when there is no longer anything to add, but @@ -389,7 +414,7 @@ additional information before, between, or after them.  File: lziprecover.info, Node: Examples, Next: Unzcrash, Prev: File format, Up: Top -6 A small tutorial with examples +7 A small tutorial with examples ******************************** Example 1: Restore a regular file from its compressed version @@ -460,7 +485,7 @@ correct file produced is saved in 'big_db_00001.lz'.  File: lziprecover.info, Node: Unzcrash, Next: Problems, Prev: Examples, Up: Top -7 Testing the robustness of decompressors +8 Testing the robustness of decompressors ***************************************** The lziprecover package also includes unzcrash, a program written to @@ -476,9 +501,9 @@ memory accesses. If it does, please, report it as a bug. Unzcrash really executes as a subprocess the shell command specified in the first non-option argument, and then writes the file specified in the second non-option argument to the standard input of the subprocess, -modifying the corresponding byte each time. Therefore you can use -unzcrash to test any decompressor (not only lzip), or even other decoder -programs with a suitable command line syntax. +modifying the corresponding byte each time. Therefore unzcrash can be +used to test any decompressor (not only lzip), or even other decoder +programs having a suitable command line syntax. The format for running unzcrash is: @@ -537,7 +562,7 @@ caused unzcrash to panic.  File: lziprecover.info, Node: Problems, Next: Concept index, Prev: Unzcrash, Up: Top -8 Reporting bugs +9 Reporting bugs **************** There are probably bugs in lziprecover. There are certainly errors and @@ -561,6 +586,7 @@ Concept index * bugs: Problems. (line 6) * examples: Examples. (line 6) * file format: File format. (line 6) +* file names: File names. (line 6) * getting help: Problems. (line 6) * introduction: Introduction. (line 6) * invoking: Invoking lziprecover. (line 6) @@ -572,16 +598,17 @@ Concept index  Tag Table: Node: Top231 -Node: Introduction1077 -Node: Invoking lziprecover4105 -Node: Repairing files9543 -Node: Merging files10733 -Node: File format12504 -Node: Examples15014 -Ref: ddrescue-example16215 -Node: Unzcrash17324 -Node: Problems19876 -Node: Concept index20426 +Node: Introduction1153 +Node: Invoking lziprecover4249 +Node: Repairing files9686 +Node: Merging files11371 +Node: File names13212 +Node: File format13676 +Node: Examples16183 +Ref: ddrescue-example17384 +Node: Unzcrash18493 +Node: Problems21047 +Node: Concept index21597  End Tag Table diff --git a/doc/lziprecover.texi b/doc/lziprecover.texi index 80d6eb4..08d4312 100644 --- a/doc/lziprecover.texi +++ b/doc/lziprecover.texi @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 29 August 2014 -@set VERSION 1.16 +@set UPDATED 16 October 2014 +@set VERSION 1.17-pre1 @dircategory Data Compression @direntry @@ -39,6 +39,7 @@ This manual is for Lziprecover (version @value{VERSION}, @value{UPDATED}). * Invoking lziprecover:: Command line interface * Repairing files:: Fixing bit-flip and similar errors * Merging files:: Fixing several damaged copies +* File names:: Names of the files produced by lziprecover * File format:: Detailed format of the compressed file * Examples:: A small tutorial with examples * Unzcrash:: Testing the robustness of decompressors @@ -59,11 +60,13 @@ to copy, distribute and modify it. Lziprecover is a data recovery tool and decompressor for files in the lzip compressed data format (.lz), able to repair slightly damaged -files, recover badly damaged files from two or more copies, extract data -from damaged files, decompress files and test integrity of files. +files, produce a correct file by merging the good parts of two or more +damaged copies, extract data from damaged files, decompress files and +test integrity of files. -The lzip file format is designed for long-term data archiving, taking -into account both data integrity and decoder availability: +The lzip file format is designed for data sharing and long-term +archiving, taking into account both data integrity and decoder +availability: @itemize @bullet @item @@ -82,8 +85,8 @@ data from a lzip file long after quantum computers eventually render LZMA obsolete. @item -Additionally lzip is copylefted, which guarantees that it will remain -free forever. +Additionally the lzip reference implementation is copylefted, which +guarantees that it will remain free forever. @end itemize A nice feature of the lzip format is that a corrupt byte is easier to @@ -196,7 +199,7 @@ information about the members in the file. @item -m @itemx --merge -Try to produce a correct file merging the good parts of two or more +Try to produce a correct file by merging the good parts of two or more damaged copies. If successful, a repaired copy is written to the file @samp{@var{file}_fixed.lz}. The exit status is 0 if a correct file could be produced, 2 otherwise. See the chapter @samp{Merging files} @@ -231,12 +234,12 @@ Search for members in @samp{@var{file}} and write each member in its own integrity of the resulting files, decompress those which are undamaged, and try to repair or partially decompress those which are damaged. -The names of the files produced are in the form -@samp{rec01@var{file}.lz}, @samp{rec02@var{file}.lz}, etc, and are -designed so that the use of wildcards in subsequent processing, for -example, @w{@samp{lziprecover -cd rec*@var{file}.lz > recovered_data}}, -processes the files in the correct order. The number of digits used in -the names varies depending on the number of members in @samp{@var{file}}. +The names of the files produced are in the form @samp{rec01@var{file}}, +@samp{rec02@var{file}}, etc, and are designed so that the use of +wildcards in subsequent processing, for example, @w{@samp{lziprecover +-cd rec*@var{file} > recovered_data}}, processes the files in the +correct order. The number of digits used in the names varies depending +on the number of members in @samp{@var{file}}. @item -t @itemx --test @@ -282,17 +285,26 @@ caused lziprecover to panic. @chapter Repairing files @cindex repairing files -Lziprecover is usually able to repair files with small errors (up to one -byte error per member). The error may be located anywhere in the file -except in the header (first 6 bytes of each member) or in the -@samp{Member size} field of the trailer (last 8 bytes of each member). -This makes lzip files resistant to bit-flip, one of the most common -forms of data corruption. +Lziprecover can repair perfectly most files with small errors (up to one +single-byte error per member), without the need of any extra redundance +at all. If the reparation is successful, the repaired file will be +identical bit for bit to the original. + +The error may be located anywhere in the file except in the header +(first 6 bytes of each member) or in the @samp{Member size} field of the +trailer (last 8 bytes of each member). This makes lzip files resistant +to bit-flip, one of the most common forms of data corruption. Bit-flip happens when one bit in the file is changed from 0 to 1 or vice versa. It may be caused by bad RAM or even by natural radiation. I have seen a case of bit-flip in a file stored on an USB flash drive. +One byte may seem small, but most file corruptions not produced by I/O +errors just affect one byte, or even one bit, of the file. Also, unlike +magnetic media, where errors usually affect a whole sector, solid-state +storage devices tend to produce single-byte errors, making of lzip the +perfect format for data stored on such devices. + Repairing a file can take some time. Small files or files with the error located near the beginning can be repaired in a few seconds. But repairing a large file compressed with a large dictionary size and with @@ -309,7 +321,7 @@ repairs more efficiently the worst errors. If you have several copies of a file but all of them are too damaged to repair them (@pxref{Repairing files}), lziprecover can try to produce a -correct file merging the good parts of the damaged copies. +correct file by merging the good parts of the damaged copies. The merge may succeed even if some copies of the file have all the headers and trailers damaged, as long as there is at least one copy of @@ -321,16 +333,16 @@ damaged in all copies), or are adjacent and the boundary can't be determined, or if the copies have too many damaged areas. All the copies must have the same size. If some of them have been -truncated and are therefore smaller than they should, you can extend -them to the correct size with the following command before merging them -with the other copies: +truncated and are therefore smaller than they should, they can be +extended to the correct size with the following command before merging +them with the other copies: @example ddrescue --extend-outfile= small_file.lz extended_file.lz @end example If some of the copies have got garbage data at the end and are therefore -larger than they should, you can reduce their sizes to the correct value +larger than they should, their sizes can be reduced to the correct value with the following command before merging them with the other copies: @example @@ -342,7 +354,19 @@ of them with one damaged area affecting 1 percent of the copy, the probability of obtaining a correct file is about 98 percent. With three such copies the probability rises to 99.97 percent. For large files (a few MB) with small errors (one sector damaged per copy), the probability -approaches 100 percent even with only two copies. +approaches 100 percent even with only two copies. (Supposing that the +errors are randomly located inside each copy). + + +@node File names +@chapter Names of the files produced by lziprecover +@cindex file names + +The name of the fixed file produced by @samp{--merge} and +@samp{--repair} is made by appending the string @samp{_fixed.lz} to the +original file name. If the original file name ends with one of the +extensions @samp{.tar.lz}, @samp{.lz} or @samp{.tlz}, the string +@samp{_fixed} is inserted before the extension. @node File format @@ -541,9 +565,9 @@ accesses. If it does, please, report it as a bug. Unzcrash really executes as a subprocess the shell command specified in the first non-option argument, and then writes the file specified in the second non-option argument to the standard input of the subprocess, -modifying the corresponding byte each time. Therefore you can use -unzcrash to test any decompressor (not only lzip), or even other decoder -programs with a suitable command line syntax. +modifying the corresponding byte each time. Therefore unzcrash can be +used to test any decompressor (not only lzip), or even other decoder +programs having a suitable command line syntax. The format for running unzcrash is: diff --git a/main.cc b/main.cc index 0bc2322..167e8c6 100644 --- a/main.cc +++ b/main.cc @@ -92,7 +92,14 @@ bool delete_output_on_interrupt = false; void show_help() { std::printf( "%s - Data recovery tool and decompressor for the lzip format.\n", Program_name ); - std::printf( "\nUsage: %s [options] [files]\n", invocation_name ); + std::printf( "Lziprecover can repair perfectly most files with small errors (up to one\n" + "single-byte error per member), without the need of any extra redundance\n" + "at all. Losing an entire archive just because of a corrupt byte near the\n" + "beginning is a thing of the past.\n" + "Lziprecover can also produce a correct file by merging the good parts of\n" + "two or more damaged copies, extract data from damaged files, decompress\n" + "files and test integrity of files.\n" + "\nUsage: %s [options] [files]\n", invocation_name ); std::printf( "\nOptions:\n" " -h, --help display this help and exit\n" " -V, --version output version information and exit\n" @@ -304,10 +311,12 @@ void close_and_set_permissions( const struct stat * const in_statsp ) std::string insert_fixed( std::string name ) { - if( name.size() > 4 && name.compare( name.size() - 4, 4, ".tlz" ) == 0 ) - name.insert( name.size() - 4, "_fixed" ); + if( name.size() > 7 && name.compare( name.size() - 7, 7, ".tar.lz" ) == 0 ) + name.insert( name.size() - 7, "_fixed" ); else if( name.size() > 3 && name.compare( name.size() - 3, 3, ".lz" ) == 0 ) name.insert( name.size() - 3, "_fixed" ); + else if( name.size() > 4 && name.compare( name.size() - 4, 4, ".tlz" ) == 0 ) + name.insert( name.size() - 4, "_fixed" ); else name += "_fixed.lz"; return name; } diff --git a/merge.cc b/merge.cc index 3cfa430..5f034eb 100644 --- a/merge.cc +++ b/merge.cc @@ -78,18 +78,22 @@ void combine( std::vector< Block > & block_vector, std::vector< Block > & bv ) // positions in 'block_vector' are absolute file positions. bool diff_member( const long long mpos, const long long msize, const std::vector< int > & infd_vector, - std::vector< Block > & block_vector ) + std::vector< Block > & block_vector, + std::vector< int > & color_vector ) { const int files = infd_vector.size(); const int buffer_size = 65536; uint8_t * const buffer1 = new uint8_t[buffer_size]; uint8_t * const buffer2 = new uint8_t[buffer_size]; + int next_color = 1; bool error = false; - for( int i1 = 0; i1 + 1 < files && !error; ++i1 ) + for( int i1 = 0; i1 < files && !error; ++i1 ) { for( int i2 = i1 + 1; i2 < files && !error; ++i2 ) { + if( color_vector[i1] != 0 && color_vector[i1] == color_vector[i2] ) + continue; std::vector< Block > bv; long long partial_pos = 0; const int fd1 = infd_vector[i1], fd2 = infd_vector[i2]; @@ -98,7 +102,7 @@ bool diff_member( const long long mpos, const long long msize, if( !safe_seek( fd1, mpos ) || !safe_seek( fd2, mpos ) ) { error = true; break; } - while( msize > partial_pos ) + while( partial_pos < msize ) { const int size = std::min( (long long)buffer_size, msize - partial_pos ); const int rd = readblock( fd1, buffer1, size ); @@ -133,21 +137,32 @@ bool diff_member( const long long mpos, const long long msize, Block b( mpos + begin, partial_pos - prev_equal - begin ); bv.push_back( b ); } + if( bv.empty() ) // members are identical, set to same color + { + if( color_vector[i1] == 0 ) + { + if( color_vector[i2] != 0 ) color_vector[i1] = color_vector[i2]; + else color_vector[i1] = color_vector[i2] = next_color++; + } + else if( color_vector[i2] == 0 ) color_vector[i2] = color_vector[i1]; + else internal_error( "different colors assigned to identical members." ); + } combine( block_vector, bv ); } + if( color_vector[i1] == 0 ) color_vector[i1] = next_color++; } delete[] buffer2; delete[] buffer1; return !error; } -int ipow( const unsigned base, const unsigned exponent ) +long ipow( const unsigned base, const unsigned exponent ) { - unsigned result = 1; + unsigned long result = 1; for( unsigned i = 0; i < exponent; ++i ) { - if( INT_MAX / base >= result ) result *= base; - else { result = INT_MAX; break; } + if( LONG_MAX / base >= result ) result *= base; + else { result = LONG_MAX; break; } } return result; } @@ -239,6 +254,116 @@ int open_input_files( const std::vector< std::string > & filenames, return -1; } + +bool try_merge_member( const long long mpos, const long long msize, + const std::vector< Block > & block_vector, + const std::vector< int > & color_vector, + const std::vector< int > & infd_vector, + const std::string & output_filename, + const int outfd, const int verbosity ) + { + const int blocks = block_vector.size(); + const int files = infd_vector.size(); + const long variations = ipow( files, blocks ); + if( variations >= LONG_MAX ) + { + if( files > 2 ) + show_error( "Too many damaged blocks. Try merging fewer files." ); + else + show_error( "Too many damaged blocks. Merging is not possible." ); + cleanup_and_fail( output_filename, outfd, 2 ); + } + int bi = 0; // block index + std::vector< int > file_idx( blocks, 0 ); // file to read each block from + + while( bi >= 0 ) + { + if( verbosity >= 1 ) + { + long var = 0; + for( int i = 0; i < blocks; ++i ) + var = ( var * files ) + file_idx[i]; + std::printf( "Trying variation %ld of %ld \r", var + 1, variations ); + std::fflush( stdout ); + } + while( bi < blocks ) + { + const int infd = infd_vector[file_idx[bi]]; + if( !safe_seek( infd, block_vector[bi].pos() ) || + !safe_seek( outfd, block_vector[bi].pos() ) || + !copy_file( infd, outfd, block_vector[bi].size() ) ) + cleanup_and_fail( output_filename, outfd, 1 ); + ++bi; + } + if( !safe_seek( outfd, mpos ) ) + cleanup_and_fail( output_filename, outfd, 1 ); + long long failure_pos = 0; + if( try_decompress_member( outfd, msize, &failure_pos ) ) return true; + while( bi > 0 && mpos + failure_pos < block_vector[bi-1].pos() ) --bi; + while( --bi >= 0 ) + { + while( ++file_idx[bi] < files ) + { + const int color = color_vector[file_idx[bi]]; + bool done = true; + for( int i = file_idx[bi] - 1; i >= 0; --i ) + if( color_vector[i] == color ) { done = false; break; } + if( done ) break; + } + if( file_idx[bi] < files ) break; + file_idx[bi] = 0; + } + } + return false; + } + + +bool try_merge_member1( const long long mpos, const long long msize, + const std::vector< Block > & block_vector, + const std::vector< int > & color_vector, + const std::vector< int > & infd_vector, + const std::string & output_filename, + const int outfd, const int verbosity ) + { + if( block_vector.size() != 1 || block_vector[0].size() <= 1 ) return false; + const long long pos = block_vector[0].pos(); + const long long size = block_vector[0].size(); + const int files = infd_vector.size(); + const int variations = files * ( files - 1 ); + uint8_t byte; + + for( int i1 = 0; i1 < files; ++i1 ) + for( int i2 = 0; i2 < files; ++i2 ) + { + if( i1 == i2 || color_vector[i1] == color_vector[i2] ) continue; + const int infd = infd_vector[i1]; + if( !safe_seek( infd, pos ) || + !safe_seek( infd_vector[i2], pos ) || + !safe_seek( outfd, pos ) || + !copy_file( infd_vector[i2], outfd, size ) ) + cleanup_and_fail( output_filename, outfd, 1 ); + const int var = ( i1 * ( files - 1 ) ) + i2 - ( i2 > i1 ) + 1; + for( long long i = 0; i < size; ++i ) + { + if( verbosity >= 1 ) + { + std::printf( "Trying variation %d of %d, position %lld \r", + var, variations, pos + i ); + std::fflush( stdout ); + } + if( !safe_seek( outfd, pos + i ) || + readblock( infd, &byte, 1 ) != 1 || + writeblock( outfd, &byte, 1 ) != 1 || + !safe_seek( outfd, mpos ) ) + cleanup_and_fail( output_filename, outfd, 1 ); + long long failure_pos = 0; + if( try_decompress_member( outfd, msize, &failure_pos ) ) return true; + if( mpos + failure_pos <= pos + i ) break; + } + } + return false; + } + } // end namespace @@ -327,7 +452,8 @@ int merge_files( const std::vector< std::string > & filenames, const long long msize = file_index.mblock( j ).size(); // vector of data blocks differing among the copies of the current member std::vector< Block > block_vector; - if( !diff_member( mpos, msize, infd_vector, block_vector ) || + std::vector< int > color_vector( files, 0 ); + if( !diff_member( mpos, msize, infd_vector, block_vector, color_vector ) || !safe_seek( outfd, mpos ) ) cleanup_and_fail( output_filename, outfd, 1 ); @@ -335,63 +461,33 @@ int merge_files( const std::vector< std::string > & filenames, { if( file_index.members() > 1 && try_decompress_member( outfd, msize ) ) continue; - show_error( "Input files are (partially) identical. Recovery is not possible." ); + show_error( "Input files are (partially) identical. Merging is not possible." ); cleanup_and_fail( output_filename, outfd, 2 ); } - const int size0 = block_vector[0].size(); - const bool single_block = ( block_vector.size() == 1 ); - if( ipow( files, block_vector.size() ) >= INT_MAX || - ( single_block && ipow( files, 2 ) >= INT_MAX / size0 ) ) - { show_error( "Input files are too damaged. Recovery is not possible." ); - cleanup_and_fail( output_filename, outfd, 2 ); } - - const int shifts = ( single_block && size0 > 1 ) ? size0 - 1 : 1; - if( single_block && size0 > 1 ) - { - Block b( block_vector[0].pos() + 1, size0 - 1 ); - block_vector[0].size( 1 ); - block_vector.push_back( b ); - } - if( verbosity >= 1 && file_index.members() > 1 ) { - std::printf( "Merging member %ld\n", j + 1 ); + std::printf( "Merging member %ld of %ld\n", + j + 1, (long)file_index.members() ); std::fflush( stdout ); } - const int base_variations = ipow( files, block_vector.size() ); - const int variations = base_variations * shifts; + bool done = false; - for( int var = 0; var < variations; ++var ) + if( file_index.members() > 1 || block_vector.size() > 1 ) { - if( verbosity >= 1 ) - { - std::printf( "Trying variation %d of %d \r", var + 1, variations ); - std::fflush( stdout ); - } - int tmp = var; - for( unsigned i = 0; i < block_vector.size(); ++i ) - { - const int infd = infd_vector[tmp % files]; - tmp /= files; - if( !safe_seek( infd, block_vector[i].pos() ) || - !safe_seek( outfd, block_vector[i].pos() ) || - !copy_file( infd, outfd, block_vector[i].size() ) ) - cleanup_and_fail( output_filename, outfd, 1 ); - } - if( !safe_seek( outfd, mpos ) ) - cleanup_and_fail( output_filename, outfd, 1 ); - if( try_decompress_member( outfd, msize ) ) - { done = true; break; } - if( var > 0 && var % base_variations == 0 ) - block_vector[0].shift( block_vector[1] ); + done = try_merge_member( mpos, msize, block_vector, color_vector, + infd_vector, output_filename, outfd, verbosity ); + if( !done && verbosity >= 1 ) std::fputs( "\n", stdout ); } - if( verbosity >= 1 ) std::printf( "\n" ); + if( !done ) + done = try_merge_member1( mpos, msize, block_vector, color_vector, + infd_vector, output_filename, outfd, verbosity ); + if( verbosity >= 1 ) std::fputs( "\n", stdout ); if( !done ) { if( verbosity >= 2 ) for( unsigned i = 0; i < block_vector.size(); ++i ) - std::fprintf( stderr, "area %2d from offset %6lld to %6lld\n", i + 1, + std::fprintf( stderr, "area %2d from position %6lld to %6lld\n", i + 1, block_vector[i].pos(), block_vector[i].end() - 1 ); show_error( "Some error areas overlap. Can't recover input file." ); cleanup_and_fail( output_filename, outfd, 2 ); @@ -404,6 +500,6 @@ int merge_files( const std::vector< std::string > & filenames, cleanup_and_fail( output_filename, -1, 1 ); } if( verbosity >= 1 ) - std::printf( "Input files merged successfully.\n" ); + std::fputs( "Input files merged successfully.\n", stdout ); return 0; } diff --git a/repair.cc b/repair.cc index ed611e1..54a4d89 100644 --- a/repair.cc +++ b/repair.cc @@ -71,8 +71,8 @@ int repair_file( const std::string & input_filename, if( verbosity >= 1 ) // damaged member found { - std::printf( "Repairing member %ld (failure pos = %llu)\n", - i + 1, mpos + failure_pos ); + std::printf( "Repairing member %ld of %ld (failure pos = %llu)\n", + i + 1, (long)file_index.members(), mpos + failure_pos ); std::fflush( stdout ); } uint8_t * const mbuffer = read_member( infd, mpos, msize ); @@ -80,7 +80,7 @@ int repair_file( const std::string & input_filename, cleanup_and_fail( output_filename, outfd, 1 ); long pos = failure_pos; bool done = false; - while( pos >= File_header::size && pos > failure_pos - 20000 && !done ) + while( pos >= File_header::size && pos > failure_pos - 40000 && !done ) { const long min_pos = std::max( (long)File_header::size, pos - 1000 ); const LZ_mtester * master = prepare_master( mbuffer, msize, min_pos - 16 ); @@ -118,7 +118,7 @@ int repair_file( const std::string & input_filename, delete master; } delete[] mbuffer; - if( verbosity >= 1 ) std::printf( "\n" ); + if( verbosity >= 1 ) std::fputs( "\n", stdout ); if( !done ) { show_error( "Can't repair input file. Error is probably larger than 1 byte." ); @@ -129,7 +129,7 @@ int repair_file( const std::string & input_filename, if( outfd < 0 ) { if( verbosity >= 1 ) - std::printf( "Input file has no errors. Recovery is not needed.\n" ); + std::fputs( "Input file has no errors. Recovery is not needed.\n", stdout ); return 0; } if( close( outfd ) != 0 ) @@ -138,6 +138,6 @@ int repair_file( const std::string & input_filename, cleanup_and_fail( output_filename, -1, 1 ); } if( verbosity >= 1 ) - std::printf( "Copy of input file repaired successfully.\n" ); + std::fputs( "Copy of input file repaired successfully.\n", stdout ); return 0; } diff --git a/split.cc b/split.cc index 91b6263..7b3f9fd 100644 --- a/split.cc +++ b/split.cc @@ -43,9 +43,8 @@ void first_filename( const std::string & input_filename, output_filename = input_filename; int b = output_filename.size(); while( b > 0 && output_filename[b-1] != '/' ) --b; - output_filename.insert( b, 1, '1' ); - if( max_digits > 1 ) output_filename.insert( b, max_digits - 1, '0' ); - output_filename.insert( b, "rec" ); + output_filename.insert( b, "rec1" ); + if( max_digits > 1 ) output_filename.insert( b + 3, max_digits - 1, '0' ); } diff --git a/testsuite/check.sh b/testsuite/check.sh index e2cad6f..ef60713 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -49,7 +49,7 @@ fail=0 # fox5_bad3.lz: [100-299] --> zeroed; # fox5_bad4.lz: [250-349] --> zeroed; # fox5_bad5.lz: [300-399] --> zeroed; -# test_bad1.lz: byte at offset 67 changed from 0x70 to 0x79 +# test_bad1.lz: byte at offset 66 changed from 0xA6 to 0x46 # test_bad2.lz: [ 34- 65] --> copy of bytes [ 68- 99] # test_bad3.lz: [ 512-1535] --> zeroed; [2560-3583] --> zeroed # test_bad4.lz: [3072-4095] --> random data; [4608-5631] --> zeroed @@ -180,6 +180,24 @@ cmp "${in_lz}" copy.lz || fail=1 cmp "${in_lz}" copy.lz || fail=1 printf . +cat "${bad1_lz}" "${in_lz}" "${bad1_lz}" "${bad1_lz}" > bad11.lz || framework_failure +cat "${bad1_lz}" "${in_lz}" "${bad2_lz}" "${in_lz}" > bad12.lz || framework_failure +cat "${bad2_lz}" "${in_lz}" "${bad2_lz}" "${bad2_lz}" > bad22.lz || framework_failure +cat "${in_lz}" "${in_lz}" "${in_lz}" "${in_lz}" > copy4.lz || framework_failure +"${LZIPRECOVER}" -mf -o out4.lz bad11.lz bad12.lz bad22.lz || fail=1 +cmp out4.lz copy4.lz || fail=1 +"${LZIPRECOVER}" -mf -o out4.lz bad11.lz bad22.lz bad12.lz || fail=1 +cmp out4.lz copy4.lz || fail=1 +"${LZIPRECOVER}" -mf -o out4.lz bad12.lz bad11.lz bad22.lz || fail=1 +cmp out4.lz copy4.lz || fail=1 +"${LZIPRECOVER}" -mf -o out4.lz bad12.lz bad22.lz bad11.lz || fail=1 +cmp out4.lz copy4.lz || fail=1 +"${LZIPRECOVER}" -mf -o out4.lz bad22.lz bad11.lz bad12.lz || fail=1 +cmp out4.lz copy4.lz || fail=1 +"${LZIPRECOVER}" -mf -o out4.lz bad22.lz bad12.lz bad11.lz || fail=1 +cmp out4.lz copy4.lz || fail=1 +printf . + for i in "${bad1_lz}" "${bad2_lz}" ; do for j in "${bad3_lz}" "${bad4_lz}" "${bad5_lz}" ; do "${LZIPRECOVER}" -mf -o copy.lz "${i}" "${j}" || fail=1 @@ -209,6 +227,24 @@ printf . cmp "${in_lz}" copy.lz || fail=1 printf . +cat "${bad3_lz}" "${bad4_lz}" "${bad5_lz}" "${in_lz}" > bad345.lz || framework_failure +cat "${bad4_lz}" "${bad5_lz}" "${bad3_lz}" "${in_lz}" > bad453.lz || framework_failure +cat "${bad5_lz}" "${bad3_lz}" "${bad4_lz}" "${in_lz}" > bad534.lz || framework_failure +cat "${in_lz}" "${in_lz}" "${in_lz}" "${in_lz}" > copy4.lz || framework_failure +"${LZIPRECOVER}" -mf -o out4.lz bad345.lz bad453.lz bad534.lz || fail=1 +cmp out4.lz copy4.lz || fail=1 +"${LZIPRECOVER}" -mf -o out4.lz bad345.lz bad534.lz bad453.lz || fail=1 +cmp out4.lz copy4.lz || fail=1 +"${LZIPRECOVER}" -mf -o out4.lz bad453.lz bad345.lz bad534.lz || fail=1 +cmp out4.lz copy4.lz || fail=1 +"${LZIPRECOVER}" -mf -o out4.lz bad453.lz bad534.lz bad345.lz || fail=1 +cmp out4.lz copy4.lz || fail=1 +"${LZIPRECOVER}" -mf -o out4.lz bad534.lz bad345.lz bad453.lz || fail=1 +cmp out4.lz copy4.lz || fail=1 +"${LZIPRECOVER}" -mf -o out4.lz bad534.lz bad453.lz bad345.lz || fail=1 +cmp out4.lz copy4.lz || fail=1 +printf . + rm -f copy.lz "${LZIPRECOVER}" -R -o copy.lz "${fox5_lz}" || fail=1 if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi @@ -220,6 +256,16 @@ cmp "${fox5_lz}" copy.lz || fail=1 cmp "${in_lz}" copy.lz || fail=1 printf . +cat "${f5b1_lz}" > copy.tar.lz || framework_failure +"${LZIPRECOVER}" -R copy.tar.lz || fail=1 +if [ $? = 0 ] && [ -e copy_fixed.tar.lz ] ; then printf . ; else printf - ; fail=1 ; fi +mv copy.tar.lz copy.lz || framework_failure +"${LZIPRECOVER}" -R copy.lz || fail=1 +if [ $? = 0 ] && [ -e copy_fixed.lz ] ; then printf . ; else printf - ; fail=1 ; fi +mv copy.lz copy.tlz || framework_failure +"${LZIPRECOVER}" -R copy.tlz || fail=1 +if [ $? = 0 ] && [ -e copy_fixed.tlz ] ; then printf . ; else printf - ; fail=1 ; fi + cat "${in_lz}" "${in_lz}" "${in_lz}" > copy || framework_failure printf "garbage" >> copy || fail=1 "${LZIPRECOVER}" -s -o copy.lz copy || fail=1 diff --git a/testsuite/test_bad1.lz b/testsuite/test_bad1.lz index 5f36fb7..d63dcbf 100644 Binary files a/testsuite/test_bad1.lz and b/testsuite/test_bad1.lz differ diff --git a/testsuite/unzcrash.cc b/testsuite/unzcrash.cc deleted file mode 100644 index 470727d..0000000 --- a/testsuite/unzcrash.cc +++ /dev/null @@ -1,352 +0,0 @@ -/* Unzcrash - Tests robustness of decompressors to corrupted data. - Inspired by unzcrash.c from Julian Seward's bzip2. - Copyright (C) 2008-2014 Antonio Diaz Diaz. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ -/* - Exit status: 0 for a normal exit, 1 for environmental problems - (file not found, invalid flags, I/O errors, etc), 2 to indicate a - corrupt or invalid input file, 3 for an internal consistency error - (eg, bug) which caused unzcrash to panic. -*/ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "../arg_parser.h" - -#if CHAR_BIT != 8 -#error "Environments where CHAR_BIT != 8 are not supported." -#endif - - -namespace { - -const char * const Program_name = "Unzcrash"; -const char * const program_name = "unzcrash"; -const char * const program_year = "2014"; -const char * invocation_name = 0; - -int verbosity = 0; - - -void show_help() - { - std::printf( "%s - Tests robustness of decompressors to corrupted data.\n", Program_name ); - std::printf( "\nUsage: %s [options] \"lzip -tv\" filename.lz\n", invocation_name ); - std::printf( "\nThis program reads the specified file and then repeatedly decompresses\n" - "it, increasing 256 times each byte of the compressed data, so as to test\n" - "all possible one-byte errors. This should not cause any invalid memory\n" - "accesses. If it does, please, report it as a bug.\n" - "\nOptions:\n" - " -h, --help display this help and exit\n" - " -V, --version output version information and exit\n" - " -b, --bits= test N-bit errors instead of full byte\n" - " -p, --position= first byte position to test [default 0]\n" - " -q, --quiet suppress all messages\n" - " -s, --size= number of byte positions to test [all]\n" - " -v, --verbose be verbose (a 2nd -v gives more)\n" - "Examples of : 1 1,2,3 1-4 1,3-5,8 1-3,5-8\n" - "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" - "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" - "invalid input file, 3 for an internal consistency error (eg, bug) which\n" - "caused unzcrash to panic.\n" - "\nReport bugs to lzip-bug@nongnu.org\n" - "Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html\n" ); - } - - -void show_version() - { - std::printf( "%s %s\n", program_name, PROGVERSION ); - std::printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year ); - std::printf( "License GPLv2+: GNU GPL version 2 or later \n" - "This is free software: you are free to change and redistribute it.\n" - "There is NO WARRANTY, to the extent permitted by law.\n" ); - } - - -void show_error( const char * const msg, const int errcode = 0, - const bool help = false ) - { - if( verbosity >= 0 ) - { - if( msg && msg[0] ) - { - std::fprintf( stderr, "%s: %s", program_name, msg ); - if( errcode > 0 ) - std::fprintf( stderr, ": %s", std::strerror( errcode ) ); - std::fprintf( stderr, "\n" ); - } - if( help ) - std::fprintf( stderr, "Try '%s --help' for more information.\n", - invocation_name ); - } - } - - -void internal_error( const char * const msg ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: internal error: %s\n", program_name, msg ); - std::exit( 3 ); - } - - -unsigned long long getnum( const char * const ptr, - const unsigned long long llimit, - const unsigned long long ulimit ) - { - char * tail; - errno = 0; - unsigned long long result = strtoull( ptr, &tail, 0 ); - if( tail == ptr ) - { - show_error( "Bad or missing numerical argument.", 0, true ); - std::exit( 1 ); - } - - if( !errno && tail[0] ) - { - int factor = ( tail[1] == 'i' ) ? 1024 : 1000; - int exponent = 0; - bool bad_multiplier = false; - switch( tail[0] ) - { - case ' ': break; - case 'Y': exponent = 8; break; - case 'Z': exponent = 7; break; - case 'E': exponent = 6; break; - case 'P': exponent = 5; break; - case 'T': exponent = 4; break; - case 'G': exponent = 3; break; - case 'M': exponent = 2; break; - case 'K': if( factor == 1024 ) exponent = 1; else bad_multiplier = true; - break; - case 'k': if( factor == 1000 ) exponent = 1; else bad_multiplier = true; - break; - default : bad_multiplier = true; - } - if( bad_multiplier ) - { - show_error( "Bad multiplier in numerical argument.", 0, true ); - std::exit( 1 ); - } - for( int i = 0; i < exponent; ++i ) - { - if( ulimit / factor >= result ) result *= factor; - else { errno = ERANGE; break; } - } - } - if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE; - if( errno ) - { - show_error( "Numerical argument out of limits." ); - std::exit( 1 ); - } - return result; - } - - -class Bitset8 // 8 value bitset (1 to 8) - { - bool data[8]; - static bool valid_digit( const unsigned char ch ) - { return ( ch >= '1' && ch <= '8' ); } - -public: - Bitset8() { for( int i = 0; i < 8; ++i ) data[i] = true; } - - bool includes( const int i ) const - { return ( i >= 1 && i <= 8 && data[i-1] ); } - - // Recognized formats: 1 1,2,3 1-4 1,3-5,8 1-3,5-8 - bool parse( const char * p ) - { - for( int i = 0; i < 8; ++i ) data[i] = false; - while( true ) - { - const unsigned char ch1 = *p++; - if( !valid_digit( ch1 ) ) break; - if( *p != '-' ) data[ch1-'1'] = true; - else - { - ++p; - if( !valid_digit( *p ) || ch1 > *p ) break; - for( int c = ch1; c <= *p; ++c ) data[c-'1'] = true; - ++p; - } - if( *p == 0 ) return true; - if( *p == ',' ) ++p; else break; - } - show_error( "Invalid value or range." ); - return false; - } - - // number of N-bit errors per byte (N=0 to 8): 1 8 28 56 70 56 28 8 1 - void print() const - { - std::fflush( stderr ); - int c = 0; - for( int i = 0; i < 8; ++i ) if( data[i] ) ++c; - if( c == 8 ) std::printf( "Testing full byte.\n" ); - else if( c == 0 ) std::printf( "Nothing to test.\n" ); - else - { - std::printf( "Testing " ); - for( int i = 0; i < 8; ++i ) - if( data[i] ) - { - std::printf( "%d", i + 1 ); - if( --c ) std::printf( "," ); - } - std::printf( " bit errors.\n" ); - } - std::fflush( stdout ); - } - }; - - -int differing_bits( const uint8_t byte1, const uint8_t byte2 ) - { - int count = 0; - uint8_t dif = byte1 ^ byte2; - while( dif ) - { count += ( dif & 1 ); dif >>= 1; } - return count; - } - -} // end namespace - - -int main( const int argc, const char * const argv[] ) - { - enum { buffer_size = 3 << 20 }; - Bitset8 bits; // if Bitset8::parse not called test full byte - int pos = 0; - int max_size = buffer_size; - invocation_name = argv[0]; - - const Arg_parser::Option options[] = - { - { 'h', "help", Arg_parser::no }, - { 'b', "bits", Arg_parser::yes }, - { 'p', "position", Arg_parser::yes }, - { 'q', "quiet", Arg_parser::no }, - { 's', "size", Arg_parser::yes }, - { 'v', "verbose", Arg_parser::no }, - { 'V', "version", Arg_parser::no }, - { 0 , 0, Arg_parser::no } }; - - const Arg_parser parser( argc, argv, options ); - if( parser.error().size() ) // bad option - { show_error( parser.error().c_str(), 0, true ); return 1; } - - int argind = 0; - for( ; argind < parser.arguments(); ++argind ) - { - const int code = parser.code( argind ); - if( !code ) break; // no more options - const char * const arg = parser.argument( argind ).c_str(); - switch( code ) - { - case 'h': show_help(); return 0; - case 'b': if( !bits.parse( arg ) ) return 1; break; - case 'p': pos = getnum( arg, 0, buffer_size - 1 ); break; - case 'q': verbosity = -1; break; - case 's': max_size = getnum( arg, 1, buffer_size ); break; - case 'v': if( verbosity < 4 ) ++verbosity; break; - case 'V': show_version(); return 0; - default : internal_error( "uncaught option." ); - } - } // end process options - - if( argind + 2 != parser.arguments() ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "Usage: %s \"lzip -tv\" filename.lz\n", - invocation_name ); - return 1; - } - - FILE *f = std::fopen( parser.argument( argind + 1 ).c_str(), "rb" ); - if( !f ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "Can't open file '%s' for reading\n", - parser.argument( argind + 1 ).c_str() ); - return 1; - } - - uint8_t * const buffer = new uint8_t[buffer_size]; - const int size = std::fread( buffer, 1, buffer_size, f ); - if( size >= buffer_size ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "input file '%s' is too large.\n", - parser.argument( argind + 1 ).c_str() ); - return 2; - } - std::fclose( f ); - - f = popen( parser.argument( argind ).c_str(), "w" ); - if( !f ) - { show_error( "Can't open pipe", errno ); return 1; } - const int wr = std::fwrite( buffer, 1, size, f ); - if( wr != size || pclose( f ) != 0 ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "Could not run '%s' : %s.\n", - parser.argument( argind ).c_str(), std::strerror( errno ) ); - return 1; - } - - std::signal( SIGPIPE, SIG_IGN ); - if( verbosity >= 1 ) bits.print(); - - const int end = ( ( pos + max_size < size ) ? pos + max_size : size ); - for( int i = pos; i < end; ++i ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "byte %d\n", i ); - const uint8_t byte = buffer[i]; - for( int j = 0; j < 255; ++j ) - { - ++buffer[i]; - if( bits.includes( differing_bits( byte, buffer[i] ) ) ) - { - f = popen( parser.argument( argind ).c_str(), "w" ); - if( !f ) - { show_error( "Can't open pipe", errno ); return 1; } - std::fwrite( buffer, 1, size, f ); - if( pclose( f ) == 0 && verbosity >= 0 ) - std::fprintf( stderr, "0x%02X (0x%02X+0x%02X) passed the test\n", - buffer[i], byte, j + 1 ); - } - } - buffer[i] = byte; - } - - delete[] buffer; - return 0; - } diff --git a/unzcrash.cc b/unzcrash.cc new file mode 100644 index 0000000..0b44997 --- /dev/null +++ b/unzcrash.cc @@ -0,0 +1,355 @@ +/* Unzcrash - Tests robustness of decompressors to corrupted data. + Inspired by unzcrash.c from Julian Seward's bzip2. + Copyright (C) 2008-2014 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ +/* + Exit status: 0 for a normal exit, 1 for environmental problems + (file not found, invalid flags, I/O errors, etc), 2 to indicate a + corrupt or invalid input file, 3 for an internal consistency error + (eg, bug) which caused unzcrash to panic. +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "arg_parser.h" + +#if CHAR_BIT != 8 +#error "Environments where CHAR_BIT != 8 are not supported." +#endif + + +namespace { + +const char * const Program_name = "Unzcrash"; +const char * const program_name = "unzcrash"; +const char * const program_year = "2014"; +const char * invocation_name = 0; + +int verbosity = 0; + + +void show_help() + { + std::printf( "%s - Tests robustness of decompressors to corrupted data.\n", Program_name ); + std::printf( "\nUsage: %s [options] \"lzip -tv\" filename.lz\n", invocation_name ); + std::printf( "\nThis program reads the specified file and then repeatedly decompresses\n" + "it, increasing 256 times each byte of the compressed data, so as to test\n" + "all possible one-byte errors. This should not cause any invalid memory\n" + "accesses. If it does, please, report it as a bug.\n" + "\nOptions:\n" + " -h, --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -b, --bits= test N-bit errors instead of full byte\n" + " -p, --position= first byte position to test [default 0]\n" + " -q, --quiet suppress all messages\n" + " -s, --size= number of byte positions to test [all]\n" + " -v, --verbose be verbose (a 2nd -v gives more)\n" + "Examples of : 1 1,2,3 1-4 1,3-5,8 1-3,5-8\n" + "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" + "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" + "invalid input file, 3 for an internal consistency error (eg, bug) which\n" + "caused unzcrash to panic.\n" + "\nReport bugs to lzip-bug@nongnu.org\n" + "Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html\n" ); + } + + +void show_version() + { + std::printf( "%s %s\n", program_name, PROGVERSION ); + std::printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year ); + std::printf( "License GPLv2+: GNU GPL version 2 or later \n" + "This is free software: you are free to change and redistribute it.\n" + "There is NO WARRANTY, to the extent permitted by law.\n" ); + } + + +void show_error( const char * const msg, const int errcode = 0, + const bool help = false ) + { + if( verbosity >= 0 ) + { + if( msg && msg[0] ) + { + std::fprintf( stderr, "%s: %s", program_name, msg ); + if( errcode > 0 ) + std::fprintf( stderr, ": %s", std::strerror( errcode ) ); + std::fprintf( stderr, "\n" ); + } + if( help ) + std::fprintf( stderr, "Try '%s --help' for more information.\n", + invocation_name ); + } + } + + +void internal_error( const char * const msg ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: internal error: %s\n", program_name, msg ); + std::exit( 3 ); + } + + +unsigned long long getnum( const char * const ptr, + const unsigned long long llimit, + const unsigned long long ulimit ) + { + char * tail; + errno = 0; + unsigned long long result = strtoull( ptr, &tail, 0 ); + if( tail == ptr ) + { + show_error( "Bad or missing numerical argument.", 0, true ); + std::exit( 1 ); + } + + if( !errno && tail[0] ) + { + int factor = ( tail[1] == 'i' ) ? 1024 : 1000; + int exponent = 0; + bool bad_multiplier = false; + switch( tail[0] ) + { + case ' ': break; + case 'Y': exponent = 8; break; + case 'Z': exponent = 7; break; + case 'E': exponent = 6; break; + case 'P': exponent = 5; break; + case 'T': exponent = 4; break; + case 'G': exponent = 3; break; + case 'M': exponent = 2; break; + case 'K': if( factor == 1024 ) exponent = 1; else bad_multiplier = true; + break; + case 'k': if( factor == 1000 ) exponent = 1; else bad_multiplier = true; + break; + default : bad_multiplier = true; + } + if( bad_multiplier ) + { + show_error( "Bad multiplier in numerical argument.", 0, true ); + std::exit( 1 ); + } + for( int i = 0; i < exponent; ++i ) + { + if( ulimit / factor >= result ) result *= factor; + else { errno = ERANGE; break; } + } + } + if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE; + if( errno ) + { + show_error( "Numerical argument out of limits." ); + std::exit( 1 ); + } + return result; + } + + +class Bitset8 // 8 value bitset (1 to 8) + { + bool data[8]; + static bool valid_digit( const unsigned char ch ) + { return ( ch >= '1' && ch <= '8' ); } + +public: + Bitset8() { for( int i = 0; i < 8; ++i ) data[i] = true; } + + bool includes( const int i ) const + { return ( i >= 1 && i <= 8 && data[i-1] ); } + + // Recognized formats: 1 1,2,3 1-4 1,3-5,8 1-3,5-8 + bool parse( const char * p ) + { + for( int i = 0; i < 8; ++i ) data[i] = false; + while( true ) + { + const unsigned char ch1 = *p++; + if( !valid_digit( ch1 ) ) break; + if( *p != '-' ) data[ch1-'1'] = true; + else + { + ++p; + if( !valid_digit( *p ) || ch1 > *p ) break; + for( int c = ch1; c <= *p; ++c ) data[c-'1'] = true; + ++p; + } + if( *p == 0 ) return true; + if( *p == ',' ) ++p; else break; + } + show_error( "Invalid value or range." ); + return false; + } + + // number of N-bit errors per byte (N=0 to 8): 1 8 28 56 70 56 28 8 1 + void print() const + { + std::fflush( stderr ); + int c = 0; + for( int i = 0; i < 8; ++i ) if( data[i] ) ++c; + if( c == 8 ) std::fputs( "Testing full byte.\n", stdout ); + else if( c == 0 ) std::fputs( "Nothing to test.\n", stdout ); + else + { + std::fputs( "Testing ", stdout ); + for( int i = 0; i < 8; ++i ) + if( data[i] ) + { + std::printf( "%d", i + 1 ); + if( --c ) std::fputs( ",", stdout ); + } + std::fputs( " bit errors.\n", stdout ); + } + std::fflush( stdout ); + } + }; + + +int differing_bits( const uint8_t byte1, const uint8_t byte2 ) + { + int count = 0; + uint8_t dif = byte1 ^ byte2; + while( dif ) + { count += ( dif & 1 ); dif >>= 1; } + return count; + } + +} // end namespace + + +int main( const int argc, const char * const argv[] ) + { + enum { buffer_size = 75 << 20 }; + Bitset8 bits; // if Bitset8::parse not called test full byte + int pos = 0; + int max_size = buffer_size; + invocation_name = argv[0]; + + const Arg_parser::Option options[] = + { + { 'h', "help", Arg_parser::no }, + { 'b', "bits", Arg_parser::yes }, + { 'p', "position", Arg_parser::yes }, + { 'q', "quiet", Arg_parser::no }, + { 's', "size", Arg_parser::yes }, + { 'v', "verbose", Arg_parser::no }, + { 'V', "version", Arg_parser::no }, + { 0 , 0, Arg_parser::no } }; + + const Arg_parser parser( argc, argv, options ); + if( parser.error().size() ) // bad option + { show_error( parser.error().c_str(), 0, true ); return 1; } + + int argind = 0; + for( ; argind < parser.arguments(); ++argind ) + { + const int code = parser.code( argind ); + if( !code ) break; // no more options + const char * const arg = parser.argument( argind ).c_str(); + switch( code ) + { + case 'h': show_help(); return 0; + case 'b': if( !bits.parse( arg ) ) return 1; break; + case 'p': pos = getnum( arg, 0, buffer_size - 1 ); break; + case 'q': verbosity = -1; break; + case 's': max_size = getnum( arg, 1, buffer_size ); break; + case 'v': if( verbosity < 4 ) ++verbosity; break; + case 'V': show_version(); return 0; + default : internal_error( "uncaught option." ); + } + } // end process options + + if( argind + 2 != parser.arguments() ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "Usage: %s \"lzip -tv\" filename.lz\n", + invocation_name ); + return 1; + } + + FILE *f = std::fopen( parser.argument( argind + 1 ).c_str(), "rb" ); + if( !f ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "Can't open file '%s' for reading\n", + parser.argument( argind + 1 ).c_str() ); + return 1; + } + + uint8_t * const buffer = new uint8_t[buffer_size]; + const int size = std::fread( buffer, 1, buffer_size, f ); + if( size >= buffer_size ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "input file '%s' is too large.\n", + parser.argument( argind + 1 ).c_str() ); + return 2; + } + std::fclose( f ); + + f = popen( parser.argument( argind ).c_str(), "w" ); + if( !f ) + { show_error( "Can't open pipe", errno ); return 1; } + const int wr = std::fwrite( buffer, 1, size, f ); + if( wr != size || pclose( f ) != 0 ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "Could not run '%s' : %s.\n", + parser.argument( argind ).c_str(), std::strerror( errno ) ); + return 1; + } + + std::signal( SIGPIPE, SIG_IGN ); + if( verbosity >= 1 ) bits.print(); + + const int end = ( ( pos + max_size < size ) ? pos + max_size : size ); + for( int i = pos; i < end; ++i ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "byte %d\n", i ); + const uint8_t byte = buffer[i]; + for( int j = 1; j < 256; ++j ) + { + ++buffer[i]; + if( bits.includes( differing_bits( byte, buffer[i] ) ) ) + { + if( verbosity >= 2 ) + std::fprintf( stderr, "0x%02X (0x%02X+0x%02X) ", + buffer[i], byte, j ); + f = popen( parser.argument( argind ).c_str(), "w" ); + if( !f ) + { show_error( "Can't open pipe", errno ); return 1; } + std::fwrite( buffer, 1, size, f ); + if( pclose( f ) == 0 && verbosity >= 0 ) + std::fprintf( stderr, "0x%02X (0x%02X+0x%02X) passed the test\n", + buffer[i], byte, j ); + } + } + buffer[i] = byte; + } + + delete[] buffer; + return 0; + } -- cgit v1.2.3