From 5a1def2b887dfc18a50032e8645df79a91ebeecd Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 7 Nov 2015 12:50:34 +0100 Subject: Merging upstream version 1.17~rc1. Signed-off-by: Daniel Baumann --- ChangeLog | 13 +++- INSTALL | 2 +- Makefile.in | 17 ++--- NEWS | 8 +++ README | 2 +- arg_parser.cc | 2 +- arg_parser.h | 2 +- block.cc | 33 +++++++++ block.h | 62 +++++++++++++++++ configure | 6 +- decoder.cc | 2 +- decoder.h | 2 +- doc/lziprecover.1 | 5 +- doc/lziprecover.info | 101 +++++++++++++++++++++------- doc/lziprecover.texi | 60 +++++++++++++++-- file_index.cc | 15 +---- file_index.h | 35 +--------- lzip.h | 11 +-- main.cc | 157 ++++++++++++++++++++++++++++++++++++------- merge.cc | 5 +- mtester.cc | 2 +- mtester.h | 2 +- range_dec.cc | 125 ++++++++-------------------------- repair.cc | 186 +++++++++++++++++++++++++++++++++++++++++++++++++-- split.cc | 5 +- testsuite/check.sh | 2 +- unzcrash.cc | 4 +- 27 files changed, 625 insertions(+), 241 deletions(-) create mode 100644 block.cc create mode 100644 block.h diff --git a/ChangeLog b/ChangeLog index 98719e2..d6529ef 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,14 @@ +2015-01-21 Antonio Diaz Diaz + + * Version 1.17-rc1 released. + * repair.cc: Repair time has been reduced by 15%. + * Added new option '-y, --debug-delay'. + * Added new option '-z, --debug-repair'. + 2014-10-16 Antonio Diaz Diaz * Version 1.17-pre1 released. - * merge.cc: New block selection algorithm makes merge much faster. + * New block selection algorithm makes merge up to 100 times faster. * Makefile.in: Added new targets 'install*-compress'. * testsuite/unzcrash.cc: Moved to top directory. * Added chapter 'File names' to the manual. @@ -9,7 +16,7 @@ 2014-08-29 Antonio Diaz Diaz * Version 1.16 released. - * New class LZ_mtester makes repair much faster. + * New class LZ_mtester makes repair up to 10 times faster. * main.cc (close_and_set_permissions): Behave like 'cp -p'. * lziprecover.texinfo: Renamed to lziprecover.texi. * License changed to GPL version 2 or later. @@ -99,7 +106,7 @@ * testsuite/unzcrash.cc: Test all 1-byte errors. -Copyright (C) 2009-2014 Antonio Diaz Diaz. +Copyright (C) 2009-2015 Antonio Diaz Diaz. This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute and diff --git a/INSTALL b/INSTALL index 4f27d5c..7ec0887 100644 --- a/INSTALL +++ b/INSTALL @@ -62,7 +62,7 @@ After running 'configure', you can run 'make' and 'make install' as explained above. -Copyright (C) 2009-2014 Antonio Diaz Diaz. +Copyright (C) 2009-2015 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/Makefile.in b/Makefile.in index ae25dd6..d9c2033 100644 --- a/Makefile.in +++ b/Makefile.in @@ -6,8 +6,8 @@ INSTALL_DATA = $(INSTALL) -m 644 INSTALL_DIR = $(INSTALL) -d -m 755 SHELL = /bin/sh -objs = arg_parser.o file_index.o merge.o mtester.o range_dec.o repair.o \ - split.o decoder.o main.o +objs = arg_parser.o block.o file_index.o merge.o mtester.o range_dec.o \ + repair.o split.o decoder.o main.o unzobjs = arg_parser.o unzcrash.o @@ -36,14 +36,15 @@ unzcrash.o : unzcrash.cc $(objs) : Makefile arg_parser.o : arg_parser.h +block.o : block.h decoder.o : lzip.h decoder.h -file_index.o : lzip.h file_index.h -main.o : arg_parser.h lzip.h decoder.h -merge.o : lzip.h decoder.h file_index.h +file_index.o : lzip.h block.h file_index.h +main.o : arg_parser.h lzip.h decoder.h block.h +merge.o : lzip.h decoder.h block.h file_index.h mtester.o : lzip.h mtester.h -range_dec.o : lzip.h decoder.h file_index.h -repair.o : lzip.h file_index.h mtester.h -split.o : lzip.h +range_dec.o : lzip.h decoder.h block.h file_index.h +repair.o : lzip.h mtester.h block.h file_index.h +split.o : lzip.h block.h file_index.h unzcrash.o : arg_parser.h Makefile diff --git a/NEWS b/NEWS index da32c67..70fd3fd 100644 --- a/NEWS +++ b/NEWS @@ -6,6 +6,14 @@ magnitude faster depending on number of files and number of errors. Please, report as a bug any files correctly merged by lziprecover 1.16 that this version can't merge. +Repair time has been reduced by 15%. + +The new option "-y, --debug-delay", which finds the max error detection +delay in a given range of positions, has been added. + +The new option "-z, --debug-repair", which test repairs a one-byte error +at a given position, has been added. + The targets "install-compress", "install-strip-compress", "install-info-compress" and "install-man-compress" have been added to the Makefile. diff --git a/README b/README index c457365..d660810 100644 --- a/README +++ b/README @@ -75,7 +75,7 @@ unzcrash.c from Julian Seward's bzip2. Type 'make unzcrash' in the lziprecover source directory to build it. Then try 'unzcrash --help'. -Copyright (C) 2009-2014 Antonio Diaz Diaz. +Copyright (C) 2009-2015 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/arg_parser.cc b/arg_parser.cc index 74f9298..55764bd 100644 --- a/arg_parser.cc +++ b/arg_parser.cc @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C++ version) - Copyright (C) 2006-2014 Antonio Diaz Diaz. + Copyright (C) 2006-2015 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/arg_parser.h b/arg_parser.h index d80c353..2e8731c 100644 --- a/arg_parser.h +++ b/arg_parser.h @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C++ version) - Copyright (C) 2006-2014 Antonio Diaz Diaz. + Copyright (C) 2006-2015 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/block.cc b/block.cc new file mode 100644 index 0000000..31e82c7 --- /dev/null +++ b/block.cc @@ -0,0 +1,33 @@ +/* Lziprecover - Data recovery tool for the lzip format + Copyright (C) 2009-2015 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include +#include + +#include "block.h" + + +Block Block::split( const long long pos ) + { + if( pos > pos_ && pos < end() ) + { + const Block b( pos_, pos - pos_ ); + pos_ = pos; size_ -= b.size_; + return b; + } + return Block( 0, 0 ); + } diff --git a/block.h b/block.h new file mode 100644 index 0000000..4b2ab39 --- /dev/null +++ b/block.h @@ -0,0 +1,62 @@ +/* Lziprecover - Data recovery tool for the lzip format + Copyright (C) 2009-2015 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef INT64_MAX +#define INT64_MAX 0x7FFFFFFFFFFFFFFFLL +#endif + + +class Block + { + long long pos_, size_; // pos + size <= INT64_MAX + +public: + Block( const long long p, const long long s ) : pos_( p ), size_( s ) {} + + long long pos() const { return pos_; } + long long size() const { return size_; } + long long end() const { return pos_ + size_; } + + void pos( const long long p ) { pos_ = p; } + void size( const long long s ) { size_ = s; } + + bool operator==( const Block & b ) const + { return pos_ == b.pos_ && size_ == b.size_; } + bool operator!=( const Block & b ) const + { return pos_ != b.pos_ || size_ != b.size_; } + + bool operator<( const Block & b ) const { return pos_ < b.pos_; } + + bool includes( const long long pos ) const + { return ( pos_ <= pos && end() > pos ); } + bool overlaps( const Block & b ) const + { return ( pos_ < b.end() && b.pos_ < end() ); } + + void shift( Block & b ) { ++size_; ++b.pos_; --b.size_; } + Block split( const long long pos ); + }; + + +// defined in range_dec.cc +int range_decompress( const std::string & input_filename, + const std::string & output_filename, + Block range, const int verbosity, const bool force, + const bool ignore, const bool to_stdout ); + +// defined in repair.cc +int debug_delay( const std::string & input_filename, Block range, + const int verbosity ); diff --git a/configure b/configure index 5cba27a..747b3a9 100755 --- a/configure +++ b/configure @@ -1,12 +1,12 @@ #! /bin/sh # configure script for Lziprecover - Data recovery tool for the lzip format -# Copyright (C) 2009-2014 Antonio Diaz Diaz. +# Copyright (C) 2009-2015 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. pkgname=lziprecover -pkgversion=1.17-pre1 +pkgversion=1.17-rc1 progname=lziprecover srctrigger=doc/${pkgname}.texi @@ -165,7 +165,7 @@ echo "LDFLAGS = ${LDFLAGS}" rm -f Makefile cat > Makefile << EOF # Makefile for Lziprecover - Data recovery tool for the lzip format -# Copyright (C) 2009-2014 Antonio Diaz Diaz. +# Copyright (C) 2009-2015 Antonio Diaz Diaz. # This file was generated automatically by configure. Do not edit. # # This Makefile is free software: you have unlimited permission diff --git a/decoder.cc b/decoder.cc index f7e8f54..59587e0 100644 --- a/decoder.cc +++ b/decoder.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2014 Antonio Diaz Diaz. + Copyright (C) 2009-2015 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/decoder.h b/decoder.h index 740d7b2..6d8c919 100644 --- a/decoder.h +++ b/decoder.h @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2014 Antonio Diaz Diaz. + Copyright (C) 2009-2015 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/doc/lziprecover.1 b/doc/lziprecover.1 index 7eefb7a..c2f55d7 100644 --- a/doc/lziprecover.1 +++ b/doc/lziprecover.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1. -.TH LZIPRECOVER "1" "October 2014" "lziprecover 1.17-pre1" "User Commands" +.TH LZIPRECOVER "1" "January 2015" "lziprecover 1.17-rc1" "User Commands" .SH NAME lziprecover \- recovers data from damaged lzip files .SH SYNOPSIS @@ -7,6 +7,7 @@ lziprecover \- recovers data from damaged lzip files [\fI\,options\/\fR] [\fI\,files\/\fR] .SH DESCRIPTION Lziprecover \- Data recovery tool and decompressor for the lzip format. +.PP Lziprecover can repair perfectly most files with small errors (up to one single\-byte error per member), without the need of any extra redundance at all. Losing an entire archive just because of a corrupt byte near the @@ -76,7 +77,7 @@ Report bugs to lzip\-bug@nongnu.org .br Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html .SH COPYRIGHT -Copyright \(co 2014 Antonio Diaz Diaz. +Copyright \(co 2015 Antonio Diaz Diaz. License GPLv2+: GNU GPL version 2 or later .br This is free software: you are free to change and redistribute it. diff --git a/doc/lziprecover.info b/doc/lziprecover.info index 6c636e8..05c1196 100644 --- a/doc/lziprecover.info +++ b/doc/lziprecover.info @@ -12,12 +12,13 @@ File: lziprecover.info, Node: Top, Next: Introduction, Up: (dir) Lziprecover Manual ****************** -This manual is for Lziprecover (version 1.17-pre1, 16 October 2014). +This manual is for Lziprecover (version 1.17-rc1, 21 January 2015). * Menu: * Introduction:: Purpose and features of lziprecover * Invoking lziprecover:: Command line interface +* Data safety:: Protecting data from accidental loss * Repairing files:: Fixing bit-flip and similar errors * Merging files:: Fixing several damaged copies * File names:: Names of the files produced by lziprecover @@ -28,7 +29,7 @@ This manual is for Lziprecover (version 1.17-pre1, 16 October 2014). * Concept index:: Index of concepts - Copyright (C) 2009-2014 Antonio Diaz Diaz. + Copyright (C) 2009-2015 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute and modify it. @@ -104,7 +105,7 @@ like lzip or lunzip. line of defense for the case where the backups are also damaged.  -File: lziprecover.info, Node: Invoking lziprecover, Next: Repairing files, Prev: Introduction, Up: Top +File: lziprecover.info, Node: Invoking lziprecover, Next: Data safety, Prev: Introduction, Up: Top 2 Invoking lziprecover ********************** @@ -252,20 +253,66 @@ invalid input file, 3 for an internal consistency error (eg, bug) which caused lziprecover to panic.  -File: lziprecover.info, Node: Repairing files, Next: Merging files, Prev: Invoking lziprecover, Up: Top +File: lziprecover.info, Node: Data safety, Next: Repairing files, Prev: Invoking lziprecover, Up: Top -3 Repairing files +3 Protecting data from accidental loss +************************************** + +There are 3 main types of data corruption that may cause data loss: +single-byte errors, multi-byte errors (generally affecting a whole +sector in a block device), and total device failure. + + Lziprecover protects natively against single-byte errors (*note +Repairing files::), as long as file integrity is checked frequently +enough that a second single-byte error does not develop in the same +member before the first one is repaired. + + Lziprecover also protects against multi-byte errors (*note Merging +files::), if at least one backup copy of the file is made. + + The only remedy for total device failure is storing backup copies in +separate media. + + How does lzip compare with gzip and bzip2 with respect to data +safety? Lets suppose that you made a backup copy of your valuable +scientific data, compressed it, and stored two copies on separate +media. Years later you notice that both copies are corrupt. + + If you compressed with gzip and both copies suffer any damage in the +data stream, even if it is just one altered bit, the original data can't +be recovered. + + If you used bzip2, and if the file is large enough to contain more +than one compressed data block (usually larger than 900 kB), and if no +block is damaged in both files, then the data can be manually recovered +by splitting the files with bzip2recover, verifying every block and then +copying the right blocks in the right order in another file. + + But if you used lzip, the data can be automatically recovered as +long as no byte is damaged in both files. + + Note that each error in a bzip2 file makes a whole block unusable, +but each error in a lzip file only affects the damaged bytes, making it +possible to recover a file with thousands of errors. + + +File: lziprecover.info, Node: Repairing files, Next: Merging files, Prev: Data safety, Up: Top + +4 Repairing files ***************** Lziprecover can repair perfectly most files with small errors (up to one single-byte error per member), without the need of any extra redundance at all. If the reparation is successful, the repaired file will be -identical bit for bit to the original. +identical bit for bit to the original. This makes lzip files resistant +to bit-flip, one of the most common forms of data corruption. The error may be located anywhere in the file except in the header (first 6 bytes of each member) or in the 'Member size' field of the -trailer (last 8 bytes of each member). This makes lzip files resistant -to bit-flip, one of the most common forms of data corruption. +trailer (last 8 bytes of each member). If the error is in the header it +can be easily repaired with a text editor like GNU Moe (*note File +format::). If the error is in the member size, it is enough to ignore +the message about 'bad member size' when decompressing. Bit-flip happens when one bit in the file is changed from 0 to 1 or vice versa. It may be caused by bad RAM or even by natural radiation. I @@ -289,7 +336,7 @@ lziprecover repairs more efficiently the worst errors.  File: lziprecover.info, Node: Merging files, Next: File names, Prev: Repairing files, Up: Top -4 Merging files +5 Merging files *************** If you have several copies of a file but all of them are too damaged to @@ -330,7 +377,7 @@ errors are randomly located inside each copy).  File: lziprecover.info, Node: File names, Next: File format, Prev: Merging files, Up: Top -5 Names of the files produced by lziprecover +6 Names of the files produced by lziprecover ******************************************** The name of the fixed file produced by '--merge' and '--repair' is made @@ -341,7 +388,7 @@ original file name ends with one of the extensions '.tar.lz', '.lz' or  File: lziprecover.info, Node: File format, Next: Examples, Prev: File names, Up: Top -6 File format +7 File format ************* Perfection is reached, not when there is no longer anything to add, but @@ -414,7 +461,7 @@ additional information before, between, or after them.  File: lziprecover.info, Node: Examples, Next: Unzcrash, Prev: File format, Up: Top -7 A small tutorial with examples +8 A small tutorial with examples ******************************** Example 1: Restore a regular file from its compressed version @@ -485,7 +532,7 @@ correct file produced is saved in 'big_db_00001.lz'.  File: lziprecover.info, Node: Unzcrash, Next: Problems, Prev: Examples, Up: Top -8 Testing the robustness of decompressors +9 Testing the robustness of decompressors ***************************************** The lziprecover package also includes unzcrash, a program written to @@ -562,8 +609,8 @@ caused unzcrash to panic.  File: lziprecover.info, Node: Problems, Next: Concept index, Prev: Unzcrash, Up: Top -9 Reporting bugs -**************** +10 Reporting bugs +***************** There are probably bugs in lziprecover. There are certainly errors and omissions in this manual. If you report them, they will get fixed. If @@ -584,6 +631,7 @@ Concept index * Menu: * bugs: Problems. (line 6) +* data safety: Data safety. (line 6) * examples: Examples. (line 6) * file format: File format. (line 6) * file names: File names. (line 6) @@ -598,17 +646,18 @@ Concept index  Tag Table: Node: Top231 -Node: Introduction1153 -Node: Invoking lziprecover4249 -Node: Repairing files9686 -Node: Merging files11371 -Node: File names13212 -Node: File format13676 -Node: Examples16183 -Ref: ddrescue-example17384 -Node: Unzcrash18493 -Node: Problems21047 -Node: Concept index21597 +Node: Introduction1216 +Node: Invoking lziprecover4312 +Node: Data safety9745 +Node: Repairing files11661 +Node: Merging files13563 +Node: File names15404 +Node: File format15868 +Node: Examples18375 +Ref: ddrescue-example19576 +Node: Unzcrash20685 +Node: Problems23239 +Node: Concept index23791  End Tag Table diff --git a/doc/lziprecover.texi b/doc/lziprecover.texi index 08d4312..85f0385 100644 --- a/doc/lziprecover.texi +++ b/doc/lziprecover.texi @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 16 October 2014 -@set VERSION 1.17-pre1 +@set UPDATED 21 January 2015 +@set VERSION 1.17-rc1 @dircategory Data Compression @direntry @@ -37,6 +37,7 @@ This manual is for Lziprecover (version @value{VERSION}, @value{UPDATED}). @menu * Introduction:: Purpose and features of lziprecover * Invoking lziprecover:: Command line interface +* Data safety:: Protecting data from accidental loss * Repairing files:: Fixing bit-flip and similar errors * Merging files:: Fixing several damaged copies * File names:: Names of the files produced by lziprecover @@ -48,7 +49,7 @@ This manual is for Lziprecover (version @value{VERSION}, @value{UPDATED}). @end menu @sp 1 -Copyright @copyright{} 2009-2014 Antonio Diaz Diaz. +Copyright @copyright{} 2009-2015 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute and modify it. @@ -281,6 +282,48 @@ invalid input file, 3 for an internal consistency error (eg, bug) which caused lziprecover to panic. +@node Data safety +@chapter Protecting data from accidental loss +@cindex data safety + +There are 3 main types of data corruption that may cause data loss: +single-byte errors, multi-byte errors (generally affecting a whole +sector in a block device), and total device failure. + +Lziprecover protects natively against single-byte errors +(@pxref{Repairing files}), as long as file integrity is checked +frequently enough that a second single-byte error does not develop in +the same member before the first one is repaired. + +Lziprecover also protects against multi-byte errors (@pxref{Merging +files}), if at least one backup copy of the file is made. + +The only remedy for total device failure is storing backup copies in +separate media. + +How does lzip compare with gzip and bzip2 with respect to data safety? +Lets suppose that you made a backup copy of your valuable scientific +data, compressed it, and stored two copies on separate media. Years +later you notice that both copies are corrupt. + +If you compressed with gzip and both copies suffer any damage in the +data stream, even if it is just one altered bit, the original data can't +be recovered. + +If you used bzip2, and if the file is large enough to contain more than +one compressed data block (usually larger than 900 kB), and if no block +is damaged in both files, then the data can be manually recovered by +splitting the files with bzip2recover, verifying every block and then +copying the right blocks in the right order in another file. + +But if you used lzip, the data can be automatically recovered as long as +no byte is damaged in both files. + +Note that each error in a bzip2 file makes a whole block unusable, but +each error in a lzip file only affects the damaged bytes, making it +possible to recover a file with thousands of errors. + + @node Repairing files @chapter Repairing files @cindex repairing files @@ -288,12 +331,15 @@ caused lziprecover to panic. Lziprecover can repair perfectly most files with small errors (up to one single-byte error per member), without the need of any extra redundance at all. If the reparation is successful, the repaired file will be -identical bit for bit to the original. +identical bit for bit to the original. This makes lzip files resistant +to bit-flip, one of the most common forms of data corruption. The error may be located anywhere in the file except in the header (first 6 bytes of each member) or in the @samp{Member size} field of the -trailer (last 8 bytes of each member). This makes lzip files resistant -to bit-flip, one of the most common forms of data corruption. +trailer (last 8 bytes of each member). If the error is in the header it +can be easily repaired with a text editor like GNU Moe (@pxref{File +format}). If the error is in the member size, it is enough to ignore the +message about @samp{bad member size} when decompressing. Bit-flip happens when one bit in the file is changed from 0 to 1 or vice versa. It may be caused by bad RAM or even by natural radiation. I have @@ -641,7 +687,7 @@ for all eternity, if not longer. If you find a bug in lziprecover, please send electronic mail to @email{lzip-bug@@nongnu.org}. Include the version number, which you can -find by running @w{@samp{lziprecover --version}}. +find by running @w{@code{lziprecover --version}}. @node Concept index diff --git a/file_index.cc b/file_index.cc index f7e1cd0..de685a3 100644 --- a/file_index.cc +++ b/file_index.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2014 Antonio Diaz Diaz. + Copyright (C) 2009-2015 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,6 +25,7 @@ #include #include "lzip.h" +#include "block.h" #include "file_index.h" @@ -37,18 +38,6 @@ int seek_read( const int fd, uint8_t * const buf, const int size, } -Block Block::split( const long long pos ) - { - if( pos > pos_ && pos < end() ) - { - const Block b( pos_, pos - pos_ ); - pos_ = pos; size_ -= b.size_; - return b; - } - return Block( 0, 0 ); - } - - void File_index::set_errno_error( const char * const msg ) { error_ = msg; error_ += std::strerror( errno ); error_ += '.'; diff --git a/file_index.h b/file_index.h index cf42d83..eff1157 100644 --- a/file_index.h +++ b/file_index.h @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2014 Antonio Diaz Diaz. + Copyright (C) 2009-2015 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -15,39 +15,6 @@ along with this program. If not, see . */ -#ifndef INT64_MAX -#define INT64_MAX 0x7FFFFFFFFFFFFFFFLL -#endif - - -class Block - { - long long pos_, size_; // pos + size <= INT64_MAX - -public: - Block( const long long p, const long long s ) : pos_( p ), size_( s ) {} - - long long pos() const { return pos_; } - long long size() const { return size_; } - long long end() const { return pos_ + size_; } - - void pos( const long long p ) { pos_ = p; } - void size( const long long s ) { size_ = s; } - - bool operator==( const Block & b ) const - { return pos_ == b.pos_ && size_ == b.size_; } - bool operator!=( const Block & b ) const - { return pos_ != b.pos_ || size_ != b.size_; } - - bool operator<( const Block & b ) const { return pos_ < b.pos_; } - - bool overlaps( const Block & b ) const - { return ( pos_ < b.end() && b.pos_ < end() ); } - void shift( Block & b ) { ++size_; ++b.pos_; --b.size_; } - Block split( const long long pos ); - }; - - class File_index { struct Member diff --git a/lzip.h b/lzip.h index 61c0445..58de55f 100644 --- a/lzip.h +++ b/lzip.h @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2014 Antonio Diaz Diaz. + Copyright (C) 2009-2015 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -309,18 +309,19 @@ int merge_files( const std::vector< std::string > & filenames, const bool force ); // defined in range_dec.cc +const char * format_num( unsigned long long num, + unsigned long long limit = -1ULL, + const int set_prefix = 0 ); bool safe_seek( const int fd, const long long pos ); int list_files( const std::vector< std::string > & filenames, const int verbosity ); -int range_decompress( const std::string & input_filename, - const std::string & default_output_filename, - const std::string & range_string, const int verbosity, - const bool force, const bool ignore, const bool to_stdout ); // defined in repair.cc int repair_file( const std::string & input_filename, const std::string & output_filename, const int verbosity, const bool force ); +int debug_repair( const std::string & input_filename, const long long bad_pos, + const int verbosity, const uint8_t bad_value ); // defined in split.cc int split_file( const std::string & input_filename, diff --git a/main.cc b/main.cc index 167e8c6..192f32b 100644 --- a/main.cc +++ b/main.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2014 Antonio Diaz Diaz. + Copyright (C) 2009-2015 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -55,6 +55,7 @@ #include "arg_parser.h" #include "lzip.h" #include "decoder.h" +#include "block.h" #ifndef O_BINARY #define O_BINARY 0 @@ -69,7 +70,7 @@ namespace { const char * const Program_name = "Lziprecover"; const char * const program_name = "lziprecover"; -const char * const program_year = "2014"; +const char * const program_year = "2015"; const char * invocation_name = 0; struct { const char * from; const char * to; } const known_extensions[] = { @@ -77,8 +78,8 @@ struct { const char * from; const char * to; } const known_extensions[] = { { ".tlz", ".tar" }, { 0, 0 } }; -enum Mode { m_none, m_decompress, m_list, m_merge, m_range, m_repair, - m_split, m_test }; +enum Mode { m_none, m_debug_delay, m_debug_repair, m_decompress, m_list, + m_merge, m_range_dec, m_repair, m_split, m_test }; std::string output_filename; int outfd = -1; @@ -92,7 +93,7 @@ bool delete_output_on_interrupt = false; void show_help() { std::printf( "%s - Data recovery tool and decompressor for the lzip format.\n", Program_name ); - std::printf( "Lziprecover can repair perfectly most files with small errors (up to one\n" + std::printf( "\nLziprecover can repair perfectly most files with small errors (up to one\n" "single-byte error per member), without the need of any extra redundance\n" "at all. Losing an entire archive just because of a corrupt byte near the\n" "beginning is a thing of the past.\n" @@ -116,8 +117,13 @@ void show_help() " -R, --repair try to repair a small error in file\n" " -s, --split split multi-member file in single-member files\n" " -t, --test test compressed file integrity\n" - " -v, --verbose be verbose (a 2nd -v gives more)\n" - "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" + " -v, --verbose be verbose (a 2nd -v gives more)\n" ); + if( verbosity >= 1 ) + { + std::printf( " -y, --debug-delay= find max error detection delay in \n" + " -z, --debug-repair=, test repair one-byte error at \n" ); + } + std::printf( "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" @@ -141,22 +147,109 @@ void show_version() void show_header( const File_header & header ) { - const char * const prefix[8] = - { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; - enum { factor = 1024 }; - const char * p = ""; - const char * np = " "; - unsigned num = header.dictionary_size(); - bool exact = ( num % factor == 0 ); - - for( int i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) - { num /= factor; if( num % factor != 0 ) exact = false; - p = prefix[i]; np = ""; } - std::fprintf( stderr, "dictionary size %s%4u %sB. ", np, num, p ); + if( verbosity >= 3 ) + { + const char * const prefix[8] = + { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; + enum { factor = 1024 }; + const char * p = ""; + const char * np = " "; + unsigned num = header.dictionary_size(); + bool exact = ( num % factor == 0 ); + + for( int i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) + { num /= factor; if( num % factor != 0 ) exact = false; + p = prefix[i]; np = ""; } + std::fprintf( stderr, "dictionary size %s%4u %sB. ", np, num, p ); + } } namespace { +// Returns the number of chars read, or 0 if error. +// +int parse_long_long( const char * const ptr, long long & value ) + { + char * tail; + errno = 0; + value = strtoll( ptr, &tail, 0 ); + if( tail == ptr || errno || value < 0 ) return 0; + int c = tail - ptr; + + if( ptr[c] ) + { + const int factor = ( ptr[c+1] == 'i' ) ? 1024 : 1000; + int exponent = 0; + switch( ptr[c] ) + { + case 'Y': exponent = 8; break; + case 'Z': exponent = 7; break; + case 'E': exponent = 6; break; + case 'P': exponent = 5; break; + case 'T': exponent = 4; break; + case 'G': exponent = 3; break; + case 'M': exponent = 2; break; + case 'K': if( factor == 1024 ) exponent = 1; else return 0; break; + case 'k': if( factor == 1000 ) exponent = 1; else return 0; break; + } + if( exponent > 0 ) + { + ++c; + if( ptr[c] == 'i' ) { ++c; if( value ) format_num( 0, 0, -1 ); } + if( ptr[c] == 'B' ) ++c; + for( int i = 0; i < exponent; ++i ) + { + if( INT64_MAX / factor >= value ) value *= factor; + else return 0; + } + } + } + return c; + } + + +// Recognized formats: - , +// +void parse_range( const char * const ptr, Block & range ) + { + long long value = 0; + int c = parse_long_long( ptr, value ); // pos + if( c && value >= 0 && value < INT64_MAX && + ( ptr[c] == 0 || ptr[c] == ',' || ptr[c] == '-' ) ) + { + range.pos( value ); + if( ptr[c] == 0 ) { range.size( INT64_MAX - value ); return; } + const bool issize = ( ptr[c] == ',' ); + c = parse_long_long( ptr + c + 1, value ); // size + if( c && value > 0 && ( issize || value > range.pos() ) ) + { + if( !issize ) value -= range.pos(); + if( INT64_MAX - range.pos() >= value ) { range.size( value ); return; } + } + } + show_error( "Bad decompression range.", 0, true ); + std::exit( 1 ); + } + + +// Recognized format: , +// +void parse_pos_value( const char * const ptr, long long & pos, uint8_t & value ) + { + long long val = 0; + int c = parse_long_long( ptr, val ); // pos + if( c && val >= 0 && val < INT64_MAX && ptr[c] == ',' ) + { + pos = val; + c = parse_long_long( ptr + c + 1, val ); // value + if( c && val >= 0 && val < 256 ) + { value = val; return; } + } + show_error( "Bad file position or byte value.", 0, true ); + std::exit( 1 ); + } + + void one_file( const int files ) { if( files != 1 ) @@ -400,7 +493,7 @@ int decompress( const int infd, const Pretty_print & pp, const bool testing ) { pp( "Invalid dictionary size in member header." ); retval = 2; break; } if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) - { pp(); if( verbosity >= 3 ) show_header( header ); } + { pp(); show_header( header ); } LZ_decoder decoder( header, rdec, outfd ); const int result = decoder.decode_member( pp ); @@ -504,12 +597,14 @@ void internal_error( const char * const msg ) int main( const int argc, const char * const argv[] ) { + Block range( 0, 0 ); + long long bad_pos = 0; std::string input_filename; std::string default_output_filename; - std::string range_string; std::vector< std::string > filenames; int infd = -1; Mode program_mode = m_none; + uint8_t bad_value = 0; bool force = false; bool ignore = false; bool keep_input_files = false; @@ -535,6 +630,8 @@ int main( const int argc, const char * const argv[] ) { 't', "test", Arg_parser::no }, { 'v', "verbose", Arg_parser::no }, { 'V', "version", Arg_parser::no }, + { 'y', "debug-delay", Arg_parser::yes }, + { 'z', "debug-repair", Arg_parser::yes }, { 0 , 0, Arg_parser::no } }; const Arg_parser parser( argc, argv, options ); @@ -551,8 +648,8 @@ int main( const int argc, const char * const argv[] ) { case 'c': to_stdout = true; break; case 'd': set_mode( program_mode, m_decompress ); break; - case 'D': set_mode( program_mode, m_range ); - range_string = arg; break; + case 'D': set_mode( program_mode, m_range_dec ); + parse_range( arg.c_str(), range ); break; case 'f': force = true; break; case 'h': show_help(); return 0; case 'i': ignore = true; break; @@ -567,6 +664,10 @@ int main( const int argc, const char * const argv[] ) case 't': set_mode( program_mode, m_test ); break; case 'v': if( verbosity < 4 ) ++verbosity; break; case 'V': show_version(); return 0; + case 'y': set_mode( program_mode, m_debug_delay ); + parse_range( arg.c_str(), range ); break; + case 'z': set_mode( program_mode, m_debug_repair ); + parse_pos_value( arg.c_str(), bad_pos, bad_value ); break; default : internal_error( "uncaught option." ); } } // end process options @@ -593,6 +694,12 @@ int main( const int argc, const char * const argv[] ) switch( program_mode ) { case m_none: internal_error( "invalid operation." ); break; + case m_debug_delay: + one_file( filenames.size() ); + return debug_delay( filenames[0], range, verbosity ); + case m_debug_repair: + one_file( filenames.size() ); + return debug_repair( filenames[0], bad_pos, verbosity, bad_value ); case m_decompress: break; case m_list: if( filenames.size() < 1 ) @@ -604,10 +711,10 @@ int main( const int argc, const char * const argv[] ) if( default_output_filename.empty() ) default_output_filename = insert_fixed( filenames[0] ); return merge_files( filenames, default_output_filename, verbosity, force ); - case m_range: + case m_range_dec: one_file( filenames.size() ); return range_decompress( filenames[0], default_output_filename, - range_string, verbosity, force, ignore, to_stdout ); + range, verbosity, force, ignore, to_stdout ); case m_repair: one_file( filenames.size() ); if( default_output_filename.empty() ) diff --git a/merge.cc b/merge.cc index 5f034eb..36c2faf 100644 --- a/merge.cc +++ b/merge.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2014 Antonio Diaz Diaz. + Copyright (C) 2009-2015 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -30,6 +30,7 @@ #include "lzip.h" #include "decoder.h" +#include "block.h" #include "file_index.h" @@ -255,6 +256,7 @@ int open_input_files( const std::vector< std::string > & filenames, } + // merge block by block bool try_merge_member( const long long mpos, const long long msize, const std::vector< Block > & block_vector, const std::vector< int > & color_vector, @@ -318,6 +320,7 @@ bool try_merge_member( const long long mpos, const long long msize, } + // merge a single block split at every possible position bool try_merge_member1( const long long mpos, const long long msize, const std::vector< Block > & block_vector, const std::vector< int > & color_vector, diff --git a/mtester.cc b/mtester.cc index 58a56ed..c23d51b 100644 --- a/mtester.cc +++ b/mtester.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2014 Antonio Diaz Diaz. + Copyright (C) 2009-2015 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/mtester.h b/mtester.h index 2896a44..f4da88f 100644 --- a/mtester.h +++ b/mtester.h @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2014 Antonio Diaz Diaz. + Copyright (C) 2009-2015 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/range_dec.cc b/range_dec.cc index 0fa9708..8b4d6e0 100644 --- a/range_dec.cc +++ b/range_dec.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2014 Antonio Diaz Diaz. + Copyright (C) 2009-2015 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -30,101 +30,12 @@ #include "lzip.h" #include "decoder.h" +#include "block.h" #include "file_index.h" namespace { -const char * format_num( unsigned long long num, - unsigned long long limit = -1ULL, - const int set_prefix = 0 ) - { - const char * const si_prefix[8] = - { "k", "M", "G", "T", "P", "E", "Z", "Y" }; - const char * const binary_prefix[8] = - { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; - static bool si = true; - static char buf[32]; - - if( set_prefix ) si = ( set_prefix > 0 ); - const unsigned factor = ( si ? 1000 : 1024 ); - const char * const * prefix = ( si ? si_prefix : binary_prefix ); - const char * p = ""; - bool exact = ( num % factor == 0 ); - - for( int i = 0; i < 8 && ( num > limit || ( exact && num >= factor ) ); ++i ) - { num /= factor; if( num % factor != 0 ) exact = false; p = prefix[i]; } - snprintf( buf, sizeof buf, "%llu %s", num, p ); - return buf; - } - - -// Returns the number of chars read, or 0 if error. -// -int parse_long_long( const char * const ptr, long long & value ) - { - char * tail; - errno = 0; - value = strtoll( ptr, &tail, 0 ); - if( tail == ptr || errno || value < 0 ) return 0; - int c = tail - ptr; - - if( ptr[c] ) - { - const int factor = ( ptr[c+1] == 'i' ) ? 1024 : 1000; - int exponent = 0; - switch( ptr[c] ) - { - case 'Y': exponent = 8; break; - case 'Z': exponent = 7; break; - case 'E': exponent = 6; break; - case 'P': exponent = 5; break; - case 'T': exponent = 4; break; - case 'G': exponent = 3; break; - case 'M': exponent = 2; break; - case 'K': if( factor == 1024 ) exponent = 1; else return 0; break; - case 'k': if( factor == 1000 ) exponent = 1; else return 0; break; - } - if( exponent > 0 ) - { - ++c; - if( ptr[c] == 'i' ) { ++c; if( value ) format_num( 0, 0, -1 ); } - if( ptr[c] == 'B' ) ++c; - for( int i = 0; i < exponent; ++i ) - { - if( INT64_MAX / factor >= value ) value *= factor; - else return 0; - } - } - } - return c; - } - - -// Recognized formats: - , -// -void parse_range( const char * const ptr, Block & range ) - { - long long value = 0; - int c = parse_long_long( ptr, value ); // pos - if( c && value >= 0 && value < INT64_MAX && - ( ptr[c] == 0 || ptr[c] == ',' || ptr[c] == '-' ) ) - { - range.pos( value ); - if( ptr[c] == 0 ) { range.size( INT64_MAX - value ); return; } - const bool issize = ( ptr[c] == ',' ); - c = parse_long_long( ptr + c + 1, value ); // size - if( c && value > 0 && ( issize || value > range.pos() ) ) - { - if( !issize ) value -= range.pos(); - if( INT64_MAX - range.pos() >= value ) { range.size( value ); return; } - } - } - show_error( "Bad decompression range.", 0, true ); - std::exit( 1 ); - } - - int decompress_member( const int infd, const int outfd, const Pretty_print & pp, const unsigned long long mpos, @@ -221,6 +132,30 @@ int list_file( const char * const input_filename, const Pretty_print & pp ) } // end namespace +const char * format_num( unsigned long long num, + unsigned long long limit, + const int set_prefix ) + { + const char * const si_prefix[8] = + { "k", "M", "G", "T", "P", "E", "Z", "Y" }; + const char * const binary_prefix[8] = + { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; + static bool si = true; + static char buf[32]; + + if( set_prefix ) si = ( set_prefix > 0 ); + const unsigned factor = ( si ? 1000 : 1024 ); + const char * const * prefix = ( si ? si_prefix : binary_prefix ); + const char * p = ""; + bool exact = ( num % factor == 0 ); + + for( int i = 0; i < 8 && ( num > limit || ( exact && num >= factor ) ); ++i ) + { num /= factor; if( num % factor != 0 ) exact = false; p = prefix[i]; } + snprintf( buf, sizeof buf, "%llu %s", num, p ); + return buf; + } + + bool safe_seek( const int fd, const long long pos ) { if( lseek( fd, pos, SEEK_SET ) == pos ) return true; @@ -245,11 +180,9 @@ int list_files( const std::vector< std::string > & filenames, int range_decompress( const std::string & input_filename, const std::string & output_filename, - const std::string & range_string, const int verbosity, - const bool force, const bool ignore, const bool to_stdout ) + Block range, const int verbosity, const bool force, + const bool ignore, const bool to_stdout ) { - Block range( 0, 0 ); - parse_range( range_string.c_str(), range ); struct stat in_stats; const int infd = open_instream( input_filename.c_str(), &in_stats, true, true ); if( infd < 0 ) return 1; @@ -262,7 +195,7 @@ int range_decompress( const std::string & input_filename, if( range.end() > file_index.data_end() ) range.size( std::max( 0LL, file_index.data_end() - range.pos() ) ); if( range.size() <= 0 ) - { if( verbosity >= 1 ) pp( "Nothing to do." ); return 0; } + { if( verbosity >= 0 ) pp( "Nothing to do." ); return 0; } if( verbosity >= 1 ) { diff --git a/repair.cc b/repair.cc index 54a4d89..066b5bd 100644 --- a/repair.cc +++ b/repair.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2014 Antonio Diaz Diaz. + Copyright (C) 2009-2015 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -29,9 +29,12 @@ #include #include "lzip.h" -#include "file_index.h" #include "mtester.h" +#include "block.h" +#include "file_index.h" + +namespace { int seek_write( const int fd, const uint8_t * const buf, const int size, const long long pos ) @@ -41,6 +44,8 @@ int seek_write( const int fd, const uint8_t * const buf, const int size, return 0; } +} // end namespace + int repair_file( const std::string & input_filename, const std::string & output_filename, const int verbosity, @@ -80,13 +85,13 @@ int repair_file( const std::string & input_filename, cleanup_and_fail( output_filename, outfd, 1 ); long pos = failure_pos; bool done = false; - while( pos >= File_header::size && pos > failure_pos - 40000 && !done ) + while( pos >= File_header::size && pos > failure_pos - 50000 && !done ) { - const long min_pos = std::max( (long)File_header::size, pos - 1000 ); + const long min_pos = std::max( (long)File_header::size, pos - 100 ); const LZ_mtester * master = prepare_master( mbuffer, msize, min_pos - 16 ); if( !master ) cleanup_and_fail( output_filename, outfd, 1 ); - for( ; pos >= min_pos && !done ; --pos ) + for( ; pos >= min_pos && !done; --pos ) { if( verbosity >= 1 ) { @@ -141,3 +146,174 @@ int repair_file( const std::string & input_filename, std::fputs( "Copy of input file repaired successfully.\n", stdout ); return 0; } + + +int debug_delay( const std::string & input_filename, Block range, + const int verbosity ) + { + struct stat in_stats; + const int infd = open_instream( input_filename.c_str(), &in_stats, true, true ); + if( infd < 0 ) return 1; + + Pretty_print pp( input_filename, verbosity ); + const File_index file_index( infd ); + if( file_index.retval() != 0 ) + { pp( file_index.error().c_str() ); return file_index.retval(); } + + if( range.end() > file_index.file_end() ) + range.size( std::max( 0LL, file_index.file_end() - range.pos() ) ); + if( range.size() <= 0 ) + { if( verbosity >= 0 ) pp( "Nothing to do." ); return 0; } + + for( long i = 0; i < file_index.members(); ++i ) + { + const Block & mb = file_index.mblock( i ); + if( !range.overlaps( mb ) ) continue; + const long long mpos = file_index.mblock( i ).pos(); + const long long msize = file_index.mblock( i ).size(); + if( verbosity >= 1 ) // damaged member found + { + std::printf( "Finding max delay in member %ld of %ld (member pos = %llu)\n", + i + 1, (long)file_index.members(), mpos ); + std::fflush( stdout ); + } + uint8_t * const mbuffer = read_member( infd, mpos, msize ); + if( !mbuffer ) + { show_error( "Can't read member." ); return 1; } + long pos = std::max( range.pos() - mpos, File_header::size + 1LL ); + const long end = std::min( range.end() - mpos, msize ); + long max_delay = 0; + while( pos < end ) + { + const LZ_mtester * master = prepare_master( mbuffer, msize, pos - 16 ); + if( !master ) + { show_error( "Can't prepare master." ); return 1; } + const long partial_end = std::min( pos + 100, end ); + for( ; pos < partial_end; ++pos ) + { + if( verbosity >= 1 ) + { + std::printf( "Delays in position %llu \r", mpos + pos ); + std::fflush( stdout ); + } + int value = -1; + for( int j = 0; j < 256; ++j ) + { + ++mbuffer[pos]; + if( j == 255 ) break; + long failure_pos; + if( test_member_rest( *master, &failure_pos ) ) continue; + const long delay = failure_pos - pos; + if( delay > max_delay ) { max_delay = delay; value = mbuffer[pos]; } + } + if( value >= 0 && verbosity >= 0 ) + { + std::printf( "New max delay %lu at position %llu (0x%02X)\n", + max_delay, mpos + pos, value ); + std::fflush( stdout ); + } + if( pos + max_delay >= msize ) { pos = end; break; } + } + delete master; + } + delete[] mbuffer; + if( verbosity >= 1 ) std::fputs( "\n", stdout ); + } + + if( verbosity >= 1 ) std::fputs( "Done.\n", stdout ); + return 0; + } + + +int debug_repair( const std::string & input_filename, const long long bad_pos, + const int verbosity, const uint8_t bad_value ) + { + struct stat in_stats; + const int infd = open_instream( input_filename.c_str(), &in_stats, true, true ); + if( infd < 0 ) return 1; + + Pretty_print pp( input_filename, verbosity ); + const File_index file_index( infd ); + if( file_index.retval() != 0 ) + { pp( file_index.error().c_str() ); return file_index.retval(); } + + long idx = 0; + for( ; idx < file_index.members(); ++idx ) + if( file_index.mblock( idx ).includes( bad_pos ) ) break; + if( idx >= file_index.members() ) + { if( verbosity >= 0 ) pp( "Nothing to do." ); return 0; } + + const long long mpos = file_index.mblock( idx ).pos(); + const long long msize = file_index.mblock( idx ).size(); + { + long long failure_pos = 0; + if( !safe_seek( infd, mpos ) ) + { show_error( "Can't seek to member." ); return 1; } + if( !try_decompress_member( infd, msize, &failure_pos ) ) + { + if( verbosity >= 0 ) + std::printf( "Member %ld of %ld already damaged (failure pos = %llu)\n", + idx + 1, (long)file_index.members(), mpos + failure_pos ); + return 1; + } + } + uint8_t * const mbuffer = read_member( infd, mpos, msize ); + if( !mbuffer ) + { show_error( "Can't read member." ); return 1; } + const uint8_t good_value = mbuffer[bad_pos]; + mbuffer[bad_pos] = bad_value; + long failure_pos = 0; + { + const LZ_mtester * master = prepare_master( mbuffer, msize, 0 ); + if( !master ) + { show_error( "Can't prepare master." ); return 1; } + if( test_member_rest( *master, &failure_pos ) ) + { + if( verbosity >= 1 ) + std::fputs( "Member decompressed with no errors.\n", stdout ); + return 0; + } + if( verbosity >= 1 ) + { + std::printf( "Test repairing member %ld of %ld\n" + " (damage pos = %llu (0x%02X->0x%02X), failure pos = %llu)\n", + idx + 1, (long)file_index.members(), mpos + bad_pos, + good_value, bad_value, mpos + failure_pos ); + std::fflush( stdout ); + } + } + long pos = failure_pos; + bool done = false; + while( pos >= File_header::size && pos > failure_pos - 50000 && !done ) + { + const long min_pos = std::max( (long)File_header::size, pos - 100 ); + const LZ_mtester * master = prepare_master( mbuffer, msize, min_pos - 16 ); + if( !master ) + { show_error( "Can't prepare master." ); return 1; } + for( ; pos >= min_pos && !done; --pos ) + { + if( verbosity >= 1 ) + { + std::printf( "Trying position %llu \r", mpos + pos ); + std::fflush( stdout ); + } + for( int j = 0; j < 256; ++j ) + { + ++mbuffer[pos]; + if( j == 255 ) break; + if( test_member_rest( *master ) ) { done = true; break; } + } + } + delete master; + } + delete[] mbuffer; + if( verbosity >= 1 ) std::fputs( "\n", stdout ); + if( !done ) + { + show_error( "Can't repair input file. There is a bug somewhere." ); + return 3; + } + if( verbosity >= 1 ) + std::fputs( "Member repaired successfully.\n", stdout ); + return 0; + } diff --git a/split.cc b/split.cc index 7b3f9fd..2ffb359 100644 --- a/split.cc +++ b/split.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2014 Antonio Diaz Diaz. + Copyright (C) 2009-2015 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -28,6 +28,7 @@ #include #include "lzip.h" +#include "block.h" #include "file_index.h" @@ -81,7 +82,7 @@ bool verify_header( const File_header & header, const Pretty_print & pp ) // Search forward from 'pos' for "LZIP" (Boyer-Moore algorithm) -// Return pos of found string or 'pos+size' if not found. +// Returns pos of found string or 'pos+size' if not found. // int find_magic( const uint8_t * const buffer, const int pos, const int size ) { diff --git a/testsuite/check.sh b/testsuite/check.sh index ef60713..1df77a3 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! /bin/sh # check script for Lziprecover - Data recovery tool for the lzip format -# Copyright (C) 2009-2014 Antonio Diaz Diaz. +# Copyright (C) 2009-2015 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute and modify it. diff --git a/unzcrash.cc b/unzcrash.cc index 0b44997..fd1c816 100644 --- a/unzcrash.cc +++ b/unzcrash.cc @@ -1,6 +1,6 @@ /* Unzcrash - Tests robustness of decompressors to corrupted data. Inspired by unzcrash.c from Julian Seward's bzip2. - Copyright (C) 2008-2014 Antonio Diaz Diaz. + Copyright (C) 2008-2015 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -44,7 +44,7 @@ namespace { const char * const Program_name = "Unzcrash"; const char * const program_name = "unzcrash"; -const char * const program_year = "2014"; +const char * const program_year = "2015"; const char * invocation_name = 0; int verbosity = 0; -- cgit v1.2.3