summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Daniel Baumann <mail@daniel-baumann.ch> 2015-11-07 11:50:27 +0000
committer Daniel Baumann <mail@daniel-baumann.ch> 2015-11-07 11:50:27 +0000
commit 16e805860b4a789ee2a239aaf1011188e0ba4a79 (patch)
tree f2898eef69d3e6dfca0278db399460eaed095eb4
parent Adding upstream version 1.17~pre1. (diff)
download lziprecover-16e805860b4a789ee2a239aaf1011188e0ba4a79.tar.xz
lziprecover-16e805860b4a789ee2a239aaf1011188e0ba4a79.zip
Adding upstream version 1.17~rc1. (ref: upstream/1.17_rc1)
Signed-off-by: Daniel Baumann <mail@daniel-baumann.ch>
-rw-r--r-- ChangeLog 13
-rw-r--r-- INSTALL 2
-rw-r--r-- Makefile.in 17
-rw-r--r-- NEWS 8
-rw-r--r-- README 2
-rw-r--r-- arg_parser.cc 2
-rw-r--r-- arg_parser.h 2
-rw-r--r-- block.cc 33
-rw-r--r-- block.h 62
-rwxr-xr-x configure 6
-rw-r--r-- decoder.cc 2
-rw-r--r-- decoder.h 2
-rw-r--r-- doc/lziprecover.1 5
-rw-r--r-- doc/lziprecover.info 101
-rw-r--r-- doc/lziprecover.texi 60
-rw-r--r-- file_index.cc 15
-rw-r--r-- file_index.h 35
-rw-r--r-- lzip.h 11
-rw-r--r-- main.cc 157
-rw-r--r-- merge.cc 5
-rw-r--r-- mtester.cc 2
-rw-r--r-- mtester.h 2
-rw-r--r-- range_dec.cc 125
-rw-r--r-- repair.cc 186
-rw-r--r-- split.cc 5
-rwxr-xr-x testsuite/check.sh 2
-rw-r--r-- unzcrash.cc 4
27 files changed, 625 insertions, 241 deletions
diff --git a/ChangeLog b/ChangeLog
index 98719e2..d6529ef 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,7 +1,14 @@
+2015-01-21 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 1.17-rc1 released.
+ * repair.cc: Repair time has been reduced by 15%.
+ * Added new option '-y, --debug-delay'.
+ * Added new option '-z, --debug-repair'.
+
2014-10-16 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.17-pre1 released.
- * merge.cc: New block selection algorithm makes merge much faster.
+ * New block selection algorithm makes merge up to 100 times faster.
* Makefile.in: Added new targets 'install*-compress'.
* testsuite/unzcrash.cc: Moved to top directory.
* Added chapter 'File names' to the manual.
@@ -9,7 +16,7 @@
2014-08-29 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.16 released.
- * New class LZ_mtester makes repair much faster.
+ * New class LZ_mtester makes repair up to 10 times faster.
* main.cc (close_and_set_permissions): Behave like 'cp -p'.
* lziprecover.texinfo: Renamed to lziprecover.texi.
* License changed to GPL version 2 or later.
@@ -99,7 +106,7 @@
* testsuite/unzcrash.cc: Test all 1-byte errors.
-Copyright (C) 2009-2014 Antonio Diaz Diaz.
+Copyright (C) 2009-2015 Antonio Diaz Diaz.
This file is a collection of facts, and thus it is not copyrightable,
but just in case, you have unlimited permission to copy, distribute and
diff --git a/INSTALL b/INSTALL
index 4f27d5c..7ec0887 100644
--- a/INSTALL
+++ b/INSTALL
@@ -62,7 +62,7 @@ After running 'configure', you can run 'make' and 'make install' as
explained above.
-Copyright (C) 2009-2014 Antonio Diaz Diaz.
+Copyright (C) 2009-2015 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute and modify it.
diff --git a/Makefile.in b/Makefile.in
index ae25dd6..d9c2033 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -6,8 +6,8 @@ INSTALL_DATA = $(INSTALL) -m 644
INSTALL_DIR = $(INSTALL) -d -m 755
SHELL = /bin/sh
-objs = arg_parser.o file_index.o merge.o mtester.o range_dec.o repair.o \
- split.o decoder.o main.o
+objs = arg_parser.o block.o file_index.o merge.o mtester.o range_dec.o \
+ repair.o split.o decoder.o main.o
unzobjs = arg_parser.o unzcrash.o
@@ -36,14 +36,15 @@ unzcrash.o : unzcrash.cc
$(objs) : Makefile
arg_parser.o : arg_parser.h
+block.o : block.h
decoder.o : lzip.h decoder.h
-file_index.o : lzip.h file_index.h
-main.o : arg_parser.h lzip.h decoder.h
-merge.o : lzip.h decoder.h file_index.h
+file_index.o : lzip.h block.h file_index.h
+main.o : arg_parser.h lzip.h decoder.h block.h
+merge.o : lzip.h decoder.h block.h file_index.h
mtester.o : lzip.h mtester.h
-range_dec.o : lzip.h decoder.h file_index.h
-repair.o : lzip.h file_index.h mtester.h
-split.o : lzip.h
+range_dec.o : lzip.h decoder.h block.h file_index.h
+repair.o : lzip.h mtester.h block.h file_index.h
+split.o : lzip.h block.h file_index.h
unzcrash.o : arg_parser.h Makefile
diff --git a/NEWS b/NEWS
index da32c67..70fd3fd 100644
--- a/NEWS
+++ b/NEWS
@@ -6,6 +6,14 @@ magnitude faster depending on number of files and number of errors.
Please, report as a bug any files correctly merged by lziprecover 1.16
that this version can't merge.
+Repair time has been reduced by 15%.
+
+The new option "-y, --debug-delay", which finds the max error detection
+delay in a given range of positions, has been added.
+
+The new option "-z, --debug-repair", which tests the repair of a one-byte
+error at a given position, has been added.
+
The targets "install-compress", "install-strip-compress",
"install-info-compress" and "install-man-compress" have been added to
the Makefile.
diff --git a/README b/README
index c457365..d660810 100644
--- a/README
+++ b/README
@@ -75,7 +75,7 @@ unzcrash.c from Julian Seward's bzip2. Type 'make unzcrash' in the
lziprecover source directory to build it. Then try 'unzcrash --help'.
-Copyright (C) 2009-2014 Antonio Diaz Diaz.
+Copyright (C) 2009-2015 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute and modify it.
diff --git a/arg_parser.cc b/arg_parser.cc
index 74f9298..55764bd 100644
--- a/arg_parser.cc
+++ b/arg_parser.cc
@@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
- Copyright (C) 2006-2014 Antonio Diaz Diaz.
+ Copyright (C) 2006-2015 Antonio Diaz Diaz.
This library is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/arg_parser.h b/arg_parser.h
index d80c353..2e8731c 100644
--- a/arg_parser.h
+++ b/arg_parser.h
@@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
- Copyright (C) 2006-2014 Antonio Diaz Diaz.
+ Copyright (C) 2006-2015 Antonio Diaz Diaz.
This library is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/block.cc b/block.cc
new file mode 100644
index 0000000..31e82c7
--- /dev/null
+++ b/block.cc
@@ -0,0 +1,33 @@
+/* Lziprecover - Data recovery tool for the lzip format
+ Copyright (C) 2009-2015 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <string>
+#include <vector>
+
+#include "block.h"
+
+
+Block Block::split( const long long pos )
+ {
+ if( pos > pos_ && pos < end() )
+ {
+ const Block b( pos_, pos - pos_ );
+ pos_ = pos; size_ -= b.size_;
+ return b;
+ }
+ return Block( 0, 0 );
+ }
diff --git a/block.h b/block.h
new file mode 100644
index 0000000..4b2ab39
--- /dev/null
+++ b/block.h
@@ -0,0 +1,62 @@
+/* Lziprecover - Data recovery tool for the lzip format
+ Copyright (C) 2009-2015 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef INT64_MAX
+#define INT64_MAX 0x7FFFFFFFFFFFFFFFLL
+#endif
+
+
+class Block
+ {
+ long long pos_, size_; // pos + size <= INT64_MAX
+
+public:
+ Block( const long long p, const long long s ) : pos_( p ), size_( s ) {}
+
+ long long pos() const { return pos_; }
+ long long size() const { return size_; }
+ long long end() const { return pos_ + size_; }
+
+ void pos( const long long p ) { pos_ = p; }
+ void size( const long long s ) { size_ = s; }
+
+ bool operator==( const Block & b ) const
+ { return pos_ == b.pos_ && size_ == b.size_; }
+ bool operator!=( const Block & b ) const
+ { return pos_ != b.pos_ || size_ != b.size_; }
+
+ bool operator<( const Block & b ) const { return pos_ < b.pos_; }
+
+ bool includes( const long long pos ) const
+ { return ( pos_ <= pos && end() > pos ); }
+ bool overlaps( const Block & b ) const
+ { return ( pos_ < b.end() && b.pos_ < end() ); }
+
+ void shift( Block & b ) { ++size_; ++b.pos_; --b.size_; }
+ Block split( const long long pos );
+ };
+
+
+// defined in range_dec.cc
+int range_decompress( const std::string & input_filename,
+ const std::string & output_filename,
+ Block range, const int verbosity, const bool force,
+ const bool ignore, const bool to_stdout );
+
+// defined in repair.cc
+int debug_delay( const std::string & input_filename, Block range,
+ const int verbosity );
diff --git a/configure b/configure
index 5cba27a..747b3a9 100755
--- a/configure
+++ b/configure
@@ -1,12 +1,12 @@
#! /bin/sh
# configure script for Lziprecover - Data recovery tool for the lzip format
-# Copyright (C) 2009-2014 Antonio Diaz Diaz.
+# Copyright (C) 2009-2015 Antonio Diaz Diaz.
#
# This configure script is free software: you have unlimited permission
# to copy, distribute and modify it.
pkgname=lziprecover
-pkgversion=1.17-pre1
+pkgversion=1.17-rc1
progname=lziprecover
srctrigger=doc/${pkgname}.texi
@@ -165,7 +165,7 @@ echo "LDFLAGS = ${LDFLAGS}"
rm -f Makefile
cat > Makefile << EOF
# Makefile for Lziprecover - Data recovery tool for the lzip format
-# Copyright (C) 2009-2014 Antonio Diaz Diaz.
+# Copyright (C) 2009-2015 Antonio Diaz Diaz.
# This file was generated automatically by configure. Do not edit.
#
# This Makefile is free software: you have unlimited permission
diff --git a/decoder.cc b/decoder.cc
index f7e8f54..59587e0 100644
--- a/decoder.cc
+++ b/decoder.cc
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2014 Antonio Diaz Diaz.
+ Copyright (C) 2009-2015 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/decoder.h b/decoder.h
index 740d7b2..6d8c919 100644
--- a/decoder.h
+++ b/decoder.h
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2014 Antonio Diaz Diaz.
+ Copyright (C) 2009-2015 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/doc/lziprecover.1 b/doc/lziprecover.1
index 7eefb7a..c2f55d7 100644
--- a/doc/lziprecover.1
+++ b/doc/lziprecover.1
@@ -1,5 +1,5 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1.
-.TH LZIPRECOVER "1" "October 2014" "lziprecover 1.17-pre1" "User Commands"
+.TH LZIPRECOVER "1" "January 2015" "lziprecover 1.17-rc1" "User Commands"
.SH NAME
lziprecover \- recovers data from damaged lzip files
.SH SYNOPSIS
@@ -7,6 +7,7 @@ lziprecover \- recovers data from damaged lzip files
[\fI\,options\/\fR] [\fI\,files\/\fR]
.SH DESCRIPTION
Lziprecover \- Data recovery tool and decompressor for the lzip format.
+.PP
Lziprecover can repair perfectly most files with small errors (up to one
single\-byte error per member), without the need of any extra redundance
at all. Losing an entire archive just because of a corrupt byte near the
@@ -76,7 +77,7 @@ Report bugs to lzip\-bug@nongnu.org
.br
Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html
.SH COPYRIGHT
-Copyright \(co 2014 Antonio Diaz Diaz.
+Copyright \(co 2015 Antonio Diaz Diaz.
License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
.br
This is free software: you are free to change and redistribute it.
diff --git a/doc/lziprecover.info b/doc/lziprecover.info
index 6c636e8..05c1196 100644
--- a/doc/lziprecover.info
+++ b/doc/lziprecover.info
@@ -12,12 +12,13 @@ File: lziprecover.info, Node: Top, Next: Introduction, Up: (dir)
Lziprecover Manual
******************
-This manual is for Lziprecover (version 1.17-pre1, 16 October 2014).
+This manual is for Lziprecover (version 1.17-rc1, 21 January 2015).
* Menu:
* Introduction:: Purpose and features of lziprecover
* Invoking lziprecover:: Command line interface
+* Data safety:: Protecting data from accidental loss
* Repairing files:: Fixing bit-flip and similar errors
* Merging files:: Fixing several damaged copies
* File names:: Names of the files produced by lziprecover
@@ -28,7 +29,7 @@ This manual is for Lziprecover (version 1.17-pre1, 16 October 2014).
* Concept index:: Index of concepts
- Copyright (C) 2009-2014 Antonio Diaz Diaz.
+ Copyright (C) 2009-2015 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission to
copy, distribute and modify it.
@@ -104,7 +105,7 @@ like lzip or lunzip.
line of defense for the case where the backups are also damaged.

-File: lziprecover.info, Node: Invoking lziprecover, Next: Repairing files, Prev: Introduction, Up: Top
+File: lziprecover.info, Node: Invoking lziprecover, Next: Data safety, Prev: Introduction, Up: Top
2 Invoking lziprecover
**********************
@@ -252,20 +253,66 @@ invalid input file, 3 for an internal consistency error (eg, bug) which
caused lziprecover to panic.

-File: lziprecover.info, Node: Repairing files, Next: Merging files, Prev: Invoking lziprecover, Up: Top
+File: lziprecover.info, Node: Data safety, Next: Repairing files, Prev: Invoking lziprecover, Up: Top
-3 Repairing files
+3 Protecting data from accidental loss
+**************************************
+
+There are 3 main types of data corruption that may cause data loss:
+single-byte errors, multi-byte errors (generally affecting a whole
+sector in a block device), and total device failure.
+
+ Lziprecover protects natively against single-byte errors (*note
+Repairing files::), as long as file integrity is checked frequently
+enough that a second single-byte error does not develop in the same
+member before the first one is repaired.
+
+ Lziprecover also protects against multi-byte errors (*note Merging
+files::), if at least one backup copy of the file is made.
+
+ The only remedy for total device failure is storing backup copies in
+separate media.
+
+ How does lzip compare with gzip and bzip2 with respect to data
+safety? Lets suppose that you made a backup copy of your valuable
+scientific data, compressed it, and stored two copies on separate
+media. Years later you notice that both copies are corrupt.
+
+ If you compressed with gzip and both copies suffer any damage in the
+data stream, even if it is just one altered bit, the original data can't
+be recovered.
+
+ If you used bzip2, and if the file is large enough to contain more
+than one compressed data block (usually larger than 900 kB), and if no
+block is damaged in both files, then the data can be manually recovered
+by splitting the files with bzip2recover, verifying every block and then
+copying the right blocks in the right order in another file.
+
+ But if you used lzip, the data can be automatically recovered as
+long as no byte is damaged in both files.
+
+ Note that each error in a bzip2 file makes a whole block unusable,
+but each error in a lzip file only affects the damaged bytes, making it
+possible to recover a file with thousands of errors.
+
+
+File: lziprecover.info, Node: Repairing files, Next: Merging files, Prev: Data safety, Up: Top
+
+4 Repairing files
*****************
Lziprecover can repair perfectly most files with small errors (up to one
single-byte error per member), without the need of any extra redundance
at all. If the reparation is successful, the repaired file will be
-identical bit for bit to the original.
+identical bit for bit to the original. This makes lzip files resistant
+to bit-flip, one of the most common forms of data corruption.
The error may be located anywhere in the file except in the header
(first 6 bytes of each member) or in the 'Member size' field of the
-trailer (last 8 bytes of each member). This makes lzip files resistant
-to bit-flip, one of the most common forms of data corruption.
+trailer (last 8 bytes of each member). If the error is in the header it
+can be easily repaired with a text editor like GNU Moe (*note File
+format::). If the error is in the member size, it is enough to ignore
+the message about 'bad member size' when decompressing.
Bit-flip happens when one bit in the file is changed from 0 to 1 or
vice versa. It may be caused by bad RAM or even by natural radiation. I
@@ -289,7 +336,7 @@ lziprecover repairs more efficiently the worst errors.

File: lziprecover.info, Node: Merging files, Next: File names, Prev: Repairing files, Up: Top
-4 Merging files
+5 Merging files
***************
If you have several copies of a file but all of them are too damaged to
@@ -330,7 +377,7 @@ errors are randomly located inside each copy).

File: lziprecover.info, Node: File names, Next: File format, Prev: Merging files, Up: Top
-5 Names of the files produced by lziprecover
+6 Names of the files produced by lziprecover
********************************************
The name of the fixed file produced by '--merge' and '--repair' is made
@@ -341,7 +388,7 @@ original file name ends with one of the extensions '.tar.lz', '.lz' or

File: lziprecover.info, Node: File format, Next: Examples, Prev: File names, Up: Top
-6 File format
+7 File format
*************
Perfection is reached, not when there is no longer anything to add, but
@@ -414,7 +461,7 @@ additional information before, between, or after them.

File: lziprecover.info, Node: Examples, Next: Unzcrash, Prev: File format, Up: Top
-7 A small tutorial with examples
+8 A small tutorial with examples
********************************
Example 1: Restore a regular file from its compressed version
@@ -485,7 +532,7 @@ correct file produced is saved in 'big_db_00001.lz'.

File: lziprecover.info, Node: Unzcrash, Next: Problems, Prev: Examples, Up: Top
-8 Testing the robustness of decompressors
+9 Testing the robustness of decompressors
*****************************************
The lziprecover package also includes unzcrash, a program written to
@@ -562,8 +609,8 @@ caused unzcrash to panic.

File: lziprecover.info, Node: Problems, Next: Concept index, Prev: Unzcrash, Up: Top
-9 Reporting bugs
-****************
+10 Reporting bugs
+*****************
There are probably bugs in lziprecover. There are certainly errors and
omissions in this manual. If you report them, they will get fixed. If
@@ -584,6 +631,7 @@ Concept index
* Menu:
* bugs: Problems. (line 6)
+* data safety: Data safety. (line 6)
* examples: Examples. (line 6)
* file format: File format. (line 6)
* file names: File names. (line 6)
@@ -598,17 +646,18 @@ Concept index

Tag Table:
Node: Top231
-Node: Introduction1153
-Node: Invoking lziprecover4249
-Node: Repairing files9686
-Node: Merging files11371
-Node: File names13212
-Node: File format13676
-Node: Examples16183
-Ref: ddrescue-example17384
-Node: Unzcrash18493
-Node: Problems21047
-Node: Concept index21597
+Node: Introduction1216
+Node: Invoking lziprecover4312
+Node: Data safety9745
+Node: Repairing files11661
+Node: Merging files13563
+Node: File names15404
+Node: File format15868
+Node: Examples18375
+Ref: ddrescue-example19576
+Node: Unzcrash20685
+Node: Problems23239
+Node: Concept index23791

End Tag Table
diff --git a/doc/lziprecover.texi b/doc/lziprecover.texi
index 08d4312..85f0385 100644
--- a/doc/lziprecover.texi
+++ b/doc/lziprecover.texi
@@ -6,8 +6,8 @@
@finalout
@c %**end of header
-@set UPDATED 16 October 2014
-@set VERSION 1.17-pre1
+@set UPDATED 21 January 2015
+@set VERSION 1.17-rc1
@dircategory Data Compression
@direntry
@@ -37,6 +37,7 @@ This manual is for Lziprecover (version @value{VERSION}, @value{UPDATED}).
@menu
* Introduction:: Purpose and features of lziprecover
* Invoking lziprecover:: Command line interface
+* Data safety:: Protecting data from accidental loss
* Repairing files:: Fixing bit-flip and similar errors
* Merging files:: Fixing several damaged copies
* File names:: Names of the files produced by lziprecover
@@ -48,7 +49,7 @@ This manual is for Lziprecover (version @value{VERSION}, @value{UPDATED}).
@end menu
@sp 1
-Copyright @copyright{} 2009-2014 Antonio Diaz Diaz.
+Copyright @copyright{} 2009-2015 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission
to copy, distribute and modify it.
@@ -281,6 +282,48 @@ invalid input file, 3 for an internal consistency error (eg, bug) which
caused lziprecover to panic.
+@node Data safety
+@chapter Protecting data from accidental loss
+@cindex data safety
+
+There are 3 main types of data corruption that may cause data loss:
+single-byte errors, multi-byte errors (generally affecting a whole
+sector in a block device), and total device failure.
+
+Lziprecover protects natively against single-byte errors
+(@pxref{Repairing files}), as long as file integrity is checked
+frequently enough that a second single-byte error does not develop in
+the same member before the first one is repaired.
+
+Lziprecover also protects against multi-byte errors (@pxref{Merging
+files}), if at least one backup copy of the file is made.
+
+The only remedy for total device failure is storing backup copies in
+separate media.
+
+How does lzip compare with gzip and bzip2 with respect to data safety?
+Let's suppose that you made a backup copy of your valuable scientific
+data, compressed it, and stored two copies on separate media. Years
+later you notice that both copies are corrupt.
+
+If you compressed with gzip and both copies suffer any damage in the
+data stream, even if it is just one altered bit, the original data can't
+be recovered.
+
+If you used bzip2, and if the file is large enough to contain more than
+one compressed data block (usually larger than 900 kB), and if no block
+is damaged in both files, then the data can be manually recovered by
+splitting the files with bzip2recover, verifying every block and then
+copying the right blocks in the right order in another file.
+
+But if you used lzip, the data can be automatically recovered as long as
+no byte is damaged in both files.
+
+Note that each error in a bzip2 file makes a whole block unusable, but
+each error in a lzip file only affects the damaged bytes, making it
+possible to recover a file with thousands of errors.
+
+
@node Repairing files
@chapter Repairing files
@cindex repairing files
@@ -288,12 +331,15 @@ caused lziprecover to panic.
Lziprecover can repair perfectly most files with small errors (up to one
single-byte error per member), without the need of any extra redundance
at all. If the reparation is successful, the repaired file will be
-identical bit for bit to the original.
+identical bit for bit to the original. This makes lzip files resistant
+to bit-flip, one of the most common forms of data corruption.
The error may be located anywhere in the file except in the header
(first 6 bytes of each member) or in the @samp{Member size} field of the
-trailer (last 8 bytes of each member). This makes lzip files resistant
-to bit-flip, one of the most common forms of data corruption.
+trailer (last 8 bytes of each member). If the error is in the header it
+can be easily repaired with a text editor like GNU Moe (@pxref{File
+format}). If the error is in the member size, it is enough to ignore the
+message about @samp{bad member size} when decompressing.
Bit-flip happens when one bit in the file is changed from 0 to 1 or vice
versa. It may be caused by bad RAM or even by natural radiation. I have
@@ -641,7 +687,7 @@ for all eternity, if not longer.
If you find a bug in lziprecover, please send electronic mail to
@email{lzip-bug@@nongnu.org}. Include the version number, which you can
-find by running @w{@samp{lziprecover --version}}.
+find by running @w{@code{lziprecover --version}}.
@node Concept index
diff --git a/file_index.cc b/file_index.cc
index f7e1cd0..de685a3 100644
--- a/file_index.cc
+++ b/file_index.cc
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2014 Antonio Diaz Diaz.
+ Copyright (C) 2009-2015 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -25,6 +25,7 @@
#include <unistd.h>
#include "lzip.h"
+#include "block.h"
#include "file_index.h"
@@ -37,18 +38,6 @@ int seek_read( const int fd, uint8_t * const buf, const int size,
}
-Block Block::split( const long long pos )
- {
- if( pos > pos_ && pos < end() )
- {
- const Block b( pos_, pos - pos_ );
- pos_ = pos; size_ -= b.size_;
- return b;
- }
- return Block( 0, 0 );
- }
-
-
void File_index::set_errno_error( const char * const msg )
{
error_ = msg; error_ += std::strerror( errno ); error_ += '.';
diff --git a/file_index.h b/file_index.h
index cf42d83..eff1157 100644
--- a/file_index.h
+++ b/file_index.h
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2014 Antonio Diaz Diaz.
+ Copyright (C) 2009-2015 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -15,39 +15,6 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#ifndef INT64_MAX
-#define INT64_MAX 0x7FFFFFFFFFFFFFFFLL
-#endif
-
-
-class Block
- {
- long long pos_, size_; // pos + size <= INT64_MAX
-
-public:
- Block( const long long p, const long long s ) : pos_( p ), size_( s ) {}
-
- long long pos() const { return pos_; }
- long long size() const { return size_; }
- long long end() const { return pos_ + size_; }
-
- void pos( const long long p ) { pos_ = p; }
- void size( const long long s ) { size_ = s; }
-
- bool operator==( const Block & b ) const
- { return pos_ == b.pos_ && size_ == b.size_; }
- bool operator!=( const Block & b ) const
- { return pos_ != b.pos_ || size_ != b.size_; }
-
- bool operator<( const Block & b ) const { return pos_ < b.pos_; }
-
- bool overlaps( const Block & b ) const
- { return ( pos_ < b.end() && b.pos_ < end() ); }
- void shift( Block & b ) { ++size_; ++b.pos_; --b.size_; }
- Block split( const long long pos );
- };
-
-
class File_index
{
struct Member
diff --git a/lzip.h b/lzip.h
index 61c0445..58de55f 100644
--- a/lzip.h
+++ b/lzip.h
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2014 Antonio Diaz Diaz.
+ Copyright (C) 2009-2015 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -309,18 +309,19 @@ int merge_files( const std::vector< std::string > & filenames,
const bool force );
// defined in range_dec.cc
+const char * format_num( unsigned long long num,
+ unsigned long long limit = -1ULL,
+ const int set_prefix = 0 );
bool safe_seek( const int fd, const long long pos );
int list_files( const std::vector< std::string > & filenames,
const int verbosity );
-int range_decompress( const std::string & input_filename,
- const std::string & default_output_filename,
- const std::string & range_string, const int verbosity,
- const bool force, const bool ignore, const bool to_stdout );
// defined in repair.cc
int repair_file( const std::string & input_filename,
const std::string & output_filename, const int verbosity,
const bool force );
+int debug_repair( const std::string & input_filename, const long long bad_pos,
+ const int verbosity, const uint8_t bad_value );
// defined in split.cc
int split_file( const std::string & input_filename,
diff --git a/main.cc b/main.cc
index 167e8c6..192f32b 100644
--- a/main.cc
+++ b/main.cc
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2014 Antonio Diaz Diaz.
+ Copyright (C) 2009-2015 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -55,6 +55,7 @@
#include "arg_parser.h"
#include "lzip.h"
#include "decoder.h"
+#include "block.h"
#ifndef O_BINARY
#define O_BINARY 0
@@ -69,7 +70,7 @@ namespace {
const char * const Program_name = "Lziprecover";
const char * const program_name = "lziprecover";
-const char * const program_year = "2014";
+const char * const program_year = "2015";
const char * invocation_name = 0;
struct { const char * from; const char * to; } const known_extensions[] = {
@@ -77,8 +78,8 @@ struct { const char * from; const char * to; } const known_extensions[] = {
{ ".tlz", ".tar" },
{ 0, 0 } };
-enum Mode { m_none, m_decompress, m_list, m_merge, m_range, m_repair,
- m_split, m_test };
+enum Mode { m_none, m_debug_delay, m_debug_repair, m_decompress, m_list,
+ m_merge, m_range_dec, m_repair, m_split, m_test };
std::string output_filename;
int outfd = -1;
@@ -92,7 +93,7 @@ bool delete_output_on_interrupt = false;
void show_help()
{
std::printf( "%s - Data recovery tool and decompressor for the lzip format.\n", Program_name );
- std::printf( "Lziprecover can repair perfectly most files with small errors (up to one\n"
+ std::printf( "\nLziprecover can repair perfectly most files with small errors (up to one\n"
"single-byte error per member), without the need of any extra redundance\n"
"at all. Losing an entire archive just because of a corrupt byte near the\n"
"beginning is a thing of the past.\n"
@@ -116,8 +117,13 @@ void show_help()
" -R, --repair try to repair a small error in file\n"
" -s, --split split multi-member file in single-member files\n"
" -t, --test test compressed file integrity\n"
- " -v, --verbose be verbose (a 2nd -v gives more)\n"
- "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
+ " -v, --verbose be verbose (a 2nd -v gives more)\n" );
+ if( verbosity >= 1 )
+ {
+ std::printf( " -y, --debug-delay=<range> find max error detection delay in <range>\n"
+ " -z, --debug-repair=<pos>,<val> test repair one-byte error at <pos>\n" );
+ }
+ std::printf( "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
"Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n"
"\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
"not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
@@ -141,22 +147,109 @@ void show_version()
void show_header( const File_header & header )
{
- const char * const prefix[8] =
- { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
- enum { factor = 1024 };
- const char * p = "";
- const char * np = " ";
- unsigned num = header.dictionary_size();
- bool exact = ( num % factor == 0 );
-
- for( int i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i )
- { num /= factor; if( num % factor != 0 ) exact = false;
- p = prefix[i]; np = ""; }
- std::fprintf( stderr, "dictionary size %s%4u %sB. ", np, num, p );
+ if( verbosity >= 3 )
+ {
+ const char * const prefix[8] =
+ { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
+ enum { factor = 1024 };
+ const char * p = "";
+ const char * np = " ";
+ unsigned num = header.dictionary_size();
+ bool exact = ( num % factor == 0 );
+
+ for( int i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i )
+ { num /= factor; if( num % factor != 0 ) exact = false;
+ p = prefix[i]; np = ""; }
+ std::fprintf( stderr, "dictionary size %s%4u %sB. ", np, num, p );
+ }
}
namespace {
+// Parses a non-negative integer (any base accepted by strtoll with base 0)
+// at the start of 'ptr', optionally followed by a multiplier:
+// k = 10^3, Ki = 2^10, M = 10^6, Mi = 2^20, ... up to Y / Yi, each with an
+// optional trailing 'B'. The scaled result is stored in 'value'.
+// Returns the number of chars consumed, or 0 on error (no digits, range
+// error, negative number, 'K' without 'i', 'k' with 'i', or overflow).
+// A char that is not a multiplier is not consumed; the caller checks it.
+// Side effect: a non-zero number with a binary multiplier makes format_num
+// switch to binary prefixes.
+//
+int parse_long_long( const char * const ptr, long long & value )
+  {
+  char * rest;
+  errno = 0;
+  value = strtoll( ptr, &rest, 0 );
+  if( rest == ptr || errno || value < 0 ) return 0;
+  int len = rest - ptr;
+  const char unit = ptr[len];
+  if( !unit ) return len;			// plain number, nothing follows
+
+  const bool binary = ( ptr[len+1] == 'i' );
+  const int factor = binary ? 1024 : 1000;
+  int exponent = 0;
+  if( unit == 'K' ) { if( !binary ) return 0; exponent = 1; }
+  else if( unit == 'k' ) { if( binary ) return 0; exponent = 1; }
+  else
+    {
+    const char big_units[7] = { 'M', 'G', 'T', 'P', 'E', 'Z', 'Y' };
+    for( int i = 0; i < 7; ++i )
+      if( unit == big_units[i] ) { exponent = i + 2; break; }
+    }
+  if( exponent == 0 ) return len;		// not a multiplier
+
+  ++len;					// skip the multiplier letter
+  if( ptr[len] == 'i' ) { ++len; if( value ) format_num( 0, 0, -1 ); }
+  if( ptr[len] == 'B' ) ++len;
+  while( exponent-- > 0 )
+    {
+    if( INT64_MAX / factor < value ) return 0;	// would overflow
+    value *= factor;
+    }
+  return len;
+  }
+
+
+// Parses a range argument into 'range'.
+// Recognized formats: <begin> <begin>-<end> <begin>,<size>
+// With only <begin>, the size is set to reach INT64_MAX.
+// On any error shows "Bad decompression range." and exits with status 1.
+//
+void parse_range( const char * const ptr, Block & range )
+  {
+  long long num = 0;
+  const int len1 = parse_long_long( ptr, num );		// <begin>
+  const char sep = len1 ? ptr[len1] : 0;
+  const bool begin_ok = ( len1 && num >= 0 && num < INT64_MAX &&
+                          ( sep == 0 || sep == ',' || sep == '-' ) );
+  if( begin_ok )
+    {
+    range.pos( num );
+    if( sep == 0 ) { range.size( INT64_MAX - num ); return; }
+    const bool is_end = ( sep == '-' );
+    const int len2 = parse_long_long( ptr + len1 + 1, num );	// <end> or <size>
+    if( len2 && num > 0 && ( !is_end || num > range.pos() ) )
+      {
+      const long long size = is_end ? num - range.pos() : num;
+      if( size <= INT64_MAX - range.pos() ) { range.size( size ); return; }
+      }
+    }
+  show_error( "Bad decompression range.", 0, true );
+  std::exit( 1 );
+  }
+
+
+// Parses an argument of the form <pos>,<value> where <pos> is a file
+// position and <value> is a byte value in the range 0 to 255.
+// On any error shows "Bad file position or byte value." and exits with
+// status 1.
+//
+void parse_pos_value( const char * const ptr, long long & pos, uint8_t & value )
+  {
+  long long num = 0;
+  const int len = parse_long_long( ptr, num );		// <pos>
+  bool ok = ( len && num >= 0 && num < INT64_MAX && ptr[len] == ',' );
+  if( ok )
+    {
+    pos = num;
+    ok = ( parse_long_long( ptr + len + 1, num ) &&	// <value>
+           num >= 0 && num < 256 );
+    if( ok ) value = num;
+    }
+  if( ok ) return;
+  show_error( "Bad file position or byte value.", 0, true );
+  std::exit( 1 );
+  }
+
+
void one_file( const int files )
{
if( files != 1 )
@@ -400,7 +493,7 @@ int decompress( const int infd, const Pretty_print & pp, const bool testing )
{ pp( "Invalid dictionary size in member header." ); retval = 2; break; }
if( verbosity >= 2 || ( verbosity == 1 && first_member ) )
- { pp(); if( verbosity >= 3 ) show_header( header ); }
+ { pp(); show_header( header ); }
LZ_decoder decoder( header, rdec, outfd );
const int result = decoder.decode_member( pp );
@@ -504,12 +597,14 @@ void internal_error( const char * const msg )
int main( const int argc, const char * const argv[] )
{
+ Block range( 0, 0 );
+ long long bad_pos = 0;
std::string input_filename;
std::string default_output_filename;
- std::string range_string;
std::vector< std::string > filenames;
int infd = -1;
Mode program_mode = m_none;
+ uint8_t bad_value = 0;
bool force = false;
bool ignore = false;
bool keep_input_files = false;
@@ -535,6 +630,8 @@ int main( const int argc, const char * const argv[] )
{ 't', "test", Arg_parser::no },
{ 'v', "verbose", Arg_parser::no },
{ 'V', "version", Arg_parser::no },
+ { 'y', "debug-delay", Arg_parser::yes },
+ { 'z', "debug-repair", Arg_parser::yes },
{ 0 , 0, Arg_parser::no } };
const Arg_parser parser( argc, argv, options );
@@ -551,8 +648,8 @@ int main( const int argc, const char * const argv[] )
{
case 'c': to_stdout = true; break;
case 'd': set_mode( program_mode, m_decompress ); break;
- case 'D': set_mode( program_mode, m_range );
- range_string = arg; break;
+ case 'D': set_mode( program_mode, m_range_dec );
+ parse_range( arg.c_str(), range ); break;
case 'f': force = true; break;
case 'h': show_help(); return 0;
case 'i': ignore = true; break;
@@ -567,6 +664,10 @@ int main( const int argc, const char * const argv[] )
case 't': set_mode( program_mode, m_test ); break;
case 'v': if( verbosity < 4 ) ++verbosity; break;
case 'V': show_version(); return 0;
+ case 'y': set_mode( program_mode, m_debug_delay );
+ parse_range( arg.c_str(), range ); break;
+ case 'z': set_mode( program_mode, m_debug_repair );
+ parse_pos_value( arg.c_str(), bad_pos, bad_value ); break;
default : internal_error( "uncaught option." );
}
} // end process options
@@ -593,6 +694,12 @@ int main( const int argc, const char * const argv[] )
switch( program_mode )
{
case m_none: internal_error( "invalid operation." ); break;
+ case m_debug_delay:
+ one_file( filenames.size() );
+ return debug_delay( filenames[0], range, verbosity );
+ case m_debug_repair:
+ one_file( filenames.size() );
+ return debug_repair( filenames[0], bad_pos, verbosity, bad_value );
case m_decompress: break;
case m_list:
if( filenames.size() < 1 )
@@ -604,10 +711,10 @@ int main( const int argc, const char * const argv[] )
if( default_output_filename.empty() )
default_output_filename = insert_fixed( filenames[0] );
return merge_files( filenames, default_output_filename, verbosity, force );
- case m_range:
+ case m_range_dec:
one_file( filenames.size() );
return range_decompress( filenames[0], default_output_filename,
- range_string, verbosity, force, ignore, to_stdout );
+ range, verbosity, force, ignore, to_stdout );
case m_repair:
one_file( filenames.size() );
if( default_output_filename.empty() )
diff --git a/merge.cc b/merge.cc
index 5f034eb..36c2faf 100644
--- a/merge.cc
+++ b/merge.cc
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2014 Antonio Diaz Diaz.
+ Copyright (C) 2009-2015 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -30,6 +30,7 @@
#include "lzip.h"
#include "decoder.h"
+#include "block.h"
#include "file_index.h"
@@ -255,6 +256,7 @@ int open_input_files( const std::vector< std::string > & filenames,
}
+ // merge block by block
bool try_merge_member( const long long mpos, const long long msize,
const std::vector< Block > & block_vector,
const std::vector< int > & color_vector,
@@ -318,6 +320,7 @@ bool try_merge_member( const long long mpos, const long long msize,
}
+ // merge a single block split at every possible position
bool try_merge_member1( const long long mpos, const long long msize,
const std::vector< Block > & block_vector,
const std::vector< int > & color_vector,
diff --git a/mtester.cc b/mtester.cc
index 58a56ed..c23d51b 100644
--- a/mtester.cc
+++ b/mtester.cc
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2014 Antonio Diaz Diaz.
+ Copyright (C) 2009-2015 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/mtester.h b/mtester.h
index 2896a44..f4da88f 100644
--- a/mtester.h
+++ b/mtester.h
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2014 Antonio Diaz Diaz.
+ Copyright (C) 2009-2015 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/range_dec.cc b/range_dec.cc
index 0fa9708..8b4d6e0 100644
--- a/range_dec.cc
+++ b/range_dec.cc
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2014 Antonio Diaz Diaz.
+ Copyright (C) 2009-2015 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -30,101 +30,12 @@
#include "lzip.h"
#include "decoder.h"
+#include "block.h"
#include "file_index.h"
namespace {
-const char * format_num( unsigned long long num,
- unsigned long long limit = -1ULL,
- const int set_prefix = 0 )
- {
- const char * const si_prefix[8] =
- { "k", "M", "G", "T", "P", "E", "Z", "Y" };
- const char * const binary_prefix[8] =
- { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
- static bool si = true;
- static char buf[32];
-
- if( set_prefix ) si = ( set_prefix > 0 );
- const unsigned factor = ( si ? 1000 : 1024 );
- const char * const * prefix = ( si ? si_prefix : binary_prefix );
- const char * p = "";
- bool exact = ( num % factor == 0 );
-
- for( int i = 0; i < 8 && ( num > limit || ( exact && num >= factor ) ); ++i )
- { num /= factor; if( num % factor != 0 ) exact = false; p = prefix[i]; }
- snprintf( buf, sizeof buf, "%llu %s", num, p );
- return buf;
- }
-
-
-// Returns the number of chars read, or 0 if error.
-//
-int parse_long_long( const char * const ptr, long long & value )
- {
- char * tail;
- errno = 0;
- value = strtoll( ptr, &tail, 0 );
- if( tail == ptr || errno || value < 0 ) return 0;
- int c = tail - ptr;
-
- if( ptr[c] )
- {
- const int factor = ( ptr[c+1] == 'i' ) ? 1024 : 1000;
- int exponent = 0;
- switch( ptr[c] )
- {
- case 'Y': exponent = 8; break;
- case 'Z': exponent = 7; break;
- case 'E': exponent = 6; break;
- case 'P': exponent = 5; break;
- case 'T': exponent = 4; break;
- case 'G': exponent = 3; break;
- case 'M': exponent = 2; break;
- case 'K': if( factor == 1024 ) exponent = 1; else return 0; break;
- case 'k': if( factor == 1000 ) exponent = 1; else return 0; break;
- }
- if( exponent > 0 )
- {
- ++c;
- if( ptr[c] == 'i' ) { ++c; if( value ) format_num( 0, 0, -1 ); }
- if( ptr[c] == 'B' ) ++c;
- for( int i = 0; i < exponent; ++i )
- {
- if( INT64_MAX / factor >= value ) value *= factor;
- else return 0;
- }
- }
- }
- return c;
- }
-
-
-// Recognized formats: <begin> <begin>-<end> <begin>,<size>
-//
-void parse_range( const char * const ptr, Block & range )
- {
- long long value = 0;
- int c = parse_long_long( ptr, value ); // pos
- if( c && value >= 0 && value < INT64_MAX &&
- ( ptr[c] == 0 || ptr[c] == ',' || ptr[c] == '-' ) )
- {
- range.pos( value );
- if( ptr[c] == 0 ) { range.size( INT64_MAX - value ); return; }
- const bool issize = ( ptr[c] == ',' );
- c = parse_long_long( ptr + c + 1, value ); // size
- if( c && value > 0 && ( issize || value > range.pos() ) )
- {
- if( !issize ) value -= range.pos();
- if( INT64_MAX - range.pos() >= value ) { range.size( value ); return; }
- }
- }
- show_error( "Bad decompression range.", 0, true );
- std::exit( 1 );
- }
-
-
int decompress_member( const int infd, const int outfd,
const Pretty_print & pp,
const unsigned long long mpos,
@@ -221,6 +132,30 @@ int list_file( const char * const input_filename, const Pretty_print & pp )
} // end namespace
+// Formats 'num' as "<number> <prefix>", dividing it by 1000 (SI prefixes)
+// or by 1024 (binary prefixes) while it is larger than 'limit' or is an
+// exact multiple of the factor.
+// A positive 'set_prefix' selects SI prefixes and a negative one selects
+// binary prefixes; the selection is remembered for later calls.
+// Returns a pointer to a static buffer that is overwritten by each call.
+//
+const char * format_num( unsigned long long num,
+                         unsigned long long limit,
+                         const int set_prefix )
+  {
+  const char * const si_names[8] =
+    { "k", "M", "G", "T", "P", "E", "Z", "Y" };
+  const char * const binary_names[8] =
+    { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
+  static bool use_si = true;
+  static char text[32];
+
+  if( set_prefix > 0 ) use_si = true;
+  else if( set_prefix < 0 ) use_si = false;
+  const unsigned base = use_si ? 1000 : 1024;
+  const char * const * const names = use_si ? si_names : binary_names;
+  const char * unit = "";
+  bool divides = ( num % base == 0 );
+
+  int level = 0;
+  while( level < 8 && ( num > limit || ( divides && num >= base ) ) )
+    {
+    num /= base;
+    if( num % base != 0 ) divides = false;
+    unit = names[level];
+    ++level;
+    }
+  snprintf( text, sizeof text, "%llu %s", num, unit );
+  return text;
+  }
+
+
bool safe_seek( const int fd, const long long pos )
{
if( lseek( fd, pos, SEEK_SET ) == pos ) return true;
@@ -245,11 +180,9 @@ int list_files( const std::vector< std::string > & filenames,
int range_decompress( const std::string & input_filename,
const std::string & output_filename,
- const std::string & range_string, const int verbosity,
- const bool force, const bool ignore, const bool to_stdout )
+ Block range, const int verbosity, const bool force,
+ const bool ignore, const bool to_stdout )
{
- Block range( 0, 0 );
- parse_range( range_string.c_str(), range );
struct stat in_stats;
const int infd = open_instream( input_filename.c_str(), &in_stats, true, true );
if( infd < 0 ) return 1;
@@ -262,7 +195,7 @@ int range_decompress( const std::string & input_filename,
if( range.end() > file_index.data_end() )
range.size( std::max( 0LL, file_index.data_end() - range.pos() ) );
if( range.size() <= 0 )
- { if( verbosity >= 1 ) pp( "Nothing to do." ); return 0; }
+ { if( verbosity >= 0 ) pp( "Nothing to do." ); return 0; }
if( verbosity >= 1 )
{
diff --git a/repair.cc b/repair.cc
index 54a4d89..066b5bd 100644
--- a/repair.cc
+++ b/repair.cc
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2014 Antonio Diaz Diaz.
+ Copyright (C) 2009-2015 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -29,9 +29,12 @@
#include <sys/stat.h>
#include "lzip.h"
-#include "file_index.h"
#include "mtester.h"
+#include "block.h"
+#include "file_index.h"
+
+namespace {
int seek_write( const int fd, const uint8_t * const buf, const int size,
const long long pos )
@@ -41,6 +44,8 @@ int seek_write( const int fd, const uint8_t * const buf, const int size,
return 0;
}
+} // end namespace
+
int repair_file( const std::string & input_filename,
const std::string & output_filename, const int verbosity,
@@ -80,13 +85,13 @@ int repair_file( const std::string & input_filename,
cleanup_and_fail( output_filename, outfd, 1 );
long pos = failure_pos;
bool done = false;
- while( pos >= File_header::size && pos > failure_pos - 40000 && !done )
+ while( pos >= File_header::size && pos > failure_pos - 50000 && !done )
{
- const long min_pos = std::max( (long)File_header::size, pos - 1000 );
+ const long min_pos = std::max( (long)File_header::size, pos - 100 );
const LZ_mtester * master = prepare_master( mbuffer, msize, min_pos - 16 );
if( !master )
cleanup_and_fail( output_filename, outfd, 1 );
- for( ; pos >= min_pos && !done ; --pos )
+ for( ; pos >= min_pos && !done; --pos )
{
if( verbosity >= 1 )
{
@@ -141,3 +146,174 @@ int repair_file( const std::string & input_filename,
std::fputs( "Copy of input file repaired successfully.\n", stdout );
return 0;
}
+
+
+// Debug aid (option '-y, --debug-delay').
+// For every member of 'input_filename' overlapping 'range' (file
+// positions), sets in turn each byte of the member inside 'range' to every
+// one of its 255 wrong values in an in-memory copy of the member, and
+// measures the distance between the changed byte and the position where
+// decoding fails. Each new maximum distance found in a member is reported.
+// Returns 0 on success (also when 'range' is empty after being limited to
+// the end of the file), 1 if the file or a member can't be read or a
+// tester can't be prepared, or the error status of the file index.
+//
+int debug_delay( const std::string & input_filename, Block range,
+                 const int verbosity )
+  {
+  struct stat in_stats;
+  const int infd = open_instream( input_filename.c_str(), &in_stats, true, true );
+  if( infd < 0 ) return 1;
+
+  Pretty_print pp( input_filename, verbosity );
+  const File_index file_index( infd );
+  if( file_index.retval() != 0 )
+    { pp( file_index.error().c_str() ); return file_index.retval(); }
+
+  // limit the range to the end of the file
+  if( range.end() > file_index.file_end() )
+    range.size( std::max( 0LL, file_index.file_end() - range.pos() ) );
+  if( range.size() <= 0 )
+    { if( verbosity >= 0 ) pp( "Nothing to do." ); return 0; }
+
+  for( long i = 0; i < file_index.members(); ++i )
+    {
+    const Block & mb = file_index.mblock( i );
+    if( !range.overlaps( mb ) ) continue;
+    const long long mpos = file_index.mblock( i ).pos();
+    const long long msize = file_index.mblock( i ).size();
+    if( verbosity >= 1 )		// member overlapping the range found
+      {
+      std::printf( "Finding max delay in member %ld of %ld (member pos = %llu)\n",
+                   i + 1, (long)file_index.members(), mpos );
+      std::fflush( stdout );
+      }
+    uint8_t * const mbuffer = read_member( infd, mpos, msize );
+    if( !mbuffer )
+      { show_error( "Can't read member." ); return 1; }
+    // 'pos' and 'end' are offsets relative to the beginning of the member
+    long pos = std::max( range.pos() - mpos, File_header::size + 1LL );
+    const long end = std::min( range.end() - mpos, msize );
+    long max_delay = 0;			// largest delay found in this member
+    while( pos < end )
+      {
+      // a new master is prepared for every group of up to 100 positions
+      const LZ_mtester * master = prepare_master( mbuffer, msize, pos - 16 );
+      if( !master )
+        {
+        show_error( "Can't prepare master." );
+        delete[] mbuffer;		// don't leak the member copy
+        return 1;
+        }
+      const long partial_end = std::min( pos + 100, end );
+      for( ; pos < partial_end; ++pos )
+        {
+        if( verbosity >= 1 )
+          {
+          std::printf( "Delays in position %llu \r", mpos + pos );
+          std::fflush( stdout );
+          }
+        int value = -1;			// byte value giving a new max delay
+        for( int j = 0; j < 256; ++j )
+          {
+          ++mbuffer[pos];
+          if( j == 255 ) break;		// 256th increment restores the byte
+          long failure_pos;
+          if( test_member_rest( *master, &failure_pos ) ) continue;
+          const long delay = failure_pos - pos;
+          if( delay > max_delay ) { max_delay = delay; value = mbuffer[pos]; }
+          }
+        if( value >= 0 && verbosity >= 0 )
+          {
+          std::printf( "New max delay %lu at position %llu (0x%02X)\n",
+                       max_delay, mpos + pos, value );
+          std::fflush( stdout );
+          }
+        // stop scanning this member once the max delay reaches its end
+        if( pos + max_delay >= msize ) { pos = end; break; }
+        }
+      delete master;
+      }
+    delete[] mbuffer;
+    if( verbosity >= 1 ) std::fputs( "\n", stdout );
+    }
+
+  if( verbosity >= 1 ) std::fputs( "Done.\n", stdout );
+  return 0;
+  }
+
+
+// Debug aid (option '-z, --debug-repair').
+// Verifies that the member of 'input_filename' containing the file
+// position 'bad_pos' is intact, sets the byte at 'bad_pos' to 'bad_value'
+// in an in-memory copy of that member, and then runs on the copy the same
+// backward byte-by-byte search used by repair_file, starting at the
+// position where decoding of the damaged copy fails.
+// Returns 0 if no member contains 'bad_pos', if the changed member still
+// decompresses with no errors, or if the search repairs it; 1 if the file
+// can't be read, the member is already damaged or a tester can't be
+// prepared; 3 if the search fails; or the error status of the file index.
+//
+int debug_repair( const std::string & input_filename, const long long bad_pos,
+                  const int verbosity, const uint8_t bad_value )
+  {
+  struct stat in_stats;
+  const int infd = open_instream( input_filename.c_str(), &in_stats, true, true );
+  if( infd < 0 ) return 1;
+
+  Pretty_print pp( input_filename, verbosity );
+  const File_index file_index( infd );
+  if( file_index.retval() != 0 )
+    { pp( file_index.error().c_str() ); return file_index.retval(); }
+
+  // find the member containing 'bad_pos'
+  long idx = 0;
+  for( ; idx < file_index.members(); ++idx )
+    if( file_index.mblock( idx ).includes( bad_pos ) ) break;
+  if( idx >= file_index.members() )
+    { if( verbosity >= 0 ) pp( "Nothing to do." ); return 0; }
+
+  const long long mpos = file_index.mblock( idx ).pos();
+  const long long msize = file_index.mblock( idx ).size();
+  // 'bad_pos' is a file position, but the buffer read below holds only this
+  // member, so the byte to damage is at offset 'bad_pos - mpos' in it.
+  const long long bad_offset = bad_pos - mpos;
+  {
+  long long failure_pos = 0;
+  if( !safe_seek( infd, mpos ) )
+    { show_error( "Can't seek to member." ); return 1; }
+  if( !try_decompress_member( infd, msize, &failure_pos ) )
+    {
+    if( verbosity >= 0 )
+      std::printf( "Member %ld of %ld already damaged (failure pos = %llu)\n",
+                   idx + 1, (long)file_index.members(), mpos + failure_pos );
+    return 1;
+    }
+  }
+  uint8_t * const mbuffer = read_member( infd, mpos, msize );
+  if( !mbuffer )
+    { show_error( "Can't read member." ); return 1; }
+  const uint8_t good_value = mbuffer[bad_offset];
+  mbuffer[bad_offset] = bad_value;
+  long failure_pos = 0;
+  {
+  const LZ_mtester * const master = prepare_master( mbuffer, msize, 0 );
+  if( !master )
+    { show_error( "Can't prepare master." ); delete[] mbuffer; return 1; }
+  const bool still_valid = test_member_rest( *master, &failure_pos );
+  delete master;			// only needed for this first test
+  if( still_valid )
+    {
+    if( verbosity >= 1 )
+      std::fputs( "Member decompressed with no errors.\n", stdout );
+    delete[] mbuffer;
+    return 0;
+    }
+  if( verbosity >= 1 )
+    {
+    std::printf( "Test repairing member %ld of %ld\n"
+                 " (damage pos = %llu (0x%02X->0x%02X), failure pos = %llu)\n",
+                 idx + 1, (long)file_index.members(), bad_pos,
+                 good_value, bad_value, mpos + failure_pos );
+    std::fflush( stdout );
+    }
+  }
+  // search backwards from the failure position, as repair_file does
+  long pos = failure_pos;
+  bool done = false;
+  while( pos >= File_header::size && pos > failure_pos - 50000 && !done )
+    {
+    const long min_pos = std::max( (long)File_header::size, pos - 100 );
+    const LZ_mtester * master = prepare_master( mbuffer, msize, min_pos - 16 );
+    if( !master )
+      { show_error( "Can't prepare master." ); delete[] mbuffer; return 1; }
+    for( ; pos >= min_pos && !done; --pos )
+      {
+      if( verbosity >= 1 )
+        {
+        std::printf( "Trying position %llu \r", mpos + pos );
+        std::fflush( stdout );
+        }
+      for( int j = 0; j < 256; ++j )
+        {
+        ++mbuffer[pos];
+        if( j == 255 ) break;		// 256th increment restores the byte
+        if( test_member_rest( *master ) ) { done = true; break; }
+        }
+      }
+    delete master;
+    }
+  delete[] mbuffer;
+  if( verbosity >= 1 ) std::fputs( "\n", stdout );
+  if( !done )
+    {
+    show_error( "Can't repair input file. There is a bug somewhere." );
+    return 3;
+    }
+  if( verbosity >= 1 )
+    std::fputs( "Member repaired successfully.\n", stdout );
+  return 0;
+  }
diff --git a/split.cc b/split.cc
index 7b3f9fd..2ffb359 100644
--- a/split.cc
+++ b/split.cc
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2014 Antonio Diaz Diaz.
+ Copyright (C) 2009-2015 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -28,6 +28,7 @@
#include <sys/stat.h>
#include "lzip.h"
+#include "block.h"
#include "file_index.h"
@@ -81,7 +82,7 @@ bool verify_header( const File_header & header, const Pretty_print & pp )
// Search forward from 'pos' for "LZIP" (Boyer-Moore algorithm)
-// Return pos of found string or 'pos+size' if not found.
+// Returns pos of found string or 'pos+size' if not found.
//
int find_magic( const uint8_t * const buffer, const int pos, const int size )
{
diff --git a/testsuite/check.sh b/testsuite/check.sh
index ef60713..1df77a3 100755
--- a/testsuite/check.sh
+++ b/testsuite/check.sh
@@ -1,6 +1,6 @@
#! /bin/sh
# check script for Lziprecover - Data recovery tool for the lzip format
-# Copyright (C) 2009-2014 Antonio Diaz Diaz.
+# Copyright (C) 2009-2015 Antonio Diaz Diaz.
#
# This script is free software: you have unlimited permission
# to copy, distribute and modify it.
diff --git a/unzcrash.cc b/unzcrash.cc
index 0b44997..fd1c816 100644
--- a/unzcrash.cc
+++ b/unzcrash.cc
@@ -1,6 +1,6 @@
/* Unzcrash - Tests robustness of decompressors to corrupted data.
Inspired by unzcrash.c from Julian Seward's bzip2.
- Copyright (C) 2008-2014 Antonio Diaz Diaz.
+ Copyright (C) 2008-2015 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -44,7 +44,7 @@ namespace {
const char * const Program_name = "Unzcrash";
const char * const program_name = "unzcrash";
-const char * const program_year = "2014";
+const char * const program_year = "2015";
const char * invocation_name = 0;
int verbosity = 0;