summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--ChangeLog8
-rw-r--r--INSTALL2
-rw-r--r--Makefile.in9
-rw-r--r--NEWS27
-rw-r--r--README11
-rwxr-xr-xconfigure10
-rw-r--r--decoder.cc16
-rw-r--r--decoder.h8
-rw-r--r--doc/lziprecover.12
-rw-r--r--doc/lziprecover.info167
-rw-r--r--doc/lziprecover.texinfo133
-rw-r--r--file_index.cc136
-rw-r--r--file_index.h35
-rw-r--r--lzip.h12
-rw-r--r--main.cc1
-rw-r--r--merge.cc378
-rw-r--r--range_dec.cc19
-rw-r--r--repair.cc115
-rw-r--r--split.cc5
-rwxr-xr-xtestsuite/check.sh166
-rw-r--r--testsuite/fox5.lz (renamed from testsuite/fox5_bad.lz)bin400 -> 400 bytes
-rw-r--r--testsuite/fox5_bad1.lzbin0 -> 400 bytes
-rw-r--r--testsuite/fox5_bad1.txt (renamed from testsuite/fox5_bad.txt)2
-rw-r--r--testsuite/fox5_bad2.lzbin0 -> 400 bytes
-rw-r--r--testsuite/fox5_bad3.lzbin0 -> 400 bytes
-rw-r--r--testsuite/fox5_bad4.lzbin0 -> 400 bytes
-rw-r--r--testsuite/fox5_bad5.lzbin0 -> 400 bytes
-rw-r--r--testsuite/test.txt.lzbin0 -> 11518 bytes
-rw-r--r--testsuite/test_bad1.lzbin11548 -> 11518 bytes
-rw-r--r--testsuite/test_bad2.lzbin11548 -> 11518 bytes
-rw-r--r--testsuite/test_bad3.lzbin11548 -> 11518 bytes
-rw-r--r--testsuite/test_bad4.lzbin11548 -> 11518 bytes
-rw-r--r--testsuite/test_bad5.lzbin11548 -> 11518 bytes
-rw-r--r--testsuite/test_v0.lzbin11540 -> 0 bytes
-rw-r--r--testsuite/test_v1.lzbin11548 -> 0 bytes
-rw-r--r--testsuite/unzcrash.cc10
36 files changed, 785 insertions, 487 deletions
diff --git a/ChangeLog b/ChangeLog
index acbbb44..f2f71e7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2013-06-17 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 1.15-pre1 released.
+ * repair.cc: Repair multi-member files with up to one byte error
+ per member.
+ * merge.cc: Merge multi-member files.
+ * Added chapters 'Repairing Files' and 'Merging Files' to the manual.
+
2013-05-31 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.14 released.
diff --git a/INSTALL b/INSTALL
index cc04763..465a543 100644
--- a/INSTALL
+++ b/INSTALL
@@ -1,7 +1,7 @@
Requirements
------------
You will need a C++ compiler.
-I use gcc 4.8.0 and 3.3.6, but the code should compile with any
+I use gcc 4.8.1 and 3.3.6, but the code should compile with any
standards compliant compiler.
Gcc is available at http://gcc.gnu.org.
diff --git a/Makefile.in b/Makefile.in
index e769333..bb69807 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -42,7 +42,7 @@ file_index.o : lzip.h file_index.h
main.o : arg_parser.h lzip.h decoder.h
merge.o : lzip.h decoder.h file_index.h
range_dec.o : lzip.h decoder.h file_index.h
-repair.o : lzip.h
+repair.o : lzip.h file_index.h
split.o : lzip.h
unzcrash.o : arg_parser.h Makefile
@@ -115,12 +115,13 @@ dist : doc
$(DISTNAME)/doc/$(pkgname).info \
$(DISTNAME)/doc/$(pkgname).texinfo \
$(DISTNAME)/testsuite/check.sh \
- $(DISTNAME)/testsuite/fox5_bad.lz \
- $(DISTNAME)/testsuite/fox5_bad.txt \
+ $(DISTNAME)/testsuite/fox5.lz \
+ $(DISTNAME)/testsuite/fox5_bad[1-5].lz \
+ $(DISTNAME)/testsuite/fox5_bad1.txt \
$(DISTNAME)/testsuite/test.txt \
+ $(DISTNAME)/testsuite/test.txt.lz \
$(DISTNAME)/testsuite/test921-1921.txt \
$(DISTNAME)/testsuite/test_bad[1-5].lz \
- $(DISTNAME)/testsuite/test_v[01].lz \
$(DISTNAME)/testsuite/unzcrash.cc \
$(DISTNAME)/*.h \
$(DISTNAME)/*.cc
diff --git a/NEWS b/NEWS
index 1558dc8..70b39a1 100644
--- a/NEWS
+++ b/NEWS
@@ -1,23 +1,10 @@
-Changes in version 1.14:
+Changes in version 1.15:
-The new option "-i, --ignore-errors", which in conjunction with "-D"
-decompresses all the recoverable data in all members of a file without
-having to split it first, has been added.
+Lziprecover can now repair multi-member files with up to one byte error
+per member, without having to split them first.
-Option "-l, --list" now accepts more than one file.
+Lziprecover can now merge multi-member files without having to split
+them first even if some copies have the header and the trailer damaged.
-Decompression time has been reduced by 12%.
-
-"--split" now uses as few digits as possible in the names of the files
-produced, depending on the number of members in the input file.
-
-"--split" in verbose mode now shows the names of files being created.
-
-When decompressing or testing, file version is now shown only if
-verbosity >= 4.
-
-"configure" now accepts options with a separate argument.
-
-The target "install-as-lzip" has been added to the Makefile.
-
-The target "install-bin" has been added to the Makefile.
+The chapters "Repairing Files" and "Merging Files" have been added to
+the manual.
diff --git a/README b/README
index e5b3641..e96690f 100644
--- a/README
+++ b/README
@@ -2,14 +2,15 @@ Description
Lziprecover is a data recovery tool and decompressor for files in the
lzip compressed data format (.lz) able to repair slightly damaged files,
-recover badly damaged files from two or more copies, extract undamaged
-members from multi-member files, decompress files and test integrity of
-files.
+recover badly damaged files from two or more copies, extract data from
+damaged files, decompress files and test integrity of files.
Lziprecover is able to recover or decompress files produced by any of
the compressors in the lzip family; lzip, plzip, minilzip/lzlib, clzip
-and pdlzip. This recovery capability contributes to make the lzip format
-one of the best options for long-term data archiving.
+and pdlzip. It makes lzip files resistant to bit-flip, one of the most
+common forms of data corruption, and its recovery capabilities
+contribute to making the lzip format one of the best options for
+long-term data archiving.
Lziprecover is able to efficiently extract a range of bytes from a
multi-member file, because it only decompresses the members containing
diff --git a/configure b/configure
index 7faf560..da0011b 100755
--- a/configure
+++ b/configure
@@ -6,7 +6,7 @@
# to copy, distribute and modify it.
pkgname=lziprecover
-pkgversion=1.14
+pkgversion=1.15-pre1
progname=lziprecover
srctrigger=doc/lziprecover.texinfo
@@ -100,7 +100,7 @@ while [ $# != 0 ] ; do
*=* | *-*-*) ;;
*)
echo "configure: unrecognized option: '${option}'" 1>&2
- echo "Try 'configure --help' for more information."
+ echo "Try 'configure --help' for more information." 1>&2
exit 1 ;;
esac
@@ -125,10 +125,8 @@ if [ -z "${srcdir}" ] ; then
fi
if [ ! -r "${srcdir}/${srctrigger}" ] ; then
- exec 1>&2
- echo
- echo "configure: Can't find sources in ${srcdir} ${srcdirtext}"
- echo "configure: (At least ${srctrigger} is missing)."
+ echo "configure: Can't find sources in ${srcdir} ${srcdirtext}" 1>&2
+ echo "configure: (At least ${srctrigger} is missing)." 1>&2
exit 1
fi
diff --git a/decoder.cc b/decoder.cc
index 8ed1aae..497471c 100644
--- a/decoder.cc
+++ b/decoder.cc
@@ -126,7 +126,7 @@ void LZ_decoder::flush_data()
bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const
{
File_trailer trailer;
- const int trailer_size = File_trailer::size( member_version );
+ const int trailer_size = File_trailer::size;
const unsigned long long member_size =
rdec.member_position() + trailer_size;
bool error = false;
@@ -144,8 +144,6 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const
while( size < trailer_size ) trailer.data[size++] = 0;
}
- if( member_version == 0 ) trailer.member_size( member_size );
-
if( !rdec.code_is_zero() )
{
error = true;
@@ -220,7 +218,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
while( !rdec.finished() )
{
const int pos_state = data_position() & pos_state_mask;
- if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 )
+ if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit
{
const uint8_t prev_byte = get_prev_byte();
if( state.is_char() )
@@ -233,21 +231,21 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
else
{
int len;
- if( rdec.decode_bit( bm_rep[state()] ) == 1 )
+ if( rdec.decode_bit( bm_rep[state()] ) == 1 ) // 2nd bit
{
- if( rdec.decode_bit( bm_rep0[state()] ) == 0 )
+ if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit
{
- if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 )
+ if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
{ state.set_short_rep(); put_byte( get_byte( rep0 ) ); continue; }
}
else
{
unsigned distance;
- if( rdec.decode_bit( bm_rep1[state()] ) == 0 )
+ if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit
distance = rep1;
else
{
- if( rdec.decode_bit( bm_rep2[state()] ) == 0 )
+ if( rdec.decode_bit( bm_rep2[state()] ) == 0 ) // 5th bit
distance = rep2;
else
{ distance = rep3; rep3 = rep2; }
diff --git a/decoder.h b/decoder.h
index a153bcb..0c4697b 100644
--- a/decoder.h
+++ b/decoder.h
@@ -176,11 +176,11 @@ public:
match_byte <<= 1;
const int match_bit = match_byte & 0x100;
const int bit = decode_bit( bm1[match_bit+symbol] );
- symbol = ( symbol << 1 ) + bit;
+ symbol = ( symbol << 1 ) | bit;
if( match_bit != bit << 8 )
{
while( symbol < 0x100 )
- symbol = ( symbol << 1 ) + decode_bit( bm[symbol] );
+ symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
break;
}
}
@@ -213,7 +213,6 @@ class LZ_decoder
int stream_pos; // first byte not yet written to file
uint32_t crc_;
const int outfd; // output file descriptor
- const int member_version;
unsigned long long stream_position() const { return partial_data_pos + stream_pos; }
void flush_data();
@@ -273,8 +272,7 @@ public:
pos( 0 ),
stream_pos( 0 ),
crc_( 0xFFFFFFFFU ),
- outfd( ofd ),
- member_version( header.version() )
+ outfd( ofd )
{ buffer[buffer_size-1] = 0; } // prev_byte of first_byte
~LZ_decoder() { delete[] buffer; }
diff --git a/doc/lziprecover.1 b/doc/lziprecover.1
index f62094a..fce7640 100644
--- a/doc/lziprecover.1
+++ b/doc/lziprecover.1
@@ -1,5 +1,5 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1.
-.TH LZIPRECOVER "1" "May 2013" "Lziprecover 1.14" "User Commands"
+.TH LZIPRECOVER "1" "June 2013" "Lziprecover 1.15-pre1" "User Commands"
.SH NAME
Lziprecover \- recovers data from damaged lzip files
.SH SYNOPSIS
diff --git a/doc/lziprecover.info b/doc/lziprecover.info
index 83267d5..0d39838 100644
--- a/doc/lziprecover.info
+++ b/doc/lziprecover.info
@@ -12,16 +12,18 @@ File: lziprecover.info, Node: Top, Next: Introduction, Up: (dir)
Lziprecover Manual
******************
-This manual is for Lziprecover (version 1.14, 31 May 2013).
+This manual is for Lziprecover (version 1.15-pre1, 17 June 2013).
* Menu:
-* Introduction:: Purpose and features of lziprecover
-* Invoking Lziprecover:: Command line interface
-* File Format:: Detailed format of the compressed file
-* Examples:: A small tutorial with examples
-* Problems:: Reporting bugs
-* Concept Index:: Index of concepts
+* Introduction:: Purpose and features of lziprecover
+* Invoking Lziprecover:: Command line interface
+* Repairing Files:: Fixing bit-flip and similar errors
+* Merging Files:: Fixing several damaged copies
+* File Format:: Detailed format of the compressed file
+* Examples:: A small tutorial with examples
+* Problems:: Reporting bugs
+* Concept Index:: Index of concepts
Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
@@ -37,14 +39,15 @@ File: lziprecover.info, Node: Introduction, Next: Invoking Lziprecover, Prev:
Lziprecover is a data recovery tool and decompressor for files in the
lzip compressed data format (.lz) able to repair slightly damaged files,
-recover badly damaged files from two or more copies, extract undamaged
-members from multi-member files, decompress files and test integrity of
-files.
+recover badly damaged files from two or more copies, extract data from
+damaged files, decompress files and test integrity of files.
Lziprecover is able to recover or decompress files produced by any of
the compressors in the lzip family; lzip, plzip, minilzip/lzlib, clzip
-and pdlzip. This recovery capability contributes to make the lzip format
-one of the best options for long-term data archiving.
+and pdlzip. It makes lzip files resistant to bit-flip, one of the most
+common forms of data corruption, and its recovery capabilities
+contribute to make of the lzip format one of the best options for
+long-term data archiving.
Lziprecover is able to efficiently extract a range of bytes from a
multi-member file, because it only decompresses the members containing
@@ -61,19 +64,22 @@ damaged files themselves are never modified.
When decompressing or testing file integrity, lziprecover behaves
like lzip or lunzip.
- If the files are too damaged for lziprecover to repair them, data
-from damaged members can be partially recovered writing it to stdout as
-shown in the following example (the resulting file may contain some
-garbage data at the end):
+ If a file is too damaged for lziprecover to repair it, all the
+recoverable data in all members of the file can be extracted with the
+following command (the resulting file may contain errors and some
+garbage data may be produced at the end of each member):
- lziprecover -cd rec01file.lz > rec01file
+ lziprecover -D0 -i -o file -q file.lz
If the cause of file corruption is damaged media, the combination
GNU ddrescue + lziprecover is the best option for recovering data from
multiple damaged copies. *Note ddrescue-example::, for an example.
+ Lziprecover is not a replacement for regular backups, but a last
+line of defense for the case where the backups are also damaged.
+

-File: lziprecover.info, Node: Invoking Lziprecover, Next: File Format, Prev: Introduction, Up: Top
+File: lziprecover.info, Node: Invoking Lziprecover, Next: Repairing Files, Prev: Introduction, Up: Top
2 Invoking Lziprecover
**********************
@@ -141,18 +147,10 @@ The format for running lziprecover is:
`-m'
`--merge'
Try to produce a correct file merging the good parts of two or more
- damaged copies. The copies must be single-member files. The merge
- will fail if the copies have too many damaged areas or if the same
- byte is damaged in all copies. If successful, a repaired copy is
- written to the file `FILE_fixed.lz'. The exit status is 0 if the
- file could be repaired, 2 otherwise.
-
- To give you an idea of its possibilities, when merging two copies
- each of them with one damaged area affecting 1 percent of the
- copy, the probability of obtaining a correct file is about 98
- percent. With three such copies the probability rises to 99.97
- percent. For large files with small errors, the probability
- approaches 100 percent even with only two copies.
+ damaged copies. If successful, a repaired copy is written to the
+ file `FILE_fixed.lz'. The exit status is 0 if a correct file could
+ be produced, 2 otherwise. See the chapter Merging Files (*note
+ Merging Files::) for a complete description of the merge mode.
`-o FILE'
`--output=FILE'
@@ -168,10 +166,12 @@ The format for running lziprecover is:
`-R'
`--repair'
- Try to repair a small error, affecting only one byte, in a
- single-member FILE. If successful, a repaired copy is written to
- the file `FILE_fixed.lz'. `FILE' is not modified at all. The exit
- status is 0 if the file could be repaired, 2 otherwise.
+ Try to repair a file with small errors (up to one byte error per
+ member). If successful, a repaired copy is written to the file
+ `FILE_fixed.lz'. `FILE' is not modified at all. The exit status
+ is 0 if the file could be repaired, 2 otherwise. See the chapter
+ Repairing Files (*note Repairing Files::) for a complete
+ description of the repair mode.
`-s'
`--split'
@@ -227,9 +227,52 @@ invalid input file, 3 for an internal consistency error (eg, bug) which
caused lziprecover to panic.

-File: lziprecover.info, Node: File Format, Next: Examples, Prev: Invoking Lziprecover, Up: Top
+File: lziprecover.info, Node: Repairing Files, Next: Merging Files, Prev: Invoking Lziprecover, Up: Top
+
+3 Repairing Files
+*****************
+
+Lziprecover is able to repair files with small errors (up to one byte
+error per member). The error may be located anywhere in the file except
+in the header (first 6 bytes of each member) or in the `Member size'
+field of the trailer (last 8 bytes of each member). This makes lzip
+files resistant to bit-flip, one of the most common forms of data
+corruption.
+
+ Bit-flip happens when one bit in the file is changed from 0 to 1 or
+vice versa. It may be caused by bad RAM or even by natural radiation. I
+have seen a case of bit-flip in a file stored in an USB flash drive.
+
+
+File: lziprecover.info, Node: Merging Files, Next: File Format, Prev: Repairing Files, Up: Top
+
+4 Merging Files
+***************
+
+If you have several copies of a file but all of them are too damaged to
+repair them (*note Repairing Files::), lziprecover can try to produce a
+correct file merging the good parts of the damaged copies.
+
+ The merge may succeed even if some copies of the file have all the
+headers and trailers damaged, as long as there is at least one copy of
+every header and trailer intact, even if they are in different copies of
+the file.
+
+ The merge will fail if the damaged areas overlap (at least one byte
+is damaged in all copies), or are adjacent and the boundary can't be
+determined, or if the copies have too many damaged areas.
+
+ To give you an idea of its possibilities, when merging two copies
+each of them with one damaged area affecting 1 percent of the copy, the
+probability of obtaining a correct file is about 98 percent. With three
+such copies the probability rises to 99.97 percent. For large files with
+small errors, the probability approaches 100 percent even with only two
+copies.
+
+
+File: lziprecover.info, Node: File Format, Next: Examples, Prev: Merging Files, Up: Top
-3 File Format
+5 File Format
*************
Perfection is reached, not when there is no longer anything to add, but
@@ -302,7 +345,7 @@ additional information before, between, or after them.

File: lziprecover.info, Node: Examples, Next: Problems, Prev: File Format, Up: Top
-4 A small tutorial with examples
+6 A small tutorial with examples
********************************
Example 1: Restore a regular file from its compressed version
@@ -329,9 +372,8 @@ to decompressed byte 15000 (5000 bytes are produced).
lziprecover -D 10000-15000 file.lz
-Example 5: Repair a one-byte corruption in the single-member file
-`file.lz'. (Indented lines are abridged error messages from
-lziprecover).
+Example 5: Repair small errors in the file `file.lz'. (Indented lines
+are abridged diagnostic messages from lziprecover).
lziprecover -v -R file.lz
Copy of input file repaired successfully.
@@ -365,31 +407,16 @@ error-checked merging of copies (*Note GNU ddrescue manual:
Example 8: Recover the first volume of those created with the command
`lzip -b 32MiB -S 650MB big_db' from two copies, `big_db1_00001.lz' and
`big_db2_00001.lz', with member 07 damaged in the first copy, member 18
-damaged in the second copy, and member 12 damaged in both copies. Two
-correct copies are produced and compared.
-
- lziprecover -s big_db1_00001.lz
- lziprecover -s big_db2_00001.lz
- lziprecover -t rec*big_db1_00001.lz
- rec07big_db1_00001.lz: crc mismatch
- rec12big_db1_00001.lz: crc mismatch
- lziprecover -t rec*big_db2_00001.lz
- rec12big_db2_00001.lz: crc mismatch
- rec18big_db2_00001.lz: crc mismatch
- lziprecover -m -v rec12big_db1_00001.lz rec12big_db2_00001.lz
+damaged in the second copy, and member 12 damaged in both copies. The
+correct file produced is saved in `big_db_00001.lz'.
+
+ lziprecover -m -v -o big_db_00001.lz big_db1_00001.lz big_db2_00001.lz
Input files merged successfully
- cp rec07big_db2_00001.lz rec07big_db1_00001.lz
- cp rec12big_db1_00001_fixed.lz rec12big_db1_00001.lz
- cp rec12big_db1_00001_fixed.lz rec12big_db2_00001.lz
- cp rec18big_db1_00001.lz rec18big_db2_00001.lz
- cat rec*big_db1_00001.lz > big_db3_00001.lz
- cat rec*big_db2_00001.lz > big_db4_00001.lz
- zcmp big_db3_00001.lz big_db4_00001.lz

File: lziprecover.info, Node: Problems, Next: Concept Index, Prev: Examples, Up: Top
-5 Reporting Bugs
+7 Reporting Bugs
****************
There are probably bugs in lziprecover. There are certainly errors and
@@ -415,19 +442,23 @@ Concept Index
* file format: File Format. (line 6)
* getting help: Problems. (line 6)
* introduction: Introduction. (line 6)
-* invoking lziprecover: Invoking Lziprecover. (line 6)
+* invoking: Invoking Lziprecover. (line 6)
+* merging files: Merging Files. (line 6)
+* repairing files: Repairing Files. (line 6)

Tag Table:
Node: Top231
-Node: Introduction901
-Node: Invoking Lziprecover2685
-Node: File Format8447
-Node: Examples10938
-Ref: ddrescue-example12158
-Node: Problems13938
-Node: Concept Index14488
+Node: Introduction1032
+Node: Invoking Lziprecover3052
+Node: Repairing Files8489
+Node: Merging Files9208
+Node: File Format10338
+Node: Examples12822
+Ref: ddrescue-example14024
+Node: Problems15134
+Node: Concept Index15684

End Tag Table
diff --git a/doc/lziprecover.texinfo b/doc/lziprecover.texinfo
index 142cdae..54834e9 100644
--- a/doc/lziprecover.texinfo
+++ b/doc/lziprecover.texinfo
@@ -6,8 +6,8 @@
@finalout
@c %**end of header
-@set UPDATED 31 May 2013
-@set VERSION 1.14
+@set UPDATED 17 June 2013
+@set VERSION 1.15-pre1
@dircategory Data Compression
@direntry
@@ -35,12 +35,14 @@
This manual is for Lziprecover (version @value{VERSION}, @value{UPDATED}).
@menu
-* Introduction:: Purpose and features of lziprecover
-* Invoking Lziprecover:: Command line interface
-* File Format:: Detailed format of the compressed file
-* Examples:: A small tutorial with examples
-* Problems:: Reporting bugs
-* Concept Index:: Index of concepts
+* Introduction:: Purpose and features of lziprecover
+* Invoking Lziprecover:: Command line interface
+* Repairing Files:: Fixing bit-flip and similar errors
+* Merging Files:: Fixing several damaged copies
+* File Format:: Detailed format of the compressed file
+* Examples:: A small tutorial with examples
+* Problems:: Reporting bugs
+* Concept Index:: Index of concepts
@end menu
@sp 1
@@ -56,14 +58,15 @@ to copy, distribute and modify it.
Lziprecover is a data recovery tool and decompressor for files in the
lzip compressed data format (.lz) able to repair slightly damaged files,
-recover badly damaged files from two or more copies, extract undamaged
-members from multi-member files, decompress files and test integrity of
-files.
+recover badly damaged files from two or more copies, extract data from
+damaged files, decompress files and test integrity of files.
Lziprecover is able to recover or decompress files produced by any of
the compressors in the lzip family; lzip, plzip, minilzip/lzlib, clzip
-and pdlzip. This recovery capability contributes to make the lzip format
-one of the best options for long-term data archiving.
+and pdlzip. It makes lzip files resistant to bit-flip, one of the most
+common forms of data corruption, and its recovery capabilities
+contribute to making the lzip format one of the best options for
+long-term data archiving.
Lziprecover is able to efficiently extract a range of bytes from a
multi-member file, because it only decompresses the members containing
@@ -80,23 +83,26 @@ damaged files themselves are never modified.
When decompressing or testing file integrity, lziprecover behaves like
lzip or lunzip.
-If the files are too damaged for lziprecover to repair them, data from
-damaged members can be partially recovered writing it to stdout as shown
-in the following example (the resulting file may contain some garbage
-data at the end):
+If a file is too damaged for lziprecover to repair it, all the
+recoverable data in all members of the file can be extracted with the
+following command (the resulting file may contain errors and some
+garbage data may be produced at the end of each member):
@example
-lziprecover -cd rec01file.lz > rec01file
+lziprecover -D0 -i -o file -q file.lz
@end example
If the cause of file corruption is damaged media, the combination
@w{GNU ddrescue + lziprecover} is the best option for recovering data
from multiple damaged copies. @xref{ddrescue-example}, for an example.
+Lziprecover is not a replacement for regular backups, but a last line of
+defense for the case where the backups are also damaged.
+
@node Invoking Lziprecover
@chapter Invoking Lziprecover
-@cindex invoking lziprecover
+@cindex invoking
The format for running lziprecover is:
@@ -164,18 +170,10 @@ information about the members in the file.
@item -m
@itemx --merge
Try to produce a correct file merging the good parts of two or more
-damaged copies. The copies must be single-member files. The merge will
-fail if the copies have too many damaged areas or if the same byte is
-damaged in all copies. If successful, a repaired copy is written to the
-file @samp{@var{file}_fixed.lz}. The exit status is 0 if the file could
-be repaired, 2 otherwise.
-
-To give you an idea of its possibilities, when merging two copies each
-of them with one damaged area affecting 1 percent of the copy, the
-probability of obtaining a correct file is about 98 percent. With three
-such copies the probability rises to 99.97 percent. For large files with
-small errors, the probability approaches 100 percent even with only two
-copies.
+damaged copies. If successful, a repaired copy is written to the file
+@samp{@var{file}_fixed.lz}. The exit status is 0 if a correct file could
+be produced, 2 otherwise. See the chapter Merging Files (@pxref{Merging
+Files}) for a complete description of the merge mode.
@item -o @var{file}
@itemx --output=@var{file}
@@ -192,10 +190,12 @@ Quiet operation. Suppress all messages.
@item -R
@itemx --repair
-Try to repair a small error, affecting only one byte, in a single-member
-@var{file}. If successful, a repaired copy is written to the file
+Try to repair a file with small errors (up to one byte error per member).
+If successful, a repaired copy is written to the file
@samp{@var{file}_fixed.lz}. @samp{@var{file}} is not modified at all.
The exit status is 0 if the file could be repaired, 2 otherwise.
+See the chapter Repairing Files (@pxref{Repairing Files}) for a complete
+description of the repair mode.
@item -s
@itemx --split
@@ -252,6 +252,47 @@ invalid input file, 3 for an internal consistency error (eg, bug) which
caused lziprecover to panic.
+@node Repairing Files
+@chapter Repairing Files
+@cindex repairing files
+
+Lziprecover is able to repair files with small errors (up to one byte
+error per member). The error may be located anywhere in the file except
+in the header (first 6 bytes of each member) or in the @samp{Member
+size} field of the trailer (last 8 bytes of each member). This makes
+lzip files resistant to bit-flip, one of the most common forms of data
+corruption.
+
+Bit-flip happens when one bit in the file is changed from 0 to 1 or vice
+versa. It may be caused by bad RAM or even by natural radiation. I have
+seen a case of bit-flip in a file stored in a USB flash drive.
+
+
+@node Merging Files
+@chapter Merging Files
+@cindex merging files
+
+If you have several copies of a file but all of them are too damaged to
+repair them (@pxref{Repairing Files}), lziprecover can try to produce a
+correct file merging the good parts of the damaged copies.
+
+The merge may succeed even if some copies of the file have all the
+headers and trailers damaged, as long as there is at least one copy of
+every header and trailer intact, even if they are in different copies of
+the file.
+
+The merge will fail if the damaged areas overlap (at least one byte is
+damaged in all copies), or are adjacent and the boundary can't be
+determined, or if the copies have too many damaged areas.
+
+To give you an idea of its possibilities, when merging two copies each
+of them with one damaged area affecting 1 percent of the copy, the
+probability of obtaining a correct file is about 98 percent. With three
+such copies the probability rises to 99.97 percent. For large files with
+small errors, the probability approaches 100 percent even with only two
+copies.
+
+
@node File Format
@chapter File Format
@cindex file format
@@ -368,9 +409,8 @@ lziprecover -D 10000-15000 file.lz
@sp 1
@noindent
-Example 5: Repair a one-byte corruption in the single-member file
-@samp{file.lz}. (Indented lines are abridged error messages from
-lziprecover).
+Example 5: Repair small errors in the file @samp{file.lz}. (Indented
+lines are abridged diagnostic messages from lziprecover).
@example
lziprecover -v -R file.lz
@@ -422,27 +462,12 @@ Example 8: Recover the first volume of those created with the command
@w{@code{lzip -b 32MiB -S 650MB big_db}} from two copies,
@samp{big_db1_00001.lz} and @samp{big_db2_00001.lz}, with member 07
damaged in the first copy, member 18 damaged in the second copy, and
-member 12 damaged in both copies. Two correct copies are produced and
-compared.
+member 12 damaged in both copies. The correct file produced is saved in
+@samp{big_db_00001.lz}.
@example
-lziprecover -s big_db1_00001.lz
-lziprecover -s big_db2_00001.lz
-lziprecover -t rec*big_db1_00001.lz
- rec07big_db1_00001.lz: crc mismatch
- rec12big_db1_00001.lz: crc mismatch
-lziprecover -t rec*big_db2_00001.lz
- rec12big_db2_00001.lz: crc mismatch
- rec18big_db2_00001.lz: crc mismatch
-lziprecover -m -v rec12big_db1_00001.lz rec12big_db2_00001.lz
+lziprecover -m -v -o big_db_00001.lz big_db1_00001.lz big_db2_00001.lz
Input files merged successfully
-cp rec07big_db2_00001.lz rec07big_db1_00001.lz
-cp rec12big_db1_00001_fixed.lz rec12big_db1_00001.lz
-cp rec12big_db1_00001_fixed.lz rec12big_db2_00001.lz
-cp rec18big_db1_00001.lz rec18big_db2_00001.lz
-cat rec*big_db1_00001.lz > big_db3_00001.lz
-cat rec*big_db2_00001.lz > big_db4_00001.lz
-zcmp big_db3_00001.lz big_db4_00001.lz
@end example
diff --git a/file_index.cc b/file_index.cc
index 41bee41..997003a 100644
--- a/file_index.cc
+++ b/file_index.cc
@@ -52,21 +52,32 @@ const char * format_num( unsigned long long num,
}
-File_index::File_index( const int infd ) : retval_( 0 )
+Block Block::split( const long long pos ) // cuts this block at 'pos' and returns the leading part
+ {
+ if( pos_ < pos && end() > pos ) // split only when 'pos' lies after pos_ and before end()
+ {
+ const Block b( pos_, pos - pos_ ); // leading part; presumably Block( position, size ) -- confirm in file_index.h
+ pos_ = pos; size_ -= b.size_; // this block now starts at 'pos' and is shortened by the size of 'b'
+ return b;
+ }
+ return Block( 0, 0 ); // no split performed; this block is left unmodified
+ }
+
+
+File_index::File_index( const int infd )
+ :
+ isize( lseek( infd, 0, SEEK_END ) ), retval_( 0 )
{
- const long long isize = lseek( infd, 0, SEEK_END );
if( isize < 0 )
{ error_ = "Input file is not seekable :";
error_ += std::strerror( errno ); retval_ = 1; return; }
+ if( isize < min_member_size )
+ { error_ = "Input file is too short."; retval_ = 2; return; }
if( isize > INT64_MAX )
{ error_ = "Input file is too long (2^63 bytes or more).";
retval_ = 2; return; }
- long long pos = isize; // always points to a header or EOF
- File_header header;
- File_trailer trailer;
- if( isize < min_member_size )
- { error_ = "Input file is too short."; retval_ = 2; return; }
+ File_header header;
if( seek_read( infd, header.data, File_header::size, 0 ) != File_header::size )
{ error_ = "Error reading member header :";
error_ += std::strerror( errno ); retval_ = 1; return; }
@@ -77,10 +88,12 @@ File_index::File_index( const int infd ) : retval_( 0 )
{ error_ = "Version "; error_ += format_num( header.version() );
error_ += "member format not supported."; retval_ = 2; return; }
+ long long pos = isize; // always points to a header or to EOF
while( pos >= min_member_size )
{
- if( seek_read( infd, trailer.data, File_trailer::size(),
- pos - File_trailer::size() ) != File_trailer::size() )
+ File_trailer trailer;
+ if( seek_read( infd, trailer.data, File_trailer::size,
+ pos - File_trailer::size ) != File_trailer::size )
{ error_ = "Error reading member trailer :";
error_ += std::strerror( errno ); retval_ = 1; break; }
const long long member_size = trailer.member_size();
@@ -105,14 +118,113 @@ File_index::File_index( const int infd ) : retval_( 0 )
if( member_vector.size() == 0 && isize - pos > File_header::size &&
seek_read( infd, header.data, File_header::size, pos ) == File_header::size &&
header.verify_magic() && header.verify_version() )
- { // last trailer is corrupt
- error_ = "Member size in trailer is corrupt at pos ";
- error_ += format_num( isize - 8 ); retval_ = 2; break;
+ {
+ error_ = "Last member in input file is truncated or corrupt.";
+ retval_ = 2; break;
+ }
+ pos -= member_size;
+ member_vector.push_back( Member( 0, trailer.data_size(),
+ pos, member_size ) );
+ }
+ if( pos != 0 || member_vector.size() == 0 )
+ {
+ member_vector.clear();
+ if( retval_ == 0 ) { error_ = "Can't create file index."; retval_ = 2; }
+ return;
+ }
+ std::reverse( member_vector.begin(), member_vector.end() );
+ for( unsigned i = 0; i < member_vector.size() - 1; ++i )
+ {
+ const long long end = member_vector[i].dblock.end();
+ if( end < 0 || end > INT64_MAX )
+ {
+ member_vector.clear();
+ error_ = "Data in input file is too long (2^63 bytes or more).";
+ retval_ = 2; return;
+ }
+ member_vector[i+1].dblock.pos( end );
+ }
+ }
+
+
+// All files in 'infd_vector' must be at least 'fsize' bytes long.
+File_index::File_index( const std::vector< int > & infd_vector,
+ const long long fsize )
+ :
+ isize( fsize ), retval_( 0 )
+ {
+ if( isize < 0 )
+ { error_ = "Input file is not seekable :";
+ error_ += std::strerror( errno ); retval_ = 1; return; }
+ if( isize < min_member_size )
+ { error_ = "Input file is too short."; retval_ = 2; return; }
+ if( isize > INT64_MAX )
+ { error_ = "Input file is too long (2^63 bytes or more).";
+ retval_ = 2; return; }
+
+ const int files = infd_vector.size();
+ File_header header;
+ bool done = false;
+ for( int i = 0; i < files && !done; ++i )
+ {
+ const int infd = infd_vector[i];
+ if( seek_read( infd, header.data, File_header::size, 0 ) != File_header::size )
+ { error_ = "Error reading member header :";
+ error_ += std::strerror( errno ); retval_ = 1; return; }
+ if( header.verify_magic() && header.verify_version() ) done = true;
+ }
+ if( !done )
+ { error_ = "Bad magic number (file not in lzip format).";
+ retval_ = 2; return; }
+
+ long long pos = isize; // always points to a header or to EOF
+ while( pos >= min_member_size )
+ {
+ long long member_size;
+ File_trailer trailer;
+ done = false;
+ for( int it = 0; it < files && !done; ++it )
+ {
+ const int tfd = infd_vector[it];
+ if( seek_read( tfd, trailer.data, File_trailer::size,
+ pos - File_trailer::size ) != File_trailer::size )
+ { error_ = "Error reading member trailer :";
+ error_ += std::strerror( errno ); retval_ = 1; goto error; }
+ member_size = trailer.member_size();
+ if( member_size >= min_member_size && member_size <= pos )
+ for( int ih = 0; ih < files && !done; ++ih )
+ {
+ const int hfd = infd_vector[ih];
+ if( seek_read( hfd, header.data, File_header::size,
+ pos - member_size ) != File_header::size )
+ { error_ = "Error reading member header :";
+ error_ += std::strerror( errno ); retval_ = 1; goto error; }
+ if( header.verify_magic() && header.verify_version() ) done = true;
+ }
+ }
+ if( !done )
+ {
+ if( member_vector.size() == 0 ) // maybe trailing garbage
+ { --pos; continue; }
+ error_ = "Member size in trailer may be corrupt at pos ";
+ error_ += format_num( pos - 8 ); retval_ = 2; break;
}
+ if( member_vector.size() == 0 && isize - pos > File_header::size )
+ for( int i = 0; i < files; ++i )
+ {
+ const int infd = infd_vector[i];
+ if( seek_read( infd, header.data, File_header::size, pos ) == File_header::size &&
+ header.verify_magic() && header.verify_version() )
+ {
+ error_ = "Last member in input file is truncated or corrupt.";
+ retval_ = 2; goto error;
+ }
+ }
pos -= member_size;
member_vector.push_back( Member( 0, trailer.data_size(),
pos, member_size ) );
}
+error:
if( pos != 0 || member_vector.size() == 0 )
{
member_vector.clear();
diff --git a/file_index.h b/file_index.h
index 2f055b1..92cf11c 100644
--- a/file_index.h
+++ b/file_index.h
@@ -25,7 +25,8 @@ class Block
long long pos_, size_; // pos + size <= INT64_MAX
public:
- Block( const long long p, const long long s ) : pos_( p ), size_( s ) {}
+ Block( const long long p, const long long s )
+ : pos_( p ), size_( s ) {}
long long pos() const { return pos_; }
long long size() const { return size_; }
@@ -34,9 +35,17 @@ public:
void pos( const long long p ) { pos_ = p; }
void size( const long long s ) { size_ = s; }
+ bool operator==( const Block & b ) const
+ { return pos_ == b.pos_ && size_ == b.size_; }
+ bool operator!=( const Block & b ) const
+ { return pos_ != b.pos_ || size_ != b.size_; }
+
+ bool operator<( const Block & b ) const { return pos_ < b.pos_; }
+
bool overlaps( const Block & b ) const
{ return ( pos_ < b.end() && b.pos_ < end() ); }
void shift( Block & b ) { ++size_; ++b.pos_; --b.size_; }
+ Block split( const long long pos );
};
@@ -49,18 +58,35 @@ class File_index
Member( const long long dp, const long long ds,
const long long mp, const long long ms )
: dblock( dp, ds ), mblock( mp, ms ) {}
+
+ bool operator==( const Member & m ) const { return ( mblock == m.mblock ); }
+ bool operator!=( const Member & m ) const { return ( mblock != m.mblock ); }
};
std::vector< Member > member_vector;
std::string error_;
+ long long isize;
int retval_;
public:
- File_index( const int infd );
+ File_index() : error_( "No index." ), isize( 0 ), retval_( 2 ) {}
+ explicit File_index( const int infd );
+ File_index( const std::vector< int > & infd_vector, const long long fsize );
+ int members() const { return member_vector.size(); }
const std::string & error() const { return error_; }
int retval() const { return retval_; }
+ bool operator==( const File_index & fi ) const
+ {
+ if( retval_ || fi.retval_ || isize != fi.isize ||
+ member_vector.size() != fi.member_vector.size() ) return false;
+ for( unsigned i = 0; i < member_vector.size(); ++i )
+ if( member_vector[i] != fi.member_vector[i] ) return false;
+ return true;
+ }
+ bool operator!=( const File_index & fi ) const { return !( *this == fi ); }
+
long long data_end() const
{ if( member_vector.size() ) return member_vector.back().dblock.end();
else return 0; }
@@ -69,11 +95,14 @@ public:
{ if( member_vector.size() ) return member_vector.back().mblock.end();
else return 0; }
+ // total size including trailing garbage (if any)
+ long long file_size() const
+ { if( isize >= 0 ) return isize; else return 0; }
+
const Block & dblock( const int i ) const
{ return member_vector[i].dblock; }
const Block & mblock( const int i ) const
{ return member_vector[i].mblock; }
- int members() const { return (int)member_vector.size(); }
};
diff --git a/lzip.h b/lzip.h
index fcad09e..ea02a5f 100644
--- a/lzip.h
+++ b/lzip.h
@@ -195,7 +195,7 @@ struct File_header
{ return ( std::memcmp( data, magic_string, 4 ) == 0 ); }
uint8_t version() const { return data[4]; }
- bool verify_version() const { return ( data[4] <= 1 ); }
+ bool verify_version() const { return ( data[4] == 1 ); }
unsigned dictionary_size() const
{
@@ -231,8 +231,7 @@ struct File_trailer
// 4-11 size of the uncompressed data
// 12-19 member size including header and trailer
- static int size( const int version = 1 )
- { return ( ( version >= 1 ) ? 20 : 12 ); }
+ enum { size = 20 };
unsigned data_crc() const
{
@@ -301,16 +300,15 @@ void cleanup_and_fail( const std::string & output_filename,
const int outfd, const int retval );
bool copy_file( const int infd, const int outfd,
const long long max_size = -1 );
-bool try_decompress( const int fd, const unsigned long long file_size,
- long long * failure_posp = 0 );
+bool try_decompress_member( const int fd, const unsigned long long msize,
+ long long * failure_posp = 0 );
bool verify_header( const File_header & header, const int verbosity );
-bool verify_single_member( const int fd, const long long file_size,
- const int verbosity );
int merge_files( const std::vector< std::string > & filenames,
const std::string & output_filename, const int verbosity,
const bool force );
// defined in range_dec.cc
+bool safe_seek( const int fd, const long long pos );
int list_files( const std::vector< std::string > & filenames,
const int verbosity );
int range_decompress( const std::string & input_filename,
diff --git a/main.cc b/main.cc
index b3e6474..21a5b04 100644
--- a/main.cc
+++ b/main.cc
@@ -113,7 +113,6 @@ void show_help()
" -R, --repair try to repair a small error in file\n"
" -s, --split split multi-member file in single-member files\n"
" -t, --test test compressed file integrity\n"
-// " -u, --update convert file from version 0 to version 1\n"
" -v, --verbose be verbose (a 2nd -v gives more)\n"
"Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
"Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n"
diff --git a/merge.cc b/merge.cc
index c254a59..c216870 100644
--- a/merge.cc
+++ b/merge.cc
@@ -35,70 +35,107 @@
namespace {
-bool copy_and_diff_file( const std::vector< int > & infd_vector,
- const int outfd, std::vector< Block > & block_vector )
+// Merge the blocks of 'bv' into 'block_vector', splitting partially overlapping
+// blocks so that every edge (pos and end of every block) is kept. Presumably both are sorted by pos.
+// 'block_vector' receives the result. 'bv' is left modified (swapped or split); do not reuse it.
+void combine( std::vector< Block > & block_vector, std::vector< Block > & bv )
 {
+ if( block_vector.empty() ) { block_vector.swap( bv ); return; } // nothing to merge with: take 'bv' as is
+ unsigned i1 = 0, i2 = 0; // i1 indexes block_vector, i2 indexes bv
+ while( i1 < block_vector.size() && i2 < bv.size() )
+ {
+ Block & b1 = block_vector[i1]; // not used again after an insert() below invalidates it
+ Block & b2 = bv[i2];
+ if( b1.overlaps( b2 ) )
+ {
+ if( b1 < b2 ) // b1 starts first: cut off the head of b1 that precedes b2
+ {
+ Block b = b1.split( b2.pos() );
+ block_vector.insert( block_vector.begin() + i1, b ); ++i1; // i1 now refers to the trimmed b1
+ }
+ else if( b2 < b1 ) // b2 starts first: move the head of b2 that precedes b1 into the result
+ {
+ Block b( b2.pos(), b1.pos() - b2.pos() );
+ b2.split( b1.pos() ); // b2 now starts where b1 starts
+ block_vector.insert( block_vector.begin() + i1, b ); ++i1; // i1 refers to b1 again
+ }
+ else if( b1.end() < b2.end() ) { b2.split( b1.end() ); ++i1; } // same pos, b1 shorter: keep b1, trim b2
+ else if( b2.end() < b1.end() ) // same pos, b2 shorter: split b1 at the end of b2
+ {
+ Block b = b1.split( b2.end() );
+ block_vector.insert( block_vector.begin() + i1, b ); ++i1; ++i2; // i1 refers to the tail of b1
+ }
+ else { ++i1; ++i2; } // blocks are identical
+ }
+ else if( b1 < b2 ) ++i1; // no overlap, b1 comes first: already in place
+ else { block_vector.insert( block_vector.begin() + i1, b2 ); ++i1; ++i2; } // no overlap, b2 first: insert it before b1
+ }
+ if( i2 < bv.size() ) // tail copy
+ block_vector.insert( block_vector.end(), bv.begin() + i2, bv.end() );
+ }
+
+
+bool diff_member( const long long mpos, const long long msize, // compare the member at [mpos, mpos + msize) in every pair of files
+ const std::vector< int > & infd_vector, // descriptors of the input file copies
+ std::vector< Block > & block_vector ) // out: differing blocks (absolute positions), merged with combine()
+ {
+ const int files = infd_vector.size();
const int buffer_size = 65536;
- std::vector< uint8_t * > buffer_vector( infd_vector.size() );
- for( unsigned i = 0; i < infd_vector.size(); ++i )
- buffer_vector[i] = new uint8_t[buffer_size];
- Block b( 0, 0 );
- long long partial_pos = 0;
- int equal_bytes = 0;
- bool error = false;
+ uint8_t * const buffer1 = new uint8_t[buffer_size];
+ uint8_t * const buffer2 = new uint8_t[buffer_size];
- while( true )
+ bool error = false; // returns !error; set on any seek or read failure
+ for( int i1 = 0; i1 + 1 < files && !error; ++i1 )
{
- const int rd = readblock( infd_vector[0], buffer_vector[0], buffer_size );
- if( rd != buffer_size && errno )
- { show_error( "Error reading input file", errno ); error = true; break; }
- if( rd > 0 )
+ for( int i2 = i1 + 1; i2 < files && !error; ++i2 )
{
- for( unsigned i = 1; i < infd_vector.size(); ++i )
- if( readblock( infd_vector[i], buffer_vector[i], rd ) != rd )
- { show_error( "Error reading input file", errno );
- error = true; break; }
- if( error ) break;
- const int wr = writeblock( outfd, buffer_vector[0], rd );
- if( wr != rd )
- { show_error( "Error writing output file", errno );
- error = true; break; }
- for( int i = 0; i < rd; ++i )
+ std::vector< Block > bv; // differing blocks found for this pair of files
+ long long partial_pos = 0; // bytes of the member compared so far
+ const int fd1 = infd_vector[i1], fd2 = infd_vector[i2];
+ long long begin = -1; // begin of block (member offset, may exceed INT_MAX). -1 means no block
+ bool prev_equal = true; // true if the previous byte was equal in both files
+ if( !safe_seek( fd1, mpos ) || !safe_seek( fd2, mpos ) )
+ { error = true; break; }
+
+ while( msize > partial_pos )
{
- while( i < rd && b.pos() == 0 )
- {
- for( unsigned j = 1; j < infd_vector.size(); ++j )
- if( buffer_vector[0][i] != buffer_vector[j][i] )
- { b.pos( partial_pos + i ); break; } // begin block
- ++i;
- }
- while( i < rd && b.pos() > 0 )
+ const int size = std::min( (long long)buffer_size, msize - partial_pos );
+ const int rd = readblock( fd1, buffer1, size );
+ if( rd != size && errno )
+ { show_error( "Error reading input file", errno ); error = true; break; }
+ if( rd > 0 )
{
- ++equal_bytes;
- for( unsigned j = 1; j < infd_vector.size(); ++j )
- if( buffer_vector[0][i] != buffer_vector[j][i] )
- { equal_bytes = 0; break; }
- if( equal_bytes >= 2 ) // end block
+ if( readblock( fd2, buffer2, rd ) != rd )
+ { show_error( "Error reading input file", errno );
+ error = true; break; }
+ for( int i = 0; i < rd; ++i )
{
- b.size( partial_pos + i - ( equal_bytes - 1 ) - b.pos() );
- block_vector.push_back( b );
- b.pos( 0 );
- equal_bytes = 0;
+ if( buffer1[i] != buffer2[i] )
+ {
+ prev_equal = false;
+ if( begin < 0 ) begin = partial_pos + i; // begin block
+ }
+ else if( !prev_equal ) prev_equal = true; // a single equal byte does not end the block
+ else if( begin >= 0 ) // end block
+ {
+ Block b( mpos + begin, partial_pos + i - 1 - begin ); // excludes the two trailing equal bytes
+ begin = -1;
+ bv.push_back( b );
+ }
}
- ++i;
+ partial_pos += rd;
}
+ if( rd < buffer_size ) break; // EOF
}
- partial_pos += rd;
+ if( begin >= 0 ) // finish last block
+ {
+ Block b( mpos + begin, partial_pos - prev_equal - begin ); // drop one trailing equal byte, if any
+ bv.push_back( b );
+ }
+ combine( block_vector, bv ); // merge this pair's blocks into the overall result
}
- if( rd < buffer_size ) break; // EOF
- }
- if( b.pos() > 0 ) // finish last block
- {
- b.size( partial_pos - b.pos() );
- block_vector.push_back( b );
}
- for( unsigned i = 0; i < infd_vector.size(); ++i )
- delete[] buffer_vector[i];
+ delete[] buffer2; delete[] buffer1;
return !error;
}
@@ -116,15 +153,16 @@ int ipow( const unsigned base, const unsigned exponent )
int open_input_files( const std::vector< std::string > & filenames,
- std::vector< int > & infd_vector, long long & isize,
- const int verbosity )
+ std::vector< int > & infd_vector,
+ File_index & file_index, const int verbosity )
{
+ const int files = filenames.size();
bool identical = false;
- for( unsigned i = 1; i < filenames.size(); ++i )
+ for( int i = 1; i < files; ++i )
if( filenames[0] == filenames[i] )
{ identical = true; break; }
if( !identical )
- for( unsigned i = 0; i < filenames.size(); ++i )
+ for( int i = 0; i < files; ++i )
{
struct stat in_stats;
ino_t st_ino0 = 0;
@@ -137,15 +175,27 @@ int open_input_files( const std::vector< std::string > & filenames,
}
if( identical ) { show_error( "Two input files are the same." ); return 2; }
- isize = 0;
- for( unsigned i = 0; i < filenames.size(); ++i )
+ long long isize = 0;
+ for( int i = 0; i < files; ++i )
{
- const long long tmp = lseek( infd_vector[i], 0, SEEK_END );
- if( tmp < 0 )
+ long long tmp;
+ const File_index fi( infd_vector[i] );
+ if( fi.retval() == 0 ) // file format is intact
+ {
+ if( file_index.retval() != 0 ) file_index = fi;
+ else if( file_index != fi )
+ { show_error( "Input files are different." ); return 2; }
+ tmp = file_index.file_size();
+ }
+ else // file format is damaged
{
- if( verbosity >= 0 )
- std::fprintf( stderr, "File '%s' is not seekable.\n", filenames[i].c_str() );
- return 1;
+ tmp = lseek( infd_vector[i], 0, SEEK_END );
+ if( tmp < 0 )
+ {
+ if( verbosity >= 0 )
+ std::fprintf( stderr, "File '%s' is not seekable.\n", filenames[i].c_str() );
+ return 1;
+ }
}
if( i == 0 )
{
@@ -157,23 +207,33 @@ int open_input_files( const std::vector< std::string > & filenames,
{ show_error( "Sizes of input files are different." ); return 2; }
}
- for( unsigned i = 0; i < filenames.size(); ++i )
- if( !verify_single_member( infd_vector[i], isize, verbosity ) )
- return 2;
+ if( file_index.retval() != 0 )
+ {
+ const File_index fi( infd_vector, isize );
+ if( fi.retval() == 0 ) // file format could be recovered
+ file_index = fi;
+ else
+ { show_error( "Format damaged in all input files." ); return 2; }
+ }
- for( unsigned i = 0; i < filenames.size(); ++i )
+ for( int i = 0; i < files; ++i )
{
- if( lseek( infd_vector[i], 0, SEEK_SET ) < 0 )
- { show_error( "Seek error in input file", errno ); return 1; }
- if( try_decompress( infd_vector[i], isize ) )
+ const int infd = infd_vector[i];
+ bool error = false;
+ for( int j = 0; j < file_index.members(); ++j )
+ {
+ const long long mpos = file_index.mblock( j ).pos();
+ const long long msize = file_index.mblock( j ).size();
+ if( !safe_seek( infd, mpos ) ) return 1;
+ if( !try_decompress_member( infd, msize ) ) { error = true; break; }
+ }
+ if( !error )
{
if( verbosity >= 1 )
std::printf( "File '%s' has no errors. Recovery is not needed.\n",
filenames[i].c_str() );
return 0;
}
- if( lseek( infd_vector[i], 0, SEEK_SET ) < 0 )
- { show_error( "Seek error in input file", errno ); return 1; }
}
return -1;
}
@@ -221,16 +281,15 @@ bool copy_file( const int infd, const int outfd, const long long max_size )
}
-bool try_decompress( const int fd, const unsigned long long file_size,
- long long * failure_posp )
+bool try_decompress_member( const int fd, const unsigned long long msize,
+ long long * failure_posp )
{
try {
Range_decoder rdec( fd );
File_header header;
rdec.read_data( header.data, File_header::size );
if( !rdec.finished() && // End Of File
- header.verify_magic() &&
- header.version() == 1 &&
+ header.verify_magic() && header.verify_version() &&
header.dictionary_size() >= min_dictionary_size &&
header.dictionary_size() <= max_dictionary_size )
{
@@ -238,7 +297,7 @@ bool try_decompress( const int fd, const unsigned long long file_size,
Pretty_print dummy( "", -1 );
if( decoder.decode_member( dummy ) == 0 &&
- rdec.member_position() == file_size ) return true;
+ rdec.member_position() == msize ) return true;
if( failure_posp ) *failure_posp = rdec.member_position();
}
}
@@ -259,12 +318,7 @@ bool verify_header( const File_header & header, const int verbosity )
show_error( "Bad magic number (file not in lzip format)." );
return false;
}
- if( header.version() == 0 )
- {
- show_error( "Version 0 member format can't be recovered." );
- return false;
- }
- if( header.version() != 1 )
+ if( !header.verify_version() )
{
if( verbosity >= 0 )
std::fprintf( stderr, "Version %d member format not supported.\n",
@@ -275,116 +329,106 @@ bool verify_header( const File_header & header, const int verbosity )
}
-bool verify_single_member( const int fd, const long long file_size,
- const int verbosity )
- {
- File_header header;
- if( lseek( fd, 0, SEEK_SET ) < 0 ||
- readblock( fd, header.data, File_header::size ) != File_header::size )
- { show_error( "Error reading member header", errno ); return false; }
- if( !verify_header( header, verbosity ) ) return false;
-
- File_trailer trailer;
- if( lseek( fd, -File_trailer::size(), SEEK_END ) < 0 ||
- readblock( fd, trailer.data, File_trailer::size() ) != File_trailer::size() )
- { show_error( "Error reading member trailer", errno ); return false; }
- const long long member_size = trailer.member_size();
- if( member_size != file_size )
- {
- if( member_size < file_size &&
- lseek( fd, -member_size, SEEK_END ) > 0 &&
- readblock( fd, header.data, File_header::size ) == File_header::size &&
- verify_header( header, verbosity ) )
- show_error( "Input file has more than 1 member. Split it first." );
- else
- show_error( "Member size in input file trailer is corrupt." );
- return false;
- }
- return true;
- }
-
-
int merge_files( const std::vector< std::string > & filenames,
const std::string & output_filename, const int verbosity,
const bool force )
{
- std::vector< int > infd_vector( filenames.size() );
- long long isize = 0;
- const int retval = open_input_files( filenames, infd_vector, isize, verbosity );
+ const int files = filenames.size();
+ std::vector< int > infd_vector( files );
+ File_index file_index;
+ const int retval =
+ open_input_files( filenames, infd_vector, file_index, verbosity );
if( retval >= 0 ) return retval;
+ if( !safe_seek( infd_vector[0], 0 ) ) return 1;
const int outfd = open_outstream_rw( output_filename, force );
if( outfd < 0 ) return 1;
-
- // vector of data blocks differing among the copies of the input file.
- std::vector< Block > block_vector;
- if( !copy_and_diff_file( infd_vector, outfd, block_vector ) )
+ if( !copy_file( infd_vector[0], outfd ) ) // copy whole file
cleanup_and_fail( output_filename, outfd, 1 );
- if( block_vector.size() == 0 )
- { show_error( "Input files are identical. Recovery is not possible." );
- cleanup_and_fail( output_filename, outfd, 2 ); }
-
- const bool single_block = ( block_vector.size() == 1 );
- if( single_block && block_vector[0].size() < 2 )
- { show_error( "Input files have the same byte damaged."
- " Try repairing one of them." );
- cleanup_and_fail( output_filename, outfd, 2 ); }
+ for( int j = 0; j < file_index.members(); ++j )
+ {
+ const long long mpos = file_index.mblock( j ).pos();
+ const long long msize = file_index.mblock( j ).size();
+ // vector of data blocks differing among the copies of the current member
+ std::vector< Block > block_vector;
+ if( !diff_member( mpos, msize, infd_vector, block_vector ) ||
+ !safe_seek( outfd, mpos ) )
+ cleanup_and_fail( output_filename, outfd, 1 );
+
+ if( block_vector.size() == 0 )
+ {
+ if( file_index.members() > 1 && try_decompress_member( outfd, msize ) )
+ continue;
+ show_error( "Input files are (partially) identical. Recovery is not possible." );
+ cleanup_and_fail( output_filename, outfd, 2 );
+ }
- if( ipow( filenames.size(), block_vector.size() ) >= INT_MAX ||
- ( single_block &&
- ipow( filenames.size(), 2 ) >= INT_MAX / block_vector[0].size() ) )
- { show_error( "Input files are too damaged. Recovery is not possible." );
- cleanup_and_fail( output_filename, outfd, 2 ); }
+ const int size0 = block_vector[0].size();
+ const bool single_block = ( block_vector.size() == 1 );
+ if( ipow( files, block_vector.size() ) >= INT_MAX ||
+ ( single_block && ipow( files, 2 ) >= INT_MAX / size0 ) )
+ { show_error( "Input files are too damaged. Recovery is not possible." );
+ cleanup_and_fail( output_filename, outfd, 2 ); }
- const int shifts = ( single_block ? block_vector[0].size() - 1 : 1 );
- if( single_block )
- {
- Block b( block_vector[0].pos() + 1, block_vector[0].size() - 1 );
- block_vector[0].size( 1 );
- block_vector.push_back( b );
- }
+ const int shifts = ( single_block && size0 > 1 ) ? size0 - 1 : 1;
+ if( single_block && size0 > 1 )
+ {
+ Block b( block_vector[0].pos() + 1, size0 - 1 );
+ block_vector[0].size( 1 );
+ block_vector.push_back( b );
+ }
- const int base_variations = ipow( filenames.size(), block_vector.size() );
- const int variations = ( base_variations * shifts ) - 2;
- bool done = false;
- for( int var = 1; var <= variations; ++var )
- {
- if( verbosity >= 1 )
+ if( verbosity >= 1 && file_index.members() > 1 )
{
- std::printf( "Trying variation %d of %d \r", var, variations );
+ std::printf( "Merging member %d\n", j + 1 );
std::fflush( stdout );
}
- int tmp = var;
- for( unsigned i = 0; i < block_vector.size(); ++i )
+ const int base_variations = ipow( files, block_vector.size() );
+ const int variations = base_variations * shifts;
+ bool done = false;
+ for( int var = 0; var < variations; ++var )
{
- const int infd = infd_vector[tmp % filenames.size()];
- tmp /= filenames.size();
- if( lseek( infd, block_vector[i].pos(), SEEK_SET ) < 0 ||
- lseek( outfd, block_vector[i].pos(), SEEK_SET ) < 0 ||
- !copy_file( infd, outfd, block_vector[i].size() ) )
- { show_error( "Error reading output file", errno );
- cleanup_and_fail( output_filename, outfd, 1 ); }
+ if( verbosity >= 1 )
+ {
+ std::printf( "Trying variation %d of %d \r", var + 1, variations );
+ std::fflush( stdout );
+ }
+ int tmp = var;
+ for( unsigned i = 0; i < block_vector.size(); ++i )
+ {
+ const int infd = infd_vector[tmp % files];
+ tmp /= files;
+ if( lseek( infd, block_vector[i].pos(), SEEK_SET ) < 0 ||
+ lseek( outfd, block_vector[i].pos(), SEEK_SET ) < 0 ||
+ !copy_file( infd, outfd, block_vector[i].size() ) )
+ { show_error( "Error reading output file", errno );
+ cleanup_and_fail( output_filename, outfd, 1 ); }
+ }
+ if( !safe_seek( outfd, mpos ) )
+ cleanup_and_fail( output_filename, outfd, 1 );
+ if( try_decompress_member( outfd, msize ) )
+ { done = true; break; }
+ if( var > 0 && var % base_variations == 0 )
+ block_vector[0].shift( block_vector[1] );
+ }
+ if( verbosity >= 1 ) std::printf( "\n" );
+ if( !done )
+ {
+ if( verbosity >= 2 )
+ for( unsigned i = 0; i < block_vector.size(); ++i )
+ std::fprintf( stderr, "area %2d from offset %6lld to %6lld\n", i + 1,
+ block_vector[i].pos(), block_vector[i].end() - 1 );
+ show_error( "Some error areas overlap. Can't recover input file." );
+ cleanup_and_fail( output_filename, outfd, 2 );
}
- if( lseek( outfd, 0, SEEK_SET ) < 0 )
- { show_error( "Seek error in output file", errno );
- cleanup_and_fail( output_filename, outfd, 1 ); }
- if( try_decompress( outfd, isize ) )
- { done = true; break; }
- if( var % base_variations == 0 ) block_vector[0].shift( block_vector[1] );
}
- if( verbosity >= 1 ) std::printf( "\n" );
if( close( outfd ) != 0 )
{
show_error( "Error closing output file", errno );
cleanup_and_fail( output_filename, -1, 1 );
}
- if( !done )
- {
- show_error( "Some error areas overlap. Can't recover input file." );
- cleanup_and_fail( output_filename, -1, 2 );
- }
if( verbosity >= 1 )
std::printf( "Input files merged successfully.\n" );
return 0;
diff --git a/range_dec.cc b/range_dec.cc
index 59be01f..27ceba3 100644
--- a/range_dec.cc
+++ b/range_dec.cc
@@ -101,13 +101,6 @@ void parse_range( const char * const ptr, Block & range )
}
-bool safe_seek( const int fd, const long long pos )
- {
- if( lseek( fd, pos, SEEK_SET ) == pos ) return true;
- show_error( "Seek error", errno ); return false;
- }
-
-
int decompress_member( const int infd, const int outfd,
const Pretty_print & pp,
const unsigned long long mpos,
@@ -170,7 +163,7 @@ int list_file( const std::string & input_filename, const Pretty_print & pp )
const int infd = open_instream( input_filename, &in_stats, true, true );
if( infd < 0 ) return 1;
- File_index file_index( infd );
+ const File_index file_index( infd );
close( infd );
if( file_index.retval() != 0 )
{ show_error( file_index.error().c_str() ); return file_index.retval(); }
@@ -208,6 +201,13 @@ int list_file( const std::string & input_filename, const Pretty_print & pp )
} // end namespace
+bool safe_seek( const int fd, const long long pos ) // seek 'fd' to absolute offset 'pos'
+ {
+ if( lseek( fd, pos, SEEK_SET ) == pos ) return true; // success only if the resulting offset equals 'pos'
+ show_error( "Seek error", errno ); return false; // otherwise report through show_error with errno and return false
+ }
+
+
int list_files( const std::vector< std::string > & filenames,
const int verbosity )
{
@@ -234,7 +234,7 @@ int range_decompress( const std::string & input_filename,
const int infd = open_instream( input_filename, &in_stats, true, true );
if( infd < 0 ) return 1;
- File_index file_index( infd );
+ const File_index file_index( infd );
if( file_index.retval() != 0 )
{ show_error( file_index.error().c_str() ); return file_index.retval(); }
@@ -259,6 +259,7 @@ int range_decompress( const std::string & input_filename,
else
{ outfd = open_outstream_rw( output_filename, force );
if( outfd < 0 ) return 1; }
+
Pretty_print pp( input_filename, verbosity );
int retval = 0;
for( int i = 0; i < file_index.members(); ++i )
diff --git a/repair.cc b/repair.cc
index e9cef61..712002b 100644
--- a/repair.cc
+++ b/repair.cc
@@ -28,6 +28,7 @@
#include <sys/stat.h>
#include "lzip.h"
+#include "file_index.h"
int seek_read( const int fd, uint8_t * const buf, const int size,
@@ -55,72 +56,84 @@ int repair_file( const std::string & input_filename,
struct stat in_stats;
const int infd = open_instream( input_filename, &in_stats, true, true );
if( infd < 0 ) return 1;
- const long long isize = lseek( infd, 0, SEEK_END );
- if( isize < 0 )
- { show_error( "Input file is not seekable", errno ); return 1; }
- if( isize < min_member_size )
- { show_error( "Input file is too short." ); return 2; }
- if( !verify_single_member( infd, isize, verbosity ) ) return 2;
- if( lseek( infd, 0, SEEK_SET ) < 0 )
- { show_error( "Seek error in input file", errno ); return 1; }
-
- long long failure_pos = 0;
- if( try_decompress( infd, isize, &failure_pos ) )
- {
- if( verbosity >= 1 )
- std::printf( "Input file has no errors. Recovery is not needed.\n" );
- return 0;
- }
- if( failure_pos >= isize - 8 ) failure_pos = isize - 8 - 1;
- if( failure_pos < File_header::size )
- { show_error( "Can't repair error in input file." ); return 2; }
-
- if( lseek( infd, 0, SEEK_SET ) < 0 )
- { show_error( "Seek error in input file", errno ); return 1; }
-
- const int outfd = open_outstream_rw( output_filename, force );
- if( outfd < 0 ) { close( infd ); return 1; }
- if( !copy_file( infd, outfd ) )
- cleanup_and_fail( output_filename, outfd, 1 );
-
- const long long min_pos =
- std::max( (long long)File_header::size, failure_pos - 1000 );
- bool done = false;
- for( long long pos = failure_pos; pos >= min_pos && !done ; --pos )
+
+ const File_index file_index( infd );
+ if( file_index.retval() != 0 )
+ { show_error( file_index.error().c_str() ); return file_index.retval(); }
+
+ int outfd = -1;
+ for( int i = 0; i < file_index.members(); ++i )
{
+ const long long mpos = file_index.mblock( i ).pos();
+ const long long msize = file_index.mblock( i ).size();
+ if( !safe_seek( infd, mpos ) )
+ cleanup_and_fail( output_filename, outfd, 1 );
+ long long failure_pos = 0;
+ if( try_decompress_member( infd, msize, &failure_pos ) ) continue;
+ if( failure_pos >= msize - 8 ) failure_pos = msize - 8 - 1;
+ if( failure_pos < File_header::size )
+ { show_error( "Can't repair error in input file." );
+ cleanup_and_fail( output_filename, outfd, 2 ); }
+
+ if( outfd < 0 ) // first damaged member found
+ {
+ if( !safe_seek( infd, 0 ) ) return 1;
+ outfd = open_outstream_rw( output_filename, force );
+ if( outfd < 0 ) { close( infd ); return 1; }
+ if( !copy_file( infd, outfd ) ) // copy whole file
+ cleanup_and_fail( output_filename, outfd, 1 );
+ }
+
if( verbosity >= 1 )
{
- std::printf( "Trying position %llu \r", pos );
+ std::printf( "Repairing member %d\n", i + 1 );
std::fflush( stdout );
}
- uint8_t byte;
- if( seek_read( outfd, &byte, 1, pos ) != 1 )
- { show_error( "Error reading output file", errno );
- cleanup_and_fail( output_filename, outfd, 1 ); }
- for( int i = 0; i < 256; ++i )
+ const long long min_pos =
+ std::max( (long long)File_header::size, failure_pos - 1000 );
+ bool done = false;
+ for( long long pos = failure_pos; pos >= min_pos && !done ; --pos )
{
- ++byte;
- if( seek_write( outfd, &byte, 1, pos ) != 1 ||
- lseek( outfd, 0, SEEK_SET ) < 0 )
- { show_error( "Error writing output file", errno );
+ if( verbosity >= 1 )
+ {
+ std::printf( "Trying position %llu \r", mpos + pos );
+ std::fflush( stdout );
+ }
+ uint8_t byte;
+ if( seek_read( outfd, &byte, 1, mpos + pos ) != 1 )
+ { show_error( "Error reading output file", errno );
cleanup_and_fail( output_filename, outfd, 1 ); }
- if( i == 255 ) break;
- if( try_decompress( outfd, isize ) )
- { done = true; break; }
+ for( int i = 0; i < 256; ++i )
+ {
+ ++byte;
+ if( seek_write( outfd, &byte, 1, mpos + pos ) != 1 ||
+ lseek( outfd, mpos, SEEK_SET ) < 0 )
+ { show_error( "Error writing output file", errno );
+ cleanup_and_fail( output_filename, outfd, 1 ); }
+ if( i == 255 ) break;
+ if( try_decompress_member( outfd, msize ) )
+ { done = true; break; }
+ }
+ }
+ if( verbosity >= 1 ) std::printf( "\n" );
+ if( !done )
+ {
+ show_error( "Error is larger than 1 byte. Can't repair input file." );
+ cleanup_and_fail( output_filename, outfd, 2 );
}
}
- if( verbosity >= 1 ) std::printf( "\n" );
+ if( outfd < 0 )
+ {
+ if( verbosity >= 1 )
+ std::printf( "Input file has no errors. Recovery is not needed.\n" );
+ return 0;
+ }
if( close( outfd ) != 0 )
{
show_error( "Error closing output file", errno );
cleanup_and_fail( output_filename, -1, 1 );
}
- if( !done )
- {
- show_error( "Error is larger than 1 byte. Can't repair input file." );
- cleanup_and_fail( output_filename, -1, 2 );
- }
if( verbosity >= 1 )
std::printf( "Copy of input file repaired successfully.\n" );
return 0;
diff --git a/split.cc b/split.cc
index ac7627d..bec8a26 100644
--- a/split.cc
+++ b/split.cc
@@ -90,7 +90,7 @@ int do_split_file( const std::string & input_filename, uint8_t * & base_buffer,
const int verbosity, const bool force )
{
const int hsize = File_header::size;
- const int tsize = File_trailer::size();
+ const int tsize = File_trailer::size;
const int buffer_size = 65536;
const int base_buffer_size = tsize + buffer_size + hsize;
base_buffer = new uint8_t[base_buffer_size];
@@ -99,7 +99,8 @@ int do_split_file( const std::string & input_filename, uint8_t * & base_buffer,
struct stat in_stats;
const int infd = open_instream( input_filename, &in_stats, true, true );
if( infd < 0 ) return 1;
- File_index file_index( infd );
+ const File_index file_index( infd );
+ if( file_index.retval() != 0 ) show_error( file_index.error().c_str() );
const int max_members = ( file_index.retval() ? 999999 : file_index.members() );
int max_digits = 1;
for( int i = max_members; i >= 10; i /= 10 ) ++max_digits;
diff --git a/testsuite/check.sh b/testsuite/check.sh
index ea6e768..4ac7f59 100755
--- a/testsuite/check.sh
+++ b/testsuite/check.sh
@@ -23,10 +23,15 @@ mkdir tmp
cd "${objdir}"/tmp
in="${testdir}"/test.txt
-in_lz="${testdir}"/test_v1.lz
+in_lz="${testdir}"/test.txt.lz
inD="${testdir}"/test921-1921.txt
-fox5="${testdir}"/fox5_bad.txt
-fox5_lz="${testdir}"/fox5_bad.lz
+fox5_lz="${testdir}"/fox5.lz
+f5b1="${testdir}"/fox5_bad1.txt
+f5b1_lz="${testdir}"/fox5_bad1.lz
+f5b2_lz="${testdir}"/fox5_bad2.lz
+f5b3_lz="${testdir}"/fox5_bad3.lz
+f5b4_lz="${testdir}"/fox5_bad4.lz
+f5b5_lz="${testdir}"/fox5_bad5.lz
bad1_lz="${testdir}"/test_bad1.lz
bad2_lz="${testdir}"/test_bad2.lz
bad3_lz="${testdir}"/test_bad3.lz
@@ -35,91 +40,140 @@ bad5_lz="${testdir}"/test_bad5.lz
fail=0
# Description of test files for lziprecover:
-# fox5_bad.lz: byte at offset 188 changed from 0x34 to 0x33
+# fox5_bad1.lz: byte at offset 62 changed from 0x50 to 0x70 (CRC)
+# byte at offset 144 changed from 0x2D to 0x2E (data_size)
+# byte at offset 188 changed from 0x34 to 0x33 (mid stream)
+# byte at offset 247 changed from 0x2A to 0x2B (first byte)
+# byte at offset 378 changed from 0xA0 to 0x20 (EOS marker)
+# fox5_bad2.lz: [ 30- 49] --> zeroed;
+# fox5_bad3.lz: [100-299] --> zeroed;
+# fox5_bad4.lz: [250-349] --> zeroed;
+# fox5_bad5.lz: [300-399] --> zeroed;
# test_bad1.lz: byte at offset 67 changed from 0xCC to 0x33
-# test_bad2.lz: [ 34- 66) --> copy of bytes [ 68- 100)
-# test_bad3.lz: [ 512-1536) --> zeroed; [2560-3584) --> zeroed
-# test_bad4.lz: [3072-4096) --> random data; [4608-5632) --> zeroed
-# test_bad5.lz: [1024-2048) --> random data; [5120-6144) --> random data
+# test_bad2.lz: [ 34- 65] --> copy of bytes [ 68- 99]
+# test_bad3.lz: [ 512-1535] --> zeroed; [2560-3583] --> zeroed
+# test_bad4.lz: [3072-4095] --> random data; [4608-5631] --> zeroed
+# test_bad5.lz: [1024-2047] --> random data; [5120-6143] --> random data
printf "testing lziprecover-%s..." "$2"
"${LZIPRECOVER}" -lq
-if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
+if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
"${LZIPRECOVER}" -mq "${bad1_lz}"
-if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
+if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
"${LZIPRECOVER}" -Rq
-if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
+if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
"${LZIPRECOVER}" -sq
-if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
+if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi
-"${LZIP}" -t "${testdir}"/test_v0.lz || fail=1
-printf .
-"${LZIP}" -cd "${testdir}"/test_v0.lz > copy || fail=1
-cmp "${in}" copy || fail=1
-printf .
-
-"${LZIP}" -t "${testdir}"/test_v1.lz || fail=1
-printf .
-"${LZIP}" -cd "${testdir}"/test_v1.lz > copy || fail=1
+"${LZIP}" -t "${testdir}"/test.txt.lz || fail=1
+"${LZIP}" -cd "${testdir}"/test.txt.lz > copy || fail=1
cmp "${in}" copy || fail=1
printf .
"${LZIPRECOVER}" -D 921-1921 -fo copy "${in_lz}" || fail=1
cmp "${inD}" copy || fail=1
-printf .
"${LZIPRECOVER}" -D 921,1000 "${in_lz}" > copy || fail=1
cmp "${inD}" copy || fail=1
printf .
-"${LZIPRECOVER}" -D0 -iq -fo copy "${fox5_lz}"
-if [ $? = 2 ] && cmp "${fox5}" copy ; then printf . ; else fail=1 ; printf - ; fi
-"${LZIPRECOVER}" -D0 -iq "${fox5_lz}" > copy
-if [ $? = 2 ] && cmp "${fox5}" copy ; then printf . ; else fail=1 ; printf - ; fi
-
+"${LZIPRECOVER}" -D0 -iq "${f5b1_lz}" -fo copy
+if [ $? = 2 ] && cmp "${f5b1}" copy ; then printf . ; else fail=1 ; printf - ; fi
+"${LZIPRECOVER}" -D0 -iq "${f5b1_lz}" > copy
+if [ $? = 2 ] && cmp "${f5b1}" copy ; then printf . ; else fail=1 ; printf - ; fi
+
+rm -f copy.lz
+"${LZIPRECOVER}" -m -o copy.lz "${fox5_lz}" "${f5b1_lz}"
+if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . ; else fail=1 ; printf - ; fi
+"${LZIPRECOVER}" -m -o copy.lz "${f5b1_lz}" "${fox5_lz}"
+if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . ; else fail=1 ; printf - ; fi
"${LZIPRECOVER}" -m -o copy.lz "${bad1_lz}" "${bad2_lz}" "${bad1_lz}" -q
-if [ $? != 2 ] ; then fail=1 ; printf - ; else printf . ; fi
-"${LZIPRECOVER}" -m -o copy.lz "${bad1_lz}" "${bad2_lz}" || fail=1
-"${LZIPRECOVER}" -df copy.lz || fail=1
-cmp "${in}" copy || fail=1
+if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else fail=1 ; printf - ; fi
+"${LZIPRECOVER}" -m -o copy.lz "${f5b1_lz}" "${f5b5_lz}" -q
+if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else fail=1 ; printf - ; fi
+"${LZIPRECOVER}" -m -o copy.lz "${f5b3_lz}" "${f5b5_lz}" -q
+if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else fail=1 ; printf - ; fi
+"${LZIPRECOVER}" -m -o copy.lz "${bad3_lz}" "${bad4_lz}" -q
+if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else fail=1 ; printf - ; fi
+
+for i in "${f5b1_lz}" "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" ; do
+ "${LZIPRECOVER}" -mf -o copy.lz "${f5b2_lz}" "${i}" || fail=1
+ cmp "${fox5_lz}" copy.lz || fail=1
+ "${LZIPRECOVER}" -mf -o copy.lz "${i}" "${f5b2_lz}" || fail=1
+ cmp "${fox5_lz}" copy.lz || fail=1
+ printf .
+done
+
+for i in "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" ; do
+ "${LZIPRECOVER}" -mf -o copy.lz "${f5b1_lz}" "${f5b2_lz}" "${i}" || fail=1
+ cmp "${fox5_lz}" copy.lz || fail=1
+ "${LZIPRECOVER}" -mf -o copy.lz "${f5b1_lz}" "${i}" "${f5b2_lz}" || fail=1
+ cmp "${fox5_lz}" copy.lz || fail=1
+ "${LZIPRECOVER}" -mf -o copy.lz "${f5b2_lz}" "${f5b1_lz}" "${i}" || fail=1
+ cmp "${fox5_lz}" copy.lz || fail=1
+ "${LZIPRECOVER}" -mf -o copy.lz "${f5b2_lz}" "${i}" "${f5b1_lz}" || fail=1
+ cmp "${fox5_lz}" copy.lz || fail=1
+ "${LZIPRECOVER}" -mf -o copy.lz "${i}" "${f5b1_lz}" "${f5b2_lz}" || fail=1
+ cmp "${fox5_lz}" copy.lz || fail=1
+ "${LZIPRECOVER}" -mf -o copy.lz "${i}" "${f5b2_lz}" "${f5b1_lz}" || fail=1
+ cmp "${fox5_lz}" copy.lz || fail=1
+ printf .
+done
+
+"${LZIPRECOVER}" -mf -o copy.lz "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" || fail=1
+cmp "${fox5_lz}" copy.lz || fail=1
+"${LZIPRECOVER}" -mf -o copy.lz "${f5b1_lz}" "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" || fail=1
+cmp "${fox5_lz}" copy.lz || fail=1
+"${LZIPRECOVER}" -mf -o copy.lz "${f5b2_lz}" "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" || fail=1
+cmp "${fox5_lz}" copy.lz || fail=1
+"${LZIPRECOVER}" -mf -o copy.lz "${f5b1_lz}" "${f5b2_lz}" "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" || fail=1
+cmp "${fox5_lz}" copy.lz || fail=1
printf .
-"${LZIPRECOVER}" -m -o copy.lz "${bad2_lz}" "${bad1_lz}" || fail=1
-"${LZIPRECOVER}" -df copy.lz || fail=1
-cmp "${in}" copy || fail=1
+
+"${LZIPRECOVER}" -mf -o copy.lz "${bad1_lz}" "${bad2_lz}" || fail=1
+cmp "${in_lz}" copy.lz || fail=1
+"${LZIPRECOVER}" -mf -o copy.lz "${bad2_lz}" "${bad1_lz}" || fail=1
+cmp "${in_lz}" copy.lz || fail=1
printf .
for i in "${bad1_lz}" "${bad2_lz}" ; do
for j in "${bad3_lz}" "${bad4_lz}" "${bad5_lz}" ; do
- "${LZIPRECOVER}" -m -o copy.lz "${i}" "${j}" || fail=1
- "${LZIPRECOVER}" -df copy.lz || fail=1
- cmp "${in}" copy || fail=1
- printf .
- "${LZIPRECOVER}" -m -o copy.lz "${j}" "${i}" || fail=1
- "${LZIPRECOVER}" -df copy.lz || fail=1
- cmp "${in}" copy || fail=1
+ "${LZIPRECOVER}" -mf -o copy.lz "${i}" "${j}" || fail=1
+ cmp "${in_lz}" copy.lz || fail=1
+ "${LZIPRECOVER}" -mf -o copy.lz "${j}" "${i}" || fail=1
+ cmp "${in_lz}" copy.lz || fail=1
printf .
done
done
-"${LZIPRECOVER}" -m -o copy.lz "${bad3_lz}" "${bad4_lz}" "${bad5_lz}" || fail=1
-"${LZIPRECOVER}" -df copy.lz || fail=1
-cmp "${in}" copy || fail=1
+"${LZIPRECOVER}" -mf -o copy.lz "${bad3_lz}" "${bad4_lz}" "${bad5_lz}" || fail=1
+cmp "${in_lz}" copy.lz || fail=1
printf .
-"${LZIPRECOVER}" -m -o copy.lz "${bad4_lz}" "${bad5_lz}" "${bad3_lz}" || fail=1
-"${LZIPRECOVER}" -df copy.lz || fail=1
-cmp "${in}" copy || fail=1
+"${LZIPRECOVER}" -mf -o copy.lz "${bad3_lz}" "${bad5_lz}" "${bad4_lz}" || fail=1
+cmp "${in_lz}" copy.lz || fail=1
printf .
-"${LZIPRECOVER}" -m -o copy.lz "${bad5_lz}" "${bad3_lz}" "${bad4_lz}" || fail=1
-"${LZIPRECOVER}" -df copy.lz || fail=1
-cmp "${in}" copy || fail=1
+"${LZIPRECOVER}" -mf -o copy.lz "${bad4_lz}" "${bad3_lz}" "${bad5_lz}" || fail=1
+cmp "${in_lz}" copy.lz || fail=1
printf .
-
-"${LZIPRECOVER}" -R "${in_lz}" || fail=1
+"${LZIPRECOVER}" -mf -o copy.lz "${bad4_lz}" "${bad5_lz}" "${bad3_lz}" || fail=1
+cmp "${in_lz}" copy.lz || fail=1
+printf .
+"${LZIPRECOVER}" -mf -o copy.lz "${bad5_lz}" "${bad3_lz}" "${bad4_lz}" || fail=1
+cmp "${in_lz}" copy.lz || fail=1
+printf .
+"${LZIPRECOVER}" -mf -o copy.lz "${bad5_lz}" "${bad4_lz}" "${bad3_lz}" || fail=1
+cmp "${in_lz}" copy.lz || fail=1
printf .
+
+rm -f copy.lz
+"${LZIPRECOVER}" -R -o copy.lz "${fox5_lz}" || fail=1
+if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . ; else fail=1 ; printf - ; fi
"${LZIPRECOVER}" -R -o copy.lz "${bad2_lz}" -q
-if [ $? != 2 ] ; then fail=1 ; printf - ; else printf . ; fi
-"${LZIPRECOVER}" -R -o copy.lz "${bad1_lz}" || fail=1
-"${LZIPRECOVER}" -df copy.lz || fail=1
-cmp "${in}" copy || fail=1
+if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else fail=1 ; printf - ; fi
+"${LZIPRECOVER}" -Rf -o copy.lz "${f5b1_lz}" || fail=1
+cmp "${fox5_lz}" copy.lz || fail=1
+printf .
+"${LZIPRECOVER}" -Rf -o copy.lz "${bad1_lz}" || fail=1
+cmp "${in_lz}" copy.lz || fail=1
printf .
cat "${in_lz}" "${in_lz}" "${in_lz}" > copy || framework_failure
diff --git a/testsuite/fox5_bad.lz b/testsuite/fox5.lz
index 8bfd314..3472f64 100644
--- a/testsuite/fox5_bad.lz
+++ b/testsuite/fox5.lz
Binary files differ
diff --git a/testsuite/fox5_bad1.lz b/testsuite/fox5_bad1.lz
new file mode 100644
index 0000000..a3b5658
--- /dev/null
+++ b/testsuite/fox5_bad1.lz
Binary files differ
diff --git a/testsuite/fox5_bad.txt b/testsuite/fox5_bad1.txt
index 813a71f..14e5367 100644
--- a/testsuite/fox5_bad.txt
+++ b/testsuite/fox5_bad1.txt
@@ -1,4 +1,4 @@
The quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog.
-The quick brown fox c††zzzzzzzzzzzzzzzzzzzzzzThe quick brown fox jumps over the lazy dog.
+The quick brown fox c††zzzzzzzzzzzzzzzzzzzzzzVhe quick brown fox jumps over the lazy dog.
The quick brown fox jumps over the lazy dog.
diff --git a/testsuite/fox5_bad2.lz b/testsuite/fox5_bad2.lz
new file mode 100644
index 0000000..9993ea7
--- /dev/null
+++ b/testsuite/fox5_bad2.lz
Binary files differ
diff --git a/testsuite/fox5_bad3.lz b/testsuite/fox5_bad3.lz
new file mode 100644
index 0000000..ef58e47
--- /dev/null
+++ b/testsuite/fox5_bad3.lz
Binary files differ
diff --git a/testsuite/fox5_bad4.lz b/testsuite/fox5_bad4.lz
new file mode 100644
index 0000000..0474bb9
--- /dev/null
+++ b/testsuite/fox5_bad4.lz
Binary files differ
diff --git a/testsuite/fox5_bad5.lz b/testsuite/fox5_bad5.lz
new file mode 100644
index 0000000..6ec2740
--- /dev/null
+++ b/testsuite/fox5_bad5.lz
Binary files differ
diff --git a/testsuite/test.txt.lz b/testsuite/test.txt.lz
new file mode 100644
index 0000000..4db881a
--- /dev/null
+++ b/testsuite/test.txt.lz
Binary files differ
diff --git a/testsuite/test_bad1.lz b/testsuite/test_bad1.lz
index 0b84883..687c8a1 100644
--- a/testsuite/test_bad1.lz
+++ b/testsuite/test_bad1.lz
Binary files differ
diff --git a/testsuite/test_bad2.lz b/testsuite/test_bad2.lz
index cce6a3c..c8ac08c 100644
--- a/testsuite/test_bad2.lz
+++ b/testsuite/test_bad2.lz
Binary files differ
diff --git a/testsuite/test_bad3.lz b/testsuite/test_bad3.lz
index a1676bb..3ed8936 100644
--- a/testsuite/test_bad3.lz
+++ b/testsuite/test_bad3.lz
Binary files differ
diff --git a/testsuite/test_bad4.lz b/testsuite/test_bad4.lz
index a8f89a3..c912871 100644
--- a/testsuite/test_bad4.lz
+++ b/testsuite/test_bad4.lz
Binary files differ
diff --git a/testsuite/test_bad5.lz b/testsuite/test_bad5.lz
index 73e0142..7c70365 100644
--- a/testsuite/test_bad5.lz
+++ b/testsuite/test_bad5.lz
Binary files differ
diff --git a/testsuite/test_v0.lz b/testsuite/test_v0.lz
deleted file mode 100644
index a09b1e8..0000000
--- a/testsuite/test_v0.lz
+++ /dev/null
Binary files differ
diff --git a/testsuite/test_v1.lz b/testsuite/test_v1.lz
deleted file mode 100644
index f1c79eb..0000000
--- a/testsuite/test_v1.lz
+++ /dev/null
Binary files differ
diff --git a/testsuite/unzcrash.cc b/testsuite/unzcrash.cc
index abf61bb..24defa6 100644
--- a/testsuite/unzcrash.cc
+++ b/testsuite/unzcrash.cc
@@ -58,11 +58,11 @@ void show_help()
" -h, --help display this help and exit\n"
" -V, --version output version information and exit\n"
" -b, --bits=<range> test N-bit errors instead of full byte\n"
- " -p, --position=<bytes> first byte position to test\n"
+ " -p, --position=<bytes> first byte position to test [default 0]\n"
" -q, --quiet suppress all messages\n"
- " -s, --size=<bytes> number of byte positions to test\n"
+ " -s, --size=<bytes> number of byte positions to test [all]\n"
" -v, --verbose be verbose (a 2nd -v gives more)\n"
- "Examples of <range>: 1 1,2,3 1-4 1,3-5,8\n"
+ "Examples of <range>: 1 1,2,3 1-4 1,3-5,8 1-3,5-8\n"
"\nReport bugs to lzip-bug@nongnu.org\n"
"Lzip home page: http://www.nongnu.org/lzip/lzip.html\n" );
}
@@ -109,8 +109,8 @@ unsigned long long getnum( const char * const ptr,
const unsigned long long llimit,
const unsigned long long ulimit )
{
- errno = 0;
char * tail;
+ errno = 0;
unsigned long long result = strtoull( ptr, &tail, 0 );
if( tail == ptr )
{
@@ -172,7 +172,7 @@ public:
bool includes( const int i ) const
{ return ( i >= 1 && i <= 8 && data[i-1] ); }
- // Recognized formats: 1 1,2,3 1-4 1,3-5,8
+ // Recognized formats: 1 1,2,3 1-4 1,3-5,8 1-3,5-8
bool parse( const char * p )
{
for( int i = 0; i < 8; ++i ) data[i] = false;