summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- COPYING 3
-rw-r--r-- ChangeLog 72
-rw-r--r-- INSTALL 5
-rw-r--r-- Makefile.in 30
-rw-r--r-- NEWS 41
-rw-r--r-- README 55
-rw-r--r-- alone_to_lz.cc 24
-rw-r--r-- arg_parser.cc 17
-rw-r--r-- arg_parser.h 12
-rw-r--r-- byte_repair.cc 104
-rw-r--r-- common.h 7
-rwxr-xr-x configure 13
-rw-r--r-- debian/changelog 30
-rw-r--r-- debian/patches/debian/0001-build.patch 3
-rw-r--r-- decoder.cc 58
-rw-r--r-- decoder.h 30
-rw-r--r-- doc/lziprecover.1 59
-rw-r--r-- doc/lziprecover.info 1130
-rw-r--r-- doc/lziprecover.texi 1125
-rw-r--r-- dump_remove.cc 34
-rw-r--r-- fec.h 296
-rw-r--r-- fec_create.cc 608
-rw-r--r-- fec_repair.cc 1109
-rw-r--r-- gf16.cc 308
-rw-r--r-- gf8.cc 244
-rw-r--r-- list.cc 16
-rw-r--r-- lunzcrash.cc 70
-rw-r--r-- lzip.h 88
-rw-r--r-- lzip_index.cc 41
-rw-r--r-- lzip_index.h 20
-rw-r--r-- main.cc 497
-rw-r--r-- main_common.cc 21
-rw-r--r-- md5.cc 4
-rw-r--r-- md5.h 4
-rw-r--r-- merge.cc 91
-rw-r--r-- mtester.cc 56
-rw-r--r-- mtester.h 21
-rw-r--r-- nrep_stats.cc 10
-rw-r--r-- range_dec.cc 20
-rw-r--r-- recursive.cc 130
-rw-r--r-- reproduce.cc 58
-rw-r--r-- split.cc 16
-rwxr-xr-x testsuite/check.sh 921
-rw-r--r-- testsuite/fox6.lz bin 480 -> 0 bytes
-rw-r--r-- testsuite/fox6_b1nz.lz bin 0 -> 480 bytes
-rw-r--r-- testsuite/fox6_mark.lz bin 480 -> 0 bytes
-rw-r--r-- testsuite/fox_nz.lz bin 0 -> 80 bytes
-rw-r--r-- testsuite/test.txt 6
-rw-r--r-- testsuite/test.txt.lz bin 7376 -> 7341 bytes
-rw-r--r-- testsuite/test.txt.lz.fec bin 0 -> 4424 bytes
-rw-r--r-- testsuite/test.txt.lz.fec16 bin 0 -> 4424 bytes
-rw-r--r-- testsuite/test.txt.lzma bin 7363 -> 7328 bytes
-rw-r--r-- testsuite/test21636.txt (renamed from testsuite/test21723.txt) 0
-rw-r--r-- testsuite/test_3m.txt.lz.md5 1
-rw-r--r-- testsuite/test_bad1.lz bin 7376 -> 7341 bytes
-rw-r--r-- testsuite/test_bad2.lz bin 7376 -> 7341 bytes
-rw-r--r-- testsuite/test_bad3.lz bin 7376 -> 7341 bytes
-rw-r--r-- testsuite/test_bad4.lz bin 7376 -> 7341 bytes
-rw-r--r-- testsuite/test_bad5.lz bin 7376 -> 7341 bytes
-rw-r--r-- testsuite/test_bad6.lz bin 7376 -> 7341 bytes
-rw-r--r-- testsuite/test_bad6.txt 11
-rw-r--r-- testsuite/test_bad7.lz bin 7376 -> 7341 bytes
-rw-r--r-- testsuite/test_bad7.txt 30
-rw-r--r-- testsuite/test_bad8.lz bin 7376 -> 7341 bytes
-rw-r--r-- testsuite/test_bad9.lz bin 7376 -> 7341 bytes
-rw-r--r-- testsuite/test_bad9.txt 12
-rw-r--r-- testsuite/test_em.txt.lz bin 14024 -> 0 bytes
-rw-r--r-- unzcrash.cc 61
68 files changed, 5931 insertions, 1691 deletions
diff --git a/COPYING b/COPYING
index 4ad17ae..42fe735 100644
--- a/COPYING
+++ b/COPYING
@@ -1,8 +1,7 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
- Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
- 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc. <http://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
diff --git a/ChangeLog b/ChangeLog
index c0737a1..9505b85 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,19 @@
+2025-01-08 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 1.25 released.
+ * New options '-F, --fec', '-0' to '-9', '-b, --block-size',
+ '--fec-file', '-r, --recursive', and '-R, --dereference-recursive'.
+ * Change short name of option '--byte-repair' to '-B'.
+ * byte_repair.cc: Repair a nonzero first LZMA byte.
+ * Make '-i' ignore empty members and nonzero first LZMA byte.
+ * Rename option '--clear-marking' to '--nonzero-repair'.
+ * Remove options '--empty-error' and '--marking-error'.
+ * decoder.cc (decode_member): Remove support for Sync Flush marker.
+ * merge.cc (copy_file): Add name arguments, use 'show_file_error'.
+ * lziprecover.texi: New chapter 'Syntax of command-line arguments'.
+ * check.sh: Use 'cp' instead of 'cat'.
+ * testsuite: Require lzip/clzip. Change several test files.
+
2024-01-20 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.24 released.
@@ -122,11 +138,10 @@
* repair.cc: Repair a damaged dictionary size in the header.
* repair.cc: Try bytes at offsets 7 to 11 first.
* Decompression time has been reduced by 2%.
- * main.cc (decompress): Print up to 6 bytes of trailing data
- when '-tvvvv' is specified.
- * decoder.cc (verify_trailer): Remove test of final code.
* main.cc (main): Delete '--output' file if infd is a terminal.
- * main.cc (main): Don't use stdin more than once.
+ (main): Don't use stdin more than once.
+ (decompress): Print 6 bytes of trailing data at verbosity level 4.
+ * decoder.cc (verify_trailer): Remove test of final code.
* Use 'close_and_set_permissions' and 'set_signals' in all modes.
* range_dec.cc (list_file): Show dictionary size and size of
trailing data (if any) with '-lv'.
@@ -142,8 +157,7 @@
* lziprecover.texi: New chapter 'Trailing data'.
* configure: Avoid warning on some shells when testing for g++.
* Makefile.in: Detect the existence of install-info.
- * check.sh: Don't check error messages.
- * check.sh: A POSIX shell is required to run the tests.
+ * check.sh: Require a POSIX shell. Don't check error messages.
2015-05-28 Antonio Diaz Diaz <antonio@gnu.org>
@@ -166,8 +180,8 @@
2013-09-14 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.15 released.
- * repair.cc: Repair multimember files with up to one byte error
- per member.
+ * repair.cc: Repair multimember files with up to one byte error per
+ member.
* merge.cc: Merge multimember files.
* main.cc (show_header): Don't show header version.
* lziprecover.texinfo: New chapters 'Repairing files',
@@ -180,23 +194,22 @@
* Option '-l, --list' now accepts more than one file.
* Decompression time has been reduced by 12%.
* split.cc: Use as few digits as possible in file names.
- * split.cc: In verbose mode show names of files being created.
+ In verbose mode show names of files being created.
* main.cc (show_header): Show header version if verbosity >= 4.
+ (main): Use 'setmode' instead of '_setmode' on Windows and OS/2.
* configure: Options now accept a separate argument.
* Makefile.in: New targets 'install-as-lzip' and 'install-bin'.
- * main.cc: Use 'setmode' instead of '_setmode' on Windows and OS/2.
2012-02-24 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 1.13 released.
- * Lziprecover is now distributed in its own package. Until
- version 1.12 it was included in the lzip package. Previous
- entries in this file are taken from there.
+ * Lziprecover is now distributed in its own package. Until version
+ 1.12 it was included in the lzip package. Previous entries in this
+ file are taken from there.
* lziprecover.cc: Rename to main.cc.
* New files merge.cc, repair.cc, split.cc, and range_dec.cc.
- * main.cc: Add decompressor options (-c, -d, -k, -t) so that
- an external decompressor is not needed for recovery nor for
- "make check".
+ * main.cc: Add decompressor options (-c, -d, -k, -t) so that an
+ external decompressor is not needed for recovery and 'make check'.
* New option '-D, --range-decompress', which extracts a range of
bytes decompressing only the members containing the desired data.
* New option '-l, --list', which prints correct total file sizes
@@ -211,25 +224,23 @@
* Version 1.12 released.
* lziprecover.cc: If '-v' is not specified show errors only.
* unzcrash.cc: Use Arg_parser.
- * unzcrash.cc: New options '-b, --bits', '-p, --position', and
- '-s, --size'.
+ New options '-b, --bits', '-p, --position', and '-s, --size'.
2010-09-16 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 1.11 released.
- * lziprecover.cc: New option '-m, --merge', which tries to produce a
- correct file by merging the good parts of two or more damaged copies.
- * lziprecover.cc: New option '-R, --repair' for repairing a
- 1-byte error in single-member files.
* decoder.cc (decode_member): Detect file errors earlier to improve
efficiency of lziprecover's new repair capability.
This change also prevents (harmless) access to uninitialized
memory when decompressing a corrupt file.
- * lziprecover.cc: New options '-f, --force' and '-o, --output'.
- * lziprecover.cc: New option '-s, --split' to select the until
- now only operation of splitting multimember files.
- * lziprecover.cc: If no operation is specified, warn the user and do
- nothing.
+ * lziprecover.cc: New option '-m, --merge', which tries to produce a
+ correct file by merging the good parts of two or more damaged copies.
+ New option '-R, --repair' for repairing a 1-byte error in
+ single-member files.
+ New options '-f, --force' and '-o, --output'.
+ New option '-s, --split' to select the until now only operation of
+ splitting multimember files.
+ If no operation is specified, warn the user and do nothing.
2009-06-22 Antonio Diaz Diaz <ant_diaz@teleline.es>
@@ -244,8 +255,7 @@
* unzcrash.cc: Test all 1-byte errors.
-Copyright (C) 2009-2024 Antonio Diaz Diaz.
+Copyright (C) 2009-2025 Antonio Diaz Diaz.
-This file is a collection of facts, and thus it is not copyrightable,
-but just in case, you have unlimited permission to copy, distribute, and
-modify it.
+This file is a collection of facts, and thus it is not copyrightable, but just
+in case, you have unlimited permission to copy, distribute, and modify it.
diff --git a/INSTALL b/INSTALL
index 8cb9bdd..016a15c 100644
--- a/INSTALL
+++ b/INSTALL
@@ -4,9 +4,10 @@ You will need a C++98 compiler with support for 'long long'.
(gcc 3.3.6 or newer is recommended).
I use gcc 6.1.0 and 3.3.6, but the code should compile with any standards
compliant compiler.
-Gcc is available at http://gcc.gnu.org.
+Gcc is available at http://gcc.gnu.org
Lzip 1.16 (or clzip 1.6) or newer is required to run the tests.
+Lzip is available at http://www.nongnu.org/lzip/lzip.html
Unzcrash needs a 'zcmp' program able to understand the format being tested.
For example the zcmp provided by zutils.
@@ -78,7 +79,7 @@ After running 'configure', you can run 'make' and 'make install' as
explained above.
-Copyright (C) 2009-2024 Antonio Diaz Diaz.
+Copyright (C) 2009-2025 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute, and modify it.
diff --git a/Makefile.in b/Makefile.in
index 8a7b3a9..ee26089 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -2,14 +2,15 @@
DISTNAME = $(pkgname)-$(pkgversion)
INSTALL = install
INSTALL_PROGRAM = $(INSTALL) -m 755
-INSTALL_DATA = $(INSTALL) -m 644
INSTALL_DIR = $(INSTALL) -d -m 755
+INSTALL_DATA = $(INSTALL) -m 644
SHELL = /bin/sh
CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1
objs = arg_parser.o alone_to_lz.o lzip_index.o list.o byte_repair.o \
- dump_remove.o lunzcrash.o md5.o merge.o mtester.o nrep_stats.o \
- range_dec.o reproduce.o split.o decoder.o main.o
+ dump_remove.o fec_create.o fec_repair.o gf8.o gf16.o lunzcrash.o \
+ md5.o merge.o mtester.o nrep_stats.o range_dec.o recursive.o \
+ reproduce.o split.o decoder.o main.o
unzobjs = arg_parser.o unzcrash.o
@@ -22,7 +23,7 @@ unzobjs = arg_parser.o unzcrash.o
all : $(progname)
$(progname) : $(objs)
- $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(objs)
+ $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(objs) $(LIBS)
unzcrash : $(unzobjs)
$(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(unzobjs)
@@ -38,7 +39,8 @@ unzcrash.o : unzcrash.cc
# prevent 'make' from trying to remake source files
$(VPATH)/configure $(VPATH)/Makefile.in $(VPATH)/doc/$(pkgname).texi : ;
-%.h %.cc : ;
+MAKEFLAGS += -r
+.SUFFIXES :
$(objs) : Makefile
alone_to_lz.o : lzip.h common.h mtester.h
@@ -46,15 +48,20 @@ arg_parser.o : arg_parser.h
byte_repair.o : lzip.h common.h mtester.h lzip_index.h
decoder.o : lzip.h common.h decoder.h
dump_remove.o : lzip.h common.h lzip_index.h
+fec_create.o : lzip.h common.h md5.h fec.h
+fec_repair.o : lzip.h common.h md5.h fec.h
+gf8.o : lzip.h common.h md5.h fec.h
+gf16.o : lzip.h common.h md5.h fec.h
list.o : lzip.h common.h lzip_index.h
lunzcrash.o : lzip.h common.h md5.h mtester.h lzip_index.h
lzip_index.o : lzip.h common.h lzip_index.h
-main.o : arg_parser.h lzip.h common.h decoder.h main_common.cc
+main.o : arg_parser.h lzip.h common.h decoder.h md5.h fec.h main_common.cc
md5.o : md5.h
merge.o : lzip.h common.h decoder.h lzip_index.h
mtester.o : lzip.h common.h md5.h mtester.h
nrep_stats.o : lzip.h common.h lzip_index.h
range_dec.o : lzip.h common.h decoder.h lzip_index.h
+recursive.o : lzip.h common.h md5.h fec.h
reproduce.o : lzip.h common.h md5.h mtester.h lzip_index.h
split.o : lzip.h common.h lzip_index.h
unzcrash.o : Makefile arg_parser.h common.h main_common.cc
@@ -69,7 +76,7 @@ $(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texi
man : $(VPATH)/doc/$(progname).1
$(VPATH)/doc/$(progname).1 : $(progname)
- help2man -n 'recovers data from damaged lzip files' -o $@ ./$(progname)
+ help2man -n 'recovers data from damaged files' -o $@ ./$(progname)
Makefile : $(VPATH)/configure $(VPATH)/Makefile.in
./config.status
@@ -141,21 +148,20 @@ dist : doc
$(DISTNAME)/testsuite/check.sh \
$(DISTNAME)/testsuite/fox6_bad1.txt \
$(DISTNAME)/testsuite/test.txt \
- $(DISTNAME)/testsuite/test21723.txt \
+ $(DISTNAME)/testsuite/test21636.txt \
$(DISTNAME)/testsuite/test_bad[6-9].txt \
- $(DISTNAME)/testsuite/test_3m.txt.lz.md5 \
$(DISTNAME)/testsuite/fox.lz \
$(DISTNAME)/testsuite/fox_*.lz \
- $(DISTNAME)/testsuite/fox6.lz \
+ $(DISTNAME)/testsuite/fox6_b1nz.lz \
$(DISTNAME)/testsuite/fox6_sc[1-6].lz \
$(DISTNAME)/testsuite/fox6_bad[1-6].lz \
- $(DISTNAME)/testsuite/fox6_mark.lz \
$(DISTNAME)/testsuite/numbers.lz \
$(DISTNAME)/testsuite/numbersbt.lz \
$(DISTNAME)/testsuite/test.txt.lz \
$(DISTNAME)/testsuite/test.txt.lzma \
$(DISTNAME)/testsuite/test_bad[1-9].lz \
- $(DISTNAME)/testsuite/test_em.txt.lz
+ $(DISTNAME)/testsuite/test.txt.lz.fec \
+ $(DISTNAME)/testsuite/test.txt.lz.fec16
rm -f $(DISTNAME)
lzip -v -9 $(DISTNAME).tar
diff --git a/NEWS b/NEWS
index 2ac8da5..e1cde43 100644
--- a/NEWS
+++ b/NEWS
@@ -1,35 +1,32 @@
-Changes in version 1.24:
+Changes in version 1.25:
-The option '--empty-error', which forces exit status 2 if any empty member
-is found, has been added.
+The option '-F, --fec', which implements Forward Error Correction (FEC), has
+been added.
-The option '--marking-error', which forces exit status 2 if the first LZMA
-byte is non-zero in any member, has been added.
+The options '-0' to '-9' (FEC fragmentation level) have been added.
-The option '--clear-marking', which sets to zero the first LZMA byte of each
-member, has been added.
+The option '-b, --block-size', which sets the FEC block size, has been added.
-The keyword 'empty' is now recognized in the argument of '--dump',
-'--remove', and '--strip'.
+The option '--fec-file', which sets the fec file to be used, has been added.
-The option '--repair' has been renamed to '--byte-repair'.
+The options '-r, --recursive' and '-R, --dereference-recursive' have been
+added for recursive creation and reading of fec files.
-The option '--debug-repair' has been renamed to '--debug-byte-repair'.
+The short name of option '--byte-repair' has been changed to '-B'.
-File diagnostics have been reformatted as 'PROGRAM: FILE: MESSAGE'.
+The option '--byte-repair' now repairs a nonzero first LZMA byte.
-Diagnostics caused by invalid arguments to command-line options now show the
-argument and the name of the option.
+When decompressing, testing, or listing, lziprecover now exits with error
+status 2 if any empty member is found in a regular multimember file unless
+'-i' is given.
-The option '-o, --output' now preserves dates, permissions, and ownership of
-the file, when decompressing exactly one file.
+When decompressing or testing, lziprecover now exits with error status 2 if
+the first byte of the LZMA stream is not 0 unless '-i' is given.
-The option '-o, --output' now creates missing intermediate directories when
-writing to a file.
+The option '--clear-marking' has been renamed to '--nonzero-repair'.
-The option '--no-verify' of unzcrash has been renamed to '--no-check'.
+Options '--empty-error' and '--marking-error' have been removed.
-The variable MAKEINFO has been added to configure and Makefile.in.
+The chapter 'Syntax of command-line arguments' has been added to the manual.
-The makefile target 'install-as-lzip' has been removed because '--reproduce'
-needs a lzip compressor (not just a decompressor) named 'lzip' by default.
+Lzip 1.16 (or clzip 1.6) or newer is required to run the tests.
diff --git a/README b/README
index 97e37ba..d5d0294 100644
--- a/README
+++ b/README
@@ -1,11 +1,10 @@
+See the file INSTALL for compilation and installation instructions.
+
Description
Lziprecover is a data recovery tool and decompressor for files in the lzip
-compressed data format (.lz). Lziprecover is able to repair slightly damaged
-files (up to one single-byte error per member), produce a correct file by
-merging the good parts of two or more damaged copies, reproduce a missing
-(zeroed) sector using a reference file, extract data from damaged files,
-decompress files, and test integrity of files.
+compressed data format (.lz). Lziprecover also provides Forward Error
+Correction (FEC) able to repair any kind of file.
Lziprecover can remove the damaged members from multimember files, for
example multimember tar.lz archives.
@@ -13,31 +12,9 @@ example multimember tar.lz archives.
Lziprecover provides random access to the data in multimember files; it only
decompresses the members containing the desired data.
-Lziprecover facilitates the management of metadata stored as trailing data
-in lzip files.
-
Lziprecover is not a replacement for regular backups, but a last line of
defense for the case where the backups are also damaged.
-The lzip file format is designed for data sharing and long-term archiving,
-taking into account both data integrity and decoder availability:
-
- * The lzip format provides very safe integrity checking and some data
- recovery means. The program lziprecover can repair bit flip errors
- (one of the most common forms of data corruption) in lzip files, and
- provides data recovery capabilities, including error-checked merging
- of damaged copies of a file.
-
- * The lzip format is as simple as possible (but not simpler). The lzip
- manual provides the source code of a simple decompressor along with a
- detailed explanation of how it works, so that with the only help of the
- lzip manual it would be possible for a digital archaeologist to extract
- the data from a lzip file long after quantum computers eventually
- render LZMA obsolete.
-
- * Additionally the lzip reference implementation is copylefted, which
- guarantees that it will remain free forever.
-
A nice feature of the lzip format is that a corrupt byte is easier to repair
the nearer it is from the beginning of the file. Therefore, with the help of
lziprecover, losing an entire archive just because of a corrupt byte near
@@ -54,13 +31,14 @@ Lziprecover is able to recover or decompress files produced by any of the
compressors in the lzip family: lzip, plzip, minilzip/lzlib, clzip, and
pdlzip.
-If the cause of file corruption is a damaged medium, the combination
-GNU ddrescue + lziprecover is the recommended option for recovering data
-from damaged lzip files.
+GNU ddrescue provides data recovery capabilities which nicely complement
+those of lziprecover. If the cause of file corruption is a damaged medium,
+the combination GNU ddrescue + lziprecover is the recommended option for
+recovering data from damaged files.
If a file is too damaged for lziprecover to repair it, all the recoverable
-data in all members of the file can be extracted in one step with the
-command 'lziprecover -cd --ignore-errors file.lz > file'.
+data in all members of the file can be extracted with the command
+'lziprecover -cd --ignore-errors file.lz > file'.
When recovering data, lziprecover takes as arguments the names of the
damaged files and writes zero or more recovered files depending on the
@@ -70,21 +48,16 @@ files themselves are kept unchanged.
When decompressing or testing file integrity, lziprecover behaves like lzip
or lunzip.
-To give you an idea of its possibilities, when merging two copies, each of
-them with one damaged area affecting 1 percent of the copy, the probability
-of obtaining a correct file is about 98 percent. With three such copies the
-probability rises to 99.97 percent. For large files (a few MB) with small
-errors (one sector damaged per copy), the probability approaches 100 percent
-even with only two copies. (Supposing that the errors are randomly located
-inside each copy).
-
The lziprecover package also includes unzcrash, a program written to test
robustness to decompression of corrupted data, inspired by unzcrash.c from
Julian Seward's bzip2. Type 'make unzcrash' in the lziprecover source
directory to build it. Then try 'unzcrash --help'.
+Lziprecover uses Arg_parser for command-line argument parsing:
+http://www.nongnu.org/arg-parser/arg_parser.html
+
-Copyright (C) 2009-2024 Antonio Diaz Diaz.
+Copyright (C) 2009-2025 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute, and modify it.
diff --git a/alone_to_lz.cc b/alone_to_lz.cc
index d67ea5c..c72b009 100644
--- a/alone_to_lz.cc
+++ b/alone_to_lz.cc
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2024 Antonio Diaz Diaz.
+ Copyright (C) 2009-2025 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -50,9 +50,9 @@ uint8_t * read_file( const int infd, long * const file_sizep,
while( file_size >= buffer_size - 20 && !errno )
{
if( buffer_size >= LONG_MAX )
- { show_file_error( filename, "Input file is larger than LONG_MAX." );
+ { show_file_error( filename, large_file_msg );
std::free( buffer ); return 0; }
- buffer_size = ( buffer_size <= LONG_MAX / 2 ) ? 2 * buffer_size : LONG_MAX;
+ buffer_size = (buffer_size <= LONG_MAX / 2) ? 2 * buffer_size : LONG_MAX;
uint8_t * const tmp = (uint8_t *)std::realloc( buffer, buffer_size );
if( !tmp ) { std::free( buffer ); throw std::bad_alloc(); }
buffer = tmp;
@@ -61,7 +61,7 @@ uint8_t * read_file( const int infd, long * const file_sizep,
}
if( errno )
{
- show_file_error( filename, "Error reading input file", errno );
+ show_file_error( filename, read_error_msg, errno );
std::free( buffer ); return 0;
}
*file_sizep = file_size;
@@ -88,7 +88,7 @@ int alone_to_lz( const int infd, const Pretty_print & pp )
uint8_t * const buffer = read_file( infd, &file_size, pp.name() );
if( !buffer ) return 1;
if( file_size < lzma_header_size )
- { show_file_error( pp.name(), "Input file is too short." );
+ { show_file_error( pp.name(), short_file_msg );
std::free( buffer ); return 2; }
if( buffer[0] != 93 ) // (45 * 2) + (9 * 0) + 3
@@ -100,7 +100,7 @@ int alone_to_lz( const int infd, const Pretty_print & pp )
show_file_error( pp.name(), "Input file has non-default LZMA properties." );
std::free( buffer ); return 2;
}
- for( int i = 5; i < 13; ++i ) if( buffer[i] != 0xFF )
+ for( int i = 5; i < lzma_header_size; ++i ) if( buffer[i] != 0xFF )
{ show_file_error( pp.name(), "Input file is non-streamed." );
std::free( buffer ); return 2; }
@@ -113,10 +113,12 @@ int alone_to_lz( const int infd, const Pretty_print & pp )
Lzip_header & header = *(Lzip_header *)( buffer + offset );
header.set_magic();
header.dictionary_size( dictionary_size );
+ buffer[lzma_header_size] = 0; // reset first LZMA byte
for( int i = 0; i < Lzip_trailer::size; ++i ) buffer[file_size++] = 0;
+ const long lzip_size = file_size - offset;
// compute and fill trailer
{
- LZ_mtester mtester( buffer + offset, file_size - offset, dictionary_size );
+ LZ_mtester mtester( buffer + offset, lzip_size, dictionary_size );
const int result = mtester.test_member();
if( result == 1 && orig_dictionary_size > max_dictionary_size )
{ pp( "dictionary size is too large" ); std::free( buffer ); return 2; }
@@ -136,13 +138,13 @@ int alone_to_lz( const int infd, const Pretty_print & pp )
trailer.member_size( mtester.member_position() );
}
// check converted member
- LZ_mtester mtester( buffer + offset, file_size - offset, dictionary_size );
+ LZ_mtester mtester( buffer + offset, lzip_size, dictionary_size );
if( mtester.test_member() != 0 || !mtester.finished() )
{ pp( "conversion failed" ); std::free( buffer ); return 2; }
- if( writeblock( outfd, buffer + offset, file_size - offset ) != file_size - offset )
+ if( writeblock( outfd, buffer + offset, lzip_size ) != lzip_size )
{
- show_error( "Error writing output file", errno );
- std::free( buffer ); return 1;
+ show_file_error( printable_name( output_filename, false ), wr_err_msg,
+ errno ); std::free( buffer ); return 1;
}
std::free( buffer );
if( verbosity >= 1 ) std::fputs( "done\n", stderr );
diff --git a/arg_parser.cc b/arg_parser.cc
index 0c04d8e..9275846 100644
--- a/arg_parser.cc
+++ b/arg_parser.cc
@@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command-line argument parser. (C++ version)
- Copyright (C) 2006-2024 Antonio Diaz Diaz.
+ Copyright (C) 2006-2025 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@@ -75,19 +75,19 @@ bool Arg_parser::parse_long_option( const char * const opt, const char * const a
error_ += "' requires an argument";
return false;
}
- data.back().argument = &opt[len+3];
+ data.back().argument = &opt[len+3]; // argument may be empty
return true;
}
- if( options[index].has_arg == yes )
+ if( options[index].has_arg == yes || options[index].has_arg == yme )
{
- if( !arg || !arg[0] )
+ if( !arg || ( options[index].has_arg == yes && !arg[0] ) )
{
error_ = "option '--"; error_ += options[index].long_name;
error_ += "' requires an argument";
return false;
}
- ++argind; data.back().argument = arg;
+ ++argind; data.back().argument = arg; // argument may be empty
return true;
}
@@ -123,15 +123,16 @@ bool Arg_parser::parse_short_option( const char * const opt, const char * const
{
data.back().argument = &opt[cind]; ++argind; cind = 0;
}
- else if( options[index].has_arg == yes )
+ else if( options[index].has_arg == yes || options[index].has_arg == yme )
{
- if( !arg || !arg[0] )
+ if( !arg || ( options[index].has_arg == yes && !arg[0] ) )
{
error_ = "option requires an argument -- '"; error_ += c;
error_ += '\'';
return false;
}
- data.back().argument = arg; ++argind; cind = 0;
+ ++argind; cind = 0;
+ data.back().argument = arg; // argument may be empty
}
}
return true;
diff --git a/arg_parser.h b/arg_parser.h
index 1eeec9a..2fe5a61 100644
--- a/arg_parser.h
+++ b/arg_parser.h
@@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command-line argument parser. (C++ version)
- Copyright (C) 2006-2024 Antonio Diaz Diaz.
+ Copyright (C) 2006-2025 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@@ -36,14 +36,18 @@
The argument '--' terminates all options; any following arguments are
treated as non-option arguments, even if they begin with a hyphen.
- The syntax for optional option arguments is '-<short_option><argument>'
- (without whitespace), or '--<long_option>=<argument>'.
+ The syntax of options with an optional argument is
+ '-<short_option><argument>' (without whitespace), or
+ '--<long_option>=<argument>'.
+
+ The syntax of options with an empty argument is '-<short_option> ""',
+ '--<long_option> ""', or '--<long_option>=""'.
*/
class Arg_parser
{
public:
- enum Has_arg { no, yes, maybe };
+ enum Has_arg { no, yes, maybe, yme }; // yme = yes but maybe empty
struct Option
{
diff --git a/byte_repair.cc b/byte_repair.cc
index 370738b..2344f35 100644
--- a/byte_repair.cc
+++ b/byte_repair.cc
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2024 Antonio Diaz Diaz.
+ Copyright (C) 2009-2025 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -58,6 +58,19 @@ bool gross_damage( const uint8_t * const mbuffer, const long msize )
}
+// Return value: 0 = errors remain, 6 = repaired pos
+int repair_nonzero( uint8_t * const mbuffer, const long msize )
+ {
+ mbuffer[6] = 0;
+ const Lzip_header & header = *(Lzip_header *)mbuffer;
+ const unsigned dictionary_size = header.dictionary_size();
+ if( !isvalid_ds( dictionary_size ) ) return 0;
+ LZ_mtester mtester( mbuffer, msize, dictionary_size );
+ if( mtester.test_member() == 0 ) return 6;
+ return 0;
+ }
+
+
// Return value: 0 = no change, 5 = repaired pos
int repair_dictionary_size( uint8_t * const mbuffer, const long msize )
{
@@ -69,10 +82,10 @@ int repair_dictionary_size( uint8_t * const mbuffer, const long msize )
const bool valid_ds = isvalid_ds( dictionary_size );
if( valid_ds && dictionary_size >= data_size ) return 0; // can't be bad
- const unsigned long long dictionary_size_9 = 1 << 25; // dict size of opt -9
- if( !valid_ds || dictionary_size < dictionary_size_9 )
+ const unsigned long long dict_size_9 = 1 << 25; // dict size of opt -9
+ if( !valid_ds || dictionary_size < dict_size_9 )
{
- dictionary_size = std::min( data_size, dictionary_size_9 );
+ dictionary_size = std::min( data_size, dict_size_9 );
if( dictionary_size < min_dictionary_size )
dictionary_size = min_dictionary_size;
LZ_mtester mtester( mbuffer, msize, dictionary_size );
@@ -82,7 +95,7 @@ int repair_dictionary_size( uint8_t * const mbuffer, const long msize )
if( result != 1 || mtester.max_distance() <= dictionary_size ||
mtester.max_distance() > max_dictionary_size ) return 0;
}
- if( data_size > dictionary_size_9 )
+ if( data_size > dict_size_9 )
{
dictionary_size =
std::min( data_size, (unsigned long long)max_dictionary_size );
@@ -155,6 +168,15 @@ long repair_member( uint8_t * const mbuffer, const long long mpos,
} // end namespace
+bool safe_seek( const int fd, const long long pos,
+ const std::string & filename )
+ {
+ if( lseek( fd, pos, SEEK_SET ) == pos ) return true;
+ show_file_error( filename.c_str(), "Seek error", errno );
+ return false;
+ }
+
+
long seek_write( const int fd, const uint8_t * const buf, const long size,
const long long pos )
{
@@ -165,16 +187,16 @@ long seek_write( const int fd, const uint8_t * const buf, const long size,
uint8_t * read_member( const int infd, const long long mpos,
- const long long msize, const char * const filename )
+ const long long msize, const std::string & filename )
{
if( msize <= 0 || msize > LONG_MAX )
- { show_file_error( filename,
+ { show_file_error( filename.c_str(),
"Input file contains member larger than LONG_MAX." ); return 0; }
if( !safe_seek( infd, mpos, filename ) ) return 0;
uint8_t * const buffer = new uint8_t[msize];
if( readblock( infd, buffer, msize ) != msize )
- { show_file_error( filename, "Error reading input file", errno );
+ { show_file_error( filename.c_str(), read_error_msg, errno );
delete[] buffer; return 0; }
return buffer;
}
@@ -206,8 +228,10 @@ int byte_repair( const std::string & input_filename,
const long long msize = lzip_index.mblock( i ).size();
if( !safe_seek( infd, mpos, filename ) ) cleanup_and_fail( 1 );
long long failure_pos = 0;
- if( test_member_from_file( infd, msize, &failure_pos ) == 0 ) continue;
- if( failure_pos < Lzip_header::size ) // End Of File
+ bool nonzero = false;
+ const int ret = test_member_from_file( infd, msize, &failure_pos, &nonzero );
+ if( ret == 0 && !nonzero ) continue;
+ if( ret != 0 && failure_pos < Lzip_header::size ) // End Of File
{ show_error( "Can't repair error in input file." );
cleanup_and_fail( 2 ); }
if( failure_pos >= msize - 8 ) failure_pos = msize - 8 - 1;
@@ -218,14 +242,17 @@ int byte_repair( const std::string & input_filename,
i + 1, lzip_index.members(), mpos + failure_pos );
std::fflush( stdout );
}
- uint8_t * const mbuffer = read_member( infd, mpos, msize, filename );
+ uint8_t * const mbuffer = read_member( infd, mpos, msize, input_filename );
if( !mbuffer ) cleanup_and_fail( 1 );
const Lzip_header & header = *(const Lzip_header *)mbuffer;
const unsigned dictionary_size = header.dictionary_size();
long pos = 0;
+ if( !nonzero && mbuffer[6] != 0 ) nonzero = true; // bad DS
if( !gross_damage( mbuffer, msize ) )
{
- pos = repair_dictionary_size( mbuffer, msize );
+ if( nonzero ) pos = repair_nonzero( mbuffer, msize );
+ if( pos == 0 )
+ pos = repair_dictionary_size( mbuffer, msize );
if( pos == 0 )
pos = repair_member( mbuffer, mpos, msize, header.size + 1,
header.size + 6, dictionary_size, terminator );
@@ -243,12 +270,14 @@ int byte_repair( const std::string & input_filename,
if( !safe_seek( infd, 0, filename ) ) return 1;
set_signal_handler();
if( !open_outstream( true, true, false, true, to_file ) ) return 1;
- if( !copy_file( infd, outfd ) ) // copy whole file
- cleanup_and_fail( 1 );
+ if( !copy_file( infd, outfd, input_filename, output_filename ) )
+ cleanup_and_fail( 1 ); // copy whole file
}
- if( seek_write( outfd, mbuffer + pos, 1, mpos + pos ) != 1 )
- { show_error( "Error writing output file", errno );
- cleanup_and_fail( 1 ); }
+ if( ( nonzero && pos != 6 &&
+ seek_write( outfd, mbuffer + 6, 1, mpos + 6 ) != 1 ) ||
+ seek_write( outfd, mbuffer + pos, 1, mpos + pos ) != 1 )
+ { show_file_error( printable_name( output_filename, false ),
+ wr_err_msg, errno ); cleanup_and_fail( 1 ); }
}
delete[] mbuffer;
if( pos == 0 )
@@ -266,28 +295,30 @@ int byte_repair( const std::string & input_filename,
}
if( !close_outstream( &in_stats ) ) return 1;
if( verbosity >= 1 )
- std::fputs( "Copy of input file repaired successfully.\n", stdout );
+ std::printf( "Repaired copy of '%s' written to '%s'\n",
+ filename, output_filename.c_str() );
return 0;
}
-int debug_delay( const char * const input_filename,
+int debug_delay( const std::string & input_filename,
const Cl_options & cl_opts, Block range,
const char terminator )
{
+ const char * const filename = input_filename.c_str();
struct stat in_stats; // not used
- const int infd = open_instream( input_filename, &in_stats, false, true );
+ const int infd = open_instream( filename, &in_stats, false, true );
if( infd < 0 ) return 1;
const Lzip_index lzip_index( infd, cl_opts );
if( lzip_index.retval() != 0 )
- { show_file_error( input_filename, lzip_index.error().c_str() );
+ { show_file_error( filename, lzip_index.error().c_str() );
return lzip_index.retval(); }
if( range.end() > lzip_index.cdata_size() )
range.size( std::max( 0LL, lzip_index.cdata_size() - range.pos() ) );
if( range.size() <= 0 )
- { show_file_error( input_filename, "Nothing to do." ); return 0; }
+ { show_file_error( filename, "Nothing to do; range is empty." ); return 0; }
for( long i = 0; i < lzip_index.members(); ++i )
{
@@ -353,24 +384,26 @@ int debug_delay( const char * const input_filename,
}
-int debug_byte_repair( const char * const input_filename,
+int debug_byte_repair( const std::string & input_filename,
const Cl_options & cl_opts, const Bad_byte & bad_byte,
const char terminator )
{
+ const char * const filename = input_filename.c_str();
struct stat in_stats; // not used
- const int infd = open_instream( input_filename, &in_stats, false, true );
+ const int infd = open_instream( filename, &in_stats, false, true );
if( infd < 0 ) return 1;
const Lzip_index lzip_index( infd, cl_opts );
if( lzip_index.retval() != 0 )
- { show_file_error( input_filename, lzip_index.error().c_str() );
+ { show_file_error( filename, lzip_index.error().c_str() );
return lzip_index.retval(); }
long idx = 0;
for( ; idx < lzip_index.members(); ++idx )
if( lzip_index.mblock( idx ).includes( bad_byte.pos ) ) break;
if( idx >= lzip_index.members() )
- { show_file_error( input_filename, "Nothing to do." ); return 0; }
+ { show_file_error( filename, "Nothing to do; byte is beyond EOF." );
+ return 0; }
const long long mpos = lzip_index.mblock( idx ).pos();
const long long msize = lzip_index.mblock( idx ).size();
@@ -389,11 +422,12 @@ int debug_byte_repair( const char * const input_filename,
if( !mbuffer ) return 1;
const Lzip_header & header = *(const Lzip_header *)mbuffer;
const unsigned dictionary_size = header.dictionary_size();
- const uint8_t good_value = mbuffer[bad_byte.pos-mpos];
+ const long long bad_pos = bad_byte.pos - mpos;
+ const uint8_t good_value = mbuffer[bad_pos];
const uint8_t bad_value = bad_byte( good_value );
- mbuffer[bad_byte.pos-mpos] = bad_value;
+ mbuffer[bad_pos] = bad_value;
long failure_pos = 0;
- if( bad_byte.pos != 5 || isvalid_ds( header.dictionary_size() ) )
+ if( bad_pos != 5 || isvalid_ds( header.dictionary_size() ) )
{
LZ_mtester mtester( mbuffer, msize, header.dictionary_size() );
if( mtester.test_member() == 0 && mtester.finished() )
@@ -417,6 +451,8 @@ int debug_byte_repair( const char * const input_filename,
if( failure_pos >= msize ) failure_pos = msize - 1;
long pos = repair_dictionary_size( mbuffer, msize );
if( pos == 0 )
+ if( mbuffer[6] != 0 ) pos = repair_nonzero( mbuffer, msize );
+ if( pos == 0 )
pos = repair_member( mbuffer, mpos, msize, header.size + 1,
header.size + 6, dictionary_size, terminator );
if( pos == 0 )
@@ -438,21 +474,21 @@ int debug_byte_repair( const char * const input_filename,
(Packet sizes are a fractionary number of bytes. The packet and marker
sizes shown by option -X are the number of extra bytes required to decode
the packet, not counting the data present in the range decoder before and
- after the decoding. The max marker size of a 'Sync Flush marker' does not
- include the 5 bytes read by rdec.load).
+ after the decoding.
if bad_byte.pos >= cdata_size, bad_byte is ignored.
*/
-int debug_decompress( const char * const input_filename,
+int debug_decompress( const std::string & input_filename,
const Cl_options & cl_opts, const Bad_byte & bad_byte,
const bool show_packets )
{
+ const char * const filename = input_filename.c_str();
struct stat in_stats;
- const int infd = open_instream( input_filename, &in_stats, false, true );
+ const int infd = open_instream( filename, &in_stats, false, true );
if( infd < 0 ) return 1;
const Lzip_index lzip_index( infd, cl_opts );
if( lzip_index.retval() != 0 )
- { show_file_error( input_filename, lzip_index.error().c_str() );
+ { show_file_error( filename, lzip_index.error().c_str() );
return lzip_index.retval(); }
outfd = show_packets ? -1 : STDOUT_FILENO;
diff --git a/common.h b/common.h
index 56f6298..2952ed8 100644
--- a/common.h
+++ b/common.h
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2024 Antonio Diaz Diaz.
+ Copyright (C) 2009-2025 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -38,9 +38,14 @@ struct Bad_byte
};
+const char * const large_file_msg = "Input file is too large for this computer.";
const char * const mem_msg = "Not enough memory.";
+const char * const read_error_msg = "Read error";
// defined in main_common.cc
+extern int verbosity;
+
+const char * format_num3( long long num );
void show_error( const char * const msg, const int errcode = 0,
const bool help = false );
void show_file_error( const char * const filename, const char * const msg,
diff --git a/configure b/configure
index b753b20..dec0734 100755
--- a/configure
+++ b/configure
@@ -1,12 +1,12 @@
#! /bin/sh
# configure script for Lziprecover - Data recovery tool for the lzip format
-# Copyright (C) 2009-2024 Antonio Diaz Diaz.
+# Copyright (C) 2009-2025 Antonio Diaz Diaz.
#
# This configure script is free software: you have unlimited permission
# to copy, distribute, and modify it.
pkgname=lziprecover
-pkgversion=1.24
+pkgversion=1.25
progname=lziprecover
srctrigger=doc/${pkgname}.texi
@@ -24,6 +24,7 @@ CXX=g++
CPPFLAGS=
CXXFLAGS='-Wall -W -O2'
LDFLAGS=
+LIBS=-lpthread
MAKEINFO=makeinfo
# checking whether we are using GNU C++.
@@ -70,6 +71,7 @@ while [ $# != 0 ] ; do
echo " CXXFLAGS=OPTIONS command-line options for the C++ compiler [${CXXFLAGS}]"
echo " CXXFLAGS+=OPTIONS append options to the current value of CXXFLAGS"
echo " LDFLAGS=OPTIONS command-line options for the linker [${LDFLAGS}]"
+ echo " LIBS=OPTIONS libraries to pass to the linker [${LIBS}]"
echo " MAKEINFO=NAME makeinfo program to use [${MAKEINFO}]"
echo
exit 0 ;;
@@ -98,6 +100,7 @@ while [ $# != 0 ] ; do
CXXFLAGS=*) CXXFLAGS=${optarg} ;;
CXXFLAGS+=*) CXXFLAGS="${CXXFLAGS} ${optarg}" ;;
LDFLAGS=*) LDFLAGS=${optarg} ;;
+ LIBS=*) LIBS="${optarg} ${LIBS}" ;;
MAKEINFO=*) MAKEINFO=${optarg} ;;
--*)
@@ -109,7 +112,7 @@ while [ $# != 0 ] ; do
exit 1 ;;
esac
- # Check if the option took a separate argument
+ # Check whether the option took a separate argument
if [ "${arg2}" = yes ] ; then
if [ $# != 0 ] ; then args="${args} \"$1\"" ; shift
else echo "configure: Missing argument to '${option}'" 1>&2
@@ -167,11 +170,12 @@ echo "CXX = ${CXX}"
echo "CPPFLAGS = ${CPPFLAGS}"
echo "CXXFLAGS = ${CXXFLAGS}"
echo "LDFLAGS = ${LDFLAGS}"
+echo "LIBS = ${LIBS}"
echo "MAKEINFO = ${MAKEINFO}"
rm -f Makefile
cat > Makefile << EOF
# Makefile for Lziprecover - Data recovery tool for the lzip format
-# Copyright (C) 2009-2024 Antonio Diaz Diaz.
+# Copyright (C) 2009-2025 Antonio Diaz Diaz.
# This file was generated automatically by configure. Don't edit.
#
# This Makefile is free software: you have unlimited permission
@@ -191,6 +195,7 @@ CXX = ${CXX}
CPPFLAGS = ${CPPFLAGS}
CXXFLAGS = ${CXXFLAGS}
LDFLAGS = ${LDFLAGS}
+LIBS = ${LIBS}
MAKEINFO = ${MAKEINFO}
EOF
cat "${srcdir}/Makefile.in" >> Makefile
diff --git a/debian/changelog b/debian/changelog
index 21c0562..24dcbc5 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,33 @@
+lziprecover (1.25-1) sid; urgency=medium
+
+ * Uploading to sid.
+ * Merging upstream version 1.25.
+
+ -- Daniel Baumann <daniel.baumann@progress-linux.org> Tue, 14 Jan 2025 11:07:30 +0100
+
+lziprecover (1.25~rc1-1) sid; urgency=medium
+
+ * Uploading to sid.
+ * Merging upstream version 1.25~rc1.
+
+ -- Daniel Baumann <daniel.baumann@progress-linux.org> Sun, 24 Nov 2024 05:36:58 +0100
+
+lziprecover (1.25~pre1-1) sid; urgency=medium
+
+ * Uploading to sid.
+ * Merging upstream version 1.25~pre1.
+ * Refreshing build.patch.
+
+ -- Daniel Baumann <daniel.baumann@progress-linux.org> Mon, 07 Oct 2024 10:15:10 +0200
+
+lziprecover (1.24-2) sid; urgency=medium
+
+ * Uploading to sid.
+ * Updating years in copyright for 2024.
+ * Updating to standards-version 4.7.0.
+
+ -- Daniel Baumann <daniel.baumann@progress-linux.org> Thu, 25 Jul 2024 06:38:49 +0200
+
lziprecover (1.24-1) sid; urgency=medium
* Uploading to sid.
diff --git a/debian/patches/debian/0001-build.patch b/debian/patches/debian/0001-build.patch
index cd0ebf4..41817c8 100644
--- a/debian/patches/debian/0001-build.patch
+++ b/debian/patches/debian/0001-build.patch
@@ -14,6 +14,7 @@ diff -Naurp lziprecover.orig/configure lziprecover/configure
+#CPPFLAGS=
+#CXXFLAGS='-Wall -W -O2'
+#LDFLAGS=
+ LIBS=-lpthread
MAKEINFO=makeinfo
- # checking whether we are using GNU C++.
+
diff --git a/decoder.cc b/decoder.cc
index 4c68355..17f6ef3 100644
--- a/decoder.cc
+++ b/decoder.cc
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2024 Antonio Diaz Diaz.
+ Copyright (C) 2009-2025 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -76,8 +76,8 @@ bool Range_decoder::read_block()
if( !at_stream_end )
{
stream_pos = readblock( infd, buffer, buffer_size );
- if( stream_pos != buffer_size && errno ) throw Error( "Read error" );
- at_stream_end = ( stream_pos < buffer_size );
+ if( stream_pos != buffer_size && errno ) throw Error( read_error_msg );
+ at_stream_end = stream_pos < buffer_size;
partial_member_pos += pos;
pos = 0;
show_dprogress();
@@ -99,7 +99,7 @@ void LZ_decoder::flush_data()
const long long s =
std::min( positive_diff( outend, sp ), (unsigned long long)size ) - i;
if( s > 0 && writeblock( outfd, buffer + stream_pos + i, s ) != s )
- throw Error( "Write error" );
+ throw Error( wr_err_msg );
}
if( pos >= dictionary_size )
{ partial_data_pos += pos; pos = 0; pos_wrapped = true; }
@@ -108,8 +108,7 @@ void LZ_decoder::flush_data()
}
-int LZ_decoder::check_trailer( const Pretty_print & pp,
- const bool ignore_empty ) const
+bool LZ_decoder::check_trailer( const Pretty_print & pp ) const
{
Lzip_trailer trailer;
int size = rdec.read_data( trailer.data, trailer.size );
@@ -154,8 +153,7 @@ int LZ_decoder::check_trailer( const Pretty_print & pp,
std::fprintf( stderr, "Member size mismatch; stored %llu (0x%llX), computed %llu (0x%llX)\n",
tm_size, tm_size, member_size, member_size ); }
}
- if( error ) return 3;
- if( !ignore_empty && data_size == 0 ) return 5;
+ if( error ) return false;
if( verbosity >= 2 )
{
if( verbosity >= 4 ) show_header( dictionary_size );
@@ -175,15 +173,15 @@ int LZ_decoder::check_trailer( const Pretty_print & pp,
pp();
std::fprintf( stderr, "Range decoder final code is %08X\n", rdec.get_code() );
}
- return 0;
+ return true;
}
/* Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF,
3 = trailer error, 4 = unknown marker found,
- 5 = empty member found, 6 = marked member found. */
-int LZ_decoder::decode_member( const Cl_options & cl_opts,
- const Pretty_print & pp )
+ 5 = nonzero first LZMA byte found. */
+int LZ_decoder::decode_member( const Pretty_print & pp,
+ const bool ignore_nonzero )
{
Bit_model bm_literal[1<<literal_context_bits][0x300];
Bit_model bm_match[State::states][pos_states];
@@ -203,7 +201,7 @@ int LZ_decoder::decode_member( const Cl_options & cl_opts,
unsigned rep3 = 0;
State state;
- if( !rdec.load( cl_opts.ignore_marking ) ) return 6;
+ if( !rdec.load( ignore_nonzero ) ) return 5;
while( !rdec.finished() )
{
const int pos_state = data_position() & pos_state_mask;
@@ -224,7 +222,7 @@ int LZ_decoder::decode_member( const Cl_options & cl_opts,
if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit
{
if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
- { state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
+ { state.set_shortrep(); put_byte( peek( rep0 ) ); continue; }
}
else
{
@@ -247,39 +245,33 @@ int LZ_decoder::decode_member( const Cl_options & cl_opts,
}
else // match
{
+ rep3 = rep2; rep2 = rep1; rep1 = rep0;
len = rdec.decode_len( match_len_model, pos_state );
- unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
- if( distance >= start_dis_model )
+ rep0 = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
+ if( rep0 >= start_dis_model )
{
- const unsigned dis_slot = distance;
+ const unsigned dis_slot = rep0;
const int direct_bits = ( dis_slot >> 1 ) - 1;
- distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
+ rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
if( dis_slot < end_dis_model )
- distance += rdec.decode_tree_reversed(
- bm_dis + ( distance - dis_slot ), direct_bits );
+ rep0 += rdec.decode_tree_reversed( bm_dis + ( rep0 - dis_slot ),
+ direct_bits );
else
{
- distance +=
- rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
- distance += rdec.decode_tree_reversed4( bm_align );
- if( distance == 0xFFFFFFFFU ) // marker found
+ rep0 += rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
+ rep0 += rdec.decode_tree_reversed4( bm_align );
+ if( rep0 == 0xFFFFFFFFU ) // marker found
{
rdec.normalize();
flush_data();
if( len == min_match_len ) // End Of Stream marker
- return check_trailer( pp, cl_opts.ignore_empty );
- if( len == min_match_len + 1 ) // Sync Flush marker
- { rdec.load(); continue; }
- if( verbosity >= 0 )
- {
- pp();
- std::fprintf( stderr, "Unsupported marker code '%d'\n", len );
- }
+ { if( check_trailer( pp ) ) return 0; else return 3; }
+ if( verbosity >= 0 ) { pp();
+ std::fprintf( stderr, "Unsupported marker code '%d'\n", len ); }
return 4;
}
}
}
- rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance;
state.set_match();
if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) )
{ flush_data(); return 1; }
diff --git a/decoder.h b/decoder.h
index 6efdfc2..0d669c4 100644
--- a/decoder.h
+++ b/decoder.h
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2024 Antonio Diaz Diaz.
+ Copyright (C) 2009-2025 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -26,6 +26,7 @@ class Range_decoder
uint32_t range;
const int infd; // input file descriptor
bool at_stream_end;
+ bool nonzero_;
bool read_block();
@@ -42,11 +43,12 @@ public:
code( 0 ),
range( 0xFFFFFFFFU ),
infd( ifd ),
- at_stream_end( false )
+ at_stream_end( false ), nonzero_( false )
{}
~Range_decoder() { delete[] buffer; }
+ bool nonzero() const { return nonzero_; }
unsigned get_code() const { return code; }
bool finished() { return pos >= stream_pos && !read_block(); }
@@ -106,12 +108,14 @@ public:
return false;
}
- bool load( const bool ignore_marking = true )
+ bool load( const bool ignore_nonzero )
{
code = 0;
range = 0xFFFFFFFFU;
- // check and discard first byte of the LZMA stream
- if( get_byte() != 0 && !ignore_marking ) return false;
+ // check first byte of the LZMA stream without reading it
+ nonzero_ = buffer[pos] != 0;
+ if( nonzero_ && !ignore_nonzero ) return false;
+ get_byte(); // discard first byte of the LZMA stream
for( int i = 0; i < 4; ++i ) code = ( code << 8 ) | get_byte();
return true;
}
@@ -131,7 +135,7 @@ public:
range >>= 1;
// symbol <<= 1;
// if( code >= range ) { code -= range; symbol |= 1; }
- const bool bit = ( code >= range );
+ const bool bit = code >= range;
symbol <<= 1; symbol += bit;
code -= range & ( 0U - bit );
}
@@ -305,7 +309,7 @@ class LZ_decoder
unsigned long long stream_position() const
{ return partial_data_pos + stream_pos; }
void flush_data();
- int check_trailer( const Pretty_print & pp, const bool ignore_empty ) const;
+ bool check_trailer( const Pretty_print & pp ) const;
uint8_t peek_prev() const
{ return buffer[((pos > 0) ? pos : dictionary_size)-1]; }
@@ -329,14 +333,14 @@ class LZ_decoder
bool fast, fast2;
if( lpos > distance )
{
- fast = ( len < dictionary_size - lpos );
- fast2 = ( fast && len <= lpos - i );
+ fast = len < dictionary_size - lpos;
+ fast2 = fast && len <= lpos - i;
}
else
{
i += dictionary_size;
- fast = ( len < dictionary_size - i ); // (i == pos) may happen
- fast2 = ( fast && len <= i - lpos );
+ fast = len < dictionary_size - i; // (i == pos) may happen
+ fast2 = fast && len <= i - lpos;
}
if( fast ) // no wrap
{
@@ -381,7 +385,7 @@ public:
unsigned crc() const { return crc_ ^ 0xFFFFFFFFU; }
unsigned long long data_position() const { return partial_data_pos + pos; }
- int decode_member( const Cl_options & cl_opts, const Pretty_print & pp );
+ int decode_member( const Pretty_print & pp, const bool ignore_nonzero );
int decode_member()
- { return decode_member( Cl_options(), Pretty_print( "" ) ); }
+ { return decode_member( Pretty_print( "" ), true ); }
};
diff --git a/doc/lziprecover.1 b/doc/lziprecover.1
index f95e80f..24aa984 100644
--- a/doc/lziprecover.1
+++ b/doc/lziprecover.1
@@ -1,17 +1,14 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.2.
-.TH LZIPRECOVER "1" "January 2024" "lziprecover 1.24" "User Commands"
+.TH LZIPRECOVER "1" "January 2025" "lziprecover 1.25" "User Commands"
.SH NAME
-lziprecover \- recovers data from damaged lzip files
+lziprecover \- recovers data from damaged files
.SH SYNOPSIS
.B lziprecover
[\fI\,options\/\fR] [\fI\,files\/\fR]
.SH DESCRIPTION
Lziprecover is a data recovery tool and decompressor for files in the lzip
-compressed data format (.lz). Lziprecover is able to repair slightly damaged
-files (up to one single\-byte error per member), produce a correct file by
-merging the good parts of two or more damaged copies, reproduce a missing
-(zeroed) sector using a reference file, extract data from damaged files,
-decompress files, and test integrity of files.
+compressed data format (.lz). Lziprecover also provides Forward Error
+Correction (FEC) able to repair any kind of file.
.PP
With the help of lziprecover, losing an entire archive just because of a
corrupt byte near the beginning is a thing of the past.
@@ -22,9 +19,6 @@ example multimember tar.lz archives.
Lziprecover provides random access to the data in multimember files; it only
decompresses the members containing the desired data.
.PP
-Lziprecover facilitates the management of metadata stored as trailing data
-in lzip files.
-.PP
Lziprecover is not a replacement for regular backups, but a last line of
defense for the case where the backups are also damaged.
.SH OPTIONS
@@ -41,6 +35,12 @@ exit with error status if trailing data
\fB\-A\fR, \fB\-\-alone\-to\-lz\fR
convert lzma\-alone files to lzip format
.TP
+\fB\-b\fR, \fB\-\-block\-size=\fR<bytes>
+make FEC block size a multiple of <bytes>
+.TP
+\fB\-B\fR, \fB\-\-byte\-repair\fR
+try to repair a corrupt byte in file
+.TP
\fB\-c\fR, \fB\-\-stdout\fR
write to standard output, keep input files
.TP
@@ -65,8 +65,17 @@ reference file for \fB\-\-reproduce\fR
\fB\-f\fR, \fB\-\-force\fR
overwrite existing output files
.TP
+\fB\-F\fR, \fB\-\-fec\fR=\fI\,c[N]\/\fR|r|t|l
+create, repair, test, list (using) fec file
+.TP
+\fB\-0\fR .. \fB\-9\fR
+set FEC fragmentation level [default 9]
+.TP
+\fB\-\-fec\-file=\fR<file>[/]
+read fec file from <file> or directory
+.TP
\fB\-i\fR, \fB\-\-ignore\-errors\fR
-ignore some errors in \fB\-d\fR, \fB\-D\fR, \fB\-l\fR, \fB\-t\fR, \fB\-\-dump\fR
+ignore non\-fatal errors
.TP
\fB\-k\fR, \fB\-\-keep\fR
keep (don't delete) input files
@@ -77,14 +86,20 @@ print (un)compressed file sizes
\fB\-m\fR, \fB\-\-merge\fR
repair errors in file using several copies
.TP
-\fB\-o\fR, \fB\-\-output=\fR<file>
-place the output into <file>
+\fB\-n\fR, \fB\-\-threads=\fR<n>
+set number of threads for fec create [2]
+.TP
+\fB\-o\fR, \fB\-\-output=\fR<file>[/]
+place the output into <file> or directory
.TP
\fB\-q\fR, \fB\-\-quiet\fR
suppress all messages
.TP
-\fB\-R\fR, \fB\-\-byte\-repair\fR
-try to repair a corrupt byte in file
+\fB\-r\fR, \fB\-\-recursive\fR
+(fec) operate recursively on directories
+.TP
+\fB\-R\fR, \fB\-\-dereference\-recursive\fR
+(fec) recursively follow symbolic links
.TP
\fB\-s\fR, \fB\-\-split\fR
split multimember file in single\-member files
@@ -104,22 +119,18 @@ remove members, tdata from files in place
\fB\-\-strip=\fR<list>:d:e:t
copy files to stdout stripping members given
.TP
-\fB\-\-empty\-error\fR
-exit with error status if empty member in file
-.TP
-\fB\-\-marking\-error\fR
-exit with error status if 1st LZMA byte not 0
-.TP
\fB\-\-loose\-trailing\fR
allow trailing data seeming corrupt header
.TP
-\fB\-\-clear\-marking\fR
-reset the first LZMA byte of each member
+\fB\-\-nonzero\-repair\fR
+repair in place a nonzero first LZMA byte
.PP
If no file names are given, or if a file is '\-', lziprecover decompresses
from standard input to standard output.
Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,
Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...
+The argument to \fB\-\-fec\fR=\fI\,create\/\fR may be a number of blocks (\fB\-Fc20\fR), a
+percentage (\fB\-Fc5\fR%), or a size in bytes (\fB\-Fc10KiB\fR).
.PP
To extract all the files from archive 'foo.tar.lz', use the commands
\&'tar \fB\-xf\fR foo.tar.lz' or 'lziprecover \fB\-cd\fR foo.tar.lz | tar \fB\-xf\fR \-'.
@@ -133,7 +144,7 @@ Report bugs to lzip\-bug@nongnu.org
.br
Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html
.SH COPYRIGHT
-Copyright \(co 2024 Antonio Diaz Diaz.
+Copyright \(co 2025 Antonio Diaz Diaz.
License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
.br
This is free software: you are free to change and redistribute it.
diff --git a/doc/lziprecover.info b/doc/lziprecover.info
index b1f820f..002567e 100644
--- a/doc/lziprecover.info
+++ b/doc/lziprecover.info
@@ -12,19 +12,21 @@ File: lziprecover.info, Node: Top, Next: Introduction, Up: (dir)
Lziprecover Manual
******************
-This manual is for Lziprecover (version 1.24, 20 January 2024).
+This manual is for Lziprecover (version 1.25, 8 January 2025).
* Menu:
* Introduction:: Purpose and features of lziprecover
* Invoking lziprecover:: Command-line interface
+* Argument syntax:: By convention, options start with a hyphen
+* File format:: Detailed format of the compressed file
* Data safety:: Protecting data from accidental loss
+* Fec files:: Forward Error Correction
* Repairing one byte:: Fixing bit flips and similar errors
* Merging files:: Fixing several damaged copies
* Reproducing one sector:: Fixing a missing (zeroed) sector
* Tarlz:: Options supporting the tar.lz format
* File names:: Names of the files produced by lziprecover
-* File format:: Detailed format of the compressed file
* Trailing data:: Extra data appended to the file
* Examples:: A small tutorial with examples
* Unzcrash:: Testing the robustness of decompressors
@@ -32,7 +34,7 @@ This manual is for Lziprecover (version 1.24, 20 January 2024).
* Concept index:: Index of concepts
- Copyright (C) 2009-2024 Antonio Diaz Diaz.
+ Copyright (C) 2009-2025 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission to copy,
distribute, and modify it.
@@ -44,11 +46,14 @@ File: lziprecover.info, Node: Introduction, Next: Invoking lziprecover, Prev:
**************
Lziprecover is a data recovery tool and decompressor for files in the lzip
-compressed data format (.lz). Lziprecover is able to repair slightly damaged
-files (up to one single-byte error per member), produce a correct file by
-merging the good parts of two or more damaged copies, reproduce a missing
-(zeroed) sector using a reference file, extract data from damaged files,
-decompress files, and test integrity of files.
+compressed data format (.lz). Lziprecover also provides Forward Error
+Correction (FEC) able to repair any kind of file.
+
+ Lziprecover is able to repair slightly damaged lzip files (up to one
+single-byte error per member), produce a correct file by merging the good
+parts of two or more damaged copies, reproduce a missing (zeroed) sector
+using a reference file, extract data from damaged files, decompress files,
+and test integrity of files.
Lziprecover can remove the damaged members from multimember files, for
example multimember tar.lz archives.
@@ -56,30 +61,20 @@ example multimember tar.lz archives.
Lziprecover provides random access to the data in multimember files; it
only decompresses the members containing the desired data.
- Lziprecover facilitates the management of metadata stored as trailing
-data in lzip files.
-
Lziprecover is not a replacement for regular backups, but a last line of
defense for the case where the backups are also damaged.
- The lzip file format is designed for data sharing and long-term
-archiving, taking into account both data integrity and decoder availability:
-
- * The lzip format provides very safe integrity checking and some data
- recovery means. The program lziprecover can repair bit flip errors
- (one of the most common forms of data corruption) in lzip files, and
- provides data recovery capabilities, including error-checked merging
- of damaged copies of a file. *Note Data safety::.
-
- * The lzip format is as simple as possible (but not simpler). The lzip
- manual provides the source code of a simple decompressor along with a
- detailed explanation of how it works, so that with the only help of the
- lzip manual it would be possible for a digital archaeologist to extract
- the data from a lzip file long after quantum computers eventually
- render LZMA obsolete.
-
- * Additionally the lzip reference implementation is copylefted, which
- guarantees that it will remain free forever.
+ Lziprecover is able to provide unique data recovery capabilities because
+the lzip format is extraordinarily safe. The simple and safe design of the
+file format complements the embedded error detection provided by the LZMA
+data stream. Any distance larger than the dictionary size acts as a
+forbidden symbol, allowing the decompressor to detect the approximate
+position of errors, and leaving little work for the check sequence (CRC and
+data sizes) in the detection of errors. Lzip is usually able to detect all
+possible bit flips in the compressed data without resorting to the check
+sequence. It would be difficult to write an automatic recovery tool like
+lziprecover for the gzip format. And, as far as I know, it has never been
+written.
A nice feature of the lzip format is that a corrupt byte is easier to
repair the nearer it is from the beginning of the file. Therefore, with the
@@ -97,10 +92,12 @@ able to find and combine the good parts of several damaged copies.
compressors in the lzip family: lzip, plzip, minilzip/lzlib, clzip, and
pdlzip.
- If the cause of file corruption is a damaged medium, the combination
-GNU ddrescue + lziprecover is the recommended option for recovering data
-from damaged lzip files. *Note ddrescue-example::, and *note
-ddrescue-example2::, for examples.
+ GNU ddrescue provides data recovery capabilities which nicely complement
+those of lziprecover. If the cause of file corruption is a damaged medium,
+the combination GNU ddrescue + lziprecover is the recommended option for
+recovering data from damaged files. *Note ddrescue-example::, *note
+ddrescue-example2::, and *note ddrescue-example3::, for examples. *Note GNU
+ddrescue manual: (ddrescue)Top, for details about ddrescue.
If a file is too damaged for lziprecover to repair it, all the
recoverable data in all members of the file can be extracted with the
@@ -122,7 +119,7 @@ have been compressed. Decompressed is used to refer to data which have
undergone the process of decompression.

-File: lziprecover.info, Node: Invoking lziprecover, Next: Data safety, Prev: Introduction, Up: Top
+File: lziprecover.info, Node: Invoking lziprecover, Next: Argument syntax, Prev: Introduction, Up: Top
2 Invoking lziprecover
**********************
@@ -137,8 +134,7 @@ first time it appears in the command line. If no file names are specified,
lziprecover decompresses from standard input to standard output. Remember
to prepend './' to any file name beginning with a hyphen, or use '--'.
- lziprecover supports the following options: *Note Argument syntax:
-(arg_parser)Argument syntax.
+lziprecover supports the following options: *Note Argument syntax::.
'-h'
'--help'
@@ -162,7 +158,7 @@ to prepend './' to any file name beginning with a hyphen, or use '--'.
dictionary size of the resulting file (and therefore the amount of
memory required to decompress it). Only streamed files with default
LZMA properties can be converted; non-streamed lzma-alone files lack
- the "End Of Stream" marker required in lzip files.
+ the 'End Of Stream' marker required in lzip files.
The name of the converted lzip file is derived from that of the
original lzma-alone file as follows:
@@ -171,6 +167,19 @@ to prepend './' to any file name beginning with a hyphen, or use '--'.
filename.tlz becomes filename.tar.lz
anyothername becomes anyothername.lz
+'-b BYTES'
+'--block-size=BYTES'
+ When creating fec files, make the FEC block size a multiple of BYTES,
+ which must be a multiple of 512 not larger than 1 GiB.
+
+'-B'
+'--byte-repair'
+ Try to repair a FILE with small errors (up to one single-byte error
+ per member). If successful, a repaired copy is written to the file
+ FILE_fixed.lz. FILE is not modified at all. The exit status is 0 if
+ the file could be repaired, 2 otherwise. *Note Repairing one byte::,
+ for a complete description of the byte-repair mode.
+
'-c'
'--stdout'
Write decompressed data to standard output; keep input files
@@ -189,23 +198,24 @@ to prepend './' to any file name beginning with a hyphen, or use '--'.
status 1. If a file fails to decompress, or is a terminal, lziprecover
exits immediately with error status 2 without decompressing the rest
of the files. A terminal is considered an uncompressed file, and
- therefore invalid.
+ therefore invalid. A multimember file with one or more empty members
+ is accepted if redirected to standard input or if '-i' is given.
'-D RANGE'
'--range-decompress=RANGE'
Decompress only a range of bytes starting at decompressed byte position
- BEGIN and up to byte position END - 1. Byte positions start at 0. This
- option provides random access to the data in multimember files; it
- only decompresses the members containing the desired data. In order to
- guarantee the correctness of the data produced, all members containing
- any part of the desired data are decompressed and their integrity is
- checked.
+ BEGIN and up to byte position END - 1. Byte positions start at 0. The
+ bytes produced are sent to standard output unless the option '-o' is
+ used. This option provides random access to the data in multimember
+ files; it only decompresses the members containing the desired data.
+ In order to guarantee the correctness of the data produced, all
+ members containing any part of the desired data are decompressed and
+ their integrity is checked.
Four formats of RANGE are recognized, 'BEGIN', 'BEGIN-END',
'BEGIN,SIZE', and ',SIZE'. If only BEGIN is specified, END is taken as
the end of the file. If only SIZE is specified, BEGIN is taken as the
- beginning of the file. The bytes produced are sent to standard output
- unless the option '--output' is used.
+ beginning of the file.
'-e'
'--reproduce'
@@ -237,8 +247,45 @@ to prepend './' to any file name beginning with a hyphen, or use '--'.
'--force'
Force overwrite of output files.
+'-F create[N]|repair|test|list'
+'--fec=create[N]|repair|test|list'
+ Create fec files, or repair or test files using previously created fec
+ files, or list the contents of fec files. The argument (create, repair,
+ test, or list) can be abbreviated even to a single letter. Option '-i'
+ is required to repair or test a file using a corrupt fec file, or to
+ list a corrupt fec file. *Note Fec files::.
+
+ N is the number of FEC blocks to be created. The amount of FEC data to
+ be created may also be specified as a percentage from 0.003% to 100%,
+ or as a number of bytes followed by a 'B' (4096B, 16KiB, etc). If N is
+ not specified, it defaults to '8' (8 FEC blocks). (Because, when was
+ the last time you saw more than 8 bad sectors affecting the same file?)
+
+ '--fec=create' writes the FEC data created to FILE.fec unless option
+ '-c' or '-o' is specified. If a fec file can't be created, lziprecover
+ exits immediately with error status 1 without trying to create the
+ rest of the files.
+
+ '--fec=repair' and '--fec=test' read the FEC data from FILE.fec unless
+ '--fec-file' is specified. '--fec=repair' writes the repaired file to
+ FILE_fixed unless option '-c' or '-o' is specified. *Note File
+ names::. If a file fails to repair, lziprecover exits immediately with
+ error status 2 without repairing the rest of the files.
+
+'-0 .. -9'
+ FEC fragmentation level. Defaults to '-9'. Level '-0' is the fastest;
+ it creates FEC data using GF(2^8), maybe with large blocks. Levels
+ '-1' to '-9' use GF(2^8) or GF(2^16) as required, with increasing
+ amounts of smaller blocks.
+
+'--fec-file=FILE[/]'
+ When repairing or testing, read FEC data from FILE. If FILE ends with
+ a slash, it is interpreted as the name of a directory containing the
+ fec file(s).
+
'-i'
'--ignore-errors'
+ Ignore non-fatal errors.
Make '--decompress', '--test', and '--range-decompress' ignore format
and data errors and continue decompressing the remaining members in
the file; keep input files unchanged. For example, the commands
@@ -251,13 +298,19 @@ to prepend './' to any file name beginning with a hyphen, or use '--'.
errors. The exit status is set to 0 unless other errors are found (I/O
errors, for example).
+ Make '--fec=repair' and '--fec=test' ignore errors in the fec file and
+ return with exit status 0 if the repaired/protected file passes the
+ test, even if corrupt packets or trailing garbage are found in the fec
+ file. Make '--fec=list' ignore errors in the fec files.
+
Make '--list', '--dump', '--remove', and '--strip' ignore format
errors. The sizes of the members with errors (especially the last) may
be wrong.
'-k'
'--keep'
- Keep (don't delete) input files during decompression.
+ Keep (don't delete) input files during decompression or conversion from
+ lzma-alone.
'-l'
'--list'
@@ -268,9 +321,11 @@ to prepend './' to any file name beginning with a hyphen, or use '--'.
'-v', the dictionary size, the number of members in the file, and the
amount of trailing data (if any) are also printed. With '-vv', the
positions and sizes of each member in multimember files are also
- printed. With '-i', format errors are ignored, and with '-ivv', gaps
- between members are shown. The member numbers shown coincide with the
- file numbers produced by '--split'.
+ printed. A multimember file with one or more empty members is accepted
+ if redirected to standard input or if '-i' is given. With '-i', format
+ errors are ignored, and with '-ivv', gaps between members are shown.
+ The member numbers start at 1 and coincide with the file numbers
+ produced by '--split'.
If any file is damaged, does not exist, can't be opened, or is not
regular, the final exit status is > 0. '-lq' can be used to check
@@ -287,29 +342,52 @@ to prepend './' to any file name beginning with a hyphen, or use '--'.
produced, 2 otherwise. *Note Merging files::, for a complete
description of the merge mode.
-'-o FILE'
-'--output=FILE'
- Place the repaired output into FILE instead of into FILE_fixed.lz. If
- splitting, the names of the files produced are in the form
- 'rec01FILE', 'rec02FILE', etc.
-
- If '-c' has not been also specified, write the (de)compressed output
- to FILE, automatically creating any missing parent directories; keep
- input files unchanged. This option (or '-c') is needed when reading
- from a named pipe (fifo) or from a device. '-o -' is equivalent to
- '-c'. '-o' has no effect when testing or listing.
+'-n N'
+'--threads=N'
+ Set the maximum number of worker threads for '--fec=create',
+ overriding the system's default. Valid values range from 1 to as many
+ as your system can support. If this option is not used, lziprecover
+ tries to detect the number of processors in the system and use it as
+ default value. 'lziprecover --help' shows the system's default value.
+
+'-o FILE[/]'
+'--output=FILE[/]'
+ If repairing, place the repaired output into FILE instead of into
+ FILE_fixed.lz. If splitting, the names of the files produced are in
+ the form 'rec1FILE', 'rec2FILE', etc.
+
+ If creating FEC data and '-c' has not been also specified, write the
+ FEC data to FILE. If FILE ends with a slash, it is interpreted as the
+ name of a directory where the fec file(s) will be written. In this
+ case, the fec file names are composed by replacing the prefix
+ preceding the last slash of each file name specified in the command
+ line with FILE (or prepending FILE if the file name does not contain a
+ slash), and appending the extension '.fec'.
+
+ Else, if '-c' has not been also specified, write the (de)compressed
+ output to FILE, automatically creating any missing parent directories;
+ keep input files unchanged. This option (or '-c') is needed when
+ reading from a named pipe (fifo) or from a device. '-o -' is
+ equivalent to '-c'. '-o' has no effect when testing or listing.
'-q'
'--quiet'
Quiet operation. Suppress all messages.
+'-r'
+'--recursive'
+ When creating or reading fec files (but not when listing), for each
+ directory operand, read and process all files in that directory,
+ recursively. Follow symbolic links given in the command line, but skip
+ symbolic links that are encountered recursively. Ignore files and
+ directories named 'fec' or '*[-._]fec'.
+
'-R'
-'--byte-repair'
- Try to repair a FILE with small errors (up to one single-byte error
- per member). If successful, a repaired copy is written to the file
- FILE_fixed.lz. FILE is not modified at all. The exit status is 0 if
- the file could be repaired, 2 otherwise. *Note Repairing one byte::,
- for a complete description of the repair mode.
+'--dereference-recursive'
+ When creating or reading fec files (but not when listing), for each
+ directory operand, read and process all files in that directory,
+ recursively, following all symbolic links. Ignore files and
+ directories named 'fec' or '*[-._]fec'.
'-s'
'--split'
@@ -324,8 +402,8 @@ to prepend './' to any file name beginning with a hyphen, or use '--'.
headers or trailers, try to split FILE and then work on each member
individually.
- The names of the files produced are in the form 'rec01FILE',
- 'rec02FILE', etc, and are designed so that the use of wildcards in
+ The names of the files produced are in the form 'rec1FILE',
+ 'rec2FILE', etc, and are designed so that the use of wildcards in
subsequent processing, for example,
'lziprecover -cd rec*FILE > recovered_data', processes the files in
the correct order. The number of digits used in the names varies
@@ -339,7 +417,9 @@ to prepend './' to any file name beginning with a hyphen, or use '--'.
fails the test, does not exist, can't be opened, or is a terminal,
lziprecover continues testing the rest of the files. A final
diagnostic is shown at verbosity level 1 or higher if any file fails
- the test when testing multiple files.
+ the test when testing multiple files. A multimember file with one or
+ more empty members is accepted if redirected to standard input or if
+ '-i' is given.
'-v'
'--verbose'
@@ -347,23 +427,23 @@ to prepend './' to any file name beginning with a hyphen, or use '--'.
When decompressing or testing, further -v's (up to 4) increase the
verbosity level, showing status, compression ratio, dictionary size,
trailer contents (CRC, data size, member size), and up to 6 bytes of
- trailing data (if any) both in hexadecimal and as a string of printable
- ASCII characters.
+ trailing data (if any) both in hexadecimal and as a string of
+ printable ASCII characters.
Two or more '-v' options show the progress of decompression.
- In other modes, increasing verbosity levels show final status, progress
- of operations, and extra information (for example, the failed areas).
+ In other modes, increasing verbosity levels show final status,
+ progress of operations, and extra information (for example, the failed
+ areas).
'--dump=[MEMBER_LIST][:damaged][:empty][:tdata]'
Dump the members listed, the damaged members (if any), the empty
members (if any), or the trailing data (if any) of one or more regular
- multimember files to standard output, or to a file if the option
- '--output' is used. If more than one file is given, the elements
- dumped from all the files are concatenated. If a file does not exist,
- can't be opened, or is not regular, lziprecover continues processing
- the rest of the files. If the dump fails in one file, lziprecover
- exits immediately without processing the rest of the files. Only
- '--dump=tdata' can write to a terminal. '--dump=damaged' implies
- '--ignore-errors'.
+ multimember files to standard output, or to a file if the option '-o'
+ is used. If more than one file is given, the elements dumped from all
+ the files are concatenated. If a file does not exist, can't be opened,
+ or is not regular, lziprecover continues processing the rest of the
+ files. If the dump fails in one file, lziprecover exits immediately
+ without processing the rest of the files. Only '--dump=tdata' can
+ write to a terminal. '--dump=damaged' implies '--ignore-errors'.
The argument to '--dump' is a colon-separated list of the following
element specifiers; a member list (1,3-6), a reverse member list
@@ -417,47 +497,32 @@ to prepend './' to any file name beginning with a hyphen, or use '--'.
'--strip=[MEMBER_LIST][:damaged][:empty][:tdata]'
Copy one or more regular multimember files to standard output (or to a
- file if the option '--output' is used), stripping the members listed,
- the damaged members (if any), the empty members (if any), or the
- trailing data (if any) from each file. If all members in a file are
- selected to be stripped, the trailing data (if any) are also stripped
- even if 'tdata' is not specified. If more than one file is given, the
- files are concatenated. In this case the trailing data are also
- stripped from all but the last file even if 'tdata' is not specified.
- If a file does not exist, can't be opened, or is not regular,
- lziprecover continues processing the rest of the files. If a file
- fails to copy, lziprecover exits immediately without processing the
- rest of the files. See '--dump' above for a description of the
- argument.
-
-'--empty-error'
- Exit with error status 2 if any empty member is found in the input
- files.
-
-'--marking-error'
- Exit with error status 2 if the first LZMA byte is non-zero in any
- member of the input files. This may be caused by data corruption or by
- deliberate insertion of tracking information in the file. Use
- 'lziprecover --clear-marking' to clear any such non-zero bytes.
+ file if the option '-o' is used), stripping the members listed, the
+ damaged members (if any), the empty members (if any), or the trailing
+ data (if any) from each file. If all members in a file are selected to
+ be stripped, the trailing data (if any) are also stripped even if
+ 'tdata' is not specified. If more than one file is given, the files are
+ concatenated. In this case the trailing data are also stripped from
+ all but the last file even if 'tdata' is not specified. If a file does
+ not exist, can't be opened, or is not regular, lziprecover continues
+ processing the rest of the files. If a file fails to copy, lziprecover
+ exits immediately without processing the rest of the files. See
+ '--dump' above for a description of the argument.
'--loose-trailing'
When decompressing, testing, or listing, allow trailing data whose
first bytes are so similar to the magic bytes of a lzip header that
they can be confused with a corrupt header. Use this option if a file
- triggers a "corrupt header" error and the cause is not indeed a
+ triggers a 'corrupt header' error and the cause is not indeed a
corrupt header.
-'--clear-marking'
- Set to zero the first LZMA byte of each member in the files specified.
- At verbosity level 1 (-v), print the number of members cleared. The
- date of each file modified is preserved if possible. This option
- exists because the first byte of the LZMA stream is ignored by the
- range decoder, and can therefore be (mis)used to store any value which
- can then be used as a watermark to track the path of the compressed
- payload.
+'--nonzero-repair'
+ Repair in place a nonzero first LZMA byte in the files specified. With
+ '-v', print the number of members repaired. The date of each file
+ modified is preserved if possible.
- Lziprecover also supports the following debug options (for experts):
+lziprecover also supports the following debug options (for experts):
'-E RANGE[,SECTOR_SIZE]'
'--debug-reproduce=RANGE[,SECTOR_SIZE]'
@@ -469,6 +534,24 @@ to prepend './' to any file name beginning with a hyphen, or use '--'.
statistics of the number of sectors reproduced successfully. Exit with
nonzero status only in case of fatal error.
+'-F dcN'
+'--fec=dcN'
+ Simulate FEC repair of all combinations of N zeroed block errors
+ spread along the whole input file.
+
+'-F dzRANGE[:RANGE]...'
+'--fec=dzRANGE[:RANGE]...'
+ Simulate FEC repair of one or more zeroed block(s) in the input file
+ at the RANGEs given. The RANGEs may be unordered and overlapping.
+ Lziprecover sorts and joins them as needed. *Note range-format::, for a
+ description of RANGE.
+
+'-F dZSIZE[,DELTA]'
+'--fec=dZSIZE[,DELTA]'
+ Simulate FEC repair of all possible zeroed blocks of size SIZE in the
+ input file. DELTA defaults to SIZE. Values of DELTA smaller than SIZE
+ result in overlapping blocks.
+
'-M'
'--md5sum'
Print to standard output the MD5 digests of the input FILES one per
@@ -518,14 +601,15 @@ to prepend './' to any file name beginning with a hyphen, or use '--'.
Load the compressed FILE into memory, set the byte at POSITION to
VALUE, and decompress the modified compressed data to standard output.
If the damaged member can be decompressed to the end (just fails with
- a CRC mismatch), the members following it are also decompressed.
+ a CRC mismatch), the members following it are also decompressed. *Note
+ --set-byte::, for a description of VALUE.
'-X[POSITION,VALUE]'
'--show-packets[=POSITION,VALUE]'
Load the compressed FILE into memory, optionally set the byte at
POSITION to VALUE, decompress the modified compressed data (discarding
the output), and print to standard output descriptions of the LZMA
- packets being decoded.
+ packets being decoded. *Note --set-byte::, for a description of VALUE.
'-Y RANGE'
'--debug-delay=RANGE'
@@ -542,6 +626,11 @@ to prepend './' to any file name beginning with a hyphen, or use '--'.
'--debug-byte-repair=POSITION,VALUE'
Load the compressed FILE into memory, set the byte at POSITION to
VALUE, and then try to repair the byte error. *Note --byte-repair::.
+ *Note --set-byte::, for a description of VALUE.
+
+'--gf16'
+ Force the use of GF(2^16) when creating FEC blocks even if the number
+ of blocks fits in GF(2^8).
Numbers given as arguments to options may be expressed in decimal,
@@ -551,6 +640,7 @@ and may be followed by a multiplier and an optional 'B' for "byte".
Table of SI and binary prefixes (unit multipliers):
Prefix Value | Prefix Value
+----------------------------------------------------------------------
k kilobyte (10^3 = 1000) | Ki kibibyte (2^10 = 1024)
M megabyte (10^6) | Mi mebibyte (2^20)
G gigabyte (10^9) | Gi gibibyte (2^30)
@@ -569,9 +659,130 @@ corrupt or invalid input file, 3 for an internal consistency error (e.g.,
bug) which caused lziprecover to panic.

-File: lziprecover.info, Node: Data safety, Next: Repairing one byte, Prev: Invoking lziprecover, Up: Top
+File: lziprecover.info, Node: Argument syntax, Next: File format, Prev: Invoking lziprecover, Up: Top
+
+3 Syntax of command-line arguments
+**********************************
+
+POSIX recommends these conventions for command-line arguments.
+
+ * A command-line argument is an option if it begins with a hyphen ('-').
+
+ * Option names are single alphanumeric characters.
+
+ * Certain options require an argument.
+
+ * An option and its argument may or may not appear as separate tokens.
+ (In other words, the whitespace separating them is optional.) Thus,
+ '-o foo' and '-ofoo' are equivalent.
+
+ * One or more options without arguments, followed by at most one option
+ that takes an argument, may follow a hyphen in a single token. Thus,
+ '-abc' is equivalent to '-a -b -c'.
+
+ * Options typically precede other non-option arguments.
+
+ * The argument '--' terminates all options; any following arguments are
+ treated as non-option arguments, even if they begin with a hyphen.
+
+ * A token consisting of a single hyphen character is interpreted as an
+ ordinary non-option argument. By convention, it is used to specify
+ standard input, standard output, or a file named '-'.
+
+GNU adds "long options" to these conventions:
+
+ * A long option consists of two hyphens ('--') followed by a name made
+ of alphanumeric characters and hyphens. Option names are typically one
+ to three words long, with hyphens to separate words. Abbreviations can
+ be used for the long option names as long as the abbreviations are
+ unique.
+
+ * A long option and its argument may or may not appear as separate
+ tokens. In the latter case they must be separated by an equal sign '='.
+ Thus, '--foo bar' and '--foo=bar' are equivalent.
+
+The syntax of options with an optional argument is
+'-<short_option><argument>' (without whitespace), or
+'--<long_option>=<argument>'.
+
+
+File: lziprecover.info, Node: File format, Next: Data safety, Prev: Argument syntax, Up: Top
+
+4 File format
+*************
+
+Perfection is reached, not when there is no longer anything to add, but
+when there is no longer anything to take away.
+-- Antoine de Saint-Exupery
+
+ In the diagram below, a box like this:
+
++---+
+| | <-- the vertical bars might be missing
++---+
+
+ represents one byte; a box like this:
+
++==============+
+| |
++==============+
+
+ represents a variable number of bytes.
+
+A lzip file consists of one or more independent "members" (compressed data
+sets). The members simply appear one after another in the file, with no
+additional information before, between, or after them. Each member can
+encode in compressed form up to 16 EiB - 1 byte of uncompressed data. The
+size of a multimember file is unlimited. Empty members (data size = 0) are
+not allowed in multimember files.
+
+ Each member has the following structure:
+
++--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+| ID string | VN | DS | LZMA stream | CRC32 | Data size | Member size |
++--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ All multibyte values are stored in little endian order.
+
+'ID string (the "magic" bytes)'
+ A four byte string, identifying the lzip format, with the value "LZIP"
+ (0x4C, 0x5A, 0x49, 0x50).
+
+'VN (version number, 1 byte)'
+ Just in case something needs to be modified in the future. 1 for now.
+
+'DS (coded dictionary size, 1 byte)'
+ The dictionary size is calculated by taking a power of 2 (the base
+ size) and subtracting from it a fraction between 0/16 and 7/16 of the
+ base size.
+ Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).
+ Bits 7-5 contain the numerator of the fraction (0 to 7) to subtract
+ from the base size to obtain the dictionary size.
+ Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB
+ Valid values for dictionary size range from 4 KiB to 512 MiB.
+
+'LZMA stream'
+ The LZMA stream, terminated by an 'End Of Stream' marker. Uses default
+ values for encoder properties. *Note Stream format: (lzip)Stream
+ format, for a complete description.
+
+'CRC32 (4 bytes)'
+ Cyclic Redundancy Check (CRC) of the original uncompressed data.
+
+'Data size (8 bytes)'
+ Size of the original uncompressed data.
+
+'Member size (8 bytes)'
+ Total size of the member, including header and trailer. This field acts
+ as a distributed index, improves the checking of stream integrity, and
+ facilitates the safe recovery of undamaged members from multimember
+ files. Lzip limits the member size to 2 PiB to prevent the data size
+ field from overflowing.
+
+
+File: lziprecover.info, Node: Data safety, Next: Fec files, Prev: File format, Up: Top
-3 Protecting data from accidental loss
+5 Protecting data from accidental loss
**************************************
It is a fact of life that sometimes data becomes corrupt. Software has
@@ -583,26 +794,31 @@ formats, and the reason why a data recovery tool is sometimes needed.
single-byte errors, multibyte errors (generally affecting a whole sector in
a block device), and total device failure.
+ The two most effective methods to protect data from accidental loss are
+backup copies and Forward Error Correction (FEC). Both methods can be used
+simultaneously, and both are supported by lziprecover.
+
Lziprecover protects natively against single-byte errors as long as file
integrity is checked frequently enough that a second single-byte error does
not develop in the same member before the first one is repaired. *Note
Repairing one byte::.
- Lziprecover also protects against multibyte errors if at least one backup
-copy of the file is made (*note Merging files::), or if the error is a
-zeroed sector and the uncompressed data corresponding to the zeroed sector
-are available (*note Reproducing one sector::). If you can choose between
-merging and reproducing, try merging first because it is usually faster,
-easier to use, and has a high probability of success.
+ Lziprecover protects against multibyte errors in 3 cases: if a fec file
+is available (*note Fec files::), if at least one backup copy of the file is
+available (*note Merging files::), or if the error is a zeroed sector and
+the uncompressed data corresponding to the zeroed sector are available
+(*note Reproducing one sector::). FEC is best. Else, if you can choose
+between merging and reproducing, try merging first because it is usually
+faster, easier to use, and has a high probability of success.
Lziprecover can't help in case of device failure. The only remedy for
total device failure is storing backup copies in separate media.
- The extraordinary safety of the lzip format allows lziprecover to exploit
-the redundance that occurrs naturally when making compressed backups.
-Lziprecover can recover data that would not be recoverable from files
-compressed in other formats. Let's see two examples of how much better is
-lzip compared with gzip and bzip2 with respect to data safety:
+ The extraordinary safety of the lzip format allows lziprecover to use the
+redundancy that occurs naturally when making compressed backups. Lziprecover
+can recover data that would not be recoverable from files compressed in
+other formats. See these two examples of the data recovery capabilities
+offered by lziprecover:
* Menu:
@@ -612,7 +828,7 @@ lzip compared with gzip and bzip2 with respect to data safety:

File: lziprecover.info, Node: Merging with a backup, Next: Reproducing a mailbox, Up: Data safety
-3.1 Recovering a file using a damaged backup
+5.1 Recovering a file using a damaged backup
============================================
Let's suppose that you made a compressed backup of your valuable scientific
@@ -639,7 +855,7 @@ possible to recover a file with thousands of errors.

File: lziprecover.info, Node: Reproducing a mailbox, Prev: Merging with a backup, Up: Data safety
-3.2 Recovering new messages using an old backup
+5.2 Recovering new messages using an old backup
===============================================
Let's suppose that you make periodic backups of your email messages stored
@@ -683,9 +899,379 @@ performance-of-reproduce::) is almost as high as that of merging two
identical backups (*note performance-of-merge::).

-File: lziprecover.info, Node: Repairing one byte, Next: Merging files, Prev: Data safety, Up: Top
+File: lziprecover.info, Node: Fec files, Next: Repairing one byte, Prev: Data safety, Up: Top
+
+6 Forward Error Correction
+**************************
+
+Forward Error Correction (FEC) is any way of protecting data from corruption
+by creating redundant data that can be used later to repair errors in the
+protected data. Lziprecover uses a Hilbert-based Reed-Solomon code to create
+one fec file (with extension '.fec') for each file that needs to be
+protected. The fec files created by lziprecover are reproducible.
+
+ Reed-Solomon is the most space-efficient Error Correcting Code (ECC) for
+data stored in block devices. It creates redundant FEC blocks in such a way
+that X FEC blocks allow the recovery of any combination of up to X lost
+data blocks. All the blocks (data and FEC) are of the same size, which in
+fec files must be a multiple of 512 bytes. Reed-Solomon is not optimum for
+corruption affecting random single bits in a file because each corrupt bit
+invalidates the whole block containing it.
+
+ Usually, a corrupt file does not provide an indication of where the
+corruption is located. Therefore, each fec file stores one or two arrays of
+CRCs to detect the corrupt blocks in the protected file and mark them as
+erasures (missing data blocks). Thus, a fec file creates its own Binary
+Erasure Channel (BEC) for the protected file.
+
+ Lziprecover's FEC algorithm can repair any kind of file, but its ability
+to repair lzip files is greater than for other kinds of files. Lziprecover
+can use the statistical properties of lzip data to repair a lzip file
+rescued with ddrescue, even if the fec file is so damaged that it has lost
+both CRC arrays. Lzip data helps to locate the corrupt parts of the file
+even without a BEC. For this to work, at least one chksum packet header
+must be intact to provide 'prodata_size', 'prodata_md5', and 'gf16'.
+
+* Menu:
+
+* How Reed-Solomon works:: It is basically an equation system
+* Implementation details:: How lziprecover implements Reed-Solomon
+* Creating fec files:: How to create fec files
+* Testing with fec files:: How to test files using fec files
+* Repairing with fec files:: How to repair files using fec files
+* Fec file format:: Detailed format of the redundant FEC data
+
+
+File: lziprecover.info, Node: How Reed-Solomon works, Next: Implementation details, Up: Fec files
+
+6.1 How Reed-Solomon works
+==========================
+
+To illustrate how Reed-Solomon works on the BEC, we will use an example with
+standard arithmetic on integers. Note that in lziprecover's FEC each
+variable is a (potentially large) block of data, not a single value.
+
+ Given variables x, y, and z (the protected data) whose values are known,
+an equation system can be created where the values of three FEC variables
+p, q, and r can be computed from the values of x, y, and z:
+
+ x + y + z = p (1)
+ x + 2y + 3z = q (2)
+ x + 3y + 2z = r (3)
+
+ If we have that x = 1, y = 2, and z = 3, then p = 6, q = 14, and r = 13:
+
+ 1 + 2 + 3 = 6 (1a)
+ 1 + 4 + 9 = 14 (2a)
+ 1 + 6 + 6 = 13 (3a)
+
+ Now, if the values of x and y are lost because of data corruption, they
+can be recomputed by using any two of the three equations above. For
+example, if we replace the known values of z, p, and q in equations (1) and
+(2) we get:
+
+ x + y + 3 = 6 (1b)
+ x + 2y + 9 = 14 (2b)
+
+ In order to solve the two equations above, we first reduce them by
+subtracting the values of the known data variables from the values of the
+FEC variables:
+
+ x + y = 6 - 3 (1c)
+ x + 2y = 14 - 9 (2c)
+
+ which gives the reduced FEC values P = 3 and Q = 5.
+
+ Then we create a square matrix 'A' with the coefficients of x and y in
+the equations above, and invert it. 'A' must be invertible and must not
+have any zero element. We also create the column vector D with the missing
+data variables x and y, and the column vector F with the reduced FEC values
+P and Q:
+
+ D = [ x ]   A = [ 1  1 ]   A^-1 = [  2 -1 ]   F = [ P ]
+     [ y ]       [ 1  2 ]          [ -1  1 ]       [ Q ]
+
+ Then we multiply the inverse matrix 'A^-1' by the column vector F to
+obtain the values of x and y (D = A^-1 * F):
+
+ x = 2P - Q (1d)
+ y = -P + Q (2d)
+
+ which finally gives us the lost values x = 1 and y = 2:
+
+ x = 2 * 3 - 5 (1e)
+ y = -3 + 5 (2e)
+
+
+File: lziprecover.info, Node: Implementation details, Next: Creating fec files, Prev: How Reed-Solomon works, Up: Fec files
+
+6.2 How lziprecover implements Reed-Solomon
+===========================================
+
+Lziprecover's implementation of Reed-Solomon can manage up to 128 data
+blocks + 128 FEC blocks when using a Galois Field of size 256 (GF(2^8)), or
+up to 32768 data blocks + 32768 FEC blocks when using a Galois Field of size
+65536 (GF(2^16)). GF(2^8) is included because it is faster for files up to
+about 1 MB. The number of FEC blocks is currently limited to 2048 because
+of memory and time limits. Inverting a matrix for 32768 FEC blocks would
+take a week and require 2 GiB of RAM.
+
+ The file is repaired in memory. Therefore, enough virtual memory
+(RAM + swap) to contain the protected file and the FEC data is required.
+The file size is limited to less than 2 GiB on 32-bit systems. The repaired
+file is checked with an MD5 digest.
+
+ Lziprecover divides the input file into 1 to 32768 data blocks of the same
+size, which ranges from 512 bytes to 128 TiB, for a total protected file
+size of up to 4 EiB. It then uses a Hilbert matrix 'A' to create up to 2048
+FEC blocks of the same size as the data blocks. Lziprecover corrects errors
+in the data blocks by first reducing the equation system to M equations
+with M unknowns each, where M is the number of missing data blocks. Then it
+multiplies the inverse of the relevant submatrix of 'A' by the vector of
+results of the M equations to recompute the values of the missing data
+blocks.
+
+ Lziprecover implements GF(2^8) with polynomial 0x11D and GF(2^16) with
+polynomial 0x1100B.
+
+ A Hilbert matrix is defined as A[i][j] = 1 / (i + j + 1) for i,j >= 0.
+But, as in a Galois Field the addition is the exclusive or operation,
+applying the Hilbert definition produces a singular (non-invertible)
+matrix. To avoid this problem, lziprecover uses a Hilbert matrix starting
+at row r0 = gf_size / 2. I.e., A[i][j] = 1 / (i + j + r0) for
+0 <= i,j < r0. ('gf_size' is the size of the Galois Field).
+
+
+File: lziprecover.info, Node: Creating fec files, Next: Testing with fec files, Prev: Implementation details, Up: Fec files
+
+6.3 How to create fec files
+===========================
+
+Example 1: Create the fec file 'archive.tar.lz.fec' and store it in the
+same directory where 'archive.tar.lz' is.
+
+ lziprecover -v -Fc archive.tar.lz
+
+Example 2: Create the fec file 'archive.tar.lz.fec' and store it in the
+directory 'fec'.
+
+ lziprecover -v -Fc -o fec/ archive.tar.lz
+
+Example 3: Create recursively one fec file for each file in the directory
+'datadir' and store them in the tree under the directory 'fec'.
+
+ lziprecover -v -r -Fc -o fec/ datadir
+
+Example 4: Create fec files for a collection of photos stored in directory
+'photos' and store them in the directory 'photos-fec'.
+
+ lziprecover -v -Fc -o photos-fec/ photos/*
+
+
+File: lziprecover.info, Node: Testing with fec files, Next: Repairing with fec files, Prev: Creating fec files, Up: Fec files
+
+6.4 How to test files using fec files
+=====================================
+
+Example 1: Test the integrity of 'archive.tar.lz' using the fec file
+'archive.tar.lz.fec' from the same directory.
+
+ lziprecover -v -Ft archive.tar.lz
+
+Example 2: Test the integrity of the files 'foo.lz' and 'bar.lz' using the
+corresponding fec files stored in the directory 'fec'.
+
+ lziprecover -v -Ft --fec-file=fec/ foo.lz bar.lz
+
+Example 3: Test recursively the integrity of all the files in the directory
+'datadir' using the fec files stored in the directory tree under the
+directory 'fec'.
+
+ lziprecover -v -r -Ft --fec-file=fec/ datadir
+
+Example 4: Test the integrity of a collection of photos stored in directory
+'photos' using fec files from directory 'photos-fec'.
+
+ lziprecover -v -Ft --fec-file=photos-fec/ photos/*
+
+
+File: lziprecover.info, Node: Repairing with fec files, Next: Fec file format, Prev: Testing with fec files, Up: Fec files
-4 Repairing one byte
+6.5 How to repair files using fec files
+=======================================
+
+Example 1: Repair the file 'archive.tar.lz' using the fec file
+'archive.tar.lz.fec' from the same directory. The repaired file is written
+to 'archive_fixed.tar.lz' in the same directory.
+
+ lziprecover -v -Fr archive.tar.lz
+
+Example 2: Repair the files 'foo.lz' and 'bar.lz' using the corresponding
+fec files stored in the directory 'fec'.
+
+ lziprecover -v -Fr --fec-file=fec/ foo.lz bar.lz
+
+Example 3: Repair recursively all the damaged files in the directory
+'datadir' using the fec files stored in the directory tree under the
+directory 'fec'.
+
+ lziprecover -v -r -Fr --fec-file=fec/ datadir
+
+Example 4: Recover a collection of photos from a damaged external drive
+('/dev/sdc1'). The photos are in directory 'photos', and the fec files are
+in directory 'photos-fec'.
+
+ ddrescue -b4096 -r10 /dev/sdc1 hdimage mapfile
+ mount -o loop,ro hdimage /mnt/hdimage
+ cp -a /mnt/hdimage/photos photos
+ cp -a /mnt/hdimage/photos-fec photos-fec
+ umount /mnt/hdimage
+ lziprecover -v -Fr --fec-file=photos-fec/ photos/*
+ (Check and rename repaired files. They are named 'photos/*_fixed')
+
+
+File: lziprecover.info, Node: Fec file format, Prev: Repairing with fec files, Up: Fec files
+
+6.6 Fec file format
+===================
+
+A fec file consists of one chksum packet, one or more fec packets, and one
+optional second chksum packet. The first chksum packet must be the first
+packet in the file, but the second chksum packet does not need to be the
+last packet in the file. The essential information is stored in the chksum
+packet(s), while the potentially numerous fec packets are kept as simple as
+possible:
+
++=================+===============+=================+
+| Chksum packet | Fec packets | Chksum packet |
++=================+===============+=================+
+
+ All multibyte values are stored in little endian order except
+'prodata_md5'.
+
+ The 'fbs' (fec_block_size) field is coded as a little endian 16-bit
+floating point unsigned integer with an 11-bit mantissa at bits 0-10 and a
+5-bit exponent at bits 11-15. The mantissa is an integer between 0 and 2047.
+The exponent is an integer between 9 and 40, stored with a bias of -9; the
+exponent 9 is stored as 0, and 40 is stored as 31. Values are stored with
+the largest mantissa and smallest exponent; 4096 is stored as m=8, e=0. This
+encoding can store values from 0 bytes to 2047 TiB (2^51 - 2^40 bytes) with
+a maximum resolution of 512 bytes, but 0 and the values beyond 128 TiB are
+not used:
+
+ 5 11
++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+| exp | mantissa | The 'fbs' (fec_block_size) field
++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+15 11 10 0
+
+ The fec file format is 4-byte aligned for speed because FEC data are
+created and decoded 4 bytes at a time. The 4-byte alignment has been
+achieved by a careful design, without adding any padding bytes.
+
+ The fec file format has an overhead of 8 bytes per protected data block,
+plus 16 bytes per FEC block, plus 80 bytes.
+
+6.6.1 Chksum packet
+-------------------
+
+A chksum packet contains one CRC for each of the N data blocks in the
+protected file, and is structured as shown in the following table. All
+lengths and offsets are in decimal:
+
+Field Name Offset Length (in bytes)
+------------------------------------------
+magic 0 4
+version 4 1
+flags 5 1
+fbs 6 2
+prodata_size 8 8
+prodata_md5 16 16
+header_crc 32 4
+crc_array 36 4N
+payload_crc 36 + 4N 4
+
+'magic'
+ A four byte string identifying the chksum packet (and therefore the fec
+ file), with the value 0xB3, 0xA5, 0xB6, 0xAF. (The complement of
+ "LZIP").
+
+'version'
+ Just in case something needs to be modified in the future. 0 for now.
+
+'flags'
+ Bit 0 (is_crc_c): crc_array contains CRC32 (0) or CRC32-C (1).
+ Bit 1 (gf16): Galois field is GF(2^8) (0) or GF(2^16) (1).
+ Bits 2-7: zero.
+
+'fbs (coded fec_block_size)'
+ Number of FEC bytes per block. It is a multiple of 512 bytes between
+ 512 bytes and 128 TiB. *Note fbs::.
+
+'prodata_size'
+ Size of the protected file. 1 byte to 4 EiB.
+
+'prodata_md5'
+ Md5sum of the protected file. Stored in big endian order.
+
+'header_crc'
+ CRC32 of the previous fields, including magic.
+
+'crc_array'
+ Array of N CRCs corresponding to the N blocks in which the protected
+ file is divided. N is 'ceil( prodata_size / fbs )'. The first chksum
+ packet contains an array of CRC32s, while the second chksum packet (if
+ present) contains an array of CRC32-Cs.
+
+ For the expected thousands of bit flips caused by a zeroed sector, a
+ symmetric CRC like CRC32 is probably better than CRC32-C, which
+ detects all the errors with an odd number of bit flips at the expense
+ of a larger number of undetected errors with an even number of bit
+ flips.
+
+'payload_crc'
+ CRC32 of the crc_array.
+
+6.6.2 Fec packet
+----------------
+
+A fec packet contains one FEC block and is structured as shown in the
+following table. All lengths and offsets are in decimal:
+
+Field Name Offset Length (in bytes)
+------------------------------------------
+magic 0 4
+fbn 4 2
+fbs 6 2
+header_crc 8 4
+fec_block 12 fbs
+payload_crc 12 + fbs 4
+
+'magic'
+ A four byte string identifying the fec packet, with the value "\xB3FEC"
+ (0xB3, 0x46, 0x45, 0x43).
+
+'fbn (fec_block_number)'
+ Number of this FEC block (0 to 32767). Required to compute the decode
+ matrix.
+
+'fbs (coded fec_block_size)'
+ Number of FEC bytes per block. It is a multiple of 512 bytes between
+ 512 bytes and 128 TiB. *Note fbs::.
+
+'header_crc'
+ CRC32 of the previous fields, including magic.
+
+'fec_block'
+ The FEC block.
+
+'payload_crc'
+ CRC32 of the fec_block.
+
+
+File: lziprecover.info, Node: Repairing one byte, Next: Merging files, Prev: Fec files, Up: Top
+
+7 Repairing one byte
********************
Lziprecover can repair perfectly most files with small errors (up to one
@@ -695,14 +1281,15 @@ bit to the original. This makes lzip files resistant to bit flip, one of the
most common forms of data corruption.
The file is repaired in memory. Therefore, enough virtual memory
-(RAM + swap) to contain the largest damaged member is required.
+(RAM + swap) to contain the largest damaged member is required. Member size
+is limited to 2 GiB on 32-bit systems.
The error may be located anywhere in the file except in the first 5
-bytes of each member header or in the 'Member size' field of the trailer
-(last 8 bytes of each member). If the error is in the header it can be
-easily repaired with a text editor like GNU Moe (*note File format::). If
-the error is in the member size, it is enough to ignore the message about
-'bad member size' when decompressing.
+bytes of each member header (magic and version) or in the 'Member size'
+field of the trailer (last 8 bytes of each member). If the error is in the
+header it can be easily repaired with a text editor like GNU Moe (*note
+File format::). If the error is in the member size, it is enough to ignore
+the message about 'bad member size' when decompressing.
Bit flip happens when one bit in the file is changed from 0 to 1 or vice
versa. It may be caused by bad RAM or even by natural radiation. I have
@@ -712,7 +1299,7 @@ seen a case of bit flip in a file stored on an USB flash drive.
transmission errors or I/O errors just affect one byte, or even one bit, of
the file. Also, unlike magnetic media, where errors usually affect a whole
sector, solid-state storage devices tend to produce single-byte errors,
-making of lzip the perfect format for data stored on such devices.
+which lziprecover can repair.
Repairing a file can take some time. Small files or files with the error
located near the beginning can be repaired in a few seconds. But repairing
@@ -726,7 +1313,7 @@ repairs more efficiently the worst errors.

File: lziprecover.info, Node: Merging files, Next: Reproducing one sector, Prev: Repairing one byte, Up: Top
-5 Merging files
+8 Merging files
***************
If you have several copies of a file but all of them are too damaged to
@@ -780,10 +1367,8 @@ identical to the original, in just 5 seconds:
than the number of corrupt bytes (3104) because contiguous corrupt bytes
are counted as a single multibyte error.
-
Example 1: Recover a compressed backup from two copies on CD-ROM with
-error-checked merging of copies. *Note GNU ddrescue manual: (ddrescue)Top,
-for details about ddrescue.
+error-checked merging of copies.
ddrescue -d -r1 -b2048 /dev/cdrom cdimage1 mapfile1
mount -t iso9660 -o loop,ro cdimage1 /mnt/cdimage
@@ -799,7 +1384,6 @@ for details about ddrescue.
lziprecover -tv backup.tar.lz
backup.tar.lz: ok
-
Example 2: Recover the first volume of those created with the command
'lzip -b 32MiB -S 650MB big_db' from two copies, 'big_db1_00001.lz' and
'big_db2_00001.lz', with member 07 damaged in the first copy, member 18
@@ -814,7 +1398,7 @@ correct file produced is saved in 'big_db_00001.lz'.

File: lziprecover.info, Node: Reproducing one sector, Next: Tarlz, Prev: Merging files, Up: Top
-6 Reproducing one sector
+9 Reproducing one sector
************************
Lziprecover can recover a zeroed sector in a lzip file by concatenating the
@@ -836,7 +1420,8 @@ reproduction can't be done if the zeroed sector overlaps with the first 15
bytes of a member, or if the zeroed sector is smaller than 8 bytes.
The file is reproduced in memory. Therefore, enough virtual memory
-(RAM + swap) to contain the damaged member is required.
+(RAM + swap) to contain the damaged member is required. Member size is
+limited to 2 GiB on 32-bit systems.
To understand how it works, take any lzipped file, say 'foo.lz',
decompress it (keeping the original), and try to reproduce an artificially
@@ -889,7 +1474,7 @@ header, and that the archive can be reproduced. The tarlz format has minimum
overhead. It uses basic ustar headers, and only adds extended pax headers
when they are required.
-6.1 Performance of '--reproduce'
+9.1 Performance of '--reproduce'
================================
Reproduce mode is especially useful when recovering a corrupt backup (or a
@@ -942,7 +1527,6 @@ for a different version of the software.
Member reproduced successfully.
Copy of input file reproduced successfully.
-
Example 2: Recover a damaged backup with a zeroed sector of 4096 bytes at
file position 1019904, using as reference a previous backup. The damaged
backup comes from a damaged partition copied with ddrescue.
@@ -964,7 +1548,6 @@ backup comes from a damaged partition copied with ddrescue.
Member reproduced successfully.
Copy of input file reproduced successfully.
-
Example 3: Recover a damaged backup with a zeroed sector of 4096 bytes at
file position 1019904, using as reference a file from the filesystem. (If
the zeroed sector encodes (part of) a tar header, the tarball can't be
@@ -1000,8 +1583,8 @@ has been renamed.

File: lziprecover.info, Node: Tarlz, Next: File names, Prev: Reproducing one sector, Up: Top
-7 Options supporting the tar.lz format
-**************************************
+10 Options supporting the tar.lz format
+***************************************
Tarlz is a massively parallel (multi-threaded) combined implementation of
the tar archiver and the lzip compressor.
@@ -1021,9 +1604,8 @@ alignment between tar members and lzip members minimizes the amount of data
lost in case of corruption. In this chapter we'll explain the ways in which
lziprecover can recover and process multimember tar.lz archives.
-
-7.1 Recovering damaged multimember tar.lz archives
-==================================================
+10.1 Recovering damaged multimember tar.lz archives
+===================================================
If you have several copies of the damaged archive, try merging them first
because merging has a high probability of success. *Note Merging files::. If
@@ -1046,7 +1628,7 @@ one byte::. If the command below prints something like
'Copy of input file repaired successfully.' you are done and
'archive_fixed.tar.lz' now contains the recovered archive:
- lziprecover -v -R archive.tar.lz
+ lziprecover -v --byte-repair archive.tar.lz
If all the above fails, and the archive was created with tarlz, you may
save the damaged members for later and then copy the good members to another
@@ -1064,9 +1646,8 @@ possible from each damaged member in 'bad_members.tar.lz':
cd tmp
tarlz --keep-damaged -xvf ../bad_members.tar.lz
-
-7.2 Processing multimember tar.lz archives
-==========================================
+10.2 Processing multimember tar.lz archives
+===========================================
Lziprecover is able to copy a list of members from a file to another. For
example the command
@@ -1077,96 +1658,25 @@ the last member, which in an appendable tar.lz archive contains the
end-of-file blocks.

-File: lziprecover.info, Node: File names, Next: File format, Prev: Tarlz, Up: Top
+File: lziprecover.info, Node: File names, Next: Trailing data, Prev: Tarlz, Up: Top
-8 Names of the files produced by lziprecover
-********************************************
+11 Names of the files produced by lziprecover
+*********************************************
The name of the fixed file produced by '--byte-repair' and '--merge' is
made by appending the string '_fixed.lz' to the original file name. If the
original file name ends with one of the extensions '.tar.lz', '.lz', or
'.tlz', the string '_fixed' is inserted before the extension.
-
-File: lziprecover.info, Node: File format, Next: Trailing data, Prev: File names, Up: Top
-
-9 File format
-*************
-
-Perfection is reached, not when there is no longer anything to add, but
-when there is no longer anything to take away.
--- Antoine de Saint-Exupery
-
-
- In the diagram below, a box like this:
-
-+---+
-| | <-- the vertical bars might be missing
-+---+
-
- represents one byte; a box like this:
-
-+==============+
-| |
-+==============+
-
- represents a variable number of bytes.
-
-
- A lzip file consists of one or more independent "members" (compressed
-data sets). The members simply appear one after another in the file, with no
-additional information before, between, or after them. Each member can
-encode in compressed form up to 16 EiB - 1 byte of uncompressed data. The
-size of a multimember file is unlimited.
-
- Each member has the following structure:
-
-+--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-| ID string | VN | DS | LZMA stream | CRC32 | Data size | Member size |
-+--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
- All multibyte values are stored in little endian order.
-
-'ID string (the "magic" bytes)'
- A four byte string, identifying the lzip format, with the value "LZIP"
- (0x4C, 0x5A, 0x49, 0x50).
-
-'VN (version number, 1 byte)'
- Just in case something needs to be modified in the future. 1 for now.
-
-'DS (coded dictionary size, 1 byte)'
- The dictionary size is calculated by taking a power of 2 (the base
- size) and subtracting from it a fraction between 0/16 and 7/16 of the
- base size.
- Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).
- Bits 7-5 contain the numerator of the fraction (0 to 7) to subtract
- from the base size to obtain the dictionary size.
- Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB
- Valid values for dictionary size range from 4 KiB to 512 MiB.
-
-'LZMA stream'
- The LZMA stream, finished by an "End Of Stream" marker. Uses default
- values for encoder properties. *Note Stream format: (lzip)Stream
- format, for a complete description.
-
-'CRC32 (4 bytes)'
- Cyclic Redundancy Check (CRC) of the original uncompressed data.
-
-'Data size (8 bytes)'
- Size of the original uncompressed data.
-
-'Member size (8 bytes)'
- Total size of the member, including header and trailer. This field acts
- as a distributed index, improves the checking of stream integrity, and
- facilitates the safe recovery of undamaged members from multimember
- files. Lzip limits the member size to 2 PiB to prevent the data size
- field from overflowing.
-
+ The name of the fixed file produced by '--fec=repair' is made by
+appending the string '_fixed' to the original file name. If the original
+file name ends with one of the extensions '.tar.lz', '.lz', or '.tlz', the
+string '_fixed' is inserted before the extension.

-File: lziprecover.info, Node: Trailing data, Next: Examples, Prev: File format, Up: Top
+File: lziprecover.info, Node: Trailing data, Next: Examples, Prev: File names, Up: Top
-10 Extra data appended to the file
+12 Extra data appended to the file
**********************************
Sometimes extra data are found appended to a lzip file after the last
@@ -1176,7 +1686,7 @@ member. Such trailing data may be:
example when writing to a tape. It is safe to append any amount of
padding zero bytes to a lzip file.
- * Useful data added by the user; an "End Of File" string (to check that
+ * Useful data added by the user; an 'End Of File' string (to check that
the file has not been truncated), a cryptographically secure hash, a
description of file contents, etc. It is safe to append any amount of
text to a lzip file as long as none of the first four bytes of the
@@ -1223,7 +1733,6 @@ Example 1: Add a comment or description to a compressed file.
# This command removes the comment from file.lz
lziprecover --remove=tdata file.lz
-
Example 2: Add and check a cryptographically secure hash. (This may be
convenient, but a separate copy of the hash must be kept in a safe place to
guarantee that both file and hash have not been maliciously replaced).
@@ -1235,7 +1744,7 @@ guarantee that both file and hash have not been maliciously replaced).

File: lziprecover.info, Node: Examples, Next: Unzcrash, Prev: Trailing data, Up: Top
-11 A small tutorial with examples
+13 A small tutorial with examples
*********************************
Example 1: Extract all the files from archive 'foo.tar.lz'.
@@ -1244,19 +1753,16 @@ Example 1: Extract all the files from archive 'foo.tar.lz'.
or
lziprecover -cd foo.tar.lz | tar -xf -
-
Example 2: Restore a regular file from its compressed version 'file.lz'. If
the operation is successful, 'file.lz' is removed.
lziprecover -d file.lz
-
Example 3: Check the integrity of the compressed file 'file.lz' and show
status.
lziprecover -tv file.lz
-
Example 4: The right way of concatenating the decompressed output of two or
more compressed files. *Note Trailing data::.
@@ -1269,29 +1775,25 @@ more compressed files. *Note Trailing data::.
Or keeping the trailing data of the last file like this
lziprecover --strip=empty file1.lz file2.lz file3.lz > file123.lz
-
Example 5: Decompress 'file.lz' partially until 10 KiB of decompressed data
are produced.
lziprecover -D 0,10KiB file.lz
-
Example 6: Decompress 'file.lz' partially from decompressed byte at offset
10000 to decompressed byte at offset 14999 (5000 bytes are produced).
lziprecover -D 10000-15000 file.lz
-
Example 7: Repair a corrupt byte in the file 'file.lz'. (Indented lines are
abridged diagnostic messages from lziprecover).
- lziprecover -v -R file.lz
+ lziprecover -v --byte-repair file.lz
Copy of input file repaired successfully.
lziprecover -tv file_fixed.lz
file_fixed.lz: ok
mv file_fixed.lz file.lz
-
Example 8: Split the multimember file 'file.lz' and write each member in
its own 'recXXXfile.lz' file. Then use 'lziprecover -t' to test the
integrity of the resulting files.
@@ -1302,7 +1804,7 @@ integrity of the resulting files.

File: lziprecover.info, Node: Unzcrash, Next: Problems, Prev: Examples, Up: Top
-12 Testing the robustness of decompressors
+14 Testing the robustness of decompressors
******************************************
*Note --unzcrash::, for a faster way of testing the robustness of lzip.
@@ -1358,7 +1860,7 @@ without being decompressed first. Use '--zcmp=false' to disable comparisons.
The compressed FILE must not contain errors and the decompressor being
tested must decompress it correctly for the comparisons to work.
- unzcrash supports the following options:
+unzcrash supports the following options:
'-h'
'--help'
@@ -1379,6 +1881,7 @@ tested must decompress it correctly for the comparisons to work.
8 28 56 70 56 28 8 1
Examples of RANGE Tests errors of N-bits
+ -------------------------------------------
1 1
1,2,3 1, 2, 3
2-4 2, 3, 4
@@ -1387,10 +1890,11 @@ tested must decompress it correctly for the comparisons to work.
'-B[SIZE][,VALUE]'
'--block[=SIZE][,VALUE]'
- Test block errors of given SIZE, simulating a whole sector I/O error.
- SIZE defaults to 512 bytes. VALUE defaults to 0. By default, only
- contiguous, non-overlapping blocks are tested, but this may be changed
- with the option '--delta'.
+ Test block errors of given SIZE, simulating a whole sector I/O error
+ by setting all the bytes in the block to VALUE before attempting
+ decompression. SIZE defaults to 512 bytes. VALUE defaults to 0. By
+ default, only contiguous, non-overlapping blocks are tested, but this
+ may be changed with the option '--delta'.
'-d N'
'--delta=N'
@@ -1456,7 +1960,7 @@ bug) which caused unzcrash to panic.

File: lziprecover.info, Node: Problems, Next: Concept index, Prev: Unzcrash, Up: Top
-13 Reporting bugs
+15 Reporting bugs
*****************
There are probably bugs in lziprecover. There are certainly errors and
@@ -1477,56 +1981,78 @@ Concept index
* Menu:
-* bugs: Problems. (line 6)
-* data safety: Data safety. (line 6)
-* examples: Examples. (line 6)
-* file format: File format. (line 6)
-* file names: File names. (line 6)
-* getting help: Problems. (line 6)
-* introduction: Introduction. (line 6)
-* invoking: Invoking lziprecover. (line 6)
-* merging files: Merging files. (line 6)
-* merging with a backup: Merging with a backup. (line 6)
-* options: Invoking lziprecover. (line 6)
-* repairing one byte: Repairing one byte. (line 6)
-* reproducing a mailbox: Reproducing a mailbox. (line 6)
-* reproducing one sector: Reproducing one sector. (line 6)
-* tarlz: Tarlz. (line 6)
-* trailing data: Trailing data. (line 6)
-* unzcrash: Unzcrash. (line 6)
-* usage: Invoking lziprecover. (line 6)
-* version: Invoking lziprecover. (line 6)
+* argument syntax: Argument syntax. (line 6)
+* bugs: Problems. (line 6)
+* chksum packet: Fec file format. (line 46)
+* data safety: Data safety. (line 6)
+* examples: Examples. (line 6)
+* fec create: Creating fec files. (line 6)
+* fec file format: Fec file format. (line 6)
+* fec packet: Fec file format. (line 106)
+* fec repair: Repairing with fec files.
+ (line 6)
+* fec test: Testing with fec files. (line 6)
+* file format: File format. (line 6)
+* file names: File names. (line 6)
+* forward error correction: Fec files. (line 6)
+* getting help: Problems. (line 6)
+* introduction: Introduction. (line 6)
+* invoking: Invoking lziprecover. (line 6)
+* merging files: Merging files. (line 6)
+* merging with a backup: Merging with a backup. (line 6)
+* options: Invoking lziprecover. (line 6)
+* Reed-Solomon details: Implementation details. (line 6)
+* Reed-Solomon tutorial: How Reed-Solomon works. (line 6)
+* repairing one byte: Repairing one byte. (line 6)
+* reproducing a mailbox: Reproducing a mailbox. (line 6)
+* reproducing one sector: Reproducing one sector. (line 6)
+* tarlz: Tarlz. (line 6)
+* trailing data: Trailing data. (line 6)
+* unzcrash: Unzcrash. (line 6)
+* usage: Invoking lziprecover. (line 6)
+* version: Invoking lziprecover. (line 6)

Tag Table:
Node: Top226
-Node: Introduction1406
-Node: Invoking lziprecover5412
-Ref: --trailing-error6359
-Ref: range-format8791
-Ref: --reproduce9126
-Ref: --byte-repair13411
-Ref: --unzcrash23209
-Node: Data safety27459
-Node: Merging with a backup29443
-Node: Reproducing a mailbox30706
-Node: Repairing one byte33160
-Node: Merging files35220
-Ref: performance-of-merge36399
-Ref: ddrescue-example38008
-Node: Reproducing one sector39295
-Ref: performance-of-reproduce43181
-Ref: ddrescue-example245855
-Node: Tarlz48275
-Node: File names51933
-Node: File format52395
-Node: Trailing data55082
-Node: Examples58397
-Ref: concat-example58972
-Node: Unzcrash60364
-Node: Problems66704
-Node: Concept index67256
+Node: Introduction1529
+Node: Invoking lziprecover5451
+Ref: --trailing-error6372
+Ref: --byte-repair7466
+Ref: range-format9547
+Ref: --reproduce9792
+Ref: --unzcrash27483
+Node: Argument syntax32076
+Node: File format33987
+Node: Data safety36741
+Node: Merging with a backup38960
+Node: Reproducing a mailbox40223
+Node: Fec files42677
+Node: How Reed-Solomon works44943
+Node: Implementation details47114
+Node: Creating fec files49179
+Node: Testing with fec files50023
+Node: Repairing with fec files50978
+Ref: ddrescue-example51796
+Node: Fec file format52306
+Ref: fbs53073
+Node: Repairing one byte57094
+Node: Merging files59186
+Ref: performance-of-merge60365
+Ref: ddrescue-example261973
+Node: Reproducing one sector63189
+Ref: performance-of-reproduce67126
+Ref: ddrescue-example369799
+Node: Tarlz72218
+Node: File names75891
+Node: Trailing data76624
+Node: Examples79937
+Ref: concat-example80509
+Node: Unzcrash81908
+Ref: --set-byte86520
+Node: Problems88378
+Node: Concept index88930

End Tag Table
diff --git a/doc/lziprecover.texi b/doc/lziprecover.texi
index 0d32d9d..41a9011 100644
--- a/doc/lziprecover.texi
+++ b/doc/lziprecover.texi
@@ -6,8 +6,8 @@
@finalout
@c %**end of header
-@set UPDATED 20 January 2024
-@set VERSION 1.24
+@set UPDATED 8 January 2025
+@set VERSION 1.25
@dircategory Compression
@direntry
@@ -38,13 +38,15 @@ This manual is for Lziprecover (version @value{VERSION}, @value{UPDATED}).
@menu
* Introduction:: Purpose and features of lziprecover
* Invoking lziprecover:: Command-line interface
+* Argument syntax:: By convention, options start with a hyphen
+* File format:: Detailed format of the compressed file
* Data safety:: Protecting data from accidental loss
+* Fec files:: Forward Error Correction
* Repairing one byte:: Fixing bit flips and similar errors
* Merging files:: Fixing several damaged copies
* Reproducing one sector:: Fixing a missing (zeroed) sector
* Tarlz:: Options supporting the tar.lz format
* File names:: Names of the files produced by lziprecover
-* File format:: Detailed format of the compressed file
* Trailing data:: Extra data appended to the file
* Examples:: A small tutorial with examples
* Unzcrash:: Testing the robustness of decompressors
@@ -53,7 +55,7 @@ This manual is for Lziprecover (version @value{VERSION}, @value{UPDATED}).
@end menu
@sp 1
-Copyright @copyright{} 2009-2024 Antonio Diaz Diaz.
+Copyright @copyright{} 2009-2025 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission to copy,
distribute, and modify it.
@@ -66,11 +68,14 @@ distribute, and modify it.
@uref{http://www.nongnu.org/lzip/lziprecover.html,,Lziprecover}
is a data recovery tool and decompressor for files in the lzip
-compressed data format (.lz). Lziprecover is able to repair slightly damaged
-files (up to one single-byte error per member), produce a correct file by
-merging the good parts of two or more damaged copies, reproduce a missing
-(zeroed) sector using a reference file, extract data from damaged files,
-decompress files, and test integrity of files.
+compressed data format (.lz). Lziprecover also provides Forward Error
+Correction (FEC) able to repair any kind of file.
+
+Lziprecover is able to repair slightly damaged lzip files (up to one
+single-byte error per member), produce a correct file by merging the good
+parts of two or more damaged copies, reproduce a missing (zeroed) sector
+using a reference file, extract data from damaged files, decompress files,
+and test integrity of files.
Lziprecover can remove the damaged members from multimember files, for
example multimember tar.lz archives.
@@ -78,35 +83,19 @@ example multimember tar.lz archives.
Lziprecover provides random access to the data in multimember files; it only
decompresses the members containing the desired data.
-Lziprecover facilitates the management of metadata stored as trailing data
-in lzip files.
-
Lziprecover is not a replacement for regular backups, but a last line of
defense for the case where the backups are also damaged.
-The lzip file format is designed for data sharing and long-term archiving,
-taking into account both data integrity and decoder availability:
-
-@itemize @bullet
-@item
-The lzip format provides very safe integrity checking and some data
-recovery means. The program lziprecover can repair bit flip errors
-(one of the most common forms of data corruption) in lzip files, and
-provides data recovery capabilities, including error-checked merging
-of damaged copies of a file. @xref{Data safety}.
-
-@item
-The lzip format is as simple as possible (but not simpler). The lzip
-manual provides the source code of a simple decompressor along with a
-detailed explanation of how it works, so that with the only help of the
-lzip manual it would be possible for a digital archaeologist to extract
-the data from a lzip file long after quantum computers eventually
-render LZMA obsolete.
-
-@item
-Additionally the lzip reference implementation is copylefted, which
-guarantees that it will remain free forever.
-@end itemize
+Lziprecover is able to provide unique data recovery capabilities because the
+lzip format is extraordinarily safe. The simple and safe design of the file
+format complements the embedded error detection provided by the LZMA data
+stream. Any distance larger than the dictionary size acts as a forbidden
+symbol, allowing the decompressor to detect the approximate position of
+errors, and leaving little work for the check sequence (CRC and data sizes)
+in the detection of errors. Lzip is usually able to detect all possible bit
+flips in the compressed data without resorting to the check sequence. It
+would be difficult to write an automatic recovery tool like lziprecover for
+the gzip format. And, as far as I know, it has never been written.
A nice feature of the lzip format is that a corrupt byte is easier to repair
the nearer it is to the beginning of the file. Therefore, with the help of
@@ -124,10 +113,19 @@ Lziprecover is able to recover or decompress files produced by any of the
compressors in the lzip family: lzip, plzip, minilzip/lzlib, clzip, and
pdlzip.
-If the cause of file corruption is a damaged medium, the combination
-@w{GNU ddrescue + lziprecover} is the recommended option for recovering data
-from damaged lzip files. @xref{ddrescue-example}, and
-@ref{ddrescue-example2}, for examples.
+GNU ddrescue provides data recovery capabilities which nicely complement
+those of lziprecover. If the cause of file corruption is a damaged medium,
+the combination @w{GNU ddrescue + lziprecover} is the recommended option for
+recovering data from damaged files. @xref{ddrescue-example},
+@ref{ddrescue-example2}, and @ref{ddrescue-example3}, for examples.
+@ifnothtml
+@xref{Top,GNU ddrescue manual,,ddrescue},
+@end ifnothtml
+@ifhtml
+See the
+@uref{http://www.gnu.org/software/ddrescue/manual/ddrescue_manual.html,,ddrescue manual}
+@end ifhtml
+for details about ddrescue.
If a file is too damaged for lziprecover to repair it, all the recoverable
data in all members of the file can be extracted with the following command
@@ -172,11 +170,8 @@ names are specified, lziprecover decompresses from standard input to
standard output. Remember to prepend @file{./} to any file name beginning
with a hyphen, or use @samp{--}.
-lziprecover supports the following
-@uref{http://www.nongnu.org/arg-parser/manual/arg_parser_manual.html#Argument-syntax,,options}:
-@ifnothtml
-@xref{Argument syntax,,,arg_parser}.
-@end ifnothtml
+@noindent
+lziprecover supports the following options: @xref{Argument syntax}.
@table @code
@item -h
@@ -197,12 +192,12 @@ garbage that can be safely ignored. @xref{concat-example}.
@item -A
@itemx --alone-to-lz
-Convert lzma-alone files to lzip format without recompressing, just
-adding a lzip header and trailer. The conversion minimizes the
-dictionary size of the resulting file (and therefore the amount of
-memory required to decompress it). Only streamed files with default LZMA
-properties can be converted; non-streamed lzma-alone files lack the "End
-Of Stream" marker required in lzip files.
+Convert lzma-alone files to lzip format without recompressing, just adding a
+lzip header and trailer. The conversion minimizes the dictionary size of the
+resulting file (and therefore the amount of memory required to decompress
+it). Only streamed files with default LZMA properties can be converted;
+non-streamed lzma-alone files lack the 'End Of Stream' marker required in
+lzip files.
The name of the converted lzip file is derived from that of the original
lzma-alone file as follows:
@@ -213,6 +208,20 @@ lzma-alone file as follows:
@item anyothername @tab becomes @tab anyothername.lz
@end multitable
+@item -b @var{bytes}
+@itemx --block-size=@var{bytes}
+When creating fec files, make the FEC block size a multiple of @var{bytes},
+which must be a multiple of 512 not larger than @w{1 GiB}.
+
+@anchor{--byte-repair}
+@item -B
+@itemx --byte-repair
+Try to repair a @var{file} with small errors (up to one single-byte error
+per member). If successful, a repaired copy is written to the file
+@var{file}_fixed.lz. @var{file} is not modified at all. The exit status is 0
+if the file could be repaired, 2 otherwise. @xref{Repairing one byte}, for a
+complete description of the byte-repair mode.
+
@item -c
@itemx --stdout
Write decompressed data to standard output; keep input files unchanged. This
@@ -230,24 +239,27 @@ already exists and @option{--force} has not been specified, lziprecover
continues decompressing the rest of the files and exits with error status 1.
If a file fails to decompress, or is a terminal, lziprecover exits
immediately with error status 2 without decompressing the rest of the files.
-A terminal is considered an uncompressed file, and therefore invalid.
+A terminal is considered an uncompressed file, and therefore invalid. A
+multimember file with one or more empty members is accepted if redirected to
+standard input or if @option{-i} is given.
@item -D @var{range}
@itemx --range-decompress=@var{range}
Decompress only a range of bytes starting at decompressed byte position
@var{begin} and up to byte position @w{@var{end} - 1}. Byte positions start
-at 0. This option provides random access to the data in multimember files;
-it only decompresses the members containing the desired data. In order to
-guarantee the correctness of the data produced, all members containing any
-part of the desired data are decompressed and their integrity is checked.
+at 0. The bytes produced are sent to standard output unless the option
+@option{-o} is used. This option provides random access to the data in
+multimember files; it only decompresses the members containing the desired
+data. In order to guarantee the correctness of the data produced, all
+members containing any part of the desired data are decompressed and their
+integrity is checked.
@anchor{range-format}
Four formats of @var{range} are recognized, @samp{@var{begin}},
@samp{@var{begin}-@var{end}}, @samp{@var{begin},@var{size}}, and
@samp{,@var{size}}. If only @var{begin} is specified, @var{end} is taken as
the end of the file. If only @var{size} is specified, @var{begin} is taken
-as the beginning of the file. The bytes produced are sent to standard output
-unless the option @option{--output} is used.
+as the beginning of the file.
@anchor{--reproduce}
@item -e
@@ -280,27 +292,71 @@ sector, plus some context data before and after them.
@itemx --force
Force overwrite of output files.
+@item -F create[@var{n}]|repair|test|list
+@itemx --fec=create[@var{n}]|repair|test|list
+Create fec files, or repair or test files using previously created fec
+files, or list the contents of fec files. The argument (create, repair,
+test, or list) can be abbreviated even to a single letter. Option
+@option{-i} is required to repair or test a file using a corrupt fec file,
+or to list a corrupt fec file. @xref{Fec files}.
+
+@var{n} is the number of FEC blocks to be created. The amount of FEC data to
+be created may also be specified as a percentage from 0.003% to 100%, or as
+a number of bytes followed by a @samp{B} (4096B, 16KiB, etc). If @var{n} is
+not specified, it defaults to @samp{8} (8 FEC blocks). (Because, when was
+the last time you saw more than 8 bad sectors affecting the same file?)
+
+@option{--fec=create} writes the FEC data created to @var{file}.fec unless
+option @option{-c} or @option{-o} is specified. If a fec file can't be
+created, lziprecover exits immediately with error status 1 without trying to
+create the rest of the files.
+
+@option{--fec=repair} and @option{--fec=test} read the FEC data from
+@var{file}.fec unless @option{--fec-file} is specified. @option{--fec=repair}
+writes the repaired file to @var{file}_fixed unless option @option{-c} or
+@option{-o} is specified. @xref{File names}. If a file fails to repair,
+lziprecover exits immediately with error status 2 without repairing the rest
+of the files.
+
+@item -0 .. -9
+FEC fragmentation level. Defaults to @option{-9}. Level @option{-0} is the
+fastest; it creates FEC data using GF(2^8), maybe with large blocks. Levels
+@option{-1} to @option{-9} use GF(2^8) or GF(2^16) as required, with
+increasing amounts of smaller blocks.
+
+@item --fec-file=@var{file}[/]
+When repairing or testing, read FEC data from @var{file}. If @var{file} ends
+with a slash, it is interpreted as the name of a directory containing the
+fec file(s).
+
@item -i
@itemx --ignore-errors
+Ignore non-fatal errors.@*
Make @option{--decompress}, @option{--test}, and @option{--range-decompress}
ignore format and data errors and continue decompressing the remaining
members in the file; keep input files unchanged. For example, the commands
@w{@samp{lziprecover -cd -i file.lz > file}} or
@w{@samp{lziprecover -D0 -i file.lz > file}} decompress all the recoverable
-data in all members of @samp{file.lz} without having to split it first. The
+data in all members of @file{file.lz} without having to split it first. The
@w{@samp{-cd -i}} method resyncs to the next member header after each error,
and is immune to some format errors that make @w{@samp{-D0 -i}} fail. The
range decompressed may be smaller than the range requested, because of the
errors. The exit status is set to 0 unless other errors are found (I/O
errors, for example).
+Make @option{--fec=repair} and @option{--fec=test} ignore errors in the fec
+file and return with exit status 0 if the repaired/protected file passes the
+test, even if corrupt packets or trailing garbage are found in the fec file.
+Make @option{--fec=list} ignore errors in the fec files.
+
Make @option{--list}, @option{--dump}, @option{--remove}, and @option{--strip}
ignore format errors. The sizes of the members with errors (especially the
last) may be wrong.
@item -k
@itemx --keep
-Keep (don't delete) input files during decompression.
+Keep (don't delete) input files during decompression or conversion from
+lzma-alone.
@item -l
@itemx --list
@@ -310,9 +366,11 @@ even for multimember files. If more than one file is given, a final line
containing the cumulative sizes is printed. With @option{-v}, the dictionary
size, the number of members in the file, and the amount of trailing data (if
any) are also printed. With @option{-vv}, the positions and sizes of each
-member in multimember files are also printed. With @option{-i}, format errors
-are ignored, and with @option{-ivv}, gaps between members are shown. The
-member numbers shown coincide with the file numbers produced by @option{--split}.
+member in multimember files are also printed. A multimember file with one or
+more empty members is accepted if redirected to standard input or if @option{-i} is
+given. With @option{-i}, format errors are ignored, and with @option{-ivv},
+gaps between members are shown. The member numbers start at 1 and coincide
+with the file numbers produced by @option{--split}.
If any file is damaged, does not exist, can't be opened, or is not regular,
the final exit status is @w{> 0}. @option{-lq} can be used to check quickly
@@ -328,30 +386,52 @@ damaged copies. If successful, a repaired copy is written to the file
produced, 2 otherwise. @xref{Merging files}, for a complete description of
the merge mode.
-@item -o @var{file}
-@itemx --output=@var{file}
-Place the repaired output into @var{file} instead of into
+@item -n @var{n}
+@itemx --threads=@var{n}
+Set the maximum number of worker threads for @option{--fec=create},
+overriding the system's default. Valid values range from 1 to as many as
+your system can support. If this option is not used, lziprecover tries to
+detect the number of processors in the system and use it as default value.
+@w{@samp{lziprecover --help}} shows the system's default value.
+
+@item -o @var{file}[/]
+@itemx --output=@var{file}[/]
+If repairing, place the repaired output into @var{file} instead of into
@var{file}_fixed.lz. If splitting, the names of the files produced are in
-the form @samp{rec01@var{file}}, @samp{rec02@var{file}}, etc.
-
-If @option{-c} has not been also specified, write the (de)compressed output
-to @var{file}, automatically creating any missing parent directories; keep
-input files unchanged. This option (or @option{-c}) is needed when reading
-from a named pipe (fifo) or from a device. @w{@option{-o -}} is equivalent
-to @option{-c}. @option{-o} has no effect when testing or listing.
+the form @file{rec1@var{file}}, @file{rec2@var{file}}, etc.
+
+If creating FEC data and @option{-c} has not also been specified, write the
+FEC data to @var{file}. If @var{file} ends with a slash, it is interpreted
+as the name of a directory where the fec file(s) will be written to. In this
+case, the fec file names are composed by replacing the prefix preceding the
+last slash of each file name specified in the command line with @var{file}
+(or prepending @var{file} if the file name does not contain a slash), and
+appending the extension @file{.fec}.
+
+Else, if @option{-c} has not also been specified, write the (de)compressed
+output to @var{file}, automatically creating any missing parent directories;
+keep input files unchanged. This option (or @option{-c}) is needed when
+reading from a named pipe (fifo) or from a device. @w{@option{-o -}} is
+equivalent to @option{-c}. @option{-o} has no effect when testing or listing.
@item -q
@itemx --quiet
Quiet operation. Suppress all messages.
-@anchor{--byte-repair}
+@item -r
+@itemx --recursive
+When creating or reading fec files (but not when listing), for each directory
+operand, read and process all files in that directory, recursively. Follow
+symbolic links given in the command line, but skip symbolic links that are
+encountered recursively. Ignore files and directories named @file{fec} or
+@file{*[-._]fec}.
+
@item -R
-@itemx --byte-repair
-Try to repair a @var{file} with small errors (up to one single-byte error
-per member). If successful, a repaired copy is written to the file
-@var{file}_fixed.lz. @var{file} is not modified at all. The exit status is 0
-if the file could be repaired, 2 otherwise. @xref{Repairing one byte}, for a
-complete description of the repair mode.
+@itemx --dereference-recursive
+When creating or reading fec files (but not when listing), for each directory
+operand, read and process all files in that directory, recursively,
+following all symbolic links. Ignore files and directories named @file{fec}
+or @file{*[-._]fec}.
@item -s
@itemx --split
@@ -365,8 +445,8 @@ members with corrupt headers or trailers. If other lziprecover functions
fail to work on a multimember @var{file} because of damage in headers or
trailers, try to split @var{file} and then work on each member individually.
-The names of the files produced are in the form @samp{rec01@var{file}},
-@samp{rec02@var{file}}, etc, and are designed so that the use of wildcards
+The names of the files produced are in the form @file{rec1@var{file}},
+@file{rec2@var{file}}, etc, and are designed so that the use of wildcards
in subsequent processing, for example,
@w{@samp{lziprecover -cd rec*@var{file} > recovered_data}}, processes the
files in the correct order. The number of digits used in the names varies
@@ -380,25 +460,25 @@ together with @option{-v} to see information about the files. If a file
fails the test, does not exist, can't be opened, or is a terminal, lziprecover
continues testing the rest of the files. A final diagnostic is shown at
verbosity level 1 or higher if any file fails the test when testing multiple
-files.
+files. A multimember file with one or more empty members is accepted if
+redirected to standard input or if @option{-i} is given.
@item -v
@itemx --verbose
Verbose mode.@*
-When decompressing or testing, further -v's (up to 4) increase the
-verbosity level, showing status, compression ratio, dictionary size,
-trailer contents (CRC, data size, member size), and up to 6 bytes of
-trailing data (if any) both in hexadecimal and as a string of printable
-ASCII characters.@*
+When decompressing or testing, further -v's (up to 4) increase the verbosity
+level, showing status, compression ratio, dictionary size, trailer contents
+(CRC, data size, member size), and up to 6 bytes of trailing data (if any)
+both in hexadecimal and as a string of printable ASCII characters.@*
Two or more @option{-v} options show the progress of decompression.@*
-In other modes, increasing verbosity levels show final status, progress
-of operations, and extra information (for example, the failed areas).
+In other modes, increasing verbosity levels show final status, progress of
+operations, and extra information (for example, the failed areas).
@item --dump=[@var{member_list}][:damaged][:empty][:tdata]
Dump the members listed, the damaged members (if any), the empty members (if
any), or the trailing data (if any) of one or more regular multimember files
-to standard output, or to a file if the option @option{--output} is used. If
-more than one file is given, the elements dumped from all the files are
+to standard output, or to a file if the option @option{-o} is used. If more
+than one file is given, the elements dumped from all the files are
concatenated. If a file does not exist, can't be opened, or is not regular,
lziprecover continues processing the rest of the files. If the dump fails in
one file, lziprecover exits immediately without processing the rest of the
@@ -455,7 +535,7 @@ attempting the removal of trailing data.
@item --strip=[@var{member_list}][:damaged][:empty][:tdata]
Copy one or more regular multimember files to standard output (or to a file
-if the option @option{--output} is used), stripping the members listed, the
+if the option @option{-o} is used), stripping the members listed, the
damaged members (if any), the empty members (if any), or the trailing data
(if any) from each file. If all members in a file are selected to be
stripped, the trailing data (if any) are also stripped even if @samp{tdata}
@@ -467,32 +547,21 @@ the rest of the files. If a file fails to copy, lziprecover exits
immediately without processing the rest of the files. See @option{--dump}
above for a description of the argument.
-@item --empty-error
-Exit with error status 2 if any empty member is found in the input files.
-
-@item --marking-error
-Exit with error status 2 if the first LZMA byte is non-zero in any member of
-the input files. This may be caused by data corruption or by deliberate
-insertion of tracking information in the file. Use
-@w{@samp{lziprecover --clear-marking}} to clear any such non-zero bytes.
-
@item --loose-trailing
When decompressing, testing, or listing, allow trailing data whose first
bytes are so similar to the magic bytes of a lzip header that they can
be confused with a corrupt header. Use this option if a file triggers a
-"corrupt header" error and the cause is not indeed a corrupt header.
+'corrupt header' error and the cause is not indeed a corrupt header.
-@item --clear-marking
-Set to zero the first LZMA byte of each member in the files specified. At
-verbosity level 1 (-v), print the number of members cleared. The date of
-each file modified is preserved if possible. This option exists because the
-first byte of the LZMA stream is ignored by the range decoder, and can
-therefore be (mis)used to store any value which can then be used as a
-watermark to track the path of the compressed payload.
+@item --nonzero-repair
+Repair in place a nonzero first LZMA byte in the files specified. With
+@option{-v}, print the number of members repaired. The date of each file
+modified is preserved if possible.
@end table
-Lziprecover also supports the following debug options (for experts):
+@noindent
+lziprecover also supports the following debug options (for experts):
@table @code
@item -E @var{range}[,@var{sector_size}]
@@ -505,6 +574,24 @@ sequence and try to reproduce the file, printing to standard output final
statistics of the number of sectors reproduced successfully. Exit with
nonzero status only in case of fatal error.
+@item -F dc@var{n}
+@itemx --fec=dc@var{n}
+Simulate FEC repair of all combinations of @var{n} zeroed block errors
+spread along the whole input file.
+
+@item -F dz@var{range}[:@var{range}]...
+@itemx --fec=dz@var{range}[:@var{range}]...
+Simulate FEC repair of one or more zeroed block(s) in the input file at the
+@var{range}s given. The @var{range}s may be unordered and overlapping.
+Lziprecover sorts and joins them as needed. @xref{range-format}, for a
+description of @var{range}.
+
+@item -F dZ@var{size}[,@var{delta}]
+@itemx --fec=dZ@var{size}[,@var{delta}]
+Simulate FEC repair of all possible zeroed blocks of size @var{size} in the
+input file. @var{delta} defaults to @var{size}. Values of @var{delta}
+smaller than @var{size} result in overlapping blocks.
+
@item -M
@itemx --md5sum
Print to standard output the MD5 digests of the input @var{files} one per
@@ -556,13 +643,14 @@ Load the compressed @var{file} into memory, set the byte at @var{position}
to @var{value}, and decompress the modified compressed data to standard
output. If the damaged member can be decompressed to the end (just fails
with a CRC mismatch), the members following it are also decompressed.
+@xref{--set-byte}, for a description of @var{value}.
@item -X[@var{position},@var{value}]
@itemx --show-packets[=@var{position},@var{value}]
Load the compressed @var{file} into memory, optionally set the byte at
@var{position} to @var{value}, decompress the modified compressed data
(discarding the output), and print to standard output descriptions of the
-LZMA packets being decoded.
+LZMA packets being decoded. @xref{--set-byte}, for a description of @var{value}.
@item -Y @var{range}
@itemx --debug-delay=@var{range}
@@ -579,6 +667,11 @@ description of @var{range}.
@itemx --debug-byte-repair=@var{position},@var{value}
Load the compressed @var{file} into memory, set the byte at @var{position}
to @var{value}, and then try to repair the byte error. @xref{--byte-repair}.
+@xref{--set-byte}, for a description of @var{value}.
+
+@item --gf16
+Force the use of GF(2^16) when creating FEC blocks even if the number of
+blocks fits in GF(2^8).
@end table
@@ -589,7 +682,7 @@ and may be followed by a multiplier and an optional @samp{B} for "byte".
Table of SI and binary prefixes (unit multipliers):
@multitable {Prefix} {kilobyte (10^3 = 1000)} {|} {Prefix} {kibibyte (2^10 = 1024)}
-@item Prefix @tab Value @tab | @tab Prefix @tab Value
+@headitem Prefix @tab Value @tab | @tab Prefix @tab Value
@item k @tab kilobyte (10^3 = 1000) @tab | @tab Ki @tab kibibyte (2^10 = 1024)
@item M @tab megabyte (10^6) @tab | @tab Mi @tab mebibyte (2^20)
@item G @tab gigabyte (10^9) @tab | @tab Gi @tab gibibyte (2^30)
@@ -609,6 +702,146 @@ indicate a corrupt or invalid input file, 3 for an internal consistency
error (e.g., bug) which caused lziprecover to panic.
+@node Argument syntax
+@chapter Syntax of command-line arguments
+@cindex argument syntax
+
+POSIX recommends these conventions for command-line arguments.
+
+@itemize @bullet
+@item A command-line argument is an option if it begins with a hyphen
+(@samp{-}).
+
+@item Option names are single alphanumeric characters.
+
+@item Certain options require an argument.
+
+@item An option and its argument may or may not appear as separate tokens.
+(In other words, the whitespace separating them is optional.)
+Thus, @w{@option{-o foo}} and @option{-ofoo} are equivalent.
+
+@item One or more options without arguments, followed by at most one option
+that takes an argument, may follow a hyphen in a single token.
+Thus, @option{-abc} is equivalent to @w{@option{-a -b -c}}.
+
+@item Options typically precede other non-option arguments.
+
+@item The argument @samp{--} terminates all options; any following arguments
+are treated as non-option arguments, even if they begin with a hyphen.
+
+@item A token consisting of a single hyphen character is interpreted as an
+ordinary non-option argument. By convention, it is used to specify standard
+input, standard output, or a file named @samp{-}.
+@end itemize
+
+@noindent
+GNU adds @dfn{long options} to these conventions:
+
+@itemize @bullet
+@item A long option consists of two hyphens (@samp{--}) followed by a name
+made of alphanumeric characters and hyphens. Option names are typically one
+to three words long, with hyphens to separate words. Abbreviations can be
+used for the long option names as long as the abbreviations are unique.
+
+@item A long option and its argument may or may not appear as separate
+tokens. In the latter case they must be separated by an equal sign @samp{=}.
+Thus, @w{@option{--foo bar}} and @option{--foo=bar} are equivalent.
+@end itemize
+
+@noindent
+The syntax of options with an optional argument is
+@option{-<short_option><argument>} (without whitespace), or
+@option{--<long_option>=<argument>}.
+
+
+@node File format
+@chapter File format
+@cindex file format
+
+Perfection is reached, not when there is no longer anything to add, but
+when there is no longer anything to take away.@*
+--- Antoine de Saint-Exupery
+
+In the diagram below, a box like this:
+
+@verbatim
++---+
+|   | <-- the vertical bars might be missing
++---+
+@end verbatim
+
+represents one byte; a box like this:
+
+@verbatim
++==============+
+|              |
++==============+
+@end verbatim
+
+represents a variable number of bytes.
+
+@noindent
+A lzip file consists of one or more independent "members" (compressed data
+sets). The members simply appear one after another in the file, with no
+additional information before, between, or after them. Each member can
+encode in compressed form up to @w{16 EiB - 1 byte} of uncompressed data.
+The size of a multimember file is unlimited. Empty members (data size = 0)
+are not allowed in multimember files.
+
+Each member has the following structure:
+
+@verbatim
++--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+| ID string | VN | DS | LZMA stream | CRC32 |   Data size   |  Member size  |
++--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+@end verbatim
+
+All multibyte values are stored in little endian order.
+
+@table @samp
+@item ID string (the "magic" bytes)
+A four byte string, identifying the lzip format, with the value "LZIP"
+(0x4C, 0x5A, 0x49, 0x50).
+
+@item VN (version number, 1 byte)
+Just in case something needs to be modified in the future. 1 for now.
+
+@item DS (coded dictionary size, 1 byte)
+The dictionary size is calculated by taking a power of 2 (the base size)
+and subtracting from it a fraction between 0/16 and 7/16 of the base size.@*
+Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).@*
+Bits 7-5 contain the numerator of the fraction (0 to 7) to subtract
+from the base size to obtain the dictionary size.@*
+Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB@*
+Valid values for dictionary size range from 4 KiB to 512 MiB.
+
+@item LZMA stream
+The LZMA stream, terminated by an 'End Of Stream' marker. Uses default values
+for encoder properties.
+@ifnothtml
+@xref{Stream format,,,lzip},
+@end ifnothtml
+@ifhtml
+See
+@uref{http://www.nongnu.org/lzip/manual/lzip_manual.html#Stream-format,,Stream format}
+@end ifhtml
+for a complete description.
+
+@item CRC32 (4 bytes)
+Cyclic Redundancy Check (CRC) of the original uncompressed data.
+
+@item Data size (8 bytes)
+Size of the original uncompressed data.
+
+@item Member size (8 bytes)
+Total size of the member, including header and trailer. This field acts
+as a distributed index, improves the checking of stream integrity, and
+facilitates the safe recovery of undamaged members from multimember files.
+Lzip limits the member size to @w{2 PiB} to prevent the data size field from
+overflowing.
+@end table
+
+
@node Data safety
@chapter Protecting data from accidental loss
@cindex data safety
@@ -622,26 +855,31 @@ There are 3 main types of data corruption that may cause data loss:
single-byte errors, multibyte errors (generally affecting a whole sector
in a block device), and total device failure.
+The two methods most effective to protect data from accidental loss are
+backup copies and Forward Error Correction (FEC). Both methods can be used
+simultaneously, and both are supported by lziprecover.
+
Lziprecover protects natively against single-byte errors as long as file
integrity is checked frequently enough that a second single-byte error does
not develop in the same member before the first one is repaired.
@xref{Repairing one byte}.
-Lziprecover also protects against multibyte errors if at least one backup
-copy of the file is made (@pxref{Merging files}), or if the error is a
-zeroed sector and the uncompressed data corresponding to the zeroed sector
-are available (@pxref{Reproducing one sector}). If you can choose between
-merging and reproducing, try merging first because it is usually faster,
-easier to use, and has a high probability of success.
+Lziprecover protects against multibyte errors in 3 cases: if a fec file is
+available (@pxref{Fec files}), if at least one backup copy of the file is
+available (@pxref{Merging files}), or if the error is a zeroed sector and
+the uncompressed data corresponding to the zeroed sector are available
+(@pxref{Reproducing one sector}). FEC is best. Else, if you can choose
+between merging and reproducing, try merging first because it is usually
+faster, easier to use, and has a high probability of success.
Lziprecover can't help in case of device failure. The only remedy for total
device failure is storing backup copies in separate media.
-The extraordinary safety of the lzip format allows lziprecover to exploit
-the redundance that occurrs naturally when making compressed backups.
-Lziprecover can recover data that would not be recoverable from files
-compressed in other formats. Let's see two examples of how much better is
-lzip compared with gzip and bzip2 with respect to data safety:
+The extraordinary safety of the lzip format allows lziprecover to use the
+redundance that occurs naturally when making compressed backups. Lziprecover
+can recover data that would not be recoverable from files compressed in
+other formats. See these two examples of the data recovery capabilities
+offered by lziprecover:
@menu
* Merging with a backup:: Recovering a file using a damaged backup
@@ -721,6 +959,427 @@ reproduce. The probability of reproducing a mailbox
identical backups (@pxref{performance-of-merge}).
+@node Fec files
+@chapter Forward Error Correction
+@cindex forward error correction
+
+Forward Error Correction (FEC) is any way of protecting data from corruption
+by creating redundant data that can be used later to repair errors in the
+protected data. Lziprecover uses a Hilbert-based Reed-Solomon code to create
+one fec file (with extension @file{.fec}) for each file that needs to be
+protected. The fec files created by lziprecover are reproducible.
+
+Reed-Solomon is the most space-efficient Error Correcting Code (ECC) for
+data stored in block devices. It creates redundant FEC blocks in such a way
+that X FEC blocks allow the recovery of any combination of up to X lost
+data blocks. All the blocks (data and FEC) are of the same size, which in
+fec files must be a multiple of 512 bytes. Reed-Solomon is not optimum for
+corruption affecting random single bits in a file because each corrupt bit
+invalidates the whole block containing it.
+
+Usually, a corrupt file does not provide an indication of where the
+corruption is located. Therefore, each fec file stores one or two arrays of
+CRCs to detect the corrupt blocks in the protected file and mark them as
+erasures (missing data blocks). Thus, a fec file creates its own Binary
+Erasure Channel (BEC) for the protected file.
+
+Lziprecover's FEC algorithm can repair any kind of file, but its ability to
+repair lzip files is greater than for other kinds of files. Lziprecover can
+use the statistical properties of lzip data to repair a lzip file rescued
+with ddrescue, even if the fec file is so damaged that it has lost both CRC
+arrays. Lzip data helps to locate the corrupt parts of the file even without
+a BEC. For this to work, at least one chksum packet header must be intact to
+provide @samp{prodata_size}, @samp{prodata_md5}, and @samp{gf16}.
+
+@menu
+* How Reed-Solomon works:: It is basically an equation system
+* Implementation details:: How lziprecover implements Reed-Solomon
+* Creating fec files:: How to create fec files
+* Testing with fec files:: How to test files using fec files
+* Repairing with fec files:: How to repair files using fec files
+* Fec file format:: Detailed format of the redundant FEC data
+@end menu
+
+
+@node How Reed-Solomon works
+@section How Reed-Solomon works
+@cindex Reed-Solomon tutorial
+
+To illustrate how Reed-Solomon works on the BEC, we will use an example with
+standard arithmetic on integers. Note that in lziprecover's FEC each
+variable is a (potentially large) block of data, not a single value.
+
+Given variables x, y, and z (the protected data) whose values are known, an
+equation system can be created where the values of three FEC variables p, q,
+and r can be computed from the values of x, y, and z:
+
+@example
+x + y + z = p (1)
+x + 2y + 3z = q (2)
+x + 3y + 2z = r (3)
+@end example
+
+If we have that x = 1, y = 2, and z = 3, then p = 6, q = 14, and r = 13:
+
+@example
+1 + 2 + 3 = 6 (1a)
+1 + 4 + 9 = 14 (2a)
+1 + 6 + 6 = 13 (3a)
+@end example
+
+Now, if the values of x and y are lost because of data corruption, they can
+be recomputed by using any two of the three equations above. For example, if
+we replace the known values of z, p, and q in equations (1) and (2) we get:
+
+@example
+x + y + 3 = 6 (1b)
+x + 2y + 9 = 14 (2b)
+@end example
+
+In order to solve the two equations above, we first reduce them by
+subtracting the values of the known data variables from the values of the
+FEC variables:
+
+@example
+x + y = 6 - 3 (1c)
+x + 2y = 14 - 9 (2c)
+@end example
+
+which gives the reduced FEC values P = 3 and Q = 5.
+
+Then we create a square matrix @samp{A} with the coefficients of x and y in
+the equations above, and invert it. @samp{A} must be invertible and must not
+have any zero element. We also create the column vector D with the missing
+data variables x and y, and the column vector F with the reduced FEC values
+P and Q:
+
+@example
+D = x     A = 1  1     A^-1 =  2 -1     F = P
+    y         1  2            -1  1         Q
+@end example
+
+Then we multiply the inverse matrix @samp{A^-1} by the column vector F to
+obtain the values of x and y (D = A^-1 * F):
+
+@example
+x = 2P - Q (1d)
+y = -P + Q (2d)
+@end example
+
+which finally gives us the lost values x = 1 and y = 2:
+
+@example
+x = 2 * 3 - 5 (1e)
+y = -3 + 5 (2e)
+@end example
+
+
+@node Implementation details
+@section How lziprecover implements Reed-Solomon
+@cindex Reed-Solomon details
+
+Lziprecover's implementation of Reed-Solomon can manage up to 128 data
+blocks + 128 FEC blocks when using a Galois Field of size 256 (GF(2^8)), or
+up to 32768 data blocks + 32768 FEC blocks when using a Galois Field of size
+65536 (GF(2^16)). GF(2^8) is included because it is faster for files up to
+about @w{1 MB}. The number of FEC blocks is currently limited to 2048
+because of memory and time limits. Inverting a matrix for 32768 FEC blocks
+would take a week and require @w{2 GiB} of RAM.
+
+The file is repaired in memory. Therefore, enough virtual memory
+@w{(RAM + swap)} to contain the protected file and the FEC data is required.
+The file size is limited to less than @w{2 GiB} on 32-bit systems. The
+repaired file is checked with an MD5 digest.
+
+Lziprecover divides the input file into 1 to 32768 data blocks of the same
+size, which ranges from 512 bytes to @w{128 TiB}, for a total protected file
+size of up to @w{4 EiB}. It then uses a Hilbert matrix @samp{A} to create up
+to 2048 FEC blocks of the same size as the data blocks. Lziprecover corrects
+errors in the data blocks by first reducing the equation system to M
+equations with M unknowns each, where M is the number of missing data
+blocks. Then it multiplies the inverse of the relevant submatrix of @samp{A}
+by the vector of results of the M equations to recompute the values of the
+missing data blocks.
+
+Lziprecover implements GF(2^8) with polynomial 0x11D and GF(2^16) with
+polynomial 0x1100B.
+
+A Hilbert matrix is defined as @w{A[i][j] = 1 / (i + j + 1)} for
+@w{i,j >= 0}. But, as in a Galois Field the addition is the exclusive or
+operation, applying the Hilbert definition produces a singular (non
+invertible) matrix. To avoid this problem, lziprecover uses a Hilbert matrix
+starting at row @w{r0 = gf_size / 2}. I.e., @w{A[i][j] = 1 / (i + j + r0)}
+for @w{0 <= i,j < r0}. (@samp{gf_size} is the size of the Galois Field).
+
+
+@node Creating fec files
+@section How to create fec files
+@cindex fec create
+
+@noindent
+Example 1: Create the fec file @file{archive.tar.lz.fec} and store it in the
+same directory where @file{archive.tar.lz} is.
+
+@example
+lziprecover -v -Fc archive.tar.lz
+@end example
+
+@noindent
+Example 2: Create the fec file @file{archive.tar.lz.fec} and store it in the
+directory @file{fec}.
+
+@example
+lziprecover -v -Fc -o fec/ archive.tar.lz
+@end example
+
+@noindent
+Example 3: Create recursively one fec file for each file in the directory
+@file{datadir} and store them in the tree under the directory @file{fec}.
+
+@example
+lziprecover -v -r -Fc -o fec/ datadir
+@end example
+
+@noindent
+Example 4: Create fec files for a collection of photos stored in directory
+@file{photos} and store them in the directory @file{photos-fec}.
+
+@example
+lziprecover -v -Fc -o photos-fec/ photos/*
+@end example
+
+
+@node Testing with fec files
+@section How to test files using fec files
+@cindex fec test
+
+@noindent
+Example 1: Test the integrity of @file{archive.tar.lz} using the fec file
+@file{archive.tar.lz.fec} from the same directory.
+
+@example
+lziprecover -v -Ft archive.tar.lz
+@end example
+
+@noindent
+Example 2: Test the integrity of the files @file{foo.lz} and @file{bar.lz}
+using the corresponding fec files stored in the directory @file{fec}.
+
+@example
+lziprecover -v -Ft --fec-file=fec/ foo.lz bar.lz
+@end example
+
+@noindent
+Example 3: Test recursively the integrity of all the files in the directory
+@file{datadir} using the fec files stored in the directory tree under the
+directory @file{fec}.
+
+@example
+lziprecover -v -r -Ft --fec-file=fec/ datadir
+@end example
+
+@noindent
+Example 4: Test the integrity of a collection of photos stored in directory
+@file{photos} using fec files from directory @file{photos-fec}.
+
+@example
+lziprecover -v -Ft --fec-file=photos-fec/ photos/*
+@end example
+
+
+@node Repairing with fec files
+@section How to repair files using fec files
+@cindex fec repair
+
+@noindent
+Example 1: Repair the file @file{archive.tar.lz} using the fec file
+@file{archive.tar.lz.fec} from the same directory. The repaired file is
+written to @file{archive_fixed.tar.lz} in the same directory.
+
+@example
+lziprecover -v -Fr archive.tar.lz
+@end example
+
+@noindent
+Example 2: Repair the files @file{foo.lz} and @file{bar.lz} using the
+corresponding fec files stored in the directory @file{fec}.
+
+@example
+lziprecover -v -Fr --fec-file=fec/ foo.lz bar.lz
+@end example
+
+@noindent
+Example 3: Repair recursively all the damaged files in the directory
+@file{datadir} using the fec files stored in the directory tree under the
+directory @file{fec}.
+
+@example
+lziprecover -v -r -Fr --fec-file=fec/ datadir
+@end example
+
+@anchor{ddrescue-example}
+@noindent
+Example 4: Recover a collection of photos from a damaged external drive
+(@file{/dev/sdc1}). The photos are in directory @file{photos}, and the fec
+files are in directory @file{photos-fec}.
+
+@example
+ddrescue -b4096 -r10 /dev/sdc1 hdimage mapfile
+mount -o loop,ro hdimage /mnt/hdimage
+cp -a /mnt/hdimage/photos photos
+cp -a /mnt/hdimage/photos-fec photos-fec
+umount /mnt/hdimage
+lziprecover -v -Fr --fec-file=photos-fec/ photos/*
+ (Check and rename repaired files. They are named @file{photos/*_fixed})
+@end example
+
+
+@node Fec file format
+@section Fec file format
+@cindex fec file format
+
+A fec file consists of one chksum packet, one or more fec packets, and one
+optional second chksum packet. The first chksum packet must be the first
+packet in the file, but the second chksum packet does not need to be the
+last packet in the file. The essential information is stored in the chksum
+packet(s), while the potentially numerous fec packets are kept as simple as
+possible:
+
+@verbatim
++=================+===============+=================+
+| Chksum packet | Fec packets | Chksum packet |
++=================+===============+=================+
+@end verbatim
+
+All multibyte values are stored in little endian order except
+@samp{prodata_md5}.
+
+@anchor{fbs}
+The @samp{fbs} (fec_block_size) field is coded as a little endian 16-bit
+floating point unsigned integer with an 11-bit mantissa at bits 0-10 and a
+5-bit exponent at bits 11-15. The mantissa is an integer between 0 and 2047.
+The exponent is an integer between 9 and 40, stored with a bias of -9; the
+exponent 9 is stored as 0, and 40 is stored as 31. Values are stored with
+the largest mantissa and smallest exponent; 4096 is stored as m=8, e=0. This
+encoding can store values from 0 bytes to @w{2047 TiB} @w{(2^51 - 2^40 bytes)}
+with a maximum resolution of 512 bytes, but 0 and the values beyond
+@w{128 TiB} are not used:
+
+@verbatim
+     5               11
++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+|   exp   |      mantissa       |  The 'fbs' (fec_block_size) field
++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+15      11 10                  0
+@end verbatim
+
+The fec file format is 4-byte aligned for speed because FEC data are created
+and decoded 4 bytes at a time. The 4-byte alignment has been achieved by a
+careful design, without adding any padding bytes.
+
+The fec file format has an overhead of 8 bytes per protected data block,
+plus 16 bytes per FEC block, plus 80 bytes.
+
+@subsection Chksum packet
+@cindex chksum packet
+
+A chksum packet contains one CRC for each of the N data blocks in the
+protected file, and is structured as shown in the following table. All
+lengths and offsets are in decimal:
+
+@multitable {prodata_size} {36 + 4N} {Length (in bytes)}
+@headitem Field Name @tab Offset @tab Length (in bytes)
+@item magic @tab 0 @tab 4
+@item version @tab 4 @tab 1
+@item flags @tab 5 @tab 1
+@item fbs @tab 6 @tab 2
+@item prodata_size @tab 8 @tab 8
+@item prodata_md5 @tab 16 @tab 16
+@item header_crc @tab 32 @tab 4
+@item crc_array @tab 36 @tab 4N
+@item payload_crc @tab 36 + 4N @tab 4
+@end multitable
+
+@table @samp
+@item magic
+A four byte string identifying the chksum packet (and therefore the fec
+file), with the value 0xB3, 0xA5, 0xB6, 0xAF. (The complement of "LZIP").
+
+@item version
+Just in case something needs to be modified in the future. 0 for now.
+
+@item flags
+Bit 0 (is_crc_c): crc_array contains CRC32 (0) or CRC32-C (1).@*
+Bit 1 (gf16): Galois field is GF(2^8) (0) or GF(2^16) (1).@*
+Bits 2-7: zero.
+
+@item fbs (coded fec_block_size)
+Number of FEC bytes per block. It is a multiple of 512 bytes between 512
+bytes and @w{128 TiB}. @xref{fbs}.
+
+@item prodata_size
+Size of the protected file. 1 byte to @w{4 EiB}.
+
+@item prodata_md5
+Md5sum of the protected file. Stored in big endian order.
+
+@item header_crc
+CRC32 of the previous fields, including magic.
+
+@item crc_array
+Array of @var{n} CRCs corresponding to the @var{n} blocks in which the
+protected file is divided. @var{n} is @w{@samp{ceil( prodata_size / fbs )}}.
+The first chksum packet contains an array of CRC32s, while the second chksum
+packet (if present) contains an array of CRC32-Cs.
+
+For the expected thousands of bit flips caused by a zeroed sector, a
+symmetric CRC like CRC32 is probably better than CRC32-C, which detects all
+the errors with an odd number of bit flips at the expense of a larger number
+of undetected errors with an even number of bit flips.
+
+@item payload_crc
+CRC32 of the crc_array.
+@end table
+
+@subsection Fec packet
+@cindex fec packet
+
+A fec packet contains one FEC block and is structured as shown in the
+following table. All lengths and offsets are in decimal:
+
+@multitable {payload_crc} {12 + fbs} {Length (in bytes)}
+@headitem Field Name @tab Offset @tab Length (in bytes)
+@item magic @tab 0 @tab 4
+@item fbn @tab 4 @tab 2
+@item fbs @tab 6 @tab 2
+@item header_crc @tab 8 @tab 4
+@item fec_block @tab 12 @tab fbs
+@item payload_crc @tab 12 + fbs @tab 4
+@end multitable
+
+@table @samp
+@item magic
+A four byte string identifying the fec packet, with the value "\xB3FEC"
+(0xB3, 0x46, 0x45, 0x43).
+
+@item fbn (fec_block_number)
+Number of this FEC block (0 to 32767). Required to compute the decode matrix.
+
+@item fbs (coded fec_block_size)
+Number of FEC bytes per block. It is a multiple of 512 bytes between 512
+bytes and @w{128 TiB}. @xref{fbs}.
+
+@item header_crc
+CRC32 of the previous fields, including magic.
+
+@item fec_block
+The FEC block.
+
+@item payload_crc
+CRC32 of the fec_block.
+@end table
+
+
@node Repairing one byte
@chapter Repairing one byte
@cindex repairing one byte
@@ -732,11 +1391,12 @@ bit to the original. This makes lzip files resistant to bit flip, one of the
most common forms of data corruption.
The file is repaired in memory. Therefore, enough virtual memory
-@w{(RAM + swap)} to contain the largest damaged member is required.
+@w{(RAM + swap)} to contain the largest damaged member is required. Member
+size is limited to @w{2 GiB} on 32-bit systems.
-The error may be located anywhere in the file except in the first 5
-bytes of each member header or in the @samp{Member size} field of the
-trailer (last 8 bytes of each member). If the error is in the header it
+The error may be located anywhere in the file except in the first 5 bytes of
+each member header (magic and version) or in the @samp{Member size} field of
+the trailer (last 8 bytes of each member). If the error is in the header it
can be easily repaired with a text editor like GNU Moe (@pxref{File
format}). If the error is in the member size, it is enough to ignore the
message about @samp{bad member size} when decompressing.
@@ -749,7 +1409,7 @@ One byte may seem small, but most file corruptions not produced by
transmission errors or I/O errors just affect one byte, or even one bit,
of the file. Also, unlike magnetic media, where errors usually affect a
whole sector, solid-state storage devices tend to produce single-byte
-errors, making of lzip the perfect format for data stored on such devices.
+errors, which lziprecover can repair.
Repairing a file can take some time. Small files or files with the error
located near the beginning can be repaired in a few seconds. But
@@ -803,7 +1463,7 @@ into clusters and then merging the files as if each cluster were a
single error.
Here is a real case of successful merging. Two copies of the file
-@samp{icecat-3.5.3-x86.tar.lz} (compressed size @w{9 MB}) became corrupt
+@file{icecat-3.5.3-x86.tar.lz} (compressed size @w{9 MB}) became corrupt
while stored on the same NAND flash device. One of the copies had 76
single-bit errors scattered in an area of 1020 bytes, and the other had
3028 such errors in an area of 31729 bytes. Lziprecover produced a
@@ -821,19 +1481,10 @@ Note that the number of errors reported by lziprecover (2552) is lower
than the number of corrupt bytes (3104) because contiguous corrupt bytes
are counted as a single multibyte error.
-@sp 1
-@anchor{ddrescue-example}
+@anchor{ddrescue-example2}
@noindent
Example 1: Recover a compressed backup from two copies on CD-ROM with
error-checked merging of copies.
-@ifnothtml
-@xref{Top,GNU ddrescue manual,,ddrescue},
-@end ifnothtml
-@ifhtml
-See the
-@uref{http://www.gnu.org/software/ddrescue/manual/ddrescue_manual.html,,ddrescue manual}
-@end ifhtml
-for details about ddrescue.
@example
ddrescue -d -r1 -b2048 /dev/cdrom cdimage1 mapfile1
@@ -851,14 +1502,13 @@ lziprecover -tv backup.tar.lz
backup.tar.lz: ok
@end example
-@sp 1
@noindent
Example 2: Recover the first volume of those created with the command
@w{@samp{lzip -b 32MiB -S 650MB big_db}} from two copies,
-@samp{big_db1_00001.lz} and @samp{big_db2_00001.lz}, with member 07
+@file{big_db1_00001.lz} and @file{big_db2_00001.lz}, with member 07
damaged in the first copy, member 18 damaged in the second copy, and
member 12 damaged in both copies. The correct file produced is saved in
-@samp{big_db_00001.lz}.
+@file{big_db_00001.lz}.
@example
lziprecover -m -v -o big_db_00001.lz big_db1_00001.lz big_db2_00001.lz
@@ -891,9 +1541,10 @@ reproduction can't be done if the zeroed sector overlaps with the first 15
bytes of a member, or if the zeroed sector is smaller than 8 bytes.
The file is reproduced in memory. Therefore, enough virtual memory
-@w{(RAM + swap)} to contain the damaged member is required.
+@w{(RAM + swap)} to contain the damaged member is required. Member size is
+limited to @w{2 GiB} on 32-bit systems.
-To understand how it works, take any lzipped file, say @samp{foo.lz},
+To understand how it works, take any lzipped file, say @file{foo.lz},
decompress it (keeping the original), and try to reproduce an artificially
zeroed sector in it by running the following commands:
@@ -918,8 +1569,8 @@ Reproduction succeeded at pos 65536
all comparisons passed
@end example
-Using @samp{foo} as reference file guarantees that any zeroed sector in
-@samp{foo.lz} can be reproduced because both files contain the same data. In
+Using @file{foo} as reference file guarantees that any zeroed sector in
+@file{foo.lz} can be reproduced because both files contain the same data. In
real use, the reference file needs to contain the data corresponding to the
zeroed sector, but the rest of the data (if any) may differ between both
files. The reference data may be obtained from the partial decompression of
@@ -951,6 +1602,7 @@ when they are required.
@anchor{performance-of-reproduce}
@section Performance of @option{--reproduce}
+
Reproduce mode is especially useful when recovering a corrupt backup (or a
corrupt source tarball) that is part of a series. Usually only a small
fraction of the data changes from one backup to the next or from one version
@@ -958,8 +1610,8 @@ of a source tarball to the next. This makes sometimes possible to reproduce
a given corrupted version using reference data from a near version. The
following two tables show the fraction of reproducible sectors (reproducible
sectors divided by total sectors in archive) for some archives, using sector
-sizes of 512 and 4096 bytes. @samp{mailbox-aug.tar.lz} is a backup of some
-of my mailboxes. @samp{backup-feb.tar.lz} and @samp{backup-apr.tar.lz} are
+sizes of 512 and 4096 bytes. @file{mailbox-aug.tar.lz} is a backup of some
+of my mailboxes. @file{backup-feb.tar.lz} and @file{backup-apr.tar.lz} are
real backups of my own working directory:
@multitable {Reference file} {gawk-5.0.1.tar.lz} {4369 / 5844 = 74.76%}
@@ -1006,8 +1658,7 @@ Member reproduced successfully.
Copy of input file reproduced successfully.
@end example
-@sp 1
-@anchor{ddrescue-example2}
+@anchor{ddrescue-example3}
@noindent
Example 2: Recover a damaged backup with a zeroed sector of 4096 bytes at
file position 1019904, using as reference a previous backup. The damaged
@@ -1032,7 +1683,6 @@ Member reproduced successfully.
Copy of input file reproduced successfully.
@end example
-@sp 1
@noindent
Example 3: Recover a damaged backup with a zeroed sector of 4096 bytes at
file position 1019904, using as reference a file from the filesystem. (If
@@ -1058,15 +1708,15 @@ Member reproduced successfully.
Copy of input file reproduced successfully.
@end example
-If @samp{backup.tar.lz} is a multimember file with more than one member
+If @file{backup.tar.lz} is a multimember file with more than one member
damaged and lziprecover shows the message @samp{One member reproduced. Copy
of input file still contains errors.}, the procedure shown in the example
above can be repeated until all the members have been reproduced.
@samp{tarlz --keep-damaged -n0 -xf backup.tar.lz example.txt} produces a
-partial copy of the reference file @samp{example.txt} that may help locate a
+partial copy of the reference file @file{example.txt} that may help locate a
complete copy in the filesystem or in another backup, even if
-@samp{example.txt} has been renamed.
+@file{example.txt} has been renamed.
@node Tarlz
@@ -1095,14 +1745,13 @@ alignment between tar members and lzip members minimizes the amount of data
lost in case of corruption. In this chapter we'll explain the ways in which
lziprecover can recover and process multimember tar.lz archives.
-@sp 1
@section Recovering damaged multimember tar.lz archives
If you have several copies of the damaged archive, try merging them first
because merging has a high probability of success. @xref{Merging files}. If
the command below prints something like
@w{@samp{Input files merged successfully.}} you are done and
-@samp{archive.tar.lz} now contains the recovered archive:
+@file{archive.tar.lz} now contains the recovered archive:
@example
lziprecover -m -v -o archive.tar.lz a/archive.tar.lz b/archive.tar.lz
@@ -1112,7 +1761,7 @@ If you only have one copy of the damaged archive with a zeroed block of data
caused by an I/O error, you may try to reproduce the archive.
@xref{Reproducing one sector}. If the command below prints something like
@w{@samp{Copy of input file reproduced successfully.}} you are done and
-@samp{archive_fixed.tar.lz} now contains the recovered archive:
+@file{archive_fixed.tar.lz} now contains the recovered archive:
@example
lziprecover -vv -e --reference-file=old_archive.tar archive.tar.lz
@@ -1122,16 +1771,16 @@ If you only have one copy of the damaged archive, you may try to repair the
archive, but this has a lower probability of success. @xref{Repairing one
byte}. If the command below prints something like
@w{@samp{Copy of input file repaired successfully.}} you are done and
-@samp{archive_fixed.tar.lz} now contains the recovered archive:
+@file{archive_fixed.tar.lz} now contains the recovered archive:
@example
-lziprecover -v -R archive.tar.lz
+lziprecover -v --byte-repair archive.tar.lz
@end example
If all the above fails, and the archive was created with tarlz, you may save
the damaged members for later and then copy the good members to another
-archive. If the two commands below succeed, @samp{bad_members.tar.lz} will
-contain all the damaged members and @samp{archive_cleaned.tar.lz} will
+archive. If the two commands below succeed, @file{bad_members.tar.lz} will
+contain all the damaged members and @file{archive_cleaned.tar.lz} will
contain a good archive with the damaged members removed:
@example
@@ -1140,7 +1789,7 @@ lziprecover -v --strip=damaged -o archive_cleaned.tar.lz archive.tar.lz
@end example
You can then use @samp{tarlz --keep-damaged} to recover as much data as
-possible from each damaged member in @samp{bad_members.tar.lz}:
+possible from each damaged member in @file{bad_members.tar.lz}:
@example
mkdir tmp
@@ -1148,14 +1797,13 @@ cd tmp
tarlz --keep-damaged -xvf ../bad_members.tar.lz
@end example
-@sp 1
@section Processing multimember tar.lz archives
Lziprecover is able to copy a list of members from one file to another.
For example the command
@w{@samp{lziprecover --dump=1-10:r1:tdata archive.tar.lz > subarch.tar.lz}}
creates a subset archive containing the first ten members, the end-of-file
-blocks, and the trailing data (if any) of @samp{archive.tar.lz}. The
+blocks, and the trailing data (if any) of @file{archive.tar.lz}. The
@samp{r1} part selects the last member, which in an appendable tar.lz
archive contains the end-of-file blocks.
@@ -1165,99 +1813,15 @@ archive contains the end-of-file blocks.
@cindex file names
The name of the fixed file produced by @option{--byte-repair} and
-@option{--merge} is made by appending the string @samp{_fixed.lz} to the
+@option{--merge} is made by appending the string @file{_fixed.lz} to the
original file name. If the original file name ends with one of the
-extensions @samp{.tar.lz}, @samp{.lz}, or @samp{.tlz}, the string
-@samp{_fixed} is inserted before the extension.
-
-
-@node File format
-@chapter File format
-@cindex file format
-
-Perfection is reached, not when there is no longer anything to add, but
-when there is no longer anything to take away.@*
---- Antoine de Saint-Exupery
-
-@sp 1
-In the diagram below, a box like this:
-
-@verbatim
-+---+
-| | <-- the vertical bars might be missing
-+---+
-@end verbatim
+extensions @file{.tar.lz}, @file{.lz}, or @file{.tlz}, the string
+@file{_fixed} is inserted before the extension.
-represents one byte; a box like this:
-
-@verbatim
-+==============+
-| |
-+==============+
-@end verbatim
-
-represents a variable number of bytes.
-
-@sp 1
-A lzip file consists of one or more independent "members" (compressed data
-sets). The members simply appear one after another in the file, with no
-additional information before, between, or after them. Each member can
-encode in compressed form up to @w{16 EiB - 1 byte} of uncompressed data.
-The size of a multimember file is unlimited.
-
-Each member has the following structure:
-
-@verbatim
-+--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-| ID string | VN | DS | LZMA stream | CRC32 | Data size | Member size |
-+--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-@end verbatim
-
-All multibyte values are stored in little endian order.
-
-@table @samp
-@item ID string (the "magic" bytes)
-A four byte string, identifying the lzip format, with the value "LZIP"
-(0x4C, 0x5A, 0x49, 0x50).
-
-@item VN (version number, 1 byte)
-Just in case something needs to be modified in the future. 1 for now.
-
-@item DS (coded dictionary size, 1 byte)
-The dictionary size is calculated by taking a power of 2 (the base size)
-and subtracting from it a fraction between 0/16 and 7/16 of the base size.@*
-Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).@*
-Bits 7-5 contain the numerator of the fraction (0 to 7) to subtract
-from the base size to obtain the dictionary size.@*
-Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB@*
-Valid values for dictionary size range from 4 KiB to 512 MiB.
-
-@item LZMA stream
-The LZMA stream, finished by an "End Of Stream" marker. Uses default values
-for encoder properties.
-@ifnothtml
-@xref{Stream format,,,lzip},
-@end ifnothtml
-@ifhtml
-See
-@uref{http://www.nongnu.org/lzip/manual/lzip_manual.html#Stream-format,,Stream format}
-@end ifhtml
-for a complete description.
-
-@item CRC32 (4 bytes)
-Cyclic Redundancy Check (CRC) of the original uncompressed data.
-
-@item Data size (8 bytes)
-Size of the original uncompressed data.
-
-@item Member size (8 bytes)
-Total size of the member, including header and trailer. This field acts
-as a distributed index, improves the checking of stream integrity, and
-facilitates the safe recovery of undamaged members from multimember files.
-Lzip limits the member size to @w{2 PiB} to prevent the data size field from
-overflowing.
-
-@end table
+The name of the fixed file produced by @option{--fec=repair} is made by
+appending the string @file{_fixed} to the original file name. If the
+original file name ends with one of the extensions @file{.tar.lz}, @file{.lz},
+or @file{.tlz}, the string @file{_fixed} is inserted before the extension.
@node Trailing data
@@ -1274,7 +1838,7 @@ example when writing to a tape. It is safe to append any amount of
padding zero bytes to a lzip file.
@item
-Useful data added by the user; an "End Of File" string (to check that the
+Useful data added by the user; an 'End Of File' string (to check that the
file has not been truncated), a cryptographically secure hash, a description
of file contents, etc. It is safe to append any amount of text to a lzip
file as long as none of the first four bytes of the text matches the
@@ -1328,7 +1892,6 @@ lziprecover --strip=tdata file.lz > stripped_file.lz
lziprecover --remove=tdata file.lz
@end example
-@sp 1
@noindent
Example 2: Add and check a cryptographically secure hash. (This may be
convenient, but a separate copy of the hash must be kept in a safe place
@@ -1345,7 +1908,7 @@ lziprecover --strip=tdata file.lz | sha256sum -c \
@chapter A small tutorial with examples
@cindex examples
-Example 1: Extract all the files from archive @samp{foo.tar.lz}.
+Example 1: Extract all the files from archive @file{foo.tar.lz}.
@example
tar -xf foo.tar.lz
@@ -1353,25 +1916,22 @@ or
lziprecover -cd foo.tar.lz | tar -xf -
@end example
-@sp 1
@noindent
Example 2: Restore a regular file from its compressed version
-@samp{file.lz}. If the operation is successful, @samp{file.lz} is removed.
+@file{file.lz}. If the operation is successful, @file{file.lz} is removed.
@example
lziprecover -d file.lz
@end example
-@sp 1
@noindent
-Example 3: Check the integrity of the compressed file @samp{file.lz} and
+Example 3: Check the integrity of the compressed file @file{file.lz} and
show status.
@example
lziprecover -tv file.lz
@end example
-@sp 1
@anchor{concat-example}
@noindent
Example 4: The right way of concatenating the decompressed output of two or
@@ -1388,41 +1948,37 @@ Or keeping the trailing data of the last file like this
lziprecover --strip=empty file1.lz file2.lz file3.lz > file123.lz
@end example
-@sp 1
@noindent
-Example 5: Decompress @samp{file.lz} partially until @w{10 KiB} of
+Example 5: Decompress @file{file.lz} partially until @w{10 KiB} of
decompressed data are produced.
@example
lziprecover -D 0,10KiB file.lz
@end example
-@sp 1
@noindent
-Example 6: Decompress @samp{file.lz} partially from decompressed byte at
+Example 6: Decompress @file{file.lz} partially from decompressed byte at
offset 10000 to decompressed byte at offset 14999 (5000 bytes are produced).
@example
lziprecover -D 10000-15000 file.lz
@end example
-@sp 1
@noindent
-Example 7: Repair a corrupt byte in the file @samp{file.lz}. (Indented lines
+Example 7: Repair a corrupt byte in the file @file{file.lz}. (Indented lines
are abridged diagnostic messages from lziprecover).
@example
-lziprecover -v -R file.lz
+lziprecover -v --byte-repair file.lz
Copy of input file repaired successfully.
lziprecover -tv file_fixed.lz
file_fixed.lz: ok
mv file_fixed.lz file.lz
@end example
-@sp 1
@noindent
-Example 8: Split the multimember file @samp{file.lz} and write each member
-in its own @samp{recXXXfile.lz} file. Then use @w{@samp{lziprecover -t}} to
+Example 8: Split the multimember file @file{file.lz} and write each member
+in its own @file{recXXXfile.lz} file. Then use @w{@samp{lziprecover -t}} to
test the integrity of the resulting files.
@example
@@ -1494,6 +2050,7 @@ unzcrash [@var{options}] 'lzip -t' @var{file}
The compressed @var{file} must not contain errors and the decompressor being
tested must decompress it correctly for the comparisons to work.
+@noindent
unzcrash supports the following options:
@table @code
@@ -1516,20 +2073,21 @@ The number of N-bit errors per byte (N = 1 to 8) is:
@w{8 28 56 70 56 28 8 1}
@multitable {Examples of @var{range}} {Tests errors of N-bits}
-@item Examples of @var{range} @tab Tests errors of N-bits
-@item 1 @tab 1
-@item 1,2,3 @tab 1, 2, 3
-@item 2-4 @tab 2, 3, 4
-@item 1,3-5,8 @tab 1, 3, 4, 5, 8
-@item 1-3,5-8 @tab 1, 2, 3, 5, 6, 7, 8
+@headitem Examples of @var{range} @tab Tests errors of N-bits
+@item 1 @tab 1
+@item 1,2,3 @tab 1, 2, 3
+@item 2-4 @tab 2, 3, 4
+@item 1,3-5,8 @tab 1, 3, 4, 5, 8
+@item 1-3,5-8 @tab 1, 2, 3, 5, 6, 7, 8
@end multitable
@item -B[@var{size}][,@var{value}]
@itemx --block[=@var{size}][,@var{value}]
-Test block errors of given @var{size}, simulating a whole sector I/O error.
-@var{size} defaults to 512 bytes. @var{value} defaults to 0. By default,
-only contiguous, non-overlapping blocks are tested, but this may be changed
-with the option @option{--delta}.
+Test block errors of given @var{size}, simulating a whole sector I/O error
+by setting all the bytes in the block to @var{value} before attempting
+decompression. @var{size} defaults to 512 bytes. @var{value} defaults to 0.
+By default, only contiguous, non-overlapping blocks are tested, but this may
+be changed with the option @option{--delta}.
@item -d @var{n}
@itemx --delta=@var{n}
@@ -1539,6 +2097,7 @@ non-overlapping blocks, or truncation sizes. Values of @var{n} smaller than
the block size result in overlapping blocks. (Which is convenient for
testing because there are usually too few non-overlapping blocks in a file).
+@anchor{--set-byte}
@item -e @var{position},@var{value}
@itemx --set-byte=@var{position},@var{value}
Set byte at @var{position} to @var{value} in the internal buffer after
diff --git a/dump_remove.cc b/dump_remove.cc
index 3273303..9118482 100644
--- a/dump_remove.cc
+++ b/dump_remove.cc
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2024 Antonio Diaz Diaz.
+ Copyright (C) 2009-2025 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -59,7 +59,7 @@ int dump_members( const std::vector< std::string > & filenames,
bool stdin_used = false;
for( unsigned i = 0; i < filenames.size(); ++i )
{
- const bool from_stdin = ( filenames[i] == "-" );
+ const bool from_stdin = filenames[i] == "-";
if( from_stdin ) { if( stdin_used ) continue; else stdin_used = true; }
const char * const input_filename =
from_stdin ? "(stdin)" : filenames[i].c_str();
@@ -93,8 +93,8 @@ int dump_members( const std::vector< std::string > & filenames,
if( in == !strip )
{
if( !safe_seek( infd, stream_pos, input_filename ) ||
- !copy_file( infd, outfd, mb.pos() - stream_pos ) )
- cleanup_and_fail( 1 );
+ !copy_file( infd, outfd, filenames[i], output_filename,
+ mb.pos() - stream_pos ) ) cleanup_and_fail( 1 );
copied_size += mb.pos() - stream_pos; ++members;
}
else { stripped_size += mb.pos() - stream_pos; ++smembers; }
@@ -106,12 +106,13 @@ int dump_members( const std::vector< std::string > & filenames,
if( !in && member_list.damaged )
{
if( !safe_seek( infd, mb.pos(), input_filename ) ) cleanup_and_fail( 1 );
- in = ( test_member_from_file( infd, mb.size() ) != 0 ); // damaged
+ in = test_member_from_file( infd, mb.size() ) != 0; // damaged
}
if( in == !strip )
{
if( !safe_seek( infd, mb.pos(), input_filename ) ||
- !copy_file( infd, outfd, mb.size() ) ) cleanup_and_fail( 1 );
+ !copy_file( infd, outfd, filenames[i], output_filename,
+ mb.size() ) ) cleanup_and_fail( 1 );
copied_size += mb.size(); ++members;
}
else { stripped_size += mb.size(); ++smembers; }
@@ -131,7 +132,8 @@ int dump_members( const std::vector< std::string > & filenames,
( !strip || i + 1 >= filenames.size() ) ) // strip all but last
{
if( !safe_seek( infd, cdata_size, input_filename ) ||
- !copy_file( infd, outfd, trailing_size ) ) cleanup_and_fail( 1 );
+ !copy_file( infd, outfd, filenames[i], output_filename,
+ trailing_size ) ) cleanup_and_fail( 1 );
copied_tsize += trailing_size;
}
else if( trailing_size > 0 ) { stripped_tsize += trailing_size; ++tfiles; }
@@ -210,7 +212,8 @@ int remove_members( const std::vector< std::string > & filenames,
if( stream_pos != prev_end &&
( !safe_seek( infd, prev_end, filename ) ||
!safe_seek( fd, stream_pos, filename ) ||
- !copy_file( infd, fd, mb.pos() - prev_end ) ) )
+ !copy_file( infd, fd, filenames[i], filenames[i],
+ mb.pos() - prev_end ) ) )
{ error = true; set_retval( retval, 1 ); break; }
stream_pos += mb.pos() - prev_end;
}
@@ -224,14 +227,14 @@ int remove_members( const std::vector< std::string > & filenames,
{
if( !safe_seek( infd, mb.pos(), filename ) )
{ error = true; set_retval( retval, 1 ); break; }
- in = ( test_member_from_file( infd, mb.size() ) != 0 ); // damaged
+ in = test_member_from_file( infd, mb.size() ) != 0; // damaged
}
if( !in )
{
if( stream_pos != mb.pos() &&
( !safe_seek( infd, mb.pos(), filename ) ||
!safe_seek( fd, stream_pos, filename ) ||
- !copy_file( infd, fd, mb.size() ) ) )
+ !copy_file( infd, fd, filenames[i], filenames[i], mb.size() ) ) )
{ error = true; set_retval( retval, 1 ); break; }
stream_pos += mb.size();
}
@@ -254,7 +257,7 @@ int remove_members( const std::vector< std::string > & filenames,
if( stream_pos != cdata_size &&
( !safe_seek( infd, cdata_size, filename ) ||
!safe_seek( fd, stream_pos, filename ) ||
- !copy_file( infd, fd, trailing_size ) ) )
+ !copy_file( infd, fd, filenames[i], filenames[i], trailing_size ) ) )
{ close( fd ); close( infd ); set_retval( retval, 1 ); break; }
stream_pos += trailing_size;
}
@@ -298,8 +301,8 @@ int remove_members( const std::vector< std::string > & filenames,
/* Set to zero in place the first LZMA byte of each member in each file by
opening one rw descriptor for each file. */
-int clear_marking( const std::vector< std::string > & filenames,
- const Cl_options & cl_opts )
+int nonzero_repair( const std::vector< std::string > & filenames,
+ const Cl_options & cl_opts )
{
long cleared_members = 0;
int files = 0, retval = 0;
@@ -310,8 +313,7 @@ int clear_marking( const std::vector< std::string > & filenames,
const int fd = open_truncable_stream( filename, &in_stats );
if( fd < 0 ) { set_retval( retval, 1 ); continue; }
- const Lzip_index lzip_index( fd, cl_opts, cl_opts.ignore_errors,
- cl_opts.ignore_errors );
+ const Lzip_index lzip_index( fd, cl_opts, true, cl_opts.ignore_errors );
if( lzip_index.retval() != 0 )
{
show_file_error( filename, lzip_index.error().c_str() );
@@ -332,7 +334,7 @@ int clear_marking( const std::vector< std::string > & filenames,
if( seek_read( fd, header_buf, bufsize, mb.pos() ) != bufsize )
{ show_file_error( filename, "Error reading member header", errno );
set_retval( retval, 1 ); break; }
- if( !header.check( cl_opts.ignore_errors ) )
+ if( !header.check( true ) )
{ show_file_error( filename, "Member header became corrupt as we read it." );
set_retval( retval, 2 ); break; }
if( *mark == 0 ) continue;
diff --git a/fec.h b/fec.h
new file mode 100644
index 0000000..0f52269
--- /dev/null
+++ b/fec.h
@@ -0,0 +1,296 @@
+/* Lziprecover - Data recovery tool for the lzip format
+ Copyright (C) 2023-2025 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+struct le32 // 32-bit value kept as 4 bytes, least significant byte first
+ {
+ enum { size = 4 };
+ uint8_t data[size];
+
+ le32 & operator=( unsigned n ) // store the low 4 bytes of n into data
+ { for( int i = 0; i < size; ++i ) { data[i] = (uint8_t)n; n >>= 8; }
+ return *this; }
+ unsigned val() const // rebuild the value, starting from the high byte
+ { unsigned n = 0;
+ for( int i = size - 1; i >= 0; --i ) { n <<= 8; n += data[i]; }
+ return n; }
+ bool operator==( const le32 & b ) const // bytewise comparison
+ { return std::memcmp( data, b.data, size ) == 0; }
+ bool operator!=( const le32 & b ) const { return !( *this == b ); }
+ };
+
+
+inline unsigned long long get_le( const uint8_t * const buf, int size )
+ { unsigned long long n = 0; // buf[0] is the least significant byte
+ while( --size >= 0 ) { n <<= 8; n += buf[size]; } return n; }
+
+inline unsigned long long ceil_divide( const unsigned long long size,
+ const unsigned long block_size )
+ { return size / block_size + ( size % block_size > 0 ); } // round up
+
+inline unsigned long ceil_divide( const unsigned long size,
+ const unsigned long block_size )
+ { return size / block_size + ( size % block_size > 0 ); } // round up
+
+inline uint8_t * set_lastbuf( const uint8_t * const prodata,
+ const unsigned long prodata_size, const unsigned long fbs,
+ const bool last_is_missing = false )
+ {
+ const unsigned long rest = prodata_size % fbs; // bytes in a partial last block
+ if( rest == 0 ) return 0; // last data block is complete
+ uint8_t * const lastbuf = new uint8_t[fbs]; // returned to the caller, not freed here
+ if( last_is_missing ) return lastbuf; // uninitialized buffer
+ std::memcpy( lastbuf, prodata + ( prodata_size - rest ), rest );
+ std::memset( lastbuf + rest, 0, fbs - rest ); // zero padding
+ return lastbuf; // copy of last data block padded to fbs bytes
+ }
+
+enum { min_fbs = 512, max_unit_fbs = 1 << 30 }; // 1 GiB
+const unsigned long long max_fbs = 1ULL << 47; // 128 TiB
+
+inline bool isvalid_fbs( const unsigned long long fbs ) // in range and a multiple of min_fbs
+ { return fbs >= min_fbs && fbs <= max_fbs && fbs % min_fbs == 0; }
+
+struct Coded_fbs // fec_block_size
+ {
+ enum { size = 2 };
+ uint8_t data[size]; // 11-bit mantissa, 5-bit exponent
+
+ Coded_fbs() {} // default constructor; data is left uninitialized
+ Coded_fbs( const unsigned long long fbs, const unsigned unit_fbs )
+ {
+ unsigned long long m = fbs; // mantissa candidate
+ int e = 0; // exponent; the coded value is m << e
+ while( m > 2047 || ( m > 1 && e < 9 ) ) { m >>= 1; ++e; } // move low bits of m into e
+ if( m << e < fbs && ++m > 2047 ) { m >>= 1; ++e; } // round up when the shift dropped nonzero bits
+ while( ( m << e ) % unit_fbs != 0 ) if( ++m > 2047 ) { m >>= 1; ++e; } // grow to a multiple of unit_fbs
+ if( m == 0 || m > 2047 || e < 9 || e > 40 || m << e < fbs ||
+ !isvalid_fbs( m << e ) || !isvalid_fbs( fbs ) )
+ internal_error( "Coded_fbs: can't fit fec_block_size in packet." );
+ data[0] = m; // low 8 bits of m
+ data[1] = ( e - 9 ) << 3 | m >> 8; // ( e - 9 ) in the high 5 bits, top 3 bits of m below
+ }
+
+ void copy( uint8_t * const buf ) const
+ { buf[0] = data[0]; buf[1] = data[1]; }
+
+ unsigned long long val() const
+ {
+ unsigned long long m = ( ( data[1] & 7 ) << 8 ) | data[0]; // reassemble the 11-bit mantissa
+ const int e = ( data[1] >> 3 ) + 9; // undo the bias of 9 applied by the constructor
+ return m << e;
+ }
+ };
+
+enum { fec_magic_l = 4, crc32_l = le32::size };
+const uint8_t fec_magic[4] = { 0xB3, 0xA5, 0xB6, 0xAF }; // ~"LZIP"
+const uint8_t fec_packet_magic[4] = { fec_magic[0], 'F', 'E', 'C' }; // shares only its first byte with fec_magic
+
+inline bool check_fec_magic( const uint8_t * const image_buffer ) // true if the buffer starts with fec_magic
+ { return std::memcmp( image_buffer, fec_magic, 4 ) == 0; }
+
+class Packet_base
+ {
+protected:
+ // the packet trailer contains the CRC32 of the payload
+ enum Lengths { trailer_size = crc32_l };
+
+ // header_size must be a multiple of 4 for uint32_t alignment in mul_add
+ const uint8_t * image_; // header + payload + trailer
+ bool image_is_external; // true if image_ must not be deleted by the destructor
+
+ Packet_base() : image_is_external( false ) {} // image_ is not initialized here
+ explicit Packet_base( const uint8_t * const image_buffer )
+ : image_( image_buffer ), image_is_external( true ) {} // wrap a buffer without copying it
+ ~Packet_base() { if( !image_is_external ) delete[] image_; }
+
+public:
+ const uint8_t * image() const { return image_; }
+ };
+
+
+class Chksum_packet : public Packet_base
+ {
+ enum { current_version = 0 };
+ enum Lengths { version_l = 1, flags_l = 1, prodata_size_l = 8,
+ prodata_md5_l = 16 };
+ enum Offsets { version_o = fec_magic_l, // each offset = previous offset + previous length
+ flags_o = version_o + version_l,
+ fbs_o = flags_o + flags_l,
+ prodata_size_o = fbs_o + Coded_fbs::size,
+ prodata_md5_o = prodata_size_o + prodata_size_l,
+ header_crc_o = prodata_md5_o + prodata_md5_l,
+ header_size = header_crc_o + crc32_l,
+ crc_array_o = header_size }; // crc_array starts right after the header
+
+ static unsigned compute_header_crc( const uint8_t * const image_buffer )
+ { return crc32.compute_crc( image_buffer, header_crc_o ); } // covers the bytes before header_crc_o
+
+public:
+ // check image_buffer with check_image before calling this constructor
+ explicit Chksum_packet( const uint8_t * const image_buffer )
+ : Packet_base( image_buffer ) {}
+ Chksum_packet( const uint8_t * const prodata,
+ const unsigned long prodata_size,
+ const md5_type & prodata_md5, const Coded_fbs coded_fbs,
+ const bool gf16_, const bool is_crc_c_ );
+
+ unsigned long long packet_size() const
+ { return ceil_divide( prodata_size(), fec_block_size() ) * // number of data blocks
+ sizeof crc_array()[0] + header_size + trailer_size; }
+ unsigned long long prodata_size() const
+ { return get_le( image_ + prodata_size_o, prodata_size_l ); }
+ const md5_type & prodata_md5() const
+ { return *(md5_type *)(image_ + prodata_md5_o); }
+ unsigned long long fec_block_size() const
+ { return ((Coded_fbs *)(image_ + fbs_o))->val(); }
+ static bool check_flags( const uint8_t * const image_buffer )
+ { return image_buffer[flags_o] <= 3; } // values above 3 are rejected
+ bool gf16() const { return image_[flags_o] & 2; } // flag bit 1
+ bool is_crc_c() const { return image_[flags_o] & 1; } // flag bit 0
+ // crc_array contains one CRC32 or one CRC32-C per protected data block
+ const le32 * crc_array() const
+ { return (const le32 *)(image_ + crc_array_o); }
+
+ static unsigned min_packet_size()
+ { return header_size + le32::size + trailer_size; } // header, one crc_array entry, trailer
+ static uint8_t version( const uint8_t * const image_buffer )
+ { return image_buffer[version_o]; }
+ static bool check_version( const uint8_t * const image_buffer )
+ { return image_buffer[version_o] == current_version; }
+
+ static unsigned check_image( const uint8_t * const image_buffer,
+ const unsigned long max_size );
+ bool check_payload_crc() const
+ {
+ const unsigned paysize = packet_size() - header_size - trailer_size; // bytes between header and trailer
+ const unsigned payload_crc_o = crc_array_o + paysize; // the stored CRC follows the payload
+ const unsigned payload_crc = get_le( image_ + payload_crc_o, crc32_l );
+ return crc32.compute_crc( image_ + crc_array_o, paysize ) == payload_crc;
+ }
+ };
+
+
+class Fec_packet : public Packet_base
+ {
+ enum Lengths { fbn_l = 2 };
+ enum Offsets { fbn_o = fec_magic_l, // each offset = previous offset + previous length
+ fbs_o = fbn_o + fbn_l,
+ header_crc_o = fbs_o + Coded_fbs::size,
+ header_size = header_crc_o + crc32_l,
+ fec_block_o = header_size }; // the fec block starts right after the header
+
+ static unsigned compute_header_crc( const uint8_t * const image_buffer )
+ { return crc32.compute_crc( image_buffer, header_crc_o ); } // covers the bytes before header_crc_o
+
+public:
+ // check image_buffer with check_image before calling this constructor
+ explicit Fec_packet( const uint8_t * const image_buffer )
+ : Packet_base( image_buffer ) {}
+ Fec_packet( const uint8_t * const prodata, const uint8_t * const lastbuf,
+ const unsigned fbn, const unsigned k,
+ const Coded_fbs coded_fbs, const bool gf16 );
+
+ unsigned long long packet_size() const
+ { return header_size + fec_block_size() + trailer_size; }
+ unsigned fec_block_number() const
+ { return get_le( image_ + fbn_o, fbn_l ); } // 2-byte little-endian field
+ unsigned long long fec_block_size() const // number of fec bytes
+ { return ((Coded_fbs *)(image_ + fbs_o))->val(); }
+ const uint8_t * fec_block() const { return image_ + fec_block_o; }
+
+ static unsigned min_packet_size()
+ { return header_size + min_fbs + trailer_size; } // packet holding a block of min_fbs bytes
+
+ static unsigned long check_image( const uint8_t * const image_buffer,
+ const unsigned long max_size );
+ };
+
+
+enum { max_k8 = 128, max_k16 = 32768, max_nk16 = 2048 };
+const char * const fec_extension = ".fec";
+
+inline void prot_stdin() // diagnostic for protected data given as standard input
+ { show_file_error( "(stdin)", "Can't read protected data from standard input." ); }
+
+// defined in fec_create.cc
+enum { fc_percent, fc_blocks, fc_bytes };
+void cleanup_mutex_lock();
+int gf_check( const unsigned k, const bool cl_gf16, const bool fec_random );
+void extract_dirname( const std::string & name, std::string & srcdir );
+void replace_dirname( const std::string & name, const std::string & srcdir,
+ const std::string & destdir, std::string & outname );
+bool has_fec_extension( const std::string & name );
+int fec_create( const std::vector< std::string > & filenames,
+ const std::string & default_output_filename,
+ const unsigned long fb_or_pct, const unsigned cl_block_size,
+ const unsigned num_workers, const char debug_level,
+ const char fctype, const char fec_level, const char recursive,
+ const bool cl_gf16, const bool fec_random, const bool force,
+ const bool to_stdout );
+
+// defined in fec_repair.cc
+int fec_test( const std::vector< std::string > & filenames,
+ const std::string & cl_fec_filename,
+ const std::string & default_output_filename,
+ const char recursive, const bool force, const bool ignore_errors,
+ const bool repair, const bool to_stdout );
+int fec_list( const std::vector< std::string > & filenames,
+ const bool ignore_errors );
+int fec_dc( const std::string & input_filename,
+ const std::string & cl_fec_filename, const unsigned cblocks );
+int fec_dz( const std::string & input_filename,
+ const std::string & cl_fec_filename,
+ std::vector< Block > & range_vector );
+int fec_dZ( const std::string & input_filename,
+ const std::string & cl_fec_filename,
+ const unsigned delta, const int sector_size );
+
+// defined in recursive.cc
+bool next_filename( std::list< std::string > & filelist,
+ std::string & input_filename, int & retval,
+ const char recursive );
+
+// defined in gf8.cc, gf16.cc
+void gf8_init();
+void gf16_init();
+bool gf8_check( const std::vector< unsigned > & fbn_vector, const unsigned k );
+bool gf16_check( const std::vector< unsigned > & fbn_vector, const unsigned k );
+
+/* buffer, lastbuf: k blocks of input data, last one possibly padded to fbs.
+ fbn: number of the fec block to be created (fbn < max_k).
+*/
+void rs8_encode( const uint8_t * const buffer, const uint8_t * const lastbuf,
+ uint8_t * const fec_block, const unsigned long fbs,
+ const unsigned fbn, const unsigned k );
+void rs16_encode( const uint8_t * const buffer, const uint8_t * const lastbuf,
+ uint8_t * const fec_block, const unsigned long fbs,
+ const unsigned fbn, const unsigned k );
+
+/* buffer, lastbuf: k data blocks, those in bb_vector are missing.
+ fecbuf: as many fec blocks as missing data blocks in the order of fbn_vector.
+ The repaired data blocks are written in their place in buffer and lastbuf.
+*/
+void rs8_decode( uint8_t * const buffer, uint8_t * const lastbuf,
+ const std::vector< unsigned > & bb_vector,
+ const std::vector< unsigned > & fbn_vector,
+ uint8_t * const fecbuf, const unsigned long fbs,
+ const unsigned k );
+void rs16_decode( uint8_t * const buffer, uint8_t * const lastbuf,
+ const std::vector< unsigned > & bb_vector,
+ const std::vector< unsigned > & fbn_vector,
+ uint8_t * const fecbuf, const unsigned long fbs,
+ const unsigned k );
diff --git a/fec_create.cc b/fec_create.cc
new file mode 100644
index 0000000..14033f9
--- /dev/null
+++ b/fec_create.cc
@@ -0,0 +1,608 @@
+/* Lziprecover - Data recovery tool for the lzip format
+ Copyright (C) 2023-2025 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <algorithm>
+#include <cerrno>
+#include <climits>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <ctime>
+#include <new>
+#include <list>
+#include <string>
+#include <vector>
+#include <pthread.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+#include "lzip.h"
+#include "md5.h"
+#include "fec.h"
+
+
+namespace {
+
+void xinit_mutex( pthread_mutex_t * const mutex )
+ {
+ const int errcode = pthread_mutex_init( mutex, 0 ); // 0 = default attributes
+ if( errcode ) // nonzero is an error number: report it, then cleanup_and_fail( 1 )
+ { show_error( "pthread_mutex_init", errcode ); cleanup_and_fail( 1 ); }
+ }
+
+void xinit_cond( pthread_cond_t * const cond )
+ {
+ const int errcode = pthread_cond_init( cond, 0 ); // 0 = default attributes
+ if( errcode ) // nonzero is an error number: report it, then cleanup_and_fail( 1 )
+ { show_error( "pthread_cond_init", errcode ); cleanup_and_fail( 1 ); }
+ }
+
+
+void xdestroy_mutex( pthread_mutex_t * const mutex )
+ {
+ const int errcode = pthread_mutex_destroy( mutex );
+ if( errcode ) // nonzero is an error number: report it, then cleanup_and_fail( 1 )
+ { show_error( "pthread_mutex_destroy", errcode ); cleanup_and_fail( 1 ); }
+ }
+
+void xdestroy_cond( pthread_cond_t * const cond )
+ {
+ const int errcode = pthread_cond_destroy( cond );
+ if( errcode ) // nonzero is an error number: report it, then cleanup_and_fail( 1 )
+ { show_error( "pthread_cond_destroy", errcode ); cleanup_and_fail( 1 ); }
+ }
+
+
+void xlock( pthread_mutex_t * const mutex )
+ {
+ const int errcode = pthread_mutex_lock( mutex );
+ if( errcode ) // nonzero is an error number: report it, then cleanup_and_fail( 1 )
+ { show_error( "pthread_mutex_lock", errcode ); cleanup_and_fail( 1 ); }
+ }
+
+void xunlock( pthread_mutex_t * const mutex )
+ {
+ const int errcode = pthread_mutex_unlock( mutex );
+ if( errcode ) // nonzero is an error number: report it, then cleanup_and_fail( 1 )
+ { show_error( "pthread_mutex_unlock", errcode ); cleanup_and_fail( 1 ); }
+ }
+
+
+void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex )
+ {
+ const int errcode = pthread_cond_wait( cond, mutex ); // mutex must be locked by the caller
+ if( errcode ) // nonzero is an error number: report it, then cleanup_and_fail( 1 )
+ { show_error( "pthread_cond_wait", errcode ); cleanup_and_fail( 1 ); }
+ }
+
+void xsignal( pthread_cond_t * const cond )
+ {
+ const int errcode = pthread_cond_signal( cond );
+ if( errcode ) // nonzero is an error number: report it, then cleanup_and_fail( 1 )
+ { show_error( "pthread_cond_signal", errcode ); cleanup_and_fail( 1 ); }
+ }
+
+
+unsigned long out_size; // sum of the sizes of the packets written by the workers
+unsigned deliver_id; // id of worker writing fec packets to outfd
+unsigned check_counter; // packets that reached the delivery stage
+unsigned wait_counter; // times a worker waited for its turn
+pthread_mutex_t omutex; // held by a worker while it reads or updates the variables above
+std::vector< pthread_cond_t > may_deliver; // worker[i] may write
+pthread_mutex_t cmutex = PTHREAD_MUTEX_INITIALIZER; // cleanup mutex
+
+
+struct Worker_arg // arguments for one worker thread; filled in by write_fec_mt
+ {
+ const uint8_t * prodata;
+ const uint8_t * lastbuf;
+ unsigned fec_blocks; // upper bound (exclusive) of the fbn values to produce
+ unsigned k;
+ unsigned num_workers; // step between the fbn values taken by one worker
+ unsigned worker_id; // first fbn taken by this worker; also its index in may_deliver
+ Coded_fbs coded_fbs;
+ bool gf16;
+ };
+
+
+// write a fec packet and pass the token to the next thread
+extern "C" void * worker( void * arg )
+ {
+ const Worker_arg & tmp = *(const Worker_arg *)arg; // arg points to a Worker_arg
+ const uint8_t * const prodata = tmp.prodata;
+ const uint8_t * const lastbuf = tmp.lastbuf;
+ const unsigned fec_blocks = tmp.fec_blocks;
+ const unsigned k = tmp.k;
+ const unsigned num_workers = tmp.num_workers;
+ const unsigned worker_id = tmp.worker_id;
+ const Coded_fbs coded_fbs = tmp.coded_fbs;
+ const bool gf16 = tmp.gf16;
+
+ for( unsigned fbn = worker_id; fbn < fec_blocks; fbn += num_workers ) // worker_id, worker_id + num_workers, ...
+ {
+ const Fec_packet fec_packet( prodata, lastbuf, fbn, k, coded_fbs, gf16 ); // built before taking omutex
+ const long packet_size = fec_packet.packet_size();
+ xlock( &omutex );
+ ++check_counter;
+ while( worker_id != deliver_id ) // not our turn yet
+ { ++wait_counter; xwait( &may_deliver[worker_id], &omutex ); }
+ xlock( &cmutex ); // because of cleanup_and_fail
+ if( writeblock( outfd, fec_packet.image(), packet_size ) != packet_size )
+ { xunlock( &cmutex ); cleanup_and_fail( 1 ); }
+ xunlock( &cmutex );
+ out_size += packet_size;
+ if( ++deliver_id >= num_workers ) deliver_id = 0; // next worker, wrapping to 0
+ xsignal( &may_deliver[deliver_id] ); // allow next worker to write
+ xunlock( &omutex );
+ }
+ return 0;
+ }
+
+
+// start the workers and wait for them to finish.
+bool write_fec_mt( const uint8_t * const prodata,
+ const uint8_t * const lastbuf,
+ const unsigned fec_blocks, const unsigned k,
+ const unsigned num_workers, const Coded_fbs coded_fbs,
+ const char debug_level, const bool gf16 )
+ {
+ if( debug_level & 2 ) std::fputs( "write_fec_mt.\n", stderr );
+ out_size = 0; // reset the state shared with the workers
+ deliver_id = 0; // worker 0 writes first
+ check_counter = 0;
+ wait_counter = 0;
+ xinit_mutex( &omutex );
+ may_deliver.resize( num_workers ); // one condition variable per worker
+ for( unsigned i = 0; i < may_deliver.size(); ++i )
+ xinit_cond( &may_deliver[i] );
+ std::vector< Worker_arg > worker_args( num_workers );
+ std::vector< pthread_t > worker_threads( num_workers );
+
+ for( unsigned i = 0; i < num_workers; ++i ) // fill the arguments and start worker i
+ {
+ worker_args[i].prodata = prodata;
+ worker_args[i].lastbuf = lastbuf;
+ worker_args[i].fec_blocks = fec_blocks;
+ worker_args[i].k = k;
+ worker_args[i].num_workers = num_workers;
+ worker_args[i].worker_id = i;
+ worker_args[i].coded_fbs = coded_fbs;
+ worker_args[i].gf16 = gf16;
+ const int errcode =
+ pthread_create( &worker_threads[i], 0, worker, &worker_args[i] );
+ if( errcode ) { show_error( "Can't create worker threads", errcode );
+ cleanup_and_fail( 1 ); }
+ }
+
+ for( unsigned i = 0; i < num_workers; ++i ) // wait for every worker to finish
+ {
+ const int errcode = pthread_join( worker_threads[i], 0 );
+ if( errcode ) { show_error( "Can't join worker threads", errcode );
+ cleanup_and_fail( 1 ); }
+ }
+
+ for( unsigned i = 0; i < may_deliver.size(); ++i )
+ xdestroy_cond( &may_deliver[i] );
+ xdestroy_mutex( &omutex );
+
+ if( debug_level & 1 ) // print the counters gathered by the workers
+ std::fprintf( stderr,
+ "workers started %8u\n"
+ "any worker tried to write a packet %8u times\n"
+ "any worker had to wait %8u times\n",
+ num_workers, check_counter, wait_counter );
+
+ return true; // the only return; every error path above calls cleanup_and_fail( 1 )
+ }
+
+// store the 'size' low-order bytes of n in buf, least significant byte first
+inline void set_le( uint8_t * const buf, const int size, unsigned long n )
+ { for( int i = 0; i < size; ++i ) { buf[i] = (uint8_t)n; n >>= 8; } }
+
+// double bs from min_fbs until bs >= 65536 or 4 * bs * bs >= prodata_size
+unsigned compute_unit_fbs( const unsigned long prodata_size )
+ {
+ unsigned bs = min_fbs;
+ while( bs < 65536 && 4ULL * bs * bs < prodata_size ) bs <<= 1; // product computed as ULL
+ return bs;
+ }
+
+unsigned long divide_fbs( const unsigned long size, const unsigned blocks,
+ const unsigned unit_fbs )
+ { // block size for 'blocks' blocks, limited to [min_fbs, max_fbs], in unit_fbs units
+ unsigned long long fbs = ceil_divide( size, blocks ); // ULL as max_fbs
+ if( fbs < min_fbs ) fbs = min_fbs;
+ else if( fbs > max_fbs ) fbs = max_fbs;
+ return ceil_divide( fbs, unit_fbs ); // convert bytes to a count of units
+ }
+
+// choose the fec block size (a multiple of unit_fbs) from prodata_size and fec_level
+Coded_fbs compute_fbs( const unsigned long prodata_size,
+ const unsigned cl_block_size, const char fec_level )
+ {
+ const unsigned unit_fbs = isvalid_fbs( cl_block_size ) ? cl_block_size : // else computed
+ compute_unit_fbs( prodata_size );
+ const unsigned long max_k = (fec_level == 0) ? max_k8 : max_k16;
+ const unsigned k9 = std::min( ceil_divide( prodata_size, unit_fbs ), max_k ); // most blocks
+ const unsigned long fbsu9 = divide_fbs( prodata_size, k9, unit_fbs ); // units with k9 blocks
+ const unsigned long fbsu0 = divide_fbs( prodata_size, max_k8, unit_fbs ); // with max_k8 blocks
+ const unsigned long a = std::min( (10 - fec_level) * fbsu9, fbsu0 ); // lin
+ const unsigned long b = fbsu0 >> fec_level; // exp
+ const unsigned long fbsu = std::max( a, b ); // join linear and exponential
+ return Coded_fbs( fbsu * unit_fbs, unit_fbs ); // size in bytes plus its unit
+ }
+
+// return the number of fec blocks to create, or 0 if it can't be computed
+unsigned compute_fec_blocks( const unsigned long prodata_size,
+ const unsigned long fb_or_pct, const char fctype,
+ const char fec_level, const Coded_fbs coded_fbs )
+ {
+ const unsigned long fbs = coded_fbs.val();
+ const unsigned prodata_blocks = ceil_divide( prodata_size, fbs );
+ const unsigned long max_k = (fec_level == 0) ? max_k8 : max_k16;
+ if( !isvalid_fbs( fbs ) || prodata_blocks > max_k ) return 0; // too many data blocks
+ const unsigned long max_nk = (fec_level == 0) ? max_k8 : max_nk16; // limit of fec blocks
+ unsigned fec_blocks;
+ if( fctype == fc_blocks ) fec_blocks = std::min( max_nk, fb_or_pct ); // explicit count
+ else
+ {
+ unsigned long fec_bytes;
+ if( fctype == fc_percent ) // fb_or_pct / 100000 is the fraction; kept in [1, 100000]
+ { const double pct = std::max( 1UL, std::min( 100000UL, fb_or_pct ) );
+ fec_bytes = (unsigned long)std::ceil( prodata_size * pct / 100000 ); }
+ else if( fctype == fc_bytes )
+ fec_bytes = std::min( fb_or_pct, prodata_size ); // at most prodata_size
+ else return 0; // unknown fctype, must not happen
+ fec_blocks = std::min( ceil_divide( fec_bytes, fbs ), max_nk );
+ }
+ if( fec_blocks > prodata_blocks ) fec_blocks = prodata_blocks; // no more than data blocks
+ return fec_blocks;
+ }
+
+// linear congruential generator: update 'state' and return 15 bits taken from it
+unsigned my_rand( unsigned long & state )
+ {
+ state = state * 1103515245 + 12345;
+ return ( state / 65536 ) % 32768; // random number from 0 to 32767
+ }
+
+void random_fbn_vector( const unsigned fec_blocks, const bool gf16,
+ std::vector< unsigned > & fbn_vector )
+ { // append fec_blocks distinct pseudo-random fec block numbers to fbn_vector
+ struct timespec ts;
+ clock_gettime( CLOCK_REALTIME, &ts ); // seed the generator from the current time
+ unsigned long state = ts.tv_nsec;
+ while( state != 0 && ( state & 1 ) == 0 ) state >>= 1; // remove trailing zero bits
+ if( state != 0 ) state *= ts.tv_sec; else state = ts.tv_sec;
+ for( unsigned i = 0; i < fec_blocks; ++i )
+ {
+ again: const unsigned fbn = // below max_k16 or max_k8 depending on gf16
+ gf16 ? my_rand( state ) % max_k16 : my_rand( state ) % max_k8;
+ for( unsigned j = 0; j < fbn_vector.size(); ++j ) // reject values already present
+ if( fbn == fbn_vector[j] ) goto again; // NOTE(review): never ends if no value is free
+ fbn_vector.push_back( fbn );
+ }
+ }
+
+// write to outfd a chksum packet, the fec packets, and maybe a second chksum packet
+bool write_fec( const char * const input_filename,
+ const uint8_t * const prodata, const unsigned long prodata_size,
+ const unsigned long fb_or_pct, const unsigned cl_block_size,
+ unsigned num_workers, const char debug_level, const char fctype,
+ const char fec_level, const bool cl_gf16, const bool fec_random )
+ {
+ const Coded_fbs coded_fbs =
+ compute_fbs( prodata_size, cl_block_size, fec_level );
+ const unsigned fec_blocks =
+ compute_fec_blocks( prodata_size, fb_or_pct, fctype, fec_level, coded_fbs );
+ if( fec_blocks == 0 ) { show_file_error( input_filename,
+ "Input file is too large for fec protection." ); return false; }
+ if( num_workers > fec_blocks ) num_workers = fec_blocks; // each worker writes >= 1 packet
+ const unsigned long fbs = coded_fbs.val();
+ const unsigned prodata_blocks = ceil_divide( prodata_size, fbs );
+ md5_type prodata_md5;
+ compute_md5( prodata, prodata_size, prodata_md5 );
+ unsigned chksum_packet_size;
+ const bool gf16 = cl_gf16 || prodata_blocks > max_k8 || fec_blocks > max_k8; // or forced by size
+ { // first chksum packet
+ const Chksum_packet chksum_packet( prodata, prodata_size, prodata_md5,
+ coded_fbs, gf16, false ); // CRC32 array
+ const long packet_size = chksum_packet.packet_size();
+ if( writeblock( outfd, chksum_packet.image(), packet_size ) != packet_size )
+ goto fail;
+ chksum_packet_size = packet_size;
+ }
+ { // fec packets
+ unsigned long fecdata_size = chksum_packet_size; // bytes written so far
+ const uint8_t * const lastbuf = set_lastbuf( prodata, prodata_size, fbs ); // delete[]d below
+ gf16 ? gf16_init() : gf8_init(); // initialize Galois tables
+ if( fec_random ) // packets with random fbn's, written sequentially
+ {
+ std::vector< unsigned > fbn_vector;
+ random_fbn_vector( fec_blocks, gf16, fbn_vector );
+ for( unsigned i = 0; i < fbn_vector.size(); ++i )
+ {
+ const unsigned fbn = fbn_vector[i];
+ const Fec_packet
+ fec_packet( prodata, lastbuf, fbn, prodata_blocks, coded_fbs, gf16 );
+ const long packet_size = fec_packet.packet_size();
+ if( writeblock( outfd, fec_packet.image(), packet_size ) != packet_size )
+ { delete[] lastbuf; goto fail; }
+ fecdata_size += packet_size;
+ }
+ }
+ else if( num_workers > 1 ) // packets 0 to fec_blocks - 1, multithreaded
+ {
+ if( !write_fec_mt( prodata, lastbuf, fec_blocks, prodata_blocks,
+ num_workers, coded_fbs, debug_level, gf16 ) )
+ { delete[] lastbuf; goto fail; }
+ fecdata_size += out_size; // out_size was updated by the workers
+ }
+ else for( unsigned fbn = 0; fbn < fec_blocks; ++fbn ) // single-threaded
+ {
+ const Fec_packet
+ fec_packet( prodata, lastbuf, fbn, prodata_blocks, coded_fbs, gf16 );
+ const long packet_size = fec_packet.packet_size();
+ if( writeblock( outfd, fec_packet.image(), packet_size ) != packet_size )
+ { delete[] lastbuf; goto fail; }
+ fecdata_size += packet_size;
+ }
+ delete[] lastbuf;
+ if( ( fecdata_size + chksum_packet_size ) / 2 <= fec_blocks * fbs &&
+ fec_blocks > 1 ) // write the second chksum packet
+ {
+ const Chksum_packet chksum_packet( prodata, prodata_size, prodata_md5,
+ coded_fbs, gf16, true ); // CRC32-C array
+ const long packet_size = chksum_packet.packet_size();
+ if( writeblock( outfd, chksum_packet.image(), packet_size ) != packet_size )
+ goto fail;
+ fecdata_size += packet_size;
+ }
+ if( fecdata_size % 4 != 0 ) internal_error( "fecdata_size % 4 != 0" );
+ if( verbosity >= 1 ) // report total size, fec bytes, and number of fec blocks
+ std::fprintf( stderr, " %s: %s bytes, %s fec bytes, %u blocks\n",
+ printable_name( output_filename, false ),
+ format_num3( fecdata_size ),
+ format_num3( fec_blocks * fbs ), fec_blocks );
+ return true;
+ }
+fail: // reached when a writeblock to outfd came up short
+ show_file_error( printable_name( output_filename, false ), wr_err_msg, errno );
+ return false;
+ }
+
+// return the result of open_instream, or -1 if name already has the fec extension
+int open_instream2( const std::string & name, struct stat * const in_statsp )
+ {
+ if( !has_fec_extension( name ) )
+ return open_instream( name.c_str(), in_statsp, false, true );
+ if( verbosity >= 0 ) // files ending in fec_extension are not protected again
+ std::fprintf( stderr, "%s: %s: Input file already has '%s' suffix, ignored.\n",
+ program_name, name.c_str(), fec_extension );
+ return -1;
+ }
+
+} // end namespace
+
+// build in new[] memory the image of a chksum packet: header, CRC array, and array CRC
+Chksum_packet::Chksum_packet( const uint8_t * const prodata,
+ const unsigned long prodata_size,
+ const md5_type & prodata_md5, const Coded_fbs coded_fbs,
+ const bool gf16_, const bool is_crc_c_ )
+ {
+ const unsigned long fbs = coded_fbs.val();
+ const unsigned prodata_blocks = ceil_divide( prodata_size, fbs );
+ if( prodata_blocks * fbs < prodata_size )
+ internal_error( "prodata_blocks * fec_block_size < prodata_size" );
+ const unsigned paysize = prodata_blocks * sizeof crc_array()[0]; // one CRC per data block
+ const unsigned packet_size = header_size + paysize + trailer_size;
+ if( paysize <= prodata_blocks || packet_size <= paysize ) // sizes wrapped around
+ throw std::bad_alloc();
+ uint8_t * const ip = new uint8_t[packet_size]; // writable image ptr
+ image_ = ip;
+
+ std::memcpy( ip, fec_magic, fec_magic_l ); // fill the header
+ ip[version_o] = current_version;
+ ip[flags_o] = ( gf16_ << 1 ) | is_crc_c_; // bit 1 = gf16, bit 0 = CRC32-C
+ set_le( ip + prodata_size_o, prodata_size_l, prodata_size );
+ *(md5_type *)(ip + prodata_md5_o) = prodata_md5;
+ coded_fbs.copy( ip + fbs_o );
+ set_le( ip + header_crc_o, crc32_l, compute_header_crc( image_ ) ); // done after the fields
+
+ le32 * const crc_arr = (le32 *)(ip + crc_array_o); // fill crc array
+ unsigned i = 0; // number of array entries written
+ if( !is_crc_c_ ) // CRC32
+ for( unsigned long pos = 0; pos < prodata_size; pos += fbs, ++i )
+ crc_arr[i] = // the last block may be shorter than fbs
+ crc32.compute_crc( prodata + pos, std::min( fbs, prodata_size - pos ) );
+ else
+ { // CRC32-C
+ const CRC32 crc32c( true );
+ for( unsigned long pos = 0; pos < prodata_size; pos += fbs, ++i )
+ crc_arr[i] = // the last block may be shorter than fbs
+ crc32c.compute_crc( prodata + pos, std::min( fbs, prodata_size - pos ) );
+ }
+ if( i != prodata_blocks )
+ internal_error( "wrong fec_block_size or number of prodata_blocks." );
+
+ // compute CRC32 of payload (crc array)
+ set_le( ip + crc_array_o + paysize, crc32_l,
+ crc32.compute_crc( image_ + crc_array_o, paysize ) );
+ }
+
+// build in new[] memory the image of fec packet number fbn: header, fec block, block CRC
+Fec_packet::Fec_packet( const uint8_t * const prodata,
+ const uint8_t * const lastbuf,
+ const unsigned fbn, const unsigned k,
+ const Coded_fbs coded_fbs, const bool gf16 )
+ {
+ const unsigned long fbs = coded_fbs.val();
+ const unsigned long packet_size = header_size + fbs + trailer_size;
+ if( packet_size <= fbs || !fits_in_size_t( packet_size ) ) // size wrapped or too large
+ throw std::bad_alloc();
+ uint8_t * const ip = new uint8_t[packet_size]; // writable image ptr
+ image_ = ip;
+
+ std::memcpy( ip, fec_packet_magic, fec_magic_l ); // fill the header
+ set_le( ip + fbn_o, fbn_l, fbn );
+ coded_fbs.copy( ip + fbs_o );
+ set_le( ip + header_crc_o, crc32_l, compute_header_crc( image_ ) ); // done after the fields
+
+ // fill fec array
+ gf16 ? rs16_encode( prodata, lastbuf, ip + fec_block_o, fbs, fbn, k ) :
+ rs8_encode( prodata, lastbuf, ip + fec_block_o, fbs, fbn, k );
+
+ // compute CRC32 of payload (fec array)
+ set_le( ip + fec_block_o + fbs, crc32_l,
+ crc32.compute_crc( image_ + fec_block_o, fbs ) );
+ }
+
+
+void cleanup_mutex_lock() // lock cmutex to make cleanup_and_fail thread-safe
+ { pthread_mutex_lock( &cmutex ); } // ignore errors to avoid loop; not unlocked here
+
+int gf_check( const unsigned k, const bool cl_gf16, const bool fec_random )
+ { // run gf16_check or gf8_check; return 0 if the check returns true, else 1
+ std::vector< unsigned > fbn_vector; // stays empty unless fec_random
+ const bool gf16 = cl_gf16 || k > max_k8; // 16-bit field if requested or if k > max_k8
+ if( fec_random ) random_fbn_vector( k, gf16, fbn_vector );
+ return gf16 ? !gf16_check( fbn_vector, k ) : !gf8_check( fbn_vector, k );
+ }
+
+
+/* if name contains slash(es), copy name into srcdir up to (and including) the last
+ slash, removing a leading dot followed by slash(es). Else srcdir is not modified. */
+void extract_dirname( const std::string & name, std::string & srcdir )
+ {
+ unsigned i = 0; // index of the first char to copy
+ unsigned j = name.size(); // one past the last char to copy
+ if( j >= 2 && name[0] == '.' && name[1] == '/' ) // remove leading "./"
+ for( i = 2; i < j && name[i] == '/'; ) ++i; // and any slashes following it
+ while( j > i && name[j-1] != '/' ) --j; // remove last component if any
+ if( j > i ) srcdir.assign( name, i, j - i ); // name[i..j) ends with a slash
+ }
+
+
+// replace prefix srcdir with destdir in name and write result to outname
+void replace_dirname( const std::string & name, const std::string & srcdir,
+ const std::string & destdir, std::string & outname )
+ {
+ if( srcdir.size() && name.compare( 0, srcdir.size(), srcdir ) != 0 ) // name lacks the prefix
+ { if( verbosity >= 0 ) std::fprintf( stderr,
+ "dirname '%s' != '%s'\n", name.c_str(), srcdir.c_str() );
+ internal_error( "srcdir mismatch." ); }
+ outname = destdir;
+ outname.append( name, srcdir.size(), name.size() - srcdir.size() ); // rest of name
+ }
+
+// return true if name ends with fec_extension and is longer than the extension
+bool has_fec_extension( const std::string & name )
+ {
+ const unsigned ext_len = std::strlen( fec_extension );
+ return name.size() > ext_len && // a name equal to the extension does not count
+ name.compare( name.size() - ext_len, ext_len, fec_extension ) == 0;
+ }
+
+// create fec data for each file named in filenames; return the accumulated retval
+int fec_create( const std::vector< std::string > & filenames,
+ const std::string & default_output_filename,
+ const unsigned long fb_or_pct, const unsigned cl_block_size,
+ const unsigned num_workers, const char debug_level,
+ const char fctype, const char fec_level, const char recursive,
+ const bool cl_gf16, const bool fec_random, const bool force,
+ const bool to_stdout )
+ {
+ const bool to_dir = !to_stdout && default_output_filename.size() &&
+ default_output_filename.end()[-1] == '/'; // output name ends with a slash
+ const bool to_file = !to_stdout && !to_dir && default_output_filename.size();
+ if( ( to_stdout || to_file ) && filenames.size() != 1 )
+ { show_error( "You must specify exactly 1 file when redirecting fec data." );
+ return 1; }
+ if( ( to_stdout || to_file ) && recursive )
+ { show_error( "Can't redirect fec data in recursive mode." ); return 1; }
+ if( to_stdout ) { outfd = STDOUT_FILENO; if( !check_tty_out() ) return 1; }
+ else outfd = -1; // opened below, once the input file is known
+
+ int retval = 0;
+ const bool one_to_one = !to_stdout && !to_file; // one fec file per input file
+ for( unsigned i = 0; i < filenames.size(); ++i )
+ {
+ if( filenames[i] == "-" ) // standard input is not accepted as input here
+ { prot_stdin(); set_retval( retval, 1 ); continue; }
+ std::string srcdir; // dirname to be replaced by '-o dir/'
+ if( to_dir ) extract_dirname( filenames[i], srcdir );
+ std::list< std::string > filelist( 1U, filenames[i] );
+ std::string input_filename;
+ while( next_filename( filelist, input_filename, retval, recursive ) )
+ {
+ struct stat in_stats;
+ const int infd = open_instream2( input_filename, &in_stats );
+ if( infd < 0 ) { set_retval( retval, 1 ); continue; }
+
+ const char * const input_filenamep = input_filename.c_str();
+ const long long file_size = lseek( infd, 0, SEEK_END );
+ if( file_size <= 0 ) // NOTE(review): also true if lseek fails (returns -1)
+ { show_file_error( input_filenamep, "Input file is empty." );
+ set_retval( retval, 2 ); close( infd ); continue; }
+ if( !fits_in_size_t( file_size ) )
+ { show_file_error( input_filenamep, large_file_msg );
+ set_retval( retval, 1 ); close( infd ); continue; }
+ const unsigned long prodata_size = file_size;
+ const uint8_t * const prodata = // map the whole input file read-only
+ (const uint8_t *)mmap( 0, prodata_size, PROT_READ, MAP_PRIVATE, infd, 0 );
+ close( infd ); // the descriptor is not used after mmap
+ if( prodata == MAP_FAILED )
+ { show_file_error( input_filenamep, mmap_msg, errno );
+ set_retval( retval, 1 ); continue; }
+
+ if( one_to_one ) // output name = input name (maybe in another dir) + fec_extension
+ {
+ if( to_dir ) replace_dirname( input_filename, srcdir,
+ default_output_filename, output_filename );
+ else output_filename = input_filename;
+ output_filename += fec_extension; set_signal_handler();
+ if( !open_outstream( force, true, false, true, to_dir ) )
+ { munmap( (void *)prodata, prodata_size );
+ set_retval( retval, 1 ); continue; }
+ if( !check_tty_out() )
+ { set_retval( retval, 1 ); return retval; } // don't delete a tty
+ }
+ else if( to_file && outfd < 0 ) // open outfd after checking infd
+ {
+ output_filename = default_output_filename; set_signal_handler();
+ if( !open_outstream( force, false ) || !check_tty_out() )
+ return 1; // check tty only once and don't try to delete a tty
+ }
+
+ // write fec data to output file
+ if( !write_fec( input_filenamep, prodata, prodata_size, fb_or_pct,
+ cl_block_size, num_workers, debug_level, fctype,
+ fec_level, cl_gf16, fec_random ) )
+ { munmap( (void *)prodata, prodata_size ); cleanup_and_fail( 1 ); }
+ /* To avoid '-Fc | -Ft' running out of address space, munmap before
+ closing outfd and mmap after reading fec data from stdin */
+ munmap( (void *)prodata, prodata_size );
+ if( !close_outstream( &in_stats ) ) cleanup_and_fail( 1 );
+ }
+ }
+ return retval;
+ }
diff --git a/fec_repair.cc b/fec_repair.cc
new file mode 100644
index 0000000..9710247
--- /dev/null
+++ b/fec_repair.cc
@@ -0,0 +1,1109 @@
+/* Lziprecover - Data recovery tool for the lzip format
+ Copyright (C) 2023-2025 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <algorithm>
+#include <cerrno>
+#include <climits>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <new>
+#include <list>
+#include <string>
+#include <vector>
+#include <stdint.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+#include "lzip.h"
+#include "md5.h"
+#include "fec.h"
+
+
+namespace {
+
+const char * const size_mismatch_msg = // diagnostic text for a size mismatch
+ "Size mismatch between protected data and fec data.";
+
+void show_diag_msg( const std::string & input_filename, const char * const msg,
+ const bool debug = false )
+ { // print msg alone if verbosity >= (debug ? 0 : 1), else through show_file_error
+ if( verbosity >= ( debug ? 0 : 1 ) ) std::fprintf( stderr, "%s\n", msg );
+ else show_file_error( input_filename.c_str(), msg ); // passes the file name too
+ }
+
+// return true if name ends with ".lz" or ".tlz" and is longer than that suffix
+bool has_lz_extension( const std::string & name )
+ {
+ return ( name.size() > 3 &&
+ name.compare( name.size() - 3, 3, ".lz" ) == 0 ) ||
+ ( name.size() > 4 &&
+ name.compare( name.size() - 4, 4, ".tlz" ) == 0 );
+ }
+
+bool has_fec_extension2( const std::string & name )
+ { // like has_fec_extension, but also print a notice when it returns true
+ if( !has_fec_extension( name ) ) return false;
+ if( verbosity >= 0 )
+ std::fprintf( stderr, "%s: %s: Input file has '%s' suffix, ignored.\n",
+ program_name, name.c_str(), fec_extension );
+ return true;
+ }
+
+
+/* Return the address of a malloc'd buffer containing the file data and
+ the file size in '*file_sizep'. The input descriptor is closed on return.
+ In case of error, return 0 and do not modify '*file_sizep'.
+*/
+uint8_t * read_file( const std::string & filename, long * const file_sizep )
+ {
+ struct stat in_stats; // not used
+ const char * const filenamep = printable_name( filename );
+ const int infd = (filename == "-") ? // "-" means standard input
+ STDIN_FILENO : open_instream( filenamep, &in_stats, false );
+ if( infd < 0 ) return 0;
+ long buffer_size = 65536; // initial size; doubled each time the buffer fills
+ uint8_t * buffer = (uint8_t *)std::malloc( buffer_size );
+ if( !buffer ) { show_file_error( filenamep, mem_msg ); close( infd ); return 0; }
+ long file_size = readblock( infd, buffer, buffer_size );
+ while( file_size >= buffer_size && !errno ) // presumably readblock zeroes errno on success
+ {
+ if( buffer_size >= LONG_MAX )
+ { show_file_error( filenamep, large_file_msg );
+ std::free( buffer ); close( infd ); return 0; }
+ buffer_size = (buffer_size <= LONG_MAX / 2) ? 2 * buffer_size : LONG_MAX;
+ uint8_t * const tmp = (uint8_t *)std::realloc( buffer, buffer_size );
+ if( !tmp ) // realloc leaves the old buffer allocated on failure
+ { show_file_error( filenamep, mem_msg ); std::free( buffer ); close( infd ); return 0; }
+ buffer = tmp;
+ file_size += readblock( infd, buffer + file_size, buffer_size - file_size );
+ }
+ if( errno ) // report before close, which may change errno
+ { show_file_error( filenamep, read_error_msg, errno );
+ std::free( buffer ); close( infd ); return 0; }
+ if( close( infd ) != 0 )
+ { show_file_error( filenamep, "Error closing input file", errno );
+ std::free( buffer ); return 0; }
+ *file_sizep = file_size;
+ return buffer;
+ }
+
+// format the error message for an unsupported version; the result is overwritten by the next call
+const char * bad_fec_version( const unsigned version )
+ {
+ static char buf[80]; // static so that the returned pointer stays valid
+ snprintf( buf, sizeof buf, "Version %u fec format not supported.", version );
+ return buf;
+ }
+
+// Remove from block_vector the data at or after 'end'. Return false if truncation removed all blocks.
+bool truncate_block_vector( std::vector< Block > & block_vector,
+ const long long end )
+ {
+ unsigned i = block_vector.size();
+ while( i > 0 && block_vector[i-1].pos() >= end ) --i; // skip blocks starting at or after end
+ if( i == 0 ) { block_vector.clear(); return false; }
+ Block & b = block_vector[i-1]; // last block starting before end
+ if( b.includes( end ) ) b.size( end - b.pos() ); // shorten it to finish at end
+ if( i < block_vector.size() )
+ block_vector.erase( block_vector.begin() + i, block_vector.end() );
+ return true;
+ }
+
+// index of the chksum packets and fec packets found in a buffer of fec data
+class Fec_index
+ {
+ const le32 * crc_array_; // images allocated in fecdata
+ const le32 * crcc_array_; // CRC32-C array; 0 until one is found
+ std::vector< Fec_packet > fec_vector; // fec blocks
+ std::string error_; // message set together with a nonzero retval_
+ unsigned long fec_net_size_; // size of packets (not file size)
+ unsigned long fec_block_size_; // from chksum/fec packets
+ unsigned long prodata_size_; // from chksum packets
+ md5_type prodata_md5_; // from chksum packets
+ int retval_; // 0 = OK, 1 = error, 2 = fatal error
+ bool gf16_; // from chksum packets
+ const bool is_lz_; // used by find_bad_blocks
+
+ bool parse_packet( const Chksum_packet & chksum_packet,
+ const bool ignore_errors );
+
+public:
+ Fec_index( const uint8_t * const fecdata, const unsigned long fecdata_size,
+ const bool ignore_errors = false, const bool is_lz = false );
+
+ const std::string & error() const { return error_; }
+ int retval() const { return retval_; }
+ void show_fec_data( const std::string & input_filename,
+ const std::string & fec_filename, FILE * const f ) const;
+
+ unsigned long fec_block_size() const { return fec_block_size_; }
+ unsigned fec_blocks() const { return fec_vector.size(); } // number of fec packets found
+ unsigned long fec_bytes() const { return fec_blocks() * fec_block_size_; }
+ const uint8_t * fec_block( const unsigned i ) const // payload of the i-th fec packet
+ { return fec_vector[i].fec_block(); }
+ unsigned fbn( const unsigned i ) const // fec block number of the i-th fec packet
+ { return fec_vector[i].fec_block_number(); }
+ bool gf16() const { return gf16_; }
+
+ unsigned long prodata_size() const { return prodata_size_; }
+ const md5_type & prodata_md5() const { return prodata_md5_; }
+ unsigned prodata_blocks() const // prodata_size_ / fec_block_size_ via ceil_divide
+ { return ceil_divide( prodata_size_, fec_block_size_ ); }
+ bool is_lz() const { return is_lz_; }
+
+ bool has_array() const { return crc_array() != 0 || crcc_array() != 0; }
+ const le32 * crc_array() const { return crc_array_; }
+ const le32 * crcc_array() const { return crcc_array_; }
+
+ unsigned long block_pos( const unsigned i ) const // offset of prodata block i
+ { return i * fec_block_size_; }
+
+ unsigned long block_size( const unsigned i ) const // size of prodata block i
+ {
+ const unsigned long pos = i * fec_block_size_;
+ if( pos >= prodata_size_ ) return 0; // block i starts past the end of prodata
+ return std::min( fec_block_size_, prodata_size_ - pos ); // last block may be shorter
+ }
+
+ bool prodata_match( const std::string & input_filename, // compare MD5s; report mismatch
+ const md5_type & computed_prodata_md5,
+ const bool debug = true ) const
+ {
+ if( prodata_md5_ == computed_prodata_md5 ) return true;
+ show_diag_msg( input_filename,
+ "MD5 mismatch between protected data and fec data.", debug );
+ return false;
+ }
+ };
+
+// merge a chksum packet into the index; on error set error_ and retval_ and return false
+bool Fec_index::parse_packet( const Chksum_packet & chksum_packet,
+ const bool ignore_errors )
+ {
+ const unsigned long long prodata_size = chksum_packet.prodata_size();
+ if( prodata_size_ <= 0 ) // first chksum packet
+ {
+ if( !fits_in_size_t( prodata_size ) )
+ { error_ = large_file_msg; retval_ = 1; return false; }
+ prodata_size_ = prodata_size;
+ prodata_md5_ = chksum_packet.prodata_md5();
+ gf16_ = chksum_packet.gf16();
+ }
+ else // later packets must agree with the values already stored
+ {
+ if( prodata_size_ != prodata_size )
+ { error_ = "Contradictory protected data size in chksum packet.";
+ retval_ = 2; return false; }
+ if( prodata_md5_ != chksum_packet.prodata_md5() )
+ { error_ = "Contradictory protected data MD5 in chksum packet.";
+ retval_ = 2; return false; }
+ if( gf16_ != chksum_packet.gf16() )
+ { error_ = "Contradictory Galois Field size in chksum packet.";
+ retval_ = 2; return false; }
+ }
+ if( !isvalid_fbs( fec_block_size_ ) ) // no valid block size stored yet
+ fec_block_size_ = chksum_packet.fec_block_size();
+ else if( fec_block_size_ != chksum_packet.fec_block_size() )
+ { error_ = "Contradictory fec_block_size in chksum packet.";
+ retval_ = 2; return false; }
+ if( !chksum_packet.check_payload_crc() ) // corrupt array
+ { if( ignore_errors ) return true; // keep the header values, skip the array
+ error_ = "Corrupt CRC array in chksum packet."; retval_ = 2; return false; }
+ if( !chksum_packet.is_crc_c() ) // CRC32 array; only one is accepted
+ {
+ if( !crc_array_ ) crc_array_ = chksum_packet.crc_array();
+ else { error_ = "More than one CRC32 array found.";
+ retval_ = 2; return false; }
+ }
+ else if( !crcc_array_ ) crcc_array_ = chksum_packet.crc_array(); // CRC32-C array
+ else { error_ = "More than one CRC32-C array found.";
+ retval_ = 2; return false; }
+ return true;
+ }
+
+// parse the packets in fecdata; on failure set error_ and a nonzero retval_ and return
+Fec_index::Fec_index( const uint8_t * const fecdata,
+ const unsigned long fecdata_size,
+ const bool ignore_errors, const bool is_lz )
+ : crc_array_( 0 ), crcc_array_( 0 ), fec_net_size_( 0 ),
+ fec_block_size_( 0 ), prodata_size_( 0 ), retval_( 0 ), gf16_( false ),
+ is_lz_( is_lz )
+ {
+ if( fecdata_size <= 0 )
+ { error_ = "Fec file is empty."; retval_ = 2; return; }
+ if( fecdata_size >= fec_magic_l && !check_fec_magic( fecdata ) )
+ { error_ = "Bad magic number (file is not fec data)."; retval_ = 2; return; }
+ if( fecdata_size < Chksum_packet::min_packet_size() +
+ Fec_packet::min_packet_size() )
+ { error_ = "Fec file is too short."; retval_ = 2; return; }
+ if( !Chksum_packet::check_version( fecdata ) )
+ { error_ = bad_fec_version( Chksum_packet::version( fecdata ) );
+ retval_ = 2; return; }
+
+ /* Parse packets. pos usually points to a packet header, except when
+ skipping a corrupt packet. */
+ for( unsigned long pos = 0; pos < fecdata_size; )
+ {
+ unsigned long image_size = // > 2 is a packet size; 1 and 2 are error codes (see below)
+ Chksum_packet::check_image( fecdata + pos, fecdata_size - pos );
+ if( image_size > 2 ) // valid chksum packet
+ {
+ if( !parse_packet( Chksum_packet( fecdata + pos ), ignore_errors ) )
+ return;
+ fec_net_size_ += image_size; pos += image_size; continue;
+ }
+ if( image_size != 0 && ignore_errors ) { ++pos; continue; } // skip the corrupt packet
+ if( image_size == 1 )
+ { error_ = "Wrong packet size in chksum packet."; retval_ = 2; return; }
+ if( image_size == 2 )
+ { error_ = "Wrong CRC in chksum packet."; retval_ = 2; return; }
+
+ image_size = Fec_packet::check_image( fecdata + pos, fecdata_size - pos );
+ if( image_size > 2 ) // valid fec packet
+ {
+ const Fec_packet fec_packet( fecdata + pos );
+ if( !isvalid_fbs( fec_block_size_ ) ) // no valid block size stored yet
+ fec_block_size_ = fec_packet.fec_block_size();
+ else if( fec_block_size_ != fec_packet.fec_block_size() )
+ { error_ = "Contradictory fec_block_size in fec packet.";
+ retval_ = 2; return; }
+ fec_vector.push_back( fec_packet );
+ fec_net_size_ += image_size; pos += image_size; continue;
+ }
+ if( image_size != 0 && ignore_errors ) { ++pos; continue; } // skip the corrupt packet
+ if( image_size == 1 )
+ { error_ = "Wrong packet size in fec packet."; retval_ = 2; return; }
+ if( image_size == 2 )
+ { error_ = "Wrong CRC in fec packet."; retval_ = 2; return; }
+
+ if( ignore_errors ) // advance to the next byte equal to the first magic byte
+ { while( ++pos < fecdata_size && fecdata[pos] != fec_magic[0] ) {}
+ continue; }
+ error_ = "Unknown packet type = "; // unknown or corrupt packet
+ const int size = std::min( (unsigned long)fec_magic_l, fecdata_size - pos );
+ format_trailing_bytes( fecdata + pos, size, error_ );
+ retval_ = 2; return;
+ }
+ if( prodata_size_ <= 0 )
+ { error_ = "No valid chksum packets found."; retval_ = 2; return; }
+ if( fec_blocks() <= 0 )
+ { error_ = "No valid fec packets found."; retval_ = 2; return; }
+ if( !has_array() && !ignore_errors )
+ { error_ = "No valid CRC arrays found."; retval_ = 2; return; }
+ if( fec_blocks() > prodata_blocks() )
+ { error_ = "Too many fec packets found. (More than data blocks)";
+ retval_ = 2; return; }
+ if( !isvalid_fbs( fec_block_size_ ) )
+ internal_error( "fec_block_size not found." );
+ // check that fbn < max_k in each fec packet
+ const unsigned max_k = gf16_ ? max_k16 : max_k8;
+ std::vector< bool > bv( max_k ); // bv[fbn] is true once fbn has been seen
+ for( unsigned i = 0; i < fec_blocks(); ++i )
+ {
+ const unsigned fbn = fec_vector[i].fec_block_number();
+ if( fbn >= max_k )
+ { error_ = "Invalid fec_block_number in fec packet.";
+ retval_ = 2; return; }
+ if( bv[fbn] )
+ { error_ = "Same fec_block_number in two fec packets.";
+ retval_ = 2; return; }
+ bv[fbn] = true;
+ }
+ }
+
+// print to f a summary of the fec data: sizes, percentages, fec block numbers, features
+void Fec_index::show_fec_data( const std::string & input_filename,
+ const std::string & fec_filename, FILE * const f ) const
+ {
+ const unsigned long fec_bytes_ = fec_bytes();
+ const double spercent = ( 100.0 * fec_net_size_ ) / prodata_size_; // packets vs prodata
+ const double fpercent = ( 100.0 * fec_bytes_ ) / prodata_size_; // fec blocks vs prodata
+ if( input_filename.size() )
+ std::fprintf( f, "Protected file: '%s'\n", input_filename.c_str() );
+ std::fprintf( f, "Protected size: %11s Block size: %5s Data blocks: %s\n"
+ " Fec file: '%s'\n"
+ " Fec size: %11s %6.2f%% Fec blocks: %u\n"
+ " Fec bytes: %11s %6.2f%% Fec numbers:",
+ format_num3( prodata_size_ ), format_num3( fec_block_size_ ),
+ format_num3( prodata_blocks() ), printable_name( fec_filename ),
+ format_num3( fec_net_size_ ), spercent, fec_blocks(),
+ format_num3( fec_bytes_ ), fpercent );
+ for( unsigned i = 0; i < fec_blocks(); ++i ) // print ranges of fbn's
+ {
+ std::fprintf( f, " %u", fbn( i ) ); // first number of the run
+ const unsigned j = i;
+ while( i + 1 < fec_blocks() && fbn( i + 1 ) == fbn( i ) + 1 ) ++i; // extend the run
+ if( i > j ) std::fprintf( f, "%c%u", ( i == j + 1 ) ? ' ' : '-', fbn( i ) ); // "a b" or "a-z"
+ }
+ std::fprintf( f, "\n Features: GF(2^%s)%s%s\n", gf16_ ? "16" : "8",
+ crc_array_ ? " CRC32" : "", crcc_array_ ? " CRC32-C" : "" );
+ std::fflush( f );
+ }
+
+// list of the prodata blocks considered damaged, as found by find_bad_blocks
+class Bad_block_index
+ {
+ const Fec_index & fec_index; // reference; the Fec_index is not copied
+ const CRC32 crc32c; // constructed with 'true'; used for the CRC32-C array
+ // list of prodata blocks with a mismatched CRC32 or CRC32-C
+ std::vector< unsigned > bb_vector_; // index of each bad block
+
+ bool check_data_block( const uint8_t * const prodata, const unsigned i ) const;
+ bool zeroed_data_block( const uint8_t * const prodata, const unsigned i ) const;
+
+public:
+ Bad_block_index( const Fec_index & fec_index_, const uint8_t * const prodata )
+ : fec_index( fec_index_ ), crc32c( true ) { find_bad_blocks( prodata ); }
+
+ unsigned bad_blocks() const { return bb_vector_.size(); }
+ const std::vector< unsigned > & bb_vector() const { return bb_vector_; }
+
+ void find_bad_blocks( const uint8_t * const prodata ); // rebuild bb_vector_ from prodata
+
+ unsigned long first_bad_pos() const // position of the first byte of the first bad block
+ {
+ if( bb_vector_.empty() ) return 0;
+ return fec_index.block_pos( bb_vector_.front() );
+ }
+
+ unsigned long last_bad_pos() const // position of the last byte of the last bad block
+ {
+ if( bb_vector_.empty() ) return 0;
+ return fec_index.block_pos( bb_vector_.back() ) +
+ fec_index.block_size( bb_vector_.back() ) - 1;
+ }
+
+ unsigned long bad_span() const // bytes from first_bad_pos to last_bad_pos, inclusive
+ {
+ if( bb_vector_.empty() ) return 0;
+ return last_bad_pos() + 1 - first_bad_pos();
+ }
+
+ unsigned long bad_data_bytes() const // full size for all bad blocks but the last one
+ {
+ if( bb_vector_.empty() ) return 0;
+ return ( bb_vector_.size() - 1 ) * fec_index.fec_block_size() +
+ fec_index.block_size( bb_vector_.back() );
+ }
+ };
+
+bool Bad_block_index::check_data_block( const uint8_t * const prodata,
+ const unsigned i ) const
+ {
+ // check block i of the protected file using the CRC arrays from the chksum packets
+ const unsigned long pos = fec_index.block_pos( i );
+ const unsigned long size = fec_index.block_size( i );
+ if( fec_index.crc_array() && fec_index.crc_array()[i].val() != // CRC32 mismatch
+ crc32.compute_crc( prodata + pos, size ) ) return false;
+ if( fec_index.crcc_array() && fec_index.crcc_array()[i].val() != // CRC32-C mismatch
+ crc32c.compute_crc( prodata + pos, size ) ) return false;
+ return fec_index.has_array(); // false if there was no array to check against
+ }
+
+bool Bad_block_index::zeroed_data_block( const uint8_t * const prodata,
+ const unsigned i ) const
+ {
+ // detect holes in lzip protected file: true if block i has minlen identical bytes in a row
+ enum { minlen = 8 }; // min number of consecutive identical bytes
+ const unsigned long pos = fec_index.block_pos( i );
+ const unsigned long end = pos + fec_index.block_size( i );
+ unsigned count = 0; // consecutive bytes equal to their predecessor
+ for( unsigned long j = pos + 1; j < end; ++j )
+ {
+ if( prodata[j] != prodata[j-1] ) count = 0;
+ else if( ++count >= minlen - 1 ) return true; // minlen - 1 repeats = minlen equal bytes
+ }
+ return false;
+ }
+
+void Bad_block_index::find_bad_blocks( const uint8_t * const prodata )
+  {
+  // Rebuild bb_vector_ from scratch. Blocks are checked with the CRC arrays
+  // if there is any; else, for lzip files only, by looking for holes.
+  bb_vector_.clear();
+  const unsigned blocks = fec_index.prodata_blocks();
+  const bool by_crc = fec_index.has_array();
+  if( !by_crc && !fec_index.is_lz() ) return;	// no way to find bad blocks
+  for( unsigned n = 0; n < blocks; ++n )
+    {
+    const bool bad = by_crc ? !check_data_block( prodata, n ) :
+                              zeroed_data_block( prodata, n );
+    if( bad ) bb_vector_.push_back( n );
+    }
+  }
+
+
+long next_pct_pos( const long last_pos, const int pct )
+  {
+  // Return the position at which 'pct' percent of 'last_pos' is reached,
+  // limited to 'last_pos'. Return 0 if 'pct' is not positive.
+  if( pct <= 0 ) return 0;
+  const long scaled = (long)( last_pos / ( 100.0 / pct ) );
+  return ( scaled < last_pos ) ? scaled : last_pos;
+  }
+
+
+/* Repair in place the bad blocks of 'prodata' listed in 'bb_index', using
+   one fec block per bad block. Return true if there is nothing to repair or
+   if the MD5 of the repaired data matches the one stored in 'fec_index'.
+   If successful, the repaired data are returned in 'prodata'. */
+bool repair_prodata( const Fec_index & fec_index,
+                     const Bad_block_index & bb_index, uint8_t * const prodata )
+  {
+  const unsigned bad_blocks = bb_index.bad_blocks();
+  if( bad_blocks == 0 ) return true;		// nothing to repair
+  const unsigned fec_blocks = fec_index.fec_blocks();
+  if( bad_blocks > fec_blocks )
+    {
+    if( verbosity >= 0 )
+      std::fprintf( stderr, "Too many damaged blocks (%u).\n Can't repair "
+                    "file if it contains more than %u damaged blocks.\n",
+                    bad_blocks, fec_blocks );
+    return false;
+    }
+
+  const unsigned long fbs = fec_index.fec_block_size();
+  const std::vector< unsigned > & bb_vector = bb_index.bb_vector();
+  std::vector< unsigned > fbn_vector;		// number of each fec block used
+  // the reduction is performed on a copy of the first 'bad_blocks' fec blocks
+  uint8_t * const fecbuf = new uint8_t[bad_blocks * fbs];
+  for( unsigned n = 0; n < bad_blocks; ++n )
+    {
+    fbn_vector.push_back( fec_index.fbn( n ) );
+    std::memcpy( fecbuf + n * fbs, fec_index.fec_block( n ), fbs );
+    }
+
+  const unsigned prodata_blocks = fec_index.prodata_blocks();
+  const unsigned long prodata_size = fec_index.prodata_size();
+  const unsigned last_bad = bb_vector.back();
+  const bool last_is_missing = last_bad == prodata_blocks - 1;
+  // last incomplete data block padded to fbs
+  uint8_t * const lastbuf =
+    set_lastbuf( prodata, prodata_size, fbs, last_is_missing );
+  if( fec_index.gf16() )
+    rs16_decode( prodata, lastbuf, bb_vector, fbn_vector, fecbuf, fbs,
+                 prodata_blocks );
+  else
+    rs8_decode( prodata, lastbuf, bb_vector, fbn_vector, fecbuf, fbs,
+                prodata_blocks );
+  delete[] fecbuf;
+
+  if( lastbuf )
+    {
+    if( last_is_missing )		// copy last block to its position
+      std::memcpy( prodata + fec_index.block_pos( last_bad ), lastbuf,
+                   fec_index.block_size( last_bad ) );
+    delete[] lastbuf;
+    }
+  const bool repaired =
+    check_md5( prodata, prodata_size, fec_index.prodata_md5() );
+  if( !repaired && verbosity >= 0 )
+    std::fputs( "Repair of input file failed.\n", stderr );
+  return repaired;
+  }
+
+
+/* Show the fec data and report any mismatch between the protected file and
+   its fec data. Output goes to stdout if 'debug', else to stderr.
+   Return true only if sizes match, the MD5 matches, and no block is bad. */
+bool check_prodata( const Fec_index & fec_index,
+                    const Bad_block_index & bb_index,
+                    const std::string & input_filename,
+                    const std::string & fec_filename,
+                    const md5_type & computed_prodata_md5,
+                    const bool debug = true, const bool repair = false,
+                    const bool same_size = true )
+  {
+  FILE * const f = debug ? stdout : stderr;
+  const int min_verbosity = debug ? 0 : 1;	// level needed to show details
+  if( verbosity >= min_verbosity )
+    fec_index.show_fec_data( input_filename, fec_filename, f );
+  if( verbosity >= 0 && !same_size )
+    std::fprintf( stderr, "%s\n", size_mismatch_msg );
+  const unsigned bad_blocks = bb_index.bad_blocks();
+  // the MD5 is compared only if the sizes match
+  const bool md5_ok = same_size &&
+    fec_index.prodata_match( input_filename, computed_prodata_md5, debug );
+  if( bad_blocks != 0 )
+    {
+    if( verbosity >= min_verbosity )
+      {
+      std::fprintf( f, "Block mismatches: %u (%s bytes) spanning %s bytes "
+                    "[%s,%s]\n", bad_blocks,
+                    format_num3( bb_index.bad_data_bytes() ),
+                    format_num3( bb_index.bad_span() ),
+                    format_num3( bb_index.first_bad_pos() ),
+                    format_num3( bb_index.last_bad_pos() ) );
+      std::fflush( f );
+      }
+    return false;
+    }
+  if( !md5_ok ) return false;
+  if( verbosity >= 1 )
+    std::fputs( repair ?
+      "Protected data checked successfully. Repair not needed.\n" :
+      "Protected data checked successfully.\n", f );
+  return true;
+  }
+
+
+/* Print to stdout the word "block" or "blocks", the number of each block
+   (its position divided by 'cblock_size'), and then 'msg'. */
+void print_blocks( const std::vector< unsigned long > & pos_vector,
+                   const char * const msg, const unsigned long cblock_size )
+  {
+  const unsigned count = pos_vector.size();
+  const char * const noun = ( count == 1 ) ? "block" : "blocks";
+  std::fputs( noun, stdout );
+  for( unsigned n = 0; n < count; ++n )
+    {
+    const unsigned long block_number = pos_vector[n] / cblock_size;
+    std::printf( " %2lu", block_number );
+    }
+  std::fputs( msg, stdout );
+  }
+
+
+/* Write to 'outname' the concatenation of 'destdir' and the basename of
+   'name' (the part of 'name' following its last '/', or all of 'name' if
+   it contains no '/'). */
+void replace_dirname( const std::string & name, const std::string & destdir,
+                      std::string & outname )
+  {
+  const std::string::size_type slash = name.rfind( '/' );
+  const std::string::size_type base =		// start of basename
+    ( slash == std::string::npos ) ? 0 : slash + 1;
+  outname = destdir;
+  outname.append( name, base, std::string::npos );
+  }
+
+
+const Fec_index * fec_d_init( const std::string & input_filename, // common setup of fec_dc, fec_dz and fec_dZ: load the fec data and map the protected file
+ const std::string & cl_fec_filename, std::string & fec_filename,
+ const uint8_t ** fecdatap, long & fecdata_size, uint8_t ** prodatap ) // outputs: fec_filename, *fecdatap, fecdata_size, *prodatap; returns 0 on any failure
+ {
+ if( input_filename == "-" ) { prot_stdin(); return 0; } // the protected file can't be "-"
+ if( has_fec_extension2( input_filename ) ) return 0;
+ const bool from_dir = cl_fec_filename.size() &&
+ cl_fec_filename.end()[-1] == '/'; // a name ending in '/' is treated as a directory
+
+ if( cl_fec_filename.size() && !from_dir ) // file or stdin
+ fec_filename = cl_fec_filename;
+ else // read fec data from file.fec
+ {
+ if( from_dir )
+ replace_dirname( input_filename, cl_fec_filename, fec_filename );
+ else fec_filename = input_filename;
+ fec_filename += fec_extension;
+ }
+ *fecdatap = read_file( fec_filename, &fecdata_size );
+ if( !*fecdatap ) return 0;
+ const Fec_index * const fec_indexp = new Fec_index( *fecdatap, fecdata_size );
+ if( !fec_indexp ) { std::free( (void *)*fecdatap ); return 0; } // NOTE(review): plain new throws instead of returning 0, so this looks unreachable
+ if( fec_indexp->retval() != 0 )
+ { show_file_error( printable_name( fec_filename ),
+ fec_indexp->error().c_str() );
+ delete fec_indexp; std::free( (void *)*fecdatap ); return 0; }
+
+ struct stat in_stats; // not used
+ const char * const input_filenamep = input_filename.c_str();
+ const int infd = open_instream( input_filenamep, &in_stats, false, true );
+ if( infd < 0 ) { delete fec_indexp; std::free( (void *)*fecdatap ); return 0; }
+ const long prodata_size = fec_indexp->prodata_size();
+ const long long file_size = lseek( infd, 0, SEEK_END ); // a failed lseek (-1) is reported below as a size mismatch
+ if( prodata_size != file_size )
+ { show_file_error( input_filenamep, size_mismatch_msg ); close( infd );
+ delete fec_indexp; std::free( (void *)*fecdatap ); return 0; }
+ *prodatap = (uint8_t *)mmap( 0, prodata_size, PROT_READ | PROT_WRITE, // private writable mapping: changes are not written to the file
+ MAP_PRIVATE, infd, 0 );
+ close( infd ); // the mapping remains valid after closing the descriptor
+ if( *prodatap == MAP_FAILED )
+ { show_file_error( input_filenamep, mmap_msg, errno );
+ delete fec_indexp; std::free( (void *)*fecdatap ); return 0; }
+ return fec_indexp; // on success the caller must delete it, std::free *fecdatap, and munmap *prodatap
+ }
+
+} // end namespace
+
+
+/* Check that no variable read from packet overflows unsigned long.
+ 0 = bad magic (or max_size too small), 1 = bad size, 2 = bad header crc, version, or flags; else return packet size. Throws std::bad_alloc if a size does not fit in size_t. */
+unsigned Chksum_packet::check_image( const uint8_t * const image_buffer,
+ const unsigned long max_size ) // max_size: bytes available at image_buffer
+ {
+ if( max_size < min_packet_size() || !check_fec_magic( image_buffer ) )
+ return 0;
+ if( get_le( image_buffer + header_crc_o, crc32_l ) !=
+ compute_header_crc( image_buffer ) ) return 2;
+ if( !check_version( image_buffer ) || !check_flags( image_buffer ) ) return 2; // header fields are tested only after the header crc matches
+ const Chksum_packet chksum_packet( image_buffer );
+ const unsigned long long fbs = chksum_packet.fec_block_size();
+ if( !isvalid_fbs( fbs ) ) return 1;
+ const unsigned long long image_size = chksum_packet.packet_size();
+ const unsigned elsize = sizeof chksum_packet.crc_array()[0]; // size of one element of the crc array
+ const unsigned max_k = chksum_packet.gf16() ? max_k16 : max_k8; // upper limit of data blocks for the field in use
+ if( image_size < min_packet_size() || image_size > max_size ||
+ image_size > header_size + max_k * elsize + trailer_size ) return 1; // this bound also makes image_size fit in the unsigned return type
+ const unsigned paysize = image_size - header_size - trailer_size;
+ const unsigned long long prodata_size = chksum_packet.prodata_size();
+ const unsigned long long prodata_blocks = ceil_divide( prodata_size, fbs );
+ if( paysize % elsize != 0 || paysize / elsize != prodata_blocks || // payload must hold exactly one element per data block
+ prodata_blocks <= 0 || prodata_blocks > max_k ) return 1;
+ if( !fits_in_size_t( prodata_size ) || !fits_in_size_t( fbs ) )
+ throw std::bad_alloc();
+ return image_size;
+ }
+
+
+/* Check that no variable read from packet overflows unsigned long.
+ 0 = bad magic (or max_size too small), 1 = bad size, 2 = bad header or payload crc; else return packet size. Throws std::bad_alloc if the block size does not fit in size_t. */
+unsigned long Fec_packet::check_image( const uint8_t * const image_buffer,
+ const unsigned long max_size ) // max_size: bytes available at image_buffer
+ {
+ if( max_size < min_packet_size() ||
+ std::memcmp( image_buffer, fec_packet_magic, fec_magic_l ) != 0 )
+ return 0;
+ if( get_le( image_buffer + header_crc_o, crc32_l ) !=
+ compute_header_crc( image_buffer ) ) return 2;
+ const Fec_packet fec_packet( image_buffer );
+ const unsigned long long image_size = fec_packet.packet_size();
+ if( image_size < min_packet_size() || image_size > max_size ) return 1; // from here on the whole packet is known to lie inside the buffer
+ const unsigned long paysize = image_size - header_size - trailer_size;
+ const unsigned long payload_crc_o = fec_block_o + paysize; // the payload crc is stored right after the fec block
+ const unsigned payload_crc = get_le( image_buffer + payload_crc_o, crc32_l );
+ if( crc32.compute_crc( image_buffer + fec_block_o, paysize ) != payload_crc )
+ return 2;
+ const unsigned long long fbs = fec_packet.fec_block_size();
+ if( !isvalid_fbs( fbs ) || paysize != fbs ) return 1; // the payload must be exactly one fec block
+ if( !fits_in_size_t( fbs ) ) throw std::bad_alloc();
+ return image_size;
+ }
+
+
+int fec_test( const std::vector< std::string > & filenames, // check each protected file against its fec data and, if 'repair', write a repaired copy
+ const std::string & cl_fec_filename,
+ const std::string & default_output_filename,
+ const char recursive, const bool force, const bool ignore_errors,
+ const bool repair, const bool to_stdout ) // returns the status accumulated in retval through set_retval
+ {
+ const bool to_file = !to_stdout && default_output_filename.size(); // an output file name was given and stdout was not selected
+ if( repair && ( to_stdout || to_file ) && filenames.size() != 1 )
+ { show_error( "You must specify exactly 1 protected file "
+ "when redirecting repaired data." ); return 1; }
+ if( repair && ( to_stdout || to_file ) && recursive )
+ { show_error( "Can't redirect repaired data in recursive mode." ); return 1; }
+ if( to_stdout ) { outfd = STDOUT_FILENO; if( !check_tty_out() ) return 1; }
+ else outfd = -1; // output file is opened later, only if a repair is written
+ const bool to_fixed = !to_stdout && !to_file; // no redirection: the output name is derived with insert_fixed
+ std::string fec_filename;
+ const uint8_t * fecdata = 0; // buffer containing fec data
+ long fecdata_size = 0; // size of fec data
+ const bool from_dir = cl_fec_filename.size() &&
+ cl_fec_filename.end()[-1] == '/'; // a name ending in '/' is treated as a directory
+
+ if( cl_fec_filename.size() && !from_dir ) // file or stdin
+ {
+ if( filenames.size() != 1 )
+ { show_error( "You must specify exactly 1 protected file "
+ "when reading 1 fec data file." ); return 1; }
+ fec_filename = cl_fec_filename;
+ fecdata = read_file( fec_filename, &fecdata_size );
+ if( !fecdata ) return 1;
+ }
+
+ int retval = 0;
+ const bool one_to_one = !fecdata; // true if the fec data are read per protected file inside the loop
+ for( unsigned i = 0; i < filenames.size(); ++i )
+ {
+ if( filenames[i] == "-" )
+ { prot_stdin(); set_retval( retval, 1 ); continue; }
+ std::string srcdir; // dirname to be replaced by cl_fec_filename
+ if( from_dir ) extract_dirname( filenames[i], srcdir );
+ std::list< std::string > filelist( 1U, filenames[i] );
+ std::string input_filename;
+ while( next_filename( filelist, input_filename, retval, recursive ) )
+ {
+ if( has_fec_extension2( input_filename ) )
+ { set_retval( retval, 1 ); continue; }
+ if( !fecdata ) // read fec data from file.fec
+ {
+ if( from_dir ) replace_dirname( input_filename, srcdir,
+ cl_fec_filename, fec_filename );
+ else fec_filename = input_filename;
+ fec_filename += fec_extension;
+ fecdata = read_file( fec_filename, &fecdata_size );
+ if( !fecdata ) { set_retval( retval, 1 ); continue; }
+ }
+ const bool is_lz = has_lz_extension( input_filename );
+ const Fec_index fec_index( fecdata, fecdata_size, ignore_errors, is_lz );
+ if( fec_index.retval() != 0 )
+ { show_file_error( printable_name( fec_filename ),
+ fec_index.error().c_str() );
+ std::free( (void *)fecdata ); fecdata = 0;
+ set_retval( retval, 2 ); continue; }
+
+ // mmap is faster than reading the file, but is not resizeable
+ struct stat in_stats;
+ const char * const input_filenamep = input_filename.c_str();
+ const int infd = open_instream( input_filenamep, &in_stats, false, true );
+ if( infd < 0 ) { std::free( (void *)fecdata ); fecdata = 0;
+ set_retval( retval, 1 ); continue; }
+ const long prodata_size = fec_index.prodata_size();
+ const long long file_size = lseek( infd, 0, SEEK_END );
+ const bool mmapped = prodata_size <= file_size; // the file holds at least prodata_size bytes, so it can be mapped
+ const bool same_size = prodata_size == file_size;
+ if( !mmapped && !safe_seek( infd, 0, input_filenamep ) ) // a short file is read from its beginning
+ { std::free( (void *)fecdata ); fecdata = 0;
+ set_retval( retval, 1 ); close( infd ); continue; }
+ uint8_t * const prodata = (uint8_t *)( mmapped ?
+ mmap( 0, prodata_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, infd, 0 ) : // private mapping: the repair does not modify the input file
+ std::malloc( prodata_size ) );
+ if( mmapped && prodata == MAP_FAILED )
+ { show_file_error( input_filenamep, mmap_msg, errno );
+ set_retval( retval, 1 ); close( infd ); goto err; }
+ if( !mmapped ) // short file
+ {
+ if( !prodata )
+ { show_file_error( input_filenamep, mem_msg );
+ set_retval( retval, 1 ); close( infd ); goto err; }
+ const long read_size = readblock( infd, prodata, prodata_size );
+ if( read_size < prodata_size )
+ { if( errno )
+ { show_file_error( input_filenamep, read_error_msg, errno );
+ set_retval( retval, 1 ); close( infd ); goto err; }
+ std::memset( prodata + read_size, 0, prodata_size - read_size ); } // fill the missing tail with zeros
+ }
+ close( infd );
+ {
+ md5_type computed_prodata_md5;
+ compute_md5( prodata, prodata_size, computed_prodata_md5 );
+ Bad_block_index bb_index( fec_index, prodata );
+ const bool mismatch = !check_prodata( fec_index, bb_index, input_filename,
+ fec_filename, computed_prodata_md5, false, repair, same_size );
+ if( mismatch && !repair ) set_retval( retval, 2 );
+ else if( mismatch && repair )
+ {
+ if( !is_lz && !fec_index.has_array() )
+ { show_diag_msg( input_filename, "Can't repair. No valid CRC "
+ "arrays found and protected file not in lzip format." );
+ cleanup_and_fail( 2 ); }
+ if( verbosity >= 1 )
+ std::fprintf( stderr, "Repairing file '%s'\n", input_filenamep );
+ if( verbosity >= 0 && !fec_index.has_array() )
+ std::fputs( "warning: Repairing without CRC arrays.\n", stderr );
+ if( !repair_prodata( fec_index, bb_index, prodata ) )
+ cleanup_and_fail( 2 );
+ if( to_fixed )
+ {
+ output_filename = insert_fixed( input_filename, false );
+ set_signal_handler();
+ if( !open_outstream( force, true ) || !check_tty_out() )
+ { set_retval( retval, 1 ); return retval; } // don't delete a tty
+ }
+ else if( to_file && outfd < 0 ) // open outfd after checking infd
+ {
+ output_filename = default_output_filename;
+ set_signal_handler();
+ // check tty only once and don't try to delete a tty
+ if( !open_outstream( force, false ) || !check_tty_out() ) return 1;
+ }
+ // write repaired prodata
+ if( writeblock( outfd, prodata, prodata_size ) != prodata_size )
+ { show_file_error( printable_name( output_filename, false ),
+ wr_err_msg, errno ); set_retval( retval, 1 ); }
+ else if( !close_outstream( &in_stats ) ) set_retval( retval, 1 );
+ if( retval ) cleanup_and_fail( retval );
+ if( verbosity >= 1 )
+ std::fprintf( stderr, "Repaired copy of '%s' written to '%s'\n",
+ input_filenamep, printable_name( output_filename, false ) );
+ }
+ if( ( filelist.size() || i + 1 < filenames.size() ) && verbosity >= 1 ) // separate the output of consecutive files with an empty line
+ std::fputc( '\n', stderr );
+ }
+err: if( mmapped ) munmap( prodata, prodata_size ); else std::free( prodata ); // release prodata with the call matching how it was obtained
+ if( one_to_one ) { std::free( (void *)fecdata ); fecdata = 0; } // the next file reads its own fec data
+ }
+ }
+ if( fecdata ) std::free( (void *)fecdata );
+ return retval;
+ }
+
+
+/* Show on stdout the contents of each fec file in 'filenames'.
+   Standard input ("-") is read only the first time it appears.
+   Return the status accumulated through set_retval: 1 is set if a file
+   can't be read, 2 if its fec data are not valid. */
+int fec_list( const std::vector< std::string > & filenames,
+              const bool ignore_errors )
+  {
+  int retval = 0;
+  bool stdin_seen = false;
+  const unsigned files = filenames.size();
+  for( unsigned n = 0; n < files; ++n )
+    {
+    const std::string & name = filenames[n];
+    if( name == "-" )
+      {
+      if( stdin_seen ) continue;
+      stdin_seen = true;
+      }
+    if( n != 0 && verbosity >= 0 )	// empty line between consecutive files
+      { std::fputc( '\n', stdout ); std::fflush( stdout ); }
+    long size = 0;				// size of fec data
+    const uint8_t * const data = read_file( name, &size );
+    if( !data ) { set_retval( retval, 1 ); continue; }
+    const Fec_index fec_index( data, size, ignore_errors );
+    if( fec_index.retval() == 0 )
+      { if( verbosity >= 0 ) fec_index.show_fec_data( "", name, stdout ); }
+    else
+      {
+      show_file_error( printable_name( name ), fec_index.error().c_str() );
+      set_retval( retval, 2 );
+      }
+    std::free( (void *)data );
+    }
+  return retval;
+  }
+
+
+/* Debug test of combinations of zeroed blocks.
+   Write feedback to stdout, diagnostics to stderr.
+   The protected data are divided in blocks of 'cblock_size' bytes. Every
+   combination of 'cblocks' such blocks is set to 0, repaired from the fec
+   data, and compared with the MD5 of the original data. The blocks are
+   restored after each combination, so the mapped data end unchanged.
+   'cblocks' is used as a divisor and is expected to be >= 1 (presumably
+   validated by the caller; TODO confirm).
+   Return 0 if the test could be run, 1 on environmental or argument errors
+   (including failure to load the files), 2 if the protected file does not
+   match its fec data before the test starts. */
+int fec_dc( const std::string & input_filename,
+            const std::string & cl_fec_filename, const unsigned cblocks )
+  {
+  std::string fec_filename;
+  const uint8_t * fecdata = 0;
+  uint8_t * prodata = 0;
+  long fecdata_size = 0;			// size of fec data
+  const Fec_index * const fec_indexp = fec_d_init( input_filename,
+    cl_fec_filename, fec_filename, &fecdata, fecdata_size, &prodata );
+  // fec_d_init has already released everything on failure; report the
+  // failure in the status like the other errors of this function
+  if( !fec_indexp ) return 1;
+  const Fec_index & fec_index = *fec_indexp;
+  const unsigned long prodata_size = fec_index.prodata_size();
+  const unsigned fec_blocks = fec_index.fec_blocks();
+  int retval = 0;
+  if( cblocks > fec_blocks )
+    { show_file_error( input_filename.c_str(), "Not so many blocks in fec data." );
+      set_retval( retval, 1 ); goto err; }
+  {
+  md5_type computed_prodata_md5;
+  compute_md5( prodata, prodata_size, computed_prodata_md5 );
+  Bad_block_index bb_index( fec_index, prodata );
+  if( !check_prodata( fec_index, bb_index, input_filename, fec_filename,
+                      computed_prodata_md5 ) )
+    { set_retval( retval, 2 ); goto err; }
+  const unsigned long fbs = fec_index.fec_block_size();
+  // each tested block spans as many fec blocks as a set of 'cblocks'
+  // blocks allows
+  const unsigned long cblock_size = fec_blocks / cblocks * fbs;
+  const unsigned long max_saved_size = cblocks * cblock_size;
+  uint8_t * const sbuf = new uint8_t[max_saved_size];	// saved data bytes
+  // position of the last (maybe incomplete) block in prodata
+  const long last_pos = (prodata_size % cblock_size != 0) ?
+    prodata_size - prodata_size % cblock_size : prodata_size - cblock_size;
+  if( verbosity >= 0 )
+    { std::printf( "Testing sets of %u block%s of size %s\n", cblocks,
+                   cblocks != 1 ? "s" : "", format_num3( cblock_size ) );
+      std::fflush( stdout ); }
+  unsigned long combinations = 0, repair_attempts = 0, successes = 0,
+                failed_comparisons = 0;
+  std::vector< unsigned long > pos_vector;	// position of each zeroed block
+  for( unsigned i = 0; i < cblocks; ++i )
+    pos_vector.push_back( i * cblock_size );
+  const int saved_verbosity = verbosity;
+  verbosity = -1;				// suppress all messages
+  while( true )
+    {
+    for( unsigned i = 0; i < cblocks; ++i )		// save blocks
+      {
+      const unsigned long pos = pos_vector[i];
+      const unsigned long size = std::min( cblock_size, prodata_size - pos );
+      std::memcpy( sbuf + i * cblock_size, prodata + pos, size );
+      }
+    for( unsigned i = 0; i < cblocks; ++i )		// set blocks to 0
+      {
+      const unsigned long pos = pos_vector[i];
+      std::memset( prodata + pos, 0, std::min( cblock_size, prodata_size - pos ) );
+      }
+    ++combinations;
+    bb_index.find_bad_blocks( prodata );
+    if( check_prodata( fec_index, bb_index, input_filename, fec_filename,
+                       computed_prodata_md5 ) )
+      { if( saved_verbosity >= 0 )
+          { print_blocks( pos_vector, " nothing to repair\n", cblock_size );
+            std::fflush( stdout ); } }
+    else if( ++repair_attempts, repair_prodata( fec_index, bb_index, prodata ) )
+      {
+      ++successes;
+      if( saved_verbosity >= 2 )
+        { print_blocks( pos_vector, " passed the test\n", cblock_size );
+          std::fflush( stdout ); }
+      if( !check_md5( prodata, prodata_size, computed_prodata_md5 ) )
+        { if( saved_verbosity >= 0 )
+            { print_blocks( pos_vector, " comparison failed\n", cblock_size );
+              std::fflush( stdout ); }
+          ++failed_comparisons; }
+      }
+    else if( saved_verbosity >= 1 )
+      { print_blocks( pos_vector, " can't repair\n", cblock_size );
+        std::fflush( stdout ); }
+    for( unsigned i = 0; i < cblocks; ++i )		// restore blocks
+      {
+      const unsigned long pos = pos_vector[i];
+      const unsigned long size = std::min( cblock_size, prodata_size - pos );
+      std::memcpy( prodata + pos, sbuf + i * cblock_size, size );
+      }
+    unsigned long pos_limit = last_pos;	// advance to next block combination
+    int i = cblocks - 1;
+    while( i >= 0 )
+      {
+      // block i can't advance; try the previous one with a lower limit
+      if( pos_vector[i] + cblock_size > pos_limit )
+        { pos_limit -= cblock_size; --i; continue; }
+      pos_vector[i] += cblock_size;
+      for( ; i + 1U < cblocks; ++i )	// pack the following blocks after it
+        pos_vector[i+1] = pos_vector[i] + cblock_size;
+      break;
+      }
+    if( i < 0 ) break;			// all combinations tested
+    }
+  verbosity = saved_verbosity;		// restore verbosity level
+  delete[] sbuf;
+
+  if( verbosity >= 0 )
+    {
+    std::printf( "\n%11s block combinations tested\n%11s total repair attempts"
+                 "\n%11s repair attempts returned with zero status",
+                 format_num3( combinations ), format_num3( repair_attempts ),
+                 format_num3( successes ) );
+    if( successes > 0 )
+      {
+      if( failed_comparisons > 0 )
+        std::printf( ", of which\n%11s comparisons failed\n",
+                     format_num3( failed_comparisons ) );
+      else std::fputs( "\n all comparisons passed\n", stdout );
+      }
+    else std::fputc( '\n', stdout );
+    }
+  }
+err:
+  munmap( prodata, prodata_size );
+  delete fec_indexp; std::free( (void *)fecdata );
+  return retval;
+  }
+
+
+/* Debug test: set to 0 the byte ranges in 'range_vector', then try to
+   repair the protected data from the fec data and compare the result with
+   the MD5 of the original data. Works on a private mapping of the file.
+   'range_vector' is passed to truncate_block_vector, which may modify it.
+   Return 0 on success, 1 on environmental or argument errors (including
+   failure to load the files) or if the comparison fails, 2 if the file
+   does not match its fec data or can't be repaired. */
+int fec_dz( const std::string & input_filename,
+            const std::string & cl_fec_filename,
+            std::vector< Block > & range_vector )
+  {
+  std::string fec_filename;
+  const uint8_t * fecdata = 0;
+  uint8_t * prodata = 0;
+  long fecdata_size = 0;			// size of fec data
+  const Fec_index * const fec_indexp = fec_d_init( input_filename,
+    cl_fec_filename, fec_filename, &fecdata, fecdata_size, &prodata );
+  // fec_d_init has already released everything on failure; report the
+  // failure in the status like the other errors of this function
+  if( !fec_indexp ) return 1;
+  const Fec_index & fec_index = *fec_indexp;
+  const long prodata_size = fec_index.prodata_size();
+  int retval = 0;
+  if( !truncate_block_vector( range_vector, prodata_size ) )
+    { show_file_error( input_filename.c_str(), "Range is beyond end of file." );
+      set_retval( retval, 1 ); goto err; }
+  {
+  md5_type computed_prodata_md5;
+  compute_md5( prodata, prodata_size, computed_prodata_md5 );
+  // the file must match its fec data before it is damaged on purpose
+  if( !fec_index.prodata_match( input_filename, computed_prodata_md5 ) )
+    { set_retval( retval, 2 ); goto err; }
+  for( unsigned i = 0; i < range_vector.size(); ++i )	// set ranges to 0
+    std::memset( prodata + range_vector[i].pos(), 0, range_vector[i].size() );
+  Bad_block_index bb_index( fec_index, prodata );
+  if( !check_prodata( fec_index, bb_index, input_filename, fec_filename,
+                      computed_prodata_md5 ) )
+    {
+    if( !repair_prodata( fec_index, bb_index, prodata ) )
+      set_retval( retval, 2 );
+    else if( !check_md5( prodata, prodata_size, computed_prodata_md5 ) )
+      { if( verbosity >= 0 ) std::fputs( "Comparison failed\n", stdout );
+        set_retval( retval, 1 ); }
+    else if( verbosity >= 0 )
+      std::fputs( "Input file repaired successfully.\n", stdout );
+    }
+  }
+err:
+  munmap( prodata, prodata_size );
+  delete fec_indexp; std::free( (void *)fecdata );
+  return retval;
+  }
+
+
+/* Debug test of zeroed sectors.
+   Write feedback to stdout, progress and diagnostics to stderr.
+   A sector of 'sector_size' bytes is set to 0 at every position multiple
+   of 'delta', repaired from the fec data, and compared with the MD5 of the
+   original data. The damaged bytes are restored after each test.
+   'delta' and 'sector_size' are expected to be >= 1 (presumably validated
+   by the caller; TODO confirm).
+   Return 0 if the test could be run, 1 on environmental or argument errors
+   (including failure to load the files), 2 if the protected file does not
+   match its fec data before the test starts. */
+int fec_dZ( const std::string & input_filename,
+            const std::string & cl_fec_filename,
+            const unsigned delta, const int sector_size )
+  {
+  std::string fec_filename;
+  const uint8_t * fecdata = 0;
+  uint8_t * prodata = 0;
+  long fecdata_size = 0;			// size of fec data
+  const Fec_index * const fec_indexp = fec_d_init( input_filename,
+    cl_fec_filename, fec_filename, &fecdata, fecdata_size, &prodata );
+  // fec_d_init has already released everything on failure; report the
+  // failure in the status like the other errors of this function
+  if( !fec_indexp ) return 1;
+  const Fec_index & fec_index = *fec_indexp;
+  const long prodata_size = fec_index.prodata_size();
+  int retval = 0;
+  if( sector_size > prodata_size )
+    { show_file_error( input_filename.c_str(),
+                       "Sector size is larger than file size." );
+      set_retval( retval, 1 ); goto err; }
+  {
+  md5_type computed_prodata_md5;
+  compute_md5( prodata, prodata_size, computed_prodata_md5 );
+  Bad_block_index bb_index( fec_index, prodata );
+  if( !check_prodata( fec_index, bb_index, input_filename, fec_filename,
+                      computed_prodata_md5 ) )
+    { set_retval( retval, 2 ); goto err; }
+  const unsigned long fbs = fec_index.fec_block_size();
+  // a sector not aligned to fbs may touch up to 2 more fec blocks
+  const int rest = std::min( 2UL, sector_size % fbs );
+  const long max_saved_size = ( sector_size / fbs + rest ) * fbs;
+  uint8_t * const sbuf = new uint8_t[max_saved_size];	// saved data bytes
+  const long last_pos = (prodata_size % sector_size != 0) ?
+    prodata_size - prodata_size % sector_size : prodata_size - sector_size;
+  if( verbosity >= 0 )
+    { std::printf( "Testing blocks of size %s (delta %s)\n",
+                   format_num3( sector_size ), format_num3( delta ) );
+      std::fflush( stdout ); }
+  unsigned long combinations = 0, repair_attempts = 0, successes = 0,
+                failed_comparisons = 0;
+  // progress is shown only for files of 1000 bytes or more on a terminal
+  int pct = (prodata_size >= 1000 && isatty( STDERR_FILENO )) ? 0 : 100;
+  long pct_pos = (pct < 100) ? 0 : prodata_size;
+  const int saved_verbosity = verbosity;
+  verbosity = -1;				// suppress all messages
+  for( long pos = 0; pos <= last_pos; pos += delta )
+    {
+    if( ( saved_verbosity == 0 || saved_verbosity == 1 ) && pos >= pct_pos )
+      { std::fprintf( stderr, "\r%3d%% done\r", pct ); ++pct;
+        pct_pos = next_pct_pos( last_pos, pct ); }
+    // save whole fec blocks, starting at the one containing pos
+    const long saved_pos = pos - pos % fbs;
+    const long saved_size = std::min( max_saved_size, prodata_size - saved_pos );
+    std::memcpy( sbuf, prodata + saved_pos, saved_size );	// save block
+    const int zeroed_size = std::min( (long)sector_size, prodata_size - pos );
+    std::memset( prodata + pos, 0, zeroed_size );	// set block to 0
+    ++combinations;
+    bb_index.find_bad_blocks( prodata );
+    if( check_prodata( fec_index, bb_index, input_filename, fec_filename,
+                       computed_prodata_md5 ) )
+      { if( saved_verbosity >= 0 )
+          { std::printf( "block %ld,%d nothing to repair\n", pos, zeroed_size );
+            std::fflush( stdout ); } }
+    else if( ++repair_attempts, repair_prodata( fec_index, bb_index, prodata ) )
+      {
+      ++successes;
+      if( saved_verbosity >= 2 )
+        { std::printf( "block %ld,%d passed the test\n", pos, zeroed_size );
+          std::fflush( stdout ); }
+      if( !check_md5( prodata, prodata_size, computed_prodata_md5 ) )
+        { if( saved_verbosity >= 0 )
+            { std::printf( "block %ld,%d comparison failed\n", pos, zeroed_size );
+              std::fflush( stdout ); }
+          ++failed_comparisons; }
+      }
+    else if( saved_verbosity >= 1 )
+      { std::printf( "block %ld,%d can't repair\n", pos, zeroed_size );
+        std::fflush( stdout ); }
+    std::memcpy( prodata + saved_pos, sbuf, saved_size );	// restore block
+    }
+  verbosity = saved_verbosity;		// restore verbosity level
+  delete[] sbuf;
+
+  if( verbosity >= 0 )
+    {
+    std::printf( "\n%11s blocks tested\n%11s total repair attempts"
+                 "\n%11s repair attempts returned with zero status",
+                 format_num3( combinations ), format_num3( repair_attempts ),
+                 format_num3( successes ) );
+    if( successes > 0 )
+      {
+      if( failed_comparisons > 0 )
+        std::printf( ", of which\n%11s comparisons failed\n",
+                     format_num3( failed_comparisons ) );
+      else std::fputs( "\n all comparisons passed\n", stdout );
+      }
+    else std::fputc( '\n', stdout );
+    }
+  }
+err:
+  munmap( prodata, prodata_size );
+  delete fec_indexp; std::free( (void *)fecdata );
+  return retval;
+  }
diff --git a/gf16.cc b/gf16.cc
new file mode 100644
index 0000000..01b8dc5
--- /dev/null
+++ b/gf16.cc
@@ -0,0 +1,308 @@
+/* Lziprecover - Data recovery tool for the lzip format
+ Copyright (C) 2023-2025 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <cstdio>
+#include <cstring>
+#include <list>
+#include <string>
+#include <vector>
+#include <stdint.h>
+#include <unistd.h> // STDERR_FILENO
+
+#include "lzip.h"
+#include "md5.h"
+#include "fec.h"
+
+namespace {
+
+const uint16_t u16_one = 1; // probe value used to detect the host byte order
+const bool little_endian = *(const uint8_t *)&u16_one == 1; // true if the low byte is stored first
+inline uint16_t swap_bytes( const uint16_t a ) // exchange the two bytes of a
+ { return ( a >> 8 ) | ( a << 8 ); } // excess high bits are dropped by the uint16_t return type
+
+struct Galois16_table // GF(2^16) tables; addition/subtraction is exclusive or
+ {
+ enum { size = 1 << 16, poly = 0x1100B }; // field size, generator polynomial
+ uint16_t * log, * ilog, * mul_tables; // allocated by init(), null until then
+
+ Galois16_table() : log( 0 ), ilog( 0 ), mul_tables( 0 ) {} // no allocation here
+// ~Galois16_table() { delete[] mul_tables; delete[] ilog; delete[] log; }
+
+ void init() // fill log, inverse log, and multiplication tables
+ {
+ if( log ) return; // tables already built
+ log = new uint16_t[size]; ilog = new uint16_t[size];
+ mul_tables = new uint16_t[3 * 256 * 256]; // LL, LH, HH
+ for( unsigned b = 1, i = 0; i < size - 1; ++i ) // b steps through the powers of 2
+ {
+ log[b] = i;
+ ilog[i] = b;
+ b <<= 1;
+ if( b & size ) b ^= poly; // bit 16 set: reduce with the polynomial
+ }
+ log[0] = size - 1; // log(0) is not defined, so use a special value
+ ilog[size-1] = 1; // lets inverse( 1 ) return 1 (log[1] is 0)
+
+ uint16_t * p = mul_tables; // next table entry to fill
+ for( int i = 0; i < 16; i += 8 ) // (i,j) = (0,0), (0,8), (8,8)
+ for( int j = i; j < 16; j += 8 )
+ for( int a = 0; a < 256 << i; a += 1 << i ) // a = byte value << i
+ for( int b = 0; b < 256 << j; b += 1 << j ) // b = byte value << j
+ *p++ = mul( a, b );
+ }
+
+ uint16_t mul( const uint16_t a, const uint16_t b ) const // product a * b
+ {
+ if( a == 0 || b == 0 ) return 0; // log of 0 must not be used
+ const unsigned sum = log[a] + log[b];
+ return ( sum >= size - 1 ) ? ilog[sum-(size-1)] : ilog[sum];
+// return ilog[(log[a] + log[b]) % (size-1)];
+ }
+
+ uint16_t inverse( const uint16_t a ) const { return ilog[size-1-log[a]]; } // 1/a; a == 0 is not rejected
+ } gf; // the one table object used by this file
+
+
+inline bool check_element( const uint16_t * const A, const uint16_t * const B, // true if (A * B)[row][col] is as in I
+ const unsigned k, const unsigned row, const unsigned col )
+ {
+ const uint16_t * pa = A + row * k; // start of row 'row' of A
+ const uint16_t * pb = B + col; // top of column 'col' of B
+ uint16_t sum = 0;
+ for( unsigned i = 0; i < k; ++i, ++pa, pb += k ) // dot product in GF(2^16)
+ sum ^= gf.mul( *pa, *pb );
+ return sum == ( row == col ); // expect 1 on the diagonal, 0 elsewhere
+ }
+
+/* Return true if A * B = I (A, B, I are square matrices of size k * k).
+ For k > 1024 only the main diagonal and the anti-diagonal are checked. */
+bool check_inverse( const uint16_t * const A, const uint16_t * const B,
+ const unsigned k )
+ {
+ const bool print = verbosity >= 1 && k > max_k8 && isatty( STDERR_FILENO ); // progress goes to stderr
+ for( unsigned row = 0; row < k; ++row ) // multiply A * B
+ {
+ if( k <= 1024 )
+ for( unsigned col = 0; col < k; ++col ) // every element of the row
+ { if( !check_element( A, B, k, row, col ) )
+ { if( print && row ) std::fputc( '\n', stderr ); return false; } }
+ else
+ if( !check_element( A, B, k, row, row ) ||
+ !check_element( A, B, k, row, k - 1 - row ) ) // anti-diagonal element
+ { if( print && row ) std::fputc( '\n', stderr ); return false; }
+ if( print ) std::fprintf( stderr, "\r%5u rows checked \r", row + 1 );
+ }
+ return true;
+ }
+
+
+/* Invert in place a matrix of size k * k. Return false if a pivot is 0.
+ This is like Gaussian elimination with a virtual identity matrix:
+ A --some_changes--> I, I --same_changes--> A^-1
+ Galois arithmetic is exact. Swapping rows or columns is not needed. */
+bool invert_matrix( uint16_t * const matrix, const unsigned k )
+ {
+ const bool print = verbosity >= 1 && k > max_k8 && isatty( STDERR_FILENO ); // progress goes to stderr
+ for( unsigned row = 0; row < k; ++row )
+ {
+ uint16_t * const pivot_row = matrix + row * k;
+ uint16_t pivot = pivot_row[row];
+ if( pivot == 0 ) // can't scale this row
+ { if( print && row ) std::fputc( '\n', stderr ); return false; }
+ if( pivot != 1 ) // scale the pivot_row
+ {
+ pivot = gf.inverse( pivot ); // from here on pivot holds 1/pivot
+ pivot_row[row] = 1; // column of the virtual identity matrix
+ for( unsigned col = 0; col < k; ++col )
+ pivot_row[col] = gf.mul( pivot_row[col], pivot );
+ }
+ // subtract pivot_row from the other rows
+ for( unsigned row2 = 0; row2 < k; ++row2 )
+ if( row2 != row )
+ {
+ uint16_t * const dst_row = matrix + row2 * k;
+ const uint16_t c = dst_row[row]; dst_row[row] = 0; // save the factor, then zero the virtual identity entry
+ for( unsigned col = 0; col < k; ++col )
+ dst_row[col] ^= gf.mul( pivot_row[col], c ); // xor is subtraction
+ }
+ if( print ) std::fprintf( stderr, "\r%5u rows inverted\r", row + 1 );
+ }
+ return true;
+ }
+
+
+// create dec_matrix containing only the rows needed and invert it in place
+const uint16_t * init_dec_matrix( const std::vector< unsigned > & bb_vector, // returns a new[] array; rs16_decode releases it with delete[]
+ const std::vector< unsigned > & fbn_vector )
+ {
+ const unsigned bad_blocks = bb_vector.size(); // the matrix is bad_blocks * bad_blocks
+ uint16_t * const dec_matrix = new uint16_t[bad_blocks * bad_blocks];
+
+ // one row for each missing data block
+ for( unsigned row = 0; row < bad_blocks; ++row )
+ {
+ uint16_t * const dec_row = dec_matrix + row * bad_blocks;
+ const unsigned fbn = fbn_vector[row] | 0x8000; // NOTE(review): assumes fbn_vector has at least bad_blocks entries - confirm
+ for( unsigned col = 0; col < bad_blocks; ++col )
+ dec_row[col] = gf.inverse( fbn ^ bb_vector[col] ); // same element formula as in rs16_encode
+ }
+ if( !invert_matrix( dec_matrix, bad_blocks ) ) // false means a zero pivot was found
+ internal_error( "GF(2^16) matrix not invertible." );
+ return dec_matrix;
+ }
+
+#if 0
+/* compute dst[] += c * src[] (disabled variant, one value at a time)
+ treat the buffers as arrays of 16-bit Galois values */
+inline void mul_add( const uint8_t * const src, uint8_t * const dst,
+ const unsigned long fbs, const uint16_t c )
+ {
+ if( c == 0 ) return; // nothing to add
+ const uint16_t * const src16 = (const uint16_t *)src;
+ uint16_t * const dst16 = (uint16_t *)dst;
+
+ if( little_endian )
+ for( unsigned long i = 0; i < fbs / 2; ++i )
+ dst16[i] ^= gf.mul( src16[i], c );
+ else // big endian
+ for( unsigned long i = 0; i < fbs / 2; ++i )
+ dst16[i] ^= swap_bytes( gf.mul( swap_bytes( src16[i] ), c ) );
+ }
+#else
+
+/* compute dst[] += c * src[] (fbs is the size in bytes of each buffer)
+ treat the buffers as arrays of pairs of 16-bit Galois values */
+inline void mul_add( const uint8_t * const src, uint8_t * const dst,
+ const unsigned long fbs, const uint16_t c )
+ {
+ if( c == 0 ) return; // nothing to add
+ const int cl = c & 0xFF; // split factor c into low and high bytes
+ const int ch = c >> 8;
+ // pointers to the four multiplication tables (c.low/high * src.low/high)
+ const uint16_t * LL = &gf.mul_tables[cl * 256];
+ const uint16_t * LH = &gf.mul_tables[65536 + cl * 256];
+ const uint16_t * HL = &gf.mul_tables[65536 + ch]; // step 256
+ const uint16_t * HH = &gf.mul_tables[131072 + ch * 256];
+ uint16_t L[256]; // extract the two tables for factor c
+ uint16_t H[256];
+
+ if( little_endian )
+ for( int i = 0; i < 256; ++i )
+ { L[i] = *LL++ ^ *HL; HL+=256; H[i] = *LH++ ^ *HH++; } // L[i] = c * i, H[i] = c * (i << 8)
+ else // big endian
+ for( int i = 0; i < 256; ++i )
+ { H[i] = swap_bytes( *LL++ ^ *HL ); HL+=256; // same products, byte-swapped,
+ L[i] = swap_bytes( *LH++ ^ *HH++ ); } // with the roles of L and H exchanged
+
+ const uint32_t * const src32 = (const uint32_t *)src; // NOTE(review): assumes src and dst are aligned for uint32_t access - confirm
+ uint32_t * const dst32 = (uint32_t *)dst;
+
+ for( unsigned long i = 0; i < fbs / 4; ++i ) // only whole 32-bit words are processed
+ { const uint32_t s = src32[i];
+ dst32[i] ^= L[s & 0xFF] ^ H[s >> 8 & 0xFF] ^ // first 16-bit half of the word
+ L[s >> 16 & 0xFF] << 16 ^ H[s >> 24] << 16; } // second 16-bit half of the word
+ }
+#endif
+
+} // end namespace
+
+
+void gf16_init() { gf.init(); } // build the GF(2^16) tables; does nothing if they already exist
+
+bool gf16_check( const std::vector< unsigned > & fbn_vector, const unsigned k ) // self-test of the GF(2^16) code; true if every check passes
+ {
+ if( k == 0 ) return true; // nothing to check
+ gf.init();
+ bool good = true;
+ for( unsigned a = 1; a < gf.size; ++a ) // every nonzero element times its inverse must be 1
+ if( gf.mul( a, gf.inverse( a ) ) != 1 )
+ { good = false;
+ std::fprintf( stderr, "%u * ( 1/%u ) != 1 in GF(2^16)\n", a, a ); }
+ uint16_t * const enc_matrix = new uint16_t[k * k];
+ uint16_t * const dec_matrix = new uint16_t[k * k];
+ const bool random = fbn_vector.size() == k; // take row numbers from fbn_vector only if it has k entries
+ for( unsigned row = 0; row < k; ++row )
+ {
+ const unsigned fbn = ( random ? fbn_vector[row] : row ) | 0x8000; // otherwise use row numbers 0 to k-1
+ uint16_t * const enc_row = enc_matrix + row * k;
+ for( unsigned col = 0; col < k; ++col )
+ enc_row[col] = gf.inverse( fbn ^ col ); // same element formula as in rs16_encode
+ }
+ std::memcpy( dec_matrix, enc_matrix, k * k * sizeof (uint16_t) ); // invert a copy, keep the original
+ if( !invert_matrix( dec_matrix, k ) )
+ { good = false; show_error( "GF(2^16) matrix not invertible." ); }
+ else if( !check_inverse( enc_matrix, dec_matrix, k ) ) // the product must be the identity
+ { good = false; show_error( "GF(2^16) matrix A * A^-1 != I" ); }
+ delete[] dec_matrix;
+ delete[] enc_matrix;
+ return good;
+ }
+
+
+void rs16_encode( const uint8_t * const buffer, const uint8_t * const lastbuf, // compute fec block number fbn from k data blocks
+ uint8_t * const fec_block, const unsigned long fbs,
+ const unsigned fbn, const unsigned k )
+ {
+ if( !gf.log ) internal_error( "GF(2^16) tables not initialized." ); // gf16_init must be called first
+ /* The encode matrix is a Hilbert matrix of size k * k with one row per
+ fec block and one column per data block.
+ The value of each element is computed on the fly with inverse. */
+ const unsigned row = fbn | 0x8000; // bit 15 set, so row ^ col is nonzero while col < 0x8000
+ std::memset( fec_block, 0, fbs ); // start the sum at zero
+ for( unsigned col = 0; col < k; ++col )
+ {
+ const uint8_t * const src = // a non-null lastbuf supplies the last data block
+ ( col < k - (lastbuf != 0) ) ? buffer + col * fbs : lastbuf;
+ mul_add( src, fec_block, fbs, gf.inverse( row ^ col ) ); // fec_block += element * data block
+ }
+ }
+
+
+void rs16_decode( uint8_t * const buffer, uint8_t * const lastbuf, // rebuild in place the data blocks listed in bb_vector
+ const std::vector< unsigned > & bb_vector,
+ const std::vector< unsigned > & fbn_vector,
+ uint8_t * const fecbuf, const unsigned long fbs,
+ const unsigned k )
+ {
+ gf.init();
+ const unsigned bad_blocks = bb_vector.size();
+ for( unsigned col = 0, bi = 0; col < k; ++col ) // reduce
+ {
+ if( bi < bad_blocks && col == bb_vector[bi] ) { ++bi; continue; } // skip bad blocks; NOTE(review): assumes bb_vector is in ascending order - confirm
+ const uint8_t * const src = // a non-null lastbuf holds the last data block
+ ( col < k - (lastbuf != 0) ) ? buffer + col * fbs : lastbuf;
+ for( unsigned row = 0; row < bad_blocks; ++row )
+ {
+ const unsigned fbn = fbn_vector[row] | 0x8000; // one fec block per bad block
+ mul_add( src, fecbuf + row * fbs, fbs, gf.inverse( fbn ^ col ) ); // fecbuf is modified in place
+ }
+ }
+ const uint16_t * const dec_matrix = init_dec_matrix( bb_vector, fbn_vector );
+ for( unsigned col = 0; col < bad_blocks; ++col ) // solve
+ {
+ const unsigned di = bb_vector[col]; // index of the data block to rebuild
+ uint8_t * const dst =
+ ( di < k - (lastbuf != 0) ) ? buffer + di * fbs : lastbuf;
+ std::memset( dst, 0, fbs ); // discard the damaged contents
+ const uint16_t * const dec_row = dec_matrix + col * bad_blocks;
+ for( unsigned row = 0; row < bad_blocks; ++row )
+ mul_add( fecbuf + row * fbs, dst, fbs, dec_row[row] ); // dst += element * reduced fec block
+ }
+ delete[] dec_matrix;
+ }
diff --git a/gf8.cc b/gf8.cc
new file mode 100644
index 0000000..28bcd23
--- /dev/null
+++ b/gf8.cc
@@ -0,0 +1,244 @@
+/* Lziprecover - Data recovery tool for the lzip format
+ Copyright (C) 2023-2025 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <cstdio>
+#include <cstring>
+#include <list>
+#include <string>
+#include <vector>
+#include <stdint.h>
+
+#include "lzip.h"
+#include "md5.h"
+#include "fec.h"
+
+namespace {
+
+struct Galois8_table // GF(2^8) tables; addition/subtraction is exclusive or
+ {
+ enum { size = 1 << 8, poly = 0x11D }; // field size, generator polynomial
+ uint8_t * log, * ilog, * mul_table; // allocated by init(), null until then
+
+ Galois8_table() : log( 0 ), ilog( 0 ), mul_table( 0 ) {} // no allocation here
+// ~Galois8_table() { delete[] mul_table; delete[] ilog; delete[] log; }
+
+ void init() // fill log, inverse log, and multiplication tables
+ {
+ if( log ) return; // tables already built
+ log = new uint8_t[size]; ilog = new uint8_t[size];
+ mul_table = new uint8_t[size * size]; // full table indexed by a * size + b
+ for( unsigned b = 1, i = 0; i < size - 1; ++i ) // b steps through the powers of 2
+ {
+ log[b] = i;
+ ilog[i] = b;
+ b <<= 1;
+ if( b & size ) b ^= poly; // bit 8 set: reduce with the polynomial
+ }
+ log[0] = size - 1; // log(0) is not defined, so use a special value
+ ilog[size-1] = 1; // lets inverse( 1 ) return 1 (log[1] is 0)
+
+ for( int i = 1; i < size; ++i ) // products of nonzero elements
+ {
+ uint8_t * const mul_row = mul_table + i * size;
+ for( int j = 1; j < size; ++j )
+ mul_row[j] = ilog[(log[i] + log[j]) % (size-1)];
+ }
+ for( int i = 0; i < size; ++i ) // anything times 0 is 0
+ mul_table[0 * size + i] = mul_table[i * size + 0] = 0;
+ }
+
+ uint8_t inverse( const uint8_t a ) const { return ilog[size-1-log[a]]; } // 1/a; a == 0 is not rejected
+ } gf; // the one table object used by this file
+
+
+// return true if A * B = I (A, B, I are square matrices of size k * k)
+bool check_inverse( const uint8_t * const A, const uint8_t * const B,
+ const unsigned k )
+ {
+ for( unsigned row = 0; row < k; ++row ) // multiply A * B
+ for( unsigned col = 0; col < k; ++col )
+ {
+ const uint8_t * pa = A + row * k; // start of row 'row' of A
+ const uint8_t * pb = B + col; // top of column 'col' of B
+ uint8_t sum = 0;
+ for( unsigned i = 0; i < k; ++i, ++pa, pb += k ) // dot product in GF(2^8)
+ sum ^= gf.mul_table[*pa * gf.size + *pb];
+ if( sum != ( row == col ) ) return false; // expect 1 on the diagonal, 0 elsewhere
+ }
+ return true;
+ }
+
+
+/* Invert in place a matrix of size k * k. Return false if a pivot is 0.
+ This is like Gaussian elimination with a virtual identity matrix:
+ A --some_changes--> I, I --same_changes--> A^-1
+ Galois arithmetic is exact. Swapping rows or columns is not needed. */
+bool invert_matrix( uint8_t * const matrix, const unsigned k )
+ {
+ for( unsigned row = 0; row < k; ++row )
+ {
+ uint8_t * const pivot_row = matrix + row * k;
+ const uint8_t pivot = pivot_row[row];
+ if( pivot == 0 ) return false; // can't scale this row
+ if( pivot != 1 ) // scale the pivot_row
+ {
+ const uint8_t * const mul_row = // table row that multiplies by 1/pivot
+ gf.mul_table + gf.inverse( pivot ) * gf.size;
+ pivot_row[row] = 1; // column of the virtual identity matrix
+ for( unsigned col = 0; col < k; ++col )
+ pivot_row[col] = mul_row[pivot_row[col]];
+ }
+ // subtract pivot_row from the other rows
+ for( unsigned row2 = 0; row2 < k; ++row2 )
+ if( row2 != row )
+ {
+ uint8_t * const dst_row = matrix + row2 * k;
+ const uint8_t c = dst_row[row]; dst_row[row] = 0; // save the factor, then zero the virtual identity entry
+ const uint8_t * const mul_row = gf.mul_table + c * gf.size; // table row that multiplies by c
+ for( unsigned col = 0; col < k; ++col )
+ dst_row[col] ^= mul_row[pivot_row[col]]; // xor is subtraction
+ }
+ }
+ return true;
+ }
+
+
+// create dec_matrix containing only the rows needed and invert it in place
+const uint8_t * init_dec_matrix( const std::vector< unsigned > & bb_vector, // returns a new[] array; rs8_decode releases it with delete[]
+ const std::vector< unsigned > & fbn_vector )
+ {
+ const unsigned bad_blocks = bb_vector.size(); // the matrix is bad_blocks * bad_blocks
+ uint8_t * const dec_matrix = new uint8_t[bad_blocks * bad_blocks];
+
+ // one row for each missing data block
+ for( unsigned row = 0; row < bad_blocks; ++row )
+ {
+ uint8_t * const dec_row = dec_matrix + row * bad_blocks;
+ const unsigned fbn = fbn_vector[row] | 0x80; // NOTE(review): assumes fbn_vector has at least bad_blocks entries - confirm
+ for( unsigned col = 0; col < bad_blocks; ++col )
+ dec_row[col] = gf.inverse( fbn ^ bb_vector[col] ); // same element formula as in rs8_encode
+ }
+ if( !invert_matrix( dec_matrix, bad_blocks ) ) // false means a zero pivot was found
+ internal_error( "GF(2^8) matrix not invertible." );
+ return dec_matrix;
+ }
+
+
+/* compute dst[] += c * src[] (fbs is the size in bytes of each buffer)
+ treat the buffers as arrays of quadruples of 8-bit Galois values */
+inline void mul_add( const uint8_t * const src, uint8_t * const dst,
+ const unsigned long fbs, const uint8_t c )
+ {
+ if( c == 0 ) return; // nothing to add
+ const uint8_t * const mul_row = gf.mul_table + c * gf.size; // table row that multiplies by c
+ const uint32_t * const src32 = (const uint32_t *)src; // NOTE(review): assumes src and dst are aligned for uint32_t access - confirm
+ uint32_t * const dst32 = (uint32_t *)dst;
+
+ for( unsigned long i = 0; i < fbs / 4; ++i ) // only whole 32-bit words are processed
+ { const uint32_t s = src32[i];
+ dst32[i] ^= mul_row[s & 0xFF] ^ mul_row[s >> 8 & 0xFF] << 8 ^ // each byte is multiplied and
+ mul_row[s >> 16 & 0xFF] << 16 ^ mul_row[s >> 24] << 24; } // put back at its own position
+ }
+
+} // end namespace
+
+
+void gf8_init() { gf.init(); } // build the GF(2^8) tables; does nothing if they already exist
+
+bool gf8_check( const std::vector< unsigned > & fbn_vector, const unsigned k ) // self-test of the GF(2^8) code; true if every check passes
+ {
+ if( k == 0 ) return true; // nothing to check
+ gf.init();
+ bool good = true;
+ for( unsigned a = 1; a < gf.size; ++a ) // every nonzero element times its inverse must be 1
+ if( gf.mul_table[a * gf.size + gf.inverse( a )] != 1 )
+ { good = false;
+ std::fprintf( stderr, "%u * ( 1/%u ) != 1 in GF(2^8)\n", a, a ); }
+ uint8_t * const enc_matrix = new uint8_t[k * k];
+ uint8_t * const dec_matrix = new uint8_t[k * k];
+ const bool random = fbn_vector.size() == k; // take row numbers from fbn_vector only if it has k entries
+ for( unsigned row = 0; row < k; ++row )
+ {
+ const unsigned fbn = ( random ? fbn_vector[row] : row ) | 0x80; // otherwise use row numbers 0 to k-1
+ uint8_t * const enc_row = enc_matrix + row * k;
+ for( unsigned col = 0; col < k; ++col )
+ enc_row[col] = gf.inverse( fbn ^ col ); // same element formula as in rs8_encode
+ }
+ std::memcpy( dec_matrix, enc_matrix, k * k ); // invert a copy, keep the original
+ if( !invert_matrix( dec_matrix, k ) )
+ { good = false; show_error( "GF(2^8) matrix not invertible." ); }
+ else if( !check_inverse( enc_matrix, dec_matrix, k ) ) // the product must be the identity
+ { good = false; show_error( "GF(2^8) matrix A * A^-1 != I" ); }
+ delete[] dec_matrix;
+ delete[] enc_matrix;
+ return good;
+ }
+
+
+void rs8_encode( const uint8_t * const buffer, const uint8_t * const lastbuf, // compute fec block number fbn from k data blocks
+ uint8_t * const fec_block, const unsigned long fbs,
+ const unsigned fbn, const unsigned k )
+ {
+ if( !gf.log ) internal_error( "GF(2^8) tables not initialized." ); // gf8_init must be called first
+ /* The encode matrix is a Hilbert matrix of size k * k with one row per
+ fec block and one column per data block.
+ The value of each element is computed on the fly with inverse. */
+ const unsigned row = fbn | 0x80; // bit 7 set, so row ^ col has bit 7 set while col < 0x80
+ std::memset( fec_block, 0, fbs ); // start the sum at zero
+ for( unsigned col = 0; col < k; ++col )
+ {
+ const uint8_t * const src = // a non-null lastbuf supplies the last data block
+ ( col < k - (lastbuf != 0) ) ? buffer + col * fbs : lastbuf;
+ mul_add( src, fec_block, fbs, gf.inverse( row ^ col ) ); // fec_block += element * data block
+ }
+ }
+
+
+void rs8_decode( uint8_t * const buffer, uint8_t * const lastbuf, // rebuild in place the data blocks listed in bb_vector
+ const std::vector< unsigned > & bb_vector,
+ const std::vector< unsigned > & fbn_vector,
+ uint8_t * const fecbuf, const unsigned long fbs,
+ const unsigned k )
+ {
+ gf.init();
+ const unsigned bad_blocks = bb_vector.size();
+ for( unsigned col = 0, bi = 0; col < k; ++col ) // reduce
+ {
+ if( bi < bad_blocks && col == bb_vector[bi] ) { ++bi; continue; } // skip bad blocks; NOTE(review): assumes bb_vector is in ascending order - confirm
+ const uint8_t * const src = // a non-null lastbuf holds the last data block
+ ( col < k - (lastbuf != 0) ) ? buffer + col * fbs : lastbuf;
+ for( unsigned row = 0; row < bad_blocks; ++row )
+ {
+ const unsigned fbn = fbn_vector[row] | 0x80; // one fec block per bad block
+ mul_add( src, fecbuf + row * fbs, fbs, gf.inverse( fbn ^ col ) ); // fecbuf is modified in place
+ }
+ }
+ const uint8_t * const dec_matrix = init_dec_matrix( bb_vector, fbn_vector );
+ for( unsigned col = 0; col < bad_blocks; ++col ) // solve
+ {
+ const unsigned di = bb_vector[col]; // index of the data block to rebuild
+ uint8_t * const dst =
+ ( di < k - (lastbuf != 0) ) ? buffer + di * fbs : lastbuf;
+ std::memset( dst, 0, fbs ); // discard the damaged contents
+ const uint8_t * const dec_row = dec_matrix + col * bad_blocks;
+ for( unsigned row = 0; row < bad_blocks; ++row )
+ mul_add( fecbuf + row * fbs, dst, fbs, dec_row[row] ); // dst += element * reduced fec block
+ }
+ delete[] dec_matrix;
+ }
diff --git a/list.cc b/list.cc
index 3c5ceb1..8e0a87f 100644
--- a/list.cc
+++ b/list.cc
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2024 Antonio Diaz Diaz.
+ Copyright (C) 2009-2025 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -17,6 +17,7 @@
#define _FILE_OFFSET_BITS 64
+#include <cerrno>
#include <cstdio>
#include <cstring>
#include <string>
@@ -57,7 +58,7 @@ int list_files( const std::vector< std::string > & filenames,
for( unsigned i = 0; i < filenames.size(); ++i )
{
- const bool from_stdin = ( filenames[i] == "-" );
+ const bool from_stdin = filenames[i] == "-";
if( from_stdin ) { if( stdin_used ) continue; else stdin_used = true; }
const char * const input_filename =
from_stdin ? "(stdin)" : filenames[i].c_str();
@@ -75,6 +76,9 @@ int list_files( const std::vector< std::string > & filenames,
set_retval( retval, lzip_index.retval() );
continue;
}
+ const bool multi_empty =
+ !from_stdin && !cl_opts.ignore_errors && lzip_index.multi_empty();
+ if( multi_empty ) set_retval( retval, 2 );
if( verbosity < 0 ) continue;
const unsigned long long udata_size = lzip_index.udata_size();
const unsigned long long cdata_size = lzip_index.cdata_size();
@@ -86,6 +90,8 @@ int list_files( const std::vector< std::string > & filenames,
if( verbosity >= 1 ) std::fputs( " dict memb trail ", stdout );
std::fputs( " uncompressed compressed saved name\n", stdout );
}
+ if( multi_empty )
+ { std::fflush( stdout ); show_file_error( input_filename, empty_msg ); }
if( verbosity >= 1 )
std::printf( "%s %5ld %6lld ", format_ds( lzip_index.dictionary_size() ),
members, lzip_index.file_size() - cdata_size );
@@ -113,12 +119,16 @@ int list_files( const std::vector< std::string > & filenames,
first_post = true; // reprint heading after list of members
}
std::fflush( stdout );
+ if( std::ferror( stdout ) ) break;
}
- if( verbosity >= 0 && files > 1 )
+ if( verbosity >= 0 && files > 1 && !std::ferror( stdout ) )
{
if( verbosity >= 1 ) std::fputs( " ", stdout );
list_line( total_uncomp, total_comp, "(totals)" );
std::fflush( stdout );
}
+ if( verbosity >= 0 && ( std::ferror( stdout ) || std::fclose( stdout ) != 0 ) )
+ { show_file_error( "(stdout)", wr_err_msg, errno );
+ set_retval( retval, 1 ); }
return retval;
}
diff --git a/lunzcrash.cc b/lunzcrash.cc
index ad05697..cf1c5ae 100644
--- a/lunzcrash.cc
+++ b/lunzcrash.cc
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2024 Antonio Diaz Diaz.
+ Copyright (C) 2009-2025 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -56,7 +56,7 @@ bool compare_member( const uint8_t * const mbuffer, const long msize,
{
MD5SUM md5sum;
LZ_mtester mtester( mbuffer, msize, dictionary_size, -1, &md5sum );
- bool error = ( mtester.test_member() != 0 || !mtester.finished() );
+ bool error = mtester.test_member() != 0 || !mtester.finished();
if( !error )
{
md5_type new_digest;
@@ -103,22 +103,23 @@ long next_pct_pos( const Lzip_index & lzip_index, const long i, const int pct,
/* Test 1-bit errors in LZMA streams in file.
Unless verbosity >= 1, print only the bytes with interesting results. */
-int lunzcrash_bit( const char * const input_filename,
+int lunzcrash_bit( const std::string & input_filename,
const Cl_options & cl_opts )
{
+ const char * const filename = input_filename.c_str();
struct stat in_stats; // not used
- const int infd = open_instream( input_filename, &in_stats, false, true );
+ const int infd = open_instream( filename, &in_stats, false, true );
if( infd < 0 ) return 1;
const Lzip_index lzip_index( infd, cl_opts );
if( lzip_index.retval() != 0 )
- { show_file_error( input_filename, lzip_index.error().c_str() );
+ { show_file_error( filename, lzip_index.error().c_str() );
return lzip_index.retval(); }
- if( verbosity >= 2 ) printf( "Testing file '%s'\n", input_filename );
+ if( verbosity >= 2 ) printf( "Testing file '%s'\n", filename );
const long long cdata_size = lzip_index.cdata_size();
long positions = 0, decompressions = 0, successes = 0, failed_comparisons = 0;
- int pct = ( cdata_size >= 1000 && isatty( STDERR_FILENO ) ) ? 0 : 100;
+ int pct = (cdata_size >= 1000 && isatty( STDERR_FILENO )) ? 0 : 100;
for( long i = 0; i < lzip_index.members(); ++i )
{
const long long mpos = lzip_index.mblock( i ).pos();
@@ -127,8 +128,8 @@ int lunzcrash_bit( const char * const input_filename,
if( !mbuffer ) return 1;
const unsigned dictionary_size = lzip_index.dictionary_size( i );
md5_type md5_orig;
- if( !check_member( mbuffer, msize, dictionary_size, input_filename,
- md5_orig ) ) return 2;
+ if( !check_member( mbuffer, msize, dictionary_size, filename, md5_orig ) )
+ return 2;
long pct_pos = next_pct_pos( lzip_index, i, pct );
long pos = Lzip_header::size + 1, printed = 0; // last pos printed
const long end = msize - 20;
@@ -205,15 +206,16 @@ int lunzcrash_bit( const char * const input_filename,
if( verbosity >= 0 )
{
- std::printf( "\n%9ld bytes tested\n%9ld total decompressions"
- "\n%9ld decompressions returned with zero status",
- positions, decompressions, successes );
+ std::printf( "\n%11s bytes tested\n%11s total decompressions"
+ "\n%11s decompressions returned with zero status",
+ format_num3( positions ), format_num3( decompressions ),
+ format_num3( successes ) );
if( successes > 0 )
{
if( failed_comparisons > 0 )
- std::printf( ", of which\n%9ld comparisons failed\n",
- failed_comparisons );
- else std::fputs( "\n all comparisons passed\n", stdout );
+ std::printf( ", of which\n%11s comparisons failed\n",
+ format_num3( failed_comparisons ) );
+ else std::fputs( "\n all comparisons passed\n", stdout );
}
else std::fputc( '\n', stdout );
}
@@ -223,22 +225,23 @@ int lunzcrash_bit( const char * const input_filename,
/* Test zeroed blocks of given size in LZMA streams in file.
Unless verbosity >= 1, print only the bytes with interesting results. */
-int lunzcrash_block( const char * const input_filename,
+int lunzcrash_block( const std::string & input_filename,
const Cl_options & cl_opts, const int sector_size )
{
+ const char * const filename = input_filename.c_str();
struct stat in_stats; // not used
- const int infd = open_instream( input_filename, &in_stats, false, true );
+ const int infd = open_instream( filename, &in_stats, false, true );
if( infd < 0 ) return 1;
const Lzip_index lzip_index( infd, cl_opts );
if( lzip_index.retval() != 0 )
- { show_file_error( input_filename, lzip_index.error().c_str() );
+ { show_file_error( filename, lzip_index.error().c_str() );
return lzip_index.retval(); }
- if( verbosity >= 2 ) printf( "Testing file '%s'\n", input_filename );
+ if( verbosity >= 2 ) printf( "Testing file '%s'\n", filename );
const long long cdata_size = lzip_index.cdata_size();
long decompressions = 0, successes = 0, failed_comparisons = 0;
- int pct = ( cdata_size >= 1000 && isatty( STDERR_FILENO ) ) ? 0 : 100;
+ int pct = (cdata_size >= 1000 && isatty( STDERR_FILENO )) ? 0 : 100;
uint8_t * const block = new uint8_t[sector_size];
for( long i = 0; i < lzip_index.members(); ++i )
{
@@ -250,8 +253,8 @@ int lunzcrash_block( const char * const input_filename,
if( !mbuffer ) return 1;
const unsigned dictionary_size = lzip_index.dictionary_size( i );
md5_type md5_orig;
- if( !check_member( mbuffer, msize, dictionary_size, input_filename,
- md5_orig ) ) return 2;
+ if( !check_member( mbuffer, msize, dictionary_size, filename, md5_orig ) )
+ return 2;
long pct_pos = next_pct_pos( lzip_index, i, pct, sector_size );
long pos = Lzip_header::size + 1;
const long end = msize - sector_size - 20;
@@ -319,15 +322,16 @@ int lunzcrash_block( const char * const input_filename,
if( verbosity >= 0 )
{
- std::printf( "\n%9ld blocks tested\n%9ld total decompressions"
- "\n%9ld decompressions returned with zero status",
- decompressions, decompressions, successes );
+ std::printf( "\n%11s blocks tested\n%11s total decompressions"
+ "\n%11s decompressions returned with zero status",
+ format_num3( decompressions ), format_num3( decompressions ),
+ format_num3( successes ) );
if( successes > 0 )
{
if( failed_comparisons > 0 )
- std::printf( ", of which\n%9ld comparisons failed\n",
- failed_comparisons );
- else std::fputs( "\n all comparisons passed\n", stdout );
+ std::printf( ", of which\n%11s comparisons failed\n",
+ format_num3( failed_comparisons ) );
+ else std::fputs( "\n all comparisons passed\n", stdout );
}
else std::fputc( '\n', stdout );
}
@@ -342,7 +346,7 @@ int md5sum_files( const std::vector< std::string > & filenames )
for( unsigned i = 0; i < filenames.size(); ++i )
{
- const bool from_stdin = ( filenames[i] == "-" );
+ const bool from_stdin = filenames[i] == "-";
if( from_stdin ) { if( stdin_used ) continue; else stdin_used = true; }
const char * const input_filename = filenames[i].c_str();
struct stat in_stats; // not used
@@ -357,7 +361,8 @@ int md5sum_files( const std::vector< std::string > & filenames )
while( true )
{
const int len = readblock( infd, buffer, buffer_size );
- if( len != buffer_size && errno ) throw Error( "Read error" );
+ if( len != buffer_size && errno )
+ { show_file_error( input_filename, read_error_msg, errno ); return 1; }
if( len > 0 ) md5sum.md5_update( buffer, len );
if( len < buffer_size ) break;
}
@@ -365,10 +370,15 @@ int md5sum_files( const std::vector< std::string > & filenames )
if( close( infd ) != 0 )
{ show_file_error( input_filename, "Error closing input file", errno );
return 1; }
+ if( verbosity < 0 ) continue;
for( int i = 0; i < 16; ++i ) std::printf( "%02x", md5_digest[i] );
std::printf( " %s\n", input_filename );
std::fflush( stdout );
+ if( std::ferror( stdout ) ) break;
}
+ if( verbosity >= 0 && ( std::ferror( stdout ) || std::fclose( stdout ) != 0 ) )
+ { show_file_error( "(stdout)", wr_err_msg, errno );
+ set_retval( retval, 1 ); }
return retval;
}
diff --git a/lzip.h b/lzip.h
index fb910ba..546e71a 100644
--- a/lzip.h
+++ b/lzip.h
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2024 Antonio Diaz Diaz.
+ Copyright (C) 2009-2025 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -33,9 +33,9 @@ public:
st = next[st];
}
bool is_char_set_char() { set_char(); return st < 4; }
- void set_match() { st = ( st < 7 ) ? 7 : 10; }
- void set_rep() { st = ( st < 7 ) ? 8 : 11; }
- void set_short_rep() { st = ( st < 7 ) ? 9 : 11; }
+ void set_match() { st = ( st < 7 ) ? 7 : 10; }
+ void set_rep() { st = ( st < 7 ) ? 8 : 11; }
+ void set_shortrep() { st = ( st < 7 ) ? 9 : 11; }
};
@@ -98,9 +98,6 @@ struct Len_model
};
-// defined in main.cc
-extern int verbosity;
-
class Pretty_print // requires global var 'int verbosity'
{
std::string name_;
@@ -154,13 +151,17 @@ class CRC32
uint32_t data[256]; // Table of CRCs of all 8-bit messages.
public:
- CRC32()
+ explicit CRC32( const bool castagnoli = false )
{
+ const unsigned cpol = 0x82F63B78U; // CRC32-C Castagnoli polynomial
+ const unsigned ipol = 0xEDB88320U; // IEEE 802.3 Ethernet polynomial
+ const unsigned poly = castagnoli ? cpol : ipol;
+
for( unsigned n = 0; n < 256; ++n )
{
unsigned c = n;
for( int k = 0; k < 8; ++k )
- { if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; }
+ { if( c & 1 ) c = poly ^ ( c >> 1 ); else c >>= 1; }
data[n] = c;
}
}
@@ -179,6 +180,15 @@ public:
c = data[(c^buffer[i])&0xFF] ^ ( c >> 8 );
crc = c;
}
+
+ uint32_t compute_crc( const uint8_t * const buffer, // CRC of buffer[0] to buffer[size-1] using this object's table
+ const unsigned long size ) const
+ {
+ uint32_t crc = 0xFFFFFFFFU; // initial value
+ for( unsigned long i = 0; i < size; ++i )
+ crc = data[(crc^buffer[i])&0xFF] ^ ( crc >> 8 ); // one table lookup per byte
+ return crc ^ 0xFFFFFFFFU; // final inversion
+ }
};
extern const CRC32 crc32;
@@ -311,15 +321,12 @@ struct Lzip_trailer
struct Cl_options // command-line options
{
- bool ignore_empty;
bool ignore_errors;
- bool ignore_marking;
bool ignore_trailing;
bool loose_trailing;
- Cl_options()
- : ignore_empty( true ), ignore_errors( false ), ignore_marking( true ),
- ignore_trailing( true ), loose_trailing( false ) {}
+ Cl_options() : ignore_errors( false ),
+ ignore_trailing( true ), loose_trailing( false ) {}
};
@@ -333,6 +340,8 @@ class Block
public:
Block( const long long p, const long long s ) : pos_( p ), size_( s ) {}
+ Block & assign( const long long p, const long long s ) // set position and size
+ { pos_ = p; size_ = s; return *this; } // returns this block
long long pos() const { return pos_; }
long long size() const { return size_; }
@@ -354,6 +363,8 @@ public:
{ return pos_ < b.end() && b.pos_ < end(); }
bool overlaps( const long long pos, const long long size ) const
{ return pos_ < pos + size && pos < end(); }
+ bool touches( const Block & b ) const // blocks are mergeable
+ { return pos_ <= b.end() && b.pos_ <= end(); }
Block split( const long long pos );
};
@@ -406,32 +417,44 @@ inline unsigned long long positive_diff( const unsigned long long x,
inline void set_retval( int & retval, const int new_val )
{ if( retval < new_val ) retval = new_val; }
+inline const char * printable_name( const std::string & filename,
+ const bool in = true )
+ {
+ if( filename.empty() || filename == "-" ) return in ? "(stdin)" : "(stdout)";
+ return filename.c_str();
+ }
+
const char * const bad_magic_msg = "Bad magic number (file not in lzip format).";
const char * const bad_dict_msg = "Invalid dictionary size in member header.";
const char * const corrupt_mm_msg = "Corrupt header in multimember file.";
const char * const empty_msg = "Empty member not allowed.";
-const char * const marking_msg = "Marking data not allowed.";
+const char * const mmap_msg = "Can't mmap";
+const char * const nonzero_msg = "Nonzero first LZMA byte.";
+const char * const short_file_msg = "Input file is truncated.";
const char * const trailing_msg = "Trailing data not allowed.";
+const char * const wr_err_msg = "Write error";
// defined in alone_to_lz.cc
int alone_to_lz( const int infd, const Pretty_print & pp );
// defined in byte_repair.cc
+bool safe_seek( const int fd, const long long pos,
+ const std::string & filename );
long seek_write( const int fd, const uint8_t * const buf, const long size,
const long long pos );
uint8_t * read_member( const int infd, const long long mpos,
- const long long msize, const char * const filename );
+ const long long msize, const std::string & filename );
int byte_repair( const std::string & input_filename,
const std::string & default_output_filename,
const Cl_options & cl_opts,
const char terminator, const bool force );
-int debug_delay( const char * const input_filename,
+int debug_delay( const std::string & input_filename,
const Cl_options & cl_opts, Block range,
const char terminator );
-int debug_byte_repair( const char * const input_filename,
+int debug_byte_repair( const std::string & input_filename,
const Cl_options & cl_opts, const Bad_byte & bad_byte,
const char terminator );
-int debug_decompress( const char * const input_filename,
+int debug_decompress( const std::string & input_filename,
const Cl_options & cl_opts, const Bad_byte & bad_byte,
const bool show_packets );
@@ -446,21 +469,17 @@ int dump_members( const std::vector< std::string > & filenames,
const bool force, const bool strip, const bool to_stdout );
int remove_members( const std::vector< std::string > & filenames,
const Cl_options & cl_opts, const Member_list & member_list );
-int clear_marking( const std::vector< std::string > & filenames,
- const Cl_options & cl_opts );
+int nonzero_repair( const std::vector< std::string > & filenames,
+ const Cl_options & cl_opts );
// defined in list.cc
int list_files( const std::vector< std::string > & filenames,
const Cl_options & cl_opts );
-// defined in lzip_index.cc
-int seek_read( const int fd, uint8_t * const buf, const int size,
- const long long pos );
-
// defined in lunzcrash.cc
-int lunzcrash_bit( const char * const input_filename,
+int lunzcrash_bit( const std::string & input_filename,
const Cl_options & cl_opts );
-int lunzcrash_block( const char * const input_filename,
+int lunzcrash_block( const std::string & input_filename,
const Cl_options & cl_opts, const int sector_size );
int md5sum_files( const std::vector< std::string > & filenames );
@@ -483,9 +502,11 @@ bool open_outstream( const bool force, const bool protect,
bool output_file_exists();
void cleanup_and_fail( const int retval );
bool check_tty_out();
+void format_trailing_bytes( const uint8_t * const data, const int size,
+ std::string & msg );
void set_signal_handler();
bool close_outstream( const struct stat * const in_statsp );
-std::string insert_fixed( std::string name );
+std::string insert_fixed( std::string name, const bool append_lz = true );
void show_2file_error( const char * const msg1, const char * const name1,
const char * const name2, const char * const msg2 );
class Range_decoder;
@@ -495,10 +516,11 @@ void show_dprogress( const unsigned long long cfile_size = 0,
const Pretty_print * const p = 0 );
// defined in merge.cc
-bool copy_file( const int infd, const int outfd,
- const long long max_size = -1 );
+bool copy_file( const int infd, const int outfd, const std::string & iname,
+ const std::string & oname, const long long max_size = -1 );
int test_member_from_file( const int infd, const unsigned long long msize,
- long long * const failure_posp = 0 );
+ long long * const failure_posp = 0,
+ bool * const nonzerop = 0 );
int merge_files( const std::vector< std::string > & filenames,
const std::string & default_output_filename,
const Cl_options & cl_opts, const char terminator,
@@ -512,8 +534,6 @@ int print_nrep_stats( const std::vector< std::string > & filenames,
const char * format_num( unsigned long long num,
unsigned long long limit = -1ULL,
const int set_prefix = 0 );
-bool safe_seek( const int fd, const long long pos,
- const char * const filename );
int range_decompress( const std::string & input_filename,
const std::string & default_output_filename,
const Cl_options & cl_opts, Block range,
@@ -526,7 +546,7 @@ int reproduce_file( const std::string & input_filename,
const char * const reference_filename,
const Cl_options & cl_opts, const int lzip_level,
const char terminator, const bool force );
-int debug_reproduce_file( const char * const input_filename,
+int debug_reproduce_file( const std::string & input_filename,
const char * const lzip_name,
const char * const reference_filename,
const Cl_options & cl_opts, const Block & range,
diff --git a/lzip_index.cc b/lzip_index.cc
index 459338e..00f2050 100644
--- a/lzip_index.cc
+++ b/lzip_index.cc
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2024 Antonio Diaz Diaz.
+ Copyright (C) 2009-2025 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -67,13 +67,10 @@ void Lzip_index::set_num_error( const char * const msg, unsigned long long num )
bool Lzip_index::read_header( const int fd, Lzip_header & header,
- const long long pos, const bool ignore_marking )
+ const long long pos )
{
if( seek_read( fd, header.data, header.size, pos ) != header.size )
{ set_errno_error( "Error reading member header: " ); return false; }
- uint8_t byte;
- if( !ignore_marking && readblock( fd, &byte, 1 ) == 1 && byte != 0 )
- { error_ = marking_msg; retval_ = 2; return false; }
return true;
}
@@ -123,8 +120,7 @@ bool Lzip_index::skip_gap( const int fd, unsigned long long & pos,
{ while( i > trailer.size && buffer[i-9] == 0 ) --i; continue; }
if( member_size > ipos + i || !trailer.check_consistency() ) continue;
Lzip_header header;
- if( !read_header( fd, header, ipos + i - member_size,
- cl_opts.ignore_marking ) ) return false;
+ if( !read_header( fd, header, ipos + i - member_size ) ) return false;
if( !header.check( ignore_bad_ds ) ) continue;
const Lzip_header & header2 = *(const Lzip_header *)( buffer + i );
const bool full_h2 = bsize - i >= header.size;
@@ -152,15 +148,12 @@ bool Lzip_index::skip_gap( const int fd, unsigned long long & pos,
if( !cl_opts.ignore_trailing )
{ error_ = trailing_msg; retval_ = 2; return false; }
}
- const unsigned long long data_size = trailer.data_size();
- if( !cl_opts.ignore_empty && data_size == 0 )
- { error_ = empty_msg; retval_ = 2; return false; }
pos = ipos + i - member_size; // good member
const unsigned dictionary_size = header.dictionary_size();
if( dictionary_size_ < dictionary_size )
dictionary_size_ = dictionary_size;
- member_vector.push_back( Member( 0, data_size, pos, member_size,
- dictionary_size ) );
+ member_vector.push_back( Member( 0, trailer.data_size(), pos,
+ member_size, dictionary_size ) );
return true;
}
if( ipos == 0 )
@@ -192,18 +185,18 @@ Lzip_index::Lzip_index( const int infd, const Cl_options & cl_opts,
{
if( insize < 0 )
{ set_errno_error( "Input file is not seekable: " ); return; }
+ Lzip_header header;
+ if( insize >= header.size &&
+ ( !read_header( infd, header, 0 ) ||
+ !check_header( header, ignore_bad_ds ) ) ) return;
if( insize < min_member_size )
- { error_ = "Input file is too short."; retval_ = 2; return; }
+ { error_ = "Input file is truncated."; retval_ = 2; return; }
if( insize > INT64_MAX )
{ error_ = "Input file is too long (2^63 bytes or more).";
retval_ = 2; return; }
- Lzip_header header;
- if( !read_header( infd, header, 0, cl_opts.ignore_marking ) ||
- !check_header( header, ignore_bad_ds ) ) return;
-
// pos always points to a header or to ( EOF || max_pos )
- unsigned long long pos = ( max_pos > 0 ) ? max_pos : insize;
+ unsigned long long pos = (max_pos > 0) ? max_pos : insize;
while( pos >= min_member_size )
{
Lzip_trailer trailer;
@@ -219,8 +212,7 @@ Lzip_index::Lzip_index( const int infd, const Cl_options & cl_opts,
continue; else return; }
set_num_error( "Bad trailer at pos ", pos - trailer.size ); break;
}
- if( !read_header( infd, header, pos - member_size, cl_opts.ignore_marking ) )
- break;
+ if( !read_header( infd, header, pos - member_size ) ) break;
if( !header.check( ignore_bad_ds ) ) // bad header
{
if( ignore_gaps || member_vector.empty() )
@@ -228,15 +220,12 @@ Lzip_index::Lzip_index( const int infd, const Cl_options & cl_opts,
continue; else return; }
set_num_error( "Bad header at pos ", pos - member_size ); break;
}
- const unsigned long long data_size = trailer.data_size();
- if( !cl_opts.ignore_empty && data_size == 0 )
- { error_ = empty_msg; retval_ = 2; break; }
pos -= member_size; // good member
const unsigned dictionary_size = header.dictionary_size();
if( dictionary_size_ < dictionary_size )
dictionary_size_ = dictionary_size;
- member_vector.push_back( Member( 0, data_size, pos, member_size,
- dictionary_size ) );
+ member_vector.push_back( Member( 0, trailer.data_size(), pos,
+ member_size, dictionary_size ) );
}
// block at pos == 0 must be a member unless shorter than min_member_size
if( pos >= min_member_size || ( pos != 0 && !ignore_gaps ) ||
@@ -272,7 +261,7 @@ Lzip_index::Lzip_index( const std::vector< int > & infd_vector,
if( insize < 0 )
{ set_errno_error( "Input file is not seekable: " ); return; }
if( insize < min_member_size )
- { error_ = "Input file is too short."; retval_ = 2; return; }
+ { error_ = short_file_msg; retval_ = 2; return; }
if( insize > INT64_MAX )
{ error_ = "Input file is too long (2^63 bytes or more).";
retval_ = 2; return; }
diff --git a/lzip_index.h b/lzip_index.h
index 95e277d..e3a6f74 100644
--- a/lzip_index.h
+++ b/lzip_index.h
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2024 Antonio Diaz Diaz.
+ Copyright (C) 2009-2025 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -28,8 +28,8 @@ class Lzip_index
: dblock( dpos, dsize ), mblock( mpos, msize ),
dictionary_size( dict_size ) {}
- bool operator==( const Member & m ) const { return ( mblock == m.mblock ); }
- bool operator!=( const Member & m ) const { return ( mblock != m.mblock ); }
+ bool operator==( const Member & m ) const { return mblock == m.mblock; }
+ bool operator!=( const Member & m ) const { return mblock != m.mblock; }
};
// member_vector only contains members with a valid header.
@@ -43,8 +43,7 @@ class Lzip_index
bool check_header( const Lzip_header & header, const bool ignore_bad_ds );
void set_errno_error( const char * const msg );
void set_num_error( const char * const msg, unsigned long long num );
- bool read_header( const int fd, Lzip_header & header, const long long pos,
- const bool ignore_marking = true );
+ bool read_header( const int fd, Lzip_header & header, const long long pos );
bool read_trailer( const int fd, Lzip_trailer & trailer,
const long long pos );
bool skip_gap( const int fd, unsigned long long & pos,
@@ -65,6 +64,14 @@ public:
int retval() const { return retval_; }
unsigned dictionary_size() const { return dictionary_size_; }
+ bool multi_empty() const // multimember file with empty member(s)
+ {
+ if( member_vector.size() > 1 )
+ for( unsigned long i = 0; i < member_vector.size(); ++i )
+ if( member_vector[i].dblock.size() == 0 ) return true;
+ return false;
+ }
+
bool operator==( const Lzip_index & li ) const
{
if( retval_ || li.retval_ || insize != li.insize ||
@@ -94,3 +101,6 @@ public:
unsigned dictionary_size( const long i ) const
{ return member_vector[i].dictionary_size; }
};
+
+int seek_read( const int fd, uint8_t * const buf, const int size,
+ const long long pos );
diff --git a/main.cc b/main.cc
index f82118f..f5900e9 100644
--- a/main.cc
+++ b/main.cc
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2024 Antonio Diaz Diaz.
+ Copyright (C) 2009-2025 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -26,15 +26,17 @@
#include <algorithm>
#include <cctype>
#include <cerrno>
-#include <climits> // SSIZE_MAX
+#include <climits> // CHAR_BIT, SSIZE_MAX
#include <csignal>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <new>
+#include <list>
#include <string>
#include <vector>
#include <fcntl.h>
+#include <pthread.h> // pthread_t
#include <stdint.h> // SIZE_MAX
#include <unistd.h>
#include <utime.h>
@@ -42,8 +44,10 @@
#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__
#include <io.h>
#if defined __MSVCRT__
+#include <direct.h>
#define fchmod(x,y) 0
#define fchown(x,y,z) 0
+#define mkdir(name,mode) _mkdir(name)
#define SIGHUP SIGTERM
#define S_ISSOCK(x) 0
#ifndef S_IRGRP
@@ -62,6 +66,8 @@
#include "arg_parser.h"
#include "lzip.h"
#include "decoder.h"
+#include "md5.h"
+#include "fec.h"
#ifndef O_BINARY
#define O_BINARY 0
@@ -77,10 +83,7 @@
#endif
bool fits_in_size_t( const unsigned long long size ) // fits also in long
- { return ( sizeof (long) <= sizeof (size_t) && size <= LONG_MAX ) ||
- ( sizeof (int) <= sizeof (size_t) && size <= INT_MAX ); }
-
-int verbosity = 0;
+ { return sizeof (long) <= sizeof (size_t) && size <= LONG_MAX; }
const char * const program_name = "lziprecover";
std::string output_filename; // global vars for output file
@@ -95,33 +98,29 @@ const struct { const char * from; const char * to; } known_extensions[] = {
{ ".tlz", ".tar" },
{ 0, 0 } };
-enum Mode { m_none, m_alone_to_lz, m_byte_repair, m_clear_marking,
- m_debug_byte_repair, m_debug_decompress, m_debug_delay,
- m_decompress, m_dump, m_list, m_md5sum, m_merge, m_nrep_stats,
- m_range_dec, m_remove, m_reproduce, m_show_packets, m_split,
- m_strip, m_test, m_unzcrash_bit, m_unzcrash_block };
+enum Mode { m_none, m_alone_to_lz, m_byte_repair, m_check, m_debug_byte_repair,
+ m_debug_decompress, m_debug_delay, m_decompress, m_dump,
+ m_fec_create, m_fec_repair, m_fec_test, m_fec_list, m_fec_dc,
+ m_fec_dz, m_fec_dZ, m_list, m_md5sum, m_merge, m_nonzero_repair,
+ m_nrep_stats, m_range_dec, m_remove, m_reproduce, m_show_packets,
+ m_split, m_strip, m_test, m_unzcrash_bit, m_unzcrash_block };
-/* Variable used in signal handler context.
- It is not declared volatile because the handler never returns. */
+/* Variables used in signal handler context.
+ They are not declared volatile because the handler never returns. */
bool delete_output_on_interrupt = false;
-void show_help()
+void show_help( const long num_online )
{
std::printf( "Lziprecover is a data recovery tool and decompressor for files in the lzip\n"
- "compressed data format (.lz). Lziprecover is able to repair slightly damaged\n"
- "files (up to one single-byte error per member), produce a correct file by\n"
- "merging the good parts of two or more damaged copies, reproduce a missing\n"
- "(zeroed) sector using a reference file, extract data from damaged files,\n"
- "decompress files, and test integrity of files.\n"
+ "compressed data format (.lz). Lziprecover also provides Forward Error\n"
+ "Correction (FEC) able to repair any kind of file.\n"
"\nWith the help of lziprecover, losing an entire archive just because of a\n"
"corrupt byte near the beginning is a thing of the past.\n"
"\nLziprecover can remove the damaged members from multimember files, for\n"
"example multimember tar.lz archives.\n"
"\nLziprecover provides random access to the data in multimember files; it only\n"
"decompresses the members containing the desired data.\n"
- "\nLziprecover facilitates the management of metadata stored as trailing data\n"
- "in lzip files.\n"
"\nLziprecover is not a replacement for regular backups, but a last line of\n"
"defense for the case where the backups are also damaged.\n"
"\nUsage: %s [options] [files]\n", invocation_name );
@@ -130,6 +129,8 @@ void show_help()
" -V, --version output version information and exit\n"
" -a, --trailing-error exit with error status if trailing data\n"
" -A, --alone-to-lz convert lzma-alone files to lzip format\n"
+ " -b, --block-size=<bytes> make FEC block size a multiple of <bytes>\n"
+ " -B, --byte-repair try to repair a corrupt byte in file\n"
" -c, --stdout write to standard output, keep input files\n"
" -d, --decompress decompress, test compressed file integrity\n"
" -D, --range-decompress=<n-m> decompress a range of bytes to stdout\n"
@@ -138,39 +139,52 @@ void show_help()
" --lzip-name=<name> name of lzip executable for --reproduce\n"
" --reference-file=<file> reference file for --reproduce\n"
" -f, --force overwrite existing output files\n"
- " -i, --ignore-errors ignore some errors in -d, -D, -l, -t, --dump\n"
+ " -F, --fec=c[N]|r|t|l create, repair, test, list (using) fec file\n"
+ " -0 .. -9 set FEC fragmentation level [default 9]\n"
+ " --fec-file=<file>[/] read fec file from <file> or directory\n"
+ " -i, --ignore-errors ignore non-fatal errors\n"
" -k, --keep keep (don't delete) input files\n"
" -l, --list print (un)compressed file sizes\n"
" -m, --merge repair errors in file using several copies\n"
- " -o, --output=<file> place the output into <file>\n"
+ " -n, --threads=<n> set number of threads for fec create [%ld]\n"
+ " -o, --output=<file>[/] place the output into <file> or directory\n"
" -q, --quiet suppress all messages\n"
- " -R, --byte-repair try to repair a corrupt byte in file\n"
+ " -r, --recursive (fec) operate recursively on directories\n"
+ " -R, --dereference-recursive (fec) recursively follow symbolic links\n"
" -s, --split split multimember file in single-member files\n"
" -t, --test test compressed file integrity\n"
" -v, --verbose be verbose (a 2nd -v gives more)\n"
" --dump=<list>:d:e:t dump members, damaged/empty, tdata to stdout\n"
" --remove=<list>:d:e:t remove members, tdata from files in place\n"
" --strip=<list>:d:e:t copy files to stdout stripping members given\n"
- " --empty-error exit with error status if empty member in file\n"
- " --marking-error exit with error status if 1st LZMA byte not 0\n"
" --loose-trailing allow trailing data seeming corrupt header\n"
- " --clear-marking reset the first LZMA byte of each member\n" );
+ " --nonzero-repair repair in place a nonzero first LZMA byte\n",
+ num_online );
if( verbosity >= 1 )
{
std::printf( "\nDebug options for experts:\n"
" -E, --debug-reproduce=<range>[,ss] set range to 0 and try to reproduce file\n"
+ " -F, --fec=dc<n> test repair combinations of n zeroed blocks\n"
+ " -F, --fec=dz<range>[:<range>]... test repair zeroed block(s) at range(s)\n"
+ " -F, --fec=dZ<size>[,<delta>] test repair zeroed blocks of size <size>\n"
" -M, --md5sum print the MD5 digests of the input files\n"
" -S, --nrep-stats[=<val>] print stats of N-byte repeated sequences\n"
" -U, --unzcrash=1|B<size> test 1-bit or block errors in input file\n"
" -W, --debug-decompress=<pos>,<val> set pos to val and decompress to stdout\n"
" -X, --show-packets[=<pos>,<val>] show in stdout the decoded LZMA packets\n"
" -Y, --debug-delay=<range> find max error detection delay in <range>\n"
- " -Z, --debug-byte-repair=<pos>,<val> test repair one-byte error at <pos>\n" );
+ " -Z, --debug-byte-repair=<pos>,<val> test repair one-byte error at <pos>\n"
+ " --check=<size> check creation of FEC decode matrix\n"
+ " --debug=<level> print parallel FEC statistics to stderr\n"
+ " --gf16 use GF(2^16) to create fec files\n"
+ " --random create fec files with random block numbers\n" );
}
std::printf( "\nIf no file names are given, or if a file is '-', lziprecover decompresses\n"
"from standard input to standard output.\n"
"Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
"Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n"
+ "The argument to --fec=create may be a number of blocks (-Fc20), a\n"
+ "percentage (-Fc5%%), or a size in bytes (-Fc10KiB).\n"
"\nTo extract all the files from archive 'foo.tar.lz', use the commands\n"
"'tar -xf foo.tar.lz' or 'lziprecover -cd foo.tar.lz | tar -xf -'.\n"
"\nExit status: 0 for a normal exit, 1 for environmental problems\n"
@@ -213,7 +227,7 @@ const char * format_ds( const unsigned dictionary_size )
const char * p = "";
const char * np = " ";
unsigned num = dictionary_size;
- bool exact = ( num % factor == 0 );
+ bool exact = num % factor == 0;
for( int i = 0; i < n && ( num > 9999 || ( exact && num >= factor ) ); ++i )
{ num /= factor; if( num % factor != 0 ) exact = false;
@@ -248,12 +262,12 @@ void Member_list::parse_ml( const char * const arg,
if( len <= 7 && std::strncmp( "damaged", p, len ) == 0 )
{ damaged = true; cl_opts.ignore_errors = true; goto next; }
if( len <= 5 && std::strncmp( "empty", p, len ) == 0 )
- { empty = true; cl_opts.ignore_empty = true; goto next; }
+ { empty = true; goto next; }
if( len <= 5 && std::strncmp( "tdata", p, len ) == 0 )
{ tdata = true; cl_opts.ignore_trailing = true; goto next; }
}
{
- const bool reverse = ( *p == 'r' );
+ const bool reverse = *p == 'r';
if( reverse ) ++p;
if( *p == '^' ) { ++p; if( reverse ) rin = false; else in = false; }
std::vector< Block > * rvp = reverse ? &rrange_vector : &range_vector;
@@ -279,13 +293,14 @@ next:
namespace {
+const char * const inv_arg_msg = "Invalid argument in";
+
// Recognized formats: <digit> 'a' m[<match_length>]
int parse_lzip_level( const char * const arg, const char * const option_name )
{
if( *arg == 'a' || std::isdigit( *(const unsigned char *)arg ) ) return *arg;
if( *arg != 'm' )
- { show_option_error( arg, "Invalid argument in", option_name );
- std::exit( 1 ); }
+ { show_option_error( arg, inv_arg_msg, option_name ); std::exit( 1 ); }
if( arg[1] == 0 ) return -1;
return -getnum( arg + 1, option_name, 0, min_match_len_limit, max_match_len );
}
@@ -306,7 +321,7 @@ const char * parse_range( const char * const arg, const char * const pn,
range.pos( value );
if( tail[0] == 0 || tail[0] == ':' )
{ range.size( INT64_MAX - value ); return tail; }
- const bool is_size = ( tail[0] == ',' );
+ const bool is_size = tail[0] == ',';
if( sector_sizep && tail[1] == ',' ) { value = INT64_MAX - value; ++tail; }
else value = getnum( tail + 1, pn, 0, 1, INT64_MAX, &tail ); // size
if( !is_size && value <= range.pos() )
@@ -325,6 +340,61 @@ const char * parse_range( const char * const arg, const char * const pn,
}
+// Insert b in its place or merge it with contiguous or overlapping blocks.
+void insert_block_sorted( std::vector< Block > & block_vector, const Block & b )
+ {
+ if( block_vector.empty() || b.pos() > block_vector.back().end() )
+ { block_vector.push_back( b ); return; } // append at the end
+ const long long pos = b.pos();
+ const long long end = b.end();
+ for( unsigned long i = 0; i < block_vector.size(); ++i )
+ if( end <= block_vector[i].pos() ) // maybe insert b before i
+ {
+ if( end < block_vector[i].pos() &&
+ ( i == 0 || pos > block_vector[i-1].end() ) )
+ { block_vector.insert( block_vector.begin() + i, b ); return; }
+ break;
+ }
+ for( unsigned long i = 0; i < block_vector.size(); ++i )
+ if( block_vector[i].touches( b ) ) // merge b with blocks touching it
+ {
+ unsigned long j = i; // indexes of first/last mergeable blocks
+ while( j + 1 < block_vector.size() && block_vector[j+1].touches( b ) )
+ ++j;
+ const long long new_pos = std::min( pos, block_vector[i].pos() );
+ const long long new_end = std::max( end, block_vector[j].end() );
+ block_vector[i].assign( new_pos, new_end - new_pos );
+ if( i < j ) block_vector.erase( block_vector.begin() + i + 1,
+ block_vector.begin() + j + 1 );
+ break;
+ }
+ }
+
+/* Recognized format: <range>[:<range>]...
+ Allow unordered, overlapping ranges. Return ranges sorted and merged. */
+void parse_range_vector( const char * const arg, const char * const pn,
+ std::vector< Block > & range_vector )
+ {
+ Block range( 0, 0 );
+ const char * p = arg;
+ while( true )
+ {
+ p = parse_range( p, pn, range );
+ insert_block_sorted( range_vector, range );
+ if( *p == 0 ) return;
+ if( *p == ':' ) { ++p; if( *p == 0 ) return; else continue; }
+ show_option_error( p, "Extra characters in", pn );
+ std::exit( 1 );
+ }
+ }
+
+
+void no_to_stdout( const bool to_stdout )
+ {
+ if( to_stdout )
+ { show_error( "'--stdout' not allowed." ); std::exit( 1 ); }
+ }
+
void one_file( const int files )
{
if( files != 1 )
@@ -355,6 +425,81 @@ void set_mode( Mode & program_mode, const Mode new_mode )
}
+// return true if arg is a non-empty prefix of target
+bool compare_prefix( const char * const arg, const char * const target,
+ const char * const option_name = 0,
+ unsigned long * const fb_or_pctp = 0, char * fctypep = 0 )
+ {
+ if( arg[0] == target[0] )
+ for( int i = 1; i < INT_MAX; ++i )
+ {
+ if( arg[i] == 0 ) return true;
+ if( fb_or_pctp && std::isdigit( arg[i] ) )
+ {
+ const char * tail = arg + i;
+ const int llimit = std::strchr( tail, '.' ) ? 0 : 1;
+ *fb_or_pctp = getnum( tail, option_name, 0, llimit, LONG_MAX, &tail );
+ if( *tail == 0 )
+ { if( tail[-1] == 'B' ) { *fctypep = fc_bytes; return true; }
+ if( std::isdigit( tail[-1] ) )
+ { if( *fb_or_pctp <= max_nk16 )
+ { *fctypep = fc_blocks; return true; }
+ getnum( arg + 1, option_name, 0, 1, max_nk16 ); } }
+ else if( *fb_or_pctp <= 100 && std::isdigit( tail[-1] ) )
+ { if( *tail == '%' && tail[1] == 0 )
+ { *fb_or_pctp *= 1000; *fctypep = fc_percent; return true; }
+ if( *tail == '.' && std::isdigit( *++tail ) )
+ { for( int j = 0; j < 3; ++j ) { *fb_or_pctp *= 10;
+ if( std::isdigit( *tail ) ) *fb_or_pctp += *tail++ - '0'; }
+ if( *tail >= '5' && *tail <= '9' ) { ++tail; ++*fb_or_pctp; }
+ while( std::isdigit( *tail ) ) { ++tail;
+ if( *fb_or_pctp == 0 && tail[-1] > '0' ) *fb_or_pctp = 1; }
+ if( *tail == '%' && tail[1] == 0 && *fb_or_pctp <= 100000 &&
+ *fb_or_pctp > 0 ) { *fctypep = fc_percent; return true; } } }
+ return false;
+ }
+ if( arg[i] != target[i] ) break;
+ }
+ return false;
+ }
+
+
+void parse_fec( const char * const arg, const char * const option_name,
+ Mode & program_mode, unsigned long & fb_or_pct,
+ unsigned & cblocks, unsigned & delta, int & sector_size,
+ std::vector< Block > & range_vector, char & fctype )
+ {
+ if( compare_prefix( arg, "create", option_name, &fb_or_pct, &fctype ) )
+ set_mode( program_mode, m_fec_create );
+ else if( compare_prefix( arg, "repair" ) )
+ set_mode( program_mode, m_fec_repair );
+ else if( compare_prefix( arg, "test" ) )
+ set_mode( program_mode, m_fec_test );
+ else if( compare_prefix( arg, "list" ) )
+ set_mode( program_mode, m_fec_list );
+ else if( arg[0] == 'd' && arg[1] == 'c' )
+ { const char * tail = arg + 2;
+ cblocks = getnum( tail, option_name, 0, 1, max_nk16, &tail );
+ if( *tail != 0 )
+ { show_option_error( arg, inv_arg_msg, option_name ); std::exit( 1 ); }
+ set_mode( program_mode, m_fec_dc ); }
+ else if( arg[0] == 'd' && arg[1] == 'z' )
+ { parse_range_vector( arg + 2, option_name, range_vector );
+ set_mode( program_mode, m_fec_dz ); }
+ else if( arg[0] == 'd' && arg[1] == 'Z' )
+ { const char * tail = arg + 2;
+ sector_size = getnum( tail, option_name, 0, 1, INT_MAX, &tail );
+ if( *tail == 0 ) delta = sector_size;
+ else if( *tail == ',' )
+ delta = getnum( tail + 1, option_name, 0, 1, INT_MAX );
+ else { show_option_error( arg, "Comma expected before delta in",
+ option_name ); std::exit( 1 ); }
+ set_mode( program_mode, m_fec_dZ ); }
+ else
+ { show_option_error( arg, inv_arg_msg, option_name ); std::exit( 1 ); }
+ }
+
+
void parse_u( const char * const arg, const char * const option_name,
Mode & program_mode, int & sector_size )
{
@@ -363,8 +508,7 @@ void parse_u( const char * const arg, const char * const option_name,
{ set_mode( program_mode, m_unzcrash_block );
sector_size = getnum( arg + 1, option_name, 0, 1, INT_MAX ); }
else
- { show_option_error( arg, "Invalid argument in", option_name );
- std::exit( 1 ); }
+ { show_option_error( arg, inv_arg_msg, option_name ); std::exit( 1 ); }
}
@@ -423,9 +567,9 @@ int open_instream( const char * const name, struct stat * const in_statsp,
{
const int i = fstat( infd, in_statsp );
const mode_t mode = in_statsp->st_mode;
- const bool can_read = ( i == 0 && !reg_only &&
- ( S_ISBLK( mode ) || S_ISCHR( mode ) ||
- S_ISFIFO( mode ) || S_ISSOCK( mode ) ) );
+ const bool can_read = i == 0 && !reg_only &&
+ ( S_ISBLK( mode ) || S_ISCHR( mode ) ||
+ S_ISFIFO( mode ) || S_ISSOCK( mode ) );
if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || one_to_one ) ) )
{
if( verbosity >= 0 )
@@ -487,6 +631,9 @@ bool make_dirs( const std::string & name )
const char * const force_msg =
"Output file already exists. Use '--force' to overwrite it.";
+unsigned char xdigit( const unsigned value ) // hex digit for 'value'
+ { return (value <= 9) ? '0' + value : (value <= 15) ? 'A' + value - 10 : 0; }
+
} // end namespace
bool open_outstream( const bool force, const bool protect,
@@ -499,8 +646,8 @@ bool open_outstream( const bool force, const bool protect,
if( force ) flags |= O_TRUNC; else flags |= O_EXCL;
outfd = -1;
- if( output_filename.size() &&
- output_filename[output_filename.size()-1] == '/' ) errno = EISDIR;
+ if( output_filename.size() && output_filename.end()[-1] == '/' )
+ errno = EISDIR;
else {
if( ( !protect || to_file ) && !make_dirs( output_filename ) )
{ show_file_error( output_filename.c_str(),
@@ -536,6 +683,7 @@ void set_signals( void (*action)(int) )
void cleanup_and_fail( const int retval )
{
set_signals( SIG_IGN ); // ignore signals
+ cleanup_mutex_lock(); // only one thread can delete and exit
if( delete_output_on_interrupt )
{
delete_output_on_interrupt = false;
@@ -559,6 +707,22 @@ bool check_tty_out()
return true;
}
+
+void format_trailing_bytes( const uint8_t * const data, const int size,
+ std::string & msg )
+ {
+ for( int i = 0; i < size; ++i )
+ {
+ msg += xdigit( data[i] >> 4 );
+ msg += xdigit( data[i] & 0x0F );
+ msg += ' ';
+ }
+ msg += '\'';
+ for( int i = 0; i < size; ++i )
+ msg += std::isprint( data[i] ) ? data[i] : '.';
+ msg += '\'';
+ }
+
namespace {
extern "C" void signal_handler( int )
@@ -617,14 +781,6 @@ void close_and_set_permissions( const struct stat * const in_statsp )
}
-unsigned char xdigit( const unsigned value ) // hex digit for 'value'
- {
- if( value <= 9 ) return '0' + value;
- if( value <= 15 ) return 'A' + value - 10;
- return 0;
- }
-
-
bool show_trailing_data( const uint8_t * const data, const int size,
const Pretty_print & pp, const bool all,
const int ignore_trailing ) // -1 = show
@@ -634,16 +790,7 @@ bool show_trailing_data( const uint8_t * const data, const int size,
std::string msg;
if( !all ) msg = "first bytes of ";
msg += "trailing data = ";
- for( int i = 0; i < size; ++i )
- {
- msg += xdigit( data[i] >> 4 );
- msg += xdigit( data[i] & 0x0F );
- msg += ' ';
- }
- msg += '\'';
- for( int i = 0; i < size; ++i )
- { if( std::isprint( data[i] ) ) msg += data[i]; else msg += '.'; }
- msg += '\'';
+ format_trailing_bytes( data, size, msg );
pp( msg.c_str() );
if( ignore_trailing == 0 ) show_file_error( pp.name(), trailing_msg );
}
@@ -653,11 +800,12 @@ bool show_trailing_data( const uint8_t * const data, const int size,
int decompress( const unsigned long long cfile_size, const int infd,
const Cl_options & cl_opts, const Pretty_print & pp,
- const bool testing )
+ const bool from_stdin, const bool testing )
{
unsigned long long partial_file_pos = 0;
Range_decoder rdec( infd );
int retval = 0;
+ bool empty = false, multi = false;
for( bool first_member = true; ; first_member = false )
{
@@ -700,11 +848,10 @@ int decompress( const unsigned long long cfile_size, const int infd,
LZ_decoder decoder( rdec, dictionary_size, outfd );
show_dprogress( cfile_size, partial_file_pos, &rdec, &pp ); // init
- const int result = decoder.decode_member( cl_opts, pp );
+ const int result = decoder.decode_member( pp, cl_opts.ignore_errors );
partial_file_pos += rdec.member_position();
if( result != 0 )
{
- retval = 2;
if( verbosity >= 0 && result <= 2 )
{
pp();
@@ -712,15 +859,19 @@ int decompress( const unsigned long long cfile_size, const int infd,
"File ends unexpectedly" : "Decoder error",
partial_file_pos );
}
- else if( result == 5 ) { pp( empty_msg ); break; }
- else if( result == 6 ) { pp( marking_msg ); break; }
+ else if( result == 5 ) pp( nonzero_msg );
+ retval = 2;
if( cl_opts.ignore_errors ) { pp.reset(); continue; } else break;
}
+ if( !from_stdin && !cl_opts.ignore_errors ) { multi = !first_member;
+ if( decoder.data_position() == 0 ) empty = true; }
if( verbosity >= 2 )
{ std::fputs( testing ? "ok\n" : "done\n", stderr ); pp.reset(); }
}
if( verbosity == 1 && retval == 0 )
std::fputs( testing ? "ok\n" : "done\n", stderr );
+ if( empty && multi && retval == 0 )
+ { show_file_error( pp.name(), empty_msg ); retval = 2; }
if( retval == 2 && cl_opts.ignore_errors ) retval = 0;
return retval;
}
@@ -739,7 +890,7 @@ bool close_outstream( const struct stat * const in_statsp )
}
-std::string insert_fixed( std::string name )
+std::string insert_fixed( std::string name, const bool append_lz )
{
if( name.size() > 7 && name.compare( name.size() - 7, 7, ".tar.lz" ) == 0 )
name.insert( name.size() - 7, "_fixed" );
@@ -747,7 +898,8 @@ std::string insert_fixed( std::string name )
name.insert( name.size() - 3, "_fixed" );
else if( name.size() > 4 && name.compare( name.size() - 4, 4, ".tlz" ) == 0 )
name.insert( name.size() - 4, "_fixed" );
- else name += "_fixed.lz";
+ else if( append_lz ) name += "_fixed.lz";
+ else name += "_fixed";
return name;
}
@@ -794,73 +946,108 @@ void show_dprogress( const unsigned long long cfile_size,
int main( const int argc, const char * const argv[] )
{
+ std::vector< Block > range_vector;
Block range( 0, 0 );
int sector_size = INT_MAX; // default larger than practical range
Bad_byte bad_byte;
Member_list member_list;
+ std::string cl_fec_filename;
std::string default_output_filename;
const char * lzip_name = "lzip"; // default is lzip
const char * reference_filename = 0;
+ unsigned long fb_or_pct = 8; // fec blocks, bytes (B), or 0.001% to 100%
+ unsigned cblocks = 0; // blocks per combination in fec_dc
+ unsigned cl_block_size = 0; // make fbs a multiple of this
+ unsigned num_workers = 0; // start this many worker threads
+ unsigned delta = 0; // set to 0 to keep gcc 6.1.0 quiet
Mode program_mode = m_none;
int lzip_level = 0; // 0 = test all levels and match lengths
// '0'..'9' = level, 'a' = all levels
// -5..-273 = match length, -1 = all lengths
int repeated_byte = -1; // 0 to 255, or -1 for all values
Cl_options cl_opts; // command-line options
+ char debug_level = 0;
+ char fctype = fc_blocks; // type of value in fb_or_pct
+ char fec_level = 9; // fec fragmentation level, default = "-9"
+ char recursive = 0; // 1 = '-r', 2 = '-R'
+ bool cl_gf16 = false;
+ bool fec_random = false;
bool force = false;
bool keep_input_files = false;
bool to_stdout = false;
if( argc > 0 ) invocation_name = argv[0];
- enum { opt_cm = 256, opt_du, opt_eer, opt_lt, opt_lzl, opt_lzn, opt_mer,
- opt_ref, opt_rem, opt_st };
+ enum { opt_chk = 256, opt_dbg, opt_du, opt_ff, opt_g16, opt_lt,
+ opt_lzl, opt_lzn, opt_nzr, opt_ref, opt_rem, opt_rnd, opt_st };
const Arg_parser::Option options[] =
{
- { 'a', "trailing-error", Arg_parser::no },
- { 'A', "alone-to-lz", Arg_parser::no },
- { 'c', "stdout", Arg_parser::no },
- { 'd', "decompress", Arg_parser::no },
- { 'D', "range-decompress", Arg_parser::yes },
- { 'e', "reproduce", Arg_parser::no },
- { 'E', "debug-reproduce", Arg_parser::yes },
- { 'f', "force", Arg_parser::no },
- { 'h', "help", Arg_parser::no },
- { 'i', "ignore-errors", Arg_parser::no },
- { 'k', "keep", Arg_parser::no },
- { 'l', "list", Arg_parser::no },
- { 'm', "merge", Arg_parser::no },
- { 'M', "md5sum", Arg_parser::no },
- { 'n', "threads", Arg_parser::yes },
- { 'o', "output", Arg_parser::yes },
- { 'q', "quiet", Arg_parser::no },
- { 'R', "byte-repair", Arg_parser::no },
- { 'R', "repair", Arg_parser::no },
- { 's', "split", Arg_parser::no },
- { 'S', "nrep-stats", Arg_parser::maybe },
- { 't', "test", Arg_parser::no },
- { 'U', "unzcrash", Arg_parser::yes },
- { 'v', "verbose", Arg_parser::no },
- { 'V', "version", Arg_parser::no },
- { 'W', "debug-decompress", Arg_parser::yes },
- { 'X', "show-packets", Arg_parser::maybe },
- { 'Y', "debug-delay", Arg_parser::yes },
- { 'Z', "debug-byte-repair", Arg_parser::yes },
- { opt_cm, "clear-marking", Arg_parser::no },
- { opt_du, "dump", Arg_parser::yes },
- { opt_eer, "empty-error", Arg_parser::no },
- { opt_lt, "loose-trailing", Arg_parser::no },
- { opt_lzl, "lzip-level", Arg_parser::yes },
- { opt_lzn, "lzip-name", Arg_parser::yes },
- { opt_mer, "marking-error", Arg_parser::no },
- { opt_ref, "reference-file", Arg_parser::yes },
- { opt_rem, "remove", Arg_parser::yes },
- { opt_st, "strip", Arg_parser::yes },
- { 0, 0, Arg_parser::no } };
+ { '0', 0, Arg_parser::no },
+ { '1', 0, Arg_parser::no },
+ { '2', 0, Arg_parser::no },
+ { '3', 0, Arg_parser::no },
+ { '4', 0, Arg_parser::no },
+ { '5', 0, Arg_parser::no },
+ { '6', 0, Arg_parser::no },
+ { '7', 0, Arg_parser::no },
+ { '8', 0, Arg_parser::no },
+ { '9', 0, Arg_parser::no },
+ { 'a', "trailing-error", Arg_parser::no },
+ { 'A', "alone-to-lz", Arg_parser::no },
+ { 'b', "block-size", Arg_parser::yes },
+ { 'B', "byte-repair", Arg_parser::no },
+ { 'B', "repair", Arg_parser::no },
+ { 'c', "stdout", Arg_parser::no },
+ { 'd', "decompress", Arg_parser::no },
+ { 'D', "range-decompress", Arg_parser::yes },
+ { 'e', "reproduce", Arg_parser::no },
+ { 'E', "debug-reproduce", Arg_parser::yes },
+ { 'f', "force", Arg_parser::no },
+ { 'F', "fec", Arg_parser::yes },
+ { 'h', "help", Arg_parser::no },
+ { 'i', "ignore-errors", Arg_parser::no },
+ { 'k', "keep", Arg_parser::no },
+ { 'l', "list", Arg_parser::no },
+ { 'm', "merge", Arg_parser::no },
+ { 'M', "md5sum", Arg_parser::no },
+ { 'n', "threads", Arg_parser::yes },
+ { 'o', "output", Arg_parser::yes },
+ { 'q', "quiet", Arg_parser::no },
+ { 'r', "recursive", Arg_parser::no },
+ { 'R', "dereference-recursive", Arg_parser::no },
+ { 's', "split", Arg_parser::no },
+ { 'S', "nrep-stats", Arg_parser::maybe },
+ { 't', "test", Arg_parser::no },
+ { 'U', "unzcrash", Arg_parser::yes },
+ { 'v', "verbose", Arg_parser::no },
+ { 'V', "version", Arg_parser::no },
+ { 'W', "debug-decompress", Arg_parser::yes },
+ { 'X', "show-packets", Arg_parser::maybe },
+ { 'Y', "debug-delay", Arg_parser::yes },
+ { 'Z', "debug-byte-repair", Arg_parser::yes },
+ { opt_chk, "check", Arg_parser::yes },
+ { opt_dbg, "debug", Arg_parser::yes },
+ { opt_du, "dump", Arg_parser::yes },
+ { opt_ff, "fec-file", Arg_parser::yes },
+ { opt_g16, "gf16", Arg_parser::no },
+ { opt_lt, "loose-trailing", Arg_parser::no },
+ { opt_lzl, "lzip-level", Arg_parser::yes },
+ { opt_lzn, "lzip-name", Arg_parser::yes },
+ { opt_nzr, "nonzero-repair", Arg_parser::no },
+ { opt_ref, "reference-file", Arg_parser::yes },
+ { opt_rem, "remove", Arg_parser::yes },
+ { opt_rnd, "random", Arg_parser::no },
+ { opt_st, "strip", Arg_parser::yes },
+ { 0, 0, Arg_parser::no } };
const Arg_parser parser( argc, argv, options );
if( parser.error().size() ) // bad option
{ show_error( parser.error().c_str(), 0, true ); return 1; }
+ const long num_online = std::max( 1L, sysconf( _SC_NPROCESSORS_ONLN ) );
+ long max_workers = sysconf( _SC_THREAD_THREADS_MAX );
+ if( max_workers < 1 || max_workers > INT_MAX / (int)sizeof (pthread_t) )
+ max_workers = INT_MAX / sizeof (pthread_t);
+
int argind = 0;
for( ; argind < parser.arguments(); ++argind )
{
@@ -871,8 +1058,13 @@ int main( const int argc, const char * const argv[] )
const char * const arg = sarg.c_str();
switch( code )
{
+ case '0': case '1': case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9': fec_level = code - '0'; break;
case 'a': cl_opts.ignore_trailing = false; break;
case 'A': set_mode( program_mode, m_alone_to_lz ); break;
+ case 'b': cl_block_size = getnum( arg, pn, 0, min_fbs, max_unit_fbs ) &
+ ( max_unit_fbs - min_fbs ); break;
+ case 'B': set_mode( program_mode, m_byte_repair ); break;
case 'c': to_stdout = true; break;
case 'd': set_mode( program_mode, m_decompress ); break;
case 'D': set_mode( program_mode, m_range_dec );
@@ -881,17 +1073,20 @@ int main( const int argc, const char * const argv[] )
case 'E': set_mode( program_mode, m_reproduce );
parse_range( arg, pn, range, &sector_size ); break;
case 'f': force = true; break;
- case 'h': show_help(); return 0;
+ case 'F': parse_fec( arg, pn, program_mode, fb_or_pct, cblocks, delta,
+ sector_size, range_vector, fctype ); break;
+ case 'h': show_help( num_online ); return 0;
case 'i': cl_opts.ignore_errors = true; break;
case 'k': keep_input_files = true; break;
case 'l': set_mode( program_mode, m_list ); break;
case 'm': set_mode( program_mode, m_merge ); break;
case 'M': set_mode( program_mode, m_md5sum ); break;
- case 'n': break;
+ case 'n': num_workers = getnum( arg, pn, 0, 1, max_workers ); break;
case 'o': if( sarg == "-" ) to_stdout = true;
else { default_output_filename = sarg; } break;
- case 'q': verbosity = -1; break;
- case 'R': set_mode( program_mode, m_byte_repair ); break;
+ case 'q': cl_verbosity = verbosity = -1; break;
+ case 'r': recursive = 1; break;
+ case 'R': recursive = 2; break;
case 's': set_mode( program_mode, m_split ); break;
case 'S': if( arg[0] ) repeated_byte = getnum( arg, pn, 0, 0, 255 );
set_mode( program_mode, m_nrep_stats ); break;
@@ -907,20 +1102,23 @@ int main( const int argc, const char * const argv[] )
parse_range( arg, pn, range ); break;
case 'Z': set_mode( program_mode, m_debug_byte_repair );
bad_byte.parse_bb( arg, pn ); break;
- case opt_cm: set_mode( program_mode, m_clear_marking );
- cl_opts.ignore_marking = true; break;
- case opt_du: set_mode( program_mode, m_dump );
- member_list.parse_ml( arg, pn, cl_opts ); break;
- case opt_eer: cl_opts.ignore_empty = false; break;
+ case opt_chk: set_mode( program_mode, m_check );
+ cblocks = getnum( arg, pn, 0, 1, max_k16 ); break;
+ case opt_dbg: debug_level = getnum( arg, pn, 0, 0, 3 ); break;
+ case opt_du: set_mode( program_mode, m_dump );
+ member_list.parse_ml( arg, pn, cl_opts ); break;
+ case opt_ff: cl_fec_filename = sarg; break;
+ case opt_g16: cl_gf16 = true; break;
case opt_lt: cl_opts.loose_trailing = true; break;
case opt_lzl: lzip_level = parse_lzip_level( arg, pn ); break;
case opt_lzn: lzip_name = arg; break;
- case opt_mer: cl_opts.ignore_marking = false; break;
+ case opt_nzr: set_mode( program_mode, m_nonzero_repair ); break;
case opt_ref: reference_filename = arg; break;
case opt_rem: set_mode( program_mode, m_remove );
member_list.parse_ml( arg, pn, cl_opts ); break;
- case opt_st: set_mode( program_mode, m_strip );
- member_list.parse_ml( arg, pn, cl_opts ); break;
+ case opt_rnd: fec_random = true; break;
+ case opt_st: set_mode( program_mode, m_strip );
+ member_list.parse_ml( arg, pn, cl_opts ); break;
default: internal_error( "uncaught option." );
}
} // end process options
@@ -951,34 +1149,59 @@ int main( const int argc, const char * const argv[] )
case m_none: internal_error( "invalid operation." ); break;
case m_alone_to_lz: break;
case m_byte_repair:
- one_file( filenames.size() );
+ one_file( filenames.size() ); no_to_stdout( to_stdout );
return byte_repair( filenames[0], default_output_filename, cl_opts,
terminator, force );
- case m_clear_marking:
- at_least_one_file( filenames.size() );
- return clear_marking( filenames, cl_opts );
+ case m_check: return gf_check( cblocks, cl_gf16, fec_random );
case m_debug_byte_repair:
one_file( filenames.size() );
- return debug_byte_repair( filenames[0].c_str(), cl_opts, bad_byte, terminator );
+ return debug_byte_repair( filenames[0], cl_opts, bad_byte, terminator );
case m_debug_decompress:
one_file( filenames.size() );
- return debug_decompress( filenames[0].c_str(), cl_opts, bad_byte, false );
+ return debug_decompress( filenames[0], cl_opts, bad_byte, false );
case m_debug_delay:
one_file( filenames.size() );
- return debug_delay( filenames[0].c_str(), cl_opts, range, terminator );
+ return debug_delay( filenames[0], cl_opts, range, terminator );
case m_decompress: break;
case m_dump:
case m_strip:
at_least_one_file( filenames.size() );
return dump_members( filenames, default_output_filename, cl_opts,
member_list, force, program_mode == m_strip, to_stdout );
+ case m_fec_create:
+ at_least_one_file( filenames.size() );
+ if( num_workers <= 0 ) num_workers = std::min( num_online, max_workers );
+ return fec_create( filenames, default_output_filename, fb_or_pct,
+ cl_block_size, num_workers, debug_level, fctype, fec_level,
+ recursive, cl_gf16, fec_random, force, to_stdout );
+ case m_fec_repair:
+ case m_fec_test:
+ at_least_one_file( filenames.size() );
+ return fec_test( filenames, cl_fec_filename, default_output_filename,
+ recursive, force, cl_opts.ignore_errors,
+ program_mode == m_fec_repair, to_stdout );
+ case m_fec_list:
+ if( filenames.empty() ) filenames.push_back("-");
+ return fec_list( filenames, cl_opts.ignore_errors );
+ case m_fec_dc:
+ one_file( filenames.size() );
+ return fec_dc( filenames[0], cl_fec_filename, cblocks );
+ case m_fec_dz:
+ one_file( filenames.size() );
+ return fec_dz( filenames[0], cl_fec_filename, range_vector );
+ case m_fec_dZ:
+ one_file( filenames.size() );
+ return fec_dZ( filenames[0], cl_fec_filename, delta, sector_size );
case m_list: break;
case m_md5sum: break;
- case m_merge:
+ case m_merge: no_to_stdout( to_stdout );
if( filenames.size() < 2 )
{ show_error( "You must specify at least 2 files.", 0, true ); return 1; }
return merge_files( filenames, default_output_filename, cl_opts,
terminator, force );
+ case m_nonzero_repair:
+ at_least_one_file( filenames.size() );
+ return nonzero_repair( filenames, cl_opts );
case m_nrep_stats:
return print_nrep_stats( filenames, cl_opts, repeated_byte );
case m_range_dec:
@@ -989,28 +1212,28 @@ int main( const int argc, const char * const argv[] )
at_least_one_file( filenames.size() );
return remove_members( filenames, cl_opts, member_list );
case m_reproduce:
- one_file( filenames.size() );
+ one_file( filenames.size() ); no_to_stdout( to_stdout );
if( !reference_filename || !reference_filename[0] )
{ show_error( "You must specify a reference file.", 0, true ); return 1; }
if( range.size() > 0 )
- return debug_reproduce_file( filenames[0].c_str(), lzip_name,
+ return debug_reproduce_file( filenames[0], lzip_name,
reference_filename, cl_opts, range, sector_size, lzip_level );
else
return reproduce_file( filenames[0], default_output_filename, lzip_name,
reference_filename, cl_opts, lzip_level, terminator, force );
case m_show_packets:
one_file( filenames.size() );
- return debug_decompress( filenames[0].c_str(), cl_opts, bad_byte, true );
+ return debug_decompress( filenames[0], cl_opts, bad_byte, true );
case m_split:
- one_file( filenames.size() );
+ one_file( filenames.size() ); no_to_stdout( to_stdout );
return split_file( filenames[0], default_output_filename, cl_opts, force );
case m_test: break;
case m_unzcrash_bit:
one_file( filenames.size() );
- return lunzcrash_bit( filenames[0].c_str(), cl_opts );
+ return lunzcrash_bit( filenames[0], cl_opts );
case m_unzcrash_block:
one_file( filenames.size() );
- return lunzcrash_block( filenames[0].c_str(), cl_opts, sector_size );
+ return lunzcrash_block( filenames[0], cl_opts, sector_size );
}
}
catch( std::bad_alloc & ) { show_error( mem_msg ); cleanup_and_fail( 1 ); }
@@ -1048,9 +1271,10 @@ int main( const int argc, const char * const argv[] )
{
std::string input_filename;
int infd;
+ const bool from_stdin = filenames[i] == "-";
pp.set_name( filenames[i] );
- if( filenames[i] == "-" )
+ if( from_stdin )
{
if( stdin_used ) continue; else stdin_used = true;
infd = STDIN_FILENO;
@@ -1092,7 +1316,8 @@ int main( const int argc, const char * const argv[] )
if( program_mode == m_alone_to_lz )
tmp = alone_to_lz( infd, pp );
else
- tmp = decompress( cfile_size, infd, cl_opts, pp, program_mode == m_test );
+ tmp = decompress( cfile_size, infd, cl_opts, pp, from_stdin,
+ program_mode == m_test );
}
catch( std::bad_alloc & ) { pp( mem_msg ); tmp = 1; }
catch( Error & e ) { pp(); show_error( e.msg, errno ); tmp = 1; }
diff --git a/main_common.cc b/main_common.cc
index dfaccac..0762182 100644
--- a/main_common.cc
+++ b/main_common.cc
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2024 Antonio Diaz Diaz.
+ Copyright (C) 2009-2025 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -15,9 +15,12 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+int cl_verbosity = 0; // used to silence internal_error if '-q'
+int verbosity = 0;
+
namespace {
-const char * const program_year = "2024";
+const char * const program_year = "2025";
void show_version()
{
@@ -30,7 +33,7 @@ void show_version()
// separate numbers of 5 or more digits in groups of 3 digits using '_'
-const char * format_num3( long long num )
+const char * format_num3p( long long num, const bool raw = false )
{
enum { buffers = 8, bufsize = 4 * sizeof num, n = 10 };
const char * const si_prefix = "kMGTPEZYRQ";
@@ -42,7 +45,7 @@ const char * format_num3( long long num )
char * p = buf + bufsize - 1; // fill the buffer backwards
*p = 0; // terminator
const bool negative = num < 0;
- if( num > 1024 || num < -1024 )
+ if( !raw && ( num > 9999 || num < -9999 ) )
{
char prefix = 0; // try binary first, then si
for( int i = 0; i < n && num != 0 && num % 1024 == 0; ++i )
@@ -136,8 +139,8 @@ long long getnum( const char * const arg, const char * const option_name,
{
if( verbosity >= 0 )
std::fprintf( stderr, "%s: '%s': Value out of limits [%s,%s] in "
- "option '%s'.\n", program_name, arg, format_num3( llimit ),
- format_num3( ulimit ), option_name );
+ "option '%s'.\n", program_name, arg, format_num3p( llimit ),
+ format_num3p( ulimit ), option_name );
std::exit( 1 );
}
if( tailp ) *tailp = tail;
@@ -148,7 +151,6 @@ long long getnum( const char * const arg, const char * const option_name,
// Recognized formats: <pos>,<value> <pos>,+<value> <pos>,f<value>
-//
void Bad_byte::parse_bb( const char * const arg, const char * const pn )
{
argument = arg;
@@ -166,6 +168,9 @@ void Bad_byte::parse_bb( const char * const arg, const char * const pn )
}
+const char * format_num3( long long num ) { return format_num3p( num, true ); }
+
+
void show_error( const char * const msg, const int errcode, const bool help )
{
if( verbosity < 0 ) return;
@@ -191,7 +196,7 @@ void show_file_error( const char * const filename, const char * const msg,
void internal_error( const char * const msg )
{
- if( verbosity >= 0 )
+ if( cl_verbosity >= 0 )
std::fprintf( stderr, "%s: internal error: %s\n", program_name, msg );
std::exit( 3 );
}
diff --git a/md5.cc b/md5.cc
index e0a8afb..39d2fcf 100644
--- a/md5.cc
+++ b/md5.cc
@@ -1,6 +1,6 @@
/* Functions to compute MD5 message digest of memory blocks according to the
definition of MD5 in RFC 1321 from April 1992.
- Copyright (C) 2020-2024 Antonio Diaz Diaz.
+ Copyright (C) 2020-2025 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@@ -178,7 +178,7 @@ void MD5SUM::md5_finish( md5_type & digest )
md5_update( padding, len ); // pad to 56 mod 64
md5_update( bits, 8 ); // append data length in bits
- for( int i = 0, j = 0; i < 4; i++, j += 4 ) // store state in digest
+ for( int i = 0, j = 0; i < 4; ++i, j += 4 ) // store state in digest
{
digest[j ] = (uint8_t)state[i];
digest[j+1] = (uint8_t)(state[i] >> 8);
diff --git a/md5.h b/md5.h
index 9f3b598..8462e0a 100644
--- a/md5.h
+++ b/md5.h
@@ -1,6 +1,6 @@
/* Functions to compute MD5 message digest of memory blocks according to the
definition of MD5 in RFC 1321 from April 1992.
- Copyright (C) 2020-2024 Antonio Diaz Diaz.
+ Copyright (C) 2020-2025 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@@ -23,7 +23,7 @@ struct md5_type
uint8_t data[16]; // 128-bit md5 digest
bool operator==( const md5_type & d ) const
- { return ( std::memcmp( data, d.data, 16 ) == 0 ); }
+ { return std::memcmp( data, d.data, 16 ) == 0; }
bool operator!=( const md5_type & d ) const { return !( *this == d ); }
// const uint8_t & operator[]( const int i ) const { return data[i]; }
uint8_t & operator[]( const int i ) { return data[i]; }
diff --git a/merge.cc b/merge.cc
index 2951b50..627f74d 100644
--- a/merge.cc
+++ b/merge.cc
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2024 Antonio Diaz Diaz.
+ Copyright (C) 2009-2025 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -65,7 +65,7 @@ bool file_crc( uint32_t & crc, const int infd, const char * const filename )
{
const int rd = readblock( infd, buffer, buffer_size );
if( rd != buffer_size && errno )
- { show_file_error( filename, "Error reading input file", errno );
+ { show_file_error( filename, read_error_msg, errno );
error = true; break; }
if( rd > 0 )
crc32.update_buf( crc, buffer, rd );
@@ -140,25 +140,23 @@ bool diff_member( const long long mpos, const long long msize,
continue;
std::vector< Block > bv;
long long partial_pos = 0;
- const char * const filename1 = filenames[i1].c_str();
- const char * const filename2 = filenames[i2].c_str();
const int fd1 = infd_vector[i1], fd2 = infd_vector[i2];
int begin = -1; // begin of block. -1 means no block
bool prev_equal = true;
- if( !safe_seek( fd1, mpos, filename1 ) ||
- !safe_seek( fd2, mpos, filename2 ) ) { error = true; break; }
+ if( !safe_seek( fd1, mpos, filenames[i1] ) ||
+ !safe_seek( fd2, mpos, filenames[i2] ) ) { error = true; break; }
while( partial_pos < msize )
{
const int size = std::min( (long long)buffer_size, msize - partial_pos );
const int rd = readblock( fd1, buffer1, size );
if( rd != size && errno )
- { show_file_error( filename1, "Error reading input file", errno );
+ { show_file_error( filenames[i1].c_str(), read_error_msg, errno );
error = true; break; }
if( rd > 0 )
{
if( readblock( fd2, buffer2, rd ) != rd )
- { show_file_error( filename2, "Error reading input file", errno );
+ { show_file_error( filenames[i2].c_str(), read_error_msg, errno );
error = true; break; }
for( int i = 0; i < rd; ++i )
{
@@ -267,8 +265,7 @@ int open_input_files( const std::vector< std::string > & filenames,
}
}
if( tmp < min_member_size )
- { show_file_error( filenames[i].c_str(), "Input file is too short." );
- return 2; }
+ { show_file_error( filenames[i].c_str(), short_file_msg ); return 2; }
if( i == 0 ) insize = tmp;
else if( insize != tmp )
{ show_2file_error( "Sizes of input files", filenames[0].c_str(),
@@ -286,21 +283,20 @@ int open_input_files( const std::vector< std::string > & filenames,
for( int i = 0; i < files; ++i )
{
- const char * const filename = filenames[i].c_str();
const int infd = infd_vector[i];
bool error = false;
for( long j = 0; j < lzip_index.members(); ++j )
{
const long long mpos = lzip_index.mblock( j ).pos();
const long long msize = lzip_index.mblock( j ).size();
- if( !safe_seek( infd, mpos, filename ) ) return 1;
+ if( !safe_seek( infd, mpos, filenames[i] ) ) return 1;
if( test_member_from_file( infd, msize ) != 0 ) { error = true; break; }
}
if( !error )
{
if( verbosity >= 1 )
std::printf( "Input file '%s' has no errors. Recovery is not needed.\n",
- filename );
+ filenames[i].c_str() );
return 0;
}
}
@@ -366,10 +362,10 @@ bool try_merge_member2( const std::vector< std::string > & filenames,
if( i1 == i2 || color_vector[i1] == color_vector[i2] ||
color_done( color_vector, i1 ) ) continue;
for( int bi = 0; bi < blocks; ++bi )
- if( !safe_seek( infd_vector[i2], block_vector[bi].pos(), filenames[i2].c_str() ) ||
- !safe_seek( outfd, block_vector[bi].pos(), output_filename.c_str() ) ||
- !copy_file( infd_vector[i2], outfd, block_vector[bi].size() ) )
- cleanup_and_fail( 1 );
+ if( !safe_seek( infd_vector[i2], block_vector[bi].pos(), filenames[i2] ) ||
+ !safe_seek( outfd, block_vector[bi].pos(), output_filename ) ||
+ !copy_file( infd_vector[i2], outfd, filenames[i2], output_filename,
+ block_vector[bi].size() ) ) cleanup_and_fail( 1 );
const int infd = infd_vector[i1];
const int var = ( i1 * ( files - 1 ) ) + i2 - ( i2 > i1 ) + 1;
for( int bi = 0; bi + 1 < blocks; ++bi )
@@ -380,10 +376,11 @@ bool try_merge_member2( const std::vector< std::string > & filenames,
var, variations, bi + 1, terminator );
std::fflush( stdout ); pending_newline = true;
}
- if( !safe_seek( infd, block_vector[bi].pos(), filenames[i1].c_str() ) ||
- !safe_seek( outfd, block_vector[bi].pos(), output_filename.c_str() ) ||
- !copy_file( infd, outfd, block_vector[bi].size() ) ||
- !safe_seek( outfd, mpos, output_filename.c_str() ) )
+ if( !safe_seek( infd, block_vector[bi].pos(), filenames[i1] ) ||
+ !safe_seek( outfd, block_vector[bi].pos(), output_filename ) ||
+ !copy_file( infd, outfd, filenames[i1], output_filename,
+ block_vector[bi].size() ) ||
+ !safe_seek( outfd, mpos, output_filename ) )
cleanup_and_fail( 1 );
long long failure_pos = 0;
if( test_member_from_file( outfd, msize, &failure_pos ) == 0 )
@@ -422,8 +419,7 @@ bool try_merge_member( const std::vector< std::string > & filenames,
if( verbosity >= 2 )
{
long var = 0;
- for( int i = 0; i < blocks; ++i )
- var = ( var * files ) + file_idx[i];
+ for( int i = 0; i < blocks; ++i ) var = var * files + file_idx[i];
std::printf( " Trying variation %ld of %ld %c",
var + 1, variations, terminator );
std::fflush( stdout ); pending_newline = true;
@@ -431,14 +427,13 @@ bool try_merge_member( const std::vector< std::string > & filenames,
while( bi < blocks )
{
const int infd = infd_vector[file_idx[bi]];
- if( !safe_seek( infd, block_vector[bi].pos(), filenames[file_idx[bi]].c_str() ) ||
- !safe_seek( outfd, block_vector[bi].pos(), output_filename.c_str() ) ||
- !copy_file( infd, outfd, block_vector[bi].size() ) )
- cleanup_and_fail( 1 );
+ if( !safe_seek( infd, block_vector[bi].pos(), filenames[file_idx[bi]] ) ||
+ !safe_seek( outfd, block_vector[bi].pos(), output_filename ) ||
+ !copy_file( infd, outfd, filenames[file_idx[bi]], output_filename,
+ block_vector[bi].size() ) ) cleanup_and_fail( 1 );
++bi;
}
- if( !safe_seek( outfd, mpos, output_filename.c_str() ) )
- cleanup_and_fail( 1 );
+ if( !safe_seek( outfd, mpos, output_filename ) ) cleanup_and_fail( 1 );
long long failure_pos = 0;
if( test_member_from_file( outfd, msize, &failure_pos ) == 0 ) return true;
while( bi > 0 && mpos + failure_pos < block_vector[bi-1].pos() ) --bi;
@@ -474,12 +469,12 @@ bool try_merge_member1( const std::vector< std::string > & filenames,
{
if( i1 == i2 || color_vector[i1] == color_vector[i2] ||
color_done( color_vector, i1 ) ) continue;
+ if( !safe_seek( infd_vector[i1], pos, filenames[i1] ) ||
+ !safe_seek( infd_vector[i2], pos, filenames[i2] ) ||
+ !safe_seek( outfd, pos, output_filename ) ||
+ !copy_file( infd_vector[i2], outfd, filenames[i2], output_filename,
+ size ) ) cleanup_and_fail( 1 );
const int infd = infd_vector[i1];
- if( !safe_seek( infd, pos, filenames[i1].c_str() ) ||
- !safe_seek( infd_vector[i2], pos, filenames[i2].c_str() ) ||
- !safe_seek( outfd, pos, output_filename.c_str() ) ||
- !copy_file( infd_vector[i2], outfd, size ) )
- cleanup_and_fail( 1 );
const int var = ( i1 * ( files - 1 ) ) + i2 - ( i2 > i1 ) + 1;
for( long long i = 0; i + 1 < size; ++i )
{
@@ -489,10 +484,10 @@ bool try_merge_member1( const std::vector< std::string > & filenames,
var, variations, pos + i, terminator );
std::fflush( stdout ); pending_newline = true;
}
- if( !safe_seek( outfd, pos + i, output_filename.c_str() ) ||
+ if( !safe_seek( outfd, pos + i, output_filename ) ||
readblock( infd, &byte, 1 ) != 1 ||
writeblock( outfd, &byte, 1 ) != 1 ||
- !safe_seek( outfd, mpos, output_filename.c_str() ) )
+ !safe_seek( outfd, mpos, output_filename ) )
cleanup_and_fail( 1 );
long long failure_pos = 0;
if( test_member_from_file( outfd, msize, &failure_pos ) == 0 )
@@ -509,11 +504,12 @@ bool try_merge_member1( const std::vector< std::string > & filenames,
/* infd and outfd can refer to the same file if copying to a lower file
position or if source and destination blocks don't overlap.
max_size < 0 means no size limit. */
-bool copy_file( const int infd, const int outfd, const long long max_size )
+bool copy_file( const int infd, const int outfd, const std::string & iname,
+ const std::string & oname, const long long max_size )
{
const int buffer_size = 65536;
// remaining number of bytes to copy
- long long rest = ( ( max_size >= 0 ) ? max_size : buffer_size );
+ long long rest = (max_size >= 0) ? max_size : buffer_size;
long long copied_size = 0;
uint8_t * const buffer = new uint8_t[buffer_size];
bool error = false;
@@ -524,12 +520,13 @@ bool copy_file( const int infd, const int outfd, const long long max_size )
if( max_size >= 0 ) rest -= size;
const int rd = readblock( infd, buffer, size );
if( rd != size && errno )
- { show_error( "Error reading input file", errno ); error = true; break; }
+ { show_file_error( printable_name( iname ), read_error_msg, errno );
+ error = true; break; }
if( rd > 0 )
{
const int wr = writeblock( outfd, buffer, rd );
if( wr != rd )
- { show_error( "Error writing output file", errno );
+ { show_file_error( printable_name( oname, false ), wr_err_msg, errno );
error = true; break; }
copied_size += rd;
}
@@ -537,7 +534,8 @@ bool copy_file( const int infd, const int outfd, const long long max_size )
}
delete[] buffer;
if( !error && max_size >= 0 && copied_size != max_size )
- { show_error( "Input file ends unexpectedly." ); error = true; }
+ { show_file_error( printable_name( iname ), "Input file ends unexpectedly." );
+ error = true; }
return !error;
}
@@ -545,7 +543,7 @@ bool copy_file( const int infd, const int outfd, const long long max_size )
/* Return value: 0 = OK, 1 = bad msize, 2 = data error.
'failure_pos' is relative to the beginning of the member. */
int test_member_from_file( const int infd, const unsigned long long msize,
- long long * const failure_posp )
+ long long * const failure_posp, bool * const nonzerop )
{
Range_decoder rdec( infd );
Lzip_header header;
@@ -560,6 +558,7 @@ int test_member_from_file( const int infd, const unsigned long long msize,
verbosity = -1; // suppress all messages
done = decoder.decode_member() == 0;
verbosity = saved_verbosity; // restore verbosity level
+ if( nonzerop ) *nonzerop = rdec.nonzero();
if( done && rdec.member_position() == msize ) return 0;
}
if( failure_posp ) *failure_posp = rdec.member_position();
@@ -579,15 +578,15 @@ int merge_files( const std::vector< std::string > & filenames,
const int retval =
open_input_files( filenames, infd_vector, cl_opts, lzip_index, &in_stats );
if( retval >= 0 ) return retval;
- if( !safe_seek( infd_vector[0], 0, filenames[0].c_str() ) ) return 1;
+ if( !safe_seek( infd_vector[0], 0, filenames[0] ) ) return 1;
const bool to_file = default_output_filename.size();
output_filename =
to_file ? default_output_filename : insert_fixed( filenames[0] );
set_signal_handler();
if( !open_outstream( force, true, true, false, to_file ) ) return 1;
- if( !copy_file( infd_vector[0], outfd ) ) // copy whole file
- cleanup_and_fail( 1 );
+ if( !copy_file( infd_vector[0], outfd, filenames[0], output_filename ) )
+ cleanup_and_fail( 1 ); // copy whole file
for( long j = 0; j < lzip_index.members(); ++j )
{
@@ -598,7 +597,7 @@ int merge_files( const std::vector< std::string > & filenames,
// different color means members are different
std::vector< int > color_vector( files, 0 );
if( !diff_member( mpos, msize, filenames, infd_vector, block_vector,
- color_vector ) || !safe_seek( outfd, mpos, output_filename.c_str() ) )
+ color_vector ) || !safe_seek( outfd, mpos, output_filename ) )
cleanup_and_fail( 1 );
if( block_vector.empty() )
diff --git a/mtester.cc b/mtester.cc
index fb9eb97..44a8eb0 100644
--- a/mtester.cc
+++ b/mtester.cc
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2024 Antonio Diaz Diaz.
+ Copyright (C) 2009-2025 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -83,7 +83,7 @@ void LZ_mtester::flush_data()
crc32.update_buf( crc_, buffer + stream_pos, size );
if( md5sum ) md5sum->md5_update( buffer + stream_pos, size );
if( outfd >= 0 && writeblock( outfd, buffer + stream_pos, size ) != size )
- throw Error( "Write error" );
+ throw Error( wr_err_msg );
if( pos >= dictionary_size )
{ partial_data_pos += pos; pos = 0; pos_wrapped = true; }
stream_pos = pos;
@@ -148,7 +148,7 @@ int LZ_mtester::test_member( const unsigned long mpos_limit,
FILE * const f, const unsigned long long byte_pos )
{
if( mpos_limit < Lzip_header::size + 5 ) return -1;
- if( member_position() == Lzip_header::size ) rdec.load();
+ if( member_position() == Lzip_header::size && !rdec.load() ) return 1;
while( !rdec.finished() )
{
if( member_position() >= mpos_limit || data_position() >= dpos_limit )
@@ -171,7 +171,7 @@ int LZ_mtester::test_member( const unsigned long mpos_limit,
if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit
{
if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
- { state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
+ { state.set_shortrep(); put_byte( peek( rep0 ) ); continue; }
}
else
{
@@ -194,22 +194,22 @@ int LZ_mtester::test_member( const unsigned long mpos_limit,
}
else // match
{
+ rep3 = rep2; rep2 = rep1; rep1 = rep0;
len = rdec.decode_len( match_len_model, pos_state );
- unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
- if( distance >= start_dis_model )
+ rep0 = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
+ if( rep0 >= start_dis_model )
{
- const unsigned dis_slot = distance;
+ const unsigned dis_slot = rep0;
const int direct_bits = ( dis_slot >> 1 ) - 1;
- distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
+ rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
if( dis_slot < end_dis_model )
- distance += rdec.decode_tree_reversed(
- bm_dis + ( distance - dis_slot ), direct_bits );
+ rep0 += rdec.decode_tree_reversed( bm_dis + ( rep0 - dis_slot ),
+ direct_bits );
else
{
- distance +=
- rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
- distance += rdec.decode_tree_reversed4( bm_align );
- if( distance == 0xFFFFFFFFU ) // marker found
+ rep0 += rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
+ rep0 += rdec.decode_tree_reversed4( bm_align );
+ if( rep0 == 0xFFFFFFFFU ) // marker found
{
rdec.normalize();
flush_data();
@@ -224,7 +224,6 @@ int LZ_mtester::test_member( const unsigned long mpos_limit,
}
}
}
- rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance;
if( rep0 > max_rep0 ) max_rep0 = rep0;
state.set_match();
if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) )
@@ -242,7 +241,7 @@ int LZ_mtester::test_member( const unsigned long mpos_limit,
int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos,
const bool show_packets )
{
- rdec.load();
+ if( !rdec.load() ) return 1;
unsigned old_tmpos = member_position(); // truncated member position
while( !rdec.finished() )
{
@@ -290,7 +289,7 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos,
std::printf( "%6llu %6llu shortrep %s %6u (%6llu)\n",
mp, dp, format_byte( peek( rep0 ) ),
rep0 + 1, dp - rep0 - 1 );
- state.set_short_rep(); put_byte( peek( rep0 ) ); continue;
+ state.set_shortrep(); put_byte( peek( rep0 ) ); continue;
}
}
else
@@ -317,22 +316,22 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos,
}
else // match
{
+ rep3 = rep2; rep2 = rep1; rep1 = rep0;
len = rdec.decode_len( match_len_model, pos_state );
- unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
- if( distance >= start_dis_model )
+ rep0 = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
+ if( rep0 >= start_dis_model )
{
- const unsigned dis_slot = distance;
+ const unsigned dis_slot = rep0;
const int direct_bits = ( dis_slot >> 1 ) - 1;
- distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
+ rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
if( dis_slot < end_dis_model )
- distance += rdec.decode_tree_reversed(
- bm_dis + ( distance - dis_slot ), direct_bits );
+ rep0 += rdec.decode_tree_reversed( bm_dis + ( rep0 - dis_slot ),
+ direct_bits );
else
{
- distance +=
- rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
- distance += rdec.decode_tree_reversed4( bm_align );
- if( distance == 0xFFFFFFFFU ) // marker found
+ rep0 += rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
+ rep0 += rdec.decode_tree_reversed4( bm_align );
+ if( rep0 == 0xFFFFFFFFU ) // marker found
{
rdec.normalize();
flush_data();
@@ -349,13 +348,10 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos,
if( check_trailer( show_packets ? stdout : 0 ) ) return 0;
return 3;
}
- if( len == min_match_len + 1 ) // Sync Flush marker
- { rdec.load(); continue; }
return 4;
}
}
}
- rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance;
if( rep0 > max_rep0 ) { max_rep0 = rep0; max_rep0_pos = mp; }
state.set_match();
if( show_packets )
diff --git a/mtester.h b/mtester.h
index 49c50b1..3af159b 100644
--- a/mtester.h
+++ b/mtester.h
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2024 Antonio Diaz Diaz.
+ Copyright (C) 2009-2025 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -53,12 +53,15 @@ public:
return p;
}
- void load()
+ bool load()
{
code = 0;
range = 0xFFFFFFFFU;
+ // check first byte of the LZMA stream without reading it
+ if( buffer[pos] != 0 ) return false;
get_byte(); // discard first byte of the LZMA stream
for( int i = 0; i < 4; ++i ) code = ( code << 8 ) | get_byte();
+ return true;
}
void normalize()
@@ -76,7 +79,7 @@ public:
range >>= 1;
// symbol <<= 1;
// if( code >= range ) { code -= range; symbol |= 1; }
- const bool bit = ( code >= range );
+ const bool bit = code >= range;
symbol <<= 1; symbol += bit;
code -= range & ( 0U - bit );
}
@@ -299,14 +302,14 @@ class LZ_mtester
bool fast, fast2;
if( lpos > distance )
{
- fast = ( len < dictionary_size - lpos );
- fast2 = ( fast && len <= lpos - i );
+ fast = len < dictionary_size - lpos;
+ fast2 = fast && len <= lpos - i;
}
else
{
i += dictionary_size;
- fast = ( len < dictionary_size - i ); // (i == pos) may happen
- fast2 = ( fast && len <= i - lpos );
+ fast = len < dictionary_size - i; // (i == pos) may happen
+ fast2 = fast && len <= i - lpos;
}
if( fast ) // no wrap
{
@@ -378,8 +381,8 @@ public:
const uint8_t * get_buffers( const uint8_t ** const prev_bufferp,
int * const sizep, int * const prev_sizep ) const
- { *sizep = ( pos_wrapped && pos == 0 ) ? dictionary_size : pos;
- *prev_sizep = ( pos_wrapped && pos > 0 ) ? dictionary_size - pos : 0;
+ { *sizep = (pos_wrapped && pos == 0) ? dictionary_size : pos;
+ *prev_sizep = (pos_wrapped && pos > 0) ? dictionary_size - pos : 0;
*prev_bufferp = buffer + pos; return buffer; }
void duplicate_buffer( uint8_t * const buffer2 );
diff --git a/nrep_stats.cc b/nrep_stats.cc
index 2c97d4d..4ea4a09 100644
--- a/nrep_stats.cc
+++ b/nrep_stats.cc
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2024 Antonio Diaz Diaz.
+ Copyright (C) 2009-2025 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -44,11 +44,11 @@ int print_nrep_stats( const std::vector< std::string > & filenames,
unsigned long long lzma_size = 0; // total size of LZMA data
unsigned long best_pos = 0;
int best_name = -1, retval = 0;
- const bool count_all = ( repeated_byte < 0 || repeated_byte >= 256 );
+ const bool count_all = repeated_byte < 0 || repeated_byte >= 256;
bool stdin_used = false;
for( unsigned i = 0; i < filenames.size(); ++i )
{
- const bool from_stdin = ( filenames[i] == "-" );
+ const bool from_stdin = filenames[i] == "-";
if( from_stdin ) { if( stdin_used ) continue; else stdin_used = true; }
const char * const input_filename =
from_stdin ? "(stdin)" : filenames[i].c_str();
@@ -68,13 +68,13 @@ int print_nrep_stats( const std::vector< std::string > & filenames,
}
const unsigned long long cdata_size = lzip_index.cdata_size();
if( !fits_in_size_t( cdata_size ) ) // mmap uses size_t
- { show_file_error( input_filename, "Input file is too large for mmap." );
+ { show_file_error( input_filename, large_file_msg );
set_retval( retval, 1 ); close( infd ); continue; }
const uint8_t * const buffer =
(const uint8_t *)mmap( 0, cdata_size, PROT_READ, MAP_PRIVATE, infd, 0 );
close( infd );
if( buffer == MAP_FAILED )
- { show_file_error( input_filename, "Can't mmap", errno );
+ { show_file_error( input_filename, mmap_msg, errno );
set_retval( retval, 1 ); continue; }
for( long j = 0; j < lzip_index.members(); ++j )
{
diff --git a/range_dec.cc b/range_dec.cc
index 0d03264..3a904d5 100644
--- a/range_dec.cc
+++ b/range_dec.cc
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2024 Antonio Diaz Diaz.
+ Copyright (C) 2009-2025 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -53,7 +53,7 @@ bool decompress_member( const int infd, const Cl_options & cl_opts,
if( verbosity >= 2 ) pp();
LZ_decoder decoder( rdec, dictionary_size, outfd, outskip, outend );
- const int result = decoder.decode_member( cl_opts, pp );
+ const int result = decoder.decode_member( pp, cl_opts.ignore_errors );
if( result != 0 )
{
if( verbosity >= 0 && result <= 2 )
@@ -94,7 +94,7 @@ const char * format_num( unsigned long long num,
static int current = 0;
static bool si = true;
- if( set_prefix ) si = ( set_prefix > 0 );
+ if( set_prefix ) si = set_prefix > 0;
unsigned long long den = 1;
const unsigned factor = si ? 1000 : 1024;
char * const buf = buffer[current++]; current %= buffers;
@@ -112,15 +112,6 @@ const char * format_num( unsigned long long num,
}
-bool safe_seek( const int fd, const long long pos,
- const char * const filename )
- {
- if( lseek( fd, pos, SEEK_SET ) == pos ) return true;
- show_file_error( filename, "Seek error", errno );
- return false;
- }
-
-
int range_decompress( const std::string & input_filename,
const std::string & default_output_filename,
const Cl_options & cl_opts, Block range,
@@ -141,7 +132,8 @@ int range_decompress( const std::string & input_filename,
if( range.end() > udata_size )
range.size( std::max( 0LL, udata_size - range.pos() ) );
if( range.size() <= 0 )
- { if( udata_size > 0 ) show_file_error( filename, "Nothing to do." );
+ { if( udata_size > 0 )
+ show_file_error( filename, "Nothing to do; range is empty." );
return 0; }
if( to_stdout || default_output_filename.empty() ) outfd = STDOUT_FILENO;
@@ -170,7 +162,7 @@ int range_decompress( const std::string & input_filename,
const long long outskip = std::max( 0LL, range.pos() - db.pos() );
const long long outend = std::min( db.size(), range.end() - db.pos() );
const long long mpos = lzip_index.mblock( i ).pos();
- if( !safe_seek( infd, mpos, filename ) ) cleanup_and_fail( 1 );
+ if( !safe_seek( infd, mpos, input_filename ) ) cleanup_and_fail( 1 );
if( !decompress_member( infd, cl_opts, pp, mpos, outskip, outend ) )
{ if( cl_opts.ignore_errors ) error = true; else cleanup_and_fail( 2 ); }
pp.reset();
diff --git a/recursive.cc b/recursive.cc
new file mode 100644
index 0000000..7b8810e
--- /dev/null
+++ b/recursive.cc
@@ -0,0 +1,130 @@
+/* Lziprecover - Data recovery tool for the lzip format
+ Copyright (C) 2023-2025 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <cerrno>
+#include <cstdio>
+#include <cstring>
+#include <list>
+#include <string>
+#include <vector>
+#include <dirent.h>
+#include <stdint.h>
+#include <sys/stat.h>
+
+#include "lzip.h"
+#include "md5.h"
+#include "fec.h"
+
+namespace {
+
+/* Return true if full_name is a regular file or (a link to) a directory.
+   full_name is examined with stat if 'follow' is true, else with lstat.
+   Return false if full_name can't be examined or is neither a regular
+   file nor a directory.
+   A directory is rejected (after printing a warning) if it has the same
+   inode and device as '*stp' (when 'stp' is non-null), or as any ancestor
+   directory named in full_name, or if any such ancestor can't be examined
+   with stat or is not a directory. */
+bool test_full_name( const std::string & full_name, const struct stat * stp,
+ const bool follow )
+ {
+ struct stat st, st2; // st: full_name itself, st2: each ancestor in turn
+ if( ( follow && stat( full_name.c_str(), &st ) != 0 ) ||
+ ( !follow && lstat( full_name.c_str(), &st ) != 0 ) ) return false;
+ if( S_ISREG( st.st_mode ) ) return true;
+ if( !S_ISDIR( st.st_mode ) ) return false; // only directories from here on
+
+ std::string prev_dir( full_name ); // shortened one component per iteration
+ bool loop = stp && st.st_ino == stp->st_ino && st.st_dev == stp->st_dev;
+ if( !loop )
+ for( unsigned i = prev_dir.size(); i > 1; )
+ {
+ while( i > 0 && prev_dir[i-1] != '/' ) --i; // drop the last component
+ if( i == 0 ) break; // no slash left; no more ancestors
+ if( i > 1 ) --i; // remove trailing slash except at root dir
+ prev_dir.resize( i );
+ if( stat( prev_dir.c_str(), &st2 ) != 0 || !S_ISDIR( st2.st_mode ) ||
+ ( st.st_ino == st2.st_ino && st.st_dev == st2.st_dev ) )
+ { loop = true; break; }
+ }
+ if( loop ) // full_name already visited or above tree
+ show_file_error( full_name.c_str(), "warning: recursive directory loop." );
+ return !loop; // (link to) directory
+ }
+
+/* Return true if 'name' must be skipped: ".", "..", "fec", "FEC", any
+   name for which has_fec_extension returns true, or any name longer than
+   3 characters ending in "-fec", ".fec", or "_fec". */
+bool ignore_name( const std::string & name )
+ {
+ if( name == "." || name == ".." || name == "fec" || name == "FEC" ||
+ has_fec_extension( name ) ) return true;
+ return name.size() > 3 && name.compare( name.size() - 3, 3, "fec" ) == 0 &&
+ ( name.end()[-4] == '-' || name.end()[-4] == '.' || // size > 3 checked above
+ name.end()[-4] == '_' );
+ }
+
+} // end namespace
+
+
+/* Return in input_filename the next file name. ('-' is a valid file name).
+   Names are taken from the front of 'filelist'. Return true if a name was
+   stored in input_filename, or false (with input_filename cleared) when
+   'filelist' is exhausted.
+   A name that stat reports as a directory is never returned. If
+   'recursive' is zero the directory is skipped. Else the entries of the
+   directory accepted by ignore_name and test_full_name are inserted at
+   the front of 'filelist'; entries are tested following links only if
+   'recursive' == 2.
+   Ignore recursively found files and directories named "fec" or "*[-._]fec".
+   Set 'retval' to 1 (if it is 0) if a directory fails to open. */
+bool next_filename( std::list< std::string > & filelist,
+ std::string & input_filename, int & retval,
+ const char recursive )
+ {
+ while( !filelist.empty() )
+ {
+ input_filename = filelist.front();
+ filelist.pop_front();
+ struct stat st;
+ if( stat( input_filename.c_str(), &st ) == 0 && S_ISDIR( st.st_mode ) )
+ {
+ if( recursive )
+ {
+ DIR * const dirp = opendir( input_filename.c_str() );
+ if( !dirp )
+ {
+ show_file_error( input_filename.c_str(), "Can't open directory", errno );
+ if( retval == 0 ) { retval = 1; } continue; // keep a previous nonzero retval
+ }
+ for( unsigned i = input_filename.size();
+ i > 1 && input_filename[i-1] == '/'; --i )
+ input_filename.resize( i - 1 ); // remove trailing slashes
+ struct stat stdot, *stdotp = 0; // data of "." passed to test_full_name
+ if( input_filename[0] != '/' ) // relative file name
+ {
+ if( input_filename == "." ) input_filename.clear(); // no "./" prefix
+ if( stat( ".", &stdot ) == 0 && S_ISDIR( stdot.st_mode ) )
+ stdotp = &stdot;
+ }
+ if( input_filename.size() && input_filename != "/" )
+ input_filename += '/'; // input_filename is now the prefix of the entries
+ std::list< std::string > tmp_list; // accepted entries, in readdir order
+ while( true )
+ {
+ const struct dirent * const entryp = readdir( dirp );
+ if( !entryp ) { closedir( dirp ); break; } // no more entries returned
+ const std::string tmp_name( entryp->d_name );
+ if( ignore_name( tmp_name ) ) continue;
+ const std::string full_name( input_filename + tmp_name );
+ if( test_full_name( full_name, stdotp, recursive == 2 ) )
+ tmp_list.push_back( full_name );
+ }
+ filelist.splice( filelist.begin(), tmp_list ); // before the remaining names
+ }
+ continue; // directory itself is not returned; take next name
+ }
+ return true; // not a directory, or stat failed
+ }
+ input_filename.clear();
+ return false;
+ }
diff --git a/reproduce.cc b/reproduce.cc
index 5ca91a7..fcfe02d 100644
--- a/reproduce.cc
+++ b/reproduce.cc
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2024 Antonio Diaz Diaz.
+ Copyright (C) 2009-2025 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -440,7 +440,7 @@ int reproduce_member( uint8_t * const mbuffer, const long msize,
(const uint8_t *)mmap( 0, rsize, PROT_READ, MAP_PRIVATE, rfd, 0 );
close( rfd );
if( rbuf == MAP_FAILED )
- { show_file_error( reference_filename, "Can't mmap", errno );
+ { show_file_error( reference_filename, mmap_msg, errno );
return fatal( 1 ); }
const Lzip_header & header = *(const Lzip_header *)mbuffer;
@@ -457,9 +457,9 @@ int reproduce_member( uint8_t * const mbuffer, const long msize,
const long offset = match_file( *master, rbuf, rsize, reference_filename );
if( offset < 0 ) { delete master; return 2; } // no match
- // Reference data from offset must be at least as large as zeroed sector
- // minus member trailer if trailer is inside the zeroed sector.
- const int t = ( begin + size >= msize ) ? 16 + Lzip_trailer::size : 0;
+ /* Reference data from offset must be at least as large as zeroed sector
+ minus member trailer if trailer is inside the zeroed sector. */
+ const int t = (begin + size >= msize) ? 16 + Lzip_trailer::size : 0;
if( rsize - offset < size - t )
{ show_file_error( reference_filename, "Not enough reference data after match." );
delete master; return 2; }
@@ -482,7 +482,7 @@ int reproduce_member( uint8_t * const mbuffer, const long msize,
std::fflush( stdout ); pending_newline = true;
}
const bool level0 = level == '0';
- const bool auto0 = ( level0 && lzip_level != '0' );
+ const bool auto0 = level0 && lzip_level != '0';
int ret = try_reproduce( mbuffer, msize, dsize, good_dsize, begin, end,
rbuf, rsize, offset, dictionary_size,
level0 ? lzip0_argv : lzip_argv, md5sump, terminator, auto0 );
@@ -550,7 +550,7 @@ int reproduce_file( const std::string & input_filename,
i + 1, lzip_index.members(), terminator );
std::fflush( stdout ); pending_newline = true;
}
- if( !safe_seek( infd, mpos, filename ) ) return 1;
+ if( !safe_seek( infd, mpos, input_filename ) ) return 1;
long long failure_pos = 0;
if( test_member_from_file( infd, msize, &failure_pos ) == 0 )
continue; // member is not damaged
@@ -567,7 +567,7 @@ int reproduce_file( const std::string & input_filename,
uint8_t * const mbuffer_base = (uint8_t *)mmap( 0, msize + mpos_rem,
PROT_READ | PROT_WRITE, MAP_PRIVATE, infd, mpos - mpos_rem );
if( mbuffer_base == MAP_FAILED )
- { show_file_error( filename, "Can't mmap", errno ); return 1; }
+ { show_file_error( filename, mmap_msg, errno ); return 1; }
uint8_t * const mbuffer = mbuffer_base + mpos_rem;
long size = 0;
uint8_t value = 0;
@@ -592,11 +592,11 @@ int reproduce_file( const std::string & input_filename,
{
if( outfd < 0 ) // first damaged member reproduced
{
- if( !safe_seek( infd, 0, filename ) ) return 1;
+ if( !safe_seek( infd, 0, input_filename ) ) return 1;
set_signal_handler();
if( !open_outstream( true, true, false, true, to_file ) ) return 1;
- if( !copy_file( infd, outfd ) ) // copy whole file
- cleanup_and_fail( 1 );
+ if( !copy_file( infd, outfd, input_filename, output_filename ) )
+ cleanup_and_fail( 1 ); // copy whole file
}
if( seek_write( outfd, mbuffer + begin, size, mpos + begin ) != size )
{ show_file_error( output_filename.c_str(), "Error writing file", errno );
@@ -627,7 +627,8 @@ int reproduce_file( const std::string & input_filename,
std::fputs( "One member reproduced."
" Copy of input file still contains errors.\n", stdout );
else
- std::fputs( "Copy of input file reproduced successfully.\n", stdout );
+ std::printf( "Repaired copy of '%s' written to '%s'\n",
+ filename, output_filename.c_str() );
}
return 0;
}
@@ -635,24 +636,25 @@ int reproduce_file( const std::string & input_filename,
/* Passes a 0 terminator to other functions to prevent intramember feedback.
Exits only in case of fatal error. (reference file too large, etc). */
-int debug_reproduce_file( const char * const input_filename,
+int debug_reproduce_file( const std::string & input_filename,
const char * const lzip_name,
const char * const reference_filename,
const Cl_options & cl_opts, const Block & range,
const int sector_size, const int lzip_level )
{
+ const char * const filename = input_filename.c_str();
struct stat in_stats; // not used
- const int infd = open_instream( input_filename, &in_stats, false, true );
+ const int infd = open_instream( filename, &in_stats, false, true );
if( infd < 0 ) return 1;
const Lzip_index lzip_index( infd, cl_opts );
if( lzip_index.retval() != 0 )
- { show_file_error( input_filename, lzip_index.error().c_str() );
+ { show_file_error( filename, lzip_index.error().c_str() );
return lzip_index.retval(); }
const long long cdata_size = lzip_index.cdata_size();
if( range.pos() >= cdata_size )
- { show_file_error( input_filename, "Range is beyond end of last member." );
+ { show_file_error( filename, "Range is beyond end of last member." );
return 1; }
const long page_size = std::max( 1L, sysconf( _SC_PAGESIZE ) );
@@ -669,7 +671,7 @@ int debug_reproduce_file( const char * const input_filename,
const long long msize = lzip_index.mblock( i ).size();
if( !range.overlaps( mpos, msize ) ) continue;
if( !fits_in_size_t( msize + page_size ) ) // mmap uses size_t
- { show_file_error( input_filename,
+ { show_file_error( filename,
"Input file contains member too large for mmap." ); return 1; }
const long long dsize = lzip_index.dblock( i ).size();
const unsigned dictionary_size = lzip_index.dictionary_size( i );
@@ -686,14 +688,14 @@ int debug_reproduce_file( const char * const input_filename,
uint8_t * const mbuffer_base = (uint8_t *)mmap( 0, msize + mpos_rem,
PROT_READ | PROT_WRITE, MAP_PRIVATE, infd, mpos - mpos_rem );
if( mbuffer_base == MAP_FAILED )
- { show_file_error( input_filename, "Can't mmap", errno ); return 1; }
+ { show_file_error( filename, mmap_msg, errno ); return 1; }
uint8_t * const mbuffer = mbuffer_base + mpos_rem;
if( !md5_valid )
{
if( verbosity >= 0 ) // give a clue of the range being tested
{ std::printf( "Reproducing: %s\nReference file: %s\nTesting "
"sectors of size %llu at file positions %llu to %llu\n",
- input_filename, reference_filename,
+ filename, reference_filename,
std::min( (long long)sector_size, rm_end - sector_pos ),
sector_pos, rm_end - 1 ); std::fflush( stdout ); }
md5_valid = true; compute_md5( mbuffer, msize, md5_digest_c );
@@ -718,7 +720,7 @@ int debug_reproduce_file( const char * const input_filename,
long size = 0;
uint8_t value = 0;
const long begin =
- zeroed_sector_pos( mbuffer, msize, input_filename, &size, &value );
+ zeroed_sector_pos( mbuffer, msize, filename, &size, &value );
if( begin < 0 ) return 2;
MD5SUM md5sum;
const int ret = reproduce_member( mbuffer, msize, dsize, lzip_name,
@@ -762,18 +764,18 @@ int debug_reproduce_file( const char * const input_filename,
done:
if( verbosity >= 0 )
{
- std::printf( "\n%9ld sectors tested"
- "\n%9ld reproductions returned with zero status",
- positions, successes );
+ std::printf( "\n%11s sectors tested"
+ "\n%11s reproductions returned with zero status",
+ format_num3( positions ), format_num3( successes ) );
if( successes > 0 )
{
if( failed_comparisons > 0 )
- std::printf( ", of which\n%9ld comparisons failed\n",
- failed_comparisons );
- else std::fputs( "\n all comparisons passed\n", stdout );
+ std::printf( ", of which\n%11s comparisons failed\n",
+ format_num3( failed_comparisons ) );
+ else std::fputs( "\n all comparisons passed\n", stdout );
if( alternative_reproductions > 0 )
- std::printf( "%9ld alternative reproductions found\n",
- alternative_reproductions );
+ std::printf( "%11s alternative reproductions found\n",
+ format_num3( alternative_reproductions ) );
}
else std::fputc( '\n', stdout );
if( fatal_retval )
diff --git a/split.cc b/split.cc
index 48d7915..6934ca9 100644
--- a/split.cc
+++ b/split.cc
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2024 Antonio Diaz Diaz.
+ Copyright (C) 2009-2025 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -88,14 +88,14 @@ int split_file( const std::string & input_filename,
long long mpos = b.pos();
long long msize = b.size();
long long failure_pos = 0;
- if( !safe_seek( infd, mpos, filename ) ) return 1;
+ if( !safe_seek( infd, mpos, input_filename ) ) return 1;
if( test_member_from_file( infd, msize, &failure_pos ) == 1 )
{ // corrupt or fake trailer
while( true )
{
mpos += failure_pos; msize -= failure_pos;
if( msize < min_member_size ) break; // trailing data
- if( !safe_seek( infd, mpos, filename ) ) return 1;
+ if( !safe_seek( infd, mpos, input_filename ) ) return 1;
if( test_member_from_file( infd, msize, &failure_pos ) != 1 ) break;
}
lzip_index = Lzip_index( infd, cl_opts, true, true, mpos );
@@ -106,7 +106,7 @@ int split_file( const std::string & input_filename,
}
}
- if( !safe_seek( infd, 0, filename ) ) return 1;
+ if( !safe_seek( infd, 0, input_filename ) ) return 1;
int max_digits = 1;
for( long i = lzip_index.blocks( true ); i >= 10; i /= 10 ) ++max_digits;
bool to_file = // if true, create intermediate dirs
@@ -120,12 +120,13 @@ int split_file( const std::string & input_filename,
if( mb.pos() > stream_pos ) // gap
{
if( !open_outstream( force, true, false, false, to_file ) ) return 1;
- if( !copy_file( infd, outfd, mb.pos() - stream_pos ) ||
+ if( !copy_file( infd, outfd, input_filename, output_filename,
+ mb.pos() - stream_pos ) ||
!close_outstream( &in_stats ) ) cleanup_and_fail( 1 );
next_filename( max_digits ); to_file = false;
}
if( !open_outstream( force, true, false, false, to_file ) ) return 1; // member
- if( !copy_file( infd, outfd, mb.size() ) ||
+ if( !copy_file( infd, outfd, input_filename, output_filename, mb.size() ) ||
!close_outstream( &in_stats ) ) cleanup_and_fail( 1 );
next_filename( max_digits ); to_file = false;
stream_pos = mb.end();
@@ -133,7 +134,8 @@ int split_file( const std::string & input_filename,
if( lzip_index.file_size() > stream_pos ) // trailing data
{
if( !open_outstream( force, true, false, false, to_file ) ) return 1;
- if( !copy_file( infd, outfd, lzip_index.file_size() - stream_pos ) ||
+ if( !copy_file( infd, outfd, input_filename, output_filename,
+ lzip_index.file_size() - stream_pos ) ||
!close_outstream( &in_stats ) ) cleanup_and_fail( 1 );
next_filename( max_digits ); to_file = false;
}
diff --git a/testsuite/check.sh b/testsuite/check.sh
index a65f062..a4eb8aa 100755
--- a/testsuite/check.sh
+++ b/testsuite/check.sh
@@ -1,6 +1,6 @@
#! /bin/sh
# check script for Lziprecover - Data recovery tool for the lzip format
-# Copyright (C) 2009-2024 Antonio Diaz Diaz.
+# Copyright (C) 2009-2025 Antonio Diaz Diaz.
#
# This script is free software: you have unlimited permission
# to copy, distribute, and modify it.
@@ -29,11 +29,10 @@ if [ -d tmp ] ; then rm -rf tmp ; fi
mkdir tmp
cd "${objdir}"/tmp || framework_failure
-cat "${testdir}"/test.txt > in || framework_failure
+cp "${testdir}"/test.txt in || framework_failure
in_lz="${testdir}"/test.txt.lz
in_lzma="${testdir}"/test.txt.lzma
-in_em="${testdir}"/test_em.txt.lz
-inD="${testdir}"/test21723.txt
+inD="${testdir}"/test21636.txt
bad1_lz="${testdir}"/test_bad1.lz
bad2_lz="${testdir}"/test_bad2.lz
bad3_lz="${testdir}"/test_bad3.lz
@@ -44,8 +43,9 @@ bad7_lz="${testdir}"/test_bad7.lz
bad8_lz="${testdir}"/test_bad8.lz
bad9_lz="${testdir}"/test_bad9.lz
fox_lz="${testdir}"/fox.lz
-fox6_lz="${testdir}"/fox6.lz
+fnz_lz="${testdir}"/fox_nz.lz
f6b1="${testdir}"/fox6_bad1.txt
+f6b1nz_lz="${testdir}"/fox6_b1nz.lz
f6b1_lz="${testdir}"/fox6_bad1.lz
f6b2_lz="${testdir}"/fox6_bad2.lz
f6b3_lz="${testdir}"/fox6_bad3.lz
@@ -58,28 +58,25 @@ f6s3_lz="${testdir}"/fox6_sc3.lz
f6s4_lz="${testdir}"/fox6_sc4.lz
f6s5_lz="${testdir}"/fox6_sc5.lz
f6s6_lz="${testdir}"/fox6_sc6.lz
-f6mk_lz="${testdir}"/fox6_mark.lz
num_lz="${testdir}"/numbers.lz
nbt_lz="${testdir}"/numbersbt.lz
fail=0
test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; }
# Description of test files for lziprecover:
-# single-member files with one or more errors
+# single-member files; test.txt.lz with one or more errors
# test_bad1.lz: byte at offset 66 changed from 0xA6 to 0x26
-# test_bad2.lz: [ 34- 65] --> copy of bytes [ 68- 99]
-# test_bad3.lz: [ 512-1535] --> zeroed [2560-3583] --> zeroed
-# test_bad4.lz: [3072-4095] --> random errors [4608-5631] --> zeroed
-# test_bad5.lz: [1024-2047] --> random errors [5120-6143] --> random data
-# test_bad6.lz: [ 512-1023] --> zeroed (reference test.txt [ 891- 2137])
-# test_bad7.lz: [6656-7167] --> zeroed (reference test.txt [20428-32231])
-# test_bad8.lz: [ 66- 73] --> zeroed (reference test.txt [ 89- 110])
-# test_bad9.lz: [6491-6498] --> zeroed (reference test.txt [17977-18120])
+# test_bad2.lz: [ 34- 65] --> copy of bytes [68-99]
+# test_bad3.lz: [ 512-1535] --> zeroed [2560-3583] --> zeroed
+# test_bad4.lz: [3072-4095] --> random bit flips [4608-5631] --> zeroed
+# test_bad5.lz: [1024-2047] --> random bit flips [5120-6143] --> random data
+# test_bad6.lz: [ 512-1023] --> zeroed (reference test.txt [ 892- 2414])
+# test_bad7.lz: [6656-7167] --> zeroed (reference test.txt [20798-33385])
+# test_bad8.lz: [ 66- 73] --> zeroed (reference test.txt [ 89- 110])
+# test_bad9.lz: [6491-6498] --> zeroed (reference test.txt [17982-18594])
#
-# test_em.txt.lz: test.txt split in 3, with 5 empty members (1,3,5-6,8)
-# test_3m.txt.lz.md5: md5sum of test_em.txt.lz after removing empty members
-#
-# 6-member files with one or more errors
+# 6-member files; fox6.lz with one or more errors
+# fox6_b1nz.lz: fox6_bad1.lz with first LZMA byte set to 'a'..'f'
# fox6_bad1.lz: byte at offset 5 changed from 0x0C to 0x00 (DS)
# byte at offset 142 changed from 0x50 to 0x70 (CRC)
# byte at offset 224 changed from 0x2D to 0x2E (data_size)
@@ -92,8 +89,6 @@ test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; }
# fox6_bad5.lz: [380-479] --> zeroed (members 5,6)
# fox6_bad6.lz: [430-439] --> zeroed (member 6)
#
-# fox6_mark.lz: 4 last members marked with bytes 'm', 'a', 'r', 'k'
-#
# 6-member files "shortcircuited" by a corrupt or fake trailer
# fox6_sc1.lz: (corrupt but consistent last trailer)
# last CRC != 0 ; dsize = 4 * msize ; msize = 480 (file size)
@@ -114,7 +109,8 @@ printf "testing lziprecover-%s..." "$2"
"${LZIPRECOVER}" -q --nrep-stats=0N "${in_lz}"
[ $? = 1 ] || test_failed $LINENO
for i in 0 255 0kB 0KiB 0M 0G 0T 0P 0E 0Z 0Y 0R 0Q ; do
- "${LZIPRECOVER}" -q --nrep-stats=$i "${in_lz}" || test_failed $LINENO $i
+ "${LZIPRECOVER}" --nrep-stats=$i "${in_lz}" > /dev/null ||
+ test_failed $LINENO $i
done
"${LZIP}" -lq in
[ $? = 2 ] || test_failed $LINENO
@@ -168,7 +164,7 @@ printf "LZIP\001+.............................." | "${LZIP}" -t 2> /dev/null
[ $? = 1 ] || test_failed $LINENO
"${LZIPRECOVER}" -mq "${bad1_lz}"
[ $? = 1 ] || test_failed $LINENO
-"${LZIPRECOVER}" -Rq
+"${LZIPRECOVER}" -Bq
[ $? = 1 ] || test_failed $LINENO
"${LZIPRECOVER}" -sq
[ $? = 1 ] || test_failed $LINENO
@@ -188,7 +184,7 @@ printf "LZIP\001+.............................." | "${LZIP}" -t 2> /dev/null
[ $? = 1 ] || test_failed $LINENO
"${LZIPRECOVER}" -q --dump=damaged in > out # /dev/null returns 1 on OS/2
[ $? = 2 ] || test_failed $LINENO
-"${LZIPRECOVER}" -q --dump=damagedd "${in_lz}" > /dev/null
+"${LZIPRECOVER}" -q --dump=damageed "${in_lz}" > /dev/null
[ $? = 1 ] || test_failed $LINENO
"${LZIPRECOVER}" -q --dump=empty
[ $? = 1 ] || test_failed $LINENO
@@ -196,13 +192,13 @@ printf "LZIP\001+.............................." | "${LZIP}" -t 2> /dev/null
[ $? = 1 ] || test_failed $LINENO
"${LZIPRECOVER}" -q --strip=damaged in > out # /dev/null returns 1 on OS/2
[ $? = 2 ] || test_failed $LINENO
-"${LZIPRECOVER}" -q --strip=damagedd "${in_lz}" > /dev/null
+"${LZIPRECOVER}" -q --strip=damageed "${in_lz}" > /dev/null
[ $? = 1 ] || test_failed $LINENO
"${LZIPRECOVER}" -q --remove=damaged
[ $? = 1 ] || test_failed $LINENO
"${LZIPRECOVER}" -q --remove=damaged in
[ $? = 2 ] || test_failed $LINENO
-"${LZIPRECOVER}" -q --remove=damagedd "${in_lz}"
+"${LZIPRECOVER}" -q --remove=damageed "${in_lz}"
[ $? = 1 ] || test_failed $LINENO
"${LZIPRECOVER}" -q --dump=tdata
[ $? = 1 ] || test_failed $LINENO
@@ -244,14 +240,15 @@ cmp "${in_lz}" out.lz || test_failed $LINENO
"${LZIPRECOVER}" -A < "${in_lzma}" > out.lz || test_failed $LINENO
cmp "${in_lz}" out.lz || test_failed $LINENO
rm -f out.lz || framework_failure
-cat "${in_lzma}" > out.lzma || framework_failure
+cp "${in_lzma}" out.lzma || framework_failure
"${LZIPRECOVER}" -Ak out.lzma || test_failed $LINENO
cmp "${in_lz}" out.lz || test_failed $LINENO
printf "to be overwritten" > out.lz || framework_failure
"${LZIPRECOVER}" -Af out.lzma || test_failed $LINENO
+[ ! -e out.lzma ] || test_failed $LINENO
cmp "${in_lz}" out.lz || test_failed $LINENO
rm -f out.lz || framework_failure
-cat "${in_lzma}" > out.tlz || framework_failure
+cp "${in_lzma}" out.tlz || framework_failure
"${LZIPRECOVER}" -Ak out.tlz || test_failed $LINENO
cmp "${in_lz}" out.tar.lz || test_failed $LINENO
printf "to be overwritten" > out.tar.lz || framework_failure
@@ -261,46 +258,34 @@ rm -f out.tar.lz || framework_failure
cat in in > in2 || framework_failure
"${LZIPRECOVER}" -A -o out2.lz - "${in_lzma}" - < "${in_lzma}" ||
test_failed $LINENO
-"${LZIP}" -cd out2.lz > out2 || test_failed $LINENO
+"${LZIP}" -d out2.lz || test_failed $LINENO
cmp in2 out2 || test_failed $LINENO
-rm -f out2.lz out2 || framework_failure
+rm -f out2 || framework_failure
"${LZIPRECOVER}" -A "${in_lzma}" -o a/b/c/out.lz || test_failed $LINENO
cmp "${in_lz}" a/b/c/out.lz || test_failed $LINENO
rm -rf a || framework_failure
printf "\ntesting decompression..."
-for i in "${in_lz}" "${in_em}" ; do
- "${LZIP}" -lq "$i" || test_failed $LINENO "$i"
- "${LZIP}" -t "$i" || test_failed $LINENO "$i"
- "${LZIP}" -d "$i" -o out || test_failed $LINENO "$i"
- cmp in out || test_failed $LINENO "$i"
- "${LZIP}" -cd "$i" > out || test_failed $LINENO "$i"
- cmp in out || test_failed $LINENO "$i"
- "${LZIP}" -d "$i" -o - > out || test_failed $LINENO "$i"
- cmp in out || test_failed $LINENO "$i"
- "${LZIP}" -d < "$i" > out || test_failed $LINENO "$i"
- cmp in out || test_failed $LINENO "$i"
- rm -f out || framework_failure
-done
-
-lines=`"${LZIP}" -tvv "${in_em}" 2>&1 | wc -l` || test_failed $LINENO
-[ "${lines}" -eq 8 ] || test_failed $LINENO "${lines}"
-"${LZIP}" -tq "${in_em}" --empty-error
-[ $? = 2 ] || test_failed $LINENO
-
-lines=`"${LZIP}" -lvv "${in_em}" | wc -l` || test_failed $LINENO
-[ "${lines}" -eq 11 ] || test_failed $LINENO "${lines}"
-"${LZIP}" -lq "${in_em}" --empty-error
-[ $? = 2 ] || test_failed $LINENO
+"${LZIP}" -l "${in_lz}" > /dev/null || test_failed $LINENO
+"${LZIP}" -t "${in_lz}" || test_failed $LINENO
+"${LZIP}" -d "${in_lz}" -o out || test_failed $LINENO
+cmp in out || test_failed $LINENO
+"${LZIP}" -cd "${in_lz}" > out || test_failed $LINENO
+cmp in out || test_failed $LINENO
+"${LZIP}" -d "${in_lz}" -o - > out || test_failed $LINENO
+cmp in out || test_failed $LINENO
+"${LZIP}" -d < "${in_lz}" > out || test_failed $LINENO
+cmp in out || test_failed $LINENO
+rm -f out || framework_failure
-cat "${in_lz}" > out.lz || framework_failure
+cp "${in_lz}" out.lz || framework_failure
"${LZIP}" -dk out.lz || test_failed $LINENO
cmp in out || test_failed $LINENO
rm -f out || framework_failure
"${LZIP}" -cd "${fox_lz}" > fox || test_failed $LINENO
-cat fox > copy || framework_failure
-cat "${in_lz}" > copy.lz || framework_failure
+cp fox copy || framework_failure
+cp "${in_lz}" copy.lz || framework_failure
"${LZIP}" -d copy.lz out.lz 2> /dev/null # skip copy, decompress out
[ $? = 1 ] || test_failed $LINENO
[ ! -e out.lz ] || test_failed $LINENO
@@ -314,7 +299,6 @@ rm -f copy out || framework_failure
printf "to be overwritten" > out || framework_failure
"${LZIP}" -df -o out < "${in_lz}" || test_failed $LINENO
cmp in out || test_failed $LINENO
-rm -f out || framework_failure
"${LZIP}" -d -o ./- "${in_lz}" || test_failed $LINENO
cmp in ./- || test_failed $LINENO
rm -f ./- || framework_failure
@@ -322,12 +306,12 @@ rm -f ./- || framework_failure
cmp in ./- || test_failed $LINENO
rm -f ./- || framework_failure
-cat "${in_lz}" > anyothername || framework_failure
+cp "${in_lz}" anyothername || framework_failure
"${LZIP}" -dv - anyothername - < "${in_lz}" > out 2> /dev/null ||
test_failed $LINENO
cmp in out || test_failed $LINENO
cmp in anyothername.out || test_failed $LINENO
-rm -f out anyothername.out || framework_failure
+rm -f anyothername.out || framework_failure
"${LZIP}" -lq in "${in_lz}"
[ $? = 2 ] || test_failed $LINENO
@@ -344,7 +328,7 @@ cat out in | cmp in - || test_failed $LINENO # out must be empty
[ $? = 1 ] || test_failed $LINENO
cmp in out || test_failed $LINENO
rm -f out || framework_failure
-cat "${in_lz}" > out.lz || framework_failure
+cp "${in_lz}" out.lz || framework_failure
for i in 1 2 3 4 5 6 7 ; do
printf "g" >> out.lz || framework_failure
"${LZIP}" -alvv out.lz "${in_lz}" > /dev/null 2>&1
@@ -364,7 +348,7 @@ done
cmp in out || test_failed $LINENO
rm -f out || framework_failure
-"${LZIP}" -lq "${in_lz}" "${in_lz}" || test_failed $LINENO
+"${LZIP}" -l "${in_lz}" "${in_lz}" > /dev/null || test_failed $LINENO
"${LZIP}" -t "${in_lz}" "${in_lz}" || test_failed $LINENO
"${LZIP}" -cd "${in_lz}" "${in_lz}" -o out > out2 || test_failed $LINENO
[ ! -e out ] || test_failed $LINENO # override -o
@@ -375,10 +359,13 @@ cmp in2 out2 || test_failed $LINENO
rm -f out2 || framework_failure
cat "${in_lz}" "${in_lz}" > out2.lz || framework_failure
+lines=`"${LZIP}" -tvv out2.lz 2>&1 | wc -l` || test_failed $LINENO
+[ "${lines}" -eq 2 ] || test_failed $LINENO "${lines}"
+lines=`"${LZIP}" -lvv out2.lz | wc -l` || test_failed $LINENO
+[ "${lines}" -eq 5 ] || test_failed $LINENO "${lines}"
+
printf "\ngarbage" >> out2.lz || framework_failure
"${LZIP}" -tvvvv out2.lz 2> /dev/null || test_failed $LINENO
-"${LZIPRECOVER}" -aD0 -q out2.lz
-[ $? = 2 ] || test_failed $LINENO
"${LZIP}" -alq out2.lz
[ $? = 2 ] || test_failed $LINENO
"${LZIP}" -atq out2.lz
@@ -391,42 +378,13 @@ printf "\ngarbage" >> out2.lz || framework_failure
"${LZIP}" -adkq -o out2 < out2.lz
[ $? = 2 ] || test_failed $LINENO
[ ! -e out2 ] || test_failed $LINENO
+"${LZIPRECOVER}" -aD0 -q out2.lz
+[ $? = 2 ] || test_failed $LINENO
printf "to be overwritten" > out2 || framework_failure
"${LZIP}" -df out2.lz || test_failed $LINENO
cmp in2 out2 || test_failed $LINENO
rm -f out2 || framework_failure
-"${LZIPRECOVER}" -D ,18000 "${in_lz}" > out || test_failed $LINENO
-"${LZIPRECOVER}" -D 18000 "${in_lz}" >> out || test_failed $LINENO
-cmp in out || test_failed $LINENO
-"${LZIPRECOVER}" -D 21723-22120 -fo out "${in_lz}" || test_failed $LINENO
-cmp "${inD}" out || test_failed $LINENO
-"${LZIPRECOVER}" -D 21723,397 "${in_lz}" > out || test_failed $LINENO
-cmp "${inD}" out || test_failed $LINENO
-"${LZIPRECOVER}" -D 21723,397 "${in_em}" > out || test_failed $LINENO
-cmp "${inD}" out || test_failed $LINENO
-"${LZIPRECOVER}" -q -D 21723,397 --empty-error "${in_em}"
-[ $? = 2 ] || test_failed $LINENO
-"${LZIP}" -D 0 "${in_lz}" -o a/b/c/out || test_failed $LINENO
-cmp in a/b/c/out || test_failed $LINENO
-rm -rf a || framework_failure
-
-"${LZIP}" -cd "${fox6_lz}" > out || test_failed $LINENO
-"${LZIP}" -cd "${f6mk_lz}" > copy || test_failed $LINENO
-cmp copy out || test_failed $LINENO
-rm -f copy out || framework_failure
-"${LZIP}" -lq "${f6mk_lz}" --marking-error
-[ $? = 2 ] || test_failed $LINENO
-"${LZIP}" -tq "${f6mk_lz}" --marking-error
-[ $? = 2 ] || test_failed $LINENO
-cat "${f6mk_lz}" > f6mk.lz || framework_failure
-cat "${f6mk_lz}" > f6mk2.lz || framework_failure
-cmp -s "${fox6_lz}" f6mk.lz && test_failed $LINENO
-"${LZIPRECOVER}" --clear-marking f6mk.lz f6mk2.lz || test_failed $LINENO
-cmp "${fox6_lz}" f6mk.lz || test_failed $LINENO
-cmp "${fox6_lz}" f6mk2.lz || test_failed $LINENO
-rm -f f6mk.lz f6mk2.lz || framework_failure
-
"${LZIP}" -d "${fox_lz}" -o a/b/c/fox || test_failed $LINENO
cmp fox a/b/c/fox || test_failed $LINENO
rm -rf a || framework_failure
@@ -437,13 +395,85 @@ rm -rf a || framework_failure
[ $? = 1 ] || test_failed $LINENO
[ ! -e a ] || test_failed $LINENO
+if [ -z "${LZIP_NAME}" ] ; then LZIP_NAME=lzip ; fi
+touch empty em || framework_failure
+"${LZIP_NAME}" -0 em || test_failed $LINENO
+"${LZIP}" -l em.lz > /dev/null || test_failed $LINENO
+"${LZIP}" -dk em.lz || test_failed $LINENO
+cmp empty em || test_failed $LINENO
+cat em.lz em.lz | "${LZIP}" -t || test_failed $LINENO
+cat em.lz em.lz | "${LZIP}" -d > em || test_failed $LINENO
+cmp empty em || test_failed $LINENO
+cat em.lz "${in_lz}" | "${LZIP}" -t || test_failed $LINENO
+cat em.lz "${in_lz}" | "${LZIP}" -d > out || test_failed $LINENO
+cmp in out || test_failed $LINENO
+cat "${in_lz}" em.lz | "${LZIP}" -t || test_failed $LINENO
+cat "${in_lz}" em.lz | "${LZIP}" -d > out || test_failed $LINENO
+cmp in out || test_failed $LINENO
+
+cat em.lz "${fox_lz}" em.lz "${fox_lz}" em.lz em.lz "${fox_lz}" em.lz > \
+ f3_em.lz || framework_failure
+"${LZIPRECOVER}" -D ,18000 "${in_lz}" > out || test_failed $LINENO
+"${LZIPRECOVER}" -D 18000 "${in_lz}" >> out || test_failed $LINENO
+cmp in out || test_failed $LINENO
+"${LZIPRECOVER}" -D 21636-22033 -fo out "${in_lz}" || test_failed $LINENO
+cmp "${inD}" out || test_failed $LINENO
+"${LZIPRECOVER}" -D 21636,397 "${in_lz}" > out || test_failed $LINENO
+cmp "${inD}" out || test_failed $LINENO
+"${LZIPRECOVER}" -D 45,45 f3_em.lz > out || test_failed $LINENO
+cmp fox out || test_failed $LINENO
+"${LZIP}" -D 0 "${in_lz}" -o a/b/c/out || test_failed $LINENO
+cmp in a/b/c/out || test_failed $LINENO
+rm -rf a || framework_failure
+
printf "\ntesting bad input..."
+cat em.lz em.lz > ee.lz || framework_failure
+"${LZIP}" -l < ee.lz > /dev/null || test_failed $LINENO
+"${LZIP}" -t < ee.lz || test_failed $LINENO
+"${LZIP}" -d < ee.lz > em || test_failed $LINENO
+cmp empty em || test_failed $LINENO
+"${LZIPRECOVER}" -li ee.lz > /dev/null || test_failed $LINENO
+"${LZIPRECOVER}" -ti ee.lz || test_failed $LINENO
+"${LZIPRECOVER}" -cdi ee.lz > em || test_failed $LINENO
+cmp empty em || test_failed $LINENO
+"${LZIP}" -lq ee.lz
+[ $? = 2 ] || test_failed $LINENO
+"${LZIP}" -tq ee.lz
+[ $? = 2 ] || test_failed $LINENO
+"${LZIP}" -dq ee.lz
+[ $? = 2 ] || test_failed $LINENO
+[ ! -e ee ] || test_failed $LINENO
+"${LZIP}" -cdq ee.lz > em
+[ $? = 2 ] || test_failed $LINENO
+cmp empty em || test_failed $LINENO
+rm -f em || framework_failure
+cat "${in_lz}" em.lz "${in_lz}" > inein.lz || framework_failure
+"${LZIP}" -l < inein.lz > /dev/null || test_failed $LINENO
+"${LZIP}" -t < inein.lz || test_failed $LINENO
+"${LZIP}" -d < inein.lz > out2 || test_failed $LINENO
+cmp in2 out2 || test_failed $LINENO
+"${LZIPRECOVER}" -li inein.lz > /dev/null || test_failed $LINENO
+"${LZIPRECOVER}" -ti inein.lz || test_failed $LINENO
+"${LZIPRECOVER}" -cdi inein.lz > out2 || test_failed $LINENO
+cmp in2 out2 || test_failed $LINENO
+"${LZIP}" -lq inein.lz
+[ $? = 2 ] || test_failed $LINENO
+"${LZIP}" -tq inein.lz
+[ $? = 2 ] || test_failed $LINENO
+"${LZIP}" -dq inein.lz
+[ $? = 2 ] || test_failed $LINENO
+[ ! -e inein ] || test_failed $LINENO
+"${LZIP}" -cdq inein.lz > out2
+[ $? = 2 ] || test_failed $LINENO
+cmp in2 out2 || test_failed $LINENO
+rm -f out2 inein.lz em.lz || framework_failure
+
headers='LZIp LZiP LZip LzIP LzIp LziP lZIP lZIp lZiP lzIP'
-body='\001\014\000\203\377\373\377\377\300\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000$\000\000\000\000\000\000\000'
-cat "${in_lz}" > int.lz || framework_failure
+body='\001\014\000\000\101\376\367\377\377\340\000\200\000\215\357\002\322\001\000\000\000\000\000\000\000\045\000\000\000\000\000\000\000'
+cp "${in_lz}" int.lz || framework_failure
printf "LZIP${body}" >> int.lz || framework_failure
-if "${LZIP}" -tq int.lz ; then
+if "${LZIP}" -t int.lz ; then
for header in ${headers} ; do
printf "${header}${body}" > int.lz || framework_failure
"${LZIP}" -lq int.lz # first member
@@ -462,7 +492,7 @@ if "${LZIP}" -tq int.lz ; then
[ $? = 2 ] || test_failed $LINENO ${header}
"${LZIP}" -cdq --loose-trailing int.lz > /dev/null
[ $? = 2 ] || test_failed $LINENO ${header}
- cat "${in_lz}" > int.lz || framework_failure
+ cp "${in_lz}" int.lz || framework_failure
printf "${header}${body}" >> int.lz || framework_failure
"${LZIP}" -lq int.lz # trailing data
[ $? = 2 ] || test_failed $LINENO ${header}
@@ -472,7 +502,7 @@ if "${LZIP}" -tq int.lz ; then
[ $? = 2 ] || test_failed $LINENO ${header}
"${LZIP}" -cdq int.lz > /dev/null
[ $? = 2 ] || test_failed $LINENO ${header}
- "${LZIP}" -lq --loose-trailing int.lz ||
+ "${LZIP}" -l --loose-trailing int.lz > /dev/null ||
test_failed $LINENO ${header}
"${LZIP}" -t --loose-trailing int.lz ||
test_failed $LINENO ${header}
@@ -503,9 +533,31 @@ if "${LZIP}" -tq int.lz ; then
cmp "${in_lz}" int.lz || test_failed $LINENO ${header}
done
else
- printf "\nwarning: skipping header test: 'printf' does not work on your system."
+ printf "warning: skipping header test: 'printf' does not work on your system."
fi
-rm -f int.lz out || framework_failure
+rm -f int.lz || framework_failure
+
+"${LZIP}" -l "${fnz_lz}" > /dev/null || test_failed $LINENO
+"${LZIP}" -tq "${fnz_lz}"
+[ $? = 2 ] || test_failed $LINENO
+cat "${fox_lz}" "${fox_lz}" "${fox_lz}" "${fox_lz}" "${fox_lz}" "${fox_lz}" \
+ > fox6.lz || framework_failure
+cat "${fox_lz}" "${fnz_lz}" "${fox_lz}" "${fnz_lz}" "${fox_lz}" "${fnz_lz}" \
+ > fox6_nz.lz || framework_failure
+"${LZIP}" -cd fox6.lz > out || test_failed $LINENO
+"${LZIPRECOVER}" -cd -i fox6_nz.lz > copy || test_failed $LINENO
+cmp copy out || test_failed $LINENO
+rm -f copy || framework_failure
+cp fox6_nz.lz f6nz.lz || framework_failure
+cp fox6_nz.lz f6nz2.lz || framework_failure
+cp "${f6b1nz_lz}" f6b1nz.lz || framework_failure
+cmp -s fox6.lz f6nz.lz && test_failed $LINENO
+"${LZIPRECOVER}" --nonzero-repair f6nz.lz f6nz2.lz f6b1nz.lz ||
+ test_failed $LINENO
+cmp fox6.lz f6nz.lz || test_failed $LINENO
+cmp fox6.lz f6nz2.lz || test_failed $LINENO
+cmp "${f6b1_lz}" f6b1nz.lz || test_failed $LINENO
+rm -f f6nz.lz f6nz2.lz f6b1nz.lz || framework_failure
for i in fox_v2.lz fox_s11.lz fox_de20.lz \
fox_bcrc.lz fox_crc0.lz fox_das46.lz fox_mes81.lz ; do
@@ -521,13 +573,13 @@ for i in fox_bcrc.lz fox_crc0.lz fox_das46.lz fox_mes81.lz ; do
"${LZIPRECOVER}" -cdq -i "${testdir}"/$i > out || test_failed $LINENO $i
cmp fox out || test_failed $LINENO $i
done
-rm -f fox out || framework_failure
+rm -f fox || framework_failure
cat "${in_lz}" "${in_lz}" > in2.lz || framework_failure
cat "${in_lz}" "${in_lz}" "${in_lz}" > in3.lz || framework_failure
-if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null &&
- [ -e trunc.lz ] && cmp in2.lz trunc.lz > /dev/null 2>&1 ; then
- for i in 6 20 14734 14753 14754 14755 14756 14757 14758 ; do
+if dd if=in3.lz of=trunc.lz bs=14682 count=1 2> /dev/null &&
+ [ -e trunc.lz ] && cmp in2.lz trunc.lz ; then
+ for i in 6 20 14664 14683 14684 14685 14686 14687 14688 ; do
dd if=in3.lz of=trunc.lz bs=$i count=1 2> /dev/null
"${LZIP}" -lq trunc.lz
[ $? = 2 ] || test_failed $LINENO $i
@@ -541,7 +593,7 @@ if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null &&
[ $? = 2 ] || test_failed $LINENO $i
done
else
- printf "\nwarning: skipping truncation test: 'dd' does not work on your system."
+ printf "warning: skipping truncation test: 'dd' does not work on your system."
fi
rm -f in3.lz trunc.lz || framework_failure
@@ -554,7 +606,7 @@ for i in "${f6s3_lz}" "${f6s4_lz}" "${f6s5_lz}" "${f6s6_lz}" ; do
[ "${lines}" -eq 9 ] || test_failed $LINENO "$i ${lines}"
done
-cat "${in_lz}" > ingin.lz || framework_failure
+cp "${in_lz}" ingin.lz || framework_failure
printf "g" >> ingin.lz || framework_failure
cat "${in_lz}" >> ingin.lz || framework_failure
"${LZIP}" -lq ingin.lz
@@ -563,17 +615,22 @@ cat "${in_lz}" >> ingin.lz || framework_failure
[ $? = 2 ] || test_failed $LINENO
"${LZIP}" -atq < ingin.lz
[ $? = 2 ] || test_failed $LINENO
-"${LZIP}" -acdq ingin.lz > /dev/null
+"${LZIP}" -acdq ingin.lz > out
[ $? = 2 ] || test_failed $LINENO
-"${LZIP}" -adq < ingin.lz > /dev/null
+cmp in out || test_failed $LINENO
+"${LZIP}" -adq < ingin.lz > out
[ $? = 2 ] || test_failed $LINENO
-"${LZIPRECOVER}" -lq -i ingin.lz || test_failed $LINENO
+cmp in out || test_failed $LINENO
"${LZIP}" -t ingin.lz || test_failed $LINENO
"${LZIP}" -t < ingin.lz || test_failed $LINENO
+"${LZIP}" -dk ingin.lz || test_failed $LINENO
+cmp in ingin || test_failed $LINENO
"${LZIP}" -cd ingin.lz > out || test_failed $LINENO
cmp in out || test_failed $LINENO
"${LZIP}" -d < ingin.lz > out || test_failed $LINENO
cmp in out || test_failed $LINENO
+rm -f ingin || framework_failure
+"${LZIPRECOVER}" -l -i ingin.lz > /dev/null || test_failed $LINENO
"${LZIPRECOVER}" -cd -i ingin.lz > out2 || test_failed $LINENO
cmp in2 out2 || test_failed $LINENO
@@ -588,7 +645,6 @@ cmp "${f6b1}" out || test_failed $LINENO
"${LZIPRECOVER}" -D0 -iq "${f6b1_lz}" > out || test_failed $LINENO
cmp "${f6b1}" out || test_failed $LINENO
-touch empty || framework_failure
"${LZIPRECOVER}" -D0 -q ingin.lz > out
[ $? = 2 ] || test_failed $LINENO
cmp empty out || test_failed $LINENO
@@ -604,12 +660,96 @@ cmp in2 out2 || test_failed $LINENO
"${LZIPRECOVER}" -t -iq in2t.lz || test_failed $LINENO
rm -f in2 in2t.lz out out2 || framework_failure
+printf "\ntesting --byte-repair..."
+
+rm -f out.lz || framework_failure
+"${LZIPRECOVER}" -B -o out.lz fox6.lz || test_failed $LINENO
+[ ! -e out.lz ] || test_failed $LINENO
+"${LZIPRECOVER}" -B -o out.lz "${bad2_lz}" -q
+[ $? = 2 ] || test_failed $LINENO
+[ ! -e out.lz ] || test_failed $LINENO
+"${LZIPRECOVER}" -B -o out.lz "${bad3_lz}" -q
+[ $? = 2 ] || test_failed $LINENO
+[ ! -e out.lz ] || test_failed $LINENO
+"${LZIPRECOVER}" -B -o out.lz "${bad4_lz}" -q
+[ $? = 2 ] || test_failed $LINENO
+[ ! -e out.lz ] || test_failed $LINENO
+"${LZIPRECOVER}" -Bf -o out.lz "${f6b1_lz}" || test_failed $LINENO
+cmp fox6.lz out.lz || test_failed $LINENO
+"${LZIPRECOVER}" -Bf fox6_nz.lz -o out.lz || test_failed $LINENO
+cmp fox6.lz out.lz || test_failed $LINENO
+"${LZIPRECOVER}" -Bf "${f6b1nz_lz}" -o out.lz || test_failed $LINENO
+cmp fox6.lz out.lz || test_failed $LINENO
+"${LZIPRECOVER}" -Bf -o out.lz "${bad1_lz}" || test_failed $LINENO
+cmp "${in_lz}" out.lz || test_failed $LINENO
+"${LZIPRECOVER}" -B -o a/b/c/out.lz "${bad1_lz}" || test_failed $LINENO
+cmp "${in_lz}" a/b/c/out.lz || test_failed $LINENO
+rm -rf a || framework_failure
+
+cp "${f6b1_lz}" out.lz || framework_failure
+"${LZIPRECOVER}" -B out.lz || test_failed $LINENO
+[ -e out_fixed.lz ] || test_failed $LINENO
+mv out.lz out.tar.lz || framework_failure
+"${LZIPRECOVER}" -B out.tar.lz || test_failed $LINENO
+[ -e out_fixed.tar.lz ] || test_failed $LINENO
+mv out.tar.lz out.tlz || framework_failure
+"${LZIPRECOVER}" -B out.tlz || test_failed $LINENO
+[ -e out_fixed.tlz ] || test_failed $LINENO
+rm -f out.tlz out_fixed.lz out_fixed.tar.lz out_fixed.tlz ||
+ framework_failure
+
+printf "\ntesting --fec..."
+
+"${LZIPRECOVER}" -Ft "${in_lz}" || test_failed $LINENO
+"${LZIPRECOVER}" -Fc "${in_lz}" -o fecfile.fec || test_failed $LINENO
+cmp "${in_lz}".fec fecfile.fec || test_failed $LINENO
+"${LZIPRECOVER}" -Fc -cn1 "${in_lz}" | cmp fecfile.fec - || test_failed $LINENO
+"${LZIPRECOVER}" -Fc -cn4 "${in_lz}" | cmp fecfile.fec - || test_failed $LINENO
+"${LZIPRECOVER}" -Fc -c --gf16 "${in_lz}" | cmp "${in_lz}".fec16 - ||
+ test_failed $LINENO
+for i in "${bad1_lz}" "${bad2_lz}" "${bad3_lz}" "${bad4_lz}" "${bad5_lz}" \
+ "${bad6_lz}" "${bad7_lz}" "${bad8_lz}" "${bad9_lz}" ; do
+ "${LZIPRECOVER}" -q -Fr --fec-file=fecfile.fec "$i" -o fixed.lz ||
+ test_failed $LINENO "$i"
+ cmp "${in_lz}" "fixed.lz" || test_failed $LINENO "$i"
+ rm -f fixed.lz || framework_failure
+done
+"${LZIPRECOVER}" -Fc "${in_lz}" -o fec/ || test_failed $LINENO
+cmp fec/test.txt.lz.fec fecfile.fec || test_failed $LINENO
+"${LZIPRECOVER}" -Ft "${in_lz}" --fec-file=fec/ || test_failed $LINENO
+rm -rf fec || framework_failure
+mkdir a
+mkdir a/bfec
+mkdir a/b-fec
+mkdir a/b.fec
+mkdir a/b_fec
+cp fox6.lz "${in_lz}" a || framework_failure
+cp "${fox_lz}" a/bfec || framework_failure
+cp "${fox_lz}" a/b-fec || framework_failure
+cp "${fox_lz}" a/b.fec || framework_failure
+cp "${fox_lz}" a/b_fec || framework_failure
+"${LZIPRECOVER}" -r -Fc a/ -o fec/ || test_failed $LINENO
+[ -e fec/fox6.lz.fec ] || test_failed $LINENO
+[ -e fec/bfec/fox.lz.fec ] || test_failed $LINENO
+[ ! -e fec/b-fec ] || test_failed $LINENO
+[ ! -e fec/b.fec ] || test_failed $LINENO
+[ ! -e fec/b_fec ] || test_failed $LINENO
+cmp fec/test.txt.lz.fec fecfile.fec || test_failed $LINENO
+"${LZIPRECOVER}" -r -Fc a -o fec/ || test_failed $LINENO
+[ -e fec/a/fox6.lz.fec ] || test_failed $LINENO
+[ -e fec/a/bfec/fox.lz.fec ] || test_failed $LINENO
+[ ! -e fec/a/b-fec ] || test_failed $LINENO
+[ ! -e fec/a/b.fec ] || test_failed $LINENO
+[ ! -e fec/a/b_fec ] || test_failed $LINENO
+cmp fec/a/test.txt.lz.fec fecfile.fec || test_failed $LINENO
+rm -rf a fec fecfile.fec || framework_failure
+
printf "\ntesting --merge..."
rm -f out.lz || framework_failure
-"${LZIPRECOVER}" -m -o out.lz "${fox6_lz}" "${f6b1_lz}" || test_failed $LINENO
+"${LZIPRECOVER}" -m -o out.lz fox6.lz "${f6b1_lz}" || test_failed $LINENO
[ ! -e out.lz ] || test_failed $LINENO
-"${LZIPRECOVER}" -m -o out.lz "${f6b1_lz}" "${fox6_lz}" || test_failed $LINENO
+"${LZIPRECOVER}" -m -o out.lz "${f6b1_lz}" fox6.lz || test_failed $LINENO
[ ! -e out.lz ] || test_failed $LINENO
"${LZIPRECOVER}" -m -o out.lz "${bad1_lz}" "${bad2_lz}" "${bad1_lz}" -q
[ $? = 2 ] || test_failed $LINENO
@@ -617,7 +757,7 @@ rm -f out.lz || framework_failure
"${LZIPRECOVER}" -m -o out.lz "${bad1_lz}" "${bad2_lz}" "${bad2_lz}" -q
[ $? = 2 ] || test_failed $LINENO
[ ! -e out.lz ] || test_failed $LINENO
-cat "${bad2_lz}" > bad2.lz || framework_failure
+cp "${bad2_lz}" bad2.lz || framework_failure
"${LZIPRECOVER}" -m -o out.lz "${bad1_lz}" "${bad2_lz}" bad2.lz -q
[ $? = 2 ] || test_failed $LINENO
[ ! -e out.lz ] || test_failed $LINENO
@@ -633,56 +773,56 @@ rm -f bad2.lz || framework_failure
[ ! -e out.lz ] || test_failed $LINENO
"${LZIPRECOVER}" -mf -o out.lz "${f6b1_lz}" "${f6b4_lz}" || test_failed $LINENO
-cmp "${fox6_lz}" out.lz || test_failed $LINENO
+cmp fox6.lz out.lz || test_failed $LINENO
"${LZIPRECOVER}" -mf -o out.lz "${f6b4_lz}" "${f6b1_lz}" || test_failed $LINENO
-cmp "${fox6_lz}" out.lz || test_failed $LINENO
+cmp fox6.lz out.lz || test_failed $LINENO
"${LZIPRECOVER}" -m -o a/b/c/out.lz "${f6b1_lz}" "${f6b4_lz}" ||
test_failed $LINENO
-cmp "${fox6_lz}" a/b/c/out.lz || test_failed $LINENO
+cmp fox6.lz a/b/c/out.lz || test_failed $LINENO
rm -rf a || framework_failure
for i in "${f6b1_lz}" "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" "${f6b6_lz}" ; do
"${LZIPRECOVER}" -mf -o out.lz "${f6b2_lz}" "$i" ||
test_failed $LINENO "$i"
- cmp "${fox6_lz}" out.lz || test_failed $LINENO "$i"
+ cmp fox6.lz out.lz || test_failed $LINENO "$i"
"${LZIPRECOVER}" -mf -o out.lz "$i" "${f6b2_lz}" ||
test_failed $LINENO "$i"
- cmp "${fox6_lz}" out.lz || test_failed $LINENO "$i"
+ cmp fox6.lz out.lz || test_failed $LINENO "$i"
done
for i in "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" "${f6b6_lz}" ; do
"${LZIPRECOVER}" -mf -o out.lz "${f6b1_lz}" "${f6b2_lz}" "$i" ||
test_failed $LINENO "$i"
- cmp "${fox6_lz}" out.lz || test_failed $LINENO "$i"
+ cmp fox6.lz out.lz || test_failed $LINENO "$i"
"${LZIPRECOVER}" -mf -o out.lz "${f6b1_lz}" "$i" "${f6b2_lz}" ||
test_failed $LINENO "$i"
- cmp "${fox6_lz}" out.lz || test_failed $LINENO "$i"
+ cmp fox6.lz out.lz || test_failed $LINENO "$i"
"${LZIPRECOVER}" -mf -o out.lz "${f6b2_lz}" "${f6b1_lz}" "$i" ||
test_failed $LINENO "$i"
- cmp "${fox6_lz}" out.lz || test_failed $LINENO "$i"
+ cmp fox6.lz out.lz || test_failed $LINENO "$i"
"${LZIPRECOVER}" -mf -o out.lz "${f6b2_lz}" "$i" "${f6b1_lz}" ||
test_failed $LINENO "$i"
- cmp "${fox6_lz}" out.lz || test_failed $LINENO "$i"
+ cmp fox6.lz out.lz || test_failed $LINENO "$i"
"${LZIPRECOVER}" -mf -o out.lz "$i" "${f6b1_lz}" "${f6b2_lz}" ||
test_failed $LINENO "$i"
- cmp "${fox6_lz}" out.lz || test_failed $LINENO "$i"
+ cmp fox6.lz out.lz || test_failed $LINENO "$i"
"${LZIPRECOVER}" -mf -o out.lz "$i" "${f6b2_lz}" "${f6b1_lz}" ||
test_failed $LINENO "$i"
- cmp "${fox6_lz}" out.lz || test_failed $LINENO "$i"
+ cmp fox6.lz out.lz || test_failed $LINENO "$i"
done
"${LZIPRECOVER}" -mf -o out.lz "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" ||
test_failed $LINENO
-cmp "${fox6_lz}" out.lz || test_failed $LINENO
+cmp fox6.lz out.lz || test_failed $LINENO
"${LZIPRECOVER}" -mf -o out.lz "${f6b1_lz}" "${f6b3_lz}" "${f6b4_lz}" \
"${f6b5_lz}" || test_failed $LINENO
-cmp "${fox6_lz}" out.lz || test_failed $LINENO
+cmp fox6.lz out.lz || test_failed $LINENO
"${LZIPRECOVER}" -mf -o out.lz "${f6b2_lz}" "${f6b3_lz}" "${f6b4_lz}" \
"${f6b5_lz}" || test_failed $LINENO
-cmp "${fox6_lz}" out.lz || test_failed $LINENO
+cmp fox6.lz out.lz || test_failed $LINENO
"${LZIPRECOVER}" -mf -o out.lz "${f6b1_lz}" "${f6b2_lz}" "${f6b3_lz}" \
"${f6b4_lz}" "${f6b5_lz}" || test_failed $LINENO
-cmp "${fox6_lz}" out.lz || test_failed $LINENO
+cmp fox6.lz out.lz || test_failed $LINENO
"${LZIPRECOVER}" -mf -o out.lz "${bad1_lz}" "${bad2_lz}" || test_failed $LINENO
cmp "${in_lz}" out.lz || test_failed $LINENO
@@ -736,6 +876,7 @@ cmp "${in_lz}" out.lz || test_failed $LINENO
"${LZIPRECOVER}" -mf -o out.lz "${bad5_lz}" "${bad4_lz}" "${bad3_lz}" ||
test_failed $LINENO
cmp "${in_lz}" out.lz || test_failed $LINENO
+rm -f out.lz || framework_failure
cat "${bad3_lz}" "${bad4_lz}" "${bad5_lz}" "${in_lz}" > bad345.lz || framework_failure
cat "${bad4_lz}" "${bad5_lz}" "${bad3_lz}" "${in_lz}" > bad453.lz || framework_failure
@@ -760,45 +901,10 @@ cmp in4.lz out4.lz || test_failed $LINENO
cmp in4.lz out4.lz || test_failed $LINENO
rm -f bad345.lz bad453.lz bad534.lz out4.lz || framework_failure
-printf "\ntesting --byte-repair..."
-
-rm -f out.lz || framework_failure
-"${LZIPRECOVER}" -R -o out.lz "${fox6_lz}" || test_failed $LINENO
-[ ! -e out.lz ] || test_failed $LINENO
-"${LZIPRECOVER}" -R -o out.lz "${bad2_lz}" -q
-[ $? = 2 ] || test_failed $LINENO
-[ ! -e out.lz ] || test_failed $LINENO
-"${LZIPRECOVER}" -R -o out.lz "${bad3_lz}" -q
-[ $? = 2 ] || test_failed $LINENO
-[ ! -e out.lz ] || test_failed $LINENO
-"${LZIPRECOVER}" -R -o out.lz "${bad4_lz}" -q
-[ $? = 2 ] || test_failed $LINENO
-[ ! -e out.lz ] || test_failed $LINENO
-"${LZIPRECOVER}" -Rf -o out.lz "${f6b1_lz}" || test_failed $LINENO
-cmp "${fox6_lz}" out.lz || test_failed $LINENO
-"${LZIPRECOVER}" -Rf -o out.lz "${bad1_lz}" || test_failed $LINENO
-cmp "${in_lz}" out.lz || test_failed $LINENO
-"${LZIPRECOVER}" -R -o a/b/c/out.lz "${bad1_lz}" || test_failed $LINENO
-cmp "${in_lz}" a/b/c/out.lz || test_failed $LINENO
-rm -rf a || framework_failure
-
-cat "${f6b1_lz}" > out.lz || framework_failure
-"${LZIPRECOVER}" -R out.lz || test_failed $LINENO
-[ -e out_fixed.lz ] || test_failed $LINENO
-mv out.lz out.tar.lz || framework_failure
-"${LZIPRECOVER}" -R out.tar.lz || test_failed $LINENO
-[ -e out_fixed.tar.lz ] || test_failed $LINENO
-mv out.tar.lz out.tlz || framework_failure
-"${LZIPRECOVER}" -R out.tlz || test_failed $LINENO
-[ -e out_fixed.tlz ] || test_failed $LINENO
-rm -f out.tlz out_fixed.lz out_fixed.tar.lz out_fixed.tlz ||
- framework_failure
-
printf "\ntesting --reproduce..."
-if [ -z "${LZIP_NAME}" ] ; then LZIP_NAME=lzip ; fi
if /bin/sh -c "${LZIP_NAME} -s18KiB" < in > out 2> /dev/null &&
- cmp "${in_lz}" out > /dev/null 2>&1 ; then
+ cmp "${in_lz}" out ; then
rm -f out || framework_failure
"${LZIPRECOVER}" --reproduce --lzip-name="${LZIP_NAME}" -o out \
--reference-file=foo "${in_lz}" || test_failed $LINENO "${LZIP_NAME}"
@@ -812,19 +918,19 @@ if /bin/sh -c "${LZIP_NAME} -s18KiB" < in > out 2> /dev/null &&
for i in 6 7 8 9 ; do
for f in "${testdir}"/test_bad${i}.txt in ; do
rm -f out || framework_failure
- "${LZIPRECOVER}" -q --reproduce --lzip-name="${LZIP_NAME}" \
- --reference-file="$f" "${testdir}"/test_bad${i}.lz -o out ||
+ "${LZIPRECOVER}" --reproduce --lzip-name="${LZIP_NAME}" -o out \
+ --reference-file="$f" "${testdir}"/test_bad${i}.lz > /dev/null ||
test_failed $LINENO "${LZIP_NAME} $i $f"
cmp "${in_lz}" out || test_failed $LINENO "${LZIP_NAME} $i $f"
rm -f out || framework_failure
- "${LZIPRECOVER}" -q --reproduce --lzip-name="${LZIP_NAME}" \
- --reference-file="$f" "${testdir}"/test_bad${i}.lz -o out \
- --lzip-level=6 || test_failed $LINENO "${LZIP_NAME} $i $f level=6"
+ "${LZIPRECOVER}" --reproduce --lzip-name="${LZIP_NAME}" -o out \
+ --lzip-level=6 --reference-file="$f" "${testdir}"/test_bad${i}.lz \
+ > /dev/null || test_failed $LINENO "${LZIP_NAME} $i $f level=6"
cmp "${in_lz}" out || test_failed $LINENO "${LZIP_NAME} $i $f level=6"
rm -f out || framework_failure
- "${LZIPRECOVER}" -q --reproduce --lzip-name="${LZIP_NAME}" \
- --reference-file="$f" "${testdir}"/test_bad${i}.lz -o out \
- --lzip-level=m36 || test_failed $LINENO "${LZIP_NAME} $i $f level=m36"
+ "${LZIPRECOVER}" --reproduce --lzip-name="${LZIP_NAME}" -o out \
+ --lzip-level=m36 --reference-file="$f" "${testdir}"/test_bad${i}.lz \
+ > /dev/null || test_failed $LINENO "${LZIP_NAME} $i $f level=m36"
cmp "${in_lz}" out || test_failed $LINENO "${LZIP_NAME} $i $f level=m36"
done
done
@@ -834,8 +940,8 @@ if /bin/sh -c "${LZIP_NAME} -s18KiB" < in > out 2> /dev/null &&
framework_failure
rm -f out || framework_failure
for i in 6 7 8 9 ; do # reproduce one member each time
- "${LZIPRECOVER}" -q --reproduce --lzip-name="${LZIP_NAME}" \
- --reference-file="${testdir}"/test_bad${i}.txt mm_bad.lz -o out ||
+ "${LZIPRECOVER}" --reproduce --lzip-name="${LZIP_NAME}" mm_bad.lz \
+ --reference-file="${testdir}"/test_bad${i}.txt -o out > /dev/null ||
test_failed $LINENO "${LZIP_NAME} $i"
mv out mm_bad.lz || framework_failure
done
@@ -846,22 +952,23 @@ if /bin/sh -c "${LZIP_NAME} -s18KiB" < in > out 2> /dev/null &&
framework_failure
rm -f out || framework_failure
for i in 6 7 8 9 ; do # reproduce one member each time
- "${LZIPRECOVER}" -q --reproduce --lzip-name="${LZIP_NAME}" -o out \
- --reference-file=in mm_bad.lz || test_failed $LINENO "${LZIP_NAME} $i"
+ "${LZIPRECOVER}" --reproduce --lzip-name="${LZIP_NAME}" mm_bad.lz -o out \
+ --reference-file=in > /dev/null || test_failed $LINENO "${LZIP_NAME} $i"
mv out mm_bad.lz || framework_failure
done
cmp in4.lz mm_bad.lz || test_failed $LINENO "${LZIP_NAME}"
rm -f mm_bad.lz || framework_failure
- "${LZIPRECOVER}" -q --debug-reproduce=13-7356 --lzip-name="${LZIP_NAME}" \
- --reference-file=in "${in_lz}" || test_failed $LINENO "${LZIP_NAME}"
+ "${LZIPRECOVER}" --reference-file=in "${in_lz}" --lzip-name="${LZIP_NAME}" \
+ --debug-reproduce=13-7356 > /dev/null || test_failed $LINENO "${LZIP_NAME}"
"${LZIPRECOVER}" --debug-reproduce=512,5120,512 --lzip-name="${LZIP_NAME}" \
-q --reference-file=in "${in_lz}" || test_failed $LINENO "${LZIP_NAME}"
else
- printf "\nwarning: skipping --reproduce test: ${LZIP_NAME} not found or not the right version.\n"
- ${LZIP_NAME} -V
- printf "\nTry 'make LZIP_NAME=<name_of_lzip_executable> check'."
+ printf "warning: skipping --reproduce test: "
+ printf "${LZIP_NAME} not found, not the right compressor, not the right version, or this is not a POSIX system.\n"
+ if ${LZIP_NAME} -V > /dev/null 2>&1 ; then ${LZIP_NAME} -V | sed -e 1q
+ else printf "Try 'make LZIP_NAME=<name_of_lzip_executable> check'.\n" ; fi
fi
rm -f in4.lz || framework_failure
@@ -878,7 +985,7 @@ done
cat a/b/c/rec*in9.lz | cmp in9.lz - || test_failed $LINENO
rm -rf a || framework_failure
-cat in9.lz > in9t.lz || framework_failure
+cp in9.lz in9t.lz || framework_failure
printf "garbage" >> in9t.lz || framework_failure
"${LZIPRECOVER}" -s in9t.lz || test_failed $LINENO
for i in 01 02 03 04 05 06 07 08 09 ; do
@@ -1011,9 +1118,194 @@ printf "g" | cmp rec2ingin.lz - || test_failed $LINENO
cat rec*ingin.lz | cmp ingin.lz - || test_failed $LINENO
rm -f rec*ingin.lz || framework_failure
-printf "\ntesting --*=damaged..."
+printf "\ntesting --dump/remove/strip..."
+
+"${LZIPRECOVER}" --dump=1 "${in_lz}" -o a/b/c/out.lz || test_failed $LINENO
+cmp "${in_lz}" a/b/c/out.lz || test_failed $LINENO
+rm -rf a || framework_failure
+
+"${LZIPRECOVER}" -s "${num_lz}" -o num.lz || test_failed $LINENO
+[ -e rec9num.lz ] || test_failed $LINENO
+[ ! -e rec10num.lz ] || test_failed $LINENO
+cat rec*num.lz | cmp "${num_lz}" - || test_failed $LINENO
+for i in 1 2 3 4 5 6 7 8 9 ; do
+ "${LZIPRECOVER}" --dump=$i "${num_lz}" | cmp rec${i}num.lz - ||
+ test_failed $LINENO $i
+ "${LZIPRECOVER}" --strip=^$i "${num_lz}" | cmp rec${i}num.lz - ||
+ test_failed $LINENO $i
+ cp "${num_lz}" num.lz || framework_failure
+ "${LZIPRECOVER}" --remove=^$i num.lz || test_failed $LINENO $i
+ cmp rec${i}num.lz num.lz || test_failed $LINENO $i
+done
+"${LZIPRECOVER}" -q --dump=1 in "${num_lz}" > out
+[ $? = 2 ] || test_failed $LINENO
+cmp rec1num.lz out || test_failed $LINENO
+"${LZIPRECOVER}" -q --strip=^1 in "${num_lz}" > out
+[ $? = 2 ] || test_failed $LINENO
+cmp rec1num.lz out || test_failed $LINENO
+
+"${LZIPRECOVER}" --dump=r1 "${num_lz}" | cmp rec9num.lz - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --dump=d:r3 "${num_lz}" | cmp rec7num.lz - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --dump=r5:d "${num_lz}" | cmp rec5num.lz - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --dump=t:r9 "${num_lz}" | cmp rec1num.lz - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --strip=r^1:t "${num_lz}" | cmp rec9num.lz - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --strip=d:r^3:t "${num_lz}" | cmp rec7num.lz - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --strip=r^5:d:t "${num_lz}" | cmp rec5num.lz - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --strip=d:t:r^9 "${num_lz}" | cmp rec1num.lz - ||
+ test_failed $LINENO
+
+"${LZIPRECOVER}" --dump=1,5 "${num_lz}" > out || test_failed $LINENO
+cat rec1num.lz rec5num.lz | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" --dump=3,6 "${num_lz}" > out || test_failed $LINENO
+cat rec3num.lz rec6num.lz | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" --dump=2-4 "${num_lz}" > out || test_failed $LINENO
+cat rec2num.lz rec3num.lz rec4num.lz | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" --dump=4,6,8 "${num_lz}" > out || test_failed $LINENO
+cat rec4num.lz rec6num.lz rec8num.lz | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" --strip=^1,5 "${num_lz}" > out || test_failed $LINENO
+cat rec1num.lz rec5num.lz | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" --strip=^3,6 "${num_lz}" > out || test_failed $LINENO
+cat rec3num.lz rec6num.lz | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" --strip=^2-4 "${num_lz}" > out || test_failed $LINENO
+cat rec2num.lz rec3num.lz rec4num.lz | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" --strip=^4,6,8 "${num_lz}" > out || test_failed $LINENO
+cat rec4num.lz rec6num.lz rec8num.lz | cmp out - || test_failed $LINENO
+
+# create a subset tarlz archive
+"${LZIPRECOVER}" --dump=1-2:r1:t "${num_lz}" > out || test_failed $LINENO
+cat rec1num.lz rec2num.lz rec9num.lz | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" --dump=4-5:r1:t "${num_lz}" > out || test_failed $LINENO
+cat rec4num.lz rec5num.lz rec9num.lz | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" --dump=7-8:r1:t "${num_lz}" > out || test_failed $LINENO
+cat rec7num.lz rec8num.lz rec9num.lz | cmp out - || test_failed $LINENO
+
+"${LZIPRECOVER}" --dump=1-9 "${num_lz}" | cmp "${num_lz}" - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --dump=r1-9 "${num_lz}" | cmp "${num_lz}" - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --dump=1-1000 "${num_lz}" | cmp "${num_lz}" - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --dump=r1-1000 "${num_lz}" | cmp "${num_lz}" - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --dump=1-4:r1-4:5 "${num_lz}" | cmp "${num_lz}" - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --dump=^10 "${num_lz}" | cmp "${num_lz}" - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --strip=^1-9 "${num_lz}" | cmp "${num_lz}" - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --strip=r^1-9 "${num_lz}" | cmp "${num_lz}" - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --strip=^1-1000 "${num_lz}" | cmp "${num_lz}" - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --strip=r^1-1000 "${num_lz}" | cmp "${num_lz}" - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --strip=^1-4:r^1-4:^5 "${num_lz}" | cmp "${num_lz}" - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --strip=10 "${num_lz}" | cmp "${num_lz}" - ||
+ test_failed $LINENO
+
+"${LZIPRECOVER}" -i --dump=r1 "${nbt_lz}" | cmp rec9num.lz - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" -i --dump=r3 "${nbt_lz}" | cmp rec7num.lz - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" -i --dump=r7 "${nbt_lz}" | cmp rec4num.lz - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" -i --strip=r^1:t "${nbt_lz}" | cmp rec9num.lz - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" -i --strip=r^3:t "${nbt_lz}" | cmp rec7num.lz - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" -i --strip=r^7:t "${nbt_lz}" | cmp rec4num.lz - ||
+ test_failed $LINENO
+
+"${LZIPRECOVER}" -i --dump=4 -f -o out "${nbt_lz}" || test_failed $LINENO
+printf "gap" | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" -i --dump=8 "${nbt_lz}" > out || test_failed $LINENO
+printf "damaged" | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" -i --dump=tdata "${nbt_lz}" > out || test_failed $LINENO
+printf "trailing data" | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" -i --dump=4:t "${nbt_lz}" > out || test_failed $LINENO
+printf "gaptrailing data" | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" -i --dump=4,8:t "${nbt_lz}" > out || test_failed $LINENO
+printf "gapdamagedtrailing data" | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" -i --dump=4,8 "${nbt_lz}" > out || test_failed $LINENO
+printf "gapdamaged" | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" -i --dump=damaged "${nbt_lz}" > out || test_failed $LINENO
+printf "gapdamaged" | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" -i --dump=d:t "${nbt_lz}" > out || test_failed $LINENO
+printf "gapdamagedtrailing data" | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" -i --strip=^4:t -f -o out "${nbt_lz}" || test_failed $LINENO
+printf "gap" | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" -i --strip=^8:t "${nbt_lz}" > out || test_failed $LINENO
+printf "damaged" | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" -i --strip=1-11 "${nbt_lz}" > out || test_failed $LINENO
+cmp empty out || test_failed $LINENO
+"${LZIPRECOVER}" -i --strip=^4 "${nbt_lz}" > out || test_failed $LINENO
+printf "gaptrailing data" | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" -i --strip=^4,8 "${nbt_lz}" > out || test_failed $LINENO
+printf "gapdamagedtrailing data" | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" -i --strip=^4,8:t "${nbt_lz}" > out || test_failed $LINENO
+printf "gapdamaged" | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" -i --strip=r^4,8:t "${nbt_lz}" > out || test_failed $LINENO
+printf "gapdamaged" | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" -i --strip=r^4,8 "${nbt_lz}" > out || test_failed $LINENO
+printf "gapdamagedtrailing data" | cmp out - || test_failed $LINENO
+
+cp "${num_lz}" num.lz || framework_failure
+"${LZIPRECOVER}" --remove=1-3,5,7,9 num.lz || test_failed $LINENO
+cat rec4num.lz rec6num.lz rec8num.lz | cmp num.lz - || test_failed $LINENO
+cp "${num_lz}" num.lz || framework_failure
+"${LZIPRECOVER}" --remove=^4,6,8 num.lz || test_failed $LINENO
+cat rec4num.lz rec6num.lz rec8num.lz | cmp num.lz - || test_failed $LINENO
+cp "${num_lz}" num.lz || framework_failure
+"${LZIPRECOVER}" --remove=r1,3,5,7-9 num.lz || test_failed $LINENO
+cat rec4num.lz rec6num.lz rec8num.lz | cmp num.lz - || test_failed $LINENO
+cp "${num_lz}" num.lz || framework_failure
+"${LZIPRECOVER}" --remove=r^2,4,6 num.lz || test_failed $LINENO
+cat rec4num.lz rec6num.lz rec8num.lz | cmp num.lz - || test_failed $LINENO
+rm -f num.lz || framework_failure
+
+cp "${nbt_lz}" nbt.lz || framework_failure
+"${LZIPRECOVER}" -i --remove=4,8:tdata nbt.lz || test_failed $LINENO
+cmp "${num_lz}" nbt.lz || test_failed $LINENO
+cp "${nbt_lz}" nbt.lz || framework_failure
+"${LZIPRECOVER}" -i --remove=r4,8:tdata nbt.lz || test_failed $LINENO
+cmp "${num_lz}" nbt.lz || test_failed $LINENO
+cp "${nbt_lz}" nbt.lz || framework_failure
+"${LZIPRECOVER}" --remove=damaged:tdata nbt.lz || test_failed $LINENO
+cmp "${num_lz}" nbt.lz || test_failed $LINENO
+rm -f rec*num.lz nbt.lz || framework_failure
-cat "${in_lz}" > in.lz || framework_failure
+for i in 1 2 3 4 5 6 7 8 9 10 ; do
+ "${LZIPRECOVER}" -i --strip=1-$i "${nbt_lz}" > out ||
+ test_failed $LINENO $i
+ cp "${nbt_lz}" nbt.lz || framework_failure
+ "${LZIPRECOVER}" -i --remove=1-$i nbt.lz || test_failed $LINENO $i
+ cmp nbt.lz out || test_failed $LINENO $i
+done
+rm -f nbt.lz || framework_failure
+
+cat "${fox_lz}" "${fox_lz}" "${fox_lz}" > fox3.lz || framework_failure
+cp f3_em.lz out.lz || framework_failure
+"${LZIPRECOVER}" --remove=empty out.lz || test_failed $LINENO
+cmp fox3.lz out.lz || test_failed $LINENO
+rm -f out.lz || framework_failure
+"${LZIPRECOVER}" --dump=2,4,7 f3_em.lz | cmp fox3.lz - || test_failed $LINENO
+"${LZIPRECOVER}" --strip=e f3_em.lz | cmp fox3.lz - || test_failed $LINENO
+"${LZIPRECOVER}" --strip=1,3,5-6,8 f3_em.lz | cmp fox3.lz - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --dump=empty f3_em.lz | "${LZIP}" -d | cmp empty - ||
+ test_failed $LINENO
+
+printf "\ntesting --dump/remove/strip=damaged..."
+
+cp "${in_lz}" in.lz || framework_failure
cat "${in_lz}" in > int.lz || framework_failure
"${LZIPRECOVER}" --dump=damaged in.lz > out || test_failed $LINENO
cmp empty out || test_failed $LINENO
@@ -1043,7 +1335,7 @@ cmp in9.lz out || test_failed $LINENO
cmp in9t.lz out || test_failed $LINENO
"${LZIPRECOVER}" --remove=damaged in9t.lz || test_failed $LINENO
cat in9.lz in | cmp in9t.lz - || test_failed $LINENO
-cat in9.lz > in9t.lz || framework_failure
+cp in9.lz in9t.lz || framework_failure
"${LZIPRECOVER}" --remove=damaged in9t.lz || test_failed $LINENO
cmp in9.lz in9t.lz || test_failed $LINENO
rm -f in9t.lz || framework_failure
@@ -1068,11 +1360,11 @@ cmp "${f6b1_lz}" out || test_failed $LINENO
cat "${f6b1_lz}" in > f6bt.lz || framework_failure
"${LZIPRECOVER}" --dump=damaged f6bt.lz > out || test_failed $LINENO
cmp "${f6b1_lz}" out || test_failed $LINENO
-"${LZIPRECOVER}" -q --strip=damaged "${f6b1_lz}" > out || test_failed $LINENO
+"${LZIPRECOVER}" --strip=damaged "${f6b1_lz}" > out || test_failed $LINENO
cmp empty out || test_failed $LINENO
-"${LZIPRECOVER}" -q --strip=damaged f6bt.lz > out || test_failed $LINENO
+"${LZIPRECOVER}" --strip=damaged f6bt.lz > out || test_failed $LINENO
cmp empty out || test_failed $LINENO
-cat "${f6b1_lz}" > f6b.lz || framework_failure
+cp "${f6b1_lz}" f6b.lz || framework_failure
"${LZIPRECOVER}" -q --remove=damaged f6b.lz
[ $? = 2 ] || test_failed $LINENO
cmp "${f6b1_lz}" f6b.lz || test_failed $LINENO
@@ -1094,7 +1386,7 @@ cat "${fox_lz}" "${fox_lz}" "${fox_lz}" "${fox_lz}" "${fox_lz}" > fox5.lz ||
cmp fox5.lz out || test_failed $LINENO
"${LZIPRECOVER}" --strip=damaged f6bt.lz > out || test_failed $LINENO
cat fox5.lz in | cmp out - || test_failed $LINENO
-cat "${f6b2_lz}" > f6b.lz || framework_failure
+cp "${f6b2_lz}" f6b.lz || framework_failure
"${LZIPRECOVER}" --remove=damaged f6b.lz || test_failed $LINENO
cmp fox5.lz f6b.lz || test_failed $LINENO
"${LZIPRECOVER}" --remove=damaged f6bt.lz || test_failed $LINENO
@@ -1108,12 +1400,11 @@ cat "${f6b3_lz}" in > f6bt.lz || framework_failure
"${LZIPRECOVER}" --dump=damaged f6bt.lz > out || test_failed $LINENO
cat "${fox_lz}" "${fox_lz}" out "${fox_lz}" | cmp "${f6b3_lz}" - ||
test_failed $LINENO
-cat "${fox_lz}" "${fox_lz}" "${fox_lz}" > fox3.lz || framework_failure
"${LZIPRECOVER}" --strip=damaged "${f6b3_lz}" > out || test_failed $LINENO
cmp fox3.lz out || test_failed $LINENO
"${LZIPRECOVER}" --strip=damaged f6bt.lz > out || test_failed $LINENO
cat fox3.lz in | cmp out - || test_failed $LINENO
-cat "${f6b3_lz}" > f6b.lz || framework_failure
+cp "${f6b3_lz}" f6b.lz || framework_failure
"${LZIPRECOVER}" --remove=damaged f6b.lz || test_failed $LINENO
cmp fox3.lz f6b.lz || test_failed $LINENO
"${LZIPRECOVER}" --remove=damaged f6bt.lz || test_failed $LINENO
@@ -1134,7 +1425,7 @@ for i in "${f6b4_lz}" "${f6b5_lz}" ; do
"${LZIPRECOVER}" --strip=damaged f6bt.lz > out ||
test_failed $LINENO "$i"
cmp fox4.lz out || test_failed $LINENO "$i"
- cat "$i" > f6b.lz || framework_failure
+ cp "$i" f6b.lz || framework_failure
"${LZIPRECOVER}" --remove=damaged f6b.lz || test_failed $LINENO "$i"
cmp fox4.lz f6b.lz || test_failed $LINENO "$i"
"${LZIPRECOVER}" --remove=damaged f6bt.lz || test_failed $LINENO "$i"
@@ -1151,7 +1442,7 @@ cat fox5.lz out | cmp "${f6b6_lz}" - || test_failed $LINENO
cmp fox5.lz out || test_failed $LINENO
"${LZIPRECOVER}" --strip=damaged f6bt.lz > out || test_failed $LINENO
cat fox5.lz in | cmp out - || test_failed $LINENO
-cat "${f6b6_lz}" > f6b.lz || framework_failure
+cp "${f6b6_lz}" f6b.lz || framework_failure
"${LZIPRECOVER}" --remove=damaged f6b.lz || test_failed $LINENO
cmp fox5.lz f6b.lz || test_failed $LINENO
"${LZIPRECOVER}" --remove=damaged f6bt.lz || test_failed $LINENO
@@ -1165,13 +1456,12 @@ for i in "${f6s1_lz}" "${f6s2_lz}" ; do
"${LZIPRECOVER}" --dump=damaged f6bt.lz > out ||
test_failed $LINENO "$i"
cmp "$i" out || test_failed $LINENO "$i"
- "${LZIPRECOVER}" -q --strip=damaged "$i" > out ||
- test_failed $LINENO "$i"
+ "${LZIPRECOVER}" --strip=damaged "$i" > out || test_failed $LINENO "$i"
cmp empty out || test_failed $LINENO "$i"
- "${LZIPRECOVER}" -q --strip=damaged f6bt.lz > out ||
+ "${LZIPRECOVER}" --strip=damaged f6bt.lz > out ||
test_failed $LINENO "$i"
cmp empty out || test_failed $LINENO "$i"
- cat "$i" > f6b.lz || framework_failure
+ cp "$i" f6b.lz || framework_failure
"${LZIPRECOVER}" -q --remove=damaged f6b.lz
[ $? = 2 ] || test_failed $LINENO "$i"
cmp "$i" f6b.lz || test_failed $LINENO "$i"
@@ -1193,13 +1483,13 @@ for i in "${f6s3_lz}" "${f6s4_lz}" "${f6s5_lz}" "${f6s6_lz}" ; do
"${LZIPRECOVER}" --strip=damaged f6bt.lz > out ||
test_failed $LINENO "$i"
cat "$i" in | cmp out - || test_failed $LINENO "$i"
- cat "$i" > f6b.lz || framework_failure
+ cp "$i" f6b.lz || framework_failure
"${LZIPRECOVER}" --remove=damaged f6b.lz || test_failed $LINENO "$i"
cmp "$i" f6b.lz || test_failed $LINENO "$i"
"${LZIPRECOVER}" --remove=damaged f6bt.lz || test_failed $LINENO "$i"
cat "$i" in | cmp f6bt.lz - || test_failed $LINENO "$i"
done
-rm -f f6b.lz f6bt.lz || framework_failure
+rm -f f6b.lz f6bt.lz empty || framework_failure
cat ingin.lz "${inD}" > ingint.lz || framework_failure
"${LZIPRECOVER}" --dump=damaged ingin.lz > out || test_failed $LINENO
@@ -1210,7 +1500,7 @@ printf "g" | cmp out - || test_failed $LINENO
cmp in2.lz out || test_failed $LINENO
"${LZIPRECOVER}" --strip=damaged ingint.lz > out || test_failed $LINENO
cat "${in_lz}" "${in_lz}" "${inD}" | cmp out - || test_failed $LINENO
-cat ingin.lz > ingin2.lz || framework_failure
+cp ingin.lz ingin2.lz || framework_failure
"${LZIPRECOVER}" --remove=damaged ingin2.lz || test_failed $LINENO
cmp in2.lz ingin2.lz || test_failed $LINENO
"${LZIPRECOVER}" --remove=damaged ingint.lz || test_failed $LINENO
@@ -1231,47 +1521,48 @@ cat "${bad2_lz}" out "${bad2_lz}" out | cmp out4 - || test_failed $LINENO
"${bad2_lz}" > out4 || test_failed $LINENO
cat out "${bad2_lz}" out "${bad2_lz}" | cmp out4 - || test_failed $LINENO
#
-"${LZIPRECOVER}" -q --strip=damaged "${bad2_lz}" "${f6b2_lz}" > out ||
+"${LZIPRECOVER}" --strip=damaged "${bad2_lz}" "${f6b2_lz}" > out ||
test_failed $LINENO
cmp fox5.lz out || test_failed $LINENO
-"${LZIPRECOVER}" -q --strip=damaged bad2t.lz "${f6b2_lz}" > out ||
+"${LZIPRECOVER}" --strip=damaged bad2t.lz "${f6b2_lz}" > out ||
test_failed $LINENO
cmp fox5.lz out || test_failed $LINENO
-"${LZIPRECOVER}" -q --strip=damaged "${f6b2_lz}" bad2t.lz f6bt.lz > out ||
+"${LZIPRECOVER}" --strip=damaged "${f6b2_lz}" bad2t.lz f6bt.lz > out ||
test_failed $LINENO
cat fox5.lz fox5.lz in | cmp out - || test_failed $LINENO
-"${LZIPRECOVER}" -q --strip=damaged "${f6b2_lz}" f6bt.lz bad2t.lz > out ||
+"${LZIPRECOVER}" --strip=damaged "${f6b2_lz}" f6bt.lz bad2t.lz > out ||
test_failed $LINENO
cat fox5.lz fox5.lz | cmp out - || test_failed $LINENO
-"${LZIPRECOVER}" -q --strip=damaged f6bt.lz bad2t.lz > out ||
+"${LZIPRECOVER}" --strip=damaged f6bt.lz bad2t.lz > out ||
test_failed $LINENO
cmp fox5.lz out || test_failed $LINENO
-"${LZIPRECOVER}" -q --strip=damaged f6bt.lz "${in_lz}" > out ||
+"${LZIPRECOVER}" --strip=damaged f6bt.lz "${in_lz}" > out ||
test_failed $LINENO
cat fox5.lz "${in_lz}" | cmp out - || test_failed $LINENO
"${LZIPRECOVER}" --strip=damaged --strip=tdata f6bt.lz "${in_lz}" > out ||
test_failed $LINENO
cat fox5.lz "${in_lz}" | cmp out - || test_failed $LINENO
#
-cat "${f6b2_lz}" > f6b.lz || framework_failure
+cp "${f6b2_lz}" f6b.lz || framework_failure
"${LZIPRECOVER}" -q --remove=damaged f6b.lz bad2t.lz f6bt.lz
[ $? = 2 ] || test_failed $LINENO
cat "${bad2_lz}" in | cmp bad2t.lz - || test_failed $LINENO
cmp fox5.lz f6b.lz || test_failed $LINENO
cat fox5.lz in | cmp f6bt.lz - || test_failed $LINENO
cat "${bad2_lz}" in > bad2t.lz || framework_failure
-cat "${fox6_lz}" "${inD}" > fox6t.lz || framework_failure
+cat fox6.lz "${inD}" > fox6t.lz || framework_failure
cat "${f6b1_lz}" in > f6abt.lz || framework_failure
-cat "${f6b2_lz}" > f6b.lz || framework_failure
+cp "${f6b2_lz}" f6b.lz || framework_failure
cat "${f6b2_lz}" in > f6bt.lz || framework_failure
"${LZIPRECOVER}" -q --remove=d:t fox6t.lz f6abt.lz f6b.lz bad2t.lz f6bt.lz
[ $? = 2 ] || test_failed $LINENO
cat "${bad2_lz}" in | cmp bad2t.lz - || test_failed $LINENO
cat "${f6b1_lz}" in | cmp f6abt.lz - || test_failed $LINENO
-cmp "${fox6_lz}" fox6t.lz || test_failed $LINENO
+cmp fox6.lz fox6t.lz || test_failed $LINENO
cmp fox5.lz f6b.lz || test_failed $LINENO
cmp fox5.lz f6bt.lz || test_failed $LINENO
-rm -f fox6t.lz f6b.lz f6bt.lz bad2t.lz fox5.lz out2 out4 || framework_failure
+rm -f fox6t.lz f6b.lz f6bt.lz f6abt.lz bad2t.lz fox5.lz out2 out4 ||
+ framework_failure
printf "\ntesting trailing data..."
@@ -1281,7 +1572,7 @@ cmp "${inD}" out || test_failed $LINENO
rm -f out || framework_failure
"${LZIPRECOVER}" --dump=tdat int.lz -o out || test_failed $LINENO
cmp "${inD}" out || test_failed $LINENO
-cat "${fox6_lz}" "${inD}" > fox6t.lz || framework_failure
+cat fox6.lz "${inD}" > fox6t.lz || framework_failure
cat "${inD}" "${inD}" > inD2 || framework_failure
"${LZIPRECOVER}" --dump=tda int.lz fox6t.lz -f -o out || test_failed $LINENO
cmp inD2 out || test_failed $LINENO
@@ -1298,12 +1589,12 @@ rm -f out || framework_failure
"${LZIPRECOVER}" --strip=tdata int.lz -o out || test_failed $LINENO
cmp "${in_lz}" out || test_failed $LINENO
"${LZIPRECOVER}" --strip=tdata fox6t.lz -f -o out || test_failed $LINENO
-cmp "${fox6_lz}" out || test_failed $LINENO
+cmp fox6.lz out || test_failed $LINENO
"${LZIPRECOVER}" --strip=tdata int.lz int.lz -f -o out || test_failed $LINENO
cmp in2.lz out || test_failed $LINENO
rm -f in2.lz || framework_failure
"${LZIPRECOVER}" --strip=tdata int.lz fox6t.lz > out || test_failed $LINENO
-cat "${in_lz}" "${fox6_lz}" | cmp out - || test_failed $LINENO
+cat "${in_lz}" fox6.lz | cmp out - || test_failed $LINENO
"${LZIPRECOVER}" -q --strip=tdata ingint.lz > out # /dev/null returns 1 on OS/2
[ $? = 2 ] || test_failed $LINENO
"${LZIPRECOVER}" -i --strip=tdata ingint.lz > out || test_failed $LINENO
@@ -1311,11 +1602,11 @@ cmp ingin.lz out || test_failed $LINENO
"${LZIPRECOVER}" --remove=tdata int.lz fox6t.lz || test_failed $LINENO
cmp "${in_lz}" int.lz || test_failed $LINENO
-cmp "${fox6_lz}" fox6t.lz || test_failed $LINENO
+cmp fox6.lz fox6t.lz || test_failed $LINENO
"${LZIPRECOVER}" --remove=tdata int.lz || test_failed $LINENO
cmp "${in_lz}" int.lz || test_failed $LINENO
"${LZIPRECOVER}" --remove=tdata fox6t.lz || test_failed $LINENO
-cmp "${fox6_lz}" fox6t.lz || test_failed $LINENO
+cmp fox6.lz fox6t.lz || test_failed $LINENO
"${LZIPRECOVER}" -q --remove=tdata ingint.lz
[ $? = 2 ] || test_failed $LINENO
cmp -s ingin.lz ingint.lz && test_failed $LINENO
@@ -1326,208 +1617,18 @@ rm -f int.lz fox6t.lz ingint.lz ingin.lz || framework_failure
for i in "${f6s3_lz}" "${f6s4_lz}" "${f6s5_lz}" "${f6s6_lz}" ; do
"${LZIPRECOVER}" --strip=tdata "$i" > out || test_failed $LINENO "$i"
"${LZIPRECOVER}" --dump=tdata "$i" > tdata || test_failed $LINENO "$i"
- cmp "${fox6_lz}" out || test_failed $LINENO "$i"
+ cmp fox6.lz out || test_failed $LINENO "$i"
cat out tdata | cmp "$i" - || test_failed $LINENO "$i"
cat "$i" "${inD}" > f6t.lz || framework_failure
"${LZIPRECOVER}" --strip=tdata f6t.lz > out || test_failed $LINENO "$i"
"${LZIPRECOVER}" --dump=tdata f6t.lz > tdata || test_failed $LINENO "$i"
- cmp "${fox6_lz}" out || test_failed $LINENO "$i"
+ cmp fox6.lz out || test_failed $LINENO "$i"
cat out tdata | cmp f6t.lz - || test_failed $LINENO "$i"
"${LZIPRECOVER}" --remove=tdata f6t.lz || test_failed $LINENO "$i"
- cmp "${fox6_lz}" f6t.lz || test_failed $LINENO "$i"
+ cmp fox6.lz f6t.lz || test_failed $LINENO "$i"
rm -f out tdata f6t.lz || framework_failure
done
-printf "\ntesting --dump/remove/strip..."
-
-"${LZIPRECOVER}" --dump=1 "${in_lz}" -o a/b/c/out.lz || test_failed $LINENO
-cmp "${in_lz}" a/b/c/out.lz || test_failed $LINENO
-rm -rf a || framework_failure
-
-"${LZIPRECOVER}" -s "${num_lz}" -o num.lz || test_failed $LINENO
-[ -e rec9num.lz ] || test_failed $LINENO
-[ ! -e rec10num.lz ] || test_failed $LINENO
-cat rec*num.lz | cmp "${num_lz}" - || test_failed $LINENO
-for i in 1 2 3 4 5 6 7 8 9 ; do
- "${LZIPRECOVER}" --dump=$i "${num_lz}" | cmp rec${i}num.lz - ||
- test_failed $LINENO $i
- "${LZIPRECOVER}" --strip=^$i "${num_lz}" | cmp rec${i}num.lz - ||
- test_failed $LINENO $i
- cat "${num_lz}" > num.lz || framework_failure
- "${LZIPRECOVER}" --remove=^$i num.lz || test_failed $LINENO $i
- cmp rec${i}num.lz num.lz || test_failed $LINENO $i
-done
-"${LZIPRECOVER}" -q --dump=1 in "${num_lz}" > out
-[ $? = 2 ] || test_failed $LINENO
-cmp rec1num.lz out || test_failed $LINENO
-"${LZIPRECOVER}" -q --strip=^1 in "${num_lz}" > out
-[ $? = 2 ] || test_failed $LINENO
-cmp rec1num.lz out || test_failed $LINENO
-
-"${LZIPRECOVER}" --dump=r1 "${num_lz}" | cmp rec9num.lz - ||
- test_failed $LINENO
-"${LZIPRECOVER}" --dump=d:r3 "${num_lz}" | cmp rec7num.lz - ||
- test_failed $LINENO
-"${LZIPRECOVER}" --dump=r5:d "${num_lz}" | cmp rec5num.lz - ||
- test_failed $LINENO
-"${LZIPRECOVER}" --dump=t:r9 "${num_lz}" | cmp rec1num.lz - ||
- test_failed $LINENO
-"${LZIPRECOVER}" --strip=r^1:t "${num_lz}" | cmp rec9num.lz - ||
- test_failed $LINENO
-"${LZIPRECOVER}" --strip=d:r^3:t "${num_lz}" | cmp rec7num.lz - ||
- test_failed $LINENO
-"${LZIPRECOVER}" --strip=r^5:d:t "${num_lz}" | cmp rec5num.lz - ||
- test_failed $LINENO
-"${LZIPRECOVER}" --strip=d:t:r^9 "${num_lz}" | cmp rec1num.lz - ||
- test_failed $LINENO
-
-"${LZIPRECOVER}" --dump=1,5 "${num_lz}" > out || test_failed $LINENO
-cat rec1num.lz rec5num.lz | cmp out - || test_failed $LINENO
-"${LZIPRECOVER}" --dump=3,6 "${num_lz}" > out || test_failed $LINENO
-cat rec3num.lz rec6num.lz | cmp out - || test_failed $LINENO
-"${LZIPRECOVER}" --dump=2-4 "${num_lz}" > out || test_failed $LINENO
-cat rec2num.lz rec3num.lz rec4num.lz | cmp out - || test_failed $LINENO
-"${LZIPRECOVER}" --dump=4,6,8 "${num_lz}" > out || test_failed $LINENO
-cat rec4num.lz rec6num.lz rec8num.lz | cmp out - || test_failed $LINENO
-"${LZIPRECOVER}" --strip=^1,5 "${num_lz}" > out || test_failed $LINENO
-cat rec1num.lz rec5num.lz | cmp out - || test_failed $LINENO
-"${LZIPRECOVER}" --strip=^3,6 "${num_lz}" > out || test_failed $LINENO
-cat rec3num.lz rec6num.lz | cmp out - || test_failed $LINENO
-"${LZIPRECOVER}" --strip=^2-4 "${num_lz}" > out || test_failed $LINENO
-cat rec2num.lz rec3num.lz rec4num.lz | cmp out - || test_failed $LINENO
-"${LZIPRECOVER}" --strip=^4,6,8 "${num_lz}" > out || test_failed $LINENO
-cat rec4num.lz rec6num.lz rec8num.lz | cmp out - || test_failed $LINENO
-
-# create a subset tarlz archive
-"${LZIPRECOVER}" --dump=1-2:r1:t "${num_lz}" > out || test_failed $LINENO
-cat rec1num.lz rec2num.lz rec9num.lz | cmp out - || test_failed $LINENO
-"${LZIPRECOVER}" --dump=4-5:r1:t "${num_lz}" > out || test_failed $LINENO
-cat rec4num.lz rec5num.lz rec9num.lz | cmp out - || test_failed $LINENO
-"${LZIPRECOVER}" --dump=7-8:r1:t "${num_lz}" > out || test_failed $LINENO
-cat rec7num.lz rec8num.lz rec9num.lz | cmp out - || test_failed $LINENO
-
-"${LZIPRECOVER}" --dump=1-9 "${num_lz}" | cmp "${num_lz}" - ||
- test_failed $LINENO
-"${LZIPRECOVER}" --dump=r1-9 "${num_lz}" | cmp "${num_lz}" - ||
- test_failed $LINENO
-"${LZIPRECOVER}" --dump=1-1000 "${num_lz}" | cmp "${num_lz}" - ||
- test_failed $LINENO
-"${LZIPRECOVER}" --dump=r1-1000 "${num_lz}" | cmp "${num_lz}" - ||
- test_failed $LINENO
-"${LZIPRECOVER}" --dump=1-4:r1-4:5 "${num_lz}" | cmp "${num_lz}" - ||
- test_failed $LINENO
-"${LZIPRECOVER}" --dump=^10 "${num_lz}" | cmp "${num_lz}" - ||
- test_failed $LINENO
-"${LZIPRECOVER}" --strip=^1-9 "${num_lz}" | cmp "${num_lz}" - ||
- test_failed $LINENO
-"${LZIPRECOVER}" --strip=r^1-9 "${num_lz}" | cmp "${num_lz}" - ||
- test_failed $LINENO
-"${LZIPRECOVER}" --strip=^1-1000 "${num_lz}" | cmp "${num_lz}" - ||
- test_failed $LINENO
-"${LZIPRECOVER}" --strip=r^1-1000 "${num_lz}" | cmp "${num_lz}" - ||
- test_failed $LINENO
-"${LZIPRECOVER}" --strip=^1-4:r^1-4:^5 "${num_lz}" | cmp "${num_lz}" - ||
- test_failed $LINENO
-"${LZIPRECOVER}" --strip=10 "${num_lz}" | cmp "${num_lz}" - ||
- test_failed $LINENO
-
-"${LZIPRECOVER}" -i --dump=r1 "${nbt_lz}" | cmp rec9num.lz - ||
- test_failed $LINENO
-"${LZIPRECOVER}" -i --dump=r3 "${nbt_lz}" | cmp rec7num.lz - ||
- test_failed $LINENO
-"${LZIPRECOVER}" -i --dump=r7 "${nbt_lz}" | cmp rec4num.lz - ||
- test_failed $LINENO
-"${LZIPRECOVER}" -i --strip=r^1:t "${nbt_lz}" | cmp rec9num.lz - ||
- test_failed $LINENO
-"${LZIPRECOVER}" -i --strip=r^3:t "${nbt_lz}" | cmp rec7num.lz - ||
- test_failed $LINENO
-"${LZIPRECOVER}" -i --strip=r^7:t "${nbt_lz}" | cmp rec4num.lz - ||
- test_failed $LINENO
-
-"${LZIPRECOVER}" -i --dump=4 -f -o out "${nbt_lz}" || test_failed $LINENO
-printf "gap" | cmp out - || test_failed $LINENO
-"${LZIPRECOVER}" -i --dump=8 "${nbt_lz}" > out || test_failed $LINENO
-printf "damaged" | cmp out - || test_failed $LINENO
-"${LZIPRECOVER}" -i --dump=tdata "${nbt_lz}" > out || test_failed $LINENO
-printf "trailing data" | cmp out - || test_failed $LINENO
-"${LZIPRECOVER}" -i --dump=4:t "${nbt_lz}" > out || test_failed $LINENO
-printf "gaptrailing data" | cmp out - || test_failed $LINENO
-"${LZIPRECOVER}" -i --dump=4,8:t "${nbt_lz}" > out || test_failed $LINENO
-printf "gapdamagedtrailing data" | cmp out - || test_failed $LINENO
-"${LZIPRECOVER}" -i --dump=4,8 "${nbt_lz}" > out || test_failed $LINENO
-printf "gapdamaged" | cmp out - || test_failed $LINENO
-"${LZIPRECOVER}" -i --dump=damaged "${nbt_lz}" > out || test_failed $LINENO
-printf "gapdamaged" | cmp out - || test_failed $LINENO
-"${LZIPRECOVER}" -i --dump=d:t "${nbt_lz}" > out || test_failed $LINENO
-printf "gapdamagedtrailing data" | cmp out - || test_failed $LINENO
-"${LZIPRECOVER}" -i --strip=^4:t -f -o out "${nbt_lz}" || test_failed $LINENO
-printf "gap" | cmp out - || test_failed $LINENO
-"${LZIPRECOVER}" -i --strip=^8:t "${nbt_lz}" > out || test_failed $LINENO
-printf "damaged" | cmp out - || test_failed $LINENO
-"${LZIPRECOVER}" -i --strip=1-11 "${nbt_lz}" > out || test_failed $LINENO
-cmp empty out || test_failed $LINENO
-"${LZIPRECOVER}" -i --strip=^4 "${nbt_lz}" > out || test_failed $LINENO
-printf "gaptrailing data" | cmp out - || test_failed $LINENO
-"${LZIPRECOVER}" -i --strip=^4,8 "${nbt_lz}" > out || test_failed $LINENO
-printf "gapdamagedtrailing data" | cmp out - || test_failed $LINENO
-"${LZIPRECOVER}" -i --strip=^4,8:t "${nbt_lz}" > out || test_failed $LINENO
-printf "gapdamaged" | cmp out - || test_failed $LINENO
-"${LZIPRECOVER}" -i --strip=r^4,8:t "${nbt_lz}" > out || test_failed $LINENO
-printf "gapdamaged" | cmp out - || test_failed $LINENO
-"${LZIPRECOVER}" -i --strip=r^4,8 "${nbt_lz}" > out || test_failed $LINENO
-printf "gapdamagedtrailing data" | cmp out - || test_failed $LINENO
-
-cat "${num_lz}" > num.lz || framework_failure
-"${LZIPRECOVER}" --remove=1-3,5,7,9 num.lz || test_failed $LINENO
-cat rec4num.lz rec6num.lz rec8num.lz | cmp num.lz - || test_failed $LINENO
-cat "${num_lz}" > num.lz || framework_failure
-"${LZIPRECOVER}" --remove=^4,6,8 num.lz || test_failed $LINENO
-cat rec4num.lz rec6num.lz rec8num.lz | cmp num.lz - || test_failed $LINENO
-cat "${num_lz}" > num.lz || framework_failure
-"${LZIPRECOVER}" --remove=r1,3,5,7-9 num.lz || test_failed $LINENO
-cat rec4num.lz rec6num.lz rec8num.lz | cmp num.lz - || test_failed $LINENO
-cat "${num_lz}" > num.lz || framework_failure
-"${LZIPRECOVER}" --remove=r^2,4,6 num.lz || test_failed $LINENO
-cat rec4num.lz rec6num.lz rec8num.lz | cmp num.lz - || test_failed $LINENO
-
-cat "${nbt_lz}" > nbt.lz || framework_failure
-"${LZIPRECOVER}" -i --remove=4,8:tdata nbt.lz || test_failed $LINENO
-cmp "${num_lz}" nbt.lz || test_failed $LINENO
-cat "${nbt_lz}" > nbt.lz || framework_failure
-"${LZIPRECOVER}" -i --remove=r4,8:tdata nbt.lz || test_failed $LINENO
-cmp "${num_lz}" nbt.lz || test_failed $LINENO
-cat "${nbt_lz}" > nbt.lz || framework_failure
-"${LZIPRECOVER}" --remove=damaged:tdata nbt.lz || test_failed $LINENO
-cmp "${num_lz}" nbt.lz || test_failed $LINENO
-rm -f rec*num.lz nbt.lz || framework_failure
-
-for i in 1 2 3 4 5 6 7 8 9 10 ; do
- "${LZIPRECOVER}" -i --strip=1-$i "${nbt_lz}" > out ||
- test_failed $LINENO $i
- cat "${nbt_lz}" > nbt.lz || framework_failure
- "${LZIPRECOVER}" -i --remove=1-$i nbt.lz || test_failed $LINENO $i
- cmp nbt.lz out || test_failed $LINENO $i
-done
-rm -f nbt.lz || framework_failure
-
-cat "${in_em}" > test_3m.txt.lz || framework_failure
-"${LZIPRECOVER}" --remove=empty test_3m.txt.lz || test_failed $LINENO
-"${LZIPRECOVER}" -M test_3m.txt.lz | cmp "${testdir}"/test_3m.txt.lz.md5 - ||
- test_failed $LINENO
-"${LZIPRECOVER}" --dump=2,4,7 "${in_em}" | cmp test_3m.txt.lz - ||
- test_failed $LINENO
-"${LZIPRECOVER}" --strip=e "${in_em}" | cmp test_3m.txt.lz - ||
- test_failed $LINENO
-"${LZIPRECOVER}" --empty-error --strip=e "${in_em}" | cmp test_3m.txt.lz - ||
- test_failed $LINENO
-"${LZIPRECOVER}" --strip=1,3,5-6,8 "${in_em}" | cmp test_3m.txt.lz - ||
- test_failed $LINENO
-"${LZIPRECOVER}" -q --strip=1,3,5-6,8 --empty-error "${in_em}" > out
-[ $? = 2 ] || test_failed $LINENO
-"${LZIPRECOVER}" --dump=emp "${in_em}" | "${LZIP}" -d | cmp empty - ||
- test_failed $LINENO
-rm -f test_3m.txt.lz empty out || framework_failure
-
echo
if [ ${fail} = 0 ] ; then
echo "tests completed successfully."
diff --git a/testsuite/fox6.lz b/testsuite/fox6.lz
deleted file mode 100644
index 8401b99..0000000
--- a/testsuite/fox6.lz
+++ /dev/null
Binary files differ
diff --git a/testsuite/fox6_b1nz.lz b/testsuite/fox6_b1nz.lz
new file mode 100644
index 0000000..d841502
--- /dev/null
+++ b/testsuite/fox6_b1nz.lz
Binary files differ
diff --git a/testsuite/fox6_mark.lz b/testsuite/fox6_mark.lz
deleted file mode 100644
index 32b2ac0..0000000
--- a/testsuite/fox6_mark.lz
+++ /dev/null
Binary files differ
diff --git a/testsuite/fox_nz.lz b/testsuite/fox_nz.lz
new file mode 100644
index 0000000..44a4b58
--- /dev/null
+++ b/testsuite/fox_nz.lz
Binary files differ
diff --git a/testsuite/test.txt b/testsuite/test.txt
index 9196a3a..423f0c0 100644
--- a/testsuite/test.txt
+++ b/testsuite/test.txt
@@ -1,8 +1,7 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
- Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
- 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc. <http://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
@@ -339,8 +338,7 @@ Public License instead of this License.
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
- Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
- 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc. <http://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
diff --git a/testsuite/test.txt.lz b/testsuite/test.txt.lz
index 22cea6e..5dc169f 100644
--- a/testsuite/test.txt.lz
+++ b/testsuite/test.txt.lz
Binary files differ
diff --git a/testsuite/test.txt.lz.fec b/testsuite/test.txt.lz.fec
new file mode 100644
index 0000000..265ec6b
--- /dev/null
+++ b/testsuite/test.txt.lz.fec
Binary files differ
diff --git a/testsuite/test.txt.lz.fec16 b/testsuite/test.txt.lz.fec16
new file mode 100644
index 0000000..534d233
--- /dev/null
+++ b/testsuite/test.txt.lz.fec16
Binary files differ
diff --git a/testsuite/test.txt.lzma b/testsuite/test.txt.lzma
index 53e54ea..091c023 100644
--- a/testsuite/test.txt.lzma
+++ b/testsuite/test.txt.lzma
Binary files differ
diff --git a/testsuite/test21723.txt b/testsuite/test21636.txt
index 7194547..7194547 100644
--- a/testsuite/test21723.txt
+++ b/testsuite/test21636.txt
diff --git a/testsuite/test_3m.txt.lz.md5 b/testsuite/test_3m.txt.lz.md5
deleted file mode 100644
index 5bec6bc..0000000
--- a/testsuite/test_3m.txt.lz.md5
+++ /dev/null
@@ -1 +0,0 @@
-6a6bb58464ec8567eab17015064d0c5b test_3m.txt.lz
diff --git a/testsuite/test_bad1.lz b/testsuite/test_bad1.lz
index 2129c90..5300a0c 100644
--- a/testsuite/test_bad1.lz
+++ b/testsuite/test_bad1.lz
Binary files differ
diff --git a/testsuite/test_bad2.lz b/testsuite/test_bad2.lz
index e013c34..8691377 100644
--- a/testsuite/test_bad2.lz
+++ b/testsuite/test_bad2.lz
Binary files differ
diff --git a/testsuite/test_bad3.lz b/testsuite/test_bad3.lz
index 0ae9e7d..5f82877 100644
--- a/testsuite/test_bad3.lz
+++ b/testsuite/test_bad3.lz
Binary files differ
diff --git a/testsuite/test_bad4.lz b/testsuite/test_bad4.lz
index ddb0d6b..c816609 100644
--- a/testsuite/test_bad4.lz
+++ b/testsuite/test_bad4.lz
Binary files differ
diff --git a/testsuite/test_bad5.lz b/testsuite/test_bad5.lz
index 6fab91c..7b002b8 100644
--- a/testsuite/test_bad5.lz
+++ b/testsuite/test_bad5.lz
Binary files differ
diff --git a/testsuite/test_bad6.lz b/testsuite/test_bad6.lz
index cfea88c..554b3f8 100644
--- a/testsuite/test_bad6.lz
+++ b/testsuite/test_bad6.lz
Binary files differ
diff --git a/testsuite/test_bad6.txt b/testsuite/test_bad6.txt
index b47462e..cd4dc0a 100644
--- a/testsuite/test_bad6.txt
+++ b/testsuite/test_bad6.txt
@@ -1,6 +1,3 @@
-) You can apply it to
-your programs, too.
-
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
@@ -23,4 +20,10 @@ rights.
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
- Also, for each author's protection and ours, we want to \ No newline at end of file
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
diff --git a/testsuite/test_bad7.lz b/testsuite/test_bad7.lz
index 77f2b85..4275cfc 100644
--- a/testsuite/test_bad7.lz
+++ b/testsuite/test_bad7.lz
Binary files differ
diff --git a/testsuite/test_bad7.txt b/testsuite/test_bad7.txt
index be54c7c..ba4ade5 100644
--- a/testsuite/test_bad7.txt
+++ b/testsuite/test_bad7.txt
@@ -1,13 +1,3 @@
-, so
-that any problems introduced by others will not reflect on the original
-authors' reputations.
-
- Finally, any free program is threatened constantly by software
-patents. We wish to avoid the danger that redistributors of a free
-program will individually obtain patent licenses, in effect making the
-program proprietary. To prevent this, we have made it clear that any
-patent must be licensed for everyone's free use or not licensed at all.
-
The precise terms and conditions for copying, distribution and
modification follow.
@@ -212,4 +202,22 @@ of promoting the sharing and reuse of software generally.
NO WARRANTY
- 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY \ No newline at end of file
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
diff --git a/testsuite/test_bad8.lz b/testsuite/test_bad8.lz
index fca701b..6c8b878 100644
--- a/testsuite/test_bad8.lz
+++ b/testsuite/test_bad8.lz
Binary files differ
diff --git a/testsuite/test_bad9.lz b/testsuite/test_bad9.lz
index becb0ec..3851682 100644
--- a/testsuite/test_bad9.lz
+++ b/testsuite/test_bad9.lz
Binary files differ
diff --git a/testsuite/test_bad9.txt b/testsuite/test_bad9.txt
index b72a626..0bbf61f 100644
--- a/testsuite/test_bad9.txt
+++ b/testsuite/test_bad9.txt
@@ -1,5 +1,13 @@
-General
-Public License instead of this License.
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
diff --git a/testsuite/test_em.txt.lz b/testsuite/test_em.txt.lz
deleted file mode 100644
index 7e96250..0000000
--- a/testsuite/test_em.txt.lz
+++ /dev/null
Binary files differ
diff --git a/unzcrash.cc b/unzcrash.cc
index 107189f..1fe1aa8 100644
--- a/unzcrash.cc
+++ b/unzcrash.cc
@@ -1,6 +1,6 @@
/* Unzcrash - Tests robustness of decompressors to corrupted data.
Inspired by unzcrash.c from Julian Seward's bzip2.
- Copyright (C) 2008-2024 Antonio Diaz Diaz.
+ Copyright (C) 2008-2025 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -26,7 +26,7 @@
#include <algorithm>
#include <cerrno>
-#include <climits> // SSIZE_MAX
+#include <climits> // CHAR_BIT, SSIZE_MAX
#include <csignal>
#include <cstdio>
#include <cstdlib>
@@ -54,8 +54,6 @@ namespace {
const char * const program_name = "unzcrash";
const char * invocation_name = program_name; // default value
-int verbosity = 0;
-
void show_help()
{
@@ -142,28 +140,29 @@ uint8_t * read_file( const char * const filename, long * const file_sizep )
long buffer_size = 65536;
uint8_t * buffer = (uint8_t *)std::malloc( buffer_size );
- if( !buffer ) { show_error( mem_msg ); return 0; }
+ if( !buffer ) { show_file_error( filename, mem_msg ); return 0; }
long file_size = std::fread( buffer, 1, buffer_size, f );
while( file_size >= buffer_size || ( !std::ferror( f ) && !std::feof( f ) ) )
{
if( file_size >= buffer_size ) // may be false because of EINTR
{
if( buffer_size >= LONG_MAX )
- { show_file_error( filename, "Input file is larger than LONG_MAX." );
+ { show_file_error( filename, large_file_msg );
std::free( buffer ); return 0; }
- buffer_size = ( buffer_size <= LONG_MAX / 2 ) ? 2 * buffer_size : LONG_MAX;
+ buffer_size = (buffer_size <= LONG_MAX / 2) ? 2 * buffer_size : LONG_MAX;
uint8_t * const tmp = (uint8_t *)std::realloc( buffer, buffer_size );
- if( !tmp ) { show_error( mem_msg ); std::free( buffer ); return 0; }
+ if( !tmp )
+ { show_file_error( filename, mem_msg ); std::free( buffer ); return 0; }
buffer = tmp;
}
file_size += std::fread( buffer + file_size, 1, buffer_size - file_size, f );
}
if( std::ferror( f ) || !std::feof( f ) )
- {
- show_file_error( filename, "Error reading input file", errno );
- std::free( buffer ); return 0;
- }
- std::fclose( f );
+ { show_file_error( filename, read_error_msg, errno );
+ std::free( buffer ); return 0; }
+ if( std::fclose( f ) != 0 )
+ { show_file_error( filename, "Error closing input file", errno );
+ std::free( buffer ); return 0; }
*file_sizep = file_size;
return buffer;
}
@@ -173,13 +172,13 @@ class Bitset8 // 8 value bitset (1 to 8)
{
bool data[8];
static bool valid_digit( const unsigned char ch )
- { return ( ch >= '1' && ch <= '8' ); }
+ { return ch >= '1' && ch <= '8'; }
public:
Bitset8() { for( int i = 0; i < 8; ++i ) data[i] = true; }
bool includes( const int i ) const
- { return ( i >= 1 && i <= 8 && data[i-1] ); }
+ { return i >= 1 && i <= 8 && data[i-1]; }
// Recognized formats: 1 1,2,3 1-4 1,3-5,8 1-3,5-8
void parse_bs( const char * const arg, const char * const option_name )
@@ -355,7 +354,7 @@ int main( const int argc, const char * const argv[] )
{
enum Mode { m_block, m_byte, m_truncate };
const char * mode_str[3] = { "block", "byte", "size" };
- Bitset8 bits; // if Bitset8::parse_bs not called test full byte
+ Bitset8 bits; // if Bitset8::parse_bs not called, test full byte
Bad_byte bad_byte;
const char * zcmp_program = "zcmp";
long pos = 0;
@@ -383,7 +382,7 @@ int main( const int argc, const char * const argv[] )
{ 'v', "verbose", Arg_parser::no },
{ 'V', "version", Arg_parser::no },
{ 'z', "zcmp", Arg_parser::yes },
- { 0 , 0, Arg_parser::no } };
+ { 0, 0, Arg_parser::no } };
const Arg_parser parser( argc, argv, options );
if( parser.error().size() ) // bad option
@@ -398,15 +397,15 @@ int main( const int argc, const char * const argv[] )
const char * const arg = parser.argument( argind ).c_str();
switch( code )
{
- case 'h': show_help(); return 0;
case 'b': bits.parse_bs( arg, pn ); program_mode = m_byte; break;
case 'B': if( arg[0] ) parse_block( arg, pn, block_size, block_value );
program_mode = m_block; break;
case 'd': delta = getnum( arg, pn, block_size, 1, INT_MAX ); break;
case 'e': bad_byte.parse_bb( arg, pn ); break;
+ case 'h': show_help(); return 0;
case 'n': check = false; break;
case 'p': pos = getnum( arg, pn, block_size, -LONG_MAX, LONG_MAX ); break;
- case 'q': verbosity = -1; break;
+ case 'q': cl_verbosity = verbosity = -1; break;
case 's': max_size = getnum( arg, pn, block_size, -LONG_MAX, LONG_MAX ); break;
case 't': program_mode = m_truncate; break;
case 'v': if( verbosity < 4 ) ++verbosity; break;
@@ -419,11 +418,12 @@ int main( const int argc, const char * const argv[] )
if( parser.arguments() - argind != 2 )
{
if( verbosity >= 0 )
- std::fprintf( stderr, "Usage: %s 'lzip -t' file.lz\n", invocation_name );
+ std::fprintf( stderr, "Usage: %s [options] 'lzip -t' file.lz\n",
+ invocation_name );
return 1;
}
- if( delta <= 0 ) delta = ( program_mode == m_block ) ? block_size : 1;
+ if( delta <= 0 ) delta = (program_mode == m_block) ? block_size : 1;
const char * const command = parser.argument( argind ).c_str();
std::vector< std::string > command_args;
@@ -498,7 +498,7 @@ int main( const int argc, const char * const argv[] )
( max_size < 0 && -max_size >= file_size - pos ) )
{ show_error( "Nothing to do; domain is empty." ); return 0; }
if( max_size < 0 ) max_size += file_size - pos;
- const long end = ( ( max_size < file_size - pos ) ? pos + max_size : file_size );
+ const long end = (max_size < file_size - pos) ? pos + max_size : file_size;
if( bad_byte.pos >= file_size )
{ show_option_error( bad_byte.argument, "Position is beyond end of file in",
bad_byte.option_name ); return 1; }
@@ -532,7 +532,7 @@ int main( const int argc, const char * const argv[] )
}
else if( program_mode == m_block )
{
- uint8_t * block = (uint8_t *)std::malloc( block_size );
+ uint8_t * const block = (uint8_t *)std::malloc( block_size );
if( !block ) { show_error( mem_msg ); return 1; }
for( long i = pos; i < end; i += std::min( delta, end - i ) )
{
@@ -611,17 +611,18 @@ int main( const int argc, const char * const argv[] )
if( verbosity >= 0 )
{
- std::fprintf( stderr, "\n%9ld %ss tested\n%9ld total decompressions"
- "\n%9ld decompressions returned with zero status",
- positions, mode_str[program_mode], decompressions, successes );
+ std::fprintf( stderr, "\n%11s %ss tested\n%11s total decompressions"
+ "\n%11s decompressions returned with zero status",
+ format_num3( positions ), mode_str[program_mode],
+ format_num3( decompressions ), format_num3( successes ) );
if( successes > 0 )
{
if( zcmp_command.empty() )
- std::fputs( "\n comparisons disabled\n", stderr );
+ std::fputs( "\n comparisons disabled\n", stderr );
else if( failed_comparisons > 0 )
- std::fprintf( stderr, ", of which\n%9ld comparisons failed\n",
- failed_comparisons );
- else std::fputs( "\n all comparisons passed\n", stderr );
+ std::fprintf( stderr, ", of which\n%11s comparisons failed\n",
+ format_num3( failed_comparisons ) );
+ else std::fputs( "\n all comparisons passed\n", stderr );
}
else std::fputc( '\n', stderr );
}