diff options
-rw-r--r-- | COPYING | 3 | ||||
-rw-r--r-- | ChangeLog | 39 | ||||
-rw-r--r-- | Makefile.in | 25 | ||||
-rw-r--r-- | NEWS | 44 | ||||
-rw-r--r-- | README | 22 | ||||
-rw-r--r-- | alone_to_lz.cc | 16 | ||||
-rw-r--r-- | arg_parser.cc | 15 | ||||
-rw-r--r-- | arg_parser.h | 10 | ||||
-rw-r--r-- | byte_repair.cc | 19 | ||||
-rw-r--r-- | common.h | 5 | ||||
-rwxr-xr-x | configure | 9 | ||||
-rw-r--r-- | debian/changelog | 16 | ||||
-rw-r--r-- | debian/patches/debian/0001-build.patch | 3 | ||||
-rw-r--r-- | decoder.cc | 28 | ||||
-rw-r--r-- | decoder.h | 12 | ||||
-rw-r--r-- | doc/lziprecover.1 | 57 | ||||
-rw-r--r-- | doc/lziprecover.info | 910 | ||||
-rw-r--r-- | doc/lziprecover.texi | 912 | ||||
-rw-r--r-- | dump_remove.cc | 4 | ||||
-rw-r--r-- | fec.h | 297 | ||||
-rw-r--r-- | fec_create.cc | 615 | ||||
-rw-r--r-- | fec_repair.cc | 1106 | ||||
-rw-r--r-- | gf16.cc | 308 | ||||
-rw-r--r-- | gf8.cc | 244 | ||||
-rw-r--r-- | lunzcrash.cc | 28 | ||||
-rw-r--r-- | lzip.h | 44 | ||||
-rw-r--r-- | lzip_index.cc | 31 | ||||
-rw-r--r-- | lzip_index.h | 10 | ||||
-rw-r--r-- | main.cc | 450 | ||||
-rw-r--r-- | main_common.cc | 21 | ||||
-rw-r--r-- | md5.cc | 2 | ||||
-rw-r--r-- | md5.h | 2 | ||||
-rw-r--r-- | merge.cc | 11 | ||||
-rw-r--r-- | mtester.cc | 2 | ||||
-rw-r--r-- | nrep_stats.cc | 4 | ||||
-rw-r--r-- | range_dec.cc | 5 | ||||
-rw-r--r-- | recursive.cc | 122 | ||||
-rw-r--r-- | reproduce.cc | 29 | ||||
-rwxr-xr-x | testsuite/check.sh | 666 | ||||
-rw-r--r-- | testsuite/fox6_mark.lz | bin | 480 -> 0 bytes | |||
-rw-r--r-- | testsuite/fox6_nz.lz | bin | 0 -> 480 bytes | |||
-rw-r--r-- | testsuite/test.txt | 6 | ||||
-rw-r--r-- | testsuite/test.txt.lz | bin | 7376 -> 7341 bytes | |||
-rw-r--r-- | testsuite/test.txt.lz.fec | bin | 0 -> 4424 bytes | |||
-rw-r--r-- | testsuite/test.txt.lz.fec16 | bin | 0 -> 4424 bytes | |||
-rw-r--r-- | testsuite/test.txt.lzma | bin | 7363 -> 7328 bytes | |||
-rw-r--r-- | testsuite/test21636.txt (renamed from testsuite/test21723.txt) | 0 | ||||
-rw-r--r-- | testsuite/test_3m.txt.lz.md5 | 2 | ||||
-rw-r--r-- | testsuite/test_bad1.lz | bin | 7376 -> 7341 bytes | |||
-rw-r--r-- | testsuite/test_bad2.lz | bin | 7376 -> 7341 bytes | |||
-rw-r--r-- | testsuite/test_bad3.lz | bin | 7376 -> 7341 bytes | |||
-rw-r--r-- | testsuite/test_bad4.lz | bin | 7376 -> 7341 bytes | |||
-rw-r--r-- | testsuite/test_bad5.lz | bin | 7376 -> 7341 bytes | |||
-rw-r--r-- | testsuite/test_bad6.lz | bin | 7376 -> 7341 bytes | |||
-rw-r--r-- | testsuite/test_bad6.txt | 11 | ||||
-rw-r--r-- | testsuite/test_bad7.lz | bin | 7376 -> 7341 bytes | |||
-rw-r--r-- | testsuite/test_bad7.txt | 30 | ||||
-rw-r--r-- | testsuite/test_bad8.lz | bin | 7376 -> 7341 bytes | |||
-rw-r--r-- | testsuite/test_bad9.lz | bin | 7376 -> 7341 bytes | |||
-rw-r--r-- | testsuite/test_bad9.txt | 12 | ||||
-rw-r--r-- | testsuite/test_em.txt.lz | bin | 14024 -> 13950 bytes | |||
-rw-r--r-- | unzcrash.cc | 51 |
62 files changed, 5143 insertions, 1115 deletions
@@ -1,8 +1,7 @@ GNU GENERAL PUBLIC LICENSE Version 2, June 1991 - Copyright (C) 1989, 1991 Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Copyright (C) 1989, 1991 Free Software Foundation, Inc. <http://fsf.org/> Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. @@ -1,3 +1,15 @@ +2024-10-01 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.25-pre1 released. + * New options '-F, --fec', '-0' to '-9', '-b, --block-size', + '--fec-file', '-r, --recursive', and '-R, --dereference-recursive'. + * Change short name of option '--byte-repair' to '-B'. + * New options '--ignore-empty' and '--ignore-nonzero'. + * Rename option '--clear-marking' to '--nonzero-repair'. + * Remove options '--empty-error' and '--marking-error'. + * Remove decompression support for Sync Flush marker. + * testsuite: Require lzip/clzip. Add fox6_nz.lz. Remove fox6_mark.lz. + 2024-01-20 Antonio Diaz Diaz <antonio@gnu.org> * Version 1.24 released. @@ -122,8 +134,8 @@ * repair.cc: Repair a damaged dictionary size in the header. * repair.cc: Try bytes at offsets 7 to 11 first. * Decompression time has been reduced by 2%. - * main.cc (decompress): Print up to 6 bytes of trailing data - when '-tvvvv' is specified. + * main.cc (decompress): Print up to 6 bytes of trailing data when + '-tvvvv' is specified. * decoder.cc (verify_trailer): Remove test of final code. * main.cc (main): Delete '--output' file if infd is a terminal. * main.cc (main): Don't use stdin more than once. @@ -166,8 +178,8 @@ 2013-09-14 Antonio Diaz Diaz <antonio@gnu.org> * Version 1.15 released. - * repair.cc: Repair multimember files with up to one byte error - per member. + * repair.cc: Repair multimember files with up to one byte error per + member. * merge.cc: Merge multimember files. * main.cc (show_header): Don't show header version. 
* lziprecover.texinfo: New chapters 'Repairing files', @@ -189,13 +201,13 @@ 2012-02-24 Antonio Diaz Diaz <ant_diaz@teleline.es> * Version 1.13 released. - * Lziprecover is now distributed in its own package. Until - version 1.12 it was included in the lzip package. Previous - entries in this file are taken from there. + * Lziprecover is now distributed in its own package. Until version + 1.12 it was included in the lzip package. Previous entries in this + file are taken from there. * lziprecover.cc: Rename to main.cc. * New files merge.cc, repair.cc, split.cc, and range_dec.cc. - * main.cc: Add decompressor options (-c, -d, -k, -t) so that - an external decompressor is not needed for recovery nor for + * main.cc: Add decompressor options (-c, -d, -k, -t) so that an + external decompressor is not needed for recovery nor for "make check". * New option '-D, --range-decompress', which extracts a range of bytes decompressing only the members containing the desired data. @@ -226,8 +238,8 @@ This change also prevents (harmless) access to uninitialized memory when decompressing a corrupt file. * lziprecover.cc: New options '-f, --force' and '-o, --output'. - * lziprecover.cc: New option '-s, --split' to select the until - now only operation of splitting multimember files. + * lziprecover.cc: New option '-s, --split' to select the until now + only operation of splitting multimember files. * lziprecover.cc: If no operation is specified, warn the user and do nothing. @@ -246,6 +258,5 @@ Copyright (C) 2009-2024 Antonio Diaz Diaz. -This file is a collection of facts, and thus it is not copyrightable, -but just in case, you have unlimited permission to copy, distribute, and -modify it. +This file is a collection of facts, and thus it is not copyrightable, but just +in case, you have unlimited permission to copy, distribute, and modify it. 
diff --git a/Makefile.in b/Makefile.in index 8a7b3a9..fead3d2 100644 --- a/Makefile.in +++ b/Makefile.in @@ -8,8 +8,9 @@ SHELL = /bin/sh CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1 objs = arg_parser.o alone_to_lz.o lzip_index.o list.o byte_repair.o \ - dump_remove.o lunzcrash.o md5.o merge.o mtester.o nrep_stats.o \ - range_dec.o reproduce.o split.o decoder.o main.o + dump_remove.o fec_create.o fec_repair.o gf8.o gf16.o lunzcrash.o \ + md5.o merge.o mtester.o nrep_stats.o range_dec.o recursive.o \ + reproduce.o split.o decoder.o main.o unzobjs = arg_parser.o unzcrash.o @@ -22,7 +23,7 @@ unzobjs = arg_parser.o unzcrash.o all : $(progname) $(progname) : $(objs) - $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(objs) + $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(objs) $(LIBS) unzcrash : $(unzobjs) $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(unzobjs) @@ -38,7 +39,8 @@ unzcrash.o : unzcrash.cc # prevent 'make' from trying to remake source files $(VPATH)/configure $(VPATH)/Makefile.in $(VPATH)/doc/$(pkgname).texi : ; -%.h %.cc : ; +MAKEFLAGS += -r +.SUFFIXES : $(objs) : Makefile alone_to_lz.o : lzip.h common.h mtester.h @@ -46,15 +48,20 @@ arg_parser.o : arg_parser.h byte_repair.o : lzip.h common.h mtester.h lzip_index.h decoder.o : lzip.h common.h decoder.h dump_remove.o : lzip.h common.h lzip_index.h +fec_create.o : lzip.h common.h md5.h fec.h +fec_repair.o : lzip.h common.h md5.h fec.h +gf8.o : lzip.h common.h md5.h fec.h +gf16.o : lzip.h common.h md5.h fec.h list.o : lzip.h common.h lzip_index.h lunzcrash.o : lzip.h common.h md5.h mtester.h lzip_index.h lzip_index.o : lzip.h common.h lzip_index.h -main.o : arg_parser.h lzip.h common.h decoder.h main_common.cc +main.o : arg_parser.h lzip.h common.h decoder.h md5.h fec.h main_common.cc md5.o : md5.h merge.o : lzip.h common.h decoder.h lzip_index.h mtester.o : lzip.h common.h md5.h mtester.h nrep_stats.o : lzip.h common.h lzip_index.h range_dec.o : lzip.h common.h decoder.h lzip_index.h +recursive.o : lzip.h 
common.h md5.h fec.h reproduce.o : lzip.h common.h md5.h mtester.h lzip_index.h split.o : lzip.h common.h lzip_index.h unzcrash.o : Makefile arg_parser.h common.h main_common.cc @@ -141,21 +148,23 @@ dist : doc $(DISTNAME)/testsuite/check.sh \ $(DISTNAME)/testsuite/fox6_bad1.txt \ $(DISTNAME)/testsuite/test.txt \ - $(DISTNAME)/testsuite/test21723.txt \ + $(DISTNAME)/testsuite/test21636.txt \ $(DISTNAME)/testsuite/test_bad[6-9].txt \ $(DISTNAME)/testsuite/test_3m.txt.lz.md5 \ $(DISTNAME)/testsuite/fox.lz \ $(DISTNAME)/testsuite/fox_*.lz \ $(DISTNAME)/testsuite/fox6.lz \ + $(DISTNAME)/testsuite/fox6_nz.lz \ $(DISTNAME)/testsuite/fox6_sc[1-6].lz \ $(DISTNAME)/testsuite/fox6_bad[1-6].lz \ - $(DISTNAME)/testsuite/fox6_mark.lz \ $(DISTNAME)/testsuite/numbers.lz \ $(DISTNAME)/testsuite/numbersbt.lz \ $(DISTNAME)/testsuite/test.txt.lz \ $(DISTNAME)/testsuite/test.txt.lzma \ $(DISTNAME)/testsuite/test_bad[1-9].lz \ - $(DISTNAME)/testsuite/test_em.txt.lz + $(DISTNAME)/testsuite/test_em.txt.lz \ + $(DISTNAME)/testsuite/test.txt.lz.fec \ + $(DISTNAME)/testsuite/test.txt.lz.fec16 rm -f $(DISTNAME) lzip -v -9 $(DISTNAME).tar @@ -1,35 +1,31 @@ -Changes in version 1.24: +Changes in version 1.25: -The option '--empty-error', which forces exit status 2 if any empty member -is found, has been added. +The option '-F, --fec', which implements Forward Error Correction (FEC), has +been added. -The option '--marking-error', which forces exit status 2 if the first LZMA -byte is non-zero in any member, has been added. +The options '-0' to '-9' (FEC fragmentation level) have been added. -The option '--clear-marking', which sets to zero the first LZMA byte of each -member, has been added. +The option '-b, --block-size', which sets the FEC block size, has been added. -The keyword 'empty' is now recognized in the argument of '--dump', -'--remove', and '--strip'. +The option '--fec-file', which sets the fec file to be used, has been added. 
-The option '--repair' has been renamed to '--byte-repair'. +The options '-r, --recursive' and '-R, --dereference-recursive' have been +added for recursive creation and reading of fec files. -The option '--debug-repair' has been renamed to '--debug-byte-repair'. +The short name of option '--byte-repair' has been changed to "-B". -File diagnostics have been reformatted as 'PROGRAM: FILE: MESSAGE'. +The option '--ignore-empty', which makes lziprecover ignore empty members in +multimember files when decompressing, testing, or listing, has been added. +By default lziprecover now exits with error status 2 if any empty member is +found in a multimember file. -Diagnostics caused by invalid arguments to command-line options now show the -argument and the name of the option. +The option '--ignore-nonzero', which makes lziprecover ignore a nonzero +first byte in the LZMA stream when decompressing or testing, has been added. +By default lziprecover now exits with error status 2 if the first LZMA byte +is nonzero in any member of the input files. -The option '-o, --output' now preserves dates, permissions, and ownership of -the file, when decompressing exactly one file. +The option '--clear-marking' has been renamed to '--nonzero-repair'. -The option '-o, --output' now creates missing intermediate directories when -writing to a file. +Options '--empty-error' and '--marking-error' have been removed. -The option '--no-verify' of unzcrash has been renamed to '--no-check'. - -The variable MAKEINFO has been added to configure and Makefile.in. - -The makefile target 'install-as-lzip' has been removed because '--reproduce' -needs a lzip compressor (not just a decompressor) named 'lzip' by default. +Lzip 1.16 (or clzip 1.6) or newer is required to run the tests. @@ -1,11 +1,8 @@ Description Lziprecover is a data recovery tool and decompressor for files in the lzip -compressed data format (.lz). 
Lziprecover is able to repair slightly damaged -files (up to one single-byte error per member), produce a correct file by -merging the good parts of two or more damaged copies, reproduce a missing -(zeroed) sector using a reference file, extract data from damaged files, -decompress files, and test integrity of files. +compressed data format (.lz). Lziprecover also provides Forward Error +Correction (FEC) able to repair any kind of file. Lziprecover can remove the damaged members from multimember files, for example multimember tar.lz archives. @@ -13,9 +10,6 @@ example multimember tar.lz archives. Lziprecover provides random access to the data in multimember files; it only decompresses the members containing the desired data. -Lziprecover facilitates the management of metadata stored as trailing data -in lzip files. - Lziprecover is not a replacement for regular backups, but a last line of defense for the case where the backups are also damaged. @@ -59,8 +53,8 @@ GNU ddrescue + lziprecover is the recommended option for recovering data from damaged lzip files. If a file is too damaged for lziprecover to repair it, all the recoverable -data in all members of the file can be extracted in one step with the -command 'lziprecover -cd --ignore-errors file.lz > file'. +data in all members of the file can be extracted with the command +'lziprecover -cd --ignore-errors file.lz > file'. When recovering data, lziprecover takes as arguments the names of the damaged files and writes zero or more recovered files depending on the @@ -70,14 +64,6 @@ files themselves are kept unchanged. When decompressing or testing file integrity, lziprecover behaves like lzip or lunzip. -To give you an idea of its possibilities, when merging two copies, each of -them with one damaged area affecting 1 percent of the copy, the probability -of obtaining a correct file is about 98 percent. With three such copies the -probability rises to 99.97 percent. 
For large files (a few MB) with small -errors (one sector damaged per copy), the probability approaches 100 percent -even with only two copies. (Supposing that the errors are randomly located -inside each copy). - The lziprecover package also includes unzcrash, a program written to test robustness to decompression of corrupted data, inspired by unzcrash.c from Julian Seward's bzip2. Type 'make unzcrash' in the lziprecover source diff --git a/alone_to_lz.cc b/alone_to_lz.cc index d67ea5c..c71f335 100644 --- a/alone_to_lz.cc +++ b/alone_to_lz.cc @@ -50,7 +50,7 @@ uint8_t * read_file( const int infd, long * const file_sizep, while( file_size >= buffer_size - 20 && !errno ) { if( buffer_size >= LONG_MAX ) - { show_file_error( filename, "Input file is larger than LONG_MAX." ); + { show_file_error( filename, large_file_msg ); std::free( buffer ); return 0; } buffer_size = ( buffer_size <= LONG_MAX / 2 ) ? 2 * buffer_size : LONG_MAX; uint8_t * const tmp = (uint8_t *)std::realloc( buffer, buffer_size ); @@ -61,7 +61,7 @@ uint8_t * read_file( const int infd, long * const file_sizep, } if( errno ) { - show_file_error( filename, "Error reading input file", errno ); + show_file_error( filename, read_error_msg, errno ); std::free( buffer ); return 0; } *file_sizep = file_size; @@ -88,7 +88,7 @@ int alone_to_lz( const int infd, const Pretty_print & pp ) uint8_t * const buffer = read_file( infd, &file_size, pp.name() ); if( !buffer ) return 1; if( file_size < lzma_header_size ) - { show_file_error( pp.name(), "Input file is too short." ); + { show_file_error( pp.name(), short_file_msg ); std::free( buffer ); return 2; } if( buffer[0] != 93 ) // (45 * 2) + (9 * 0) + 3 @@ -100,7 +100,7 @@ int alone_to_lz( const int infd, const Pretty_print & pp ) show_file_error( pp.name(), "Input file has non-default LZMA properties." 
); std::free( buffer ); return 2; } - for( int i = 5; i < 13; ++i ) if( buffer[i] != 0xFF ) + for( int i = 5; i < lzma_header_size; ++i ) if( buffer[i] != 0xFF ) { show_file_error( pp.name(), "Input file is non-streamed." ); std::free( buffer ); return 2; } @@ -113,10 +113,12 @@ int alone_to_lz( const int infd, const Pretty_print & pp ) Lzip_header & header = *(Lzip_header *)( buffer + offset ); header.set_magic(); header.dictionary_size( dictionary_size ); + buffer[lzma_header_size] = 0; // reset first LZMA byte for( int i = 0; i < Lzip_trailer::size; ++i ) buffer[file_size++] = 0; + const long lzip_size = file_size - offset; // compute and fill trailer { - LZ_mtester mtester( buffer + offset, file_size - offset, dictionary_size ); + LZ_mtester mtester( buffer + offset, lzip_size, dictionary_size ); const int result = mtester.test_member(); if( result == 1 && orig_dictionary_size > max_dictionary_size ) { pp( "dictionary size is too large" ); std::free( buffer ); return 2; } @@ -136,10 +138,10 @@ int alone_to_lz( const int infd, const Pretty_print & pp ) trailer.member_size( mtester.member_position() ); } // check converted member - LZ_mtester mtester( buffer + offset, file_size - offset, dictionary_size ); + LZ_mtester mtester( buffer + offset, lzip_size, dictionary_size ); if( mtester.test_member() != 0 || !mtester.finished() ) { pp( "conversion failed" ); std::free( buffer ); return 2; } - if( writeblock( outfd, buffer + offset, file_size - offset ) != file_size - offset ) + if( writeblock( outfd, buffer + offset, lzip_size ) != lzip_size ) { show_error( "Error writing output file", errno ); std::free( buffer ); return 1; diff --git a/arg_parser.cc b/arg_parser.cc index 0c04d8e..0c528b2 100644 --- a/arg_parser.cc +++ b/arg_parser.cc @@ -75,19 +75,19 @@ bool Arg_parser::parse_long_option( const char * const opt, const char * const a error_ += "' requires an argument"; return false; } - data.back().argument = &opt[len+3]; + data.back().argument = &opt[len+3]; // 
argument may be empty return true; } - if( options[index].has_arg == yes ) + if( options[index].has_arg == yes || options[index].has_arg == yme ) { - if( !arg || !arg[0] ) + if( !arg || ( options[index].has_arg == yes && !arg[0] ) ) { error_ = "option '--"; error_ += options[index].long_name; error_ += "' requires an argument"; return false; } - ++argind; data.back().argument = arg; + ++argind; data.back().argument = arg; // argument may be empty return true; } @@ -123,15 +123,16 @@ bool Arg_parser::parse_short_option( const char * const opt, const char * const { data.back().argument = &opt[cind]; ++argind; cind = 0; } - else if( options[index].has_arg == yes ) + else if( options[index].has_arg == yes || options[index].has_arg == yme ) { - if( !arg || !arg[0] ) + if( !arg || ( options[index].has_arg == yes && !arg[0] ) ) { error_ = "option requires an argument -- '"; error_ += c; error_ += '\''; return false; } - data.back().argument = arg; ++argind; cind = 0; + ++argind; cind = 0; + data.back().argument = arg; // argument may be empty } } return true; diff --git a/arg_parser.h b/arg_parser.h index 1eeec9a..ab77fc5 100644 --- a/arg_parser.h +++ b/arg_parser.h @@ -36,14 +36,18 @@ The argument '--' terminates all options; any following arguments are treated as non-option arguments, even if they begin with a hyphen. - The syntax for optional option arguments is '-<short_option><argument>' - (without whitespace), or '--<long_option>=<argument>'. + The syntax of options with an optional argument is + '-<short_option><argument>' (without whitespace), or + '--<long_option>=<argument>'. + + The syntax of options with an empty argument is '-<short_option> ""', + '--<long_option> ""', or '--<long_option>=""'. 
*/ class Arg_parser { public: - enum Has_arg { no, yes, maybe }; + enum Has_arg { no, yes, maybe, yme }; // yme = yes but maybe empty struct Option { diff --git a/byte_repair.cc b/byte_repair.cc index 370738b..d6b1782 100644 --- a/byte_repair.cc +++ b/byte_repair.cc @@ -69,10 +69,10 @@ int repair_dictionary_size( uint8_t * const mbuffer, const long msize ) const bool valid_ds = isvalid_ds( dictionary_size ); if( valid_ds && dictionary_size >= data_size ) return 0; // can't be bad - const unsigned long long dictionary_size_9 = 1 << 25; // dict size of opt -9 - if( !valid_ds || dictionary_size < dictionary_size_9 ) + const unsigned long long dict_size_9 = 1 << 25; // dict size of opt -9 + if( !valid_ds || dictionary_size < dict_size_9 ) { - dictionary_size = std::min( data_size, dictionary_size_9 ); + dictionary_size = std::min( data_size, dict_size_9 ); if( dictionary_size < min_dictionary_size ) dictionary_size = min_dictionary_size; LZ_mtester mtester( mbuffer, msize, dictionary_size ); @@ -82,7 +82,7 @@ int repair_dictionary_size( uint8_t * const mbuffer, const long msize ) if( result != 1 || mtester.max_distance() <= dictionary_size || mtester.max_distance() > max_dictionary_size ) return 0; } - if( data_size > dictionary_size_9 ) + if( data_size > dict_size_9 ) { dictionary_size = std::min( data_size, (unsigned long long)max_dictionary_size ); @@ -174,7 +174,7 @@ uint8_t * read_member( const int infd, const long long mpos, uint8_t * const buffer = new uint8_t[msize]; if( readblock( infd, buffer, msize ) != msize ) - { show_file_error( filename, "Error reading input file", errno ); + { show_file_error( filename, read_error_msg, errno ); delete[] buffer; return 0; } return buffer; } @@ -266,7 +266,8 @@ int byte_repair( const std::string & input_filename, } if( !close_outstream( &in_stats ) ) return 1; if( verbosity >= 1 ) - std::fputs( "Copy of input file repaired successfully.\n", stdout ); + std::printf( "Repaired copy of '%s' written to '%s'\n", + filename, 
output_filename.c_str() ); return 0; } @@ -287,7 +288,8 @@ int debug_delay( const char * const input_filename, if( range.end() > lzip_index.cdata_size() ) range.size( std::max( 0LL, lzip_index.cdata_size() - range.pos() ) ); if( range.size() <= 0 ) - { show_file_error( input_filename, "Nothing to do." ); return 0; } + { show_file_error( input_filename, "Nothing to do; range is empty." ); + return 0; } for( long i = 0; i < lzip_index.members(); ++i ) { @@ -370,7 +372,8 @@ int debug_byte_repair( const char * const input_filename, for( ; idx < lzip_index.members(); ++idx ) if( lzip_index.mblock( idx ).includes( bad_byte.pos ) ) break; if( idx >= lzip_index.members() ) - { show_file_error( input_filename, "Nothing to do." ); return 0; } + { show_file_error( input_filename, "Nothing to do; byte is beyond EOF." ); + return 0; } const long long mpos = lzip_index.mblock( idx ).pos(); const long long msize = lzip_index.mblock( idx ).size(); @@ -38,9 +38,14 @@ struct Bad_byte }; +const char * const large_file_msg = "Input file is too large for this computer."; const char * const mem_msg = "Not enough memory."; +const char * const read_error_msg = "Read error"; // defined in main_common.cc +extern int verbosity; + +const char * format_num3( long long num ); void show_error( const char * const msg, const int errcode = 0, const bool help = false ); void show_file_error( const char * const filename, const char * const msg, @@ -6,7 +6,7 @@ # to copy, distribute, and modify it. pkgname=lziprecover -pkgversion=1.24 +pkgversion=1.25-pre1 progname=lziprecover srctrigger=doc/${pkgname}.texi @@ -24,6 +24,7 @@ CXX=g++ CPPFLAGS= CXXFLAGS='-Wall -W -O2' LDFLAGS= +LIBS=-lpthread MAKEINFO=makeinfo # checking whether we are using GNU C++. 
@@ -70,6 +71,7 @@ while [ $# != 0 ] ; do echo " CXXFLAGS=OPTIONS command-line options for the C++ compiler [${CXXFLAGS}]" echo " CXXFLAGS+=OPTIONS append options to the current value of CXXFLAGS" echo " LDFLAGS=OPTIONS command-line options for the linker [${LDFLAGS}]" + echo " LIBS=OPTIONS libraries to pass to the linker [${LIBS}]" echo " MAKEINFO=NAME makeinfo program to use [${MAKEINFO}]" echo exit 0 ;; @@ -98,6 +100,7 @@ while [ $# != 0 ] ; do CXXFLAGS=*) CXXFLAGS=${optarg} ;; CXXFLAGS+=*) CXXFLAGS="${CXXFLAGS} ${optarg}" ;; LDFLAGS=*) LDFLAGS=${optarg} ;; + LIBS=*) LIBS="${optarg} ${LIBS}" ;; MAKEINFO=*) MAKEINFO=${optarg} ;; --*) @@ -109,7 +112,7 @@ while [ $# != 0 ] ; do exit 1 ;; esac - # Check if the option took a separate argument + # Check whether the option took a separate argument if [ "${arg2}" = yes ] ; then if [ $# != 0 ] ; then args="${args} \"$1\"" ; shift else echo "configure: Missing argument to '${option}'" 1>&2 @@ -167,6 +170,7 @@ echo "CXX = ${CXX}" echo "CPPFLAGS = ${CPPFLAGS}" echo "CXXFLAGS = ${CXXFLAGS}" echo "LDFLAGS = ${LDFLAGS}" +echo "LIBS = ${LIBS}" echo "MAKEINFO = ${MAKEINFO}" rm -f Makefile cat > Makefile << EOF @@ -191,6 +195,7 @@ CXX = ${CXX} CPPFLAGS = ${CPPFLAGS} CXXFLAGS = ${CXXFLAGS} LDFLAGS = ${LDFLAGS} +LIBS = ${LIBS} MAKEINFO = ${MAKEINFO} EOF cat "${srcdir}/Makefile.in" >> Makefile diff --git a/debian/changelog b/debian/changelog index 21c0562..d1fc2fd 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,19 @@ +lziprecover (1.25~pre1-1) sid; urgency=medium + + * Uploading to sid. + * Merging upstream version 1.25~pre1. + * Refreshing build.patch. + + -- Daniel Baumann <daniel.baumann@progress-linux.org> Mon, 07 Oct 2024 10:15:10 +0200 + +lziprecover (1.24-2) sid; urgency=medium + + * Uploading to sid. + * Updating years in copyright for 2024. + * Updating to standards-version 4.7.0. 
+ + -- Daniel Baumann <daniel.baumann@progress-linux.org> Thu, 25 Jul 2024 06:38:49 +0200 + lziprecover (1.24-1) sid; urgency=medium * Uploading to sid. diff --git a/debian/patches/debian/0001-build.patch b/debian/patches/debian/0001-build.patch index cd0ebf4..41817c8 100644 --- a/debian/patches/debian/0001-build.patch +++ b/debian/patches/debian/0001-build.patch @@ -14,6 +14,7 @@ diff -Naurp lziprecover.orig/configure lziprecover/configure +#CPPFLAGS= +#CXXFLAGS='-Wall -W -O2' +#LDFLAGS= + LIBS=-lpthread MAKEINFO=makeinfo - # checking whether we are using GNU C++. + @@ -76,7 +76,7 @@ bool Range_decoder::read_block() if( !at_stream_end ) { stream_pos = readblock( infd, buffer, buffer_size ); - if( stream_pos != buffer_size && errno ) throw Error( "Read error" ); + if( stream_pos != buffer_size && errno ) throw Error( read_error_msg ); at_stream_end = ( stream_pos < buffer_size ); partial_member_pos += pos; pos = 0; @@ -108,8 +108,7 @@ void LZ_decoder::flush_data() } -int LZ_decoder::check_trailer( const Pretty_print & pp, - const bool ignore_empty ) const +bool LZ_decoder::check_trailer( const Pretty_print & pp ) const { Lzip_trailer trailer; int size = rdec.read_data( trailer.data, trailer.size ); @@ -154,8 +153,7 @@ int LZ_decoder::check_trailer( const Pretty_print & pp, std::fprintf( stderr, "Member size mismatch; stored %llu (0x%llX), computed %llu (0x%llX)\n", tm_size, tm_size, member_size, member_size ); } } - if( error ) return 3; - if( !ignore_empty && data_size == 0 ) return 5; + if( error ) return false; if( verbosity >= 2 ) { if( verbosity >= 4 ) show_header( dictionary_size ); @@ -175,15 +173,14 @@ int LZ_decoder::check_trailer( const Pretty_print & pp, pp(); std::fprintf( stderr, "Range decoder final code is %08X\n", rdec.get_code() ); } - return 0; + return true; } /* Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF, 3 = trailer error, 4 = unknown marker found, - 5 = empty member found, 6 = marked member found. 
*/ -int LZ_decoder::decode_member( const Cl_options & cl_opts, - const Pretty_print & pp ) + 5 = nonzero first LZMA byte found. */ +int LZ_decoder::decode_member( const Pretty_print & pp, const bool ignore_nonzero ) { Bit_model bm_literal[1<<literal_context_bits][0x300]; Bit_model bm_match[State::states][pos_states]; @@ -203,7 +200,7 @@ int LZ_decoder::decode_member( const Cl_options & cl_opts, unsigned rep3 = 0; State state; - if( !rdec.load( cl_opts.ignore_marking ) ) return 6; + if( !rdec.load( ignore_nonzero ) ) return 5; while( !rdec.finished() ) { const int pos_state = data_position() & pos_state_mask; @@ -267,14 +264,9 @@ int LZ_decoder::decode_member( const Cl_options & cl_opts, rdec.normalize(); flush_data(); if( len == min_match_len ) // End Of Stream marker - return check_trailer( pp, cl_opts.ignore_empty ); - if( len == min_match_len + 1 ) // Sync Flush marker - { rdec.load(); continue; } - if( verbosity >= 0 ) - { - pp(); - std::fprintf( stderr, "Unsupported marker code '%d'\n", len ); - } + { if( check_trailer( pp ) ) return 0; else return 3; } + if( verbosity >= 0 ) { pp(); + std::fprintf( stderr, "Unsupported marker code '%d'\n", len ); } return 4; } } @@ -106,12 +106,12 @@ public: return false; } - bool load( const bool ignore_marking = true ) + bool load( const bool ignore_nonzero ) { code = 0; range = 0xFFFFFFFFU; - // check and discard first byte of the LZMA stream - if( get_byte() != 0 && !ignore_marking ) return false; + // check first byte of the LZMA stream + if( get_byte() != 0 && !ignore_nonzero ) return false; for( int i = 0; i < 4; ++i ) code = ( code << 8 ) | get_byte(); return true; } @@ -305,7 +305,7 @@ class LZ_decoder unsigned long long stream_position() const { return partial_data_pos + stream_pos; } void flush_data(); - int check_trailer( const Pretty_print & pp, const bool ignore_empty ) const; + bool check_trailer( const Pretty_print & pp ) const; uint8_t peek_prev() const { return buffer[((pos > 0) ? 
pos : dictionary_size)-1]; } @@ -381,7 +381,7 @@ public: unsigned crc() const { return crc_ ^ 0xFFFFFFFFU; } unsigned long long data_position() const { return partial_data_pos + pos; } - int decode_member( const Cl_options & cl_opts, const Pretty_print & pp ); + int decode_member( const Pretty_print & pp, const bool ignore_nonzero ); int decode_member() - { return decode_member( Cl_options(), Pretty_print( "" ) ); } + { return decode_member( Pretty_print( "" ), true ); } }; diff --git a/doc/lziprecover.1 b/doc/lziprecover.1 index f95e80f..bb39f36 100644 --- a/doc/lziprecover.1 +++ b/doc/lziprecover.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.2. -.TH LZIPRECOVER "1" "January 2024" "lziprecover 1.24" "User Commands" +.TH LZIPRECOVER "1" "October 2024" "lziprecover 1.25-pre1" "User Commands" .SH NAME lziprecover \- recovers data from damaged lzip files .SH SYNOPSIS @@ -7,11 +7,8 @@ lziprecover \- recovers data from damaged lzip files [\fI\,options\/\fR] [\fI\,files\/\fR] .SH DESCRIPTION Lziprecover is a data recovery tool and decompressor for files in the lzip -compressed data format (.lz). Lziprecover is able to repair slightly damaged -files (up to one single\-byte error per member), produce a correct file by -merging the good parts of two or more damaged copies, reproduce a missing -(zeroed) sector using a reference file, extract data from damaged files, -decompress files, and test integrity of files. +compressed data format (.lz). Lziprecover also provides Forward Error +Correction (FEC) able to repair any kind of file. .PP With the help of lziprecover, losing an entire archive just because of a corrupt byte near the beginning is a thing of the past. @@ -22,9 +19,6 @@ example multimember tar.lz archives. Lziprecover provides random access to the data in multimember files; it only decompresses the members containing the desired data. .PP -Lziprecover facilitates the management of metadata stored as trailing data -in lzip files. 
-.PP Lziprecover is not a replacement for regular backups, but a last line of defense for the case where the backups are also damaged. .SH OPTIONS @@ -41,6 +35,12 @@ exit with error status if trailing data \fB\-A\fR, \fB\-\-alone\-to\-lz\fR convert lzma\-alone files to lzip format .TP +\fB\-b\fR, \fB\-\-block\-size=\fR<bytes> +make FEC block size a multiple of <bytes> +.TP +\fB\-B\fR, \fB\-\-byte\-repair\fR +try to repair a corrupt byte in file +.TP \fB\-c\fR, \fB\-\-stdout\fR write to standard output, keep input files .TP @@ -65,8 +65,17 @@ reference file for \fB\-\-reproduce\fR \fB\-f\fR, \fB\-\-force\fR overwrite existing output files .TP +\fB\-F\fR, \fB\-\-fec\fR=\fI\,c[N]\/\fR|r|t|l +create, repair, test, list (using) fec file +.TP +\fB\-0\fR .. \fB\-9\fR +set FEC fragmentation level [default 9] +.TP +\fB\-\-fec\-file=\fR<file>[/] +read fec file from <file> or directory +.TP \fB\-i\fR, \fB\-\-ignore\-errors\fR -ignore some errors in \fB\-d\fR, \fB\-D\fR, \fB\-l\fR, \fB\-t\fR, \fB\-\-dump\fR +ignore non\-fatal errors .TP \fB\-k\fR, \fB\-\-keep\fR keep (don't delete) input files @@ -77,14 +86,20 @@ print (un)compressed file sizes \fB\-m\fR, \fB\-\-merge\fR repair errors in file using several copies .TP -\fB\-o\fR, \fB\-\-output=\fR<file> -place the output into <file> +\fB\-n\fR, \fB\-\-threads=\fR<n> +set number of threads for fec create [2] +.TP +\fB\-o\fR, \fB\-\-output=\fR<file>[/] +place the output into <file> or directory .TP \fB\-q\fR, \fB\-\-quiet\fR suppress all messages .TP -\fB\-R\fR, \fB\-\-byte\-repair\fR -try to repair a corrupt byte in file +\fB\-r\fR, \fB\-\-recursive\fR +(fec) operate recursively on directories +.TP +\fB\-R\fR, \fB\-\-dereference\-recursive\fR +(fec) recursively follow symbolic links .TP \fB\-s\fR, \fB\-\-split\fR split multimember file in single\-member files @@ -104,22 +119,24 @@ remove members, tdata from files in place \fB\-\-strip=\fR<list>:d:e:t copy files to stdout stripping members given .TP -\fB\-\-empty\-error\fR -exit 
with error status if empty member in file +\fB\-\-ignore\-empty\fR +ignore empty members in multimember files .TP -\fB\-\-marking\-error\fR -exit with error status if 1st LZMA byte not 0 +\fB\-\-ignore\-nonzero\fR +ignore a nonzero first LZMA byte .TP \fB\-\-loose\-trailing\fR allow trailing data seeming corrupt header .TP -\fB\-\-clear\-marking\fR -reset the first LZMA byte of each member +\fB\-\-nonzero\-repair\fR +repair in place a nonzero first LZMA byte .PP If no file names are given, or if a file is '\-', lziprecover decompresses from standard input to standard output. Numbers may be followed by a multiplier: k = kB = 10^3 = 1000, Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc... +The argument to \fB\-\-fec\fR=\fI\,create\/\fR may be a number of blocks (\fB\-Fc20\fR), a +percentage (\fB\-Fc5\fR%), or a size in bytes (\fB\-Fc10KiB\fR). .PP To extract all the files from archive 'foo.tar.lz', use the commands \&'tar \fB\-xf\fR foo.tar.lz' or 'lziprecover \fB\-cd\fR foo.tar.lz | tar \fB\-xf\fR \-'. diff --git a/doc/lziprecover.info b/doc/lziprecover.info index b1f820f..197af5e 100644 --- a/doc/lziprecover.info +++ b/doc/lziprecover.info @@ -12,19 +12,20 @@ File: lziprecover.info, Node: Top, Next: Introduction, Up: (dir) Lziprecover Manual ****************** -This manual is for Lziprecover (version 1.24, 20 January 2024). +This manual is for Lziprecover (version 1.25-pre1, 1 October 2024). 
* Menu: * Introduction:: Purpose and features of lziprecover * Invoking lziprecover:: Command-line interface +* File format:: Detailed format of the compressed file * Data safety:: Protecting data from accidental loss +* Fec files:: Forward Error Correction * Repairing one byte:: Fixing bit flips and similar errors * Merging files:: Fixing several damaged copies * Reproducing one sector:: Fixing a missing (zeroed) sector * Tarlz:: Options supporting the tar.lz format * File names:: Names of the files produced by lziprecover -* File format:: Detailed format of the compressed file * Trailing data:: Extra data appended to the file * Examples:: A small tutorial with examples * Unzcrash:: Testing the robustness of decompressors @@ -44,11 +45,14 @@ File: lziprecover.info, Node: Introduction, Next: Invoking lziprecover, Prev: ************** Lziprecover is a data recovery tool and decompressor for files in the lzip -compressed data format (.lz). Lziprecover is able to repair slightly damaged -files (up to one single-byte error per member), produce a correct file by -merging the good parts of two or more damaged copies, reproduce a missing -(zeroed) sector using a reference file, extract data from damaged files, -decompress files, and test integrity of files. +compressed data format (.lz). Lziprecover also provides Forward Error +Correction (FEC) able to repair any kind of file. + + Lziprecover is able to repair slightly damaged lzip files (up to one +single-byte error per member), produce a correct file by merging the good +parts of two or more damaged copies, reproduce a missing (zeroed) sector +using a reference file, extract data from damaged files, decompress files, +and test integrity of files. Lziprecover can remove the damaged members from multimember files, for example multimember tar.lz archives. @@ -56,12 +60,21 @@ example multimember tar.lz archives. 
Lziprecover provides random access to the data in multimember files; it only decompresses the members containing the desired data. - Lziprecover facilitates the management of metadata stored as trailing -data in lzip files. - Lziprecover is not a replacement for regular backups, but a last line of defense for the case where the backups are also damaged. + Lziprecover is able to provide unique data recovery capabilities because +the lzip format is extraordinarily safe. The simple and safe design of the +file format complements the embedded error detection provided by the LZMA +data stream. Any distance larger than the dictionary size acts as a +forbidden symbol, allowing the decompressor to detect the approximate +position of errors, and leaving very little work for the check sequence +(CRC and data sizes) in the detection of errors. Lzip is usually able to +detect all possible bit flips in the compressed data without resorting to +the check sequence. It would be difficult to write an automatic recovery +tool like lziprecover for the gzip format. And, as far as I know, it has +never been written. + The lzip file format is designed for data sharing and long-term archiving, taking into account both data integrity and decoder availability: @@ -122,7 +135,7 @@ have been compressed. Decompressed is used to refer to data which have undergone the process of decompression. -File: lziprecover.info, Node: Invoking lziprecover, Next: Data safety, Prev: Introduction, Up: Top +File: lziprecover.info, Node: Invoking lziprecover, Next: File format, Prev: Introduction, Up: Top 2 Invoking lziprecover ********************** @@ -137,7 +150,7 @@ first time it appears in the command line. If no file names are specified, lziprecover decompresses from standard input to standard output. Remember to prepend './' to any file name beginning with a hyphen, or use '--'. 
- lziprecover supports the following options: *Note Argument syntax: +lziprecover supports the following options: *Note Argument syntax: (arg_parser)Argument syntax. '-h' @@ -171,6 +184,19 @@ to prepend './' to any file name beginning with a hyphen, or use '--'. filename.tlz becomes filename.tar.lz anyothername becomes anyothername.lz +'-b BYTES' +'--block-size=BYTES' + When creating fec files, make the FEC block size a multiple of BYTES, + which must be a multiple of 512 not larger than 1 GiB. + +'-B' +'--byte-repair' + Try to repair a FILE with small errors (up to one single-byte error + per member). If successful, a repaired copy is written to the file + FILE_fixed.lz. FILE is not modified at all. The exit status is 0 if + the file could be repaired, 2 otherwise. *Note Repairing one byte::, + for a complete description of the byte-repair mode. + '-c' '--stdout' Write decompressed data to standard output; keep input files @@ -237,8 +263,45 @@ to prepend './' to any file name beginning with a hyphen, or use '--'. '--force' Force overwrite of output files. +'-F create[N]|repair|test|list' +'--fec=create[N]|repair|test|list' + Create fec files, or repair or test files using previously created fec + files, or list the contents of fec files. The argument (create, repair, + test, or list) can be abbreviated even to a single letter. Option '-i' + is required to repair or test a file using a corrupt fec file, or to + list a corrupt fec file. *Note Fec files::. + + N is the number of FEC blocks to be created. The amount of FEC data to + be created may also be specified as a percentage from 0.003% to 100%, + or as a number of bytes followed by a 'B' (4096B, 16KiB, etc). If N is + not specified, it defaults to '8' (8 FEC blocks). (Because, when was + the last time you saw more than 8 bad sectors affecting the same file?) + + '--fec=create' writes the FEC data created to FILE.fec unless option + '-c' or '-o' is specified. 
If a fec file can't be created, lziprecover + exits immediately with error status 1 without trying to create the + rest of the files. + + '--fec=repair' and '--fec=test' read the FEC data from FILE.fec unless + '--fec-file' is specified. '--fec=repair' writes the repaired file to + FILE_fixed unless option '-c' or '-o' is specified. *Note File + names::. If a file fails to repair, lziprecover exits immediately with + error status 2 without repairing the rest of the files. + +'-0 .. -9' + FEC fragmentation level. Defaults to '-9'. Level '-0' is the fastest; + it creates FEC data using GF(2^8), maybe with large blocks. Levels + '-1' to '-9' use GF(2^8) or GF(2^16) as required, with increasing + amounts of smaller blocks. + +'--fec-file=FILE[/]' + When repairing or testing, read FEC data from FILE. If FILE ends with + a slash, it is interpreted as the name of a directory containing the + fec file(s). + '-i' '--ignore-errors' + Ignore non-fatal errors. Make '--decompress', '--test', and '--range-decompress' ignore format and data errors and continue decompressing the remaining members in the file; keep input files unchanged. For example, the commands @@ -251,6 +314,11 @@ to prepend './' to any file name beginning with a hyphen, or use '--'. errors. The exit status is set to 0 unless other errors are found (I/O errors, for example). + Make '--fec=repair' and '--fec=test' ignore errors in the fec file and + return with exit status 0 if the repaired/protected file passes the + test, even if corrupt packets or trailing garbage are found in the fec + file. Make '--fec=list' ignore errors in the fec files. + Make '--list', '--dump', '--remove', and '--strip' ignore format errors. The sizes of the members with errors (especially the last) may be wrong. @@ -287,29 +355,52 @@ to prepend './' to any file name beginning with a hyphen, or use '--'. produced, 2 otherwise. *Note Merging files::, for a complete description of the merge mode. 
-'-o FILE' -'--output=FILE' - Place the repaired output into FILE instead of into FILE_fixed.lz. If - splitting, the names of the files produced are in the form - 'rec01FILE', 'rec02FILE', etc. - - If '-c' has not been also specified, write the (de)compressed output - to FILE, automatically creating any missing parent directories; keep - input files unchanged. This option (or '-c') is needed when reading - from a named pipe (fifo) or from a device. '-o -' is equivalent to - '-c'. '-o' has no effect when testing or listing. +'-n N' +'--threads=N' + Set the maximum number of worker threads for '--fec=create', + overriding the system's default. Valid values range from 1 to "as many + as your system can support". If this option is not used, lziprecover + tries to detect the number of processors in the system and use it as + default value. 'lziprecover --help' shows the system's default value. + +'-o FILE[/]' +'--output=FILE[/]' + If repairing, place the repaired output into FILE instead of into + FILE_fixed.lz. If splitting, the names of the files produced are in + the form 'rec01FILE', 'rec02FILE', etc. + + If creating FEC data and '-c' has not been also specified, write the + FEC data to FILE. If FILE ends with a slash, it is interpreted as the + name of a directory where the fec file(s) will be written to. In this + case, the fec file names are composed by replacing the prefix + preceding the last slash of each file name specified in the command + line with FILE (or prepending FILE if the file name does not contain a + slash), and appending the extension '.fec'. + + Else, if '-c' has not been also specified, write the (de)compressed + output to FILE, automatically creating any missing parent directories; + keep input files unchanged. This option (or '-c') is needed when + reading from a named pipe (fifo) or from a device. '-o -' is + equivalent to '-c'. '-o' has no effect when testing or listing. '-q' '--quiet' Quiet operation. Suppress all messages. 
+'-r' +'--recursive' + When creating or reading fec files (but not when listing), for each + directory operand, read and process all files in that directory, + recursively. Follow symbolic links given in the command line, but skip + symbolic links that are encountered recursively. Ignore files with + extension '.fec', and files and directories named 'fec'. + '-R' -'--byte-repair' - Try to repair a FILE with small errors (up to one single-byte error - per member). If successful, a repaired copy is written to the file - FILE_fixed.lz. FILE is not modified at all. The exit status is 0 if - the file could be repaired, 2 otherwise. *Note Repairing one byte::, - for a complete description of the repair mode. +'--dereference-recursive' + When creating or reading fec files (but not when listing), for each + directory operand, read and process all files in that directory, + recursively, following all symbolic links. Ignore files with extension + '.fec', and files and directories named 'fec'. '-s' '--split' @@ -347,11 +438,12 @@ to prepend './' to any file name beginning with a hyphen, or use '--'. When decompressing or testing, further -v's (up to 4) increase the verbosity level, showing status, compression ratio, dictionary size, trailer contents (CRC, data size, member size), and up to 6 bytes of - trailing data (if any) both in hexadecimal and as a string of printable - ASCII characters. + trailing data (if any) both in hexadecimal and as a string of + printable ASCII characters. Two or more '-v' options show the progress of decompression. - In other modes, increasing verbosity levels show final status, progress - of operations, and extra information (for example, the failed areas). + In other modes, increasing verbosity levels show final status, + progress of operations, and extra information (for example, the failed + areas). 
'--dump=[MEMBER_LIST][:damaged][:empty][:tdata]' Dump the members listed, the damaged members (if any), the empty @@ -430,15 +522,16 @@ to prepend './' to any file name beginning with a hyphen, or use '--'. rest of the files. See '--dump' above for a description of the argument. -'--empty-error' - Exit with error status 2 if any empty member is found in the input - files. +'--ignore-empty' + When decompressing, testing, or listing, ignore empty members in + multimember files. By default lziprecover exits with error status 2 if + any empty member is found in a multimember file. -'--marking-error' - Exit with error status 2 if the first LZMA byte is non-zero in any - member of the input files. This may be caused by data corruption or by - deliberate insertion of tracking information in the file. Use - 'lziprecover --clear-marking' to clear any such non-zero bytes. +'--ignore-nonzero' + When decompressing or testing, ignore a nonzero first byte in the LZMA + stream. By default lziprecover exits with error status 2 if the first + LZMA byte is nonzero in any member of the input files. Use + 'lziprecover --nonzero-repair' to repair any such nonzero bytes. '--loose-trailing' When decompressing, testing, or listing, allow trailing data whose @@ -447,17 +540,13 @@ to prepend './' to any file name beginning with a hyphen, or use '--'. triggers a "corrupt header" error and the cause is not indeed a corrupt header. -'--clear-marking' - Set to zero the first LZMA byte of each member in the files specified. - At verbosity level 1 (-v), print the number of members cleared. The - date of each file modified is preserved if possible. This option - exists because the first byte of the LZMA stream is ignored by the - range decoder, and can therefore be (mis)used to store any value which - can then be used as a watermark to track the path of the compressed - payload. +'--nonzero-repair' + Repair in place a nonzero first LZMA byte in the files specified. 
With + '-v', print the number of members repaired. The date of each file + modified is preserved if possible. - Lziprecover also supports the following debug options (for experts): +lziprecover also supports the following debug options (for experts): '-E RANGE[,SECTOR_SIZE]' '--debug-reproduce=RANGE[,SECTOR_SIZE]' @@ -469,6 +558,24 @@ to prepend './' to any file name beginning with a hyphen, or use '--'. statistics of the number of sectors reproduced successfully. Exit with nonzero status only in case of fatal error. +'-F dcN' +'--fec=dcN' + Simulate FEC repair of all combinations of N zeroed block errors + spread along the whole input file. + +'-F dzRANGE[:RANGE]...' +'--fec=dzRANGE[:RANGE]...' + Simulate FEC repair of one or more zeroed block(s) in the input file + at the RANGEs given. The RANGEs may be unordered and overlapping. + Lziprecover sorts and joins them as needed. *Note range-format::, for a + description of RANGE. + +'-F dZSIZE[,DELTA]' +'--fec=dZSIZE[,DELTA]' + Simulate FEC repair of all possible zeroed blocks of size SIZE in the + input file. DELTA defaults to SIZE. Values of DELTA smaller than SIZE + result in overlapping blocks. + '-M' '--md5sum' Print to standard output the MD5 digests of the input FILES one per @@ -543,6 +650,10 @@ to prepend './' to any file name beginning with a hyphen, or use '--'. Load the compressed FILE into memory, set the byte at POSITION to VALUE, and then try to repair the byte error. *Note --byte-repair::. +'--gf16' + Forces the use of GF(2^16) when creating FEC blocks even if the number + of blocks fits in GF(2^8). + Numbers given as arguments to options may be expressed in decimal, hexadecimal, or octal (using the same syntax as integer constants in C++), @@ -551,6 +662,7 @@ and may be followed by a multiplier and an optional 'B' for "byte". 
Table of SI and binary prefixes (unit multipliers): Prefix Value | Prefix Value +---------------------------------------------------------------------- k kilobyte (10^3 = 1000) | Ki kibibyte (2^10 = 1024) M megabyte (10^6) | Mi mebibyte (2^20) G gigabyte (10^9) | Gi gibibyte (2^30) @@ -569,9 +681,83 @@ corrupt or invalid input file, 3 for an internal consistency error (e.g., bug) which caused lziprecover to panic. -File: lziprecover.info, Node: Data safety, Next: Repairing one byte, Prev: Invoking lziprecover, Up: Top +File: lziprecover.info, Node: File format, Next: Data safety, Prev: Invoking lziprecover, Up: Top -3 Protecting data from accidental loss +3 File format +************* + +Perfection is reached, not when there is no longer anything to add, but +when there is no longer anything to take away. +-- Antoine de Saint-Exupery + + In the diagram below, a box like this: + ++---+ +| | <-- the vertical bars might be missing ++---+ + + represents one byte; a box like this: + ++==============+ +| | ++==============+ + + represents a variable number of bytes. + +A lzip file consists of one or more independent "members" (compressed data +sets). The members simply appear one after another in the file, with no +additional information before, between, or after them. Each member can +encode in compressed form up to 16 EiB - 1 byte of uncompressed data. The +size of a multimember file is unlimited. Empty members (data size = 0) are +not allowed in multimember files. + + Each member has the following structure: + ++--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| ID string | VN | DS | LZMA stream | CRC32 | Data size | Member size | ++--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + All multibyte values are stored in little endian order. + +'ID string (the "magic" bytes)' + A four byte string, identifying the lzip format, with the value "LZIP" + (0x4C, 0x5A, 0x49, 0x50). 
+ +'VN (version number, 1 byte)' + Just in case something needs to be modified in the future. 1 for now. + +'DS (coded dictionary size, 1 byte)' + The dictionary size is calculated by taking a power of 2 (the base + size) and subtracting from it a fraction between 0/16 and 7/16 of the + base size. + Bits 4-0 contain the base 2 logarithm of the base size (12 to 29). + Bits 7-5 contain the numerator of the fraction (0 to 7) to subtract + from the base size to obtain the dictionary size. + Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB + Valid values for dictionary size range from 4 KiB to 512 MiB. + +'LZMA stream' + The LZMA stream, finished by an "End Of Stream" marker. Uses default + values for encoder properties. *Note Stream format: (lzip)Stream + format, for a complete description. + +'CRC32 (4 bytes)' + Cyclic Redundancy Check (CRC) of the original uncompressed data. + +'Data size (8 bytes)' + Size of the original uncompressed data. + +'Member size (8 bytes)' + Total size of the member, including header and trailer. This field acts + as a distributed index, improves the checking of stream integrity, and + facilitates the safe recovery of undamaged members from multimember + files. Lzip limits the member size to 2 PiB to prevent the data size + field from overflowing. + + +File: lziprecover.info, Node: Data safety, Next: Fec files, Prev: File format, Up: Top + +4 Protecting data from accidental loss ************************************** It is a fact of life that sometimes data becomes corrupt. Software has @@ -583,26 +769,31 @@ formats, and the reason why a data recovery tool is sometimes needed. single-byte errors, multibyte errors (generally affecting a whole sector in a block device), and total device failure. + The two methods most effective to protect data from accidental loss are +backup copies and Forward Error Correction (FEC). Both methods can be used +simultaneously, and both are supported by lziprecover. 
+ Lziprecover protects natively against single-byte errors as long as file integrity is checked frequently enough that a second single-byte error does not develop in the same member before the first one is repaired. *Note Repairing one byte::. - Lziprecover also protects against multibyte errors if at least one backup -copy of the file is made (*note Merging files::), or if the error is a -zeroed sector and the uncompressed data corresponding to the zeroed sector -are available (*note Reproducing one sector::). If you can choose between -merging and reproducing, try merging first because it is usually faster, -easier to use, and has a high probability of success. + Lziprecover protects against multibyte errors in 3 cases: if a fec file +is available (*note Fec files::), if at least one backup copy of the file is +available (*note Merging files::), or if the error is a zeroed sector and +the uncompressed data corresponding to the zeroed sector are available +(*note Reproducing one sector::). FEC is best. Else, if you can choose +between merging and reproducing, try merging first because it is usually +faster, easier to use, and has a high probability of success. Lziprecover can't help in case of device failure. The only remedy for total device failure is storing backup copies in separate media. - The extraordinary safety of the lzip format allows lziprecover to exploit -the redundance that occurrs naturally when making compressed backups. -Lziprecover can recover data that would not be recoverable from files -compressed in other formats. Let's see two examples of how much better is -lzip compared with gzip and bzip2 with respect to data safety: + The extraordinary safety of the lzip format allows lziprecover to use the +redundance that occurs naturally when making compressed backups. Lziprecover +can recover data that would not be recoverable from files compressed in +other formats. 
Let's see two examples of how much better is lzip compared +with gzip and bzip2 with respect to data safety: * Menu: @@ -612,7 +803,7 @@ lzip compared with gzip and bzip2 with respect to data safety: File: lziprecover.info, Node: Merging with a backup, Next: Reproducing a mailbox, Up: Data safety -3.1 Recovering a file using a damaged backup +4.1 Recovering a file using a damaged backup ============================================ Let's suppose that you made a compressed backup of your valuable scientific @@ -639,7 +830,7 @@ possible to recover a file with thousands of errors. File: lziprecover.info, Node: Reproducing a mailbox, Prev: Merging with a backup, Up: Data safety -3.2 Recovering new messages using an old backup +4.2 Recovering new messages using an old backup =============================================== Let's suppose that you make periodic backups of your email messages stored @@ -683,9 +874,357 @@ performance-of-reproduce::) is almost as high as that of merging two identical backups (*note performance-of-merge::). -File: lziprecover.info, Node: Repairing one byte, Next: Merging files, Prev: Data safety, Up: Top +File: lziprecover.info, Node: Fec files, Next: Repairing one byte, Prev: Data safety, Up: Top + +5 Forward Error Correction +************************** + +"Forward Error Correction" (FEC) is any way of protecting data from +corruption by creating redundant data that can be used later to repair +errors in the protected data. Lziprecover uses a Hilbert-based Reed-Solomon +code to create one fec file (with extension '.fec') for each file that +needs to be protected. The fec files created by lziprecover are +reproducible. + + Reed-Solomon is the most space-efficient Error Correcting Code (ECC) for +data stored in block devices. It creates redundant FEC blocks in such a way +that X FEC blocks allow the recuperation of any combination of up to X lost +data blocks. 
All the blocks (data and FEC) are of the same size, which in +fec files must be a multiple of 512 bytes. Reed-Solomon is not optimum for +corruption affecting random single bits in a file because each corrupt bit +invalidates the whole block containing it. But in block devices, scattered +bit flips should not happen. + + Usually, a corrupt file does not provide an indication of where the +corruption is located. Therefore, each fec file stores one or two arrays of +CRCs to detect the corrupt blocks in the protected file and mark them as +erasures (missing data blocks). Thus, a fec file creates its own Binary +Erasure Channel (BEC) for the protected file. + + Lziprecover's FEC algorithm can repair any kind of file, but its ability +to repair lzip files is greater than for other kinds of files. Lziprecover +can use the statistical properties of lzip data to repair a lzip file +rescued with ddrescue, even if the fec file is so damaged that it has lost +both CRC arrays. Lzip data helps to locate the corrupt parts of the file +even without a BEC. For this to work, at least one chksum packet header +must be intact to provide 'prodata_size', 'prodata_md5', and 'gf16'. + +* Menu: + +* How Reed-Solomon works:: It is basically an equation system +* Implementation details:: How lziprecover implements Reed-Solomon +* Creating fec files:: How to create fec files +* Testing with fec files:: How to test files using fec files +* Repairing with fec files:: How to repair files using fec files +* Fec file format:: Detailed format of the redundant FEC data + + +File: lziprecover.info, Node: How Reed-Solomon works, Next: Implementation details, Up: Fec files + +5.1 How Reed-Solomon works +========================== + +To illustrate how Reed-Solomon works on the BEC, we will use an example with +standard arithmetic on integers. Note that in lziprecover's FEC each +variable is a (potentially large) block of data, not a single value. 
+ + Given variables x, y, and z (the protected data) whose values are known, +an equation system can be created where the values of three FEC variables +p, q, and r can be computed from the values of x, y, and z: + + x + y + z = p (1) + x + 2y + 3z = q (2) + x + 3y + 2z = r (3) + + If we have that x = 1, y = 2, and z = 3, then p = 6, q = 14, and r = 13: + + 1 + 2 + 3 = 6 (1a) + 1 + 4 + 9 = 14 (2a) + 1 + 6 + 6 = 13 (3a) + + Now, if the values of x and y are lost because of data corruption, they +can be recomputed by using any two of the three equations above. For +example, if we replace the known values of z, p, q, and r in equations (1) +and (2) we get: + + x + y + 3 = 6 (1b) + x + 2y + 9 = 14 (2b) + + In order to solve the two equations above, we first reduce them by +subtracting the values of the known data variables from the values of the +FEC variables: + + x + y = 6 - 3 (1c) + x + 2y = 14 - 9 (2c) + + which gives the reduced FEC values P = 3 and Q = 5. + + Then we create a square matrix 'A' with the coefficients of x and y in +the equations above, and invert it. 'A' must be invertible and must not +have any zero element. 
We also create the column vector D with the missing +data variables x and y, and the column vector F with the reduced FEC values +P and Q: + + D = x A = 1 1 A^-1 = 2 -1 F = P + y 1 2 -1 1 Q + + Then we multiply the inverse matrix 'A^-1' by the column vector F to +obtain the values of x and y (D = A^-1 * F): + + x = 2P - Q (1d) + y = -P + Q (2d) + + which finally gives us the lost values x = 1 and y = 2: + + x = 2 * 3 - 5 (1e) + y = -3 + 5 (2e) -4 Repairing one byte + +File: lziprecover.info, Node: Implementation details, Next: Creating fec files, Prev: How Reed-Solomon works, Up: Fec files + +5.2 How lziprecover implements Reed-Solomon +=========================================== + +Lziprecover's implementation of Reed-Solomon can manage up to 128 data +blocks + 128 FEC blocks when using a Galois Field of size 256 (GF(2^8)), or +up to 32768 data blocks + 32768 FEC blocks when using a Galois Field of size +65536 (GF(2^16)). GF(2^8) is included because it is faster for files up to +about 1 MB. The number of FEC blocks is currently limited to 2048 because +of memory and time limits. Inverting a matrix for 32768 FEC blocks would +take a week and require 2 GiB of RAM. + + The file is repaired in memory. Therefore, enough virtual memory +(RAM + swap) to contain the protected file and the FEC data is required. +The file size is limited to less than 2 GiB on 32-bit systems. The repaired +file is checked with a MD5 digest. + + Lziprecover divides the input file in 1 to 32768 data blocks of the same +size, which ranges from 512 bytes to 128 TiB, for a total protected file +size of up to 4 EiB. It then uses a Hilbert matrix 'A' to create up to 2048 +FEC blocks of the same size as the data blocks. Lziprecover corrects errors +in the data blocks by first reducing the equation system to M equations +with M unknowns each, where M is the number of missing data blocks. 
Then it +multiplies the inverse of the relevant submatrix of 'A' by the vector of +results of the M equations to recompute the values of the missing data +blocks. + + Lziprecover implements GF(2^8) with polynomial 0x11D and GF(2^16) with +polynomial 0x1100B. + + A Hilbert matrix is defined as 'A[i][j] = 1 / (i + j + 1)' for i and j +>= 0. But as in a Galois Field addition is exclusive or, applying the +Hilbert definition produces a singular (non invertible) matrix. To avoid +this problem, lziprecover uses a Hilbert matrix starting at row +'gf_size / 2'. I.e., 'A[i][j] = 1 / (i + gf_size / 2 + j)' for +'0 <= i,j < gf_size / 2'. (gf_size is the size of the Galois Field). + + +File: lziprecover.info, Node: Creating fec files, Next: Testing with fec files, Prev: Implementation details, Up: Fec files + +5.3 How to create fec files +=========================== + +Example 1: Create the fec file 'archive.tar.lz.fec' and store it in the +same directory where 'archive.tar.lz' is. + + lziprecover -v -Fc archive.tar.lz + +Example 2: Create the fec file 'archive.tar.lz.fec' and store it in the +directory 'fec'. + + lziprecover -v -Fc -o fec/ archive.tar.lz + +Example 3: Create recursively one fec file for each file in the directory +'datadir' and store them in the tree under the directory 'fec'. + + lziprecover -v -r -Fc -o fec/ datadir + + +File: lziprecover.info, Node: Testing with fec files, Next: Repairing with fec files, Prev: Creating fec files, Up: Fec files + +5.4 How to test files using fec files +===================================== + +Example 1: Test the integrity of 'archive.tar.lz' using the fec file +'archive.tar.lz.fec' from the same directory. + + lziprecover -v -Ft archive.tar.lz + +Example 2: Test the integrity of the files 'foo.lz' and 'bar.lz' using the +corresponding fec files stored in the directory 'fec'. 
+ + lziprecover -v -Ft --fec-file=fec/ foo.lz bar.lz + +Example 3: Test recursively the integrity of all the files in the directory +'datadir' using the fec files stored in the directory tree under the +directory 'fec'. + + lziprecover -v -r -Ft --fec-file=fec/ datadir + + +File: lziprecover.info, Node: Repairing with fec files, Next: Fec file format, Prev: Testing with fec files, Up: Fec files + +5.5 How to repair files using fec files +======================================= + +Example 1: Repair the file 'archive.tar.lz' using the fec file +'archive.tar.lz.fec' from the same directory. The repaired file is written +to 'archive_fixed.tar.lz' in the same directory. + + lziprecover -v -Fr archive.tar.lz + +Example 2: Repair the files 'foo.lz' and 'bar.lz' using the corresponding +fec files stored in the directory 'fec'. + + lziprecover -v -Fr --fec-file=fec/ foo.lz bar.lz + +Example 3: Repair recursively all the damaged files in the directory +'datadir' using the fec files stored in the directory tree under the +directory 'fec'. + + lziprecover -v -r -Fr --fec-file=fec/ datadir + + +File: lziprecover.info, Node: Fec file format, Prev: Repairing with fec files, Up: Fec files + +5.6 Fec file format +=================== + +A fec file consists of one chksum packet, one or more fec packets, and one +optional second chksum packet. The first chksum packet must be the first +packet in the file, but the second chksum packet does not need to be the +last packet in the file. The essential information is stored in the chksum +packet(s), while the potentially numerous fec packets are kept as simple as +possible: + ++=================+===============+=================+ +| Chksum packet | Fec packets | Chksum packet | ++=================+===============+=================+ + + All multibyte values are stored in little endian order except +'prodata_md5'. 
+ + The 'fbs' (fec_block_size) field is coded as a little endian 16-bit +floating point unsigned integer with an 11-bit mantissa at bits 0-10 and a +5-bit exponent at bits 11-15. The mantissa is an integer between 0 and 2047. +The exponent is an integer between 9 and 40, stored with a bias of -9; the +exponent 9 is stored as 0, and 40 is stored as 31. Values are stored with +the largest mantissa and smallest exponent; 4096 is stored as m=8, e=0. This +encoding can store values from 0 bytes to 2047 TiB (2^51 - 2^40 bytes) with +a maximum resolution of 512 bytes, but 0 and the values beyond 128 TiB are +not used: + + 5 11 ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| exp | mantissa | The 'fbs' (fec_block_size) field ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +15 11 10 0 + + The fec file format is 4-byte aligned for speed because FEC data are +created and decoded 4 bytes at a time. The 4-byte alignment has been +achieved by a careful design, without adding any padding bytes. + + The fec file format has an overhead of 8 bytes per protected data block, +plus 16 bytes per FEC block, plus 80 bytes. + +5.6.1 Chksum packet +------------------- + +A chksum packet contains one CRC for each of the N data blocks in the +protected file, and is structured as shown in the following table. All +lengths and offsets are in decimal: + +Field Name Offset Length (in bytes) +------------------------------------------ +magic 0 4 +version 4 1 +flags 5 1 +fbs 6 2 +prodata_size 8 8 +prodata_md5 16 16 +header_crc 32 4 +crc_array 36 4N +payload_crc 36 + 4N 4 + +'magic' + A four byte string identifying the chksum packet (and therefore the fec + file), with the value 0xB3, 0xA5, 0xB6, 0xAF. (The complement of + "LZIP"). + +'version' + Just in case something needs to be modified in the future. 0 for now. + +'flags' + Bit 0 (is_crc_c): crc_array contains CRC32 (0) or CRC32-C (1). + Bit 1 (gf16): Galois field is GF(2^8) (0) or GF(2^16) (1). + Bits 2-7: zero. 
+ +'fbs (coded fec_block_size)' + Number of FEC bytes per block. It is a multiple of 512 bytes between + 512 bytes and 128 TiB. + +'prodata_size' + Size of the protected file. 1 byte to 4 EiB. + +'prodata_md5' + Md5sum of the protected file. Stored in big endian order. + +'header_crc' + CRC32 of the previous fields, including magic. + +'crc_array' + Array of N CRCs corresponding to the N blocks in which the protected + file is divided. N is 'ceil( prodata_size / fbs )'. The first chksum + packet contains an array of CRC32s, while the second chksum packet (if + present) contains an array of CRC32-Cs. + + For the expected thousands of bit flips caused by a zeroed sector, a + "symmetric" CRC like CRC32 is probably better than CRC32-C, which + detects all the errors with an odd number of bit flips at the expense + of a larger number of undetected errors with an even number of bit + flips. + +'payload_crc' + CRC32 of the crc_array. + +5.6.2 Fec packet +---------------- + +A fec packet contains one FEC block and is structured as shown in the +following table. All lengths and offsets are in decimal: + +Field Name Offset Length (in bytes) +------------------------------------------ +magic 0 4 +fbn 4 2 +fbs 6 2 +header_crc 8 4 +fec_block 12 fbs +payload_crc 12 + fbs 4 + +'magic' + A four byte string identifying the fec packet, with the value "\xB3FEC" + (0xB3, 0x46, 0x45, 0x43). + +'fbn (fec_block_number)' + Number of this FEC block. Required to compute the decode matrix. + +'fbs (coded fec_block_size)' + *Note fbs::. + +'header_crc' + CRC32 of the previous fields, including magic. + +'fec_block' + The FEC block. + +'payload_crc' + CRC32 of the fec_block. + + +File: lziprecover.info, Node: Repairing one byte, Next: Merging files, Prev: Fec files, Up: Top + +6 Repairing one byte ******************** Lziprecover can repair perfectly most files with small errors (up to one @@ -695,7 +1234,8 @@ bit to the original. 
This makes lzip files resistant to bit flip, one of the most common forms of data corruption. The file is repaired in memory. Therefore, enough virtual memory -(RAM + swap) to contain the largest damaged member is required. +(RAM + swap) to contain the largest damaged member is required. Member size +is limited to 2 GiB on 32-bit systems. The error may be located anywhere in the file except in the first 5 bytes of each member header or in the 'Member size' field of the trailer @@ -726,7 +1266,7 @@ repairs more efficiently the worst errors. File: lziprecover.info, Node: Merging files, Next: Reproducing one sector, Prev: Repairing one byte, Up: Top -5 Merging files +7 Merging files *************** If you have several copies of a file but all of them are too damaged to @@ -814,7 +1354,7 @@ correct file produced is saved in 'big_db_00001.lz'. File: lziprecover.info, Node: Reproducing one sector, Next: Tarlz, Prev: Merging files, Up: Top -6 Reproducing one sector +8 Reproducing one sector ************************ Lziprecover can recover a zeroed sector in a lzip file by concatenating the @@ -836,7 +1376,8 @@ reproduction can't be done if the zeroed sector overlaps with the first 15 bytes of a member, or if the zeroed sector is smaller than 8 bytes. The file is reproduced in memory. Therefore, enough virtual memory -(RAM + swap) to contain the damaged member is required. +(RAM + swap) to contain the damaged member is required. Member size is +limited to 2 GiB on 32-bit systems. To understand how it works, take any lzipped file, say 'foo.lz', decompress it (keeping the original), and try to reproduce an artificially @@ -889,7 +1430,7 @@ header, and that the archive can be reproduced. The tarlz format has minimum overhead. It uses basic ustar headers, and only adds extended pax headers when they are required. 
-6.1 Performance of '--reproduce' +8.1 Performance of '--reproduce' ================================ Reproduce mode is especially useful when recovering a corrupt backup (or a @@ -1000,7 +1541,7 @@ has been renamed. File: lziprecover.info, Node: Tarlz, Next: File names, Prev: Reproducing one sector, Up: Top -7 Options supporting the tar.lz format +9 Options supporting the tar.lz format ************************************** Tarlz is a massively parallel (multi-threaded) combined implementation of @@ -1021,8 +1562,7 @@ alignment between tar members and lzip members minimizes the amount of data lost in case of corruption. In this chapter we'll explain the ways in which lziprecover can recover and process multimember tar.lz archives. - -7.1 Recovering damaged multimember tar.lz archives +9.1 Recovering damaged multimember tar.lz archives ================================================== If you have several copies of the damaged archive, try merging them first @@ -1046,7 +1586,7 @@ one byte::. If the command below prints something like 'Copy of input file repaired successfully.' you are done and 'archive_fixed.tar.lz' now contains the recovered archive: - lziprecover -v -R archive.tar.lz + lziprecover -v --byte-repair archive.tar.lz If all the above fails, and the archive was created with tarlz, you may save the damaged members for later and then copy the good members to another @@ -1064,8 +1604,7 @@ possible from each damaged member in 'bad_members.tar.lz': cd tmp tarlz --keep-damaged -xvf ../bad_members.tar.lz - -7.2 Processing multimember tar.lz archives +9.2 Processing multimember tar.lz archives ========================================== Lziprecover is able to copy a list of members from a file to another. For @@ -1077,96 +1616,25 @@ the last member, which in an appendable tar.lz archive contains the end-of-file blocks. 
-File: lziprecover.info, Node: File names, Next: File format, Prev: Tarlz, Up: Top +File: lziprecover.info, Node: File names, Next: Trailing data, Prev: Tarlz, Up: Top -8 Names of the files produced by lziprecover -******************************************** +10 Names of the files produced by lziprecover +********************************************* The name of the fixed file produced by '--byte-repair' and '--merge' is made by appending the string '_fixed.lz' to the original file name. If the original file name ends with one of the extensions '.tar.lz', '.lz', or '.tlz', the string '_fixed' is inserted before the extension. - -File: lziprecover.info, Node: File format, Next: Trailing data, Prev: File names, Up: Top - -9 File format -************* - -Perfection is reached, not when there is no longer anything to add, but -when there is no longer anything to take away. --- Antoine de Saint-Exupery - - - In the diagram below, a box like this: - -+---+ -| | <-- the vertical bars might be missing -+---+ - - represents one byte; a box like this: - -+==============+ -| | -+==============+ - - represents a variable number of bytes. - - - A lzip file consists of one or more independent "members" (compressed -data sets). The members simply appear one after another in the file, with no -additional information before, between, or after them. Each member can -encode in compressed form up to 16 EiB - 1 byte of uncompressed data. The -size of a multimember file is unlimited. - - Each member has the following structure: - -+--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -| ID string | VN | DS | LZMA stream | CRC32 | Data size | Member size | -+--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - All multibyte values are stored in little endian order. - -'ID string (the "magic" bytes)' - A four byte string, identifying the lzip format, with the value "LZIP" - (0x4C, 0x5A, 0x49, 0x50). 
- -'VN (version number, 1 byte)' - Just in case something needs to be modified in the future. 1 for now. - -'DS (coded dictionary size, 1 byte)' - The dictionary size is calculated by taking a power of 2 (the base - size) and subtracting from it a fraction between 0/16 and 7/16 of the - base size. - Bits 4-0 contain the base 2 logarithm of the base size (12 to 29). - Bits 7-5 contain the numerator of the fraction (0 to 7) to subtract - from the base size to obtain the dictionary size. - Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB - Valid values for dictionary size range from 4 KiB to 512 MiB. - -'LZMA stream' - The LZMA stream, finished by an "End Of Stream" marker. Uses default - values for encoder properties. *Note Stream format: (lzip)Stream - format, for a complete description. - -'CRC32 (4 bytes)' - Cyclic Redundancy Check (CRC) of the original uncompressed data. - -'Data size (8 bytes)' - Size of the original uncompressed data. - -'Member size (8 bytes)' - Total size of the member, including header and trailer. This field acts - as a distributed index, improves the checking of stream integrity, and - facilitates the safe recovery of undamaged members from multimember - files. Lzip limits the member size to 2 PiB to prevent the data size - field from overflowing. - + The name of the fixed file produced by '--fec=repair' is made by +appending the string '_fixed' to the original file name. If the original +file name ends with one of the extensions '.tar.lz', '.lz', or '.tlz', the +string '_fixed' is inserted before the extension. 
-File: lziprecover.info, Node: Trailing data, Next: Examples, Prev: File format, Up: Top +File: lziprecover.info, Node: Trailing data, Next: Examples, Prev: File names, Up: Top -10 Extra data appended to the file +11 Extra data appended to the file ********************************** Sometimes extra data are found appended to a lzip file after the last @@ -1235,7 +1703,7 @@ guarantee that both file and hash have not been maliciously replaced). File: lziprecover.info, Node: Examples, Next: Unzcrash, Prev: Trailing data, Up: Top -11 A small tutorial with examples +12 A small tutorial with examples ********************************* Example 1: Extract all the files from archive 'foo.tar.lz'. @@ -1244,19 +1712,16 @@ Example 1: Extract all the files from archive 'foo.tar.lz'. or lziprecover -cd foo.tar.lz | tar -xf - - Example 2: Restore a regular file from its compressed version 'file.lz'. If the operation is successful, 'file.lz' is removed. lziprecover -d file.lz - Example 3: Check the integrity of the compressed file 'file.lz' and show status. lziprecover -tv file.lz - Example 4: The right way of concatenating the decompressed output of two or more compressed files. *Note Trailing data::. @@ -1269,29 +1734,25 @@ more compressed files. *Note Trailing data::. Or keeping the trailing data of the last file like this lziprecover --strip=empty file1.lz file2.lz file3.lz > file123.lz - Example 5: Decompress 'file.lz' partially until 10 KiB of decompressed data are produced. lziprecover -D 0,10KiB file.lz - Example 6: Decompress 'file.lz' partially from decompressed byte at offset 10000 to decompressed byte at offset 14999 (5000 bytes are produced). lziprecover -D 10000-15000 file.lz - Example 7: Repair a corrupt byte in the file 'file.lz'. (Indented lines are abridged diagnostic messages from lziprecover). - lziprecover -v -R file.lz + lziprecover -v --byte-repair file.lz Copy of input file repaired successfully. 
lziprecover -tv file_fixed.lz file_fixed.lz: ok mv file_fixed.lz file.lz - Example 8: Split the multimember file 'file.lz' and write each member in its own 'recXXXfile.lz' file. Then use 'lziprecover -t' to test the integrity of the resulting files. @@ -1302,7 +1763,7 @@ integrity of the resulting files. File: lziprecover.info, Node: Unzcrash, Next: Problems, Prev: Examples, Up: Top -12 Testing the robustness of decompressors +13 Testing the robustness of decompressors ****************************************** *Note --unzcrash::, for a faster way of testing the robustness of lzip. @@ -1358,7 +1819,7 @@ without being decompressed first. Use '--zcmp=false' to disable comparisons. The compressed FILE must not contain errors and the decompressor being tested must decompress it correctly for the comparisons to work. - unzcrash supports the following options: +unzcrash supports the following options: '-h' '--help' @@ -1379,6 +1840,7 @@ tested must decompress it correctly for the comparisons to work. 8 28 56 70 56 28 8 1 Examples of RANGE Tests errors of N-bits + ------------------------------------------- 1 1 1,2,3 1, 2, 3 2-4 2, 3, 4 @@ -1456,7 +1918,7 @@ bug) which caused unzcrash to panic. File: lziprecover.info, Node: Problems, Next: Concept index, Prev: Unzcrash, Up: Top -13 Reporting bugs +14 Reporting bugs ***************** There are probably bugs in lziprecover. There are certainly errors and @@ -1477,56 +1939,74 @@ Concept index * Menu: -* bugs: Problems. (line 6) -* data safety: Data safety. (line 6) -* examples: Examples. (line 6) -* file format: File format. (line 6) -* file names: File names. (line 6) -* getting help: Problems. (line 6) -* introduction: Introduction. (line 6) -* invoking: Invoking lziprecover. (line 6) -* merging files: Merging files. (line 6) -* merging with a backup: Merging with a backup. (line 6) -* options: Invoking lziprecover. (line 6) -* repairing one byte: Repairing one byte. 
(line 6) -* reproducing a mailbox: Reproducing a mailbox. (line 6) -* reproducing one sector: Reproducing one sector. (line 6) -* tarlz: Tarlz. (line 6) -* trailing data: Trailing data. (line 6) -* unzcrash: Unzcrash. (line 6) -* usage: Invoking lziprecover. (line 6) -* version: Invoking lziprecover. (line 6) +* bugs: Problems. (line 6) +* chksum packet: Fec file format. (line 46) +* data safety: Data safety. (line 6) +* examples: Examples. (line 6) +* fec create: Creating fec files. (line 6) +* fec file format: Fec file format. (line 6) +* fec packet: Fec file format. (line 106) +* fec repair: Repairing with fec files. + (line 6) +* fec test: Testing with fec files. (line 6) +* file format: File format. (line 6) +* file names: File names. (line 6) +* forward error correction: Fec files. (line 6) +* getting help: Problems. (line 6) +* introduction: Introduction. (line 6) +* invoking: Invoking lziprecover. (line 6) +* merging files: Merging files. (line 6) +* merging with a backup: Merging with a backup. (line 6) +* options: Invoking lziprecover. (line 6) +* Reed-Solomon details: Implementation details. (line 6) +* Reed-Solomon tutorial: How Reed-Solomon works. (line 6) +* repairing one byte: Repairing one byte. (line 6) +* reproducing a mailbox: Reproducing a mailbox. (line 6) +* reproducing one sector: Reproducing one sector. (line 6) +* tarlz: Tarlz. (line 6) +* trailing data: Trailing data. (line 6) +* unzcrash: Unzcrash. (line 6) +* usage: Invoking lziprecover. (line 6) +* version: Invoking lziprecover. 
(line 6) Tag Table: Node: Top226 -Node: Introduction1406 -Node: Invoking lziprecover5412 -Ref: --trailing-error6359 -Ref: range-format8791 -Ref: --reproduce9126 -Ref: --byte-repair13411 -Ref: --unzcrash23209 -Node: Data safety27459 -Node: Merging with a backup29443 -Node: Reproducing a mailbox30706 -Node: Repairing one byte33160 -Node: Merging files35220 -Ref: performance-of-merge36399 -Ref: ddrescue-example38008 -Node: Reproducing one sector39295 -Ref: performance-of-reproduce43181 -Ref: ddrescue-example245855 -Node: Tarlz48275 -Node: File names51933 -Node: File format52395 -Node: Trailing data55082 -Node: Examples58397 -Ref: concat-example58972 -Node: Unzcrash60364 -Node: Problems66704 -Node: Concept index67256 +Node: Introduction1463 +Node: Invoking lziprecover6223 +Ref: --trailing-error7167 +Ref: --byte-repair8261 +Ref: range-format10138 +Ref: --reproduce10473 +Ref: --unzcrash28457 +Node: File format32896 +Node: Data safety35653 +Node: Merging with a backup37899 +Node: Reproducing a mailbox39162 +Node: Fec files41616 +Node: How Reed-Solomon works43945 +Node: Implementation details46119 +Node: Creating fec files48188 +Node: Testing with fec files48852 +Node: Repairing with fec files49619 +Node: Fec file format50437 +Ref: fbs53308 +Node: Repairing one byte55099 +Node: Merging files57208 +Ref: performance-of-merge58387 +Ref: ddrescue-example59996 +Node: Reproducing one sector61283 +Ref: performance-of-reproduce65220 +Ref: ddrescue-example267894 +Node: Tarlz70314 +Node: File names73981 +Node: Trailing data74714 +Node: Examples78028 +Ref: concat-example78600 +Node: Unzcrash79999 +Node: Problems86385 +Node: Concept index86937 End Tag Table diff --git a/doc/lziprecover.texi b/doc/lziprecover.texi index 0d32d9d..41f3641 100644 --- a/doc/lziprecover.texi +++ b/doc/lziprecover.texi @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 20 January 2024 -@set VERSION 1.24 +@set UPDATED 1 October 2024 +@set VERSION 1.25-pre1 @dircategory Compression @direntry @@ -38,13 
+38,14 @@ This manual is for Lziprecover (version @value{VERSION}, @value{UPDATED}). @menu * Introduction:: Purpose and features of lziprecover * Invoking lziprecover:: Command-line interface +* File format:: Detailed format of the compressed file * Data safety:: Protecting data from accidental loss +* Fec files:: Forward Error Correction * Repairing one byte:: Fixing bit flips and similar errors * Merging files:: Fixing several damaged copies * Reproducing one sector:: Fixing a missing (zeroed) sector * Tarlz:: Options supporting the tar.lz format * File names:: Names of the files produced by lziprecover -* File format:: Detailed format of the compressed file * Trailing data:: Extra data appended to the file * Examples:: A small tutorial with examples * Unzcrash:: Testing the robustness of decompressors @@ -66,11 +67,14 @@ distribute, and modify it. @uref{http://www.nongnu.org/lzip/lziprecover.html,,Lziprecover} is a data recovery tool and decompressor for files in the lzip -compressed data format (.lz). Lziprecover is able to repair slightly damaged -files (up to one single-byte error per member), produce a correct file by -merging the good parts of two or more damaged copies, reproduce a missing -(zeroed) sector using a reference file, extract data from damaged files, -decompress files, and test integrity of files. +compressed data format (.lz). Lziprecover also provides Forward Error +Correction (FEC) able to repair any kind of file. + +Lziprecover is able to repair slightly damaged lzip files (up to one +single-byte error per member), produce a correct file by merging the good +parts of two or more damaged copies, reproduce a missing (zeroed) sector +using a reference file, extract data from damaged files, decompress files, +and test integrity of files. Lziprecover can remove the damaged members from multimember files, for example multimember tar.lz archives. @@ -78,12 +82,21 @@ example multimember tar.lz archives. 
Lziprecover provides random access to the data in multimember files; it only decompresses the members containing the desired data. -Lziprecover facilitates the management of metadata stored as trailing data -in lzip files. - Lziprecover is not a replacement for regular backups, but a last line of defense for the case where the backups are also damaged. +Lziprecover is able to provide unique data recovery capabilities because the +lzip format is extraordinarily safe. The simple and safe design of the file +format complements the embedded error detection provided by the LZMA data +stream. Any distance larger than the dictionary size acts as a forbidden +symbol, allowing the decompressor to detect the approximate position of +errors, and leaving very little work for the check sequence (CRC and data +sizes) in the detection of errors. Lzip is usually able to detect all +possible bit flips in the compressed data without resorting to the check +sequence. It would be difficult to write an automatic recovery tool like +lziprecover for the gzip format. And, as far as I know, it has never been +written. + The lzip file format is designed for data sharing and long-term archiving, taking into account both data integrity and decoder availability: @@ -172,6 +185,7 @@ names are specified, lziprecover decompresses from standard input to standard output. Remember to prepend @file{./} to any file name beginning with a hyphen, or use @samp{--}. +@noindent lziprecover supports the following @uref{http://www.nongnu.org/arg-parser/manual/arg_parser_manual.html#Argument-syntax,,options}: @ifnothtml @@ -213,6 +227,20 @@ lzma-alone file as follows: @item anyothername @tab becomes @tab anyothername.lz @end multitable +@item -b @var{bytes} +@itemx --block-size=@var{bytes} +When creating fec files, make the FEC block size a multiple of @var{bytes}, +which must be a multiple of 512 not larger than @w{1 GiB}. 
+ +@anchor{--byte-repair} +@item -B +@itemx --byte-repair +Try to repair a @var{file} with small errors (up to one single-byte error +per member). If successful, a repaired copy is written to the file +@var{file}_fixed.lz. @var{file} is not modified at all. The exit status is 0 +if the file could be repaired, 2 otherwise. @xref{Repairing one byte}, for a +complete description of the byte-repair mode. + @item -c @itemx --stdout Write decompressed data to standard output; keep input files unchanged. This @@ -280,20 +308,63 @@ sector, plus some context data before and after them. @itemx --force Force overwrite of output files. +@item -F create[@var{n}]|repair|test|list +@itemx --fec=create[@var{n}]|repair|test|list +Create fec files, or repair or test files using previously created fec +files, or list the contents of fec files. The argument (create, repair, +test, or list) can be abbreviated even to a single letter. Option +@option{-i} is required to repair or test a file using a corrupt fec file, +or to list a corrupt fec file. @xref{Fec files}. + +@var{n} is the number of FEC blocks to be created. The amount of FEC data to +be created may also be specified as a percentage from 0.003% to 100%, or as +a number of bytes followed by a @samp{B} (4096B, 16KiB, etc). If @var{n} is +not specified, it defaults to @samp{8} (8 FEC blocks). (Because, when was +the last time you saw more than 8 bad sectors affecting the same file?) + +@option{--fec=create} writes the FEC data created to @var{file}.fec unless +option @option{-c} or @option{-o} is specified. If a fec file can't be +created, lziprecover exits immediately with error status 1 without trying to +create the rest of the files. + +@option{--fec=repair} and @option{--fec=test} read the FEC data from +@var{file}.fec unless @option{--fec-file} is specified. @option{--fec=repair} +writes the repaired file to @var{file}_fixed unless option @option{-c} or +@option{-o} is specified. @xref{File names}. 
If a file fails to repair, +lziprecover exits immediately with error status 2 without repairing the rest +of the files. + +@item -0 .. -9 +FEC fragmentation level. Defaults to @option{-9}. Level @option{-0} is the +fastest; it creates FEC data using GF(2^8), maybe with large blocks. Levels +@option{-1} to @option{-9} use GF(2^8) or GF(2^16) as required, with +increasing amounts of smaller blocks. + +@item --fec-file=@var{file}[/] +When repairing or testing, read FEC data from @var{file}. If @var{file} ends +with a slash, it is interpreted as the name of a directory containing the +fec file(s). + @item -i @itemx --ignore-errors +Ignore non-fatal errors.@* Make @option{--decompress}, @option{--test}, and @option{--range-decompress} ignore format and data errors and continue decompressing the remaining members in the file; keep input files unchanged. For example, the commands @w{@samp{lziprecover -cd -i file.lz > file}} or @w{@samp{lziprecover -D0 -i file.lz > file}} decompress all the recoverable -data in all members of @samp{file.lz} without having to split it first. The +data in all members of @file{file.lz} without having to split it first. The @w{@samp{-cd -i}} method resyncs to the next member header after each error, and is immune to some format errors that make @w{@samp{-D0 -i}} fail. The range decompressed may be smaller than the range requested, because of the errors. The exit status is set to 0 unless other errors are found (I/O errors, for example). +Make @option{--fec=repair} and @option{--fec=test} ignore errors in the fec +file and return with exit status 0 if the repaired/protected file passes the +test, even if corrupt packets or trailing garbage are found in the fec file. +Make @option{--fec=list} ignore errors in the fec files. + Make @option{--list}, @option{--dump}, @option{--remove}, and @option{--strip} ignore format errors. The sizes of the members with errors (especially the last) may be wrong. @@ -328,30 +399,52 @@ damaged copies. 
If successful, a repaired copy is written to the file produced, 2 otherwise. @xref{Merging files}, for a complete description of the merge mode. -@item -o @var{file} -@itemx --output=@var{file} -Place the repaired output into @var{file} instead of into +@item -n @var{n} +@itemx --threads=@var{n} +Set the maximum number of worker threads for @option{--fec=create}, +overriding the system's default. Valid values range from 1 to "as many as +your system can support". If this option is not used, lziprecover tries to +detect the number of processors in the system and use it as default value. +@w{@samp{lziprecover --help}} shows the system's default value. + +@item -o @var{file}[/] +@itemx --output=@var{file}[/] +If repairing, place the repaired output into @var{file} instead of into @var{file}_fixed.lz. If splitting, the names of the files produced are in -the form @samp{rec01@var{file}}, @samp{rec02@var{file}}, etc. - -If @option{-c} has not been also specified, write the (de)compressed output -to @var{file}, automatically creating any missing parent directories; keep -input files unchanged. This option (or @option{-c}) is needed when reading -from a named pipe (fifo) or from a device. @w{@option{-o -}} is equivalent -to @option{-c}. @option{-o} has no effect when testing or listing. +the form @file{rec01@var{file}}, @file{rec02@var{file}}, etc. + +If creating FEC data and @option{-c} has not been also specified, write the +FEC data to @var{file}. If @var{file} ends with a slash, it is interpreted +as the name of a directory where the fec file(s) will be written to. In this +case, the fec file names are composed by replacing the prefix preceding the +last slash of each file name specified in the command line with @var{file} +(or prepending @var{file} if the file name does not contain a slash), and +appending the extension @file{.fec}. 
Otherwise, if @option{-c} has not also been specified, write the (de)compressed +output to @var{file}, automatically creating any missing parent directories; +keep input files unchanged.
-The names of the files produced are in the form @samp{rec01@var{file}}, -@samp{rec02@var{file}}, etc, and are designed so that the use of wildcards +The names of the files produced are in the form @file{rec01@var{file}}, +@file{rec02@var{file}}, etc, and are designed so that the use of wildcards in subsequent processing, for example, @w{@samp{lziprecover -cd rec*@var{file} > recovered_data}}, processes the files in the correct order. The number of digits used in the names varies @@ -385,14 +478,13 @@ files. @item -v @itemx --verbose Verbose mode.@* -When decompressing or testing, further -v's (up to 4) increase the -verbosity level, showing status, compression ratio, dictionary size, -trailer contents (CRC, data size, member size), and up to 6 bytes of -trailing data (if any) both in hexadecimal and as a string of printable -ASCII characters.@* +When decompressing or testing, further -v's (up to 4) increase the verbosity +level, showing status, compression ratio, dictionary size, trailer contents +(CRC, data size, member size), and up to 6 bytes of trailing data (if any) +both in hexadecimal and as a string of printable ASCII characters.@* Two or more @option{-v} options show the progress of decompression.@* -In other modes, increasing verbosity levels show final status, progress -of operations, and extra information (for example, the failed areas). +In other modes, increasing verbosity levels show final status, progress of +operations, and extra information (for example, the failed areas). @item --dump=[@var{member_list}][:damaged][:empty][:tdata] Dump the members listed, the damaged members (if any), the empty members (if @@ -467,14 +559,16 @@ the rest of the files. If a file fails to copy, lziprecover exits immediately without processing the rest of the files. See @option{--dump} above for a description of the argument. -@item --empty-error -Exit with error status 2 if any empty member is found in the input files. 
+@item --ignore-empty +When decompressing, testing, or listing, ignore empty members in multimember +files. By default lziprecover exits with error status 2 if any empty member +is found in a multimember file. -@item --marking-error -Exit with error status 2 if the first LZMA byte is non-zero in any member of -the input files. This may be caused by data corruption or by deliberate -insertion of tracking information in the file. Use -@w{@samp{lziprecover --clear-marking}} to clear any such non-zero bytes. +@item --ignore-nonzero +When decompressing or testing, ignore a nonzero first byte in the LZMA +stream. By default lziprecover exits with error status 2 if the first LZMA +byte is nonzero in any member of the input files. +Use @w{@samp{lziprecover --nonzero-repair}} to repair any such nonzero bytes. @item --loose-trailing When decompressing, testing, or listing, allow trailing data whose first @@ -482,17 +576,15 @@ bytes are so similar to the magic bytes of a lzip header that they can be confused with a corrupt header. Use this option if a file triggers a "corrupt header" error and the cause is not indeed a corrupt header. -@item --clear-marking -Set to zero the first LZMA byte of each member in the files specified. At -verbosity level 1 (-v), print the number of members cleared. The date of -each file modified is preserved if possible. This option exists because the -first byte of the LZMA stream is ignored by the range decoder, and can -therefore be (mis)used to store any value which can then be used as a -watermark to track the path of the compressed payload. +@item --nonzero-repair +Repair in place a nonzero first LZMA byte in the files specified. With +@option{-v}, print the number of members repaired. The date of each file +modified is preserved if possible. 
@end table -Lziprecover also supports the following debug options (for experts): +@noindent +lziprecover also supports the following debug options (for experts): @table @code @item -E @var{range}[,@var{sector_size}] @@ -505,6 +597,24 @@ sequence and try to reproduce the file, printing to standard output final statistics of the number of sectors reproduced successfully. Exit with nonzero status only in case of fatal error. +@item -F dc@var{n} +@itemx --fec=dc@var{n} +Simulate FEC repair of all combinations of @var{n} zeroed block errors +spread along the whole input file. + +@item -F dz@var{range}[:@var{range}]... +@itemx --fec=dz@var{range}[:@var{range}]... +Simulate FEC repair of one or more zeroed block(s) in the input file at the +@var{range}s given. The @var{range}s may be unordered and overlapping. +Lziprecover sorts and joins them as needed. @xref{range-format}, for a +description of @var{range}. + +@item -F dZ@var{size}[,@var{delta}] +@itemx --fec=dZ@var{size}[,@var{delta}] +Simulate FEC repair of all possible zeroed blocks of size @var{size} in the +input file. @var{delta} defaults to @var{size}. Values of @var{delta} +smaller than @var{size} result in overlapping blocks. + @item -M @itemx --md5sum Print to standard output the MD5 digests of the input @var{files} one per @@ -580,6 +690,10 @@ description of @var{range}. Load the compressed @var{file} into memory, set the byte at @var{position} to @var{value}, and then try to repair the byte error. @xref{--byte-repair}. +@item --gf16 +Forces the use of GF(2^16) when creating FEC blocks even if the number of +blocks fits in GF(2^8). + @end table Numbers given as arguments to options may be expressed in decimal, @@ -589,7 +703,7 @@ and may be followed by a multiplier and an optional @samp{B} for "byte". 
Table of SI and binary prefixes (unit multipliers): @multitable {Prefix} {kilobyte (10^3 = 1000)} {|} {Prefix} {kibibyte (2^10 = 1024)} -@item Prefix @tab Value @tab | @tab Prefix @tab Value +@headitem Prefix @tab Value @tab | @tab Prefix @tab Value @item k @tab kilobyte (10^3 = 1000) @tab | @tab Ki @tab kibibyte (2^10 = 1024) @item M @tab megabyte (10^6) @tab | @tab Mi @tab mebibyte (2^20) @item G @tab gigabyte (10^9) @tab | @tab Gi @tab gibibyte (2^30) @@ -609,6 +723,94 @@ indicate a corrupt or invalid input file, 3 for an internal consistency error (e.g., bug) which caused lziprecover to panic. +@node File format +@chapter File format +@cindex file format + +Perfection is reached, not when there is no longer anything to add, but +when there is no longer anything to take away.@* +--- Antoine de Saint-Exupery + +In the diagram below, a box like this: + +@verbatim ++---+ +| | <-- the vertical bars might be missing ++---+ +@end verbatim + +represents one byte; a box like this: + +@verbatim ++==============+ +| | ++==============+ +@end verbatim + +represents a variable number of bytes. + +@noindent +A lzip file consists of one or more independent "members" (compressed data +sets). The members simply appear one after another in the file, with no +additional information before, between, or after them. Each member can +encode in compressed form up to @w{16 EiB - 1 byte} of uncompressed data. +The size of a multimember file is unlimited. Empty members (data size = 0) +are not allowed in multimember files. + +Each member has the following structure: + +@verbatim ++--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| ID string | VN | DS | LZMA stream | CRC32 | Data size | Member size | ++--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +@end verbatim + +All multibyte values are stored in little endian order. 
+ +@table @samp +@item ID string (the "magic" bytes) +A four byte string, identifying the lzip format, with the value "LZIP" +(0x4C, 0x5A, 0x49, 0x50). + +@item VN (version number, 1 byte) +Just in case something needs to be modified in the future. 1 for now. + +@item DS (coded dictionary size, 1 byte) +The dictionary size is calculated by taking a power of 2 (the base size) +and subtracting from it a fraction between 0/16 and 7/16 of the base size.@* +Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).@* +Bits 7-5 contain the numerator of the fraction (0 to 7) to subtract +from the base size to obtain the dictionary size.@* +Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB@* +Valid values for dictionary size range from 4 KiB to 512 MiB. + +@item LZMA stream +The LZMA stream, finished by an "End Of Stream" marker. Uses default values +for encoder properties. +@ifnothtml +@xref{Stream format,,,lzip}, +@end ifnothtml +@ifhtml +See +@uref{http://www.nongnu.org/lzip/manual/lzip_manual.html#Stream-format,,Stream format} +@end ifhtml +for a complete description. + +@item CRC32 (4 bytes) +Cyclic Redundancy Check (CRC) of the original uncompressed data. + +@item Data size (8 bytes) +Size of the original uncompressed data. + +@item Member size (8 bytes) +Total size of the member, including header and trailer. This field acts +as a distributed index, improves the checking of stream integrity, and +facilitates the safe recovery of undamaged members from multimember files. +Lzip limits the member size to @w{2 PiB} to prevent the data size field from +overflowing. +@end table + + @node Data safety @chapter Protecting data from accidental loss @cindex data safety @@ -622,26 +824,31 @@ There are 3 main types of data corruption that may cause data loss: single-byte errors, multibyte errors (generally affecting a whole sector in a block device), and total device failure. 
The two most effective methods to protect data from accidental loss are +backup copies and Forward Error Correction (FEC).
Let's see two examples of how much better is -lzip compared with gzip and bzip2 with respect to data safety: +The extraordinary safety of the lzip format allows lziprecover to use the +redundance that occurs naturally when making compressed backups. Lziprecover +can recover data that would not be recoverable from files compressed in +other formats. Let's see two examples of how much better is lzip compared +with gzip and bzip2 with respect to data safety: @menu * Merging with a backup:: Recovering a file using a damaged backup @@ -721,6 +928,398 @@ reproduce. The probability of reproducing a mailbox identical backups (@pxref{performance-of-merge}). +@node Fec files +@chapter Forward Error Correction +@cindex forward error correction + +"Forward Error Correction" (FEC) is any way of protecting data from +corruption by creating redundant data that can be used later to repair +errors in the protected data. Lziprecover uses a Hilbert-based Reed-Solomon +code to create one fec file (with extension @file{.fec}) for each file that +needs to be protected. The fec files created by lziprecover are +reproducible. + +Reed-Solomon is the most space-efficient Error Correcting Code (ECC) for +data stored in block devices. It creates redundant FEC blocks in such a way +that X FEC blocks allow the recuperation of any combination of up to X lost +data blocks. All the blocks (data and FEC) are of the same size, which in +fec files must be a multiple of 512 bytes. Reed-Solomon is not optimum for +corruption affecting random single bits in a file because each corrupt bit +invalidates the whole block containing it. But in block devices, scattered +bit flips should not happen. + +Usually, a corrupt file does not provide an indication of where the +corruption is located. Therefore, each fec file stores one or two arrays of +CRCs to detect the corrupt blocks in the protected file and mark them as +erasures (missing data blocks). 
Thus, a fec file creates its own Binary +Erasure Channel (BEC) for the protected file. + +Lziprecover's FEC algorithm can repair any kind of file, but its ability to +repair lzip files is greater than for other kinds of files. Lziprecover can +use the statistical properties of lzip data to repair a lzip file rescued +with ddrescue, even if the fec file is so damaged that it has lost both CRC +arrays. Lzip data helps to locate the corrupt parts of the file even without +a BEC. For this to work, at least one chksum packet header must be intact to +provide @samp{prodata_size}, @samp{prodata_md5}, and @samp{gf16}. + +@menu +* How Reed-Solomon works:: It is basically an equation system +* Implementation details:: How lziprecover implements Reed-Solomon +* Creating fec files:: How to create fec files +* Testing with fec files:: How to test files using fec files +* Repairing with fec files:: How to repair files using fec files +* Fec file format:: Detailed format of the redundant FEC data +@end menu + + +@node How Reed-Solomon works +@section How Reed-Solomon works +@cindex Reed-Solomon tutorial + +To illustrate how Reed-Solomon works on the BEC, we will use an example with +standard arithmetic on integers. Note that in lziprecover's FEC each +variable is a (potentialy large) block of data, not a single value. + +Given variables x, y, and z (the protected data) whose values are known, an +equation system can be created where the values of three FEC variables p, q, +and r can be computed from the values of x, y, and z: + +@example +x + y + z = p (1) +x + 2y + 3z = q (2) +x + 3y + 2z = r (3) +@end example + +If we have that x = 1, y = 2, and z = 3, then p = 6, q = 14, and r = 13: + +@example +1 + 2 + 3 = 6 (1a) +1 + 4 + 9 = 14 (2a) +1 + 6 + 6 = 13 (3a) +@end example + +Now, if the values of x and y are lost because of data corruption, they can +be recomputed by using any two of the three equations above. 
For example, if +we substitute the known values of z, p, and q into equations (1) and (2) we +get:
The +repaired file is checked with a MD5 digest. + +Lziprecover divides the input file in 1 to 32768 data blocks of the same +size, which ranges from 512 bytes to @w{128 TiB}, for a total protected file +size of up to @w{4 EiB}. It then uses a Hilbert matrix @samp{A} to create up +to 2048 FEC blocks of the same size as the data blocks. Lziprecover corrects +errors in the data blocks by first reducing the equation system to M +equations with M unknowns each, where M is the number of missing data +blocks. Then it multiplies the inverse of the relevant submatrix of @samp{A} +by the vector of results of the M equations to recompute the values of the +missing data blocks. + +Lziprecover implements GF(2^8) with polynomial 0x11D and GF(2^16) with +polynomial 0x1100B. + +A Hilbert matrix is defined as @w{@samp{A[i][j] = 1 / (i + j + 1)}} for i +and j >= 0. But as in a Galois Field addition is exclusive or, applying the +Hilbert definition produces a singular (non invertible) matrix. To avoid +this problem, lziprecover uses a Hilbert matrix starting at row +@w{@samp{gf_size / 2}}. I.e., @w{@samp{A[i][j] = 1 / (i + gf_size / 2 + j)}} +for @w{@samp{0 <= i,j < gf_size / 2}}. (gf_size is the size of the Galois +Field). + + +@node Creating fec files +@section How to create fec files +@cindex fec create + +@noindent +Example 1: Create the fec file @file{archive.tar.lz.fec} and store it in the +same directory where @file{archive.tar.lz} is. + +@example +lziprecover -v -Fc archive.tar.lz +@end example + +@noindent +Example 2: Create the fec file @file{archive.tar.lz.fec} and store it in the +directory @file{fec}. + +@example +lziprecover -v -Fc -o fec/ archive.tar.lz +@end example + +@noindent +Example 3: Create recursively one fec file for each file in the directory +@file{datadir} and store them in the tree under the directory @file{fec}. 
+ +@example +lziprecover -v -r -Fc -o fec/ datadir +@end example + + +@node Testing with fec files +@section How to test files using fec files +@cindex fec test + +@noindent +Example 1: Test the integrity of @file{archive.tar.lz} using the fec file +@file{archive.tar.lz.fec} from the same directory. + +@example +lziprecover -v -Ft archive.tar.lz +@end example + +@noindent +Example 2: Test the integrity of the files @file{foo.lz} and @file{bar.lz} +using the corresponding fec files stored in the directory @file{fec}. + +@example +lziprecover -v -Ft --fec-file=fec/ foo.lz bar.lz +@end example + +@noindent +Example 3: Test recursively the integrity of all the files in the directory +@file{datadir} using the fec files stored in the directory tree under the +directory @file{fec}. + +@example +lziprecover -v -r -Ft --fec-file=fec/ datadir +@end example + + +@node Repairing with fec files +@section How to repair files using fec files +@cindex fec repair + +@noindent +Example 1: Repair the file @file{archive.tar.lz} using the fec file +@file{archive.tar.lz.fec} from the same directory. The repaired file is +written to @file{archive_fixed.tar.lz} in the same directory. + +@example +lziprecover -v -Fr archive.tar.lz +@end example + +@noindent +Example 2: Repair the files @file{foo.lz} and @file{bar.lz} using the +corresponding fec files stored in the directory @file{fec}. + +@example +lziprecover -v -Fr --fec-file=fec/ foo.lz bar.lz +@end example + +@noindent +Example 3: Repair recursively all the damaged files in the directory +@file{datadir} using the fec files stored in the directory tree under the +directory @file{fec}. + +@example +lziprecover -v -r -Fr --fec-file=fec/ datadir +@end example + + +@node Fec file format +@section Fec file format +@cindex fec file format + +A fec file consists of one chksum packet, one or more fec packets, and one +optional second chksum packet. 
The first chksum packet must be the first +packet in the file, but the second chksum packet does not need to be the +last packet in the file. The essential information is stored in the chksum +packet(s), while the potentially numerous fec packets are kept as simple as +possible: + +@verbatim ++=================+===============+=================+ +| Chksum packet | Fec packets | Chksum packet | ++=================+===============+=================+ +@end verbatim + +All multibyte values are stored in little endian order except +@samp{prodata_md5}. + +The @samp{fbs} (fec_block_size) field is coded as a little endian 16-bit +floating point unsigned integer with an 11-bit mantissa at bits 0-10 and a +5-bit exponent at bits 11-15. The mantissa is an integer between 0 and 2047. +The exponent is an integer between 9 and 40, stored with a bias of -9; the +exponent 9 is stored as 0, and 40 is stored as 31. Values are stored with +the largest mantissa and smallest exponent; 4096 is stored as m=8, e=0. This +encoding can store values from 0 bytes to @w{2047 TiB} @w{(2^51 - 2^40 bytes)} +with a maximum resolution of 512 bytes, but 0 and the values beyond +@w{128 TiB} are not used: + +@verbatim + 5 11 ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| exp | mantissa | The 'fbs' (fec_block_size) field ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +15 11 10 0 +@end verbatim + +The fec file format is 4-byte aligned for speed because FEC data are created +and decoded 4 bytes at a time. The 4-byte alignment has been achieved by a +careful design, without adding any padding bytes. + +The fec file format has an overhead of 8 bytes per protected data block, +plus 16 bytes per FEC block, plus 80 bytes. + +@subsection Chksum packet +@cindex chksum packet + +A chksum packet contains one CRC for each of the N data blocks in the +protected file, and is structured as shown in the following table. 
All +lengths and offsets are in decimal: + +@multitable {prodata_size} {36 + 4N} {Length (in bytes)} +@headitem Field Name @tab Offset @tab Length (in bytes) +@item magic @tab 0 @tab 4 +@item version @tab 4 @tab 1 +@item flags @tab 5 @tab 1 +@item fbs @tab 6 @tab 2 +@item prodata_size @tab 8 @tab 8 +@item prodata_md5 @tab 16 @tab 16 +@item header_crc @tab 32 @tab 4 +@item crc_array @tab 36 @tab 4N +@item payload_crc @tab 36 + 4N @tab 4 +@end multitable + +@table @samp +@item magic +A four byte string identifying the chksum packet (and therefore the fec +file), with the value 0xB3, 0xA5, 0xB6, 0xAF. (The complement of "LZIP"). + +@item version +Just in case something needs to be modified in the future. 0 for now. + +@item flags +Bit 0 (is_crc_c): crc_array contains CRC32 (0) or CRC32-C (1).@* +Bit 1 (gf16): Galois field is GF(2^8) (0) or GF(2^16) (1).@* +Bits 2-7: zero. + +@anchor{fbs} +@item fbs (coded fec_block_size) +Number of FEC bytes per block. It is a multiple of 512 bytes between 512 +bytes and @w{128 TiB}. + +@item prodata_size +Size of the protected file. 1 byte to @w{4 EiB}. + +@item prodata_md5 +Md5sum of the protected file. Stored in big endian order. + +@item header_crc +CRC32 of the previous fields, including magic. + +@item crc_array +Array of @var{n} CRCs corresponding to the @var{n} blocks in which the +protected file is divided. @var{n} is @w{@samp{ceil( prodata_size / fbs )}}. +The first chksum packet contains an array of CRC32s, while the second chksum +packet (if present) contains an array of CRC32-Cs. + +For the expected thousands of bit flips caused by a zeroed sector, a +"symmetric" CRC like CRC32 is probably better than CRC32-C, which detects +all the errors with an odd number of bit flips at the expense of a larger +number of undetected errors with an even number of bit flips. + +@item payload_crc +CRC32 of the crc_array. 
+@end table + +@subsection Fec packet +@cindex fec packet + +A fec packet contains one FEC block and is structured as shown in the +following table. All lengths and offsets are in decimal: + +@multitable {payload_crc} {12 + fbs} {Length (in bytes)} +@headitem Field Name @tab Offset @tab Length (in bytes) +@item magic @tab 0 @tab 4 +@item fbn @tab 4 @tab 2 +@item fbs @tab 6 @tab 2 +@item header_crc @tab 8 @tab 4 +@item fec_block @tab 12 @tab fbs +@item payload_crc @tab 12 + fbs @tab 4 +@end multitable + +@table @samp +@item magic +A four byte string identifying the fec packet, with the value "\xB3FEC" +(0xB3, 0x46, 0x45, 0x43). + +@item fbn (fec_block_number) +Number of this FEC block. Required to compute the decode matrix. + +@item fbs (coded fec_block_size) +@xref{fbs}. + +@item header_crc +CRC32 of the previous fields, including magic. + +@item fec_block +The FEC block. + +@item payload_crc +CRC32 of the fec_block. +@end table + + @node Repairing one byte @chapter Repairing one byte @cindex repairing one byte @@ -732,7 +1331,8 @@ bit to the original. This makes lzip files resistant to bit flip, one of the most common forms of data corruption. The file is repaired in memory. Therefore, enough virtual memory -@w{(RAM + swap)} to contain the largest damaged member is required. +@w{(RAM + swap)} to contain the largest damaged member is required. Member +size is limited to @w{2 GiB} on 32-bit systems. The error may be located anywhere in the file except in the first 5 bytes of each member header or in the @samp{Member size} field of the @@ -803,7 +1403,7 @@ into clusters and then merging the files as if each cluster were a single error. Here is a real case of successful merging. Two copies of the file -@samp{icecat-3.5.3-x86.tar.lz} (compressed size @w{9 MB}) became corrupt +@file{icecat-3.5.3-x86.tar.lz} (compressed size @w{9 MB}) became corrupt while stored on the same NAND flash device. 
One of the copies had 76 single-bit errors scattered in an area of 1020 bytes, and the other had 3028 such errors in an area of 31729 bytes. Lziprecover produced a @@ -855,10 +1455,10 @@ lziprecover -tv backup.tar.lz @noindent Example 2: Recover the first volume of those created with the command @w{@samp{lzip -b 32MiB -S 650MB big_db}} from two copies, -@samp{big_db1_00001.lz} and @samp{big_db2_00001.lz}, with member 07 +@file{big_db1_00001.lz} and @file{big_db2_00001.lz}, with member 07 damaged in the first copy, member 18 damaged in the second copy, and member 12 damaged in both copies. The correct file produced is saved in -@samp{big_db_00001.lz}. +@file{big_db_00001.lz}. @example lziprecover -m -v -o big_db_00001.lz big_db1_00001.lz big_db2_00001.lz @@ -891,9 +1491,10 @@ reproduction can't be done if the zeroed sector overlaps with the first 15 bytes of a member, or if the zeroed sector is smaller than 8 bytes. The file is reproduced in memory. Therefore, enough virtual memory -@w{(RAM + swap)} to contain the damaged member is required. +@w{(RAM + swap)} to contain the damaged member is required. Member size is +limited to @w{2 GiB} on 32-bit systems. -To understand how it works, take any lzipped file, say @samp{foo.lz}, +To understand how it works, take any lzipped file, say @file{foo.lz}, decompress it (keeping the original), and try to reproduce an artificially zeroed sector in it by running the following commands: @@ -918,8 +1519,8 @@ Reproduction succeeded at pos 65536 all comparisons passed @end example -Using @samp{foo} as reference file guarantees that any zeroed sector in -@samp{foo.lz} can be reproduced because both files contain the same data. In +Using @file{foo} as reference file guarantees that any zeroed sector in +@file{foo.lz} can be reproduced because both files contain the same data. In real use, the reference file needs to contain the data corresponding to the zeroed sector, but the rest of the data (if any) may differ between both files. 
The reference data may be obtained from the partial decompression of @@ -951,6 +1552,7 @@ when they are required. @anchor{performance-of-reproduce} @section Performance of @option{--reproduce} + Reproduce mode is especially useful when recovering a corrupt backup (or a corrupt source tarball) that is part of a series. Usually only a small fraction of the data changes from one backup to the next or from one version @@ -958,8 +1560,8 @@ of a source tarball to the next. This makes sometimes possible to reproduce a given corrupted version using reference data from a near version. The following two tables show the fraction of reproducible sectors (reproducible sectors divided by total sectors in archive) for some archives, using sector -sizes of 512 and 4096 bytes. @samp{mailbox-aug.tar.lz} is a backup of some -of my mailboxes. @samp{backup-feb.tar.lz} and @samp{backup-apr.tar.lz} are +sizes of 512 and 4096 bytes. @file{mailbox-aug.tar.lz} is a backup of some +of my mailboxes. @file{backup-feb.tar.lz} and @file{backup-apr.tar.lz} are real backups of my own working directory: @multitable {Reference file} {gawk-5.0.1.tar.lz} {4369 / 5844 = 74.76%} @@ -1058,15 +1660,15 @@ Member reproduced successfully. Copy of input file reproduced successfully. @end example -If @samp{backup.tar.lz} is a multimember file with more than one member +If @file{backup.tar.lz} is a multimember file with more than one member damaged and lziprecover shows the message @samp{One member reproduced. Copy of input file still contains errors.}, the procedure shown in the example above can be repeated until all the members have been reproduced. @samp{tarlz --keep-damaged -n0 -xf backup.tar.lz example.txt} produces a -partial copy of the reference file @samp{example.txt} that may help locate a +partial copy of the reference file @file{example.txt} that may help locate a complete copy in the filesystem or in another backup, even if -@samp{example.txt} has been renamed. 
+@file{example.txt} has been renamed. @node Tarlz @@ -1095,14 +1697,13 @@ alignment between tar members and lzip members minimizes the amount of data lost in case of corruption. In this chapter we'll explain the ways in which lziprecover can recover and process multimember tar.lz archives. -@sp 1 @section Recovering damaged multimember tar.lz archives If you have several copies of the damaged archive, try merging them first because merging has a high probability of success. @xref{Merging files}. If the command below prints something like @w{@samp{Input files merged successfully.}} you are done and -@samp{archive.tar.lz} now contains the recovered archive: +@file{archive.tar.lz} now contains the recovered archive: @example lziprecover -m -v -o archive.tar.lz a/archive.tar.lz b/archive.tar.lz @@ -1112,7 +1713,7 @@ If you only have one copy of the damaged archive with a zeroed block of data caused by an I/O error, you may try to reproduce the archive. @xref{Reproducing one sector}. If the command below prints something like @w{@samp{Copy of input file reproduced successfully.}} you are done and -@samp{archive_fixed.tar.lz} now contains the recovered archive: +@file{archive_fixed.tar.lz} now contains the recovered archive: @example lziprecover -vv -e --reference-file=old_archive.tar archive.tar.lz @@ -1122,16 +1723,16 @@ If you only have one copy of the damaged archive, you may try to repair the archive, but this has a lower probability of success. @xref{Repairing one byte}. If the command below prints something like @w{@samp{Copy of input file repaired successfully.}} you are done and -@samp{archive_fixed.tar.lz} now contains the recovered archive: +@file{archive_fixed.tar.lz} now contains the recovered archive: @example -lziprecover -v -R archive.tar.lz +lziprecover -v --byte-repair archive.tar.lz @end example If all the above fails, and the archive was created with tarlz, you may save the damaged members for later and then copy the good members to another -archive. 
If the two commands below succeed, @samp{bad_members.tar.lz} will -contain all the damaged members and @samp{archive_cleaned.tar.lz} will +archive. If the two commands below succeed, @file{bad_members.tar.lz} will +contain all the damaged members and @file{archive_cleaned.tar.lz} will contain a good archive with the damaged members removed: @example @@ -1140,7 +1741,7 @@ lziprecover -v --strip=damaged -o archive_cleaned.tar.lz archive.tar.lz @end example You can then use @samp{tarlz --keep-damaged} to recover as much data as -possible from each damaged member in @samp{bad_members.tar.lz}: +possible from each damaged member in @file{bad_members.tar.lz}: @example mkdir tmp @@ -1148,14 +1749,13 @@ cd tmp tarlz --keep-damaged -xvf ../bad_members.tar.lz @end example -@sp 1 @section Processing multimember tar.lz archives Lziprecover is able to copy a list of members from a file to another. For example the command @w{@samp{lziprecover --dump=1-10:r1:tdata archive.tar.lz > subarch.tar.lz}} creates a subset archive containing the first ten members, the end-of-file -blocks, and the trailing data (if any) of @samp{archive.tar.lz}. The +blocks, and the trailing data (if any) of @file{archive.tar.lz}. The @samp{r1} part selects the last member, which in an appendable tar.lz archive contains the end-of-file blocks. @@ -1165,99 +1765,15 @@ archive contains the end-of-file blocks. @cindex file names The name of the fixed file produced by @option{--byte-repair} and -@option{--merge} is made by appending the string @samp{_fixed.lz} to the +@option{--merge} is made by appending the string @file{_fixed.lz} to the original file name. If the original file name ends with one of the -extensions @samp{.tar.lz}, @samp{.lz}, or @samp{.tlz}, the string -@samp{_fixed} is inserted before the extension. +extensions @file{.tar.lz}, @file{.lz}, or @file{.tlz}, the string +@file{_fixed} is inserted before the extension. 
- -@node File format -@chapter File format -@cindex file format - -Perfection is reached, not when there is no longer anything to add, but -when there is no longer anything to take away.@* ---- Antoine de Saint-Exupery - -@sp 1 -In the diagram below, a box like this: - -@verbatim -+---+ -| | <-- the vertical bars might be missing -+---+ -@end verbatim - -represents one byte; a box like this: - -@verbatim -+==============+ -| | -+==============+ -@end verbatim - -represents a variable number of bytes. - -@sp 1 -A lzip file consists of one or more independent "members" (compressed data -sets). The members simply appear one after another in the file, with no -additional information before, between, or after them. Each member can -encode in compressed form up to @w{16 EiB - 1 byte} of uncompressed data. -The size of a multimember file is unlimited. - -Each member has the following structure: - -@verbatim -+--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -| ID string | VN | DS | LZMA stream | CRC32 | Data size | Member size | -+--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -@end verbatim - -All multibyte values are stored in little endian order. - -@table @samp -@item ID string (the "magic" bytes) -A four byte string, identifying the lzip format, with the value "LZIP" -(0x4C, 0x5A, 0x49, 0x50). - -@item VN (version number, 1 byte) -Just in case something needs to be modified in the future. 1 for now. - -@item DS (coded dictionary size, 1 byte) -The dictionary size is calculated by taking a power of 2 (the base size) -and subtracting from it a fraction between 0/16 and 7/16 of the base size.@* -Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).@* -Bits 7-5 contain the numerator of the fraction (0 to 7) to subtract -from the base size to obtain the dictionary size.@* -Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB@* -Valid values for dictionary size range from 4 KiB to 512 MiB. 
- -@item LZMA stream -The LZMA stream, finished by an "End Of Stream" marker. Uses default values -for encoder properties. -@ifnothtml -@xref{Stream format,,,lzip}, -@end ifnothtml -@ifhtml -See -@uref{http://www.nongnu.org/lzip/manual/lzip_manual.html#Stream-format,,Stream format} -@end ifhtml -for a complete description. - -@item CRC32 (4 bytes) -Cyclic Redundancy Check (CRC) of the original uncompressed data. - -@item Data size (8 bytes) -Size of the original uncompressed data. - -@item Member size (8 bytes) -Total size of the member, including header and trailer. This field acts -as a distributed index, improves the checking of stream integrity, and -facilitates the safe recovery of undamaged members from multimember files. -Lzip limits the member size to @w{2 PiB} to prevent the data size field from -overflowing. - -@end table +The name of the fixed file produced by @option{--fec=repair} is made by +appending the string @file{_fixed} to the original file name. If the +original file name ends with one of the extensions @file{.tar.lz}, @file{.lz}, +or @file{.tlz}, the string @file{_fixed} is inserted before the extension. @node Trailing data @@ -1345,7 +1861,7 @@ lziprecover --strip=tdata file.lz | sha256sum -c \ @chapter A small tutorial with examples @cindex examples -Example 1: Extract all the files from archive @samp{foo.tar.lz}. +Example 1: Extract all the files from archive @file{foo.tar.lz}. @example tar -xf foo.tar.lz @@ -1353,25 +1869,22 @@ or lziprecover -cd foo.tar.lz | tar -xf - @end example -@sp 1 @noindent Example 2: Restore a regular file from its compressed version -@samp{file.lz}. If the operation is successful, @samp{file.lz} is removed. +@file{file.lz}. If the operation is successful, @file{file.lz} is removed. @example lziprecover -d file.lz @end example -@sp 1 @noindent -Example 3: Check the integrity of the compressed file @samp{file.lz} and +Example 3: Check the integrity of the compressed file @file{file.lz} and show status. 
@example lziprecover -tv file.lz @end example -@sp 1 @anchor{concat-example} @noindent Example 4: The right way of concatenating the decompressed output of two or @@ -1388,41 +1901,37 @@ Or keeping the trailing data of the last file like this lziprecover --strip=empty file1.lz file2.lz file3.lz > file123.lz @end example -@sp 1 @noindent -Example 5: Decompress @samp{file.lz} partially until @w{10 KiB} of +Example 5: Decompress @file{file.lz} partially until @w{10 KiB} of decompressed data are produced. @example lziprecover -D 0,10KiB file.lz @end example -@sp 1 @noindent -Example 6: Decompress @samp{file.lz} partially from decompressed byte at +Example 6: Decompress @file{file.lz} partially from decompressed byte at offset 10000 to decompressed byte at offset 14999 (5000 bytes are produced). @example lziprecover -D 10000-15000 file.lz @end example -@sp 1 @noindent -Example 7: Repair a corrupt byte in the file @samp{file.lz}. (Indented lines +Example 7: Repair a corrupt byte in the file @file{file.lz}. (Indented lines are abridged diagnostic messages from lziprecover). @example -lziprecover -v -R file.lz +lziprecover -v --byte-repair file.lz Copy of input file repaired successfully. lziprecover -tv file_fixed.lz file_fixed.lz: ok mv file_fixed.lz file.lz @end example -@sp 1 @noindent -Example 8: Split the multimember file @samp{file.lz} and write each member -in its own @samp{recXXXfile.lz} file. Then use @w{@samp{lziprecover -t}} to +Example 8: Split the multimember file @file{file.lz} and write each member +in its own @file{recXXXfile.lz} file. Then use @w{@samp{lziprecover -t}} to test the integrity of the resulting files. @example @@ -1494,6 +2003,7 @@ unzcrash [@var{options}] 'lzip -t' @var{file} The compressed @var{file} must not contain errors and the decompressor being tested must decompress it correctly for the comparisons to work. 
+@noindent unzcrash supports the following options: @table @code @@ -1516,12 +2026,12 @@ The number of N-bit errors per byte (N = 1 to 8) is: @w{8 28 56 70 56 28 8 1} @multitable {Examples of @var{range}} {Tests errors of N-bits} -@item Examples of @var{range} @tab Tests errors of N-bits -@item 1 @tab 1 -@item 1,2,3 @tab 1, 2, 3 -@item 2-4 @tab 2, 3, 4 -@item 1,3-5,8 @tab 1, 3, 4, 5, 8 -@item 1-3,5-8 @tab 1, 2, 3, 5, 6, 7, 8 +@headitem Examples of @var{range} @tab Tests errors of N-bits +@item 1 @tab 1 +@item 1,2,3 @tab 1, 2, 3 +@item 2-4 @tab 2, 3, 4 +@item 1,3-5,8 @tab 1, 3, 4, 5, 8 +@item 1-3,5-8 @tab 1, 2, 3, 5, 6, 7, 8 @end multitable @item -B[@var{size}][,@var{value}] diff --git a/dump_remove.cc b/dump_remove.cc index 3273303..4b90a92 100644 --- a/dump_remove.cc +++ b/dump_remove.cc @@ -298,8 +298,8 @@ int remove_members( const std::vector< std::string > & filenames, /* Set to zero in place the first LZMA byte of each member in each file by opening one rw descriptor for each file. */ -int clear_marking( const std::vector< std::string > & filenames, - const Cl_options & cl_opts ) +int nonzero_repair( const std::vector< std::string > & filenames, + const Cl_options & cl_opts ) { long cleared_members = 0; int files = 0, retval = 0; @@ -0,0 +1,297 @@ +/* Lziprecover - Data recovery tool for the lzip format + Copyright (C) 2023-2024 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. 
If not, see <http://www.gnu.org/licenses/>. +*/ + +struct le32 + { + enum { size = 4 }; + uint8_t data[size]; + + le32 & operator=( unsigned n ) + { for( int i = 0; i < size; ++i ) { data[i] = (uint8_t)n; n >>= 8; } + return *this; } + unsigned val() const + { unsigned n = 0; + for( int i = size - 1; i >= 0; --i ) { n <<= 8; n += data[i]; } + return n; } + bool operator==( const le32 & b ) const + { return std::memcmp( data, b.data, size ) == 0; } + bool operator!=( const le32 & b ) const { return !( *this == b ); } + }; + + +inline unsigned long long get_le( const uint8_t * const buf, int size ) + { unsigned long long n = 0; + while( --size >= 0 ) { n <<= 8; n += buf[size]; } return n; } + +inline unsigned long long ceil_divide( const unsigned long long size, + const unsigned long block_size ) + { return size / block_size + ( size % block_size > 0 ); } + +inline unsigned long ceil_divide( const unsigned long size, + const unsigned long block_size ) + { return size / block_size + ( size % block_size > 0 ); } + +inline uint8_t * set_lastbuf( const uint8_t * const prodata, + const unsigned long prodata_size, const unsigned long fbs, + const bool last_is_missing = false ) + { + const unsigned long rest = prodata_size % fbs; + if( rest == 0 ) return 0; // last data block is complete + uint8_t * const lastbuf = new uint8_t[fbs]; + if( last_is_missing ) return lastbuf; // uninitialized buffer + std::memcpy( lastbuf, prodata + ( prodata_size - rest ), rest ); + std::memset( lastbuf + rest, 0, fbs - rest ); + return lastbuf; // copy of last data block padded to fbs bytes + } + +enum { min_fbs = 512, max_unit_fbs = 1 << 30 }; // 1 GiB +const unsigned long long max_fbs = 1ULL << 47; // 128 TiB + +inline bool isvalid_fbs( const unsigned long long fbs ) + { return fbs >= min_fbs && fbs <= max_fbs && fbs % min_fbs == 0; } + +struct Coded_fbs // fec_block_size + { + enum { size = 2 }; + uint8_t data[size]; // 11-bit mantissa, 5-bit exponent + + Coded_fbs() {} // default 
constructor + Coded_fbs( const unsigned long long fbs, const unsigned unit_fbs ) + { + unsigned long long m = fbs; + int e = 0; + while( m > 2047 || ( m > 1 && e < 9 ) ) { m >>= 1; ++e; } + if( m << e < fbs && ++m > 2047 ) { m >>= 1; ++e; } + while( ( m << e ) % unit_fbs != 0 ) if( ++m > 2047 ) { m >>= 1; ++e; } + if( m == 0 || m > 2047 || e < 9 || e > 40 || m << e < fbs || + !isvalid_fbs( m << e ) || !isvalid_fbs( fbs ) ) + internal_error( "Coded_fbs: can't fit fec_block_size in packet." ); + data[0] = m; + data[1] = ( e - 9 ) << 3 | m >> 8; + } + + void copy( uint8_t * const buf ) const + { buf[0] = data[0]; buf[1] = data[1]; } + + unsigned long long val() const + { + unsigned long long m = ( ( data[1] & 7 ) << 8 ) | data[0]; + const int e = ( data[1] >> 3 ) + 9; + return m << e; + } + }; + +enum { fec_magic_l = 4, crc32_l = le32::size }; +const uint8_t fec_magic[4] = { 0xB3, 0xA5, 0xB6, 0xAF }; // ~"LZIP" +const uint8_t fec_packet_magic[4] = { fec_magic[0], 'F', 'E', 'C' }; + +inline bool check_fec_magic( const uint8_t * const image_buffer ) + { return std::memcmp( image_buffer, fec_magic, 4 ) == 0; } + +class Packet_base + { +protected: + // the packet trailer contains the CRC32 of the payload + enum Lengths { trailer_size = crc32_l }; + + // header_size must be a multiple of 4 for uint32_t alignment in mul_add + const uint8_t * image_; // header + payload + trailer + bool image_is_external; + + Packet_base() : image_is_external( false ) {} + explicit Packet_base( const uint8_t * const image_buffer ) + : image_( image_buffer ), image_is_external( true ) {} + ~Packet_base() { if( !image_is_external ) delete[] image_; } + +public: + const uint8_t * image() const { return image_; } + }; + + +class Chksum_packet : public Packet_base + { + enum { current_version = 0 }; + enum Lengths { version_l = 1, flags_l = 1, prodata_size_l = 8, + prodata_md5_l = 16 }; + enum Offsets { version_o = fec_magic_l, + flags_o = version_o + version_l, + fbs_o = flags_o + flags_l, + 
prodata_size_o = fbs_o + Coded_fbs::size, + prodata_md5_o = prodata_size_o + prodata_size_l, + header_crc_o = prodata_md5_o + prodata_md5_l, + header_size = header_crc_o + crc32_l, + crc_array_o = header_size }; + + static unsigned compute_header_crc( const uint8_t * const image_buffer ) + { return crc32.compute_crc( image_buffer, header_crc_o ); } + +public: + // check image_buffer with check_image before calling this constructor + explicit Chksum_packet( const uint8_t * const image_buffer ) + : Packet_base( image_buffer ) {} + Chksum_packet( const uint8_t * const prodata, + const unsigned long prodata_size, + const md5_type & prodata_md5, const Coded_fbs coded_fbs, + const bool gf16_, const bool is_crc_c_ ); + + unsigned long long packet_size() const + { return ceil_divide( prodata_size(), fec_block_size() ) * + sizeof crc_array()[0] + header_size + trailer_size; } + unsigned long long prodata_size() const + { return get_le( image_ + prodata_size_o, prodata_size_l ); } + const md5_type & prodata_md5() const + { return *(md5_type *)(image_ + prodata_md5_o); } + unsigned long long fec_block_size() const + { return ((Coded_fbs *)(image_ + fbs_o))->val(); } + static bool check_flags( const uint8_t * const image_buffer ) + { return image_buffer[flags_o] <= 3; } + bool gf16() const { return image_[flags_o] & 2; } + bool is_crc_c() const { return image_[flags_o] & 1; } + // crc_array contains one CRC32 or one CRC32-C per protected data block + const le32 * crc_array() const + { return (const le32 *)(image_ + crc_array_o); } + + static unsigned min_packet_size() + { return header_size + le32::size + trailer_size; } + static uint8_t version( const uint8_t * const image_buffer ) + { return image_buffer[version_o]; } + static bool check_version( const uint8_t * const image_buffer ) + { return image_buffer[version_o] == current_version; } + + static unsigned check_image( const uint8_t * const image_buffer, + const unsigned long max_size ); + bool check_payload_crc() const + 
{ + const unsigned paysize = packet_size() - header_size - trailer_size; + const unsigned payload_crc_o = crc_array_o + paysize; + const unsigned payload_crc = get_le( image_ + payload_crc_o, crc32_l ); + return crc32.compute_crc( image_ + crc_array_o, paysize ) == payload_crc; + } + }; + + +class Fec_packet : public Packet_base + { + enum Lengths { fbn_l = 2 }; + enum Offsets { fbn_o = fec_magic_l, + fbs_o = fbn_o + fbn_l, + header_crc_o = fbs_o + Coded_fbs::size, + header_size = header_crc_o + crc32_l, + fec_block_o = header_size }; + + static unsigned compute_header_crc( const uint8_t * const image_buffer ) + { return crc32.compute_crc( image_buffer, header_crc_o ); } + +public: + // check image_buffer with check_image before calling this constructor + explicit Fec_packet( const uint8_t * const image_buffer ) + : Packet_base( image_buffer ) {} + Fec_packet( const uint8_t * const prodata, const uint8_t * const lastbuf, + const unsigned fbn, const unsigned k, + const Coded_fbs coded_fbs, const bool gf16 ); + + unsigned long long packet_size() const + { return header_size + fec_block_size() + trailer_size; } + unsigned fec_block_number() const + { return get_le( image_ + fbn_o, fbn_l ); } + unsigned long long fec_block_size() const // number of fec bytes + { return ((Coded_fbs *)(image_ + fbs_o))->val(); } + const uint8_t * fec_block() const { return image_ + fec_block_o; } + + static unsigned min_packet_size() + { return header_size + min_fbs + trailer_size; } + + static unsigned long check_image( const uint8_t * const image_buffer, + const unsigned long max_size ); + }; + + +enum { max_k8 = 128, max_k16 = 32768, max_nk16 = 2048 }; +const char * const fec_extension = ".fec"; + +inline void prot_stdin() + { show_file_error( "(stdin)", "Can't read protected data from standard input." 
); } + +// defined in fec_create.cc +enum { fc_percent, fc_blocks, fc_bytes }; +void cleanup_mutex_lock(); +int gf_check( const unsigned k, const bool cl_gf16, const bool fec_random ); +void extract_dirname( const std::string & name, std::string & srcdir ); +void replace_dirname( const std::string & name, const std::string & srcdir, + const std::string & destdir, std::string & outname ); +bool has_fec_extension( const std::string & name ); +const char * printable_name( const std::string & filename, const bool in = true ); +int fec_create( const std::vector< std::string > & filenames, + const std::string & default_output_filename, + const unsigned long fb_or_pct, const unsigned cl_block_size, + const unsigned num_workers, const char debug_level, + const char fctype, const char fec_level, const char recursive, + const bool cl_gf16, const bool fec_random, const bool force, + const bool to_stdout ); + +// defined in fec_repair.cc +int fec_test( const std::vector< std::string > & filenames, + const std::string & cl_fec_filename, + const std::string & default_output_filename, + const char recursive, const bool force, const bool ignore_errors, + const bool repair, const bool to_stdout ); +int fec_list( const std::vector< std::string > & filenames, + const bool ignore_errors ); +int fec_dc( const std::string & input_filename, + const std::string & cl_fec_filename, const unsigned cblocks ); +int fec_dz( const std::string & input_filename, + const std::string & cl_fec_filename, + std::vector< Block > & range_vector ); +int fec_dZ( const std::string & input_filename, + const std::string & cl_fec_filename, + const unsigned delta, const int sector_size ); + +// defined in recursive.cc +bool next_filename( std::list< std::string > & filelist, + std::string & input_filename, int & retval, + const char recursive ); + +// defined in gf8.cc, gf16.cc +void gf8_init(); +void gf16_init(); +bool gf8_check( const std::vector< unsigned > & fbn_vector, const unsigned k ); +bool gf16_check( 
const std::vector< unsigned > & fbn_vector, const unsigned k ); + +/* buffer, lastbuf: k blocks of input data, last one possibly padded to fbs. + fbn: number of the fec block to be created (fbn < max_k). +*/ +void rs8_encode( const uint8_t * const buffer, const uint8_t * const lastbuf, + uint8_t * const fec_block, const unsigned long fbs, + const unsigned fbn, const unsigned k ); +void rs16_encode( const uint8_t * const buffer, const uint8_t * const lastbuf, + uint8_t * const fec_block, const unsigned long fbs, + const unsigned fbn, const unsigned k ); + +/* buffer, lastbuf: k data blocks, those in bb_vector are missing. + fecbuf: as many fec blocks as missing data blocks in the order of fbn_vector. + The repaired data blocks are written in their place in buffer and lastbuf. +*/ +void rs8_decode( uint8_t * const buffer, uint8_t * const lastbuf, + const std::vector< unsigned > & bb_vector, + const std::vector< unsigned > & fbn_vector, + uint8_t * const fecbuf, const unsigned long fbs, + const unsigned k ); +void rs16_decode( uint8_t * const buffer, uint8_t * const lastbuf, + const std::vector< unsigned > & bb_vector, + const std::vector< unsigned > & fbn_vector, + uint8_t * const fecbuf, const unsigned long fbs, + const unsigned k ); diff --git a/fec_create.cc b/fec_create.cc new file mode 100644 index 0000000..e5c2a6c --- /dev/null +++ b/fec_create.cc @@ -0,0 +1,615 @@ +/* Lziprecover - Data recovery tool for the lzip format + Copyright (C) 2023-2024 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <algorithm> +#include <cerrno> +#include <climits> +#include <cmath> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <ctime> +#include <new> +#include <list> +#include <string> +#include <vector> +#include <pthread.h> +#include <stdint.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/stat.h> + +#include "lzip.h" +#include "md5.h" +#include "fec.h" + + +namespace { + +void xinit_mutex( pthread_mutex_t * const mutex ) + { + const int errcode = pthread_mutex_init( mutex, 0 ); + if( errcode ) + { show_error( "pthread_mutex_init", errcode ); cleanup_and_fail( 1 ); } + } + +void xinit_cond( pthread_cond_t * const cond ) + { + const int errcode = pthread_cond_init( cond, 0 ); + if( errcode ) + { show_error( "pthread_cond_init", errcode ); cleanup_and_fail( 1 ); } + } + + +void xdestroy_mutex( pthread_mutex_t * const mutex ) + { + const int errcode = pthread_mutex_destroy( mutex ); + if( errcode ) + { show_error( "pthread_mutex_destroy", errcode ); cleanup_and_fail( 1 ); } + } + +void xdestroy_cond( pthread_cond_t * const cond ) + { + const int errcode = pthread_cond_destroy( cond ); + if( errcode ) + { show_error( "pthread_cond_destroy", errcode ); cleanup_and_fail( 1 ); } + } + + +void xlock( pthread_mutex_t * const mutex ) + { + const int errcode = pthread_mutex_lock( mutex ); + if( errcode ) + { show_error( "pthread_mutex_lock", errcode ); cleanup_and_fail( 1 ); } + } + +void xunlock( pthread_mutex_t * const mutex ) + { + const int errcode = pthread_mutex_unlock( mutex ); + if( errcode ) + { show_error( "pthread_mutex_unlock", errcode ); cleanup_and_fail( 1 ); } + } + + +void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex ) + { + const int errcode = pthread_cond_wait( cond, mutex ); + if( errcode ) + { show_error( 
"pthread_cond_wait", errcode ); cleanup_and_fail( 1 ); } + } + +void xsignal( pthread_cond_t * const cond ) + { + const int errcode = pthread_cond_signal( cond ); + if( errcode ) + { show_error( "pthread_cond_signal", errcode ); cleanup_and_fail( 1 ); } + } + + +unsigned long out_size; +unsigned deliver_id; // id of worker writing fec packets to outfd +unsigned check_counter; +unsigned wait_counter; +pthread_mutex_t omutex; +std::vector< pthread_cond_t > may_deliver; // worker[i] may write +pthread_mutex_t cmutex = PTHREAD_MUTEX_INITIALIZER; // cleanup mutex + + +struct Worker_arg + { + const uint8_t * prodata; + const uint8_t * lastbuf; + unsigned fec_blocks; + unsigned k; + unsigned num_workers; + unsigned worker_id; + Coded_fbs coded_fbs; + bool gf16; + }; + + +// write a fec packet and pass the token to the next thread +extern "C" void * worker( void * arg ) + { + const Worker_arg & tmp = *(const Worker_arg *)arg; + const uint8_t * const prodata = tmp.prodata; + const uint8_t * const lastbuf = tmp.lastbuf; + const unsigned fec_blocks = tmp.fec_blocks; + const unsigned k = tmp.k; + const unsigned num_workers = tmp.num_workers; + const unsigned worker_id = tmp.worker_id; + const Coded_fbs coded_fbs = tmp.coded_fbs; + const bool gf16 = tmp.gf16; + + for( unsigned fbn = worker_id; fbn < fec_blocks; fbn += num_workers ) + { + const Fec_packet fec_packet( prodata, lastbuf, fbn, k, coded_fbs, gf16 ); + const long packet_size = fec_packet.packet_size(); + xlock( &omutex ); + ++check_counter; + while( worker_id != deliver_id ) + { ++wait_counter; xwait( &may_deliver[worker_id], &omutex ); } + xlock( &cmutex ); // because of cleanup_and_fail + if( writeblock( outfd, fec_packet.image(), packet_size ) != packet_size ) + { xunlock( &cmutex ); cleanup_and_fail( 1 ); } + xunlock( &cmutex ); + out_size += packet_size; + if( ++deliver_id >= num_workers ) deliver_id = 0; + xsignal( &may_deliver[deliver_id] ); // allow next worker to write + xunlock( &omutex ); + } + return 0; + 
} + + +// start the workers and wait for them to finish. +bool write_fec_mt( const uint8_t * const prodata, + const uint8_t * const lastbuf, + const unsigned fec_blocks, const unsigned k, + const unsigned num_workers, const Coded_fbs coded_fbs, + const char debug_level, const bool gf16 ) + { + if( debug_level & 2 ) std::fputs( "write_fec_mt.\n", stderr ); + out_size = 0; + deliver_id = 0; + check_counter = 0; + wait_counter = 0; + xinit_mutex( &omutex ); + may_deliver.resize( num_workers ); + for( unsigned i = 0; i < may_deliver.size(); ++i ) + xinit_cond( &may_deliver[i] ); + std::vector< Worker_arg > worker_args( num_workers ); + std::vector< pthread_t > worker_threads( num_workers ); + + for( unsigned i = 0; i < num_workers; ++i ) + { + worker_args[i].prodata = prodata; + worker_args[i].lastbuf = lastbuf; + worker_args[i].fec_blocks = fec_blocks; + worker_args[i].k = k; + worker_args[i].num_workers = num_workers; + worker_args[i].worker_id = i; + worker_args[i].coded_fbs = coded_fbs; + worker_args[i].gf16 = gf16; + const int errcode = + pthread_create( &worker_threads[i], 0, worker, &worker_args[i] ); + if( errcode ) { show_error( "Can't create worker threads", errcode ); + cleanup_and_fail( 1 ); } + } + + for( unsigned i = 0; i < num_workers; ++i ) + { + const int errcode = pthread_join( worker_threads[i], 0 ); + if( errcode ) { show_error( "Can't join worker threads", errcode ); + cleanup_and_fail( 1 ); } + } + + for( unsigned i = 0; i < may_deliver.size(); ++i ) + xdestroy_cond( &may_deliver[i] ); + xdestroy_mutex( &omutex ); + + if( debug_level & 1 ) + std::fprintf( stderr, + "workers started %8u\n" + "any worker tried to write a packet %8u times\n" + "any worker had to wait %8u times\n", + num_workers, check_counter, wait_counter ); + + return true; + } + + +inline void set_le( uint8_t * const buf, const int size, unsigned long n ) + { for( int i = 0; i < size; ++i ) { buf[i] = (uint8_t)n; n >>= 8; } } + + +unsigned compute_unit_fbs( const unsigned long 
prodata_size ) + { + unsigned bs = min_fbs; + while( bs < 65536 && 4ULL * bs * bs < prodata_size ) bs <<= 1; + return bs; + } + +unsigned long divide_fbs( const unsigned long size, const unsigned blocks, + const unsigned unit_fbs ) + { + unsigned long long fbs = ceil_divide( size, blocks ); // ULL as max_fbs + if( fbs < min_fbs ) fbs = min_fbs; + else if( fbs > max_fbs ) fbs = max_fbs; + return ceil_divide( fbs, unit_fbs ); + } + + +Coded_fbs compute_fbs( const unsigned long prodata_size, + const unsigned cl_block_size, const char fec_level ) + { + const unsigned unit_fbs = isvalid_fbs( cl_block_size ) ? cl_block_size : + compute_unit_fbs( prodata_size ); + const unsigned long max_k = ( fec_level == 0 ) ? max_k8 : max_k16; + const unsigned k9 = std::min( ceil_divide( prodata_size, unit_fbs ), max_k ); + const unsigned long fbsu9 = divide_fbs( prodata_size, k9, unit_fbs ); + const unsigned long fbsu0 = divide_fbs( prodata_size, max_k8, unit_fbs ); + const unsigned long a = std::min( (10 - fec_level) * fbsu9, fbsu0 ); // lin + const unsigned long b = fbsu0 >> fec_level; // exp + const unsigned long fbsu = std::max( a, b ); // join linear and exponential + return Coded_fbs( fbsu * unit_fbs, unit_fbs ); + } + + +unsigned compute_fec_blocks( const unsigned long prodata_size, + const unsigned long fb_or_pct, const char fctype, + const char fec_level, const Coded_fbs coded_fbs ) + { + const unsigned long fbs = coded_fbs.val(); + const unsigned prodata_blocks = ceil_divide( prodata_size, fbs ); + const unsigned long max_k = ( fec_level == 0 ) ? max_k8 : max_k16; + if( !isvalid_fbs( fbs ) || prodata_blocks > max_k ) return 0; + const unsigned long max_nk = ( fec_level == 0 ) ? 
/* Advance 'state' one step of the linear congruential generator
   state = state * 1103515245 + 12345 and return a pseudo-random number
   between 0 and 32767 taken from bits 16 to 30 of the new state. */
unsigned my_rand( unsigned long & state )
  {
  const unsigned long multiplier = 1103515245;
  const unsigned long increment = 12345;
  state *= multiplier;
  state += increment;
  return ( state >> 16 ) & 0x7FFF;
  }
my_rand( state ) : my_rand( state ) % 128; + for( unsigned j = 0; j < fbn_vector.size(); ++j ) + if( fbn == fbn_vector[j] ) goto again; + fbn_vector.push_back( fbn ); + } + } + + +bool write_fec( const char * const input_filename, + const uint8_t * const prodata, const unsigned long prodata_size, + const unsigned long fb_or_pct, const unsigned cl_block_size, + unsigned num_workers, const char debug_level, const char fctype, + const char fec_level, const bool cl_gf16, const bool fec_random ) + { + const Coded_fbs coded_fbs = + compute_fbs( prodata_size, cl_block_size, fec_level ); + const unsigned fec_blocks = + compute_fec_blocks( prodata_size, fb_or_pct, fctype, fec_level, coded_fbs ); + if( fec_blocks == 0 ) { show_file_error( input_filename, + "Input file is too large for fec protection." ); return false; } + if( num_workers > fec_blocks ) num_workers = fec_blocks; + const unsigned long fbs = coded_fbs.val(); + const unsigned prodata_blocks = ceil_divide( prodata_size, fbs ); + md5_type prodata_md5; + compute_md5( prodata, prodata_size, prodata_md5 ); + unsigned chksum_packet_size; + const bool gf16 = cl_gf16 || prodata_blocks > max_k8 || fec_blocks > max_k8; + { + const Chksum_packet chksum_packet( prodata, prodata_size, prodata_md5, + coded_fbs, gf16, false ); // CRC32 array + const long packet_size = chksum_packet.packet_size(); + if( writeblock( outfd, chksum_packet.image(), packet_size ) != packet_size ) + goto fail; + chksum_packet_size = packet_size; + } + { + unsigned long fecdata_size = chksum_packet_size; + const uint8_t * const lastbuf = set_lastbuf( prodata, prodata_size, fbs ); + gf16 ? 
gf16_init() : gf8_init(); // initialize Galois tables + if( fec_random ) + { + std::vector< unsigned > fbn_vector; + random_fbn_vector( fec_blocks, gf16, fbn_vector ); + for( unsigned i = 0; i < fbn_vector.size(); ++i ) + { + const unsigned fbn = fbn_vector[i]; + const Fec_packet + fec_packet( prodata, lastbuf, fbn, prodata_blocks, coded_fbs, gf16 ); + const long packet_size = fec_packet.packet_size(); + if( writeblock( outfd, fec_packet.image(), packet_size ) != packet_size ) + { delete[] lastbuf; goto fail; } + fecdata_size += packet_size; + } + } + else if( num_workers > 1 ) + { + if( !write_fec_mt( prodata, lastbuf, fec_blocks, prodata_blocks, + num_workers, coded_fbs, debug_level, gf16 ) ) + { delete[] lastbuf; goto fail; } + fecdata_size += out_size; + } + else for( unsigned fbn = 0; fbn < fec_blocks; ++fbn ) + { + const Fec_packet + fec_packet( prodata, lastbuf, fbn, prodata_blocks, coded_fbs, gf16 ); + const long packet_size = fec_packet.packet_size(); + if( writeblock( outfd, fec_packet.image(), packet_size ) != packet_size ) + { delete[] lastbuf; goto fail; } + fecdata_size += packet_size; + } + delete[] lastbuf; + if( ( fecdata_size + chksum_packet_size ) / 2 <= fec_blocks * fbs && + fec_blocks > 1 ) // write the second chksum packet + { + const Chksum_packet chksum_packet( prodata, prodata_size, prodata_md5, + coded_fbs, gf16, true ); // CRC32-C array + const long packet_size = chksum_packet.packet_size(); + if( writeblock( outfd, chksum_packet.image(), packet_size ) != packet_size ) + goto fail; + fecdata_size += packet_size; + } + if( fecdata_size % 4 != 0 ) internal_error( "fecdata_size % 4 != 0" ); + if( verbosity >= 1 ) + std::fprintf( stderr, " %s: %s bytes, %s fec bytes, %u blocks\n", + printable_name( output_filename, false ), + format_num3( fecdata_size ), + format_num3( fec_blocks * fbs ), fec_blocks ); + return true; + } +fail: + show_file_error( input_filename, "Write error", errno ); return false; + } + + +int open_instream2( const 
std::string & name, struct stat * const in_statsp ) + { + if( !has_fec_extension( name ) ) + return open_instream( name.c_str(), in_statsp, false, true ); + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: %s: Input file already has '%s' suffix, ignored.\n", + program_name, name.c_str(), fec_extension ); + return -1; + } + +} // end namespace + + +Chksum_packet::Chksum_packet( const uint8_t * const prodata, + const unsigned long prodata_size, + const md5_type & prodata_md5, const Coded_fbs coded_fbs, + const bool gf16_, const bool is_crc_c_ ) + { + const unsigned long fbs = coded_fbs.val(); + const unsigned prodata_blocks = ceil_divide( prodata_size, fbs ); + if( prodata_blocks * fbs < prodata_size ) + internal_error( "prodata_blocks * fec_block_size < prodata_size" ); + const unsigned paysize = prodata_blocks * sizeof crc_array()[0]; + const unsigned packet_size = header_size + paysize + trailer_size; + if( paysize <= prodata_blocks || packet_size <= paysize ) + throw std::bad_alloc(); + uint8_t * const ip = new uint8_t[packet_size]; // writable image ptr + image_ = ip; + + std::memcpy( ip, fec_magic, fec_magic_l ); + ip[version_o] = current_version; + ip[flags_o] = ( gf16_ << 1 ) | is_crc_c_; + set_le( ip + prodata_size_o, prodata_size_l, prodata_size ); + *(md5_type *)(ip + prodata_md5_o) = prodata_md5; + coded_fbs.copy( ip + fbs_o ); + set_le( ip + header_crc_o, crc32_l, compute_header_crc( image_ ) ); + + le32 * const crc_arr = (le32 *)(ip + crc_array_o); // fill crc array + unsigned i = 0; + if( !is_crc_c_ ) // CRC32 + for( unsigned long pos = 0; pos < prodata_size; pos += fbs, ++i ) + crc_arr[i] = + crc32.compute_crc( prodata + pos, std::min( fbs, prodata_size - pos ) ); + else + { // CRC32-C + const CRC32 crc32c( true ); + for( unsigned long pos = 0; pos < prodata_size; pos += fbs, ++i ) + crc_arr[i] = + crc32c.compute_crc( prodata + pos, std::min( fbs, prodata_size - pos ) ); + } + if( i != prodata_blocks ) + internal_error( "wrong fec_block_size or 
number of prodata_blocks." ); + + // compute CRC32 of payload (crc array) + set_le( ip + crc_array_o + paysize, crc32_l, + crc32.compute_crc( image_ + crc_array_o, paysize ) ); + } + + +Fec_packet::Fec_packet( const uint8_t * const prodata, + const uint8_t * const lastbuf, + const unsigned fbn, const unsigned k, + const Coded_fbs coded_fbs, const bool gf16 ) + { + const unsigned long fbs = coded_fbs.val(); + const unsigned long packet_size = header_size + fbs + trailer_size; + if( packet_size <= fbs || !fits_in_size_t( packet_size ) ) + throw std::bad_alloc(); + uint8_t * const ip = new uint8_t[packet_size]; // writable image ptr + image_ = ip; + + std::memcpy( ip, fec_packet_magic, fec_magic_l ); + set_le( ip + fbn_o, fbn_l, fbn ); + coded_fbs.copy( ip + fbs_o ); + set_le( ip + header_crc_o, crc32_l, compute_header_crc( image_ ) ); + + // fill fec array + gf16 ? rs16_encode( prodata, lastbuf, ip + fec_block_o, fbs, fbn, k ) : + rs8_encode( prodata, lastbuf, ip + fec_block_o, fbs, fbn, k ); + + // compute CRC32 of payload (fec array) + set_le( ip + fec_block_o + fbs, crc32_l, + crc32.compute_crc( image_ + fec_block_o, fbs ) ); + } + + +void cleanup_mutex_lock() // make cleanup_and_fail thread-safe + { pthread_mutex_lock( &cmutex ); } // ignore errors to avoid loop + +int gf_check( const unsigned k, const bool cl_gf16, const bool fec_random ) + { + std::vector< unsigned > fbn_vector; + const bool gf16 = cl_gf16 || k > max_k8; + if( fec_random ) random_fbn_vector( k, gf16, fbn_vector ); + return gf16 ? !gf16_check( fbn_vector, k ) : !gf8_check( fbn_vector, k ); + } + + +/* if name contains slash(es), copy name into srcdir up to the last slash, + removing a leading dot followed by slash(es) */ +void extract_dirname( const std::string & name, std::string & srcdir ) + { + unsigned i = 0; + unsigned j = name.size(); + if( j >= 2 && name[0] == '.' 
&& name[1] == '/' ) // remove leading "./" + for( i = 2; i < j && name[i] == '/'; ) ++i; + while( j > i && name[j-1] != '/' ) --j; // remove last component if any + if( j > i ) srcdir.assign( name, i, j - i ); + } + + +// replace prefix srcdir with destdir in name and write result to outname +void replace_dirname( const std::string & name, const std::string & srcdir, + const std::string & destdir, std::string & outname ) + { + if( srcdir.size() && name.compare( 0, srcdir.size(), srcdir ) != 0 ) + { if( verbosity >= 0 ) std::fprintf( stderr, + "dirname '%s' != '%s'\n", name.c_str(), srcdir.c_str() ); + internal_error( "srcdir mismatch." ); } + outname = destdir; + outname.append( name, srcdir.size(), name.size() - srcdir.size() ); + } + + +bool has_fec_extension( const std::string & name ) + { + const unsigned ext_len = std::strlen( fec_extension ); + return name.size() > ext_len && + name.compare( name.size() - ext_len, ext_len, fec_extension ) == 0; + } + + +const char * printable_name( const std::string & filename, const bool in ) + { + if( filename.empty() || filename == "-" ) return in ? "(stdin)" : "(stdout)"; + return filename.c_str(); + } + + +int fec_create( const std::vector< std::string > & filenames, + const std::string & default_output_filename, + const unsigned long fb_or_pct, const unsigned cl_block_size, + const unsigned num_workers, const char debug_level, + const char fctype, const char fec_level, const char recursive, + const bool cl_gf16, const bool fec_random, const bool force, + const bool to_stdout ) + { + const bool to_dir = !to_stdout && default_output_filename.size() && + default_output_filename.end()[-1] == '/'; + const bool to_file = !to_stdout && !to_dir && default_output_filename.size(); + if( ( to_stdout || to_file ) && filenames.size() != 1 ) + { show_error( "You must specify exactly 1 file when redirecting fec data." 
); + return 1; } + if( ( to_stdout || to_file ) && recursive ) + { show_error( "Can't redirect fec data in recursive mode." ); return 1; } + if( to_stdout ) { outfd = STDOUT_FILENO; if( !check_tty_out() ) return 1; } + else outfd = -1; + + int retval = 0; + const bool one_to_one = !to_stdout && !to_file; + for( unsigned i = 0; i < filenames.size(); ++i ) + { + if( filenames[i] == "-" ) + { prot_stdin(); set_retval( retval, 1 ); continue; } + std::string srcdir; // dirname to be replaced by '-o dir/' + if( to_dir ) extract_dirname( filenames[i], srcdir ); + std::list< std::string > filelist( 1U, filenames[i] ); + std::string input_filename; + while( next_filename( filelist, input_filename, retval, recursive ) ) + { + struct stat in_stats; + const int infd = open_instream2( input_filename, &in_stats ); + if( infd < 0 ) { set_retval( retval, 1 ); continue; } + + const char * const input_filenamep = input_filename.c_str(); + const long long file_size = lseek( infd, 0, SEEK_END ); + if( file_size <= 0 ) + { show_file_error( input_filenamep, "Input file is empty." 
); + set_retval( retval, 2 ); close( infd ); continue; } + if( !fits_in_size_t( file_size ) ) + { show_file_error( input_filenamep, large_file_msg ); + set_retval( retval, 1 ); close( infd ); continue; } + const unsigned long prodata_size = file_size; + const uint8_t * const prodata = + (const uint8_t *)mmap( 0, prodata_size, PROT_READ, MAP_PRIVATE, infd, 0 ); + close( infd ); + if( prodata == MAP_FAILED ) + { show_file_error( input_filenamep, mmap_msg, errno ); + set_retval( retval, 1 ); continue; } + + if( one_to_one ) + { + if( to_dir ) replace_dirname( input_filename, srcdir, + default_output_filename, output_filename ); + else output_filename = input_filename; + output_filename += fec_extension; set_signal_handler(); + if( !open_outstream( force, true, false, true, to_dir ) ) + { munmap( (void *)prodata, prodata_size ); + set_retval( retval, 1 ); continue; } + if( !check_tty_out() ) + { set_retval( retval, 1 ); return retval; } // don't delete a tty + } + else if( to_file && outfd < 0 ) // open outfd after checking infd + { + output_filename = default_output_filename; set_signal_handler(); + if( !open_outstream( force, false ) || !check_tty_out() ) + return 1; // check tty only once and don't try to delete a tty + } + + // write fec data to output file + if( !write_fec( input_filenamep, prodata, prodata_size, fb_or_pct, + cl_block_size, num_workers, debug_level, fctype, + fec_level, cl_gf16, fec_random ) ) + { munmap( (void *)prodata, prodata_size ); cleanup_and_fail( 1 ); } + /* To avoid '-Fc | -Ft' running out of address space, munmap before + closing outfd and mmap after reading fec data from stdin */ + munmap( (void *)prodata, prodata_size ); + if( !close_outstream( &in_stats ) ) cleanup_and_fail( 1 ); + } + } + return retval; + } diff --git a/fec_repair.cc b/fec_repair.cc new file mode 100644 index 0000000..c15dfd3 --- /dev/null +++ b/fec_repair.cc @@ -0,0 +1,1106 @@ +/* Lziprecover - Data recovery tool for the lzip format + Copyright (C) 2023-2024 
/* Return true if 'name' ends with ".lz" or ".tlz" and has at least one
   character before the extension. */
bool has_lz_extension( const std::string & name )
  {
  static const char * const extensions[] = { ".lz", ".tlz" };
  const unsigned num_extensions = sizeof extensions / sizeof extensions[0];
  for( unsigned i = 0; i < num_extensions; ++i )
    {
    const std::string::size_type len = std::strlen( extensions[i] );
    if( name.size() > len &&
        name.compare( name.size() - len, len, extensions[i] ) == 0 )
      return true;
    }
  return false;
  }
+ In case of error, return 0 and do not modify '*file_sizep'. +*/ +uint8_t * read_file( const std::string & filename, long * const file_sizep ) + { + struct stat in_stats; // not used + const char * const filenamep = printable_name( filename ); + const int infd = ( filename == "-" ) ? + STDIN_FILENO : open_instream( filenamep, &in_stats, false ); + if( infd < 0 ) return 0; + long buffer_size = 65536; + uint8_t * buffer = (uint8_t *)std::malloc( buffer_size ); + if( !buffer ) { show_file_error( filenamep, mem_msg ); return 0; } + long file_size = readblock( infd, buffer, buffer_size ); + while( file_size >= buffer_size && !errno ) + { + if( buffer_size >= LONG_MAX ) + { show_file_error( filenamep, large_file_msg ); + std::free( buffer ); return 0; } + buffer_size = ( buffer_size <= LONG_MAX / 2 ) ? 2 * buffer_size : LONG_MAX; + uint8_t * const tmp = (uint8_t *)std::realloc( buffer, buffer_size ); + if( !tmp ) + { show_file_error( filenamep, mem_msg ); std::free( buffer ); return 0; } + buffer = tmp; + file_size += readblock( infd, buffer + file_size, buffer_size - file_size ); + } + if( errno ) + { show_file_error( filenamep, read_error_msg, errno ); + std::free( buffer ); return 0; } + if( close( infd ) != 0 ) + { show_file_error( filenamep, "Error closing input file", errno ); + std::free( buffer ); return 0; } + *file_sizep = file_size; + return buffer; + } + + +const char * bad_fec_version( const unsigned version ) + { + static char buf[80]; + snprintf( buf, sizeof buf, "Version %u fec format not supported.", version ); + return buf; + } + +// Return false if truncation removed all blocks. 
+bool truncate_block_vector( std::vector< Block > & block_vector, + const long long end ) + { + unsigned i = block_vector.size(); + while( i > 0 && block_vector[i-1].pos() >= end ) --i; + if( i == 0 ) { block_vector.clear(); return false; } + Block & b = block_vector[i-1]; + if( b.includes( end ) ) b.size( end - b.pos() ); + if( i < block_vector.size() ) + block_vector.erase( block_vector.begin() + i, block_vector.end() ); + return true; + } + + +class Fec_index + { + const le32 * crc_array_; // images allocated in fecdata + const le32 * crcc_array_; + std::vector< Fec_packet > fec_vector; // fec blocks + std::string error_; + unsigned long fec_net_size_; // size of packets (not file size) + unsigned long fec_block_size_; // from chksum/fec packets + unsigned long prodata_size_; // from chksum packets + md5_type prodata_md5_; // from chksum packets + int retval_; // 0 = OK, 1 = error, 2 = fatal error + bool gf16_; + const bool is_lz_; // used by find_bad_blocks + + bool parse_packet( const Chksum_packet & chksum_packet, + const bool ignore_errors ); + +public: + Fec_index( const uint8_t * const fecdata, const unsigned long fecdata_size, + const bool ignore_errors = false, const bool is_lz = false ); + + const std::string & error() const { return error_; } + int retval() const { return retval_; } + void show_fec_data( const std::string & input_filename, + const std::string & fec_filename, FILE * const f ) const; + + unsigned long fec_block_size() const { return fec_block_size_; } + unsigned fec_blocks() const { return fec_vector.size(); } + unsigned long fec_bytes() const { return fec_blocks() * fec_block_size_; } + const uint8_t * fec_block( const unsigned i ) const + { return fec_vector[i].fec_block(); } + unsigned fbn( const unsigned i ) const + { return fec_vector[i].fec_block_number(); } + bool gf16() const { return gf16_; } + + unsigned long prodata_size() const { return prodata_size_; } + const md5_type & prodata_md5() const { return prodata_md5_; } + 
unsigned prodata_blocks() const + { return ceil_divide( prodata_size_, fec_block_size_ ); } + bool is_lz() const { return is_lz_; } + + bool has_array() const { return crc_array() != 0 || crcc_array() != 0; } + const le32 * crc_array() const { return crc_array_; } + const le32 * crcc_array() const { return crcc_array_; } + + unsigned long block_pos( const unsigned i ) const + { return i * fec_block_size_; } + + unsigned long block_size( const unsigned i ) const + { + const unsigned long pos = i * fec_block_size_; + if( pos >= prodata_size_ ) return 0; + return std::min( fec_block_size_, prodata_size_ - pos ); + } + + bool prodata_match( const std::string & input_filename, + const md5_type & computed_prodata_md5, + const bool debug = true ) const + { + if( prodata_md5_ == computed_prodata_md5 ) return true; + show_diag_msg( input_filename, + "MD5 mismatch between protected data and fec data.", debug ); + return false; + } + }; + + +bool Fec_index::parse_packet( const Chksum_packet & chksum_packet, + const bool ignore_errors ) + { + const unsigned long long prodata_size = chksum_packet.prodata_size(); + if( prodata_size_ <= 0 ) // first chksum packet + { + if( !fits_in_size_t( prodata_size ) ) + { error_ = large_file_msg; retval_ = 1; return false; } + prodata_size_ = prodata_size; + prodata_md5_ = chksum_packet.prodata_md5(); + gf16_ = chksum_packet.gf16(); + } + else + { + if( prodata_size_ != prodata_size ) + { error_ = "Contradictory protected data size in chksum packet."; + retval_ = 2; return false; } + if( prodata_md5_ != chksum_packet.prodata_md5() ) + { error_ = "Contradictory protected data MD5 in chksum packet."; + retval_ = 2; return false; } + if( gf16_ != chksum_packet.gf16() ) + { error_ = "Contradictory Galois Field size in chksum packet."; + retval_ = 2; return false; } + } + if( !isvalid_fbs( fec_block_size_ ) ) + fec_block_size_ = chksum_packet.fec_block_size(); + else if( fec_block_size_ != chksum_packet.fec_block_size() ) + { error_ = 
"Contradictory fec_block_size in chksum packet."; + retval_ = 2; return false; } + if( !chksum_packet.check_payload_crc() ) // corrupt array + { if( ignore_errors ) return true; + error_ = "Corrupt CRC array in chksum packet."; retval_ = 2; return false; } + if( !chksum_packet.is_crc_c() ) + { + if( !crc_array_ ) crc_array_ = chksum_packet.crc_array(); + else { error_ = "More than one CRC32 array found."; + retval_ = 2; return false; } + } + else if( !crcc_array_ ) crcc_array_ = chksum_packet.crc_array(); + else { error_ = "More than one CRC32-C array found."; + retval_ = 2; return false; } + return true; + } + + +Fec_index::Fec_index( const uint8_t * const fecdata, + const unsigned long fecdata_size, + const bool ignore_errors, const bool is_lz ) + : crc_array_( 0 ), crcc_array_( 0 ), fec_net_size_( 0 ), + fec_block_size_( 0 ), prodata_size_( 0 ), retval_( 0 ), gf16_( false ), + is_lz_( is_lz ) + { + if( fecdata_size <= 0 ) + { error_ = "Fec file is empty."; retval_ = 2; return; } + if( fecdata_size >= fec_magic_l && !check_fec_magic( fecdata ) ) + { error_ = "Bad magic number (file is not fec data)."; retval_ = 2; return; } + if( fecdata_size < Chksum_packet::min_packet_size() + + Fec_packet::min_packet_size() ) + { error_ = "Fec file is too short."; retval_ = 2; return; } + if( !Chksum_packet::check_version( fecdata ) ) + { error_ = bad_fec_version( Chksum_packet::version( fecdata ) ); + retval_ = 2; return; } + + /* Parse packets. pos usually points to a packet header, except when + skipping a corrupt packet. 
*/ + for( unsigned long pos = 0; pos < fecdata_size; ) + { + unsigned long image_size = + Chksum_packet::check_image( fecdata + pos, fecdata_size - pos ); + if( image_size > 2 ) + { + if( !parse_packet( Chksum_packet( fecdata + pos ), ignore_errors ) ) + return; + fec_net_size_ += image_size; pos += image_size; continue; + } + if( image_size != 0 && ignore_errors ) { ++pos; continue; } + if( image_size == 1 ) + { error_ = "Wrong packet size in chksum packet."; retval_ = 2; return; } + if( image_size == 2 ) + { error_ = "Wrong CRC in chksum packet."; retval_ = 2; return; } + + image_size = Fec_packet::check_image( fecdata + pos, fecdata_size - pos ); + if( image_size > 2 ) + { + const Fec_packet fec_packet( fecdata + pos ); + if( !isvalid_fbs( fec_block_size_ ) ) + fec_block_size_ = fec_packet.fec_block_size(); + else if( fec_block_size_ != fec_packet.fec_block_size() ) + { error_ = "Contradictory fec_block_size in fec packet."; + retval_ = 2; return; } + fec_vector.push_back( fec_packet ); + fec_net_size_ += image_size; pos += image_size; continue; + } + if( image_size != 0 && ignore_errors ) { ++pos; continue; } + if( image_size == 1 ) + { error_ = "Wrong packet size in fec packet."; retval_ = 2; return; } + if( image_size == 2 ) + { error_ = "Wrong CRC in fec packet."; retval_ = 2; return; } + + if( ignore_errors ) + { while( ++pos < fecdata_size && fecdata[pos] != fec_magic[0] ) {} + continue; } + error_ = "Unknown packet type = "; // unknown or corrupt packet + const int size = std::min( (unsigned long)fec_magic_l, fecdata_size - pos ); + format_trailing_bytes( fecdata + pos, size, error_ ); + retval_ = 2; return; + } + if( prodata_size_ <= 0 ) + { error_ = "No valid chksum packets found."; retval_ = 2; return; } + if( fec_blocks() <= 0 ) + { error_ = "No valid fec packets found."; retval_ = 2; return; } + if( !has_array() && !ignore_errors ) + { error_ = "No valid CRC arrays found."; retval_ = 2; return; } + if( fec_blocks() > prodata_blocks() ) + { error_ = 
"Too many fec packets found. (More than data blocks)"; + retval_ = 2; return; } + if( !isvalid_fbs( fec_block_size_ ) ) + internal_error( "fec_block_size not found." ); + // check that fbn < max_k in each fec packet + const unsigned max_k = gf16_ ? max_k16 : max_k8; + std::vector< bool > bv( max_k ); + for( unsigned i = 0; i < fec_blocks(); ++i ) + { + const unsigned fbn = fec_vector[i].fec_block_number(); + if( fbn >= max_k ) + { error_ = "Invalid fec_block_number in fec packet."; + retval_ = 2; return; } + if( bv[fbn] ) + { error_ = "Same fec_block_number in two fec packets."; + retval_ = 2; return; } + bv[fbn] = true; + } + } + + +void Fec_index::show_fec_data( const std::string & input_filename, + const std::string & fec_filename, FILE * const f ) const + { + const unsigned long fec_bytes_ = fec_bytes(); + const double spercent = ( 100.0 * fec_net_size_ ) / prodata_size_; + const double fpercent = ( 100.0 * fec_bytes_ ) / prodata_size_; + if( input_filename.size() ) + std::fprintf( f, "Protected file: '%s'\n", input_filename.c_str() ); + std::fprintf( f, "Protected size: %11s Block size: %5s Data blocks: %u\n" + " Fec file: '%s'\n" + " Fec size: %11s %6.2f%% Fec blocks: %u\n" + " Fec bytes: %11s %6.2f%% Fec numbers:", + format_num3( prodata_size_ ), format_num3( fec_block_size_ ), + prodata_blocks(), printable_name( fec_filename ), + format_num3( fec_net_size_ ), spercent, fec_blocks(), + format_num3( fec_bytes_ ), fpercent ); + for( unsigned i = 0; i < fec_blocks(); ++i ) // print ranges of fbn's + { + std::fprintf( f, " %u", fbn( i ) ); + const unsigned j = i; + while( i + 1 < fec_blocks() && fbn( i + 1 ) == fbn( i ) + 1 ) ++i; + if( i > j ) std::fprintf( f, "%c%u", ( i == j + 1 ) ? ' ' : '-', fbn( i ) ); + } + std::fprintf( f, "\n Features: GF(2^%s)%s%s\n", gf16_ ? "16" : "8", + crc_array_ ? " CRC32" : "", crcc_array_ ? 
" CRC32-C" : "" ); + std::fflush( f ); + } + + +class Bad_block_index + { + const Fec_index & fec_index; + const CRC32 crc32c; + // list of prodata blocks with a mismatched CRC32 or CRC32-C + std::vector< unsigned > bb_vector_; // index of each bad block + + bool check_data_block( const uint8_t * const prodata, const unsigned i ) const; + bool zeroed_data_block( const uint8_t * const prodata, const unsigned i ) const; + +public: + Bad_block_index( const Fec_index & fec_index_, const uint8_t * const prodata ) + : fec_index( fec_index_ ), crc32c( true ) { find_bad_blocks( prodata ); } + + unsigned bad_blocks() const { return bb_vector_.size(); } + const std::vector< unsigned > & bb_vector() const { return bb_vector_; } + + void find_bad_blocks( const uint8_t * const prodata ); + + unsigned long first_bad_pos() const + { + if( bb_vector_.empty() ) return 0; + return fec_index.block_pos( bb_vector_.front() ); + } + + unsigned long last_bad_pos() const + { + if( bb_vector_.empty() ) return 0; + return fec_index.block_pos( bb_vector_.back() ) + + fec_index.block_size( bb_vector_.back() ) - 1; + } + + unsigned long bad_span() const + { + if( bb_vector_.empty() ) return 0; + return last_bad_pos() + 1 - first_bad_pos(); + } + + unsigned long bad_data_bytes() const + { + if( bb_vector_.empty() ) return 0; + return ( bb_vector_.size() - 1 ) * fec_index.fec_block_size() + + fec_index.block_size( bb_vector_.back() ); + } + }; + +bool Bad_block_index::check_data_block( const uint8_t * const prodata, + const unsigned i ) const + { + // check protected file using the chksum packets + const unsigned long pos = fec_index.block_pos( i ); + const unsigned long size = fec_index.block_size( i ); + if( fec_index.crc_array() && fec_index.crc_array()[i].val() != + crc32.compute_crc( prodata + pos, size ) ) return false; + if( fec_index.crcc_array() && fec_index.crcc_array()[i].val() != + crc32c.compute_crc( prodata + pos, size ) ) return false; + return fec_index.has_array(); + } + +bool 
Bad_block_index::zeroed_data_block( const uint8_t * const prodata, + const unsigned i ) const + { + // detect holes in lzip protected file + enum { minlen = 8 }; // min number of consecutive identical bytes + const unsigned long pos = fec_index.block_pos( i ); + const unsigned long end = pos + fec_index.block_size( i ); + unsigned count = 0; + for( unsigned long j = pos + 1; j < end; ++j ) + { + if( prodata[j] != prodata[j-1] ) count = 0; + else if( ++count >= minlen - 1 ) return true; + } + return false; + } + +void Bad_block_index::find_bad_blocks( const uint8_t * const prodata ) + { + bb_vector_.clear(); + const unsigned blocks = fec_index.prodata_blocks(); + if( fec_index.has_array() ) + { for( unsigned i = 0; i < blocks; ++i ) + if( !check_data_block( prodata, i ) ) + bb_vector_.push_back( i ); } + else if( fec_index.is_lz() ) + { for( unsigned i = 0; i < blocks; ++i ) + if( zeroed_data_block( prodata, i ) ) + bb_vector_.push_back( i ); } + } + + +long next_pct_pos( const long last_pos, const int pct ) + { + if( pct <= 0 ) return 0; + return std::min( last_pos, (long)( last_pos / ( 100.0 / pct ) ) ); + } + + +// if successful, return the repaired data in prodata +bool repair_prodata( const Fec_index & fec_index, + const Bad_block_index & bb_index, uint8_t * const prodata ) + { + const unsigned bad_blocks = bb_index.bad_blocks(); + if( bad_blocks == 0 ) return true; // nothing to repair + const unsigned fec_blocks = fec_index.fec_blocks(); + if( bad_blocks > fec_blocks ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "Too many damaged blocks (%u).\n Can't repair " + "file if it contains more than %u damaged blocks.\n", + bad_blocks, fec_blocks ); + return false; + } + + const std::vector< unsigned > & bb_vector = bb_index.bb_vector(); + std::vector< unsigned > fbn_vector; + const unsigned long fbs = fec_index.fec_block_size(); + // copy fec blocks into fecbuf where reduction will be performed + uint8_t * const fecbuf = new uint8_t[bad_blocks * fbs]; + for( 
unsigned bi = 0; bi < bad_blocks; ++bi ) + { + fbn_vector.push_back( fec_index.fbn( bi ) ); + std::memcpy( fecbuf + bi * fbs, fec_index.fec_block( bi ), fbs ); + } + const unsigned prodata_blocks = fec_index.prodata_blocks(); + const unsigned long prodata_size = fec_index.prodata_size(); + const bool last_is_missing = bb_vector.back() == prodata_blocks - 1; + // last incomplete data block padded to fbs + uint8_t * const lastbuf = + set_lastbuf( prodata, prodata_size, fbs, last_is_missing ); + fec_index.gf16() ? + rs16_decode( prodata, lastbuf, bb_vector, fbn_vector, fecbuf, fbs, + prodata_blocks ) : + rs8_decode( prodata, lastbuf, bb_vector, fbn_vector, fecbuf, fbs, + prodata_blocks ); + delete[] fecbuf; + if( lastbuf && last_is_missing ) // copy last block to its position + { + const unsigned di = bb_vector.back(); + const unsigned long pos = fec_index.block_pos( di ); + const unsigned long size = fec_index.block_size( di ); + std::memcpy( prodata + pos, lastbuf, size ); + } + if( lastbuf ) delete[] lastbuf; + if( check_md5( prodata, prodata_size, fec_index.prodata_md5() ) ) return true; + if( verbosity >= 0 ) std::fputs( "Repair of input file failed.\n", stderr ); + return false; + } + + +bool check_prodata( const Fec_index & fec_index, + const Bad_block_index & bb_index, + const std::string & input_filename, + const std::string & fec_filename, + const md5_type & computed_prodata_md5, + const bool debug = true, const bool repair = false, + const bool same_size = true ) + { + FILE * const f = debug ? stdout : stderr; + if( verbosity >= ( debug ? 0 : 1 ) ) + fec_index.show_fec_data( input_filename, fec_filename, f ); + if( !same_size && verbosity >= 0 ) + std::fprintf( stderr, "%s\n", size_mismatch_msg ); + const unsigned bad_blocks = bb_index.bad_blocks(); + const bool mismatch = !same_size || !fec_index.prodata_match( input_filename, + computed_prodata_md5, debug ) || bad_blocks; + if( bad_blocks ) + { + if( verbosity >= ( debug ? 
0 : 1 ) ) + { std::fprintf( f, "Block mismatches: %u (%s bytes) spanning %s bytes " + "[%s,%s]\n", bad_blocks, + format_num3( bb_index.bad_data_bytes() ), + format_num3( bb_index.bad_span() ), + format_num3( bb_index.first_bad_pos() ), + format_num3( bb_index.last_bad_pos() ) ); + std::fflush( f ); } + return false; + } + if( mismatch ) return false; + if( verbosity >= 1 ) + std::fputs( !repair ? "Protected data checked successfully.\n" : + "Protected data checked successfully. Repair not needed.\n", f ); + return true; + } + + +void print_blocks( const std::vector< unsigned long > & pos_vector, + const char * const msg, const unsigned long cblock_size ) + { + std::fputs( ( pos_vector.size() == 1 ) ? "block" : "blocks", stdout ); + for( unsigned i = 0; i < pos_vector.size(); ++i ) + std::printf( " %2lu", pos_vector[i] / cblock_size ); + std::fputs( msg, stdout ); + } + + +// replace dirname with destdir in name and write result to outname +void replace_dirname( const std::string & name, const std::string & destdir, + std::string & outname ) + { + unsigned i = name.size(); // size of dirname to be replaced by destdir + while( i > 0 && name[i-1] != '/' ) --i; // point i to basename + outname = destdir; + outname.append( name, i, name.size() - i ); // append basename + } + + +const Fec_index * fec_d_init( const std::string & input_filename, + const std::string & cl_fec_filename, std::string & fec_filename, + const uint8_t ** fecdatap, long & fecdata_size, uint8_t ** prodatap ) + { + if( input_filename == "-" ) { prot_stdin(); return 0; } + if( has_fec_extension2( input_filename ) ) return 0; + const bool from_dir = cl_fec_filename.size() && + cl_fec_filename.end()[-1] == '/'; + + if( cl_fec_filename.size() && !from_dir ) // file or stdin + fec_filename = cl_fec_filename; + else // read fec data from file.fec + { + if( from_dir ) + replace_dirname( input_filename, cl_fec_filename, fec_filename ); + else fec_filename = input_filename; + fec_filename += fec_extension; + 
} + *fecdatap = read_file( fec_filename, &fecdata_size ); + if( !*fecdatap ) return 0; + const Fec_index * const fec_indexp = new Fec_index( *fecdatap, fecdata_size ); + if( !fec_indexp ) { std::free( (void *)*fecdatap ); return 0; } + if( fec_indexp->retval() != 0 ) + { show_file_error( printable_name( fec_filename ), + fec_indexp->error().c_str() ); + delete fec_indexp; std::free( (void *)*fecdatap ); return 0; } + + struct stat in_stats; // not used + const char * const input_filenamep = input_filename.c_str(); + const int infd = open_instream( input_filenamep, &in_stats, false, true ); + if( infd < 0 ) { delete fec_indexp; std::free( (void *)*fecdatap ); return 0; } + const long prodata_size = fec_indexp->prodata_size(); + const long long file_size = lseek( infd, 0, SEEK_END ); + if( prodata_size != file_size ) + { show_file_error( input_filenamep, size_mismatch_msg ); close( infd ); + delete fec_indexp; std::free( (void *)*fecdatap ); return 0; } + *prodatap = (uint8_t *)mmap( 0, prodata_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE, infd, 0 ); + close( infd ); + if( *prodatap == MAP_FAILED ) + { show_file_error( input_filenamep, mmap_msg, errno ); + delete fec_indexp; std::free( (void *)*fecdatap ); return 0; } + return fec_indexp; + } + +} // end namespace + + +/* Check that no variable read from packet overflows unsigned long. + 0 = bad magic, 1 = bad size, 2 = bad crc, else return packet size. 
*/
// A bad version or bad flags are also reported as 2.
// Throws std::bad_alloc if the sizes stored in the packet do not fit in
// size_t.
unsigned Chksum_packet::check_image( const uint8_t * const image_buffer,
                                     const unsigned long max_size )
  {
  // too short to hold a packet, or not a packet at all
  if( max_size < min_packet_size() ) return 0;
  if( !check_fec_magic( image_buffer ) ) return 0;

  // the header must be intact before any of its fields can be trusted
  const bool header_ok = get_le( image_buffer + header_crc_o, crc32_l ) ==
                         compute_header_crc( image_buffer );
  if( !header_ok ) return 2;
  if( !check_version( image_buffer ) ) return 2;
  if( !check_flags( image_buffer ) ) return 2;

  const Chksum_packet packet( image_buffer );
  const unsigned long long block_size = packet.fec_block_size();
  if( !isvalid_fbs( block_size ) ) return 1;

  // the packet must fit in the buffer and hold at most max_k CRCs
  const unsigned long long total_size = packet.packet_size();
  const unsigned crc_size = sizeof packet.crc_array()[0];
  const unsigned limit_k = packet.gf16() ? max_k16 : max_k8;
  if( total_size < min_packet_size() ) return 1;
  if( total_size > max_size ) return 1;
  if( total_size > header_size + limit_k * crc_size + trailer_size ) return 1;

  // the payload must hold exactly one CRC per data block
  const unsigned payload = total_size - header_size - trailer_size;
  const unsigned long long data_size = packet.prodata_size();
  const unsigned long long data_blocks = ceil_divide( data_size, block_size );
  if( payload % crc_size != 0 ) return 1;
  if( payload / crc_size != data_blocks ) return 1;
  if( data_blocks <= 0 || data_blocks > limit_k ) return 1;

  if( !fits_in_size_t( data_size ) || !fits_in_size_t( block_size ) )
    throw std::bad_alloc();
  return total_size;
  }


/* Check that no variable read from packet overflows unsigned long.
   0 = bad magic, 1 = bad size, 2 = bad crc, else return packet size.
*/ +unsigned long Fec_packet::check_image( const uint8_t * const image_buffer, + const unsigned long max_size ) + { + if( max_size < min_packet_size() || + std::memcmp( image_buffer, fec_packet_magic, fec_magic_l ) != 0 ) + return 0; + if( get_le( image_buffer + header_crc_o, crc32_l ) != + compute_header_crc( image_buffer ) ) return 2; + const Fec_packet fec_packet( image_buffer ); + const unsigned long long image_size = fec_packet.packet_size(); + if( image_size < min_packet_size() || image_size > max_size ) return 1; + const unsigned long paysize = image_size - header_size - trailer_size; + const unsigned long payload_crc_o = fec_block_o + paysize; + const unsigned payload_crc = get_le( image_buffer + payload_crc_o, crc32_l ); + if( crc32.compute_crc( image_buffer + fec_block_o, paysize ) != payload_crc ) + return 2; + const unsigned long long fbs = fec_packet.fec_block_size(); + if( !isvalid_fbs( fbs ) || paysize != fbs ) return 1; + if( !fits_in_size_t( fbs ) ) throw std::bad_alloc(); + return image_size; + } + + +int fec_test( const std::vector< std::string > & filenames, + const std::string & cl_fec_filename, + const std::string & default_output_filename, + const char recursive, const bool force, const bool ignore_errors, + const bool repair, const bool to_stdout ) + { + const bool to_file = !to_stdout && default_output_filename.size(); + if( repair && ( to_stdout || to_file ) && filenames.size() != 1 ) + { show_error( "You must specify exactly 1 protected file " + "when redirecting repaired data." ); return 1; } + if( repair && ( to_stdout || to_file ) && recursive ) + { show_error( "Can't redirect repaired data in recursive mode." 
); return 1; } + if( to_stdout ) { outfd = STDOUT_FILENO; if( !check_tty_out() ) return 1; } + else outfd = -1; + const bool to_fixed = !to_stdout && !to_file; + std::string fec_filename; + const uint8_t * fecdata = 0; // buffer containing fec data + long fecdata_size = 0; // size of fec data + const bool from_dir = cl_fec_filename.size() && + cl_fec_filename.end()[-1] == '/'; + + if( cl_fec_filename.size() && !from_dir ) // file or stdin + { + if( filenames.size() != 1 ) + { show_error( "You must specify exactly 1 protected file " + "when reading 1 fec data file." ); return 1; } + fec_filename = cl_fec_filename; + fecdata = read_file( fec_filename, &fecdata_size ); + if( !fecdata ) return 1; + } + + int retval = 0; + const bool one_to_one = !fecdata; + for( unsigned i = 0; i < filenames.size(); ++i ) + { + if( filenames[i] == "-" ) + { prot_stdin(); set_retval( retval, 1 ); continue; } + std::string srcdir; // dirname to be replaced by cl_fec_filename + if( from_dir ) extract_dirname( filenames[i], srcdir ); + std::list< std::string > filelist( 1U, filenames[i] ); + std::string input_filename; + while( next_filename( filelist, input_filename, retval, recursive ) ) + { + if( has_fec_extension2( input_filename ) ) + { set_retval( retval, 1 ); continue; } + if( !fecdata ) // read fec data from file.fec + { + if( from_dir ) replace_dirname( input_filename, srcdir, + cl_fec_filename, fec_filename ); + else fec_filename = input_filename; + fec_filename += fec_extension; + fecdata = read_file( fec_filename, &fecdata_size ); + if( !fecdata ) { set_retval( retval, 1 ); continue; } + } + const bool is_lz = has_lz_extension( input_filename ); + const Fec_index fec_index( fecdata, fecdata_size, ignore_errors, is_lz ); + if( fec_index.retval() != 0 ) + { show_file_error( printable_name( fec_filename ), + fec_index.error().c_str() ); + std::free( (void *)fecdata ); fecdata = 0; + set_retval( retval, 2 ); continue; } + + // mmap is faster than reading the file, but is not 
resizeable + struct stat in_stats; + const char * const input_filenamep = input_filename.c_str(); + const int infd = open_instream( input_filenamep, &in_stats, false, true ); + if( infd < 0 ) { std::free( (void *)fecdata ); fecdata = 0; + set_retval( retval, 1 ); continue; } + const long prodata_size = fec_index.prodata_size(); + const long long file_size = lseek( infd, 0, SEEK_END ); + const bool mmapped = prodata_size <= file_size; + const bool same_size = prodata_size == file_size; + if( !mmapped && !safe_seek( infd, 0, input_filenamep ) ) + { std::free( (void *)fecdata ); fecdata = 0; + set_retval( retval, 1 ); close( infd ); continue; } + uint8_t * const prodata = (uint8_t *)( mmapped ? + mmap( 0, prodata_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, infd, 0 ) : + std::malloc( prodata_size ) ); + if( mmapped && prodata == MAP_FAILED ) + { show_file_error( input_filenamep, mmap_msg, errno ); + set_retval( retval, 1 ); close( infd ); goto err; } + if( !mmapped ) // short file + { + if( !prodata ) + { show_file_error( input_filenamep, mem_msg ); + set_retval( retval, 1 ); close( infd ); goto err; } + const long read_size = readblock( infd, prodata, prodata_size ); + if( read_size < prodata_size ) + { if( errno ) + { show_file_error( input_filenamep, read_error_msg, errno ); + set_retval( retval, 1 ); close( infd ); goto err; } + std::memset( prodata + read_size, 0, prodata_size - read_size ); } + } + close( infd ); + { + md5_type computed_prodata_md5; + compute_md5( prodata, prodata_size, computed_prodata_md5 ); + Bad_block_index bb_index( fec_index, prodata ); + const bool mismatch = !check_prodata( fec_index, bb_index, input_filename, + fec_filename, computed_prodata_md5, false, repair, same_size ); + if( mismatch && !repair ) set_retval( retval, 2 ); + else if( mismatch && repair ) + { + if( !is_lz && !fec_index.has_array() ) + { show_diag_msg( input_filename, "Can't repair. No valid CRC " + "arrays found and protected file not in lzip format." 
); + cleanup_and_fail( 2 ); } + if( verbosity >= 1 ) + std::fprintf( stderr, "Repairing file '%s'\n", input_filenamep ); + if( verbosity >= 0 && !fec_index.has_array() ) + std::fputs( "warning: Repairing without CRC arrays.\n", stderr ); + if( !repair_prodata( fec_index, bb_index, prodata ) ) + cleanup_and_fail( 2 ); + if( to_fixed ) + { + output_filename = insert_fixed( input_filename, false ); + set_signal_handler(); + if( !open_outstream( force, true ) || !check_tty_out() ) + { set_retval( retval, 1 ); return retval; } // don't delete a tty + } + else if( to_file && outfd < 0 ) // open outfd after checking infd + { + output_filename = default_output_filename; + set_signal_handler(); + // check tty only once and don't try to delete a tty + if( !open_outstream( force, false ) || !check_tty_out() ) return 1; + } + if( writeblock( outfd, prodata, prodata_size ) != prodata_size || + !close_outstream( &in_stats ) ) // write repaired prodata + { set_retval( retval, 1 ); cleanup_and_fail( retval ); } + if( verbosity >= 1 ) + std::fprintf( stderr, "Repaired copy of '%s' written to '%s'\n", + input_filenamep, printable_name( output_filename, false ) ); + } + if( ( filelist.size() || i + 1 < filenames.size() ) && verbosity >= 1 ) + std::fputc( '\n', stderr ); + } +err: if( mmapped ) munmap( prodata, prodata_size ); else std::free( prodata ); + if( one_to_one ) { std::free( (void *)fecdata ); fecdata = 0; } + } + } + if( fecdata ) std::free( (void *)fecdata ); + return retval; + } + + +int fec_list( const std::vector< std::string > & filenames, + const bool ignore_errors ) + { + int retval = 0; + bool stdin_used = false; + for( unsigned i = 0; i < filenames.size(); ++i ) + { + if( filenames[i] == "-" ) + { if( stdin_used ) continue; else stdin_used = true; } + if( i > 0 && verbosity >= 0 ) + { std::fputc( '\n', stdout ); std::fflush( stdout ); } + long fecdata_size = 0; // size of fec data + const uint8_t * const fecdata = read_file( filenames[i], &fecdata_size ); + if( 
!fecdata ) { set_retval( retval, 1 ); continue; } + const Fec_index fec_index( fecdata, fecdata_size, ignore_errors ); + if( fec_index.retval() != 0 ) + { show_file_error( printable_name( filenames[i] ), + fec_index.error().c_str() ); + std::free( (void *)fecdata ); set_retval( retval, 2 ); continue; } + if( verbosity >= 0 ) fec_index.show_fec_data( "", filenames[i], stdout ); + std::free( (void *)fecdata ); + } + return retval; + } + + +// write feedback to stdout, diagnostics to stderr +int fec_dc( const std::string & input_filename, + const std::string & cl_fec_filename, const unsigned cblocks ) + { + std::string fec_filename; + const uint8_t * fecdata = 0; + uint8_t * prodata = 0; + long fecdata_size = 0; // size of fec data + const Fec_index * const fec_indexp = fec_d_init( input_filename, + cl_fec_filename, fec_filename, &fecdata, fecdata_size, &prodata ); + if( !fec_indexp ) return 0; + const Fec_index & fec_index = *fec_indexp; + const unsigned long prodata_size = fec_index.prodata_size(); + const unsigned fec_blocks = fec_index.fec_blocks(); + int retval = 0; + if( cblocks > fec_blocks ) + { show_file_error( input_filename.c_str(), "Not so may blocks in fec data." ); + set_retval( retval, 1 ); goto err; } + { + md5_type computed_prodata_md5; + compute_md5( prodata, prodata_size, computed_prodata_md5 ); + Bad_block_index bb_index( fec_index, prodata ); + if( !check_prodata( fec_index, bb_index, input_filename, fec_filename, + computed_prodata_md5 ) ) + { set_retval( retval, 2 ); goto err; } + const unsigned long fbs = fec_index.fec_block_size(); + const unsigned long cblock_size = fec_blocks / cblocks * fbs; + const unsigned long max_saved_size = cblocks * cblock_size; + uint8_t * const sbuf = new uint8_t[max_saved_size]; // saved data bytes + const long last_pos = ( prodata_size % cblock_size != 0 ) ? 
+ prodata_size - prodata_size % cblock_size : prodata_size - cblock_size; + if( verbosity >= 0 ) + { std::printf( "Testing sets of %u block%s of size %s\n", cblocks, + cblocks != 1 ? "s" : "", format_num3( cblock_size ) ); + std::fflush( stdout ); } + unsigned long combinations = 0, repair_attempts = 0, successes = 0, + failed_comparisons = 0; + std::vector< unsigned long > pos_vector; + for( unsigned i = 0; i < cblocks; ++i ) + pos_vector.push_back( i * cblock_size ); + const int saved_verbosity = verbosity; + verbosity = -1; // suppress all messages + while( true ) + { + for( unsigned i = 0; i < cblocks; ++i ) // save blocks + { + const unsigned long pos = pos_vector[i]; + const unsigned long size = std::min( cblock_size, prodata_size - pos ); + std::memcpy( sbuf + i * cblock_size, prodata + pos, size ); + } + for( unsigned i = 0; i < cblocks; ++i ) // set blocks to 0 + { + const unsigned long pos = pos_vector[i]; + std::memset( prodata + pos, 0, std::min( cblock_size, prodata_size - pos ) ); + } + ++combinations; + bb_index.find_bad_blocks( prodata ); + if( check_prodata( fec_index, bb_index, input_filename, fec_filename, + computed_prodata_md5 ) ) + { if( saved_verbosity >= 0 ) + { print_blocks( pos_vector, " nothing to repair\n", cblock_size ); + std::fflush( stdout ); } } + else if( ++repair_attempts, repair_prodata( fec_index, bb_index, prodata ) ) + { + ++successes; + if( saved_verbosity >= 2 ) + { print_blocks( pos_vector, " passed the test\n", cblock_size ); + std::fflush( stdout ); } + if( !check_md5( prodata, prodata_size, computed_prodata_md5 ) ) + { if( saved_verbosity >= 0 ) + { print_blocks( pos_vector, " comparison failed\n", cblock_size ); + std::fflush( stdout ); } + ++failed_comparisons; } + } + else if( saved_verbosity >= 1 ) + { print_blocks( pos_vector, " can't repair\n", cblock_size ); + std::fflush( stdout ); } + for( unsigned i = 0; i < cblocks; ++i ) // restore blocks + { + const unsigned long pos = pos_vector[i]; + const unsigned long 
size = std::min( cblock_size, prodata_size - pos ); + std::memcpy( prodata + pos, sbuf + i * cblock_size, size ); + } + unsigned long pos_limit = last_pos; // advance to next block combination + int i = cblocks - 1; + while( i >= 0 ) + { + if( pos_vector[i] + cblock_size > pos_limit ) + { pos_limit -= cblock_size; --i; continue; } + pos_vector[i] += cblock_size; + for( ; i + 1U < cblocks; ++i ) + pos_vector[i+1] = pos_vector[i] + cblock_size; + break; + } + if( i < 0 ) break; + } + verbosity = saved_verbosity; // restore verbosity level + delete[] sbuf; + + if( verbosity >= 0 ) + { + std::printf( "\n%11s block combinations tested\n%11s total repair attempts" + "\n%11s repair attempts returned with zero status", + format_num3( combinations ), format_num3( repair_attempts ), + format_num3( successes ) ); + if( successes > 0 ) + { + if( failed_comparisons > 0 ) + std::printf( ", of which\n%11s comparisons failed\n", + format_num3( failed_comparisons ) ); + else std::fputs( "\n all comparisons passed\n", stdout ); + } + else std::fputc( '\n', stdout ); + } + } +err: + munmap( prodata, prodata_size ); + delete fec_indexp; std::free( (void *)fecdata ); + return retval; + } + + +int fec_dz( const std::string & input_filename, + const std::string & cl_fec_filename, + std::vector< Block > & range_vector ) + { + std::string fec_filename; + const uint8_t * fecdata = 0; + uint8_t * prodata = 0; + long fecdata_size = 0; // size of fec data + const Fec_index * const fec_indexp = fec_d_init( input_filename, + cl_fec_filename, fec_filename, &fecdata, fecdata_size, &prodata ); + if( !fec_indexp ) return 0; + const Fec_index & fec_index = *fec_indexp; + const long prodata_size = fec_index.prodata_size(); + int retval = 0; + if( !truncate_block_vector( range_vector, prodata_size ) ) + { show_file_error( input_filename.c_str(), "Range is beyond end of file." 
); + set_retval( retval, 1 ); goto err; } + { + md5_type computed_prodata_md5; + compute_md5( prodata, prodata_size, computed_prodata_md5 ); + if( !fec_index.prodata_match( input_filename, computed_prodata_md5 ) ) + { set_retval( retval, 2 ); goto err; } + for( unsigned i = 0; i < range_vector.size(); ++i ) + std::memset( prodata + range_vector[i].pos(), 0, range_vector[i].size() ); + Bad_block_index bb_index( fec_index, prodata ); + if( !check_prodata( fec_index, bb_index, input_filename, fec_filename, + computed_prodata_md5 ) ) + { + if( !repair_prodata( fec_index, bb_index, prodata ) ) + set_retval( retval, 2 ); + else if( !check_md5( prodata, prodata_size, computed_prodata_md5 ) ) + { if( verbosity >= 0 ) std::fputs( "Comparison failed\n", stdout ); + set_retval( retval, 1 ); } + else if( verbosity >= 0 ) + std::fputs( "Input file repaired successfully.\n", stdout ); + } + } +err: + munmap( prodata, prodata_size ); + delete fec_indexp; std::free( (void *)fecdata ); + return retval; + } + + +int fec_dZ( const std::string & input_filename, + const std::string & cl_fec_filename, + const unsigned delta, const int sector_size ) + { + std::string fec_filename; + const uint8_t * fecdata = 0; + uint8_t * prodata = 0; + long fecdata_size = 0; // size of fec data + const Fec_index * const fec_indexp = fec_d_init( input_filename, + cl_fec_filename, fec_filename, &fecdata, fecdata_size, &prodata ); + if( !fec_indexp ) return 0; + const Fec_index & fec_index = *fec_indexp; + const long prodata_size = fec_index.prodata_size(); + int retval = 0; + if( sector_size > prodata_size ) + { show_file_error( input_filename.c_str(), + "Sector size is larger than file size." 
); + set_retval( retval, 1 ); goto err; } + { + md5_type computed_prodata_md5; + compute_md5( prodata, prodata_size, computed_prodata_md5 ); + Bad_block_index bb_index( fec_index, prodata ); + if( !check_prodata( fec_index, bb_index, input_filename, fec_filename, + computed_prodata_md5 ) ) + { set_retval( retval, 2 ); goto err; } + const unsigned long fbs = fec_index.fec_block_size(); + const int rest = std::min( 2UL, sector_size % fbs ); + const long max_saved_size = ( sector_size / fbs + rest ) * fbs; + uint8_t * const sbuf = new uint8_t[max_saved_size]; // saved data bytes + const long last_pos = ( prodata_size % sector_size != 0 ) ? + prodata_size - prodata_size % sector_size : prodata_size - sector_size; + if( verbosity >= 0 ) + { std::printf( "Testing blocks of size %s (delta %s)\n", + format_num3( sector_size ), format_num3( delta ) ); + std::fflush( stdout ); } + unsigned long combinations = 0, repair_attempts = 0, successes = 0, + failed_comparisons = 0; + int pct = ( prodata_size >= 1000 && isatty( STDERR_FILENO ) ) ? 0 : 100; + long pct_pos = ( pct < 100 ) ? 
0 : prodata_size; + const int saved_verbosity = verbosity; + verbosity = -1; // suppress all messages + for( long pos = 0; pos <= last_pos; pos += delta ) + { + if( ( saved_verbosity == 0 || saved_verbosity == 1 ) && pos >= pct_pos ) + { std::fprintf( stderr, "\r%3u%% done\r", pct ); ++pct; + pct_pos = next_pct_pos( last_pos, pct ); } + const long saved_pos = pos - pos % fbs; + const long saved_size = std::min( max_saved_size, prodata_size - saved_pos ); + std::memcpy( sbuf, prodata + saved_pos, saved_size ); // save block + const int zeroed_size = std::min( (long)sector_size, prodata_size - pos ); + std::memset( prodata + pos, 0, zeroed_size ); // set block to 0 + ++combinations; + bb_index.find_bad_blocks( prodata ); + if( check_prodata( fec_index, bb_index, input_filename, fec_filename, + computed_prodata_md5 ) ) + { if( saved_verbosity >= 0 ) + { std::printf( "block %lu,%u nothing to repair\n", pos, zeroed_size ); + std::fflush( stdout ); } } + else if( ++repair_attempts, repair_prodata( fec_index, bb_index, prodata ) ) + { + ++successes; + if( saved_verbosity >= 2 ) + { std::printf( "block %lu,%u passed the test\n", pos, zeroed_size ); + std::fflush( stdout ); } + if( !check_md5( prodata, prodata_size, computed_prodata_md5 ) ) + { if( saved_verbosity >= 0 ) + { std::printf( "block %lu,%u comparison failed\n", pos, zeroed_size ); + std::fflush( stdout ); } + ++failed_comparisons; } + } + else if( saved_verbosity >= 1 ) + { std::printf( "block %lu,%u can't repair\n", pos, zeroed_size ); + std::fflush( stdout ); } + std::memcpy( prodata + saved_pos, sbuf, saved_size ); // restore block + } + verbosity = saved_verbosity; // restore verbosity level + delete[] sbuf; + + if( verbosity >= 0 ) + { + std::printf( "\n%11s blocks tested\n%11s total repair attempts" + "\n%11s repair attempts returned with zero status", + format_num3( combinations ), format_num3( repair_attempts ), + format_num3( successes ) ); + if( successes > 0 ) + { + if( failed_comparisons > 0 ) + 
std::printf( ", of which\n%11s comparisons failed\n", + format_num3( failed_comparisons ) ); + else std::fputs( "\n all comparisons passed\n", stdout ); + } + else std::fputc( '\n', stdout ); + } + } +err: + munmap( prodata, prodata_size ); + delete fec_indexp; std::free( (void *)fecdata ); + return retval; + } @@ -0,0 +1,308 @@ +/* Lziprecover - Data recovery tool for the lzip format + Copyright (C) 2023-2024 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#define _FILE_OFFSET_BITS 64 + +#include <cstdio> +#include <cstring> +#include <list> +#include <string> +#include <vector> +#include <stdint.h> +#include <unistd.h> // STDERR_FILENO + +#include "lzip.h" +#include "md5.h" +#include "fec.h" + +namespace { + +const uint16_t u16_one = 1; +const bool little_endian = *(const uint8_t *)&u16_one == 1; +inline uint16_t swap_bytes( const uint16_t a ) + { return ( a >> 8 ) | ( a << 8 ); } + +struct Galois16_table // addition/subtraction is exclusive or + { + enum { size = 1 << 16, poly = 0x1100B }; // generator polynomial + uint16_t * log, * ilog, * mul_tables; + + Galois16_table() : log( 0 ), ilog( 0 ), mul_tables( 0 ) {} +// ~Galois16_table() { delete[] mul_tables; delete[] ilog; delete[] log; } + + void init() // fill log, inverse log, and multiplication tables + { + if( log ) return; + log = new uint16_t[size]; ilog = new uint16_t[size]; + mul_tables = new uint16_t[3 * 256 * 256]; // LL, LH, HH + for( unsigned b = 1, i = 0; i < size - 1; ++i ) + { + log[b] = i; + ilog[i] = b; + b <<= 1; + if( b & size ) b ^= poly; + } + log[0] = size - 1; // log(0) is not defined, so use a special value + ilog[size-1] = 1; + + uint16_t * p = mul_tables; + for( int i = 0; i < 16; i += 8 ) + for( int j = i; j < 16; j += 8 ) + for( int a = 0; a < 256 << i; a += 1 << i ) + for( int b = 0; b < 256 << j; b += 1 << j ) + *p++ = mul( a, b ); + } + + uint16_t mul( const uint16_t a, const uint16_t b ) const + { + if( a == 0 || b == 0 ) return 0; + const unsigned sum = log[a] + log[b]; + return ( sum >= size - 1 ) ? 
ilog[sum-(size-1)] : ilog[sum]; +// return ilog[(log[a] + log[b]) % (size-1)]; + } + + uint16_t inverse( const uint16_t a ) const { return ilog[size-1-log[a]]; } + } gf; + + +inline bool check_element( const uint16_t * const A, const uint16_t * const B, + const unsigned k, const unsigned row, const unsigned col ) + { + const uint16_t * pa = A + row * k; + const uint16_t * pb = B + col; + uint16_t sum = 0; + for( unsigned i = 0; i < k; ++i, ++pa, pb += k ) + sum ^= gf.mul( *pa, *pb ); + return sum == ( row == col ); + } + +/* Check that A * B = I (A, B, I are square matrices of size k * k). + Check just the diagonals for matrices larger than 1024 x 1024. */ +bool check_inverse( const uint16_t * const A, const uint16_t * const B, + const unsigned k ) + { + const bool print = verbosity >= 1 && k > max_k8 && isatty( STDERR_FILENO ); + for( unsigned row = 0; row < k; ++row ) // multiply A * B + { + if( k <= 1024 ) + for( unsigned col = 0; col < k; ++col ) + { if( !check_element( A, B, k, row, col ) ) + { if( print && row ) std::fputc( '\n', stderr ); return false; } } + else + if( !check_element( A, B, k, row, row ) || + !check_element( A, B, k, row, k - 1 - row ) ) + { if( print && row ) std::fputc( '\n', stderr ); return false; } + if( print ) std::fprintf( stderr, "\r%5u rows checked \r", row + 1 ); + } + return true; // A * B == I + } + + +/* Invert in place a matrix of size k * k. + This is like Gaussian elimination with a virtual identity matrix: + A --some_changes--> I, I --same_changes--> A^-1 + Galois arithmetic is exact. Swapping rows or columns is not needed. 
*/ +bool invert_matrix( uint16_t * const matrix, const unsigned k ) + { + const bool print = verbosity >= 1 && k > max_k8 && isatty( STDERR_FILENO ); + for( unsigned row = 0; row < k; ++row ) + { + uint16_t * const pivot_row = matrix + row * k; + uint16_t pivot = pivot_row[row]; + if( pivot == 0 ) + { if( print && row ) std::fputc( '\n', stderr ); return false; } + if( pivot != 1 ) // scale the pivot_row + { + pivot = gf.inverse( pivot ); + pivot_row[row] = 1; + for( unsigned col = 0; col < k; ++col ) + pivot_row[col] = gf.mul( pivot_row[col], pivot ); + } + // subtract pivot_row from the other rows + for( unsigned row2 = 0; row2 < k; ++row2 ) + if( row2 != row ) + { + uint16_t * const dst_row = matrix + row2 * k; + const uint16_t c = dst_row[row]; dst_row[row] = 0; + for( unsigned col = 0; col < k; ++col ) + dst_row[col] ^= gf.mul( pivot_row[col], c ); + } + if( print ) std::fprintf( stderr, "\r%5u rows inverted\r", row + 1 ); + } + return true; + } + + +// create dec_matrix containing only the rows needed and invert it in place +const uint16_t * init_dec_matrix( const std::vector< unsigned > & bb_vector, + const std::vector< unsigned > & fbn_vector ) + { + const unsigned bad_blocks = bb_vector.size(); + uint16_t * const dec_matrix = new uint16_t[bad_blocks * bad_blocks]; + + // one row for each missing data block + for( unsigned row = 0; row < bad_blocks; ++row ) + { + uint16_t * const dec_row = dec_matrix + row * bad_blocks; + const unsigned fbn = fbn_vector[row] | 0x8000; + for( unsigned col = 0; col < bad_blocks; ++col ) + dec_row[col] = gf.inverse( fbn ^ bb_vector[col] ); + } + if( !invert_matrix( dec_matrix, bad_blocks ) ) + internal_error( "GF(2^16) matrix not invertible." 
); + return dec_matrix; + } + +#if 0 +/* compute dst[] += c * src[] + treat the buffers as arrays of 16-bit Galois values */ +inline void mul_add( const uint8_t * const src, uint8_t * const dst, + const unsigned long fbs, const uint16_t c ) + { + if( c == 0 ) return; // nothing to add + const uint16_t * const src16 = (const uint16_t *)src; + uint16_t * const dst16 = (uint16_t *)dst; + + if( little_endian ) + for( unsigned long i = 0; i < fbs / 2; ++i ) + dst16[i] ^= gf.mul( src16[i], c ); + else // big endian + for( unsigned long i = 0; i < fbs / 2; ++i ) + dst16[i] ^= swap_bytes( gf.mul( swap_bytes( src16[i] ), c ) ); + } +#else + +/* compute dst[] += c * src[] + treat the buffers as arrays of pairs of 16-bit Galois values */ +inline void mul_add( const uint8_t * const src, uint8_t * const dst, + const unsigned long fbs, const uint16_t c ) + { + if( c == 0 ) return; // nothing to add + const int cl = c & 0xFF; // split factor c into low and high bytes + const int ch = c >> 8; + // pointers to the four multiplication tables (c.low/high * src.low/high) + const uint16_t * LL = &gf.mul_tables[cl * 256]; + const uint16_t * LH = &gf.mul_tables[65536 + cl * 256]; + const uint16_t * HL = &gf.mul_tables[65536 + ch]; // step 256 + const uint16_t * HH = &gf.mul_tables[131072 + ch * 256]; + uint16_t L[256]; // extract the two tables for factor c + uint16_t H[256]; + + if( little_endian ) + for( int i = 0; i < 256; ++i ) + { L[i] = *LL++ ^ *HL; HL+=256; H[i] = *LH++ ^ *HH++; } + else // big endian + for( int i = 0; i < 256; ++i ) + { H[i] = swap_bytes( *LL++ ^ *HL ); HL+=256; + L[i] = swap_bytes( *LH++ ^ *HH++ ); } + + const uint32_t * const src32 = (const uint32_t *)src; + uint32_t * const dst32 = (uint32_t *)dst; + + for( unsigned long i = 0; i < fbs / 4; ++i ) + { const uint32_t s = src32[i]; + dst32[i] ^= L[s & 0xFF] ^ H[s >> 8 & 0xFF] ^ + L[s >> 16 & 0xFF] << 16 ^ H[s >> 24] << 16; } + } +#endif + +} // end namespace + + +void gf16_init() { gf.init(); } + +bool 
gf16_check( const std::vector< unsigned > & fbn_vector, const unsigned k ) + { + if( k == 0 ) return true; + gf.init(); + bool good = true; + for( unsigned a = 1; a < gf.size; ++a ) + if( gf.mul( a, gf.inverse( a ) ) != 1 ) + { good = false; + std::fprintf( stderr, "%u * ( 1/%u ) != 1 in GF(2^16)\n", a, a ); } + uint16_t * const enc_matrix = new uint16_t[k * k]; + uint16_t * const dec_matrix = new uint16_t[k * k]; + const bool random = fbn_vector.size() == k; + for( unsigned row = 0; row < k; ++row ) + { + const unsigned fbn = ( random ? fbn_vector[row] : row ) | 0x8000; + uint16_t * const enc_row = enc_matrix + row * k; + for( unsigned col = 0; col < k; ++col ) + enc_row[col] = gf.inverse( fbn ^ col ); + } + std::memcpy( dec_matrix, enc_matrix, k * k * sizeof (uint16_t) ); + if( !invert_matrix( dec_matrix, k ) ) + { good = false; show_error( "GF(2^16) matrix not invertible." ); } + else if( !check_inverse( enc_matrix, dec_matrix, k ) ) + { good = false; show_error( "GF(2^16) matrix A * A^-1 != I" ); } + delete[] dec_matrix; + delete[] enc_matrix; + return good; + } + + +void rs16_encode( const uint8_t * const buffer, const uint8_t * const lastbuf, + uint8_t * const fec_block, const unsigned long fbs, + const unsigned fbn, const unsigned k ) + { + if( !gf.log ) internal_error( "GF(2^16) tables not initialized." ); + /* The encode matrix is a Hilbert matrix of size k * k with one row per + fec block and one column per data block. + The value of each element is computed on the fly with inverse. */ + const unsigned row = fbn | 0x8000; + std::memset( fec_block, 0, fbs ); + for( unsigned col = 0; col < k; ++col ) + { + const uint8_t * const src = + ( col < k - (lastbuf != 0) ) ? 
buffer + col * fbs : lastbuf; + mul_add( src, fec_block, fbs, gf.inverse( row ^ col ) ); + } + } + + +void rs16_decode( uint8_t * const buffer, uint8_t * const lastbuf, + const std::vector< unsigned > & bb_vector, + const std::vector< unsigned > & fbn_vector, + uint8_t * const fecbuf, const unsigned long fbs, + const unsigned k ) + { + gf.init(); + const unsigned bad_blocks = bb_vector.size(); + for( unsigned col = 0, bi = 0; col < k; ++col ) // reduce + { + if( bi < bad_blocks && col == bb_vector[bi] ) { ++bi; continue; } + const uint8_t * const src = + ( col < k - (lastbuf != 0) ) ? buffer + col * fbs : lastbuf; + for( unsigned row = 0; row < bad_blocks; ++row ) + { + const unsigned fbn = fbn_vector[row] | 0x8000; + mul_add( src, fecbuf + row * fbs, fbs, gf.inverse( fbn ^ col ) ); + } + } + const uint16_t * const dec_matrix = init_dec_matrix( bb_vector, fbn_vector ); + for( unsigned col = 0; col < bad_blocks; ++col ) // solve + { + const unsigned di = bb_vector[col]; + uint8_t * const dst = + ( di < k - (lastbuf != 0) ) ? buffer + di * fbs : lastbuf; + std::memset( dst, 0, fbs ); + const uint16_t * const dec_row = dec_matrix + col * bad_blocks; + for( unsigned row = 0; row < bad_blocks; ++row ) + mul_add( fecbuf + row * fbs, dst, fbs, dec_row[row] ); + } + delete[] dec_matrix; + } @@ -0,0 +1,244 @@ +/* Lziprecover - Data recovery tool for the lzip format + Copyright (C) 2023-2024 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <cstdio> +#include <cstring> +#include <list> +#include <string> +#include <vector> +#include <stdint.h> + +#include "lzip.h" +#include "md5.h" +#include "fec.h" + +namespace { + +struct Galois8_table // addition/subtraction is exclusive or + { + enum { size = 1 << 8, poly = 0x11D }; // generator polynomial + uint8_t * log, * ilog, * mul_table; + + Galois8_table() : log( 0 ), ilog( 0 ), mul_table( 0 ) {} +// ~Galois8_table() { delete[] mul_table; delete[] ilog; delete[] log; } + + void init() // fill log, inverse log, and multiplication tables + { + if( log ) return; + log = new uint8_t[size]; ilog = new uint8_t[size]; + mul_table = new uint8_t[size * size]; + for( unsigned b = 1, i = 0; i < size - 1; ++i ) + { + log[b] = i; + ilog[i] = b; + b <<= 1; + if( b & size ) b ^= poly; + } + log[0] = size - 1; // log(0) is not defined, so use a special value + ilog[size-1] = 1; + + for( int i = 1; i < size; ++i ) + { + uint8_t * const mul_row = mul_table + i * size; + for( int j = 1; j < size; ++j ) + mul_row[j] = ilog[(log[i] + log[j]) % (size - 1)]; + } + for( int i = 0; i < size; ++i ) + mul_table[0 * size + i] = mul_table[i * size + 0] = 0; + } + + uint8_t inverse( const uint8_t a ) const { return ilog[size-1-log[a]]; } + } gf; + + +// check that A * B = I (A, B, I are square matrices of size k * k) +bool check_inverse( const uint8_t * const A, const uint8_t * const B, + const unsigned k ) + { + for( unsigned row = 0; row < k; ++row ) // multiply A * B + for( unsigned col = 0; col < k; ++col ) + { + const uint8_t * pa = A + row * k; + const uint8_t * pb = B + col; + uint8_t sum = 0; + for( unsigned i = 0; i < k; ++i, ++pa, pb += k ) + sum ^= gf.mul_table[*pa * gf.size + *pb]; + if( sum != ( row == col ) ) return false; // A * B != I + } + return true; + } + + +/* Invert in place a 
matrix of size k * k. + This is like Gaussian elimination with a virtual identity matrix: + A --some_changes--> I, I --same_changes--> A^-1 + Galois arithmetic is exact. Swapping rows or columns is not needed. */ +bool invert_matrix( uint8_t * const matrix, const unsigned k ) + { + for( unsigned row = 0; row < k; ++row ) + { + uint8_t * const pivot_row = matrix + row * k; + const uint8_t pivot = pivot_row[row]; + if( pivot == 0 ) return false; + if( pivot != 1 ) // scale the pivot_row + { + const uint8_t * const mul_row = + gf.mul_table + gf.inverse( pivot ) * gf.size; + pivot_row[row] = 1; + for( unsigned col = 0; col < k; ++col ) + pivot_row[col] = mul_row[pivot_row[col]]; + } + // subtract pivot_row from the other rows + for( unsigned row2 = 0; row2 < k; ++row2 ) + if( row2 != row ) + { + uint8_t * const dst_row = matrix + row2 * k; + const uint8_t c = dst_row[row]; dst_row[row] = 0; + const uint8_t * const mul_row = gf.mul_table + c * gf.size; + for( unsigned col = 0; col < k; ++col ) + dst_row[col] ^= mul_row[pivot_row[col]]; + } + } + return true; + } + + +// create dec_matrix containing only the rows needed and invert it in place +const uint8_t * init_dec_matrix( const std::vector< unsigned > & bb_vector, + const std::vector< unsigned > & fbn_vector ) + { + const unsigned bad_blocks = bb_vector.size(); + uint8_t * const dec_matrix = new uint8_t[bad_blocks * bad_blocks]; + + // one row for each missing data block + for( unsigned row = 0; row < bad_blocks; ++row ) + { + uint8_t * const dec_row = dec_matrix + row * bad_blocks; + const unsigned fbn = fbn_vector[row] | 0x80; + for( unsigned col = 0; col < bad_blocks; ++col ) + dec_row[col] = gf.inverse( fbn ^ bb_vector[col] ); + } + if( !invert_matrix( dec_matrix, bad_blocks ) ) + internal_error( "GF(2^8) matrix not invertible." 
); + return dec_matrix; + } + + +/* compute dst[] += c * src[] + treat the buffers as arrays of quadruples of 8-bit Galois values */ +inline void mul_add( const uint8_t * const src, uint8_t * const dst, + const unsigned long fbs, const uint8_t c ) + { + if( c == 0 ) return; // nothing to add + const uint8_t * const mul_row = gf.mul_table + c * gf.size; + const uint32_t * const src32 = (const uint32_t *)src; + uint32_t * const dst32 = (uint32_t *)dst; + + for( unsigned long i = 0; i < fbs / 4; ++i ) + { const uint32_t s = src32[i]; + dst32[i] ^= mul_row[s & 0xFF] ^ mul_row[s >> 8 & 0xFF] << 8 ^ + mul_row[s >> 16 & 0xFF] << 16 ^ mul_row[s >> 24] << 24; } + } + +} // end namespace + + +void gf8_init() { gf.init(); } + +bool gf8_check( const std::vector< unsigned > & fbn_vector, const unsigned k ) + { + if( k == 0 ) return true; + gf.init(); + bool good = true; + for( unsigned a = 1; a < gf.size; ++a ) + if( gf.mul_table[a * gf.size + gf.inverse( a )] != 1 ) + { good = false; + std::fprintf( stderr, "%u * ( 1/%u ) != 1 in GF(2^8)\n", a, a ); } + uint8_t * const enc_matrix = new uint8_t[k * k]; + uint8_t * const dec_matrix = new uint8_t[k * k]; + const bool random = fbn_vector.size() == k; + for( unsigned row = 0; row < k; ++row ) + { + const unsigned fbn = ( random ? fbn_vector[row] : row ) | 0x80; + uint8_t * const enc_row = enc_matrix + row * k; + for( unsigned col = 0; col < k; ++col ) + enc_row[col] = gf.inverse( fbn ^ col ); + } + std::memcpy( dec_matrix, enc_matrix, k * k ); + if( !invert_matrix( dec_matrix, k ) ) + { good = false; show_error( "GF(2^8) matrix not invertible." 
); } + else if( !check_inverse( enc_matrix, dec_matrix, k ) ) + { good = false; show_error( "GF(2^8) matrix A * A^-1 != I" ); } + delete[] dec_matrix; + delete[] enc_matrix; + return good; + } + + +void rs8_encode( const uint8_t * const buffer, const uint8_t * const lastbuf, + uint8_t * const fec_block, const unsigned long fbs, + const unsigned fbn, const unsigned k ) + { + if( !gf.log ) internal_error( "GF(2^8) tables not initialized." ); + /* The encode matrix is a Hilbert matrix of size k * k with one row per + fec block and one column per data block. + The value of each element is computed on the fly with inverse. */ + const unsigned row = fbn | 0x80; + std::memset( fec_block, 0, fbs ); + for( unsigned col = 0; col < k; ++col ) + { + const uint8_t * const src = + ( col < k - (lastbuf != 0) ) ? buffer + col * fbs : lastbuf; + mul_add( src, fec_block, fbs, gf.inverse( row ^ col ) ); + } + } + + +void rs8_decode( uint8_t * const buffer, uint8_t * const lastbuf, + const std::vector< unsigned > & bb_vector, + const std::vector< unsigned > & fbn_vector, + uint8_t * const fecbuf, const unsigned long fbs, + const unsigned k ) + { + gf.init(); + const unsigned bad_blocks = bb_vector.size(); + for( unsigned col = 0, bi = 0; col < k; ++col ) // reduce + { + if( bi < bad_blocks && col == bb_vector[bi] ) { ++bi; continue; } + const uint8_t * const src = + ( col < k - (lastbuf != 0) ) ? buffer + col * fbs : lastbuf; + for( unsigned row = 0; row < bad_blocks; ++row ) + { + const unsigned fbn = fbn_vector[row] | 0x80; + mul_add( src, fecbuf + row * fbs, fbs, gf.inverse( fbn ^ col ) ); + } + } + const uint8_t * const dec_matrix = init_dec_matrix( bb_vector, fbn_vector ); + for( unsigned col = 0; col < bad_blocks; ++col ) // solve + { + const unsigned di = bb_vector[col]; + uint8_t * const dst = + ( di < k - (lastbuf != 0) ) ? 
buffer + di * fbs : lastbuf; + std::memset( dst, 0, fbs ); + const uint8_t * const dec_row = dec_matrix + col * bad_blocks; + for( unsigned row = 0; row < bad_blocks; ++row ) + mul_add( fecbuf + row * fbs, dst, fbs, dec_row[row] ); + } + delete[] dec_matrix; + } diff --git a/lunzcrash.cc b/lunzcrash.cc index ad05697..a1360cf 100644 --- a/lunzcrash.cc +++ b/lunzcrash.cc @@ -205,15 +205,16 @@ int lunzcrash_bit( const char * const input_filename, if( verbosity >= 0 ) { - std::printf( "\n%9ld bytes tested\n%9ld total decompressions" - "\n%9ld decompressions returned with zero status", - positions, decompressions, successes ); + std::printf( "\n%11s bytes tested\n%11s total decompressions" + "\n%11s decompressions returned with zero status", + format_num3( positions ), format_num3( decompressions ), + format_num3( successes ) ); if( successes > 0 ) { if( failed_comparisons > 0 ) - std::printf( ", of which\n%9ld comparisons failed\n", - failed_comparisons ); - else std::fputs( "\n all comparisons passed\n", stdout ); + std::printf( ", of which\n%11s comparisons failed\n", + format_num3( failed_comparisons ) ); + else std::fputs( "\n all comparisons passed\n", stdout ); } else std::fputc( '\n', stdout ); } @@ -319,15 +320,16 @@ int lunzcrash_block( const char * const input_filename, if( verbosity >= 0 ) { - std::printf( "\n%9ld blocks tested\n%9ld total decompressions" - "\n%9ld decompressions returned with zero status", - decompressions, decompressions, successes ); + std::printf( "\n%11s blocks tested\n%11s total decompressions" + "\n%11s decompressions returned with zero status", + format_num3( decompressions ), format_num3( decompressions ), + format_num3( successes ) ); if( successes > 0 ) { if( failed_comparisons > 0 ) - std::printf( ", of which\n%9ld comparisons failed\n", - failed_comparisons ); - else std::fputs( "\n all comparisons passed\n", stdout ); + std::printf( ", of which\n%11s comparisons failed\n", + format_num3( failed_comparisons ) ); + else 
std::fputs( "\n all comparisons passed\n", stdout ); } else std::fputc( '\n', stdout ); } @@ -357,7 +359,7 @@ int md5sum_files( const std::vector< std::string > & filenames ) while( true ) { const int len = readblock( infd, buffer, buffer_size ); - if( len != buffer_size && errno ) throw Error( "Read error" ); + if( len != buffer_size && errno ) throw Error( read_error_msg ); if( len > 0 ) md5sum.md5_update( buffer, len ); if( len < buffer_size ) break; } @@ -98,9 +98,6 @@ struct Len_model }; -// defined in main.cc -extern int verbosity; - class Pretty_print // requires global var 'int verbosity' { std::string name_; @@ -154,13 +151,17 @@ class CRC32 uint32_t data[256]; // Table of CRCs of all 8-bit messages. public: - CRC32() + explicit CRC32( const bool castagnoli = false ) { + const unsigned cpol = 0x82F63B78U; // CRC32-C Castagnoli polynomial + const unsigned ipol = 0xEDB88320U; // IEEE 802.3 Ethernet polynomial + const unsigned poly = castagnoli ? cpol : ipol; + for( unsigned n = 0; n < 256; ++n ) { unsigned c = n; for( int k = 0; k < 8; ++k ) - { if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; } + { if( c & 1 ) c = poly ^ ( c >> 1 ); else c >>= 1; } data[n] = c; } } @@ -179,6 +180,15 @@ public: c = data[(c^buffer[i])&0xFF] ^ ( c >> 8 ); crc = c; } + + uint32_t compute_crc( const uint8_t * const buffer, + const unsigned long size ) const + { + uint32_t crc = 0xFFFFFFFFU; + for( unsigned long i = 0; i < size; ++i ) + crc = data[(crc^buffer[i])&0xFF] ^ ( crc >> 8 ); + return crc ^ 0xFFFFFFFFU; + } }; extern const CRC32 crc32; @@ -313,12 +323,12 @@ struct Cl_options // command-line options { bool ignore_empty; bool ignore_errors; - bool ignore_marking; + bool ignore_nonzero; bool ignore_trailing; bool loose_trailing; Cl_options() - : ignore_empty( true ), ignore_errors( false ), ignore_marking( true ), + : ignore_empty( false ), ignore_errors( false ), ignore_nonzero( false ), ignore_trailing( true ), loose_trailing( false ) {} }; @@ -333,6 +343,8 @@ 
class Block public: Block( const long long p, const long long s ) : pos_( p ), size_( s ) {} + Block & assign( const long long p, const long long s ) + { pos_ = p; size_ = s; return *this; } long long pos() const { return pos_; } long long size() const { return size_; } @@ -354,6 +366,8 @@ public: { return pos_ < b.end() && b.pos_ < end(); } bool overlaps( const long long pos, const long long size ) const { return pos_ < pos + size && pos < end(); } + bool touches( const Block & b ) const // blocks are mergeable + { return pos_ <= b.end() && b.pos_ <= end(); } Block split( const long long pos ); }; @@ -410,8 +424,10 @@ const char * const bad_magic_msg = "Bad magic number (file not in lzip format)." const char * const bad_dict_msg = "Invalid dictionary size in member header."; const char * const corrupt_mm_msg = "Corrupt header in multimember file."; const char * const empty_msg = "Empty member not allowed."; -const char * const marking_msg = "Marking data not allowed."; +const char * const nonzero_msg = "Nonzero first LZMA byte."; const char * const trailing_msg = "Trailing data not allowed."; +const char * const mmap_msg = "Can't mmap"; +const char * const short_file_msg = "Input file is too short."; // defined in alone_to_lz.cc int alone_to_lz( const int infd, const Pretty_print & pp ); @@ -446,17 +462,13 @@ int dump_members( const std::vector< std::string > & filenames, const bool force, const bool strip, const bool to_stdout ); int remove_members( const std::vector< std::string > & filenames, const Cl_options & cl_opts, const Member_list & member_list ); -int clear_marking( const std::vector< std::string > & filenames, - const Cl_options & cl_opts ); +int nonzero_repair( const std::vector< std::string > & filenames, + const Cl_options & cl_opts ); // defined in list.cc int list_files( const std::vector< std::string > & filenames, const Cl_options & cl_opts ); -// defined in lzip_index.cc -int seek_read( const int fd, uint8_t * const buf, const int size, - const 
long long pos ); - // defined in lunzcrash.cc int lunzcrash_bit( const char * const input_filename, const Cl_options & cl_opts ); @@ -483,9 +495,11 @@ bool open_outstream( const bool force, const bool protect, bool output_file_exists(); void cleanup_and_fail( const int retval ); bool check_tty_out(); +void format_trailing_bytes( const uint8_t * const data, const int size, + std::string & msg ); void set_signal_handler(); bool close_outstream( const struct stat * const in_statsp ); -std::string insert_fixed( std::string name ); +std::string insert_fixed( std::string name, const bool append_lz = true ); void show_2file_error( const char * const msg1, const char * const name1, const char * const name2, const char * const msg2 ); class Range_decoder; diff --git a/lzip_index.cc b/lzip_index.cc index 459338e..332fe63 100644 --- a/lzip_index.cc +++ b/lzip_index.cc @@ -67,13 +67,10 @@ void Lzip_index::set_num_error( const char * const msg, unsigned long long num ) bool Lzip_index::read_header( const int fd, Lzip_header & header, - const long long pos, const bool ignore_marking ) + const long long pos ) { if( seek_read( fd, header.data, header.size, pos ) != header.size ) { set_errno_error( "Error reading member header: " ); return false; } - uint8_t byte; - if( !ignore_marking && readblock( fd, &byte, 1 ) == 1 && byte != 0 ) - { error_ = marking_msg; retval_ = 2; return false; } return true; } @@ -123,8 +120,7 @@ bool Lzip_index::skip_gap( const int fd, unsigned long long & pos, { while( i > trailer.size && buffer[i-9] == 0 ) --i; continue; } if( member_size > ipos + i || !trailer.check_consistency() ) continue; Lzip_header header; - if( !read_header( fd, header, ipos + i - member_size, - cl_opts.ignore_marking ) ) return false; + if( !read_header( fd, header, ipos + i - member_size ) ) return false; if( !header.check( ignore_bad_ds ) ) continue; const Lzip_header & header2 = *(const Lzip_header *)( buffer + i ); const bool full_h2 = bsize - i >= header.size; @@ -153,8 
+149,6 @@ bool Lzip_index::skip_gap( const int fd, unsigned long long & pos, { error_ = trailing_msg; retval_ = 2; return false; } } const unsigned long long data_size = trailer.data_size(); - if( !cl_opts.ignore_empty && data_size == 0 ) - { error_ = empty_msg; retval_ = 2; return false; } pos = ipos + i - member_size; // good member const unsigned dictionary_size = header.dictionary_size(); if( dictionary_size_ < dictionary_size ) @@ -192,16 +186,16 @@ Lzip_index::Lzip_index( const int infd, const Cl_options & cl_opts, { if( insize < 0 ) { set_errno_error( "Input file is not seekable: " ); return; } + Lzip_header header; + if( insize >= header.size && + ( !read_header( infd, header, 0 ) || + !check_header( header, ignore_bad_ds ) ) ) return; if( insize < min_member_size ) - { error_ = "Input file is too short."; retval_ = 2; return; } + { error_ = short_file_msg; retval_ = 2; return; } if( insize > INT64_MAX ) { error_ = "Input file is too long (2^63 bytes or more)."; retval_ = 2; return; } - Lzip_header header; - if( !read_header( infd, header, 0, cl_opts.ignore_marking ) || - !check_header( header, ignore_bad_ds ) ) return; - // pos always points to a header or to ( EOF || max_pos ) unsigned long long pos = ( max_pos > 0 ) ? 
max_pos : insize; while( pos >= min_member_size ) @@ -219,8 +213,7 @@ Lzip_index::Lzip_index( const int infd, const Cl_options & cl_opts, continue; else return; } set_num_error( "Bad trailer at pos ", pos - trailer.size ); break; } - if( !read_header( infd, header, pos - member_size, cl_opts.ignore_marking ) ) - break; + if( !read_header( infd, header, pos - member_size ) ) break; if( !header.check( ignore_bad_ds ) ) // bad header { if( ignore_gaps || member_vector.empty() ) @@ -229,8 +222,6 @@ Lzip_index::Lzip_index( const int infd, const Cl_options & cl_opts, set_num_error( "Bad header at pos ", pos - member_size ); break; } const unsigned long long data_size = trailer.data_size(); - if( !cl_opts.ignore_empty && data_size == 0 ) - { error_ = empty_msg; retval_ = 2; break; } pos -= member_size; // good member const unsigned dictionary_size = header.dictionary_size(); if( dictionary_size_ < dictionary_size ) @@ -246,6 +237,10 @@ Lzip_index::Lzip_index( const int infd, const Cl_options & cl_opts, if( retval_ == 0 ) { error_ = "Can't create file index."; retval_ = 2; } return; } + if( !cl_opts.ignore_empty && member_vector.size() > 1 ) + for( unsigned long i = 0; i < member_vector.size(); ++i ) + if( member_vector[i].dblock.size() == 0 ) + { member_vector.clear(); error_ = empty_msg; retval_ = 2; return; } std::reverse( member_vector.begin(), member_vector.end() ); for( unsigned long i = 0; ; ++i ) { @@ -272,7 +267,7 @@ Lzip_index::Lzip_index( const std::vector< int > & infd_vector, if( insize < 0 ) { set_errno_error( "Input file is not seekable: " ); return; } if( insize < min_member_size ) - { error_ = "Input file is too short."; retval_ = 2; return; } + { error_ = short_file_msg; retval_ = 2; return; } if( insize > INT64_MAX ) { error_ = "Input file is too long (2^63 bytes or more)."; retval_ = 2; return; } diff --git a/lzip_index.h b/lzip_index.h index 95e277d..9c8bd2b 100644 --- a/lzip_index.h +++ b/lzip_index.h @@ -28,8 +28,8 @@ class Lzip_index : dblock( dpos, 
dsize ), mblock( mpos, msize ), dictionary_size( dict_size ) {} - bool operator==( const Member & m ) const { return ( mblock == m.mblock ); } - bool operator!=( const Member & m ) const { return ( mblock != m.mblock ); } + bool operator==( const Member & m ) const { return mblock == m.mblock; } + bool operator!=( const Member & m ) const { return mblock != m.mblock; } }; // member_vector only contains members with a valid header. @@ -43,8 +43,7 @@ class Lzip_index bool check_header( const Lzip_header & header, const bool ignore_bad_ds ); void set_errno_error( const char * const msg ); void set_num_error( const char * const msg, unsigned long long num ); - bool read_header( const int fd, Lzip_header & header, const long long pos, - const bool ignore_marking = true ); + bool read_header( const int fd, Lzip_header & header, const long long pos ); bool read_trailer( const int fd, Lzip_trailer & trailer, const long long pos ); bool skip_gap( const int fd, unsigned long long & pos, @@ -94,3 +93,6 @@ public: unsigned dictionary_size( const long i ) const { return member_vector[i].dictionary_size; } }; + +int seek_read( const int fd, uint8_t * const buf, const int size, + const long long pos ); @@ -26,12 +26,13 @@ #include <algorithm> #include <cctype> #include <cerrno> -#include <climits> // SSIZE_MAX +#include <climits> // CHAR_BIT, SSIZE_MAX #include <csignal> #include <cstdio> #include <cstdlib> #include <cstring> #include <new> +#include <list> #include <string> #include <vector> #include <fcntl.h> @@ -42,8 +43,10 @@ #if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__ #include <io.h> #if defined __MSVCRT__ +#include <direct.h> #define fchmod(x,y) 0 #define fchown(x,y,z) 0 +#define mkdir(name,mode) _mkdir(name) #define SIGHUP SIGTERM #define S_ISSOCK(x) 0 #ifndef S_IRGRP @@ -62,6 +65,8 @@ #include "arg_parser.h" #include "lzip.h" #include "decoder.h" +#include "md5.h" +#include "fec.h" #ifndef O_BINARY #define O_BINARY 0 @@ -77,10 +82,7 @@ #endif bool 
fits_in_size_t( const unsigned long long size ) // fits also in long - { return ( sizeof (long) <= sizeof (size_t) && size <= LONG_MAX ) || - ( sizeof (int) <= sizeof (size_t) && size <= INT_MAX ); } - -int verbosity = 0; + { return sizeof (long) <= sizeof (size_t) && size <= LONG_MAX; } const char * const program_name = "lziprecover"; std::string output_filename; // global vars for output file @@ -95,33 +97,29 @@ const struct { const char * from; const char * to; } known_extensions[] = { { ".tlz", ".tar" }, { 0, 0 } }; -enum Mode { m_none, m_alone_to_lz, m_byte_repair, m_clear_marking, - m_debug_byte_repair, m_debug_decompress, m_debug_delay, - m_decompress, m_dump, m_list, m_md5sum, m_merge, m_nrep_stats, - m_range_dec, m_remove, m_reproduce, m_show_packets, m_split, - m_strip, m_test, m_unzcrash_bit, m_unzcrash_block }; +enum Mode { m_none, m_alone_to_lz, m_byte_repair, m_check, m_debug_byte_repair, + m_debug_decompress, m_debug_delay, m_decompress, m_dump, + m_fec_create, m_fec_repair, m_fec_test, m_fec_list, m_fec_dc, + m_fec_dz, m_fec_dZ, m_list, m_md5sum, m_merge, m_nonzero_repair, + m_nrep_stats, m_range_dec, m_remove, m_reproduce, m_show_packets, + m_split, m_strip, m_test, m_unzcrash_bit, m_unzcrash_block }; /* Variable used in signal handler context. It is not declared volatile because the handler never returns. */ bool delete_output_on_interrupt = false; -void show_help() +void show_help( const long num_online ) { std::printf( "Lziprecover is a data recovery tool and decompressor for files in the lzip\n" - "compressed data format (.lz). Lziprecover is able to repair slightly damaged\n" - "files (up to one single-byte error per member), produce a correct file by\n" - "merging the good parts of two or more damaged copies, reproduce a missing\n" - "(zeroed) sector using a reference file, extract data from damaged files,\n" - "decompress files, and test integrity of files.\n" + "compressed data format (.lz). 
Lziprecover also provides Forward Error\n" + "Correction (FEC) able to repair any kind of file.\n" "\nWith the help of lziprecover, losing an entire archive just because of a\n" "corrupt byte near the beginning is a thing of the past.\n" "\nLziprecover can remove the damaged members from multimember files, for\n" "example multimember tar.lz archives.\n" "\nLziprecover provides random access to the data in multimember files; it only\n" "decompresses the members containing the desired data.\n" - "\nLziprecover facilitates the management of metadata stored as trailing data\n" - "in lzip files.\n" "\nLziprecover is not a replacement for regular backups, but a last line of\n" "defense for the case where the backups are also damaged.\n" "\nUsage: %s [options] [files]\n", invocation_name ); @@ -130,6 +128,8 @@ void show_help() " -V, --version output version information and exit\n" " -a, --trailing-error exit with error status if trailing data\n" " -A, --alone-to-lz convert lzma-alone files to lzip format\n" + " -b, --block-size=<bytes> make FEC block size a multiple of <bytes>\n" + " -B, --byte-repair try to repair a corrupt byte in file\n" " -c, --stdout write to standard output, keep input files\n" " -d, --decompress decompress, test compressed file integrity\n" " -D, --range-decompress=<n-m> decompress a range of bytes to stdout\n" @@ -138,39 +138,54 @@ void show_help() " --lzip-name=<name> name of lzip executable for --reproduce\n" " --reference-file=<file> reference file for --reproduce\n" " -f, --force overwrite existing output files\n" - " -i, --ignore-errors ignore some errors in -d, -D, -l, -t, --dump\n" + " -F, --fec=c[N]|r|t|l create, repair, test, list (using) fec file\n" + " -0 .. 
-9 set FEC fragmentation level [default 9]\n" + " --fec-file=<file>[/] read fec file from <file> or directory\n" + " -i, --ignore-errors ignore non-fatal errors\n" " -k, --keep keep (don't delete) input files\n" " -l, --list print (un)compressed file sizes\n" " -m, --merge repair errors in file using several copies\n" - " -o, --output=<file> place the output into <file>\n" + " -n, --threads=<n> set number of threads for fec create [%ld]\n" + " -o, --output=<file>[/] place the output into <file> or directory\n" " -q, --quiet suppress all messages\n" - " -R, --byte-repair try to repair a corrupt byte in file\n" + " -r, --recursive (fec) operate recursively on directories\n" + " -R, --dereference-recursive (fec) recursively follow symbolic links\n" " -s, --split split multimember file in single-member files\n" " -t, --test test compressed file integrity\n" " -v, --verbose be verbose (a 2nd -v gives more)\n" " --dump=<list>:d:e:t dump members, damaged/empty, tdata to stdout\n" " --remove=<list>:d:e:t remove members, tdata from files in place\n" " --strip=<list>:d:e:t copy files to stdout stripping members given\n" - " --empty-error exit with error status if empty member in file\n" - " --marking-error exit with error status if 1st LZMA byte not 0\n" + " --ignore-empty ignore empty members in multimember files\n" + " --ignore-nonzero ignore a nonzero first LZMA byte\n" " --loose-trailing allow trailing data seeming corrupt header\n" - " --clear-marking reset the first LZMA byte of each member\n" ); + " --nonzero-repair repair in place a nonzero first LZMA byte\n", + num_online ); if( verbosity >= 1 ) { std::printf( "\nDebug options for experts:\n" " -E, --debug-reproduce=<range>[,ss] set range to 0 and try to reproduce file\n" + " -F, --fec=dc<n> test repair combinations of n zeroed blocks\n" + " -F, --fec=dz<range>[:<range>]... 
test repair zeroed block(s) at range(s)\n" + " -F, --fec=dZ<size>[,<delta>] test repair zeroed blocks of size <size>\n" " -M, --md5sum print the MD5 digests of the input files\n" " -S, --nrep-stats[=<val>] print stats of N-byte repeated sequences\n" " -U, --unzcrash=1|B<size> test 1-bit or block errors in input file\n" " -W, --debug-decompress=<pos>,<val> set pos to val and decompress to stdout\n" " -X, --show-packets[=<pos>,<val>] show in stdout the decoded LZMA packets\n" " -Y, --debug-delay=<range> find max error detection delay in <range>\n" - " -Z, --debug-byte-repair=<pos>,<val> test repair one-byte error at <pos>\n" ); + " -Z, --debug-byte-repair=<pos>,<val> test repair one-byte error at <pos>\n" + " --check=<size> check creation of FEC decode matrix\n" + " --debug=<level> print parallel FEC statistics to stderr\n" + " --gf16 use GF(2^16) to create fec files\n" + " --random create fec files with random block numbers\n" ); } std::printf( "\nIf no file names are given, or if a file is '-', lziprecover decompresses\n" "from standard input to standard output.\n" "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" + "The argument to --fec=create may be a number of blocks (-Fc20), a\n" + "percentage (-Fc5%%), or a size in bytes (-Fc10KiB).\n" "\nTo extract all the files from archive 'foo.tar.lz', use the commands\n" "'tar -xf foo.tar.lz' or 'lziprecover -cd foo.tar.lz | tar -xf -'.\n" "\nExit status: 0 for a normal exit, 1 for environmental problems\n" @@ -279,13 +294,14 @@ next: namespace { +const char * const inv_arg_msg = "Invalid argument in"; + // Recognized formats: <digit> 'a' m[<match_length>] int parse_lzip_level( const char * const arg, const char * const option_name ) { if( *arg == 'a' || std::isdigit( *(const unsigned char *)arg ) ) return *arg; if( *arg != 'm' ) - { show_option_error( arg, "Invalid argument in", option_name ); - std::exit( 1 ); } + { 
show_option_error( arg, inv_arg_msg, option_name ); std::exit( 1 ); } if( arg[1] == 0 ) return -1; return -getnum( arg + 1, option_name, 0, min_match_len_limit, max_match_len ); } @@ -325,6 +341,55 @@ const char * parse_range( const char * const arg, const char * const pn, } +// Insert b in its place or merge it with contiguous or overlapping blocks. +void insert_block_sorted( std::vector< Block > & block_vector, const Block & b ) + { + if( block_vector.empty() || b.pos() > block_vector.back().end() ) + { block_vector.push_back( b ); return; } // append at the end + const long long pos = b.pos(); + const long long end = b.end(); + for( unsigned long i = 0; i < block_vector.size(); ++i ) + if( end <= block_vector[i].pos() ) // maybe insert b before i + { + if( end < block_vector[i].pos() && + ( i == 0 || pos > block_vector[i-1].end() ) ) + { block_vector.insert( block_vector.begin() + i, b ); return; } + break; + } + for( unsigned long i = 0; i < block_vector.size(); ++i ) + if( block_vector[i].touches( b ) ) // merge b with blocks touching it + { + unsigned long j = i; // indexes of first/last mergeable blocks + while( j + 1 < block_vector.size() && block_vector[j+1].touches( b ) ) + ++j; + const long long new_pos = std::min( pos, block_vector[i].pos() ); + const long long new_end = std::max( end, block_vector[j].end() ); + block_vector[i].assign( new_pos, new_end - new_pos ); + if( i < j ) block_vector.erase( block_vector.begin() + i + 1, + block_vector.begin() + j + 1 ); + break; + } + } + +/* Recognized format: <range>[:<range>]... + Allow unordered, overlapping ranges. Return ranges sorted and merged. 
*/ +void parse_range_vector( const char * const arg, const char * const pn, + std::vector< Block > & range_vector ) + { + Block range( 0, 0 ); + const char * p = arg; + while( true ) + { + p = parse_range( p, pn, range ); + insert_block_sorted( range_vector, range ); + if( *p == 0 ) return; + if( *p == ':' ) { ++p; if( *p == 0 ) return; else continue; } + show_option_error( p, "Extra characters in", pn ); + std::exit( 1 ); + } + } + + void one_file( const int files ) { if( files != 1 ) @@ -355,6 +420,81 @@ void set_mode( Mode & program_mode, const Mode new_mode ) } +// return true if arg is a non-empty prefix of target +bool compare_prefix( const char * const arg, const char * const target, + const char * const option_name = 0, + unsigned long * const fb_or_pctp = 0, char * fctypep = 0 ) + { + if( arg[0] == target[0] ) + for( int i = 1; i < INT_MAX; ++i ) + { + if( arg[i] == 0 ) return true; + if( fb_or_pctp && std::isdigit( arg[i] ) ) + { + const char * tail = arg + i; + const int llimit = std::strchr( tail, '.' ) ? 0 : 1; + *fb_or_pctp = getnum( tail, option_name, 0, llimit, LONG_MAX, &tail ); + if( *tail == 0 ) + { if( tail[-1] == 'B' ) { *fctypep = fc_bytes; return true; } + if( std::isdigit( tail[-1] ) ) + { if( *fb_or_pctp <= max_nk16 ) + { *fctypep = fc_blocks; return true; } + getnum( arg + 1, option_name, 0, 1, max_nk16 ); } } + else if( *fb_or_pctp <= 100 && std::isdigit( tail[-1] ) ) + { if( *tail == '%' && tail[1] == 0 ) + { *fb_or_pctp *= 1000; *fctypep = fc_percent; return true; } + if( *tail == '.' 
&& std::isdigit( *++tail ) ) + { for( int j = 0; j < 3; ++j ) { *fb_or_pctp *= 10; + if( std::isdigit( *tail ) ) *fb_or_pctp += *tail++ - '0'; } + if( *tail >= '5' && *tail <= '9' ) { ++tail; ++*fb_or_pctp; } + while( std::isdigit( *tail ) ) { ++tail; + if( *fb_or_pctp == 0 && tail[-1] > '0' ) *fb_or_pctp = 1; } + if( *tail == '%' && tail[1] == 0 && *fb_or_pctp <= 100000 && + *fb_or_pctp > 0 ) { *fctypep = fc_percent; return true; } } } + return false; + } + if( arg[i] != target[i] ) break; + } + return false; + } + + +void parse_fec( const char * const arg, const char * const option_name, + Mode & program_mode, unsigned long & fb_or_pct, + unsigned & cblocks, unsigned & delta, int & sector_size, + std::vector< Block > & range_vector, char & fctype ) + { + if( compare_prefix( arg, "create", option_name, &fb_or_pct, &fctype ) ) + set_mode( program_mode, m_fec_create ); + else if( compare_prefix( arg, "repair" ) ) + set_mode( program_mode, m_fec_repair ); + else if( compare_prefix( arg, "test" ) ) + set_mode( program_mode, m_fec_test ); + else if( compare_prefix( arg, "list" ) ) + set_mode( program_mode, m_fec_list ); + else if( arg[0] == 'd' && arg[1] == 'c' ) + { const char * tail = arg + 2; + cblocks = getnum( tail, option_name, 0, 1, max_nk16, &tail ); + if( *tail != 0 ) + { show_option_error( arg, inv_arg_msg, option_name ); std::exit( 1 ); } + set_mode( program_mode, m_fec_dc ); } + else if( arg[0] == 'd' && arg[1] == 'z' ) + { parse_range_vector( arg + 2, option_name, range_vector ); + set_mode( program_mode, m_fec_dz ); } + else if( arg[0] == 'd' && arg[1] == 'Z' ) + { const char * tail = arg + 2; + sector_size = getnum( tail, option_name, 0, 1, INT_MAX, &tail ); + if( *tail == 0 ) delta = sector_size; + else if( *tail == ',' ) + delta = getnum( tail + 1, option_name, 0, 1, INT_MAX ); + else { show_option_error( arg, "Comma expected before delta in", + option_name ); std::exit( 1 ); } + set_mode( program_mode, m_fec_dZ ); } + else + { show_option_error( arg, 
inv_arg_msg, option_name ); std::exit( 1 ); } + } + + void parse_u( const char * const arg, const char * const option_name, Mode & program_mode, int & sector_size ) { @@ -363,8 +503,7 @@ void parse_u( const char * const arg, const char * const option_name, { set_mode( program_mode, m_unzcrash_block ); sector_size = getnum( arg + 1, option_name, 0, 1, INT_MAX ); } else - { show_option_error( arg, "Invalid argument in", option_name ); - std::exit( 1 ); } + { show_option_error( arg, inv_arg_msg, option_name ); std::exit( 1 ); } } @@ -487,6 +626,9 @@ bool make_dirs( const std::string & name ) const char * const force_msg = "Output file already exists. Use '--force' to overwrite it."; +unsigned char xdigit( const unsigned value ) // hex digit for 'value' + { return (value <= 9) ? '0' + value : (value <= 15) ? 'A' + value - 10 : 0; } + } // end namespace bool open_outstream( const bool force, const bool protect, @@ -499,8 +641,8 @@ bool open_outstream( const bool force, const bool protect, if( force ) flags |= O_TRUNC; else flags |= O_EXCL; outfd = -1; - if( output_filename.size() && - output_filename[output_filename.size()-1] == '/' ) errno = EISDIR; + if( output_filename.size() && output_filename.end()[-1] == '/' ) + errno = EISDIR; else { if( ( !protect || to_file ) && !make_dirs( output_filename ) ) { show_file_error( output_filename.c_str(), @@ -535,6 +677,7 @@ void set_signals( void (*action)(int) ) void cleanup_and_fail( const int retval ) { + cleanup_mutex_lock(); // only one thread can delete and exit set_signals( SIG_IGN ); // ignore signals if( delete_output_on_interrupt ) { @@ -559,6 +702,22 @@ bool check_tty_out() return true; } + +void format_trailing_bytes( const uint8_t * const data, const int size, + std::string & msg ) + { + for( int i = 0; i < size; ++i ) + { + msg += xdigit( data[i] >> 4 ); + msg += xdigit( data[i] & 0x0F ); + msg += ' '; + } + msg += '\''; + for( int i = 0; i < size; ++i ) + msg += std::isprint( data[i] ) ? 
data[i] : '.'; + msg += '\''; + } + namespace { extern "C" void signal_handler( int ) @@ -617,14 +776,6 @@ void close_and_set_permissions( const struct stat * const in_statsp ) } -unsigned char xdigit( const unsigned value ) // hex digit for 'value' - { - if( value <= 9 ) return '0' + value; - if( value <= 15 ) return 'A' + value - 10; - return 0; - } - - bool show_trailing_data( const uint8_t * const data, const int size, const Pretty_print & pp, const bool all, const int ignore_trailing ) // -1 = show @@ -634,16 +785,7 @@ bool show_trailing_data( const uint8_t * const data, const int size, std::string msg; if( !all ) msg = "first bytes of "; msg += "trailing data = "; - for( int i = 0; i < size; ++i ) - { - msg += xdigit( data[i] >> 4 ); - msg += xdigit( data[i] & 0x0F ); - msg += ' '; - } - msg += '\''; - for( int i = 0; i < size; ++i ) - { if( std::isprint( data[i] ) ) msg += data[i]; else msg += '.'; } - msg += '\''; + format_trailing_bytes( data, size, msg ); pp( msg.c_str() ); if( ignore_trailing == 0 ) show_file_error( pp.name(), trailing_msg ); } @@ -658,6 +800,7 @@ int decompress( const unsigned long long cfile_size, const int infd, unsigned long long partial_file_pos = 0; Range_decoder rdec( infd ); int retval = 0; + bool empty = false, nonempty = false; for( bool first_member = true; ; first_member = false ) { @@ -700,7 +843,7 @@ int decompress( const unsigned long long cfile_size, const int infd, LZ_decoder decoder( rdec, dictionary_size, outfd ); show_dprogress( cfile_size, partial_file_pos, &rdec, &pp ); // init - const int result = decoder.decode_member( cl_opts, pp ); + const int result = decoder.decode_member( pp, cl_opts.ignore_nonzero ); partial_file_pos += rdec.member_position(); if( result != 0 ) { @@ -712,16 +855,19 @@ int decompress( const unsigned long long cfile_size, const int infd, "File ends unexpectedly" : "Decoder error", partial_file_pos ); } - else if( result == 5 ) { pp( empty_msg ); break; } - else if( result == 6 ) { pp( 
marking_msg ); break; } + else if( result == 5 ) { pp( nonzero_msg ); break; } if( cl_opts.ignore_errors ) { pp.reset(); continue; } else break; } + if( !cl_opts.ignore_empty ) + { if( decoder.data_position() == 0 ) empty = true; else nonempty = true; } if( verbosity >= 2 ) { std::fputs( testing ? "ok\n" : "done\n", stderr ); pp.reset(); } } if( verbosity == 1 && retval == 0 ) std::fputs( testing ? "ok\n" : "done\n", stderr ); if( retval == 2 && cl_opts.ignore_errors ) retval = 0; + if( empty && nonempty && retval == 0 ) + { show_file_error( pp.name(), empty_msg ); retval = 2; } return retval; } @@ -739,7 +885,7 @@ bool close_outstream( const struct stat * const in_statsp ) } -std::string insert_fixed( std::string name ) +std::string insert_fixed( std::string name, const bool append_lz ) { if( name.size() > 7 && name.compare( name.size() - 7, 7, ".tar.lz" ) == 0 ) name.insert( name.size() - 7, "_fixed" ); @@ -747,7 +893,8 @@ std::string insert_fixed( std::string name ) name.insert( name.size() - 3, "_fixed" ); else if( name.size() > 4 && name.compare( name.size() - 4, 4, ".tlz" ) == 0 ) name.insert( name.size() - 4, "_fixed" ); - else name += "_fixed.lz"; + else if( append_lz ) name += "_fixed.lz"; + else name += "_fixed"; return name; } @@ -794,73 +941,110 @@ void show_dprogress( const unsigned long long cfile_size, int main( const int argc, const char * const argv[] ) { + std::vector< Block > range_vector; Block range( 0, 0 ); int sector_size = INT_MAX; // default larger than practical range Bad_byte bad_byte; Member_list member_list; + std::string cl_fec_filename; std::string default_output_filename; const char * lzip_name = "lzip"; // default is lzip const char * reference_filename = 0; + unsigned long fb_or_pct = 8; // fec blocks, bytes (B), or 0.001% to 100% + unsigned cblocks = 0; // blocks per combination in fec_dc + unsigned cl_block_size = 0; // make fbs a multiple of this + unsigned num_workers = 0; // start this many worker threads + unsigned delta = 0; 
// set to 0 to keep gcc 6.1.0 quiet Mode program_mode = m_none; int lzip_level = 0; // 0 = test all levels and match lengths // '0'..'9' = level, 'a' = all levels // -5..-273 = match length, -1 = all lengths int repeated_byte = -1; // 0 to 255, or -1 for all values Cl_options cl_opts; // command-line options + char debug_level = 0; + char fctype = fc_blocks; // type of value in fb_or_pct + char fec_level = 9; // fec fragmentation level, default = "-9" + char recursive = 0; // 1 = '-r', 2 = '-R' + bool cl_gf16 = false; + bool fec_random = false; bool force = false; bool keep_input_files = false; bool to_stdout = false; if( argc > 0 ) invocation_name = argv[0]; - enum { opt_cm = 256, opt_du, opt_eer, opt_lt, opt_lzl, opt_lzn, opt_mer, - opt_ref, opt_rem, opt_st }; + enum { opt_chk = 256, opt_dbg, opt_du, opt_ff, opt_g16, opt_ie, opt_inz, + opt_lt, opt_lzl, opt_lzn, opt_nzr, opt_ref, opt_rem, opt_rnd, opt_st }; const Arg_parser::Option options[] = { - { 'a', "trailing-error", Arg_parser::no }, - { 'A', "alone-to-lz", Arg_parser::no }, - { 'c', "stdout", Arg_parser::no }, - { 'd', "decompress", Arg_parser::no }, - { 'D', "range-decompress", Arg_parser::yes }, - { 'e', "reproduce", Arg_parser::no }, - { 'E', "debug-reproduce", Arg_parser::yes }, - { 'f', "force", Arg_parser::no }, - { 'h', "help", Arg_parser::no }, - { 'i', "ignore-errors", Arg_parser::no }, - { 'k', "keep", Arg_parser::no }, - { 'l', "list", Arg_parser::no }, - { 'm', "merge", Arg_parser::no }, - { 'M', "md5sum", Arg_parser::no }, - { 'n', "threads", Arg_parser::yes }, - { 'o', "output", Arg_parser::yes }, - { 'q', "quiet", Arg_parser::no }, - { 'R', "byte-repair", Arg_parser::no }, - { 'R', "repair", Arg_parser::no }, - { 's', "split", Arg_parser::no }, - { 'S', "nrep-stats", Arg_parser::maybe }, - { 't', "test", Arg_parser::no }, - { 'U', "unzcrash", Arg_parser::yes }, - { 'v', "verbose", Arg_parser::no }, - { 'V', "version", Arg_parser::no }, - { 'W', "debug-decompress", Arg_parser::yes }, - { 'X', 
"show-packets", Arg_parser::maybe }, - { 'Y', "debug-delay", Arg_parser::yes }, - { 'Z', "debug-byte-repair", Arg_parser::yes }, - { opt_cm, "clear-marking", Arg_parser::no }, - { opt_du, "dump", Arg_parser::yes }, - { opt_eer, "empty-error", Arg_parser::no }, - { opt_lt, "loose-trailing", Arg_parser::no }, - { opt_lzl, "lzip-level", Arg_parser::yes }, - { opt_lzn, "lzip-name", Arg_parser::yes }, - { opt_mer, "marking-error", Arg_parser::no }, - { opt_ref, "reference-file", Arg_parser::yes }, - { opt_rem, "remove", Arg_parser::yes }, - { opt_st, "strip", Arg_parser::yes }, - { 0, 0, Arg_parser::no } }; + { '0', 0, Arg_parser::no }, + { '1', 0, Arg_parser::no }, + { '2', 0, Arg_parser::no }, + { '3', 0, Arg_parser::no }, + { '4', 0, Arg_parser::no }, + { '5', 0, Arg_parser::no }, + { '6', 0, Arg_parser::no }, + { '7', 0, Arg_parser::no }, + { '8', 0, Arg_parser::no }, + { '9', 0, Arg_parser::no }, + { 'a', "trailing-error", Arg_parser::no }, + { 'A', "alone-to-lz", Arg_parser::no }, + { 'b', "block-size", Arg_parser::yes }, + { 'B', "byte-repair", Arg_parser::no }, + { 'B', "repair", Arg_parser::no }, + { 'c', "stdout", Arg_parser::no }, + { 'd', "decompress", Arg_parser::no }, + { 'D', "range-decompress", Arg_parser::yes }, + { 'e', "reproduce", Arg_parser::no }, + { 'E', "debug-reproduce", Arg_parser::yes }, + { 'f', "force", Arg_parser::no }, + { 'F', "fec", Arg_parser::yes }, + { 'h', "help", Arg_parser::no }, + { 'i', "ignore-errors", Arg_parser::no }, + { 'k', "keep", Arg_parser::no }, + { 'l', "list", Arg_parser::no }, + { 'm', "merge", Arg_parser::no }, + { 'M', "md5sum", Arg_parser::no }, + { 'n', "threads", Arg_parser::yes }, + { 'o', "output", Arg_parser::yes }, + { 'q', "quiet", Arg_parser::no }, + { 'r', "recursive", Arg_parser::no }, + { 'R', "dereference-recursive", Arg_parser::no }, + { 's', "split", Arg_parser::no }, + { 'S', "nrep-stats", Arg_parser::maybe }, + { 't', "test", Arg_parser::no }, + { 'U', "unzcrash", Arg_parser::yes }, + { 'v', 
"verbose", Arg_parser::no }, + { 'V', "version", Arg_parser::no }, + { 'W', "debug-decompress", Arg_parser::yes }, + { 'X', "show-packets", Arg_parser::maybe }, + { 'Y', "debug-delay", Arg_parser::yes }, + { 'Z', "debug-byte-repair", Arg_parser::yes }, + { opt_chk, "check", Arg_parser::yes }, + { opt_dbg, "debug", Arg_parser::yes }, + { opt_du, "dump", Arg_parser::yes }, + { opt_ff, "fec-file", Arg_parser::yes }, + { opt_g16, "gf16", Arg_parser::no }, + { opt_ie, "ignore-empty", Arg_parser::no }, + { opt_inz, "ignore-nonzero", Arg_parser::no }, + { opt_lt, "loose-trailing", Arg_parser::no }, + { opt_lzl, "lzip-level", Arg_parser::yes }, + { opt_lzn, "lzip-name", Arg_parser::yes }, + { opt_nzr, "nonzero-repair", Arg_parser::no }, + { opt_ref, "reference-file", Arg_parser::yes }, + { opt_rem, "remove", Arg_parser::yes }, + { opt_rnd, "random", Arg_parser::no }, + { opt_st, "strip", Arg_parser::yes }, + { 0, 0, Arg_parser::no } }; const Arg_parser parser( argc, argv, options ); if( parser.error().size() ) // bad option { show_error( parser.error().c_str(), 0, true ); return 1; } + const long num_online = std::max( 1L, sysconf( _SC_NPROCESSORS_ONLN ) ); + long max_workers = sysconf( _SC_THREAD_THREADS_MAX ); + if( max_workers < 1 || max_workers > INT_MAX / (int)sizeof (pthread_t) ) + max_workers = INT_MAX / sizeof (pthread_t); + int argind = 0; for( ; argind < parser.arguments(); ++argind ) { @@ -871,8 +1055,13 @@ int main( const int argc, const char * const argv[] ) const char * const arg = sarg.c_str(); switch( code ) { + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': fec_level = code - '0'; break; case 'a': cl_opts.ignore_trailing = false; break; case 'A': set_mode( program_mode, m_alone_to_lz ); break; + case 'b': cl_block_size = getnum( arg, pn, 0, min_fbs, max_unit_fbs ) & + ( max_unit_fbs - min_fbs ); break; + case 'B': set_mode( program_mode, m_byte_repair ); break; case 'c': to_stdout = true; break; case 
'd': set_mode( program_mode, m_decompress ); break; case 'D': set_mode( program_mode, m_range_dec ); @@ -881,17 +1070,20 @@ int main( const int argc, const char * const argv[] ) case 'E': set_mode( program_mode, m_reproduce ); parse_range( arg, pn, range, &sector_size ); break; case 'f': force = true; break; - case 'h': show_help(); return 0; + case 'F': parse_fec( arg, pn, program_mode, fb_or_pct, cblocks, delta, + sector_size, range_vector, fctype ); break; + case 'h': show_help( num_online ); return 0; case 'i': cl_opts.ignore_errors = true; break; case 'k': keep_input_files = true; break; case 'l': set_mode( program_mode, m_list ); break; case 'm': set_mode( program_mode, m_merge ); break; case 'M': set_mode( program_mode, m_md5sum ); break; - case 'n': break; + case 'n': num_workers = getnum( arg, pn, 0, 1, max_workers ); break; case 'o': if( sarg == "-" ) to_stdout = true; else { default_output_filename = sarg; } break; - case 'q': verbosity = -1; break; - case 'R': set_mode( program_mode, m_byte_repair ); break; + case 'q': cl_verbosity = verbosity = -1; break; + case 'r': recursive = 1; break; + case 'R': recursive = 2; break; case 's': set_mode( program_mode, m_split ); break; case 'S': if( arg[0] ) repeated_byte = getnum( arg, pn, 0, 0, 255 ); set_mode( program_mode, m_nrep_stats ); break; @@ -907,20 +1099,25 @@ int main( const int argc, const char * const argv[] ) parse_range( arg, pn, range ); break; case 'Z': set_mode( program_mode, m_debug_byte_repair ); bad_byte.parse_bb( arg, pn ); break; - case opt_cm: set_mode( program_mode, m_clear_marking ); - cl_opts.ignore_marking = true; break; - case opt_du: set_mode( program_mode, m_dump ); - member_list.parse_ml( arg, pn, cl_opts ); break; - case opt_eer: cl_opts.ignore_empty = false; break; + case opt_chk: set_mode( program_mode, m_check ); + cblocks = getnum( arg, pn, 0, 1, max_k16 ); break; + case opt_dbg: debug_level = getnum( arg, pn, 0, 0, 3 ); break; + case opt_du: set_mode( program_mode, m_dump ); + 
member_list.parse_ml( arg, pn, cl_opts ); break; + case opt_ff: cl_fec_filename = sarg; break; + case opt_g16: cl_gf16 = true; break; + case opt_ie: cl_opts.ignore_empty = true; break; + case opt_inz: cl_opts.ignore_nonzero = true; break; case opt_lt: cl_opts.loose_trailing = true; break; case opt_lzl: lzip_level = parse_lzip_level( arg, pn ); break; case opt_lzn: lzip_name = arg; break; - case opt_mer: cl_opts.ignore_marking = false; break; + case opt_nzr: set_mode( program_mode, m_nonzero_repair ); break; case opt_ref: reference_filename = arg; break; case opt_rem: set_mode( program_mode, m_remove ); member_list.parse_ml( arg, pn, cl_opts ); break; - case opt_st: set_mode( program_mode, m_strip ); - member_list.parse_ml( arg, pn, cl_opts ); break; + case opt_rnd: fec_random = true; break; + case opt_st: set_mode( program_mode, m_strip ); + member_list.parse_ml( arg, pn, cl_opts ); break; default: internal_error( "uncaught option." ); } } // end process options @@ -935,6 +1132,9 @@ int main( const int argc, const char * const argv[] ) show_error( "You must specify the operation to be performed.", 0, true ); return 1; } + if( program_mode != m_decompress && program_mode != m_list && + program_mode != m_test && program_mode != m_range_dec ) + cl_opts.ignore_empty = true; std::vector< std::string > filenames; bool filenames_given = false; @@ -954,12 +1154,11 @@ int main( const int argc, const char * const argv[] ) one_file( filenames.size() ); return byte_repair( filenames[0], default_output_filename, cl_opts, terminator, force ); - case m_clear_marking: - at_least_one_file( filenames.size() ); - return clear_marking( filenames, cl_opts ); + case m_check: return gf_check( cblocks, cl_gf16, fec_random ); case m_debug_byte_repair: one_file( filenames.size() ); - return debug_byte_repair( filenames[0].c_str(), cl_opts, bad_byte, terminator ); + return debug_byte_repair( filenames[0].c_str(), cl_opts, bad_byte, + terminator ); case m_debug_decompress: one_file( 
filenames.size() ); return debug_decompress( filenames[0].c_str(), cl_opts, bad_byte, false ); @@ -972,6 +1171,30 @@ int main( const int argc, const char * const argv[] ) at_least_one_file( filenames.size() ); return dump_members( filenames, default_output_filename, cl_opts, member_list, force, program_mode == m_strip, to_stdout ); + case m_fec_create: + at_least_one_file( filenames.size() ); + if( num_workers <= 0 ) num_workers = std::min( num_online, max_workers ); + return fec_create( filenames, default_output_filename, fb_or_pct, + cl_block_size, num_workers, debug_level, fctype, fec_level, + recursive, cl_gf16, fec_random, force, to_stdout ); + case m_fec_repair: + case m_fec_test: + at_least_one_file( filenames.size() ); + return fec_test( filenames, cl_fec_filename, default_output_filename, + recursive, force, cl_opts.ignore_errors, + program_mode == m_fec_repair, to_stdout ); + case m_fec_list: + if( filenames.empty() ) filenames.push_back("-"); + return fec_list( filenames, cl_opts.ignore_errors ); + case m_fec_dc: + one_file( filenames.size() ); + return fec_dc( filenames[0], cl_fec_filename, cblocks ); + case m_fec_dz: + one_file( filenames.size() ); + return fec_dz( filenames[0], cl_fec_filename, range_vector ); + case m_fec_dZ: + one_file( filenames.size() ); + return fec_dZ( filenames[0], cl_fec_filename, delta, sector_size ); case m_list: break; case m_md5sum: break; case m_merge: @@ -979,6 +1202,9 @@ int main( const int argc, const char * const argv[] ) { show_error( "You must specify at least 2 files.", 0, true ); return 1; } return merge_files( filenames, default_output_filename, cl_opts, terminator, force ); + case m_nonzero_repair: + at_least_one_file( filenames.size() ); + return nonzero_repair( filenames, cl_opts ); case m_nrep_stats: return print_nrep_stats( filenames, cl_opts, repeated_byte ); case m_range_dec: diff --git a/main_common.cc b/main_common.cc index dfaccac..4a40acf 100644 --- a/main_common.cc +++ b/main_common.cc @@ -15,6 +15,9 
@@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ +int cl_verbosity = 0; // used to silence internal_error if '-q' +int verbosity = 0; + namespace { const char * const program_year = "2024"; @@ -29,8 +32,8 @@ void show_version() } -// separate numbers of 5 or more digits in groups of 3 digits using '_' -const char * format_num3( long long num ) +// separate numbers of 6 or more digits in groups of 3 digits using '_' +const char * format_num3p( long long num, const bool raw = false ) { enum { buffers = 8, bufsize = 4 * sizeof num, n = 10 }; const char * const si_prefix = "kMGTPEZYRQ"; @@ -42,7 +45,7 @@ const char * format_num3( long long num ) char * p = buf + bufsize - 1; // fill the buffer backwards *p = 0; // terminator const bool negative = num < 0; - if( num > 1024 || num < -1024 ) + if( !raw && ( num > 9999 || num < -9999 ) ) { char prefix = 0; // try binary first, then si for( int i = 0; i < n && num != 0 && num % 1024 == 0; ++i ) @@ -53,7 +56,7 @@ const char * format_num3( long long num ) { num /= 1000; prefix = si_prefix[i]; } if( prefix ) *(--p) = prefix; } - const bool split = num >= 10000 || num <= -10000; + const bool split = num >= 100000 || num <= -100000; for( int i = 0; ; ) { @@ -136,8 +139,8 @@ long long getnum( const char * const arg, const char * const option_name, { if( verbosity >= 0 ) std::fprintf( stderr, "%s: '%s': Value out of limits [%s,%s] in " - "option '%s'.\n", program_name, arg, format_num3( llimit ), - format_num3( ulimit ), option_name ); + "option '%s'.\n", program_name, arg, format_num3p( llimit ), + format_num3p( ulimit ), option_name ); std::exit( 1 ); } if( tailp ) *tailp = tail; @@ -148,7 +151,6 @@ long long getnum( const char * const arg, const char * const option_name, // Recognized formats: <pos>,<value> <pos>,+<value> <pos>,f<value> -// void Bad_byte::parse_bb( const char * const arg, const char * const pn ) { argument = arg; @@ -166,6 +168,9 @@ void Bad_byte::parse_bb( const char * const arg, 
const char * const pn ) } +const char * format_num3( long long num ) { return format_num3p( num, true ); } + + void show_error( const char * const msg, const int errcode, const bool help ) { if( verbosity < 0 ) return; @@ -191,7 +196,7 @@ void show_file_error( const char * const filename, const char * const msg, void internal_error( const char * const msg ) { - if( verbosity >= 0 ) + if( cl_verbosity >= 0 ) std::fprintf( stderr, "%s: internal error: %s\n", program_name, msg ); std::exit( 3 ); } @@ -178,7 +178,7 @@ void MD5SUM::md5_finish( md5_type & digest ) md5_update( padding, len ); // pad to 56 mod 64 md5_update( bits, 8 ); // append data length in bits - for( int i = 0, j = 0; i < 4; i++, j += 4 ) // store state in digest + for( int i = 0, j = 0; i < 4; ++i, j += 4 ) // store state in digest { digest[j ] = (uint8_t)state[i]; digest[j+1] = (uint8_t)(state[i] >> 8); @@ -23,7 +23,7 @@ struct md5_type uint8_t data[16]; // 128-bit md5 digest bool operator==( const md5_type & d ) const - { return ( std::memcmp( data, d.data, 16 ) == 0 ); } + { return std::memcmp( data, d.data, 16 ) == 0; } bool operator!=( const md5_type & d ) const { return !( *this == d ); } // const uint8_t & operator[]( const int i ) const { return data[i]; } uint8_t & operator[]( const int i ) { return data[i]; } @@ -65,7 +65,7 @@ bool file_crc( uint32_t & crc, const int infd, const char * const filename ) { const int rd = readblock( infd, buffer, buffer_size ); if( rd != buffer_size && errno ) - { show_file_error( filename, "Error reading input file", errno ); + { show_file_error( filename, read_error_msg, errno ); error = true; break; } if( rd > 0 ) crc32.update_buf( crc, buffer, rd ); @@ -153,12 +153,12 @@ bool diff_member( const long long mpos, const long long msize, const int size = std::min( (long long)buffer_size, msize - partial_pos ); const int rd = readblock( fd1, buffer1, size ); if( rd != size && errno ) - { show_file_error( filename1, "Error reading input file", errno ); + { 
show_file_error( filename1, read_error_msg, errno ); error = true; break; } if( rd > 0 ) { if( readblock( fd2, buffer2, rd ) != rd ) - { show_file_error( filename2, "Error reading input file", errno ); + { show_file_error( filename2, read_error_msg, errno ); error = true; break; } for( int i = 0; i < rd; ++i ) { @@ -267,8 +267,7 @@ int open_input_files( const std::vector< std::string > & filenames, } } if( tmp < min_member_size ) - { show_file_error( filenames[i].c_str(), "Input file is too short." ); - return 2; } + { show_file_error( filenames[i].c_str(), short_file_msg ); return 2; } if( i == 0 ) insize = tmp; else if( insize != tmp ) { show_2file_error( "Sizes of input files", filenames[0].c_str(), @@ -524,7 +523,7 @@ bool copy_file( const int infd, const int outfd, const long long max_size ) if( max_size >= 0 ) rest -= size; const int rd = readblock( infd, buffer, size ); if( rd != size && errno ) - { show_error( "Error reading input file", errno ); error = true; break; } + { show_error( read_error_msg, errno ); error = true; break; } if( rd > 0 ) { const int wr = writeblock( outfd, buffer, rd ); @@ -349,8 +349,6 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos, if( check_trailer( show_packets ? stdout : 0 ) ) return 0; return 3; } - if( len == min_match_len + 1 ) // Sync Flush marker - { rdec.load(); continue; } return 4; } } diff --git a/nrep_stats.cc b/nrep_stats.cc index 2c97d4d..746a3bc 100644 --- a/nrep_stats.cc +++ b/nrep_stats.cc @@ -68,13 +68,13 @@ int print_nrep_stats( const std::vector< std::string > & filenames, } const unsigned long long cdata_size = lzip_index.cdata_size(); if( !fits_in_size_t( cdata_size ) ) // mmap uses size_t - { show_file_error( input_filename, "Input file is too large for mmap." 
); + { show_file_error( input_filename, large_file_msg ); set_retval( retval, 1 ); close( infd ); continue; } const uint8_t * const buffer = (const uint8_t *)mmap( 0, cdata_size, PROT_READ, MAP_PRIVATE, infd, 0 ); close( infd ); if( buffer == MAP_FAILED ) - { show_file_error( input_filename, "Can't mmap", errno ); + { show_file_error( input_filename, mmap_msg, errno ); set_retval( retval, 1 ); continue; } for( long j = 0; j < lzip_index.members(); ++j ) { diff --git a/range_dec.cc b/range_dec.cc index 0d03264..0bb3078 100644 --- a/range_dec.cc +++ b/range_dec.cc @@ -53,7 +53,7 @@ bool decompress_member( const int infd, const Cl_options & cl_opts, if( verbosity >= 2 ) pp(); LZ_decoder decoder( rdec, dictionary_size, outfd, outskip, outend ); - const int result = decoder.decode_member( cl_opts, pp ); + const int result = decoder.decode_member( pp, cl_opts.ignore_nonzero ); if( result != 0 ) { if( verbosity >= 0 && result <= 2 ) @@ -141,7 +141,8 @@ int range_decompress( const std::string & input_filename, if( range.end() > udata_size ) range.size( std::max( 0LL, udata_size - range.pos() ) ); if( range.size() <= 0 ) - { if( udata_size > 0 ) show_file_error( filename, "Nothing to do." ); + { if( udata_size > 0 ) + show_file_error( filename, "Nothing to do; range is empty." ); return 0; } if( to_stdout || default_output_filename.empty() ) outfd = STDOUT_FILENO; diff --git a/recursive.cc b/recursive.cc new file mode 100644 index 0000000..aaf84d7 --- /dev/null +++ b/recursive.cc @@ -0,0 +1,122 @@ +/* Lziprecover - Data recovery tool for the lzip format + Copyright (C) 2023-2024 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <cerrno> +#include <cstdio> +#include <cstring> +#include <list> +#include <string> +#include <vector> +#include <dirent.h> +#include <stdint.h> +#include <sys/stat.h> + +#include "lzip.h" +#include "md5.h" +#include "fec.h" + +namespace { + +/* Return true if full_name is a regular file without extension .fec + or (a link to) a directory. */ +bool test_full_name( const std::string & full_name, const struct stat * stp, + const bool follow ) + { + struct stat st, st2; + if( ( follow && stat( full_name.c_str(), &st ) != 0 ) || + ( !follow && lstat( full_name.c_str(), &st ) != 0 ) ) return false; + if( S_ISREG( st.st_mode ) ) return !has_fec_extension( full_name ); + if( !S_ISDIR( st.st_mode ) ) return false; + + std::string prev_dir( full_name ); + bool loop = ( stp && st.st_ino == stp->st_ino && st.st_dev == stp->st_dev ); + if( !loop ) + for( unsigned i = prev_dir.size(); i > 1; ) + { + while( i > 0 && prev_dir[i-1] != '/' ) --i; + if( i == 0 ) break; + if( i > 1 ) --i; // remove trailing slash except at root dir + prev_dir.resize( i ); + if( stat( prev_dir.c_str(), &st2 ) != 0 || !S_ISDIR( st2.st_mode ) || + ( st.st_ino == st2.st_ino && st.st_dev == st2.st_dev ) ) + { loop = true; break; } + } + if( loop ) // full_name already visited or above tree + show_file_error( full_name.c_str(), "warning: recursive directory loop." ); + return !loop; // (link to) directory + } + +} // end namespace + + +/* Return in input_filename the next file name. ('-' is a valid file name). + Recursively found files and directories named "fec" are ignored. 
+ Set 'retval' to 1 if a directory fails to open. */ +bool next_filename( std::list< std::string > & filelist, + std::string & input_filename, int & retval, + const char recursive ) + { + while( !filelist.empty() ) + { + input_filename = filelist.front(); + filelist.pop_front(); + struct stat st; + if( stat( input_filename.c_str(), &st ) == 0 && S_ISDIR( st.st_mode ) ) + { + if( recursive ) + { + DIR * const dirp = opendir( input_filename.c_str() ); + if( !dirp ) + { + show_file_error( input_filename.c_str(), "Can't open directory", errno ); + if( retval == 0 ) { retval = 1; } continue; + } + for( unsigned i = input_filename.size(); + i > 1 && input_filename[i-1] == '/'; --i ) + input_filename.resize( i - 1 ); // remove trailing slashes + struct stat stdot, *stdotp = 0; + if( input_filename[0] != '/' ) // relative file name + { + if( input_filename == "." ) input_filename.clear(); + if( stat( ".", &stdot ) == 0 && S_ISDIR( stdot.st_mode ) ) + stdotp = &stdot; + } + if( input_filename.size() && input_filename != "/" ) + input_filename += '/'; + std::list< std::string > tmp_list; + while( true ) + { + const struct dirent * const entryp = readdir( dirp ); + if( !entryp ) { closedir( dirp ); break; } + const std::string tmp_name( entryp->d_name ); + if( tmp_name == "." || tmp_name == ".." 
|| tmp_name == "fec" || + tmp_name == "FEC" ) continue; + const std::string full_name( input_filename + tmp_name ); + if( test_full_name( full_name, stdotp, recursive == 2 ) ) + tmp_list.push_back( full_name ); + } + filelist.splice( filelist.begin(), tmp_list ); + } + continue; + } + return true; + } + input_filename.clear(); + return false; + } diff --git a/reproduce.cc b/reproduce.cc index 5ca91a7..ff7c654 100644 --- a/reproduce.cc +++ b/reproduce.cc @@ -440,7 +440,7 @@ int reproduce_member( uint8_t * const mbuffer, const long msize, (const uint8_t *)mmap( 0, rsize, PROT_READ, MAP_PRIVATE, rfd, 0 ); close( rfd ); if( rbuf == MAP_FAILED ) - { show_file_error( reference_filename, "Can't mmap", errno ); + { show_file_error( reference_filename, mmap_msg, errno ); return fatal( 1 ); } const Lzip_header & header = *(const Lzip_header *)mbuffer; @@ -457,8 +457,8 @@ int reproduce_member( uint8_t * const mbuffer, const long msize, const long offset = match_file( *master, rbuf, rsize, reference_filename ); if( offset < 0 ) { delete master; return 2; } // no match - // Reference data from offset must be at least as large as zeroed sector - // minus member trailer if trailer is inside the zeroed sector. + /* Reference data from offset must be at least as large as zeroed sector + minus member trailer if trailer is inside the zeroed sector. */ const int t = ( begin + size >= msize ) ? 16 + Lzip_trailer::size : 0; if( rsize - offset < size - t ) { show_file_error( reference_filename, "Not enough reference data after match." 
); @@ -567,7 +567,7 @@ int reproduce_file( const std::string & input_filename, uint8_t * const mbuffer_base = (uint8_t *)mmap( 0, msize + mpos_rem, PROT_READ | PROT_WRITE, MAP_PRIVATE, infd, mpos - mpos_rem ); if( mbuffer_base == MAP_FAILED ) - { show_file_error( filename, "Can't mmap", errno ); return 1; } + { show_file_error( filename, mmap_msg, errno ); return 1; } uint8_t * const mbuffer = mbuffer_base + mpos_rem; long size = 0; uint8_t value = 0; @@ -627,7 +627,8 @@ int reproduce_file( const std::string & input_filename, std::fputs( "One member reproduced." " Copy of input file still contains errors.\n", stdout ); else - std::fputs( "Copy of input file reproduced successfully.\n", stdout ); + std::printf( "Repaired copy of '%s' written to '%s'\n", + filename, output_filename.c_str() ); } return 0; } @@ -686,7 +687,7 @@ int debug_reproduce_file( const char * const input_filename, uint8_t * const mbuffer_base = (uint8_t *)mmap( 0, msize + mpos_rem, PROT_READ | PROT_WRITE, MAP_PRIVATE, infd, mpos - mpos_rem ); if( mbuffer_base == MAP_FAILED ) - { show_file_error( input_filename, "Can't mmap", errno ); return 1; } + { show_file_error( input_filename, mmap_msg, errno ); return 1; } uint8_t * const mbuffer = mbuffer_base + mpos_rem; if( !md5_valid ) { @@ -762,18 +763,18 @@ int debug_reproduce_file( const char * const input_filename, done: if( verbosity >= 0 ) { - std::printf( "\n%9ld sectors tested" - "\n%9ld reproductions returned with zero status", - positions, successes ); + std::printf( "\n%11s sectors tested" + "\n%11s reproductions returned with zero status", + format_num3( positions ), format_num3( successes ) ); if( successes > 0 ) { if( failed_comparisons > 0 ) - std::printf( ", of which\n%9ld comparisons failed\n", - failed_comparisons ); - else std::fputs( "\n all comparisons passed\n", stdout ); + std::printf( ", of which\n%11s comparisons failed\n", + format_num3( failed_comparisons ) ); + else std::fputs( "\n all comparisons passed\n", stdout ); if( 
alternative_reproductions > 0 ) - std::printf( "%9ld alternative reproductions found\n", - alternative_reproductions ); + std::printf( "%11s alternative reproductions found\n", + format_num3( alternative_reproductions ) ); } else std::fputc( '\n', stdout ); if( fatal_retval ) diff --git a/testsuite/check.sh b/testsuite/check.sh index a65f062..638d6db 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -29,11 +29,11 @@ if [ -d tmp ] ; then rm -rf tmp ; fi mkdir tmp cd "${objdir}"/tmp || framework_failure -cat "${testdir}"/test.txt > in || framework_failure +cp "${testdir}"/test.txt in || framework_failure in_lz="${testdir}"/test.txt.lz in_lzma="${testdir}"/test.txt.lzma in_em="${testdir}"/test_em.txt.lz -inD="${testdir}"/test21723.txt +inD="${testdir}"/test21636.txt bad1_lz="${testdir}"/test_bad1.lz bad2_lz="${testdir}"/test_bad2.lz bad3_lz="${testdir}"/test_bad3.lz @@ -58,28 +58,28 @@ f6s3_lz="${testdir}"/fox6_sc3.lz f6s4_lz="${testdir}"/fox6_sc4.lz f6s5_lz="${testdir}"/fox6_sc5.lz f6s6_lz="${testdir}"/fox6_sc6.lz -f6mk_lz="${testdir}"/fox6_mark.lz +f6nz_lz="${testdir}"/fox6_nz.lz num_lz="${testdir}"/numbers.lz nbt_lz="${testdir}"/numbersbt.lz fail=0 test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; } # Description of test files for lziprecover: -# single-member files with one or more errors +# single-member files; test.txt.lz with one or more errors # test_bad1.lz: byte at offset 66 changed from 0xA6 to 0x26 -# test_bad2.lz: [ 34- 65] --> copy of bytes [ 68- 99] -# test_bad3.lz: [ 512-1535] --> zeroed [2560-3583] --> zeroed -# test_bad4.lz: [3072-4095] --> random errors [4608-5631] --> zeroed -# test_bad5.lz: [1024-2047] --> random errors [5120-6143] --> random data -# test_bad6.lz: [ 512-1023] --> zeroed (reference test.txt [ 891- 2137]) -# test_bad7.lz: [6656-7167] --> zeroed (reference test.txt [20428-32231]) -# test_bad8.lz: [ 66- 73] --> zeroed (reference test.txt [ 89- 110]) -# test_bad9.lz: [6491-6498] --> zeroed (reference 
test.txt [17977-18120]) +# test_bad2.lz: [ 34- 65] --> copy of bytes [68-99] +# test_bad3.lz: [ 512-1535] --> zeroed [2560-3583] --> zeroed +# test_bad4.lz: [3072-4095] --> random bit flips [4608-5631] --> zeroed +# test_bad5.lz: [1024-2047] --> random bit flips [5120-6143] --> random data +# test_bad6.lz: [ 512-1023] --> zeroed (reference test.txt [ 892- 2414]) +# test_bad7.lz: [6656-7167] --> zeroed (reference test.txt [20798-33385]) +# test_bad8.lz: [ 66- 73] --> zeroed (reference test.txt [ 89- 110]) +# test_bad9.lz: [6491-6498] --> zeroed (reference test.txt [17982-18594]) # # test_em.txt.lz: test.txt split in 3, with 5 empty members (1,3,5-6,8) # test_3m.txt.lz.md5: md5sum of test_em.txt.lz after removing empty members # -# 6-member files with one or more errors +# 6-member files; fox6.lz with one or more errors # fox6_bad1.lz: byte at offset 5 changed from 0x0C to 0x00 (DS) # byte at offset 142 changed from 0x50 to 0x70 (CRC) # byte at offset 224 changed from 0x2D to 0x2E (data_size) @@ -92,7 +92,7 @@ test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; } # fox6_bad5.lz: [380-479] --> zeroed (members 5,6) # fox6_bad6.lz: [430-439] --> zeroed (member 6) # -# fox6_mark.lz: 4 last members marked with bytes 'm', 'a', 'r', 'k' +# fox6_nz.lz: first LZMA byte of 4 last members set to 'a', 'b', 'c', 'd' # # 6-member files "shortcircuited" by a corrupt or fake trailer # fox6_sc1.lz: (corrupt but consistent last trailer) @@ -168,7 +168,7 @@ printf "LZIP\001+.............................." | "${LZIP}" -t 2> /dev/null [ $? = 1 ] || test_failed $LINENO "${LZIPRECOVER}" -mq "${bad1_lz}" [ $? = 1 ] || test_failed $LINENO -"${LZIPRECOVER}" -Rq +"${LZIPRECOVER}" -Bq [ $? = 1 ] || test_failed $LINENO "${LZIPRECOVER}" -sq [ $? = 1 ] || test_failed $LINENO @@ -188,7 +188,7 @@ printf "LZIP\001+.............................." | "${LZIP}" -t 2> /dev/null [ $? 
= 1 ] || test_failed $LINENO "${LZIPRECOVER}" -q --dump=damaged in > out # /dev/null returns 1 on OS/2 [ $? = 2 ] || test_failed $LINENO -"${LZIPRECOVER}" -q --dump=damagedd "${in_lz}" > /dev/null +"${LZIPRECOVER}" -q --dump=damageed "${in_lz}" > /dev/null [ $? = 1 ] || test_failed $LINENO "${LZIPRECOVER}" -q --dump=empty [ $? = 1 ] || test_failed $LINENO @@ -196,13 +196,13 @@ printf "LZIP\001+.............................." | "${LZIP}" -t 2> /dev/null [ $? = 1 ] || test_failed $LINENO "${LZIPRECOVER}" -q --strip=damaged in > out # /dev/null returns 1 on OS/2 [ $? = 2 ] || test_failed $LINENO -"${LZIPRECOVER}" -q --strip=damagedd "${in_lz}" > /dev/null +"${LZIPRECOVER}" -q --strip=damageed "${in_lz}" > /dev/null [ $? = 1 ] || test_failed $LINENO "${LZIPRECOVER}" -q --remove=damaged [ $? = 1 ] || test_failed $LINENO "${LZIPRECOVER}" -q --remove=damaged in [ $? = 2 ] || test_failed $LINENO -"${LZIPRECOVER}" -q --remove=damagedd "${in_lz}" +"${LZIPRECOVER}" -q --remove=damageed "${in_lz}" [ $? = 1 ] || test_failed $LINENO "${LZIPRECOVER}" -q --dump=tdata [ $? 
= 1 ] || test_failed $LINENO @@ -244,14 +244,14 @@ cmp "${in_lz}" out.lz || test_failed $LINENO "${LZIPRECOVER}" -A < "${in_lzma}" > out.lz || test_failed $LINENO cmp "${in_lz}" out.lz || test_failed $LINENO rm -f out.lz || framework_failure -cat "${in_lzma}" > out.lzma || framework_failure +cp "${in_lzma}" out.lzma || framework_failure "${LZIPRECOVER}" -Ak out.lzma || test_failed $LINENO cmp "${in_lz}" out.lz || test_failed $LINENO printf "to be overwritten" > out.lz || framework_failure "${LZIPRECOVER}" -Af out.lzma || test_failed $LINENO cmp "${in_lz}" out.lz || test_failed $LINENO rm -f out.lz || framework_failure -cat "${in_lzma}" > out.tlz || framework_failure +cp "${in_lzma}" out.tlz || framework_failure "${LZIPRECOVER}" -Ak out.tlz || test_failed $LINENO cmp "${in_lz}" out.tar.lz || test_failed $LINENO printf "to be overwritten" > out.tar.lz || framework_failure @@ -271,36 +271,37 @@ rm -rf a || framework_failure printf "\ntesting decompression..." for i in "${in_lz}" "${in_em}" ; do - "${LZIP}" -lq "$i" || test_failed $LINENO "$i" - "${LZIP}" -t "$i" || test_failed $LINENO "$i" - "${LZIP}" -d "$i" -o out || test_failed $LINENO "$i" + "${LZIP}" -lq "$i" --ignore-empty || test_failed $LINENO "$i" + "${LZIP}" -t "$i" --ignore-empty || test_failed $LINENO "$i" + "${LZIP}" -d "$i" --ignore-empty -o out || test_failed $LINENO "$i" cmp in out || test_failed $LINENO "$i" - "${LZIP}" -cd "$i" > out || test_failed $LINENO "$i" + "${LZIP}" -cd "$i" --ignore-empty > out || test_failed $LINENO "$i" cmp in out || test_failed $LINENO "$i" - "${LZIP}" -d "$i" -o - > out || test_failed $LINENO "$i" + "${LZIP}" -d "$i" --ignore-empty -o - > out || test_failed $LINENO "$i" cmp in out || test_failed $LINENO "$i" - "${LZIP}" -d < "$i" > out || test_failed $LINENO "$i" + "${LZIP}" -d < "$i" --ignore-empty > out || test_failed $LINENO "$i" cmp in out || test_failed $LINENO "$i" rm -f out || framework_failure done -lines=`"${LZIP}" -tvv "${in_em}" 2>&1 | wc -l` || test_failed 
$LINENO +lines=`"${LZIP}" -tvv "${in_em}" --ignore-empty 2>&1 | wc -l` || + test_failed $LINENO [ "${lines}" -eq 8 ] || test_failed $LINENO "${lines}" -"${LZIP}" -tq "${in_em}" --empty-error +"${LZIP}" -tq "${in_em}" [ $? = 2 ] || test_failed $LINENO -lines=`"${LZIP}" -lvv "${in_em}" | wc -l` || test_failed $LINENO +lines=`"${LZIP}" -lvv "${in_em}" --ignore-empty | wc -l` || test_failed $LINENO [ "${lines}" -eq 11 ] || test_failed $LINENO "${lines}" -"${LZIP}" -lq "${in_em}" --empty-error +"${LZIP}" -lq "${in_em}" [ $? = 2 ] || test_failed $LINENO -cat "${in_lz}" > out.lz || framework_failure +cp "${in_lz}" out.lz || framework_failure "${LZIP}" -dk out.lz || test_failed $LINENO cmp in out || test_failed $LINENO rm -f out || framework_failure "${LZIP}" -cd "${fox_lz}" > fox || test_failed $LINENO -cat fox > copy || framework_failure -cat "${in_lz}" > copy.lz || framework_failure +cp fox copy || framework_failure +cp "${in_lz}" copy.lz || framework_failure "${LZIP}" -d copy.lz out.lz 2> /dev/null # skip copy, decompress out [ $? = 1 ] || test_failed $LINENO [ ! -e out.lz ] || test_failed $LINENO @@ -322,7 +323,7 @@ rm -f ./- || framework_failure cmp in ./- || test_failed $LINENO rm -f ./- || framework_failure -cat "${in_lz}" > anyothername || framework_failure +cp "${in_lz}" anyothername || framework_failure "${LZIP}" -dv - anyothername - < "${in_lz}" > out 2> /dev/null || test_failed $LINENO cmp in out || test_failed $LINENO @@ -344,7 +345,7 @@ cat out in | cmp in - || test_failed $LINENO # out must be empty [ $? 
= 1 ] || test_failed $LINENO cmp in out || test_failed $LINENO rm -f out || framework_failure -cat "${in_lz}" > out.lz || framework_failure +cp "${in_lz}" out.lz || framework_failure for i in 1 2 3 4 5 6 7 ; do printf "g" >> out.lz || framework_failure "${LZIP}" -alvv out.lz "${in_lz}" > /dev/null 2>&1 @@ -396,52 +397,45 @@ printf "to be overwritten" > out2 || framework_failure cmp in2 out2 || test_failed $LINENO rm -f out2 || framework_failure +"${LZIP}" -d "${fox_lz}" -o a/b/c/fox || test_failed $LINENO +cmp fox a/b/c/fox || test_failed $LINENO +rm -rf a || framework_failure +"${LZIP}" -d -o a/b/c/fox < "${fox_lz}" || test_failed $LINENO +cmp fox a/b/c/fox || test_failed $LINENO +rm -rf a || framework_failure +"${LZIP}" -dq "${fox_lz}" -o a/b/c/ +[ $? = 1 ] || test_failed $LINENO +[ ! -e a ] || test_failed $LINENO + +if [ -z "${LZIP_NAME}" ] ; then LZIP_NAME=lzip ; fi +touch empty em || framework_failure +"${LZIP_NAME}" -0 em || test_failed $LINENO +"${LZIP}" -lq em.lz || test_failed $LINENO +"${LZIP}" -d em.lz || test_failed $LINENO +cmp empty em || test_failed $LINENO +rm -f em || framework_failure + "${LZIPRECOVER}" -D ,18000 "${in_lz}" > out || test_failed $LINENO "${LZIPRECOVER}" -D 18000 "${in_lz}" >> out || test_failed $LINENO cmp in out || test_failed $LINENO -"${LZIPRECOVER}" -D 21723-22120 -fo out "${in_lz}" || test_failed $LINENO +"${LZIPRECOVER}" -D 21636-22033 -fo out "${in_lz}" || test_failed $LINENO cmp "${inD}" out || test_failed $LINENO -"${LZIPRECOVER}" -D 21723,397 "${in_lz}" > out || test_failed $LINENO +"${LZIPRECOVER}" -D 21636,397 "${in_lz}" > out || test_failed $LINENO cmp "${inD}" out || test_failed $LINENO -"${LZIPRECOVER}" -D 21723,397 "${in_em}" > out || test_failed $LINENO +"${LZIPRECOVER}" -D 21636,397 "${in_em}" --ignore-empty > out || + test_failed $LINENO cmp "${inD}" out || test_failed $LINENO -"${LZIPRECOVER}" -q -D 21723,397 --empty-error "${in_em}" +"${LZIPRECOVER}" -q -D 21636,397 "${in_em}" [ $? 
= 2 ] || test_failed $LINENO "${LZIP}" -D 0 "${in_lz}" -o a/b/c/out || test_failed $LINENO cmp in a/b/c/out || test_failed $LINENO rm -rf a || framework_failure -"${LZIP}" -cd "${fox6_lz}" > out || test_failed $LINENO -"${LZIP}" -cd "${f6mk_lz}" > copy || test_failed $LINENO -cmp copy out || test_failed $LINENO -rm -f copy out || framework_failure -"${LZIP}" -lq "${f6mk_lz}" --marking-error -[ $? = 2 ] || test_failed $LINENO -"${LZIP}" -tq "${f6mk_lz}" --marking-error -[ $? = 2 ] || test_failed $LINENO -cat "${f6mk_lz}" > f6mk.lz || framework_failure -cat "${f6mk_lz}" > f6mk2.lz || framework_failure -cmp -s "${fox6_lz}" f6mk.lz && test_failed $LINENO -"${LZIPRECOVER}" --clear-marking f6mk.lz f6mk2.lz || test_failed $LINENO -cmp "${fox6_lz}" f6mk.lz || test_failed $LINENO -cmp "${fox6_lz}" f6mk2.lz || test_failed $LINENO -rm -f f6mk.lz f6mk2.lz || framework_failure - -"${LZIP}" -d "${fox_lz}" -o a/b/c/fox || test_failed $LINENO -cmp fox a/b/c/fox || test_failed $LINENO -rm -rf a || framework_failure -"${LZIP}" -d -o a/b/c/fox < "${fox_lz}" || test_failed $LINENO -cmp fox a/b/c/fox || test_failed $LINENO -rm -rf a || framework_failure -"${LZIP}" -dq "${fox_lz}" -o a/b/c/ -[ $? = 1 ] || test_failed $LINENO -[ ! -e a ] || test_failed $LINENO - printf "\ntesting bad input..." headers='LZIp LZiP LZip LzIP LzIp LziP lZIP lZIp lZiP lzIP' -body='\001\014\000\203\377\373\377\377\300\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000$\000\000\000\000\000\000\000' -cat "${in_lz}" > int.lz || framework_failure +body='\001\014\000\000\101\376\367\377\377\340\000\200\000\215\357\002\322\001\000\000\000\000\000\000\000\045\000\000\000\000\000\000\000' +cp "${in_lz}" int.lz || framework_failure printf "LZIP${body}" >> int.lz || framework_failure if "${LZIP}" -tq int.lz ; then for header in ${headers} ; do @@ -462,7 +456,7 @@ if "${LZIP}" -tq int.lz ; then [ $? = 2 ] || test_failed $LINENO ${header} "${LZIP}" -cdq --loose-trailing int.lz > /dev/null [ $? 
= 2 ] || test_failed $LINENO ${header} - cat "${in_lz}" > int.lz || framework_failure + cp "${in_lz}" int.lz || framework_failure printf "${header}${body}" >> int.lz || framework_failure "${LZIP}" -lq int.lz # trailing data [ $? = 2 ] || test_failed $LINENO ${header} @@ -507,6 +501,21 @@ else fi rm -f int.lz out || framework_failure +"${LZIP}" -cd "${fox6_lz}" > out || test_failed $LINENO +"${LZIPRECOVER}" -cd "${f6nz_lz}" --ignore-nonzero > copy || test_failed $LINENO +cmp copy out || test_failed $LINENO +rm -f copy out || framework_failure +"${LZIP}" -lq "${f6nz_lz}" || test_failed $LINENO +"${LZIP}" -tq "${f6nz_lz}" +[ $? = 2 ] || test_failed $LINENO +cp "${f6nz_lz}" f6nz.lz || framework_failure +cp "${f6nz_lz}" f6nz2.lz || framework_failure +cmp -s "${fox6_lz}" f6nz.lz && test_failed $LINENO +"${LZIPRECOVER}" --nonzero-repair f6nz.lz f6nz2.lz || test_failed $LINENO +cmp "${fox6_lz}" f6nz.lz || test_failed $LINENO +cmp "${fox6_lz}" f6nz2.lz || test_failed $LINENO +rm -f f6nz.lz f6nz2.lz || framework_failure + for i in fox_v2.lz fox_s11.lz fox_de20.lz \ fox_bcrc.lz fox_crc0.lz fox_das46.lz fox_mes81.lz ; do "${LZIP}" -tq "${testdir}"/$i @@ -525,9 +534,9 @@ rm -f fox out || framework_failure cat "${in_lz}" "${in_lz}" > in2.lz || framework_failure cat "${in_lz}" "${in_lz}" "${in_lz}" > in3.lz || framework_failure -if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null && +if dd if=in3.lz of=trunc.lz bs=14682 count=1 2> /dev/null && [ -e trunc.lz ] && cmp in2.lz trunc.lz > /dev/null 2>&1 ; then - for i in 6 20 14734 14753 14754 14755 14756 14757 14758 ; do + for i in 6 20 14664 14683 14684 14685 14686 14687 14688 ; do dd if=in3.lz of=trunc.lz bs=$i count=1 2> /dev/null "${LZIP}" -lq trunc.lz [ $? 
= 2 ] || test_failed $LINENO $i @@ -554,7 +563,7 @@ for i in "${f6s3_lz}" "${f6s4_lz}" "${f6s5_lz}" "${f6s6_lz}" ; do [ "${lines}" -eq 9 ] || test_failed $LINENO "$i ${lines}" done -cat "${in_lz}" > ingin.lz || framework_failure +cp "${in_lz}" ingin.lz || framework_failure printf "g" >> ingin.lz || framework_failure cat "${in_lz}" >> ingin.lz || framework_failure "${LZIP}" -lq ingin.lz @@ -588,7 +597,6 @@ cmp "${f6b1}" out || test_failed $LINENO "${LZIPRECOVER}" -D0 -iq "${f6b1_lz}" > out || test_failed $LINENO cmp "${f6b1}" out || test_failed $LINENO -touch empty || framework_failure "${LZIPRECOVER}" -D0 -q ingin.lz > out [ $? = 2 ] || test_failed $LINENO cmp empty out || test_failed $LINENO @@ -604,6 +612,70 @@ cmp in2 out2 || test_failed $LINENO "${LZIPRECOVER}" -t -iq in2t.lz || test_failed $LINENO rm -f in2 in2t.lz out out2 || framework_failure +printf "\ntesting --byte-repair..." + +rm -f out.lz || framework_failure +"${LZIPRECOVER}" -B -o out.lz "${fox6_lz}" || test_failed $LINENO +[ ! -e out.lz ] || test_failed $LINENO +"${LZIPRECOVER}" -B -o out.lz "${bad2_lz}" -q +[ $? = 2 ] || test_failed $LINENO +[ ! -e out.lz ] || test_failed $LINENO +"${LZIPRECOVER}" -B -o out.lz "${bad3_lz}" -q +[ $? = 2 ] || test_failed $LINENO +[ ! -e out.lz ] || test_failed $LINENO +"${LZIPRECOVER}" -B -o out.lz "${bad4_lz}" -q +[ $? = 2 ] || test_failed $LINENO +[ ! 
-e out.lz ] || test_failed $LINENO +"${LZIPRECOVER}" -Bf -o out.lz "${f6b1_lz}" || test_failed $LINENO +cmp "${fox6_lz}" out.lz || test_failed $LINENO +"${LZIPRECOVER}" -Bf -o out.lz "${bad1_lz}" || test_failed $LINENO +cmp "${in_lz}" out.lz || test_failed $LINENO +"${LZIPRECOVER}" -B -o a/b/c/out.lz "${bad1_lz}" || test_failed $LINENO +cmp "${in_lz}" a/b/c/out.lz || test_failed $LINENO +rm -rf a || framework_failure + +cp "${f6b1_lz}" out.lz || framework_failure +"${LZIPRECOVER}" -B out.lz || test_failed $LINENO +[ -e out_fixed.lz ] || test_failed $LINENO +mv out.lz out.tar.lz || framework_failure +"${LZIPRECOVER}" -B out.tar.lz || test_failed $LINENO +[ -e out_fixed.tar.lz ] || test_failed $LINENO +mv out.tar.lz out.tlz || framework_failure +"${LZIPRECOVER}" -B out.tlz || test_failed $LINENO +[ -e out_fixed.tlz ] || test_failed $LINENO +rm -f out.tlz out_fixed.lz out_fixed.tar.lz out_fixed.tlz || + framework_failure + +printf "\ntesting --fec..." + +"${LZIPRECOVER}" -Ft "${in_lz}" || test_failed $LINENO +"${LZIPRECOVER}" -Fc "${in_lz}" -o fecfile.fec || test_failed $LINENO +cmp "${in_lz}".fec fecfile.fec || test_failed $LINENO +"${LZIPRECOVER}" -Fc -cn1 "${in_lz}" | cmp fecfile.fec - || test_failed $LINENO +"${LZIPRECOVER}" -Fc -cn4 "${in_lz}" | cmp fecfile.fec - || test_failed $LINENO +"${LZIPRECOVER}" -Fc -c --gf16 "${in_lz}" | cmp "${in_lz}".fec16 - || + test_failed $LINENO +for i in "${bad1_lz}" "${bad2_lz}" "${bad3_lz}" "${bad4_lz}" "${bad5_lz}" \ + "${bad6_lz}" "${bad7_lz}" "${bad8_lz}" "${bad9_lz}" ; do + "${LZIPRECOVER}" -q -Fr --fec-file=fecfile.fec "$i" -o fixed.lz || + test_failed $LINENO "$i" + cmp "${in_lz}" "fixed.lz" || test_failed $LINENO "$i" + rm -f fixed.lz || framework_failure +done +"${LZIPRECOVER}" -Fc "${in_lz}" -o fec/ || test_failed $LINENO +cmp fec/test.txt.lz.fec fecfile.fec || test_failed $LINENO +"${LZIPRECOVER}" -Ft "${in_lz}" --fec-file=fec/ || test_failed $LINENO +rm -rf fec || framework_failure +mkdir a +cp "${in_lz}" "${fox6_lz}" 
a || framework_failure +"${LZIPRECOVER}" -r -Fc a/ -o fec/ || test_failed $LINENO +cmp fec/test.txt.lz.fec fecfile.fec || test_failed $LINENO +[ -e fec/fox6.lz.fec ] || test_failed $LINENO +"${LZIPRECOVER}" -r -Fc a -o fec/ || test_failed $LINENO +cmp fec/a/test.txt.lz.fec fecfile.fec || test_failed $LINENO +[ -e fec/a/fox6.lz.fec ] || test_failed $LINENO +rm -rf a fec fecfile.fec || framework_failure + printf "\ntesting --merge..." rm -f out.lz || framework_failure @@ -617,7 +689,7 @@ rm -f out.lz || framework_failure "${LZIPRECOVER}" -m -o out.lz "${bad1_lz}" "${bad2_lz}" "${bad2_lz}" -q [ $? = 2 ] || test_failed $LINENO [ ! -e out.lz ] || test_failed $LINENO -cat "${bad2_lz}" > bad2.lz || framework_failure +cp "${bad2_lz}" bad2.lz || framework_failure "${LZIPRECOVER}" -m -o out.lz "${bad1_lz}" "${bad2_lz}" bad2.lz -q [ $? = 2 ] || test_failed $LINENO [ ! -e out.lz ] || test_failed $LINENO @@ -736,6 +808,7 @@ cmp "${in_lz}" out.lz || test_failed $LINENO "${LZIPRECOVER}" -mf -o out.lz "${bad5_lz}" "${bad4_lz}" "${bad3_lz}" || test_failed $LINENO cmp "${in_lz}" out.lz || test_failed $LINENO +rm -f out.lz || framework_failure cat "${bad3_lz}" "${bad4_lz}" "${bad5_lz}" "${in_lz}" > bad345.lz || framework_failure cat "${bad4_lz}" "${bad5_lz}" "${bad3_lz}" "${in_lz}" > bad453.lz || framework_failure @@ -760,43 +833,8 @@ cmp in4.lz out4.lz || test_failed $LINENO cmp in4.lz out4.lz || test_failed $LINENO rm -f bad345.lz bad453.lz bad534.lz out4.lz || framework_failure -printf "\ntesting --byte-repair..." - -rm -f out.lz || framework_failure -"${LZIPRECOVER}" -R -o out.lz "${fox6_lz}" || test_failed $LINENO -[ ! -e out.lz ] || test_failed $LINENO -"${LZIPRECOVER}" -R -o out.lz "${bad2_lz}" -q -[ $? = 2 ] || test_failed $LINENO -[ ! -e out.lz ] || test_failed $LINENO -"${LZIPRECOVER}" -R -o out.lz "${bad3_lz}" -q -[ $? = 2 ] || test_failed $LINENO -[ ! -e out.lz ] || test_failed $LINENO -"${LZIPRECOVER}" -R -o out.lz "${bad4_lz}" -q -[ $? = 2 ] || test_failed $LINENO -[ ! 
-e out.lz ] || test_failed $LINENO -"${LZIPRECOVER}" -Rf -o out.lz "${f6b1_lz}" || test_failed $LINENO -cmp "${fox6_lz}" out.lz || test_failed $LINENO -"${LZIPRECOVER}" -Rf -o out.lz "${bad1_lz}" || test_failed $LINENO -cmp "${in_lz}" out.lz || test_failed $LINENO -"${LZIPRECOVER}" -R -o a/b/c/out.lz "${bad1_lz}" || test_failed $LINENO -cmp "${in_lz}" a/b/c/out.lz || test_failed $LINENO -rm -rf a || framework_failure - -cat "${f6b1_lz}" > out.lz || framework_failure -"${LZIPRECOVER}" -R out.lz || test_failed $LINENO -[ -e out_fixed.lz ] || test_failed $LINENO -mv out.lz out.tar.lz || framework_failure -"${LZIPRECOVER}" -R out.tar.lz || test_failed $LINENO -[ -e out_fixed.tar.lz ] || test_failed $LINENO -mv out.tar.lz out.tlz || framework_failure -"${LZIPRECOVER}" -R out.tlz || test_failed $LINENO -[ -e out_fixed.tlz ] || test_failed $LINENO -rm -f out.tlz out_fixed.lz out_fixed.tar.lz out_fixed.tlz || - framework_failure - printf "\ntesting --reproduce..." -if [ -z "${LZIP_NAME}" ] ; then LZIP_NAME=lzip ; fi if /bin/sh -c "${LZIP_NAME} -s18KiB" < in > out 2> /dev/null && cmp "${in_lz}" out > /dev/null 2>&1 ; then rm -f out || framework_failure @@ -878,7 +916,7 @@ done cat a/b/c/rec*in9.lz | cmp in9.lz - || test_failed $LINENO rm -rf a || framework_failure -cat in9.lz > in9t.lz || framework_failure +cp in9.lz in9t.lz || framework_failure printf "garbage" >> in9t.lz || framework_failure "${LZIPRECOVER}" -s in9t.lz || test_failed $LINENO for i in 01 02 03 04 05 06 07 08 09 ; do @@ -1011,9 +1049,198 @@ printf "g" | cmp rec2ingin.lz - || test_failed $LINENO cat rec*ingin.lz | cmp ingin.lz - || test_failed $LINENO rm -f rec*ingin.lz || framework_failure -printf "\ntesting --*=damaged..." +printf "\ntesting --dump/remove/strip..." 
+ +"${LZIPRECOVER}" --dump=1 "${in_lz}" -o a/b/c/out.lz || test_failed $LINENO +cmp "${in_lz}" a/b/c/out.lz || test_failed $LINENO +rm -rf a || framework_failure + +"${LZIPRECOVER}" -s "${num_lz}" -o num.lz || test_failed $LINENO +[ -e rec9num.lz ] || test_failed $LINENO +[ ! -e rec10num.lz ] || test_failed $LINENO +cat rec*num.lz | cmp "${num_lz}" - || test_failed $LINENO +for i in 1 2 3 4 5 6 7 8 9 ; do + "${LZIPRECOVER}" --dump=$i "${num_lz}" | cmp rec${i}num.lz - || + test_failed $LINENO $i + "${LZIPRECOVER}" --strip=^$i "${num_lz}" | cmp rec${i}num.lz - || + test_failed $LINENO $i + cp "${num_lz}" num.lz || framework_failure + "${LZIPRECOVER}" --remove=^$i num.lz || test_failed $LINENO $i + cmp rec${i}num.lz num.lz || test_failed $LINENO $i +done +"${LZIPRECOVER}" -q --dump=1 in "${num_lz}" > out +[ $? = 2 ] || test_failed $LINENO +cmp rec1num.lz out || test_failed $LINENO +"${LZIPRECOVER}" -q --strip=^1 in "${num_lz}" > out +[ $? = 2 ] || test_failed $LINENO +cmp rec1num.lz out || test_failed $LINENO + +"${LZIPRECOVER}" --dump=r1 "${num_lz}" | cmp rec9num.lz - || + test_failed $LINENO +"${LZIPRECOVER}" --dump=d:r3 "${num_lz}" | cmp rec7num.lz - || + test_failed $LINENO +"${LZIPRECOVER}" --dump=r5:d "${num_lz}" | cmp rec5num.lz - || + test_failed $LINENO +"${LZIPRECOVER}" --dump=t:r9 "${num_lz}" | cmp rec1num.lz - || + test_failed $LINENO +"${LZIPRECOVER}" --strip=r^1:t "${num_lz}" | cmp rec9num.lz - || + test_failed $LINENO +"${LZIPRECOVER}" --strip=d:r^3:t "${num_lz}" | cmp rec7num.lz - || + test_failed $LINENO +"${LZIPRECOVER}" --strip=r^5:d:t "${num_lz}" | cmp rec5num.lz - || + test_failed $LINENO +"${LZIPRECOVER}" --strip=d:t:r^9 "${num_lz}" | cmp rec1num.lz - || + test_failed $LINENO + +"${LZIPRECOVER}" --dump=1,5 "${num_lz}" > out || test_failed $LINENO +cat rec1num.lz rec5num.lz | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" --dump=3,6 "${num_lz}" > out || test_failed $LINENO +cat rec3num.lz rec6num.lz | cmp out - || test_failed $LINENO 
+"${LZIPRECOVER}" --dump=2-4 "${num_lz}" > out || test_failed $LINENO +cat rec2num.lz rec3num.lz rec4num.lz | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" --dump=4,6,8 "${num_lz}" > out || test_failed $LINENO +cat rec4num.lz rec6num.lz rec8num.lz | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" --strip=^1,5 "${num_lz}" > out || test_failed $LINENO +cat rec1num.lz rec5num.lz | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" --strip=^3,6 "${num_lz}" > out || test_failed $LINENO +cat rec3num.lz rec6num.lz | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" --strip=^2-4 "${num_lz}" > out || test_failed $LINENO +cat rec2num.lz rec3num.lz rec4num.lz | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" --strip=^4,6,8 "${num_lz}" > out || test_failed $LINENO +cat rec4num.lz rec6num.lz rec8num.lz | cmp out - || test_failed $LINENO + +# create a subset tarlz archive +"${LZIPRECOVER}" --dump=1-2:r1:t "${num_lz}" > out || test_failed $LINENO +cat rec1num.lz rec2num.lz rec9num.lz | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" --dump=4-5:r1:t "${num_lz}" > out || test_failed $LINENO +cat rec4num.lz rec5num.lz rec9num.lz | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" --dump=7-8:r1:t "${num_lz}" > out || test_failed $LINENO +cat rec7num.lz rec8num.lz rec9num.lz | cmp out - || test_failed $LINENO + +"${LZIPRECOVER}" --dump=1-9 "${num_lz}" | cmp "${num_lz}" - || + test_failed $LINENO +"${LZIPRECOVER}" --dump=r1-9 "${num_lz}" | cmp "${num_lz}" - || + test_failed $LINENO +"${LZIPRECOVER}" --dump=1-1000 "${num_lz}" | cmp "${num_lz}" - || + test_failed $LINENO +"${LZIPRECOVER}" --dump=r1-1000 "${num_lz}" | cmp "${num_lz}" - || + test_failed $LINENO +"${LZIPRECOVER}" --dump=1-4:r1-4:5 "${num_lz}" | cmp "${num_lz}" - || + test_failed $LINENO +"${LZIPRECOVER}" --dump=^10 "${num_lz}" | cmp "${num_lz}" - || + test_failed $LINENO +"${LZIPRECOVER}" --strip=^1-9 "${num_lz}" | cmp "${num_lz}" - || + test_failed $LINENO +"${LZIPRECOVER}" --strip=r^1-9 "${num_lz}" | cmp 
"${num_lz}" - || + test_failed $LINENO +"${LZIPRECOVER}" --strip=^1-1000 "${num_lz}" | cmp "${num_lz}" - || + test_failed $LINENO +"${LZIPRECOVER}" --strip=r^1-1000 "${num_lz}" | cmp "${num_lz}" - || + test_failed $LINENO +"${LZIPRECOVER}" --strip=^1-4:r^1-4:^5 "${num_lz}" | cmp "${num_lz}" - || + test_failed $LINENO +"${LZIPRECOVER}" --strip=10 "${num_lz}" | cmp "${num_lz}" - || + test_failed $LINENO + +"${LZIPRECOVER}" -i --dump=r1 "${nbt_lz}" | cmp rec9num.lz - || + test_failed $LINENO +"${LZIPRECOVER}" -i --dump=r3 "${nbt_lz}" | cmp rec7num.lz - || + test_failed $LINENO +"${LZIPRECOVER}" -i --dump=r7 "${nbt_lz}" | cmp rec4num.lz - || + test_failed $LINENO +"${LZIPRECOVER}" -i --strip=r^1:t "${nbt_lz}" | cmp rec9num.lz - || + test_failed $LINENO +"${LZIPRECOVER}" -i --strip=r^3:t "${nbt_lz}" | cmp rec7num.lz - || + test_failed $LINENO +"${LZIPRECOVER}" -i --strip=r^7:t "${nbt_lz}" | cmp rec4num.lz - || + test_failed $LINENO + +"${LZIPRECOVER}" -i --dump=4 -f -o out "${nbt_lz}" || test_failed $LINENO +printf "gap" | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" -i --dump=8 "${nbt_lz}" > out || test_failed $LINENO +printf "damaged" | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" -i --dump=tdata "${nbt_lz}" > out || test_failed $LINENO +printf "trailing data" | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" -i --dump=4:t "${nbt_lz}" > out || test_failed $LINENO +printf "gaptrailing data" | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" -i --dump=4,8:t "${nbt_lz}" > out || test_failed $LINENO +printf "gapdamagedtrailing data" | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" -i --dump=4,8 "${nbt_lz}" > out || test_failed $LINENO +printf "gapdamaged" | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" -i --dump=damaged "${nbt_lz}" > out || test_failed $LINENO +printf "gapdamaged" | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" -i --dump=d:t "${nbt_lz}" > out || test_failed $LINENO +printf "gapdamagedtrailing data" | cmp out - || test_failed 
$LINENO +"${LZIPRECOVER}" -i --strip=^4:t -f -o out "${nbt_lz}" || test_failed $LINENO +printf "gap" | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" -i --strip=^8:t "${nbt_lz}" > out || test_failed $LINENO +printf "damaged" | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" -i --strip=1-11 "${nbt_lz}" > out || test_failed $LINENO +cmp empty out || test_failed $LINENO +"${LZIPRECOVER}" -i --strip=^4 "${nbt_lz}" > out || test_failed $LINENO +printf "gaptrailing data" | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" -i --strip=^4,8 "${nbt_lz}" > out || test_failed $LINENO +printf "gapdamagedtrailing data" | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" -i --strip=^4,8:t "${nbt_lz}" > out || test_failed $LINENO +printf "gapdamaged" | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" -i --strip=r^4,8:t "${nbt_lz}" > out || test_failed $LINENO +printf "gapdamaged" | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" -i --strip=r^4,8 "${nbt_lz}" > out || test_failed $LINENO +printf "gapdamagedtrailing data" | cmp out - || test_failed $LINENO + +cp "${num_lz}" num.lz || framework_failure +"${LZIPRECOVER}" --remove=1-3,5,7,9 num.lz || test_failed $LINENO +cat rec4num.lz rec6num.lz rec8num.lz | cmp num.lz - || test_failed $LINENO +cp "${num_lz}" num.lz || framework_failure +"${LZIPRECOVER}" --remove=^4,6,8 num.lz || test_failed $LINENO +cat rec4num.lz rec6num.lz rec8num.lz | cmp num.lz - || test_failed $LINENO +cp "${num_lz}" num.lz || framework_failure +"${LZIPRECOVER}" --remove=r1,3,5,7-9 num.lz || test_failed $LINENO +cat rec4num.lz rec6num.lz rec8num.lz | cmp num.lz - || test_failed $LINENO +cp "${num_lz}" num.lz || framework_failure +"${LZIPRECOVER}" --remove=r^2,4,6 num.lz || test_failed $LINENO +cat rec4num.lz rec6num.lz rec8num.lz | cmp num.lz - || test_failed $LINENO +rm -f num.lz || framework_failure + +cp "${nbt_lz}" nbt.lz || framework_failure +"${LZIPRECOVER}" -i --remove=4,8:tdata nbt.lz || test_failed $LINENO +cmp "${num_lz}" nbt.lz || 
test_failed $LINENO +cp "${nbt_lz}" nbt.lz || framework_failure +"${LZIPRECOVER}" -i --remove=r4,8:tdata nbt.lz || test_failed $LINENO +cmp "${num_lz}" nbt.lz || test_failed $LINENO +cp "${nbt_lz}" nbt.lz || framework_failure +"${LZIPRECOVER}" --remove=damaged:tdata nbt.lz || test_failed $LINENO +cmp "${num_lz}" nbt.lz || test_failed $LINENO +rm -f rec*num.lz nbt.lz || framework_failure -cat "${in_lz}" > in.lz || framework_failure +for i in 1 2 3 4 5 6 7 8 9 10 ; do + "${LZIPRECOVER}" -i --strip=1-$i "${nbt_lz}" > out || + test_failed $LINENO $i + cp "${nbt_lz}" nbt.lz || framework_failure + "${LZIPRECOVER}" -i --remove=1-$i nbt.lz || test_failed $LINENO $i + cmp nbt.lz out || test_failed $LINENO $i +done +rm -f nbt.lz || framework_failure + +cp "${in_em}" test_3m.txt.lz || framework_failure +"${LZIPRECOVER}" --remove=empty test_3m.txt.lz || test_failed $LINENO +"${LZIPRECOVER}" -M test_3m.txt.lz | cmp "${testdir}"/test_3m.txt.lz.md5 - || + test_failed $LINENO +"${LZIPRECOVER}" --dump=2,4,7 "${in_em}" | cmp test_3m.txt.lz - || + test_failed $LINENO +"${LZIPRECOVER}" --strip=e "${in_em}" | cmp test_3m.txt.lz - || + test_failed $LINENO +"${LZIPRECOVER}" --strip=e "${in_em}" | cmp test_3m.txt.lz - || + test_failed $LINENO +"${LZIPRECOVER}" --strip=1,3,5-6,8 "${in_em}" | cmp test_3m.txt.lz - || + test_failed $LINENO +"${LZIPRECOVER}" --dump=emp "${in_em}" | "${LZIP}" -d --ignore-empty | \ + cmp empty - || test_failed $LINENO +rm -f test_3m.txt.lz || framework_failure + +printf "\ntesting --dump/remove/strip=damaged..." 
+ +cp "${in_lz}" in.lz || framework_failure cat "${in_lz}" in > int.lz || framework_failure "${LZIPRECOVER}" --dump=damaged in.lz > out || test_failed $LINENO cmp empty out || test_failed $LINENO @@ -1043,7 +1270,7 @@ cmp in9.lz out || test_failed $LINENO cmp in9t.lz out || test_failed $LINENO "${LZIPRECOVER}" --remove=damaged in9t.lz || test_failed $LINENO cat in9.lz in | cmp in9t.lz - || test_failed $LINENO -cat in9.lz > in9t.lz || framework_failure +cp in9.lz in9t.lz || framework_failure "${LZIPRECOVER}" --remove=damaged in9t.lz || test_failed $LINENO cmp in9.lz in9t.lz || test_failed $LINENO rm -f in9t.lz || framework_failure @@ -1072,7 +1299,7 @@ cmp "${f6b1_lz}" out || test_failed $LINENO cmp empty out || test_failed $LINENO "${LZIPRECOVER}" -q --strip=damaged f6bt.lz > out || test_failed $LINENO cmp empty out || test_failed $LINENO -cat "${f6b1_lz}" > f6b.lz || framework_failure +cp "${f6b1_lz}" f6b.lz || framework_failure "${LZIPRECOVER}" -q --remove=damaged f6b.lz [ $? = 2 ] || test_failed $LINENO cmp "${f6b1_lz}" f6b.lz || test_failed $LINENO @@ -1094,7 +1321,7 @@ cat "${fox_lz}" "${fox_lz}" "${fox_lz}" "${fox_lz}" "${fox_lz}" > fox5.lz || cmp fox5.lz out || test_failed $LINENO "${LZIPRECOVER}" --strip=damaged f6bt.lz > out || test_failed $LINENO cat fox5.lz in | cmp out - || test_failed $LINENO -cat "${f6b2_lz}" > f6b.lz || framework_failure +cp "${f6b2_lz}" f6b.lz || framework_failure "${LZIPRECOVER}" --remove=damaged f6b.lz || test_failed $LINENO cmp fox5.lz f6b.lz || test_failed $LINENO "${LZIPRECOVER}" --remove=damaged f6bt.lz || test_failed $LINENO @@ -1113,7 +1340,7 @@ cat "${fox_lz}" "${fox_lz}" "${fox_lz}" > fox3.lz || framework_failure cmp fox3.lz out || test_failed $LINENO "${LZIPRECOVER}" --strip=damaged f6bt.lz > out || test_failed $LINENO cat fox3.lz in | cmp out - || test_failed $LINENO -cat "${f6b3_lz}" > f6b.lz || framework_failure +cp "${f6b3_lz}" f6b.lz || framework_failure "${LZIPRECOVER}" --remove=damaged f6b.lz || test_failed $LINENO 
cmp fox3.lz f6b.lz || test_failed $LINENO "${LZIPRECOVER}" --remove=damaged f6bt.lz || test_failed $LINENO @@ -1134,7 +1361,7 @@ for i in "${f6b4_lz}" "${f6b5_lz}" ; do "${LZIPRECOVER}" --strip=damaged f6bt.lz > out || test_failed $LINENO "$i" cmp fox4.lz out || test_failed $LINENO "$i" - cat "$i" > f6b.lz || framework_failure + cp "$i" f6b.lz || framework_failure "${LZIPRECOVER}" --remove=damaged f6b.lz || test_failed $LINENO "$i" cmp fox4.lz f6b.lz || test_failed $LINENO "$i" "${LZIPRECOVER}" --remove=damaged f6bt.lz || test_failed $LINENO "$i" @@ -1151,7 +1378,7 @@ cat fox5.lz out | cmp "${f6b6_lz}" - || test_failed $LINENO cmp fox5.lz out || test_failed $LINENO "${LZIPRECOVER}" --strip=damaged f6bt.lz > out || test_failed $LINENO cat fox5.lz in | cmp out - || test_failed $LINENO -cat "${f6b6_lz}" > f6b.lz || framework_failure +cp "${f6b6_lz}" f6b.lz || framework_failure "${LZIPRECOVER}" --remove=damaged f6b.lz || test_failed $LINENO cmp fox5.lz f6b.lz || test_failed $LINENO "${LZIPRECOVER}" --remove=damaged f6bt.lz || test_failed $LINENO @@ -1171,7 +1398,7 @@ for i in "${f6s1_lz}" "${f6s2_lz}" ; do "${LZIPRECOVER}" -q --strip=damaged f6bt.lz > out || test_failed $LINENO "$i" cmp empty out || test_failed $LINENO "$i" - cat "$i" > f6b.lz || framework_failure + cp "$i" f6b.lz || framework_failure "${LZIPRECOVER}" -q --remove=damaged f6b.lz [ $? 
= 2 ] || test_failed $LINENO "$i" cmp "$i" f6b.lz || test_failed $LINENO "$i" @@ -1193,13 +1420,13 @@ for i in "${f6s3_lz}" "${f6s4_lz}" "${f6s5_lz}" "${f6s6_lz}" ; do "${LZIPRECOVER}" --strip=damaged f6bt.lz > out || test_failed $LINENO "$i" cat "$i" in | cmp out - || test_failed $LINENO "$i" - cat "$i" > f6b.lz || framework_failure + cp "$i" f6b.lz || framework_failure "${LZIPRECOVER}" --remove=damaged f6b.lz || test_failed $LINENO "$i" cmp "$i" f6b.lz || test_failed $LINENO "$i" "${LZIPRECOVER}" --remove=damaged f6bt.lz || test_failed $LINENO "$i" cat "$i" in | cmp f6bt.lz - || test_failed $LINENO "$i" done -rm -f f6b.lz f6bt.lz || framework_failure +rm -f f6b.lz f6bt.lz empty || framework_failure cat ingin.lz "${inD}" > ingint.lz || framework_failure "${LZIPRECOVER}" --dump=damaged ingin.lz > out || test_failed $LINENO @@ -1210,7 +1437,7 @@ printf "g" | cmp out - || test_failed $LINENO cmp in2.lz out || test_failed $LINENO "${LZIPRECOVER}" --strip=damaged ingint.lz > out || test_failed $LINENO cat "${in_lz}" "${in_lz}" "${inD}" | cmp out - || test_failed $LINENO -cat ingin.lz > ingin2.lz || framework_failure +cp ingin.lz ingin2.lz || framework_failure "${LZIPRECOVER}" --remove=damaged ingin2.lz || test_failed $LINENO cmp in2.lz ingin2.lz || test_failed $LINENO "${LZIPRECOVER}" --remove=damaged ingint.lz || test_failed $LINENO @@ -1253,7 +1480,7 @@ cat fox5.lz "${in_lz}" | cmp out - || test_failed $LINENO test_failed $LINENO cat fox5.lz "${in_lz}" | cmp out - || test_failed $LINENO # -cat "${f6b2_lz}" > f6b.lz || framework_failure +cp "${f6b2_lz}" f6b.lz || framework_failure "${LZIPRECOVER}" -q --remove=damaged f6b.lz bad2t.lz f6bt.lz [ $? 
= 2 ] || test_failed $LINENO cat "${bad2_lz}" in | cmp bad2t.lz - || test_failed $LINENO @@ -1262,7 +1489,7 @@ cat fox5.lz in | cmp f6bt.lz - || test_failed $LINENO cat "${bad2_lz}" in > bad2t.lz || framework_failure cat "${fox6_lz}" "${inD}" > fox6t.lz || framework_failure cat "${f6b1_lz}" in > f6abt.lz || framework_failure -cat "${f6b2_lz}" > f6b.lz || framework_failure +cp "${f6b2_lz}" f6b.lz || framework_failure cat "${f6b2_lz}" in > f6bt.lz || framework_failure "${LZIPRECOVER}" -q --remove=d:t fox6t.lz f6abt.lz f6b.lz bad2t.lz f6bt.lz [ $? = 2 ] || test_failed $LINENO @@ -1271,7 +1498,8 @@ cat "${f6b1_lz}" in | cmp f6abt.lz - || test_failed $LINENO cmp "${fox6_lz}" fox6t.lz || test_failed $LINENO cmp fox5.lz f6b.lz || test_failed $LINENO cmp fox5.lz f6bt.lz || test_failed $LINENO -rm -f fox6t.lz f6b.lz f6bt.lz bad2t.lz fox5.lz out2 out4 || framework_failure +rm -f fox6t.lz f6b.lz f6bt.lz f6abt.lz bad2t.lz fox5.lz out2 out4 || + framework_failure printf "\ntesting trailing data..." @@ -1338,196 +1566,6 @@ for i in "${f6s3_lz}" "${f6s4_lz}" "${f6s5_lz}" "${f6s6_lz}" ; do rm -f out tdata f6t.lz || framework_failure done -printf "\ntesting --dump/remove/strip..." - -"${LZIPRECOVER}" --dump=1 "${in_lz}" -o a/b/c/out.lz || test_failed $LINENO -cmp "${in_lz}" a/b/c/out.lz || test_failed $LINENO -rm -rf a || framework_failure - -"${LZIPRECOVER}" -s "${num_lz}" -o num.lz || test_failed $LINENO -[ -e rec9num.lz ] || test_failed $LINENO -[ ! 
-e rec10num.lz ] || test_failed $LINENO -cat rec*num.lz | cmp "${num_lz}" - || test_failed $LINENO -for i in 1 2 3 4 5 6 7 8 9 ; do - "${LZIPRECOVER}" --dump=$i "${num_lz}" | cmp rec${i}num.lz - || - test_failed $LINENO $i - "${LZIPRECOVER}" --strip=^$i "${num_lz}" | cmp rec${i}num.lz - || - test_failed $LINENO $i - cat "${num_lz}" > num.lz || framework_failure - "${LZIPRECOVER}" --remove=^$i num.lz || test_failed $LINENO $i - cmp rec${i}num.lz num.lz || test_failed $LINENO $i -done -"${LZIPRECOVER}" -q --dump=1 in "${num_lz}" > out -[ $? = 2 ] || test_failed $LINENO -cmp rec1num.lz out || test_failed $LINENO -"${LZIPRECOVER}" -q --strip=^1 in "${num_lz}" > out -[ $? = 2 ] || test_failed $LINENO -cmp rec1num.lz out || test_failed $LINENO - -"${LZIPRECOVER}" --dump=r1 "${num_lz}" | cmp rec9num.lz - || - test_failed $LINENO -"${LZIPRECOVER}" --dump=d:r3 "${num_lz}" | cmp rec7num.lz - || - test_failed $LINENO -"${LZIPRECOVER}" --dump=r5:d "${num_lz}" | cmp rec5num.lz - || - test_failed $LINENO -"${LZIPRECOVER}" --dump=t:r9 "${num_lz}" | cmp rec1num.lz - || - test_failed $LINENO -"${LZIPRECOVER}" --strip=r^1:t "${num_lz}" | cmp rec9num.lz - || - test_failed $LINENO -"${LZIPRECOVER}" --strip=d:r^3:t "${num_lz}" | cmp rec7num.lz - || - test_failed $LINENO -"${LZIPRECOVER}" --strip=r^5:d:t "${num_lz}" | cmp rec5num.lz - || - test_failed $LINENO -"${LZIPRECOVER}" --strip=d:t:r^9 "${num_lz}" | cmp rec1num.lz - || - test_failed $LINENO - -"${LZIPRECOVER}" --dump=1,5 "${num_lz}" > out || test_failed $LINENO -cat rec1num.lz rec5num.lz | cmp out - || test_failed $LINENO -"${LZIPRECOVER}" --dump=3,6 "${num_lz}" > out || test_failed $LINENO -cat rec3num.lz rec6num.lz | cmp out - || test_failed $LINENO -"${LZIPRECOVER}" --dump=2-4 "${num_lz}" > out || test_failed $LINENO -cat rec2num.lz rec3num.lz rec4num.lz | cmp out - || test_failed $LINENO -"${LZIPRECOVER}" --dump=4,6,8 "${num_lz}" > out || test_failed $LINENO -cat rec4num.lz rec6num.lz rec8num.lz | cmp out - || test_failed 
$LINENO -"${LZIPRECOVER}" --strip=^1,5 "${num_lz}" > out || test_failed $LINENO -cat rec1num.lz rec5num.lz | cmp out - || test_failed $LINENO -"${LZIPRECOVER}" --strip=^3,6 "${num_lz}" > out || test_failed $LINENO -cat rec3num.lz rec6num.lz | cmp out - || test_failed $LINENO -"${LZIPRECOVER}" --strip=^2-4 "${num_lz}" > out || test_failed $LINENO -cat rec2num.lz rec3num.lz rec4num.lz | cmp out - || test_failed $LINENO -"${LZIPRECOVER}" --strip=^4,6,8 "${num_lz}" > out || test_failed $LINENO -cat rec4num.lz rec6num.lz rec8num.lz | cmp out - || test_failed $LINENO - -# create a subset tarlz archive -"${LZIPRECOVER}" --dump=1-2:r1:t "${num_lz}" > out || test_failed $LINENO -cat rec1num.lz rec2num.lz rec9num.lz | cmp out - || test_failed $LINENO -"${LZIPRECOVER}" --dump=4-5:r1:t "${num_lz}" > out || test_failed $LINENO -cat rec4num.lz rec5num.lz rec9num.lz | cmp out - || test_failed $LINENO -"${LZIPRECOVER}" --dump=7-8:r1:t "${num_lz}" > out || test_failed $LINENO -cat rec7num.lz rec8num.lz rec9num.lz | cmp out - || test_failed $LINENO - -"${LZIPRECOVER}" --dump=1-9 "${num_lz}" | cmp "${num_lz}" - || - test_failed $LINENO -"${LZIPRECOVER}" --dump=r1-9 "${num_lz}" | cmp "${num_lz}" - || - test_failed $LINENO -"${LZIPRECOVER}" --dump=1-1000 "${num_lz}" | cmp "${num_lz}" - || - test_failed $LINENO -"${LZIPRECOVER}" --dump=r1-1000 "${num_lz}" | cmp "${num_lz}" - || - test_failed $LINENO -"${LZIPRECOVER}" --dump=1-4:r1-4:5 "${num_lz}" | cmp "${num_lz}" - || - test_failed $LINENO -"${LZIPRECOVER}" --dump=^10 "${num_lz}" | cmp "${num_lz}" - || - test_failed $LINENO -"${LZIPRECOVER}" --strip=^1-9 "${num_lz}" | cmp "${num_lz}" - || - test_failed $LINENO -"${LZIPRECOVER}" --strip=r^1-9 "${num_lz}" | cmp "${num_lz}" - || - test_failed $LINENO -"${LZIPRECOVER}" --strip=^1-1000 "${num_lz}" | cmp "${num_lz}" - || - test_failed $LINENO -"${LZIPRECOVER}" --strip=r^1-1000 "${num_lz}" | cmp "${num_lz}" - || - test_failed $LINENO -"${LZIPRECOVER}" --strip=^1-4:r^1-4:^5 "${num_lz}" | cmp 
"${num_lz}" - || - test_failed $LINENO -"${LZIPRECOVER}" --strip=10 "${num_lz}" | cmp "${num_lz}" - || - test_failed $LINENO - -"${LZIPRECOVER}" -i --dump=r1 "${nbt_lz}" | cmp rec9num.lz - || - test_failed $LINENO -"${LZIPRECOVER}" -i --dump=r3 "${nbt_lz}" | cmp rec7num.lz - || - test_failed $LINENO -"${LZIPRECOVER}" -i --dump=r7 "${nbt_lz}" | cmp rec4num.lz - || - test_failed $LINENO -"${LZIPRECOVER}" -i --strip=r^1:t "${nbt_lz}" | cmp rec9num.lz - || - test_failed $LINENO -"${LZIPRECOVER}" -i --strip=r^3:t "${nbt_lz}" | cmp rec7num.lz - || - test_failed $LINENO -"${LZIPRECOVER}" -i --strip=r^7:t "${nbt_lz}" | cmp rec4num.lz - || - test_failed $LINENO - -"${LZIPRECOVER}" -i --dump=4 -f -o out "${nbt_lz}" || test_failed $LINENO -printf "gap" | cmp out - || test_failed $LINENO -"${LZIPRECOVER}" -i --dump=8 "${nbt_lz}" > out || test_failed $LINENO -printf "damaged" | cmp out - || test_failed $LINENO -"${LZIPRECOVER}" -i --dump=tdata "${nbt_lz}" > out || test_failed $LINENO -printf "trailing data" | cmp out - || test_failed $LINENO -"${LZIPRECOVER}" -i --dump=4:t "${nbt_lz}" > out || test_failed $LINENO -printf "gaptrailing data" | cmp out - || test_failed $LINENO -"${LZIPRECOVER}" -i --dump=4,8:t "${nbt_lz}" > out || test_failed $LINENO -printf "gapdamagedtrailing data" | cmp out - || test_failed $LINENO -"${LZIPRECOVER}" -i --dump=4,8 "${nbt_lz}" > out || test_failed $LINENO -printf "gapdamaged" | cmp out - || test_failed $LINENO -"${LZIPRECOVER}" -i --dump=damaged "${nbt_lz}" > out || test_failed $LINENO -printf "gapdamaged" | cmp out - || test_failed $LINENO -"${LZIPRECOVER}" -i --dump=d:t "${nbt_lz}" > out || test_failed $LINENO -printf "gapdamagedtrailing data" | cmp out - || test_failed $LINENO -"${LZIPRECOVER}" -i --strip=^4:t -f -o out "${nbt_lz}" || test_failed $LINENO -printf "gap" | cmp out - || test_failed $LINENO -"${LZIPRECOVER}" -i --strip=^8:t "${nbt_lz}" > out || test_failed $LINENO -printf "damaged" | cmp out - || test_failed $LINENO 
-"${LZIPRECOVER}" -i --strip=1-11 "${nbt_lz}" > out || test_failed $LINENO -cmp empty out || test_failed $LINENO -"${LZIPRECOVER}" -i --strip=^4 "${nbt_lz}" > out || test_failed $LINENO -printf "gaptrailing data" | cmp out - || test_failed $LINENO -"${LZIPRECOVER}" -i --strip=^4,8 "${nbt_lz}" > out || test_failed $LINENO -printf "gapdamagedtrailing data" | cmp out - || test_failed $LINENO -"${LZIPRECOVER}" -i --strip=^4,8:t "${nbt_lz}" > out || test_failed $LINENO -printf "gapdamaged" | cmp out - || test_failed $LINENO -"${LZIPRECOVER}" -i --strip=r^4,8:t "${nbt_lz}" > out || test_failed $LINENO -printf "gapdamaged" | cmp out - || test_failed $LINENO -"${LZIPRECOVER}" -i --strip=r^4,8 "${nbt_lz}" > out || test_failed $LINENO -printf "gapdamagedtrailing data" | cmp out - || test_failed $LINENO - -cat "${num_lz}" > num.lz || framework_failure -"${LZIPRECOVER}" --remove=1-3,5,7,9 num.lz || test_failed $LINENO -cat rec4num.lz rec6num.lz rec8num.lz | cmp num.lz - || test_failed $LINENO -cat "${num_lz}" > num.lz || framework_failure -"${LZIPRECOVER}" --remove=^4,6,8 num.lz || test_failed $LINENO -cat rec4num.lz rec6num.lz rec8num.lz | cmp num.lz - || test_failed $LINENO -cat "${num_lz}" > num.lz || framework_failure -"${LZIPRECOVER}" --remove=r1,3,5,7-9 num.lz || test_failed $LINENO -cat rec4num.lz rec6num.lz rec8num.lz | cmp num.lz - || test_failed $LINENO -cat "${num_lz}" > num.lz || framework_failure -"${LZIPRECOVER}" --remove=r^2,4,6 num.lz || test_failed $LINENO -cat rec4num.lz rec6num.lz rec8num.lz | cmp num.lz - || test_failed $LINENO - -cat "${nbt_lz}" > nbt.lz || framework_failure -"${LZIPRECOVER}" -i --remove=4,8:tdata nbt.lz || test_failed $LINENO -cmp "${num_lz}" nbt.lz || test_failed $LINENO -cat "${nbt_lz}" > nbt.lz || framework_failure -"${LZIPRECOVER}" -i --remove=r4,8:tdata nbt.lz || test_failed $LINENO -cmp "${num_lz}" nbt.lz || test_failed $LINENO -cat "${nbt_lz}" > nbt.lz || framework_failure -"${LZIPRECOVER}" --remove=damaged:tdata nbt.lz || 
test_failed $LINENO -cmp "${num_lz}" nbt.lz || test_failed $LINENO -rm -f rec*num.lz nbt.lz || framework_failure - -for i in 1 2 3 4 5 6 7 8 9 10 ; do - "${LZIPRECOVER}" -i --strip=1-$i "${nbt_lz}" > out || - test_failed $LINENO $i - cat "${nbt_lz}" > nbt.lz || framework_failure - "${LZIPRECOVER}" -i --remove=1-$i nbt.lz || test_failed $LINENO $i - cmp nbt.lz out || test_failed $LINENO $i -done -rm -f nbt.lz || framework_failure - -cat "${in_em}" > test_3m.txt.lz || framework_failure -"${LZIPRECOVER}" --remove=empty test_3m.txt.lz || test_failed $LINENO -"${LZIPRECOVER}" -M test_3m.txt.lz | cmp "${testdir}"/test_3m.txt.lz.md5 - || - test_failed $LINENO -"${LZIPRECOVER}" --dump=2,4,7 "${in_em}" | cmp test_3m.txt.lz - || - test_failed $LINENO -"${LZIPRECOVER}" --strip=e "${in_em}" | cmp test_3m.txt.lz - || - test_failed $LINENO -"${LZIPRECOVER}" --empty-error --strip=e "${in_em}" | cmp test_3m.txt.lz - || - test_failed $LINENO -"${LZIPRECOVER}" --strip=1,3,5-6,8 "${in_em}" | cmp test_3m.txt.lz - || - test_failed $LINENO -"${LZIPRECOVER}" -q --strip=1,3,5-6,8 --empty-error "${in_em}" > out -[ $? = 2 ] || test_failed $LINENO -"${LZIPRECOVER}" --dump=emp "${in_em}" | "${LZIP}" -d | cmp empty - || - test_failed $LINENO -rm -f test_3m.txt.lz empty out || framework_failure - echo if [ ${fail} = 0 ] ; then echo "tests completed successfully." 
diff --git a/testsuite/fox6_mark.lz b/testsuite/fox6_mark.lz Binary files differdeleted file mode 100644 index 32b2ac0..0000000 --- a/testsuite/fox6_mark.lz +++ /dev/null diff --git a/testsuite/fox6_nz.lz b/testsuite/fox6_nz.lz Binary files differnew file mode 100644 index 0000000..cd5b481 --- /dev/null +++ b/testsuite/fox6_nz.lz diff --git a/testsuite/test.txt b/testsuite/test.txt index 9196a3a..423f0c0 100644 --- a/testsuite/test.txt +++ b/testsuite/test.txt @@ -1,8 +1,7 @@ GNU GENERAL PUBLIC LICENSE Version 2, June 1991 - Copyright (C) 1989, 1991 Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Copyright (C) 1989, 1991 Free Software Foundation, Inc. <http://fsf.org/> Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. @@ -339,8 +338,7 @@ Public License instead of this License. GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
- Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
- 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc. <http://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
diff --git a/testsuite/test.txt.lz b/testsuite/test.txt.lz Binary files differindex 22cea6e..5dc169f 100644 --- a/testsuite/test.txt.lz +++ b/testsuite/test.txt.lz diff --git a/testsuite/test.txt.lz.fec b/testsuite/test.txt.lz.fec Binary files differnew file mode 100644 index 0000000..265ec6b --- /dev/null +++ b/testsuite/test.txt.lz.fec diff --git a/testsuite/test.txt.lz.fec16 b/testsuite/test.txt.lz.fec16 Binary files differnew file mode 100644 index 0000000..534d233 --- /dev/null +++ b/testsuite/test.txt.lz.fec16 diff --git a/testsuite/test.txt.lzma b/testsuite/test.txt.lzma Binary files differindex 53e54ea..091c023 100644 --- a/testsuite/test.txt.lzma +++ b/testsuite/test.txt.lzma diff --git a/testsuite/test21723.txt b/testsuite/test21636.txt index 7194547..7194547 100644 --- a/testsuite/test21723.txt +++ b/testsuite/test21636.txt diff --git a/testsuite/test_3m.txt.lz.md5 b/testsuite/test_3m.txt.lz.md5 index 5bec6bc..e9f47d7 100644 --- a/testsuite/test_3m.txt.lz.md5 +++ b/testsuite/test_3m.txt.lz.md5 @@ -1 +1 @@ -6a6bb58464ec8567eab17015064d0c5b test_3m.txt.lz +aa8ca65001d627f89e7494fa829e710f test_3m.txt.lz diff --git a/testsuite/test_bad1.lz b/testsuite/test_bad1.lz Binary files differindex 2129c90..5300a0c 100644 --- a/testsuite/test_bad1.lz +++ b/testsuite/test_bad1.lz diff --git a/testsuite/test_bad2.lz b/testsuite/test_bad2.lz Binary files differindex e013c34..8691377 100644 --- a/testsuite/test_bad2.lz +++ b/testsuite/test_bad2.lz diff --git a/testsuite/test_bad3.lz b/testsuite/test_bad3.lz Binary files differindex 0ae9e7d..5f82877 100644 --- a/testsuite/test_bad3.lz +++ b/testsuite/test_bad3.lz diff --git a/testsuite/test_bad4.lz b/testsuite/test_bad4.lz Binary files differindex ddb0d6b..c816609 100644 --- a/testsuite/test_bad4.lz +++ b/testsuite/test_bad4.lz diff --git a/testsuite/test_bad5.lz b/testsuite/test_bad5.lz Binary files differindex 6fab91c..7b002b8 100644 --- a/testsuite/test_bad5.lz +++ b/testsuite/test_bad5.lz diff --git 
a/testsuite/test_bad6.lz b/testsuite/test_bad6.lz Binary files differindex cfea88c..554b3f8 100644 --- a/testsuite/test_bad6.lz +++ b/testsuite/test_bad6.lz diff --git a/testsuite/test_bad6.txt b/testsuite/test_bad6.txt index b47462e..cd4dc0a 100644 --- a/testsuite/test_bad6.txt +++ b/testsuite/test_bad6.txt @@ -1,6 +1,3 @@ -) You can apply it to -your programs, too. - When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for @@ -23,4 +20,10 @@ rights. (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. - Also, for each author's protection and ours, we want to
\ No newline at end of file + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + diff --git a/testsuite/test_bad7.lz b/testsuite/test_bad7.lz Binary files differindex 77f2b85..4275cfc 100644 --- a/testsuite/test_bad7.lz +++ b/testsuite/test_bad7.lz diff --git a/testsuite/test_bad7.txt b/testsuite/test_bad7.txt index be54c7c..ba4ade5 100644 --- a/testsuite/test_bad7.txt +++ b/testsuite/test_bad7.txt @@ -1,13 +1,3 @@ -, so
-that any problems introduced by others will not reflect on the original
-authors' reputations.
-
- Finally, any free program is threatened constantly by software
-patents. We wish to avoid the danger that redistributors of a free
-program will individually obtain patent licenses, in effect making the
-program proprietary. To prevent this, we have made it clear that any
-patent must be licensed for everyone's free use or not licensed at all.
-
The precise terms and conditions for copying, distribution and
modification follow.
@@ -212,4 +202,22 @@ of promoting the sharing and reuse of software generally. NO WARRANTY
- 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
\ No newline at end of file + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
diff --git a/testsuite/test_bad8.lz b/testsuite/test_bad8.lz Binary files differindex fca701b..6c8b878 100644 --- a/testsuite/test_bad8.lz +++ b/testsuite/test_bad8.lz diff --git a/testsuite/test_bad9.lz b/testsuite/test_bad9.lz Binary files differindex becb0ec..3851682 100644 --- a/testsuite/test_bad9.lz +++ b/testsuite/test_bad9.lz diff --git a/testsuite/test_bad9.txt b/testsuite/test_bad9.txt index b72a626..0bbf61f 100644 --- a/testsuite/test_bad9.txt +++ b/testsuite/test_bad9.txt @@ -1,5 +1,13 @@ -General -Public License instead of this License. GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
diff --git a/testsuite/test_em.txt.lz b/testsuite/test_em.txt.lz Binary files differindex 7e96250..9e093be 100644 --- a/testsuite/test_em.txt.lz +++ b/testsuite/test_em.txt.lz diff --git a/unzcrash.cc b/unzcrash.cc index 107189f..b0c4871 100644 --- a/unzcrash.cc +++ b/unzcrash.cc @@ -26,7 +26,7 @@ #include <algorithm> #include <cerrno> -#include <climits> // SSIZE_MAX +#include <climits> // CHAR_BIT, SSIZE_MAX #include <csignal> #include <cstdio> #include <cstdlib> @@ -54,8 +54,6 @@ namespace { const char * const program_name = "unzcrash"; const char * invocation_name = program_name; // default value -int verbosity = 0; - void show_help() { @@ -142,28 +140,29 @@ uint8_t * read_file( const char * const filename, long * const file_sizep ) long buffer_size = 65536; uint8_t * buffer = (uint8_t *)std::malloc( buffer_size ); - if( !buffer ) { show_error( mem_msg ); return 0; } + if( !buffer ) { show_file_error( filename, mem_msg ); return 0; } long file_size = std::fread( buffer, 1, buffer_size, f ); while( file_size >= buffer_size || ( !std::ferror( f ) && !std::feof( f ) ) ) { if( file_size >= buffer_size ) // may be false because of EINTR { if( buffer_size >= LONG_MAX ) - { show_file_error( filename, "Input file is larger than LONG_MAX." ); + { show_file_error( filename, large_file_msg ); std::free( buffer ); return 0; } buffer_size = ( buffer_size <= LONG_MAX / 2 ) ? 
2 * buffer_size : LONG_MAX; uint8_t * const tmp = (uint8_t *)std::realloc( buffer, buffer_size ); - if( !tmp ) { show_error( mem_msg ); std::free( buffer ); return 0; } + if( !tmp ) + { show_file_error( filename, mem_msg ); std::free( buffer ); return 0; } buffer = tmp; } file_size += std::fread( buffer + file_size, 1, buffer_size - file_size, f ); } if( std::ferror( f ) || !std::feof( f ) ) - { - show_file_error( filename, "Error reading input file", errno ); - std::free( buffer ); return 0; - } - std::fclose( f ); + { show_file_error( filename, read_error_msg, errno ); + std::free( buffer ); return 0; } + if( std::fclose( f ) != 0 ) + { show_file_error( filename, "Error closing input file", errno ); + std::free( buffer ); return 0; } *file_sizep = file_size; return buffer; } @@ -173,13 +172,13 @@ class Bitset8 // 8 value bitset (1 to 8) { bool data[8]; static bool valid_digit( const unsigned char ch ) - { return ( ch >= '1' && ch <= '8' ); } + { return ch >= '1' && ch <= '8'; } public: Bitset8() { for( int i = 0; i < 8; ++i ) data[i] = true; } bool includes( const int i ) const - { return ( i >= 1 && i <= 8 && data[i-1] ); } + { return i >= 1 && i <= 8 && data[i-1]; } // Recognized formats: 1 1,2,3 1-4 1,3-5,8 1-3,5-8 void parse_bs( const char * const arg, const char * const option_name ) @@ -383,7 +382,7 @@ int main( const int argc, const char * const argv[] ) { 'v', "verbose", Arg_parser::no }, { 'V', "version", Arg_parser::no }, { 'z', "zcmp", Arg_parser::yes }, - { 0 , 0, Arg_parser::no } }; + { 0, 0, Arg_parser::no } }; const Arg_parser parser( argc, argv, options ); if( parser.error().size() ) // bad option @@ -398,15 +397,15 @@ int main( const int argc, const char * const argv[] ) const char * const arg = parser.argument( argind ).c_str(); switch( code ) { - case 'h': show_help(); return 0; case 'b': bits.parse_bs( arg, pn ); program_mode = m_byte; break; case 'B': if( arg[0] ) parse_block( arg, pn, block_size, block_value ); program_mode = m_block; break; 
case 'd': delta = getnum( arg, pn, block_size, 1, INT_MAX ); break; case 'e': bad_byte.parse_bb( arg, pn ); break; + case 'h': show_help(); return 0; case 'n': check = false; break; case 'p': pos = getnum( arg, pn, block_size, -LONG_MAX, LONG_MAX ); break; - case 'q': verbosity = -1; break; + case 'q': cl_verbosity = verbosity = -1; break; case 's': max_size = getnum( arg, pn, block_size, -LONG_MAX, LONG_MAX ); break; case 't': program_mode = m_truncate; break; case 'v': if( verbosity < 4 ) ++verbosity; break; @@ -419,7 +418,8 @@ int main( const int argc, const char * const argv[] ) if( parser.arguments() - argind != 2 ) { if( verbosity >= 0 ) - std::fprintf( stderr, "Usage: %s 'lzip -t' file.lz\n", invocation_name ); + std::fprintf( stderr, "Usage: %s [options] 'lzip -t' file.lz\n", + invocation_name ); return 1; } @@ -532,7 +532,7 @@ int main( const int argc, const char * const argv[] ) } else if( program_mode == m_block ) { - uint8_t * block = (uint8_t *)std::malloc( block_size ); + uint8_t * const block = (uint8_t *)std::malloc( block_size ); if( !block ) { show_error( mem_msg ); return 1; } for( long i = pos; i < end; i += std::min( delta, end - i ) ) { @@ -611,17 +611,18 @@ int main( const int argc, const char * const argv[] ) if( verbosity >= 0 ) { - std::fprintf( stderr, "\n%9ld %ss tested\n%9ld total decompressions" - "\n%9ld decompressions returned with zero status", - positions, mode_str[program_mode], decompressions, successes ); + std::fprintf( stderr, "\n%11s %ss tested\n%11s total decompressions" + "\n%11s decompressions returned with zero status", + format_num3( positions ), mode_str[program_mode], + format_num3( decompressions ), format_num3( successes ) ); if( successes > 0 ) { if( zcmp_command.empty() ) - std::fputs( "\n comparisons disabled\n", stderr ); + std::fputs( "\n comparisons disabled\n", stderr ); else if( failed_comparisons > 0 ) - std::fprintf( stderr, ", of which\n%9ld comparisons failed\n", - failed_comparisons ); - else std::fputs( 
"\n all comparisons passed\n", stderr ); + std::fprintf( stderr, ", of which\n%11s comparisons failed\n", + format_num3( failed_comparisons ) ); + else std::fputs( "\n all comparisons passed\n", stderr ); } else std::fputc( '\n', stderr ); } |