From d7c278fca708bc2c4badaeac041ecb136a4ef955 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 16 Jun 2023 13:13:24 +0200 Subject: Adding upstream version 1.24~pre1. Signed-off-by: Daniel Baumann --- ChangeLog | 19 +- INSTALL | 16 +- Makefile.in | 41 ++-- NEWS | 34 ++- README | 10 +- alone_to_lz.cc | 31 +-- arg_parser.cc | 2 +- arg_parser.h | 2 +- byte_repair.cc | 519 +++++++++++++++++++++++++++++++++++++++++++ common.h | 9 +- configure | 21 +- decoder.cc | 39 ++-- decoder.h | 19 +- doc/lziprecover.1 | 43 ++-- doc/lziprecover.info | 354 ++++++++++++++++------------- doc/lziprecover.texi | 379 ++++++++++++++++--------------- dump_remove.cc | 136 +++++++++--- list.cc | 9 +- lunzcrash.cc | 66 +++--- lzip.h | 173 ++++++++------- lzip_index.cc | 92 ++++---- lzip_index.h | 28 ++- main.cc | 321 +++++++++++++------------- main_common.cc | 83 ++++--- md5.cc | 12 +- md5.h | 20 +- merge.cc | 103 +++++---- mtester.cc | 20 +- mtester.h | 24 +- nrep_stats.cc | 22 +- range_dec.cc | 38 ++-- repair.cc | 517 ------------------------------------------ reproduce.cc | 206 +++++++++-------- split.cc | 25 ++- testsuite/check.sh | 97 ++++++-- testsuite/fox6_mark.lz | Bin 0 -> 480 bytes testsuite/test_3m.txt.lz.md5 | 1 + unzcrash.cc | 116 +++++----- 38 files changed, 1964 insertions(+), 1683 deletions(-) create mode 100644 byte_repair.cc delete mode 100644 repair.cc create mode 100644 testsuite/fox6_mark.lz create mode 100644 testsuite/test_3m.txt.lz.md5 diff --git a/ChangeLog b/ChangeLog index 363ce18..e83d855 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +2023-06-14 Antonio Diaz Diaz + + * Version 1.24-pre1 released. + * New options '--empty-error', '--marking-error', '--clear-marking'. + * dump_remove.cc, main.cc: Accept 'empty' in --dump, --remove, --strip. + * main.cc: Rename '--repair' to '--byte-repair'. + Rename '--debug-repair' to '--debug-byte-repair'. + (show_option_error): New function showing argument and option name. + * lzip.h: Rename verify_* to check_*. 
+ * unzcrash.cc: Rename '--no-verify' to '--no-check'. + * repair.cc: Rename to byte_repair.cc. + * testsuite: New test files test_3m.txt.lz.md5, fox6_mark.lz. + 2022-01-21 Antonio Diaz Diaz * Version 1.23 released. @@ -54,7 +67,7 @@ some kinds of corrupt trailers and some fake trailers embedded in trailing data. * split.cc: Use Lzip_index to split members, gaps and trailing data. - * split.cc: Verify last member before writing anything. + * split.cc: Check last member before writing anything. * list.cc (list_files): With '-i', ignore format errors, show gaps. * range_dec.cc: With '-i', ignore a truncated last member. * main.cc (main): Check return value of close( infd ). @@ -64,7 +77,7 @@ * lziprecover.texi: New chapter 'Tarlz'. * configure: Accept appending to CXXFLAGS; 'CXXFLAGS+=OPTIONS'. * INSTALL: Document use of CXXFLAGS+='-D __USE_MINGW_ANSI_STDIO'. - * New test files fox.lz, fox6_sc[1-6].lz. + * testsuite: New test files fox.lz, fox6_sc[1-6].lz. 2018-02-12 Antonio Diaz Diaz @@ -226,7 +239,7 @@ * unzcrash.cc: Test all 1-byte errors. -Copyright (C) 2009-2022 Antonio Diaz Diaz. +Copyright (C) 2009-2023 Antonio Diaz Diaz. This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute, and diff --git a/INSTALL b/INSTALL index 9b86987..b40e06c 100644 --- a/INSTALL +++ b/INSTALL @@ -1,6 +1,6 @@ Requirements ------------ -You will need a C++98 compiler with suport for 'long long'. +You will need a C++98 compiler with support for 'long long'. (gcc 3.3.6 or newer is recommended). I use gcc 6.1.0 and 3.3.6, but the code should compile with any standards compliant compiler. @@ -25,8 +25,8 @@ Procedure or lzip -cd lziprecover[version].tar.lz | tar -xf - -This creates the directory ./lziprecover[version] containing the source from -the main archive. +This creates the directory ./lziprecover[version] containing the source code +extracted from the archive. 2. 
Change to lziprecover directory and run configure. (Try 'configure --help' for usage instructions). @@ -44,6 +44,10 @@ the main archive. 4. Optionally, type 'make check' to run the tests that come with lziprecover. + If you have clzip installed (instead of lzip), use: + + make LZIP_NAME=clzip check + 5. Type 'make install' to install the program and any data files and documentation. @@ -69,15 +73,15 @@ object files and executables to go and run the 'configure' script. 'configure' automatically checks for the source code in '.', in '..', and in the directory that 'configure' is in. -'configure' recognizes the option '--srcdir=DIR' to control where to -look for the sources. Usually 'configure' can determine that directory +'configure' recognizes the option '--srcdir=DIR' to control where to look +for the source code. Usually 'configure' can determine that directory automatically. After running 'configure', you can run 'make' and 'make install' as explained above. -Copyright (C) 2009-2022 Antonio Diaz Diaz. +Copyright (C) 2009-2023 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute, and modify it. 
diff --git a/Makefile.in b/Makefile.in index a8bd012..f8a2e48 100644 --- a/Makefile.in +++ b/Makefile.in @@ -7,9 +7,9 @@ INSTALL_DIR = $(INSTALL) -d -m 755 SHELL = /bin/sh CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1 -objs = arg_parser.o alone_to_lz.o lzip_index.o list.o dump_remove.o \ - lunzcrash.o md5.o merge.o mtester.o nrep_stats.o range_dec.o \ - repair.o reproduce.o split.o decoder.o main.o +objs = arg_parser.o alone_to_lz.o lzip_index.o list.o byte_repair.o \ + dump_remove.o lunzcrash.o md5.o merge.o mtester.o nrep_stats.o \ + range_dec.o reproduce.o split.o decoder.o main.o unzobjs = arg_parser.o unzcrash.o @@ -38,24 +38,23 @@ unzcrash.o : unzcrash.cc $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< $(objs) : Makefile -lzip.h : common.h -alone_to_lz.o : lzip.h mtester.h +alone_to_lz.o : lzip.h common.h mtester.h arg_parser.o : arg_parser.h -decoder.o : lzip.h decoder.h -dump_remove.o : lzip.h lzip_index.h -list.o : lzip.h lzip_index.h -lunzcrash.o : lzip.h md5.h mtester.h lzip_index.h -lzip_index.o : lzip.h lzip_index.h -main.o : arg_parser.h lzip.h decoder.h main_common.cc +byte_repair.o : lzip.h common.h mtester.h lzip_index.h +decoder.o : lzip.h common.h decoder.h +dump_remove.o : lzip.h common.h lzip_index.h +list.o : lzip.h common.h lzip_index.h +lunzcrash.o : lzip.h common.h md5.h mtester.h lzip_index.h +lzip_index.o : lzip.h common.h lzip_index.h +main.o : arg_parser.h lzip.h common.h decoder.h main_common.cc md5.o : md5.h -merge.o : lzip.h decoder.h lzip_index.h -mtester.o : lzip.h md5.h mtester.h -nrep_stats.o : lzip.h lzip_index.h -range_dec.o : lzip.h decoder.h lzip_index.h -repair.o : lzip.h mtester.h lzip_index.h -reproduce.o : lzip.h md5.h mtester.h lzip_index.h -split.o : lzip.h lzip_index.h -unzcrash.o : Makefile arg_parser.h main_common.cc +merge.o : lzip.h common.h decoder.h lzip_index.h +mtester.o : lzip.h common.h md5.h mtester.h +nrep_stats.o : lzip.h common.h lzip_index.h +range_dec.o : lzip.h common.h 
decoder.h lzip_index.h +reproduce.o : lzip.h common.h md5.h mtester.h lzip_index.h +split.o : lzip.h common.h lzip_index.h +unzcrash.o : Makefile arg_parser.h common.h main_common.cc doc : info man @@ -63,7 +62,7 @@ doc : info man info : $(VPATH)/doc/$(pkgname).info $(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texi - cd $(VPATH)/doc && makeinfo $(pkgname).texi + cd $(VPATH)/doc && $(MAKEINFO) $(pkgname).texi man : $(VPATH)/doc/$(progname).1 @@ -146,11 +145,13 @@ dist : doc $(DISTNAME)/testsuite/test.txt \ $(DISTNAME)/testsuite/test21723.txt \ $(DISTNAME)/testsuite/test_bad[6-9].txt \ + $(DISTNAME)/testsuite/test_3m.txt.lz.md5 \ $(DISTNAME)/testsuite/fox.lz \ $(DISTNAME)/testsuite/fox_*.lz \ $(DISTNAME)/testsuite/fox6.lz \ $(DISTNAME)/testsuite/fox6_sc[1-6].lz \ $(DISTNAME)/testsuite/fox6_bad[1-6].lz \ + $(DISTNAME)/testsuite/fox6_mark.lz \ $(DISTNAME)/testsuite/numbers.lz \ $(DISTNAME)/testsuite/numbersbt.lz \ $(DISTNAME)/testsuite/test.txt.lz \ diff --git a/NEWS b/NEWS index eaf9899..a39290f 100644 --- a/NEWS +++ b/NEWS @@ -1,28 +1,22 @@ -Changes in version 1.23: +Changes in version 1.24: -Decompression time has been reduced by 5-12% depending on the file. +The option '--empty-error', which forces exit status 2 if any empty member +is found, has been added. -In case of error in a numerical argument to a command line option, lziprecover -now shows the name of the option and the range of valid values. +The option '--marking-error', which forces exit status 2 if the first LZMA +byte is non-zero in any member, has been added. -Options '--dump' and '--strip' now refuse to write compressed data to a -terminal except when dumping trailing data with '--dump=tdata'. +The option '--clear-marking', which sets to zero the first LZMA byte of each +member, has been added. -The option '-U, --unzcrash' now requires an argument: '1' to test 1-bit -errors, or 'B' to test zeroed blocks. 
+The keyword 'empty' is now recognized in the argument of --dump, --remove, +and --strip. -The memory tester now allocates the dictionary once per member instead of -doing it for each test. This makes '-U, --unzcrash' about two times faster -on my machine on files with an uncompressed size larger than about 30 MB. +The option '--repair' has been renamed to '--byte-repair'. -'-W, --debug-decompress' now continues decompressing the members following -the damaged member if it has been fully decompressed (just failed with a CRC -mismatch). +The option '--debug-repair' has been renamed to '--debug-byte-repair'. -The tool unzcrash now uses execvp instead of popen to avoid invoking /bin/sh -and run faster. It also prints byte or block position in messages. +Diagnostics caused by invalid arguments to command line options now show the +argument and the name of the option. -Several descriptions have been improved in manual, '--help', and man page. - -The texinfo category of the manual has been changed from 'Data Compression' -to 'Compression' to match that of gzip. (Reported by Alfred M. Szmidt). +The option '--no-verify' of unzcrash has been renamed to '--no-check'. diff --git a/README b/README index e64ea0d..c3104da 100644 --- a/README +++ b/README @@ -46,9 +46,9 @@ the beginning is a thing of the past. Compression may be good for long-term archiving. For compressible data, multiple compressed copies may provide redundancy in a more useful form and may have a better chance of surviving intact than one uncompressed copy -using the same amount of storage space. This is specially true if the format -provides recovery capabilities like those of lziprecover, which is able to -find and combine the good parts of several damaged copies. +using the same amount of storage space. This is especially true if the +format provides recovery capabilities like those of lziprecover, which is +able to find and combine the good parts of several damaged copies. 
Lziprecover is able to recover or decompress files produced by any of the compressors in the lzip family: lzip, plzip, minilzip/lzlib, clzip, and @@ -60,7 +60,7 @@ from damaged lzip files. If a file is too damaged for lziprecover to repair it, all the recoverable data in all members of the file can be extracted in one step with the -command 'lziprecover -cd -i file.lz > file'. +command 'lziprecover -cd --ignore-errors file.lz > file'. When recovering data, lziprecover takes as arguments the names of the damaged files and writes zero or more recovered files depending on the @@ -84,7 +84,7 @@ Julian Seward's bzip2. Type 'make unzcrash' in the lziprecover source directory to build it. Then try 'unzcrash --help'. -Copyright (C) 2009-2022 Antonio Diaz Diaz. +Copyright (C) 2009-2023 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute, and modify it. diff --git a/alone_to_lz.cc b/alone_to_lz.cc index 9e5b330..ead1e38 100644 --- a/alone_to_lz.cc +++ b/alone_to_lz.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2023 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -36,13 +36,13 @@ namespace { /* Return the address of a malloc'd buffer containing the file data and - the file size in '*size'. The buffer is at least 20 bytes larger. - In case of error, return 0 and do not modify '*size'. + the file size in '*file_sizep'. The buffer is at least 20 bytes larger. + In case of error, return 0 and do not modify '*file_sizep'. 
*/ -uint8_t * read_file( const int infd, long * const size, +uint8_t * read_file( const int infd, long * const file_sizep, const char * const filename ) { - long buffer_size = 1 << 20; + long buffer_size = 65536; uint8_t * buffer = (uint8_t *)std::malloc( buffer_size ); if( !buffer ) throw std::bad_alloc(); @@ -50,8 +50,8 @@ uint8_t * read_file( const int infd, long * const size, while( file_size >= buffer_size - 20 && !errno ) { if( buffer_size >= LONG_MAX ) - { show_file_error( filename, "File is too large" ); std::free( buffer ); - return 0; } + { show_file_error( filename, "Input file is larger than LONG_MAX." ); + std::free( buffer ); return 0; } buffer_size = ( buffer_size <= LONG_MAX / 2 ) ? 2 * buffer_size : LONG_MAX; uint8_t * const tmp = (uint8_t *)std::realloc( buffer, buffer_size ); if( !tmp ) { std::free( buffer ); throw std::bad_alloc(); } @@ -61,10 +61,10 @@ uint8_t * read_file( const int infd, long * const size, } if( errno ) { - show_file_error( filename, "Error reading file", errno ); + show_file_error( filename, "Error reading input file", errno ); std::free( buffer ); return 0; } - *size = file_size; + *file_sizep = file_size; return buffer; } @@ -88,21 +88,20 @@ int alone_to_lz( const int infd, const Pretty_print & pp ) uint8_t * const buffer = read_file( infd, &file_size, pp.name() ); if( !buffer ) return 1; if( file_size < lzma_header_size ) - { show_file_error( pp.name(), "file is too short" ); + { show_file_error( pp.name(), "Input file is too short." ); std::free( buffer ); return 2; } if( buffer[0] != 93 ) // (45 * 2) + (9 * 0) + 3 { const Lzip_header & header = *(const Lzip_header *)buffer; - if( header.verify_magic() && header.verify_version() && - isvalid_ds( header.dictionary_size() ) ) - show_file_error( pp.name(), "file is already in lzip format" ); + if( header.check() ) + show_file_error( pp.name(), "Input file is already in lzip format." 
); else - show_file_error( pp.name(), "file has non-default LZMA properties" ); + show_file_error( pp.name(), "Input file has non-default LZMA properties." ); std::free( buffer ); return 2; } for( int i = 5; i < 13; ++i ) if( buffer[i] != 0xFF ) - { show_file_error( pp.name(), "file is non-streamed" ); + { show_file_error( pp.name(), "Input file is non-streamed." ); std::free( buffer ); return 2; } if( verbosity >= 1 ) pp(); @@ -115,6 +114,7 @@ int alone_to_lz( const int infd, const Pretty_print & pp ) header.set_magic(); header.dictionary_size( dictionary_size ); for( int i = 0; i < Lzip_trailer::size; ++i ) buffer[file_size++] = 0; + // compute and fill trailer { LZ_mtester mtester( buffer + offset, file_size - offset, dictionary_size ); const int result = mtester.test_member(); @@ -135,6 +135,7 @@ int alone_to_lz( const int infd, const Pretty_print & pp ) trailer.data_size( mtester.data_position() ); trailer.member_size( mtester.member_position() ); } + // check converted member LZ_mtester mtester( buffer + offset, file_size - offset, dictionary_size ); if( mtester.test_member() != 0 || !mtester.finished() ) { pp( "conversion failed" ); std::free( buffer ); return 2; } diff --git a/arg_parser.cc b/arg_parser.cc index 59998ac..5d46a9d 100644 --- a/arg_parser.cc +++ b/arg_parser.cc @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C++ version) - Copyright (C) 2006-2022 Antonio Diaz Diaz. + Copyright (C) 2006-2023 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided diff --git a/arg_parser.h b/arg_parser.h index e854838..272e919 100644 --- a/arg_parser.h +++ b/arg_parser.h @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C++ version) - Copyright (C) 2006-2022 Antonio Diaz Diaz. + Copyright (C) 2006-2023 Antonio Diaz Diaz. This library is free software. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided diff --git a/byte_repair.cc b/byte_repair.cc new file mode 100644 index 0000000..3e92ca4 --- /dev/null +++ b/byte_repair.cc @@ -0,0 +1,519 @@ +/* Lziprecover - Data recovery tool for the lzip format + Copyright (C) 2009-2023 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "lzip.h" +#include "mtester.h" +#include "lzip_index.h" + + +namespace { + +bool pending_newline = false; + +void print_pending_newline( const char terminator ) + { if( pending_newline && terminator != '\n' ) std::fputc( '\n', stdout ); + pending_newline = false; } + + +bool gross_damage( const uint8_t * const mbuffer, const long msize ) + { + enum { maxlen = 7 }; // max number of consecutive identical bytes + long i = Lzip_header::size; + const long end = msize - Lzip_trailer::size - maxlen; + while( i < end ) + { + const uint8_t byte = mbuffer[i]; + int len = 0; // does not count the first byte + while( mbuffer[++i] == byte ) if( ++len >= maxlen ) return true; + } + return false; + } + + +// Return value: 0 = no change, 5 = repaired pos +int repair_dictionary_size( uint8_t * const mbuffer, const long msize ) + { + Lzip_header & header = *(Lzip_header *)mbuffer; + unsigned 
dictionary_size = header.dictionary_size(); + const Lzip_trailer & trailer = + *(const Lzip_trailer *)( mbuffer + msize - Lzip_trailer::size ); + const unsigned long long data_size = trailer.data_size(); + const bool valid_ds = isvalid_ds( dictionary_size ); + if( valid_ds && dictionary_size >= data_size ) return 0; // can't be bad + + const unsigned long long dictionary_size_9 = 1 << 25; // dict size of opt -9 + if( !valid_ds || dictionary_size < dictionary_size_9 ) + { + dictionary_size = std::min( data_size, dictionary_size_9 ); + if( dictionary_size < min_dictionary_size ) + dictionary_size = min_dictionary_size; + LZ_mtester mtester( mbuffer, msize, dictionary_size ); + const int result = mtester.test_member(); + if( result == 0 ) + { header.dictionary_size( dictionary_size ); return 5; } // fix DS + if( result != 1 || mtester.max_distance() <= dictionary_size || + mtester.max_distance() > max_dictionary_size ) return 0; + } + if( data_size > dictionary_size_9 ) + { + dictionary_size = + std::min( data_size, (unsigned long long)max_dictionary_size ); + LZ_mtester mtester( mbuffer, msize, dictionary_size ); + if( mtester.test_member() == 0 ) + { header.dictionary_size( dictionary_size ); return 5; } // fix DS + } + return 0; + } + + +const LZ_mtester * prepare_master( const uint8_t * const buffer, + const long buffer_size, + const unsigned long pos_limit, + const unsigned dictionary_size ) + { + LZ_mtester * const master = + new LZ_mtester( buffer, buffer_size, dictionary_size ); + if( master->test_member( pos_limit ) == -1 ) return master; + delete master; + return 0; + } + + +bool test_member_rest( const LZ_mtester & master, uint8_t * const buffer2, + long * const failure_posp = 0 ) + { + LZ_mtester mtester( master ); // tester with external buffer + mtester.duplicate_buffer( buffer2 ); + if( mtester.test_member() == 0 && mtester.finished() ) return true; + if( failure_posp ) *failure_posp = mtester.member_position(); + return false; + } + + +// Return value: 
-1 = master failed, 0 = begin reached, > 0 = repaired pos +long repair_member( uint8_t * const mbuffer, const long long mpos, + const long msize, const long begin, const long end, + const unsigned dictionary_size, const char terminator ) + { + uint8_t * const buffer2 = new uint8_t[dictionary_size]; + for( long pos = end; pos >= begin && pos > end - 50000; ) + { + const long min_pos = std::max( begin, pos - 100 ); + const unsigned long pos_limit = std::max( min_pos - 16, 0L ); + const LZ_mtester * master = + prepare_master( mbuffer, msize, pos_limit, dictionary_size ); + if( !master ) { delete[] buffer2; return -1; } + for( ; pos >= min_pos; --pos ) + { + if( verbosity >= 2 ) + { + std::printf( " Trying position %llu %c", mpos + pos, terminator ); + std::fflush( stdout ); pending_newline = true; + } + for( int j = 0; j < 255; ++j ) + { + ++mbuffer[pos]; + if( test_member_rest( *master, buffer2 ) ) + { delete master; delete[] buffer2; return pos; } + } + ++mbuffer[pos]; + } + delete master; + } + delete[] buffer2; + return 0; + } + +} // end namespace + + +long seek_write( const int fd, const uint8_t * const buf, const long size, + const long long pos ) + { + if( lseek( fd, pos, SEEK_SET ) == pos ) + return writeblock( fd, buf, size ); + return 0; + } + + +uint8_t * read_member( const int infd, const long long mpos, + const long long msize, const char * const filename ) + { + if( msize <= 0 || msize > LONG_MAX ) + { show_file_error( filename, + "Input file contains member larger than LONG_MAX." 
); return 0; } + if( !safe_seek( infd, mpos, filename ) ) return 0; + uint8_t * const buffer = new uint8_t[msize]; + + if( readblock( infd, buffer, msize ) != msize ) + { show_file_error( filename, "Error reading input file", errno ); + delete[] buffer; return 0; } + return buffer; + } + + +int byte_repair( const std::string & input_filename, + const std::string & default_output_filename, + const Cl_options & cl_opts, + const char terminator, const bool force ) + { + const char * const filename = input_filename.c_str(); + struct stat in_stats; + const int infd = open_instream( filename, &in_stats, false, true ); + if( infd < 0 ) return 1; + + const Lzip_index lzip_index( infd, cl_opts, true ); + if( lzip_index.retval() != 0 ) + { show_file_error( filename, lzip_index.error().c_str() ); + return lzip_index.retval(); } + + output_filename = default_output_filename.empty() ? + insert_fixed( input_filename ) : default_output_filename; + if( !force && output_file_exists() ) return 1; + outfd = -1; + for( long i = 0; i < lzip_index.members(); ++i ) + { + const long long mpos = lzip_index.mblock( i ).pos(); + const long long msize = lzip_index.mblock( i ).size(); + if( !safe_seek( infd, mpos, filename ) ) cleanup_and_fail( 1 ); + long long failure_pos = 0; + if( test_member_from_file( infd, msize, &failure_pos ) == 0 ) continue; + if( failure_pos < Lzip_header::size ) // End Of File + { show_error( "Can't repair error in input file." 
); + cleanup_and_fail( 2 ); } + if( failure_pos >= msize - 8 ) failure_pos = msize - 8 - 1; + + if( verbosity >= 2 ) // damaged member found + { + std::printf( "Repairing member %ld of %ld (failure pos = %llu)\n", + i + 1, lzip_index.members(), mpos + failure_pos ); + std::fflush( stdout ); + } + uint8_t * const mbuffer = read_member( infd, mpos, msize, filename ); + if( !mbuffer ) cleanup_and_fail( 1 ); + const Lzip_header & header = *(const Lzip_header *)mbuffer; + const unsigned dictionary_size = header.dictionary_size(); + long pos = 0; + if( !gross_damage( mbuffer, msize ) ) + { + pos = repair_dictionary_size( mbuffer, msize ); + if( pos == 0 ) + pos = repair_member( mbuffer, mpos, msize, Lzip_header::size + 1, + Lzip_header::size + 6, dictionary_size, terminator ); + if( pos == 0 ) + pos = repair_member( mbuffer, mpos, msize, Lzip_header::size + 7, + failure_pos, dictionary_size, terminator ); + print_pending_newline( terminator ); + } + if( pos < 0 ) + { show_error( "Can't prepare master." ); cleanup_and_fail( 1 ); } + if( pos > 0 ) + { + if( outfd < 0 ) // first damaged member repaired + { + if( !safe_seek( infd, 0, filename ) ) return 1; + set_signal_handler(); + if( !open_outstream( true, true ) ) return 1; + if( !copy_file( infd, outfd ) ) // copy whole file + cleanup_and_fail( 1 ); + } + if( seek_write( outfd, mbuffer + pos, 1, mpos + pos ) != 1 ) + { show_error( "Error writing output file", errno ); + cleanup_and_fail( 1 ); } + } + delete[] mbuffer; + if( pos == 0 ) + { + show_error( "Can't repair input file. Error is probably larger than 1 byte." ); + cleanup_and_fail( 2 ); + } + } + + if( outfd < 0 ) + { + if( verbosity >= 1 ) + std::fputs( "Input file has no errors. 
Recovery is not needed.\n", stdout ); + return 0; + } + if( close_outstream( &in_stats ) != 0 ) return 1; + if( verbosity >= 1 ) + std::fputs( "Copy of input file repaired successfully.\n", stdout ); + return 0; + } + + +int debug_delay( const char * const input_filename, + const Cl_options & cl_opts, Block range, + const char terminator ) + { + struct stat in_stats; // not used + const int infd = open_instream( input_filename, &in_stats, false, true ); + if( infd < 0 ) return 1; + + const Lzip_index lzip_index( infd, cl_opts ); + if( lzip_index.retval() != 0 ) + { show_file_error( input_filename, lzip_index.error().c_str() ); + return lzip_index.retval(); } + + if( range.end() > lzip_index.cdata_size() ) + range.size( std::max( 0LL, lzip_index.cdata_size() - range.pos() ) ); + if( range.size() <= 0 ) + { show_file_error( input_filename, "Nothing to do." ); return 0; } + + for( long i = 0; i < lzip_index.members(); ++i ) + { + const Block & mb = lzip_index.mblock( i ); + if( !range.overlaps( mb ) ) continue; + const long long mpos = lzip_index.mblock( i ).pos(); + const long long msize = lzip_index.mblock( i ).size(); + const unsigned dictionary_size = lzip_index.dictionary_size( i ); + if( verbosity >= 2 ) + { + std::printf( "Finding max delay in member %ld of %ld (mpos = %llu, msize = %llu)\n", + i + 1, lzip_index.members(), mpos, msize ); + std::fflush( stdout ); + } + uint8_t * const mbuffer = read_member( infd, mpos, msize, input_filename ); + if( !mbuffer ) return 1; + uint8_t * const buffer2 = new uint8_t[dictionary_size]; + long pos = std::max( range.pos() - mpos, Lzip_header::size + 1LL ); + const long end = std::min( range.end() - mpos, msize ); + long max_delay = 0; + while( pos < end ) + { + const unsigned long pos_limit = std::max( pos - 16, 0L ); + const LZ_mtester * master = + prepare_master( mbuffer, msize, pos_limit, dictionary_size ); + if( !master ) { show_error( "Can't prepare master." 
); + delete[] buffer2; delete[] mbuffer; return 1; } + const long partial_end = std::min( pos + 100, end ); + for( ; pos < partial_end; ++pos ) + { + if( verbosity >= 2 ) + { + std::printf( " Delays at position %llu %c", mpos + pos, terminator ); + std::fflush( stdout ); pending_newline = true; + } + int value = -1; + for( int j = 0; j < 256; ++j ) + { + ++mbuffer[pos]; + if( j == 255 ) break; + long failure_pos = 0; + if( test_member_rest( *master, buffer2, &failure_pos ) ) continue; + const long delay = failure_pos - pos; + if( delay > max_delay ) { max_delay = delay; value = mbuffer[pos]; } + } + if( value >= 0 && verbosity >= 2 ) + { + std::printf( " New max delay %lu at position %llu (0x%02X)\n", + max_delay, mpos + pos, value ); + std::fflush( stdout ); pending_newline = false; + } + if( pos + max_delay >= msize ) { pos = end; break; } + } + delete master; + } + delete[] buffer2; + delete[] mbuffer; + print_pending_newline( terminator ); + } + + if( verbosity >= 1 ) std::fputs( "Done.\n", stdout ); + return 0; + } + + +int debug_byte_repair( const char * const input_filename, + const Cl_options & cl_opts, const Bad_byte & bad_byte, + const char terminator ) + { + struct stat in_stats; // not used + const int infd = open_instream( input_filename, &in_stats, false, true ); + if( infd < 0 ) return 1; + + const Lzip_index lzip_index( infd, cl_opts ); + if( lzip_index.retval() != 0 ) + { show_file_error( input_filename, lzip_index.error().c_str() ); + return lzip_index.retval(); } + + long idx = 0; + for( ; idx < lzip_index.members(); ++idx ) + if( lzip_index.mblock( idx ).includes( bad_byte.pos ) ) break; + if( idx >= lzip_index.members() ) + { show_file_error( input_filename, "Nothing to do." 
); return 0; } + + const long long mpos = lzip_index.mblock( idx ).pos(); + const long long msize = lzip_index.mblock( idx ).size(); + { + long long failure_pos = 0; + if( !safe_seek( infd, mpos, input_filename ) ) return 1; + if( test_member_from_file( infd, msize, &failure_pos ) != 0 ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "Member %ld of %ld already damaged (failure pos = %llu)\n", + idx + 1, lzip_index.members(), mpos + failure_pos ); + return 2; + } + } + uint8_t * const mbuffer = read_member( infd, mpos, msize, input_filename ); + if( !mbuffer ) return 1; + const Lzip_header & header = *(const Lzip_header *)mbuffer; + const unsigned dictionary_size = header.dictionary_size(); + const uint8_t good_value = mbuffer[bad_byte.pos-mpos]; + const uint8_t bad_value = bad_byte( good_value ); + mbuffer[bad_byte.pos-mpos] = bad_value; + long failure_pos = 0; + if( bad_byte.pos != 5 || isvalid_ds( header.dictionary_size() ) ) + { + LZ_mtester mtester( mbuffer, msize, header.dictionary_size() ); + if( mtester.test_member() == 0 && mtester.finished() ) + { + if( verbosity >= 1 ) + std::fputs( "Member decompressed with no errors.\n", stdout ); + delete[] mbuffer; + return 0; + } + failure_pos = mtester.member_position(); + } + if( verbosity >= 2 ) + { + std::printf( "Test repairing member %ld of %ld (mpos = %llu, msize = %llu)\n" + " (damage pos = %llu (0x%02X->0x%02X), failure pos = %llu, delay = %lld )\n", + idx + 1, lzip_index.members(), mpos, msize, + bad_byte.pos, good_value, bad_value, mpos + failure_pos, + mpos + failure_pos - bad_byte.pos ); + std::fflush( stdout ); + } + if( failure_pos >= msize ) failure_pos = msize - 1; + long pos = repair_dictionary_size( mbuffer, msize ); + if( pos == 0 ) + pos = repair_member( mbuffer, mpos, msize, Lzip_header::size + 1, + Lzip_header::size + 6, dictionary_size, terminator ); + if( pos == 0 ) + pos = repair_member( mbuffer, mpos, msize, Lzip_header::size + 7, + failure_pos, dictionary_size, terminator ); + 
print_pending_newline( terminator ); + delete[] mbuffer; + if( pos < 0 ) { show_error( "Can't prepare master." ); return 1; } + if( pos == 0 ) internal_error( "can't repair input file." ); + if( verbosity >= 1 ) std::fputs( "Member repaired successfully.\n", stdout ); + return 0; + } + + +/* If show_packets is true, print to stdout descriptions of the decoded LZMA + packets. Print also some global values: total number of packets in + member, max distance (rep0) and its file position, max LZMA packet size + in each member and the file position of these packets. + (Packet sizes are a fractional number of bytes. The packet and marker + sizes shown by option -X are the number of extra bytes required to decode + the packet, not counting the data present in the range decoder before and + after the decoding. The max marker size of a 'Sync Flush marker' does not + include the 5 bytes read by rdec.load). + If bad_byte.pos >= cdata_size, bad_byte is ignored. If show_packets is false, STDOUT_FILENO is passed to the decoder as output descriptor. Return 1 if the file can't be opened, lzip_index.retval() if the index can't be built; else the larger of the decoding status (0 = no error, 1 = a member can't be read, 2 = invalid dictionary size or decoding error) and the value returned by close_outstream. +*/ +int debug_decompress( const char * const input_filename, + const Cl_options & cl_opts, const Bad_byte & bad_byte, + const bool show_packets ) + { + struct stat in_stats; + const int infd = open_instream( input_filename, &in_stats, false, true ); + if( infd < 0 ) return 1; + + const Lzip_index lzip_index( infd, cl_opts ); + if( lzip_index.retval() != 0 ) + { show_file_error( input_filename, lzip_index.error().c_str() ); + return lzip_index.retval(); } + + outfd = show_packets ? 
-1 : STDOUT_FILENO; + int retval = 0; + for( long i = 0; i < lzip_index.members(); ++i ) + { + const long long dpos = lzip_index.dblock( i ).pos(); + const long long mpos = lzip_index.mblock( i ).pos(); + const long long msize = lzip_index.mblock( i ).size(); + const unsigned dictionary_size = lzip_index.dictionary_size( i ); + if( verbosity >= 1 && show_packets ) + std::printf( "Decoding LZMA packets in member %ld of %ld (mpos = %llu, msize = %llu)\n" + " mpos dpos\n", + i + 1, lzip_index.members(), mpos, msize ); + if( !isvalid_ds( dictionary_size ) ) + { show_error( bad_dict_msg ); retval = 2; break; } + uint8_t * const mbuffer = read_member( infd, mpos, msize, input_filename ); + if( !mbuffer ) { retval = 1; break; } + if( bad_byte.pos >= 0 && lzip_index.mblock( i ).includes( bad_byte.pos ) ) + { + const uint8_t good_value = mbuffer[bad_byte.pos-mpos]; + const uint8_t bad_value = bad_byte( good_value ); + mbuffer[bad_byte.pos-mpos] = bad_value; + if( verbosity >= 1 && show_packets ) + std::printf( "Byte at pos %llu changed from 0x%02X to 0x%02X\n", + bad_byte.pos, good_value, bad_value ); + } + LZ_mtester mtester( mbuffer, msize, dictionary_size, outfd ); + const int result = mtester.debug_decode_member( dpos, mpos, show_packets ); + delete[] mbuffer; + if( show_packets ) + { + const std::vector< unsigned long long > & mppv = mtester.max_packet_posv(); + const unsigned mpackets = mppv.size(); + std::printf( "Total packets in member = %llu\n" + "Max distance in any match = %u at file position %llu\n" + "Max marker size found = %u\n" + "Max packet size found = %u (%u packets)%s", + mtester.total_packets(), mtester.max_distance(), + mtester.max_distance_pos(), mtester.max_marker_size(), + mtester.max_packet_size(), mpackets, + mpackets ? 
" at file positions" : "" ); + for( unsigned i = 0; i < mpackets; ++i ) + std::printf( " %llu", mppv[i] ); + std::fputc( '\n', stdout ); + } + if( result != 0 ) + { + if( verbosity >= 0 && result <= 2 && show_packets ) + std::printf( "%s at pos %llu\n", ( result == 2 ) ? + "File ends unexpectedly" : "Decoder error", + mpos + mtester.member_position() ); + retval = 2; + if( result != 3 || !mtester.finished() || mtester.data_position() != + (unsigned long long)lzip_index.dblock( i ).size() ) break; + } + if( i + 1 < lzip_index.members() && show_packets ) + std::fputc( '\n', stdout ); + } + + retval = std::max( retval, close_outstream( &in_stats ) ); + if( verbosity >= 1 && show_packets && retval == 0 ) + std::fputs( "Done.\n", stdout ); + return retval; + } diff --git a/common.h b/common.h index c3d0691..453f56e 100644 --- a/common.h +++ b/common.h @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2023 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,11 +19,14 @@ struct Bad_byte { enum Mode { literal, delta, flip }; long long pos; + const char * argument; const char * option_name; Mode mode; uint8_t value; - Bad_byte() : pos( -1 ), option_name( 0 ), mode( literal ), value( 0 ) {} + Bad_byte() : + pos( -1 ), argument( 0 ), option_name( 0 ), mode( literal ), value( 0 ) {} + uint8_t operator()( const uint8_t old_value ) const { if( mode == delta ) return old_value + value; @@ -35,6 +38,8 @@ struct Bad_byte }; +const char * const mem_msg = "Not enough memory."; + // defined in main_common.cc void show_error( const char * const msg, const int errcode = 0, const bool help = false ); diff --git a/configure b/configure index 577f04f..1b84392 100755 --- a/configure +++ b/configure @@ -1,12 +1,12 @@ #! 
/bin/sh # configure script for Lziprecover - Data recovery tool for the lzip format -# Copyright (C) 2009-2022 Antonio Diaz Diaz. +# Copyright (C) 2009-2023 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute, and modify it. pkgname=lziprecover -pkgversion=1.23 +pkgversion=1.24-pre1 progname=lziprecover srctrigger=doc/${pkgname}.texi @@ -24,6 +24,7 @@ CXX=g++ CPPFLAGS= CXXFLAGS='-Wall -W -O2' LDFLAGS= +MAKEINFO=makeinfo # checking whether we are using GNU C++. /bin/sh -c "${CXX} --version" > /dev/null 2>&1 || { CXX=c++ ; CXXFLAGS=-O2 ; } @@ -43,7 +44,7 @@ while [ $# != 0 ] ; do # Split out the argument for options that take them case ${option} in - *=*) optarg=`echo "${option}" | sed -e 's,^[^=]*=,,;s,/$,,'` ;; + *=*) optarg="`echo "${option}" | sed -e 's,^[^=]*=,,;s,/$,,'`" ;; esac # Process the options @@ -57,7 +58,7 @@ while [ $# != 0 ] ; do echo "Options and variables: [defaults in brackets]" echo " -h, --help display this help and exit" echo " -V, --version output version information and exit" - echo " --srcdir=DIR find the sources in DIR [. or ..]" + echo " --srcdir=DIR find the source code in DIR [. 
or ..]" echo " --prefix=DIR install into DIR [${prefix}]" echo " --exec-prefix=DIR base directory for arch-dependent files [${exec_prefix}]" echo " --bindir=DIR user executables directory [${bindir}]" @@ -69,6 +70,7 @@ while [ $# != 0 ] ; do echo " CXXFLAGS=OPTIONS command line options for the C++ compiler [${CXXFLAGS}]" echo " CXXFLAGS+=OPTIONS append options to the current value of CXXFLAGS" echo " LDFLAGS=OPTIONS command line options for the linker [${LDFLAGS}]" + echo " MAKEINFO=NAME makeinfo program to use [${MAKEINFO}]" echo exit 0 ;; --version | -V) @@ -96,6 +98,7 @@ while [ $# != 0 ] ; do CXXFLAGS=*) CXXFLAGS=${optarg} ;; CXXFLAGS+=*) CXXFLAGS="${CXXFLAGS} ${optarg}" ;; LDFLAGS=*) LDFLAGS=${optarg} ;; + MAKEINFO=*) MAKEINFO=${optarg} ;; --*) echo "configure: WARNING: unrecognized option: '${option}'" 1>&2 ;; @@ -115,19 +118,19 @@ while [ $# != 0 ] ; do fi done -# Find the source files, if location was not specified. +# Find the source code, if location was not specified. srcdirtext= if [ -z "${srcdir}" ] ; then srcdirtext="or . or .." ; srcdir=. if [ ! -r "${srcdir}/${srctrigger}" ] ; then srcdir=.. ; fi if [ ! -r "${srcdir}/${srctrigger}" ] ; then ## the sed command below emulates the dirname command - srcdir=`echo "$0" | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` + srcdir="`echo "$0" | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`" fi fi if [ ! -r "${srcdir}/${srctrigger}" ] ; then - echo "configure: Can't find sources in ${srcdir} ${srcdirtext}" 1>&2 + echo "configure: Can't find source code in ${srcdir} ${srcdirtext}" 1>&2 echo "configure: (At least ${srctrigger} is missing)." 1>&2 exit 1 fi @@ -164,10 +167,11 @@ echo "CXX = ${CXX}" echo "CPPFLAGS = ${CPPFLAGS}" echo "CXXFLAGS = ${CXXFLAGS}" echo "LDFLAGS = ${LDFLAGS}" +echo "MAKEINFO = ${MAKEINFO}" rm -f Makefile cat > Makefile << EOF # Makefile for Lziprecover - Data recovery tool for the lzip format -# Copyright (C) 2009-2022 Antonio Diaz Diaz. +# Copyright (C) 2009-2023 Antonio Diaz Diaz. 
# This file was generated automatically by configure. Don't edit. # # This Makefile is free software: you have unlimited permission @@ -187,6 +191,7 @@ CXX = ${CXX} CPPFLAGS = ${CPPFLAGS} CXXFLAGS = ${CXXFLAGS} LDFLAGS = ${LDFLAGS} +MAKEINFO = ${MAKEINFO} EOF cat "${srcdir}/Makefile.in" >> Makefile diff --git a/decoder.cc b/decoder.cc index 345d02b..fc617de 100644 --- a/decoder.cc +++ b/decoder.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2023 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -37,13 +37,13 @@ const CRC32 crc32; /* Return the number of bytes really read. If (value returned < size) and (errno == 0), means EOF was reached. */ -long long readblock( const int fd, uint8_t * const buf, const long long size ) +long readblock( const int fd, uint8_t * const buf, const long size ) { - long long sz = 0; + long sz = 0; errno = 0; while( sz < size ) { - const int n = read( fd, buf + sz, std::min( 1LL << 20, size - sz ) ); + const long n = read( fd, buf + sz, size - sz ); if( n > 0 ) sz += n; else if( n == 0 ) break; // EOF else if( errno != EINTR ) break; @@ -56,14 +56,13 @@ long long readblock( const int fd, uint8_t * const buf, const long long size ) /* Return the number of bytes really written. If (value returned < size), it is always an error. 
*/ -long long writeblock( const int fd, const uint8_t * const buf, - const long long size ) +long writeblock( const int fd, const uint8_t * const buf, const long size ) { - long long sz = 0; + long sz = 0; errno = 0; while( sz < size ) { - const int n = write( fd, buf + sz, std::min( 1LL << 20, size - sz ) ); + const long n = write( fd, buf + sz, size - sz ); if( n > 0 ) sz += n; else if( n < 0 && errno != EINTR ) break; errno = 0; @@ -109,7 +108,8 @@ void LZ_decoder::flush_data() } -bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const +int LZ_decoder::check_trailer( const Pretty_print & pp, + const bool ignore_empty ) const { Lzip_trailer trailer; int size = rdec.read_data( trailer.data, Lzip_trailer::size ); @@ -162,7 +162,8 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const tm_size, tm_size, member_size, member_size ); } } - if( error ) return false; + if( error ) return 3; + if( !ignore_empty && data_size == 0 ) return 5; if( verbosity >= 2 ) { if( verbosity >= 4 ) show_header( dictionary_size ); @@ -182,13 +183,15 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const pp(); std::fprintf( stderr, "Range decoder final code is %08X\n", rdec.get_code() ); } - return true; + return 0; } /* Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF, - 3 = trailer error, 4 = unknown marker found. */ -int LZ_decoder::decode_member( const Pretty_print & pp ) + 3 = trailer error, 4 = unknown marker found, + 5 = empty member found, 6 = marked member found. */ +int LZ_decoder::decode_member( const Pretty_print & pp, + const bool ignore_empty, const bool ignore_marking ) { Bit_model bm_literal[1<= 0 ) { pp(); diff --git a/decoder.h b/decoder.h index 5b06b25..03adbbb 100644 --- a/decoder.h +++ b/decoder.h @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2023 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -86,7 +86,7 @@ public: header.data[sz] = buffer[pos]; if( ignore_errors && ( ( sz < 4 && header.data[sz] != lzip_magic[sz] ) || - ( sz == 4 && !header.verify_version() ) || + ( sz == 4 && !header.check_version() ) || ( sz == 5 && !isvalid_ds( header.dictionary_size() ) ) ) ) break; ++pos; ++sz; } @@ -106,12 +106,14 @@ public: return false; } - void load() + bool load( const bool ignore_marking = true ) { code = 0; - for( int i = 0; i < 5; ++i ) code = ( code << 8 ) | get_byte(); range = 0xFFFFFFFFU; - code &= range; // make sure that first byte is discarded + // check and discard first byte of the LZMA stream + if( get_byte() != 0 && !ignore_marking ) return false; + for( int i = 0; i < 4; ++i ) code = ( code << 8 ) | get_byte(); + return true; } void normalize() @@ -136,7 +138,7 @@ public: return symbol; } - unsigned decode_bit( Bit_model & bm ) + bool decode_bit( Bit_model & bm ) { normalize(); const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability; @@ -303,7 +305,7 @@ class LZ_decoder unsigned long long stream_position() const { return partial_data_pos + stream_pos; } void flush_data(); - bool verify_trailer( const Pretty_print & pp ) const; + int check_trailer( const Pretty_print & pp, const bool ignore_empty ) const; uint8_t peek_prev() const { return buffer[((pos > 0) ? pos : dictionary_size)-1]; } @@ -379,5 +381,6 @@ public: unsigned crc() const { return crc_ ^ 0xFFFFFFFFU; } unsigned long long data_position() const { return partial_data_pos + pos; } - int decode_member( const Pretty_print & pp ); + int decode_member( const Pretty_print & pp, const bool ignore_empty = true, + const bool ignore_marking = true ); }; diff --git a/doc/lziprecover.1 b/doc/lziprecover.1 index e05a366..1f26b81 100644 --- a/doc/lziprecover.1 +++ b/doc/lziprecover.1 @@ -1,5 +1,5 @@ -.\" DO NOT MODIFY THIS FILE! 
It was generated by help2man 1.47.16. -.TH LZIPRECOVER "1" "January 2022" "lziprecover 1.23" "User Commands" +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.2. +.TH LZIPRECOVER "1" "June 2023" "lziprecover 1.24-pre1" "User Commands" .SH NAME lziprecover \- recovers data from damaged lzip files .SH SYNOPSIS @@ -45,7 +45,7 @@ convert lzma\-alone files to lzip format write to standard output, keep input files .TP \fB\-d\fR, \fB\-\-decompress\fR -decompress +decompress, test compressed file integrity .TP \fB\-D\fR, \fB\-\-range\-decompress=\fR decompress a range of bytes to stdout @@ -83,8 +83,8 @@ place the output into \fB\-q\fR, \fB\-\-quiet\fR suppress all messages .TP -\fB\-R\fR, \fB\-\-repair\fR -try to repair a small error in file +\fB\-R\fR, \fB\-\-byte\-repair\fR +try to repair a corrupt byte in file .TP \fB\-s\fR, \fB\-\-split\fR split multimember file in single\-member files @@ -95,17 +95,26 @@ test compressed file integrity \fB\-v\fR, \fB\-\-verbose\fR be verbose (a 2nd \fB\-v\fR gives more) .TP -\fB\-\-loose\-trailing\fR -allow trailing data seeming corrupt header -.TP -\fB\-\-dump=\fR:d:t -dump members listed/damaged, tdata to stdout +\fB\-\-dump=\fR:d:e:t +dump members, damaged/empty, tdata to stdout .TP -\fB\-\-remove=\fR:d:t +\fB\-\-remove=\fR:d:e:t remove members, tdata from files in place .TP -\fB\-\-strip=\fR:d:t +\fB\-\-strip=\fR:d:e:t copy files to stdout stripping members given +.TP +\fB\-\-empty\-error\fR +exit with error status if empty member in file +.TP +\fB\-\-marking\-error\fR +exit with error status if 1st LZMA byte not 0 +.TP +\fB\-\-loose\-trailing\fR +allow trailing data seeming corrupt header +.TP +\fB\-\-clear\-marking\fR +reset the first LZMA byte of each member .PP If no file names are given, or if a file is '\-', lziprecover decompresses from standard input to standard output. @@ -115,16 +124,16 @@ Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc... 
To extract all the files from archive 'foo.tar.lz', use the commands \&'tar \fB\-xf\fR foo.tar.lz' or 'lziprecover \fB\-cd\fR foo.tar.lz | tar \fB\-xf\fR \-'. .PP -Exit status: 0 for a normal exit, 1 for environmental problems (file -not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or -invalid input file, 3 for an internal consistency error (e.g., bug) which -caused lziprecover to panic. +Exit status: 0 for a normal exit, 1 for environmental problems +(file not found, invalid command line options, I/O errors, etc), 2 to +indicate a corrupt or invalid input file, 3 for an internal consistency +error (e.g., bug) which caused lziprecover to panic. .SH "REPORTING BUGS" Report bugs to lzip\-bug@nongnu.org .br Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html .SH COPYRIGHT -Copyright \(co 2022 Antonio Diaz Diaz. +Copyright \(co 2023 Antonio Diaz Diaz. License GPLv2+: GNU GPL version 2 or later .br This is free software: you are free to change and redistribute it. diff --git a/doc/lziprecover.info b/doc/lziprecover.info index 112f65b..2ef7641 100644 --- a/doc/lziprecover.info +++ b/doc/lziprecover.info @@ -12,7 +12,7 @@ File: lziprecover.info, Node: Top, Next: Introduction, Up: (dir) Lziprecover Manual ****************** -This manual is for Lziprecover (version 1.23, 21 January 2022). +This manual is for Lziprecover (version 1.24-pre1, 14 June 2023). * Menu: @@ -32,7 +32,7 @@ This manual is for Lziprecover (version 1.23, 21 January 2022). * Concept index:: Index of concepts - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2023 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute, and modify it. @@ -89,9 +89,9 @@ byte near the beginning is a thing of the past. Compression may be good for long-term archiving. 
For compressible data, multiple compressed copies may provide redundancy in a more useful form and may have a better chance of surviving intact than one uncompressed copy -using the same amount of storage space. This is specially true if the format -provides recovery capabilities like those of lziprecover, which is able to -find and combine the good parts of several damaged copies. +using the same amount of storage space. This is especially true if the +format provides recovery capabilities like those of lziprecover, which is +able to find and combine the good parts of several damaged copies. Lziprecover is able to recover or decompress files produced by any of the compressors in the lzip family: lzip, plzip, minilzip/lzlib, clzip, and @@ -107,7 +107,7 @@ recoverable data in all members of the file can be extracted with the following command (the resulting file may contain errors and some garbage data may be produced at the end of each damaged member): - lziprecover -cd -i file.lz > file + lziprecover -cd --ignore-errors file.lz > file When recovering data, lziprecover takes as arguments the names of the damaged files and writes zero or more recovered files depending on the @@ -134,7 +134,8 @@ The format for running lziprecover is: When decompressing or testing, a hyphen '-' used as a FILE argument means standard input. It can be mixed with other FILES and is read just once, the first time it appears in the command line. If no file names are specified, -lziprecover decompresses from standard input to standard output. +lziprecover decompresses from standard input to standard output. Remember +to prepend './' to any file name beginning with a hyphen, or use '--'. lziprecover supports the following options: *Note Argument syntax: (arg_parser)Argument syntax. @@ -181,13 +182,14 @@ lziprecover decompresses from standard input to standard output. '-d' '--decompress' - Decompress the files specified. 
If a file does not exist, can't be - opened, or the destination file already exists and '--force' has not - been specified, lziprecover continues decompressing the rest of the - files and exits with error status 1. If a file fails to decompress, or - is a terminal, lziprecover exits immediately with error status 2 - without decompressing the rest of the files. A terminal is considered - an uncompressed file, and therefore invalid. + Decompress the files specified. The integrity of the files specified is + checked. If a file does not exist, can't be opened, or the destination + file already exists and '--force' has not been specified, lziprecover + continues decompressing the rest of the files and exits with error + status 1. If a file fails to decompress, or is a terminal, lziprecover + exits immediately with error status 2 without decompressing the rest + of the files. A terminal is considered an uncompressed file, and + therefore invalid. '-D RANGE' '--range-decompress=RANGE' @@ -197,7 +199,7 @@ lziprecover decompresses from standard input to standard output. only decompresses the members containing the desired data. In order to guarantee the correctness of the data produced, all members containing any part of the desired data are decompressed and their integrity is - verified. + checked. Four formats of RANGE are recognized, 'BEGIN', 'BEGIN-END', 'BEGIN,SIZE', and ',SIZE'. If only BEGIN is specified, END is taken as @@ -250,7 +252,7 @@ lziprecover decompresses from standard input to standard output. errors, for example). Make '--list', '--dump', '--remove', and '--strip' ignore format - errors. The sizes of the members with errors (specially the last) may + errors. The sizes of the members with errors (especially the last) may be wrong. '-k' @@ -271,11 +273,11 @@ lziprecover decompresses from standard input to standard output. file numbers produced by '--split'. 
If any file is damaged, does not exist, can't be opened, or is not - regular, the final exit status will be > 0. '-lq' can be used to verify + regular, the final exit status will be > 0. '-lq' can be used to check quickly (without decompressing) the structural integrity of the files - specified. (Use '--test' to verify the data integrity). '-alq' - additionally verifies that none of the files specified contain - trailing data. + specified. (Use '--test' to check the data integrity). '-alq' + additionally checks that none of the files specified contain trailing + data. '-m' '--merge' @@ -302,7 +304,7 @@ lziprecover decompresses from standard input to standard output. Quiet operation. Suppress all messages. '-R' -'--repair' +'--byte-repair' Try to repair a FILE with small errors (up to one single-byte error per member). If successful, a repaired copy is written to the file 'FILE_fixed.lz'. FILE is not modified at all. The exit status is 0 if @@ -335,7 +337,7 @@ lziprecover decompresses from standard input to standard output. really performs a trial decompression and throws away the result. Use it together with '-v' to see information about the files. If a file fails the test, does not exist, can't be opened, or is a terminal, - lziprecover continues checking the rest of the files. A final + lziprecover continues testing the rest of the files. A final diagnostic is shown at verbosity level 1 or higher if any file fails the test when testing multiple files. @@ -351,35 +353,31 @@ lziprecover decompresses from standard input to standard output. In other modes, increasing verbosity levels show final status, progress of operations, and extra information (for example, the failed areas). -'--loose-trailing' - When decompressing, testing, or listing, allow trailing data whose - first bytes are so similar to the magic bytes of a lzip header that - they can be confused with a corrupt header. 
Use this option if a file - triggers a "corrupt header" error and the cause is not indeed a - corrupt header. - -'--dump=[MEMBER_LIST][:damaged][:tdata]' - Dump the members listed, the damaged members (if any), or the trailing - data (if any) of one or more regular multimember files to standard - output, or to a file if the option '--output' is used. If more than - one file is given, the elements dumped from all files are concatenated. - If a file does not exist, can't be opened, or is not regular, - lziprecover continues processing the rest of the files. If the dump - fails in one file, lziprecover exits immediately without processing the - rest of the files. Only '--dump=tdata' can write to a terminal. +'--dump=[MEMBER_LIST][:damaged][:empty][:tdata]' + Dump the members listed, the damaged members (if any), the empty + members (if any), or the trailing data (if any) of one or more regular + multimember files to standard output, or to a file if the option + '--output' is used. If more than one file is given, the elements + dumped from all the files are concatenated. If a file does not exist, + can't be opened, or is not regular, lziprecover continues processing + the rest of the files. If the dump fails in one file, lziprecover + exits immediately without processing the rest of the files. Only + '--dump=tdata' can write to a terminal. '--dump=damaged' implies + '--ignore-errors'. The argument to '--dump' is a colon-separated list of the following element specifiers; a member list (1,3-6), a reverse member list - (r1,3-6), and the strings "damaged" and "tdata" (which may be shortened - to 'd' and 't' respectively). A member list selects the members (or - gaps) listed, whose numbers coincide with those shown by '--list'. A - reverse member list selects the members listed counting from the last - member in the file (r1). Negated versions of both kinds of lists exist - (^1,3-6:r^1,3-6) which selects all the members except those in the - list. 
The strings "damaged" and "tdata" select the damaged members and - the trailing data respectively. If the same member is selected more - than once, for example by '1:r1' in a single-member file, it is dumped - just once. See the following examples: + (r1,3-6), and the strings "damaged", "empty", and "tdata" (which may + be shortened to 'd', 'e', and 't' respectively). A member list selects + the members (or gaps) listed, whose numbers coincide with those shown + by '--list'. A reverse member list selects the members listed counting + from the last member in the file (r1). Negated versions of both kinds + of lists exist (^1,3-6:r^1,3-6) which select all the members except + those in the list. The strings "damaged", "empty", and "tdata" select + the damaged members, the empty members (those with a data size = 0), + and the trailing data respectively. If the same member is selected + more than once, for example by '1:r1' in a single-member file, it is + dumped just once. See the following examples: '--dump' argument Elements dumped --------------------------------------------------------------------- @@ -388,43 +386,75 @@ lziprecover decompresses from standard input to standard output. '^13,15' all but 13th and 15th members in file 'r^1' all but last member in file 'damaged' all damaged members in file + 'empty' all empty members in file 'tdata' trailing data '1-5:r1:tdata' members 1 to 5, last member, trailing data 'damaged:tdata' damaged members, trailing data '3,12:damaged:tdata' members 3, 12, damaged members, trailing data -'--remove=[MEMBER_LIST][:damaged][:tdata]' - Remove the members listed, the damaged members (if any), or the - trailing data (if any) from regular multimember files in place. The - date of each file is preserved if possible. If all members in a file - are selected to be removed, the file is left unchanged and the exit - status is set to 2. 
If a file does not exist, can't be opened, is not - regular, or is left unchanged, lziprecover continues processing the - rest of the files. In case of I/O error, lziprecover exits immediately - without processing the rest of the files. See '--dump' above for a - description of the argument. - - This option may be dangerous even if only the trailing data is being +'--remove=[MEMBER_LIST][:damaged][:empty][:tdata]' + Remove the members listed, the damaged members (if any), the empty + members (if any), or the trailing data (if any) from regular + multimember files in place. The date of each file modified is + preserved if possible. If all members in a file are selected to be + removed, the file is left unchanged and the exit status is set to 2. + If a file does not exist, can't be opened, is not regular, or is left + unchanged, lziprecover continues processing the rest of the files. In + case of I/O error, lziprecover exits immediately without processing + the rest of the files. See '--dump' above for a description of the + argument. + + This option may be dangerous even if only the trailing data are being removed because the file may be corrupt or the trailing data may contain a forbidden combination of characters. *Note Trailing data::. - It is advisable to make a backup before attempting the removal. At - least verify that 'lzip -cd file.lz | wc -c' and the uncompressed size - shown by 'lzip -l file.lz' match before attempting the removal of - trailing data. - -'--strip=[MEMBER_LIST][:damaged][:tdata]' + It is safer to send the output of '--strip' to a temporary file, check + it, and then copy it over the original file. But if you prefer + '--remove' because of its more efficient in-place removal, it is + advisable to make a backup before attempting the removal. At least + check that 'lzip -cd file.lz | wc -c' and the uncompressed size shown + by 'lzip -l file.lz' match before attempting the removal of trailing + data. 
+ +'--strip=[MEMBER_LIST][:damaged][:empty][:tdata]' Copy one or more regular multimember files to standard output (or to a file if the option '--output' is used), stripping the members listed, - the damaged members (if any), or the trailing data (if any) from each - file. If all members in a file are selected to be stripped, the - trailing data (if any) are also stripped even if 'tdata' is not - specified. If more than one file is given, the files are concatenated. - In this case the trailing data are also stripped from all but the last - file even if 'tdata' is not specified. If a file does not exist, can't - be opened, or is not regular, lziprecover continues processing the - rest of the files. If a file fails to copy, lziprecover exits - immediately without processing the rest of the files. See '--dump' - above for a description of the argument. + the damaged members (if any), the empty members (if any), or the + trailing data (if any) from each file. If all members in a file are + selected to be stripped, the trailing data (if any) are also stripped + even if 'tdata' is not specified. If more than one file is given, the + files are concatenated. In this case the trailing data are also + stripped from all but the last file even if 'tdata' is not specified. + If a file does not exist, can't be opened, or is not regular, + lziprecover continues processing the rest of the files. If a file + fails to copy, lziprecover exits immediately without processing the + rest of the files. See '--dump' above for a description of the + argument. + +'--empty-error' + Exit with error status 2 if any empty member is found in the input + files. + +'--marking-error' + Exit with error status 2 if the first LZMA byte is non-zero in any + member of the input files. This may be caused by data corruption or by + deliberate insertion of tracking information in the file. Use + 'lziprecover --clear-marking' to clear any such non-zero bytes. 
+ +'--loose-trailing' + When decompressing, testing, or listing, allow trailing data whose + first bytes are so similar to the magic bytes of a lzip header that + they can be confused with a corrupt header. Use this option if a file + triggers a "corrupt header" error and the cause is not indeed a + corrupt header. + +'--clear-marking' + Set to zero the first LZMA byte of each member in the files specified. + At verbosity level 1 (-v), print the number of members cleared. The + date of each file modified is preserved if possible. This option + exists because the first byte of the LZMA stream is ignored by the + range decoder, and can therefore be (mis)used to store any value which + can then be used as a watermark to track the path of the compressed + payload. Lziprecover also supports the following debug options (for experts): @@ -443,9 +473,9 @@ lziprecover decompresses from standard input to standard output. '--md5sum' Print to standard output the MD5 digests of the input FILES one per line in the same format produced by the 'md5sum' tool. Lziprecover - uses MD5 digests to verify the result of some operations. This option - allows the verification of lziprecover's implementation of the MD5 - algorithm. + uses MD5 digests to check the result of some operations. This option + can be used to test the correctness of lziprecover's implementation of + the MD5 algorithm. '-S[VALUE]' '--nrep-stats[=VALUE]' @@ -453,8 +483,8 @@ lziprecover decompresses from standard input to standard output. VALUE in the compressed LZMA streams of the input FILES with the frequency expected for random data (1 / 2^(8N)). If VALUE is not specified, print the frequency of repeated sequences of all possible - byte values. Print cumulative data for all files followed by the name - of the first file with the longest sequence. + byte values. Print cumulative data for all the files, followed by the + name of the first file with the longest sequence. 
'-U 1|BSIZE' '--unzcrash=1|BSIZE' @@ -509,31 +539,34 @@ lziprecover decompresses from standard input to standard output. range-format::, for a description of RANGE. '-Z POSITION,VALUE' -'--debug-repair=POSITION,VALUE' +'--debug-byte-repair=POSITION,VALUE' Load the compressed FILE into memory, set the byte at POSITION to - VALUE, and then try to repair the error. *Note --repair::. + VALUE, and then try to repair the byte error. *Note --byte-repair::. - Numbers given as arguments to options may be followed by a multiplier -and an optional 'B' for "byte". + Numbers given as arguments to options may be expressed in decimal, +hexadecimal, or octal (using the same syntax as integer constants in C++), +and may be followed by a multiplier and an optional 'B' for "byte". Table of SI and binary prefixes (unit multipliers): -Prefix Value | Prefix Value -k kilobyte (10^3 = 1000) | Ki kibibyte (2^10 = 1024) -M megabyte (10^6) | Mi mebibyte (2^20) -G gigabyte (10^9) | Gi gibibyte (2^30) -T terabyte (10^12) | Ti tebibyte (2^40) -P petabyte (10^15) | Pi pebibyte (2^50) -E exabyte (10^18) | Ei exbibyte (2^60) -Z zettabyte (10^21) | Zi zebibyte (2^70) -Y yottabyte (10^24) | Yi yobibyte (2^80) +Prefix Value | Prefix Value +k kilobyte (10^3 = 1000) | Ki kibibyte (2^10 = 1024) +M megabyte (10^6) | Mi mebibyte (2^20) +G gigabyte (10^9) | Gi gibibyte (2^30) +T terabyte (10^12) | Ti tebibyte (2^40) +P petabyte (10^15) | Pi pebibyte (2^50) +E exabyte (10^18) | Ei exbibyte (2^60) +Z zettabyte (10^21) | Zi zebibyte (2^70) +Y yottabyte (10^24) | Yi yobibyte (2^80) +R ronnabyte (10^27) | Ri robibyte (2^90) +Q quettabyte (10^30) | Qi quebibyte (2^100) Exit status: 0 for a normal exit, 1 for environmental problems (file not -found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid -input file, 3 for an internal consistency error (e.g., bug) which caused -lziprecover to panic. 
+found, invalid command line options, I/O errors, etc), 2 to indicate a +corrupt or invalid input file, 3 for an internal consistency error (e.g., +bug) which caused lziprecover to panic.  File: lziprecover.info, Node: Data safety, Next: Repairing one byte, Prev: Invoking lziprecover, Up: Top @@ -593,7 +626,7 @@ only be recovered by an expert, if at all. If you used bzip2, and if the file is large enough to contain more than one compressed data block (usually larger than 900 kB uncompressed), and if no block is damaged in both files, then the data can be manually recovered -by splitting the files with bzip2recover, verifying every block, and then +by splitting the files with bzip2recover, checking every block, and then copying the right blocks in the right order into another file. But if you used lzip, the data can be automatically recovered with @@ -616,12 +649,12 @@ mailbox, therefore the initial part of two consecutive backups is identical unless some messages have been changed or deleted in the meantime. The new messages added to each backup are usually a small part of the whole mailbox. -+========================================================+ -| Older backup containing some messages | -+========================================================+ -+========================================================+================+ -| Newer backup containing the messages above plus some | new messages | -+========================================================+================+ ++============================================+ +| Older backup containing some messages | ++============================================+ ++============================================+========================+ +| Newer backup containing the messages above | plus some new messages | ++============================================+========================+ One day you discover that your mailbox has disappeared because you deleted it inadvertently or because of a bug in your email reader. 
Not only @@ -644,7 +677,7 @@ combining the good blocks from both backups. But if you used lzip, the whole newer backup can be automatically recovered with 'lziprecover --reproduce' as long as the missing bytes can be recovered from the older backup, even if other messages in the common part -have been changed or deleted. Mailboxes seem to be specially easy to +have been changed or deleted. Mailboxes seem to be especially easy to reproduce. The probability of reproducing a mailbox (*note performance-of-reproduce::) is almost as high as that of merging two identical backups (*note performance-of-merge::). @@ -791,7 +824,7 @@ feeding the concatenated data to the same version of lzip that created the file. For this to work, a reference file is required containing the uncompressed data corresponding to the missing compressed data of the zeroed sector, plus some context data before and after them. It is possible to -recover a large file using just a few KB of reference data. +recover a large file using just a few kB of reference data. The difficult part is finding a suitable reference file. It must contain the exact data required (possibly mixed with other data). Containing similar @@ -821,9 +854,9 @@ which should produce an output like the following: foo: Match found at offset 296892 Reproduction succeeded at pos 65536 - 1 sectors tested - 1 reproductions returned with zero status - all comparisons passed + 1 sectors tested + 1 reproductions returned with zero status + all comparisons passed Using 'foo' as reference file guarantees that any zeroed sector in 'foo.lz' can be reproduced because both files contain the same data. In @@ -859,7 +892,7 @@ when they are required. 6.1 Performance of '--reproduce' ================================ -Reproduce mode is specially useful when recovering a corrupt backup (or a +Reproduce mode is especially useful when recovering a corrupt backup (or a corrupt source tarball) that is part of a series. 
Usually only a small fraction of the data changes from one backup to the next or from one version of a source tarball to the next. This makes sometimes possible to reproduce @@ -890,11 +923,11 @@ gmp-6.1.1.tar gmp-6.1.2.tar.lz 175 / 473 = 37% gmp-6.1.2.tar gmp-6.1.1.tar.lz 181 / 472 = 38.35% Note that the "performance of reproduce" is a probability, not a partial -recovery. The data is either recovered fully (with the probability X shown +recovery. The data are either recovered fully (with the probability X shown in the last column of the tables above) or not recovered at all (with probability 1 - X). - Example 1: Recover a damaged source tarball with a zeroed sector of 512 +Example 1: Recover a damaged source tarball with a zeroed sector of 512 bytes at file position 1019904, using as reference another source tarball for a different version of the software. @@ -1049,10 +1082,10 @@ File: lziprecover.info, Node: File names, Next: File format, Prev: Tarlz, Up 8 Names of the files produced by lziprecover ******************************************** -The name of the fixed file produced by '--merge' and '--repair' is made by -appending the string '_fixed.lz' to the original file name. If the original -file name ends with one of the extensions '.tar.lz', '.lz', or '.tlz', the -string '_fixed' is inserted before the extension. +The name of the fixed file produced by '--byte-repair' and '--merge' is +made by appending the string '_fixed.lz' to the original file name. If the +original file name ends with one of the extensions '.tar.lz', '.lz', or +'.tlz', the string '_fixed' is inserted before the extension.  File: lziprecover.info, Node: File format, Next: Trailing data, Prev: File names, Up: Top @@ -1124,10 +1157,10 @@ size of a multimember file is unlimited. 'Member size (8 bytes)' Total size of the member, including header and trailer. 
This field acts - as a distributed index, allows the verification of stream integrity, - and facilitates the safe recovery of undamaged members from - multimember files. Member size should be limited to 2 PiB to prevent - the data size field from overflowing. + as a distributed index, improves the checking of stream integrity, and + facilitates the safe recovery of undamaged members from multimember + files. Lzip limits the member size to 2 PiB to prevent the data size + field from overflowing.  @@ -1143,12 +1176,13 @@ member. Such trailing data may be: example when writing to a tape. It is safe to append any amount of padding zero bytes to a lzip file. - * Useful data added by the user; a cryptographically secure hash, a + * Useful data added by the user; an "End Of File" string (to check that + the file has not been truncated), a cryptographically secure hash, a description of file contents, etc. It is safe to append any amount of - text to a lzip file as long as none of the first four bytes of the text - match the corresponding byte in the string "LZIP", and the text does - not contain any zero bytes (null characters). Nonzero bytes and zero - bytes can't be safely mixed in trailing data. + text to a lzip file as long as none of the first four bytes of the + text match the corresponding byte in the string "LZIP", and the text + does not contain any zero bytes (null characters). Nonzero bytes and + zero bytes can't be safely mixed in trailing data. * Garbage added by some not totally successful copy operation. @@ -1190,7 +1224,7 @@ Example 1: Add a comment or description to a compressed file. lziprecover --remove=tdata file.lz -Example 2: Add and verify a cryptographically secure hash. (This may be +Example 2: Add and check a cryptographically secure hash. (This may be convenient, but a separate copy of the hash must be kept in a safe place to guarantee that both file and hash have not been maliciously replaced). 
@@ -1217,7 +1251,7 @@ the operation is successful, 'file.lz' is removed. lziprecover -d file.lz -Example 3: Verify the integrity of the compressed file 'file.lz' and show +Example 3: Check the integrity of the compressed file 'file.lz' and show status. lziprecover -tv file.lz @@ -1233,7 +1267,7 @@ more compressed files. *Note Trailing data::. You may also concatenate the compressed files like this lziprecover --strip=tdata file1.lz file2.lz file3.lz > file123.lz Or keeping the trailing data of the last file like this - lziprecover --strip=damaged file1.lz file2.lz file3.lz > file123.lz + lziprecover --strip=empty file1.lz file2.lz file3.lz > file123.lz Example 5: Decompress 'file.lz' partially until 10 KiB of decompressed data @@ -1248,7 +1282,7 @@ Example 6: Decompress 'file.lz' partially from decompressed byte at offset lziprecover -D 10000-15000 file.lz -Example 7: Repair small errors in the file 'file.lz'. (Indented lines are +Example 7: Repair a corrupt byte in the file 'file.lz'. (Indented lines are abridged diagnostic messages from lziprecover). lziprecover -v -R file.lz @@ -1375,9 +1409,9 @@ tested must decompress it correctly for the comparisons to work. for example. '-n' -'--no-verify' - Skip initial verification of FILE and 'zcmp'. May speed up things a - lot when testing many (or large) known good files. +'--no-check' + Skip initial test of FILE and 'zcmp'. May speed up things a lot when + testing many (or large) known good files. '-p BYTES' '--position=BYTES' @@ -1413,9 +1447,9 @@ tested must decompress it correctly for the comparisons to work. Exit status: 0 for a normal exit, 1 for environmental problems (file not -found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid -input file, 3 for an internal consistency error (e.g., bug) which caused -unzcrash to panic. 
+found, invalid command line options, I/O errors, etc), 2 to indicate a +corrupt or invalid input file, 3 for an internal consistency error (e.g., +bug) which caused unzcrash to panic.  File: lziprecover.info, Node: Problems, Next: Concept index, Prev: Unzcrash, Up: Top @@ -1465,31 +1499,31 @@ Concept index  Tag Table: Node: Top226 -Node: Introduction1406 -Node: Invoking lziprecover5398 -Ref: --trailing-error6265 -Ref: range-format8644 -Ref: --reproduce8979 -Ref: --repair13278 -Node: Data safety25584 -Node: Merging with a backup27572 -Node: Reproducing a mailbox28836 -Node: Repairing one byte31337 -Node: Merging files33402 -Ref: performance-of-merge34572 -Ref: ddrescue-example36181 -Node: Reproducing one sector37468 -Ref: performance-of-reproduce41351 -Ref: ddrescue-example244026 -Node: Tarlz46446 -Node: File names50110 -Node: File format50567 -Node: Trailing data53258 -Node: Examples56499 -Ref: concat-example57075 -Node: Unzcrash58467 -Node: Problems64739 -Node: Concept index65291 +Node: Introduction1408 +Node: Invoking lziprecover5414 +Ref: --trailing-error6361 +Ref: range-format8793 +Ref: --reproduce9128 +Ref: --byte-repair13424 +Node: Data safety27441 +Node: Merging with a backup29429 +Node: Reproducing a mailbox30692 +Node: Repairing one byte33146 +Node: Merging files35211 +Ref: performance-of-merge36381 +Ref: ddrescue-example37990 +Node: Reproducing one sector39277 +Ref: performance-of-reproduce43163 +Ref: ddrescue-example245837 +Node: Tarlz48257 +Node: File names51921 +Node: File format52383 +Node: Trailing data55070 +Node: Examples58388 +Ref: concat-example58963 +Node: Unzcrash60355 +Node: Problems66633 +Node: Concept index67185  End Tag Table diff --git a/doc/lziprecover.texi b/doc/lziprecover.texi index 7b3449e..11a9ed5 100644 --- a/doc/lziprecover.texi +++ b/doc/lziprecover.texi @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 21 January 2022 -@set VERSION 1.23 +@set UPDATED 14 June 2023 +@set VERSION 1.24-pre1 @dircategory Compression 
@direntry @@ -53,7 +53,7 @@ This manual is for Lziprecover (version @value{VERSION}, @value{UPDATED}). @end menu @sp 1 -Copyright @copyright{} 2009-2022 Antonio Diaz Diaz. +Copyright @copyright{} 2009-2023 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute, and modify it. @@ -116,9 +116,9 @@ the beginning is a thing of the past. Compression may be good for long-term archiving. For compressible data, multiple compressed copies may provide redundancy in a more useful form and may have a better chance of surviving intact than one uncompressed copy -using the same amount of storage space. This is specially true if the format -provides recovery capabilities like those of lziprecover, which is able to -find and combine the good parts of several damaged copies. +using the same amount of storage space. This is especially true if the +format provides recovery capabilities like those of lziprecover, which is +able to find and combine the good parts of several damaged copies. Lziprecover is able to recover or decompress files produced by any of the compressors in the lzip family: lzip, plzip, minilzip/lzlib, clzip, and @@ -135,7 +135,7 @@ data in all members of the file can be extracted with the following command at the end of each damaged member): @example -lziprecover -cd -i file.lz > file +lziprecover -cd --ignore-errors file.lz > file @end example When recovering data, lziprecover takes as arguments the names of the @@ -169,7 +169,8 @@ When decompressing or testing, a hyphen @samp{-} used as a @var{file} argument means standard input. It can be mixed with other @var{files} and is read just once, the first time it appears in the command line. If no file names are specified, lziprecover decompresses from standard input to -standard output. +standard output. Remember to prepend @file{./} to any file name beginning +with a hyphen, or use @samp{--}. 
lziprecover supports the following @uref{http://www.nongnu.org/arg-parser/manual/arg_parser_manual.html#Argument-syntax,,options}: @@ -215,21 +216,21 @@ lzma-alone file as follows: @item -c @itemx --stdout Write decompressed data to standard output; keep input files unchanged. This -option (or @samp{-o}) is needed when reading from a named pipe (fifo) or +option (or @option{-o}) is needed when reading from a named pipe (fifo) or from a device. Use it also to recover as much of the decompressed data as -possible when decompressing a corrupt file. @samp{-c} overrides @samp{-o}. -@samp{-c} has no effect when merging, removing members, repairing, +possible when decompressing a corrupt file. @option{-c} overrides @option{-o}. +@option{-c} has no effect when merging, removing members, repairing, reproducing, splitting, testing or listing. @item -d @itemx --decompress -Decompress the files specified. If a file does not exist, can't be opened, -or the destination file already exists and @samp{--force} has not been -specified, lziprecover continues decompressing the rest of the files and -exits with error status 1. If a file fails to decompress, or is a terminal, -lziprecover exits immediately with error status 2 without decompressing the -rest of the files. A terminal is considered an uncompressed file, and -therefore invalid. +Decompress the files specified. The integrity of the files specified is +checked. If a file does not exist, can't be opened, or the destination file +already exists and @option{--force} has not been specified, lziprecover +continues decompressing the rest of the files and exits with error status 1. +If a file fails to decompress, or is a terminal, lziprecover exits +immediately with error status 2 without decompressing the rest of the files. +A terminal is considered an uncompressed file, and therefore invalid. 
@item -D @var{range} @itemx --range-decompress=@var{range} @@ -238,7 +239,7 @@ Decompress only a range of bytes starting at decompressed byte position at 0. This option provides random access to the data in multimember files; it only decompresses the members containing the desired data. In order to guarantee the correctness of the data produced, all members containing any -part of the desired data are decompressed and their integrity is verified. +part of the desired data are decompressed and their integrity is checked. @anchor{range-format} Four formats of @var{range} are recognized, @samp{@var{begin}}, @@ -246,7 +247,7 @@ Four formats of @var{range} are recognized, @samp{@var{begin}}, @samp{,@var{size}}. If only @var{begin} is specified, @var{end} is taken as the end of the file. If only @var{size} is specified, @var{begin} is taken as the beginning of the file. The bytes produced are sent to standard output -unless the option @samp{--output} is used. +unless the option @option{--output} is used. @anchor{--reproduce} @item -e @@ -262,16 +263,16 @@ of the reproduce mode. @item --lzip-level=@var{digit}|a|m[@var{length}] Try only the given compression level or match length limit when reproducing -a zeroed sector. @samp{--lzip-level=a} tries all the compression levels -@w{(0 to 9)}, while @samp{--lzip-level=m} tries all the match length limits +a zeroed sector. @option{--lzip-level=a} tries all the compression levels +@w{(0 to 9)}, while @option{--lzip-level=m} tries all the match length limits @w{(5 to 273)}. @item --lzip-name=@var{name} -Set the name of the lzip executable used by @samp{--reproduce}. If -@samp{--lzip-name} is not specified, @samp{lzip} is used. +Set the name of the lzip executable used by @option{--reproduce}. If +@option{--lzip-name} is not specified, @samp{lzip} is used. @item --reference-file=@var{file} -Set the reference file used by @samp{--reproduce}. It must contain the +Set the reference file used by @option{--reproduce}. 
It must contain the uncompressed data corresponding to the missing compressed data of the zeroed sector, plus some context data before and after them. @@ -281,7 +282,7 @@ Force overwrite of output files. @item -i @itemx --ignore-errors -Make @samp{--decompress}, @samp{--test}, and @samp{--range-decompress} +Make @option{--decompress}, @option{--test}, and @option{--range-decompress} ignore format and data errors and continue decompressing the remaining members in the file; keep input files unchanged. For example, the commands @w{@samp{lziprecover -cd -i file.lz > file}} or @@ -293,8 +294,8 @@ range decompressed may be smaller than the range requested, because of the errors. The exit status is set to 0 unless other errors are found (I/O errors, for example). -Make @samp{--list}, @samp{--dump}, @samp{--remove}, and @samp{--strip} -ignore format errors. The sizes of the members with errors (specially the +Make @option{--list}, @option{--dump}, @option{--remove}, and @option{--strip} +ignore format errors. The sizes of the members with errors (especially the last) may be wrong. @item -k @@ -306,18 +307,18 @@ Keep (don't delete) input files during decompression. Print the uncompressed size, compressed size, and percentage saved of the files specified. Trailing data are ignored. The values produced are correct even for multimember files. If more than one file is given, a final line -containing the cumulative sizes is printed. With @samp{-v}, the dictionary +containing the cumulative sizes is printed. With @option{-v}, the dictionary size, the number of members in the file, and the amount of trailing data (if -any) are also printed. With @samp{-vv}, the positions and sizes of each -member in multimember files are also printed. With @samp{-i}, format errors -are ignored, and with @samp{-ivv}, gaps between members are shown. The -member numbers shown coincide with the file numbers produced by @samp{--split}. +any) are also printed. 
With @option{-vv}, the positions and sizes of each +member in multimember files are also printed. With @option{-i}, format errors +are ignored, and with @option{-ivv}, gaps between members are shown. The +member numbers shown coincide with the file numbers produced by @option{--split}. If any file is damaged, does not exist, can't be opened, or is not regular, -the final exit status will be @w{> 0}. @samp{-lq} can be used to verify +the final exit status will be @w{> 0}. @option{-lq} can be used to check quickly (without decompressing) the structural integrity of the files -specified. (Use @samp{--test} to verify the data integrity). @samp{-alq} -additionally verifies that none of the files specified contain trailing data. +specified. (Use @option{--test} to check the data integrity). @option{-alq} +additionally checks that none of the files specified contain trailing data. @item -m @itemx --merge @@ -333,19 +334,19 @@ Place the output into @var{file} instead of into @samp{@var{file}_fixed.lz}. If splitting, the names of the files produced are in the form @samp{rec01@var{file}}, @samp{rec02@var{file}}, etc. -If decompressing, or converting lzma-alone files, and @samp{-c} has not been +If decompressing, or converting lzma-alone files, and @option{-c} has not been also specified, write the decompressed or converted output to @var{file}; -keep input files unchanged. This option (or @samp{-c}) is needed when +keep input files unchanged. This option (or @option{-c}) is needed when reading from a named pipe (fifo) or from a device. @w{@samp{-o -}} is -equivalent to @samp{-c}. @samp{-o} has no effect when testing or listing. +equivalent to @option{-c}. @option{-o} has no effect when testing or listing. @item -q @itemx --quiet Quiet operation. Suppress all messages. -@anchor{--repair} +@anchor{--byte-repair} @item -R -@itemx --repair +@itemx --byte-repair Try to repair a @var{file} with small errors (up to one single-byte error per member). 
If successful, a repaired copy is written to the file @samp{@var{file}_fixed.lz}. @var{file} is not modified at all. The exit @@ -375,11 +376,11 @@ depending on the number of members in @var{file}. @itemx --test Check integrity of the files specified, but don't decompress them. This really performs a trial decompression and throws away the result. Use it -together with @samp{-v} to see information about the files. If a file +together with @option{-v} to see information about the files. If a file fails the test, does not exist, can't be opened, or is a terminal, lziprecover -continues checking the rest of the files. A final diagnostic is shown at -verbosity level 1 or higher if any file fails the test when testing -multiple files. +continues testing the rest of the files. A final diagnostic is shown at +verbosity level 1 or higher if any file fails the test when testing multiple +files. @item -v @itemx --verbose @@ -389,38 +390,33 @@ verbosity level, showing status, compression ratio, dictionary size, trailer contents (CRC, data size, member size), and up to 6 bytes of trailing data (if any) both in hexadecimal and as a string of printable ASCII characters.@* -Two or more @samp{-v} options show the progress of decompression.@* +Two or more @option{-v} options show the progress of decompression.@* In other modes, increasing verbosity levels show final status, progress of operations, and extra information (for example, the failed areas). -@item --loose-trailing -When decompressing, testing, or listing, allow trailing data whose first -bytes are so similar to the magic bytes of a lzip header that they can -be confused with a corrupt header. Use this option if a file triggers a -"corrupt header" error and the cause is not indeed a corrupt header. 
- -@item --dump=[@var{member_list}][:damaged][:tdata] -Dump the members listed, the damaged members (if any), or the trailing -data (if any) of one or more regular multimember files to standard -output, or to a file if the option @samp{--output} is used. If more than -one file is given, the elements dumped from all files are concatenated. -If a file does not exist, can't be opened, or is not regular, -lziprecover continues processing the rest of the files. If the dump -fails in one file, lziprecover exits immediately without processing the -rest of the files. Only @samp{--dump=tdata} can write to a terminal. - -The argument to @samp{--dump} is a colon-separated list of the following -element specifiers; a member list (1,3-6), a reverse member list -(r1,3-6), and the strings "damaged" and "tdata" (which may be shortened -to 'd' and 't' respectively). A member list selects the members (or -gaps) listed, whose numbers coincide with those shown by @samp{--list}. -A reverse member list selects the members listed counting from the last -member in the file (r1). Negated versions of both kinds of lists exist -(^1,3-6:r^1,3-6) which selects all the members except those in the list. -The strings "damaged" and "tdata" select the damaged members and the -trailing data respectively. If the same member is selected more than -once, for example by @samp{1:r1} in a single-member file, it is dumped -just once. See the following examples: +@item --dump=[@var{member_list}][:damaged][:empty][:tdata] +Dump the members listed, the damaged members (if any), the empty members (if +any), or the trailing data (if any) of one or more regular multimember files +to standard output, or to a file if the option @option{--output} is used. If +more than one file is given, the elements dumped from all the files are +concatenated. If a file does not exist, can't be opened, or is not regular, +lziprecover continues processing the rest of the files. 
If the dump fails in +one file, lziprecover exits immediately without processing the rest of the +files. Only @option{--dump=tdata} can write to a terminal. +@option{--dump=damaged} implies @option{--ignore-errors}. + +The argument to @option{--dump} is a colon-separated list of the following +element specifiers; a member list (1,3-6), a reverse member list (r1,3-6), +and the strings "damaged", "empty", and "tdata" (which may be shortened to +'d', 'e', and 't' respectively). A member list selects the members (or gaps) +listed, whose numbers coincide with those shown by @option{--list}. A reverse +member list selects the members listed counting from the last member in the +file (r1). Negated versions of both kinds of lists exist (^1,3-6:r^1,3-6) +which select all the members except those in the list. The strings +"damaged", "empty", and "tdata" select the damaged members, the empty +members (those with a data size = 0), and the trailing data respectively. If +the same member is selected more than once, for example by @samp{1:r1} in a +single-member file, it is dumped just once. See the following examples: @multitable {@code{3,12:damaged:tdata}} {members 3, 12, damaged members, trailing data} @headitem @code{--dump} argument @tab Elements dumped @@ -429,44 +425,71 @@ just once. 
See the following examples: @item @code{^13,15} @tab all but 13th and 15th members in file @item @code{r^1} @tab all but last member in file @item @code{damaged} @tab all damaged members in file +@item @code{empty} @tab all empty members in file @item @code{tdata} @tab trailing data @item @code{1-5:r1:tdata} @tab members 1 to 5, last member, trailing data @item @code{damaged:tdata} @tab damaged members, trailing data @item @code{3,12:damaged:tdata} @tab members 3, 12, damaged members, trailing data @end multitable -@item --remove=[@var{member_list}][:damaged][:tdata] -Remove the members listed, the damaged members (if any), or the trailing -data (if any) from regular multimember files in place. The date of each -file is preserved if possible. If all members in a file are selected to -be removed, the file is left unchanged and the exit status is set to 2. -If a file does not exist, can't be opened, is not regular, or is left -unchanged, lziprecover continues processing the rest of the files. In case -of I/O error, lziprecover exits immediately without processing the rest of -the files. See @samp{--dump} above for a description of the argument. - -This option may be dangerous even if only the trailing data is being -removed because the file may be corrupt or the trailing data may contain -a forbidden combination of characters. @xref{Trailing data}. It is -advisable to make a backup before attempting the removal. At least -verify that @w{@samp{lzip -cd file.lz | wc -c}} and the uncompressed -size shown by @w{@samp{lzip -l file.lz}} match before attempting the -removal of trailing data. - -@item --strip=[@var{member_list}][:damaged][:tdata] -Copy one or more regular multimember files to standard output (or to a -file if the option @samp{--output} is used), stripping the members -listed, the damaged members (if any), or the trailing data (if any) from -each file. 
If all members in a file are selected to be stripped, the -trailing data (if any) are also stripped even if @samp{tdata} is not -specified. If more than one file is given, the files are concatenated. -In this case the trailing data are also stripped from all but the last -file even if @samp{tdata} is not specified. If a file does not exist, -can't be opened, or is not regular, lziprecover continues processing the -rest of the files. If a file fails to copy, lziprecover exits -immediately without processing the rest of the files. See @samp{--dump} +@item --remove=[@var{member_list}][:damaged][:empty][:tdata] +Remove the members listed, the damaged members (if any), the empty members +(if any), or the trailing data (if any) from regular multimember files in +place. The date of each file modified is preserved if possible. If all +members in a file are selected to be removed, the file is left unchanged and +the exit status is set to 2. If a file does not exist, can't be opened, is +not regular, or is left unchanged, lziprecover continues processing the rest +of the files. In case of I/O error, lziprecover exits immediately without +processing the rest of the files. See @option{--dump} above for a description +of the argument. + +This option may be dangerous even if only the trailing data are being +removed because the file may be corrupt or the trailing data may contain a +forbidden combination of characters. @xref{Trailing data}. It is safer to +send the output of @option{--strip} to a temporary file, check it, and then +copy it over the original file. But if you prefer @option{--remove} because of +its more efficient in-place removal, it is advisable to make a backup before +attempting the removal. At least check that @w{@samp{lzip -cd file.lz | wc -c}} +and the uncompressed size shown by @w{@samp{lzip -l file.lz}} match before +attempting the removal of trailing data. 
+ +@item --strip=[@var{member_list}][:damaged][:empty][:tdata] +Copy one or more regular multimember files to standard output (or to a file +if the option @option{--output} is used), stripping the members listed, the +damaged members (if any), the empty members (if any), or the trailing data +(if any) from each file. If all members in a file are selected to be +stripped, the trailing data (if any) are also stripped even if @samp{tdata} +is not specified. If more than one file is given, the files are +concatenated. In this case the trailing data are also stripped from all but +the last file even if @samp{tdata} is not specified. If a file does not +exist, can't be opened, or is not regular, lziprecover continues processing +the rest of the files. If a file fails to copy, lziprecover exits +immediately without processing the rest of the files. See @option{--dump} above for a description of the argument. +@item --empty-error +Exit with error status 2 if any empty member is found in the input files. + +@item --marking-error +Exit with error status 2 if the first LZMA byte is non-zero in any member of +the input files. This may be caused by data corruption or by deliberate +insertion of tracking information in the file. Use +@w{@samp{lziprecover --clear-marking}} to clear any such non-zero bytes. + +@item --loose-trailing +When decompressing, testing, or listing, allow trailing data whose first +bytes are so similar to the magic bytes of a lzip header that they can +be confused with a corrupt header. Use this option if a file triggers a +"corrupt header" error and the cause is not indeed a corrupt header. + +@item --clear-marking +Set to zero the first LZMA byte of each member in the files specified. At +verbosity level 1 (-v), print the number of members cleared. The date of +each file modified is preserved if possible. 
This option exists because the +first byte of the LZMA stream is ignored by the range decoder, and can +therefore be (mis)used to store any value which can then be used as a +watermark to track the path of the compressed payload. + @end table Lziprecover also supports the following debug options (for experts): @@ -486,8 +509,9 @@ nonzero status only in case of fatal error. @itemx --md5sum Print to standard output the MD5 digests of the input @var{files} one per line in the same format produced by the @command{md5sum} tool. Lziprecover -uses MD5 digests to verify the result of some operations. This option allows -the verification of lziprecover's implementation of the MD5 algorithm. +uses MD5 digests to check the result of some operations. This option can be +used to test the correctness of lziprecover's implementation of the MD5 +algorithm. @item -S[@var{value}] @itemx --nrep-stats[=@var{value}] @@ -495,7 +519,7 @@ Compare the frequency of sequences of N repeated bytes of a given @var{value} in the compressed LZMA streams of the input @var{files} with the frequency expected for random data (1 / 2^(8N)). If @var{value} is not specified, print the frequency of repeated sequences of all possible byte -values. Print cumulative data for all files followed by the name of the +values. Print cumulative data for all the files, followed by the name of the first file with the longest sequence. @item -U 1|B@var{size} @@ -516,7 +540,7 @@ stream of the compressed input @var{file} like the command but in memory, and therefore much faster. Testing and comparisons work just like with the argument @samp{1} explained above. -By default @samp{--unzcrash} only prints the interesting cases; CRC +By default @option{--unzcrash} only prints the interesting cases; CRC mismatches, size mismatches, unsupported marker codes, unexpected EOFs, apparently successful decompressions, and decoder errors detected 50_000 or more bytes beyond the byte (or the start of the block) being tested. 
At @@ -551,34 +575,37 @@ decoder realized that the data contains an error. @xref{range-format}, for a description of @var{range}. @item -Z @var{position},@var{value} -@itemx --debug-repair=@var{position},@var{value} +@itemx --debug-byte-repair=@var{position},@var{value} Load the compressed @var{file} into memory, set the byte at @var{position} -to @var{value}, and then try to repair the error. @xref{--repair}. +to @var{value}, and then try to repair the byte error. @xref{--byte-repair}. @end table -Numbers given as arguments to options may be followed by a multiplier -and an optional @samp{B} for "byte". +Numbers given as arguments to options may be expressed in decimal, +hexadecimal, or octal (using the same syntax as integer constants in C++), +and may be followed by a multiplier and an optional @samp{B} for "byte". Table of SI and binary prefixes (unit multipliers): -@multitable {Prefix} {kilobyte (10^3 = 1000)} {|} {Prefix} {kibibyte (2^10 = 1024)} +@multitable {Prefix} {kilobyte (10^3 = 1000)} {|} {Prefix} {kibibyte (2^10 = 1024)} @item Prefix @tab Value @tab | @tab Prefix @tab Value -@item k @tab kilobyte (10^3 = 1000) @tab | @tab Ki @tab kibibyte (2^10 = 1024) -@item M @tab megabyte (10^6) @tab | @tab Mi @tab mebibyte (2^20) -@item G @tab gigabyte (10^9) @tab | @tab Gi @tab gibibyte (2^30) -@item T @tab terabyte (10^12) @tab | @tab Ti @tab tebibyte (2^40) -@item P @tab petabyte (10^15) @tab | @tab Pi @tab pebibyte (2^50) -@item E @tab exabyte (10^18) @tab | @tab Ei @tab exbibyte (2^60) -@item Z @tab zettabyte (10^21) @tab | @tab Zi @tab zebibyte (2^70) -@item Y @tab yottabyte (10^24) @tab | @tab Yi @tab yobibyte (2^80) +@item k @tab kilobyte (10^3 = 1000) @tab | @tab Ki @tab kibibyte (2^10 = 1024) +@item M @tab megabyte (10^6) @tab | @tab Mi @tab mebibyte (2^20) +@item G @tab gigabyte (10^9) @tab | @tab Gi @tab gibibyte (2^30) +@item T @tab terabyte (10^12) @tab | @tab Ti @tab tebibyte (2^40) +@item P @tab petabyte (10^15) @tab | @tab Pi @tab pebibyte (2^50) 
+@item E @tab exabyte (10^18) @tab | @tab Ei @tab exbibyte (2^60) +@item Z @tab zettabyte (10^21) @tab | @tab Zi @tab zebibyte (2^70) +@item Y @tab yottabyte (10^24) @tab | @tab Yi @tab yobibyte (2^80) +@item R @tab ronnabyte (10^27) @tab | @tab Ri @tab robibyte (2^90) +@item Q @tab quettabyte (10^30) @tab | @tab Qi @tab quebibyte (2^100) @end multitable @sp 1 -Exit status: 0 for a normal exit, 1 for environmental problems (file not -found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid -input file, 3 for an internal consistency error (e.g., bug) which caused -lziprecover to panic. +Exit status: 0 for a normal exit, 1 for environmental problems +(file not found, invalid command line options, I/O errors, etc), 2 to +indicate a corrupt or invalid input file, 3 for an internal consistency +error (e.g., bug) which caused lziprecover to panic. @node Data safety @@ -636,7 +663,7 @@ only be recovered by an expert, if at all. If you used bzip2, and if the file is large enough to contain more than one compressed data block (usually larger than @w{900 kB} uncompressed), and if no block is damaged in both files, then the data can be manually recovered -by splitting the files with bzip2recover, verifying every block, and then +by splitting the files with bzip2recover, checking every block, and then copying the right blocks in the right order into another file. But if you used lzip, the data can be automatically recovered with @@ -659,12 +686,12 @@ unless some messages have been changed or deleted in the meantime. The new messages added to each backup are usually a small part of the whole mailbox. 
@verbatim -+========================================================+ -| Older backup containing some messages | -+========================================================+ -+========================================================+================+ -| Newer backup containing the messages above plus some | new messages | -+========================================================+================+ ++============================================+ +| Older backup containing some messages | ++============================================+ ++============================================+========================+ +| Newer backup containing the messages above | plus some new messages | ++============================================+========================+ @end verbatim One day you discover that your mailbox has disappeared because you deleted @@ -687,7 +714,7 @@ combining the good blocks from both backups. But if you used lzip, the whole newer backup can be automatically recovered with @w{@samp{lziprecover --reproduce}} as long as the missing bytes can be recovered from the older backup, even if other messages in the common part -have been changed or deleted. Mailboxes seem to be specially easy to +have been changed or deleted. Mailboxes seem to be especially easy to reproduce. The probability of reproducing a mailbox (@pxref{performance-of-reproduce}) is almost as high as that of merging two identical backups (@pxref{performance-of-merge}). @@ -852,7 +879,7 @@ feeding the concatenated data to the same version of lzip that created the file. For this to work, a reference file is required containing the uncompressed data corresponding to the missing compressed data of the zeroed sector, plus some context data before and after them. It is possible to -recover a large file using just a few KB of reference data. +recover a large file using just a few kB of reference data. The difficult part is finding a suitable reference file. 
It must contain the exact data required (possibly mixed with other data). Containing similar @@ -886,9 +913,9 @@ Testing sectors of size 512 at file positions 65536 to 66047 foo: Match found at offset 296892 Reproduction succeeded at pos 65536 - 1 sectors tested - 1 reproductions returned with zero status - all comparisons passed + 1 sectors tested + 1 reproductions returned with zero status + all comparisons passed @end example Using @samp{foo} as reference file guarantees that any zeroed sector in @@ -923,8 +950,8 @@ overhead. It uses basic ustar headers, and only adds extended pax headers when they are required. @anchor{performance-of-reproduce} -@section Performance of @samp{--reproduce} -Reproduce mode is specially useful when recovering a corrupt backup (or a +@section Performance of @option{--reproduce} +Reproduce mode is especially useful when recovering a corrupt backup (or a corrupt source tarball) that is part of a series. Usually only a small fraction of the data changes from one backup to the next or from one version of a source tarball to the next. This makes sometimes possible to reproduce @@ -957,10 +984,11 @@ real backups of my own working directory: @end multitable Note that the "performance of reproduce" is a probability, not a partial -recovery. The data is either recovered fully (with the probability X shown +recovery. The data are either recovered fully (with the probability X shown in the last column of the tables above) or not recovered at all (with probability @w{1 - X}). +@noindent Example 1: Recover a damaged source tarball with a zeroed sector of 512 bytes at file position 1019904, using as reference another source tarball for a different version of the software. @@ -1136,11 +1164,11 @@ archive contains the end-of-file blocks. 
@chapter Names of the files produced by lziprecover @cindex file names -The name of the fixed file produced by @samp{--merge} and @samp{--repair} is -made by appending the string @samp{_fixed.lz} to the original file name. If -the original file name ends with one of the extensions @samp{.tar.lz}, -@samp{.lz}, or @samp{.tlz}, the string @samp{_fixed} is inserted before the -extension. +The name of the fixed file produced by @option{--byte-repair} and +@option{--merge} is made by appending the string @samp{_fixed.lz} to the +original file name. If the original file name ends with one of the +extensions @samp{.tar.lz}, @samp{.lz}, or @samp{.tlz}, the string +@samp{_fixed} is inserted before the extension. @node File format @@ -1224,10 +1252,10 @@ Size of the original uncompressed data. @item Member size (8 bytes) Total size of the member, including header and trailer. This field acts -as a distributed index, allows the verification of stream integrity, and +as a distributed index, improves the checking of stream integrity, and facilitates the safe recovery of undamaged members from multimember files. -Member size should be limited to @w{2 PiB} to prevent the data size field -from overflowing. +Lzip limits the member size to @w{2 PiB} to prevent the data size field from +overflowing. @end table @@ -1246,12 +1274,13 @@ example when writing to a tape. It is safe to append any amount of padding zero bytes to a lzip file. @item -Useful data added by the user; a cryptographically secure hash, a -description of file contents, etc. It is safe to append any amount of -text to a lzip file as long as none of the first four bytes of the text -match the corresponding byte in the string "LZIP", and the text does not -contain any zero bytes (null characters). Nonzero bytes and zero bytes -can't be safely mixed in trailing data. 
+Useful data added by the user; an "End Of File" string (to check that the +file has not been truncated), a cryptographically secure hash, a description +of file contents, etc. It is safe to append any amount of text to a lzip +file as long as none of the first four bytes of the text match the +corresponding byte in the string "LZIP", and the text does not contain any +zero bytes (null characters). Nonzero bytes and zero bytes can't be safely +mixed in trailing data. @item Garbage added by some not totally successful copy operation. @@ -1269,7 +1298,7 @@ integrity information itself. Therefore it can be considered to be below the noise level. Additionally, the test used by lziprecover to discriminate trailing data from a corrupt header has a Hamming distance (HD) of 3, and the 3 bit flips must happen in different magic bytes for the test to -fail. In any case, the option @samp{--trailing-error} guarantees that +fail. In any case, the option @option{--trailing-error} guarantees that any corrupt header will be detected. @end itemize @@ -1280,7 +1309,7 @@ possible in the presence of trailing data. Trailing data can be safely ignored in most cases. In some cases, like that of user-added data, they are expected to be ignored. In those cases where a file containing trailing data must be rejected, the option -@samp{--trailing-error} can be used. @xref{--trailing-error}. +@option{--trailing-error} can be used. @xref{--trailing-error}. Lziprecover facilitates the management of metadata stored as trailing data in lzip files. See the following examples: @@ -1301,7 +1330,7 @@ lziprecover --remove=tdata file.lz @sp 1 @noindent -Example 2: Add and verify a cryptographically secure hash. (This may be +Example 2: Add and check a cryptographically secure hash. (This may be convenient, but a separate copy of the hash must be kept in a safe place to guarantee that both file and hash have not been maliciously replaced). 
@@ -1335,7 +1364,7 @@ lziprecover -d file.lz @sp 1 @noindent -Example 3: Verify the integrity of the compressed file @samp{file.lz} and +Example 3: Check the integrity of the compressed file @samp{file.lz} and show status. @example @@ -1356,7 +1385,7 @@ Do this instead You may also concatenate the compressed files like this lziprecover --strip=tdata file1.lz file2.lz file3.lz > file123.lz Or keeping the trailing data of the last file like this - lziprecover --strip=damaged file1.lz file2.lz file3.lz > file123.lz + lziprecover --strip=empty file1.lz file2.lz file3.lz > file123.lz @end example @sp 1 @@ -1379,7 +1408,7 @@ lziprecover -D 10000-15000 file.lz @sp 1 @noindent -Example 7: Repair small errors in the file @samp{file.lz}. (Indented lines +Example 7: Repair a corrupt byte in the file @samp{file.lz}. (Indented lines are abridged diagnostic messages from lziprecover). @example @@ -1416,11 +1445,11 @@ decompresses it, increasing 256 times each byte of the compressed data, so as to test all possible one-byte errors. Note that it may take years or even centuries to test all possible one-byte errors in a large file (tens of MB). -If the option @samp{--block} is given, unzcrash reads the file specified and +If the option @option{--block} is given, unzcrash reads the file specified and then repeatedly decompresses it, setting all bytes in each successive block to the value given, so as to test all possible full sector errors. -If the option @samp{--truncate} is given, unzcrash reads the file specified +If the option @option{--truncate} is given, unzcrash reads the file specified and then repeatedly decompresses it, truncating the file to increasing lengths, so as to test all possible truncation points. @@ -1448,7 +1477,7 @@ to understand the format being tested. For example the @samp{zcmp} provided by @uref{http://www.nongnu.org/zutils/manual/zutils_manual.html#Zcmp,,zutils}. 
If the @samp{zcmp} program used does not understand the format being tested, all the comparisons will fail because the compressed files will be compared -without being decompressed first. Use @samp{--zcmp=false} to disable +without being decompressed first. Use @option{--zcmp=false} to disable comparisons. @ifnothtml @xref{Zcmp,,,zutils}. @@ -1499,12 +1528,12 @@ The number of N-bit errors per byte (N = 1 to 8) is: Test block errors of given @var{size}, simulating a whole sector I/O error. @var{size} defaults to 512 bytes. @var{value} defaults to 0. By default, only contiguous, non-overlapping blocks are tested, but this may be changed -with the option @samp{--delta}. +with the option @option{--delta}. @item -d @var{n} @itemx --delta=@var{n} Test one byte, block, or truncation size every @var{n} bytes. If -@samp{--delta} is not specified, unzcrash tests all the bytes, +@option{--delta} is not specified, unzcrash tests all the bytes, non-overlapping blocks, or truncation sizes. Values of @var{n} smaller than the block size will result in overlapping blocks. (Which is convenient for testing because there are usually too few non-overlapping blocks in a file). @@ -1520,9 +1549,9 @@ value of the byte at @var{position}. This option can be used to run tests with a changed dictionary size, for example. @item -n -@itemx --no-verify -Skip initial verification of @var{file} and @samp{zcmp}. May speed up things -a lot when testing many (or large) known good files. +@itemx --no-check +Skip initial test of @var{file} and @samp{zcmp}. May speed up things a lot +when testing many (or large) known good files. @item -p @var{bytes} @itemx --position=@var{bytes} @@ -1536,13 +1565,13 @@ Quiet operation. Suppress all messages. @item -s @var{bytes} @itemx --size=@var{bytes} Number of byte positions to test. If not specified, the rest of the file -is tested (from @samp{--position} to end of file). Negative values are +is tested (from @option{--position} to end of file). 
Negative values are relative to the rest of the file. @item -t @itemx --truncate Test all possible truncation points in the range specified by -@samp{--position} and @samp{--size}. +@option{--position} and @option{--size}. @item -v @itemx --verbose @@ -1551,17 +1580,17 @@ Verbose mode. @item -z @itemx --zcmp= Set zcmp command name and options. Defaults to @samp{zcmp}. Use -@samp{--zcmp=false} to disable comparisons. If testing a decompressor +@option{--zcmp=false} to disable comparisons. If testing a decompressor different from the one used by default by zcmp, it is needed to force unzcrash and zcmp to use the same decompressor with a command like @w{@samp{unzcrash --zcmp='zcmp --lz=plzip' 'plzip -t' @var{file}}} @end table -Exit status: 0 for a normal exit, 1 for environmental problems (file not -found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or -invalid input file, 3 for an internal consistency error (e.g., bug) which -caused unzcrash to panic. +Exit status: 0 for a normal exit, 1 for environmental problems +(file not found, invalid command line options, I/O errors, etc), 2 to +indicate a corrupt or invalid input file, 3 for an internal consistency +error (e.g., bug) which caused unzcrash to panic. @node Problems diff --git a/dump_remove.cc b/dump_remove.cc index 37f7f00..92b5e3d 100644 --- a/dump_remove.cc +++ b/dump_remove.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2023 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -32,14 +32,12 @@ #include "lzip_index.h" -// If strip is false, dump to outfd members/gaps/tdata in member_list. -// If strip is true, dump to outfd members/gaps/tdata not in member_list. +/* If strip is false, dump to outfd members/gaps/tdata in member_list. 
+ If strip is true, dump to outfd members/gaps/tdata not in member_list. */ int dump_members( const std::vector< std::string > & filenames, const std::string & default_output_filename, - const Member_list & member_list, const bool force, - bool ignore_errors, bool ignore_trailing, - const bool loose_trailing, const bool strip, - const bool to_stdout ) + const Cl_options & cl_opts, const Member_list & member_list, + const bool force, const bool strip, const bool to_stdout ) { if( to_stdout || default_output_filename.empty() ) outfd = STDOUT_FILENO; else @@ -48,14 +46,13 @@ int dump_members( const std::vector< std::string > & filenames, set_signal_handler(); if( !open_outstream( force, false, false, false ) ) return 1; } - if( ( strip || !member_list.tdata || member_list.damaged || member_list.range() ) && + if( ( strip || !member_list.tdata || member_list.damaged || + member_list.empty || member_list.range() ) && !check_tty_out() ) return 1; // check tty except for --dump=tdata unsigned long long copied_size = 0, stripped_size = 0; unsigned long long copied_tsize = 0, stripped_tsize = 0; long members = 0, smembers = 0; int files = 0, tfiles = 0, retval = 0; - if( member_list.damaged ) ignore_errors = true; - if( member_list.tdata ) ignore_trailing = true; bool stdin_used = false; for( unsigned i = 0; i < filenames.size(); ++i ) { @@ -68,8 +65,8 @@ int dump_members( const std::vector< std::string > & filenames, open_instream( input_filename, &in_stats, false, true ); if( infd < 0 ) { set_retval( retval, 1 ); continue; } - const Lzip_index lzip_index( infd, ignore_trailing, loose_trailing, - ignore_errors, ignore_errors ); + const Lzip_index lzip_index( infd, cl_opts, cl_opts.ignore_errors, + cl_opts.ignore_errors ); if( lzip_index.retval() != 0 ) { show_file_error( input_filename, lzip_index.error().c_str() ); @@ -77,7 +74,7 @@ int dump_members( const std::vector< std::string > & filenames, close( infd ); continue; } - if( !safe_seek( infd, 0 ) ) cleanup_and_fail( 1 
); + if( !safe_seek( infd, 0, input_filename ) ) cleanup_and_fail( 1 ); const long blocks = lzip_index.blocks( false ); // not counting tdata long long stream_pos = 0; // first pos not yet read from file long gaps = 0; @@ -92,7 +89,7 @@ int dump_members( const std::vector< std::string > & filenames, member_list.includes( j + gaps, blocks ); if( in == !strip ) { - if( !safe_seek( infd, stream_pos ) || + if( !safe_seek( infd, stream_pos, input_filename ) || !copy_file( infd, outfd, mb.pos() - stream_pos ) ) cleanup_and_fail( 1 ); copied_size += mb.pos() - stream_pos; ++members; @@ -101,14 +98,16 @@ int dump_members( const std::vector< std::string > & filenames, ++gaps; } bool in = member_list.includes( j + gaps, blocks ); // member + if( !in && member_list.empty && lzip_index.dblock( j ).size() == 0 ) + in = true; if( !in && member_list.damaged ) { - if( !safe_seek( infd, mb.pos() ) ) cleanup_and_fail( 1 ); + if( !safe_seek( infd, mb.pos(), input_filename ) ) cleanup_and_fail( 1 ); in = ( test_member_from_file( infd, mb.size() ) != 0 ); // damaged } if( in == !strip ) { - if( !safe_seek( infd, mb.pos() ) || + if( !safe_seek( infd, mb.pos(), input_filename ) || !copy_file( infd, outfd, mb.size() ) ) cleanup_and_fail( 1 ); copied_size += mb.size(); ++members; } @@ -128,7 +127,7 @@ int dump_members( const std::vector< std::string > & filenames, if( member_list.tdata == !strip && trailing_size > 0 && ( !strip || i + 1 >= filenames.size() ) ) // strip all but last { - if( !safe_seek( infd, cdata_size ) || + if( !safe_seek( infd, cdata_size, input_filename ) || !copy_file( infd, outfd, trailing_size ) ) cleanup_and_fail( 1 ); copied_tsize += trailing_size; } @@ -140,7 +139,7 @@ int dump_members( const std::vector< std::string > & filenames, { if( !strip ) { - if( member_list.damaged || member_list.range() ) + if( member_list.damaged || member_list.empty || member_list.range() ) std::fprintf( stderr, "%llu bytes dumped from %ld %s from %d %s.\n", copied_size, members, ( 
members == 1 ) ? "member" : "members", @@ -150,7 +149,7 @@ int dump_members( const std::vector< std::string > & filenames, } else { - if( member_list.damaged || member_list.range() ) + if( member_list.damaged || member_list.empty || member_list.range() ) std::fprintf( stderr, "%llu bytes stripped from %ld %s from %d %s.\n", stripped_size, smembers, ( smembers == 1 ) ? "member" : "members", @@ -164,15 +163,14 @@ int dump_members( const std::vector< std::string > & filenames, } +/* Remove members, tdata from files in place by opening two descriptors for + each file. */ int remove_members( const std::vector< std::string > & filenames, - const Member_list & member_list, bool ignore_errors, - bool ignore_trailing, const bool loose_trailing ) + const Cl_options & cl_opts, const Member_list & member_list ) { unsigned long long removed_size = 0, removed_tsize = 0; long members = 0; int files = 0, tfiles = 0, retval = 0; - if( member_list.damaged ) ignore_errors = true; - if( member_list.tdata ) ignore_trailing = true; for( unsigned i = 0; i < filenames.size(); ++i ) { const char * const filename = filenames[i].c_str(); @@ -180,8 +178,8 @@ int remove_members( const std::vector< std::string > & filenames, const int infd = open_instream( filename, &in_stats, false, true ); if( infd < 0 ) { set_retval( retval, 1 ); continue; } - const Lzip_index lzip_index( infd, ignore_trailing, loose_trailing, - ignore_errors, ignore_errors ); + const Lzip_index lzip_index( infd, cl_opts, cl_opts.ignore_errors, + cl_opts.ignore_errors ); if( lzip_index.retval() != 0 ) { show_file_error( filename, lzip_index.error().c_str() ); @@ -192,7 +190,7 @@ int remove_members( const std::vector< std::string > & filenames, const int fd = open_truncable_stream( filename, &dummy_stats ); if( fd < 0 ) { close( infd ); set_retval( retval, 1 ); continue; } - if( !safe_seek( infd, 0 ) ) return 1; + if( !safe_seek( infd, 0, filename ) ) return 1; const long blocks = lzip_index.blocks( false ); // not counting 
tdata long long stream_pos = 0; // first pos not yet written to file long gaps = 0; @@ -207,8 +205,8 @@ int remove_members( const std::vector< std::string > & filenames, if( !member_list.damaged && !member_list.includes( j + gaps, blocks ) ) { if( stream_pos != prev_end && - ( !safe_seek( infd, prev_end ) || - !safe_seek( fd, stream_pos ) || + ( !safe_seek( infd, prev_end, filename ) || + !safe_seek( fd, stream_pos, filename ) || !copy_file( infd, fd, mb.pos() - prev_end ) ) ) { error = true; set_retval( retval, 1 ); break; } stream_pos += mb.pos() - prev_end; @@ -217,17 +215,19 @@ int remove_members( const std::vector< std::string > & filenames, ++gaps; } bool in = member_list.includes( j + gaps, blocks ); // member + if( !in && member_list.empty && lzip_index.dblock( j ).size() == 0 ) + in = true; if( !in && member_list.damaged ) { - if( !safe_seek( infd, mb.pos() ) ) + if( !safe_seek( infd, mb.pos(), filename ) ) { error = true; set_retval( retval, 1 ); break; } in = ( test_member_from_file( infd, mb.size() ) != 0 ); // damaged } if( !in ) { if( stream_pos != mb.pos() && - ( !safe_seek( infd, mb.pos() ) || - !safe_seek( fd, stream_pos ) || + ( !safe_seek( infd, mb.pos(), filename ) || + !safe_seek( fd, stream_pos, filename ) || !copy_file( infd, fd, mb.size() ) ) ) { error = true; set_retval( retval, 1 ); break; } stream_pos += mb.size(); @@ -249,8 +249,8 @@ int remove_members( const std::vector< std::string > & filenames, if( !member_list.tdata ) // copy trailing data { if( stream_pos != cdata_size && - ( !safe_seek( infd, cdata_size ) || - !safe_seek( fd, stream_pos ) || + ( !safe_seek( infd, cdata_size, filename ) || + !safe_seek( fd, stream_pos, filename ) || !copy_file( infd, fd, trailing_size ) ) ) { close( fd ); close( infd ); set_retval( retval, 1 ); break; } stream_pos += trailing_size; @@ -279,7 +279,7 @@ int remove_members( const std::vector< std::string > & filenames, } if( verbosity >= 1 ) { - if( member_list.damaged || member_list.range() ) + if( 
member_list.damaged || member_list.empty || member_list.range() ) std::fprintf( stderr, "%llu bytes removed from %ld %s from %d %s.\n", removed_size, members, ( members == 1 ) ? "member" : "members", @@ -290,3 +290,71 @@ int remove_members( const std::vector< std::string > & filenames, } return retval; } + + +/* Set to zero in place the first LZMA byte of each member in each file by + opening one rw descriptor for each file. */ +int clear_marking( const std::vector< std::string > & filenames, + const Cl_options & cl_opts ) + { + long cleared_members = 0; + int files = 0, retval = 0; + for( unsigned i = 0; i < filenames.size(); ++i ) + { + const char * const filename = filenames[i].c_str(); + struct stat in_stats; + const int fd = open_truncable_stream( filename, &in_stats ); + if( fd < 0 ) { set_retval( retval, 1 ); continue; } + + const Lzip_index lzip_index( fd, cl_opts, cl_opts.ignore_errors, + cl_opts.ignore_errors ); + if( lzip_index.retval() != 0 ) + { + show_file_error( filename, lzip_index.error().c_str() ); + set_retval( retval, lzip_index.retval() ); + close( fd ); + continue; + } + + enum { bufsize = Lzip_header::size + 1 }; + uint8_t header_buf[bufsize]; + const uint8_t * const p = header_buf; // keep gcc 6.1.0 quiet + const Lzip_header & header = *(const Lzip_header *)p; + uint8_t * const mark = header_buf + Lzip_header::size; + bool write_attempted = false; + for( long j = 0; j < lzip_index.members(); ++j ) // clear the members + { + const Block & mb = lzip_index.mblock( j ); + if( seek_read( fd, header_buf, bufsize, mb.pos() ) != bufsize ) + { show_file_error( filename, "Error reading member header", errno ); + set_retval( retval, 1 ); break; } + if( !header.check( cl_opts.ignore_errors ) ) + { show_file_error( filename, "Member header became corrupt as we read it." 
); + set_retval( retval, 2 ); break; } + if( *mark == 0 ) continue; + *mark = 0; write_attempted = true; + if( seek_write( fd, mark, 1, mb.pos() + Lzip_header::size ) != 1 ) + { show_file_error( filename, "Error writing to file", errno ); + set_retval( retval, 1 ); break; } + ++cleared_members; + } + if( close( fd ) != 0 ) + { + show_file_error( filename, "Error closing file", errno ); + set_retval( retval, 1 ); break; + } + if( write_attempted ) + { + struct utimbuf t; + t.actime = in_stats.st_atime; + t.modtime = in_stats.st_mtime; + utime( filename, &t ); + ++files; + } + } + if( verbosity >= 1 ) + std::fprintf( stderr, "%lu %s cleared in %d %s.\n", cleared_members, + ( cleared_members == 1 ) ? "member" : "members", + files, ( files == 1 ) ? "file" : "files" ); + return retval; + } diff --git a/list.cc b/list.cc index 6cc5aab..5a3a13a 100644 --- a/list.cc +++ b/list.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2023 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -48,8 +48,7 @@ void list_line( const unsigned long long uncomp_size, int list_files( const std::vector< std::string > & filenames, - const bool ignore_errors, - const bool ignore_trailing, const bool loose_trailing ) + const Cl_options & cl_opts ) { unsigned long long total_comp = 0, total_uncomp = 0; int files = 0, retval = 0; @@ -67,8 +66,8 @@ int list_files( const std::vector< std::string > & filenames, open_instream( input_filename, &in_stats, false, true ); if( infd < 0 ) { set_retval( retval, 1 ); continue; } - const Lzip_index lzip_index( infd, ignore_trailing, loose_trailing, - ignore_errors, ignore_errors ); + const Lzip_index lzip_index( infd, cl_opts, cl_opts.ignore_errors, + cl_opts.ignore_errors ); close( infd ); if( lzip_index.retval() != 0 ) { diff --git a/lunzcrash.cc b/lunzcrash.cc index 577d355..6267ce8 100644 --- a/lunzcrash.cc +++ b/lunzcrash.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2023 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -37,31 +37,31 @@ namespace { -bool verify_member( const uint8_t * const mbuffer, const long long msize, - const unsigned dictionary_size, const char * const name, - uint8_t digest[16] ) +bool check_member( const uint8_t * const mbuffer, const long msize, + const unsigned dictionary_size, const char * const name, + md5_type & digest ) { MD5SUM md5sum; LZ_mtester mtester( mbuffer, msize, dictionary_size, -1, &md5sum ); if( mtester.test_member() != 0 || !mtester.finished() ) - { show_file_error( name, "Error verifying input file." ); return false; } + { show_file_error( name, "Error checking input file." 
); return false; } md5sum.md5_finish( digest ); return true; } -bool compare_member( const uint8_t * const mbuffer, const long long msize, +bool compare_member( const uint8_t * const mbuffer, const long msize, const unsigned dictionary_size, - const long long byte_pos, const uint8_t digest[16] ) + const long long byte_pos, const md5_type & digest ) { MD5SUM md5sum; LZ_mtester mtester( mbuffer, msize, dictionary_size, -1, &md5sum ); bool error = ( mtester.test_member() != 0 || !mtester.finished() ); if( !error ) { - uint8_t new_digest[16]; + md5_type new_digest; md5sum.md5_finish( new_digest ); - if( std::memcmp( digest, new_digest, 16 ) != 0 ) error = true; + if( digest != new_digest ) error = true; } if( error && verbosity >= 0 ) std::printf( "byte %llu comparison failed\n", byte_pos ); @@ -75,14 +75,14 @@ int test_member_rest( const LZ_mtester & master, uint8_t * const buffer2, { LZ_mtester mtester( master ); // tester with external buffer mtester.duplicate_buffer( buffer2 ); - int result = mtester.test_member( LLONG_MAX, LLONG_MAX, stdout, byte_pos ); + int result = mtester.test_member( LONG_MAX, LLONG_MAX, stdout, byte_pos ); if( result == 0 && !mtester.finished() ) result = -1; // false negative if( result != 0 ) *failure_posp = mtester.member_position(); return result; } -long next_pct_pos( const Lzip_index & lzip_index, const int i, const int pct, +long next_pct_pos( const Lzip_index & lzip_index, const long i, const int pct, const int sector_size = 0 ) { if( pct <= 0 ) return 0; @@ -103,13 +103,14 @@ long next_pct_pos( const Lzip_index & lzip_index, const int i, const int pct, /* Test 1-bit errors in LZMA streams in file. Unless verbosity >= 1, print only the bytes with interesting results. 
*/ -int lunzcrash_bit( const char * const input_filename ) +int lunzcrash_bit( const char * const input_filename, + const Cl_options & cl_opts ) { struct stat in_stats; // not used const int infd = open_instream( input_filename, &in_stats, false, true ); if( infd < 0 ) return 1; - const Lzip_index lzip_index( infd, true, true ); + const Lzip_index lzip_index( infd, cl_opts ); if( lzip_index.retval() != 0 ) { show_file_error( input_filename, lzip_index.error().c_str() ); return lzip_index.retval(); } @@ -122,12 +123,12 @@ int lunzcrash_bit( const char * const input_filename ) { const long long mpos = lzip_index.mblock( i ).pos(); const long long msize = lzip_index.mblock( i ).size(); - const unsigned dictionary_size = lzip_index.dictionary_size( i ); - uint8_t * const mbuffer = read_member( infd, mpos, msize ); + uint8_t * const mbuffer = read_member( infd, mpos, msize, input_filename ); if( !mbuffer ) return 1; - uint8_t md5_orig[16]; - if( !verify_member( mbuffer, msize, dictionary_size, input_filename, - md5_orig ) ) return 2; + const unsigned dictionary_size = lzip_index.dictionary_size( i ); + md5_type md5_orig; + if( !check_member( mbuffer, msize, dictionary_size, input_filename, + md5_orig ) ) return 2; long pct_pos = next_pct_pos( lzip_index, i, pct ); long pos = Lzip_header::size + 1, printed = 0; // last pos printed const long end = msize - 20; @@ -212,7 +213,7 @@ int lunzcrash_bit( const char * const input_filename ) if( failed_comparisons > 0 ) std::printf( ", of which\n%9ld comparisons failed\n", failed_comparisons ); - else std::fputs( "\n all comparisons passed\n", stdout ); + else std::fputs( "\n all comparisons passed\n", stdout ); } else std::fputc( '\n', stdout ); } @@ -222,13 +223,14 @@ int lunzcrash_bit( const char * const input_filename ) /* Test zeroed blocks of given size in LZMA streams in file. Unless verbosity >= 1, print only the bytes with interesting results. 
*/ -int lunzcrash_block( const char * const input_filename, const int sector_size ) +int lunzcrash_block( const char * const input_filename, + const Cl_options & cl_opts, const int sector_size ) { struct stat in_stats; // not used const int infd = open_instream( input_filename, &in_stats, false, true ); if( infd < 0 ) return 1; - const Lzip_index lzip_index( infd, true, true ); + const Lzip_index lzip_index( infd, cl_opts ); if( lzip_index.retval() != 0 ) { show_file_error( input_filename, lzip_index.error().c_str() ); return lzip_index.retval(); } @@ -242,16 +244,17 @@ int lunzcrash_block( const char * const input_filename, const int sector_size ) { const long long mpos = lzip_index.mblock( i ).pos(); const long long msize = lzip_index.mblock( i ).size(); - long pos = Lzip_header::size + 1; - const long end = msize - sector_size - 20; - if( end <= pos ) continue; // sector_size larger than LZMA stream - const unsigned dictionary_size = lzip_index.dictionary_size( i ); - uint8_t * const mbuffer = read_member( infd, mpos, msize ); + // skip members with LZMA stream smaller than sector_size + if( msize - Lzip_header::size - 1 - 20 <= sector_size ) continue; + uint8_t * const mbuffer = read_member( infd, mpos, msize, input_filename ); if( !mbuffer ) return 1; - uint8_t md5_orig[16]; - if( !verify_member( mbuffer, msize, dictionary_size, input_filename, - md5_orig ) ) return 2; + const unsigned dictionary_size = lzip_index.dictionary_size( i ); + md5_type md5_orig; + if( !check_member( mbuffer, msize, dictionary_size, input_filename, + md5_orig ) ) return 2; long pct_pos = next_pct_pos( lzip_index, i, pct, sector_size ); + long pos = Lzip_header::size + 1; + const long end = msize - sector_size - 20; if( verbosity >= 0 ) // give a clue of the range being tested std::printf( "Testing blocks of size %u from pos %llu to %llu\n", sector_size, mpos + pos, mpos + end - 1 ); @@ -324,7 +327,7 @@ int lunzcrash_block( const char * const input_filename, const int sector_size ) 
if( failed_comparisons > 0 ) std::printf( ", of which\n%9ld comparisons failed\n", failed_comparisons ); - else std::fputs( "\n all comparisons passed\n", stdout ); + else std::fputs( "\n all comparisons passed\n", stdout ); } else std::fputc( '\n', stdout ); } @@ -348,7 +351,8 @@ int md5sum_files( const std::vector< std::string > & filenames ) if( infd < 0 ) { set_retval( retval, 1 ); continue; } enum { buffer_size = 16384 }; - uint8_t buffer[buffer_size], md5_digest[16]; + uint8_t buffer[buffer_size]; + md5_type md5_digest; MD5SUM md5sum; while( true ) { diff --git a/lzip.h b/lzip.h index 6197b7e..013672f 100644 --- a/lzip.h +++ b/lzip.h @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2023 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -55,7 +55,7 @@ enum { dis_slot_bits = 6, start_dis_model = 4, end_dis_model = 14, - modeled_distances = 1 << (end_dis_model / 2), // 128 + modeled_distances = 1 << ( end_dis_model / 2 ), // 128 dis_align_bits = 4, dis_align_size = 1 << dis_align_bits, @@ -179,23 +179,14 @@ public: c = data[(c^buffer[i])&0xFF] ^ ( c >> 8 ); crc = c; } - - uint32_t compute_crc( const uint8_t * const buffer, - const long long size ) const - { - uint32_t crc = 0xFFFFFFFFU; - for( long long i = 0; i < size; ++i ) - crc = data[(crc^buffer[i])&0xFF] ^ ( crc >> 8 ); - return crc ^ 0xFFFFFFFFU; - } }; extern const CRC32 crc32; inline bool isvalid_ds( const unsigned dictionary_size ) - { return ( dictionary_size >= min_dictionary_size && - dictionary_size <= max_dictionary_size ); } + { return dictionary_size >= min_dictionary_size && + dictionary_size <= max_dictionary_size; } inline int real_bits( unsigned value ) @@ -210,35 +201,35 @@ const uint8_t lzip_magic[4] = { 0x4C, 0x5A, 0x49, 0x50 }; // "LZIP" struct Lzip_header { - uint8_t data[6]; // 0-3 magic 
bytes + enum { size = 6 }; + uint8_t data[size]; // 0-3 magic bytes // 4 version // 5 coded dictionary size - enum { size = 6 }; void set_magic() { std::memcpy( data, lzip_magic, 4 ); data[4] = 1; } - bool verify_magic() const - { return ( std::memcmp( data, lzip_magic, 4 ) == 0 ); } + bool check_magic() const { return std::memcmp( data, lzip_magic, 4 ) == 0; } - bool verify_prefix( const int sz ) const // detect (truncated) header + bool check_prefix( const int sz ) const // detect (truncated) header { for( int i = 0; i < sz && i < 4; ++i ) if( data[i] != lzip_magic[i] ) return false; - return ( sz > 0 ); + return sz > 0; } - bool verify_corrupt() const // detect corrupt header + + bool check_corrupt() const // detect corrupt header { int matches = 0; for( int i = 0; i < 4; ++i ) if( data[i] == lzip_magic[i] ) ++matches; - return ( matches > 1 && matches < 4 ); + return matches > 1 && matches < 4; } uint8_t version() const { return data[4]; } - bool verify_version() const { return ( data[4] == 1 ); } + bool check_version() const { return data[4] == 1; } unsigned dictionary_size() const { - unsigned sz = ( 1 << ( data[5] & 0x1F ) ); + unsigned sz = 1 << ( data[5] & 0x1F ); if( sz > min_dictionary_size ) sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 7 ); return sz; @@ -254,23 +245,23 @@ struct Lzip_header const unsigned fraction = base_size / 16; for( unsigned i = 7; i >= 1; --i ) if( base_size - ( i * fraction ) >= sz ) - { data[5] |= ( i << 5 ); break; } + { data[5] |= i << 5; break; } } return true; } - bool verify( const bool ignore_bad_ds ) const - { return verify_magic() && verify_version() && + bool check( const bool ignore_bad_ds = false ) const + { return check_magic() && check_version() && ( ignore_bad_ds || isvalid_ds( dictionary_size() ) ); } }; struct Lzip_trailer { - uint8_t data[20]; // 0-3 CRC32 of the uncompressed data + enum { size = 20 }; + uint8_t data[size]; // 0-3 CRC32 of the uncompressed data // 4-11 size of the uncompressed data // 12-19 member 
size including header and trailer - enum { size = 20 }; unsigned data_crc() const { @@ -302,7 +293,7 @@ struct Lzip_trailer void member_size( unsigned long long sz ) { for( int i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } } - bool verify_consistency() const // check internal consistency + bool check_consistency() const // check internal consistency { const unsigned crc = data_crc(); const unsigned long long dsize = data_size(); @@ -318,13 +309,27 @@ struct Lzip_trailer }; +struct Cl_options // command line options + { + bool ignore_empty; + bool ignore_errors; + bool ignore_marking; + bool ignore_trailing; + bool loose_trailing; + + Cl_options() + : ignore_empty( true ), ignore_errors( false ), ignore_marking( true ), + ignore_trailing( true ), loose_trailing( false ) {} + }; + + #ifndef INT64_MAX -#define INT64_MAX 0x7FFFFFFFFFFFFFFFLL +#define INT64_MAX 0x7FFFFFFFFFFFFFFFLL #endif class Block { - long long pos_, size_; // pos + size <= INT64_MAX + long long pos_, size_; // pos >= 0, size >= 0, pos + size <= INT64_MAX public: Block( const long long p, const long long s ) : pos_( p ), size_( s ) {} @@ -344,11 +349,11 @@ public: bool operator<( const Block & b ) const { return pos_ < b.pos_; } bool includes( const long long pos ) const - { return ( pos_ <= pos && end() > pos ); } + { return pos_ <= pos && end() > pos; } bool overlaps( const Block & b ) const - { return ( pos_ < b.end() && b.pos_ < end() ); } + { return pos_ < b.end() && b.pos_ < end(); } bool overlaps( const long long pos, const long long size ) const - { return ( pos_ < pos + size && pos < end() ); } + { return pos_ < pos + size && pos < end(); } void shift( Block & b ) { ++size_; ++b.pos_; --b.size_; } Block split( const long long pos ); @@ -358,12 +363,15 @@ public: struct Member_list // members/gaps/tdata to be dumped/removed/stripped { bool damaged; + bool empty; bool tdata; bool in, rin; std::vector< Block > range_vector, rrange_vector; - Member_list() : damaged( false ), tdata( 
false ), in( true ), rin( true ) {} - void parse_ml( const char * p, const char * const option_name ); + Member_list() : damaged( false ), empty( false ), tdata( false ), + in( true ), rin( true ) {} + void parse_ml( const char * const arg, const char * const option_name, + Cl_options & cl_opts ); bool range() const { return range_vector.size() || rrange_vector.size(); } @@ -394,7 +402,7 @@ struct Error inline unsigned long long positive_diff( const unsigned long long x, const unsigned long long y ) - { return ( ( x > y ) ? x - y : 0 ); } + { return ( x > y ) ? x - y : 0; } inline void set_retval( int & retval, const int new_val ) { if( retval < new_val ) retval = new_val; } @@ -402,39 +410,59 @@ inline void set_retval( int & retval, const int new_val ) const char * const bad_magic_msg = "Bad magic number (file not in lzip format)."; const char * const bad_dict_msg = "Invalid dictionary size in member header."; const char * const corrupt_mm_msg = "Corrupt header in multimember file."; +const char * const empty_msg = "Empty member not allowed."; +const char * const marking_msg = "Marking data not allowed."; const char * const trailing_msg = "Trailing data not allowed."; // defined in alone_to_lz.cc int alone_to_lz( const int infd, const Pretty_print & pp ); +// defined in byte_repair.cc +long seek_write( const int fd, const uint8_t * const buf, const long size, + const long long pos ); +uint8_t * read_member( const int infd, const long long mpos, + const long long msize, const char * const filename ); +int byte_repair( const std::string & input_filename, + const std::string & default_output_filename, + const Cl_options & cl_opts, + const char terminator, const bool force ); +int debug_delay( const char * const input_filename, + const Cl_options & cl_opts, Block range, + const char terminator ); +int debug_byte_repair( const char * const input_filename, + const Cl_options & cl_opts, const Bad_byte & bad_byte, + const char terminator ); +int debug_decompress( const 
char * const input_filename, + const Cl_options & cl_opts, const Bad_byte & bad_byte, + const bool show_packets ); + // defined in decoder.cc -long long readblock( const int fd, uint8_t * const buf, const long long size ); -long long writeblock( const int fd, const uint8_t * const buf, - const long long size ); +long readblock( const int fd, uint8_t * const buf, const long size ); +long writeblock( const int fd, const uint8_t * const buf, const long size ); // defined in dump_remove.cc int dump_members( const std::vector< std::string > & filenames, const std::string & default_output_filename, - const Member_list & member_list, const bool force, - bool ignore_errors, bool ignore_trailing, - const bool loose_trailing, const bool strip, - const bool to_stdout ); + const Cl_options & cl_opts, const Member_list & member_list, + const bool force, const bool strip, const bool to_stdout ); int remove_members( const std::vector< std::string > & filenames, - const Member_list & member_list, bool ignore_errors, - bool ignore_trailing, const bool loose_trailing ); + const Cl_options & cl_opts, const Member_list & member_list ); +int clear_marking( const std::vector< std::string > & filenames, + const Cl_options & cl_opts ); // defined in list.cc int list_files( const std::vector< std::string > & filenames, - const bool ignore_errors, - const bool ignore_trailing, const bool loose_trailing ); + const Cl_options & cl_opts ); // defined in lzip_index.cc int seek_read( const int fd, uint8_t * const buf, const int size, const long long pos ); // defined in lunzcrash.cc -int lunzcrash_bit( const char * const input_filename ); -int lunzcrash_block( const char * const input_filename, const int sector_size ); +int lunzcrash_bit( const char * const input_filename, + const Cl_options & cl_opts ); +int lunzcrash_block( const char * const input_filename, + const Cl_options & cl_opts, const int sector_size ); int md5sum_files( const std::vector< std::string > & filenames ); // defined in 
main.cc @@ -442,6 +470,7 @@ extern const char * const program_name; extern std::string output_filename; // global vars for output file extern int outfd; struct stat; +bool fits_in_size_t( const unsigned long long size ); const char * bad_version( const unsigned version ); const char * format_ds( const unsigned dictionary_size ); void show_header( const unsigned dictionary_size ); @@ -451,7 +480,7 @@ int open_truncable_stream( const char * const name, struct stat * const in_statsp ); bool open_outstream( const bool force, const bool protect, const bool rw = false, const bool skipping = true ); -bool file_exists( const std::string & filename ); +bool output_file_exists(); void cleanup_and_fail( const int retval ); bool check_tty_out(); void set_signal_handler(); @@ -472,52 +501,38 @@ int test_member_from_file( const int infd, const unsigned long long msize, long long * const failure_posp = 0 ); int merge_files( const std::vector< std::string > & filenames, const std::string & default_output_filename, - const char terminator, const bool force ); + const Cl_options & cl_opts, const char terminator, + const bool force ); // defined in nrep_stats.cc int print_nrep_stats( const std::vector< std::string > & filenames, - const int repeated_byte, const bool ignore_errors, - const bool ignore_trailing, const bool loose_trailing ); + const Cl_options & cl_opts, const int repeated_byte ); // defined in range_dec.cc const char * format_num( unsigned long long num, unsigned long long limit = -1ULL, const int set_prefix = 0 ); -bool safe_seek( const int fd, const long long pos ); +bool safe_seek( const int fd, const long long pos, + const char * const filename ); int range_decompress( const std::string & input_filename, const std::string & default_output_filename, - Block range, const bool force, const bool ignore_errors, - const bool ignore_trailing, const bool loose_trailing, - const bool to_stdout ); - -// defined in repair.cc -long long seek_write( const int fd, const uint8_t 
* const buf, - const long long size, const long long pos ); -uint8_t * read_member( const int infd, const long long mpos, - const long long msize ); -int repair_file( const std::string & input_filename, - const std::string & default_output_filename, - const char terminator, const bool force ); -int debug_delay( const std::string & input_filename, Block range, - const char terminator ); -int debug_repair( const std::string & input_filename, - const Bad_byte & bad_byte, const char terminator ); -int debug_decompress( const std::string & input_filename, - const Bad_byte & bad_byte, const bool show_packets ); + const Cl_options & cl_opts, Block range, + const bool force, const bool to_stdout ); // defined in reproduce.cc int reproduce_file( const std::string & input_filename, const std::string & default_output_filename, const char * const lzip_name, const char * const reference_filename, - const int lzip_level, const char terminator, - const bool force ); -int debug_reproduce_file( const std::string & input_filename, + const Cl_options & cl_opts, const int lzip_level, + const char terminator, const bool force ); +int debug_reproduce_file( const char * const input_filename, const char * const lzip_name, const char * const reference_filename, - const Block & range, const int sector_size, - const int lzip_level ); + const Cl_options & cl_opts, const Block & range, + const int sector_size, const int lzip_level ); // defined in split.cc int split_file( const std::string & input_filename, - const std::string & default_output_filename, const bool force ); + const std::string & default_output_filename, + const Cl_options & cl_opts, const bool force ); diff --git a/lzip_index.cc b/lzip_index.cc index eff4d05..1614dde 100644 --- a/lzip_index.cc +++ b/lzip_index.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2023 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -40,11 +40,12 @@ int seek_read( const int fd, uint8_t * const buf, const int size, bool Lzip_index::check_header_error( const Lzip_header & header, - const bool ignore_bad_ds ) + const bool first, const bool ignore_bad_ds ) { - if( !header.verify_magic() ) - { error_ = bad_magic_msg; retval_ = 2; return true; } - if( !header.verify_version() ) + if( !header.check_magic() ) + { error_ = bad_magic_msg; retval_ = 2; if( first ) bad_magic_ = true; + return true; } + if( !header.check_version() ) { error_ = bad_version( header.version() ); retval_ = 2; return true; } if( !ignore_bad_ds && !isvalid_ds( header.dictionary_size() ) ) { error_ = bad_dict_msg; retval_ = 2; return true; } @@ -67,10 +68,13 @@ void Lzip_index::set_num_error( const char * const msg, unsigned long long num ) bool Lzip_index::read_header( const int fd, Lzip_header & header, - const long long pos ) + const long long pos, const bool ignore_marking ) { if( seek_read( fd, header.data, Lzip_header::size, pos ) != Lzip_header::size ) { set_errno_error( "Error reading member header: " ); return false; } + uint8_t byte; + if( !ignore_marking && readblock( fd, &byte, 1 ) == 1 && byte != 0 ) + { error_ = marking_msg; retval_ = 2; return false; } return true; } @@ -88,8 +92,8 @@ bool Lzip_index::read_trailer( const int fd, Lzip_trailer & trailer, 'ignore_gaps' also ignores format errors and a truncated last member. If successful, push member preceding gap and set pos to member header. 
*/ bool Lzip_index::skip_gap( const int fd, unsigned long long & pos, - const bool ignore_trailing, const bool loose_trailing, - const bool ignore_bad_ds, const bool ignore_gaps ) + const Cl_options & cl_opts, + const bool ignore_bad_ds, const bool ignore_gaps ) { if( pos < min_member_size ) { @@ -118,19 +122,20 @@ bool Lzip_index::skip_gap( const int fd, unsigned long long & pos, const unsigned long long member_size = trailer.member_size(); if( member_size == 0 ) // skip trailing zeros { while( i > Lzip_trailer::size && buffer[i-9] == 0 ) --i; continue; } - if( member_size > ipos + i || !trailer.verify_consistency() ) + if( member_size > ipos + i || !trailer.check_consistency() ) continue; Lzip_header header; - if( !read_header( fd, header, ipos + i - member_size ) ) return false; - if( !header.verify( ignore_bad_ds ) ) continue; + if( !read_header( fd, header, ipos + i - member_size, + cl_opts.ignore_marking ) ) return false; + if( !header.check( ignore_bad_ds ) ) continue; const Lzip_header & header2 = *(const Lzip_header *)( buffer + i ); const bool full_h2 = bsize - i >= Lzip_header::size; - if( header2.verify_prefix( bsize - i ) ) // next header + if( header2.check_prefix( bsize - i ) ) // next header { if( !ignore_gaps && member_vector.empty() ) // last member { if( !full_h2 ) error_ = "Last member in input file is truncated."; - else if( !check_header_error( header2, ignore_bad_ds ) ) + else if( !check_header_error( header2, false, ignore_bad_ds ) ) error_ = "Last member in input file is truncated or corrupt."; retval_ = 2; return false; } @@ -144,15 +149,18 @@ bool Lzip_index::skip_gap( const int fd, unsigned long long & pos, } if( !ignore_gaps && member_vector.empty() ) { - if( !loose_trailing && full_h2 && header2.verify_corrupt() ) + if( !cl_opts.loose_trailing && full_h2 && header2.check_corrupt() ) { error_ = corrupt_mm_msg; retval_ = 2; return false; } - if( !ignore_trailing ) + if( !cl_opts.ignore_trailing ) { error_ = trailing_msg; retval_ = 2; 
return false; } } pos = ipos + i - member_size; + const unsigned long long data_size = trailer.data_size(); + if( !cl_opts.ignore_empty && data_size == 0 ) + { error_ = empty_msg; retval_ = 2; return false; } const unsigned dictionary_size = header.dictionary_size(); - member_vector.push_back( Member( 0, trailer.data_size(), pos, - member_size, dictionary_size ) ); + member_vector.push_back( Member( 0, data_size, pos, member_size, + dictionary_size ) ); if( dictionary_size_ < dictionary_size ) dictionary_size_ = dictionary_size; return true; @@ -179,10 +187,11 @@ bool Lzip_index::skip_gap( const int fd, unsigned long long & pos, } -Lzip_index::Lzip_index( const int infd, const bool ignore_trailing, - const bool loose_trailing, const bool ignore_bad_ds, - const bool ignore_gaps, const long long max_pos ) - : insize( lseek( infd, 0, SEEK_END ) ), retval_( 0 ), dictionary_size_( 0 ) +Lzip_index::Lzip_index( const int infd, const Cl_options & cl_opts, + const bool ignore_bad_ds, const bool ignore_gaps, + const long long max_pos ) + : insize( lseek( infd, 0, SEEK_END ) ), retval_( 0 ), + dictionary_size_( 0 ), bad_magic_( false ) { if( insize < 0 ) { set_errno_error( "Input file is not seekable: " ); return; } @@ -193,8 +202,8 @@ Lzip_index::Lzip_index( const int infd, const bool ignore_trailing, retval_ = 2; return; } Lzip_header header; - if( !read_header( infd, header, 0 ) ) return; - if( check_header_error( header, ignore_bad_ds ) ) return; + if( !read_header( infd, header, 0, cl_opts.ignore_marking ) ) return; + if( check_header_error( header, true, ignore_bad_ds ) ) return; // pos always points to a header or to ( EOF || max_pos ) unsigned long long pos = ( max_pos > 0 ) ? 
max_pos : insize; @@ -203,36 +212,40 @@ Lzip_index::Lzip_index( const int infd, const bool ignore_trailing, Lzip_trailer trailer; if( !read_trailer( infd, trailer, pos ) ) break; const unsigned long long member_size = trailer.member_size(); - // if gaps are being ignored, verify consistency of last trailer only. + // if gaps are being ignored, check consistency of last trailer only. if( member_size > pos || member_size < min_member_size || ( ( !ignore_gaps || member_vector.empty() ) && - !trailer.verify_consistency() ) ) // bad trailer + !trailer.check_consistency() ) ) // bad trailer { if( ignore_gaps || member_vector.empty() ) - { if( skip_gap( infd, pos, ignore_trailing, loose_trailing, - ignore_bad_ds, ignore_gaps ) ) continue; else return; } + { if( skip_gap( infd, pos, cl_opts, ignore_bad_ds, ignore_gaps ) ) + continue; else return; } set_num_error( "Bad trailer at pos ", pos - Lzip_trailer::size ); break; } - if( !read_header( infd, header, pos - member_size ) ) break; - if( !header.verify( ignore_bad_ds ) ) // bad header + if( !read_header( infd, header, pos - member_size, cl_opts.ignore_marking ) ) + break; + if( !header.check( ignore_bad_ds ) ) // bad header { if( ignore_gaps || member_vector.empty() ) - { if( skip_gap( infd, pos, ignore_trailing, loose_trailing, - ignore_bad_ds, ignore_gaps ) ) continue; else return; } + { if( skip_gap( infd, pos, cl_opts, ignore_bad_ds, ignore_gaps ) ) + continue; else return; } set_num_error( "Bad header at pos ", pos - member_size ); break; } pos -= member_size; + const unsigned long long data_size = trailer.data_size(); + if( !cl_opts.ignore_empty && data_size == 0 ) + { error_ = empty_msg; retval_ = 2; break; } const unsigned dictionary_size = header.dictionary_size(); - member_vector.push_back( Member( 0, trailer.data_size(), pos, - member_size, dictionary_size ) ); + member_vector.push_back( Member( 0, data_size, pos, member_size, + dictionary_size ) ); if( dictionary_size_ < dictionary_size ) dictionary_size_ = 
dictionary_size; } // block at pos == 0 must be a member unless shorter than min_member_size if( pos >= min_member_size || ( pos != 0 && !ignore_gaps ) || - member_vector.empty() ) + member_vector.empty() || retval_ != 0 ) { member_vector.clear(); if( retval_ == 0 ) { error_ = "Can't create file index."; retval_ = 2; } @@ -259,7 +272,8 @@ Lzip_index::Lzip_index( const int infd, const bool ignore_trailing, // All files in 'infd_vector' must be at least 'fsize' bytes long. Lzip_index::Lzip_index( const std::vector< int > & infd_vector, const long long fsize ) - : insize( fsize ), retval_( 0 ), dictionary_size_( 0 ) // DS not used + : insize( fsize ), retval_( 0 ), + dictionary_size_( 0 ), bad_magic_( false ) // DS not used { if( insize < 0 ) { set_errno_error( "Input file is not seekable: " ); return; } @@ -276,7 +290,7 @@ Lzip_index::Lzip_index( const std::vector< int > & infd_vector, { const int infd = infd_vector[i]; if( !read_header( infd, header, 0 ) ) return; - if( header.verify_magic() && header.verify_version() ) done = true; + if( header.check_magic() && header.check_version() ) done = true; } if( !done ) { error_ = bad_magic_msg; retval_ = 2; return; } @@ -292,12 +306,12 @@ Lzip_index::Lzip_index( const std::vector< int > & infd_vector, const int tfd = infd_vector[it]; if( !read_trailer( tfd, trailer, pos ) ) goto error; member_size = trailer.member_size(); - if( member_size <= (unsigned long long)pos && trailer.verify_consistency() ) + if( member_size <= (unsigned long long)pos && trailer.check_consistency() ) for( int ih = 0; ih < files && !done; ++ih ) { const int hfd = infd_vector[ih]; if( !read_header( hfd, header, pos - member_size ) ) goto error; - if( header.verify_magic() && header.verify_version() ) done = true; + if( header.check_magic() && header.check_version() ) done = true; } } if( !done ) @@ -313,7 +327,7 @@ Lzip_index::Lzip_index( const std::vector< int > & infd_vector, { const int infd = infd_vector[i]; if( seek_read( infd, header.data, 
size, pos ) == size && - header.verify_prefix( size ) ) + header.check_prefix( size ) ) { error_ = "Last member in input file is truncated or corrupt."; retval_ = 2; goto error; diff --git a/lzip_index.h b/lzip_index.h index 0b8ace1..52d831e 100644 --- a/lzip_index.h +++ b/lzip_index.h @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2023 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,9 +22,11 @@ class Lzip_index Block dblock, mblock; // data block, member block unsigned dictionary_size; - Member( const long long dp, const long long ds, - const long long mp, const long long ms, const unsigned dict_size ) - : dblock( dp, ds ), mblock( mp, ms ), dictionary_size( dict_size ) {} + Member( const long long dpos, const long long dsize, + const long long mpos, const long long msize, + const unsigned dict_size ) + : dblock( dpos, dsize ), mblock( mpos, msize ), + dictionary_size( dict_size ) {} bool operator==( const Member & m ) const { return ( mblock == m.mblock ); } bool operator!=( const Member & m ) const { return ( mblock != m.mblock ); } @@ -37,24 +39,27 @@ class Lzip_index long long insize; int retval_; unsigned dictionary_size_; // largest dictionary size in the file + bool bad_magic_; // bad magic in first header - bool check_header_error( const Lzip_header & header, + bool check_header_error( const Lzip_header & header, const bool first, const bool ignore_bad_ds ); void set_errno_error( const char * const msg ); void set_num_error( const char * const msg, unsigned long long num ); - bool read_header( const int fd, Lzip_header & header, const long long pos ); + bool read_header( const int fd, Lzip_header & header, const long long pos, + const bool ignore_marking = true ); bool read_trailer( const int fd, Lzip_trailer & trailer, const long long pos ); bool 
skip_gap( const int fd, unsigned long long & pos, - const bool ignore_trailing, const bool loose_trailing, + const Cl_options & cl_opts, const bool ignore_bad_ds, const bool ignore_gaps ); public: Lzip_index() - : error_( "No index" ), insize( 0 ), retval_( 2 ), dictionary_size_( 0 ) {} - Lzip_index( const int infd, const bool ignore_trailing, - const bool loose_trailing, const bool ignore_bad_ds = false, - const bool ignore_gaps = false, const long long max_pos = 0 ); + : error_( "No index" ), insize( 0 ), retval_( 2 ), + dictionary_size_( 0 ), bad_magic_( false ) {} + Lzip_index( const int infd, const Cl_options & cl_opts, + const bool ignore_bad_ds = false, const bool ignore_gaps = false, + const long long max_pos = 0 ); Lzip_index( const std::vector< int > & infd_vector, const long long fsize ); long members() const { return member_vector.size(); } @@ -62,6 +67,7 @@ public: const std::string & error() const { return error_; } int retval() const { return retval_; } unsigned dictionary_size() const { return dictionary_size_; } + bool bad_magic() const { return bad_magic_; } bool operator==( const Lzip_index & li ) const { diff --git a/main.cc b/main.cc index 72f415e..ed8f1ed 100644 --- a/main.cc +++ b/main.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2023 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,9 +16,9 @@ */ /* Exit status: 0 for a normal exit, 1 for environmental problems - (file not found, invalid flags, I/O errors, etc), 2 to indicate a - corrupt or invalid input file, 3 for an internal consistency error - (e.g., bug) which caused lziprecover to panic. 
+ (file not found, invalid command line options, I/O errors, etc), 2 to + indicate a corrupt or invalid input file, 3 for an internal consistency + error (e.g., bug) which caused lziprecover to panic. */ #define _FILE_OFFSET_BITS 64 @@ -26,7 +26,7 @@ #include #include #include -#include +#include // SSIZE_MAX #include #include #include @@ -35,7 +35,7 @@ #include #include #include -#include +#include // SIZE_MAX #include #include #include @@ -71,11 +71,15 @@ #error "Environments where CHAR_BIT != 8 are not supported." #endif -#if ( defined SIZE_MAX && SIZE_MAX < UINT_MAX ) || \ - ( defined SSIZE_MAX && SSIZE_MAX < INT_MAX ) -#error "Environments where 'size_t' is narrower than 'int' are not supported." +#if ( defined SIZE_MAX && SIZE_MAX < ULONG_MAX ) || \ + ( defined SSIZE_MAX && SSIZE_MAX < LONG_MAX ) +#error "Environments where 'size_t' is narrower than 'long' are not supported." #endif +bool fits_in_size_t( const unsigned long long size ) + { return ( sizeof (long) <= sizeof (size_t) && size <= LONG_MAX ) || + ( sizeof (int) <= sizeof (size_t) && size <= INT_MAX ); } + int verbosity = 0; const char * const program_name = "lziprecover"; @@ -91,11 +95,11 @@ const struct { const char * from; const char * to; } known_extensions[] = { { ".tlz", ".tar" }, { 0, 0 } }; -enum Mode { m_none, m_alone_to_lz, m_debug_decompress, m_debug_delay, - m_debug_repair, m_decompress, m_dump, m_list, m_md5sum, m_merge, - m_nrep_stats, m_range_dec, m_remove, m_repair, m_reproduce, - m_show_packets, m_split, m_strip, m_test, m_unzcrash_bit, - m_unzcrash_block }; +enum Mode { m_none, m_alone_to_lz, m_byte_repair, m_clear_marking, + m_debug_byte_repair, m_debug_decompress, m_debug_delay, + m_decompress, m_dump, m_list, m_md5sum, m_merge, m_nrep_stats, + m_range_dec, m_remove, m_reproduce, m_show_packets, m_split, + m_strip, m_test, m_unzcrash_bit, m_unzcrash_block }; /* Variable used in signal handler context. It is not declared volatile because the handler never returns. 
*/ @@ -127,7 +131,7 @@ void show_help() " -a, --trailing-error exit with error status if trailing data\n" " -A, --alone-to-lz convert lzma-alone files to lzip format\n" " -c, --stdout write to standard output, keep input files\n" - " -d, --decompress decompress\n" + " -d, --decompress decompress, test compressed file integrity\n" " -D, --range-decompress= decompress a range of bytes to stdout\n" " -e, --reproduce try to reproduce a zeroed sector in file\n" " --lzip-level=N|a|m[N] reproduce one level, all, or match length\n" @@ -140,14 +144,17 @@ void show_help() " -m, --merge correct errors in file using several copies\n" " -o, --output= place the output into \n" " -q, --quiet suppress all messages\n" - " -R, --repair try to repair a small error in file\n" + " -R, --byte-repair try to repair a corrupt byte in file\n" " -s, --split split multimember file in single-member files\n" " -t, --test test compressed file integrity\n" " -v, --verbose be verbose (a 2nd -v gives more)\n" + " --dump=:d:e:t dump members, damaged/empty, tdata to stdout\n" + " --remove=:d:e:t remove members, tdata from files in place\n" + " --strip=:d:e:t copy files to stdout stripping members given\n" + " --empty-error exit with error status if empty member in file\n" + " --marking-error exit with error status if 1st LZMA byte not 0\n" " --loose-trailing allow trailing data seeming corrupt header\n" - " --dump=:d:t dump members listed/damaged, tdata to stdout\n" - " --remove=:d:t remove members, tdata from files in place\n" - " --strip=:d:t copy files to stdout stripping members given\n" ); + " --clear-marking reset the first LZMA byte of each member\n" ); if( verbosity >= 1 ) { std::printf( "\nDebug options for experts:\n" @@ -158,7 +165,7 @@ void show_help() " -W, --debug-decompress=, set pos to val and decompress to stdout\n" " -X, --show-packets[=,] show in stdout the decoded LZMA packets\n" " -Y, --debug-delay= find max error detection delay in \n" - " -Z, --debug-repair=, test repair 
one-byte error at \n" ); + " -Z, --debug-byte-repair=, test repair one-byte error at \n" ); } std::printf( "\nIf no file names are given, or if a file is '-', lziprecover decompresses\n" "from standard input to standard output.\n" @@ -166,10 +173,10 @@ void show_help() "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" "\nTo extract all the files from archive 'foo.tar.lz', use the commands\n" "'tar -xf foo.tar.lz' or 'lziprecover -cd foo.tar.lz | tar -xf -'.\n" - "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" - "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" - "invalid input file, 3 for an internal consistency error (e.g., bug) which\n" - "caused lziprecover to panic.\n" + "\nExit status: 0 for a normal exit, 1 for environmental problems\n" + "(file not found, invalid command line options, I/O errors, etc), 2 to\n" + "indicate a corrupt or invalid input file, 3 for an internal consistency\n" + "error (e.g., bug) which caused lziprecover to panic.\n" "\nReport bugs to lzip-bug@nongnu.org\n" "Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html\n" ); } @@ -202,14 +209,13 @@ const char * format_ds( const unsigned dictionary_size ) { enum { bufsize = 16, factor = 1024 }; static char buf[bufsize]; - const char * const prefix[8] = - { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; + const char * const prefix[3] = { "Ki", "Mi", "Gi" }; const char * p = ""; const char * np = " "; unsigned num = dictionary_size; bool exact = ( num % factor == 0 ); - for( int i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) + for( int i = 0; i < 3 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) { num /= factor; if( num % factor != 0 ) exact = false; p = prefix[i]; np = ""; } snprintf( buf, bufsize, "%s%4u %sB", np, num, p ); @@ -226,42 +232,47 @@ void show_header( const unsigned dictionary_size ) #include "main_common.cc" -// Colon-separated list of "damaged", "tdata", 
[r][^] (1 1,3-5,8) -void Member_list::parse_ml( const char * arg, const char * const option_name ) +// Colon-separated list of "damaged", "empty", "tdata", [r][^] (1 1,3-5) +void Member_list::parse_ml( const char * const arg, + const char * const option_name, + Cl_options & cl_opts ) { + const char * p = arg; // points to current char while( true ) { - const char * tp = arg; // points to terminator (':' or '\0') + const char * tp = p; // points to terminator (':' or '\0') while( *tp && *tp != ':' ) ++tp; - const unsigned len = tp - arg; - if( std::islower( *(const unsigned char *)arg ) ) + const unsigned len = tp - p; + if( std::islower( *(const unsigned char *)p ) ) { - if( len <= 7 && std::strncmp( "damaged", arg, len ) == 0 ) - { damaged = true; goto next; } - if( len <= 5 && std::strncmp( "tdata", arg, len ) == 0 ) - { tdata = true; goto next; } + if( len <= 7 && std::strncmp( "damaged", p, len ) == 0 ) + { damaged = true; cl_opts.ignore_errors = true; goto next; } + if( len <= 5 && std::strncmp( "empty", p, len ) == 0 ) + { empty = true; cl_opts.ignore_empty = true; goto next; } + if( len <= 5 && std::strncmp( "tdata", p, len ) == 0 ) + { tdata = true; cl_opts.ignore_trailing = true; goto next; } } { - const bool reverse = ( *arg == 'r' ); - if( reverse ) ++arg; - if( *arg == '^' ) { ++arg; if( reverse ) rin = false; else in = false; } + const bool reverse = ( *p == 'r' ); + if( reverse ) ++p; + if( *p == '^' ) { ++p; if( reverse ) rin = false; else in = false; } std::vector< Block > * rvp = reverse ? &rrange_vector : &range_vector; - while( std::isdigit( *(const unsigned char *)arg ) ) + while( std::isdigit( *(const unsigned char *)p ) ) { const char * tail; - const int pos = getnum( arg, option_name, 0, 1, INT_MAX, &tail ) - 1; + const long pos = getnum( p, option_name, 0, 1, LONG_MAX, &tail ) - 1; if( rvp->size() && pos < rvp->back().end() ) break; - const int size = (*tail == '-') ? 
- getnum( tail + 1, option_name, 0, pos + 1, INT_MAX, &tail ) - pos : 1; + const long size = (*tail == '-') ? + getnum( tail + 1, option_name, 0, pos + 1, LONG_MAX, &tail ) - pos : 1; rvp->push_back( Block( pos, size ) ); if( tail == tp ) goto next; - if( *tail == ',' ) arg = tail + 1; else break; + if( *tail == ',' ) p = tail + 1; else break; } } - show_error( "Invalid list of members." ); + show_option_error( arg, "Invalid list of members in", option_name ); std::exit( 1 ); next: - if( *(arg = tp) != 0 ) ++arg; else return; + if( *(p = tp) != 0 ) ++p; else return; } } @@ -274,12 +285,8 @@ int parse_lzip_level( const char * const arg, const char * const option_name ) { if( *arg == 'a' || std::isdigit( *(const unsigned char *)arg ) ) return *arg; if( *arg != 'm' ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Bad argument in option '%s'.\n", - program_name, option_name ); - std::exit( 1 ); - } + { show_option_error( arg, "Invalid argument in", option_name ); + std::exit( 1 ); } if( arg[1] == 0 ) return -1; return -getnum( arg + 1, option_name, 0, min_match_len_limit, max_match_len ); } @@ -287,39 +294,34 @@ int parse_lzip_level( const char * const arg, const char * const option_name ) /* Recognized format: [,] range formats: - , , + Return a pointer to the byte following the bytes parsed. */ -void parse_range( const char * const arg, const char * const pn, - Block & range, int * const sector_sizep = 0 ) +const char * parse_range( const char * const arg, const char * const pn, + Block & range, int * const sector_sizep = 0 ) { const char * tail = arg; long long value = ( arg[0] == ',' ) ? 
0 : getnum( arg, pn, 0, 0, INT64_MAX - 1, &tail ); - if( tail[0] == 0 || tail[0] == ',' || tail[0] == '-' ) + if( tail[0] == 0 || tail[0] == ',' || tail[0] == '-' || tail[0] == ':' ) { range.pos( value ); - if( tail[0] == 0 ) { range.size( INT64_MAX - value ); return; } + if( tail[0] == 0 || tail[0] == ':' ) + { range.size( INT64_MAX - value ); return tail; } const bool is_size = ( tail[0] == ',' ); if( sector_sizep && tail[1] == ',' ) { value = INT64_MAX - value; ++tail; } else value = getnum( tail + 1, pn, 0, 1, INT64_MAX, &tail ); // size if( !is_size && value <= range.pos() ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Begin must be < end in range argument " - "of option '%s'.\n", program_name, pn ); - std::exit( 1 ); - } - if( !is_size ) value -= range.pos(); + { show_option_error( arg, "Begin must be < end in", pn ); std::exit( 1 ); } + if( !is_size ) value -= range.pos(); // size = end - pos if( INT64_MAX - value >= range.pos() ) { range.size( value ); if( sector_sizep && tail[0] == ',' ) - *sector_sizep = getnum( tail + 1, pn, 0, 8, INT_MAX ); - return; + *sector_sizep = getnum( tail + 1, pn, 0, 8, INT_MAX, &tail ); + return tail; } } - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Bad decompression range in option '%s'.\n", - program_name, pn ); + show_option_error( arg, "Invalid decompression range in", pn ); std::exit( 1 ); } @@ -333,6 +335,15 @@ void one_file( const int files ) } } +void at_least_one_file( const int files ) + { + if( files < 1 ) + { + show_error( "You must specify at least 1 file.", 0, true ); + std::exit( 1 ); + } + } + void set_mode( Mode & program_mode, const Mode new_mode ) { @@ -353,12 +364,8 @@ void parse_u( const char * const arg, const char * const option_name, { set_mode( program_mode, m_unzcrash_block ); sector_size = getnum( arg + 1, option_name, 0, 1, INT_MAX ); } else - { - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Bad argument for option '%s'.\n", - program_name, option_name ); - std::exit( 1 ); - } 
+ { show_option_error( arg, "Invalid argument in", option_name ); + std::exit( 1 ); } } @@ -476,15 +483,15 @@ bool open_outstream( const bool force, const bool protect, } -bool file_exists( const std::string & filename ) +bool output_file_exists() { struct stat st; - if( stat( filename.c_str(), &st ) == 0 ) + if( stat( output_filename.c_str(), &st ) == 0 ) { if( verbosity >= 0 ) std::fprintf( stderr, "%s: Output file '%s' already exists." " Use '--force' to overwrite it.\n", - program_name, filename.c_str() ); + program_name, output_filename.c_str() ); return true; } return false; @@ -585,7 +592,7 @@ void close_and_set_permissions( const struct stat * const in_statsp ) } -unsigned char xdigit( const unsigned value ) +unsigned char xdigit( const unsigned value ) // hex digit for 'value' { if( value <= 9 ) return '0' + value; if( value <= 15 ) return 'A' + value - 10; @@ -620,8 +627,7 @@ bool show_trailing_data( const uint8_t * const data, const int size, int decompress( const unsigned long long cfile_size, const int infd, - const Pretty_print & pp, const bool ignore_errors, - const bool ignore_trailing, const bool loose_trailing, + const Cl_options & cl_opts, const Pretty_print & pp, const bool testing ) { unsigned long long partial_file_pos = 0; @@ -632,50 +638,49 @@ int decompress( const unsigned long long cfile_size, const int infd, { Lzip_header header; rdec.reset_member_position(); - const int size = rdec.read_header_carefully( header, ignore_errors ); + const int size = rdec.read_header_carefully( header, cl_opts.ignore_errors ); if( rdec.finished() || // End Of File ( size < Lzip_header::size && !rdec.find_header( header ) ) ) { if( first_member ) { show_file_error( pp.name(), "File ends unexpectedly at member header." ); retval = 2; } - else if( header.verify_prefix( size ) ) + else if( header.check_prefix( size ) ) { pp( "Truncated header in multimember file." 
); - show_trailing_data( header.data, size, pp, true, -1 ); - retval = 2; } - else if( size > 0 && !show_trailing_data( header.data, size, pp, - true, ignore_trailing ) ) - retval = 2; + show_trailing_data( header.data, size, pp, true, -1 ); retval = 2; } + else if( size > 0 && !show_trailing_data( header.data, size, pp, true, + cl_opts.ignore_trailing ) ) retval = 2; break; } - if( !header.verify_magic() ) + if( !header.check_magic() ) { if( first_member ) { show_file_error( pp.name(), bad_magic_msg ); retval = 2; } - else if( !loose_trailing && header.verify_corrupt() ) + else if( !cl_opts.loose_trailing && header.check_corrupt() ) { pp( corrupt_mm_msg ); - show_trailing_data( header.data, size, pp, false, -1 ); - retval = 2; } - else if( !show_trailing_data( header.data, size, pp, false, ignore_trailing ) ) - retval = 2; - if( ignore_errors ) { pp.reset(); continue; } else break; + show_trailing_data( header.data, size, pp, false, -1 ); retval = 2; } + else if( !show_trailing_data( header.data, size, pp, false, + cl_opts.ignore_trailing ) ) retval = 2; + if( cl_opts.ignore_errors ) { pp.reset(); continue; } else break; } - if( !header.verify_version() ) + if( !header.check_version() ) { pp( bad_version( header.version() ) ); retval = 2; - if( ignore_errors ) { pp.reset(); continue; } else break; } + if( cl_opts.ignore_errors ) { pp.reset(); continue; } else break; } const unsigned dictionary_size = header.dictionary_size(); if( !isvalid_ds( dictionary_size ) ) { pp( bad_dict_msg ); retval = 2; - if( ignore_errors ) { pp.reset(); continue; } else break; } + if( cl_opts.ignore_errors ) { pp.reset(); continue; } else break; } if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) pp(); LZ_decoder decoder( rdec, dictionary_size, outfd ); show_dprogress( cfile_size, partial_file_pos, &rdec, &pp ); // init - const int result = decoder.decode_member( pp ); + const int result = + decoder.decode_member( pp, cl_opts.ignore_empty, cl_opts.ignore_marking ); 
partial_file_pos += rdec.member_position(); if( result != 0 ) { + retval = 2; if( verbosity >= 0 && result <= 2 ) { pp(); @@ -683,14 +688,16 @@ int decompress( const unsigned long long cfile_size, const int infd, "File ends unexpectedly" : "Decoder error", partial_file_pos ); } - retval = 2; if( ignore_errors ) { pp.reset(); continue; } else break; + else if( result == 5 ) { pp( empty_msg ); break; } + else if( result == 6 ) { pp( marking_msg ); break; } + if( cl_opts.ignore_errors ) { pp.reset(); continue; } else break; } if( verbosity >= 2 ) { std::fputs( testing ? "ok\n" : "done\n", stderr ); pp.reset(); } } if( verbosity == 1 && retval == 0 ) std::fputs( testing ? "ok\n" : "done\n", stderr ); - if( retval == 2 && ignore_errors ) retval = 0; + if( retval == 2 && cl_opts.ignore_errors ) retval = 0; return retval; } @@ -776,15 +783,14 @@ int main( const int argc, const char * const argv[] ) // '0'..'9' = level, 'a' = all levels // -5..-273 = match length, -1 = all lengths int repeated_byte = -1; // 0 to 255, or -1 for all values + Cl_options cl_opts; // command line options bool force = false; - bool ignore_errors = false; - bool ignore_trailing = true; bool keep_input_files = false; - bool loose_trailing = false; bool to_stdout = false; if( argc > 0 ) invocation_name = argv[0]; - enum { opt_du = 256, opt_lt, opt_lzl, opt_lzn, opt_ref, opt_re, opt_st }; + enum { opt_cm = 256, opt_du, opt_eer, opt_lt, opt_lzl, opt_lzn, opt_mer, + opt_ref, opt_rem, opt_st }; const Arg_parser::Option options[] = { { 'a', "trailing-error", Arg_parser::no }, @@ -804,6 +810,7 @@ int main( const int argc, const char * const argv[] ) { 'n', "threads", Arg_parser::yes }, { 'o', "output", Arg_parser::yes }, { 'q', "quiet", Arg_parser::no }, + { 'R', "byte-repair", Arg_parser::no }, { 'R', "repair", Arg_parser::no }, { 's', "split", Arg_parser::no }, { 'S', "nrep-stats", Arg_parser::maybe }, @@ -814,13 +821,16 @@ int main( const int argc, const char * const argv[] ) { 'W', 
"debug-decompress", Arg_parser::yes }, { 'X', "show-packets", Arg_parser::maybe }, { 'Y', "debug-delay", Arg_parser::yes }, - { 'Z', "debug-repair", Arg_parser::yes }, + { 'Z', "debug-byte-repair", Arg_parser::yes }, + { opt_cm, "clear-marking", Arg_parser::no }, { opt_du, "dump", Arg_parser::yes }, + { opt_eer, "empty-error", Arg_parser::no }, + { opt_mer, "marking-error", Arg_parser::no }, { opt_lt, "loose-trailing", Arg_parser::no }, { opt_lzl, "lzip-level", Arg_parser::yes }, { opt_lzn, "lzip-name", Arg_parser::yes }, { opt_ref, "reference-file", Arg_parser::yes }, - { opt_re, "remove", Arg_parser::yes }, + { opt_rem, "remove", Arg_parser::yes }, { opt_st, "strip", Arg_parser::yes }, { 0 , 0, Arg_parser::no } }; @@ -838,7 +848,7 @@ int main( const int argc, const char * const argv[] ) const char * const arg = sarg.c_str(); switch( code ) { - case 'a': ignore_trailing = false; break; + case 'a': cl_opts.ignore_trailing = false; break; case 'A': set_mode( program_mode, m_alone_to_lz ); break; case 'c': to_stdout = true; break; case 'd': set_mode( program_mode, m_decompress ); break; @@ -849,7 +859,7 @@ int main( const int argc, const char * const argv[] ) parse_range( arg, pn, range, §or_size ); break; case 'f': force = true; break; case 'h': show_help(); return 0; - case 'i': ignore_errors = true; break; + case 'i': cl_opts.ignore_errors = true; break; case 'k': keep_input_files = true; break; case 'l': set_mode( program_mode, m_list ); break; case 'm': set_mode( program_mode, m_merge ); break; @@ -858,7 +868,7 @@ int main( const int argc, const char * const argv[] ) case 'o': if( sarg == "-" ) to_stdout = true; else { default_output_filename = sarg; } break; case 'q': verbosity = -1; break; - case 'R': set_mode( program_mode, m_repair ); break; + case 'R': set_mode( program_mode, m_byte_repair ); break; case 's': set_mode( program_mode, m_split ); break; case 'S': if( arg[0] ) repeated_byte = getnum( arg, pn, 0, 0, 255 ); set_mode( program_mode, m_nrep_stats ); 
break; @@ -872,18 +882,22 @@ int main( const int argc, const char * const argv[] ) if( arg[0] ) { bad_byte.parse_bb( arg, pn ); } break; case 'Y': set_mode( program_mode, m_debug_delay ); parse_range( arg, pn, range ); break; - case 'Z': set_mode( program_mode, m_debug_repair ); + case 'Z': set_mode( program_mode, m_debug_byte_repair ); bad_byte.parse_bb( arg, pn ); break; + case opt_cm: set_mode( program_mode, m_clear_marking ); + cl_opts.ignore_marking = true; break; case opt_du: set_mode( program_mode, m_dump ); - member_list.parse_ml( arg, pn ); break; - case opt_lt: loose_trailing = true; break; + member_list.parse_ml( arg, pn, cl_opts ); break; + case opt_eer: cl_opts.ignore_empty = false; break; + case opt_lt: cl_opts.loose_trailing = true; break; case opt_lzl: lzip_level = parse_lzip_level( arg, pn ); break; case opt_lzn: lzip_name = arg; break; + case opt_mer: cl_opts.ignore_marking = false; break; case opt_ref: reference_filename = arg; break; - case opt_re: set_mode( program_mode, m_remove ); - member_list.parse_ml( arg, pn ); break; + case opt_rem: set_mode( program_mode, m_remove ); + member_list.parse_ml( arg, pn, cl_opts ); break; case opt_st: set_mode( program_mode, m_strip ); - member_list.parse_ml( arg, pn ); break; + member_list.parse_ml( arg, pn, cl_opts ); break; default : internal_error( "uncaught option." ); } } // end process options @@ -913,67 +927,67 @@ int main( const int argc, const char * const argv[] ) { case m_none: internal_error( "invalid operation." 
); break; case m_alone_to_lz: break; + case m_byte_repair: + one_file( filenames.size() ); + return byte_repair( filenames[0], default_output_filename, cl_opts, + terminator, force ); + case m_clear_marking: + at_least_one_file( filenames.size() ); + return clear_marking( filenames, cl_opts ); + case m_debug_byte_repair: + one_file( filenames.size() ); + return debug_byte_repair( filenames[0].c_str(), cl_opts, bad_byte, terminator ); case m_debug_decompress: one_file( filenames.size() ); - return debug_decompress( filenames[0], bad_byte, false ); + return debug_decompress( filenames[0].c_str(), cl_opts, bad_byte, false ); case m_debug_delay: one_file( filenames.size() ); - return debug_delay( filenames[0], range, terminator ); - case m_debug_repair: - one_file( filenames.size() ); - return debug_repair( filenames[0], bad_byte, terminator ); + return debug_delay( filenames[0].c_str(), cl_opts, range, terminator ); case m_decompress: break; case m_dump: case m_strip: - if( filenames.size() < 1 ) - { show_error( "You must specify at least 1 file.", 0, true ); return 1; } - return dump_members( filenames, default_output_filename, member_list, - force, ignore_errors, ignore_trailing, - loose_trailing, program_mode == m_strip, to_stdout ); + at_least_one_file( filenames.size() ); + return dump_members( filenames, default_output_filename, cl_opts, + member_list, force, program_mode == m_strip, to_stdout ); case m_list: break; case m_md5sum: break; case m_merge: if( filenames.size() < 2 ) { show_error( "You must specify at least 2 files.", 0, true ); return 1; } - return merge_files( filenames, default_output_filename, terminator, force ); - case m_nrep_stats: return print_nrep_stats( filenames, repeated_byte, - ignore_errors, ignore_trailing, loose_trailing ); + return merge_files( filenames, default_output_filename, cl_opts, + terminator, force ); + case m_nrep_stats: + return print_nrep_stats( filenames, cl_opts, repeated_byte ); case m_range_dec: one_file( 
filenames.size() ); - return range_decompress( filenames[0], default_output_filename, range, - force, ignore_errors, ignore_trailing, - loose_trailing, to_stdout ); + return range_decompress( filenames[0], default_output_filename, + cl_opts, range, force, to_stdout ); case m_remove: - if( filenames.size() < 1 ) - { show_error( "You must specify at least 1 file.", 0, true ); return 1; } - return remove_members( filenames, member_list, ignore_errors, - ignore_trailing, loose_trailing ); - case m_repair: - one_file( filenames.size() ); - return repair_file( filenames[0], default_output_filename, terminator, force ); + at_least_one_file( filenames.size() ); + return remove_members( filenames, cl_opts, member_list ); case m_reproduce: one_file( filenames.size() ); if( !reference_filename || !reference_filename[0] ) { show_error( "You must specify a reference file.", 0, true ); return 1; } if( range.size() > 0 ) - return debug_reproduce_file( filenames[0], lzip_name, - reference_filename, range, sector_size, lzip_level ); + return debug_reproduce_file( filenames[0].c_str(), lzip_name, + reference_filename, cl_opts, range, sector_size, lzip_level ); else - return reproduce_file( filenames[0], default_output_filename, - lzip_name, reference_filename, lzip_level, terminator, force ); + return reproduce_file( filenames[0], default_output_filename, lzip_name, + reference_filename, cl_opts, lzip_level, terminator, force ); case m_show_packets: one_file( filenames.size() ); - return debug_decompress( filenames[0], bad_byte, true ); + return debug_decompress( filenames[0].c_str(), cl_opts, bad_byte, true ); case m_split: one_file( filenames.size() ); - return split_file( filenames[0], default_output_filename, force ); + return split_file( filenames[0], default_output_filename, cl_opts, force ); case m_test: break; case m_unzcrash_bit: one_file( filenames.size() ); - return lunzcrash_bit( filenames[0].c_str() ); + return lunzcrash_bit( filenames[0].c_str(), cl_opts ); case 
m_unzcrash_block: one_file( filenames.size() ); - return lunzcrash_block( filenames[0].c_str(), sector_size ); + return lunzcrash_block( filenames[0].c_str(), cl_opts, sector_size ); } } catch( std::bad_alloc & ) { show_error( mem_msg ); cleanup_and_fail( 1 ); } @@ -981,10 +995,8 @@ int main( const int argc, const char * const argv[] ) if( filenames.empty() ) filenames.push_back("-"); - if( program_mode == m_list ) - return list_files( filenames, ignore_errors, ignore_trailing, loose_trailing ); - if( program_mode == m_md5sum ) - return md5sum_files( filenames ); + if( program_mode == m_list ) return list_files( filenames, cl_opts ); + if( program_mode == m_md5sum ) return md5sum_files( filenames ); if( program_mode != m_alone_to_lz && program_mode != m_decompress && program_mode != m_test ) @@ -1028,7 +1040,7 @@ int main( const int argc, const char * const argv[] ) infd = open_instream( input_filename.c_str(), &in_stats, one_to_one ); if( infd < 0 ) { set_retval( retval, 1 ); continue; } if( !check_tty_in( pp.name(), infd, program_mode, retval ) ) continue; - if( one_to_one ) // open outfd after verifying infd + if( one_to_one ) // open outfd after checking infd { if( program_mode == m_alone_to_lz ) set_a_outname( input_filename ); else set_d_outname( input_filename, extension_index( input_filename ) ); @@ -1040,7 +1052,7 @@ int main( const int argc, const char * const argv[] ) if( one_to_one && !check_tty_out( program_mode ) ) { set_retval( retval, 1 ); return retval; } // don't delete a tty - if( to_file && outfd < 0 ) // open outfd after verifying infd + if( to_file && outfd < 0 ) // open outfd after checking infd { output_filename = default_output_filename; if( !open_outstream( force, false ) || !check_tty_out( program_mode ) ) @@ -1057,8 +1069,7 @@ int main( const int argc, const char * const argv[] ) if( program_mode == m_alone_to_lz ) tmp = alone_to_lz( infd, pp ); else - tmp = decompress( cfile_size, infd, pp, ignore_errors, ignore_trailing, - 
loose_trailing, program_mode == m_test ); + tmp = decompress( cfile_size, infd, cl_opts, pp, program_mode == m_test ); } catch( std::bad_alloc & ) { pp( mem_msg ); tmp = 1; } catch( Error & e ) { pp(); show_error( e.msg, errno ); tmp = 1; } @@ -1073,7 +1084,7 @@ int main( const int argc, const char * const argv[] ) if( delete_output_on_interrupt && one_to_one ) close_and_set_permissions( in_statsp ); if( input_filename.size() && !keep_input_files && one_to_one && - ( program_mode != m_decompress || !ignore_errors ) ) + ( program_mode != m_decompress || !cl_opts.ignore_errors ) ) std::remove( input_filename.c_str() ); } if( delete_output_on_interrupt ) close_and_set_permissions( 0 ); // -o diff --git a/main_common.cc b/main_common.cc index 8f56a13..1e592c6 100644 --- a/main_common.cc +++ b/main_common.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2023 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,8 +17,7 @@ namespace { -const char * const program_year = "2022"; -const char * const mem_msg = "Not enough memory."; +const char * const program_year = "2023"; void show_version() { @@ -30,12 +29,12 @@ void show_version() } -// separate large numbers >= 100_000 in groups of 3 digits using '_' +// separate numbers of 5 or more digits in groups of 3 digits using '_' const char * format_num3( long long num ) { const char * const si_prefix = "kMGTPEZY"; const char * const binary_prefix = "KMGTPEZY"; - enum { buffers = 8, bufsize = 4 * sizeof (long long) }; + enum { buffers = 8, bufsize = 4 * sizeof num }; static char buffer[buffers][bufsize]; // circle of static buffers for printf static int current = 0; @@ -43,23 +42,20 @@ const char * format_num3( long long num ) char * p = buf + bufsize - 1; // fill the buffer backwards *p = 0; // terminator const bool negative = 
num < 0; - if( negative ) num = -num; - if( num > 1024 ) - { - char prefix = 0; // try binary first, then si - for( int i = 0; i < 8 && num >= 1024 && num % 1024 == 0; ++i ) - { num /= 1024; prefix = binary_prefix[i]; } - if( prefix ) *(--p) = 'i'; - else - for( int i = 0; i < 8 && num >= 1000 && num % 1000 == 0; ++i ) - { num /= 1000; prefix = si_prefix[i]; } - if( prefix ) *(--p) = prefix; - } - const bool split = num >= 100000; + char prefix = 0; // try binary first, then si + for( int i = 0; i < 8 && num != 0 && ( num / 1024 ) * 1024 == num; ++i ) + { num /= 1024; prefix = binary_prefix[i]; } + if( prefix ) *(--p) = 'i'; + else + for( int i = 0; i < 8 && num != 0 && ( num / 1000 ) * 1000 == num; ++i ) + { num /= 1000; prefix = si_prefix[i]; } + if( prefix ) *(--p) = prefix; + const bool split = num >= 10000 || num <= -10000; for( int i = 0; ; ) { - *(--p) = num % 10 + '0'; num /= 10; if( num == 0 ) break; + long long onum = num; num /= 10; + *(--p) = llabs( onum - ( 10 * num ) ) + '0'; if( num == 0 ) break; if( split && ++i >= 3 ) { i = 0; *(--p) = '_'; } } if( negative ) *(--p) = '-'; @@ -67,10 +63,19 @@ const char * format_num3( long long num ) } +void show_option_error( const char * const arg, const char * const msg, + const char * const option_name ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: '%s': %s option '%s'.\n", + program_name, arg, msg, option_name ); + } + + // Recognized formats: [YZEPTGM][i][Bs], k[Bs], Ki[Bs] // long long getnum( const char * const arg, const char * const option_name, - const int hardbs, const long long llimit = -LLONG_MAX, + const int hardbs, const long long llimit = LLONG_MIN, const long long ulimit = LLONG_MAX, const char ** const tailp = 0 ) { @@ -78,12 +83,8 @@ long long getnum( const char * const arg, const char * const option_name, errno = 0; long long result = strtoll( arg, &tail, 0 ); if( tail == arg ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Bad or missing numerical argument in " - "option 
'%s'.\n", program_name, option_name ); - std::exit( 1 ); - } + { show_option_error( arg, "Bad or missing numerical argument in", + option_name ); std::exit( 1 ); } if( !errno && tail[0] ) { @@ -93,6 +94,8 @@ long long getnum( const char * const arg, const char * const option_name, char usuf = 0; // 'B' or 's' unit suffix is present switch( *p ) { + case 'Q': exponent = 10; break; + case 'R': exponent = 9; break; case 'Y': exponent = 8; break; case 'Z': exponent = 7; break; case 'E': exponent = 6; break; @@ -111,20 +114,18 @@ long long getnum( const char * const arg, const char * const option_name, { usuf = tail[0]; ++tail; } if( exponent < 0 || ( usuf == 's' && hardbs <= 0 ) || ( !tailp && tail[0] != 0 ) ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Bad multiplier in numerical argument of " - "option '%s'.\n", program_name, option_name ); - std::exit( 1 ); - } + { show_option_error( arg, "Bad multiplier in numerical argument of", + option_name ); std::exit( 1 ); } for( int i = 0; i < exponent; ++i ) { - if( LLONG_MAX / factor >= llabs( result ) ) result *= factor; + if( ( result >= 0 && LLONG_MAX / factor >= result ) || + ( result < 0 && LLONG_MIN / factor <= result ) ) result *= factor; else { errno = ERANGE; break; } } if( usuf == 's' ) { - if( LLONG_MAX / hardbs >= llabs( result ) ) result *= hardbs; + if( ( result >= 0 && LLONG_MAX / hardbs >= result ) || + ( result < 0 && LLONG_MIN / hardbs <= result ) ) result *= hardbs; else errno = ERANGE; } } @@ -132,8 +133,8 @@ long long getnum( const char * const arg, const char * const option_name, if( errno ) { if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Numerical argument out of limits [%s,%s] " - "in option '%s'.\n", program_name, format_num3( llimit ), + std::fprintf( stderr, "%s: '%s': Value out of limits [%s,%s] in " + "option '%s'.\n", program_name, arg, format_num3( llimit ), format_num3( ulimit ), option_name ); std::exit( 1 ); } @@ -148,16 +149,14 @@ long long getnum( const char * const arg, 
const char * const option_name, // void Bad_byte::parse_bb( const char * const arg, const char * const pn ) { + argument = arg; option_name = pn; const char * tail; pos = getnum( arg, option_name, 0, 0, LLONG_MAX, &tail ); if( tail[0] != ',' ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Bad separator between <pos> and <val> in " - "argument of option '%s'.\n", program_name, option_name ); - std::exit( 1 ); - } + { show_option_error( arg, ( tail[0] == 0 ) ? "Missing <val> in" : + "Missing comma between <pos> and <val> in", + option_name ); std::exit( 1 ); } if( tail[1] == '+' ) { ++tail; mode = delta; } else if( tail[1] == 'f' ) { ++tail; mode = flip; } else mode = literal; diff --git a/md5.cc b/md5.cc index 2cec5cd..2793909 100644 --- a/md5.cc +++ b/md5.cc @@ -1,6 +1,6 @@ /* Functions to compute MD5 message digest of memory blocks according to the definition of MD5 in RFC 1321 from April 1992. - Copyright (C) 2020-2022 Antonio Diaz Diaz. + Copyright (C) 2020-2023 Antonio Diaz Diaz. This library is free software. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -162,7 +162,7 @@ void MD5SUM::md5_update( const uint8_t * const buffer, const unsigned long len ) // finish computation and return the digest -void MD5SUM::md5_finish( uint8_t digest[16] ) +void MD5SUM::md5_finish( md5_type & digest ) { uint8_t padding[64] = { 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -189,7 +189,7 @@ void MD5SUM::md5_finish( uint8_t digest[16] ) void compute_md5( const uint8_t * const buffer, const unsigned long len, - uint8_t digest[16] ) + md5_type & digest ) { MD5SUM md5sum; if( len > 0 ) md5sum.md5_update( buffer, len ); @@ -198,9 +198,9 @@ void compute_md5( const uint8_t * const buffer, const unsigned long len, bool check_md5( const uint8_t * const buffer, const unsigned long len, - const uint8_t digest[16] ) + const md5_type & digest ) { - uint8_t new_digest[16]; + md5_type new_digest; compute_md5( buffer, len, new_digest ); - return ( std::memcmp( digest, new_digest, 16 ) == 0 ); + return digest == new_digest; } diff --git a/md5.h b/md5.h index fd84461..552a079 100644 --- a/md5.h +++ b/md5.h @@ -1,6 +1,6 @@ /* Functions to compute MD5 message digest of memory blocks according to the definition of MD5 in RFC 1321 from April 1992. - Copyright (C) 2020-2022 Antonio Diaz Diaz. + Copyright (C) 2020-2023 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -18,6 +18,18 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
*/ +struct md5_type + { + uint8_t data[16]; // 128-bit md5 digest + + bool operator==( const md5_type & d ) const + { return ( std::memcmp( data, d.data, 16 ) == 0 ); } + bool operator!=( const md5_type & d ) const { return !( *this == d ); } +// const uint8_t & operator[]( const int i ) const { return data[i]; } + uint8_t & operator[]( const int i ) { return data[i]; } + }; + + class MD5SUM { uint64_t count; // data length in bytes, modulo 2^64 @@ -39,11 +51,11 @@ public: } void md5_update( const uint8_t * const buffer, const unsigned long len ); - void md5_finish( uint8_t digest[16] ); + void md5_finish( md5_type & digest ); }; void compute_md5( const uint8_t * const buffer, const unsigned long len, - uint8_t digest[16] ); + md5_type & digest ); bool check_md5( const uint8_t * const buffer, const unsigned long len, - const uint8_t digest[16] ); + const md5_type & digest ); diff --git a/merge.cc b/merge.cc index 8335841..509c94f 100644 --- a/merge.cc +++ b/merge.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2023 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -140,24 +140,26 @@ bool diff_member( const long long mpos, const long long msize, continue; std::vector< Block > bv; long long partial_pos = 0; + const char * const filename1 = filenames[i1].c_str(); + const char * const filename2 = filenames[i2].c_str(); const int fd1 = infd_vector[i1], fd2 = infd_vector[i2]; int begin = -1; // begin of block. 
-1 means no block bool prev_equal = true; - if( !safe_seek( fd1, mpos ) || !safe_seek( fd2, mpos ) ) - { error = true; break; } + if( !safe_seek( fd1, mpos, filename1 ) || + !safe_seek( fd2, mpos, filename2 ) ) { error = true; break; } while( partial_pos < msize ) { const int size = std::min( (long long)buffer_size, msize - partial_pos ); const int rd = readblock( fd1, buffer1, size ); if( rd != size && errno ) - { show_file_error( filenames[i1].c_str(), "Error reading input file", - errno ); error = true; break; } + { show_file_error( filename1, "Error reading input file", errno ); + error = true; break; } if( rd > 0 ) { if( readblock( fd2, buffer2, rd ) != rd ) - { show_file_error( filenames[i2].c_str(), "Error reading input file", - errno ); error = true; break; } + { show_file_error( filename2, "Error reading input file", errno ); + error = true; break; } for( int i = 0; i < rd; ++i ) { if( buffer1[i] != buffer2[i] ) @@ -215,7 +217,8 @@ long ipow( const unsigned base, const unsigned exponent ) int open_input_files( const std::vector< std::string > & filenames, std::vector< int > & infd_vector, - Lzip_index & lzip_index, struct stat * const in_statsp ) + const Cl_options & cl_opts, Lzip_index & lzip_index, + struct stat * const in_statsp ) { const int files = filenames.size(); for( int i = 0; i + 1 < files; ++i ) @@ -245,7 +248,7 @@ int open_input_files( const std::vector< std::string > & filenames, for( int i = 0; i < files; ++i ) { long long tmp; - const Lzip_index li( infd_vector[i], true, true, true ); + const Lzip_index li( infd_vector[i], cl_opts, true ); if( li.retval() == 0 ) // file format is intact { if( good_i < 0 ) { good_i = i; lzip_index = li; } @@ -283,20 +286,21 @@ int open_input_files( const std::vector< std::string > & filenames, for( int i = 0; i < files; ++i ) { + const char * const filename = filenames[i].c_str(); const int infd = infd_vector[i]; bool error = false; for( long j = 0; j < lzip_index.members(); ++j ) { const long long mpos = 
lzip_index.mblock( j ).pos(); const long long msize = lzip_index.mblock( j ).size(); - if( !safe_seek( infd, mpos ) ) return 1; + if( !safe_seek( infd, mpos, filename ) ) return 1; if( test_member_from_file( infd, msize ) != 0 ) { error = true; break; } } if( !error ) { if( verbosity >= 1 ) - std::printf( "File '%s' has no errors. Recovery is not needed.\n", - filenames[i].c_str() ); + std::printf( "Input file '%s' has no errors. Recovery is not needed.\n", + filename ); return 0; } } @@ -345,7 +349,8 @@ bool color_done( const std::vector< int > & color_vector, const int i ) // try dividing blocks in 2 color groups at every gap -bool try_merge_member2( const long long mpos, const long long msize, +bool try_merge_member2( const std::vector< std::string > & filenames, + const long long mpos, const long long msize, const std::vector< Block > & block_vector, const std::vector< int > & color_vector, const std::vector< int > & infd_vector, @@ -361,8 +366,8 @@ bool try_merge_member2( const long long mpos, const long long msize, if( i1 == i2 || color_vector[i1] == color_vector[i2] || color_done( color_vector, i1 ) ) continue; for( int bi = 0; bi < blocks; ++bi ) - if( !safe_seek( infd_vector[i2], block_vector[bi].pos() ) || - !safe_seek( outfd, block_vector[bi].pos() ) || + if( !safe_seek( infd_vector[i2], block_vector[bi].pos(), filenames[i2].c_str() ) || + !safe_seek( outfd, block_vector[bi].pos(), output_filename.c_str() ) || !copy_file( infd_vector[i2], outfd, block_vector[bi].size() ) ) cleanup_and_fail( 1 ); const int infd = infd_vector[i1]; @@ -375,10 +380,10 @@ bool try_merge_member2( const long long mpos, const long long msize, var, variations, bi + 1, terminator ); std::fflush( stdout ); pending_newline = true; } - if( !safe_seek( infd, block_vector[bi].pos() ) || - !safe_seek( outfd, block_vector[bi].pos() ) || + if( !safe_seek( infd, block_vector[bi].pos(), filenames[i1].c_str() ) || + !safe_seek( outfd, block_vector[bi].pos(), output_filename.c_str() ) || 
!copy_file( infd, outfd, block_vector[bi].size() ) || - !safe_seek( outfd, mpos ) ) + !safe_seek( outfd, mpos, output_filename.c_str() ) ) cleanup_and_fail( 1 ); long long failure_pos = 0; if( test_member_from_file( outfd, msize, &failure_pos ) == 0 ) @@ -391,7 +396,8 @@ bool try_merge_member2( const long long mpos, const long long msize, // merge block by block -bool try_merge_member( const long long mpos, const long long msize, +bool try_merge_member( const std::vector< std::string > & filenames, + const long long mpos, const long long msize, const std::vector< Block > & block_vector, const std::vector< int > & color_vector, const std::vector< int > & infd_vector, @@ -425,13 +431,14 @@ bool try_merge_member( const long long mpos, const long long msize, while( bi < blocks ) { const int infd = infd_vector[file_idx[bi]]; - if( !safe_seek( infd, block_vector[bi].pos() ) || - !safe_seek( outfd, block_vector[bi].pos() ) || + if( !safe_seek( infd, block_vector[bi].pos(), filenames[file_idx[bi]].c_str() ) || + !safe_seek( outfd, block_vector[bi].pos(), output_filename.c_str() ) || !copy_file( infd, outfd, block_vector[bi].size() ) ) cleanup_and_fail( 1 ); ++bi; } - if( !safe_seek( outfd, mpos ) ) cleanup_and_fail( 1 ); + if( !safe_seek( outfd, mpos, output_filename.c_str() ) ) + cleanup_and_fail( 1 ); long long failure_pos = 0; if( test_member_from_file( outfd, msize, &failure_pos ) == 0 ) return true; while( bi > 0 && mpos + failure_pos < block_vector[bi-1].pos() ) --bi; @@ -448,7 +455,8 @@ bool try_merge_member( const long long mpos, const long long msize, // merge a single block split at every possible position -bool try_merge_member1( const long long mpos, const long long msize, +bool try_merge_member1( const std::vector< std::string > & filenames, + const long long mpos, const long long msize, const std::vector< Block > & block_vector, const std::vector< int > & color_vector, const std::vector< int > & infd_vector, @@ -467,9 +475,9 @@ bool try_merge_member1( const 
long long mpos, const long long msize, if( i1 == i2 || color_vector[i1] == color_vector[i2] || color_done( color_vector, i1 ) ) continue; const int infd = infd_vector[i1]; - if( !safe_seek( infd, pos ) || - !safe_seek( infd_vector[i2], pos ) || - !safe_seek( outfd, pos ) || + if( !safe_seek( infd, pos, filenames[i1].c_str() ) || + !safe_seek( infd_vector[i2], pos, filenames[i2].c_str() ) || + !safe_seek( outfd, pos, output_filename.c_str() ) || !copy_file( infd_vector[i2], outfd, size ) ) cleanup_and_fail( 1 ); const int var = ( i1 * ( files - 1 ) ) + i2 - ( i2 > i1 ) + 1; @@ -481,10 +489,10 @@ bool try_merge_member1( const long long mpos, const long long msize, var, variations, pos + i, terminator ); std::fflush( stdout ); pending_newline = true; } - if( !safe_seek( outfd, pos + i ) || + if( !safe_seek( outfd, pos + i, output_filename.c_str() ) || readblock( infd, &byte, 1 ) != 1 || writeblock( outfd, &byte, 1 ) != 1 || - !safe_seek( outfd, mpos ) ) + !safe_seek( outfd, mpos, output_filename.c_str() ) ) cleanup_and_fail( 1 ); long long failure_pos = 0; if( test_member_from_file( outfd, msize, &failure_pos ) == 0 ) @@ -498,9 +506,9 @@ bool try_merge_member1( const long long mpos, const long long msize, } // end namespace -// infd and outfd can refer to the same file if copying to a lower file -// position or if source and destination blocks don't overlap. -// max_size < 0 means no size limit. +/* infd and outfd can refer to the same file if copying to a lower file + position or if source and destination blocks don't overlap. + max_size < 0 means no size limit. */ bool copy_file( const int infd, const int outfd, const long long max_size ) { const int buffer_size = 65536; @@ -534,8 +542,8 @@ bool copy_file( const int infd, const int outfd, const long long max_size ) } -// Return value: 0 = OK, 1 = bad msize, 2 = data error -// 'failure_pos' is relative to the beginning of the member +/* Return value: 0 = OK, 1 = bad msize, 2 = data error. 
+ 'failure_pos' is relative to the beginning of the member. */ int test_member_from_file( const int infd, const unsigned long long msize, long long * const failure_posp ) { @@ -544,15 +552,15 @@ int test_member_from_file( const int infd, const unsigned long long msize, rdec.read_data( header.data, Lzip_header::size ); const unsigned dictionary_size = header.dictionary_size(); bool done = false; - if( !rdec.finished() && header.verify_magic() && - header.verify_version() && isvalid_ds( dictionary_size ) ) + if( !rdec.finished() && header.check_magic() && + header.check_version() && isvalid_ds( dictionary_size ) ) { LZ_decoder decoder( rdec, dictionary_size, -1 ); - const int old_verbosity = verbosity; + const int saved_verbosity = verbosity; verbosity = -1; // suppress all messages Pretty_print dummy_pp( "" ); done = ( decoder.decode_member( dummy_pp ) == 0 ); - verbosity = old_verbosity; // restore verbosity level + verbosity = saved_verbosity; // restore verbosity level if( done && rdec.member_position() == msize ) return 0; } if( failure_posp ) *failure_posp = rdec.member_position(); @@ -562,16 +570,17 @@ int test_member_from_file( const int infd, const unsigned long long msize, int merge_files( const std::vector< std::string > & filenames, const std::string & default_output_filename, - const char terminator, const bool force ) + const Cl_options & cl_opts, const char terminator, + const bool force ) { const int files = filenames.size(); std::vector< int > infd_vector( files ); Lzip_index lzip_index; struct stat in_stats; const int retval = - open_input_files( filenames, infd_vector, lzip_index, &in_stats ); + open_input_files( filenames, infd_vector, cl_opts, lzip_index, &in_stats ); if( retval >= 0 ) return retval; - if( !safe_seek( infd_vector[0], 0 ) ) return 1; + if( !safe_seek( infd_vector[0], 0, filenames[0].c_str() ) ) return 1; output_filename = default_output_filename.empty() ? 
insert_fixed( filenames[0] ) : default_output_filename; @@ -589,7 +598,7 @@ int merge_files( const std::vector< std::string > & filenames, // different color means members are different std::vector< int > color_vector( files, 0 ); if( !diff_member( mpos, msize, filenames, infd_vector, block_vector, - color_vector ) || !safe_seek( outfd, mpos ) ) + color_vector ) || !safe_seek( outfd, mpos, output_filename.c_str() ) ) cleanup_and_fail( 1 ); if( block_vector.empty() ) @@ -614,21 +623,21 @@ int merge_files( const std::vector< std::string > & filenames, if( block_vector.size() > 1 ) { maybe_cluster_blocks( block_vector ); - done = try_merge_member2( mpos, msize, block_vector, color_vector, - infd_vector, terminator ); + done = try_merge_member2( filenames, mpos, msize, block_vector, + color_vector, infd_vector, terminator ); print_pending_newline( terminator ); } // With just one member and one differing block the merge can't succeed. if( !done && ( lzip_index.members() > 1 || block_vector.size() > 1 ) ) { - done = try_merge_member( mpos, msize, block_vector, color_vector, - infd_vector, terminator ); + done = try_merge_member( filenames, mpos, msize, block_vector, + color_vector, infd_vector, terminator ); print_pending_newline( terminator ); } if( !done ) { - done = try_merge_member1( mpos, msize, block_vector, color_vector, - infd_vector, terminator ); + done = try_merge_member1( filenames, mpos, msize, block_vector, + color_vector, infd_vector, terminator ); print_pending_newline( terminator ); } if( !done ) diff --git a/mtester.cc b/mtester.cc index ecdb9c0..33126ee 100644 --- a/mtester.cc +++ b/mtester.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2023 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -91,7 +91,7 @@ void LZ_mtester::flush_data() } -bool LZ_mtester::verify_trailer( FILE * const f, unsigned long long byte_pos ) +bool LZ_mtester::check_trailer( FILE * const f, unsigned long long byte_pos ) { const Lzip_trailer * const trailer = rdec.get_trailer(); if( !trailer ) @@ -103,7 +103,7 @@ bool LZ_mtester::verify_trailer( FILE * const f, unsigned long long byte_pos ) return false; } const unsigned long long data_size = data_position(); - const unsigned long long member_size = rdec.member_position(); + const unsigned long member_size = rdec.member_position(); bool error = false; const unsigned td_crc = trailer->data_crc(); @@ -133,7 +133,7 @@ bool LZ_mtester::verify_trailer( FILE * const f, unsigned long long byte_pos ) if( verbosity >= 0 && f ) { if( byte_pos ) { std::fprintf( f, "byte %llu\n", byte_pos ); byte_pos = 0; } - std::fprintf( f, "Member size mismatch; stored %llu (0x%llX), computed %llu (0x%llX)\n", + std::fprintf( f, "Member size mismatch; stored %llu (0x%llX), computed %lu (0x%lX)\n", tm_size, tm_size, member_size, member_size ); } } return !error; @@ -143,7 +143,7 @@ bool LZ_mtester::verify_trailer( FILE * const f, unsigned long long byte_pos ) /* Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF, 3 = trailer error, 4 = unknown marker found, -1 = pos_limit reached. 
*/ -int LZ_mtester::test_member( const unsigned long long mpos_limit, +int LZ_mtester::test_member( const unsigned long mpos_limit, const unsigned long long dpos_limit, FILE * const f, const unsigned long long byte_pos ) { @@ -214,9 +214,7 @@ int LZ_mtester::test_member( const unsigned long long mpos_limit, rdec.normalize(); flush_data(); if( len == min_match_len ) // End Of Stream marker - { - if( verify_trailer( f, byte_pos ) ) return 0; else return 3; - } + { if( check_trailer( f, byte_pos ) ) return 0; else return 3; } if( verbosity >= 0 && f ) { if( byte_pos ) std::fprintf( f, "byte %llu\n", byte_pos ); @@ -234,7 +232,7 @@ int LZ_mtester::test_member( const unsigned long long mpos_limit, } copy_block( rep0, len ); } - if( outfd >= 0 ) flush_data(); + if( outfd >= 0 ) flush_data(); // else no need to flush if error return 2; } @@ -245,7 +243,7 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos, const bool show_packets ) { rdec.load(); - unsigned old_tmpos = member_position(); // truncated member_position + unsigned old_tmpos = member_position(); // truncated member position while( !rdec.finished() ) { const unsigned long long dp = data_position() + dpos; @@ -348,7 +346,7 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos, if( show_packets ) std::printf( "%6llu %6llu member trailer\n", mpos + member_position(), dpos + data_position() ); - if( verify_trailer( show_packets ? stdout : 0 ) ) return 0; + if( check_trailer( show_packets ? stdout : 0 ) ) return 0; return 3; } if( len == min_match_len + 1 ) // Sync Flush marker diff --git a/mtester.h b/mtester.h index 12c7d2d..cd07cd8 100644 --- a/mtester.h +++ b/mtester.h @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2023 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,14 +18,14 @@ class Range_mtester { const uint8_t * const buffer; // input buffer - const long long buffer_size; - long long pos; // current pos in buffer + const long buffer_size; + long pos; // current pos in buffer uint32_t code; uint32_t range; bool at_stream_end; public: - Range_mtester( const uint8_t * const buf, const long long buf_size ) + Range_mtester( const uint8_t * const buf, const long buf_size ) : buffer( buf ), buffer_size( buf_size ), @@ -36,7 +36,7 @@ public: {} bool finished() { return pos >= buffer_size; } - unsigned long long member_position() const { return pos; } + unsigned long member_position() const { return pos; } uint8_t get_byte() { @@ -56,9 +56,9 @@ public: void load() { code = 0; - for( int i = 0; i < 5; ++i ) code = ( code << 8 ) | get_byte(); range = 0xFFFFFFFFU; - code &= range; // make sure that first byte is discarded + get_byte(); // discard first byte of the LZMA stream + for( int i = 0; i < 4; ++i ) code = ( code << 8 ) | get_byte(); } void normalize() @@ -83,7 +83,7 @@ public: return symbol; } - unsigned decode_bit( Bit_model & bm ) + bool decode_bit( Bit_model & bm ) { normalize(); const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability; @@ -275,7 +275,7 @@ class LZ_mtester void print_block( const int len ); void flush_data(); - bool verify_trailer( FILE * const f = 0, unsigned long long byte_pos = 0 ); + bool check_trailer( FILE * const f = 0, unsigned long long byte_pos = 0 ); uint8_t peek_prev() const { return buffer[((pos > 0) ? 
pos : dictionary_size)-1]; } @@ -336,7 +336,7 @@ void set_max_marker( const unsigned new_size ) { if( max_marker_size_ < new_size ) max_marker_size_ = new_size; } public: - LZ_mtester( const uint8_t * const ibuf, const long long ibuf_size, + LZ_mtester( const uint8_t * const ibuf, const long ibuf_size, const unsigned dict_size, const int ofd = -1, MD5SUM * const md5sum_ = 0 ) : @@ -367,7 +367,7 @@ public: unsigned crc() const { return crc_ ^ 0xFFFFFFFFU; } unsigned long long data_position() const { return partial_data_pos + pos; } bool finished() { return rdec.finished(); } - unsigned long long member_position() const { return rdec.member_position(); } + unsigned long member_position() const { return rdec.member_position(); } unsigned long long total_packets() const { return total_packets_; } unsigned long long max_distance_pos() const { return max_rep0_pos; } unsigned max_distance() const { return max_rep0 + 1; } @@ -385,7 +385,7 @@ public: void duplicate_buffer( uint8_t * const buffer2 ); // these two functions set max_rep0 - int test_member( const unsigned long long mpos_limit = LLONG_MAX, + int test_member( const unsigned long mpos_limit = LONG_MAX, const unsigned long long dpos_limit = LLONG_MAX, FILE * const f = 0, const unsigned long long byte_pos = 0 ); /* this function also sets max_rep0_pos, total_packets_, max_packet_size_, diff --git a/nrep_stats.cc b/nrep_stats.cc index 1f249ff..1d5b598 100644 --- a/nrep_stats.cc +++ b/nrep_stats.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2023 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -38,11 +38,11 @@ file with the longest sequence. 
*/ int print_nrep_stats( const std::vector< std::string > & filenames, - const int repeated_byte, const bool ignore_errors, - const bool ignore_trailing, const bool loose_trailing ) + const Cl_options & cl_opts, const int repeated_byte ) { std::vector< unsigned long > len_vector; - unsigned long long best_pos = 0, lzma_size = 0; + unsigned long long lzma_size = 0; // total size of LZMA data + unsigned long best_pos = 0; int best_name = -1, retval = 0; const bool count_all = ( repeated_byte < 0 || repeated_byte >= 256 ); bool stdin_used = false; @@ -57,8 +57,8 @@ int print_nrep_stats( const std::vector< std::string > & filenames, open_instream( input_filename, &in_stats, false, true ); if( infd < 0 ) { set_retval( retval, 1 ); continue; } - const Lzip_index lzip_index( infd, ignore_trailing, loose_trailing, - ignore_errors, ignore_errors ); + const Lzip_index lzip_index( infd, cl_opts, cl_opts.ignore_errors, + cl_opts.ignore_errors ); if( lzip_index.retval() != 0 ) { show_file_error( input_filename, lzip_index.error().c_str() ); @@ -67,6 +67,9 @@ int print_nrep_stats( const std::vector< std::string > & filenames, continue; } const unsigned long long cdata_size = lzip_index.cdata_size(); + if( !fits_in_size_t( cdata_size ) ) // mmap uses size_t + { show_file_error( input_filename, "Input file is too large for mmap." 
); + set_retval( retval, 1 ); close( infd ); continue; } const uint8_t * const buffer = (const uint8_t *)mmap( 0, cdata_size, PROT_READ, MAP_PRIVATE, infd, 0 ); close( infd ); @@ -76,8 +79,8 @@ int print_nrep_stats( const std::vector< std::string > & filenames, for( long j = 0; j < lzip_index.members(); ++j ) { const Block & mb = lzip_index.mblock( j ); - long long pos = mb.pos() + 7; // skip header (+1 byte) and - const long long end = mb.end() - 20; // trailer of each member + long pos = mb.pos() + 7; // skip header (+1 byte) and + const long end = mb.end() - 20; // trailer of each member lzma_size += end - pos; while( pos < end ) { @@ -97,6 +100,7 @@ int print_nrep_stats( const std::vector< std::string > & filenames, munmap( (void *)buffer, cdata_size ); } + if( verbosity < 0 ) return retval; if( count_all ) std::fputs( "\nShowing repeated sequences of any byte value.\n", stdout ); else @@ -111,7 +115,7 @@ int print_nrep_stats( const std::vector< std::string > & filenames, len, len_vector[len], lzma_size / len_vector[len], format_num( 1ULL << ( 8 * ( len - count_all ) ), -1ULL, -1 ) ); if( best_name >= 0 ) - std::printf( "Longest sequence found at position %llu of '%s'\n", + std::printf( "Longest sequence found at position %lu of '%s'\n", best_pos, filenames[best_name].c_str() ); return retval; } diff --git a/range_dec.cc b/range_dec.cc index ea7f7e7..b6000ec 100644 --- a/range_dec.cc +++ b/range_dec.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2023 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -45,8 +45,8 @@ bool decompress_member( const int infd, const Pretty_print & pp, rdec.read_data( header.data, Lzip_header::size ); if( rdec.finished() ) // End Of File { pp( "File ends unexpectedly at member header." 
); return false; } - if( !header.verify_magic() ) { pp( bad_magic_msg ); return false; } - if( !header.verify_version() ) + if( !header.check_magic() ) { pp( bad_magic_msg ); return false; } + if( !header.check_version() ) { pp( bad_version( header.version() ) ); return false; } const unsigned dictionary_size = header.dictionary_size(); if( !isvalid_ds( dictionary_size ) ) { pp( bad_dict_msg ); return false; } @@ -113,36 +113,36 @@ const char * format_num( unsigned long long num, } -bool safe_seek( const int fd, const long long pos ) +bool safe_seek( const int fd, const long long pos, + const char * const filename ) { if( lseek( fd, pos, SEEK_SET ) == pos ) return true; - show_error( "Seek error", errno ); return false; + show_file_error( filename, "Seek error", errno ); + return false; } int range_decompress( const std::string & input_filename, const std::string & default_output_filename, - Block range, const bool force, const bool ignore_errors, - const bool ignore_trailing, const bool loose_trailing, - const bool to_stdout ) + const Cl_options & cl_opts, Block range, + const bool force, const bool to_stdout ) { + const char * const filename = input_filename.c_str(); struct stat in_stats; - const int infd = - open_instream( input_filename.c_str(), &in_stats, false, true ); + const int infd = open_instream( filename, &in_stats, false, true ); if( infd < 0 ) return 1; - const Lzip_index lzip_index( infd, ignore_trailing, loose_trailing, - ignore_errors, ignore_errors ); + const Lzip_index lzip_index( infd, cl_opts, cl_opts.ignore_errors, + cl_opts.ignore_errors ); if( lzip_index.retval() != 0 ) - { show_file_error( input_filename.c_str(), lzip_index.error().c_str() ); + { show_file_error( filename, lzip_index.error().c_str() ); return lzip_index.retval(); } const long long udata_size = lzip_index.udata_size(); if( range.end() > udata_size ) range.size( std::max( 0LL, udata_size - range.pos() ) ); if( range.size() <= 0 ) - { if( udata_size > 0 ) - show_file_error( 
input_filename.c_str(), "Nothing to do." ); + { if( udata_size > 0 ) show_file_error( filename, "Nothing to do." ); return 0; } if( to_stdout || default_output_filename.empty() ) outfd = STDOUT_FILENO; @@ -171,13 +171,15 @@ int range_decompress( const std::string & input_filename, const long long outskip = std::max( 0LL, range.pos() - db.pos() ); const long long outend = std::min( db.size(), range.end() - db.pos() ); const long long mpos = lzip_index.mblock( i ).pos(); - if( !safe_seek( infd, mpos ) ) cleanup_and_fail( 1 ); + if( !safe_seek( infd, mpos, filename ) ) cleanup_and_fail( 1 ); if( !decompress_member( infd, pp, mpos, outskip, outend ) ) - { if( !ignore_errors ) cleanup_and_fail( 2 ); else error = true; } + { if( cl_opts.ignore_errors ) error = true; else cleanup_and_fail( 2 ); } pp.reset(); } } - close( infd ); + if( close( infd ) != 0 ) + { show_file_error( filename, "Error closing input file", errno ); + cleanup_and_fail( 1 ); } if( close_outstream( &in_stats ) != 0 ) cleanup_and_fail( 1 ); if( verbosity >= 2 && !error ) std::fputs( "Byte range decompressed successfully.\n", stderr ); diff --git a/repair.cc b/repair.cc deleted file mode 100644 index c49fbdb..0000000 --- a/repair.cc +++ /dev/null @@ -1,517 +0,0 @@ -/* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2022 Antonio Diaz Diaz. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. 
-*/ - -#define _FILE_OFFSET_BITS 64 - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "lzip.h" -#include "mtester.h" -#include "lzip_index.h" - - -namespace { - -bool pending_newline = false; - -void print_pending_newline( const char terminator ) - { if( pending_newline && terminator != '\n' ) std::fputc( '\n', stdout ); - pending_newline = false; } - - -bool gross_damage( const long long msize, const uint8_t * const mbuffer ) - { - enum { maxlen = 7 }; // max number of consecutive identical bytes - long i = Lzip_header::size; - const long end = msize - Lzip_trailer::size - maxlen; - while( i < end ) - { - const uint8_t byte = mbuffer[i]; - int len = 0; // does not count the first byte - while( mbuffer[++i] == byte ) if( ++len >= maxlen ) return true; - } - return false; - } - - -// Return value: 0 = no change, 5 = repaired pos -int repair_dictionary_size( const long long msize, uint8_t * const mbuffer ) - { - const unsigned long long dictionary_size_9 = 1 << 25; // dict size of opt -9 - Lzip_header & header = *(Lzip_header *)mbuffer; - unsigned dictionary_size = header.dictionary_size(); - const Lzip_trailer & trailer = - *(const Lzip_trailer *)( mbuffer + msize - Lzip_trailer::size ); - const unsigned long long data_size = trailer.data_size(); - const bool valid_ds = isvalid_ds( dictionary_size ); - if( valid_ds && dictionary_size >= data_size ) return 0; // can't be bad - - if( !valid_ds || dictionary_size < dictionary_size_9 ) - { - dictionary_size = std::min( data_size, dictionary_size_9 ); - if( dictionary_size < min_dictionary_size ) - dictionary_size = min_dictionary_size; - LZ_mtester mtester( mbuffer, msize, dictionary_size ); - const int result = mtester.test_member(); - if( result == 0 ) - { header.dictionary_size( dictionary_size ); return 5; } // fix DS - if( result != 1 || mtester.max_distance() <= dictionary_size || - mtester.max_distance() > max_dictionary_size ) return 0; - 
} - if( data_size > dictionary_size_9 ) - { - dictionary_size = - std::min( data_size, (unsigned long long)max_dictionary_size ); - LZ_mtester mtester( mbuffer, msize, dictionary_size ); - if( mtester.test_member() == 0 ) - { header.dictionary_size( dictionary_size ); return 5; } // fix DS - } - return 0; - } - - -const LZ_mtester * prepare_master( const uint8_t * const buffer, - const long buffer_size, - const unsigned long pos_limit, - const unsigned dictionary_size ) - { - LZ_mtester * const master = - new LZ_mtester( buffer, buffer_size, dictionary_size ); - if( master->test_member( pos_limit ) == -1 ) return master; - delete master; - return 0; - } - - -bool test_member_rest( const LZ_mtester & master, uint8_t * const buffer2, - long * const failure_posp = 0 ) - { - LZ_mtester mtester( master ); // tester with external buffer - mtester.duplicate_buffer( buffer2 ); - if( mtester.test_member() == 0 && mtester.finished() ) return true; - if( failure_posp ) *failure_posp = mtester.member_position(); - return false; - } - - -// Return value: -1 = master failed, 0 = begin reached, >0 = repaired pos -long repair_member( const long long mpos, const long long msize, - uint8_t * const mbuffer, const long begin, const long end, - const unsigned dictionary_size, const char terminator ) - { - uint8_t * const buffer2 = new uint8_t[dictionary_size]; - for( long pos = end; pos >= begin && pos > end - 50000; ) - { - const long min_pos = std::max( begin, pos - 100 ); - const unsigned long pos_limit = std::max( min_pos - 16, 0L ); - const LZ_mtester * master = - prepare_master( mbuffer, msize, pos_limit, dictionary_size ); - if( !master ) { delete[] buffer2; return -1; } - for( ; pos >= min_pos; --pos ) - { - if( verbosity >= 2 ) - { - std::printf( " Trying position %llu %c", mpos + pos, terminator ); - std::fflush( stdout ); pending_newline = true; - } - for( int j = 0; j < 255; ++j ) - { - ++mbuffer[pos]; - if( test_member_rest( *master, buffer2 ) ) - { delete master; delete[] 
buffer2; return pos; } - } - ++mbuffer[pos]; - } - delete master; - } - delete[] buffer2; - return 0; - } - -} // end namespace - - -long long seek_write( const int fd, const uint8_t * const buf, - const long long size, const long long pos ) - { - if( lseek( fd, pos, SEEK_SET ) == pos ) - return writeblock( fd, buf, size ); - return 0; - } - - -uint8_t * read_member( const int infd, const long long mpos, - const long long msize ) - { - if( msize <= 0 || msize > LONG_MAX ) - { show_error( "Member is larger than LONG_MAX." ); return 0; } - if( !safe_seek( infd, mpos ) ) return 0; - uint8_t * const buffer = new uint8_t[msize]; - - if( readblock( infd, buffer, msize ) != msize ) - { show_error( "Error reading input file", errno ); - delete[] buffer; return 0; } - return buffer; - } - - -int repair_file( const std::string & input_filename, - const std::string & default_output_filename, - const char terminator, const bool force ) - { - struct stat in_stats; - const int infd = - open_instream( input_filename.c_str(), &in_stats, false, true ); - if( infd < 0 ) return 1; - - const Lzip_index lzip_index( infd, true, true, true ); - if( lzip_index.retval() != 0 ) - { show_file_error( input_filename.c_str(), lzip_index.error().c_str() ); - return lzip_index.retval(); } - - output_filename = default_output_filename.empty() ? - insert_fixed( input_filename ) : default_output_filename; - if( !force && file_exists( output_filename ) ) return 1; - outfd = -1; - for( long i = 0; i < lzip_index.members(); ++i ) - { - const long long mpos = lzip_index.mblock( i ).pos(); - const long long msize = lzip_index.mblock( i ).size(); - if( !safe_seek( infd, mpos ) ) cleanup_and_fail( 1 ); - long long failure_pos = 0; - if( test_member_from_file( infd, msize, &failure_pos ) == 0 ) continue; - if( failure_pos < Lzip_header::size ) // End Of File - { show_error( "Can't repair error in input file." 
); - cleanup_and_fail( 2 ); } - if( failure_pos >= msize - 8 ) failure_pos = msize - 8 - 1; - - if( verbosity >= 2 ) // damaged member found - { - std::printf( "Repairing member %ld of %ld (failure pos = %llu)\n", - i + 1, lzip_index.members(), mpos + failure_pos ); - std::fflush( stdout ); - } - uint8_t * const mbuffer = read_member( infd, mpos, msize ); - if( !mbuffer ) cleanup_and_fail( 1 ); - const Lzip_header & header = *(const Lzip_header *)mbuffer; - const unsigned dictionary_size = header.dictionary_size(); - long pos = 0; - if( !gross_damage( msize, mbuffer ) ) - { - pos = repair_dictionary_size( msize, mbuffer ); - if( pos == 0 ) - pos = repair_member( mpos, msize, mbuffer, Lzip_header::size + 1, - Lzip_header::size + 6, dictionary_size, terminator ); - if( pos == 0 ) - pos = repair_member( mpos, msize, mbuffer, Lzip_header::size + 7, - failure_pos, dictionary_size, terminator ); - print_pending_newline( terminator ); - } - if( pos < 0 ) - { show_error( "Can't prepare master." ); cleanup_and_fail( 1 ); } - if( pos > 0 ) - { - if( outfd < 0 ) // first damaged member repaired - { - if( !safe_seek( infd, 0 ) ) return 1; - set_signal_handler(); - if( !open_outstream( true, true ) ) return 1; - if( !copy_file( infd, outfd ) ) // copy whole file - cleanup_and_fail( 1 ); - } - if( seek_write( outfd, mbuffer + pos, 1, mpos + pos ) != 1 ) - { show_error( "Error writing output file", errno ); - cleanup_and_fail( 1 ); } - } - delete[] mbuffer; - if( pos == 0 ) - { - show_error( "Can't repair input file. Error is probably larger than 1 byte." ); - cleanup_and_fail( 2 ); - } - } - - if( outfd < 0 ) - { - if( verbosity >= 1 ) - std::fputs( "Input file has no errors. 
Recovery is not needed.\n", stdout ); - return 0; - } - if( close_outstream( &in_stats ) != 0 ) return 1; - if( verbosity >= 1 ) - std::fputs( "Copy of input file repaired successfully.\n", stdout ); - return 0; - } - - -int debug_delay( const std::string & input_filename, Block range, - const char terminator ) - { - struct stat in_stats; // not used - const int infd = - open_instream( input_filename.c_str(), &in_stats, false, true ); - if( infd < 0 ) return 1; - - const Lzip_index lzip_index( infd, true, true ); - if( lzip_index.retval() != 0 ) - { show_file_error( input_filename.c_str(), lzip_index.error().c_str() ); - return lzip_index.retval(); } - - if( range.end() > lzip_index.cdata_size() ) - range.size( std::max( 0LL, lzip_index.cdata_size() - range.pos() ) ); - if( range.size() <= 0 ) - { show_file_error( input_filename.c_str(), "Nothing to do." ); return 0; } - - for( long i = 0; i < lzip_index.members(); ++i ) - { - const Block & mb = lzip_index.mblock( i ); - if( !range.overlaps( mb ) ) continue; - const long long mpos = lzip_index.mblock( i ).pos(); - const long long msize = lzip_index.mblock( i ).size(); - const unsigned dictionary_size = lzip_index.dictionary_size( i ); - if( verbosity >= 2 ) - { - std::printf( "Finding max delay in member %ld of %ld (mpos = %llu, msize = %llu)\n", - i + 1, lzip_index.members(), mpos, msize ); - std::fflush( stdout ); - } - uint8_t * const mbuffer = read_member( infd, mpos, msize ); - if( !mbuffer ) return 1; - uint8_t * const buffer2 = new uint8_t[dictionary_size]; - long pos = std::max( range.pos() - mpos, Lzip_header::size + 1LL ); - const long end = std::min( range.end() - mpos, msize ); - long max_delay = 0; - while( pos < end ) - { - const unsigned long pos_limit = std::max( pos - 16, 0L ); - const LZ_mtester * master = - prepare_master( mbuffer, msize, pos_limit, dictionary_size ); - if( !master ) { show_error( "Can't prepare master." 
); - delete[] buffer2; delete[] mbuffer; return 1; } - const long partial_end = std::min( pos + 100, end ); - for( ; pos < partial_end; ++pos ) - { - if( verbosity >= 2 ) - { - std::printf( " Delays at position %llu %c", mpos + pos, terminator ); - std::fflush( stdout ); pending_newline = true; - } - int value = -1; - for( int j = 0; j < 256; ++j ) - { - ++mbuffer[pos]; - if( j == 255 ) break; - long failure_pos = 0; - if( test_member_rest( *master, buffer2, &failure_pos ) ) continue; - const long delay = failure_pos - pos; - if( delay > max_delay ) { max_delay = delay; value = mbuffer[pos]; } - } - if( value >= 0 && verbosity >= 2 ) - { - std::printf( " New max delay %lu at position %llu (0x%02X)\n", - max_delay, mpos + pos, value ); - std::fflush( stdout ); pending_newline = false; - } - if( pos + max_delay >= msize ) { pos = end; break; } - } - delete master; - } - delete[] buffer2; - delete[] mbuffer; - print_pending_newline( terminator ); - } - - if( verbosity >= 1 ) std::fputs( "Done.\n", stdout ); - return 0; - } - - -int debug_repair( const std::string & input_filename, - const Bad_byte & bad_byte, const char terminator ) - { - struct stat in_stats; // not used - const int infd = - open_instream( input_filename.c_str(), &in_stats, false, true ); - if( infd < 0 ) return 1; - - const Lzip_index lzip_index( infd, true, true ); - if( lzip_index.retval() != 0 ) - { show_file_error( input_filename.c_str(), lzip_index.error().c_str() ); - return lzip_index.retval(); } - - long idx = 0; - for( ; idx < lzip_index.members(); ++idx ) - if( lzip_index.mblock( idx ).includes( bad_byte.pos ) ) break; - if( idx >= lzip_index.members() ) - { show_file_error( input_filename.c_str(), "Nothing to do." 
); return 0; } - - const long long mpos = lzip_index.mblock( idx ).pos(); - const long long msize = lzip_index.mblock( idx ).size(); - { - long long failure_pos = 0; - if( !safe_seek( infd, mpos ) ) return 1; - if( test_member_from_file( infd, msize, &failure_pos ) != 0 ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "Member %ld of %ld already damaged (failure pos = %llu)\n", - idx + 1, lzip_index.members(), mpos + failure_pos ); - return 2; - } - } - uint8_t * const mbuffer = read_member( infd, mpos, msize ); - if( !mbuffer ) return 1; - const Lzip_header & header = *(const Lzip_header *)mbuffer; - const unsigned dictionary_size = header.dictionary_size(); - const uint8_t good_value = mbuffer[bad_byte.pos-mpos]; - const uint8_t bad_value = bad_byte( good_value ); - mbuffer[bad_byte.pos-mpos] = bad_value; - long failure_pos = 0; - if( bad_byte.pos != 5 || isvalid_ds( header.dictionary_size() ) ) - { - LZ_mtester mtester( mbuffer, msize, header.dictionary_size() ); - if( mtester.test_member() == 0 && mtester.finished() ) - { - if( verbosity >= 1 ) - std::fputs( "Member decompressed with no errors.\n", stdout ); - delete[] mbuffer; - return 0; - } - failure_pos = mtester.member_position(); - } - if( verbosity >= 2 ) - { - std::printf( "Test repairing member %ld of %ld (mpos = %llu, msize = %llu)\n" - " (damage pos = %llu (0x%02X->0x%02X), failure pos = %llu, delay = %lld )\n", - idx + 1, lzip_index.members(), mpos, msize, - bad_byte.pos, good_value, bad_value, mpos + failure_pos, - mpos + failure_pos - bad_byte.pos ); - std::fflush( stdout ); - } - if( failure_pos >= msize ) failure_pos = msize - 1; - long pos = repair_dictionary_size( msize, mbuffer ); - if( pos == 0 ) - pos = repair_member( mpos, msize, mbuffer, Lzip_header::size + 1, - Lzip_header::size + 6, dictionary_size, terminator ); - if( pos == 0 ) - pos = repair_member( mpos, msize, mbuffer, Lzip_header::size + 7, - failure_pos, dictionary_size, terminator ); - print_pending_newline( terminator ); - 
delete[] mbuffer; - if( pos < 0 ) { show_error( "Can't prepare master." ); return 1; } - if( pos == 0 ) internal_error( "can't repair input file." ); - if( verbosity >= 1 ) std::fputs( "Member repaired successfully.\n", stdout ); - return 0; - } - - -/* If show_packets is true, print to stdout descriptions of the decoded LZMA - packets. Print also some global values; total number of packets in - member, max distance (rep0) and its file position, max LZMA packet size - in each member and the file position of these packets. - (Packet sizes are a fractionary number of bytes. The packet and marker - sizes shown by option -X are the number of extra bytes required to decode - the packet, not counting the data present in the range decoder before and - after the decoding. The max marker size of a 'Sync Flush marker' does not - include the 5 bytes read by rdec.load). - if bad_byte.pos >= cdata_size, bad_byte is ignored. -*/ -int debug_decompress( const std::string & input_filename, - const Bad_byte & bad_byte, const bool show_packets ) - { - struct stat in_stats; - const int infd = - open_instream( input_filename.c_str(), &in_stats, false, true ); - if( infd < 0 ) return 1; - - const Lzip_index lzip_index( infd, true, true ); - if( lzip_index.retval() != 0 ) - { show_file_error( input_filename.c_str(), lzip_index.error().c_str() ); - return lzip_index.retval(); } - - outfd = show_packets ? 
-1 : STDOUT_FILENO; - int retval = 0; - for( long i = 0; i < lzip_index.members(); ++i ) - { - const long long dpos = lzip_index.dblock( i ).pos(); - const long long mpos = lzip_index.mblock( i ).pos(); - const long long msize = lzip_index.mblock( i ).size(); - const unsigned dictionary_size = lzip_index.dictionary_size( i ); - if( verbosity >= 1 && show_packets ) - std::printf( "Decoding LZMA packets in member %ld of %ld (mpos = %llu, msize = %llu)\n" - " mpos dpos\n", - i + 1, lzip_index.members(), mpos, msize ); - if( !isvalid_ds( dictionary_size ) ) - { show_error( bad_dict_msg ); retval = 2; break; } - uint8_t * const mbuffer = read_member( infd, mpos, msize ); - if( !mbuffer ) { retval = 1; break; } - if( bad_byte.pos >= 0 && lzip_index.mblock( i ).includes( bad_byte.pos ) ) - { - const uint8_t good_value = mbuffer[bad_byte.pos-mpos]; - const uint8_t bad_value = bad_byte( good_value ); - mbuffer[bad_byte.pos-mpos] = bad_value; - if( verbosity >= 1 && show_packets ) - std::printf( "Byte at pos %llu changed from 0x%02X to 0x%02X\n", - bad_byte.pos, good_value, bad_value ); - } - LZ_mtester mtester( mbuffer, msize, dictionary_size, outfd ); - const int result = mtester.debug_decode_member( dpos, mpos, show_packets ); - delete[] mbuffer; - if( show_packets ) - { - const std::vector< unsigned long long > & mppv = mtester.max_packet_posv(); - const unsigned mpackets = mppv.size(); - std::printf( "Total packets in member = %llu\n" - "Max distance in any match = %u at file position %llu\n" - "Max marker size found = %u\n" - "Max packet size found = %u (%u packets)%s", - mtester.total_packets(), mtester.max_distance(), - mtester.max_distance_pos(), mtester.max_marker_size(), - mtester.max_packet_size(), mpackets, - mpackets ? 
" at file positions" : "" ); - for( unsigned i = 0; i < mpackets; ++i ) - std::printf( " %llu", mppv[i] ); - std::fputc( '\n', stdout ); - } - if( result != 0 ) - { - if( verbosity >= 0 && result <= 2 && show_packets ) - std::printf( "%s at pos %llu\n", ( result == 2 ) ? - "File ends unexpectedly" : "Decoder error", - mpos + mtester.member_position() ); - retval = 2; - if( result != 3 || !mtester.finished() || mtester.data_position() != - (unsigned long long)lzip_index.dblock( i ).size() ) break; - } - if( i + 1 < lzip_index.members() && show_packets ) - std::fputc( '\n', stdout ); - } - - retval = std::max( retval, close_outstream( &in_stats ) ); - if( verbosity >= 1 && show_packets && retval == 0 ) - std::fputs( "Done.\n", stdout ); - return retval; - } diff --git a/reproduce.cc b/reproduce.cc index 58a0c5d..bca229a 100644 --- a/reproduce.cc +++ b/reproduce.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2023 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -53,22 +53,22 @@ int fatal( const int retval ) { if( fatal_retval == 0 ) fatal_retval = retval; return retval; } // Return the position of the damaged area in the member, or -1 if error. 
-long long zeroed_sector_pos( const char * const input_filename, - const uint8_t * const mbuffer, const long long msize, - long long * const sizep, uint8_t * const valuep ) +long zeroed_sector_pos( const uint8_t * const mbuffer, const long msize, + const char * const input_filename, + long * const sizep, uint8_t * const valuep ) { enum { minlen = 8 }; // min number of consecutive identical bytes - long long i = Lzip_header::size; - const long long end = msize - minlen; - long long begin = -1; - long long size = 0; + long i = Lzip_header::size; + const long end = msize - minlen; + long begin = -1; + long size = 0; uint8_t value = 0; while( i < end ) // leave i pointing to the first differing byte { const uint8_t byte = mbuffer[i++]; if( mbuffer[i] == byte ) { - const long long pos = i - 1; + const long pos = i - 1; ++i; while( i < msize && mbuffer[i] == byte ) ++i; if( i - pos >= minlen ) @@ -94,23 +94,22 @@ long long zeroed_sector_pos( const char * const input_filename, const LZ_mtester * prepare_master2( const uint8_t * const mbuffer, - const long long msize, - const long long begin, + const long msize, const long begin, const unsigned dictionary_size ) { - long long pos_limit = std::max( begin - 16, (long long)Lzip_header::size ); + long pos_limit = std::max( begin - 16, (long)Lzip_header::size ); LZ_mtester * master = new LZ_mtester( mbuffer, msize, dictionary_size ); if( master->test_member( pos_limit ) != -1 || - master->member_position() > (unsigned long long)begin ) + master->member_position() > (unsigned long)begin ) { delete master; return 0; } // decompress as much data as possible without surpassing begin while( pos_limit < begin && master->test_member( pos_limit + 1 ) == -1 && - master->member_position() <= (unsigned long long)begin ) + master->member_position() <= (unsigned long)begin ) ++pos_limit; delete master; master = new LZ_mtester( mbuffer, msize, dictionary_size ); if( master->test_member( pos_limit ) == -1 && - master->member_position() <= 
(unsigned long long)begin ) return master; + master->member_position() <= (unsigned long)begin ) return master; delete master; return 0; } @@ -122,9 +121,8 @@ const LZ_mtester * prepare_master2( const uint8_t * const mbuffer, Choose the match nearest to the beginning of the file. As a fallback, locate the longest partial match at least 512 bytes long. Return the offset in file of the first undecoded byte, or -1 if no match. */ -long long match_file( const LZ_mtester & master, const uint8_t * const rbuf, - const long long rsize, - const char * const reference_filename ) +long match_file( const LZ_mtester & master, const uint8_t * const rbuf, + const long rsize, const char * const reference_filename ) { const uint8_t * prev_buffer; int dec_size, prev_size; @@ -135,17 +133,17 @@ long long match_file( const LZ_mtester & master, const uint8_t * const rbuf, { std::printf( "'%s' can't match: not enough data in dictionary.\n", reference_filename ); pending_newline = false; } return -1; } - long long offset = -1; // offset in file of the first undecoded byte + long offset = -1; // offset in file of the first undecoded byte bool multiple = false; const uint8_t last_byte = dec_buffer[dec_size-1]; - for( long long i = rsize - 1; i >= 3; --i ) // match at least 4 bytes at bof + for( long i = rsize - 1; i >= 3; --i ) // match at least 4 bytes at bof if( rbuf[i] == last_byte ) { // compare file with the two parts of the dictionary - int len = std::min( (long long)dec_size - 1, i ); + int len = std::min( (long)dec_size - 1, i ); if( std::memcmp( rbuf + i - len, dec_buffer + dec_size - 1 - len, len ) == 0 ) { - int len2 = std::min( (long long)prev_size, i - len ); + int len2 = std::min( (long)prev_size, i - len ); if( len2 <= 0 || !prev_buffer || std::memcmp( rbuf + i - len - len2, prev_buffer + prev_size - len2, len2 ) == 0 ) @@ -159,24 +157,24 @@ long long match_file( const LZ_mtester & master, const uint8_t * const rbuf, if( offset >= 0 ) { if( multiple && verbosity >= 1 ) - { 
std::printf( "warning: %s: Multiple matches. Using match at offset %lld\n", + { std::printf( "warning: %s: Multiple matches. Using match at offset %ld\n", reference_filename, offset ); std::fflush( stdout ); } if( !multiple && verbosity >= 2 ) - { std::printf( "%s: Match found at offset %lld\n", + { std::printf( "%s: Match found at offset %ld\n", reference_filename, offset ); std::fflush( stdout ); } return offset; } int maxlen = 0; // choose longest match in reference file - for( long long i = rsize - 1; i >= 0; --i ) + for( long i = rsize - 1; i >= 0; --i ) if( rbuf[i] == last_byte ) { // compare file with the two parts of the dictionary - const int size1 = std::min( (long long)dec_size, i + 1 ); + const int size1 = std::min( (long)dec_size, i + 1 ); int len = 1; while( len < size1 && rbuf[i-len] == dec_buffer[dec_size-len-1] ) ++len; if( len == size1 ) { - int size2 = std::min( (long long)prev_size, i + 1 - size1 ); + int size2 = std::min( (long)prev_size, i + 1 - size1 ); while( len < size1 + size2 && rbuf[i-len] == prev_buffer[prev_size+size1-len] ) ++len; } @@ -185,7 +183,7 @@ long long match_file( const LZ_mtester & master, const uint8_t * const rbuf, if( maxlen >= 512 && offset >= 0 ) { if( verbosity >= 1 ) - { std::printf( "warning: %s: Partial match found at offset %lld, len %d." + { std::printf( "warning: %s: Partial match found at offset %ld, len %d." " Reference data may be mixed with other data.\n", reference_filename, offset, maxlen ); std::fflush( stdout ); } @@ -295,39 +293,34 @@ bool good_status( const pid_t pid, const char * const name, const bool finished /* Feed to lzip through 'ofd' the data decompressed up to 'good_dsize' (master->data_position) followed by the reference data from byte at offset 'offset' of reference file, up to a total of 'dsize' bytes. 
*/ -bool feed_data( uint8_t * const mbuffer, const long long msize, +bool feed_data( uint8_t * const mbuffer, const long msize, const long long dsize, const unsigned long long good_dsize, - const uint8_t * const rbuf, const long long rsize, - const long long offset, const unsigned dictionary_size, + const uint8_t * const rbuf, const long rsize, + const long offset, const unsigned dictionary_size, const int ofd ) { LZ_mtester mtester( mbuffer, msize, dictionary_size, ofd ); - if( mtester.test_member( LLONG_MAX, good_dsize ) != -1 || + if( mtester.test_member( LONG_MAX, good_dsize ) != -1 || good_dsize != mtester.data_position() ) { show_error( "Error decompressing prefix data for compressor." ); return false; } // limit reference data to remaining decompressed data in member - const long long end = - std::min( (unsigned long long)rsize, dsize - good_dsize + offset ); - for( long long i = offset; i < end; ) - { - const int size = std::min( end - i, 65536LL ); - if( writeblock( ofd, rbuf + i, size ) != size ) - { show_error( "Error writing reference data to compressor", errno ); - return false; } - i += size; - } + const long size = + std::min( (unsigned long long)rsize - offset, dsize - good_dsize ); + if( writeblock( ofd, rbuf + offset, size ) != size ) + { show_error( "Error writing reference data to compressor", errno ); + return false; } return true; } /* Try to reproduce the zeroed sector. Return value: -1 = failure, 0 = success, > 0 = fatal error. 
*/ -int try_reproduce( uint8_t * const mbuffer, const long long msize, +int try_reproduce( uint8_t * const mbuffer, const long msize, const long long dsize, const unsigned long long good_dsize, - const long long begin, const long long end, - const uint8_t * const rbuf, const long long rsize, - const long long offset, const unsigned dictionary_size, + const long begin, const long end, + const uint8_t * const rbuf, const long rsize, + const long offset, const unsigned dictionary_size, const char ** const lzip_argv, MD5SUM * const md5sump, const char terminator, const bool auto0 = false ) { @@ -365,12 +358,12 @@ int try_reproduce( uint8_t * const mbuffer, const long long msize, { show_fork_error( lzip_argv[0] ); return fatal( 1 ); } close( fda[0] ); close( fda[1] ); close( fda2[1] ); - const long long xend = std::min( end + 4, msize ); + const long xend = std::min( end + 4, msize ); int retval = 0; // -1 = mismatch bool first_post = true; bool same_ds = true; // reproduced DS == header DS bool tail_mismatch = false; // mismatch after end - for( long long i = 0; i < xend; ) + for( long i = 0; i < xend; ) { enum { buffer_size = 16384 }; // 65536 makes it slower uint8_t buffer[buffer_size]; @@ -378,7 +371,7 @@ int try_reproduce( uint8_t * const mbuffer, const long long msize, { if( first_post ) { first_post = false; print_pending_newline( terminator ); } - std::printf( " Reproducing position %lld %c", i, terminator ); + std::printf( " Reproducing position %ld %c", i, terminator ); std::fflush( stdout ); pending_newline = true; } const int rd = readblock( fda2[0], buffer, buffer_size ); @@ -406,7 +399,7 @@ done: if( close( fda2[0] ) != 0 ) { show_close_error( "compressor" ); retval = 1; } if( !good_status( pid, "data feeder", false ) || !good_status( pid2, lzip_argv[0], false ) ) retval = auto0 ? 
-1 : 1; - if( !retval ) // test whole member after reproduction + if( retval == 0 ) // test whole member after reproduction { if( md5sump ) md5sump->reset(); LZ_mtester mtester( mbuffer, msize, dictionary_size, -1, md5sump ); @@ -429,20 +422,20 @@ done: // Return value: -1 = master failed, 0 = success, > 0 = failure -int reproduce_member( uint8_t * const mbuffer, const long long msize, +int reproduce_member( uint8_t * const mbuffer, const long msize, const long long dsize, const char * const lzip_name, const char * const reference_filename, - const long long begin, const long long size, + const long begin, const long size, const int lzip_level, MD5SUM * const md5sump, const char terminator ) { struct stat st; const int rfd = open_instream( reference_filename, &st, false, true ); if( rfd < 0 ) return fatal( 1 ); - if( st.st_size > LLONG_MAX ) - { show_file_error( reference_filename, "File too large." ); close( rfd ); - return fatal( 2 ); } - const long long rsize = st.st_size; + if( !fits_in_size_t( st.st_size ) ) // mmap uses size_t + { show_file_error( reference_filename, "Reference file is too large for mmap." ); + close( rfd ); return fatal( 1 ); } + const long rsize = st.st_size; const uint8_t * const rbuf = (const uint8_t *)mmap( 0, rsize, PROT_READ, MAP_PRIVATE, rfd, 0 ); close( rfd ); @@ -457,12 +450,12 @@ int reproduce_member( uint8_t * const mbuffer, const long long msize, if( !master ) return -1; if( verbosity >= 2 ) { - std::printf( " (master mpos = %llu, dpos = %llu)\n", + std::printf( " (master mpos = %lu, dpos = %llu)\n", master->member_position(), master->data_position() ); std::fflush( stdout ); } - const long long offset = match_file( *master, rbuf, rsize, reference_filename ); + const long offset = match_file( *master, rbuf, rsize, reference_filename ); if( offset < 0 ) { delete master; return 2; } // no match // Reference data from offset must be at least as large as zeroed sector // minus member trailer if trailer is inside the zeroed sector. 
@@ -472,7 +465,7 @@ int reproduce_member( uint8_t * const mbuffer, const long long msize, delete master; return 2; } const unsigned long long good_dsize = master->data_position(); - const long long end = begin + size; + const long end = begin + size; char level_str[8] = "-0"; // compression level or match length limit char dict_str[16]; snprintf( dict_str, sizeof dict_str, "-s%u", dictionary_size ); @@ -526,22 +519,22 @@ int reproduce_file( const std::string & input_filename, const std::string & default_output_filename, const char * const lzip_name, const char * const reference_filename, - const int lzip_level, const char terminator, - const bool force ) + const Cl_options & cl_opts, const int lzip_level, + const char terminator, const bool force ) { + const char * const filename = input_filename.c_str(); struct stat in_stats; - const int infd = - open_instream( input_filename.c_str(), &in_stats, false, true ); + const int infd = open_instream( filename, &in_stats, false, true ); if( infd < 0 ) return 1; - const Lzip_index lzip_index( infd, true, true, true ); + const Lzip_index lzip_index( infd, cl_opts, true ); if( lzip_index.retval() != 0 ) - { show_file_error( input_filename.c_str(), lzip_index.error().c_str() ); + { show_file_error( filename, lzip_index.error().c_str() ); return lzip_index.retval(); } output_filename = default_output_filename.empty() ? 
insert_fixed( input_filename ) : default_output_filename; - if( !force && file_exists( output_filename ) ) return 1; + if( !force && output_file_exists() ) return 1; outfd = -1; int errors = 0; const long page_size = std::max( 1L, sysconf( _SC_PAGESIZE ) ); @@ -556,35 +549,37 @@ int reproduce_file( const std::string & input_filename, i + 1, lzip_index.members(), terminator ); std::fflush( stdout ); pending_newline = true; } - if( !safe_seek( infd, mpos ) ) return 1; + if( !safe_seek( infd, mpos, filename ) ) return 1; long long failure_pos = 0; if( test_member_from_file( infd, msize, &failure_pos ) == 0 ) continue; // member is not damaged print_pending_newline( terminator ); if( ++errors > 1 ) break; // only one member can be reproduced if( failure_pos < Lzip_header::size ) // End Of File - { show_file_error( input_filename.c_str(), "Unexpected end of file." ); - return 2; } + { show_file_error( filename, "Unexpected end of file." ); return 2; } + if( !fits_in_size_t( msize + page_size ) ) // mmap uses size_t + { show_file_error( filename, + "Input file contains member too large for mmap." ); return 1; } // without mmap, 3 times more memory are required because of fork const long mpos_rem = mpos % page_size; uint8_t * const mbuffer_base = (uint8_t *)mmap( 0, msize + mpos_rem, PROT_READ | PROT_WRITE, MAP_PRIVATE, infd, mpos - mpos_rem ); if( mbuffer_base == MAP_FAILED ) - { show_file_error( input_filename.c_str(), "Can't mmap", errno ); return 1; } + { show_file_error( filename, "Can't mmap", errno ); return 1; } uint8_t * const mbuffer = mbuffer_base + mpos_rem; - long long size = 0; + long size = 0; uint8_t value = 0; - const long long begin = zeroed_sector_pos( input_filename.c_str(), mbuffer, - msize, &size, &value ); + const long begin = + zeroed_sector_pos( mbuffer, msize, filename, &size, &value ); if( begin < 0 ) return 2; if( failure_pos < begin ) - { show_file_error( input_filename.c_str(), - "Data error found before damaged area." 
); return 2; } + { show_file_error( filename, "Data error found before damaged area." ); + return 2; } if( verbosity >= 1 ) { std::printf( "Reproducing bad area in member %ld of %ld\n" - " (begin = %lld, size = %lld, value = 0x%02X)\n", + " (begin = %ld, size = %ld, value = 0x%02X)\n", i + 1, lzip_index.members(), begin, size, value ); std::fflush( stdout ); } @@ -596,7 +591,7 @@ int reproduce_file( const std::string & input_filename, { if( outfd < 0 ) // first damaged member reproduced { - if( !safe_seek( infd, 0 ) ) return 1; + if( !safe_seek( infd, 0, filename ) ) return 1; set_signal_handler(); if( !open_outstream( true, true ) ) return 1; if( !copy_file( infd, outfd ) ) // copy whole file @@ -613,15 +608,15 @@ int reproduce_file( const std::string & input_filename, { if( final_msg ) { std::fputs( final_msg, stdout ); std::fflush( stdout ); } - show_file_error( input_filename.c_str(), - "Unable to reproduce member." ); return ret; + show_file_error( filename, "Unable to reproduce member." ); return ret; } } if( outfd < 0 ) { if( verbosity >= 1 ) - std::fputs( "Input file has no errors. Recovery is not needed.\n", stdout ); + std::printf( "Input file '%s' has no errors. Recovery is not needed.\n", + filename ); return 0; } if( close_outstream( &in_stats ) != 0 ) return 1; @@ -639,30 +634,29 @@ int reproduce_file( const std::string & input_filename, /* Passes a 0 terminator to other functions to prevent intramember feedback. Exits only in case of fatal error. (reference file too large, etc). 
*/ -int debug_reproduce_file( const std::string & input_filename, +int debug_reproduce_file( const char * const input_filename, const char * const lzip_name, const char * const reference_filename, - const Block & range, const int sector_size, - const int lzip_level ) + const Cl_options & cl_opts, const Block & range, + const int sector_size, const int lzip_level ) { struct stat in_stats; // not used - const int infd = - open_instream( input_filename.c_str(), &in_stats, false, true ); + const int infd = open_instream( input_filename, &in_stats, false, true ); if( infd < 0 ) return 1; - const Lzip_index lzip_index( infd, true, true ); + const Lzip_index lzip_index( infd, cl_opts ); if( lzip_index.retval() != 0 ) - { show_file_error( input_filename.c_str(), lzip_index.error().c_str() ); + { show_file_error( input_filename, lzip_index.error().c_str() ); return lzip_index.retval(); } const long long cdata_size = lzip_index.cdata_size(); if( range.pos() >= cdata_size ) - { show_file_error( input_filename.c_str(), - "Range is beyond end of last member." ); return 1; } + { show_file_error( input_filename, "Range is beyond end of last member." ); + return 1; } const long page_size = std::max( 1L, sysconf( _SC_PAGESIZE ) ); const long long positions_to_test = - ( ( std::min( range.end(), cdata_size ) - range.pos() ) + + ( ( std::min( range.size(), cdata_size - range.pos() ) ) + sector_size - 9 ) / sector_size; long positions = 0, successes = 0, failed_comparisons = 0; long alternative_reproductions = 0; @@ -673,11 +667,14 @@ int debug_reproduce_file( const std::string & input_filename, const long long mpos = lzip_index.mblock( i ).pos(); const long long msize = lzip_index.mblock( i ).size(); if( !range.overlaps( mpos, msize ) ) continue; + if( !fits_in_size_t( msize + page_size ) ) // mmap uses size_t + { show_file_error( input_filename, + "Input file contains member too large for mmap." 
); return 1; } const long long dsize = lzip_index.dblock( i ).size(); const unsigned dictionary_size = lzip_index.dictionary_size( i ); // md5sums of original not damaged member (compressed and decompressed) - uint8_t md5_digest_c[16], md5_digest_d[16]; + md5_type md5_digest_c, md5_digest_d; bool md5_valid = false; const long long rm_end = std::min( range.end(), mpos + msize ); for( long long sector_pos = std::max( range.pos(), mpos ); @@ -688,15 +685,14 @@ int debug_reproduce_file( const std::string & input_filename, uint8_t * const mbuffer_base = (uint8_t *)mmap( 0, msize + mpos_rem, PROT_READ | PROT_WRITE, MAP_PRIVATE, infd, mpos - mpos_rem ); if( mbuffer_base == MAP_FAILED ) - { show_file_error( input_filename.c_str(), "Can't mmap", errno ); - return 1; } + { show_file_error( input_filename, "Can't mmap", errno ); return 1; } uint8_t * const mbuffer = mbuffer_base + mpos_rem; if( !md5_valid ) { if( verbosity >= 0 ) // give a clue of the range being tested { std::printf( "Reproducing: %s\nReference file: %s\nTesting " "sectors of size %llu at file positions %llu to %llu\n", - input_filename.c_str(), reference_filename, + input_filename, reference_filename, std::min( (long long)sector_size, rm_end - sector_pos ), sector_pos, rm_end - 1 ); std::fflush( stdout ); } md5_valid = true; compute_md5( mbuffer, msize, md5_digest_c ); @@ -715,13 +711,13 @@ int debug_reproduce_file( const std::string & input_filename, } ++positions; const int sector_sz = - std::min( rm_end - sector_pos, (long long)sector_size ); + std::min( (long long)sector_size, rm_end - sector_pos ); // set mbuffer[sector] to 0 std::memset( mbuffer + ( sector_pos - mpos ), 0, sector_sz ); - long long size = 0; + long size = 0; uint8_t value = 0; - const long long begin = zeroed_sector_pos( input_filename.c_str(), mbuffer, - msize, &size, &value ); + const long begin = + zeroed_sector_pos( mbuffer, msize, input_filename, &size, &value ); if( begin < 0 ) return 2; MD5SUM md5sum; const int ret = 
reproduce_member( mbuffer, msize, dsize, lzip_name, @@ -730,9 +726,9 @@ int debug_reproduce_file( const std::string & input_filename, if( ret == 0 ) { ++successes; - uint8_t new_digest[16]; + md5_type new_digest; md5sum.md5_finish( new_digest ); - if( std::memcmp( md5_digest_d, new_digest, 16 ) != 0 ) + if( md5_digest_d != new_digest ) { ++failed_comparisons; if( verbosity >= 0 ) @@ -765,17 +761,17 @@ int debug_reproduce_file( const std::string & input_filename, done: if( verbosity >= 0 ) { - std::printf( "\n%8ld sectors tested" - "\n%8ld reproductions returned with zero status", + std::printf( "\n%9ld sectors tested" + "\n%9ld reproductions returned with zero status", positions, successes ); if( successes > 0 ) { if( failed_comparisons > 0 ) - std::printf( ", of which\n%8ld comparisons failed\n", + std::printf( ", of which\n%9ld comparisons failed\n", failed_comparisons ); - else std::fputs( "\n all comparisons passed\n", stdout ); + else std::fputs( "\n all comparisons passed\n", stdout ); if( alternative_reproductions > 0 ) - std::printf( "%8ld alternative reproductions found\n", + std::printf( "%9ld alternative reproductions found\n", alternative_reproductions ); } else std::fputc( '\n', stdout ); diff --git a/split.cc b/split.cc index 269f051..9d3304e 100644 --- a/split.cc +++ b/split.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2022 Antonio Diaz Diaz. + Copyright (C) 2009-2023 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -68,43 +68,44 @@ bool next_filename( const int max_digits ) int split_file( const std::string & input_filename, - const std::string & default_output_filename, const bool force ) + const std::string & default_output_filename, + const Cl_options & cl_opts, const bool force ) { + const char * const filename = input_filename.c_str(); struct stat in_stats; - const int infd = - open_instream( input_filename.c_str(), &in_stats, false, true ); + const int infd = open_instream( filename, &in_stats, false, true ); if( infd < 0 ) return 1; - Lzip_index lzip_index( infd, true, true, true, true ); + Lzip_index lzip_index( infd, cl_opts, true, true ); if( lzip_index.retval() != 0 ) { - show_file_error( input_filename.c_str(), lzip_index.error().c_str() ); + show_file_error( filename, lzip_index.error().c_str() ); return lzip_index.retval(); } - // verify last member + // check last member const Block b = lzip_index.mblock( lzip_index.members() - 1 ); long long mpos = b.pos(); long long msize = b.size(); long long failure_pos = 0; - if( !safe_seek( infd, mpos ) ) return 1; + if( !safe_seek( infd, mpos, filename ) ) return 1; if( test_member_from_file( infd, msize, &failure_pos ) == 1 ) { // corrupt or fake trailer while( true ) { mpos += failure_pos; msize -= failure_pos; if( msize < min_member_size ) break; // trailing data - if( !safe_seek( infd, mpos ) ) return 1; + if( !safe_seek( infd, mpos, filename ) ) return 1; if( test_member_from_file( infd, msize, &failure_pos ) != 1 ) break; } - lzip_index = Lzip_index( infd, true, true, true, true, mpos ); + lzip_index = Lzip_index( infd, cl_opts, true, true, mpos ); if( lzip_index.retval() != 0 ) { - show_file_error( input_filename.c_str(), lzip_index.error().c_str() ); + show_file_error( filename, lzip_index.error().c_str() ); return lzip_index.retval(); } } - if( !safe_seek( infd, 0 ) ) 
return 1; + if( !safe_seek( infd, 0, filename ) ) return 1; int max_digits = 1; for( long i = lzip_index.blocks( true ); i >= 10; i /= 10 ) ++max_digits; first_filename( input_filename, default_output_filename, max_digits ); diff --git a/testsuite/check.sh b/testsuite/check.sh index af8d787..429ec95 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,14 +1,14 @@ #! /bin/sh # check script for Lziprecover - Data recovery tool for the lzip format -# Copyright (C) 2009-2022 Antonio Diaz Diaz. +# Copyright (C) 2009-2023 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute, and modify it. LC_ALL=C export LC_ALL -objdir=`pwd` -testdir=`cd "$1" ; pwd` +objdir="`pwd`" +testdir="`cd "$1" ; pwd`" LZIP="${objdir}"/lziprecover LZIPRECOVER="${LZIP}" framework_failure() { echo "failure in testing framework" ; exit 1 ; } @@ -54,6 +54,7 @@ f6s3_lz="${testdir}"/fox6_sc3.lz f6s4_lz="${testdir}"/fox6_sc4.lz f6s5_lz="${testdir}"/fox6_sc5.lz f6s6_lz="${testdir}"/fox6_sc6.lz +f6ma_lz="${testdir}"/fox6_mark.lz num_lz="${testdir}"/numbers.lz nbt_lz="${testdir}"/numbersbt.lz fail=0 @@ -71,6 +72,9 @@ test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; } # test_bad8.lz: [ 66- 73] --> zeroed (reference test.txt [ 89- 110]) # test_bad9.lz: [6491-6498] --> zeroed (reference test.txt [17977-18120]) # +# test_em.txt.lz: test.txt split in 3, with 5 empty members (1,3,5-6,8) +# test_3m.txt.lz.md5: md5sum of test_em.txt.lz after removing empty members +# # 6-member files with one or more errors # fox6_bad1.lz: byte at offset 5 changed from 0x0C to 0x00 (DS) # byte at offset 142 changed from 0x50 to 0x70 (CRC) @@ -84,6 +88,8 @@ test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; } # fox6_bad5.lz: [380-479] --> zeroed (members 5,6) # fox6_bad6.lz: [430-439] --> zeroed (member 6) # +# fox6_mark.lz: 4 last members marked with bytes 'm', 'a', 'r', 'k' +# # 6-member files "shortcircuited" by a corrupt or 
fake trailer # fox6_sc1.lz: (corrupt but consistent last trailer) # last CRC != 0 ; dsize = 4 * msize ; msize = 480 (file size) @@ -101,6 +107,11 @@ test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; } printf "testing lziprecover-%s..." "$2" +"${LZIPRECOVER}" -q --nrep-stats=0N "${in_lz}" +[ $? = 1 ] || test_failed $LINENO +for i in 0 255 0kB 0KiB 0M 0G 0T 0P 0E 0Z 0Y 0R 0Q ; do + "${LZIPRECOVER}" -q --nrep-stats=$i "${in_lz}" || test_failed $LINENO $i +done "${LZIP}" -lq in [ $? = 2 ] || test_failed $LINENO "${LZIP}" -tq in @@ -175,6 +186,8 @@ printf "LZIP\001+.............................." | "${LZIP}" -t 2> /dev/null [ $? = 2 ] || test_failed $LINENO "${LZIPRECOVER}" -q --dump=damagedd "${in_lz}" > /dev/null [ $? = 1 ] || test_failed $LINENO +"${LZIPRECOVER}" -q --dump=empty +[ $? = 1 ] || test_failed $LINENO "${LZIPRECOVER}" -q --strip=damaged [ $? = 1 ] || test_failed $LINENO "${LZIPRECOVER}" -q --strip=damaged in > /dev/null @@ -264,11 +277,15 @@ for i in "${in_lz}" "${in_em}" ; do rm -f copy || framework_failure done -lines=$("${LZIP}" -tvv "${in_em}" 2>&1 | wc -l) || test_failed $LINENO +lines="`"${LZIP}" -tvv "${in_em}" 2>&1 | wc -l`" || test_failed $LINENO [ "${lines}" -eq 8 ] || test_failed $LINENO "${lines}" +"${LZIP}" -tq "${in_em}" --empty-error +[ $? = 2 ] || test_failed $LINENO -lines=$("${LZIP}" -lvv "${in_em}" | wc -l) || test_failed $LINENO +lines="`"${LZIP}" -lvv "${in_em}" | wc -l`" || test_failed $LINENO [ "${lines}" -eq 11 ] || test_failed $LINENO "${lines}" +"${LZIP}" -lq "${in_em}" --empty-error +[ $? 
= 2 ] || test_failed $LINENO "${LZIP}" -cd "${fox_lz}" > fox || test_failed $LINENO cat "${in_lz}" > copy.lz || framework_failure @@ -377,17 +394,37 @@ cmp in copy || test_failed $LINENO cmp "${inD}" copy || test_failed $LINENO "${LZIPRECOVER}" -D 21723,397 "${in_lz}" > copy || test_failed $LINENO cmp "${inD}" copy || test_failed $LINENO +"${LZIPRECOVER}" -D 21723,397 "${in_em}" > copy || test_failed $LINENO +cmp "${inD}" copy || test_failed $LINENO +"${LZIPRECOVER}" -q -D 21723,397 --empty-error "${in_em}" +[ $? = 2 ] || test_failed $LINENO + +"${LZIP}" -cd "${fox6_lz}" > out || test_failed $LINENO +"${LZIP}" -cd "${f6ma_lz}" > copy || test_failed $LINENO +cmp out copy || test_failed $LINENO +rm -f out copy || framework_failure +cat "${f6ma_lz}" > f6ma.lz || framework_failure +cat "${f6ma_lz}" > f6ma2.lz || framework_failure +cmp -s "${fox6_lz}" f6ma.lz && test_failed $LINENO +"${LZIPRECOVER}" --clear-marking f6ma.lz f6ma2.lz || test_failed $LINENO +cmp "${fox6_lz}" f6ma.lz || test_failed $LINENO +cmp "${fox6_lz}" f6ma2.lz || test_failed $LINENO +rm -f f6ma.lz f6ma2.lz || framework_failure +"${LZIP}" -lq "${f6ma_lz}" --marking-error +[ $? = 2 ] || test_failed $LINENO +"${LZIP}" -tq "${f6ma_lz}" --marking-error +[ $? = 2 ] || test_failed $LINENO printf "\ntesting bad input..." headers='LZIp LZiP LZip LzIP LzIp LziP lZIP lZIp lZiP lzIP' body='\001\014\000\203\377\373\377\377\300\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000$\000\000\000\000\000\000\000' -cat "${in_lz}" > int.lz -printf "LZIP${body}" >> int.lz +cat "${in_lz}" > int.lz || framework_failure +printf "LZIP${body}" >> int.lz || framework_failure if "${LZIP}" -tq int.lz ; then for header in ${headers} ; do - printf "${header}${body}" > int.lz # first member - "${LZIP}" -lq int.lz + printf "${header}${body}" > int.lz || framework_failure + "${LZIP}" -lq int.lz # first member [ $? = 2 ] || test_failed $LINENO ${header} "${LZIP}" -tq int.lz [ $? 
= 2 ] || test_failed $LINENO ${header} @@ -403,9 +440,9 @@ if "${LZIP}" -tq int.lz ; then [ $? = 2 ] || test_failed $LINENO ${header} "${LZIP}" -cdq --loose-trailing int.lz > /dev/null [ $? = 2 ] || test_failed $LINENO ${header} - cat "${in_lz}" > int.lz - printf "${header}${body}" >> int.lz # trailing data - "${LZIP}" -lq int.lz + cat "${in_lz}" > int.lz || framework_failure + printf "${header}${body}" >> int.lz || framework_failure + "${LZIP}" -lq int.lz # trailing data [ $? = 2 ] || test_failed $LINENO ${header} "${LZIP}" -tq int.lz [ $? = 2 ] || test_failed $LINENO ${header} @@ -487,11 +524,11 @@ fi rm -f in3.lz trunc.lz out || framework_failure for i in "${f6s1_lz}" "${f6s2_lz}" ; do - lines=`"${LZIP}" -lvv "$i" | wc -l || test_failed $LINENO "$i"` + lines="`"${LZIP}" -lvv "$i" | wc -l || test_failed $LINENO "$i"`" [ "${lines}" -eq 2 ] || test_failed $LINENO "$i ${lines}" done for i in "${f6s3_lz}" "${f6s4_lz}" "${f6s5_lz}" "${f6s6_lz}" ; do - lines=`"${LZIP}" -lvv "$i" | wc -l || test_failed $LINENO "$i"` + lines="`"${LZIP}" -lvv "$i" | wc -l || test_failed $LINENO "$i"`" [ "${lines}" -eq 9 ] || test_failed $LINENO "$i ${lines}" done @@ -698,7 +735,7 @@ cmp out4.lz copy4.lz || test_failed $LINENO cmp out4.lz copy4.lz || test_failed $LINENO rm -f bad345.lz bad453.lz bad534.lz out4.lz copy4.lz || framework_failure -printf "\ntesting --repair..." +printf "\ntesting --byte-repair..." 
rm -f copy.lz || framework_failure "${LZIPRECOVER}" -R -o copy.lz "${fox6_lz}" || test_failed $LINENO @@ -1022,7 +1059,8 @@ cat "${f6b2_lz}" in > f6bt.lz || framework_failure "${LZIPRECOVER}" --dump=damaged f6bt.lz > copy || test_failed $LINENO cat "${fox_lz}" copy "${fox_lz}" "${fox_lz}" "${fox_lz}" \ "${fox_lz}" | cmp "${f6b2_lz}" - || test_failed $LINENO -cat "${fox_lz}" "${fox_lz}" "${fox_lz}" "${fox_lz}" "${fox_lz}" > fox5.lz +cat "${fox_lz}" "${fox_lz}" "${fox_lz}" "${fox_lz}" "${fox_lz}" > fox5.lz || + framework_failure "${LZIPRECOVER}" --strip=damaged "${f6b2_lz}" > copy || test_failed $LINENO cmp fox5.lz copy || test_failed $LINENO "${LZIPRECOVER}" --strip=damaged f6bt.lz > copy || test_failed $LINENO @@ -1041,7 +1079,7 @@ cat "${f6b3_lz}" in > f6bt.lz || framework_failure "${LZIPRECOVER}" --dump=damaged f6bt.lz > copy || test_failed $LINENO cat "${fox_lz}" "${fox_lz}" copy "${fox_lz}" | cmp "${f6b3_lz}" - || test_failed $LINENO -cat "${fox_lz}" "${fox_lz}" "${fox_lz}" > fox3.lz +cat "${fox_lz}" "${fox_lz}" "${fox_lz}" > fox3.lz || framework_failure "${LZIPRECOVER}" --strip=damaged "${f6b3_lz}" > copy || test_failed $LINENO cmp fox3.lz copy || test_failed $LINENO "${LZIPRECOVER}" --strip=damaged f6bt.lz > copy || test_failed $LINENO @@ -1053,7 +1091,8 @@ cmp fox3.lz f6b.lz || test_failed $LINENO cat fox3.lz in | cmp f6bt.lz - || test_failed $LINENO rm -f f6b.lz f6bt.lz fox3.lz || framework_failure -cat "${fox_lz}" "${fox_lz}" "${fox_lz}" "${fox_lz}" > fox4.lz +cat "${fox_lz}" "${fox_lz}" "${fox_lz}" "${fox_lz}" > fox4.lz || + framework_failure for i in "${f6b4_lz}" "${f6b5_lz}" ; do "${LZIPRECOVER}" --dump=damaged "$i" > copy || test_failed $LINENO "$i" cat fox4.lz copy | cmp "$i" - || test_failed $LINENO "$i" @@ -1427,7 +1466,7 @@ cmp "${num_lz}" nbt.lz || test_failed $LINENO cat "${nbt_lz}" > nbt.lz || framework_failure "${LZIPRECOVER}" --remove=damaged:tdata nbt.lz || test_failed $LINENO cmp "${num_lz}" nbt.lz || test_failed $LINENO -rm -f rec*num.lz 
nbt.lz empty || framework_failure +rm -f rec*num.lz nbt.lz || framework_failure for i in 1 2 3 4 5 6 7 8 9 10 ; do "${LZIPRECOVER}" -i --strip=1-$i "${nbt_lz}" > out || @@ -1436,7 +1475,25 @@ for i in 1 2 3 4 5 6 7 8 9 10 ; do "${LZIPRECOVER}" -i --remove=1-$i nbt.lz || test_failed $LINENO $i cmp nbt.lz out || test_failed $LINENO $i done -rm -f nbt.lz out || framework_failure +rm -f nbt.lz || framework_failure + +cat "${in_em}" > test_3m.txt.lz || framework_failure +"${LZIPRECOVER}" --remove=empty test_3m.txt.lz || test_failed $LINENO +"${LZIPRECOVER}" -M test_3m.txt.lz | cmp "${testdir}"/test_3m.txt.lz.md5 - || + test_failed $LINENO +"${LZIPRECOVER}" --dump=2,4,7 "${in_em}" | cmp test_3m.txt.lz - || + test_failed $LINENO +"${LZIPRECOVER}" --strip=e "${in_em}" | cmp test_3m.txt.lz - || + test_failed $LINENO +"${LZIPRECOVER}" --empty-error --strip=e "${in_em}" | cmp test_3m.txt.lz - || + test_failed $LINENO +"${LZIPRECOVER}" --strip=1,3,5-6,8 "${in_em}" | cmp test_3m.txt.lz - || + test_failed $LINENO +"${LZIPRECOVER}" -q --strip=1,3,5-6,8 --empty-error "${in_em}" > out +[ $? = 2 ] || test_failed $LINENO +"${LZIPRECOVER}" --dump=emp "${in_em}" | "${LZIP}" -d | cmp empty - || + test_failed $LINENO +rm -f test_3m.txt.lz empty out || framework_failure echo if [ ${fail} = 0 ] ; then diff --git a/testsuite/fox6_mark.lz b/testsuite/fox6_mark.lz new file mode 100644 index 0000000..32b2ac0 Binary files /dev/null and b/testsuite/fox6_mark.lz differ diff --git a/testsuite/test_3m.txt.lz.md5 b/testsuite/test_3m.txt.lz.md5 new file mode 100644 index 0000000..5bec6bc --- /dev/null +++ b/testsuite/test_3m.txt.lz.md5 @@ -0,0 +1 @@ +6a6bb58464ec8567eab17015064d0c5b test_3m.txt.lz diff --git a/unzcrash.cc b/unzcrash.cc index 0c92af8..b04bd05 100644 --- a/unzcrash.cc +++ b/unzcrash.cc @@ -1,6 +1,6 @@ /* Unzcrash - Tests robustness of decompressors to corrupted data. Inspired by unzcrash.c from Julian Seward's bzip2. - Copyright (C) 2008-2022 Antonio Diaz Diaz. 
+ Copyright (C) 2008-2023 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,9 +17,9 @@ */ /* Exit status: 0 for a normal exit, 1 for environmental problems - (file not found, invalid flags, I/O errors, etc), 2 to indicate a - corrupt or invalid input file, 3 for an internal consistency error - (e.g., bug) which caused unzcrash to panic. + (file not found, invalid command line options, I/O errors, etc), 2 to + indicate a corrupt or invalid input file, 3 for an internal consistency + error (e.g., bug) which caused unzcrash to panic. */ #define _FILE_OFFSET_BITS 64 @@ -91,7 +91,7 @@ void show_help() " -B, --block[=][,] test blocks of given size [512,0]\n" " -d, --delta= test one byte/block/truncation every n bytes\n" " -e, --set-byte=, set byte at position to value \n" - " -n, --no-verify skip initial verification of file.lz\n" + " -n, --no-check skip initial test of file.lz and zcmp\n" " -p, --position= first byte position to test [default 0]\n" " -q, --quiet suppress all messages\n" " -s, --size= number of byte positions to test [all]\n" @@ -101,10 +101,10 @@ void show_help() "Examples of : 1 1,2,3 1-4 1,3-5,8 1-3,5-8\n" "A negative position is relative to the end of file.\n" "A negative size is relative to the rest of the file.\n" - "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" - "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" - "invalid input file, 3 for an internal consistency error (e.g., bug) which\n" - "caused unzcrash to panic.\n" + "\nExit status: 0 for a normal exit, 1 for environmental problems\n" + "(file not found, invalid command line options, I/O errors, etc), 2 to\n" + "indicate a corrupt or invalid input file, 3 for an internal consistency\n" + "error (e.g., bug) which caused unzcrash to panic.\n" "\nReport bugs to lzip-bug@nongnu.org\n" "Lziprecover home page: 
http://www.nongnu.org/lzip/lziprecover.html\n" ); } @@ -125,58 +125,46 @@ void parse_block( const char * const arg, const char * const option_name, if( tail[0] == ',' ) value = getnum( tail + 1, option_name, 0, 0, 255 ); else if( tail[0] ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Bad separator between and in " - "argument of option '%s'.\n", program_name, option_name ); - std::exit( 1 ); - } + { show_option_error( arg, "Missing comma between and in", + option_name ); std::exit( 1 ); } } /* Return the address of a malloc'd buffer containing the file data and - the file size in '*size'. - In case of error, return 0 and do not modify '*size'. + the file size in '*file_sizep'. + In case of error, return 0 and do not modify '*file_sizep'. */ -uint8_t * read_file( const char * const name, long * const size ) +uint8_t * read_file( const char * const filename, long * const file_sizep ) { - FILE * const f = std::fopen( name, "rb" ); + FILE * const f = std::fopen( filename, "rb" ); if( !f ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Can't open input file '%s': %s\n", - program_name, name, std::strerror( errno ) ); - return 0; - } + { show_file_error( filename, "Can't open input file", errno ); return 0; } - long buffer_size = 1 << 20; + long buffer_size = 65536; uint8_t * buffer = (uint8_t *)std::malloc( buffer_size ); if( !buffer ) { show_error( mem_msg ); return 0; } long file_size = std::fread( buffer, 1, buffer_size, f ); - while( file_size >= buffer_size ) + while( file_size >= buffer_size || ( !std::ferror( f ) && !std::feof( f ) ) ) { - if( buffer_size >= LONG_MAX ) + if( file_size >= buffer_size ) // may be false because of EINTR { - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Input file '%s' is too large.\n", - program_name, name ); - std::free( buffer ); return 0; + if( buffer_size >= LONG_MAX ) + { show_file_error( filename, "Input file is larger than LONG_MAX." 
); + std::free( buffer ); return 0; } + buffer_size = ( buffer_size <= LONG_MAX / 2 ) ? 2 * buffer_size : LONG_MAX; + uint8_t * const tmp = (uint8_t *)std::realloc( buffer, buffer_size ); + if( !tmp ) { show_error( mem_msg ); std::free( buffer ); return 0; } + buffer = tmp; } - buffer_size = ( buffer_size <= LONG_MAX / 2 ) ? 2 * buffer_size : LONG_MAX; - uint8_t * const tmp = (uint8_t *)std::realloc( buffer, buffer_size ); - if( !tmp ) { show_error( mem_msg ); std::free( buffer ); return 0; } - buffer = tmp; file_size += std::fread( buffer + file_size, 1, buffer_size - file_size, f ); } if( std::ferror( f ) || !std::feof( f ) ) { - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Error reading file '%s': %s\n", - program_name, name, std::strerror( errno ) ); + show_file_error( filename, "Error reading input file", errno ); std::free( buffer ); return 0; } std::fclose( f ); - *size = file_size; + *file_sizep = file_size; return buffer; } @@ -194,8 +182,9 @@ public: { return ( i >= 1 && i <= 8 && data[i-1] ); } // Recognized formats: 1 1,2,3 1-4 1,3-5,8 1-3,5-8 - bool parse_bs( const char * p ) + void parse_bs( const char * const arg, const char * const option_name ) { + const char * p = arg; for( int i = 0; i < 8; ++i ) data[i] = false; while( true ) { @@ -209,11 +198,11 @@ public: for( int c = ch1; c <= *p; ++c ) data[c-'1'] = true; ++p; } - if( *p == 0 ) return true; + if( *p == 0 ) return; if( *p == ',' ) ++p; else break; } - show_error( "Invalid value or range." 
); - return false; + show_option_error( arg, "Invalid bit position or range in", option_name ); + std::exit( 1 ); } // number of N-bit errors per byte (N=0 to 8): 1 8 28 56 70 56 28 8 1 @@ -327,9 +316,9 @@ bool word_split( const char * const command, std::vector< std::string > & args ) } -// return -1 if fatal error, 0 if OK, >0 if error +// return -1 if fatal error, 0 if OK, > 0 if error int fork_and_feed( const uint8_t * const buffer, const long buffer_size, - const char ** const argv, const bool verify = false ) + const char ** const argv, const bool check = false ) { int fda[2]; // pipe to child if( pipe( fda ) < 0 ) @@ -342,7 +331,7 @@ int fork_and_feed( const uint8_t * const buffer, const long buffer_size, { if( close( fda[0] ) != 0 ) { show_error( "Error closing unused pipe", errno ); return -1; } - if( writeblock( fda[1], buffer, buffer_size ) != buffer_size && verify ) + if( writeblock( fda[1], buffer, buffer_size ) != buffer_size && check ) { show_error( "Can't write to child process", errno ); return -1; } if( close( fda[1] ) != 0 ) { show_error( "Error closing pipe", errno ); return -1; } @@ -375,7 +364,7 @@ int main( const int argc, const char * const argv[] ) long block_size = 512; Mode program_mode = m_byte; uint8_t block_value = 0; - bool verify = true; + bool check = true; if( argc > 0 ) invocation_name = argv[0]; const Arg_parser::Option options[] = @@ -385,6 +374,7 @@ int main( const int argc, const char * const argv[] ) { 'B', "block", Arg_parser::maybe }, { 'd', "delta", Arg_parser::yes }, { 'e', "set-byte", Arg_parser::yes }, + { 'n', "no-check", Arg_parser::no }, { 'n', "no-verify", Arg_parser::no }, { 'p', "position", Arg_parser::yes }, { 'q', "quiet", Arg_parser::no }, @@ -409,12 +399,12 @@ int main( const int argc, const char * const argv[] ) switch( code ) { case 'h': show_help(); return 0; - case 'b': if( !bits.parse_bs( arg ) ) return 1; program_mode = m_byte; break; + case 'b': bits.parse_bs( arg, pn ); program_mode = m_byte; break; 
case 'B': if( arg[0] ) parse_block( arg, pn, block_size, block_value ); program_mode = m_block; break; case 'd': delta = getnum( arg, pn, block_size, 1, INT_MAX ); break; case 'e': bad_byte.parse_bb( arg, pn ); break; - case 'n': verify = false; break; + case 'n': check = false; break; case 'p': pos = getnum( arg, pn, block_size, -LONG_MAX, LONG_MAX ); break; case 'q': verbosity = -1; break; case 's': max_size = getnum( arg, pn, block_size, -LONG_MAX, LONG_MAX ); break; @@ -438,7 +428,7 @@ int main( const int argc, const char * const argv[] ) const char * const command = parser.argument( argind ).c_str(); std::vector< std::string > command_args; if( !word_split( command, command_args ) ) - { show_file_error( command, "Invalid command" ); return 1; } + { show_file_error( command, "Invalid command." ); return 1; } const char ** const command_argv = new const char *[command_args.size()+1]; for( unsigned i = 0; i < command_args.size(); ++i ) command_argv[i] = command_args[i].c_str(); @@ -456,7 +446,7 @@ int main( const int argc, const char * const argv[] ) zcmp_command = zcmp_program; zcmp_command += " '"; zcmp_command += filename; zcmp_command += "' -"; if( !word_split( zcmp_command.c_str(), zcmp_args ) ) - { show_file_error( zcmp_command.c_str(), "Invalid zcmp command" ); + { show_file_error( zcmp_command.c_str(), "Invalid zcmp command." ); return 1; } zcmp_argv = new const char *[zcmp_args.size()+1]; for( unsigned i = 0; i < zcmp_args.size(); ++i ) @@ -464,9 +454,9 @@ int main( const int argc, const char * const argv[] ) zcmp_argv[zcmp_args.size()] = 0; } - // verify original file + // check original file if( verbosity >= 1 ) fprintf( stderr, "Testing file '%s'\n", filename ); - if( verify ) + if( check ) { const int ret = fork_and_feed( buffer, file_size, command_argv, true ); if( ret != 0 ) @@ -510,12 +500,8 @@ int main( const int argc, const char * const argv[] ) if( max_size < 0 ) max_size += file_size - pos; const long end = ( ( max_size < file_size - pos ) ? 
pos + max_size : file_size ); if( bad_byte.pos >= file_size ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Position is beyond end of file " - "in option '%s'.\n", program_name, bad_byte.option_name ); - return 1; - } + { show_option_error( bad_byte.argument, "Position is beyond end of file in", + bad_byte.option_name ); return 1; } if( bad_byte.pos >= 0 ) buffer[bad_byte.pos] = bad_byte( buffer[bad_byte.pos] ); long positions = 0, decompressions = 0, successes = 0, failed_comparisons = 0; @@ -625,17 +611,17 @@ int main( const int argc, const char * const argv[] ) if( verbosity >= 0 ) { - std::fprintf( stderr, "\n%8ld %ss tested\n%8ld total decompressions" - "\n%8ld decompressions returned with zero status", + std::fprintf( stderr, "\n%9ld %ss tested\n%9ld total decompressions" + "\n%9ld decompressions returned with zero status", positions, mode_str[program_mode], decompressions, successes ); if( successes > 0 ) { if( zcmp_command.empty() ) - std::fputs( "\n comparisons disabled\n", stderr ); + std::fputs( "\n comparisons disabled\n", stderr ); else if( failed_comparisons > 0 ) - std::fprintf( stderr, ", of which\n%8ld comparisons failed\n", + std::fprintf( stderr, ", of which\n%9ld comparisons failed\n", failed_comparisons ); - else std::fputs( "\n all comparisons passed\n", stderr ); + else std::fputs( "\n all comparisons passed\n", stderr ); } else std::fputc( '\n', stderr ); } -- cgit v1.2.3