diff options
-rw-r--r-- | ChangeLog | 46 | ||||
-rw-r--r-- | INSTALL | 7 | ||||
-rw-r--r-- | Makefile.in | 45 | ||||
-rw-r--r-- | NEWS | 66 | ||||
-rw-r--r-- | README | 15 | ||||
-rw-r--r-- | alone_to_lz.cc | 143 | ||||
-rw-r--r-- | arg_parser.cc | 2 | ||||
-rw-r--r-- | arg_parser.h | 2 | ||||
-rw-r--r-- | block.cc | 2 | ||||
-rw-r--r-- | block.h | 4 | ||||
-rwxr-xr-x | configure | 14 | ||||
-rw-r--r-- | decoder.cc | 58 | ||||
-rw-r--r-- | decoder.h | 15 | ||||
-rw-r--r-- | doc/lziprecover.1 | 17 | ||||
-rw-r--r-- | doc/lziprecover.info | 223 | ||||
-rw-r--r-- | doc/lziprecover.texi | 212 | ||||
-rw-r--r-- | file_index.cc | 2 | ||||
-rw-r--r-- | file_index.h | 2 | ||||
-rw-r--r-- | lzip.h | 71 | ||||
-rw-r--r-- | main.cc | 282 | ||||
-rw-r--r-- | merge.cc | 166 | ||||
-rw-r--r-- | mtester.cc | 44 | ||||
-rw-r--r-- | mtester.h | 32 | ||||
-rw-r--r-- | range_dec.cc | 84 | ||||
-rw-r--r-- | repair.cc | 182 | ||||
-rw-r--r-- | split.cc | 33 | ||||
-rwxr-xr-x | testsuite/check.sh | 220 | ||||
-rw-r--r-- | testsuite/fox5_bad1.lz | bin | 400 -> 0 bytes | |||
-rw-r--r-- | testsuite/fox5_bad3.lz | bin | 400 -> 0 bytes | |||
-rw-r--r-- | testsuite/fox5_bad4.lz | bin | 400 -> 0 bytes | |||
-rw-r--r-- | testsuite/fox6.lz (renamed from testsuite/fox5.lz) | bin | 400 -> 480 bytes | |||
-rw-r--r-- | testsuite/fox6_bad1.lz | bin | 0 -> 480 bytes | |||
-rw-r--r-- | testsuite/fox6_bad1.txt (renamed from testsuite/fox5_bad1.txt) | 0 | ||||
-rw-r--r-- | testsuite/fox6_bad2.lz | bin | 0 -> 480 bytes | |||
-rw-r--r-- | testsuite/fox6_bad3.lz | bin | 0 -> 480 bytes | |||
-rw-r--r-- | testsuite/fox6_bad4.lz (renamed from testsuite/fox5_bad2.lz) | bin | 400 -> 480 bytes | |||
-rw-r--r-- | testsuite/fox6_bad5.lz (renamed from testsuite/fox5_bad5.lz) | bin | 400 -> 480 bytes | |||
-rw-r--r-- | testsuite/test.txt.lz | bin | 7376 -> 7376 bytes | |||
-rw-r--r-- | testsuite/test.txt.lzma | bin | 0 -> 7363 bytes | |||
-rw-r--r-- | testsuite/test_bad1.lz | bin | 7376 -> 7376 bytes | |||
-rw-r--r-- | testsuite/test_bad2.lz | bin | 7376 -> 7376 bytes | |||
-rw-r--r-- | testsuite/test_bad3.lz | bin | 7376 -> 7376 bytes | |||
-rw-r--r-- | testsuite/test_bad4.lz | bin | 7376 -> 7376 bytes | |||
-rw-r--r-- | testsuite/test_bad5.lz | bin | 7376 -> 7376 bytes | |||
-rw-r--r-- | unzcrash.cc | 361 |
45 files changed, 1576 insertions, 774 deletions
@@ -1,21 +1,37 @@ -2015-09-16 Antonio Diaz Diaz <antonio@gnu.org> +2016-05-12 Antonio Diaz Diaz <antonio@gnu.org> - * Version 1.18-pre2 released. + * Version 1.18 released. * main.cc: Added new option '-a, --trailing-error'. + * merge.cc (open_input_files): Use CRC to test identical files. + * repair.cc (repair_file): Detect gross damage before repairing. + * repair.cc: Repair a damaged dictionary size in the header. + * repair.cc: Try bytes at offsets 7 to 11 first. * Decompression time has been reduced by 2%. * main.cc (decompress): Print up to 6 bytes of trailing data when '-tvvvv' is specified. + * decoder.cc (verify_trailer): Removed test of final code. + * main.cc (main): Delete '--output' file if infd is a terminal. + * main.cc (main): Don't use stdin more than once. + * Use 'close_and_set_permissions' and 'set_signals' in all modes. * range_dec.cc (list_file): Show dictionary size and size of trailing data (if any) with '-lv'. + * Added new option '-A, --alone-to-lz'. + * Added new option '-W, --debug-decompress'. + * Added new option '-X, --show-packets'. + * Changed short name of option '--debug-delay' to '-Y'. + * Changed short name of option '--debug-repair' to '-Z'. + * unzcrash.cc: Added new option '-B, --block'. + * unzcrash.cc: Added new option '-d, --delta'. + * unzcrash.cc: Added new option '-t, --truncate'. + * unzcrash.cc: Added new option '-z, --zcmp'. + * unzcrash.cc: Read files as large as RAM allows. + * unzcrash.cc: Compare output using zcmp if decompressor returns 0. + * unzcrash.cc: Accept negative position and size. * lzip.texi: Added chapter 'Trailing data'. + * configure: Avoid warning on some shells when testing for g++. + * Makefile.in: Detect the existence of install-info. * testsuite/check.sh: Don't check error messages. - -2015-06-30 Antonio Diaz Diaz <antonio@gnu.org> - - * Version 1.18-pre1 released. - * repair.cc (repair_file): Detect gross damage before repairing. - * repair.cc: Try bytes at offsets 7 and 8 first. 
- * Added new option '-x, --show-packets'. + * testsuite/check.sh: A POSIX shell is required to run the tests. 2015-05-28 Antonio Diaz Diaz <antonio@gnu.org> @@ -39,10 +55,10 @@ 2013-09-14 Antonio Diaz Diaz <antonio@gnu.org> * Version 1.15 released. - * repair.cc: Repair multi-member files with up to one byte error + * repair.cc: Repair multimember files with up to one byte error per member. - * merge.cc: Merge multi-member files. - * main.cc (show_header): Do not show header version. + * merge.cc: Merge multimember files. + * main.cc (show_header): Don't show header version. * lziprecover.texinfo: Added chapters 'Repairing files', 'Merging files' and 'Unzcrash'. @@ -75,7 +91,7 @@ range of bytes decompressing only the members containing the desired data. * Added new option '-l, --list' which prints correct total file - sizes and ratios even for multi-member files. + sizes and ratios even for multimember files. * merge.cc repair.cc: Remove output file if recovery fails. * Changed quote characters in messages as advised by GNU Standards. * split.cc: Use Boyer-Moore algorithm to search for headers. @@ -105,7 +121,7 @@ * lziprecover.cc: Added new option '-f, --force'. * lziprecover.cc: Added new option '-o, --output'. * lziprecover.cc: Added new option '-s, --split' to select the - until now only operation of splitting multi-member files. + until now only operation of splitting multimember files. * lziprecover.cc: If no operation is specified, warn the user and do nothing. @@ -122,7 +138,7 @@ * testsuite/unzcrash.cc: Test all 1-byte errors. -Copyright (C) 2009-2015 Antonio Diaz Diaz. +Copyright (C) 2009-2016 Antonio Diaz Diaz. This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute and @@ -1,10 +1,13 @@ Requirements ------------ You will need a C++ compiler. 
-I use gcc 4.9.1 and 4.1.2, but the code should compile with any +I use gcc 5.3.0 and 4.1.2, but the code should compile with any standards compliant compiler. Gcc is available at http://gcc.gnu.org. +Unzcrash needs a zcmp program able to understand the format being +tested. For example the one provided by zutils. +Zutils is available at http://www.nongnu.org/zutils/zutils.html Procedure --------- @@ -62,7 +65,7 @@ After running 'configure', you can run 'make' and 'make install' as explained above. -Copyright (C) 2009-2015 Antonio Diaz Diaz. +Copyright (C) 2009-2016 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/Makefile.in b/Makefile.in index 81b34a6..258ecc1 100644 --- a/Makefile.in +++ b/Makefile.in @@ -5,9 +5,10 @@ INSTALL_PROGRAM = $(INSTALL) -m 755 INSTALL_DATA = $(INSTALL) -m 644 INSTALL_DIR = $(INSTALL) -d -m 755 SHELL = /bin/sh +CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1 -objs = arg_parser.o block.o file_index.o merge.o mtester.o range_dec.o \ - repair.o split.o decoder.o main.o +objs = arg_parser.o alone_to_lz.o block.o file_index.o merge.o mtester.o \ + range_dec.o repair.o split.o decoder.o main.o unzobjs = arg_parser.o unzcrash.o @@ -34,18 +35,19 @@ unzcrash.o : unzcrash.cc %.o : %.cc $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< -$(objs) : Makefile -arg_parser.o : arg_parser.h -block.o : block.h -decoder.o : lzip.h decoder.h -file_index.o : lzip.h block.h file_index.h -main.o : arg_parser.h lzip.h decoder.h block.h -merge.o : lzip.h decoder.h block.h file_index.h -mtester.o : lzip.h mtester.h -range_dec.o : lzip.h decoder.h block.h file_index.h -repair.o : lzip.h mtester.h block.h file_index.h -split.o : lzip.h block.h file_index.h -unzcrash.o : arg_parser.h Makefile +$(objs) : Makefile +alone_to_lz.o : lzip.h mtester.h +arg_parser.o : arg_parser.h +block.o : block.h +decoder.o : lzip.h decoder.h +file_index.o : lzip.h block.h file_index.h 
+main.o : arg_parser.h lzip.h decoder.h block.h +merge.o : lzip.h decoder.h block.h file_index.h +mtester.o : lzip.h mtester.h +range_dec.o : lzip.h decoder.h block.h file_index.h +repair.o : lzip.h mtester.h block.h file_index.h +split.o : lzip.h block.h file_index.h +unzcrash.o : arg_parser.h Makefile doc : info man @@ -82,7 +84,9 @@ install-info : if [ ! -d "$(DESTDIR)$(infodir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(infodir)" ; fi -rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"* $(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info "$(DESTDIR)$(infodir)/$(pkgname).info" - -install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info" + -if $(CAN_RUN_INSTALLINFO) ; then \ + install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info" ; \ + fi install-info-compress : install-info lzip -v -9 "$(DESTDIR)$(infodir)/$(pkgname).info" @@ -105,7 +109,9 @@ uninstall-bin : -rm -f "$(DESTDIR)$(bindir)/$(progname)" uninstall-info : - -install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info" + -if $(CAN_RUN_INSTALLINFO) ; then \ + install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info" ; \ + fi -rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"* uninstall-man : @@ -126,11 +132,12 @@ dist : doc $(DISTNAME)/doc/$(pkgname).info \ $(DISTNAME)/doc/$(pkgname).texi \ $(DISTNAME)/testsuite/check.sh \ - $(DISTNAME)/testsuite/fox5.lz \ - $(DISTNAME)/testsuite/fox5_bad[1-5].lz \ - $(DISTNAME)/testsuite/fox5_bad1.txt \ + $(DISTNAME)/testsuite/fox6.lz \ + $(DISTNAME)/testsuite/fox6_bad[1-5].lz \ + $(DISTNAME)/testsuite/fox6_bad1.txt \ $(DISTNAME)/testsuite/test.txt \ $(DISTNAME)/testsuite/test.txt.lz \ + $(DISTNAME)/testsuite/test.txt.lzma \ $(DISTNAME)/testsuite/test21723.txt \ $(DISTNAME)/testsuite/test_bad[1-5].lz \ $(DISTNAME)/*.h \ @@ -1,27 +1,71 @@ Changes in version 1.18: +The option "-a, --trailing-error", which makes lziprecover exit with +error status 2 if any remaining 
input is detected after decompressing +the last member, has been added. + +"--merge" now detects identical files by their CRC. + "--repair" now tries to detect gross damage in the file before attempting to repair it. -"--repair" now tries bytes at member offsets 7 and 8 first because +"--repair" now can repair a damaged dictionary size in the header. + +"--repair" now tries bytes at member offsets 7 to 11 first because errors in these bytes sometimes can't be detected until the end of the member. -The option "-a, --trailing-error", which makes lzip exit with error -status 2 if any remaining input is detected after decompressing the last -member, has been added. +Decompression time has been reduced by 2%. -The new option "-x, --show-packets", which shows the LZMA packets -(coding sequences) coded in a given file, has been added. +When decompressing or testing, up to 6 bytes of trailing data are +printed if "-vvvv" is specified. -Decompression time has been reduced by 2%. +The test of the value remaining in the range decoder has been removed. +(After extensive testing it has been found useless to detect corruption +in the decompressed data. Eliminating it reduces the number of false +positives for corruption and makes error detection more accurate). + +When decompressing, the file specified with the '--output' option is now +deleted if the input is a terminal. -Up to 6 bytes of trailing data are printed when "-tvvvv" is specified. +"--merge", "--range-decompress", "--repair" and "--split" now preserve +dates, permissions, and, when possible, ownership of the files created +just as "--decompress" does. -Dictionary size and size of trailing data (if any) are printed when +Dictionary size and size of trailing data (if any) are now printed when "-lv" is specified. +The new option "-A, --alone-to-lz", which converts lzma-alone files to +lzip format without recompressing, just adding a lzip header and +trailer, has been added. 
Only streamed files with default LZMA +properties can be converted; non-streamed lzma-alone files lack the end +of stream marker required in lzip files. + +The new option "-W, --debug-decompress=<pos>,<val>", which sets the byte +<pos> to the value <val> and then decompresses to stdout the resulting +corrupt data, has been added. + +The new option "-X, --show-packets", which shows the LZMA packets +(coding sequences) coded in a given file, has been added. + +The short name of option "--debug-delay" has been changed to "-Y". + +The short name of option "--debug-repair" has been changed to "-Z". + +The new options "-B, --block", "-d, --delta", "-t, --truncate" and "-z, +--zcmp" have been added to unzcrash. + +Unzcrash now can read files as large as RAM allows. + +Unzcrash now compares the output of the decompressor for the original +and corrupt files when the decompressor returns with zero status. For +this unzcrash needs a 'zcmp' program able to understand the format being +tested. For example the one provided by 'zutils'. + +Unzcrash now accepts negative position (relative to the end of file) and +negative size (relative to the rest of the file). + The new chapter "Trailing data" has been added to the manual. -Fixed a harmless check failure on Windows caused by the failed -comparison of a message in text mode. +A harmless check failure on Windows, caused by the failed comparison of +a message in text mode, has been fixed. @@ -6,6 +6,9 @@ files, produce a correct file by merging the good parts of two or more damaged copies, extract data from damaged files, decompress files and test integrity of files. +Lziprecover is not a replacement for regular backups, but a last line of +defense for the case where the backups are also damaged. 
+ The lzip file format is designed for data sharing and long-term archiving, taking into account both data integrity and decoder availability: @@ -43,12 +46,11 @@ If a file is too damaged for lziprecover to repair it, all the recoverable data in all members of the file can be extracted in one step with the '-D' option. -Lziprecover is able to efficiently extract a range of bytes from a -multi-member file, because it only decompresses the members containing -the desired data. +Lziprecover provides random access to the data in multimember files; it +only decompresses the members containing the desired data. Lziprecover can print correct total file sizes and ratios even for -multi-member files. +multimember files. When recovering data, lziprecover takes as arguments the names of the damaged files and writes zero or more recovered files depending on the @@ -66,16 +68,13 @@ few MB) with small errors (one sector damaged per copy), the probability approaches 100 percent even with only two copies. (Supposing that the errors are randomly located inside each copy). -Lziprecover is not a replacement for regular backups, but a last line of -defense for the case where the backups are also damaged. - The lziprecover package also includes unzcrash, a program written to test robustness to decompression of corrupted data, inspired by unzcrash.c from Julian Seward's bzip2. Type 'make unzcrash' in the lziprecover source directory to build it. Then try 'unzcrash --help'. -Copyright (C) 2009-2015 Antonio Diaz Diaz. +Copyright (C) 2009-2016 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/alone_to_lz.cc b/alone_to_lz.cc new file mode 100644 index 0000000..22e3f29 --- /dev/null +++ b/alone_to_lz.cc @@ -0,0 +1,143 @@ +/* Lziprecover - Data recovery tool for the lzip format + Copyright (C) 2009-2016 Antonio Diaz Diaz. 
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <algorithm> +#include <cerrno> +#include <climits> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <string> +#include <vector> +#include <stdint.h> +#include <unistd.h> + +#include "lzip.h" +#include "mtester.h" + + +namespace { + +/* Returns the address of a malloc'd buffer containing the file data and + the file size in '*size'. The buffer is at least 20 bytes larger. + In case of error, returns 0 and does not modify '*size'. +*/ +uint8_t * read_file( const int infd, long * const size, const Pretty_print & pp ) + { + long buffer_size = 1 << 20; + uint8_t * buffer = (uint8_t *)std::malloc( buffer_size ); + if( !buffer ) throw std::bad_alloc(); + + long file_size = readblock( infd, buffer, buffer_size - 20 ); + while( file_size >= buffer_size - 20 && !errno ) + { + if( buffer_size >= LONG_MAX ) + { pp( "file is too large" ); std::free( buffer ); return 0; } + buffer_size = ( buffer_size <= LONG_MAX / 2 ) ? 
2 * buffer_size : LONG_MAX; + uint8_t * const tmp = (uint8_t *)std::realloc( buffer, buffer_size ); + if( !tmp ) { std::free( buffer ); throw std::bad_alloc(); } + buffer = tmp; + file_size += + readblock( infd, buffer + file_size, buffer_size - 20 - file_size ); + } + if( errno ) + { + show_error( "Error reading file", errno ); + std::free( buffer ); return 0; + } + close( infd ); + *size = file_size; + return buffer; + } + + +bool validate_ds( unsigned * const dictionary_size ) + { + if( *dictionary_size < min_dictionary_size ) + { *dictionary_size = min_dictionary_size; return false; } + if( *dictionary_size > max_dictionary_size ) + { *dictionary_size = max_dictionary_size; return false; } + return true; + } + +} // end namespace + + +int alone_to_lz( const int infd, const Pretty_print & pp ) + { + enum { lzma_header_size = 13, offset = lzma_header_size - File_header::size }; + + try { + long file_size = 0; + uint8_t * const buffer = read_file( infd, &file_size, pp ); + if( !buffer ) return 1; + if( pp.verbosity() >= 1 ) pp(); + + if( file_size < lzma_header_size ) + { pp( "file is too short" ); std::free( buffer ); return 2; } + if( buffer[0] != 93 ) // (45 * 2) + (9 * 0) + 3 + { + pp( "file has non-default LZMA properties" ); + std::free( buffer ); return 2; + } + for( int i = 5; i < 13; ++i ) if( buffer[i] != 0xFF ) + { pp( "file is non-streamed" ); std::free( buffer ); return 2; } + + unsigned dictionary_size = 0; + for( int i = 4; i > 0; --i ) + { dictionary_size <<= 8; dictionary_size += buffer[i]; } + const unsigned orig_dictionary_size = dictionary_size; + validate_ds( &dictionary_size ); + File_header & header = *(File_header *)( buffer + offset ); + header.set_magic(); + header.dictionary_size( dictionary_size ); + for( int i = 0; i < File_trailer::size; ++i ) buffer[file_size++] = 0; + { + LZ_mtester mtester( buffer + offset, file_size - offset, dictionary_size ); + const int result = mtester.test_member(); + if( result == 1 && orig_dictionary_size > 
max_dictionary_size ) + { pp( "dictionary size is too large" ); std::free( buffer ); return 2; } + if( result != 3 || !mtester.finished() ) + { pp( "file is corrupt" ); std::free( buffer ); return 2; } + if( mtester.max_distance() < dictionary_size && + dictionary_size > min_dictionary_size ) + { + dictionary_size = + std::max( mtester.max_distance(), (unsigned)min_dictionary_size ); + header.dictionary_size( dictionary_size ); + } + File_trailer & trailer = + *(File_trailer *)( buffer + file_size - File_trailer::size ); + trailer.data_crc( mtester.crc() ); + trailer.data_size( mtester.data_position() ); + trailer.member_size( mtester.member_position() ); + } + LZ_mtester mtester( buffer + offset, file_size - offset, dictionary_size ); + if( mtester.test_member() != 0 || !mtester.finished() ) + { pp( "conversion failed" ); std::free( buffer ); return 2; } + if( writeblock( outfd, buffer + offset, file_size - offset ) != file_size - offset ) + { + show_error( "Error writing output file", errno ); + std::free( buffer ); return 1; + } + std::free( buffer ); + } + catch( std::bad_alloc ) { pp( "Not enough memory." ); return 1; } + catch( Error e ) { pp(); show_error( e.msg, errno ); return 1; } + if( pp.verbosity() >= 1 ) std::fputs( "done\n", stderr ); + return 0; + } diff --git a/arg_parser.cc b/arg_parser.cc index 551264a..82972ad 100644 --- a/arg_parser.cc +++ b/arg_parser.cc @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C++ version) - Copyright (C) 2006-2015 Antonio Diaz Diaz. + Copyright (C) 2006-2016 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided diff --git a/arg_parser.h b/arg_parser.h index 3dc85d0..f45b9ac 100644 --- a/arg_parser.h +++ b/arg_parser.h @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C++ version) - Copyright (C) 2006-2015 Antonio Diaz Diaz. + Copyright (C) 2006-2016 Antonio Diaz Diaz. 
This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2015 Antonio Diaz Diaz. + Copyright (C) 2009-2016 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2015 Antonio Diaz Diaz. + Copyright (C) 2009-2016 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -53,7 +53,7 @@ public: // defined in range_dec.cc int range_decompress( const std::string & input_filename, - const std::string & output_filename, + const std::string & default_output_filename, Block range, const int verbosity, const bool force, const bool ignore, const bool to_stdout ); @@ -1,12 +1,12 @@ #! /bin/sh # configure script for Lziprecover - Data recovery tool for the lzip format -# Copyright (C) 2009-2015 Antonio Diaz Diaz. +# Copyright (C) 2009-2016 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. pkgname=lziprecover -pkgversion=1.18-pre2 +pkgversion=1.18 progname=lziprecover srctrigger=doc/${pkgname}.texi @@ -26,8 +26,8 @@ CXXFLAGS='-Wall -W -O2' LDFLAGS= # checking whether we are using GNU C++. -${CXX} --version > /dev/null 2>&1 -if [ $? != 0 ] ; then +if /bin/sh -c "${CXX} --version" > /dev/null 2>&1 ; then true +else CXX=c++ CXXFLAGS='-W -O2' fi @@ -139,7 +139,7 @@ if [ -z "${no_create}" ] ; then rm -f config.status cat > config.status << EOF #! /bin/sh -# This file was generated automatically by configure. Do not edit. +# This file was generated automatically by configure. Don't edit. # Run this file to recreate the current configuration. 
# # This script is free software: you have unlimited permission @@ -165,8 +165,8 @@ echo "LDFLAGS = ${LDFLAGS}" rm -f Makefile cat > Makefile << EOF # Makefile for Lziprecover - Data recovery tool for the lzip format -# Copyright (C) 2009-2015 Antonio Diaz Diaz. -# This file was generated automatically by configure. Do not edit. +# Copyright (C) 2009-2016 Antonio Diaz Diaz. +# This file was generated automatically by configure. Don't edit. # # This Makefile is free software: you have unlimited permission # to copy, distribute and modify it. @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2015 Antonio Diaz Diaz. + Copyright (C) 2009-2016 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -73,13 +73,13 @@ long readblock( const int fd, uint8_t * const buf, const long size ) /* Returns the number of bytes really written. If (returned value < size), it is always an error. 
*/ -int writeblock( const int fd, const uint8_t * const buf, const int size ) +long writeblock( const int fd, const uint8_t * const buf, const long size ) { - int sz = 0; + long sz = 0; errno = 0; while( sz < size ) { - const int n = write( fd, buf + sz, size - sz ); + const int n = write( fd, buf + sz, std::min( 1L << 20, size - sz ) ); if( n > 0 ) sz += n; else if( n < 0 && errno != EINTR ) break; errno = 0; @@ -117,7 +117,8 @@ void LZ_decoder::flush_data() if( s > 0 && writeblock( outfd, buffer + stream_pos + i, s ) != s ) throw Error( "Write error" ); } - if( pos >= dictionary_size ) { partial_data_pos += pos; pos = 0; } + if( pos >= dictionary_size ) + { partial_data_pos += pos; pos = 0; pos_wrapped = true; } stream_pos = pos; } } @@ -126,66 +127,67 @@ void LZ_decoder::flush_data() bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const { File_trailer trailer; - const int trailer_size = File_trailer::size; - const unsigned long long member_size = rdec.member_position() + trailer_size; + int size = rdec.read_data( trailer.data, File_trailer::size ); + const unsigned long long data_size = data_position(); + const unsigned long long member_size = rdec.member_position(); + const int verbosity = pp.verbosity(); bool error = false; - int size = rdec.read_data( trailer.data, trailer_size ); - if( size < trailer_size ) + if( size < File_trailer::size ) { error = true; - if( pp.verbosity() >= 0 ) + if( verbosity >= 0 ) { pp(); std::fprintf( stderr, "Trailer truncated at trailer position %d;" " some checks may fail.\n", size ); } - while( size < trailer_size ) trailer.data[size++] = 0; + while( size < File_trailer::size ) trailer.data[size++] = 0; } - if( !rdec.code_is_zero() ) - { - error = true; - pp( "Range decoder final code is not zero." 
); - } if( trailer.data_crc() != crc() ) { error = true; - if( pp.verbosity() >= 0 ) + if( verbosity >= 0 ) { pp(); std::fprintf( stderr, "CRC mismatch; trailer says %08X, data CRC is %08X\n", trailer.data_crc(), crc() ); } } - if( trailer.data_size() != data_position() ) + if( trailer.data_size() != data_size ) { error = true; - if( pp.verbosity() >= 0 ) + if( verbosity >= 0 ) { pp(); std::fprintf( stderr, "Data size mismatch; trailer says %llu, data size is %llu (0x%llX)\n", - trailer.data_size(), data_position(), data_position() ); + trailer.data_size(), data_size, data_size ); } } if( trailer.member_size() != member_size ) { error = true; - if( pp.verbosity() >= 0 ) + if( verbosity >= 0 ) { pp(); std::fprintf( stderr, "Member size mismatch; trailer says %llu, member size is %llu (0x%llX)\n", trailer.member_size(), member_size, member_size ); } } - if( !error && pp.verbosity() >= 2 && data_position() > 0 && member_size > 0 ) + if( !error && verbosity >= 2 && data_size > 0 && member_size > 0 ) std::fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ", - (double)data_position() / member_size, - ( 8.0 * member_size ) / data_position(), - 100.0 * ( 1.0 - ( (double)member_size / data_position() ) ) ); - if( !error && pp.verbosity() >= 4 ) + (double)data_size / member_size, + ( 8.0 * member_size ) / data_size, + 100.0 * ( 1.0 - ( (double)member_size / data_size ) ) ); + if( !error && verbosity >= 4 ) std::fprintf( stderr, "data CRC %08X, data size %9llu, member size %8llu. 
", - trailer.data_crc(), trailer.data_size(), trailer.member_size() ); + crc(), data_size, member_size ); + if( rdec.get_code() != 0 && !error && verbosity >= 1 ) + { // corruption in the last 4 bytes of the EOS marker + pp(); + std::fprintf( stderr, "Range decoder final code is %08X\n", rdec.get_code() ); + } return !error; } @@ -301,7 +303,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) } rep3 = rep2; rep2 = rep1; rep1 = rep0_saved; state.set_match(); - if( rep0 >= dictionary_size || rep0 >= data_position() ) + if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) ) { flush_data(); return 1; } } copy_block( rep0, len ); @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2015 Antonio Diaz Diaz. + Copyright (C) 2009-2016 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -47,14 +47,15 @@ public: ~Range_decoder() { delete[] buffer; } - bool code_is_zero() const { return ( code == 0 ); } + unsigned get_code() const { return code; } bool finished() { return pos >= stream_pos && !read_block(); } unsigned long long member_position() const { return partial_member_pos + pos; } void reset_member_position() { partial_member_pos = -pos; } uint8_t get_byte() { - if( finished() ) return 0xAA; // make code != 0 + // 0xFF avoids decoder error if member is truncated at EOS marker + if( finished() ) return 0xFF; return buffer[pos++]; } @@ -219,6 +220,7 @@ class LZ_decoder unsigned stream_pos; // first byte not yet written to file uint32_t crc_; const int outfd; // output file descriptor + bool pos_wrapped; unsigned long long stream_position() const { return partial_data_pos + stream_pos; } @@ -270,7 +272,7 @@ class LZ_decoder void operator=( const LZ_decoder & ); // declared as private public: - LZ_decoder( const File_header & header, Range_decoder & rde, const int ofd, + LZ_decoder( Range_decoder & rde, const 
unsigned dict_size, const int ofd, const unsigned long long oskip = 0, const unsigned long long oend = -1ULL ) : @@ -278,12 +280,13 @@ public: outend( oend ), partial_data_pos( 0 ), rdec( rde ), - dictionary_size( header.dictionary_size() ), + dictionary_size( dict_size ), buffer( new uint8_t[dictionary_size] ), pos( 0 ), stream_pos( 0 ), crc_( 0xFFFFFFFFU ), - outfd( ofd ) + outfd( ofd ), + pos_wrapped( false ) { buffer[dictionary_size-1] = 0; } // prev_byte of first byte ~LZ_decoder() { delete[] buffer; } diff --git a/doc/lziprecover.1 b/doc/lziprecover.1 index 87c0598..97f564f 100644 --- a/doc/lziprecover.1 +++ b/doc/lziprecover.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1. -.TH LZIPRECOVER "1" "September 2015" "lziprecover 1.18-pre2" "User Commands" +.TH LZIPRECOVER "1" "May 2016" "lziprecover 1.18" "User Commands" .SH NAME lziprecover \- recovers data from damaged lzip files .SH SYNOPSIS @@ -12,9 +12,13 @@ Lziprecover can repair perfectly most files with small errors (up to one single\-byte error per member), without the need of any extra redundance at all. Losing an entire archive just because of a corrupt byte near the beginning is a thing of the past. +.PP Lziprecover can also produce a correct file by merging the good parts of two or more damaged copies, extract data from damaged files, decompress files and test integrity of files. +.PP +Lziprecover is not a replacement for regular backups, but a last line of +defense for the case where the backups are also damaged. 
.SH OPTIONS .TP \fB\-h\fR, \fB\-\-help\fR @@ -26,8 +30,11 @@ output version information and exit \fB\-a\fR, \fB\-\-trailing\-error\fR exit with error status if trailing data .TP +\fB\-A\fR, \fB\-\-alone\-to\-lz\fR +convert lzma\-alone files to lzip format +.TP \fB\-c\fR, \fB\-\-stdout\fR -send decompressed output to standard output +write to standard output, keep input files .TP \fB\-d\fR, \fB\-\-decompress\fR decompress @@ -60,7 +67,7 @@ suppress all messages try to repair a small error in file .TP \fB\-s\fR, \fB\-\-split\fR -split multi\-member file in single\-member files +split multimember file in single\-member files .TP \fB\-t\fR, \fB\-\-test\fR test compressed file integrity @@ -68,6 +75,8 @@ test compressed file integrity \fB\-v\fR, \fB\-\-verbose\fR be verbose (a 2nd \fB\-v\fR gives more) .PP +If no file names are given, or if a file is '\-', lziprecover decompresses +from standard input to standard output. Numbers may be followed by a multiplier: k = kB = 10^3 = 1000, Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc... .PP @@ -80,7 +89,7 @@ Report bugs to lzip\-bug@nongnu.org .br Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html .SH COPYRIGHT -Copyright \(co 2015 Antonio Diaz Diaz. +Copyright \(co 2016 Antonio Diaz Diaz. License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> .br This is free software: you are free to change and redistribute it. diff --git a/doc/lziprecover.info b/doc/lziprecover.info index 8d7bc66..17985d2 100644 --- a/doc/lziprecover.info +++ b/doc/lziprecover.info @@ -12,7 +12,7 @@ File: lziprecover.info, Node: Top, Next: Introduction, Up: (dir) Lziprecover Manual ****************** -This manual is for Lziprecover (version 1.18-pre2, 16 September 2015). +This manual is for Lziprecover (version 1.18, 12 May 2016). * Menu: @@ -30,7 +30,7 @@ This manual is for Lziprecover (version 1.18-pre2, 16 September 2015). 
* Concept index:: Index of concepts - Copyright (C) 2009-2015 Antonio Diaz Diaz. + Copyright (C) 2009-2016 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute and modify it. @@ -47,6 +47,9 @@ files, produce a correct file by merging the good parts of two or more damaged copies, extract data from damaged files, decompress files and test integrity of files. + Lziprecover is not a replacement for regular backups, but a last +line of defense for the case where the backups are also damaged. + The lzip file format is designed for data sharing and long-term archiving, taking into account both data integrity and decoder availability: @@ -72,6 +75,10 @@ repair the nearer it is from the beginning of the file. Therefore, with the help of lziprecover, losing an entire archive just because of a corrupt byte near the beginning is a thing of the past. + For compressible data, multiple lzip-compressed copies have a better +chance of surviving intact than one uncompressed copy using the same +amount of storage space. + Lziprecover is able to recover or decompress files produced by any of the compressors in the lzip family; lzip, plzip, minilzip/lzlib, clzip and pdlzip. @@ -87,12 +94,11 @@ garbage data may be produced at the end of each member): lziprecover -D0 -i -o file -q file.lz - Lziprecover is able to efficiently extract a range of bytes from a -multi-member file, because it only decompresses the members containing -the desired data. + Lziprecover provides random access to the data in multimember files; +it only decompresses the members containing the desired data. Lziprecover can print correct total file sizes and ratios even for -multi-member files. +multimember files. When recovering data, lziprecover takes as arguments the names of the damaged files and writes zero or more recovered files depending on the @@ -102,9 +108,6 @@ damaged files themselves are never modified. 
When decompressing or testing file integrity, lziprecover behaves like lzip or lunzip. - Lziprecover is not a replacement for regular backups, but a last -line of defense for the case where the backups are also damaged. - File: lziprecover.info, Node: Invoking lziprecover, Next: Data safety, Prev: Introduction, Up: Top @@ -115,6 +118,10 @@ The format for running lziprecover is: lziprecover [OPTIONS] [FILES] +When decompressing or testing, '-' used as a FILE argument means +standard input. It can be mixed with other FILES and is read just once, +the first time it appears in the command line. + Lziprecover supports the following options: '-h' @@ -133,30 +140,51 @@ The format for running lziprecover is: trailing garbage that can be safely ignored. *Note concat-example::. +'-A' +'--alone-to-lz' + Convert lzma-alone files to lzip format without recompressing, just + adding a lzip header and trailer. The conversion minimizes the + dictionary size of the resulting file (and therefore the amount of + memory required to decompress it). Only streamed files with + default LZMA properties can be converted; non-streamed lzma-alone + files lack the end of stream marker required in lzip files. + + The name of the converted lzip file is derived from that of the + original lzma-alone file as follows: + + filename.lzma becomes filename.lz + filename.tlz becomes filename.tar.lz + anyothername becomes anyothername.lz + '-c' '--stdout' - Decompress to standard output. Needed when reading from a named - pipe (fifo) or from a device. Use it to recover as much of the + Write decompressed data to standard output; keep input files + unchanged. This option is needed when reading from a named pipe + (fifo) or from a device. Use it also to recover as much of the uncompressed data as possible when decompressing a corrupt file. '-d' '--decompress' - Decompress the specified file(s). If a file fails to decompress, - lziprecover exits immediately without decompressing the rest of the - files. 
+ Decompress the specified file(s). If a file does not exist or + can't be opened, lziprecover continues decompressing the rest of + the files. If a file fails to decompress, lziprecover exits + immediately without decompressing the rest of the files. '-D RANGE' '--range-decompress=RANGE' Decompress only a range of bytes starting at decompressed byte - position 'BEGIN' and up to byte position 'END - 1'. Three formats - of RANGE are recognized, 'BEGIN', 'BEGIN-END', and 'BEGIN,SIZE'. - If only BEGIN is specified, END is taken as the end of the file. - The produced bytes are sent to standard output unless the - '--output' option is used. In order to guarantee the correctness - of the data produced, all members containing any part of the - desired data are decompressed and their integrity is verified. - This operation is more efficient in multi-member files because it - only decompresses the members containing the desired data. + position 'BEGIN' and up to byte position 'END - 1'. This option + provides random access to the data in multimember files; it only + decompresses the members containing the desired data. In order to + guarantee the correctness of the data produced, all members + containing any part of the desired data are decompressed and their + integrity is verified. + + Four formats of RANGE are recognized, 'BEGIN', 'BEGIN-END', + 'BEGIN,SIZE', and ',SIZE'. If only BEGIN is specified, END is taken + as the end of the file. If only SIZE is specified, BEGIN is taken + as the beginning of the file. The produced bytes are sent to + standard output unless the '--output' option is used. '-f' '--force' @@ -166,7 +194,7 @@ The format for running lziprecover is: '--ignore-errors' Make '--range-decompress' ignore data errors and continue decompressing the remaining members in the file. 
For example, - 'lziprecover -i -D0 file.lz > file' decompresses all the + 'lziprecover -D0 -i file.lz > file' decompresses all the recoverable data in all members of 'file.lz' without having to split it first. @@ -177,7 +205,7 @@ The format for running lziprecover is: '-l' '--list' Print total file sizes and ratios. The values produced are correct - even for multi-member files. Use it together with '-v' to see + even for multimember files. Use it together with '-v' to see information about the members in the file. '-m' @@ -195,7 +223,10 @@ The format for running lziprecover is: splitting, the names of the files produced are in the form 'rec01FILE', 'rec02FILE', etc. If decompressing from standard input and '--stdout' has not been specified, use 'FILE' as the - name of the decompressed file. + name of the decompressed file. If converting a lzma-alone file + from standard input and '--stdout' has not been specified, use + 'FILE.lz' as the name of the converted file. (Or plain 'FILE' if + it already ends in '.lz' or '.tlz'). '-q' '--quiet' @@ -270,15 +301,15 @@ File: lziprecover.info, Node: Data safety, Next: Repairing files, Prev: Invok ************************************** There are 3 main types of data corruption that may cause data loss: -single-byte errors, multi-byte errors (generally affecting a whole -sector in a block device), and total device failure. +single-byte errors, multibyte errors (generally affecting a whole sector +in a block device), and total device failure. Lziprecover protects natively against single-byte errors (*note Repairing files::), as long as file integrity is checked frequently enough that a second single-byte error does not develop in the same member before the first one is repaired. - Lziprecover also protects against multi-byte errors (*note Merging + Lziprecover also protects against multibyte errors (*note Merging files::), if at least one backup copy of the file is made. 
The only remedy for total device failure is storing backup copies in @@ -298,10 +329,10 @@ than one compressed data block (usually larger than 900 kB uncompressed), and if no block is damaged in both files, then the data can be manually recovered by splitting the files with bzip2recover, verifying every block and then copying the right blocks in the right -order in another file. +order into another file. But if you used lzip, the data can be automatically recovered as -long as no byte is damaged in both files. +long as the damaged areas don't overlap. Note that each error in a bzip2 file makes a whole block unusable, but each error in a lzip file only affects the damaged bytes, making it @@ -319,8 +350,8 @@ at all. If the reparation is successful, the repaired file will be identical bit for bit to the original. This makes lzip files resistant to bit-flip, one of the most common forms of data corruption. - The error may be located anywhere in the file except in the header -(first 6 bytes of each member) or in the 'Member size' field of the + The error may be located anywhere in the file except in the first 5 +bytes of each member header or in the 'Member size' field of the trailer (last 8 bytes of each member). If the error is in the header it can be easily repaired with a text editor like GNU Moe (*note File format::). If the error is in the member size, it is enough to ignore @@ -364,19 +395,13 @@ the file. is damaged in all copies), or are adjacent and the boundary can't be determined, or if the copies have too many damaged areas. - All the copies must have the same size. If some of them have been -truncated and are therefore smaller than they should, they can be -extended to the correct size with the following command before merging -them with the other copies: - - ddrescue --extend-outfile=<correct_size> small_file.lz extended_file.lz + All the copies must have the same size. 
If any of them is larger or +smaller than it should, either because it has been truncated or because +it got some garbage data appended at the end, it can be brought to the +correct size with the following command before merging it with the other +copies: - If some of the copies have got garbage data at the end and are -therefore larger than they should, their sizes can be reduced to the -correct value with the following command before merging them with the -other copies: - - ddrescue --size=<correct_size> large_file.lz reduced_file.lz + ddrescue -s<correct_size> -x<correct_size> file.lz correct_size_file.lz To give you an idea of its possibilities, when merging two copies, each of them with one damaged area affecting 1 percent of the copy, the @@ -427,7 +452,7 @@ additional information before, between, or after them. Each member has the following structure: +--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -| ID string | VN | DS | Lzma stream | CRC32 | Data size | Member size | +| ID string | VN | DS | LZMA stream | CRC32 | Data size | Member size | +--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ All multibyte values are stored in little endian order. @@ -450,8 +475,8 @@ additional information before, between, or after them. Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB Valid values for dictionary size range from 4 KiB to 512 MiB. -'Lzma stream' - The lzma stream, finished by an end of stream marker. Uses default +'LZMA stream' + The LZMA stream, finished by an end of stream marker. Uses default values for encoder properties. *Note Stream format: (lzip)Stream format, for a complete description. @@ -465,7 +490,7 @@ additional information before, between, or after them. Total size of the member, including header and trailer. This field acts as a distributed index, allows the verification of stream integrity, and facilitates safe recovery of undamaged members from - multi-member files. 
+ multimember files. @@ -490,15 +515,15 @@ member. Such trailing data may be: file. * In very rare cases, trailing data could be the corrupt header of - another member. In multi-member or concatenated files the + another member. In multimember or concatenated files the probability of corruption happening in the magic bytes is 5 times smaller than the probability of getting a false positive caused by the corruption of the integrity information itself. Therefore it can be considered to be below the noise level. Trailing data can be safely ignored in most cases. In some cases, -like user-added data, it is expected to be ignored. In those cases -where a file containing trailing data must be rejected, the option +like that of user-added data, it is expected to be ignored. In those +cases where a file containing trailing data must be rejected, the option '--trailing-error' can be used. *Note --trailing-error::. @@ -550,7 +575,7 @@ are abridged diagnostic messages from lziprecover). mv file_fixed.lz file.lz -Example 7: Split the multi-member file 'file.lz' and write each member +Example 7: Split the multimember file 'file.lz' and write each member in its own 'recXXXfile.lz' file. Then use 'lziprecover -t' to test the integrity of the resulting files. @@ -599,10 +624,22 @@ test robustness to decompression of corrupted data, inspired by unzcrash.c from Julian Seward's bzip2. Type 'make unzcrash' in the lziprecover source directory to build it. - Unzcrash reads the specified file and then repeatedly decompresses -it, increasing 256 times each byte of the compressed data, so as to -test all possible one-byte errors. This should not cause any invalid -memory accesses. If it does, please, report it as a bug. + By default, unzcrash reads the specified file and then repeatedly +decompresses it, increasing 256 times each byte of the compressed data, +so as to test all possible one-byte errors. 
+ + If the '--block' option is given, unzcrash reads the specified file +and then repeatedly decompresses it, setting all bytes in each +successive block to the value given, so as to test all possible full +sector errors. + + If the '--truncate' option is given, unzcrash reads the specified +file and then repeatedly decompresses it, truncating the file to +increasing lengths, so as to test all possible truncation points. + + None of the three test modes described above should cause any invalid +memory accesses. If any of them does, please report it as a bug to the +maintainers of the decompressor being tested. Unzcrash really executes as a subprocess the shell command specified in the first non-option argument, and then writes the file specified in @@ -611,6 +648,19 @@ modifying the corresponding byte each time. Therefore unzcrash can be used to test any decompressor (not only lzip), or even other decoder programs having a suitable command line syntax. + If the decompressor returns with zero status, unzcrash compares the +output of the decompressor for the original and corrupt files. If the +outputs differ, it means that the decompressor returned a false +negative; it failed to recognize the corruption and produced garbage +output. The only exception is when a multimember file is truncated just +after the last byte of a member, producing a shorter but valid +compressed file. Except in this latter case, please report any false +negative as a bug. + + In order to compare the outputs, unzcrash needs a 'zcmp' program +able to understand the format being tested, for example the one provided +by 'zutils'. *Note Zcmp: (zutils)Zcmp, + The format for running unzcrash is: unzcrash [OPTIONS] "lzip -tv" FILENAME.lz @@ -642,9 +692,21 @@ programs having a suitable command line syntax. 
1,3-5,8 1, 3, 4, 5 and 8 1-3,5-8 1, 2, 3, 5, 6, 7 and 8 +'-B[SIZE][,VALUE]' +'--block[=SIZE][,VALUE]' + Test block errors of given SIZE aligned to a SIZE-byte boundary, + simulating a whole sector I/O error. Block SIZE defaults to 512 + bytes. VALUE defaults to 0. + +'-d N' +'--delta=N' + Test only one of every N bytes, blocks or truncation sizes, + instead of all of them. + '-p BYTES' '--position=BYTES' - First byte position to test in the file. Defaults to 0. + First byte position to test in the file. Defaults to 0. Negative + values are relative to the end of the file. '-q' '--quiet' @@ -652,13 +714,24 @@ programs having a suitable command line syntax. '-s BYTES' '--size=BYTES' - Number of byte positions to test. If not specified, the whole file - is tested. + Number of byte positions to test. If not specified, the rest of + the file is tested (from '--position' to end of file). Negative + values are relative to the rest of the file. + +'-t' +'--truncate' + Test all possible truncation points in the range specified by + '--position' and '--size'. '-v' '--verbose' Verbose mode. +'-z' +'--zcmp=<command>' + Set zcmp command name and options. Defaults to 'zcmp'. Use + '--zcmp=false' to disable comparisons. 
+ Exit status: 0 for a normal exit, 1 for environmental problems (file not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or @@ -706,21 +779,21 @@ Concept index Tag Table: Node: Top231 -Node: Introduction1278 -Node: Invoking lziprecover4395 -Ref: --trailing-error4860 -Node: Data safety10294 -Node: Repairing files12218 -Node: Merging files14120 -Node: File names15961 -Node: File format16425 -Node: Trailing data18854 -Node: Examples20230 -Ref: concat-example20661 -Ref: ddrescue-example21725 -Node: Unzcrash23015 -Node: Problems25571 -Node: Concept index26123 +Node: Introduction1267 +Node: Invoking lziprecover4525 +Ref: --trailing-error5175 +Node: Data safety11779 +Node: Repairing files13702 +Node: Merging files15602 +Node: File names17217 +Node: File format17681 +Node: Trailing data20109 +Node: Examples21492 +Ref: concat-example21923 +Ref: ddrescue-example22986 +Node: Unzcrash24276 +Node: Problems28786 +Node: Concept index29338 End Tag Table diff --git a/doc/lziprecover.texi b/doc/lziprecover.texi index e29a59f..2702d70 100644 --- a/doc/lziprecover.texi +++ b/doc/lziprecover.texi @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 16 September 2015 -@set VERSION 1.18-pre2 +@set UPDATED 12 May 2016 +@set VERSION 1.18 @dircategory Data Compression @direntry @@ -50,7 +50,7 @@ This manual is for Lziprecover (version @value{VERSION}, @value{UPDATED}). @end menu @sp 1 -Copyright @copyright{} 2009-2015 Antonio Diaz Diaz. +Copyright @copyright{} 2009-2016 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute and modify it. @@ -66,6 +66,9 @@ files, produce a correct file by merging the good parts of two or more damaged copies, extract data from damaged files, decompress files and test integrity of files. +Lziprecover is not a replacement for regular backups, but a last line of +defense for the case where the backups are also damaged. 
+ The lzip file format is designed for data sharing and long-term archiving, taking into account both data integrity and decoder availability: @@ -96,6 +99,10 @@ repair the nearer it is from the beginning of the file. Therefore, with the help of lziprecover, losing an entire archive just because of a corrupt byte near the beginning is a thing of the past. +For compressible data, multiple lzip-compressed copies have a better +chance of surviving intact than one uncompressed copy using the same +amount of storage space. + Lziprecover is able to recover or decompress files produced by any of the compressors in the lzip family; lzip, plzip, minilzip/lzlib, clzip and pdlzip. @@ -113,12 +120,11 @@ garbage data may be produced at the end of each member): lziprecover -D0 -i -o file -q file.lz @end example -Lziprecover is able to efficiently extract a range of bytes from a -multi-member file, because it only decompresses the members containing -the desired data. +Lziprecover provides random access to the data in multimember files; it +only decompresses the members containing the desired data. Lziprecover can print correct total file sizes and ratios even for -multi-member files. +multimember files. When recovering data, lziprecover takes as arguments the names of the damaged files and writes zero or more recovered files depending on the @@ -128,9 +134,6 @@ damaged files themselves are never modified. When decompressing or testing file integrity, lziprecover behaves like lzip or lunzip. -Lziprecover is not a replacement for regular backups, but a last line of -defense for the case where the backups are also damaged. - @node Invoking lziprecover @chapter Invoking lziprecover @@ -142,6 +145,11 @@ The format for running lziprecover is: lziprecover [@var{options}] [@var{files}] @end example +@noindent +When decompressing or testing, @samp{-} used as a @var{file} argument +means standard input. 
It can be mixed with other @var{files} and is read +just once, the first time it appears in the command line. + Lziprecover supports the following options: @table @code @@ -160,31 +168,54 @@ Exit with error status 2 if any remaining input is detected after decompressing the last member. Such remaining input is usually trailing garbage that can be safely ignored. @xref{concat-example}. +@item -A +@itemx --alone-to-lz +Convert lzma-alone files to lzip format without recompressing, just +adding a lzip header and trailer. The conversion minimizes the +dictionary size of the resulting file (and therefore the amount of +memory required to decompress it). Only streamed files with default LZMA +properties can be converted; non-streamed lzma-alone files lack the end +of stream marker required in lzip files. + +The name of the converted lzip file is derived from that of the original +lzma-alone file as follows: + +@multitable {filename.lzma} {becomes} {anyothername.lz} +@item filename.lzma @tab becomes @tab filename.lz +@item filename.tlz @tab becomes @tab filename.tar.lz +@item anyothername @tab becomes @tab anyothername.lz +@end multitable + @item -c @itemx --stdout -Decompress to standard output. Needed when reading from a named pipe -(fifo) or from a device. Use it to recover as much of the uncompressed -data as possible when decompressing a corrupt file. +Write decompressed data to standard output; keep input files unchanged. +This option is needed when reading from a named pipe (fifo) or from a +device. Use it also to recover as much of the uncompressed data as +possible when decompressing a corrupt file. @item -d @itemx --decompress -Decompress the specified file(s). If a file fails to decompress, -lziprecover exits immediately without decompressing the rest of the -files. +Decompress the specified file(s). If a file does not exist or can't be +opened, lziprecover continues decompressing the rest of the files. 
If a +file fails to decompress, lziprecover exits immediately without +decompressing the rest of the files. @item -D @var{range} @itemx --range-decompress=@var{range} Decompress only a range of bytes starting at decompressed byte position @samp{@var{begin}} and up to byte position @w{@samp{@var{end} - 1}}. -Three formats of @var{range} are recognized, @samp{@var{begin}}, -@samp{@var{begin}-@var{end}}, and @samp{@var{begin},@var{size}}. If only -@var{begin} is specified, @var{end} is taken as the end of the file. The -produced bytes are sent to standard output unless the @samp{--output} -option is used. In order to guarantee the correctness of the data -produced, all members containing any part of the desired data are -decompressed and their integrity is verified. This operation is more -efficient in multi-member files because it only decompresses the members -containing the desired data. +This option provides random access to the data in multimember files; it +only decompresses the members containing the desired data. In order to +guarantee the correctness of the data produced, all members containing +any part of the desired data are decompressed and their integrity is +verified. + +Four formats of @var{range} are recognized, @samp{@var{begin}}, +@samp{@var{begin}-@var{end}}, @samp{@var{begin},@var{size}}, and +@samp{,@var{size}}. If only @var{begin} is specified, @var{end} is taken +as the end of the file. If only @var{size} is specified, @var{begin} is +taken as the beginning of the file. The produced bytes are sent to +standard output unless the @samp{--output} option is used. @item -f @itemx --force @@ -194,7 +225,7 @@ Force overwrite of output files. @itemx --ignore-errors Make @samp{--range-decompress} ignore data errors and continue decompressing the remaining members in the file. 
For example, -@w{@samp{lziprecover -i -D0 file.lz > file}} decompresses all the +@w{@samp{lziprecover -D0 -i file.lz > file}} decompresses all the recoverable data in all members of @samp{file.lz} without having to split it first. @@ -205,8 +236,8 @@ Keep (don't delete) input files during decompression. @item -l @itemx --list Print total file sizes and ratios. The values produced are correct even -for multi-member files. Use it together with @samp{-v} to see -information about the members in the file. +for multimember files. Use it together with @samp{-v} to see information +about the members in the file. @item -m @itemx --merge @@ -223,7 +254,11 @@ Place the output into @samp{@var{file}} instead of into produced are in the form @samp{rec01@var{file}}, @samp{rec02@var{file}}, etc. If decompressing from standard input and @samp{--stdout} has not been specified, use @samp{@var{file}} as the name of the decompressed -file. +file. If converting a lzma-alone file from standard input and +@samp{--stdout} has not been specified, use @samp{@var{file}.lz} as the +name of the converted file. (Or plain @samp{@var{file}} if it already +ends in @samp{.lz} or @samp{.tlz}). + @item -q @itemx --quiet @@ -299,15 +334,15 @@ caused lziprecover to panic. @cindex data safety There are 3 main types of data corruption that may cause data loss: -single-byte errors, multi-byte errors (generally affecting a whole -sector in a block device), and total device failure. +single-byte errors, multibyte errors (generally affecting a whole sector +in a block device), and total device failure. Lziprecover protects natively against single-byte errors (@pxref{Repairing files}), as long as file integrity is checked frequently enough that a second single-byte error does not develop in the same member before the first one is repaired. 
-Lziprecover also protects against multi-byte errors (@pxref{Merging +Lziprecover also protects against multibyte errors (@pxref{Merging files}), if at least one backup copy of the file is made. The only remedy for total device failure is storing backup copies in @@ -326,11 +361,11 @@ If you used bzip2, and if the file is large enough to contain more than one compressed data block (usually larger than 900 kB uncompressed), and if no block is damaged in both files, then the data can be manually recovered by splitting the files with bzip2recover, verifying every -block and then copying the right blocks in the right order in another +block and then copying the right blocks in the right order into another file. But if you used lzip, the data can be automatically recovered as long as -no byte is damaged in both files. +the damaged areas don't overlap. Note that each error in a bzip2 file makes a whole block unusable, but each error in a lzip file only affects the damaged bytes, making it @@ -347,8 +382,8 @@ at all. If the reparation is successful, the repaired file will be identical bit for bit to the original. This makes lzip files resistant to bit-flip, one of the most common forms of data corruption. -The error may be located anywhere in the file except in the header -(first 6 bytes of each member) or in the @samp{Member size} field of the +The error may be located anywhere in the file except in the first 5 +bytes of each member header or in the @samp{Member size} field of the trailer (last 8 bytes of each member). If the error is in the header it can be easily repaired with a text editor like GNU Moe (@pxref{File format}). If the error is in the member size, it is enough to ignore the @@ -391,21 +426,14 @@ The merge will fail if the damaged areas overlap (at least one byte is damaged in all copies), or are adjacent and the boundary can't be determined, or if the copies have too many damaged areas. -All the copies must have the same size. 
If some of them have been -truncated and are therefore smaller than they should, they can be -extended to the correct size with the following command before merging -them with the other copies: +All the copies must have the same size. If any of them is larger or +smaller than it should be, either because it has been truncated or because +it got some garbage data appended at the end, it can be brought to the +correct size with the following command before merging it with the other +copies: @example -ddrescue --extend-outfile=<correct_size> small_file.lz extended_file.lz -@end example - -If some of the copies have got garbage data at the end and are therefore -larger than they should, their sizes can be reduced to the correct value -with the following command before merging them with the other copies: - -@example -ddrescue --size=<correct_size> large_file.lz reduced_file.lz +ddrescue -s<correct_size> -x<correct_size> file.lz correct_size_file.lz @end example To give you an idea of its possibilities, when merging two copies, each @@ -461,7 +489,7 @@ additional information before, between, or after them. Each member has the following structure: @verbatim +--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -| ID string | VN | DS | Lzma stream | CRC32 | Data size | Member size | +| ID string | VN | DS | LZMA stream | CRC32 | Data size | Member size | +--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ @end verbatim @@ -485,8 +513,8 @@ from the base size to obtain the dictionary size.@* Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB@* Valid values for dictionary size range from 4 KiB to 512 MiB. -@item Lzma stream -The lzma stream, finished by an end of stream marker. Uses default +@item LZMA stream +The LZMA stream, finished by an end of stream marker. Uses default values for encoder properties. @ifnothtml @xref{Stream format,,,lzip}, @@ -506,7 +534,7 @@ Size of the uncompressed original data. 
@item Member size (8 bytes) Total size of the member, including header and trailer. This field acts as a distributed index, allows the verification of stream integrity, and -facilitates safe recovery of undamaged members from multi-member files. +facilitates safe recovery of undamaged members from multimember files. @end table @@ -536,7 +564,7 @@ hash value (for a chosen hash) coincide with those of another file. @item In very rare cases, trailing data could be the corrupt header of another -member. In multi-member or concatenated files the probability of +member. In multimember or concatenated files the probability of corruption happening in the magic bytes is 5 times smaller than the probability of getting a false positive caused by the corruption of the integrity information itself. Therefore it can be considered to be below @@ -544,8 +572,8 @@ the noise level. @end itemize Trailing data can be safely ignored in most cases. In some cases, like -user-added data, it is expected to be ignored. In those cases where a -file containing trailing data must be rejected, the option +that of user-added data, it is expected to be ignored. In those cases +where a file containing trailing data must be rejected, the option @samp{--trailing-error} can be used. @xref{--trailing-error}. @@ -616,7 +644,7 @@ mv file_fixed.lz file.lz @sp 1 @noindent -Example 7: Split the multi-member file @samp{file.lz} and write each +Example 7: Split the multimember file @samp{file.lz} and write each member in its own @samp{recXXXfile.lz} file. Then use @w{@samp{lziprecover -t}} to test the integrity of the resulting files. @@ -681,10 +709,22 @@ test robustness to decompression of corrupted data, inspired by unzcrash.c from Julian Seward's bzip2. Type @samp{make unzcrash} in the lziprecover source directory to build it. -Unzcrash reads the specified file and then repeatedly decompresses it, -increasing 256 times each byte of the compressed data, so as to test all -possible one-byte errors. 
This should not cause any invalid memory -accesses. If it does, please, report it as a bug. +By default, unzcrash reads the specified file and then repeatedly +decompresses it, increasing 256 times each byte of the compressed data, +so as to test all possible one-byte errors. + +If the @code{--block} option is given, unzcrash reads the specified file +and then repeatedly decompresses it, setting all bytes in each +successive block to the value given, so as to test all possible full +sector errors. + +If the @code{--truncate} option is given, unzcrash reads the specified +file and then repeatedly decompresses it, truncating the file to +increasing lengths, so as to test all possible truncation points. + +None of the three test modes described above should cause any invalid +memory accesses. If any of them does, please, report it as a bug to the +maintainers of the decompressor being tested. Unzcrash really executes as a subprocess the shell command specified in the first non-option argument, and then writes the file specified in the @@ -693,6 +733,27 @@ modifying the corresponding byte each time. Therefore unzcrash can be used to test any decompressor (not only lzip), or even other decoder programs having a suitable command line syntax. +If the decompressor returns with zero status, unzcrash compares the +output of the decompressor for the original and corrupt files. If the +outputs differ, it means that the decompressor returned a false +negative; it failed to recognize the corruption and produced garbage +output. The only exception is when a multimember file is truncated just +after the last byte of a member, producing a shorter but valid +compressed file. Except in this latter case, please, report any false +negative as a bug. + +In order to compare the outputs, unzcrash needs a @samp{zcmp} program +able to understand the format being tested. For example the one provided +by @samp{zutils}. 
+@ifnothtml +@xref{Zcmp,,,zutils}, +@end ifnothtml +@ifhtml +See +@uref{http://www.nongnu.org/zutils/manual/zutils_manual.html#Zcmp,,zcmp} +@end ifhtml + + The format for running unzcrash is: @example @@ -728,9 +789,21 @@ The number of N-bit errors per byte (N = 1 to 8) is: @item 1-3,5-8 @tab 1, 2, 3, 5, 6, 7 and 8 @end multitable +@item -B[@var{size}][,@var{value}] +@itemx --block[=@var{size}][,@var{value}] +Test block errors of given @var{size} aligned to a @var{size}-byte +boundary, simulating a whole sector I/O error. Block @var{size} defaults +to 512 bytes. @var{value} defaults to 0. + +@item -d @var{n} +@itemx --delta=@var{n} +Test only one of every @var{n} bytes, blocks or truncation sizes, +instead of all of them. + @item -p @var{bytes} @itemx --position=@var{bytes} -First byte position to test in the file. Defaults to 0. +First byte position to test in the file. Defaults to 0. Negative values +are relative to the end of the file. @item -q @itemx --quiet @@ -738,13 +811,24 @@ Quiet operation. Suppress all messages. @item -s @var{bytes} @itemx --size=@var{bytes} -Number of byte positions to test. If not specified, the whole file is -tested. +Number of byte positions to test. If not specified, the rest of the file +is tested (from @code{--position} to end of file). Negative values are +relative to the rest of the file. + +@item -t +@itemx --truncate +Test all possible truncation points in the range specified by +@code{--position} and @code{--size}. @item -v @itemx --verbose Verbose mode. +@item -z +@itemx --zcmp=<command> +Set zcmp command name and options. Defaults to @code{zcmp}. Use +@code{--zcmp=false} to disable comparisons. + @end table Exit status: 0 for a normal exit, 1 for environmental problems (file not diff --git a/file_index.cc b/file_index.cc index af55417..747c376 100644 --- a/file_index.cc +++ b/file_index.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2015 Antonio Diaz Diaz. 
+ Copyright (C) 2009-2016 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/file_index.h b/file_index.h index 5084fcb..2b127c3 100644 --- a/file_index.h +++ b/file_index.h @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2015 Antonio Diaz Diaz. + Copyright (C) 2009-2016 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2015 Antonio Diaz Diaz. + Copyright (C) 2009-2016 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -45,6 +45,7 @@ enum { max_dictionary_size = 1 << max_dictionary_bits, min_member_size = 36, literal_context_bits = 3, + literal_pos_state_bits = 0, // not used pos_state_bits = 2, pos_states = 1 << pos_state_bits, pos_state_mask = pos_states - 1, @@ -175,6 +176,11 @@ public: extern const CRC32 crc32; +inline bool isvalid_ds( const unsigned dictionary_size ) + { return ( dictionary_size >= min_dictionary_size && + dictionary_size <= max_dictionary_size ); } + + inline int real_bits( unsigned value ) { int bits = 0; @@ -195,6 +201,12 @@ struct File_header void set_magic() { std::memcpy( data, magic_string, 4 ); data[4] = 1; } bool verify_magic() const { return ( std::memcmp( data, magic_string, 4 ) == 0 ); } + bool verify_prefix( const int size ) const // detect truncated header + { + for( int i = 0; i < size && i < 4; ++i ) + if( data[i] != magic_string[i] ) return false; + return ( size > 0 ); + } uint8_t version() const { return data[4]; } bool verify_version() const { return ( data[4] == 1 ); } @@ -209,20 +221,17 @@ struct File_header bool dictionary_size( const unsigned sz ) { - if( 
sz >= min_dictionary_size && sz <= max_dictionary_size ) + if( !isvalid_ds( sz ) ) return false; + data[5] = real_bits( sz - 1 ); + if( sz > min_dictionary_size ) { - data[5] = real_bits( sz - 1 ); - if( sz > min_dictionary_size ) - { - const unsigned base_size = 1 << data[5]; - const unsigned fraction = base_size / 16; - for( int i = 7; i >= 1; --i ) - if( base_size - ( i * fraction ) >= sz ) - { data[5] |= ( i << 5 ); break; } - } - return true; + const unsigned base_size = 1 << data[5]; + const unsigned fraction = base_size / 16; + for( int i = 7; i >= 1; --i ) + if( base_size - ( i * fraction ) >= sz ) + { data[5] |= ( i << 5 ); break; } } - return false; + return true; } }; @@ -279,36 +288,46 @@ inline unsigned long long positive_diff( const unsigned long long x, { return ( ( x > y ) ? x - y : 0 ); } +// defined in alone_to_lz.cc +int alone_to_lz( const int infd, const Pretty_print & pp ); + // defined in decoder.cc long readblock( const int fd, uint8_t * const buf, const long size ); -int writeblock( const int fd, const uint8_t * const buf, const int size ); +long writeblock( const int fd, const uint8_t * const buf, const long size ); // defined in file_index.cc int seek_read( const int fd, uint8_t * const buf, const int size, const long long pos ); // defined in main.cc +extern std::string output_filename; // global vars for output file +extern int outfd; + int open_instream( const char * const name, struct stat * const in_statsp, const bool no_ofile, const bool reg_only = false ); +bool open_outstream( const bool force, const bool from_stdin, + const bool rw = false, const bool skipping = true ); bool file_exists( const std::string & filename ); -int open_outstream_rw( const std::string & output_filename, const bool force ); +void cleanup_and_fail( const int retval ); +int close_outstream( const struct stat * const in_statsp ); +std::string insert_fixed( std::string name ); void show_header( const unsigned dictionary_size, const int vlevel = 3 ); void 
show_error( const char * const msg, const int errcode = 0, const bool help = false ); void internal_error( const char * const msg ); void show_error2( const char * const msg1, const char * const name, const char * const msg2 ); +void show_error4( const char * const msg1, const char * const name1, + const char * const name2, const char * const msg2 ); // defined in merge.cc -void cleanup_and_fail( const std::string & output_filename, - const int outfd, const int retval ); bool copy_file( const int infd, const int outfd, const long long max_size = -1 ); -bool try_decompress_member( const int fd, const unsigned long long msize, - long long * failure_posp = 0 ); +bool test_member_from_file( const int infd, const unsigned long long msize, + long long * const failure_posp = 0 ); int merge_files( const std::vector< std::string > & filenames, - const std::string & output_filename, const int verbosity, - const bool force ); + const std::string & default_output_filename, + const int verbosity, const bool force ); // defined in range_dec.cc const char * format_num( unsigned long long num, @@ -320,13 +339,13 @@ int list_files( const std::vector< std::string > & filenames, // defined in repair.cc int repair_file( const std::string & input_filename, - const std::string & output_filename, const int verbosity, - const bool force ); + const std::string & default_output_filename, + const int verbosity, const bool force ); int debug_repair( const std::string & input_filename, const long long bad_pos, const int verbosity, const uint8_t bad_value ); -int debug_show_packets( const std::string & input_filename, - const long long bad_pos, const int verbosity, - const uint8_t bad_value ); +int debug_decompress( const std::string & input_filename, + const long long bad_pos, const int verbosity, + const uint8_t bad_value, const bool show_packets ); // defined in split.cc bool verify_header( const File_header & header, const Pretty_print & pp ); @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery 
tool for the lzip format - Copyright (C) 2009-2015 Antonio Diaz Diaz. + Copyright (C) 2009-2016 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -66,12 +66,14 @@ #error "Environments where CHAR_BIT != 8 are not supported." #endif +std::string output_filename; // global vars for output file +int outfd = -1; namespace { const char * const Program_name = "Lziprecover"; const char * const program_name = "lziprecover"; -const char * const program_year = "2015"; +const char * const program_year = "2016"; const char * invocation_name = 0; struct { const char * from; const char * to; } const known_extensions[] = { @@ -79,15 +81,11 @@ struct { const char * from; const char * to; } const known_extensions[] = { { ".tlz", ".tar" }, { 0, 0 } }; -enum Mode { m_none, m_debug_delay, m_debug_repair, m_decompress, m_list, - m_merge, m_range_dec, m_repair, m_show_packets, m_split, m_test }; +enum Mode { m_none, m_alone_to_lz, m_debug_decompress, m_debug_delay, + m_debug_repair, m_decompress, m_list, m_merge, m_range_dec, + m_repair, m_show_packets, m_split, m_test }; -std::string output_filename; -int outfd = -1; int verbosity = 0; -const mode_t usr_rw = S_IRUSR | S_IWUSR; -const mode_t all_rw = usr_rw | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; -mode_t outfd_mode = usr_rw; bool delete_output_on_interrupt = false; @@ -98,15 +96,18 @@ void show_help() "single-byte error per member), without the need of any extra redundance\n" "at all. 
Losing an entire archive just because of a corrupt byte near the\n" "beginning is a thing of the past.\n" - "Lziprecover can also produce a correct file by merging the good parts of\n" + "\nLziprecover can also produce a correct file by merging the good parts of\n" "two or more damaged copies, extract data from damaged files, decompress\n" "files and test integrity of files.\n" + "\nLziprecover is not a replacement for regular backups, but a last line of\n" + "defense for the case where the backups are also damaged.\n" "\nUsage: %s [options] [files]\n", invocation_name ); std::printf( "\nOptions:\n" " -h, --help display this help and exit\n" " -V, --version output version information and exit\n" " -a, --trailing-error exit with error status if trailing data\n" - " -c, --stdout send decompressed output to standard output\n" + " -A, --alone-to-lz convert lzma-alone files to lzip format\n" + " -c, --stdout write to standard output, keep input files\n" " -d, --decompress decompress\n" " -D, --range-decompress=<range> decompress a range of bytes (N-M) to stdout\n" " -f, --force overwrite existing output files\n" @@ -117,16 +118,19 @@ void show_help() " -o, --output=<file> place the output into <file>\n" " -q, --quiet suppress all messages\n" " -R, --repair try to repair a small error in file\n" - " -s, --split split multi-member file in single-member files\n" + " -s, --split split multimember file in single-member files\n" " -t, --test test compressed file integrity\n" " -v, --verbose be verbose (a 2nd -v gives more)\n" ); if( verbosity >= 1 ) { - std::printf( " -x, --show-packets[=<pos>,<val>] show in stdout the decoded LZMA packets\n" - " -y, --debug-delay=<range> find max error detection delay in <range>\n" - " -z, --debug-repair=<pos>,<val> test repair one-byte error at <pos>\n" ); + std::printf( " -W, --debug-decompress=<pos>,<val> set pos to val and decompress to stdout\n" + " -X, --show-packets[=<pos>,<val>] show in stdout the decoded LZMA packets\n" + " -Y, 
--debug-delay=<range> find max error detection delay in <range>\n" + " -Z, --debug-repair=<pos>,<val> test repair one-byte error at <pos>\n" ); } - std::printf( "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" + std::printf( "If no file names are given, or if a file is '-', lziprecover decompresses\n" + "from standard input to standard output.\n" + "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" @@ -211,14 +215,15 @@ int parse_long_long( const char * const ptr, long long & value ) } -// Recognized formats: <begin> <begin>-<end> <begin>,<size> +// Recognized formats: <begin> <begin>-<end> <begin>,<size> ,<size> // void parse_range( const char * const ptr, Block & range ) { long long value = 0; - int c = parse_long_long( ptr, value ); // pos - if( c && value >= 0 && value < INT64_MAX && - ( ptr[c] == 0 || ptr[c] == ',' || ptr[c] == '-' ) ) + const bool size_only = ( ptr[0] == ',' ); + int c = size_only ? 0 : parse_long_long( ptr, value ); // pos + if( size_only || ( c && value >= 0 && value < INT64_MAX && + ( ptr[c] == 0 || ptr[c] == ',' || ptr[c] == '-' ) ) ) { range.pos( value ); if( ptr[c] == 0 ) { range.size( INT64_MAX - value ); return; } @@ -321,6 +326,18 @@ int open_instream( const char * const name, struct stat * const in_statsp, namespace { +void set_a_outname( const std::string & name ) + { + output_filename = name; + if( name.size() > 5 && name.compare( name.size() - 5, 5, ".lzma" ) == 0 ) + output_filename.erase( name.size() - 2 ); + else if( name.size() > 4 && name.compare( name.size() - 4, 4, ".tlz" ) == 0 ) + output_filename.insert( name.size() - 2, "ar." 
); + else if( name.size() <= 3 || name.compare( name.size() - 3, 3, ".lz" ) != 0 ) + output_filename += known_extensions[0].from; + } + + void set_d_outname( const std::string & name, const int i ) { if( i >= 0 ) @@ -339,18 +356,25 @@ void set_d_outname( const std::string & name, const int i ) program_name, name.c_str(), output_filename.c_str() ); } +} // end namespace -bool open_outstream( const bool force ) +bool open_outstream( const bool force, const bool from_stdin, + const bool rw, const bool skipping ) { - int flags = O_CREAT | O_WRONLY | O_BINARY; + const mode_t usr_rw = S_IRUSR | S_IWUSR; + const mode_t all_rw = usr_rw | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; + const mode_t outfd_mode = from_stdin ? all_rw : usr_rw; + int flags = O_CREAT | ( rw ? O_RDWR : O_WRONLY ) | O_BINARY; if( force ) flags |= O_TRUNC; else flags |= O_EXCL; outfd = open( output_filename.c_str(), flags, outfd_mode ); - if( outfd < 0 && verbosity >= 0 ) + if( outfd >= 0 ) delete_output_on_interrupt = true; + else if( verbosity >= 0 ) { if( errno == EEXIST ) - std::fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n", - program_name, output_filename.c_str() ); + std::fprintf( stderr, "%s: Output file '%s' already exists%s.\n", + program_name, output_filename.c_str(), skipping ? + ", skipping" : ". Use '--force' to overwrite it" ); else std::fprintf( stderr, "%s: Can't create output file '%s': %s\n", program_name, output_filename.c_str(), std::strerror( errno ) ); @@ -359,6 +383,37 @@ bool open_outstream( const bool force ) } +bool file_exists( const std::string & filename ) + { + struct stat st; + if( stat( filename.c_str(), &st ) == 0 ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Output file '%s' already exists." 
+ " Use '--force' to overwrite it.\n", + program_name, filename.c_str() ); + return true; + } + return false; + } + + +bool check_tty( const int infd, const Mode program_mode ) + { + if( program_mode == m_alone_to_lz && isatty( outfd ) ) + { + show_error( "I won't write compressed data to a terminal.", 0, true ); + return false; + } + if( isatty( infd ) ) // all modes read compressed data + { + show_error( "I won't read compressed data from a terminal.", 0, true ); + return false; + } + return true; + } + + void cleanup_and_fail( const int retval ) { if( delete_output_on_interrupt ) @@ -374,6 +429,7 @@ void cleanup_and_fail( const int retval ) std::exit( retval ); } +namespace { // Set permissions, owner and times. void close_and_set_permissions( const struct stat * const in_statsp ) @@ -390,7 +446,11 @@ void close_and_set_permissions( const struct stat * const in_statsp ) fchmod( outfd, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 ) warning = true; } - if( close( outfd ) != 0 ) cleanup_and_fail( 1 ); + if( close( outfd ) != 0 ) + { + show_error( "Error closing output file", errno ); + cleanup_and_fail( 1 ); + } outfd = -1; delete_output_on_interrupt = false; if( in_statsp ) @@ -405,19 +465,6 @@ void close_and_set_permissions( const struct stat * const in_statsp ) } -std::string insert_fixed( std::string name ) - { - if( name.size() > 7 && name.compare( name.size() - 7, 7, ".tar.lz" ) == 0 ) - name.insert( name.size() - 7, "_fixed" ); - else if( name.size() > 3 && name.compare( name.size() - 3, 3, ".lz" ) == 0 ) - name.insert( name.size() - 3, "_fixed" ); - else if( name.size() > 4 && name.compare( name.size() - 4, 4, ".tlz" ) == 0 ) - name.insert( name.size() - 4, "_fixed" ); - else name += "_fixed.lz"; - return name; - } - - unsigned char xdigit( const int value ) { if( value >= 0 && value <= 9 ) return '0' + value; @@ -475,7 +522,7 @@ int decompress( const int infd, const Pretty_print & pp, const int size = rdec.read_data( header.data, File_header::size ); if( 
rdec.finished() ) // End Of File { - if( first_member ) + if( first_member || header.verify_prefix( size ) ) { pp( "File ends unexpectedly at member header." ); retval = 2; } else if( size > 0 && !show_trailing_data( header.data, size, pp, true, ignore_trailing ) ) @@ -499,14 +546,13 @@ int decompress( const int infd, const Pretty_print & pp, retval = 2; break; } const unsigned dictionary_size = header.dictionary_size(); - if( dictionary_size < min_dictionary_size || - dictionary_size > max_dictionary_size ) + if( !isvalid_ds( dictionary_size ) ) { pp( "Invalid dictionary size in member header." ); retval = 2; break; } if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) { pp(); show_header( dictionary_size ); } - LZ_decoder decoder( header, rdec, outfd ); + LZ_decoder decoder( rdec, dictionary_size, outfd ); const int result = decoder.decode_member( pp ); partial_file_pos += rdec.member_position(); if( result != 0 ) @@ -549,56 +595,42 @@ void set_signals() } // end namespace -bool file_exists( const std::string & filename ) +int close_outstream( const struct stat * const in_statsp ) { - struct stat st; - if( stat( filename.c_str(), &st ) == 0 ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Output file '%s' already exists." - " Use '--force' to overwrite it.\n", - program_name, filename.c_str() ); - return true; - } - return false; + if( delete_output_on_interrupt ) + close_and_set_permissions( in_statsp ); + if( outfd >= 0 && close( outfd ) != 0 ) + { show_error( "Can't close stdout", errno ); return 1; } + outfd = -1; + return 0; } -int open_outstream_rw( const std::string & output_filename, const bool force ) +std::string insert_fixed( std::string name ) { - int flags = O_CREAT | O_RDWR | O_BINARY; - if( force ) flags |= O_TRUNC; else flags |= O_EXCL; - - int outfd = open( output_filename.c_str(), flags, all_rw ); - if( outfd < 0 && verbosity >= 0 ) - { - if( errno == EEXIST ) - std::fprintf( stderr, "%s: Output file '%s' already exists." 
- " Use '--force' to overwrite it.\n", - program_name, output_filename.c_str() ); - else - std::fprintf( stderr, "%s: Can't create output file '%s': %s\n", - program_name, output_filename.c_str(), std::strerror( errno ) ); - } - return outfd; + if( name.size() > 7 && name.compare( name.size() - 7, 7, ".tar.lz" ) == 0 ) + name.insert( name.size() - 7, "_fixed" ); + else if( name.size() > 3 && name.compare( name.size() - 3, 3, ".lz" ) == 0 ) + name.insert( name.size() - 3, "_fixed" ); + else if( name.size() > 4 && name.compare( name.size() - 4, 4, ".tlz" ) == 0 ) + name.insert( name.size() - 4, "_fixed" ); + else name += "_fixed.lz"; + return name; } void show_error( const char * const msg, const int errcode, const bool help ) { - if( verbosity >= 0 ) + if( verbosity < 0 ) return; + if( msg && msg[0] ) { - if( msg && msg[0] ) - { - std::fprintf( stderr, "%s: %s", program_name, msg ); - if( errcode > 0 ) - std::fprintf( stderr, ": %s", std::strerror( errcode ) ); - std::fputc( '\n', stderr ); - } - if( help ) - std::fprintf( stderr, "Try '%s --help' for more information.\n", - invocation_name ); + std::fprintf( stderr, "%s: %s", program_name, msg ); + if( errcode > 0 ) std::fprintf( stderr, ": %s", std::strerror( errcode ) ); + std::fputc( '\n', stderr ); } + if( help ) + std::fprintf( stderr, "Try '%s --help' for more information.\n", + invocation_name ); } @@ -618,6 +650,15 @@ void show_error2( const char * const msg1, const char * const name, } +void show_error4( const char * const msg1, const char * const name1, + const char * const name2, const char * const msg2 ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: %s '%s' and '%s' %s\n", + program_name, msg1, name1, name2, msg2 ); + } + + int main( const int argc, const char * const argv[] ) { Block range( 0, 0 ); @@ -638,6 +679,7 @@ int main( const int argc, const char * const argv[] ) const Arg_parser::Option options[] = { { 'a', "trailing-error", Arg_parser::no }, + { 'A', "alone-to-lz", Arg_parser::no }, 
{ 'c', "stdout", Arg_parser::no }, { 'd', "decompress", Arg_parser::no }, { 'D', "range-decompress", Arg_parser::yes }, @@ -655,10 +697,11 @@ int main( const int argc, const char * const argv[] ) { 't', "test", Arg_parser::no }, { 'v', "verbose", Arg_parser::no }, { 'V', "version", Arg_parser::no }, - { 'x', "show-packets", Arg_parser::maybe }, - { 'y', "debug-delay", Arg_parser::yes }, - { 'z', "debug-repair", Arg_parser::yes }, - { 0 , 0, Arg_parser::no } }; + { 'W', "debug-decompress", Arg_parser::yes }, + { 'X', "show-packets", Arg_parser::maybe }, + { 'Y', "debug-delay", Arg_parser::yes }, + { 'Z', "debug-repair", Arg_parser::yes }, + { 0 , 0, Arg_parser::no } }; const Arg_parser parser( argc, argv, options ); if( parser.error().size() ) // bad option @@ -670,13 +713,15 @@ int main( const int argc, const char * const argv[] ) const int code = parser.code( argind ); if( !code ) break; // no more options const std::string & arg = parser.argument( argind ); + const char * const ptr = arg.c_str(); switch( code ) { case 'a': ignore_trailing = false; break; + case 'A': set_mode( program_mode, m_alone_to_lz ); break; case 'c': to_stdout = true; break; case 'd': set_mode( program_mode, m_decompress ); break; case 'D': set_mode( program_mode, m_range_dec ); - parse_range( arg.c_str(), range ); break; + parse_range( ptr, range ); break; case 'f': force = true; break; case 'h': show_help(); return 0; case 'i': ignore_errors = true; break; @@ -691,13 +736,14 @@ int main( const int argc, const char * const argv[] ) case 't': set_mode( program_mode, m_test ); break; case 'v': if( verbosity < 4 ) ++verbosity; break; case 'V': show_version(); return 0; - case 'x': set_mode( program_mode, m_show_packets ); - if( arg.size() ) - parse_pos_value( arg.c_str(), bad_pos, bad_value ); break; - case 'y': set_mode( program_mode, m_debug_delay ); - parse_range( arg.c_str(), range ); break; - case 'z': set_mode( program_mode, m_debug_repair ); - parse_pos_value( arg.c_str(), bad_pos, 
bad_value ); break; + case 'W': set_mode( program_mode, m_debug_decompress ); + parse_pos_value( ptr, bad_pos, bad_value ); break; + case 'X': set_mode( program_mode, m_show_packets ); + if( ptr[0] ) parse_pos_value( ptr, bad_pos, bad_value ); break; + case 'Y': set_mode( program_mode, m_debug_delay ); + parse_range( ptr, range ); break; + case 'Z': set_mode( program_mode, m_debug_repair ); + parse_pos_value( ptr, bad_pos, bad_value ); break; default : internal_error( "uncaught option." ); } } // end process options @@ -724,6 +770,10 @@ int main( const int argc, const char * const argv[] ) switch( program_mode ) { case m_none: internal_error( "invalid operation." ); break; + case m_alone_to_lz: break; + case m_debug_decompress: + one_file( filenames.size() ); + return debug_decompress( filenames[0], bad_pos, verbosity, bad_value, false ); case m_debug_delay: one_file( filenames.size() ); return debug_delay( filenames[0], range, verbosity ); @@ -738,34 +788,35 @@ int main( const int argc, const char * const argv[] ) case m_merge: if( filenames.size() < 2 ) { show_error( "You must specify at least 2 files.", 0, true ); return 1; } - if( default_output_filename.empty() ) - default_output_filename = insert_fixed( filenames[0] ); + set_signals(); return merge_files( filenames, default_output_filename, verbosity, force ); case m_range_dec: one_file( filenames.size() ); + set_signals(); return range_decompress( filenames[0], default_output_filename, range, verbosity, force, ignore_errors, to_stdout ); case m_repair: one_file( filenames.size() ); - if( default_output_filename.empty() ) - default_output_filename = insert_fixed( filenames[0] ); + set_signals(); return repair_file( filenames[0], default_output_filename, verbosity, force ); case m_show_packets: one_file( filenames.size() ); - return debug_show_packets( filenames[0], bad_pos, verbosity, bad_value ); + return debug_decompress( filenames[0], bad_pos, verbosity, bad_value, true ); case m_split: one_file( 
filenames.size() ); + set_signals(); return split_file( filenames[0], default_output_filename, verbosity, force ); case m_test: break; } } - catch( std::bad_alloc ) { show_error( "Not enough memory." ); return 1; } - catch( Error e ) { show_error( e.msg, errno ); return 1; } + catch( std::bad_alloc ) + { show_error( "Not enough memory." ); cleanup_and_fail( 1 ); } + catch( Error e ) { show_error( e.msg, errno ); cleanup_and_fail( 1 ); } if( program_mode == m_test ) outfd = -1; - else if( program_mode != m_decompress ) + else if( program_mode != m_alone_to_lz && program_mode != m_decompress ) internal_error( "invalid decompressor operation." ); if( filenames.empty() ) filenames.push_back("-"); @@ -776,6 +827,7 @@ int main( const int argc, const char * const argv[] ) Pretty_print pp( filenames, verbosity ); int retval = 0; + bool stdin_used = false; for( unsigned i = 0; i < filenames.size(); ++i ) { struct stat in_stats; @@ -783,6 +835,7 @@ int main( const int argc, const char * const argv[] ) if( filenames[i].empty() || filenames[i] == "-" ) { + if( stdin_used ) continue; else stdin_used = true; input_filename.clear(); infd = STDIN_FILENO; if( program_mode != m_test ) @@ -792,8 +845,10 @@ int main( const int argc, const char * const argv[] ) else { output_filename = default_output_filename; - outfd_mode = all_rw; - if( !open_outstream( force ) ) + if( program_mode == m_alone_to_lz && + extension_index( default_output_filename ) < 0 ) + output_filename += known_extensions[0].from; + if( !open_outstream( force, true ) ) { if( retval < 1 ) retval = 1; close( infd ); infd = -1; @@ -813,9 +868,10 @@ int main( const int argc, const char * const argv[] ) if( to_stdout ) outfd = STDOUT_FILENO; else { - set_d_outname( input_filename, extension_index( input_filename ) ); - outfd_mode = usr_rw; - if( !open_outstream( force ) ) + if( program_mode == m_alone_to_lz ) + set_a_outname( input_filename ); + else set_d_outname( input_filename, extension_index( input_filename ) ); + 
if( !open_outstream( force, false ) ) { if( retval < 1 ) retval = 1; close( infd ); infd = -1; @@ -825,17 +881,19 @@ int main( const int argc, const char * const argv[] ) } } - if( isatty( infd ) ) + if( !check_tty( infd, program_mode ) ) { - show_error( "I won't read compressed data from a terminal.", 0, true ); - return 1; + if( retval < 1 ) retval = 1; + cleanup_and_fail( retval ); } - if( output_filename.size() && !to_stdout && program_mode != m_test ) - delete_output_on_interrupt = true; const struct stat * const in_statsp = input_filename.size() ? &in_stats : 0; pp.set_name( input_filename ); - const int tmp = decompress( infd, pp, ignore_trailing, program_mode == m_test ); + int tmp; + if( program_mode == m_alone_to_lz ) + tmp = alone_to_lz( infd, pp ); + else + tmp = decompress( infd, pp, ignore_trailing, program_mode == m_test ); if( tmp > retval ) retval = tmp; if( tmp && program_mode != m_test ) cleanup_and_fail( retval ); @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2015 Antonio Diaz Diaz. + Copyright (C) 2009-2016 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,6 +17,7 @@ #define _FILE_OFFSET_BITS 64 +#include <algorithm> #include <cerrno> #include <climits> #include <cstdio> @@ -36,6 +37,28 @@ namespace { +bool file_crc( uint32_t & crc, const int infd ) + { + const int buffer_size = 65536; + crc = 0xFFFFFFFFU; + uint8_t * const buffer = new uint8_t[buffer_size]; + bool error = false; + + while( true ) + { + const int rd = readblock( infd, buffer, buffer_size ); + if( rd != buffer_size && errno ) + { show_error( "Error reading input file", errno ); error = true; break; } + if( rd > 0 ) + crc32.update_buf( crc, buffer, rd ); + if( rd < buffer_size ) break; // EOF + } + delete[] buffer; + crc ^= 0xFFFFFFFFU; + return !error; + } + + // Add 'bv' to 'block_vector' splitting blocks as needed to keep all the // edges (pos and end of every block). // 'block_vector' contains the result. 'bv' is destroyed. @@ -171,37 +194,43 @@ long ipow( const unsigned base, const unsigned exponent ) int open_input_files( const std::vector< std::string > & filenames, std::vector< int > & infd_vector, - File_index & file_index, const int verbosity ) + File_index & file_index, struct stat * const in_statsp, + const int verbosity ) { const int files = filenames.size(); - bool identical = false; - for( int i = 1; i < files; ++i ) - if( filenames[0] == filenames[i] ) - { identical = true; break; } - if( !identical ) - for( int i = 0; i < files; ++i ) - { - struct stat in_stats; - ino_t st_ino0 = 0; - dev_t st_dev0 = 0; - infd_vector[i] = open_instream( filenames[i].c_str(), &in_stats, true, true ); - if( infd_vector[i] < 0 ) return 1; - if( i == 0 ) { st_ino0 = in_stats.st_ino; st_dev0 = in_stats.st_dev; } - else if( st_ino0 == in_stats.st_ino && st_dev0 == in_stats.st_dev ) - { identical = true; break; } - } - if( identical ) { show_error( "Two input files are the same." 
); return 2; } + for( int i = 0; i + 1 < files; ++i ) + for( int j = i + 1; j < files; ++j ) + if( filenames[i] == filenames[j] ) + { show_error2( "Input file", filenames[i].c_str(), "given twice." ); + return 2; } + { + std::vector< uint32_t > crc_vector( files ); + for( int i = 0; i < files; ++i ) + { + struct stat in_stats; // not used + infd_vector[i] = open_instream( filenames[i].c_str(), + ( i == 0 ) ? in_statsp : &in_stats, true, true ); + if( infd_vector[i] < 0 ) return 1; + if( !file_crc( crc_vector[i], infd_vector[i] ) ) return 1; + for( int j = 0; j < i; ++j ) + if( crc_vector[i] == crc_vector[j] ) + { show_error4( "Input files", filenames[j].c_str(), + filenames[i].c_str(), "are identical." ); return 2; } + } + } long long isize = 0; + int good_fi = -1; for( int i = 0; i < files; ++i ) { long long tmp; const File_index fi( infd_vector[i] ); if( fi.retval() == 0 ) // file format is intact { - if( file_index.retval() != 0 ) file_index = fi; + if( good_fi < 0 ) { good_fi = i; file_index = fi; } else if( file_index != fi ) - { show_error( "Input files are different." ); return 2; } + { show_error4( "Input files", filenames[good_fi].c_str(), + filenames[i].c_str(), "are different." ); return 2; } tmp = file_index.file_size(); } else // file format is damaged @@ -213,15 +242,13 @@ int open_input_files( const std::vector< std::string > & filenames, return 1; } } - if( i == 0 ) - { - isize = tmp; - if( isize < min_member_size ) - { show_error2( "Input file", filenames[i].c_str(), "is too short." ); - return 2; } - } + if( tmp < min_member_size ) + { show_error2( "Input file", filenames[i].c_str(), "is too short." ); + return 2; } + if( i == 0 ) isize = tmp; else if( isize != tmp ) - { show_error( "Sizes of input files are different." ); return 2; } + { show_error4( "Sizes of input files", filenames[0].c_str(), + filenames[i].c_str(), "are different." 
); return 2; } } if( file_index.retval() != 0 ) @@ -242,7 +269,7 @@ int open_input_files( const std::vector< std::string > & filenames, const long long mpos = file_index.mblock( j ).pos(); const long long msize = file_index.mblock( j ).size(); if( !safe_seek( infd, mpos ) ) return 1; - if( !try_decompress_member( infd, msize ) ) { error = true; break; } + if( !test_member_from_file( infd, msize ) ) { error = true; break; } } if( !error ) { @@ -261,8 +288,7 @@ bool try_merge_member( const long long mpos, const long long msize, const std::vector< Block > & block_vector, const std::vector< int > & color_vector, const std::vector< int > & infd_vector, - const std::string & output_filename, - const int outfd, const int verbosity ) + const int verbosity ) { const int blocks = block_vector.size(); const int files = infd_vector.size(); @@ -273,7 +299,7 @@ bool try_merge_member( const long long mpos, const long long msize, show_error( "Too many damaged blocks. Try merging fewer files." ); else show_error( "Too many damaged blocks. Merging is not possible." 
); - cleanup_and_fail( output_filename, outfd, 2 ); + cleanup_and_fail( 2 ); } int bi = 0; // block index std::vector< int > file_idx( blocks, 0 ); // file to read each block from @@ -294,13 +320,13 @@ bool try_merge_member( const long long mpos, const long long msize, if( !safe_seek( infd, block_vector[bi].pos() ) || !safe_seek( outfd, block_vector[bi].pos() ) || !copy_file( infd, outfd, block_vector[bi].size() ) ) - cleanup_and_fail( output_filename, outfd, 1 ); + cleanup_and_fail( 1 ); ++bi; } if( !safe_seek( outfd, mpos ) ) - cleanup_and_fail( output_filename, outfd, 1 ); + cleanup_and_fail( 1 ); long long failure_pos = 0; - if( try_decompress_member( outfd, msize, &failure_pos ) ) return true; + if( test_member_from_file( outfd, msize, &failure_pos ) ) return true; while( bi > 0 && mpos + failure_pos < block_vector[bi-1].pos() ) --bi; while( --bi >= 0 ) { @@ -325,8 +351,7 @@ bool try_merge_member1( const long long mpos, const long long msize, const std::vector< Block > & block_vector, const std::vector< int > & color_vector, const std::vector< int > & infd_vector, - const std::string & output_filename, - const int outfd, const int verbosity ) + const int verbosity ) { if( block_vector.size() != 1 || block_vector[0].size() <= 1 ) return false; const long long pos = block_vector[0].pos(); @@ -344,7 +369,7 @@ bool try_merge_member1( const long long mpos, const long long msize, !safe_seek( infd_vector[i2], pos ) || !safe_seek( outfd, pos ) || !copy_file( infd_vector[i2], outfd, size ) ) - cleanup_and_fail( output_filename, outfd, 1 ); + cleanup_and_fail( 1 ); const int var = ( i1 * ( files - 1 ) ) + i2 - ( i2 > i1 ) + 1; for( long long i = 0; i < size; ++i ) { @@ -358,9 +383,9 @@ bool try_merge_member1( const long long mpos, const long long msize, readblock( infd, &byte, 1 ) != 1 || writeblock( outfd, &byte, 1 ) != 1 || !safe_seek( outfd, mpos ) ) - cleanup_and_fail( output_filename, outfd, 1 ); + cleanup_and_fail( 1 ); long long failure_pos = 0; - if( 
try_decompress_member( outfd, msize, &failure_pos ) ) return true; + if( test_member_from_file( outfd, msize, &failure_pos ) ) return true; if( mpos + failure_pos <= pos + i ) break; } } @@ -370,16 +395,6 @@ bool try_merge_member1( const long long mpos, const long long msize, } // end namespace -void cleanup_and_fail( const std::string & output_filename, - const int outfd, const int retval ) - { - if( outfd >= 0 ) close( outfd ); - if( std::remove( output_filename.c_str() ) != 0 && errno != ENOENT ) - show_error( "WARNING: deletion of output file (apparently) failed." ); - std::exit( retval ); - } - - // max_size < 0 means no size limit. bool copy_file( const int infd, const int outfd, const long long max_size ) { @@ -410,44 +425,45 @@ bool copy_file( const int infd, const int outfd, const long long max_size ) } -bool try_decompress_member( const int fd, const unsigned long long msize, - long long * failure_posp ) +bool test_member_from_file( const int infd, const unsigned long long msize, + long long * const failure_posp ) { - Range_decoder rdec( fd ); + Range_decoder rdec( infd ); File_header header; rdec.read_data( header.data, File_header::size ); - if( !rdec.finished() && // End Of File - header.verify_magic() && header.verify_version() && - header.dictionary_size() >= min_dictionary_size && - header.dictionary_size() <= max_dictionary_size ) + const unsigned dictionary_size = header.dictionary_size(); + if( !rdec.finished() && header.verify_magic() && + header.verify_version() && isvalid_ds( dictionary_size ) ) { - LZ_decoder decoder( header, rdec, -1 ); + LZ_decoder decoder( rdec, dictionary_size, -1 ); Pretty_print dummy( "", -1 ); if( decoder.decode_member( dummy ) == 0 && rdec.member_position() == msize ) return true; - if( failure_posp ) *failure_posp = rdec.member_position(); } + if( failure_posp ) *failure_posp = rdec.member_position(); return false; } int merge_files( const std::vector< std::string > & filenames, - const std::string & output_filename, 
const int verbosity, - const bool force ) + const std::string & default_output_filename, + const int verbosity, const bool force ) { const int files = filenames.size(); std::vector< int > infd_vector( files ); File_index file_index; + struct stat in_stats; const int retval = - open_input_files( filenames, infd_vector, file_index, verbosity ); + open_input_files( filenames, infd_vector, file_index, &in_stats, verbosity ); if( retval >= 0 ) return retval; if( !safe_seek( infd_vector[0], 0 ) ) return 1; - const int outfd = open_outstream_rw( output_filename, force ); - if( outfd < 0 ) return 1; + output_filename = default_output_filename.empty() ? + insert_fixed( filenames[0] ) : default_output_filename; + if( !open_outstream( force, false, true, false ) ) return 1; if( !copy_file( infd_vector[0], outfd ) ) // copy whole file - cleanup_and_fail( output_filename, outfd, 1 ); + cleanup_and_fail( 1 ); for( long j = 0; j < file_index.members(); ++j ) { @@ -458,14 +474,14 @@ int merge_files( const std::vector< std::string > & filenames, std::vector< int > color_vector( files, 0 ); if( !diff_member( mpos, msize, infd_vector, block_vector, color_vector ) || !safe_seek( outfd, mpos ) ) - cleanup_and_fail( output_filename, outfd, 1 ); + cleanup_and_fail( 1 ); if( block_vector.empty() ) { - if( file_index.members() > 1 && try_decompress_member( outfd, msize ) ) + if( file_index.members() > 1 && test_member_from_file( outfd, msize ) ) continue; show_error( "Input files are (partially) identical. Merging is not possible." 
); - cleanup_and_fail( output_filename, outfd, 2 ); + cleanup_and_fail( 2 ); } if( verbosity >= 1 && file_index.members() > 1 ) @@ -479,12 +495,12 @@ int merge_files( const std::vector< std::string > & filenames, if( file_index.members() > 1 || block_vector.size() > 1 ) { done = try_merge_member( mpos, msize, block_vector, color_vector, - infd_vector, output_filename, outfd, verbosity ); + infd_vector, verbosity ); if( !done && verbosity >= 1 ) std::fputc( '\n', stdout ); } if( !done ) done = try_merge_member1( mpos, msize, block_vector, color_vector, - infd_vector, output_filename, outfd, verbosity ); + infd_vector, verbosity ); if( verbosity >= 1 ) std::fputc( '\n', stdout ); if( !done ) { @@ -493,15 +509,11 @@ int merge_files( const std::vector< std::string > & filenames, std::fprintf( stderr, "area %2d from position %6lld to %6lld\n", i + 1, block_vector[i].pos(), block_vector[i].end() - 1 ); show_error( "Some error areas overlap. Can't recover input file." ); - cleanup_and_fail( output_filename, outfd, 2 ); + cleanup_and_fail( 2 ); } } - if( close( outfd ) != 0 ) - { - show_error( "Error closing output file", errno ); - cleanup_and_fail( output_filename, -1, 1 ); - } + if( close_outstream( &in_stats ) != 0 ) return 1; if( verbosity >= 1 ) std::fputs( "Input files merged successfully.\n", stdout ); return 0; @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2015 Antonio Diaz Diaz. + Copyright (C) 2009-2016 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -56,7 +56,10 @@ void LZ_mtester::flush_data() { const int size = pos - stream_pos; crc32.update_buf( crc_, buffer + stream_pos, size ); - if( pos >= dictionary_size ) { partial_data_pos += pos; pos = 0; } + if( outfd >= 0 && writeblock( outfd, buffer + stream_pos, size ) != size ) + throw Error( "Write error" ); + if( pos >= dictionary_size ) + { partial_data_pos += pos; pos = 0; pos_wrapped = true; } stream_pos = pos; } } @@ -64,13 +67,12 @@ void LZ_mtester::flush_data() bool LZ_mtester::verify_trailer() { - const File_trailer * trailer = rdec.get_trailer(); + const File_trailer * const trailer = rdec.get_trailer(); if( !trailer ) return false; - return ( rdec.code_is_zero() && - trailer->data_crc() == crc() && + return ( trailer->data_crc() == crc() && trailer->data_size() == data_position() && - trailer->member_size() == (unsigned long)member_position() ); + trailer->member_size() == member_position() ); } @@ -101,7 +103,7 @@ void LZ_mtester::duplicate_buffer() /* Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF, 3 = trailer error, 4 = unknown marker found, -1 = pos_limit reached. 
*/ -int LZ_mtester::test_member( const long pos_limit ) +int LZ_mtester::test_member( const unsigned long pos_limit ) { if( pos_limit < File_header::size + 5 ) return -1; if( member_position() == File_header::size ) rdec.load(); @@ -181,11 +183,12 @@ int LZ_mtester::test_member( const long pos_limit ) } return 4; } + if( rep0 > max_rep0 ) max_rep0 = rep0; } } rep3 = rep2; rep2 = rep1; rep1 = rep0_saved; state.set_match(); - if( rep0 >= dictionary_size || rep0 >= data_position() ) + if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) ) { flush_data(); return 1; } } copy_block( rep0, len ); @@ -304,8 +307,13 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos, if( show_packets ) std::fputs( "trailer error\n", stdout ); return 3; } + if( len == min_match_len + 1 ) // Sync Flush marker + { + rdec.load(); continue; + } return 4; } + if( rep0 > max_rep0 ) max_rep0 = rep0; } } rep3 = rep2; rep2 = rep1; rep1 = rep0_saved; @@ -313,7 +321,7 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos, if( show_packets ) std::printf( "%6llu %6llu match %6u,%3d (%6lld)", mp, dp, rep0 + 1, len, dp - rep0 - 1 ); - if( rep0 >= dictionary_size || rep0 >= data_position() ) + if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) ) { flush_data(); if( show_packets ) std::fputc( '\n', stdout ); return 1; } } @@ -343,19 +351,13 @@ uint8_t * read_member( const int infd, const long long mpos, const LZ_mtester * prepare_master( const uint8_t * const buffer, const long buffer_size, - const long pos_limit ) + const unsigned long pos_limit, + const unsigned dictionary_size ) { - const File_header & header = *(File_header *)buffer; - const unsigned dictionary_size = header.dictionary_size(); - if( header.verify_magic() && header.verify_version() && - dictionary_size >= min_dictionary_size && - dictionary_size <= max_dictionary_size ) - { - LZ_mtester * const master = - new LZ_mtester( buffer, buffer_size, dictionary_size 
); - if( master->test_member( pos_limit ) == -1 ) return master; - delete master; - } + LZ_mtester * const master = + new LZ_mtester( buffer, buffer_size, dictionary_size ); + if( master->test_member( pos_limit ) == -1 ) return master; + delete master; return 0; } @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2015 Antonio Diaz Diaz. + Copyright (C) 2009-2016 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -37,27 +37,29 @@ public: at_stream_end( false ) {} - bool code_is_zero() const { return ( code == 0 ); } bool finished() { return pos >= buffer_size; } - long member_position() const { return pos; } + unsigned long member_position() const { return pos; } uint8_t get_byte() { - if( finished() ) return 0xAA; // make code != 0 + // 0xFF avoids decoder error if member is truncated at EOS marker + if( finished() ) return 0xFF; return buffer[pos++]; } const File_trailer * get_trailer() { if( buffer_size - pos < File_trailer::size ) return 0; - const File_trailer * const p = (File_trailer *)(buffer + pos); + const File_trailer * const p = (File_trailer *)( buffer + pos ); pos += File_trailer::size; return p; } void load() { + code = 0; for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte(); + range = 0xFFFFFFFFU; code &= range; // make sure that first byte is discarded } @@ -198,11 +200,14 @@ class LZ_mtester unsigned pos; // current pos in buffer unsigned stream_pos; // first byte not yet written to file uint32_t crc_; + const int outfd; // output file descriptor unsigned rep0; // rep[0-3] latest four distances unsigned rep1; // used for efficient coding of unsigned rep2; // repeated distances unsigned rep3; State state; + unsigned max_rep0; // maximum distance found + bool pos_wrapped; Bit_model bm_literal[1<<literal_context_bits][0x300]; Bit_model bm_match[State::states][pos_states]; @@ -267,7 +272,7 @@ 
class LZ_mtester public: LZ_mtester( const uint8_t * const ibuf, const long ibuf_size, - const int dict_size ) + const unsigned dict_size, const int ofd = -1 ) : partial_data_pos( 0 ), rdec( ibuf, ibuf_size ), @@ -276,10 +281,13 @@ public: pos( 0 ), stream_pos( 0 ), crc_( 0xFFFFFFFFU ), + outfd( ofd ), rep0( 0 ), rep1( 0 ), rep2( 0 ), - rep3( 0 ) + rep3( 0 ), + max_rep0( 0 ), + pos_wrapped( false ) { buffer[dictionary_size-1] = 0; } // prev_byte of first byte ~LZ_mtester() { delete[] buffer; } @@ -287,12 +295,13 @@ public: unsigned crc() const { return crc_ ^ 0xFFFFFFFFU; } unsigned long long data_position() const { return partial_data_pos + pos; } bool finished() { return rdec.finished(); } - long member_position() const { return rdec.member_position(); } + unsigned long member_position() const { return rdec.member_position(); } + unsigned max_distance() const { return max_rep0 + 1; } void duplicate_buffer(); - int test_member( const long pos_limit = LONG_MAX ); + int test_member( const unsigned long pos_limit = LONG_MAX ); // sets max_rep0 int debug_decode_member( const long long dpos, const long long mpos, - const bool show_packets ); + const bool show_packets ); // sets max_rep0 }; @@ -300,5 +309,6 @@ uint8_t * read_member( const int infd, const long long mpos, const long long msize ); const LZ_mtester * prepare_master( const uint8_t * const buffer, const long buffer_size, - const long pos_limit ); + const unsigned long pos_limit, + const unsigned dictionary_size ); bool test_member_rest( const LZ_mtester & master, long * const failure_posp = 0 ); diff --git a/range_dec.cc b/range_dec.cc index c6ccb7a..eeb542a 100644 --- a/range_dec.cc +++ b/range_dec.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2015 Antonio Diaz Diaz. + Copyright (C) 2009-2016 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -36,50 +36,44 @@ namespace { -int decompress_member( const int infd, const int outfd, - const Pretty_print & pp, +int decompress_member( const int infd, const Pretty_print & pp, const unsigned long long mpos, const unsigned long long outskip, const unsigned long long outend ) { - try { - Range_decoder rdec( infd ); - File_header header; - rdec.read_data( header.data, File_header::size ); - if( rdec.finished() ) // End Of File - { pp( "File ends unexpectedly at member header." ); return 2; } - if( !verify_header( header, pp ) ) return 2; - const unsigned dictionary_size = header.dictionary_size(); - if( dictionary_size < min_dictionary_size || - dictionary_size > max_dictionary_size ) - { pp( "Invalid dictionary size in member header." ); return 2; } - - if( pp.verbosity() >= 2 ) { pp(); show_header( dictionary_size ); } - - LZ_decoder decoder( header, rdec, outfd, outskip, outend ); - const int result = decoder.decode_member( pp ); - if( result != 0 ) + Range_decoder rdec( infd ); + File_header header; + rdec.read_data( header.data, File_header::size ); + if( rdec.finished() ) // End Of File + { pp( "File ends unexpectedly at member header." ); return 2; } + if( !verify_header( header, pp ) ) return 2; + const unsigned dictionary_size = header.dictionary_size(); + if( !isvalid_ds( dictionary_size ) ) + { pp( "Invalid dictionary size in member header." ); return 2; } + + if( pp.verbosity() >= 2 ) { pp(); show_header( dictionary_size ); } + + LZ_decoder decoder( rdec, dictionary_size, outfd, outskip, outend ); + const int result = decoder.decode_member( pp ); + if( result != 0 ) + { + if( pp.verbosity() >= 0 && result <= 2 ) { - if( pp.verbosity() >= 0 && result <= 2 ) - { - pp(); - std::fprintf( stderr, "%s at pos %llu\n", ( result == 2 ) ? 
- "File ends unexpectedly" : "Decoder error", - mpos + rdec.member_position() ); - } - return 2; + pp(); + std::fprintf( stderr, "%s at pos %llu\n", ( result == 2 ) ? + "File ends unexpectedly" : "Decoder error", + mpos + rdec.member_position() ); } - if( pp.verbosity() >= 2 ) std::fputs( "done\n", stderr ); + return 2; } - catch( std::bad_alloc ) { pp( "Not enough memory." ); return 1; } - catch( Error e ) { pp(); show_error( e.msg, errno ); return 1; } + if( pp.verbosity() >= 2 ) std::fputs( "done\n", stderr ); return 0; } int list_file( const char * const input_filename, const Pretty_print & pp ) { - struct stat in_stats; + struct stat in_stats; // not used const int infd = open_instream( input_filename, &in_stats, true, true ); if( infd < 0 ) return 1; @@ -94,8 +88,8 @@ int list_file( const char * const input_filename, const Pretty_print & pp ) const unsigned long long file_size = file_index.file_end(); unsigned dictionary_size = 0; for( long i = 0; i < file_index.members(); ++i ) - if( dictionary_size < file_index.dictionary_size( i ) ) - dictionary_size = file_index.dictionary_size( i ); + dictionary_size = + std::max( dictionary_size, file_index.dictionary_size( i ) ); pp( 0, stdout ); show_header( dictionary_size, 1 ); if( data_size > 0 && file_size > 0 ) @@ -180,7 +174,7 @@ int list_files( const std::vector< std::string > & filenames, int range_decompress( const std::string & input_filename, - const std::string & output_filename, + const std::string & default_output_filename, Block range, const int verbosity, const bool force, const bool ignore, const bool to_stdout ) { @@ -209,12 +203,14 @@ int range_decompress( const std::string & input_filename, format_num( range.size() ) ); } - int outfd = -1; - if( to_stdout || output_filename.empty() ) + if( to_stdout || default_output_filename.empty() ) outfd = STDOUT_FILENO; else - { outfd = open_outstream_rw( output_filename, force ); - if( outfd < 0 ) return 1; } + { + output_filename = default_output_filename; + 
if( !open_outstream( force, false, false, false ) ) + { close( infd ); return 1; } + } int retval = 0; for( long i = 0; i < file_index.members(); ++i ) @@ -228,19 +224,15 @@ int range_decompress( const std::string & input_filename, const long long outend = std::min( db.size(), range.end() - db.pos() ); const long long mpos = file_index.mblock( i ).pos(); if( !safe_seek( infd, mpos ) ) { retval = 1; break; } - const int tmp = decompress_member( infd, outfd, pp, mpos, outskip, outend ); + const int tmp = decompress_member( infd, pp, mpos, outskip, outend ); if( tmp && ( tmp != 2 || !ignore ) ) - cleanup_and_fail( output_filename, outfd, tmp ); + cleanup_and_fail( tmp ); if( tmp > retval ) retval = tmp; pp.reset(); } } close( infd ); - if( close( outfd ) != 0 ) - { - show_error( "Error closing output file", errno ); - cleanup_and_fail( output_filename, -1, 1 ); - } + retval = std::max( retval, close_outstream( &in_stats ) ); if( verbosity >= 2 && retval == 0 ) std::fputs( "Byte range decompressed successfully.\n", stderr ); return retval; @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2015 Antonio Diaz Diaz. + Copyright (C) 2009-2016 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,6 +17,7 @@ #define _FILE_OFFSET_BITS 64 +#include <algorithm> #include <cerrno> #include <climits> #include <cstdio> @@ -45,8 +46,7 @@ bool gross_damage( const long long msize, const uint8_t * const mbuffer ) { const uint8_t byte = mbuffer[i]; int len = 0; // does not count the first byte - while( mbuffer[++i] == byte && ++len < maxlen ) {} - if( len >= maxlen ) return true; + while( mbuffer[++i] == byte ) if( ++len >= maxlen ) return true; } return false; } @@ -60,15 +60,55 @@ int seek_write( const int fd, const uint8_t * const buf, const int size, return 0; } + +// Return value: 0 = no change, 5 = repaired pos +int repair_dictionary_size( const long long msize, uint8_t * const mbuffer ) + { + enum { dictionary_size_9 = 1 << 25 }; // dictionary size of option -9 + File_header & header = *(File_header *)mbuffer; + unsigned dictionary_size = header.dictionary_size(); + File_trailer & trailer = + *(File_trailer *)( mbuffer + msize - File_trailer::size ); + const unsigned long long data_size = trailer.data_size(); + const bool valid_ds = isvalid_ds( dictionary_size ); + if( valid_ds && dictionary_size >= data_size ) return 0; // can't be bad + + if( !valid_ds || dictionary_size < dictionary_size_9 ) + { + dictionary_size = + std::min( data_size, (unsigned long long)dictionary_size_9 ); + if( dictionary_size < min_dictionary_size ) + dictionary_size = min_dictionary_size; + LZ_mtester mtester( mbuffer, msize, dictionary_size ); + const int result = mtester.test_member(); + if( result == 0 ) + { header.dictionary_size( dictionary_size ); return 5; } // fix DS + if( result != 1 || mtester.max_distance() <= dictionary_size || + mtester.max_distance() > max_dictionary_size ) return 0; + } + if( data_size > dictionary_size_9 ) + { + dictionary_size = + std::min( data_size, (unsigned long long)max_dictionary_size ); + LZ_mtester 
mtester( mbuffer, msize, dictionary_size ); + if( mtester.test_member() == 0 ) + { header.dictionary_size( dictionary_size ); return 5; } // fix DS + } + return 0; + } + + // Return value: -1 = master failed, 0 = begin reached, >0 = repaired pos long repair_member( const long long mpos, const long long msize, uint8_t * const mbuffer, const long begin, const long end, - const int verbosity ) + const unsigned dictionary_size, const int verbosity ) { for( long pos = end; pos >= begin && pos > end - 50000; ) { const long min_pos = std::max( begin, pos - 100 ); - const LZ_mtester * master = prepare_master( mbuffer, msize, min_pos - 16 ); + const unsigned long pos_limit = std::max( min_pos - 16, 0L ); + const LZ_mtester * master = + prepare_master( mbuffer, msize, pos_limit, dictionary_size ); if( !master ) return -1; for( ; pos >= min_pos; --pos ) { @@ -93,10 +133,10 @@ long repair_member( const long long mpos, const long long msize, int repair_file( const std::string & input_filename, - const std::string & output_filename, const int verbosity, - const bool force ) + const std::string & default_output_filename, + const int verbosity, const bool force ) { - struct stat in_stats; // not used + struct stat in_stats; const int infd = open_instream( input_filename.c_str(), &in_stats, true, true ); if( infd < 0 ) return 1; @@ -105,19 +145,21 @@ int repair_file( const std::string & input_filename, if( file_index.retval() != 0 ) { pp( file_index.error().c_str() ); return file_index.retval(); } + output_filename = default_output_filename.empty() ? 
+ insert_fixed( input_filename ) : default_output_filename; if( !force && file_exists( output_filename ) ) return 1; - int outfd = -1; + outfd = -1; for( long i = 0; i < file_index.members(); ++i ) { const long long mpos = file_index.mblock( i ).pos(); const long long msize = file_index.mblock( i ).size(); if( !safe_seek( infd, mpos ) ) - cleanup_and_fail( output_filename, outfd, 1 ); + cleanup_and_fail( 1 ); long long failure_pos = 0; - if( try_decompress_member( infd, msize, &failure_pos ) ) continue; - if( failure_pos < File_header::size ) + if( test_member_from_file( infd, msize, &failure_pos ) ) continue; + if( failure_pos < File_header::size ) // End Of File { show_error( "Can't repair error in input file." ); - cleanup_and_fail( output_filename, outfd, 2 ); } + cleanup_and_fail( 2 ); } if( verbosity >= 1 ) // damaged member found { @@ -128,38 +170,41 @@ int repair_file( const std::string & input_filename, if( failure_pos >= msize - 8 ) failure_pos = msize - 8 - 1; uint8_t * const mbuffer = read_member( infd, mpos, msize ); if( !mbuffer ) - cleanup_and_fail( output_filename, outfd, 1 ); + cleanup_and_fail( 1 ); + const File_header & header = *(File_header *)mbuffer; + const unsigned dictionary_size = header.dictionary_size(); long pos = 0; if( !gross_damage( msize, mbuffer ) ) { - pos = repair_member( mpos, msize, mbuffer, File_header::size + 1, - File_header::size + 2, verbosity ); + pos = repair_dictionary_size( msize, mbuffer ); + if( pos == 0 ) + pos = repair_member( mpos, msize, mbuffer, File_header::size + 1, + File_header::size + 5, dictionary_size, verbosity ); if( pos == 0 ) - pos = repair_member( mpos, msize, mbuffer, File_header::size + 3, - failure_pos, verbosity ); + pos = repair_member( mpos, msize, mbuffer, File_header::size + 6, + failure_pos, dictionary_size, verbosity ); } if( pos < 0 ) - cleanup_and_fail( output_filename, outfd, 1 ); + cleanup_and_fail( 1 ); if( pos > 0 ) { if( outfd < 0 ) // first damaged member repaired { if( !safe_seek( 
infd, 0 ) ) return 1; - outfd = open_outstream_rw( output_filename, true ); - if( outfd < 0 ) { close( infd ); return 1; } + if( !open_outstream( true, false ) ) { close( infd ); return 1; } if( !copy_file( infd, outfd ) ) // copy whole file - cleanup_and_fail( output_filename, outfd, 1 ); + cleanup_and_fail( 1 ); } if( seek_write( outfd, mbuffer + pos, 1, mpos + pos ) != 1 ) { show_error( "Error writing output file", errno ); - cleanup_and_fail( output_filename, outfd, 1 ); } + cleanup_and_fail( 1 ); } } delete[] mbuffer; if( verbosity >= 1 ) std::fputc( '\n', stdout ); if( pos == 0 ) { show_error( "Can't repair input file. Error is probably larger than 1 byte." ); - cleanup_and_fail( output_filename, outfd, 2 ); + cleanup_and_fail( 2 ); } } @@ -169,11 +214,7 @@ int repair_file( const std::string & input_filename, std::fputs( "Input file has no errors. Recovery is not needed.\n", stdout ); return 0; } - if( close( outfd ) != 0 ) - { - show_error( "Error closing output file", errno ); - cleanup_and_fail( output_filename, -1, 1 ); - } + if( close_outstream( &in_stats ) != 0 ) return 1; if( verbosity >= 1 ) std::fputs( "Copy of input file repaired successfully.\n", stdout ); return 0; @@ -203,6 +244,7 @@ int debug_delay( const std::string & input_filename, Block range, if( !range.overlaps( mb ) ) continue; const long long mpos = file_index.mblock( i ).pos(); const long long msize = file_index.mblock( i ).size(); + const unsigned dictionary_size = file_index.dictionary_size( i ); if( verbosity >= 1 ) { std::printf( "Finding max delay in member %ld of %ld (mpos = %llu, msize = %llu)\n", @@ -216,7 +258,9 @@ int debug_delay( const std::string & input_filename, Block range, long max_delay = 0; while( pos < end ) { - const LZ_mtester * master = prepare_master( mbuffer, msize, pos - 16 ); + const unsigned long pos_limit = std::max( pos - 16, 0L ); + const LZ_mtester * master = + prepare_master( mbuffer, msize, pos_limit, dictionary_size ); if( !master ) { show_error( "Can't 
prepare master." ); return 1; } const long partial_end = std::min( pos + 100, end ); @@ -232,7 +276,7 @@ int debug_delay( const std::string & input_filename, Block range, { ++mbuffer[pos]; if( j == 255 ) break; - long failure_pos; + long failure_pos = 0; if( test_member_rest( *master, &failure_pos ) ) continue; const long delay = failure_pos - pos; if( delay > max_delay ) { max_delay = delay; value = mbuffer[pos]; } @@ -279,7 +323,7 @@ int debug_repair( const std::string & input_filename, const long long bad_pos, { long long failure_pos = 0; if( !safe_seek( infd, mpos ) ) return 1; - if( !try_decompress_member( infd, msize, &failure_pos ) ) + if( !test_member_from_file( infd, msize, &failure_pos ) ) { if( verbosity >= 0 ) std::printf( "Member %ld of %ld already damaged (failure pos = %llu)\n", @@ -289,22 +333,27 @@ int debug_repair( const std::string & input_filename, const long long bad_pos, } uint8_t * const mbuffer = read_member( infd, mpos, msize ); if( !mbuffer ) return 1; + const File_header & header = *(File_header *)mbuffer; + const unsigned dictionary_size = header.dictionary_size(); const uint8_t good_value = mbuffer[bad_pos-mpos]; mbuffer[bad_pos-mpos] = bad_value; long failure_pos = 0; - { - const LZ_mtester * master = prepare_master( mbuffer, msize, 0 ); - if( !master ) - { show_error( "Can't prepare master." ); delete[] mbuffer; return 1; } - if( test_member_rest( *master, &failure_pos ) ) + if( bad_pos != 5 || isvalid_ds( header.dictionary_size() ) ) { - if( verbosity >= 1 ) - std::fputs( "Member decompressed with no errors.\n", stdout ); + const LZ_mtester * master = + prepare_master( mbuffer, msize, 0, header.dictionary_size() ); + if( !master ) + { show_error( "Can't prepare master." 
); delete[] mbuffer; return 1; } + if( test_member_rest( *master, &failure_pos ) ) + { + if( verbosity >= 1 ) + std::fputs( "Member decompressed with no errors.\n", stdout ); + delete master; + delete[] mbuffer; + return 0; + } delete master; - delete[] mbuffer; - return 0; } - delete master; if( verbosity >= 1 ) { std::printf( "Test repairing member %ld of %ld (mpos = %llu, msize = %llu)\n" @@ -313,13 +362,14 @@ int debug_repair( const std::string & input_filename, const long long bad_pos, bad_pos, good_value, bad_value, mpos + failure_pos ); std::fflush( stdout ); } - } if( failure_pos >= msize ) failure_pos = msize - 1; - long pos = repair_member( mpos, msize, mbuffer, File_header::size + 1, - File_header::size + 2, verbosity ); + long pos = repair_dictionary_size( msize, mbuffer ); if( pos == 0 ) - pos = repair_member( mpos, msize, mbuffer, File_header::size + 3, - failure_pos, verbosity ); + pos = repair_member( mpos, msize, mbuffer, File_header::size + 1, + File_header::size + 5, dictionary_size, verbosity ); + if( pos == 0 ) + pos = repair_member( mpos, msize, mbuffer, File_header::size + 6, + failure_pos, dictionary_size, verbosity ); delete[] mbuffer; if( pos < 0 ) { show_error( "Can't prepare master." 
); return 1; } @@ -331,11 +381,11 @@ int debug_repair( const std::string & input_filename, const long long bad_pos, } -int debug_show_packets( const std::string & input_filename, - const long long bad_pos, const int verbosity, - const uint8_t bad_value ) +int debug_decompress( const std::string & input_filename, + const long long bad_pos, const int verbosity, + const uint8_t bad_value, const bool show_packets ) { - struct stat in_stats; // not used + struct stat in_stats; const int infd = open_instream( input_filename.c_str(), &in_stats, true, true ); if( infd < 0 ) return 1; @@ -344,45 +394,47 @@ int debug_show_packets( const std::string & input_filename, if( file_index.retval() != 0 ) { pp( file_index.error().c_str() ); return file_index.retval(); } + outfd = show_packets ? -1 : STDOUT_FILENO; int retval = 0; for( long i = 0; i < file_index.members(); ++i ) { const long long dpos = file_index.dblock( i ).pos(); const long long mpos = file_index.mblock( i ).pos(); const long long msize = file_index.mblock( i ).size(); - if( verbosity >= 1 ) + const unsigned dictionary_size = file_index.dictionary_size( i ); + if( verbosity >= 1 && show_packets ) std::printf( "Decoding LZMA packets in member %ld of %ld (mpos = %llu, msize = %llu)\n" " mpos dpos\n", i + 1, file_index.members(), mpos, msize ); + if( !isvalid_ds( dictionary_size ) ) + { show_error( "Invalid dictionary size in member header." ); + retval = 2; break; } uint8_t * const mbuffer = read_member( infd, mpos, msize ); - if( !mbuffer ) return 1; - const File_header & header = *(File_header *)mbuffer; - const unsigned dictionary_size = header.dictionary_size(); - if( !header.verify_magic() || !header.verify_version() || - dictionary_size < min_dictionary_size || - dictionary_size > max_dictionary_size ) - { show_error( "Header error." 
); return 2; } + if( !mbuffer ) { retval = 1; break; } if( bad_pos >= 0 && file_index.mblock( i ).includes( bad_pos ) ) { - if( verbosity >= 1 ) + if( verbosity >= 1 && show_packets ) std::printf( "Byte at pos %llu changed from 0x%02X to 0x%02X\n", bad_pos, mbuffer[bad_pos-mpos], bad_value ); mbuffer[bad_pos-mpos] = bad_value; } - LZ_mtester mtester( mbuffer, msize, dictionary_size ); - const int result = mtester.debug_decode_member( dpos, mpos, true ); + LZ_mtester mtester( mbuffer, msize, dictionary_size, outfd ); + const int result = mtester.debug_decode_member( dpos, mpos, show_packets ); delete[] mbuffer; if( result != 0 ) { - if( verbosity >= 0 && result <= 2 ) + if( verbosity >= 0 && result <= 2 && show_packets ) std::printf( "%s at pos %llu\n", ( result == 2 ) ? "File ends unexpectedly" : "Decoder error", mpos + mtester.member_position() ); retval = 2; break; } - if( i + 1 < file_index.members() ) std::fputc( '\n', stdout ); + if( i + 1 < file_index.members() && show_packets ) + std::fputc( '\n', stdout ); } - if( verbosity >= 1 ) std::fputs( "Done.\n", stdout ); + retval = std::max( retval, close_outstream( &in_stats ) ); + if( verbosity >= 1 && show_packets && retval == 0 ) + std::fputs( "Done.\n", stdout ); return retval; } @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2015 Antonio Diaz Diaz. + Copyright (C) 2009-2016 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,6 +17,7 @@ #define _FILE_OFFSET_BITS 64 +#include <algorithm> #include <cerrno> #include <climits> #include <cstdio> @@ -36,12 +37,10 @@ namespace { void first_filename( const std::string & input_filename, const std::string & default_output_filename, - std::string & output_filename, const int max_digits ) + const int max_digits ) { - if( default_output_filename.size() ) - output_filename = default_output_filename; - else - output_filename = input_filename; + output_filename = default_output_filename.empty() ? + input_filename : default_output_filename; int b = output_filename.size(); while( b > 0 && output_filename[b-1] != '/' ) --b; output_filename.insert( b, "rec1" ); @@ -49,7 +48,7 @@ void first_filename( const std::string & input_filename, } -bool next_filename( std::string & output_filename, const int max_digits ) +bool next_filename( const int max_digits ) { int b = output_filename.size(); while( b > 0 && output_filename[b-1] != '/' ) --b; @@ -114,11 +113,9 @@ int do_split_file( const std::string & input_filename, uint8_t * & base_buffer, int max_digits = 1; for( long i = max_members; i >= 10; i /= 10 ) ++max_digits; - std::string output_filename; - first_filename( input_filename, default_output_filename, output_filename, - max_digits ); - int outfd = open_outstream_rw( output_filename, force ); - if( outfd < 0 ) { close( infd ); return 1; } + first_filename( input_filename, default_output_filename, max_digits ); + if( !open_outstream( force, false, false, false ) ) + { close( infd ); return 1; } unsigned long long partial_member_size = 0; while( true ) @@ -135,17 +132,16 @@ int do_split_file( const std::string & input_filename, uint8_t * & base_buffer, const int wr = writeblock( outfd, buffer + pos, newpos - pos ); if( wr != newpos - pos ) { show_error( "Write error", errno ); return 1; } - if( close( outfd ) != 0 ) 
- { show_error( "Error closing output file", errno ); return 1; } + if( close_outstream( &in_stats ) != 0 ) return 1; if( verbosity >= 1 ) { std::printf( "Member '%s' done \r", output_filename.c_str() ); std::fflush( stdout ); } - if( !next_filename( output_filename, max_digits ) ) + if( !next_filename( max_digits ) ) { show_error( "Too many members in file." ); close( infd ); return 1; } - outfd = open_outstream_rw( output_filename, force ); - if( outfd < 0 ) { close( infd ); return 1; } + if( !open_outstream( force, false, false, false ) ) + { close( infd ); return 1; } partial_member_size = 0; pos = newpos; } @@ -173,8 +169,7 @@ int do_split_file( const std::string & input_filename, uint8_t * & base_buffer, { show_error( "Read error", errno ); return 1; } } close( infd ); - if( close( outfd ) != 0 ) - { show_error( "Error closing output file", errno ); return 1; } + if( close_outstream( &in_stats ) != 0 ) return 1; if( verbosity >= 1 ) { std::printf( "Member '%s' done \n", output_filename.c_str() ); diff --git a/testsuite/check.sh b/testsuite/check.sh index 537f5f0..285d225 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! /bin/sh # check script for Lziprecover - Data recovery tool for the lzip format -# Copyright (C) 2009-2015 Antonio Diaz Diaz. +# Copyright (C) 2009-2016 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute and modify it. @@ -18,20 +18,28 @@ if [ ! -f "${LZIP}" ] || [ ! 
-x "${LZIP}" ] ; then exit 1 fi +if [ -e "${LZIP}" ] 2> /dev/null ; then true +else + echo "$0: a POSIX shell is required to run the tests" + echo "Try bash -c \"$0 $1 $2\"" + exit 1 +fi + if [ -d tmp ] ; then rm -rf tmp ; fi mkdir tmp -cd "${objdir}"/tmp +cd "${objdir}"/tmp || framework_failure cat "${testdir}"/test.txt > in || framework_failure in_lz="${testdir}"/test.txt.lz +in_lzma="${testdir}"/test.txt.lzma inD="${testdir}"/test21723.txt -fox5_lz="${testdir}"/fox5.lz -f5b1="${testdir}"/fox5_bad1.txt -f5b1_lz="${testdir}"/fox5_bad1.lz -f5b2_lz="${testdir}"/fox5_bad2.lz -f5b3_lz="${testdir}"/fox5_bad3.lz -f5b4_lz="${testdir}"/fox5_bad4.lz -f5b5_lz="${testdir}"/fox5_bad5.lz +fox6_lz="${testdir}"/fox6.lz +f6b1="${testdir}"/fox6_bad1.txt +f6b1_lz="${testdir}"/fox6_bad1.lz +f6b2_lz="${testdir}"/fox6_bad2.lz +f6b3_lz="${testdir}"/fox6_bad3.lz +f6b4_lz="${testdir}"/fox6_bad4.lz +f6b5_lz="${testdir}"/fox6_bad5.lz bad1_lz="${testdir}"/test_bad1.lz bad2_lz="${testdir}"/test_bad2.lz bad3_lz="${testdir}"/test_bad3.lz @@ -40,15 +48,16 @@ bad5_lz="${testdir}"/test_bad5.lz fail=0 # Description of test files for lziprecover: -# fox5_bad1.lz: byte at offset 62 changed from 0x50 to 0x70 (CRC) -# byte at offset 144 changed from 0x2D to 0x2E (data_size) -# byte at offset 188 changed from 0x34 to 0x33 (mid stream) -# byte at offset 247 changed from 0x2A to 0x2B (first byte) -# byte at offset 378 changed from 0xA0 to 0x20 (EOS marker) -# fox5_bad2.lz: [ 30- 49] --> zeroed; -# fox5_bad3.lz: [100-299] --> zeroed; -# fox5_bad4.lz: [250-349] --> zeroed; -# fox5_bad5.lz: [300-399] --> zeroed; +# fox6_bad1.lz: byte at offset 5 changed from 0x0C to 0x00 (DS) +# byte at offset 142 changed from 0x50 to 0x70 (CRC) +# byte at offset 224 changed from 0x2D to 0x2E (data_size) +# byte at offset 268 changed from 0x34 to 0x33 (mid stream) +# byte at offset 327 changed from 0x2A to 0x2B (byte 7) +# byte at offset 458 changed from 0xA0 to 0x20 (EOS marker) +# fox6_bad2.lz: [110-129] --> zeroed; +# 
fox6_bad3.lz: [180-379] --> zeroed; +# fox6_bad4.lz: [330-429] --> zeroed; +# fox6_bad5.lz: [380-479] --> zeroed; # test_bad1.lz: byte at offset 66 changed from 0xA6 to 0x46 # test_bad2.lz: [ 34- 65] --> copy of bytes [ 68- 99] # test_bad3.lz: [ 512-1535] --> zeroed; [2560-3583] --> zeroed @@ -78,26 +87,88 @@ if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi "${LZIPRECOVER}" -sq if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi -"${LZIP}" -t "${in_lz}" || fail=1 +"${LZIPRECOVER}" -Aq in +if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIPRECOVER}" -Aq < in > copy.lz # /dev/null returns 1 on OS/2 +if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIPRECOVER}" -Aq < "${in_lz}" > copy.lz +if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIPRECOVER}" -Aq "${in_lz}" +if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIPRECOVER}" -Akq "${in_lzma}" +if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIPRECOVER}" -Ac "${in_lzma}" > copy.lz +if [ $? = 0 ] && cmp "${in_lz}" copy.lz ; then printf . ; else printf - ; fail=1 ; fi +"${LZIPRECOVER}" -A < "${in_lzma}" > copy.lz +if [ $? = 0 ] && cmp "${in_lz}" copy.lz ; then printf . ; else printf - ; fail=1 ; fi +rm -f copy.lz +cat "${in_lzma}" > copy.lzma || framework_failure +"${LZIPRECOVER}" -Ak copy.lzma +if [ $? = 0 ] && cmp "${in_lz}" copy.lz ; then printf . ; else printf - ; fail=1 ; fi +printf "to be overwritten" > copy.lz || framework_failure +"${LZIPRECOVER}" -Af copy.lzma +if [ $? = 0 ] && cmp "${in_lz}" copy.lz ; then printf . ; else printf - ; fail=1 ; fi +rm -f copy.lz +cat "${in_lzma}" > copy.tlz || framework_failure +"${LZIPRECOVER}" -Ak copy.tlz +if [ $? = 0 ] && cmp "${in_lz}" copy.tar.lz ; then printf . ; else printf - ; fail=1 ; fi +printf "to be overwritten" > copy.tar.lz || framework_failure +"${LZIPRECOVER}" -Af copy.tlz +if [ $? = 0 ] && cmp "${in_lz}" copy.tar.lz ; then printf . 
; else printf - ; fail=1 ; fi +rm -f copy.tar.lz +cat "${in_lzma}" > anyothername || framework_failure +"${LZIPRECOVER}" -A -o copy - anyothername - < "${in_lzma}" +if [ $? = 0 ] && cmp "${in_lz}" copy.lz && cmp "${in_lz}" anyothername.lz ; then + printf . ; else printf - ; fail=1 ; fi +rm -f copy.lz anyothername.lz + +printf "\ntesting decompression..." + +"${LZIP}" -t "${in_lz}" +if [ $? = 0 ] ; then printf . ; else printf - ; fail=1 ; fi "${LZIP}" -cd "${in_lz}" > copy || fail=1 cmp in copy || fail=1 printf . +rm -f copy cat "${in_lz}" > copy.lz || framework_failure -printf "to be overwritten" > copy || framework_failure -"${LZIP}" -df copy.lz || fail=1 +"${LZIP}" -dk copy.lz || fail=1 cmp in copy || fail=1 -printf . +printf "to be overwritten" > copy || framework_failure +"${LZIP}" -dq copy.lz +if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIP}" -df copy.lz +if [ $? = 0 ] && [ ! -e copy.lz ] && cmp in copy ; then + printf . ; else printf - ; fail=1 ; fi printf "to be overwritten" > copy || framework_failure "${LZIP}" -df -o copy < "${in_lz}" || fail=1 cmp in copy || fail=1 printf . +rm -f copy cat "${in_lz}" > anyothername || framework_failure -"${LZIP}" -d anyothername || fail=1 -cmp in anyothername.out || fail=1 -printf . +"${LZIP}" -d -o copy - anyothername - < "${in_lz}" +if [ $? = 0 ] && cmp in copy && cmp in anyothername.out ; then + printf . ; else printf - ; fail=1 ; fi +rm -f copy anyothername.out + +"${LZIP}" -tq in "${in_lz}" +if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIP}" -tq foo.lz "${in_lz}" +if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIP}" -cdq in "${in_lz}" > copy +if [ $? = 2 ] && cat copy in | cmp in - ; then printf . ; else printf - ; fail=1 ; fi +"${LZIP}" -cdq foo.lz "${in_lz}" > copy +if [ $? = 1 ] && cmp in copy ; then printf . ; else printf - ; fail=1 ; fi +rm -f copy +cat "${in_lz}" > copy.lz || framework_failure +"${LZIP}" -dq in copy.lz +if [ $? 
= 2 ] && [ -e copy.lz ] && [ ! -e copy ] && [ ! -e in.out ] ; then + printf . ; else printf - ; fail=1 ; fi +"${LZIP}" -dq foo.lz copy.lz +if [ $? = 1 ] && [ ! -e copy.lz ] && [ ! -e foo ] && cmp in copy ; then + printf . ; else printf - ; fail=1 ; fi cat in in > in2 || framework_failure cat "${in_lz}" "${in_lz}" > copy2.lz || framework_failure @@ -121,67 +192,84 @@ printf "to be overwritten" > copy2 || framework_failure cmp in2 copy2 || fail=1 printf . +"${LZIPRECOVER}" -D ,18000 "${in_lz}" > copy || fail=1 +"${LZIPRECOVER}" -D 18000 "${in_lz}" >> copy || fail=1 +cmp in copy || fail=1 +printf . "${LZIPRECOVER}" -D 21723-22120 -fo copy "${in_lz}" || fail=1 cmp "${inD}" copy || fail=1 "${LZIPRECOVER}" -D 21723,397 "${in_lz}" > copy || fail=1 cmp "${inD}" copy || fail=1 printf . -"${LZIPRECOVER}" -D0 -iq "${f5b1_lz}" -fo copy -if [ $? = 2 ] && cmp "${f5b1}" copy ; then printf . ; else printf - ; fail=1 ; fi -"${LZIPRECOVER}" -D0 -iq "${f5b1_lz}" > copy -if [ $? = 2 ] && cmp "${f5b1}" copy ; then printf . ; else printf - ; fail=1 ; fi +"${LZIPRECOVER}" -D0 -iq "${f6b1_lz}" -fo copy +if [ $? = 2 ] && cmp "${f6b1}" copy ; then printf . ; else printf - ; fail=1 ; fi +"${LZIPRECOVER}" -D0 -iq "${f6b1_lz}" > copy +if [ $? = 2 ] && cmp "${f6b1}" copy ; then printf . ; else printf - ; fail=1 ; fi -printf "\ntesting --merge ..." +printf "\ntesting --merge..." rm -f copy.lz -"${LZIPRECOVER}" -m -o copy.lz "${fox5_lz}" "${f5b1_lz}" +"${LZIPRECOVER}" -m -o copy.lz "${fox6_lz}" "${f6b1_lz}" if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi -"${LZIPRECOVER}" -m -o copy.lz "${f5b1_lz}" "${fox5_lz}" +"${LZIPRECOVER}" -m -o copy.lz "${f6b1_lz}" "${fox6_lz}" if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi "${LZIPRECOVER}" -m -o copy.lz "${bad1_lz}" "${bad2_lz}" "${bad1_lz}" -q if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . 
; else printf - ; fail=1 ; fi -"${LZIPRECOVER}" -m -o copy.lz "${f5b1_lz}" "${f5b5_lz}" -q +"${LZIPRECOVER}" -m -o copy.lz "${bad1_lz}" "${bad2_lz}" "${bad2_lz}" -q +if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi +cat "${bad2_lz}" > bad2.lz || framework_failure +"${LZIPRECOVER}" -m -o copy.lz "${bad1_lz}" "${bad2_lz}" bad2.lz -q +if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi +rm -f bad2.lz +"${LZIPRECOVER}" -m -o copy.lz "${f6b1_lz}" "${f6b5_lz}" -q if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi -"${LZIPRECOVER}" -m -o copy.lz "${f5b3_lz}" "${f5b5_lz}" -q +"${LZIPRECOVER}" -m -o copy.lz "${f6b3_lz}" "${f6b5_lz}" -q if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi "${LZIPRECOVER}" -m -o copy.lz "${bad3_lz}" "${bad4_lz}" -q if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi -for i in "${f5b1_lz}" "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" ; do - "${LZIPRECOVER}" -mf -o copy.lz "${f5b2_lz}" "${i}" || fail=1 - cmp "${fox5_lz}" copy.lz || fail=1 - "${LZIPRECOVER}" -mf -o copy.lz "${i}" "${f5b2_lz}" || fail=1 - cmp "${fox5_lz}" copy.lz || fail=1 +"${LZIPRECOVER}" -mf -o copy.lz "${f6b1_lz}" "${f6b4_lz}" || fail=1 +cmp "${fox6_lz}" copy.lz || fail=1 +printf . +"${LZIPRECOVER}" -mf -o copy.lz "${f6b4_lz}" "${f6b1_lz}" || fail=1 +cmp "${fox6_lz}" copy.lz || fail=1 +printf . + +for i in "${f6b1_lz}" "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" ; do + "${LZIPRECOVER}" -mf -o copy.lz "${f6b2_lz}" "${i}" || fail=1 + cmp "${fox6_lz}" copy.lz || fail=1 + "${LZIPRECOVER}" -mf -o copy.lz "${i}" "${f6b2_lz}" || fail=1 + cmp "${fox6_lz}" copy.lz || fail=1 printf . 
done -for i in "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" ; do - "${LZIPRECOVER}" -mf -o copy.lz "${f5b1_lz}" "${f5b2_lz}" "${i}" || fail=1 - cmp "${fox5_lz}" copy.lz || fail=1 - "${LZIPRECOVER}" -mf -o copy.lz "${f5b1_lz}" "${i}" "${f5b2_lz}" || fail=1 - cmp "${fox5_lz}" copy.lz || fail=1 - "${LZIPRECOVER}" -mf -o copy.lz "${f5b2_lz}" "${f5b1_lz}" "${i}" || fail=1 - cmp "${fox5_lz}" copy.lz || fail=1 - "${LZIPRECOVER}" -mf -o copy.lz "${f5b2_lz}" "${i}" "${f5b1_lz}" || fail=1 - cmp "${fox5_lz}" copy.lz || fail=1 - "${LZIPRECOVER}" -mf -o copy.lz "${i}" "${f5b1_lz}" "${f5b2_lz}" || fail=1 - cmp "${fox5_lz}" copy.lz || fail=1 - "${LZIPRECOVER}" -mf -o copy.lz "${i}" "${f5b2_lz}" "${f5b1_lz}" || fail=1 - cmp "${fox5_lz}" copy.lz || fail=1 +for i in "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" ; do + "${LZIPRECOVER}" -mf -o copy.lz "${f6b1_lz}" "${f6b2_lz}" "${i}" || fail=1 + cmp "${fox6_lz}" copy.lz || fail=1 + "${LZIPRECOVER}" -mf -o copy.lz "${f6b1_lz}" "${i}" "${f6b2_lz}" || fail=1 + cmp "${fox6_lz}" copy.lz || fail=1 + "${LZIPRECOVER}" -mf -o copy.lz "${f6b2_lz}" "${f6b1_lz}" "${i}" || fail=1 + cmp "${fox6_lz}" copy.lz || fail=1 + "${LZIPRECOVER}" -mf -o copy.lz "${f6b2_lz}" "${i}" "${f6b1_lz}" || fail=1 + cmp "${fox6_lz}" copy.lz || fail=1 + "${LZIPRECOVER}" -mf -o copy.lz "${i}" "${f6b1_lz}" "${f6b2_lz}" || fail=1 + cmp "${fox6_lz}" copy.lz || fail=1 + "${LZIPRECOVER}" -mf -o copy.lz "${i}" "${f6b2_lz}" "${f6b1_lz}" || fail=1 + cmp "${fox6_lz}" copy.lz || fail=1 printf . done -"${LZIPRECOVER}" -mf -o copy.lz "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" || fail=1 -cmp "${fox5_lz}" copy.lz || fail=1 +"${LZIPRECOVER}" -mf -o copy.lz "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" || fail=1 +cmp "${fox6_lz}" copy.lz || fail=1 printf . 
-"${LZIPRECOVER}" -mf -o copy.lz "${f5b1_lz}" "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" || fail=1 -cmp "${fox5_lz}" copy.lz || fail=1 +"${LZIPRECOVER}" -mf -o copy.lz "${f6b1_lz}" "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" || fail=1 +cmp "${fox6_lz}" copy.lz || fail=1 printf . -"${LZIPRECOVER}" -mf -o copy.lz "${f5b2_lz}" "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" || fail=1 -cmp "${fox5_lz}" copy.lz || fail=1 +"${LZIPRECOVER}" -mf -o copy.lz "${f6b2_lz}" "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" || fail=1 +cmp "${fox6_lz}" copy.lz || fail=1 printf . -"${LZIPRECOVER}" -mf -o copy.lz "${f5b1_lz}" "${f5b2_lz}" "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" || fail=1 -cmp "${fox5_lz}" copy.lz || fail=1 +"${LZIPRECOVER}" -mf -o copy.lz "${f6b1_lz}" "${f6b2_lz}" "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" || fail=1 +cmp "${fox6_lz}" copy.lz || fail=1 printf . "${LZIPRECOVER}" -mf -o copy.lz "${bad1_lz}" "${bad2_lz}" || fail=1 @@ -266,10 +354,10 @@ printf . cmp out4.lz copy4.lz || fail=1 printf . -printf "\ntesting --repair ..." +printf "\ntesting --repair..." rm -f copy.lz -"${LZIPRECOVER}" -R -o copy.lz "${fox5_lz}" || fail=1 +"${LZIPRECOVER}" -R -o copy.lz "${fox6_lz}" || fail=1 if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi "${LZIPRECOVER}" -R -o copy.lz "${bad2_lz}" -q if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi @@ -277,14 +365,14 @@ if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi "${LZIPRECOVER}" -R -o copy.lz "${bad4_lz}" -q if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi -"${LZIPRECOVER}" -Rf -o copy.lz "${f5b1_lz}" || fail=1 -cmp "${fox5_lz}" copy.lz || fail=1 +"${LZIPRECOVER}" -Rf -o copy.lz "${f6b1_lz}" || fail=1 +cmp "${fox6_lz}" copy.lz || fail=1 printf . "${LZIPRECOVER}" -Rf -o copy.lz "${bad1_lz}" || fail=1 cmp "${in_lz}" copy.lz || fail=1 printf . 
-cat "${f5b1_lz}" > copy.tar.lz || framework_failure +cat "${f6b1_lz}" > copy.tar.lz || framework_failure "${LZIPRECOVER}" -R copy.tar.lz || fail=1 if [ $? = 0 ] && [ -e copy_fixed.tar.lz ] ; then printf . ; else printf - ; fail=1 ; fi mv copy.tar.lz copy.lz || framework_failure @@ -294,7 +382,7 @@ mv copy.lz copy.tlz || framework_failure "${LZIPRECOVER}" -R copy.tlz || fail=1 if [ $? = 0 ] && [ -e copy_fixed.tlz ] ; then printf . ; else printf - ; fail=1 ; fi -printf "\ntesting --split ..." +printf "\ntesting --split..." cat "${in_lz}" "${in_lz}" "${in_lz}" > copy || framework_failure printf "garbage" >> copy || fail=1 diff --git a/testsuite/fox5_bad1.lz b/testsuite/fox5_bad1.lz Binary files differdeleted file mode 100644 index a3b5658..0000000 --- a/testsuite/fox5_bad1.lz +++ /dev/null diff --git a/testsuite/fox5_bad3.lz b/testsuite/fox5_bad3.lz Binary files differdeleted file mode 100644 index ef58e47..0000000 --- a/testsuite/fox5_bad3.lz +++ /dev/null diff --git a/testsuite/fox5_bad4.lz b/testsuite/fox5_bad4.lz Binary files differdeleted file mode 100644 index 0474bb9..0000000 --- a/testsuite/fox5_bad4.lz +++ /dev/null diff --git a/testsuite/fox5.lz b/testsuite/fox6.lz Binary files differindex 3472f64..8401b99 100644 --- a/testsuite/fox5.lz +++ b/testsuite/fox6.lz diff --git a/testsuite/fox6_bad1.lz b/testsuite/fox6_bad1.lz Binary files differnew file mode 100644 index 0000000..4e0d8fd --- /dev/null +++ b/testsuite/fox6_bad1.lz diff --git a/testsuite/fox5_bad1.txt b/testsuite/fox6_bad1.txt index 14e5367..14e5367 100644 --- a/testsuite/fox5_bad1.txt +++ b/testsuite/fox6_bad1.txt diff --git a/testsuite/fox6_bad2.lz b/testsuite/fox6_bad2.lz Binary files differnew file mode 100644 index 0000000..bf8a04a --- /dev/null +++ b/testsuite/fox6_bad2.lz diff --git a/testsuite/fox6_bad3.lz b/testsuite/fox6_bad3.lz Binary files differnew file mode 100644 index 0000000..2d3cff2 --- /dev/null +++ b/testsuite/fox6_bad3.lz diff --git a/testsuite/fox5_bad2.lz 
b/testsuite/fox6_bad4.lz Binary files differindex 9993ea7..e931d7d 100644 --- a/testsuite/fox5_bad2.lz +++ b/testsuite/fox6_bad4.lz diff --git a/testsuite/fox5_bad5.lz b/testsuite/fox6_bad5.lz Binary files differindex 6ec2740..95f44f3 100644 --- a/testsuite/fox5_bad5.lz +++ b/testsuite/fox6_bad5.lz diff --git a/testsuite/test.txt.lz b/testsuite/test.txt.lz Binary files differindex 41d2e39..22cea6e 100644 --- a/testsuite/test.txt.lz +++ b/testsuite/test.txt.lz diff --git a/testsuite/test.txt.lzma b/testsuite/test.txt.lzma Binary files differnew file mode 100644 index 0000000..53e54ea --- /dev/null +++ b/testsuite/test.txt.lzma diff --git a/testsuite/test_bad1.lz b/testsuite/test_bad1.lz Binary files differindex d63dcbf..16762ca 100644 --- a/testsuite/test_bad1.lz +++ b/testsuite/test_bad1.lz diff --git a/testsuite/test_bad2.lz b/testsuite/test_bad2.lz Binary files differindex 4842785..e013c34 100644 --- a/testsuite/test_bad2.lz +++ b/testsuite/test_bad2.lz diff --git a/testsuite/test_bad3.lz b/testsuite/test_bad3.lz Binary files differindex b231ee0..0ae9e7d 100644 --- a/testsuite/test_bad3.lz +++ b/testsuite/test_bad3.lz diff --git a/testsuite/test_bad4.lz b/testsuite/test_bad4.lz Binary files differindex 2aab2a4..361df5e 100644 --- a/testsuite/test_bad4.lz +++ b/testsuite/test_bad4.lz diff --git a/testsuite/test_bad5.lz b/testsuite/test_bad5.lz Binary files differindex 2e577e6..1ed1566 100644 --- a/testsuite/test_bad5.lz +++ b/testsuite/test_bad5.lz diff --git a/unzcrash.cc b/unzcrash.cc index 50262b9..3970638 100644 --- a/unzcrash.cc +++ b/unzcrash.cc @@ -1,6 +1,6 @@ /* Unzcrash - Tests robustness of decompressors to corrupted data. Inspired by unzcrash.c from Julian Seward's bzip2. - Copyright (C) 2008-2015 Antonio Diaz Diaz. + Copyright (C) 2008-2016 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,6 +22,7 @@ (eg, bug) which caused unzcrash to panic. */ +#include <algorithm> #include <cerrno> #include <climits> #include <csignal> @@ -44,7 +45,7 @@ namespace { const char * const Program_name = "Unzcrash"; const char * const program_name = "unzcrash"; -const char * const program_year = "2015"; +const char * const program_year = "2016"; const char * invocation_name = 0; int verbosity = 0; @@ -58,15 +59,28 @@ void show_help() "it, increasing 256 times each byte of the compressed data, so as to test\n" "all possible one-byte errors. This should not cause any invalid memory\n" "accesses. If it does, please, report it as a bug.\n" + "\nIf the decompressor returns with zero status, unzcrash compares the\n" + "output of the decompressor for the original and corrupt files. If the\n" + "outputs differ, it means that the decompressor failed to recognize the\n" + "corruption and produced garbage output. Please, report it as a bug.\n" + "\nIn order to compare the outputs, unzcrash needs a zcmp program able to\n" + "understand the format being tested. 
For example the one provided by zutils.\n" + "Use '--zcmp=false' to disable comparisons.\n" "\nOptions:\n" - " -h, --help display this help and exit\n" - " -V, --version output version information and exit\n" - " -b, --bits=<range> test N-bit errors instead of full byte\n" - " -p, --position=<bytes> first byte position to test [default 0]\n" - " -q, --quiet suppress all messages\n" - " -s, --size=<bytes> number of byte positions to test [all]\n" - " -v, --verbose be verbose (a 2nd -v gives more)\n" + " -h, --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -b, --bits=<range> test N-bit errors instead of full byte\n" + " -B, --block[=<size>][,<val>] test blocks of given size [512,0]\n" + " -d, --delta=<n> test one of every n bytes/blocks/truncations\n" + " -p, --position=<bytes> first byte position to test [default 0]\n" + " -q, --quiet suppress all messages\n" + " -s, --size=<bytes> number of byte positions to test [all]\n" + " -t, --truncate test decompression of truncated file\n" + " -v, --verbose be verbose (a 2nd -v gives more)\n" + " -z, --zcmp=<command> set zcmp command name and options [zcmp]\n" "Examples of <range>: 1 1,2,3 1-4 1,3-5,8 1-3,5-8\n" + "A negative position is relative to the end of file.\n" + "A negative size is relative to the rest of the file.\n" "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" "invalid input file, 3 for an internal consistency error (eg, bug) which\n" @@ -89,19 +103,16 @@ void show_version() void show_error( const char * const msg, const int errcode = 0, const bool help = false ) { - if( verbosity >= 0 ) + if( verbosity < 0 ) return; + if( msg && msg[0] ) { - if( msg && msg[0] ) - { - std::fprintf( stderr, "%s: %s", program_name, msg ); - if( errcode > 0 ) - std::fprintf( stderr, ": %s", std::strerror( errcode ) ); - std::fputc( '\n', stderr ); - } - if( help ) - std::fprintf( 
stderr, "Try '%s --help' for more information.\n", - invocation_name ); + std::fprintf( stderr, "%s: %s", program_name, msg ); + if( errcode > 0 ) std::fprintf( stderr, ": %s", std::strerror( errcode ) ); + std::fputc( '\n', stderr ); } + if( help ) + std::fprintf( stderr, "Try '%s --help' for more information.\n", + invocation_name ); } @@ -113,13 +124,12 @@ void internal_error( const char * const msg ) } -unsigned long long getnum( const char * const ptr, - const unsigned long long llimit, - const unsigned long long ulimit ) +long getnum( const char * const ptr, const long llimit, const long ulimit, + const bool comma = false ) { char * tail; errno = 0; - unsigned long long result = strtoull( ptr, &tail, 0 ); + long result = strtol( ptr, &tail, 0 ); if( tail == ptr ) { show_error( "Bad or missing numerical argument.", 0, true ); @@ -129,11 +139,10 @@ unsigned long long getnum( const char * const ptr, if( !errno && tail[0] ) { const int factor = ( tail[1] == 'i' ) ? 1024 : 1000; - int exponent = 0; - bool bad_multiplier = false; + int exponent = -1; // -1 = bad multiplier switch( tail[0] ) { - case ' ': break; + case ',': if( comma ) exponent = 0; break; case 'Y': exponent = 8; break; case 'Z': exponent = 7; break; case 'E': exponent = 6; break; @@ -141,20 +150,17 @@ unsigned long long getnum( const char * const ptr, case 'T': exponent = 4; break; case 'G': exponent = 3; break; case 'M': exponent = 2; break; - case 'K': if( factor == 1024 ) exponent = 1; else bad_multiplier = true; - break; - case 'k': if( factor == 1000 ) exponent = 1; else bad_multiplier = true; - break; - default : bad_multiplier = true; + case 'K': if( factor == 1024 ) exponent = 1; break; + case 'k': if( factor == 1000 ) exponent = 1; break; } - if( bad_multiplier ) + if( exponent < 0 ) { show_error( "Bad multiplier in numerical argument.", 0, true ); std::exit( 1 ); } for( int i = 0; i < exponent; ++i ) { - if( ulimit / factor >= result ) result *= factor; + if( LONG_MAX / factor >= 
std::labs( result ) ) result *= factor; else { errno = ERANGE; break; } } } @@ -168,6 +174,65 @@ unsigned long long getnum( const char * const ptr, } +void parse_block( const char * const ptr, long & size, uint8_t & value ) + { + const char * const ptr2 = std::strchr( ptr, ',' ); + + if( !ptr2 || ptr2 != ptr ) + size = getnum( ptr, 1, INT_MAX, true ); + if( ptr2 ) + value = getnum( ptr2 + 1, 0, 255 ); + } + + +/* Returns the address of a malloc'd buffer containing the file data and + its size in '*size'. + In case of error, returns 0 and does not modify '*size'. +*/ +uint8_t * read_file( const char * const name, long * const size ) + { + FILE * const f = std::fopen( name, "rb" ); + if( !f ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't open input file '%s': %s\n", + program_name, name, std::strerror( errno ) ); + return 0; + } + + long buffer_size = 1 << 20; + uint8_t * buffer = (uint8_t *)std::malloc( buffer_size ); + if( !buffer ) { show_error( "Not enough memory." ); return 0; } + long file_size = std::fread( buffer, 1, buffer_size, f ); + while( file_size >= buffer_size ) + { + if( buffer_size >= LONG_MAX ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Input file '%s' is too large.\n", + program_name, name ); + std::free( buffer ); return 0; + } + buffer_size = ( buffer_size <= LONG_MAX / 2 ) ? 2 * buffer_size : LONG_MAX; + uint8_t * const tmp = (uint8_t *)std::realloc( buffer, buffer_size ); + if( !tmp ) + { show_error( "Not enough memory." 
); std::free( buffer ); return 0; } + buffer = tmp; + file_size += std::fread( buffer + file_size, 1, buffer_size - file_size, f ); + } + if( std::ferror( f ) || !std::feof( f ) ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Error reading file '%s': %s\n", + program_name, name, std::strerror( errno ) ); + std::free( buffer ); return 0; + } + std::fclose( f ); + *size = file_size; + return buffer; + } + + class Bitset8 // 8 value bitset (1 to 8) { bool data[8]; @@ -241,21 +306,31 @@ int differing_bits( const uint8_t byte1, const uint8_t byte2 ) int main( const int argc, const char * const argv[] ) { - enum { buffer_size = 75 << 20 }; + enum Mode { m_block, m_byte, m_truncate }; + const char * mode_str[3] = { "block", "byte", "size" }; Bitset8 bits; // if Bitset8::parse not called test full byte - int pos = 0; - int max_size = buffer_size; + const char * zcmp_program = "zcmp"; + long pos = 0; + long max_size = LONG_MAX; + long delta = 1; + long block_size = 512; + Mode program_mode = m_byte; + uint8_t block_value = 0; invocation_name = argv[0]; const Arg_parser::Option options[] = { { 'h', "help", Arg_parser::no }, { 'b', "bits", Arg_parser::yes }, + { 'B', "block", Arg_parser::maybe }, + { 'd', "delta", Arg_parser::yes }, { 'p', "position", Arg_parser::yes }, { 'q', "quiet", Arg_parser::no }, { 's', "size", Arg_parser::yes }, + { 't', "truncate", Arg_parser::no }, { 'v', "verbose", Arg_parser::no }, { 'V', "version", Arg_parser::no }, + { 'z', "zcmp", Arg_parser::yes }, { 0 , 0, Arg_parser::no } }; const Arg_parser parser( argc, argv, options ); @@ -271,12 +346,17 @@ int main( const int argc, const char * const argv[] ) switch( code ) { case 'h': show_help(); return 0; - case 'b': if( !bits.parse( arg ) ) return 1; break; - case 'p': pos = getnum( arg, 0, buffer_size - 1 ); break; + case 'b': if( !bits.parse( arg ) ) return 1; program_mode = m_byte; break; + case 'B': if( arg[0] ) parse_block( arg, block_size, block_value ); + program_mode = m_block; 
break; + case 'd': delta = getnum( arg, 1, INT_MAX ); break; + case 'p': pos = getnum( arg, -LONG_MAX, LONG_MAX ); break; case 'q': verbosity = -1; break; - case 's': max_size = getnum( arg, 1, buffer_size ); break; + case 's': max_size = getnum( arg, -LONG_MAX, LONG_MAX ); break; + case 't': program_mode = m_truncate; break; case 'v': if( verbosity < 4 ) ++verbosity; break; case 'V': show_version(); return 0; + case 'z': zcmp_program = arg; break; default : internal_error( "uncaught option." ); } } // end process options @@ -289,67 +369,178 @@ int main( const int argc, const char * const argv[] ) return 1; } - FILE *f = std::fopen( parser.argument( argind + 1 ).c_str(), "rb" ); + const char * const filename = parser.argument( argind + 1 ).c_str(); + long file_size = 0; + uint8_t * const buffer = read_file( filename, &file_size ); + if( !buffer ) return 1; + const char * const command = parser.argument( argind ).c_str(); + char zcmp_command[1024] = { 0 }; + if( std::strcmp( zcmp_program, "false" ) != 0 ) + snprintf( zcmp_command, sizeof zcmp_command, "%s '%s' -", + zcmp_program, filename ); + + // verify original file + if( verbosity >= 1 ) fprintf( stderr, "Testing file '%s'\n", filename ); + FILE * f = popen( command, "w" ); if( !f ) + { show_error( "Can't open pipe to decompressor", errno ); return 1; } + if( (long)std::fwrite( buffer, 1, file_size, f ) != file_size ) + { show_error( "Can't write to decompressor", errno ); return 1; } + if( pclose( f ) != 0 ) { if( verbosity >= 0 ) - std::fprintf( stderr, "Can't open file '%s' for reading.\n", - parser.argument( argind + 1 ).c_str() ); + std::fprintf( stderr, "%s: Can't run '%s'.\n", program_name, command ); return 1; } - - uint8_t * const buffer = new uint8_t[buffer_size]; - const int size = std::fread( buffer, 1, buffer_size, f ); - if( size >= buffer_size ) + if( zcmp_command[0] ) { - if( verbosity >= 0 ) - std::fprintf( stderr, "input file '%s' is too large.\n", - parser.argument( argind + 1 ).c_str() ); - 
return 2; - } - std::fclose( f ); - - f = popen( parser.argument( argind ).c_str(), "w" ); - if( !f ) - { show_error( "Can't open pipe", errno ); return 1; } - const int wr = std::fwrite( buffer, 1, size, f ); - if( wr != size || pclose( f ) != 0 ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "Could not run '%s': %s\n", - parser.argument( argind ).c_str(), std::strerror( errno ) ); - return 1; + f = popen( zcmp_command, "w" ); + if( !f ) + { show_error( "Can't open pipe to zcmp command", errno ); return 1; } + if( (long)std::fwrite( buffer, 1, file_size, f ) != file_size ) + { show_error( "Can't write to zcmp command", errno ); return 1; } + if( pclose( f ) != 0 ) + { + show_error( "zcmp command failed. Skipping comparison" ); + zcmp_command[0] = 0; + } } std::signal( SIGPIPE, SIG_IGN ); - if( verbosity >= 1 ) bits.print(); - const int end = ( ( pos + max_size < size ) ? pos + max_size : size ); - for( int i = pos; i < end; ++i ) + if( pos < 0 ) pos = std::max( 0L, file_size + pos ); + if( pos >= file_size || max_size == 0 || + ( max_size < 0 && -max_size >= file_size - pos ) ) + { show_error( "Nothing to do; domain is empty." ); return 0; } + if( max_size < 0 ) max_size += file_size - pos; + const long end = ( ( max_size < file_size - pos ) ? 
pos + max_size : file_size ); + long positions = 0, decompressions = 0, successes = 0, failed_comparisons = 0; + if( program_mode == m_truncate ) + for( long i = pos; i < end; i += std::min( delta, end - i ) ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "length %ld\n", i ); + ++positions; ++decompressions; + f = popen( command, "w" ); + if( !f ) { show_error( "Can't open pipe", errno ); return 1; } + std::fwrite( buffer, 1, i, f ); + if( pclose( f ) == 0 ) + { + ++successes; + if( verbosity >= 0 ) + std::fputs( "passed the test\n", stderr ); + if( zcmp_command[0] ) + { + f = popen( zcmp_command, "w" ); + if( !f ) { show_error( "Can't open pipe", errno ); return 1; } + std::fwrite( buffer, 1, i, f ); + if( pclose( f ) != 0 ) + { + ++failed_comparisons; + if( verbosity >= 0 ) + std::fprintf( stderr, "byte %ld comparison failed\n", i ); + } + } + } + } + else if( program_mode == m_block ) { - if( verbosity >= 0 ) - std::fprintf( stderr, "byte %d\n", i ); - const uint8_t byte = buffer[i]; - for( int j = 1; j < 256; ++j ) + uint8_t * block = (uint8_t *)std::malloc( block_size ); + if( !block ) { show_error( "Not enough memory." 
); return 1; } + for( long i = pos; i < end; i += std::min( block_size * delta, end - i ) ) + { + const long size = std::min( block_size, file_size - i ); + if( verbosity >= 0 ) + std::fprintf( stderr, "block %ld,%ld\n", i, size ); + ++positions; ++decompressions; + f = popen( command, "w" ); + if( !f ) { show_error( "Can't open pipe", errno ); return 1; } + std::memcpy( block , buffer + i, size ); + std::memset( buffer + i, block_value, size ); + std::fwrite( buffer, 1, file_size, f ); + if( pclose( f ) == 0 ) + { + ++successes; + if( verbosity >= 0 ) + std::fputs( "passed the test\n", stderr ); + if( zcmp_command[0] ) + { + f = popen( zcmp_command, "w" ); + if( !f ) { show_error( "Can't open pipe", errno ); return 1; } + std::fwrite( buffer, 1, file_size, f ); + if( pclose( f ) != 0 ) + { + ++failed_comparisons; + if( verbosity >= 0 ) + std::fprintf( stderr, "block %ld,%ld comparison failed\n", i, size ); + } + } + } + std::memcpy( buffer + i, block, size ); + } + std::free( block ); + } + else + { + if( verbosity >= 1 ) bits.print(); + for( long i = pos; i < end; i += std::min( delta, end - i ) ) { - ++buffer[i]; - if( bits.includes( differing_bits( byte, buffer[i] ) ) ) + if( verbosity >= 0 ) + std::fprintf( stderr, "byte %ld\n", i ); + ++positions; + const uint8_t byte = buffer[i]; + for( int j = 1; j < 256; ++j ) { - if( verbosity >= 2 ) - std::fprintf( stderr, "0x%02X (0x%02X+0x%02X) ", - buffer[i], byte, j ); - f = popen( parser.argument( argind ).c_str(), "w" ); - if( !f ) - { show_error( "Can't open pipe", errno ); return 1; } - std::fwrite( buffer, 1, size, f ); - if( pclose( f ) == 0 && verbosity >= 0 ) - std::fprintf( stderr, "0x%02X (0x%02X+0x%02X) passed the test\n", - buffer[i], byte, j ); + ++buffer[i]; + if( bits.includes( differing_bits( byte, buffer[i] ) ) ) + { + ++decompressions; + if( verbosity >= 2 ) + std::fprintf( stderr, "0x%02X (0x%02X+0x%02X) ", + buffer[i], byte, j ); + f = popen( command, "w" ); + if( !f ) { show_error( "Can't open 
pipe", errno ); return 1; } + std::fwrite( buffer, 1, file_size, f ); + if( pclose( f ) == 0 ) + { + ++successes; + if( verbosity >= 0 ) + std::fprintf( stderr, "0x%02X (0x%02X+0x%02X) passed the test\n", + buffer[i], byte, j ); + if( zcmp_command[0] ) + { + f = popen( zcmp_command, "w" ); + if( !f ) { show_error( "Can't open pipe", errno ); return 1; } + std::fwrite( buffer, 1, file_size, f ); + if( pclose( f ) != 0 ) + { + ++failed_comparisons; + if( verbosity >= 0 ) + std::fprintf( stderr, "byte %ld comparison failed\n", i ); + } + } + } + } } + buffer[i] = byte; + } + } + + if( verbosity >= 0 ) + { + std::fprintf( stderr, "\n%8ld %ss tested\n%8ld total decompressions" + "\n%8ld decompressions returned with zero status", + positions, mode_str[program_mode], decompressions, successes ); + if( successes > 0 ) + { + if( zcmp_command[0] ) + std::fprintf( stderr, ", of which\n%8ld comparisons failed\n", + failed_comparisons ); + else std::fprintf( stderr, "\n comparisons disabled\n" ); } - buffer[i] = byte; + else std::fputc( '\n', stderr ); } - delete[] buffer; + std::free( buffer ); return 0; } |