diff options
Diffstat (limited to '')
-rw-r--r-- | ChangeLog | 42 | ||||
-rw-r--r-- | INSTALL | 19 | ||||
-rw-r--r-- | Makefile.in | 41 | ||||
-rw-r--r-- | NEWS | 70 | ||||
-rw-r--r-- | README | 39 | ||||
-rw-r--r-- | alone_to_lz.cc | 48 | ||||
-rw-r--r-- | arg_parser.cc | 2 | ||||
-rw-r--r-- | arg_parser.h | 2 | ||||
-rw-r--r-- | block.cc | 33 | ||||
-rw-r--r-- | block.h | 62 | ||||
-rwxr-xr-x | configure | 16 | ||||
-rw-r--r-- | decoder.cc | 126 | ||||
-rw-r--r-- | decoder.h | 2 | ||||
-rw-r--r-- | doc/lziprecover.1 | 41 | ||||
-rw-r--r-- | doc/lziprecover.info | 306 | ||||
-rw-r--r-- | doc/lziprecover.texi | 273 | ||||
-rw-r--r-- | dump_remove.cc | 288 | ||||
-rw-r--r-- | list.cc | 45 | ||||
-rw-r--r-- | lzip.h | 148 | ||||
-rw-r--r-- | lzip_index.cc (renamed from file_index.cc) | 184 | ||||
-rw-r--r-- | lzip_index.h (renamed from file_index.h) | 36 | ||||
-rw-r--r-- | main.cc | 275 | ||||
-rw-r--r-- | main_common.cc | 15 | ||||
-rw-r--r-- | merge.cc | 185 | ||||
-rw-r--r-- | mtester.cc | 342 | ||||
-rw-r--r-- | mtester.h | 125 | ||||
-rw-r--r-- | range_dec.cc | 62 | ||||
-rw-r--r-- | repair.cc | 214 | ||||
-rw-r--r-- | split.cc | 194 | ||||
-rwxr-xr-x | testsuite/check.sh | 977 | ||||
-rw-r--r-- | testsuite/fox.lz | bin | 0 -> 80 bytes | |||
-rw-r--r-- | testsuite/fox6_bad6.lz | bin | 0 -> 480 bytes | |||
-rw-r--r-- | testsuite/fox6_sc1.lz | bin | 0 -> 480 bytes | |||
-rw-r--r-- | testsuite/fox6_sc2.lz | bin | 0 -> 500 bytes | |||
-rw-r--r-- | testsuite/fox6_sc3.lz | bin | 0 -> 500 bytes | |||
-rw-r--r-- | testsuite/fox6_sc4.lz | bin | 0 -> 500 bytes | |||
-rw-r--r-- | testsuite/fox6_sc5.lz | bin | 0 -> 500 bytes | |||
-rw-r--r-- | testsuite/fox6_sc6.lz | bin | 0 -> 500 bytes | |||
-rw-r--r-- | testsuite/numbers.lz | bin | 0 -> 369 bytes | |||
-rw-r--r-- | testsuite/numbersbt.lz | bin | 0 -> 392 bytes | |||
-rw-r--r-- | trailing_data.cc | 144 | ||||
-rw-r--r-- | unzcrash.cc | 13 |
42 files changed, 2818 insertions, 1551 deletions
@@ -1,11 +1,35 @@ +2019-01-04 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.21 released. + * File_* renamed to Lzip_*. + * Added new options '--dump', '--remove' and '--strip'. They + replace '--dump-tdata', '--remove-tdata' and '--strip-tdata', + which are now aliases and will be removed in version 1.22. + * lzip.h (Lzip_trailer): New function 'verify_consistency'. + * lzip_index.cc: Lzip_index now detects gaps between members, + some kinds of corrupt trailers and + some fake trailers embedded in trailing data. + * split.cc: Use Lzip_index to split members, gaps and trailing data. + * split.cc: Verify last member before writing anything. + * list.cc (list_files): With '-i', ignore format errors, show gaps. + * range_dec.cc: With '-i', ignore a truncated last member. + * main.cc (main): Check return value of close( infd ). + * Improve and add new diagnostic messages. + * Print '\n' instead of '\r' if !isatty( 1 ) in merge, repair. + * main.cc: Compile on DOS with DJGPP. + * lziprecover.texi: Added chapter 'Tarlz'. + * configure: Accept appending to CXXFLAGS, 'CXXFLAGS+=OPTIONS'. + * INSTALL: Document use of CXXFLAGS+='-D __USE_MINGW_ANSI_STDIO'. + * Added new test files fox.lz fox6_sc[1-6].lz. + 2018-02-12 Antonio Diaz Diaz <antonio@gnu.org> * Version 1.20 released. * split.cc: Fixed splitting of files > 64 KiB broken since 1.16. - * main.cc: Added new option '--dump-tdata'. - * main.cc: Added new option '--remove-tdata'. - * main.cc: Added new option '--strip-tdata'. - * main.cc: Added new option '--loose-trailing'. + * Added new option '--dump-tdata'. + * Added new option '--remove-tdata'. + * Added new option '--strip-tdata'. + * Added new option '--loose-trailing'. * Improved corrupt header detection to HD=3. * main.cc: Show corrupt or truncated header in multimember file. * Replaced 'bits/byte' with inverse compression ratio in output. @@ -23,8 +47,8 @@ * The output of option '-l, --list' has been simplified. 
* main.cc: Continue testing if any input file is a terminal. * main.cc: Show trailing data in both hexadecimal and ASCII. - * file_index.cc: Improve detection of bad dict and trailing data. - * file_index.cc: Skip trailing data more efficiently. + * lzip_index.cc: Improve detection of bad dict and trailing data. + * lzip_index.cc: Skip trailing data more efficiently. * lzip.h: Unified messages for bad magic, trailing data, etc. * New struct Bad_byte allows delta and flip modes for bad_value. * unzcrash.cc: Added new option '-e, --set-byte'. @@ -32,7 +56,7 @@ 2016-05-12 Antonio Diaz Diaz <antonio@gnu.org> * Version 1.18 released. - * main.cc: Added new option '-a, --trailing-error'. + * Added new option '-a, --trailing-error'. * merge.cc (open_input_files): Use CRC to test identical files. * repair.cc (repair_file): Detect gross damage before repairing. * repair.cc: Repair a damaged dictionary size in the header. @@ -58,7 +82,7 @@ * unzcrash.cc: Read files as large as RAM allows. * unzcrash.cc: Compare output using zcmp if decompressor returns 0. * unzcrash.cc: Accept negative position and size. - * lzip.texi: Added chapter 'Trailing data'. + * lziprecover.texi: Added chapter 'Trailing data'. * configure: Avoid warning on some shells when testing for g++. * Makefile.in: Detect the existence of install-info. * check.sh: Don't check error messages. @@ -168,7 +192,7 @@ * unzcrash.cc: Test all 1-byte errors. -Copyright (C) 2009-2018 Antonio Diaz Diaz. +Copyright (C) 2009-2019 Antonio Diaz Diaz. This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute and @@ -1,14 +1,19 @@ Requirements ------------ You will need a C++ compiler. -I use gcc 5.3.0 and 4.1.2, but the code should compile with any -standards compliant compiler. +I use gcc 5.3.0 and 4.1.2, but the code should compile with any standards +compliant compiler. Gcc is available at http://gcc.gnu.org. 
-Unzcrash needs a zcmp program able to understand the format being -tested. For example the zcmp program provided by zutils. +Unzcrash needs a zcmp program able to understand the format being tested. +For example the zcmp program provided by zutils. Zutils is available at http://www.nongnu.org/zutils/zutils.html +The operating system must allow signal handlers read access to objects with +static storage duration so that the cleanup handler for Control-C can delete +the partial output file. + + Procedure --------- 1. Unpack the archive if you have not done so already: @@ -26,6 +31,10 @@ the main archive. cd lziprecover[version] ./configure + If you are compiling on MinGW, use: + + ./configure CXXFLAGS+='-D __USE_MINGW_ANSI_STDIO' + 3. Run make. make @@ -65,7 +74,7 @@ After running 'configure', you can run 'make' and 'make install' as explained above. -Copyright (C) 2009-2018 Antonio Diaz Diaz. +Copyright (C) 2009-2019 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. 
diff --git a/Makefile.in b/Makefile.in index 76c06cc..48c8448 100644 --- a/Makefile.in +++ b/Makefile.in @@ -7,8 +7,8 @@ INSTALL_DIR = $(INSTALL) -d -m 755 SHELL = /bin/sh CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1 -objs = arg_parser.o alone_to_lz.o block.o file_index.o list.o merge.o \ - mtester.o range_dec.o repair.o split.o trailing_data.o decoder.o main.o +objs = arg_parser.o alone_to_lz.o lzip_index.o list.o dump_remove.o \ + merge.o mtester.o range_dec.o repair.o split.o decoder.o main.o unzobjs = arg_parser.o unzcrash.o @@ -36,21 +36,20 @@ unzcrash.o : unzcrash.cc %.o : %.cc $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< -$(objs) : Makefile -alone_to_lz.o : lzip.h mtester.h -arg_parser.o : arg_parser.h -block.o : block.h -decoder.o : lzip.h decoder.h -file_index.o : lzip.h block.h file_index.h -list.o : lzip.h block.h file_index.h -main.o : arg_parser.h lzip.h decoder.h block.h main_common.cc -merge.o : lzip.h decoder.h block.h file_index.h -mtester.o : lzip.h mtester.h -range_dec.o : lzip.h decoder.h block.h file_index.h -repair.o : lzip.h mtester.h block.h file_index.h -split.o : lzip.h block.h file_index.h -trailing_data.o : lzip.h block.h file_index.h -unzcrash.o : Makefile arg_parser.h main_common.cc +$(objs) : Makefile +alone_to_lz.o : lzip.h mtester.h +arg_parser.o : arg_parser.h +decoder.o : lzip.h decoder.h +dump_remove.o : lzip.h lzip_index.h +list.o : lzip.h lzip_index.h +lzip_index.o : lzip.h lzip_index.h +main.o : arg_parser.h lzip.h decoder.h main_common.cc +merge.o : lzip.h decoder.h lzip_index.h +mtester.o : lzip.h mtester.h +range_dec.o : lzip.h decoder.h lzip_index.h +repair.o : lzip.h mtester.h lzip_index.h +split.o : lzip.h lzip_index.h +unzcrash.o : Makefile arg_parser.h main_common.cc doc : info man @@ -137,11 +136,15 @@ dist : doc $(DISTNAME)/*.h \ $(DISTNAME)/*.cc \ $(DISTNAME)/testsuite/check.sh \ - $(DISTNAME)/testsuite/fox6.lz \ - $(DISTNAME)/testsuite/fox6_bad[1-5].lz \ 
$(DISTNAME)/testsuite/fox6_bad1.txt \ $(DISTNAME)/testsuite/test.txt \ $(DISTNAME)/testsuite/test21723.txt \ + $(DISTNAME)/testsuite/fox.lz \ + $(DISTNAME)/testsuite/fox6.lz \ + $(DISTNAME)/testsuite/fox6_sc[1-6].lz \ + $(DISTNAME)/testsuite/fox6_bad[1-6].lz \ + $(DISTNAME)/testsuite/numbers.lz \ + $(DISTNAME)/testsuite/numbersbt.lz \ $(DISTNAME)/testsuite/test.txt.lz \ $(DISTNAME)/testsuite/test.txt.lzma \ $(DISTNAME)/testsuite/test_bad[1-5].lz @@ -1,46 +1,52 @@ -Changes in version 1.20: +Changes in version 1.21: -Splitting was broken for files larger than 64 KiB because of a bug -introduced in version 1.16. +The options '--dump', '--remove' and '--strip' have been added, mainly as +support for the tarlz archive format: http://www.nongnu.org/lzip/tarlz.html +These options replace '--dump-tdata', '--remove-tdata' and '--strip-tdata', +which are now aliases and will be removed in version 1.22. -The options "--dump-tdata", "--remove-tdata", and "--strip-tdata" have -been added to ease the management of metadata stored as trailing data: + '--dump=[<member_list>][:damaged][:tdata]' dumps the members listed, the + damaged members (if any), or the trailing data (if any) of one or more + regular multimember files to standard output. - "--dump-tdata" dumps the trailing data (if any) of one or more regular - files to standard output. + '--remove=[<member_list>][:damaged][:tdata]' removes the members listed, + the damaged members (if any), or the trailing data (if any) from regular + multimember files in place. - "--remove-tdata" removes the trailing data from regular files in place. + '--strip=[<member_list>][:damaged][:tdata]' copies one or more regular + multimember files to standard output, stripping the members listed, the + damaged members (if any), or the trailing data (if any) from each file. - "--strip-tdata" copies one or more regular files to standard output, - stripping the trailing data (if any) from each file. 
+Detection of forbidden combinations of characters in trailing data has been +improved. -The option '--loose-trailing', has been added. +'--split' can now detect trailing data and gaps between members, and save +each gap in its own file. Trailing data (if any) are saved alone in the last +file. (Gaps may contain garbage or may be members with corrupt headers or +trailers). -The test used by lziprecover to discriminate trailing data from a corrupt -header in multimember or concatenated files has been improved to a -Hamming distance (HD) of 3, and the 3 bit flips must happen in different -magic bytes for the test to fail. As a consequence some kinds of files -no longer can be appended to a lzip file as trailing data unless the -'--loose-trailing' option is used when decompressing. -Lziprecover can be used to remove conflicting trailing data from a file. +'--ignore-errors' now makes '--list' show gaps between members, ignoring +format errors. -The contents of a corrupt or truncated header found in a multimember -file is now shown, after the error message, in the same format as -trailing data. +'--ignore-errors' now makes '--range-decompress' ignore a truncated last +member. -The 'bits/byte' ratio has been replaced with the inverse compression -ratio in the output. +Errors are now also checked when closing the input file in decompression +mode. -The progress of decompression is now shown at verbosity level 2 (-vv) or -higher. +Some diagnostic messages have been improved. -Progress of decompression is only shown if stderr is a terminal. +'\n' is now printed instead of '\r' when showing progress of merge or repair +if stdout is not a terminal. -A final diagnostic is now shown at verbosity level 1 (-v) or higher if -any file fails the test when testing multiple files. +Lziprecover now compiles on DOS with DJGPP. (Patch from Robert Riebisch). -In case of (de)compressed size mismatch, the stored size is now also -shown in hexadecimal to ease visual comparison. 
+The new chapter 'Tarlz', explaining the ways in which lziprecover can +recover and process multimember tar.lz archives, has been added to the +manual. -The dictionary size is now shown at verbosity level 4 (-vvvv) when -decompressing or testing. +The configure script now accepts appending options to CXXFLAGS using the +syntax 'CXXFLAGS+=OPTIONS'. + +It has been documented in INSTALL the use of +CXXFLAGS+='-D __USE_MINGW_ANSI_STDIO' when compiling on MinGW. @@ -1,20 +1,25 @@ Description -Lziprecover is a data recovery tool and decompressor for files in the -lzip compressed data format (.lz). Lziprecover is able to repair -slightly damaged files, produce a correct file by merging the good parts -of two or more damaged copies, extract data from damaged files, -decompress files and test integrity of files. +Lziprecover is a data recovery tool and decompressor for files in the lzip +compressed data format (.lz). Lziprecover is able to repair slightly damaged +files, produce a correct file by merging the good parts of two or more +damaged copies, extract data from damaged files, decompress files and test +integrity of files. -Lziprecover provides random access to the data in multimember files; it -only decompresses the members containing the desired data. +Lziprecover can remove the damaged members from multimember files, for +example multimember tar.lz archives. + +Lziprecover provides random access to the data in multimember files; it only +decompresses the members containing the desired data. + +Lziprecover facilitates the management of metadata stored as trailing data +in lzip files. Lziprecover is not a replacement for regular backups, but a last line of defense for the case where the backups are also damaged. 
-The lzip file format is designed for data sharing and long-term -archiving, taking into account both data integrity and decoder -availability: +The lzip file format is designed for data sharing and long-term archiving, +taking into account both data integrity and decoder availability: * The lzip format provides very safe integrity checking and some data recovery means. The lziprecover program can repair bit flip errors @@ -23,11 +28,11 @@ availability: merging of damaged copies of a file. * The lzip format is as simple as possible (but not simpler). The - lzip manual provides the source code of a simple decompressor along - with a detailed explanation of how it works, so that with the only - help of the lzip manual it would be possible for a digital - archaeologist to extract the data from a lzip file long after - quantum computers eventually render LZMA obsolete. + lzip manual provides the source code of a simple decompressor + along with a detailed explanation of how it works, so that with + the only help of the lzip manual it would be possible for a + digital archaeologist to extract the data from a lzip file long + after quantum computers eventually render LZMA obsolete. * Additionally the lzip reference implementation is copylefted, which guarantees that it will remain free forever. @@ -56,7 +61,7 @@ with the '-D' option. When recovering data, lziprecover takes as arguments the names of the damaged files and writes zero or more recovered files depending on the operation selected and whether the recovery succeeded or not. The -damaged files themselves are never modified. +damaged files themselves are kept unchanged. When decompressing or testing file integrity, lziprecover behaves like lzip or lunzip. @@ -75,7 +80,7 @@ unzcrash.c from Julian Seward's bzip2. Type 'make unzcrash' in the lziprecover source directory to build it. Then try 'unzcrash --help'. -Copyright (C) 2009-2018 Antonio Diaz Diaz. +Copyright (C) 2009-2019 Antonio Diaz Diaz. 
This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/alone_to_lz.cc b/alone_to_lz.cc index e949f9d..dd39e34 100644 --- a/alone_to_lz.cc +++ b/alone_to_lz.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2018 Antonio Diaz Diaz. + Copyright (C) 2009-2019 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -15,6 +15,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#define _FILE_OFFSET_BITS 64 + #include <algorithm> #include <cerrno> #include <climits> @@ -36,7 +38,8 @@ namespace { the file size in '*size'. The buffer is at least 20 bytes larger. In case of error, returns 0 and does not modify '*size'. */ -uint8_t * read_file( const int infd, long * const size, const Pretty_print & pp ) +uint8_t * read_file( const int infd, long * const size, + const char * const filename ) { long buffer_size = 1 << 20; uint8_t * buffer = (uint8_t *)std::malloc( buffer_size ); @@ -46,7 +49,8 @@ uint8_t * read_file( const int infd, long * const size, const Pretty_print & pp while( file_size >= buffer_size - 20 && !errno ) { if( buffer_size >= LONG_MAX ) - { pp( "file is too large" ); std::free( buffer ); return 0; } + { show_file_error( filename, "File is too large" ); std::free( buffer ); + return 0; } buffer_size = ( buffer_size <= LONG_MAX / 2 ) ? 
2 * buffer_size : LONG_MAX; uint8_t * const tmp = (uint8_t *)std::realloc( buffer, buffer_size ); if( !tmp ) { std::free( buffer ); throw std::bad_alloc(); } @@ -56,10 +60,9 @@ uint8_t * read_file( const int infd, long * const size, const Pretty_print & pp } if( errno ) { - show_error( "Error reading file", errno ); + show_file_error( filename, "Error reading file", errno ); std::free( buffer ); return 0; } - close( infd ); *size = file_size; return buffer; } @@ -79,39 +82,40 @@ bool validate_ds( unsigned * const dictionary_size ) int alone_to_lz( const int infd, const Pretty_print & pp ) { - enum { lzma_header_size = 13, offset = lzma_header_size - File_header::size }; + enum { lzma_header_size = 13, offset = lzma_header_size - Lzip_header::size }; try { long file_size = 0; - uint8_t * const buffer = read_file( infd, &file_size, pp ); + uint8_t * const buffer = read_file( infd, &file_size, pp.name() ); if( !buffer ) return 1; - if( verbosity >= 1 ) pp(); - if( file_size < lzma_header_size ) - { pp( "file is too short" ); std::free( buffer ); return 2; } + { show_file_error( pp.name(), "file is too short" ); + std::free( buffer ); return 2; } + if( buffer[0] != 93 ) // (45 * 2) + (9 * 0) + 3 { - File_header & header = *(File_header *)buffer; - const unsigned dictionary_size = header.dictionary_size(); + const Lzip_header & header = *(const Lzip_header *)buffer; if( header.verify_magic() && header.verify_version() && - isvalid_ds( dictionary_size ) ) - pp( "file is already in lzip format" ); + isvalid_ds( header.dictionary_size() ) ) + show_file_error( pp.name(), "file is already in lzip format" ); else - pp( "file has non-default LZMA properties" ); + show_file_error( pp.name(), "file has non-default LZMA properties" ); std::free( buffer ); return 2; } for( int i = 5; i < 13; ++i ) if( buffer[i] != 0xFF ) - { pp( "file is non-streamed" ); std::free( buffer ); return 2; } + { show_file_error( pp.name(), "file is non-streamed" ); + std::free( buffer ); return 2; } + 
if( verbosity >= 1 ) pp(); unsigned dictionary_size = 0; for( int i = 4; i > 0; --i ) { dictionary_size <<= 8; dictionary_size += buffer[i]; } const unsigned orig_dictionary_size = dictionary_size; validate_ds( &dictionary_size ); - File_header & header = *(File_header *)( buffer + offset ); + Lzip_header & header = *(Lzip_header *)( buffer + offset ); header.set_magic(); header.dictionary_size( dictionary_size ); - for( int i = 0; i < File_trailer::size; ++i ) buffer[file_size++] = 0; + for( int i = 0; i < Lzip_trailer::size; ++i ) buffer[file_size++] = 0; { LZ_mtester mtester( buffer + offset, file_size - offset, dictionary_size ); const int result = mtester.test_member(); @@ -126,8 +130,8 @@ int alone_to_lz( const int infd, const Pretty_print & pp ) std::max( mtester.max_distance(), (unsigned)min_dictionary_size ); header.dictionary_size( dictionary_size ); } - File_trailer & trailer = - *(File_trailer *)( buffer + file_size - File_trailer::size ); + Lzip_trailer & trailer = + *(Lzip_trailer *)( buffer + file_size - Lzip_trailer::size ); trailer.data_crc( mtester.crc() ); trailer.data_size( mtester.data_position() ); trailer.member_size( mtester.member_position() ); @@ -142,8 +146,8 @@ int alone_to_lz( const int infd, const Pretty_print & pp ) } std::free( buffer ); } - catch( std::bad_alloc ) { pp( "Not enough memory." ); return 1; } - catch( Error e ) { pp(); show_error( e.msg, errno ); return 1; } + catch( std::bad_alloc & ) { pp( "Not enough memory." ); return 1; } + catch( Error & e ) { pp(); show_error( e.msg, errno ); return 1; } if( verbosity >= 1 ) std::fputs( "done\n", stderr ); return 0; } diff --git a/arg_parser.cc b/arg_parser.cc index 008ebc8..ea32fde 100644 --- a/arg_parser.cc +++ b/arg_parser.cc @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C++ version) - Copyright (C) 2006-2018 Antonio Diaz Diaz. + Copyright (C) 2006-2019 Antonio Diaz Diaz. This library is free software. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided diff --git a/arg_parser.h b/arg_parser.h index f015881..ceb9933 100644 --- a/arg_parser.h +++ b/arg_parser.h @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C++ version) - Copyright (C) 2006-2018 Antonio Diaz Diaz. + Copyright (C) 2006-2019 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided diff --git a/block.cc b/block.cc deleted file mode 100644 index d588e48..0000000 --- a/block.cc +++ /dev/null @@ -1,33 +0,0 @@ -/* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2018 Antonio Diaz Diaz. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#include <string> -#include <vector> - -#include "block.h" - - -Block Block::split( const long long pos ) - { - if( pos > pos_ && pos < end() ) - { - const Block b( pos_, pos - pos_ ); - pos_ = pos; size_ -= b.size_; - return b; - } - return Block( 0, 0 ); - } diff --git a/block.h b/block.h deleted file mode 100644 index d424d35..0000000 --- a/block.h +++ /dev/null @@ -1,62 +0,0 @@ -/* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2018 Antonio Diaz Diaz. 
- - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#ifndef INT64_MAX -#define INT64_MAX 0x7FFFFFFFFFFFFFFFLL -#endif - - -class Block - { - long long pos_, size_; // pos + size <= INT64_MAX - -public: - Block( const long long p, const long long s ) : pos_( p ), size_( s ) {} - - long long pos() const { return pos_; } - long long size() const { return size_; } - long long end() const { return pos_ + size_; } - - void pos( const long long p ) { pos_ = p; } - void size( const long long s ) { size_ = s; } - - bool operator==( const Block & b ) const - { return pos_ == b.pos_ && size_ == b.size_; } - bool operator!=( const Block & b ) const - { return pos_ != b.pos_ || size_ != b.size_; } - - bool operator<( const Block & b ) const { return pos_ < b.pos_; } - - bool includes( const long long pos ) const - { return ( pos_ <= pos && end() > pos ); } - bool overlaps( const Block & b ) const - { return ( pos_ < b.end() && b.pos_ < end() ); } - - void shift( Block & b ) { ++size_; ++b.pos_; --b.size_; } - Block split( const long long pos ); - }; - - -// defined in range_dec.cc -int range_decompress( const std::string & input_filename, - const std::string & default_output_filename, - Block range, const bool force, const bool ignore_errors, - const bool ignore_trailing, const bool loose_trailing, - const bool to_stdout ); - -// defined in repair.cc -int debug_delay( const std::string & 
input_filename, Block range ); @@ -1,12 +1,12 @@ #! /bin/sh # configure script for Lziprecover - Data recovery tool for the lzip format -# Copyright (C) 2009-2018 Antonio Diaz Diaz. +# Copyright (C) 2009-2019 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. pkgname=lziprecover -pkgversion=1.20 +pkgversion=1.21 progname=lziprecover srctrigger=doc/${pkgname}.texi @@ -70,6 +70,7 @@ while [ $# != 0 ] ; do echo " CXX=COMPILER C++ compiler to use [${CXX}]" echo " CPPFLAGS=OPTIONS command line options for the preprocessor [${CPPFLAGS}]" echo " CXXFLAGS=OPTIONS command line options for the C++ compiler [${CXXFLAGS}]" + echo " CXXFLAGS+=OPTIONS append options to the current value of CXXFLAGS" echo " LDFLAGS=OPTIONS command line options for the linker [${LDFLAGS}]" echo exit 0 ;; @@ -93,10 +94,11 @@ while [ $# != 0 ] ; do --mandir=*) mandir=${optarg} ;; --no-create) no_create=yes ;; - CXX=*) CXX=${optarg} ;; - CPPFLAGS=*) CPPFLAGS=${optarg} ;; - CXXFLAGS=*) CXXFLAGS=${optarg} ;; - LDFLAGS=*) LDFLAGS=${optarg} ;; + CXX=*) CXX=${optarg} ;; + CPPFLAGS=*) CPPFLAGS=${optarg} ;; + CXXFLAGS=*) CXXFLAGS=${optarg} ;; + CXXFLAGS+=*) CXXFLAGS="${CXXFLAGS} ${optarg}" ;; + LDFLAGS=*) LDFLAGS=${optarg} ;; --*) echo "configure: WARNING: unrecognized option: '${option}'" 1>&2 ;; @@ -168,7 +170,7 @@ echo "LDFLAGS = ${LDFLAGS}" rm -f Makefile cat > Makefile << EOF # Makefile for Lziprecover - Data recovery tool for the lzip format -# Copyright (C) 2009-2018 Antonio Diaz Diaz. +# Copyright (C) 2009-2019 Antonio Diaz Diaz. # This file was generated automatically by configure. Don't edit. # # This Makefile is free software: you have unlimited permission @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2018 Antonio Diaz Diaz. + Copyright (C) 2009-2019 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -110,13 +110,13 @@ void LZ_decoder::flush_data() bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const { - File_trailer trailer; - int size = rdec.read_data( trailer.data, File_trailer::size ); + Lzip_trailer trailer; + int size = rdec.read_data( trailer.data, Lzip_trailer::size ); const unsigned long long data_size = data_position(); const unsigned long long member_size = rdec.member_position(); bool error = false; - if( size < File_trailer::size ) + if( size < Lzip_trailer::size ) { error = true; if( verbosity >= 0 ) @@ -125,7 +125,7 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const std::fprintf( stderr, "Trailer truncated at trailer position %d;" " some checks may fail.\n", size ); } - while( size < File_trailer::size ) trailer.data[size++] = 0; + while( size < Lzip_trailer::size ) trailer.data[size++] = 0; } const unsigned td_crc = trailer.data_crc(); @@ -214,86 +214,86 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) const int pos_state = data_position() & pos_state_mask; if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit { + // literal byte Bit_model * const bm = bm_literal[get_lit_state(peek_prev())]; if( state.is_char_set_char() ) put_byte( rdec.decode_tree8( bm ) ); else put_byte( rdec.decode_matched( bm, peek( rep0 ) ) ); + continue; } - else // match or repeated match + // match or repeated match + int len; + if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit { - int len; - if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit + if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit { - if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit - { - if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit - { state.set_short_rep(); put_byte( peek( rep0 ) ); continue; } - } + if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 
4th bit + { state.set_short_rep(); put_byte( peek( rep0 ) ); continue; } + } + else + { + unsigned distance; + if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit + distance = rep1; else { - unsigned distance; - if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit - distance = rep1; + if( rdec.decode_bit( bm_rep2[state()] ) == 0 ) // 5th bit + distance = rep2; else - { - if( rdec.decode_bit( bm_rep2[state()] ) == 0 ) // 5th bit - distance = rep2; - else - { distance = rep3; rep3 = rep2; } - rep2 = rep1; - } - rep1 = rep0; - rep0 = distance; + { distance = rep3; rep3 = rep2; } + rep2 = rep1; } - state.set_rep(); - len = min_match_len + rdec.decode_len( rep_len_model, pos_state ); + rep1 = rep0; + rep0 = distance; } - else // match + state.set_rep(); + len = min_match_len + rdec.decode_len( rep_len_model, pos_state ); + } + else // match + { + len = min_match_len + rdec.decode_len( match_len_model, pos_state ); + unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] ); + if( distance >= start_dis_model ) { - len = min_match_len + rdec.decode_len( match_len_model, pos_state ); - unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] ); - if( distance >= start_dis_model ) + const unsigned dis_slot = distance; + const int direct_bits = ( dis_slot >> 1 ) - 1; + distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits; + if( dis_slot < end_dis_model ) + distance += rdec.decode_tree_reversed( + bm_dis + ( distance - dis_slot ), direct_bits ); + else { - const unsigned dis_slot = distance; - const int direct_bits = ( dis_slot >> 1 ) - 1; - distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits; - if( dis_slot < end_dis_model ) - distance += rdec.decode_tree_reversed( - bm_dis + ( distance - dis_slot ), direct_bits ); - else + distance += + rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits; + distance += rdec.decode_tree_reversed4( bm_align ); + if( distance == 0xFFFFFFFFU ) // marker found { - distance += - rdec.decode( 
direct_bits - dis_align_bits ) << dis_align_bits; - distance += rdec.decode_tree_reversed4( bm_align ); - if( distance == 0xFFFFFFFFU ) // marker found + rdec.normalize(); + flush_data(); + if( len == min_match_len ) // End Of Stream marker + { + if( verify_trailer( pp ) ) return 0; else return 3; + } + if( len == min_match_len + 1 ) // Sync Flush marker + { + rdec.load(); continue; + } + if( verbosity >= 0 ) { - rdec.normalize(); - flush_data(); - if( len == min_match_len ) // End Of Stream marker - { - if( verify_trailer( pp ) ) return 0; else return 3; - } - if( len == min_match_len + 1 ) // Sync Flush marker - { - rdec.load(); continue; - } - if( verbosity >= 0 ) - { - pp(); - std::fprintf( stderr, "Unsupported marker code '%d'\n", len ); - } - return 4; + pp(); + std::fprintf( stderr, "Unsupported marker code '%d'\n", len ); } + return 4; } } - rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance; - state.set_match(); - if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) ) - { flush_data(); return 1; } } - copy_block( rep0, len ); + rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance; + state.set_match(); + if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) ) + { flush_data(); return 1; } } + copy_block( rep0, len ); } flush_data(); return 2; @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2018 Antonio Diaz Diaz. + Copyright (C) 2009-2019 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/doc/lziprecover.1 b/doc/lziprecover.1 index d52e2e6..29df1e0 100644 --- a/doc/lziprecover.1 +++ b/doc/lziprecover.1 @@ -1,27 +1,30 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1. 
-.TH LZIPRECOVER "1" "February 2018" "lziprecover 1.20" "User Commands" +.TH LZIPRECOVER "1" "January 2019" "lziprecover 1.21" "User Commands" .SH NAME lziprecover \- recovers data from damaged lzip files .SH SYNOPSIS .B lziprecover [\fI\,options\/\fR] [\fI\,files\/\fR] .SH DESCRIPTION -Lziprecover \- Data recovery tool and decompressor for the lzip format. +Lziprecover is a data recovery tool and decompressor for files in the lzip +compressed data format (.lz). Lziprecover is able to repair slightly damaged +files, produce a correct file by merging the good parts of two or more +damaged copies, extract data from damaged files, decompress files and test +integrity of files. .PP Lziprecover can repair perfectly most files with small errors (up to one single\-byte error per member), without the need of any extra redundance at all. Losing an entire archive just because of a corrupt byte near the beginning is a thing of the past. .PP -Lziprecover can also produce a correct file by merging the good parts of -two or more damaged copies, extract data from damaged files, decompress -files and test integrity of files. +Lziprecover can remove the damaged members from multimember files, for +example multimember tar.lz archives. .PP -Lziprecover provides random access to the data in multimember files; it -only decompresses the members containing the desired data. +Lziprecover provides random access to the data in multimember files; it only +decompresses the members containing the desired data. .PP -Lziprecover facilitates the management of metadata stored as trailing -data in lzip files. +Lziprecover facilitates the management of metadata stored as trailing data +in lzip files. .PP Lziprecover is not a replacement for regular backups, but a last line of defense for the case where the backups are also damaged. 
@@ -45,14 +48,14 @@ write to standard output, keep input files \fB\-d\fR, \fB\-\-decompress\fR decompress .TP -\fB\-D\fR, \fB\-\-range\-decompress=\fR<range> -decompress a range of bytes (N\-M) to stdout +\fB\-D\fR, \fB\-\-range\-decompress=\fR<n\-m> +decompress a range of bytes to stdout .TP \fB\-f\fR, \fB\-\-force\fR overwrite existing output files .TP \fB\-i\fR, \fB\-\-ignore\-errors\fR -make '\-\-range\-decompress' ignore data errors +all errors in \fB\-D\fR, format errors in \fB\-l\fR, \fB\-\-dump\fR .TP \fB\-k\fR, \fB\-\-keep\fR keep (don't delete) input files @@ -84,14 +87,14 @@ be verbose (a 2nd \fB\-v\fR gives more) \fB\-\-loose\-trailing\fR allow trailing data seeming corrupt header .TP -\fB\-\-dump\-tdata\fR -dump trailing data to standard output +\fB\-\-dump=\fR<list>:d:t +dump members listed/damaged, tdata to stdout .TP -\fB\-\-remove\-tdata\fR -remove trailing data from files in place +\fB\-\-remove=\fR<list>:d:t +remove members, tdata from files in place .TP -\fB\-\-strip\-tdata\fR -copy files to stdout without trailing data +\fB\-\-strip=\fR<list>:d:t +copy files to stdout stripping members given .PP If no file names are given, or if a file is '\-', lziprecover decompresses from standard input to standard output. @@ -107,7 +110,7 @@ Report bugs to lzip\-bug@nongnu.org .br Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html .SH COPYRIGHT -Copyright \(co 2018 Antonio Diaz Diaz. +Copyright \(co 2019 Antonio Diaz Diaz. License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> .br This is free software: you are free to change and redistribute it. diff --git a/doc/lziprecover.info b/doc/lziprecover.info index 55d044b..0339c15 100644 --- a/doc/lziprecover.info +++ b/doc/lziprecover.info @@ -12,7 +12,7 @@ File: lziprecover.info, Node: Top, Next: Introduction, Up: (dir) Lziprecover Manual ****************** -This manual is for Lziprecover (version 1.20, 12 February 2018). 
+This manual is for Lziprecover (version 1.21, 4 January 2019). * Menu: @@ -21,6 +21,7 @@ This manual is for Lziprecover (version 1.20, 12 February 2018). * Data safety:: Protecting data from accidental loss * Repairing files:: Fixing bit flips and similar errors * Merging files:: Fixing several damaged copies +* Tarlz:: Options supporting the tar.lz format * File names:: Names of the files produced by lziprecover * File format:: Detailed format of the compressed file * Trailing data:: Extra data appended to the file @@ -30,7 +31,7 @@ This manual is for Lziprecover (version 1.20, 12 February 2018). * Concept index:: Index of concepts - Copyright (C) 2009-2018 Antonio Diaz Diaz. + Copyright (C) 2009-2019 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute and modify it. @@ -43,13 +44,19 @@ File: lziprecover.info, Node: Introduction, Next: Invoking lziprecover, Prev: Lziprecover is a data recovery tool and decompressor for files in the lzip compressed data format (.lz). Lziprecover is able to repair -slightly damaged files, produce a correct file by merging the good parts -of two or more damaged copies, extract data from damaged files, +slightly damaged files, produce a correct file by merging the good +parts of two or more damaged copies, extract data from damaged files, decompress files and test integrity of files. + Lziprecover can remove the damaged members from multimember files, +for example multimember tar.lz archives. + Lziprecover provides random access to the data in multimember files; it only decompresses the members containing the desired data. + Lziprecover facilitates the management of metadata stored as trailing +data in lzip files. + Lziprecover is not a replacement for regular backups, but a last line of defense for the case where the backups are also damaged. 
@@ -100,7 +107,7 @@ garbage data may be produced at the end of each member): When recovering data, lziprecover takes as arguments the names of the damaged files and writes zero or more recovered files depending on the operation selected and whether the recovery succeeded or not. The -damaged files themselves are never modified. +damaged files themselves are kept unchanged. When decompressing or testing file integrity, lziprecover behaves like lzip or lunzip. @@ -132,7 +139,7 @@ the first time it appears in the command line. '-V' '--version' Print the version number of lziprecover on the standard output and - exit. + exit. This version number should be included in all bug reports. '-a' '--trailing-error' @@ -194,12 +201,15 @@ the first time it appears in the command line. '-i' '--ignore-errors' - Make '--range-decompress' ignore data errors and continue - decompressing the remaining members in the file. For example, + Make '--range-decompress' ignore errors and continue decompressing + the remaining members in the file. For example, 'lziprecover -D0 -i file.lz > file' decompresses all the recoverable data in all members of 'file.lz' without having to split it first. + Make '--list', '--dump', '--remove' and '--strip' ignore format + errors. + '-k' '--keep' Keep (don't delete) input files during decompression. @@ -213,20 +223,23 @@ the first time it appears in the command line. printed. With '-v', the dictionary size, the number of members in the file, and the amount of trailing data (if any) are also printed. With '-vv', the positions and sizes of each member in - multimember files are also printed. '-lq' can be used to verify - quickly (without decompressing) the structural integrity of the - specified files. (Use '--test' to verify the data integrity). - '-alq' additionally verifies that none of the specified files - contain trailing data. + multimember files are also printed. With '-i', format errors are + ignored, and with '-ivv', gaps between members are shown. 
The + member numbers shown coincide with the file numbers produced by + '--split'. + + '-lq' can be used to verify quickly (without decompressing) the + structural integrity of the specified files. (Use '--test' to + verify the data integrity). '-alq' additionally verifies that none + of the specified files contain trailing data. '-m' '--merge' Try to produce a correct file by merging the good parts of two or more damaged copies. If successful, a repaired copy is written to the file 'FILE_fixed.lz'. The exit status is 0 if a correct file - could be produced, 2 otherwise. See the chapter 'Merging files' - (*note Merging files::) for a complete description of the merge - mode. + could be produced, 2 otherwise. *Note Merging files::, for a + complete description of the merge mode. '-o FILE' '--output=FILE' @@ -248,17 +261,21 @@ the first time it appears in the command line. Try to repair a file with small errors (up to one single-byte error per member). If successful, a repaired copy is written to the file 'FILE_fixed.lz'. 'FILE' is not modified at all. The exit - status is 0 if the file could be repaired, 2 otherwise. See the - chapter 'Repairing files' (*note Repairing files::) for a complete - description of the repair mode. + status is 0 if the file could be repaired, 2 otherwise. *Note + Repairing files::, for a complete description of the repair mode. '-s' '--split' Search for members in 'FILE' and write each member in its own - '.lz' file. You can then use 'lziprecover -t' to test the - integrity of the resulting files, decompress those which are - undamaged, and try to repair or partially decompress those which - are damaged. + file. Gaps between members are detected and each gap is saved in + its own file. Trailing data (if any) are saved alone in the last + file. You can then use 'lziprecover -t' to test the integrity of + the resulting files, decompress those which are undamaged, and try + to repair or partially decompress those which are damaged. 
Gaps + may contain garbage or may be members with corrupt headers or + trailers. If other lziprecover functions fail to work on a + multimember FILE because of damage in headers or trailers, try to + split FILE and then work on each member individually. The names of the files produced are in the form 'rec01FILE', 'rec02FILE', etc, and are designed so that the use of wildcards in @@ -297,33 +314,75 @@ the first time it appears in the command line. if a file triggers a "corrupt header" error and the cause is not indeed a corrupt header. -'--dump-tdata' - Dump the trailing data (if any) of one or more regular files to +'--dump=[MEMBER_LIST][:damaged][:tdata]' + Dump the members listed, the damaged members (if any), or the + trailing data (if any) of one or more regular multimember files to standard output, or to a file if the '--output' option is used. If - more than one file is given, the trailing data of all files are - concatenated. If a file does not exist, can't be opened, or is not - regular, lziprecover continues processing the rest of the files. - If the dump fails in one file, lziprecover exits immediately - without processing the rest of the files. - -'--remove-tdata' - Remove the trailing data from regular files in place. The date of - each file is preserved if possible. If the removal fails in one - file, lziprecover continues processing the rest of the files. This - option may be dangerous if the file is corrupt or if the trailing - data contain a forbidden combination of characters. *Note Trailing - data::. Verify that 'lzip -cd file.lz | wc -c' and the + more than one file is given, the elements dumped from all files + are concatenated. If a file does not exist, can't be opened, or + is not regular, lziprecover continues processing the rest of the + files. If the dump fails in one file, lziprecover exits + immediately without processing the rest of the files. 
+ + The argument to '--dump' is a colon-separated list of the following + element specifiers: a member list (1,3-6), a reverse member list + (r1,3-6), and the strings "damaged" and "tdata" (which may be + shortened to 'd' and 't' respectively). A member list selects the + members (or gaps) listed, whose numbers coincide with those shown + by '--list'. A reverse member list selects the members listed + counting from the last member in the file (r1). Negated versions + of both kinds of lists exist (^1,3-6:r^1,3-6) which select all + the members except those in the list. The strings "damaged" and + "tdata" select the damaged members and the trailing data + respectively. If the same member is selected more than once, for + example by '1:r1' in a single-member file, it is dumped just once. + See the following examples: + + '--dump' argument Elements dumped + --------------------------------------------------------------------- + '1,3-6' members 1, 3, 4, 5 and 6 + 'r1-3' last 3 members in file + '^13,15' all but 13th and 15th members in file + 'r^1' all but last member in file + 'damaged' all damaged members in file + 'tdata' trailing data + '1-5:r1:tdata' members 1 to 5, last member, trailing data + 'damaged:tdata' damaged members, trailing data + '3,12:damaged:tdata' members 3, 12, damaged members, trailing data + +'--remove=[MEMBER_LIST][:damaged][:tdata]' + Remove the members listed, the damaged members (if any), or the + trailing data (if any) from regular multimember files in place. + The date of each file is preserved if possible. If all members in + a file are selected to be removed, the file is left unchanged and + the exit status is set to 2. If a file does not exist, can't be + opened, is not regular, or is left unchanged, lziprecover + continues processing the rest of the files. In case of I/O error, + lziprecover exits immediately without processing the rest of the + files. See '--dump' above for a description of the argument. 
+ + This option may be dangerous even if only the trailing data is + being removed because the file may be corrupt or the trailing data + may contain a forbidden combination of characters. *Note Trailing + data::. It is advisable to make a backup before attempting the + removal. At least verify that 'lzip -cd file.lz | wc -c' and the uncompressed size shown by 'lzip -l file.lz' match before - attempting the removal. - -'--strip-tdata' - Copy one or more regular files to standard output (or to a file if - the '--output' option is used), stripping the trailing data (if - any) from each file. If more than one file is given, the files are - concatenated. If a file does not exist, can't be opened, or is not + attempting the removal of trailing data. + +'--strip=[MEMBER_LIST][:damaged][:tdata]' + Copy one or more regular multimember files to standard output (or + to a file if the '--output' option is used), stripping the members + listed, the damaged members (if any), or the trailing data (if + any) from each file. If all members in a file are selected to be + stripped, the trailing data (if any) are also stripped even if + 'tdata' is not specified. If more than one file is given, the + files are concatenated. In this case the trailing data are also + stripped from all but the last file even if 'tdata' is not + specified. If a file does not exist, can't be opened, or is not regular, lziprecover continues processing the rest of the files. If a file fails to copy, lziprecover exits immediately without - processing the rest of the files. + processing the rest of the files. See '--dump' above for a + description of the argument. Numbers given as arguments to options may be followed by a multiplier @@ -431,7 +490,7 @@ cause much more loss of data than errors located near the end. So lziprecover repairs more efficiently the worst errors. 
-File: lziprecover.info, Node: Merging files, Next: File names, Prev: Repairing files, Up: Top +File: lziprecover.info, Node: Merging files, Next: Tarlz, Prev: Repairing files, Up: Top 5 Merging files *************** @@ -489,9 +548,74 @@ lower than the number of corrupt bytes (3104) because contiguous corrupt bytes are counted as a single multibyte error. -File: lziprecover.info, Node: File names, Next: File format, Prev: Merging files, Up: Top +File: lziprecover.info, Node: Tarlz, Next: File names, Prev: Merging files, Up: Top + +6 Options supporting the tar.lz format +************************************** + +Tarlz is an implementation of the tar archiver which by default creates +archives compressed with lzip on a per file basis. Tarlz can append +files to the end of such compressed archives because each tar member is +compressed in its own lzip member, as well as the end-of-file blocks. +Thus tarlz archives are multimember lzip files, which has some safety +advantages over solidly compressed tar.lz archives. For example, in +case of corruption, tarlz can extract all the undamaged members from +the tar.lz archive, skipping over the damaged members, just like the +standard (uncompressed) tar. In this chapter we'll explain the ways in +which lziprecover can recover and process multimember tar.lz archives. +*Note tarlz manual: (tarlz)Top. + + +6.1 Recovering damaged multimember tar.lz archives +================================================== + +If you have several copies of the damaged archive, try merging them +first because merging has a high probability of success. If the command +below prints something like 'Input files merged successfully.' 
you are +done and 'archive.tar.lz' now contains the recovered archive: + + lziprecover -m -v -o archive.tar.lz a/archive.tar.lz b/archive.tar.lz -6 Names of the files produced by lziprecover + If you only have one copy of the damaged archive, you may try to +repair the archive, but this has a lower probability of success. If the +command below prints something like +'Copy of input file repaired successfully.' you are done and +'archive_fixed.tar.lz' now contains the recovered archive: + + lziprecover -v -R archive.tar.lz + + If all the above fails, you may save the damaged members for later +and then copy the good members to another archive. If the two commands +below succeed, 'bad_members.tar.lz' will contain all the damaged members +and 'archive_cleaned.tar.lz' will contain a good archive with the +damaged members removed: + + lziprecover -v --dump=damaged -o bad_members.tar.lz archive.tar.lz + lziprecover -v --strip=damaged -o archive_cleaned.tar.lz archive.tar.lz + + You can then use 'tarlz --keep-damaged' to recover as much data as +possible from each damaged member in 'bad_members.tar.lz': + + mkdir tmp + cd tmp + tarlz --keep-damaged -xvf ../bad_members.tar.lz + + +6.2 Processing multimember tar.lz archives +========================================== + +Lziprecover is able to copy a list of members from one file to another. +For example, the command +'lziprecover --dump=1-10:r1:tdata archive.tar.lz > subarch.tar.lz' +creates a subset archive containing the first ten members, the +end-of-file blocks, and the trailing data (if any) of 'archive.tar.lz'. +The 'r1' part selects the last member, which in an appendable tar.lz +archive contains the end-of-file blocks. 
+ + +File: lziprecover.info, Node: File names, Next: File format, Prev: Tarlz, Up: Top + +7 Names of the files produced by lziprecover ******************************************** The name of the fixed file produced by '--merge' and '--repair' is made @@ -502,7 +626,7 @@ original file name ends with one of the extensions '.tar.lz', '.lz' or File: lziprecover.info, Node: File format, Next: Trailing data, Prev: File names, Up: Top -7 File format +8 File format ************* Perfection is reached, not when there is no longer anything to add, but @@ -544,11 +668,11 @@ additional information before, between, or after them. 'DS (coded dictionary size, 1 byte)' The dictionary size is calculated by taking a power of 2 (the base - size) and substracting from it a fraction between 0/16 and 7/16 of + size) and subtracting from it a fraction between 0/16 and 7/16 of the base size. Bits 4-0 contain the base 2 logarithm of the base size (12 to 29). - Bits 7-5 contain the numerator of the fraction (0 to 7) to - substract from the base size to obtain the dictionary size. + Bits 7-5 contain the numerator of the fraction (0 to 7) to subtract + from the base size to obtain the dictionary size. Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB Valid values for dictionary size range from 4 KiB to 512 MiB. @@ -573,7 +697,7 @@ additional information before, between, or after them. File: lziprecover.info, Node: Trailing data, Next: Examples, Prev: File format, Up: Top -8 Extra data appended to the file +9 Extra data appended to the file ********************************* Sometimes extra data are found appended to a lzip file after the last @@ -626,11 +750,11 @@ Example 1: Add a comment or description to a compressed file. 
# First append the comment as trailing data to a lzip file echo 'This file contains this and that' >> file.lz # This command prints the comment to standard output - lziprecover --dump-tdata file.lz + lziprecover --dump=tdata file.lz # This command outputs file.lz without the comment - lziprecover --strip-tdata file.lz + lziprecover --strip=tdata file.lz # This command removes the comment from file.lz - lziprecover --remove-tdata file.lz + lziprecover --remove=tdata file.lz Example 2: Add and verify a cryptographically secure hash. (This may be @@ -639,14 +763,14 @@ to guarantee that both file and hash have not been maliciously replaced). sha256sum < file.lz >> file.lz - lziprecover --strip-tdata file.lz | sha256sum -c \ - <(lziprecover --dump-tdata file.lz) + lziprecover --strip=tdata file.lz | sha256sum -c \ + <(lziprecover --dump=tdata file.lz) File: lziprecover.info, Node: Examples, Next: Unzcrash, Prev: Trailing data, Up: Top -9 A small tutorial with examples -******************************** +10 A small tutorial with examples +********************************* Example 1: Restore a regular file from its compressed version 'file.lz'. If the operation is successful, 'file.lz' is removed. @@ -667,6 +791,10 @@ or more compressed files. *Note Trailing data::. cat file1.lz file2.lz file3.lz | lziprecover -d Do this instead lziprecover -cd file1.lz file2.lz file3.lz + You may also concatenate the compressed files like this + lziprecover --strip=tdata file1.lz file2.lz file3.lz > file123.lz + Or keeping the trailing data of the last file like this + lziprecover --strip=damaged file1.lz file2.lz file3.lz > file123.lz Example 4: Decompress 'file.lz' partially until 10 KiB of decompressed @@ -700,8 +828,8 @@ integrity of the resulting files. Example 8: Recover a compressed backup from two copies on CD-ROM with -error-checked merging of copies. (*Note GNU ddrescue manual: -(ddrescue)Top, for details about ddrescue). +error-checked merging of copies. 
*Note GNU ddrescue manual: +(ddrescue)Top, for details about ddrescue. ddrescue -d -r1 -b2048 /dev/cdrom cdimage1 mapfile1 mount -t iso9660 -o loop,ro cdimage1 /mnt/cdimage @@ -732,7 +860,7 @@ correct file produced is saved in 'big_db_00001.lz'. File: lziprecover.info, Node: Unzcrash, Next: Problems, Prev: Examples, Up: Top -10 Testing the robustness of decompressors +11 Testing the robustness of decompressors ****************************************** The lziprecover package also includes unzcrash, a program written to @@ -775,14 +903,17 @@ after the last byte of a member, producing a shorter but valid compressed file. Except in this latter case, please, report any false negative as a bug. - In order to compare the outputs, unzcrash needs a 'zcmp' program -able to understand the format being tested. For example the one provided -by 'zutils'. *Note Zcmp: (zutils)Zcmp, + In order to compare the outputs, unzcrash needs a 'zcmp' program able +to understand the format being tested. For example the 'zcmp' provided +by 'zutils'. *Note Zcmp: (zutils)Zcmp. The format for running unzcrash is: unzcrash [OPTIONS] 'lzip -t' FILE.lz +FILE.lz must not contain errors and must be correctly decompressed by +the decompressor being tested for the comparisons to work. + unzcrash supports the following options: '-h' @@ -792,7 +923,7 @@ by 'zutils'. *Note Zcmp: (zutils)Zcmp, '-V' '--version' Print the version number of unzcrash on the standard output and - exit. + exit. This version number should be included in all bug reports. '-b RANGE' '--bits=RANGE' @@ -868,7 +999,10 @@ by 'zutils'. *Note Zcmp: (zutils)Zcmp, '-z' '--zcmp=<command>' Set zcmp command name and options. Defaults to 'zcmp'. Use - '--zcmp=false' to disable comparisons. + '--zcmp=false' to disable comparisons. 
If testing a decompressor + different from the one used by default by zcmp, it is needed to + force unzcrash and zcmp to use the same decompressor with a + command like 'unzcrash --zcmp='zcmp --lz=plzip' 'plzip -t' FILE.lz' Exit status: 0 for a normal exit, 1 for environmental problems (file @@ -879,7 +1013,7 @@ caused unzcrash to panic. File: lziprecover.info, Node: Problems, Next: Concept index, Prev: Unzcrash, Up: Top -11 Reporting bugs +12 Reporting bugs ***************** There are probably bugs in lziprecover. There are certainly errors and @@ -911,6 +1045,7 @@ Concept index * merging files: Merging files. (line 6) * options: Invoking lziprecover. (line 6) * repairing files: Repairing files. (line 6) +* tarlz: Tarlz. (line 6) * trailing data: Trailing data. (line 6) * unzcrash: Unzcrash. (line 6) * usage: Invoking lziprecover. (line 6) @@ -920,21 +1055,22 @@ Concept index Tag Table: Node: Top231 -Node: Introduction1273 -Node: Invoking lziprecover4650 -Ref: --trailing-error5300 -Node: Data safety14832 -Node: Repairing files16783 -Node: Merging files18706 -Node: File names21468 -Node: File format21932 -Node: Trailing data24360 -Node: Examples27595 -Ref: concat-example28026 -Ref: ddrescue-example29127 -Node: Unzcrash30417 -Node: Problems36055 -Node: Concept index36607 +Node: Introduction1335 +Node: Invoking lziprecover4918 +Ref: --trailing-error5628 +Node: Data safety18371 +Node: Repairing files20322 +Node: Merging files22245 +Node: Tarlz25002 +Node: File names27857 +Node: File format28313 +Node: Trailing data30739 +Node: Examples33974 +Ref: concat-example34407 +Ref: ddrescue-example35778 +Node: Unzcrash37066 +Node: Problems43130 +Node: Concept index43682 End Tag Table diff --git a/doc/lziprecover.texi b/doc/lziprecover.texi index a15e710..731515b 100644 --- a/doc/lziprecover.texi +++ b/doc/lziprecover.texi @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 12 February 2018 -@set VERSION 1.20 +@set UPDATED 4 January 2019 +@set VERSION 1.21 @dircategory 
Data Compression @direntry @@ -40,6 +40,7 @@ This manual is for Lziprecover (version @value{VERSION}, @value{UPDATED}). * Data safety:: Protecting data from accidental loss * Repairing files:: Fixing bit flips and similar errors * Merging files:: Fixing several damaged copies +* Tarlz:: Options supporting the tar.lz format * File names:: Names of the files produced by lziprecover * File format:: Detailed format of the compressed file * Trailing data:: Extra data appended to the file @@ -50,7 +51,7 @@ This manual is for Lziprecover (version @value{VERSION}, @value{UPDATED}). @end menu @sp 1 -Copyright @copyright{} 2009-2018 Antonio Diaz Diaz. +Copyright @copyright{} 2009-2019 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute and modify it. @@ -60,15 +61,22 @@ to copy, distribute and modify it. @chapter Introduction @cindex introduction -Lziprecover is a data recovery tool and decompressor for files in the -lzip compressed data format (.lz). Lziprecover is able to repair -slightly damaged files, produce a correct file by merging the good parts -of two or more damaged copies, extract data from damaged files, -decompress files and test integrity of files. +@uref{http://www.nongnu.org/lzip/lziprecover.html,,Lziprecover} is a +data recovery tool and decompressor for files in the lzip compressed +data format (.lz). Lziprecover is able to repair slightly damaged files, +produce a correct file by merging the good parts of two or more damaged +copies, extract data from damaged files, decompress files and test +integrity of files. + +Lziprecover can remove the damaged members from multimember files, for +example multimember tar.lz archives. Lziprecover provides random access to the data in multimember files; it only decompresses the members containing the desired data. +Lziprecover facilitates the management of metadata stored as trailing +data in lzip files. 
+ Lziprecover is not a replacement for regular backups, but a last line of defense for the case where the backups are also damaged. @@ -126,7 +134,7 @@ lziprecover -D0 -i -o file -q file.lz When recovering data, lziprecover takes as arguments the names of the damaged files and writes zero or more recovered files depending on the operation selected and whether the recovery succeeded or not. The -damaged files themselves are never modified. +damaged files themselves are kept unchanged. When decompressing or testing file integrity, lziprecover behaves like lzip or lunzip. @@ -164,6 +172,7 @@ Print an informative help message describing the options and exit. @item -V @itemx --version Print the version number of lziprecover on the standard output and exit. +This version number should be included in all bug reports. @anchor{--trailing-error} @item -a @@ -227,12 +236,15 @@ Force overwrite of output files. @item -i @itemx --ignore-errors -Make @samp{--range-decompress} ignore data errors and continue -decompressing the remaining members in the file. For example, +Make @samp{--range-decompress} ignore errors and continue decompressing +the remaining members in the file. For example, @w{@samp{lziprecover -D0 -i file.lz > file}} decompresses all the recoverable data in all members of @samp{file.lz} without having to split it first. +Make @samp{--list}, @samp{--dump}, @samp{--remove} and @samp{--strip} +ignore format errors. + @item -k @itemx --keep Keep (don't delete) input files during decompression. @@ -246,18 +258,22 @@ final line containing the cumulative sizes is printed. With @samp{-v}, the dictionary size, the number of members in the file, and the amount of trailing data (if any) are also printed. With @samp{-vv}, the positions and sizes of each member in multimember files are also -printed. @samp{-lq} can be used to verify quickly (without -decompressing) the structural integrity of the specified files. (Use -@samp{--test} to verify the data integrity). 
@samp{-alq} additionally -verifies that none of the specified files contain trailing data. +printed. With @samp{-i}, format errors are ignored, and with +@samp{-ivv}, gaps between members are shown. The member numbers shown +coincide with the file numbers produced by @samp{--split}. + +@samp{-lq} can be used to verify quickly (without decompressing) the +structural integrity of the specified files. (Use @samp{--test} to +verify the data integrity). @samp{-alq} additionally verifies that none +of the specified files contain trailing data. @item -m @itemx --merge Try to produce a correct file by merging the good parts of two or more damaged copies. If successful, a repaired copy is written to the file @samp{@var{file}_fixed.lz}. The exit status is 0 if a correct file could -be produced, 2 otherwise. See the chapter @samp{Merging files} -(@pxref{Merging files}) for a complete description of the merge mode. +be produced, 2 otherwise. @xref{Merging files}, for a complete +description of the merge mode. @item -o @var{file} @itemx --output=@var{file} @@ -280,16 +296,21 @@ Quiet operation. Suppress all messages. Try to repair a file with small errors (up to one single-byte error per member). If successful, a repaired copy is written to the file @samp{@var{file}_fixed.lz}. @samp{@var{file}} is not modified at all. -The exit status is 0 if the file could be repaired, 2 otherwise. See the -chapter @samp{Repairing files} (@pxref{Repairing files}) for a complete -description of the repair mode. +The exit status is 0 if the file could be repaired, 2 otherwise. +@xref{Repairing files}, for a complete description of the repair mode. @item -s @itemx --split Search for members in @samp{@var{file}} and write each member in its own -@samp{.lz} file. You can then use @samp{lziprecover -t} to test the -integrity of the resulting files, decompress those which are undamaged, -and try to repair or partially decompress those which are damaged. +file. 
Gaps between members are detected and each gap is saved in its own +file. Trailing data (if any) are saved alone in the last file. You can +then use @samp{lziprecover -t} to test the integrity of the resulting +files, decompress those which are undamaged, and try to repair or +partially decompress those which are damaged. Gaps may contain garbage +or may be members with corrupt headers or trailers. If other lziprecover +functions fail to work on a multimember @var{file} because of damage in +headers or trailers, try to split @var{file} and then work on each +member individually. The names of the files produced are in the form @samp{rec01@var{file}}, @samp{rec02@var{file}}, etc, and are designed so that the use of @@ -326,33 +347,75 @@ bytes are so similar to the magic bytes of a lzip header that they can be confused with a corrupt header. Use this option if a file triggers a "corrupt header" error and the cause is not indeed a corrupt header. -@item --dump-tdata -Dump the trailing data (if any) of one or more regular files to standard +@item --dump=[@var{member_list}][:damaged][:tdata] +Dump the members listed, the damaged members (if any), or the trailing +data (if any) of one or more regular multimember files to standard output, or to a file if the @samp{--output} option is used. If more than -one file is given, the trailing data of all files are concatenated. If a -file does not exist, can't be opened, or is not regular, lziprecover -continues processing the rest of the files. If the dump fails in one -file, lziprecover exits immediately without processing the rest of the -files. - -@item --remove-tdata -Remove the trailing data from regular files in place. The date of each -file is preserved if possible. If the removal fails in one file, -lziprecover continues processing the rest of the files. This option may -be dangerous if the file is corrupt or if the trailing data contain a -forbidden combination of characters. @xref{Trailing data}. 
Verify that -@w{@samp{lzip -cd file.lz | wc -c}} and the uncompressed size shown by -@w{@samp{lzip -l file.lz}} match before attempting the removal. - -@item --strip-tdata -Copy one or more regular files to standard output (or to a file if the -@samp{--output} option is used), stripping the trailing data (if any) -from each file. If more than one file is given, the files are -concatenated. If a file does not exist, can't be opened, or is not -regular, lziprecover continues processing the rest of the files. If a -file fails to copy, lziprecover exits immediately without processing the +one file is given, the elements dumped from all files are concatenated. +If a file does not exist, can't be opened, or is not regular, +lziprecover continues processing the rest of the files. If the dump +fails in one file, lziprecover exits immediately without processing the rest of the files. +The argument to @samp{--dump} is a colon-separated list of the following +element specifiers; a member list (1,3-6), a reverse member list +(r1,3-6), and the strings "damaged" and "tdata" (which may be shortened +to 'd' and 't' respectively). A member list selects the members (or +gaps) listed, whose numbers coincide with those shown by @samp{--list}. +A reverse member list selects the members listed counting from the last +member in the file (r1). Negated versions of both kinds of lists exist +(^1,3-6:r^1,3-6) which selects all the members except those in the list. +The strings "damaged" and "tdata" select the damaged members and the +trailing data respectively. If the same member is selected more than +once, for example by @code{1:r1} in a single-member file, it is dumped +just once. 
See the following examples: + +@multitable {@code{3,12:damaged:tdata}} {members 3, 12, damaged members, trailing data} +@headitem @code{--dump} argument @tab Elements dumped +@item @code{1,3-6} @tab members 1, 3, 4, 5 and 6 +@item @code{r1-3} @tab last 3 members in file +@item @code{^13,15} @tab all but 13th and 15th members in file +@item @code{r^1} @tab all but last member in file +@item @code{damaged} @tab all damaged members in file +@item @code{tdata} @tab trailing data +@item @code{1-5:r1:tdata} @tab members 1 to 5, last member, trailing data +@item @code{damaged:tdata} @tab damaged members, trailing data +@item @code{3,12:damaged:tdata} @tab members 3, 12, damaged members, trailing data +@end multitable + +@item --remove=[@var{member_list}][:damaged][:tdata] +Remove the members listed, the damaged members (if any), or the trailing +data (if any) from regular multimember files in place. The date of each +file is preserved if possible. If all members in a file are selected to +be removed, the file is left unchanged and the exit status is set to 2. +If a file does not exist, can't be opened, is not regular, or is left +unchanged, lziprecover continues processing the rest of the files. In +case of I/O error, lziprecover exits immediately without processing the +rest of the files. See @samp{--dump} above for a description of the +argument. + +This option may be dangerous even if only the trailing data is being +removed because the file may be corrupt or the trailing data may contain +a forbidden combination of characters. @xref{Trailing data}. It is +advisable to make a backup before attempting the removal. At least +verify that @w{@samp{lzip -cd file.lz | wc -c}} and the uncompressed +size shown by @w{@samp{lzip -l file.lz}} match before attempting the +removal of trailing data. 
+ +@item --strip=[@var{member_list}][:damaged][:tdata] +Copy one or more regular multimember files to standard output (or to a +file if the @samp{--output} option is used), stripping the members +listed, the damaged members (if any), or the trailing data (if any) from +each file. If all members in a file are selected to be stripped, the +trailing data (if any) are also stripped even if @samp{tdata} is not +specified. If more than one file is given, the files are concatenated. +In this case the trailing data are also stripped from all but the last +file even if @samp{tdata} is not specified. If a file does not exist, +can't be opened, or is not regular, lziprecover continues processing the +rest of the files. If a file fails to copy, lziprecover exits +immediately without processing the rest of the files. See @samp{--dump} +above for a description of the argument. + @end table Numbers given as arguments to options may be followed by a multiplier @@ -521,6 +584,80 @@ than the number of corrupt bytes (3104) because contiguous corrupt bytes are counted as a single multibyte error. +@node Tarlz +@chapter Options supporting the tar.lz format +@cindex tarlz + +@uref{http://www.nongnu.org/lzip/manual/tarlz_manual.html,,Tarlz} is an +implementation of the tar archiver which by default creates archives +compressed with lzip on a per file basis. Tarlz can append files to the +end of such compressed archives because each tar member is compressed in +its own lzip member, as well as the end-of-file blocks. Thus tarlz +archives are multimember lzip files, which has some safety advantages +over solidly compressed tar.lz archives. For example, in case of +corruption, tarlz can extract all the undamaged members from the tar.lz +archive, skipping over the damaged members, just like the standard +(uncompressed) tar. In this chapter we'll explain the ways in which +lziprecover can recover and process multimember tar.lz archives. +@ifnothtml +@xref{Top,tarlz manual,,tarlz}. 
+@end ifnothtml + +@sp 1 +@section Recovering damaged multimember tar.lz archives + +If you have several copies of the damaged archive, try merging +them first because merging has a high probability of success. If the +command below prints something like +@w{@code{Input files merged successfully.}} you are done and +@code{archive.tar.lz} now contains the recovered archive: + +@example +lziprecover -m -v -o archive.tar.lz a/archive.tar.lz b/archive.tar.lz +@end example + +If you only have one copy of the damaged archive, you may try to repair +the archive, but this has a lower probability of success. If the command +below prints something like +@w{@code{Copy of input file repaired successfully.}} you are done and +@code{archive_fixed.tar.lz} now contains the recovered archive: + +@example +lziprecover -v -R archive.tar.lz +@end example + +If all the above fails, you may save the damaged members for later and +then copy the good members to another archive. If the two commands below +succeed, @code{bad_members.tar.lz} will contain all the damaged members +and @code{archive_cleaned.tar.lz} will contain a good archive with the +damaged members removed: + +@example +lziprecover -v --dump=damaged -o bad_members.tar.lz archive.tar.lz +lziprecover -v --strip=damaged -o archive_cleaned.tar.lz archive.tar.lz +@end example + +You can then use @code{tarlz --keep-damaged} to recover as much data as +possible from each damaged member in @samp{bad_members.tar.lz}: + +@example +mkdir tmp +cd tmp +tarlz --keep-damaged -xvf ../bad_members.tar.lz +@end example + +@sp 1 +@section Processing multimember tar.lz archives + +Lziprecover is able to copy a list of members from a file to another. +For example the command +@w{@code{lziprecover --dump=1-10:r1:tdata archive.tar.lz > subarch.tar.lz}} +creates a subset archive containing the first ten members, the +end-of-file blocks, and the trailing data (if any) of +@code{archive.tar.lz}. 
The @code{r1} part selects the last member, which +in an appendable tar.lz archive contains the end-of-file blocks. + + @node File names @chapter Names of the files produced by lziprecover @cindex file names @@ -581,10 +718,10 @@ Just in case something needs to be modified in the future. 1 for now. @item DS (coded dictionary size, 1 byte) The dictionary size is calculated by taking a power of 2 (the base size) -and substracting from it a fraction between 0/16 and 7/16 of the base +and subtracting from it a fraction between 0/16 and 7/16 of the base size.@* Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).@* -Bits 7-5 contain the numerator of the fraction (0 to 7) to substract +Bits 7-5 contain the numerator of the fraction (0 to 7) to subtract from the base size to obtain the dictionary size.@* Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB@* Valid values for dictionary size range from 4 KiB to 512 MiB. @@ -675,11 +812,11 @@ Example 1: Add a comment or description to a compressed file. # First append the comment as trailing data to a lzip file echo 'This file contains this and that' >> file.lz # This command prints the comment to standard output -lziprecover --dump-tdata file.lz +lziprecover --dump=tdata file.lz # This command outputs file.lz without the comment -lziprecover --strip-tdata file.lz +lziprecover --strip=tdata file.lz # This command removes the comment from file.lz -lziprecover --remove-tdata file.lz +lziprecover --remove=tdata file.lz @end example @sp 1 @@ -690,8 +827,8 @@ to guarantee that both file and hash have not been maliciously replaced). 
@example sha256sum < file.lz >> file.lz -lziprecover --strip-tdata file.lz | sha256sum -c \ - <(lziprecover --dump-tdata file.lz) +lziprecover --strip=tdata file.lz | sha256sum -c \ + <(lziprecover --dump=tdata file.lz) @end example @@ -727,6 +864,10 @@ Don't do this cat file1.lz file2.lz file3.lz | lziprecover -d Do this instead lziprecover -cd file1.lz file2.lz file3.lz +You may also concatenate the compressed files like this + lziprecover --strip=tdata file1.lz file2.lz file3.lz > file123.lz +Or keeping the trailing data of the last file like this + lziprecover --strip=damaged file1.lz file2.lz file3.lz > file123.lz @end example @sp 1 @@ -777,13 +918,13 @@ lziprecover -tv rec*file.lz Example 8: Recover a compressed backup from two copies on CD-ROM with error-checked merging of copies. @ifnothtml -(@xref{Top,GNU ddrescue manual,,ddrescue}, +@xref{Top,GNU ddrescue manual,,ddrescue}, @end ifnothtml @ifhtml -(See the +See the @uref{http://www.gnu.org/software/ddrescue/manual/ddrescue_manual.html,,ddrescue manual} @end ifhtml -for details about ddrescue). +for details about ddrescue. @example ddrescue -d -r1 -b2048 /dev/cdrom cdimage1 mapfile1 @@ -862,15 +1003,15 @@ after the last byte of a member, producing a shorter but valid compressed file. Except in this latter case, please, report any false negative as a bug. -In order to compare the outputs, unzcrash needs a @samp{zcmp} program -able to understand the format being tested. For example the one provided +In order to compare the outputs, unzcrash needs a @samp{zcmp} program able +to understand the format being tested. For example the @samp{zcmp} provided by @samp{zutils}. @ifnothtml -@xref{Zcmp,,,zutils}, +@xref{Zcmp,,,zutils}. @end ifnothtml @ifhtml See -@uref{http://www.nongnu.org/zutils/manual/zutils_manual.html#Zcmp,,zcmp} +@uref{http://www.nongnu.org/zutils/manual/zutils_manual.html#Zcmp,,zcmp}. 
@end ifhtml The format for running unzcrash is: @@ -879,6 +1020,10 @@ The format for running unzcrash is: unzcrash [@var{options}] 'lzip -t' @var{file}.lz @end example +@noindent +@var{file}.lz must not contain errors and must be correctly decompressed +by the decompressor being tested for the comparisons to work. + unzcrash supports the following options: @table @code @@ -889,6 +1034,7 @@ Print an informative help message describing the options and exit. @item -V @itemx --version Print the version number of unzcrash on the standard output and exit. +This version number should be included in all bug reports. @item -b @var{range} @itemx --bits=@var{range} @@ -966,7 +1112,10 @@ Verbose mode. @item -z @itemx --zcmp=<command> Set zcmp command name and options. Defaults to @code{zcmp}. Use -@code{--zcmp=false} to disable comparisons. +@code{--zcmp=false} to disable comparisons. If testing a decompressor +different from the one used by default by zcmp, it is needed to force +unzcrash and zcmp to use the same decompressor with a command like +@w{@code{unzcrash --zcmp='zcmp --lz=plzip' 'plzip -t' @var{file}.lz}} @end table diff --git a/dump_remove.cc b/dump_remove.cc new file mode 100644 index 0000000..7bbe829 --- /dev/null +++ b/dump_remove.cc @@ -0,0 +1,288 @@ +/* Lziprecover - Data recovery tool for the lzip format + Copyright (C) 2009-2019 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. 
If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <cerrno> +#include <cstdio> +#include <cstring> +#include <string> +#include <vector> +#include <stdint.h> +#include <unistd.h> +#include <utime.h> +#include <sys/stat.h> + +#include "lzip.h" +#include "lzip_index.h" + + +// If strip is false, dump to outfd members/gaps/tdata in member_list. +// If strip is true, dump to outfd members/gaps/tdata not in member_list. +int dump_members( const std::vector< std::string > & filenames, + const std::string & default_output_filename, + const Member_list & member_list, const bool force, + bool ignore_errors, bool ignore_trailing, + const bool loose_trailing, const bool strip ) + { + if( default_output_filename.empty() ) outfd = STDOUT_FILENO; + else + { + output_filename = default_output_filename; + set_signal_handler(); + if( !open_outstream( force, true, false, false ) ) return 1; + } + unsigned long long copied_size = 0, stripped_size = 0; + unsigned long long copied_tsize = 0, stripped_tsize = 0; + long members = 0, smembers = 0; + int files = 0, tfiles = 0, retval = 0; + if( member_list.damaged ) ignore_errors = true; + if( member_list.tdata ) ignore_trailing = true; + bool stdin_used = false; + for( unsigned i = 0; i < filenames.size(); ++i ) + { + const bool from_stdin = ( filenames[i] == "-" ); + if( from_stdin ) { if( stdin_used ) continue; else stdin_used = true; } + const char * const input_filename = + from_stdin ? "(stdin)" : filenames[i].c_str(); + struct stat in_stats; // not used + const int infd = from_stdin ? 
STDIN_FILENO : + open_instream( input_filename, &in_stats, true, true ); + if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; } + + const Lzip_index lzip_index( infd, ignore_trailing, loose_trailing, + ignore_errors, ignore_errors ); + if( lzip_index.retval() != 0 ) + { + show_file_error( input_filename, lzip_index.error().c_str() ); + if( retval < lzip_index.retval() ) retval = lzip_index.retval(); + close( infd ); + continue; + } + if( !safe_seek( infd, 0 ) ) cleanup_and_fail( 1 ); + const long blocks = lzip_index.blocks( false ); // not counting tdata + long long stream_pos = 0; // first pos not yet read from file + long gaps = 0; + const long prev_members = members, prev_smembers = smembers; + const unsigned long long prev_stripped_size = stripped_size; + for( long j = 0; j < lzip_index.members(); ++j ) // copy members and gaps + { + const Block & mb = lzip_index.mblock( j ); + if( mb.pos() > stream_pos ) // gap + { + const bool in = member_list.damaged || + member_list.includes( j + gaps, blocks ); + if( in == !strip ) + { + if( !safe_seek( infd, stream_pos ) || + !copy_file( infd, outfd, mb.pos() - stream_pos ) ) + cleanup_and_fail( 1 ); + copied_size += mb.pos() - stream_pos; ++members; + } + else { stripped_size += mb.pos() - stream_pos; ++smembers; } + ++gaps; + } + bool in = member_list.includes( j + gaps, blocks ); // member + if( !in && member_list.damaged ) + { + if( !safe_seek( infd, mb.pos() ) ) cleanup_and_fail( 1 ); + in = ( test_member_from_file( infd, mb.size() ) != 0 ); // damaged + } + if( in == !strip ) + { + if( !safe_seek( infd, mb.pos() ) || + !copy_file( infd, outfd, mb.size() ) ) cleanup_and_fail( 1 ); + copied_size += mb.size(); ++members; + } + else { stripped_size += mb.size(); ++smembers; } + stream_pos = mb.end(); + } + if( strip && members == prev_members ) // all members were stripped + { if( verbosity >= 1 ) + show_file_error( input_filename, "All members stripped, skipping." 
); + stripped_size = prev_stripped_size; smembers = prev_smembers; + close( infd ); continue; } + if( ( !strip && members > prev_members ) || + ( strip && smembers > prev_smembers ) ) ++files; + // copy trailing data + const unsigned long long cdata_size = lzip_index.cdata_size(); + const long long trailing_size = lzip_index.file_size() - cdata_size; + if( member_list.tdata == !strip && trailing_size > 0 && + ( !strip || i + 1 >= filenames.size() ) ) // strip all but last + { + if( !safe_seek( infd, cdata_size ) || + !copy_file( infd, outfd, trailing_size ) ) cleanup_and_fail( 1 ); + copied_tsize += trailing_size; + } + else if( trailing_size > 0 ) { stripped_tsize += trailing_size; ++tfiles; } + close( infd ); + } + if( close_outstream( 0 ) != 0 && retval < 1 ) retval = 1; + if( verbosity >= 1 ) + { + if( !strip ) + { + if( member_list.damaged || member_list.range() ) + std::fprintf( stderr, "%llu bytes dumped from %ld %s from %d %s.\n", + copied_size, + members, ( members == 1 ) ? "member" : "members", + files, ( files == 1 ) ? "file" : "files" ); + if( member_list.tdata ) + std::fprintf( stderr, "%llu trailing bytes dumped.\n", copied_tsize ); + } + else + { + if( member_list.damaged || member_list.range() ) + std::fprintf( stderr, "%llu bytes stripped from %ld %s from %d %s.\n", + stripped_size, + smembers, ( smembers == 1 ) ? "member" : "members", + files, ( files == 1 ) ? "file" : "files" ); + if( member_list.tdata ) + std::fprintf( stderr, "%llu trailing bytes stripped from %d %s.\n", + stripped_tsize, tfiles, ( tfiles == 1 ) ? 
"file" : "files" ); + } + } + return retval; + } + + +int remove_members( const std::vector< std::string > & filenames, + const Member_list & member_list, bool ignore_errors, + bool ignore_trailing, const bool loose_trailing ) + { + unsigned long long removed_size = 0, removed_tsize = 0; + long members = 0; + int files = 0, tfiles = 0, retval = 0; + if( member_list.damaged ) ignore_errors = true; + if( member_list.tdata ) ignore_trailing = true; + for( unsigned i = 0; i < filenames.size(); ++i ) + { + const char * const filename = filenames[i].c_str(); + struct stat in_stats, dummy_stats; + const int infd = open_instream( filename, &in_stats, true, true ); + if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; } + + const Lzip_index lzip_index( infd, ignore_trailing, loose_trailing, + ignore_errors, ignore_errors ); + if( lzip_index.retval() != 0 ) + { + show_file_error( filename, lzip_index.error().c_str() ); + if( retval < lzip_index.retval() ) retval = lzip_index.retval(); + close( infd ); + continue; + } + const int fd = open_truncable_stream( filename, &dummy_stats ); + if( fd < 0 ) { close( infd ); if( retval < 1 ) retval = 1; continue; } + + if( !safe_seek( infd, 0 ) ) return 1; + const long blocks = lzip_index.blocks( false ); // not counting tdata + long long stream_pos = 0; // first pos not yet written to file + long gaps = 0; + bool error = false; + const long prev_members = members; + for( long j = 0; j < lzip_index.members(); ++j ) // copy members and gaps + { + const Block & mb = lzip_index.mblock( j ); + const long long prev_end = (j > 0) ? 
lzip_index.mblock(j - 1).end() : 0; + if( mb.pos() > prev_end ) // gap + { + if( !member_list.damaged && !member_list.includes( j + gaps, blocks ) ) + { + if( stream_pos != prev_end && + ( !safe_seek( infd, prev_end ) || + !safe_seek( fd, stream_pos ) || + !copy_file( infd, fd, mb.pos() - prev_end ) ) ) + { error = true; if( retval < 1 ) retval = 1; break; } + stream_pos += mb.pos() - prev_end; + } + else ++members; + ++gaps; + } + bool in = member_list.includes( j + gaps, blocks ); // member + if( !in && member_list.damaged ) + { + if( !safe_seek( infd, mb.pos() ) ) + { error = true; if( retval < 1 ) retval = 1; break; } + in = ( test_member_from_file( infd, mb.size() ) != 0 ); // damaged + } + if( !in ) + { + if( stream_pos != mb.pos() && + ( !safe_seek( infd, mb.pos() ) || + !safe_seek( fd, stream_pos ) || + !copy_file( infd, fd, mb.size() ) ) ) + { error = true; if( retval < 1 ) retval = 1; break; } + stream_pos += mb.size(); + } + else ++members; + } + if( error ) { close( fd ); close( infd ); break; } + if( stream_pos == 0 ) // all members were removed + { show_file_error( filename, "All members would be removed, skipping." 
); + close( fd ); close( infd ); if( retval < 2 ) retval = 2; + members = prev_members; continue; } + const long long cdata_size = lzip_index.cdata_size(); + if( cdata_size > stream_pos ) + { removed_size += cdata_size - stream_pos; ++files; } + const long long file_size = lzip_index.file_size(); + const long long trailing_size = file_size - cdata_size; + if( trailing_size > 0 ) + { + if( !member_list.tdata ) // copy trailing data + { + if( stream_pos != cdata_size && + ( !safe_seek( infd, cdata_size ) || + !safe_seek( fd, stream_pos ) || + !copy_file( infd, fd, trailing_size ) ) ) + { close( fd ); close( infd ); if( retval < 1 ) retval = 1; break; } + stream_pos += trailing_size; + } + else { removed_tsize += trailing_size; ++tfiles; } + } + if( stream_pos >= file_size ) // no members were removed + { close( fd ); close( infd ); continue; } + int result; + do result = ftruncate( fd, stream_pos ); + while( result != 0 && errno == EINTR ); + if( result != 0 ) + { + show_file_error( filename, "Can't truncate file", errno ); + close( fd ); close( infd ); if( retval < 1 ) retval = 1; break; + } + if( close( fd ) != 0 || close( infd ) != 0 ) + { + show_file_error( filename, "Error closing file", errno ); + if( retval < 1 ) { retval = 1; } break; + } + struct utimbuf t; + t.actime = in_stats.st_atime; + t.modtime = in_stats.st_mtime; + utime( filename, &t ); + } + if( verbosity >= 1 ) + { + if( member_list.damaged || member_list.range() ) + std::fprintf( stderr, "%llu bytes removed from %ld %s from %d %s.\n", + removed_size, + members, ( members == 1 ) ? "member" : "members", + files, ( files == 1 ) ? "file" : "files" ); + if( member_list.tdata ) + std::fprintf( stderr, "%llu trailing bytes removed from %d %s.\n", + removed_tsize, tfiles, ( tfiles == 1 ) ? "file" : "files" ); + } + return retval; + } @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2018 Antonio Diaz Diaz. + Copyright (C) 2009-2019 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -26,8 +26,7 @@ #include <sys/stat.h> #include "lzip.h" -#include "block.h" -#include "file_index.h" +#include "lzip_index.h" namespace { @@ -49,6 +48,7 @@ void list_line( const unsigned long long uncomp_size, int list_files( const std::vector< std::string > & filenames, + const bool ignore_errors, const bool ignore_trailing, const bool loose_trailing ) { unsigned long long total_comp = 0, total_uncomp = 0; @@ -66,18 +66,19 @@ int list_files( const std::vector< std::string > & filenames, open_instream( input_filename, &in_stats, true, true ); if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; } - const File_index file_index( infd, false, ignore_trailing, loose_trailing ); + const Lzip_index lzip_index( infd, ignore_trailing, loose_trailing, + ignore_errors, ignore_errors ); close( infd ); - if( file_index.retval() != 0 ) + if( lzip_index.retval() != 0 ) { - show_file_error( input_filename, file_index.error().c_str() ); - if( retval < file_index.retval() ) retval = file_index.retval(); + show_file_error( input_filename, lzip_index.error().c_str() ); + if( retval < lzip_index.retval() ) retval = lzip_index.retval(); continue; } if( verbosity >= 0 ) { - const unsigned long long udata_size = file_index.udata_size(); - const unsigned long long cdata_size = file_index.cdata_size(); + const unsigned long long udata_size = lzip_index.udata_size(); + const unsigned long long cdata_size = lzip_index.cdata_size(); total_comp += cdata_size; total_uncomp += udata_size; ++files; if( first_post ) { @@ -88,24 +89,32 @@ int list_files( const std::vector< std::string > & filenames, if( verbosity >= 1 ) { unsigned dictionary_size = 0; - for( long i = 0; i < file_index.members(); ++i ) + for( long i = 0; i < lzip_index.members(); ++i ) dictionary_size = - std::max( dictionary_size, file_index.dictionary_size( i ) ); - const long long 
trailing_size = file_index.file_size() - cdata_size; + std::max( dictionary_size, lzip_index.dictionary_size( i ) ); + const long long trailing_size = lzip_index.file_size() - cdata_size; std::printf( "%s %5ld %6lld ", format_ds( dictionary_size ), - file_index.members(), trailing_size ); + lzip_index.members(), trailing_size ); } list_line( udata_size, cdata_size, input_filename ); - if( verbosity >= 2 && file_index.members() > 1 ) + if( verbosity >= 2 && lzip_index.members() > 1 ) { std::fputs( " member data_pos data_size member_pos member_size\n", stdout ); - for( long i = 0; i < file_index.members(); ++i ) + long long prev_end = 0; + for( long i = 0, gaps = 0; i < lzip_index.members(); ++i ) { - const Block & db = file_index.dblock( i ); - const Block & mb = file_index.mblock( i ); + const Block & db = lzip_index.dblock( i ); + const Block & mb = lzip_index.mblock( i ); + if( mb.pos() > prev_end ) + { + std::printf( " gap - - %15llu %15llu\n", + prev_end, mb.pos() - prev_end ); + ++gaps; + } std::printf( "%5ld %15llu %15llu %15llu %15llu\n", - i + 1, db.pos(), db.size(), mb.pos(), mb.size() ); + i + gaps + 1, db.pos(), db.size(), mb.pos(), mb.size() ); + prev_end = mb.end(); } first_post = true; // reprint heading after list of members } @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2018 Antonio Diaz Diaz. + Copyright (C) 2009-2019 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -121,7 +121,7 @@ public: { const std::string & s = filenames[i]; const unsigned len = ( s == "-" ) ? 
stdin_name_len : s.size(); - if( len > longest_name ) longest_name = len; + if( longest_name < len ) longest_name = len; } if( longest_name == 0 ) longest_name = stdin_name_len; } @@ -140,7 +140,7 @@ public: if( filename.size() && filename != "-" ) name_ = filename; else name_ = stdin_name; padded_name = " "; padded_name += name_; padded_name += ": "; - if( name_.size() < longest_name ) + if( longest_name > name_.size() ) padded_name.append( longest_name - name_.size(), ' ' ); first_post = true; } @@ -198,30 +198,30 @@ inline int real_bits( unsigned value ) } -const uint8_t magic_string[4] = { 0x4C, 0x5A, 0x49, 0x50 }; // "LZIP" +const uint8_t lzip_magic[4] = { 0x4C, 0x5A, 0x49, 0x50 }; // "LZIP" -struct File_header +struct Lzip_header { uint8_t data[6]; // 0-3 magic bytes // 4 version // 5 coded_dict_size enum { size = 6 }; - void set_magic() { std::memcpy( data, magic_string, 4 ); data[4] = 1; } + void set_magic() { std::memcpy( data, lzip_magic, 4 ); data[4] = 1; } bool verify_magic() const - { return ( std::memcmp( data, magic_string, 4 ) == 0 ); } + { return ( std::memcmp( data, lzip_magic, 4 ) == 0 ); } bool verify_prefix( const int sz ) const // detect (truncated) header { for( int i = 0; i < sz && i < 4; ++i ) - if( data[i] != magic_string[i] ) return false; + if( data[i] != lzip_magic[i] ) return false; return ( sz > 0 ); } bool verify_corrupt() const // detect corrupt header { int matches = 0; for( int i = 0; i < 4; ++i ) - if( data[i] == magic_string[i] ) ++matches; + if( data[i] == lzip_magic[i] ) ++matches; return ( matches > 1 && matches < 4 ); } @@ -253,12 +253,11 @@ struct File_header }; -struct File_trailer +struct Lzip_trailer { uint8_t data[20]; // 0-3 CRC32 of the uncompressed data // 4-11 size of the uncompressed data // 12-19 member size including header and trailer - enum { size = 20 }; unsigned data_crc() const @@ -290,6 +289,20 @@ struct File_trailer void member_size( unsigned long long sz ) { for( int i = 12; i <= 19; ++i ) { data[i] = 
(uint8_t)sz; sz >>= 8; } } + + bool verify_consistency() const // check internal consistency + { + const unsigned crc = data_crc(); + const unsigned long long dsize = data_size(); + if( ( crc == 0 ) != ( dsize == 0 ) ) return false; + const unsigned long long msize = member_size(); + if( msize < min_member_size ) return false; + const unsigned long long mlimit = ( 9 * dsize + 7 ) / 8 + min_member_size; + if( mlimit > dsize && msize > mlimit ) return false; + const unsigned long long dlimit = 7090 * ( msize - 26 ) - 1; + if( dlimit > msize && dsize > dlimit ) return false; + return true; + } }; @@ -310,6 +323,72 @@ struct Bad_byte }; +#ifndef INT64_MAX +#define INT64_MAX 0x7FFFFFFFFFFFFFFFLL +#endif + +class Block + { + long long pos_, size_; // pos + size <= INT64_MAX + +public: + Block( const long long p, const long long s ) : pos_( p ), size_( s ) {} + + long long pos() const { return pos_; } + long long size() const { return size_; } + long long end() const { return pos_ + size_; } + + void pos( const long long p ) { pos_ = p; } + void size( const long long s ) { size_ = s; } + + bool operator==( const Block & b ) const + { return pos_ == b.pos_ && size_ == b.size_; } + bool operator!=( const Block & b ) const + { return pos_ != b.pos_ || size_ != b.size_; } + + bool operator<( const Block & b ) const { return pos_ < b.pos_; } + + bool includes( const long long pos ) const + { return ( pos_ <= pos && end() > pos ); } + bool overlaps( const Block & b ) const + { return ( pos_ < b.end() && b.pos_ < end() ); } + + void shift( Block & b ) { ++size_; ++b.pos_; --b.size_; } + Block split( const long long pos ); + }; + + +struct Member_list // members/gaps/tdata to be dumped/removed/stripped + { + bool damaged; + bool tdata; + bool in, rin; + std::vector< Block > range_vector, rrange_vector; + + Member_list() : damaged( false ), tdata( false ), in( true ), rin( true ) {} + void parse( const char * p ); + + bool range() const { return range_vector.size() || 
rrange_vector.size(); } + + // blocks is the sum of members + gaps, excluding trailing data + bool includes( const long i, const long blocks ) const + { + for( unsigned j = 0; j < range_vector.size(); ++j ) + { + if( range_vector[j].pos() > i ) break; + if( range_vector[j].end() > i ) return in; + } + if( i >= 0 && i < blocks ) + for( unsigned j = 0; j < rrange_vector.size(); ++j ) + { + if( rrange_vector[j].pos() > blocks - i - 1 ) break; + if( rrange_vector[j].end() > blocks - i - 1 ) return rin; + } + return !in || !rin; + } + }; + + struct Error { const char * const msg; @@ -334,14 +413,25 @@ int alone_to_lz( const int infd, const Pretty_print & pp ); long readblock( const int fd, uint8_t * const buf, const long size ); long writeblock( const int fd, const uint8_t * const buf, const long size ); -// defined in file_index.cc -int seek_read( const int fd, uint8_t * const buf, const int size, - const long long pos ); +// defined in dump_remove.cc +int dump_members( const std::vector< std::string > & filenames, + const std::string & default_output_filename, + const Member_list & member_list, const bool force, + bool ignore_errors, bool ignore_trailing, + const bool loose_trailing, const bool strip ); +int remove_members( const std::vector< std::string > & filenames, + const Member_list & member_list, bool ignore_errors, + bool ignore_trailing, const bool loose_trailing ); // defined in list.cc int list_files( const std::vector< std::string > & filenames, + const bool ignore_errors, const bool ignore_trailing, const bool loose_trailing ); +// defined in lzip_index.cc +int seek_read( const int fd, uint8_t * const buf, const int size, + const long long pos ); + // defined in main.cc extern std::string output_filename; // global vars for output file extern int outfd; @@ -357,6 +447,7 @@ bool open_outstream( const bool force, const bool from_stdin, const bool rw = false, const bool skipping = true ); bool file_exists( const std::string & filename ); void 
cleanup_and_fail( const int retval ); +void set_signal_handler(); int close_outstream( const struct stat * const in_statsp ); std::string insert_fixed( std::string name ); void show_error( const char * const msg, const int errcode = 0, @@ -364,9 +455,7 @@ void show_error( const char * const msg, const int errcode = 0, void show_file_error( const char * const filename, const char * const msg, const int errcode = 0 ); void internal_error( const char * const msg ); -void show_error2( const char * const msg1, const char * const name, - const char * const msg2 ); -void show_error4( const char * const msg1, const char * const name1, +void show_2file_error( const char * const msg1, const char * const name1, const char * const name2, const char * const msg2 ); class Range_decoder; void show_dprogress( const unsigned long long cfile_size = 0, @@ -377,32 +466,31 @@ void show_dprogress( const unsigned long long cfile_size = 0, // defined in merge.cc bool copy_file( const int infd, const int outfd, const long long max_size = -1 ); -bool test_member_from_file( const int infd, const unsigned long long msize, - long long * const failure_posp = 0 ); +int test_member_from_file( const int infd, const unsigned long long msize, + long long * const failure_posp = 0 ); int merge_files( const std::vector< std::string > & filenames, const std::string & default_output_filename, - const bool force ); + const bool force, const char terminator ); // defined in range_dec.cc bool safe_seek( const int fd, const long long pos ); +int range_decompress( const std::string & input_filename, + const std::string & default_output_filename, + Block range, const bool force, const bool ignore_errors, + const bool ignore_trailing, const bool loose_trailing, + const bool to_stdout ); // defined in repair.cc int repair_file( const std::string & input_filename, const std::string & default_output_filename, - const bool force ); + const bool force, const char terminator ); +int debug_delay( const std::string & 
input_filename, Block range, + const char terminator ); int debug_repair( const std::string & input_filename, - const Bad_byte & bad_byte ); + const Bad_byte & bad_byte, const char terminator ); int debug_decompress( const std::string & input_filename, const Bad_byte & bad_byte, const bool show_packets ); // defined in split.cc -bool verify_header( const File_header & header, const Pretty_print & pp ); int split_file( const std::string & input_filename, const std::string & default_output_filename, const bool force ); - -// defined in trailing_data.cc -int dump_tdata( const std::vector< std::string > & filenames, - const std::string & default_output_filename, const bool force, - const bool strip, const bool loose_trailing ); -int remove_tdata( const std::vector< std::string > & filenames, - const bool loose_trailing ); diff --git a/file_index.cc b/lzip_index.cc index b3d7d70..f70307c 100644 --- a/file_index.cc +++ b/lzip_index.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2018 Antonio Diaz Diaz. + Copyright (C) 2009-2019 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -27,8 +27,7 @@ #include <unistd.h> #include "lzip.h" -#include "block.h" -#include "file_index.h" +#include "lzip_index.h" int seek_read( const int fd, uint8_t * const buf, const int size, @@ -40,13 +39,13 @@ int seek_read( const int fd, uint8_t * const buf, const int size, } -void File_index::set_errno_error( const char * const msg ) +void Lzip_index::set_errno_error( const char * const msg ) { error_ = msg; error_ += std::strerror( errno ); retval_ = 1; } -void File_index::set_num_error( const char * const msg, unsigned long long num ) +void Lzip_index::set_num_error( const char * const msg, unsigned long long num ) { char buf[80]; snprintf( buf, sizeof buf, "%s%llu", msg, num ); @@ -54,16 +53,22 @@ void File_index::set_num_error( const char * const msg, unsigned long long num ) retval_ = 2; } - -// If successful, push last member and set pos to member header. -bool File_index::skip_trailing_data( const int fd, long long & pos, - const bool ignore_bad_ds, - const bool ignore_trailing, const bool loose_trailing ) +/* Skip backwards the gap or trailing data ending at pos. + 'ignore_gaps' also ignores format errors and a truncated last member. + If successful, push member preceding gap and set pos to member header. 
*/ +bool Lzip_index::skip_gap( const int fd, long long & pos, + const bool ignore_trailing, const bool loose_trailing, + const bool ignore_bad_ds, const bool ignore_gaps ) { enum { block_size = 16384, - buffer_size = block_size + File_trailer::size - 1 + File_header::size }; + buffer_size = block_size + Lzip_trailer::size - 1 + Lzip_header::size }; uint8_t buffer[buffer_size]; - if( pos < min_member_size ) return false; + if( pos < min_member_size ) + { + if( pos >= 0 && ignore_gaps && !member_vector.empty() ) + { pos = 0; return true; } + return false; + } int bsize = pos % block_size; // total bytes in buffer if( bsize <= buffer_size - block_size ) bsize += block_size; int search_size = bsize; // bytes to search for trailer @@ -75,41 +80,60 @@ bool File_index::skip_trailing_data( const int fd, long long & pos, if( seek_read( fd, buffer, rd_size, ipos ) != rd_size ) { set_errno_error( "Error seeking member trailer: " ); return false; } const uint8_t max_msb = ( ipos + search_size ) >> 56; - for( int i = search_size; i >= File_trailer::size; --i ) + for( int i = search_size; i >= Lzip_trailer::size; --i ) if( buffer[i-1] <= max_msb ) // most significant byte of member_size { - File_trailer & trailer = - *(File_trailer *)( buffer + i - File_trailer::size ); + const Lzip_trailer & trailer = + *(const Lzip_trailer *)( buffer + i - Lzip_trailer::size ); const unsigned long long member_size = trailer.member_size(); - if( member_size == 0 ) - { while( i > File_trailer::size && buffer[i-9] == 0 ) --i; continue; } - if( member_size < min_member_size || member_size > ipos + i ) + if( member_size == 0 ) // skip trailing zeros + { while( i > Lzip_trailer::size && buffer[i-9] == 0 ) --i; continue; } + if( member_size > ipos + i || !trailer.verify_consistency() ) continue; - File_header header; - if( seek_read( fd, header.data, File_header::size, - ipos + i - member_size ) != File_header::size ) + Lzip_header header; + if( seek_read( fd, header.data, Lzip_header::size, + ipos + 
i - member_size ) != Lzip_header::size ) { set_errno_error( "Error reading member header: " ); return false; } const unsigned dictionary_size = header.dictionary_size(); if( !header.verify_magic() || !header.verify_version() || ( !ignore_bad_ds && !isvalid_ds( dictionary_size ) ) ) continue; - if( (*(File_header *)( buffer + i )).verify_prefix( bsize - i ) ) - { error_ = "Last member in input file is truncated or corrupt."; - retval_ = 2; return false; } - if( !loose_trailing && bsize - i >= File_header::size && - (*(File_header *)( buffer + i )).verify_corrupt() ) - { error_ = corrupt_mm_msg; retval_ = 2; return false; } - if( !ignore_trailing ) - { error_ = trailing_msg; retval_ = 2; return false; } + if( member_vector.empty() ) // trailing data or truncated member + { + const Lzip_header & last_header = *(const Lzip_header *)( buffer + i ); + if( last_header.verify_prefix( bsize - i ) ) + { + if( !ignore_gaps ) + { error_ = "Last member in input file is truncated or corrupt."; + retval_ = 2; return false; } + const unsigned dictionary_size = + ( bsize - i >= Lzip_header::size ) ? 
+ last_header.dictionary_size() : 0; + const unsigned long long member_size = pos - ( ipos + i ); + pos = ipos + i; + member_vector.push_back( Member( 0, 0, pos, + member_size, dictionary_size ) ); + return true; + } + } + if( !ignore_gaps && member_vector.empty() ) + { + if( !loose_trailing && bsize - i >= Lzip_header::size && + (*(const Lzip_header *)( buffer + i )).verify_corrupt() ) + { error_ = corrupt_mm_msg; retval_ = 2; return false; } + if( !ignore_trailing ) + { error_ = trailing_msg; retval_ = 2; return false; } + } pos = ipos + i - member_size; member_vector.push_back( Member( 0, trailer.data_size(), pos, member_size, dictionary_size ) ); return true; } if( ipos <= 0 ) - { set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 ); + { if( ignore_gaps && !member_vector.empty() ) { pos = 0; return true; } + set_num_error( "Bad trailer at pos ", pos - Lzip_trailer::size ); return false; } bsize = buffer_size; - search_size = bsize - File_header::size; + search_size = bsize - Lzip_header::size; rd_size = block_size; ipos -= rd_size; std::memcpy( buffer + rd_size, buffer, buffer_size - rd_size ); @@ -117,20 +141,21 @@ bool File_index::skip_trailing_data( const int fd, long long & pos, } -File_index::File_index( const int infd, const bool ignore_bad_ds, - const bool ignore_trailing, const bool loose_trailing ) - : isize( lseek( infd, 0, SEEK_END ) ), retval_( 0 ) +Lzip_index::Lzip_index( const int infd, const bool ignore_trailing, + const bool loose_trailing, const bool ignore_bad_ds, + const bool ignore_gaps, const long long max_pos ) + : insize( lseek( infd, 0, SEEK_END ) ), retval_( 0 ) { - if( isize < 0 ) + if( insize < 0 ) { set_errno_error( "Input file is not seekable: " ); return; } - if( isize < min_member_size ) + if( insize < min_member_size ) { error_ = "Input file is too short."; retval_ = 2; return; } - if( isize > INT64_MAX ) + if( insize > INT64_MAX ) { error_ = "Input file is too long (2^63 bytes or more)."; retval_ = 2; return; } 
- File_header header; - if( seek_read( infd, header.data, File_header::size, 0 ) != File_header::size ) + Lzip_header header; + if( seek_read( infd, header.data, Lzip_header::size, 0 ) != Lzip_header::size ) { set_errno_error( "Error reading member header: " ); return; } if( !header.verify_magic() ) { error_ = bad_magic_msg; retval_ = 2; return; } @@ -139,32 +164,33 @@ File_index::File_index( const int infd, const bool ignore_bad_ds, if( !ignore_bad_ds && !isvalid_ds( header.dictionary_size() ) ) { error_ = bad_dict_msg; retval_ = 2; return; } - long long pos = isize; // always points to a header or to EOF + // pos always points to a header or to ( EOF || max_pos ) + long long pos = ( max_pos > 0 ) ? max_pos : insize; while( pos >= min_member_size ) { - File_trailer trailer; - if( seek_read( infd, trailer.data, File_trailer::size, - pos - File_trailer::size ) != File_trailer::size ) + Lzip_trailer trailer; + if( seek_read( infd, trailer.data, Lzip_trailer::size, + pos - Lzip_trailer::size ) != Lzip_trailer::size ) { set_errno_error( "Error reading member trailer: " ); break; } const unsigned long long member_size = trailer.member_size(); - if( member_size < min_member_size || member_size > (unsigned long long)pos ) + if( member_size > (unsigned long long)pos || !trailer.verify_consistency() ) { - if( member_vector.empty() ) - { if( skip_trailing_data( infd, pos, ignore_bad_ds, ignore_trailing, - loose_trailing ) ) continue; else return; } - set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 ); + if( ignore_gaps || member_vector.empty() ) + { if( skip_gap( infd, pos, ignore_trailing, loose_trailing, + ignore_bad_ds, ignore_gaps ) ) continue; else return; } + set_num_error( "Bad trailer at pos ", pos - Lzip_trailer::size ); break; } - if( seek_read( infd, header.data, File_header::size, - pos - member_size ) != File_header::size ) + if( seek_read( infd, header.data, Lzip_header::size, + pos - member_size ) != Lzip_header::size ) { set_errno_error( 
"Error reading member header: " ); break; } const unsigned dictionary_size = header.dictionary_size(); if( !header.verify_magic() || !header.verify_version() || ( !ignore_bad_ds && !isvalid_ds( dictionary_size ) ) ) { - if( member_vector.empty() ) - { if( skip_trailing_data( infd, pos, ignore_bad_ds, ignore_trailing, - loose_trailing ) ) continue; else return; } + if( ignore_gaps || member_vector.empty() ) + { if( skip_gap( infd, pos, ignore_trailing, loose_trailing, + ignore_bad_ds, ignore_gaps ) ) continue; else return; } set_num_error( "Bad header at pos ", pos - member_size ); break; } @@ -172,14 +198,15 @@ File_index::File_index( const int infd, const bool ignore_bad_ds, member_vector.push_back( Member( 0, trailer.data_size(), pos, member_size, dictionary_size ) ); } - if( pos != 0 || member_vector.empty() ) + if( pos < 0 || pos >= min_member_size || ( pos != 0 && !ignore_gaps ) || + member_vector.empty() ) { member_vector.clear(); if( retval_ == 0 ) { error_ = "Can't create file index."; retval_ = 2; } return; } std::reverse( member_vector.begin(), member_vector.end() ); - for( unsigned long i = 0; i < member_vector.size() - 1; ++i ) + for( unsigned long i = 0; ; ++i ) { const long long end = member_vector[i].dblock.end(); if( end < 0 || end > INT64_MAX ) @@ -188,56 +215,57 @@ File_index::File_index( const int infd, const bool ignore_bad_ds, error_ = "Data in input file is too long (2^63 bytes or more)."; retval_ = 2; return; } + if( i + 1 >= member_vector.size() ) break; member_vector[i+1].dblock.pos( end ); } } // All files in 'infd_vector' must be at least 'fsize' bytes long. 
-File_index::File_index( const std::vector< int > & infd_vector, +Lzip_index::Lzip_index( const std::vector< int > & infd_vector, const long long fsize ) - : isize( fsize ), retval_( 0 ) + : insize( fsize ), retval_( 0 ) { - if( isize < 0 ) + if( insize < 0 ) { set_errno_error( "Input file is not seekable: " ); return; } - if( isize < min_member_size ) + if( insize < min_member_size ) { error_ = "Input file is too short."; retval_ = 2; return; } - if( isize > INT64_MAX ) + if( insize > INT64_MAX ) { error_ = "Input file is too long (2^63 bytes or more)."; retval_ = 2; return; } const int files = infd_vector.size(); - File_header header; + Lzip_header header; bool done = false; for( int i = 0; i < files && !done; ++i ) { const int infd = infd_vector[i]; - if( seek_read( infd, header.data, File_header::size, 0 ) != File_header::size ) + if( seek_read( infd, header.data, Lzip_header::size, 0 ) != Lzip_header::size ) { set_errno_error( "Error reading member header: " ); return; } if( header.verify_magic() && header.verify_version() ) done = true; } if( !done ) { error_ = bad_magic_msg; retval_ = 2; return; } - long long pos = isize; // always points to a header or to EOF + long long pos = insize; // always points to a header or to EOF while( pos >= min_member_size ) { unsigned long long member_size; - File_trailer trailer; + Lzip_trailer trailer; done = false; for( int it = 0; it < files && !done; ++it ) { const int tfd = infd_vector[it]; - if( seek_read( tfd, trailer.data, File_trailer::size, - pos - File_trailer::size ) != File_trailer::size ) + if( seek_read( tfd, trailer.data, Lzip_trailer::size, + pos - Lzip_trailer::size ) != Lzip_trailer::size ) { set_errno_error( "Error reading member trailer: " ); goto error; } member_size = trailer.member_size(); - if( member_size >= min_member_size && member_size <= (unsigned long long)pos ) + if( member_size <= (unsigned long long)pos && trailer.verify_consistency() ) for( int ih = 0; ih < files && !done; ++ih ) { const int 
hfd = infd_vector[ih]; - if( seek_read( hfd, header.data, File_header::size, - pos - member_size ) != File_header::size ) + if( seek_read( hfd, header.data, Lzip_header::size, + pos - member_size ) != Lzip_header::size ) { set_errno_error( "Error reading member header: " ); goto error; } if( header.verify_magic() && header.verify_version() ) done = true; } @@ -248,9 +276,9 @@ File_index::File_index( const std::vector< int > & infd_vector, set_num_error( "Member size in trailer may be corrupt at pos ", pos - 8 ); break; } - if( member_vector.empty() && isize > pos ) + if( member_vector.empty() && insize > pos ) { - const int size = std::min( (long long)File_header::size, isize - pos ); + const int size = std::min( (long long)Lzip_header::size, insize - pos ); for( int i = 0; i < files; ++i ) { const int infd = infd_vector[i]; @@ -274,7 +302,7 @@ error: return; } std::reverse( member_vector.begin(), member_vector.end() ); - for( unsigned long i = 0; i < member_vector.size() - 1; ++i ) + for( unsigned long i = 0; ; ++i ) { const long long end = member_vector[i].dblock.end(); if( end < 0 || end > INT64_MAX ) @@ -283,6 +311,18 @@ error: error_ = "Data in input file is too long (2^63 bytes or more)."; retval_ = 2; return; } + if( i + 1 >= member_vector.size() ) break; member_vector[i+1].dblock.pos( end ); } } + + +// Returns members + gaps [+ trailing data]. +long Lzip_index::blocks( const bool count_tdata ) const + { + long n = member_vector.size() + ( count_tdata && cdata_size() < file_size() ); + if( member_vector.size() && member_vector[0].mblock.pos() > 0 ) ++n; + for( unsigned long i = 1; i < member_vector.size(); ++i ) + if( member_vector[i].mblock.pos() > member_vector[i-1].mblock.end() ) ++n; + return n; + } diff --git a/file_index.h b/lzip_index.h index da374ae..d4f2ef9 100644 --- a/file_index.h +++ b/lzip_index.h @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2018 Antonio Diaz Diaz. 
+ Copyright (C) 2009-2019 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -15,7 +15,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ -class File_index +class Lzip_index { struct Member { @@ -30,36 +30,40 @@ class File_index bool operator!=( const Member & m ) const { return ( mblock != m.mblock ); } }; + // member_vector only contains good members. + // Garbage between members is represented by gaps between mblocks. std::vector< Member > member_vector; std::string error_; - long long isize; + long long insize; int retval_; void set_errno_error( const char * const msg ); void set_num_error( const char * const msg, unsigned long long num ); - bool skip_trailing_data( const int fd, long long & pos, - const bool ignore_bad_ds, - const bool ignore_trailing, const bool loose_trailing ); + bool skip_gap( const int fd, long long & pos, + const bool ignore_trailing, const bool loose_trailing, + const bool ignore_bad_ds, const bool ignore_gaps ); public: - File_index() : error_( "No index" ), isize( 0 ), retval_( 2 ) {} - File_index( const int infd, const bool ignore_bad_ds, - const bool ignore_trailing, const bool loose_trailing ); - File_index( const std::vector< int > & infd_vector, const long long fsize ); + Lzip_index() : error_( "No index" ), insize( 0 ), retval_( 2 ) {} + Lzip_index( const int infd, const bool ignore_trailing, + const bool loose_trailing, const bool ignore_bad_ds = false, + const bool ignore_gaps = false, const long long max_pos = 0 ); + Lzip_index( const std::vector< int > & infd_vector, const long long fsize ); long members() const { return member_vector.size(); } + long blocks( const bool count_tdata ) const; // members + gaps [+ tdata] const std::string & error() const { return error_; } int retval() const { return retval_; } - bool operator==( const File_index & fi ) const + bool operator==( const Lzip_index 
& li ) const { - if( retval_ || fi.retval_ || isize != fi.isize || - member_vector.size() != fi.member_vector.size() ) return false; + if( retval_ || li.retval_ || insize != li.insize || + member_vector.size() != li.member_vector.size() ) return false; for( unsigned long i = 0; i < member_vector.size(); ++i ) - if( member_vector[i] != fi.member_vector[i] ) return false; + if( member_vector[i] != li.member_vector[i] ) return false; return true; } - bool operator!=( const File_index & fi ) const { return !( *this == fi ); } + bool operator!=( const Lzip_index & li ) const { return !( *this == li ); } long long udata_size() const { if( member_vector.empty() ) return 0; @@ -71,7 +75,7 @@ public: // total size including trailing data (if any) long long file_size() const - { if( isize >= 0 ) return isize; else return 0; } + { if( insize >= 0 ) return insize; else return 0; } const Block & dblock( const long i ) const { return member_vector[i].dblock; } @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2018 Antonio Diaz Diaz. + Copyright (C) 2009-2019 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -38,25 +38,29 @@ #include <unistd.h> #include <utime.h> #include <sys/stat.h> -#if defined(__MSVCRT__) +#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__) #include <io.h> +#if defined(__MSVCRT__) #define fchmod(x,y) 0 #define fchown(x,y,z) 0 #define SIGHUP SIGTERM #define S_ISSOCK(x) 0 +#ifndef S_IRGRP #define S_IRGRP 0 #define S_IWGRP 0 #define S_IROTH 0 #define S_IWOTH 0 #endif -#if defined(__OS2__) -#include <io.h> +#endif +#if defined(__DJGPP__) +#define S_ISSOCK(x) 0 +#define S_ISVTX 0 +#endif #endif #include "arg_parser.h" #include "lzip.h" #include "decoder.h" -#include "block.h" #ifndef O_BINARY #define O_BINARY 0 @@ -67,12 +71,11 @@ #endif int verbosity = 0; -std::string output_filename; // global vars for output file -int outfd = -1; +std::string output_filename; // global vars for output file +int outfd = -1; // see 'delete_output_on_interrupt' below namespace { -const char * const Program_name = "Lziprecover"; const char * const program_name = "lziprecover"; const char * invocation_name = 0; @@ -82,53 +85,58 @@ const struct { const char * from; const char * to; } known_extensions[] = { { 0, 0 } }; enum Mode { m_none, m_alone_to_lz, m_debug_decompress, m_debug_delay, - m_debug_repair, m_decompress, m_dump_tdata, m_list, m_merge, - m_range_dec, m_remove_tdata, m_repair, m_show_packets, m_split, - m_strip_tdata, m_test }; + m_debug_repair, m_decompress, m_dump, m_list, m_merge, + m_range_dec, m_remove, m_repair, m_show_packets, m_split, + m_strip, m_test }; +/* Variable used in signal handler context. + It is not declared volatile because the handler never returns. 
*/ bool delete_output_on_interrupt = false; void show_help() { - std::printf( "%s - Data recovery tool and decompressor for the lzip format.\n", Program_name ); - std::printf( "\nLziprecover can repair perfectly most files with small errors (up to one\n" + std::printf( "Lziprecover is a data recovery tool and decompressor for files in the lzip\n" + "compressed data format (.lz). Lziprecover is able to repair slightly damaged\n" + "files, produce a correct file by merging the good parts of two or more\n" + "damaged copies, extract data from damaged files, decompress files and test\n" + "integrity of files.\n" + "\nLziprecover can repair perfectly most files with small errors (up to one\n" "single-byte error per member), without the need of any extra redundance\n" "at all. Losing an entire archive just because of a corrupt byte near the\n" "beginning is a thing of the past.\n" - "\nLziprecover can also produce a correct file by merging the good parts of\n" - "two or more damaged copies, extract data from damaged files, decompress\n" - "files and test integrity of files.\n" - "\nLziprecover provides random access to the data in multimember files; it\n" - "only decompresses the members containing the desired data.\n" - "\nLziprecover facilitates the management of metadata stored as trailing\n" - "data in lzip files.\n" + "\nLziprecover can remove the damaged members from multimember files, for\n" + "example multimember tar.lz archives.\n" + "\nLziprecover provides random access to the data in multimember files; it only\n" + "decompresses the members containing the desired data.\n" + "\nLziprecover facilitates the management of metadata stored as trailing data\n" + "in lzip files.\n" "\nLziprecover is not a replacement for regular backups, but a last line of\n" "defense for the case where the backups are also damaged.\n" "\nUsage: %s [options] [files]\n", invocation_name ); std::printf( "\nOptions:\n" - " -h, --help display this help and exit\n" - " -V, --version output 
version information and exit\n" - " -a, --trailing-error exit with error status if trailing data\n" - " -A, --alone-to-lz convert lzma-alone files to lzip format\n" - " -c, --stdout write to standard output, keep input files\n" - " -d, --decompress decompress\n" - " -D, --range-decompress=<range> decompress a range of bytes (N-M) to stdout\n" - " -f, --force overwrite existing output files\n" - " -i, --ignore-errors make '--range-decompress' ignore data errors\n" - " -k, --keep keep (don't delete) input files\n" - " -l, --list print (un)compressed file sizes\n" - " -m, --merge correct errors in file using several copies\n" - " -o, --output=<file> place the output into <file>\n" - " -q, --quiet suppress all messages\n" - " -R, --repair try to repair a small error in file\n" - " -s, --split split multimember file in single-member files\n" - " -t, --test test compressed file integrity\n" - " -v, --verbose be verbose (a 2nd -v gives more)\n" - " --loose-trailing allow trailing data seeming corrupt header\n" - " --dump-tdata dump trailing data to standard output\n" - " --remove-tdata remove trailing data from files in place\n" - " --strip-tdata copy files to stdout without trailing data\n" ); + " -h, --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -a, --trailing-error exit with error status if trailing data\n" + " -A, --alone-to-lz convert lzma-alone files to lzip format\n" + " -c, --stdout write to standard output, keep input files\n" + " -d, --decompress decompress\n" + " -D, --range-decompress=<n-m> decompress a range of bytes to stdout\n" + " -f, --force overwrite existing output files\n" + " -i, --ignore-errors all errors in -D, format errors in -l, --dump\n" + " -k, --keep keep (don't delete) input files\n" + " -l, --list print (un)compressed file sizes\n" + " -m, --merge correct errors in file using several copies\n" + " -o, --output=<file> place the output into <file>\n" + " -q, --quiet suppress all messages\n" + 
" -R, --repair try to repair a small error in file\n" + " -s, --split split multimember file in single-member files\n" + " -t, --test test compressed file integrity\n" + " -v, --verbose be verbose (a 2nd -v gives more)\n" + " --loose-trailing allow trailing data seeming corrupt header\n" + " --dump=<list>:d:t dump members listed/damaged, tdata to stdout\n" + " --remove=<list>:d:t remove members, tdata from files in place\n" + " --strip=<list>:d:t copy files to stdout stripping members given\n" ); if( verbosity >= 1 ) { std::printf( " -W, --debug-decompress=<pos>,<val> set pos to val and decompress to stdout\n" @@ -202,6 +210,46 @@ void show_header( const unsigned dictionary_size ) #include "main_common.cc" +// Colon-separated list of "damaged", "tdata", [r][^]<list> (1 1,3-5,8) +void Member_list::parse( const char * p ) + { + while( true ) + { + const char * tp = p; // points to terminator; ':' or null + while( *tp && *tp != ':' ) ++tp; + const unsigned len = tp - p; + if( std::isalpha( (const unsigned char)*p ) ) + { + if( len <= 7 && std::strncmp( "damaged", p, len ) == 0 ) + { damaged = true; goto next; } + if( len <= 5 && std::strncmp( "tdata", p, len ) == 0 ) + { tdata = true; goto next; } + } + { + const bool reverse = ( *p == 'r' ); + if( reverse ) ++p; + if( *p == '^' ) { ++p; if( reverse ) rin = false; else in = false; } + std::vector< Block > * rvp = reverse ? &rrange_vector : &range_vector; + while( std::isdigit( (const unsigned char)*p ) ) + { + const char * tail; + const int pos = getnum( p, 0, 1, INT_MAX, &tail ) - 1; + if( rvp->size() && pos < rvp->back().end() ) break; + const int size = (*tail == '-') ? + getnum( tail + 1, 0, pos + 1, INT_MAX, &tail ) - pos : 1; + rvp->push_back( Block( pos, size ) ); + if( tail == tp ) goto next; + if( *tail == ',' ) p = tail + 1; else break; + } + } + show_error( "Invalid list of members." 
); + std::exit( 1 ); +next: + if( *(p = tp) != 0 ) ++p; else return; + } + } + + namespace { // Recognized formats: <begin> <begin>-<end> <begin>,<size> ,<size> @@ -215,11 +263,11 @@ void parse_range( const char * const ptr, Block & range ) { range.pos( value ); if( tail[0] == 0 ) { range.size( INT64_MAX - value ); return; } - const bool issize = ( tail[0] == ',' ); + const bool is_size = ( tail[0] == ',' ); value = getnum( tail + 1, 0, 1, INT64_MAX ); // size - if( issize || value > range.pos() ) + if( is_size || value > range.pos() ) { - if( !issize ) value -= range.pos(); + if( !is_size ) value -= range.pos(); if( INT64_MAX - range.pos() >= value ) { range.size( value ); return; } } } @@ -343,23 +391,23 @@ int open_instream( const char * const name, struct stat * const in_statsp, int open_truncable_stream( const char * const name, struct stat * const in_statsp ) { - int infd = open( name, O_RDWR | O_BINARY ); - if( infd < 0 ) + int fd = open( name, O_RDWR | O_BINARY ); + if( fd < 0 ) show_file_error( name, "Can't open input file", errno ); else { - const int i = fstat( infd, in_statsp ); + const int i = fstat( fd, in_statsp ); const mode_t mode = in_statsp->st_mode; if( i != 0 || !S_ISREG( mode ) ) { if( verbosity >= 0 ) std::fprintf( stderr, "%s: File '%s' is not a regular file.\n", program_name, name ); - close( infd ); - infd = -1; + close( fd ); + fd = -1; } } - return infd; + return fd; } @@ -421,8 +469,17 @@ bool check_tty( const char * const input_filename, const int infd, } +void set_signals( void (*action)(int) ) + { + std::signal( SIGHUP, action ); + std::signal( SIGINT, action ); + std::signal( SIGTERM, action ); + } + + void cleanup_and_fail( const int retval ) { + set_signals( SIG_IGN ); // ignore signals if( delete_output_on_interrupt ) { delete_output_on_interrupt = false; @@ -438,6 +495,13 @@ void cleanup_and_fail( const int retval ) namespace { +extern "C" void signal_handler( int ) + { + show_error( "Control-C or similar caught, quitting." 
); + cleanup_and_fail( 1 ); + } + + // Set permissions, owner and times. void close_and_set_permissions( const struct stat * const in_statsp ) { @@ -517,9 +581,9 @@ int decompress( const unsigned long long cfile_size, const int infd, Range_decoder rdec( infd ); for( bool first_member = true; ; first_member = false ) { - File_header header; + Lzip_header header; rdec.reset_member_position(); - const int size = rdec.read_data( header.data, File_header::size ); + const int size = rdec.read_data( header.data, Lzip_header::size ); if( rdec.finished() ) // End Of File { if( first_member ) @@ -573,30 +637,16 @@ int decompress( const unsigned long long cfile_size, const int infd, { std::fputs( testing ? "ok\n" : "done\n", stderr ); pp.reset(); } } } - catch( std::bad_alloc ) { pp( "Not enough memory." ); retval = 1; } - catch( Error e ) { pp(); show_error( e.msg, errno ); retval = 1; } + catch( std::bad_alloc & ) { pp( "Not enough memory." ); retval = 1; } + catch( Error & e ) { pp(); show_error( e.msg, errno ); retval = 1; } if( verbosity == 1 && retval == 0 ) std::fputs( testing ? "ok\n" : "done\n", stderr ); return retval; } - -extern "C" void signal_handler( int ) - { - show_error( "Control-C or similar caught, quitting." 
); - cleanup_and_fail( 1 ); - } - - -void set_signals() - { - std::signal( SIGHUP, signal_handler ); - std::signal( SIGINT, signal_handler ); - std::signal( SIGTERM, signal_handler ); - } - } // end namespace +void set_signal_handler() { set_signals( signal_handler ); } int close_outstream( const struct stat * const in_statsp ) { @@ -625,23 +675,15 @@ std::string insert_fixed( std::string name ) void show_file_error( const char * const filename, const char * const msg, const int errcode ) { - if( verbosity < 0 ) return; - std::fprintf( stderr, "%s: %s: %s", program_name, filename, msg ); - if( errcode > 0 ) std::fprintf( stderr, ": %s", std::strerror( errcode ) ); - std::fputc( '\n', stderr ); - } - - -void show_error2( const char * const msg1, const char * const name, - const char * const msg2 ) - { if( verbosity >= 0 ) - std::fprintf( stderr, "%s: %s '%s' %s\n", program_name, msg1, name, msg2 ); + std::fprintf( stderr, "%s: %s: %s%s%s\n", program_name, filename, msg, + ( errcode > 0 ) ? ": " : "", + ( errcode > 0 ) ? 
std::strerror( errcode ) : "" ); } -void show_error4( const char * const msg1, const char * const name1, - const char * const name2, const char * const msg2 ) +void show_2file_error( const char * const msg1, const char * const name1, + const char * const name2, const char * const msg2 ) { if( verbosity >= 0 ) std::fprintf( stderr, "%s: %s '%s' and '%s' %s\n", @@ -684,6 +726,7 @@ int main( const int argc, const char * const argv[] ) { Block range( 0, 0 ); Bad_byte bad_byte; + Member_list member_list; std::string default_output_filename; std::vector< std::string > filenames; Mode program_mode = m_none; @@ -695,7 +738,7 @@ int main( const int argc, const char * const argv[] ) bool to_stdout = false; invocation_name = argv[0]; - enum { opt_dtd = 256, opt_lt, opt_rtd, opt_std }; + enum { opt_du = 256, opt_dtd, opt_lt, opt_re, opt_rtd, opt_st, opt_std }; const Arg_parser::Option options[] = { { 'a', "trailing-error", Arg_parser::no }, @@ -721,9 +764,12 @@ int main( const int argc, const char * const argv[] ) { 'X', "show-packets", Arg_parser::maybe }, { 'Y', "debug-delay", Arg_parser::yes }, { 'Z', "debug-repair", Arg_parser::yes }, + { opt_du, "dump", Arg_parser::yes }, { opt_dtd, "dump-tdata", Arg_parser::no }, { opt_lt, "loose-trailing", Arg_parser::no }, + { opt_re, "remove", Arg_parser::yes }, { opt_rtd, "remove-tdata", Arg_parser::no }, + { opt_st, "strip", Arg_parser::yes }, { opt_std, "strip-tdata", Arg_parser::no }, { 0 , 0, Arg_parser::no } }; @@ -768,15 +814,24 @@ int main( const int argc, const char * const argv[] ) parse_range( arg, range ); break; case 'Z': set_mode( program_mode, m_debug_repair ); parse_pos_value( arg, bad_byte ); break; - case opt_dtd: set_mode( program_mode, m_dump_tdata ); break; + case opt_du: set_mode( program_mode, m_dump ); + member_list.parse( arg ); break; + case opt_dtd: set_mode( program_mode, m_dump ); + member_list.parse( "tdata" ); break; case opt_lt: loose_trailing = true; break; - case opt_rtd: set_mode( program_mode, 
m_remove_tdata ); break; - case opt_std: set_mode( program_mode, m_strip_tdata ); break; + case opt_re: set_mode( program_mode, m_remove ); + member_list.parse( arg ); break; + case opt_rtd: set_mode( program_mode, m_remove ); + member_list.parse( "tdata" ); break; + case opt_st: set_mode( program_mode, m_strip ); + member_list.parse( arg ); break; + case opt_std: set_mode( program_mode, m_strip ); + member_list.parse( "tdata" ); break; default : internal_error( "uncaught option." ); } } // end process options -#if defined(__MSVCRT__) || defined(__OS2__) +#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__) setmode( STDIN_FILENO, O_BINARY ); setmode( STDOUT_FILENO, O_BINARY ); #endif @@ -794,6 +849,7 @@ int main( const int argc, const char * const argv[] ) if( filenames.back() != "-" ) filenames_given = true; } + const char terminator = isatty( STDOUT_FILENO ) ? '\r' : '\n'; try { switch( program_mode ) { @@ -804,56 +860,54 @@ int main( const int argc, const char * const argv[] ) return debug_decompress( filenames[0], bad_byte, false ); case m_debug_delay: one_file( filenames.size() ); - return debug_delay( filenames[0], range ); + return debug_delay( filenames[0], range, terminator ); case m_debug_repair: one_file( filenames.size() ); - return debug_repair( filenames[0], bad_byte ); + return debug_repair( filenames[0], bad_byte, terminator ); case m_decompress: break; - case m_dump_tdata: - case m_strip_tdata: + case m_dump: + case m_strip: if( filenames.size() < 1 ) { show_error( "You must specify at least 1 file.", 0, true ); return 1; } - if( default_output_filename.size() ) set_signals(); - return dump_tdata( filenames, default_output_filename, force, - program_mode == m_strip_tdata, loose_trailing ); + return dump_members( filenames, default_output_filename, member_list, + force, ignore_errors, ignore_trailing, + loose_trailing, program_mode == m_strip ); case m_list: break; case m_merge: if( filenames.size() < 2 ) { show_error( "You must specify 
at least 2 files.", 0, true ); return 1; } - set_signals(); - return merge_files( filenames, default_output_filename, force ); + return merge_files( filenames, default_output_filename, force, terminator ); case m_range_dec: one_file( filenames.size() ); - set_signals(); return range_decompress( filenames[0], default_output_filename, range, force, ignore_errors, ignore_trailing, loose_trailing, to_stdout ); - case m_remove_tdata: + case m_remove: if( filenames.size() < 1 ) { show_error( "You must specify at least 1 file.", 0, true ); return 1; } - return remove_tdata( filenames, loose_trailing ); + return remove_members( filenames, member_list, ignore_errors, + ignore_trailing, loose_trailing ); case m_repair: one_file( filenames.size() ); - set_signals(); - return repair_file( filenames[0], default_output_filename, force ); + return repair_file( filenames[0], default_output_filename, force, terminator ); case m_show_packets: one_file( filenames.size() ); return debug_decompress( filenames[0], bad_byte, true ); case m_split: one_file( filenames.size() ); - set_signals(); return split_file( filenames[0], default_output_filename, force ); case m_test: break; } } - catch( std::bad_alloc ) + catch( std::bad_alloc & ) { show_error( "Not enough memory." 
); cleanup_and_fail( 1 ); } - catch( Error e ) { show_error( e.msg, errno ); cleanup_and_fail( 1 ); } + catch( Error & e ) { show_error( e.msg, errno ); cleanup_and_fail( 1 ); } if( filenames.empty() ) filenames.push_back("-"); if( program_mode == m_list ) - return list_files( filenames, ignore_trailing, loose_trailing ); + return list_files( filenames, ignore_errors, ignore_trailing, + loose_trailing ); if( program_mode == m_test ) outfd = -1; @@ -862,7 +916,7 @@ int main( const int argc, const char * const argv[] ) if( !to_stdout && program_mode != m_test && ( filenames_given || default_output_filename.size() ) ) - set_signals(); + set_signals( signal_handler ); Pretty_print pp( filenames ); @@ -941,6 +995,12 @@ int main( const int argc, const char * const argv[] ) else tmp = decompress( cfile_size, infd, pp, ignore_trailing, loose_trailing, program_mode == m_test ); + if( close( infd ) != 0 ) + { + show_error( input_filename.size() ? "Error closing input file" : + "Error closing stdin", errno ); + if( tmp < 1 ) tmp = 1; + } if( tmp > retval ) retval = tmp; if( tmp ) { if( program_mode != m_test ) cleanup_and_fail( retval ); @@ -950,7 +1010,6 @@ int main( const int argc, const char * const argv[] ) close_and_set_permissions( in_statsp ); if( input_filename.size() ) { - close( infd ); if( !keep_input_files && !to_stdout && program_mode != m_test ) std::remove( input_filename.c_str() ); } diff --git a/main_common.cc b/main_common.cc index 3b9f677..d7a2e81 100644 --- a/main_common.cc +++ b/main_common.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2018 Antonio Diaz Diaz. + Copyright (C) 2009-2019 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,8 +17,7 @@ namespace { -const char * const program_year = "2018"; - +const char * const program_year = "2019"; void show_version() { @@ -65,7 +64,7 @@ long long getnum( const char * const ptr, const int hardbs, case 'k': if( tail[0] != 'i' ) exponent = 1; break; case 'B': case 's': usuf = *p; exponent = 0; break; - default : if( tailp ) { tail = p; exponent = 0; } break; + default : if( tailp ) { tail = p; exponent = 0; } } if( exponent > 1 && tail[0] == 'i' ) { ++tail; factor = 1024; } if( exponent > 0 && usuf == 0 && ( tail[0] == 'B' || tail[0] == 's' ) ) @@ -104,11 +103,9 @@ void show_error( const char * const msg, const int errcode, const bool help ) { if( verbosity < 0 ) return; if( msg && msg[0] ) - { - std::fprintf( stderr, "%s: %s", program_name, msg ); - if( errcode > 0 ) std::fprintf( stderr, ": %s", std::strerror( errcode ) ); - std::fputc( '\n', stderr ); - } + std::fprintf( stderr, "%s: %s%s%s\n", program_name, msg, + ( errcode > 0 ) ? ": " : "", + ( errcode > 0 ) ? std::strerror( errcode ) : "" ); if( help ) std::fprintf( stderr, "Try '%s --help' for more information.\n", invocation_name ); @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2018 Antonio Diaz Diaz. + Copyright (C) 2009-2019 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,19 +31,30 @@ #include "lzip.h" #include "decoder.h" -#include "block.h" -#include "file_index.h" +#include "lzip_index.h" +Block Block::split( const long long pos ) + { + if( pos > pos_ && pos < end() ) + { + const Block b( pos_, pos - pos_ ); + pos_ = pos; size_ -= b.size_; + return b; + } + return Block( 0, 0 ); + } + namespace { bool pending_newline = false; -void print_pending_newline() - { if( pending_newline ) std::fputc( '\n', stdout ); pending_newline = false; } +void print_pending_newline( const char terminator ) + { if( pending_newline && terminator != '\n' ) std::fputc( '\n', stdout ); + pending_newline = false; } -bool file_crc( uint32_t & crc, const int infd ) +bool file_crc( uint32_t & crc, const int infd, const char * const filename ) { const int buffer_size = 65536; crc = 0xFFFFFFFFU; @@ -54,7 +65,8 @@ bool file_crc( uint32_t & crc, const int infd ) { const int rd = readblock( infd, buffer, buffer_size ); if( rd != buffer_size && errno ) - { show_error( "Error reading input file", errno ); error = true; break; } + { show_file_error( filename, "Error reading input file", errno ); + error = true; break; } if( rd > 0 ) crc32.update_buf( crc, buffer, rd ); if( rd < buffer_size ) break; // EOF @@ -108,6 +120,7 @@ void combine( std::vector< Block > & block_vector, std::vector< Block > & bv ) // positions in 'block_vector' are absolute file positions. // blocks in 'block_vector' are ascending and don't overlap. 
bool diff_member( const long long mpos, const long long msize, + const std::vector< std::string > & filenames, const std::vector< int > & infd_vector, std::vector< Block > & block_vector, std::vector< int > & color_vector ) @@ -138,12 +151,13 @@ bool diff_member( const long long mpos, const long long msize, const int size = std::min( (long long)buffer_size, msize - partial_pos ); const int rd = readblock( fd1, buffer1, size ); if( rd != size && errno ) - { show_error( "Error reading input file", errno ); error = true; break; } + { show_file_error( filenames[i1].c_str(), "Error reading input file", + errno ); error = true; break; } if( rd > 0 ) { if( readblock( fd2, buffer2, rd ) != rd ) - { show_error( "Error reading input file", errno ); - error = true; break; } + { show_file_error( filenames[i2].c_str(), "Error reading input file", + errno ); error = true; break; } for( int i = 0; i < rd; ++i ) { if( buffer1[i] != buffer2[i] ) @@ -201,13 +215,13 @@ long ipow( const unsigned base, const unsigned exponent ) int open_input_files( const std::vector< std::string > & filenames, std::vector< int > & infd_vector, - File_index & file_index, struct stat * const in_statsp ) + Lzip_index & lzip_index, struct stat * const in_statsp ) { const int files = filenames.size(); for( int i = 0; i + 1 < files; ++i ) for( int j = i + 1; j < files; ++j ) if( filenames[i] == filenames[j] ) - { show_error2( "Input file", filenames[i].c_str(), "given twice." ); + { show_file_error( filenames[i].c_str(), "Input file given twice." ); return 2; } { std::vector< uint32_t > crc_vector( files ); @@ -217,51 +231,52 @@ int open_input_files( const std::vector< std::string > & filenames, infd_vector[i] = open_instream( filenames[i].c_str(), ( i == 0 ) ? 
in_statsp : &in_stats, true, true ); if( infd_vector[i] < 0 ) return 1; - if( !file_crc( crc_vector[i], infd_vector[i] ) ) return 1; + if( !file_crc( crc_vector[i], infd_vector[i], filenames[i].c_str() ) ) + return 1; for( int j = 0; j < i; ++j ) if( crc_vector[i] == crc_vector[j] ) - { show_error4( "Input files", filenames[j].c_str(), - filenames[i].c_str(), "are identical." ); return 2; } + { show_2file_error( "Input files", filenames[j].c_str(), + filenames[i].c_str(), "are identical." ); return 2; } } } - long long isize = 0; - int good_fi = -1; + long long insize = 0; + int good_i = -1; for( int i = 0; i < files; ++i ) { long long tmp; - const File_index fi( infd_vector[i], true, true, true ); - if( fi.retval() == 0 ) // file format is intact + const Lzip_index li( infd_vector[i], true, true, true ); + if( li.retval() == 0 ) // file format is intact { - if( good_fi < 0 ) { good_fi = i; file_index = fi; } - else if( file_index != fi ) - { show_error4( "Input files", filenames[good_fi].c_str(), - filenames[i].c_str(), "are different." ); return 2; } - tmp = file_index.file_size(); + if( good_i < 0 ) { good_i = i; lzip_index = li; } + else if( lzip_index != li ) + { show_2file_error( "Input files", filenames[good_i].c_str(), + filenames[i].c_str(), "are different." ); return 2; } + tmp = lzip_index.file_size(); } else // file format is damaged { tmp = lseek( infd_vector[i], 0, SEEK_END ); if( tmp < 0 ) { - show_error2( "Input file", filenames[i].c_str(), "is not seekable." ); + show_file_error( filenames[i].c_str(), "Input file is not seekable." ); return 1; } } if( tmp < min_member_size ) - { show_error2( "Input file", filenames[i].c_str(), "is too short." ); + { show_file_error( filenames[i].c_str(), "Input file is too short." ); return 2; } - if( i == 0 ) isize = tmp; - else if( isize != tmp ) - { show_error4( "Sizes of input files", filenames[0].c_str(), - filenames[i].c_str(), "are different." 
); return 2; } + if( i == 0 ) insize = tmp; + else if( insize != tmp ) + { show_2file_error( "Sizes of input files", filenames[0].c_str(), + filenames[i].c_str(), "are different." ); return 2; } } - if( file_index.retval() != 0 ) + if( lzip_index.retval() != 0 ) { - const File_index fi( infd_vector, isize ); - if( fi.retval() == 0 ) // file format could be recovered - file_index = fi; + const Lzip_index li( infd_vector, insize ); + if( li.retval() == 0 ) // file format could be recovered + lzip_index = li; else { show_error( "Format damaged in all input files." ); return 2; } } @@ -270,12 +285,12 @@ int open_input_files( const std::vector< std::string > & filenames, { const int infd = infd_vector[i]; bool error = false; - for( long j = 0; j < file_index.members(); ++j ) + for( long j = 0; j < lzip_index.members(); ++j ) { - const long long mpos = file_index.mblock( j ).pos(); - const long long msize = file_index.mblock( j ).size(); + const long long mpos = lzip_index.mblock( j ).pos(); + const long long msize = lzip_index.mblock( j ).size(); if( !safe_seek( infd, mpos ) ) return 1; - if( !test_member_from_file( infd, msize ) ) { error = true; break; } + if( test_member_from_file( infd, msize ) != 0 ) { error = true; break; } } if( !error ) { @@ -333,7 +348,8 @@ bool color_done( const std::vector< int > & color_vector, const int i ) bool try_merge_member2( const long long mpos, const long long msize, const std::vector< Block > & block_vector, const std::vector< int > & color_vector, - const std::vector< int > & infd_vector ) + const std::vector< int > & infd_vector, + const char terminator ) { const int blocks = block_vector.size(); const int files = infd_vector.size(); @@ -355,8 +371,8 @@ bool try_merge_member2( const long long mpos, const long long msize, { if( verbosity >= 2 ) { - std::printf( " Trying variation %d of %d, block %d \r", - var, variations, bi + 1 ); + std::printf( " Trying variation %d of %d, block %d %c", + var, variations, bi + 1, terminator ); 
std::fflush( stdout ); pending_newline = true; } if( !safe_seek( infd, block_vector[bi].pos() ) || @@ -365,7 +381,8 @@ bool try_merge_member2( const long long mpos, const long long msize, !safe_seek( outfd, mpos ) ) cleanup_and_fail( 1 ); long long failure_pos = 0; - if( test_member_from_file( outfd, msize, &failure_pos ) ) return true; + if( test_member_from_file( outfd, msize, &failure_pos ) == 0 ) + return true; if( mpos + failure_pos < block_vector[bi].end() ) break; } } @@ -377,7 +394,8 @@ bool try_merge_member2( const long long mpos, const long long msize, bool try_merge_member( const long long mpos, const long long msize, const std::vector< Block > & block_vector, const std::vector< int > & color_vector, - const std::vector< int > & infd_vector ) + const std::vector< int > & infd_vector, + const char terminator ) { const int blocks = block_vector.size(); const int files = infd_vector.size(); @@ -400,7 +418,8 @@ bool try_merge_member( const long long mpos, const long long msize, long var = 0; for( int i = 0; i < blocks; ++i ) var = ( var * files ) + file_idx[i]; - std::printf( " Trying variation %ld of %ld \r", var + 1, variations ); + std::printf( " Trying variation %ld of %ld %c", + var + 1, variations, terminator ); std::fflush( stdout ); pending_newline = true; } while( bi < blocks ) @@ -412,10 +431,9 @@ bool try_merge_member( const long long mpos, const long long msize, cleanup_and_fail( 1 ); ++bi; } - if( !safe_seek( outfd, mpos ) ) - cleanup_and_fail( 1 ); + if( !safe_seek( outfd, mpos ) ) cleanup_and_fail( 1 ); long long failure_pos = 0; - if( test_member_from_file( outfd, msize, &failure_pos ) ) return true; + if( test_member_from_file( outfd, msize, &failure_pos ) == 0 ) return true; while( bi > 0 && mpos + failure_pos < block_vector[bi-1].pos() ) --bi; while( --bi >= 0 ) { @@ -433,7 +451,8 @@ bool try_merge_member( const long long mpos, const long long msize, bool try_merge_member1( const long long mpos, const long long msize, const std::vector< 
Block > & block_vector, const std::vector< int > & color_vector, - const std::vector< int > & infd_vector ) + const std::vector< int > & infd_vector, + const char terminator ) { if( block_vector.size() != 1 || block_vector[0].size() <= 1 ) return false; const long long pos = block_vector[0].pos(); @@ -458,8 +477,8 @@ bool try_merge_member1( const long long mpos, const long long msize, { if( verbosity >= 2 ) { - std::printf( " Trying variation %d of %d, position %lld \r", - var, variations, pos + i ); + std::printf( " Trying variation %d of %d, position %lld %c", + var, variations, pos + i, terminator ); std::fflush( stdout ); pending_newline = true; } if( !safe_seek( outfd, pos + i ) || @@ -468,7 +487,8 @@ bool try_merge_member1( const long long mpos, const long long msize, !safe_seek( outfd, mpos ) ) cleanup_and_fail( 1 ); long long failure_pos = 0; - if( test_member_from_file( outfd, msize, &failure_pos ) ) return true; + if( test_member_from_file( outfd, msize, &failure_pos ) == 0 ) + return true; if( mpos + failure_pos <= pos + i ) break; } } @@ -478,12 +498,15 @@ bool try_merge_member1( const long long mpos, const long long msize, } // end namespace +// infd and outfd can refer to the same file if copying to a lower file +// position or if source and destination blocks don't overlap. // max_size < 0 means no size limit. bool copy_file( const int infd, const int outfd, const long long max_size ) { const int buffer_size = 65536; // remaining number of bytes to copy long long rest = ( ( max_size >= 0 ) ? 
max_size : buffer_size ); + long long copied_size = 0; uint8_t * const buffer = new uint8_t[buffer_size]; bool error = false; @@ -500,72 +523,78 @@ bool copy_file( const int infd, const int outfd, const long long max_size ) if( wr != rd ) { show_error( "Error writing output file", errno ); error = true; break; } + copied_size += rd; } if( rd < size ) break; // EOF } delete[] buffer; + if( !error && max_size >= 0 && copied_size != max_size ) + { show_error( "Input file ends unexpectedly." ); error = true; } return !error; } -bool test_member_from_file( const int infd, const unsigned long long msize, - long long * const failure_posp ) +// Return value: 0 = OK, 1 = bad msize, 2 = data error +// 'failure_pos' is relative to the beginning of the member +int test_member_from_file( const int infd, const unsigned long long msize, + long long * const failure_posp ) { Range_decoder rdec( infd ); - File_header header; - rdec.read_data( header.data, File_header::size ); + Lzip_header header; + rdec.read_data( header.data, Lzip_header::size ); const unsigned dictionary_size = header.dictionary_size(); + bool done = false; if( !rdec.finished() && header.verify_magic() && header.verify_version() && isvalid_ds( dictionary_size ) ) { LZ_decoder decoder( rdec, dictionary_size, -1 ); const int old_verbosity = verbosity; verbosity = -1; // suppress all messages - Pretty_print dummy( "" ); - const bool done = ( decoder.decode_member( dummy ) == 0 && - rdec.member_position() == msize ); + Pretty_print dummy_pp( "" ); + done = ( decoder.decode_member( dummy_pp ) == 0 ); verbosity = old_verbosity; // restore verbosity level - if( done ) return true; + if( done && rdec.member_position() == msize ) return 0; } if( failure_posp ) *failure_posp = rdec.member_position(); - return false; + return done ? 
1 : 2; } int merge_files( const std::vector< std::string > & filenames, const std::string & default_output_filename, - const bool force ) + const bool force, const char terminator ) { const int files = filenames.size(); std::vector< int > infd_vector( files ); - File_index file_index; + Lzip_index lzip_index; struct stat in_stats; const int retval = - open_input_files( filenames, infd_vector, file_index, &in_stats ); + open_input_files( filenames, infd_vector, lzip_index, &in_stats ); if( retval >= 0 ) return retval; if( !safe_seek( infd_vector[0], 0 ) ) return 1; output_filename = default_output_filename.empty() ? insert_fixed( filenames[0] ) : default_output_filename; + set_signal_handler(); if( !open_outstream( force, false, true, false ) ) return 1; if( !copy_file( infd_vector[0], outfd ) ) // copy whole file cleanup_and_fail( 1 ); - for( long j = 0; j < file_index.members(); ++j ) + for( long j = 0; j < lzip_index.members(); ++j ) { - const long long mpos = file_index.mblock( j ).pos(); - const long long msize = file_index.mblock( j ).size(); + const long long mpos = lzip_index.mblock( j ).pos(); + const long long msize = lzip_index.mblock( j ).size(); // vector of data blocks differing among the copies of the current member std::vector< Block > block_vector; // different color means members are different std::vector< int > color_vector( files, 0 ); - if( !diff_member( mpos, msize, infd_vector, block_vector, color_vector ) || - !safe_seek( outfd, mpos ) ) + if( !diff_member( mpos, msize, filenames, infd_vector, block_vector, + color_vector ) || !safe_seek( outfd, mpos ) ) cleanup_and_fail( 1 ); if( block_vector.empty() ) { - if( file_index.members() > 1 && test_member_from_file( outfd, msize ) ) + if( lzip_index.members() > 1 && test_member_from_file( outfd, msize ) == 0 ) continue; if( verbosity >= 0 ) std::fprintf( stderr, "Member %ld is damaged and identical in all files." 
@@ -576,33 +605,33 @@ int merge_files( const std::vector< std::string > & filenames, if( verbosity >= 2 ) { std::printf( "Merging member %ld of %ld (%lu error%s)\n", - j + 1, file_index.members(), (long)block_vector.size(), + j + 1, lzip_index.members(), (long)block_vector.size(), ( block_vector.size() == 1 ) ? "" : "s" ); std::fflush( stdout ); } bool done = false; - if( file_index.members() > 1 || block_vector.size() > 1 ) + if( lzip_index.members() > 1 || block_vector.size() > 1 ) { if( block_vector.size() > 1 ) { maybe_cluster_blocks( block_vector ); done = try_merge_member2( mpos, msize, block_vector, color_vector, - infd_vector ); - print_pending_newline(); + infd_vector, terminator ); + print_pending_newline( terminator ); } if( !done ) { done = try_merge_member( mpos, msize, block_vector, color_vector, - infd_vector ); - print_pending_newline(); + infd_vector, terminator ); + print_pending_newline( terminator ); } } if( !done ) { done = try_merge_member1( mpos, msize, block_vector, color_vector, - infd_vector ); - print_pending_newline(); + infd_vector, terminator ); + print_pending_newline( terminator ); } if( !done ) { @@ -610,7 +639,7 @@ int merge_files( const std::vector< std::string > & filenames, for( unsigned i = 0; i < block_vector.size(); ++i ) std::fprintf( stderr, "area %2d from position %6lld to %6lld\n", i + 1, block_vector[i].pos(), block_vector[i].end() - 1 ); - show_error( "Some error areas overlap. Can't recover input file." ); + show_error( "Some error areas overlap. Merging is not possible." ); cleanup_and_fail( 2 ); } } @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2018 Antonio Diaz Diaz. + Copyright (C) 2009-2019 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -50,32 +50,6 @@ const char * format_byte( const uint8_t byte ) } // end namespace -void LZ_mtester::flush_data() - { - if( pos > stream_pos ) - { - const int size = pos - stream_pos; - crc32.update_buf( crc_, buffer + stream_pos, size ); - if( outfd >= 0 && writeblock( outfd, buffer + stream_pos, size ) != size ) - throw Error( "Write error" ); - if( pos >= dictionary_size ) - { partial_data_pos += pos; pos = 0; pos_wrapped = true; } - stream_pos = pos; - } - } - - -bool LZ_mtester::verify_trailer() - { - const File_trailer * const trailer = rdec.get_trailer(); - if( !trailer ) return false; - - return ( trailer->data_crc() == crc() && - trailer->data_size() == data_position() && - trailer->member_size() == member_position() ); - } - - void LZ_mtester::print_block( const int len ) { std::fputs( " \"", stdout ); @@ -100,91 +74,117 @@ void LZ_mtester::duplicate_buffer() } +void LZ_mtester::flush_data() + { + if( pos > stream_pos ) + { + const int size = pos - stream_pos; + crc32.update_buf( crc_, buffer + stream_pos, size ); + if( outfd >= 0 && writeblock( outfd, buffer + stream_pos, size ) != size ) + throw Error( "Write error" ); + if( pos >= dictionary_size ) + { partial_data_pos += pos; pos = 0; pos_wrapped = true; } + stream_pos = pos; + } + } + + +bool LZ_mtester::verify_trailer() + { + const Lzip_trailer * const trailer = rdec.get_trailer(); + + return ( trailer && + trailer->data_crc() == crc() && + trailer->data_size() == data_position() && + trailer->member_size() == member_position() ); + } + + /* Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF, 3 = trailer error, 4 = unknown marker found, -1 = pos_limit reached. 
*/ int LZ_mtester::test_member( const unsigned long pos_limit ) { - if( pos_limit < File_header::size + 5 ) return -1; - if( member_position() == File_header::size ) rdec.load(); + if( pos_limit < Lzip_header::size + 5 ) return -1; + if( member_position() == Lzip_header::size ) rdec.load(); while( !rdec.finished() ) { if( member_position() >= pos_limit ) { flush_data(); return -1; } const int pos_state = data_position() & pos_state_mask; if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit { + // literal byte Bit_model * const bm = bm_literal[get_lit_state(peek_prev())]; if( state.is_char_set_char() ) put_byte( rdec.decode_tree8( bm ) ); else put_byte( rdec.decode_matched( bm, peek( rep0 ) ) ); + continue; } - else // match or repeated match + // match or repeated match + int len; + if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit { - int len; - if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit + if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit { - if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit - { - if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit - { state.set_short_rep(); put_byte( peek( rep0 ) ); continue; } - } + if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit + { state.set_short_rep(); put_byte( peek( rep0 ) ); continue; } + } + else + { + unsigned distance; + if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit + distance = rep1; else { - unsigned distance; - if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit - distance = rep1; + if( rdec.decode_bit( bm_rep2[state()] ) == 0 ) // 5th bit + distance = rep2; else - { - if( rdec.decode_bit( bm_rep2[state()] ) == 0 ) // 5th bit - distance = rep2; - else - { distance = rep3; rep3 = rep2; } - rep2 = rep1; - } - rep1 = rep0; - rep0 = distance; + { distance = rep3; rep3 = rep2; } + rep2 = rep1; } - state.set_rep(); - len = min_match_len + rdec.decode_len( rep_len_model, pos_state ); + rep1 = rep0; + rep0 = distance; 
} - else // match + state.set_rep(); + len = min_match_len + rdec.decode_len( rep_len_model, pos_state ); + } + else // match + { + len = min_match_len + rdec.decode_len( match_len_model, pos_state ); + unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] ); + if( distance >= start_dis_model ) { - len = min_match_len + rdec.decode_len( match_len_model, pos_state ); - unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] ); - if( distance >= start_dis_model ) + const unsigned dis_slot = distance; + const int direct_bits = ( dis_slot >> 1 ) - 1; + distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits; + if( dis_slot < end_dis_model ) + distance += rdec.decode_tree_reversed( + bm_dis + ( distance - dis_slot ), direct_bits ); + else { - const unsigned dis_slot = distance; - const int direct_bits = ( dis_slot >> 1 ) - 1; - distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits; - if( dis_slot < end_dis_model ) - distance += rdec.decode_tree_reversed( - bm_dis + ( distance - dis_slot ), direct_bits ); - else + distance += + rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits; + distance += rdec.decode_tree_reversed4( bm_align ); + if( distance == 0xFFFFFFFFU ) // marker found { - distance += - rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits; - distance += rdec.decode_tree_reversed4( bm_align ); - if( distance == 0xFFFFFFFFU ) // marker found + rdec.normalize(); + flush_data(); + if( len == min_match_len ) // End Of Stream marker { - rdec.normalize(); - flush_data(); - if( len == min_match_len ) // End Of Stream marker - { - if( verify_trailer() ) return 0; else return 3; - } - return 4; + if( verify_trailer() ) return 0; else return 3; } - if( distance > max_rep0 ) max_rep0 = distance; + return 4; } + if( distance > max_rep0 ) max_rep0 = distance; } - rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance; - state.set_match(); - if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) ) - { flush_data(); return 1; 
} } - copy_block( rep0, len ); + rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance; + state.set_match(); + if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) ) + { flush_data(); return 1; } } + copy_block( rep0, len ); } flush_data(); return 2; @@ -204,6 +204,7 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos, const int pos_state = data_position() & pos_state_mask; if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit { + // literal byte Bit_model * const bm = bm_literal[get_lit_state(peek_prev())]; if( state.is_char_set_char() ) { @@ -223,138 +224,99 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos, mp, dp, format_byte( cur_byte ), dp - rep0 - 1, format_byte( match_byte ) ); } + continue; } - else // match or repeated match + // match or repeated match + int len; + if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit { - int len; - if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit + int rep = 0; + if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit { - int rep = 0; - if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit + if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit { - if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit - { - if( show_packets ) - std::printf( "%6llu %6llu shortrep %s %6u (%6llu)\n", - mp, dp, format_byte( peek( rep0 ) ), - rep0 + 1, dp - rep0 - 1 ); - state.set_short_rep(); put_byte( peek( rep0 ) ); continue; - } + if( show_packets ) + std::printf( "%6llu %6llu shortrep %s %6u (%6llu)\n", + mp, dp, format_byte( peek( rep0 ) ), + rep0 + 1, dp - rep0 - 1 ); + state.set_short_rep(); put_byte( peek( rep0 ) ); continue; } + } + else + { + unsigned distance; + if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit + { distance = rep1; rep = 1; } else { - unsigned distance; - if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit - { distance = rep1; rep = 1; } + if( rdec.decode_bit( 
bm_rep2[state()] ) == 0 ) // 5th bit + { distance = rep2; rep = 2; } else - { - if( rdec.decode_bit( bm_rep2[state()] ) == 0 ) // 5th bit - { distance = rep2; rep = 2; } - else - { distance = rep3; rep3 = rep2; rep = 3; } - rep2 = rep1; - } - rep1 = rep0; - rep0 = distance; + { distance = rep3; rep3 = rep2; rep = 3; } + rep2 = rep1; } - state.set_rep(); - len = min_match_len + rdec.decode_len( rep_len_model, pos_state ); - if( show_packets ) - std::printf( "%6llu %6llu rep%c %6u,%3d (%6llu)", - mp, dp, rep + '0', rep0 + 1, len, dp - rep0 - 1 ); + rep1 = rep0; + rep0 = distance; } - else // match + state.set_rep(); + len = min_match_len + rdec.decode_len( rep_len_model, pos_state ); + if( show_packets ) + std::printf( "%6llu %6llu rep%c %6u,%3d (%6llu)", + mp, dp, rep + '0', rep0 + 1, len, dp - rep0 - 1 ); + } + else // match + { + len = min_match_len + rdec.decode_len( match_len_model, pos_state ); + unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] ); + if( distance >= start_dis_model ) { - len = min_match_len + rdec.decode_len( match_len_model, pos_state ); - unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] ); - if( distance >= start_dis_model ) + const unsigned dis_slot = distance; + const int direct_bits = ( dis_slot >> 1 ) - 1; + distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits; + if( dis_slot < end_dis_model ) + distance += rdec.decode_tree_reversed( + bm_dis + ( distance - dis_slot ), direct_bits ); + else { - const unsigned dis_slot = distance; - const int direct_bits = ( dis_slot >> 1 ) - 1; - distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits; - if( dis_slot < end_dis_model ) - distance += rdec.decode_tree_reversed( - bm_dis + ( distance - dis_slot ), direct_bits ); - else + distance += + rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits; + distance += rdec.decode_tree_reversed4( bm_align ); + if( distance == 0xFFFFFFFFU ) // marker found { - distance += - rdec.decode( direct_bits - dis_align_bits ) 
<< dis_align_bits; - distance += rdec.decode_tree_reversed4( bm_align ); - if( distance == 0xFFFFFFFFU ) // marker found + rdec.normalize(); + flush_data(); + if( show_packets ) + std::printf( "%6llu %6llu marker code '%d'\n", mp, dp, len ); + if( len == min_match_len ) // End Of Stream marker { - rdec.normalize(); - flush_data(); if( show_packets ) - std::printf( "%6llu %6llu marker code '%d'\n", mp, dp, len ); - if( len == min_match_len ) // End Of Stream marker - { - if( show_packets ) - std::printf( "%6llu %6llu member trailer\n", - mpos + member_position(), dpos + data_position() ); - if( verify_trailer() ) return 0; - if( show_packets ) std::fputs( "trailer error\n", stdout ); - return 3; - } - if( len == min_match_len + 1 ) // Sync Flush marker - { - rdec.load(); continue; - } - return 4; + std::printf( "%6llu %6llu member trailer\n", + mpos + member_position(), dpos + data_position() ); + if( verify_trailer() ) return 0; + if( show_packets ) std::fputs( "trailer error\n", stdout ); + return 3; } - if( distance > max_rep0 ) max_rep0 = distance; + if( len == min_match_len + 1 ) // Sync Flush marker + { + rdec.load(); continue; + } + return 4; } + if( distance > max_rep0 ) max_rep0 = distance; } - rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance; - state.set_match(); - if( show_packets ) - std::printf( "%6llu %6llu match %6u,%3d (%6lld)", - mp, dp, rep0 + 1, len, dp - rep0 - 1 ); - if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) ) - { flush_data(); if( show_packets ) std::fputc( '\n', stdout ); - return 1; } } - copy_block( rep0, len ); - if( show_packets ) print_block( len ); + rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance; + state.set_match(); + if( show_packets ) + std::printf( "%6llu %6llu match %6u,%3d (%6lld)", + mp, dp, rep0 + 1, len, dp - rep0 - 1 ); + if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) ) + { flush_data(); if( show_packets ) std::fputc( '\n', stdout ); + return 1; } } + copy_block( rep0, len ); 
+ if( show_packets ) print_block( len ); } flush_data(); return 2; } - - -uint8_t * read_member( const int infd, const long long mpos, - const long long msize ) - { - if( msize <= 0 || msize > LONG_MAX ) - { show_error( "Member is larger than LONG_MAX." ); return 0; } - if( !safe_seek( infd, mpos ) ) return 0; - uint8_t * const buffer = new uint8_t[msize]; - - if( readblock( infd, buffer, msize ) != msize ) - { show_error( "Error reading input file", errno ); - delete[] buffer; return 0; } - return buffer; - } - - -const LZ_mtester * prepare_master( const uint8_t * const buffer, - const long buffer_size, - const unsigned long pos_limit, - const unsigned dictionary_size ) - { - LZ_mtester * const master = - new LZ_mtester( buffer, buffer_size, dictionary_size ); - if( master->test_member( pos_limit ) == -1 ) return master; - delete master; - return 0; - } - - -bool test_member_rest( const LZ_mtester & master, long * const failure_posp ) - { - LZ_mtester mtester( master ); - mtester.duplicate_buffer(); - if( mtester.test_member() == 0 && mtester.finished() ) return true; - if( failure_posp ) *failure_posp = mtester.member_position(); - return false; - } @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2018 Antonio Diaz Diaz. + Copyright (C) 2009-2019 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,7 +31,7 @@ public: : buffer( buf ), buffer_size( buf_size ), - pos( File_header::size ), + pos( Lzip_header::size ), code( 0 ), range( 0xFFFFFFFFU ), at_stream_end( false ) @@ -47,11 +47,11 @@ public: return buffer[pos++]; } - const File_trailer * get_trailer() + const Lzip_trailer * get_trailer() { - if( buffer_size - pos < File_trailer::size ) return 0; - const File_trailer * const p = (File_trailer *)( buffer + pos ); - pos += File_trailer::size; + if( buffer_size - pos < Lzip_trailer::size ) return 0; + const Lzip_trailer * const p = (const Lzip_trailer *)( buffer + pos ); + pos += Lzip_trailer::size; return p; } @@ -69,24 +69,23 @@ public: { range <<= 8; code = (code << 8) | get_byte(); } } - int decode( const int num_bits ) + unsigned decode( const int num_bits ) { - int symbol = 0; + unsigned symbol = 0; for( int i = num_bits; i > 0; --i ) { normalize(); range >>= 1; // symbol <<= 1; // if( code >= range ) { code -= range; symbol |= 1; } - const uint32_t mask = 0U - (code < range); - code -= range; - code += range & mask; - symbol = (symbol << 1) + (mask + 1); + const bool bit = ( code >= range ); + symbol = ( symbol << 1 ) + bit; + code -= range & ( 0U - bit ); } return symbol; } - int decode_bit( Bit_model & bm ) + unsigned decode_bit( Bit_model & bm ) { normalize(); const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability; @@ -105,18 +104,18 @@ public: } } - int decode_tree3( Bit_model bm[] ) + unsigned decode_tree3( Bit_model bm[] ) { - int symbol = 1; + unsigned symbol = 1; symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); return symbol & 7; } - int decode_tree6( Bit_model bm[] ) + unsigned decode_tree6( Bit_model bm[] ) { - int symbol = 1; + unsigned symbol = 1; symbol = ( symbol << 1 ) 
| decode_bit( bm[symbol] ); symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); @@ -126,49 +125,47 @@ public: return symbol & 0x3F; } - int decode_tree8( Bit_model bm[] ) + unsigned decode_tree8( Bit_model bm[] ) { - int symbol = 1; - while( symbol < 0x100 ) + unsigned symbol = 1; + for( int i = 0; i < 8; ++i ) symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); return symbol & 0xFF; } - int decode_tree_reversed( Bit_model bm[], const int num_bits ) + unsigned decode_tree_reversed( Bit_model bm[], const int num_bits ) { - int model = 1; - int symbol = 0; + unsigned model = 1; + unsigned symbol = 0; for( int i = 0; i < num_bits; ++i ) { - const bool bit = decode_bit( bm[model] ); - model <<= 1; - if( bit ) { ++model; symbol |= (1 << i); } + const unsigned bit = decode_bit( bm[model] ); + model = ( model << 1 ) + bit; + symbol |= ( bit << i ); } return symbol; } - int decode_tree_reversed4( Bit_model bm[] ) + unsigned decode_tree_reversed4( Bit_model bm[] ) { - int model = 1; - int symbol = decode_bit( bm[model] ); - model = (model << 1) + symbol; - int bit = decode_bit( bm[model] ); - model = (model << 1) + bit; symbol |= (bit << 1); + unsigned symbol = decode_bit( bm[1] ); + unsigned model = 2 + symbol; + unsigned bit = decode_bit( bm[model] ); + model = ( model << 1 ) + bit; symbol |= ( bit << 1 ); bit = decode_bit( bm[model] ); - model = (model << 1) + bit; symbol |= (bit << 2); - if( decode_bit( bm[model] ) ) symbol |= 8; + model = ( model << 1 ) + bit; symbol |= ( bit << 2 ); + symbol |= ( decode_bit( bm[model] ) << 3 ); return symbol; } - int decode_matched( Bit_model bm[], int match_byte ) + unsigned decode_matched( Bit_model bm[], unsigned match_byte ) { Bit_model * const bm1 = bm + 0x100; - int symbol = 1; + unsigned symbol = 1; while( symbol < 0x100 ) { - match_byte <<= 1; - const int match_bit = match_byte & 0x100; - const int bit = decode_bit( bm1[match_bit+symbol] ); + const unsigned match_bit = ( 
match_byte <<= 1 ) & 0x100; + const unsigned bit = decode_bit( bm1[match_bit+symbol] ); symbol = ( symbol << 1 ) | bit; if( match_bit != bit << 8 ) { @@ -180,7 +177,7 @@ public: return symbol & 0xFF; } - int decode_len( Len_model & lm, const int pos_state ) + unsigned decode_len( Len_model & lm, const int pos_state ) { if( decode_bit( lm.choice1 ) == 0 ) return decode_tree3( lm.bm_low[pos_state] ); @@ -223,20 +220,17 @@ class LZ_mtester Len_model match_len_model; Len_model rep_len_model; + void print_block( const int len ); void flush_data(); bool verify_trailer(); - void print_block( const int len ); uint8_t peek_prev() const - { - const unsigned i = ( ( pos > 0 ) ? pos : dictionary_size ) - 1; - return buffer[i]; - } + { return buffer[((pos > 0) ? pos : dictionary_size)-1]; } uint8_t peek( const unsigned distance ) const { - unsigned i = pos - distance - 1; - if( pos <= distance ) i += dictionary_size; + const unsigned i = ( ( pos > distance ) ? 0 : dictionary_size ) + + pos - distance - 1; return buffer[i]; } @@ -248,17 +242,26 @@ class LZ_mtester void copy_block( const unsigned distance, unsigned len ) { - unsigned i = pos - distance - 1; - bool fast; - if( pos <= distance ) - { i += dictionary_size; - fast = ( len <= dictionary_size - i && len <= i - pos ); } + unsigned lpos = pos, i = lpos - distance - 1; + bool fast, fast2; + if( lpos > distance ) + { + fast = ( len < dictionary_size - lpos ); + fast2 = ( fast && len <= lpos - i ); + } else - fast = ( len < dictionary_size - pos && len <= pos - i ); - if( fast ) // no wrap, no overlap { - std::memcpy( buffer + pos, buffer + i, len ); + i += dictionary_size; + fast = ( len < dictionary_size - i ); // (i == pos) may happen + fast2 = ( fast && len <= i - lpos ); + } + if( fast ) // no wrap + { pos += len; + if( fast2 ) // no wrap, no overlap + std::memcpy( buffer + lpos, buffer + i, len ); + else + for( ; len > 0; --len ) buffer[lpos++] = buffer[i++]; } else for( ; len > 0; --len ) { @@ -288,7 +291,8 @@ public: 
rep3( 0 ), max_rep0( 0 ), pos_wrapped( false ) - { buffer[dictionary_size-1] = 0; } // prev_byte of first byte + // prev_byte of first byte; also for peek( 0 ) on corrupt file + { buffer[dictionary_size-1] = 0; } ~LZ_mtester() { delete[] buffer; } @@ -303,12 +307,3 @@ public: int debug_decode_member( const long long dpos, const long long mpos, const bool show_packets ); // sets max_rep0 }; - - -uint8_t * read_member( const int infd, const long long mpos, - const long long msize ); -const LZ_mtester * prepare_master( const uint8_t * const buffer, - const long buffer_size, - const unsigned long pos_limit, - const unsigned dictionary_size ); -bool test_member_rest( const LZ_mtester & master, long * const failure_posp = 0 ); diff --git a/range_dec.cc b/range_dec.cc index 803a540..78d586f 100644 --- a/range_dec.cc +++ b/range_dec.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2018 Antonio Diaz Diaz. + Copyright (C) 2009-2019 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -30,8 +30,7 @@ #include "lzip.h" #include "decoder.h" -#include "block.h" -#include "file_index.h" +#include "lzip_index.h" namespace { @@ -42,14 +41,15 @@ int decompress_member( const int infd, const Pretty_print & pp, const unsigned long long outend ) { Range_decoder rdec( infd ); - File_header header; - rdec.read_data( header.data, File_header::size ); + Lzip_header header; + rdec.read_data( header.data, Lzip_header::size ); if( rdec.finished() ) // End Of File { pp( "File ends unexpectedly at member header." 
); return 2; } - if( !verify_header( header, pp ) ) return 2; + if( !header.verify_magic() ) { pp( bad_magic_msg ); return 2; } + if( !header.verify_version() ) + { pp( bad_version( header.version() ) ); return 2; } const unsigned dictionary_size = header.dictionary_size(); - if( !isvalid_ds( dictionary_size ) ) - { pp( "Invalid dictionary size in member header." ); return 2; } + if( !isvalid_ds( dictionary_size ) ) { pp( bad_dict_msg ); return 2; } if( verbosity >= 2 ) pp(); @@ -117,49 +117,49 @@ int range_decompress( const std::string & input_filename, const int infd = open_instream( input_filename.c_str(), &in_stats, true, true ); if( infd < 0 ) return 1; - Pretty_print pp( input_filename ); - const File_index file_index( infd, ignore_errors, ignore_trailing, - loose_trailing ); - if( file_index.retval() != 0 ) - { show_file_error( input_filename.c_str(), file_index.error().c_str() ); - return file_index.retval(); } - - if( range.end() > file_index.udata_size() ) - range.size( std::max( 0LL, file_index.udata_size() - range.pos() ) ); - if( range.size() <= 0 ) - { pp( "Nothing to do." ); return 0; } + const Lzip_index lzip_index( infd, ignore_trailing, loose_trailing, + ignore_errors, ignore_errors ); + if( lzip_index.retval() != 0 ) + { show_file_error( input_filename.c_str(), lzip_index.error().c_str() ); + return lzip_index.retval(); } - if( verbosity >= 1 ) - std::fprintf( stderr, "Decompressing range %sB to %sB (%sof %sBytes)\n", - format_num( range.pos() ), - format_num( range.pos() + range.size() ), - format_num( range.size() ), - format_num( file_index.udata_size() ) ); + if( range.end() > lzip_index.udata_size() ) + range.size( std::max( 0LL, lzip_index.udata_size() - range.pos() ) ); + if( range.size() <= 0 ) + { show_file_error( input_filename.c_str(), "Nothing to do." 
); return 0; } if( to_stdout || default_output_filename.empty() ) outfd = STDOUT_FILENO; else { output_filename = default_output_filename; + set_signal_handler(); if( !open_outstream( force, false, false, false ) ) { close( infd ); return 1; } } + if( verbosity >= 1 ) + std::fprintf( stderr, "Decompressing range %sB to %sB (%sof %sBytes)\n", + format_num( range.pos() ), + format_num( range.pos() + range.size() ), + format_num( range.size() ), + format_num( lzip_index.udata_size() ) ); + + Pretty_print pp( input_filename ); int retval = 0; - for( long i = 0; i < file_index.members(); ++i ) + for( long i = 0; i < lzip_index.members(); ++i ) { - const Block & db = file_index.dblock( i ); + const Block & db = lzip_index.dblock( i ); if( range.overlaps( db ) ) { - if( verbosity >= 3 && file_index.members() > 1 ) + if( verbosity >= 3 && lzip_index.members() > 1 ) std::fprintf( stderr, "Decompressing member %3ld\n", i + 1 ); const long long outskip = std::max( 0LL, range.pos() - db.pos() ); const long long outend = std::min( db.size(), range.end() - db.pos() ); - const long long mpos = file_index.mblock( i ).pos(); + const long long mpos = lzip_index.mblock( i ).pos(); if( !safe_seek( infd, mpos ) ) { retval = 1; break; } const int tmp = decompress_member( infd, pp, mpos, outskip, outend ); - if( tmp && ( tmp != 2 || !ignore_errors ) ) - cleanup_and_fail( tmp ); + if( tmp && ( tmp != 2 || !ignore_errors ) ) cleanup_and_fail( tmp ); if( tmp > retval ) retval = tmp; pp.reset(); } @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2018 Antonio Diaz Diaz. + Copyright (C) 2009-2019 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,23 +31,38 @@ #include "lzip.h" #include "mtester.h" -#include "block.h" -#include "file_index.h" +#include "lzip_index.h" namespace { bool pending_newline = false; -void print_pending_newline() - { if( pending_newline ) std::fputc( '\n', stdout ); pending_newline = false; } +void print_pending_newline( const char terminator ) + { if( pending_newline && terminator != '\n' ) std::fputc( '\n', stdout ); + pending_newline = false; } + + +uint8_t * read_member( const int infd, const long long mpos, + const long long msize ) + { + if( msize <= 0 || msize > LONG_MAX ) + { show_error( "Member is larger than LONG_MAX." ); return 0; } + if( !safe_seek( infd, mpos ) ) return 0; + uint8_t * const buffer = new uint8_t[msize]; + + if( readblock( infd, buffer, msize ) != msize ) + { show_error( "Error reading input file", errno ); + delete[] buffer; return 0; } + return buffer; + } bool gross_damage( const long long msize, const uint8_t * const mbuffer ) { enum { maxlen = 6 }; // max number of consecutive identical bytes - long i = File_header::size; - const long end = msize - File_trailer::size - maxlen; + long i = Lzip_header::size; + const long end = msize - Lzip_trailer::size - maxlen; while( i < end ) { const uint8_t byte = mbuffer[i]; @@ -71,10 +86,10 @@ int seek_write( const int fd, const uint8_t * const buf, const int size, int repair_dictionary_size( const long long msize, uint8_t * const mbuffer ) { enum { dictionary_size_9 = 1 << 25 }; // dictionary size of option -9 - File_header & header = *(File_header *)mbuffer; + Lzip_header & header = *(Lzip_header *)mbuffer; unsigned dictionary_size = header.dictionary_size(); - File_trailer & trailer = - *(File_trailer *)( mbuffer + msize - File_trailer::size ); + const Lzip_trailer & trailer = + *(const Lzip_trailer *)( mbuffer + msize - Lzip_trailer::size ); const unsigned long long 
data_size = trailer.data_size(); const bool valid_ds = isvalid_ds( dictionary_size ); if( valid_ds && dictionary_size >= data_size ) return 0; // can't be bad @@ -104,10 +119,33 @@ int repair_dictionary_size( const long long msize, uint8_t * const mbuffer ) } +const LZ_mtester * prepare_master( const uint8_t * const buffer, + const long buffer_size, + const unsigned long pos_limit, + const unsigned dictionary_size ) + { + LZ_mtester * const master = + new LZ_mtester( buffer, buffer_size, dictionary_size ); + if( master->test_member( pos_limit ) == -1 ) return master; + delete master; + return 0; + } + + +bool test_member_rest( const LZ_mtester & master, long * const failure_posp = 0 ) + { + LZ_mtester mtester( master ); + mtester.duplicate_buffer(); + if( mtester.test_member() == 0 && mtester.finished() ) return true; + if( failure_posp ) *failure_posp = mtester.member_position(); + return false; + } + + // Return value: -1 = master failed, 0 = begin reached, >0 = repaired pos long repair_member( const long long mpos, const long long msize, uint8_t * const mbuffer, const long begin, const long end, - const unsigned dictionary_size ) + const unsigned dictionary_size, const char terminator ) { for( long pos = end; pos >= begin && pos > end - 50000; ) { @@ -120,7 +158,7 @@ long repair_member( const long long mpos, const long long msize, { if( verbosity >= 2 ) { - std::printf( " Trying position %llu \r", mpos + pos ); + std::printf( " Trying position %llu %c", mpos + pos, terminator ); std::fflush( stdout ); pending_newline = true; } for( int j = 0; j < 255; ++j ) @@ -140,65 +178,62 @@ long repair_member( const long long mpos, const long long msize, int repair_file( const std::string & input_filename, const std::string & default_output_filename, - const bool force ) + const bool force, const char terminator ) { struct stat in_stats; const int infd = open_instream( input_filename.c_str(), &in_stats, true, true ); if( infd < 0 ) return 1; - Pretty_print pp( 
input_filename ); - const File_index file_index( infd, true, true, true ); - if( file_index.retval() != 0 ) - { show_file_error( input_filename.c_str(), file_index.error().c_str() ); - return file_index.retval(); } + const Lzip_index lzip_index( infd, true, true, true ); + if( lzip_index.retval() != 0 ) + { show_file_error( input_filename.c_str(), lzip_index.error().c_str() ); + return lzip_index.retval(); } output_filename = default_output_filename.empty() ? insert_fixed( input_filename ) : default_output_filename; if( !force && file_exists( output_filename ) ) return 1; outfd = -1; - for( long i = 0; i < file_index.members(); ++i ) + for( long i = 0; i < lzip_index.members(); ++i ) { - const long long mpos = file_index.mblock( i ).pos(); - const long long msize = file_index.mblock( i ).size(); - if( !safe_seek( infd, mpos ) ) - cleanup_and_fail( 1 ); + const long long mpos = lzip_index.mblock( i ).pos(); + const long long msize = lzip_index.mblock( i ).size(); + if( !safe_seek( infd, mpos ) ) cleanup_and_fail( 1 ); long long failure_pos = 0; - if( test_member_from_file( infd, msize, &failure_pos ) ) continue; - if( failure_pos < File_header::size ) // End Of File + if( test_member_from_file( infd, msize, &failure_pos ) == 0 ) continue; + if( failure_pos < Lzip_header::size ) // End Of File { show_error( "Can't repair error in input file." 
); cleanup_and_fail( 2 ); } + if( failure_pos >= msize - 8 ) failure_pos = msize - 8 - 1; if( verbosity >= 2 ) // damaged member found { std::printf( "Repairing member %ld of %ld (failure pos = %llu)\n", - i + 1, file_index.members(), mpos + failure_pos ); + i + 1, lzip_index.members(), mpos + failure_pos ); std::fflush( stdout ); } - if( failure_pos >= msize - 8 ) failure_pos = msize - 8 - 1; uint8_t * const mbuffer = read_member( infd, mpos, msize ); - if( !mbuffer ) - cleanup_and_fail( 1 ); - const File_header & header = *(File_header *)mbuffer; + if( !mbuffer ) cleanup_and_fail( 1 ); + const Lzip_header & header = *(const Lzip_header *)mbuffer; const unsigned dictionary_size = header.dictionary_size(); long pos = 0; if( !gross_damage( msize, mbuffer ) ) { pos = repair_dictionary_size( msize, mbuffer ); if( pos == 0 ) - pos = repair_member( mpos, msize, mbuffer, File_header::size + 1, - File_header::size + 5, dictionary_size ); + pos = repair_member( mpos, msize, mbuffer, Lzip_header::size + 1, + Lzip_header::size + 5, dictionary_size, terminator ); if( pos == 0 ) - pos = repair_member( mpos, msize, mbuffer, File_header::size + 6, - failure_pos, dictionary_size ); - print_pending_newline(); + pos = repair_member( mpos, msize, mbuffer, Lzip_header::size + 6, + failure_pos, dictionary_size, terminator ); + print_pending_newline( terminator ); } - if( pos < 0 ) - cleanup_and_fail( 1 ); + if( pos < 0 ) cleanup_and_fail( 1 ); if( pos > 0 ) { if( outfd < 0 ) // first damaged member repaired { if( !safe_seek( infd, 0 ) ) return 1; + set_signal_handler(); if( !open_outstream( true, false ) ) { close( infd ); return 1; } if( !copy_file( infd, outfd ) ) // copy whole file cleanup_and_fail( 1 ); @@ -228,39 +263,39 @@ int repair_file( const std::string & input_filename, } -int debug_delay( const std::string & input_filename, Block range ) +int debug_delay( const std::string & input_filename, Block range, + const char terminator ) { struct stat in_stats; // not used const 
int infd = open_instream( input_filename.c_str(), &in_stats, true, true ); if( infd < 0 ) return 1; - Pretty_print pp( input_filename ); - const File_index file_index( infd, false, true, true ); - if( file_index.retval() != 0 ) - { show_file_error( input_filename.c_str(), file_index.error().c_str() ); - return file_index.retval(); } + const Lzip_index lzip_index( infd, true, true ); + if( lzip_index.retval() != 0 ) + { show_file_error( input_filename.c_str(), lzip_index.error().c_str() ); + return lzip_index.retval(); } - if( range.end() > file_index.cdata_size() ) - range.size( std::max( 0LL, file_index.cdata_size() - range.pos() ) ); + if( range.end() > lzip_index.cdata_size() ) + range.size( std::max( 0LL, lzip_index.cdata_size() - range.pos() ) ); if( range.size() <= 0 ) - { pp( "Nothing to do." ); return 0; } + { show_file_error( input_filename.c_str(), "Nothing to do." ); return 0; } - for( long i = 0; i < file_index.members(); ++i ) + for( long i = 0; i < lzip_index.members(); ++i ) { - const Block & mb = file_index.mblock( i ); + const Block & mb = lzip_index.mblock( i ); if( !range.overlaps( mb ) ) continue; - const long long mpos = file_index.mblock( i ).pos(); - const long long msize = file_index.mblock( i ).size(); - const unsigned dictionary_size = file_index.dictionary_size( i ); + const long long mpos = lzip_index.mblock( i ).pos(); + const long long msize = lzip_index.mblock( i ).size(); + const unsigned dictionary_size = lzip_index.dictionary_size( i ); if( verbosity >= 2 ) { std::printf( "Finding max delay in member %ld of %ld (mpos = %llu, msize = %llu)\n", - i + 1, file_index.members(), mpos, msize ); + i + 1, lzip_index.members(), mpos, msize ); std::fflush( stdout ); } uint8_t * const mbuffer = read_member( infd, mpos, msize ); if( !mbuffer ) return 1; - long pos = std::max( range.pos() - mpos, File_header::size + 1LL ); + long pos = std::max( range.pos() - mpos, Lzip_header::size + 1LL ); const long end = std::min( range.end() - mpos, msize 
); long max_delay = 0; while( pos < end ) @@ -275,7 +310,7 @@ int debug_delay( const std::string & input_filename, Block range ) { if( verbosity >= 2 ) { - std::printf( " Delays at position %llu \r", mpos + pos ); + std::printf( " Delays at position %llu %c", mpos + pos, terminator ); std::fflush( stdout ); pending_newline = true; } int value = -1; @@ -299,7 +334,7 @@ int debug_delay( const std::string & input_filename, Block range ) delete master; } delete[] mbuffer; - print_pending_newline(); + print_pending_newline( terminator ); } if( verbosity >= 1 ) std::fputs( "Done.\n", stdout ); @@ -308,40 +343,39 @@ int debug_delay( const std::string & input_filename, Block range ) int debug_repair( const std::string & input_filename, - const Bad_byte & bad_byte ) + const Bad_byte & bad_byte, const char terminator ) { struct stat in_stats; // not used const int infd = open_instream( input_filename.c_str(), &in_stats, true, true ); if( infd < 0 ) return 1; - Pretty_print pp( input_filename ); - const File_index file_index( infd, false, true, true ); - if( file_index.retval() != 0 ) - { show_file_error( input_filename.c_str(), file_index.error().c_str() ); - return file_index.retval(); } + const Lzip_index lzip_index( infd, true, true ); + if( lzip_index.retval() != 0 ) + { show_file_error( input_filename.c_str(), lzip_index.error().c_str() ); + return lzip_index.retval(); } long idx = 0; - for( ; idx < file_index.members(); ++idx ) - if( file_index.mblock( idx ).includes( bad_byte.pos ) ) break; - if( idx >= file_index.members() ) - { pp( "Nothing to do." ); return 0; } + for( ; idx < lzip_index.members(); ++idx ) + if( lzip_index.mblock( idx ).includes( bad_byte.pos ) ) break; + if( idx >= lzip_index.members() ) + { show_file_error( input_filename.c_str(), "Nothing to do." 
); return 0; } - const long long mpos = file_index.mblock( idx ).pos(); - const long long msize = file_index.mblock( idx ).size(); + const long long mpos = lzip_index.mblock( idx ).pos(); + const long long msize = lzip_index.mblock( idx ).size(); { long long failure_pos = 0; if( !safe_seek( infd, mpos ) ) return 1; - if( !test_member_from_file( infd, msize, &failure_pos ) ) + if( test_member_from_file( infd, msize, &failure_pos ) != 0 ) { if( verbosity >= 0 ) std::printf( "Member %ld of %ld already damaged (failure pos = %llu)\n", - idx + 1, file_index.members(), mpos + failure_pos ); + idx + 1, lzip_index.members(), mpos + failure_pos ); return 1; } } uint8_t * const mbuffer = read_member( infd, mpos, msize ); if( !mbuffer ) return 1; - const File_header & header = *(File_header *)mbuffer; + const Lzip_header & header = *(const Lzip_header *)mbuffer; const unsigned dictionary_size = header.dictionary_size(); const uint8_t good_value = mbuffer[bad_byte.pos-mpos]; const uint8_t bad_value = bad_byte( good_value ); @@ -367,7 +401,7 @@ int debug_repair( const std::string & input_filename, { std::printf( "Test repairing member %ld of %ld (mpos = %llu, msize = %llu)\n" " (damage pos = %llu (0x%02X->0x%02X), failure pos = %llu, delay = %lld )\n", - idx + 1, file_index.members(), mpos, msize, + idx + 1, lzip_index.members(), mpos, msize, bad_byte.pos, good_value, bad_value, mpos + failure_pos, mpos + failure_pos - bad_byte.pos ); std::fflush( stdout ); @@ -375,12 +409,12 @@ int debug_repair( const std::string & input_filename, if( failure_pos >= msize ) failure_pos = msize - 1; long pos = repair_dictionary_size( msize, mbuffer ); if( pos == 0 ) - pos = repair_member( mpos, msize, mbuffer, File_header::size + 1, - File_header::size + 5, dictionary_size ); + pos = repair_member( mpos, msize, mbuffer, Lzip_header::size + 1, + Lzip_header::size + 5, dictionary_size, terminator ); if( pos == 0 ) - pos = repair_member( mpos, msize, mbuffer, File_header::size + 6, - failure_pos, 
dictionary_size ); - print_pending_newline(); + pos = repair_member( mpos, msize, mbuffer, Lzip_header::size + 6, + failure_pos, dictionary_size, terminator ); + print_pending_newline( terminator ); delete[] mbuffer; if( pos < 0 ) { show_error( "Can't prepare master." ); return 1; } @@ -398,30 +432,28 @@ int debug_decompress( const std::string & input_filename, const int infd = open_instream( input_filename.c_str(), &in_stats, true, true ); if( infd < 0 ) return 1; - Pretty_print pp( input_filename ); - const File_index file_index( infd, false, true, true ); - if( file_index.retval() != 0 ) - { show_file_error( input_filename.c_str(), file_index.error().c_str() ); - return file_index.retval(); } + const Lzip_index lzip_index( infd, true, true ); + if( lzip_index.retval() != 0 ) + { show_file_error( input_filename.c_str(), lzip_index.error().c_str() ); + return lzip_index.retval(); } outfd = show_packets ? -1 : STDOUT_FILENO; int retval = 0; - for( long i = 0; i < file_index.members(); ++i ) + for( long i = 0; i < lzip_index.members(); ++i ) { - const long long dpos = file_index.dblock( i ).pos(); - const long long mpos = file_index.mblock( i ).pos(); - const long long msize = file_index.mblock( i ).size(); - const unsigned dictionary_size = file_index.dictionary_size( i ); + const long long dpos = lzip_index.dblock( i ).pos(); + const long long mpos = lzip_index.mblock( i ).pos(); + const long long msize = lzip_index.mblock( i ).size(); + const unsigned dictionary_size = lzip_index.dictionary_size( i ); if( verbosity >= 1 && show_packets ) std::printf( "Decoding LZMA packets in member %ld of %ld (mpos = %llu, msize = %llu)\n" " mpos dpos\n", - i + 1, file_index.members(), mpos, msize ); + i + 1, lzip_index.members(), mpos, msize ); if( !isvalid_ds( dictionary_size ) ) - { show_error( "Invalid dictionary size in member header." 
); - retval = 2; break; } + { show_error( bad_dict_msg ); retval = 2; break; } uint8_t * const mbuffer = read_member( infd, mpos, msize ); if( !mbuffer ) { retval = 1; break; } - if( bad_byte.pos >= 0 && file_index.mblock( i ).includes( bad_byte.pos ) ) + if( bad_byte.pos >= 0 && lzip_index.mblock( i ).includes( bad_byte.pos ) ) { const uint8_t good_value = mbuffer[bad_byte.pos-mpos]; const uint8_t bad_value = bad_byte( good_value ); @@ -441,7 +473,7 @@ int debug_decompress( const std::string & input_filename, mpos + mtester.member_position() ); retval = 2; break; } - if( i + 1 < file_index.members() && show_packets ) + if( i + 1 < lzip_index.members() && show_packets ) std::fputc( '\n', stdout ); } @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2018 Antonio Diaz Diaz. + Copyright (C) 2009-2019 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -29,8 +29,7 @@ #include <sys/stat.h> #include "lzip.h" -#include "block.h" -#include "file_index.h" +#include "lzip_index.h" namespace { @@ -50,6 +49,11 @@ void first_filename( const std::string & input_filename, bool next_filename( const int max_digits ) { + if( verbosity >= 1 ) + { + std::printf( "Member '%s' done \n", output_filename.c_str() ); + std::fflush( stdout ); + } int b = output_filename.size(); while( b > 0 && output_filename[b-1] != '/' ) --b; for( int i = b + max_digits + 2; i > b + 2; --i ) // "rec<max_digits>" @@ -60,147 +64,81 @@ bool next_filename( const int max_digits ) return false; } - -// Search forward from 'pos' for "LZIP" (Boyer-Moore algorithm) -// Returns pos of found string or 'pos+size' if not found. 
-// -int find_magic( const uint8_t * const buffer, const int pos, const int size ) - { - const unsigned char table[256] = { - 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, - 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, - 4,4,4,4,4,4,4,4,4,1,4,4,3,4,4,4,4,4,4,4,4,4,4,4,4,4,2,4,4,4,4,4, - 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, - 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, - 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, - 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, - 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4 }; - - for( int i = pos; i <= pos + size - 4; i += table[buffer[i+3]] ) - if( buffer[i] == 'L' && buffer[i+1] == 'Z' && - buffer[i+2] == 'I' && buffer[i+3] == 'P' ) - return i; // magic string found - return pos + size; - } +} // end namespace -int do_split_file( const std::string & input_filename, uint8_t * & base_buffer, - const std::string & default_output_filename, - const bool force ) +int split_file( const std::string & input_filename, + const std::string & default_output_filename, const bool force ) { - const int hsize = File_header::size; - const int tsize = File_trailer::size; - const int buffer_size = 65536; - const int base_buffer_size = tsize + buffer_size + hsize; - base_buffer = new uint8_t[base_buffer_size]; - uint8_t * const buffer = base_buffer + tsize; - struct stat in_stats; const int infd = open_instream( input_filename.c_str(), &in_stats, true, true ); if( infd < 0 ) return 1; - Pretty_print pp( input_filename ); - - // don't move this after seek_read - const File_index file_index( infd, true, true, true ); -// if( file_index.retval() != 0 ) pp( file_index.error().c_str() ); - const long max_members = file_index.retval() ? 
999999 : file_index.members(); - int max_digits = 1; - for( long i = max_members; i >= 10; i /= 10 ) ++max_digits; - - int size = seek_read( infd, buffer, buffer_size + hsize, 0 ) - hsize; - bool at_stream_end = ( size < buffer_size ); - if( size != buffer_size && errno ) - { show_error( "Read error", errno ); return 1; } - if( size < min_member_size ) - { pp( "Input file is too short." ); return 2; } - if( !verify_header( *(File_header *)buffer, pp ) ) return 2; - - first_filename( input_filename, default_output_filename, max_digits ); - if( !open_outstream( force, false, false, false ) ) - { close( infd ); return 1; } - unsigned long long partial_member_size = 0; - const bool ttyout = isatty( STDOUT_FILENO ); - while( true ) + Lzip_index lzip_index( infd, true, true, true, true ); + if( lzip_index.retval() != 0 ) { - int pos = 0; - for( int newpos = 1; newpos <= size; ++newpos ) + show_file_error( input_filename.c_str(), lzip_index.error().c_str() ); + return lzip_index.retval(); + } + // verify last member + const Block b = lzip_index.mblock( lzip_index.members() - 1 ); + long long mpos = b.pos(); + long long msize = b.size(); + long long failure_pos = 0; + if( !safe_seek( infd, mpos ) ) return 1; + if( test_member_from_file( infd, msize, &failure_pos ) == 1 ) + { // corrupt or fake trailer + while( true ) { - newpos = find_magic( buffer, newpos, size + 4 - newpos ); - if( newpos <= size ) - { - const File_trailer & trailer = *(File_trailer *)(base_buffer + newpos); - if( partial_member_size + newpos - pos == trailer.member_size() ) - { // header found - const int wr = writeblock( outfd, buffer + pos, newpos - pos ); - if( wr != newpos - pos ) - { show_error( "Write error", errno ); return 1; } - if( close_outstream( &in_stats ) != 0 ) return 1; - if( verbosity >= 1 ) - { - std::printf( "Member '%s' done %c", output_filename.c_str(), - ttyout ? 
'\r' : '\n' ); - std::fflush( stdout ); - } - if( !next_filename( max_digits ) ) - { show_error( "Too many members in file." ); close( infd ); return 1; } - if( !open_outstream( force, false, false, false ) ) - { close( infd ); return 1; } - partial_member_size = 0; - pos = newpos; - } - } + mpos += failure_pos; msize -= failure_pos; + if( msize < min_member_size ) break; // trailing data + if( !safe_seek( infd, mpos ) ) return 1; + if( test_member_from_file( infd, msize, &failure_pos ) != 1 ) break; } - - if( at_stream_end ) + lzip_index = Lzip_index( infd, true, true, true, true, mpos ); + if( lzip_index.retval() != 0 ) { - const int wr = writeblock( outfd, buffer + pos, size + hsize - pos ); - if( wr != size + hsize - pos ) - { show_error( "Write error", errno ); return 1; } - break; + show_file_error( input_filename.c_str(), lzip_index.error().c_str() ); + return lzip_index.retval(); } - if( pos < buffer_size ) + } + + if( !safe_seek( infd, 0 ) ) return 1; + int max_digits = 1; + for( long i = lzip_index.blocks( true ); i >= 10; i /= 10 ) ++max_digits; + first_filename( input_filename, default_output_filename, max_digits ); + + long long stream_pos = 0; // first pos not yet written to file + set_signal_handler(); + for( long i = 0; i < lzip_index.members(); ++i ) + { + const Block & mb = lzip_index.mblock( i ); + if( mb.pos() > stream_pos ) // gap { - partial_member_size += buffer_size - pos; - const int wr = writeblock( outfd, buffer + pos, buffer_size - pos ); - if( wr != buffer_size - pos ) - { show_error( "Write error", errno ); return 1; } + if( !open_outstream( force, false, false, false ) ) + { close( infd ); return 1; } + if( !copy_file( infd, outfd, mb.pos() - stream_pos ) || + close_outstream( &in_stats ) != 0 ) + cleanup_and_fail( 1 ); + next_filename( max_digits ); } - std::memcpy( base_buffer, base_buffer + buffer_size, tsize + hsize ); - size = readblock( infd, buffer + hsize, buffer_size ); - at_stream_end = ( size < buffer_size ); - if( size != 
buffer_size && errno ) - { show_error( "Read error", errno ); return 1; } + if( !open_outstream( force, false, false, false ) ) // member + { close( infd ); return 1; } + if( !copy_file( infd, outfd, mb.size() ) || + close_outstream( &in_stats ) != 0 ) + cleanup_and_fail( 1 ); + next_filename( max_digits ); + stream_pos = mb.end(); } - close( infd ); - if( close_outstream( &in_stats ) != 0 ) return 1; - if( verbosity >= 1 ) + if( lzip_index.file_size() > stream_pos ) // trailing data { - std::printf( "Member '%s' done \n", output_filename.c_str() ); - std::fflush( stdout ); + if( !open_outstream( force, false, false, false ) ) + { close( infd ); return 1; } + if( !copy_file( infd, outfd, lzip_index.file_size() - stream_pos ) || + close_outstream( &in_stats ) != 0 ) + cleanup_and_fail( 1 ); + next_filename( max_digits ); } + close( infd ); return 0; } - -} // end namespace - - -bool verify_header( const File_header & header, const Pretty_print & pp ) - { - if( !header.verify_magic() ) - { pp( bad_magic_msg ); return false; } - if( !header.verify_version() ) - { pp( bad_version( header.version() ) ); return false; } - return true; - } - - -int split_file( const std::string & input_filename, - const std::string & default_output_filename, const bool force ) - { - uint8_t * base_buffer; - const int retval = do_split_file( input_filename, base_buffer, - default_output_filename, force ); - delete[] base_buffer; - return retval; - } diff --git a/testsuite/check.sh b/testsuite/check.sh index 76ad361..a902d93 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! /bin/sh # check script for Lziprecover - Data recovery tool for the lzip format -# Copyright (C) 2009-2018 Antonio Diaz Diaz. +# Copyright (C) 2009-2019 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute and modify it. 
@@ -33,6 +33,12 @@ cat "${testdir}"/test.txt > in || framework_failure in_lz="${testdir}"/test.txt.lz in_lzma="${testdir}"/test.txt.lzma inD="${testdir}"/test21723.txt +bad1_lz="${testdir}"/test_bad1.lz +bad2_lz="${testdir}"/test_bad2.lz +bad3_lz="${testdir}"/test_bad3.lz +bad4_lz="${testdir}"/test_bad4.lz +bad5_lz="${testdir}"/test_bad5.lz +fox_lz="${testdir}"/fox.lz fox6_lz="${testdir}"/fox6.lz f6b1="${testdir}"/fox6_bad1.txt f6b1_lz="${testdir}"/fox6_bad1.lz @@ -40,30 +46,53 @@ f6b2_lz="${testdir}"/fox6_bad2.lz f6b3_lz="${testdir}"/fox6_bad3.lz f6b4_lz="${testdir}"/fox6_bad4.lz f6b5_lz="${testdir}"/fox6_bad5.lz -bad1_lz="${testdir}"/test_bad1.lz -bad2_lz="${testdir}"/test_bad2.lz -bad3_lz="${testdir}"/test_bad3.lz -bad4_lz="${testdir}"/test_bad4.lz -bad5_lz="${testdir}"/test_bad5.lz +f6b6_lz="${testdir}"/fox6_bad6.lz +f6s1_lz="${testdir}"/fox6_sc1.lz +f6s2_lz="${testdir}"/fox6_sc2.lz +f6s3_lz="${testdir}"/fox6_sc3.lz +f6s4_lz="${testdir}"/fox6_sc4.lz +f6s5_lz="${testdir}"/fox6_sc5.lz +f6s6_lz="${testdir}"/fox6_sc6.lz +num_lz="${testdir}"/numbers.lz +nbt_lz="${testdir}"/numbersbt.lz fail=0 test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; } # Description of test files for lziprecover: +# single-member files with one or more errors +# test_bad1.lz: byte at offset 66 changed from 0xA6 to 0x46 +# test_bad2.lz: [ 34- 65] --> copy of bytes [ 68- 99] +# test_bad3.lz: [ 512-1535] --> zeroed [2560-3583] --> zeroed +# test_bad4.lz: [3072-4095] --> random errors [4608-5631] --> zeroed +# test_bad5.lz: [1024-2047] --> random errors [5120-6143] --> random data +# +# 6-member files with one or more errors # fox6_bad1.lz: byte at offset 5 changed from 0x0C to 0x00 (DS) # byte at offset 142 changed from 0x50 to 0x70 (CRC) # byte at offset 224 changed from 0x2D to 0x2E (data_size) # byte at offset 268 changed from 0x34 to 0x33 (mid stream) # byte at offset 327 changed from 0x2A to 0x2B (byte 7) # byte at offset 458 changed from 0xA0 to 0x20 (EOS marker) -# 
fox6_bad2.lz: [110-129] --> zeroed -# fox6_bad3.lz: [180-379] --> zeroed -# fox6_bad4.lz: [330-429] --> zeroed -# fox6_bad5.lz: [380-479] --> zeroed -# test_bad1.lz: byte at offset 66 changed from 0xA6 to 0x46 -# test_bad2.lz: [ 34- 65] --> copy of bytes [ 68- 99] -# test_bad3.lz: [ 512-1535] --> zeroed [2560-3583] --> zeroed -# test_bad4.lz: [3072-4095] --> random errors [4608-5631] --> zeroed -# test_bad5.lz: [1024-2047] --> random errors [5120-6143] --> random data +# fox6_bad2.lz: [110-129] --> zeroed (member 2) +# fox6_bad3.lz: [180-379] --> zeroed (members 3-5) +# fox6_bad4.lz: [330-429] --> zeroed (members 5,6) +# fox6_bad5.lz: [380-479] --> zeroed (members 5,6) +# fox6_bad6.lz: [430-439] --> zeroed (member 6) +# +# 6-member files "shortcircuited" by a corrupt or fake trailer +# fox6_sc1.lz: (corrupt but consistent last trailer) +# last CRC != 0 ; dsize = 4 * msize ; msize = 480 (file size) +# fox6_sc2.lz: (appended fake but consistent trailer) +# fake CRC != 0 ; dsize = 4 * msize ; msize = 500 (file size) +# fox6_sc3.lz: fake CRC = 0 +# fox6_sc4.lz: fake dsize = 0 +# fox6_sc5.lz: fake dsize = 411 (< 8 * ( fake msize - 36 ) / 9) +# fox6_sc6.lz: fake dsize = 3360660 (>= 7090 * ( fake msize - 26 )) +# +# 9-member files "one_" "two_" "three_" "four_" "five_" "six_" "seven_" +# "eight_" "nine_" +# numbers.lz : good file containing the 9 members shown above +# numbersbt.lz: "gap" after "three_", "damaged" after "six_", "trailing data" printf "testing lziprecover-%s..." "$2" @@ -113,11 +142,53 @@ printf "LZIP\001+.............................." | "${LZIP}" -t 2> /dev/null [ $? = 1 ] || test_failed $LINENO "${LZIPRECOVER}" -sq [ $? = 1 ] || test_failed $LINENO -"${LZIPRECOVER}" -q --dump-tdata +"${LZIPRECOVER}" -t --remove=damaged "${in_lz}" 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIPRECOVER}" --strip=damaged -t "${in_lz}" 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIPRECOVER}" --remove=tdata -t "${in_lz}" 2> /dev/null +[ $? 
= 1 ] || test_failed $LINENO +"${LZIPRECOVER}" -t --strip=tdata "${in_lz}" 2> /dev/null [ $? = 1 ] || test_failed $LINENO -"${LZIPRECOVER}" -q --strip-tdata +"${LZIPRECOVER}" -q --dump=tdata --strip=damaged "${in_lz}" [ $? = 1 ] || test_failed $LINENO -"${LZIPRECOVER}" -q --remove-tdata +"${LZIPRECOVER}" --remove=tdata --strip=damaged "${in_lz}" 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIPRECOVER}" -q --dump=damaged +[ $? = 1 ] || test_failed $LINENO +"${LZIPRECOVER}" -q --dump=damaged in > /dev/null +[ $? = 2 ] || test_failed $LINENO +"${LZIPRECOVER}" -q --dump=damagedd "${in_lz}" > /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIPRECOVER}" -q --strip=damaged +[ $? = 1 ] || test_failed $LINENO +"${LZIPRECOVER}" -q --strip=damaged in > /dev/null +[ $? = 2 ] || test_failed $LINENO +"${LZIPRECOVER}" -q --strip=damagedd "${in_lz}" > /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIPRECOVER}" -q --remove=damaged +[ $? = 1 ] || test_failed $LINENO +"${LZIPRECOVER}" -q --remove=damaged in +[ $? = 2 ] || test_failed $LINENO +"${LZIPRECOVER}" -q --remove=damagedd "${in_lz}" +[ $? = 1 ] || test_failed $LINENO +"${LZIPRECOVER}" -q --dump=tdata +[ $? = 1 ] || test_failed $LINENO +"${LZIPRECOVER}" -q --dump=tdata in > /dev/null +[ $? = 2 ] || test_failed $LINENO +"${LZIPRECOVER}" -q --dump=tdataa "${in_lz}" > /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIPRECOVER}" -q --strip=tdata +[ $? = 1 ] || test_failed $LINENO +"${LZIPRECOVER}" -q --strip=tdata in > /dev/null +[ $? = 2 ] || test_failed $LINENO +"${LZIPRECOVER}" -q --strip=tdataa "${in_lz}" > /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIPRECOVER}" -q --remove=tdata +[ $? = 1 ] || test_failed $LINENO +"${LZIPRECOVER}" -q --remove=tdata in +[ $? = 2 ] || test_failed $LINENO +"${LZIPRECOVER}" -q --remove=tdataa "${in_lz}" [ $? = 1 ] || test_failed $LINENO "${LZIPRECOVER}" -Aq in @@ -130,30 +201,31 @@ printf "LZIP\001+.............................." | "${LZIP}" -t 2> /dev/null [ $? 
= 1 ] || test_failed $LINENO "${LZIPRECOVER}" -Akq "${in_lzma}" [ $? = 1 ] || test_failed $LINENO -"${LZIPRECOVER}" -Ac "${in_lzma}" > copy.lz -{ [ $? = 0 ] && cmp "${in_lz}" copy.lz ; } || test_failed $LINENO -"${LZIPRECOVER}" -A < "${in_lzma}" > copy.lz -{ [ $? = 0 ] && cmp "${in_lz}" copy.lz ; } || test_failed $LINENO -rm -f copy.lz +"${LZIPRECOVER}" -Ac "${in_lzma}" > copy.lz || test_failed $LINENO +cmp "${in_lz}" copy.lz || test_failed $LINENO +"${LZIPRECOVER}" -A < "${in_lzma}" > copy.lz || test_failed $LINENO +cmp "${in_lz}" copy.lz || test_failed $LINENO +rm -f copy.lz || framework_failure cat "${in_lzma}" > copy.lzma || framework_failure -"${LZIPRECOVER}" -Ak copy.lzma -{ [ $? = 0 ] && cmp "${in_lz}" copy.lz ; } || test_failed $LINENO +"${LZIPRECOVER}" -Ak copy.lzma || test_failed $LINENO +cmp "${in_lz}" copy.lz || test_failed $LINENO printf "to be overwritten" > copy.lz || framework_failure -"${LZIPRECOVER}" -Af copy.lzma -{ [ $? = 0 ] && cmp "${in_lz}" copy.lz ; } || test_failed $LINENO -rm -f copy.lz +"${LZIPRECOVER}" -Af copy.lzma || test_failed $LINENO +cmp "${in_lz}" copy.lz || test_failed $LINENO +rm -f copy.lz || framework_failure cat "${in_lzma}" > copy.tlz || framework_failure -"${LZIPRECOVER}" -Ak copy.tlz -{ [ $? = 0 ] && cmp "${in_lz}" copy.tar.lz ; } || test_failed $LINENO +"${LZIPRECOVER}" -Ak copy.tlz || test_failed $LINENO +cmp "${in_lz}" copy.tar.lz || test_failed $LINENO printf "to be overwritten" > copy.tar.lz || framework_failure -"${LZIPRECOVER}" -Af copy.tlz -{ [ $? = 0 ] && cmp "${in_lz}" copy.tar.lz ; } || test_failed $LINENO -rm -f copy.tar.lz +"${LZIPRECOVER}" -Af copy.tlz || test_failed $LINENO +cmp "${in_lz}" copy.tar.lz || test_failed $LINENO +rm -f copy.tar.lz || framework_failure cat "${in_lzma}" > anyothername || framework_failure -"${LZIPRECOVER}" -A -o copy - anyothername - < "${in_lzma}" -{ [ $? 
= 0 ] && cmp "${in_lz}" copy.lz && cmp "${in_lz}" anyothername.lz ; } || +"${LZIPRECOVER}" -A -o copy - anyothername - < "${in_lzma}" || test_failed $LINENO -rm -f copy.lz anyothername.lz +cmp "${in_lz}" copy.lz || test_failed $LINENO +cmp "${in_lz}" anyothername.lz || test_failed $LINENO +rm -f copy.lz anyothername.lz || framework_failure printf "\ntesting decompression..." @@ -162,26 +234,28 @@ printf "\ntesting decompression..." "${LZIP}" -cd "${in_lz}" > copy || test_failed $LINENO cmp in copy || test_failed $LINENO -rm -f copy +rm -f copy || framework_failure cat "${in_lz}" > copy.lz || framework_failure "${LZIP}" -dk copy.lz || test_failed $LINENO cmp in copy || test_failed $LINENO printf "to be overwritten" > copy || framework_failure "${LZIP}" -d copy.lz 2> /dev/null [ $? = 1 ] || test_failed $LINENO -"${LZIP}" -df copy.lz -{ [ $? = 0 ] && [ ! -e copy.lz ] && cmp in copy ; } || test_failed $LINENO +"${LZIP}" -df copy.lz || test_failed $LINENO +[ ! -e copy.lz ] || test_failed $LINENO +cmp in copy || test_failed $LINENO printf "to be overwritten" > copy || framework_failure "${LZIP}" -df -o copy < "${in_lz}" || test_failed $LINENO cmp in copy || test_failed $LINENO -rm -f copy +rm -f copy || framework_failure cat "${in_lz}" > anyothername || framework_failure -"${LZIP}" -dv --output copy - anyothername - < "${in_lz}" 2> /dev/null -{ [ $? = 0 ] && cmp in copy && cmp in anyothername.out ; } || +"${LZIP}" -dv --output copy - anyothername - < "${in_lz}" 2> /dev/null || test_failed $LINENO -rm -f copy anyothername.out +cmp in copy || test_failed $LINENO +cmp in anyothername.out || test_failed $LINENO +rm -f copy anyothername.out || framework_failure "${LZIP}" -lq in "${in_lz}" [ $? = 2 ] || test_failed $LINENO @@ -192,10 +266,12 @@ rm -f copy anyothername.out "${LZIP}" -tq nx_file.lz "${in_lz}" [ $? = 1 ] || test_failed $LINENO "${LZIP}" -cdq in "${in_lz}" > copy -{ [ $? = 2 ] && cat copy in | cmp in - ; } || test_failed $LINENO +[ $? 
= 2 ] || test_failed $LINENO +cat copy in | cmp in - || test_failed $LINENO "${LZIP}" -cdq nx_file.lz "${in_lz}" > copy -{ [ $? = 1 ] && cmp in copy ; } || test_failed $LINENO -rm -f copy +[ $? = 1 ] || test_failed $LINENO +cmp in copy || test_failed $LINENO +rm -f copy || framework_failure cat "${in_lz}" > copy.lz || framework_failure for i in 1 2 3 4 5 6 7 ; do printf "g" >> copy.lz || framework_failure @@ -205,11 +281,15 @@ for i in 1 2 3 4 5 6 7 ; do [ $? = 2 ] || test_failed $LINENO $i done "${LZIP}" -dq in copy.lz -{ [ $? = 2 ] && [ -e copy.lz ] && [ ! -e copy ] && [ ! -e in.out ] ; } || - test_failed $LINENO +[ $? = 2 ] || test_failed $LINENO +[ -e copy.lz ] || test_failed $LINENO +[ ! -e copy ] || test_failed $LINENO +[ ! -e in.out ] || test_failed $LINENO "${LZIP}" -dq nx_file.lz copy.lz -{ [ $? = 1 ] && [ ! -e copy.lz ] && [ ! -e nx_file ] && cmp in copy ; } || - test_failed $LINENO +[ $? = 1 ] || test_failed $LINENO +[ ! -e copy.lz ] || test_failed $LINENO +[ ! -e nx_file ] || test_failed $LINENO +cmp in copy || test_failed $LINENO cat in in > in2 || framework_failure cat "${in_lz}" "${in_lz}" > in2.lz || framework_failure @@ -221,7 +301,7 @@ cmp in2 copy2 || test_failed $LINENO cat in2.lz > copy2.lz || framework_failure printf "\ngarbage" >> copy2.lz || framework_failure "${LZIP}" -tvvvv copy2.lz 2> /dev/null || test_failed $LINENO -rm -f copy2 +rm -f copy2 || framework_failure "${LZIP}" -aD0 -q copy2.lz [ $? = 2 ] || test_failed $LINENO "${LZIP}" -alq copy2.lz @@ -231,9 +311,11 @@ rm -f copy2 "${LZIP}" -atq < copy2.lz [ $? = 2 ] || test_failed $LINENO "${LZIP}" -adkq copy2.lz -{ [ $? = 2 ] && [ ! -e copy2 ] ; } || test_failed $LINENO +[ $? = 2 ] || test_failed $LINENO +[ ! -e copy2 ] || test_failed $LINENO "${LZIP}" -adkq -o copy2 < copy2.lz -{ [ $? = 2 ] && [ ! -e copy2 ] ; } || test_failed $LINENO +[ $? = 2 ] || test_failed $LINENO +[ ! 
-e copy2 ] || test_failed $LINENO printf "to be overwritten" > copy2 || framework_failure "${LZIP}" -df copy2.lz || test_failed $LINENO cmp in2 copy2 || test_failed $LINENO @@ -246,59 +328,88 @@ cmp "${inD}" copy || test_failed $LINENO "${LZIPRECOVER}" -D 21723,397 "${in_lz}" > copy || test_failed $LINENO cmp "${inD}" copy || test_failed $LINENO "${LZIPRECOVER}" -D0 -iq "${f6b1_lz}" -fo copy -{ [ $? = 2 ] && cmp "${f6b1}" copy ; } || test_failed $LINENO +[ $? = 2 ] || test_failed $LINENO +cmp "${f6b1}" copy || test_failed $LINENO "${LZIPRECOVER}" -D0 -iq "${f6b1_lz}" > copy -{ [ $? = 2 ] && cmp "${f6b1}" copy ; } || test_failed $LINENO +[ $? = 2 ] || test_failed $LINENO +cmp "${f6b1}" copy || test_failed $LINENO + +printf "LZIP\001+" > in2t.lz || framework_failure # gap size < 36 bytes +cat "${in_lz}" in "${in_lz}" >> in2t.lz || framework_failure +printf "LZIP\001-" >> in2t.lz || framework_failure # truncated member +"${LZIPRECOVER}" -D0 -i in2t.lz > copy2 || test_failed $LINENO +cmp in2 copy2 || test_failed $LINENO +rm -f in2 in2t.lz copy2 || framework_failure printf "\ntesting bad input..." headers='LZIp LZiP LZip LzIP LzIp LziP lZIP lZIp lZiP lzIP' body='\001\014\000\203\377\373\377\377\300\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000$\000\000\000\000\000\000\000' -cat "${in_lz}" > in0.lz -printf "LZIP${body}" >> in0.lz -if "${LZIP}" -tq in0.lz ; then +cat "${in_lz}" > int.lz +printf "LZIP${body}" >> int.lz +if "${LZIP}" -tq int.lz ; then for header in ${headers} ; do - printf "${header}${body}" > in0.lz # first member - "${LZIP}" -lq in0.lz + printf "${header}${body}" > int.lz # first member + "${LZIP}" -lq int.lz [ $? = 2 ] || test_failed $LINENO ${header} - "${LZIP}" -tq in0.lz + "${LZIP}" -tq int.lz [ $? = 2 ] || test_failed $LINENO ${header} - "${LZIP}" -lq --loose-trailing in0.lz + "${LZIP}" -tq < int.lz [ $? = 2 ] || test_failed $LINENO ${header} - "${LZIP}" -tq --loose-trailing in0.lz + "${LZIP}" -cdq int.lz > /dev/null [ $? 
= 2 ] || test_failed $LINENO ${header} - cat "${in_lz}" > in0.lz - printf "${header}${body}" >> in0.lz # trailing data - "${LZIP}" -lq in0.lz + "${LZIP}" -lq --loose-trailing int.lz [ $? = 2 ] || test_failed $LINENO ${header} - "${LZIP}" -tq in0.lz + "${LZIP}" -tq --loose-trailing int.lz [ $? = 2 ] || test_failed $LINENO ${header} - "${LZIP}" -lq --loose-trailing in0.lz - [ $? = 0 ] || test_failed $LINENO ${header} - "${LZIP}" -t --loose-trailing in0.lz - [ $? = 0 ] || test_failed $LINENO ${header} - "${LZIP}" -lq --loose-trailing --trailing-error in0.lz + "${LZIP}" -tq --loose-trailing < int.lz [ $? = 2 ] || test_failed $LINENO ${header} - "${LZIP}" -tq --loose-trailing --trailing-error in0.lz + "${LZIP}" -cdq --loose-trailing int.lz > /dev/null [ $? = 2 ] || test_failed $LINENO ${header} - "${LZIPRECOVER}" -q --dump-tdata in0.lz > /dev/null + cat "${in_lz}" > int.lz + printf "${header}${body}" >> int.lz # trailing data + "${LZIP}" -lq int.lz [ $? = 2 ] || test_failed $LINENO ${header} - "${LZIPRECOVER}" -q --strip-tdata in0.lz > /dev/null + "${LZIP}" -tq int.lz [ $? = 2 ] || test_failed $LINENO ${header} - "${LZIPRECOVER}" --dump-tdata --loose-trailing in0.lz > /dev/null - [ $? = 0 ] || test_failed $LINENO ${header} - "${LZIPRECOVER}" --strip-tdata --loose-trailing in0.lz > /dev/null - [ $? = 0 ] || test_failed $LINENO ${header} - "${LZIPRECOVER}" -q --remove-tdata in0.lz + "${LZIP}" -tq < int.lz [ $? = 2 ] || test_failed $LINENO ${header} - "${LZIPRECOVER}" --remove-tdata --loose-trailing in0.lz - [ $? = 0 ] || test_failed $LINENO ${header} - cmp "${in_lz}" in0.lz || test_failed $LINENO ${header} + "${LZIP}" -cdq int.lz > /dev/null + [ $? 
= 2 ] || test_failed $LINENO ${header} + "${LZIP}" -lq --loose-trailing int.lz || + test_failed $LINENO ${header} + "${LZIP}" -t --loose-trailing int.lz || + test_failed $LINENO ${header} + "${LZIP}" -t --loose-trailing < int.lz || + test_failed $LINENO ${header} + "${LZIP}" -cd --loose-trailing int.lz > /dev/null || + test_failed $LINENO ${header} + "${LZIP}" -lq --loose-trailing --trailing-error int.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -tq --loose-trailing --trailing-error int.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -tq --loose-trailing --trailing-error < int.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -cdq --loose-trailing --trailing-error int.lz > /dev/null + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIPRECOVER}" -q --dump=tdata int.lz > /dev/null + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIPRECOVER}" -q --strip=tdata int.lz > /dev/null + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIPRECOVER}" --dump=tdata --loose-trailing int.lz > \ + /dev/null || test_failed $LINENO ${header} + "${LZIPRECOVER}" --strip=tdata --loose-trailing int.lz > \ + /dev/null || test_failed $LINENO ${header} + "${LZIPRECOVER}" -q --remove=tdata int.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIPRECOVER}" --remove=tdata --loose-trailing int.lz || + test_failed $LINENO ${header} + cmp "${in_lz}" int.lz || test_failed $LINENO ${header} done else printf "\nwarning: skipping header test: 'printf' does not work on your system." fi -rm -f in0.lz +rm -f int.lz || framework_failure cat "${in_lz}" "${in_lz}" "${in_lz}" > in3.lz || framework_failure if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null && @@ -319,55 +430,64 @@ if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null && else printf "\nwarning: skipping truncation test: 'dd' does not work on your system." 
fi -rm -f in3.lz trunc.lz +rm -f in3.lz trunc.lz out || framework_failure + +for i in "${f6s1_lz}" "${f6s2_lz}" ; do + lines=`"${LZIP}" -lvv "$i" | wc -l || test_failed $LINENO "$i"` + [ "${lines}" -eq 2 ] || test_failed $LINENO "$i" +done +for i in "${f6s3_lz}" "${f6s4_lz}" "${f6s5_lz}" "${f6s6_lz}" ; do + lines=`"${LZIP}" -lvv "$i" | wc -l || test_failed $LINENO "$i"` + [ "${lines}" -eq 9 ] || test_failed $LINENO "$i" +done cat "${in_lz}" > ingin.lz || framework_failure printf "g" >> ingin.lz || framework_failure cat "${in_lz}" >> ingin.lz || framework_failure "${LZIP}" -lq ingin.lz [ $? = 2 ] || test_failed $LINENO +"${LZIP}" -lq -i ingin.lz || test_failed $LINENO "${LZIP}" -t ingin.lz || test_failed $LINENO "${LZIP}" -cd ingin.lz > copy || test_failed $LINENO cmp in copy || test_failed $LINENO "${LZIP}" -t < ingin.lz || test_failed $LINENO "${LZIP}" -d < ingin.lz > copy || test_failed $LINENO cmp in copy || test_failed $LINENO -"${LZIPRECOVER}" -q --dump-tdata ingin.lz -[ $? = 2 ] || test_failed $LINENO -"${LZIPRECOVER}" -q --strip-tdata ingin.lz -[ $? = 2 ] || test_failed $LINENO -"${LZIPRECOVER}" -q --remove-tdata ingin.lz -[ $? = 2 ] || test_failed $LINENO -rm -f ingin.lz printf "\ntesting --merge..." -rm -f copy.lz -"${LZIPRECOVER}" -m -o copy.lz "${fox6_lz}" "${f6b1_lz}" -{ [ $? = 0 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO -"${LZIPRECOVER}" -m -o copy.lz "${f6b1_lz}" "${fox6_lz}" -{ [ $? = 0 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO +rm -f copy.lz || framework_failure +"${LZIPRECOVER}" -m -o copy.lz "${fox6_lz}" "${f6b1_lz}" || test_failed $LINENO +[ ! -e copy.lz ] || test_failed $LINENO +"${LZIPRECOVER}" -m -o copy.lz "${f6b1_lz}" "${fox6_lz}" || test_failed $LINENO +[ ! -e copy.lz ] || test_failed $LINENO "${LZIPRECOVER}" -m -o copy.lz "${bad1_lz}" "${bad2_lz}" "${bad1_lz}" -q -{ [ $? = 2 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO +[ $? = 2 ] || test_failed $LINENO +[ ! 
-e copy.lz ] || test_failed $LINENO "${LZIPRECOVER}" -m -o copy.lz "${bad1_lz}" "${bad2_lz}" "${bad2_lz}" -q -{ [ $? = 2 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO +[ $? = 2 ] || test_failed $LINENO +[ ! -e copy.lz ] || test_failed $LINENO cat "${bad2_lz}" > bad2.lz || framework_failure "${LZIPRECOVER}" -m -o copy.lz "${bad1_lz}" "${bad2_lz}" bad2.lz -q -{ [ $? = 2 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO -rm -f bad2.lz +[ $? = 2 ] || test_failed $LINENO +[ ! -e copy.lz ] || test_failed $LINENO +rm -f bad2.lz || framework_failure "${LZIPRECOVER}" -m -o copy.lz "${f6b1_lz}" "${f6b5_lz}" -q -{ [ $? = 2 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO +[ $? = 2 ] || test_failed $LINENO +[ ! -e copy.lz ] || test_failed $LINENO "${LZIPRECOVER}" -m -o copy.lz "${f6b3_lz}" "${f6b5_lz}" -q -{ [ $? = 2 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO +[ $? = 2 ] || test_failed $LINENO +[ ! -e copy.lz ] || test_failed $LINENO "${LZIPRECOVER}" -m -o copy.lz "${bad3_lz}" "${bad4_lz}" -q -{ [ $? = 2 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO +[ $? = 2 ] || test_failed $LINENO +[ ! 
-e copy.lz ] || test_failed $LINENO "${LZIPRECOVER}" -mf -o copy.lz "${f6b1_lz}" "${f6b4_lz}" || test_failed $LINENO cmp "${fox6_lz}" copy.lz || test_failed $LINENO "${LZIPRECOVER}" -mf -o copy.lz "${f6b4_lz}" "${f6b1_lz}" || test_failed $LINENO cmp "${fox6_lz}" copy.lz || test_failed $LINENO -for i in "${f6b1_lz}" "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" ; do +for i in "${f6b1_lz}" "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" "${f6b6_lz}" ; do "${LZIPRECOVER}" -mf -o copy.lz "${f6b2_lz}" "$i" || test_failed $LINENO "$i" cmp "${fox6_lz}" copy.lz || test_failed $LINENO "$i" @@ -376,7 +496,7 @@ for i in "${f6b1_lz}" "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" ; do cmp "${fox6_lz}" copy.lz || test_failed $LINENO "$i" done -for i in "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" ; do +for i in "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" "${f6b6_lz}" ; do "${LZIPRECOVER}" -mf -o copy.lz "${f6b1_lz}" "${f6b2_lz}" "$i" || test_failed $LINENO "$i" cmp "${fox6_lz}" copy.lz || test_failed $LINENO "$i" @@ -431,6 +551,7 @@ cmp out4.lz copy4.lz || test_failed $LINENO cmp out4.lz copy4.lz || test_failed $LINENO "${LZIPRECOVER}" -mf -o out4.lz bad22.lz bad12.lz bad11.lz || test_failed $LINENO cmp out4.lz copy4.lz || test_failed $LINENO +rm -f bad11.lz bad12.lz bad22.lz || framework_failure for i in "${bad1_lz}" "${bad2_lz}" ; do for j in "${bad3_lz}" "${bad4_lz}" "${bad5_lz}" ; do @@ -484,18 +605,22 @@ cmp out4.lz copy4.lz || test_failed $LINENO "${LZIPRECOVER}" -mf -o out4.lz bad534.lz bad453.lz bad345.lz || test_failed $LINENO cmp out4.lz copy4.lz || test_failed $LINENO +rm -f bad345.lz bad453.lz bad534.lz out4.lz copy4.lz || framework_failure printf "\ntesting --repair..." -rm -f copy.lz +rm -f copy.lz || framework_failure "${LZIPRECOVER}" -R -o copy.lz "${fox6_lz}" || test_failed $LINENO -{ [ $? = 0 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO +[ ! -e copy.lz ] || test_failed $LINENO "${LZIPRECOVER}" -R -o copy.lz "${bad2_lz}" -q -{ [ $? = 2 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO +[ $? 
= 2 ] || test_failed $LINENO +[ ! -e copy.lz ] || test_failed $LINENO "${LZIPRECOVER}" -R -o copy.lz "${bad3_lz}" -q -{ [ $? = 2 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO +[ $? = 2 ] || test_failed $LINENO +[ ! -e copy.lz ] || test_failed $LINENO "${LZIPRECOVER}" -R -o copy.lz "${bad4_lz}" -q -{ [ $? = 2 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO +[ $? = 2 ] || test_failed $LINENO +[ ! -e copy.lz ] || test_failed $LINENO "${LZIPRECOVER}" -Rf -o copy.lz "${f6b1_lz}" || test_failed $LINENO cmp "${fox6_lz}" copy.lz || test_failed $LINENO "${LZIPRECOVER}" -Rf -o copy.lz "${bad1_lz}" || test_failed $LINENO @@ -503,56 +628,654 @@ cmp "${in_lz}" copy.lz || test_failed $LINENO cat "${f6b1_lz}" > copy.tar.lz || framework_failure "${LZIPRECOVER}" -R copy.tar.lz || test_failed $LINENO -{ [ $? = 0 ] && [ -e copy_fixed.tar.lz ] ; } || test_failed $LINENO +[ -e copy_fixed.tar.lz ] || test_failed $LINENO mv copy.tar.lz copy.lz || framework_failure "${LZIPRECOVER}" -R copy.lz || test_failed $LINENO -{ [ $? = 0 ] && [ -e copy_fixed.lz ] ; } || test_failed $LINENO +[ -e copy_fixed.lz ] || test_failed $LINENO mv copy.lz copy.tlz || framework_failure "${LZIPRECOVER}" -R copy.tlz || test_failed $LINENO -{ [ $? = 0 ] && [ -e copy_fixed.tlz ] ; } || test_failed $LINENO +[ -e copy_fixed.tlz ] || test_failed $LINENO +rm -f copy_fixed.* copy.tlz || framework_failure printf "\ntesting --split..." 
cat "${in_lz}" "${in_lz}" "${in_lz}" "${in_lz}" "${in_lz}" "${in_lz}" \ "${in_lz}" "${in_lz}" "${in_lz}" > in9.lz || framework_failure -printf "garbage" >> in9.lz || framework_failure "${LZIPRECOVER}" -s in9.lz || test_failed $LINENO for i in 1 2 3 4 5 6 7 8 9 ; do + cmp "${in_lz}" rec${i}in9.lz || test_failed $LINENO $i "${LZIP}" -cd rec${i}in9.lz > copy || test_failed $LINENO $i cmp in copy || test_failed $LINENO $i done cat rec*in9.lz | cmp in9.lz - || test_failed $LINENO +rm -f rec*in9.lz || framework_failure + +cat in9.lz > in9t.lz || framework_failure +printf "garbage" >> in9t.lz || framework_failure +"${LZIPRECOVER}" -s in9t.lz || test_failed $LINENO +for i in 01 02 03 04 05 06 07 08 09 ; do + cmp "${in_lz}" rec${i}in9t.lz || test_failed $LINENO $i + "${LZIP}" -cd rec${i}in9t.lz > copy || test_failed $LINENO $i + cmp in copy || test_failed $LINENO $i +done +[ -e rec10in9t.lz ] || test_failed $LINENO +[ ! -e rec11in9t.lz ] || test_failed $LINENO +cat rec*in9t.lz | cmp in9t.lz - || test_failed $LINENO +rm -f rec*in9t.lz in9t.lz || framework_failure + +printf "LZIP\001+" > in9t.lz || framework_failure # gap size < 36 bytes +cat "${in_lz}" "${in_lz}" "${in_lz}" in "${in_lz}" "${in_lz}" "${in_lz}" \ + "${in_lz}" "${in_lz}" "${in_lz}" in >> in9t.lz || framework_failure +"${LZIPRECOVER}" -s in9t.lz || test_failed $LINENO +for i in 02 03 04 06 07 08 09 10 11 ; do + cmp "${in_lz}" rec${i}in9t.lz || test_failed $LINENO $i + "${LZIP}" -cd rec${i}in9t.lz > copy || test_failed $LINENO $i + cmp in copy || test_failed $LINENO $i +done +cmp in rec05in9t.lz || test_failed $LINENO +cmp in rec12in9t.lz || test_failed $LINENO +[ -e rec01in9t.lz ] || test_failed $LINENO +[ ! 
-e rec13in9t.lz ] || test_failed $LINENO +cat rec*in9t.lz | cmp in9t.lz - || test_failed $LINENO +rm -f rec*in9t.lz in9t.lz || framework_failure + +cat "${in_lz}" "${in_lz}" "${in_lz}" "${in_lz}" "${in_lz}" "${in_lz}" \ + "${in_lz}" "${in_lz}" in "${in_lz}" > in9t.lz || framework_failure +printf "LZIP\001-" >> in9t.lz || framework_failure # truncated member +"${LZIPRECOVER}" -s in9t.lz || test_failed $LINENO +for i in 01 02 03 04 05 06 07 08 10 ; do + cmp "${in_lz}" rec${i}in9t.lz || test_failed $LINENO $i + "${LZIP}" -cd rec${i}in9t.lz > copy || test_failed $LINENO $i + cmp in copy || test_failed $LINENO $i +done +cmp in rec09in9t.lz || test_failed $LINENO +[ -e rec11in9t.lz ] || test_failed $LINENO +[ ! -e rec12in9t.lz ] || test_failed $LINENO +cat rec*in9t.lz | cmp in9t.lz - || test_failed $LINENO +rm -f rec*in9t.lz in9t.lz || framework_failure + +cat "${in_lz}" "${in_lz}" "${in_lz}" in "${in_lz}" > in9t.lz || framework_failure +printf "LZIP\001-" >> in9t.lz || framework_failure # truncated member +cat "${in_lz}" "${in_lz}" "${in_lz}" "${in_lz}" "${in_lz}" >> in9t.lz || + framework_failure +"${LZIPRECOVER}" -s in9t.lz || test_failed $LINENO +for i in 01 02 03 05 07 08 09 10 11 ; do + cmp "${in_lz}" rec${i}in9t.lz || test_failed $LINENO $i + "${LZIP}" -cd rec${i}in9t.lz > copy || test_failed $LINENO $i + cmp in copy || test_failed $LINENO $i +done +cmp in rec04in9t.lz || test_failed $LINENO +[ -e rec06in9t.lz ] || test_failed $LINENO +[ ! -e rec12in9t.lz ] || test_failed $LINENO +cat rec*in9t.lz | cmp in9t.lz - || test_failed $LINENO +rm -f rec*in9t.lz in9t.lz || framework_failure + +"${LZIPRECOVER}" -s "${f6b1_lz}" -o f6.lz || test_failed $LINENO +for i in 1 2 3 4 5 6 ; do + [ -e rec${i}f6.lz ] || test_failed $LINENO +done +[ ! 
-e rec7f6.lz ] || test_failed $LINENO +cat rec*f6.lz | cmp "${f6b1_lz}" - || test_failed $LINENO +rm -f rec*f6.lz || framework_failure + +"${LZIPRECOVER}" -s "${f6b2_lz}" -o f6.lz || test_failed $LINENO +for i in 1 3 4 5 6 ; do + cmp "${fox_lz}" rec${i}f6.lz || test_failed $LINENO +done +[ -e rec2f6.lz ] || test_failed $LINENO +[ ! -e rec7f6.lz ] || test_failed $LINENO +cat rec*f6.lz | cmp "${f6b2_lz}" - || test_failed $LINENO +rm -f rec*f6.lz || framework_failure + +"${LZIPRECOVER}" -s "${f6b3_lz}" -o f6.lz || test_failed $LINENO +for i in 1 2 4 ; do + cmp "${fox_lz}" rec${i}f6.lz || test_failed $LINENO +done +[ -e rec3f6.lz ] || test_failed $LINENO +[ ! -e rec5f6.lz ] || test_failed $LINENO +cat rec*f6.lz | cmp "${f6b3_lz}" - || test_failed $LINENO +rm -f rec*f6.lz || framework_failure + +for i in "${f6b4_lz}" "${f6b5_lz}" ; do + "${LZIPRECOVER}" -s "$i" -o f6.lz || test_failed $LINENO + for j in 1 2 3 4 ; do + cmp "${fox_lz}" rec${j}f6.lz || test_failed $LINENO + done + [ -e rec5f6.lz ] || test_failed $LINENO + [ ! -e rec6f6.lz ] || test_failed $LINENO + cat rec*f6.lz | cmp "$i" - || test_failed $LINENO + rm -f rec*f6.lz || framework_failure +done + +"${LZIPRECOVER}" -s "${f6b6_lz}" -o f6.lz || test_failed $LINENO +for i in 1 2 3 4 5 ; do + cmp "${fox_lz}" rec${i}f6.lz || test_failed $LINENO +done +[ -e rec6f6.lz ] || test_failed $LINENO +[ ! -e rec7f6.lz ] || test_failed $LINENO +cat rec*f6.lz | cmp "${f6b6_lz}" - || test_failed $LINENO +rm -f rec*f6.lz || framework_failure + +"${LZIPRECOVER}" -s "${f6s1_lz}" -o f6.lz || test_failed $LINENO +for i in 1 2 3 4 5 ; do + cmp "${fox_lz}" rec${i}f6.lz || test_failed $LINENO +done +[ -e rec6f6.lz ] || test_failed $LINENO +[ ! 
-e rec7f6.lz ] || test_failed $LINENO +cat rec*f6.lz | cmp "${f6s1_lz}" - || test_failed $LINENO +rm -f rec*f6.lz || framework_failure +for i in "${f6s2_lz}" "${f6s3_lz}" "${f6s4_lz}" "${f6s5_lz}" "${f6s6_lz}" ; do + "${LZIPRECOVER}" -s "$i" -o f6.lz || test_failed $LINENO "$i" + for j in 1 2 3 4 5 6 ; do + cmp "${fox_lz}" rec${j}f6.lz || test_failed $LINENO "$i $j" + done + [ -e rec7f6.lz ] || test_failed $LINENO "$i" + [ ! -e rec8f6.lz ] || test_failed $LINENO "$i" + cat rec*f6.lz | cmp "$i" - || test_failed $LINENO "$i" + rm -f rec*f6.lz || framework_failure +done + +"${LZIPRECOVER}" -s ingin.lz || test_failed $LINENO +cmp "${in_lz}" rec1ingin.lz || test_failed $LINENO +cmp "${in_lz}" rec3ingin.lz || test_failed $LINENO +printf "g" | cmp rec2ingin.lz - || test_failed $LINENO +[ ! -e rec4ingin.lz ] || test_failed $LINENO +cat rec*ingin.lz | cmp ingin.lz - || test_failed $LINENO +rm -f rec*ingin.lz || framework_failure + +printf "\ntesting --*=damaged..." + +touch empty || framework_failure +cat "${in_lz}" > in.lz || framework_failure +cat "${in_lz}" in > int.lz || framework_failure +"${LZIPRECOVER}" --dump=damaged in.lz > copy || test_failed $LINENO +cmp empty copy || test_failed $LINENO +"${LZIPRECOVER}" --dump=damage int.lz > copy || test_failed $LINENO +cmp empty copy || test_failed $LINENO +"${LZIPRECOVER}" --strip=damag in.lz > copy || test_failed $LINENO +cmp in.lz copy || test_failed $LINENO +"${LZIPRECOVER}" --strip=dama int.lz > copy || test_failed $LINENO +cmp int.lz copy || test_failed $LINENO +# strip trailing data from all but the last file +"${LZIPRECOVER}" --strip=dam int.lz int.lz > copy || test_failed $LINENO +cat "${in_lz}" "${in_lz}" in | cmp copy - || test_failed $LINENO +"${LZIPRECOVER}" --remove=da in.lz || test_failed $LINENO +cmp "${in_lz}" in.lz || test_failed $LINENO +"${LZIPRECOVER}" --remove=d int.lz || test_failed $LINENO +cat "${in_lz}" in | cmp int.lz - || test_failed $LINENO +rm -f in.lz int.lz || framework_failure + +cat in9.lz in 
> in9t.lz || framework_failure +"${LZIPRECOVER}" --dump=damaged in9.lz > copy || test_failed $LINENO +cmp empty copy || test_failed $LINENO +"${LZIPRECOVER}" --dump=damaged in9t.lz > copy || test_failed $LINENO +cmp empty copy || test_failed $LINENO +"${LZIPRECOVER}" --strip=damaged in9.lz > copy || test_failed $LINENO +cmp in9.lz copy || test_failed $LINENO +"${LZIPRECOVER}" --strip=damaged in9t.lz > copy || test_failed $LINENO +cmp in9t.lz copy || test_failed $LINENO +"${LZIPRECOVER}" --remove=damaged in9t.lz || test_failed $LINENO +cat in9.lz in | cmp in9t.lz - || test_failed $LINENO +cat in9.lz > in9t.lz || framework_failure +"${LZIPRECOVER}" --remove=damaged in9t.lz || test_failed $LINENO +cmp in9.lz in9t.lz || test_failed $LINENO +rm -f in9t.lz || framework_failure + +printf "LZIP\001+" > in9t.lz || framework_failure # gap size < 36 bytes +cat "${in_lz}" "${in_lz}" "${in_lz}" in "${in_lz}" "${in_lz}" "${in_lz}" \ + "${in_lz}" "${in_lz}" "${in_lz}" >> in9t.lz || framework_failure +printf "LZIP\001-" >> in9t.lz || framework_failure # truncated member +printf "LZIP\001+" > gaps || framework_failure +cat in >> gaps || framework_failure +printf "LZIP\001-" >> gaps || framework_failure +"${LZIPRECOVER}" --dump=damaged in9t.lz > copy || test_failed $LINENO +cmp gaps copy || test_failed $LINENO +"${LZIPRECOVER}" --strip=damaged in9t.lz > copy || test_failed $LINENO +cmp in9.lz copy || test_failed $LINENO +"${LZIPRECOVER}" --remove=damaged in9t.lz || test_failed $LINENO +cmp in9.lz in9t.lz || test_failed $LINENO +rm -f in9.lz in9t.lz gaps || framework_failure + +"${LZIPRECOVER}" --dump=damaged "${f6b1_lz}" > copy || test_failed $LINENO +cmp "${f6b1_lz}" copy || test_failed $LINENO +cat "${f6b1_lz}" in > f6bt.lz || framework_failure +"${LZIPRECOVER}" --dump=damaged f6bt.lz > copy || test_failed $LINENO +cmp "${f6b1_lz}" copy || test_failed $LINENO +"${LZIPRECOVER}" -q --strip=damaged "${f6b1_lz}" > copy || test_failed $LINENO +cmp empty copy || test_failed $LINENO 
+"${LZIPRECOVER}" -q --strip=damaged f6bt.lz > copy || test_failed $LINENO +cmp empty copy || test_failed $LINENO +cat "${f6b1_lz}" > f6b.lz || framework_failure +"${LZIPRECOVER}" -q --remove=damaged f6b.lz +[ $? = 2 ] || test_failed $LINENO +cmp "${f6b1_lz}" f6b.lz || test_failed $LINENO +"${LZIPRECOVER}" -q --remove=damaged f6bt.lz +[ $? = 2 ] || test_failed $LINENO +cat "${f6b1_lz}" in | cmp f6bt.lz - || test_failed $LINENO +rm -f f6b.lz f6bt.lz || framework_failure + +"${LZIPRECOVER}" --dump=damaged "${f6b2_lz}" > copy || test_failed $LINENO +cat "${fox_lz}" copy "${fox_lz}" "${fox_lz}" "${fox_lz}" \ + "${fox_lz}" | cmp "${f6b2_lz}" - || test_failed $LINENO +cat "${f6b2_lz}" in > f6bt.lz || framework_failure +"${LZIPRECOVER}" --dump=damaged f6bt.lz > copy || test_failed $LINENO +cat "${fox_lz}" copy "${fox_lz}" "${fox_lz}" "${fox_lz}" \ + "${fox_lz}" | cmp "${f6b2_lz}" - || test_failed $LINENO +cat "${fox_lz}" "${fox_lz}" "${fox_lz}" "${fox_lz}" "${fox_lz}" > fox5.lz +"${LZIPRECOVER}" --strip=damaged "${f6b2_lz}" > copy || test_failed $LINENO +cmp fox5.lz copy || test_failed $LINENO +"${LZIPRECOVER}" --strip=damaged f6bt.lz > copy || test_failed $LINENO +cat fox5.lz in | cmp copy - || test_failed $LINENO +cat "${f6b2_lz}" > f6b.lz || framework_failure +"${LZIPRECOVER}" --remove=damaged f6b.lz || test_failed $LINENO +cmp fox5.lz f6b.lz || test_failed $LINENO +"${LZIPRECOVER}" --remove=damaged f6bt.lz || test_failed $LINENO +cat fox5.lz in | cmp f6bt.lz - || test_failed $LINENO +rm -f f6b.lz f6bt.lz || framework_failure + +"${LZIPRECOVER}" --dump=damaged "${f6b3_lz}" > copy || test_failed $LINENO +cat "${fox_lz}" "${fox_lz}" copy "${fox_lz}" | cmp "${f6b3_lz}" - || + test_failed $LINENO +cat "${f6b3_lz}" in > f6bt.lz || framework_failure +"${LZIPRECOVER}" --dump=damaged f6bt.lz > copy || test_failed $LINENO +cat "${fox_lz}" "${fox_lz}" copy "${fox_lz}" | cmp "${f6b3_lz}" - || + test_failed $LINENO +cat "${fox_lz}" "${fox_lz}" "${fox_lz}" > fox3.lz 
+"${LZIPRECOVER}" --strip=damaged "${f6b3_lz}" > copy || test_failed $LINENO +cmp fox3.lz copy || test_failed $LINENO +"${LZIPRECOVER}" --strip=damaged f6bt.lz > copy || test_failed $LINENO +cat fox3.lz in | cmp copy - || test_failed $LINENO +cat "${f6b3_lz}" > f6b.lz || framework_failure +"${LZIPRECOVER}" --remove=damaged f6b.lz || test_failed $LINENO +cmp fox3.lz f6b.lz || test_failed $LINENO +"${LZIPRECOVER}" --remove=damaged f6bt.lz || test_failed $LINENO +cat fox3.lz in | cmp f6bt.lz - || test_failed $LINENO +rm -f f6b.lz f6bt.lz fox3.lz || framework_failure + +cat "${fox_lz}" "${fox_lz}" "${fox_lz}" "${fox_lz}" > fox4.lz +for i in "${f6b4_lz}" "${f6b5_lz}" ; do + "${LZIPRECOVER}" --dump=damaged "$i" > copy || test_failed $LINENO "$i" + cat fox4.lz copy | cmp "$i" - || test_failed $LINENO "$i" + cat "$i" in > f6bt.lz || framework_failure + "${LZIPRECOVER}" --dump=damaged f6bt.lz > copy || + test_failed $LINENO "$i" + cat fox4.lz copy | cmp f6bt.lz - || test_failed $LINENO "$i" + "${LZIPRECOVER}" --strip=damaged "$i" > copy || test_failed $LINENO "$i" + cmp fox4.lz copy || test_failed $LINENO "$i" + "${LZIPRECOVER}" --strip=damaged f6bt.lz > copy || + test_failed $LINENO "$i" + cmp fox4.lz copy || test_failed $LINENO "$i" + cat "$i" > f6b.lz || framework_failure + "${LZIPRECOVER}" --remove=damaged f6b.lz || test_failed $LINENO "$i" + cmp fox4.lz f6b.lz || test_failed $LINENO "$i" + "${LZIPRECOVER}" --remove=damaged f6bt.lz || test_failed $LINENO "$i" + cmp fox4.lz f6bt.lz || test_failed $LINENO "$i" +done +rm -f f6b.lz f6bt.lz fox4.lz || framework_failure + +"${LZIPRECOVER}" --dump=damaged "${f6b6_lz}" > copy || test_failed $LINENO +cat fox5.lz copy | cmp "${f6b6_lz}" - || test_failed $LINENO +cat "${f6b6_lz}" in > f6bt.lz || framework_failure +"${LZIPRECOVER}" --dump=damaged f6bt.lz > copy || test_failed $LINENO +cat fox5.lz copy | cmp "${f6b6_lz}" - || test_failed $LINENO +"${LZIPRECOVER}" --strip=damaged "${f6b6_lz}" > copy || test_failed $LINENO +cmp 
fox5.lz copy || test_failed $LINENO +"${LZIPRECOVER}" --strip=damaged f6bt.lz > copy || test_failed $LINENO +cat fox5.lz in | cmp copy - || test_failed $LINENO +cat "${f6b6_lz}" > f6b.lz || framework_failure +"${LZIPRECOVER}" --remove=damaged f6b.lz || test_failed $LINENO +cmp fox5.lz f6b.lz || test_failed $LINENO +"${LZIPRECOVER}" --remove=damaged f6bt.lz || test_failed $LINENO +cat fox5.lz in | cmp f6bt.lz - || test_failed $LINENO +rm -f f6b.lz f6bt.lz || framework_failure + +for i in "${f6s1_lz}" "${f6s2_lz}" ; do + "${LZIPRECOVER}" --dump=damaged "$i" > copy || test_failed $LINENO "$i" + cmp "$i" copy || test_failed $LINENO "$i" + cat "$i" in > f6bt.lz || framework_failure + "${LZIPRECOVER}" --dump=damaged f6bt.lz > copy || + test_failed $LINENO "$i" + cmp "$i" copy || test_failed $LINENO "$i" + "${LZIPRECOVER}" -q --strip=damaged "$i" > copy || + test_failed $LINENO "$i" + cmp empty copy || test_failed $LINENO "$i" + "${LZIPRECOVER}" -q --strip=damaged f6bt.lz > copy || + test_failed $LINENO "$i" + cmp empty copy || test_failed $LINENO "$i" + cat "$i" > f6b.lz || framework_failure + "${LZIPRECOVER}" -q --remove=damaged f6b.lz + [ $? = 2 ] || test_failed $LINENO "$i" + cmp "$i" f6b.lz || test_failed $LINENO "$i" + "${LZIPRECOVER}" -q --remove=damaged f6bt.lz + [ $? 
= 2 ] || test_failed $LINENO "$i" + cat "$i" in | cmp f6bt.lz - || test_failed $LINENO "$i" +done +rm -f f6b.lz f6bt.lz || framework_failure + +for i in "${f6s3_lz}" "${f6s4_lz}" "${f6s5_lz}" "${f6s6_lz}" ; do + "${LZIPRECOVER}" --dump=damaged "$i" > copy || test_failed $LINENO "$i" + cmp empty copy || test_failed $LINENO "$i" + cat "$i" in > f6bt.lz || framework_failure + "${LZIPRECOVER}" --dump=damaged f6bt.lz > copy || + test_failed $LINENO "$i" + cmp empty copy || test_failed $LINENO "$i" + "${LZIPRECOVER}" --strip=damaged "$i" > copy || test_failed $LINENO "$i" + cmp "$i" copy || test_failed $LINENO "$i" + "${LZIPRECOVER}" --strip=damaged f6bt.lz > copy || + test_failed $LINENO "$i" + cat "$i" in | cmp copy - || test_failed $LINENO "$i" + cat "$i" > f6b.lz || framework_failure + "${LZIPRECOVER}" --remove=damaged f6b.lz || test_failed $LINENO "$i" + cmp "$i" f6b.lz || test_failed $LINENO "$i" + "${LZIPRECOVER}" --remove=damaged f6bt.lz || test_failed $LINENO "$i" + cat "$i" in | cmp f6bt.lz - || test_failed $LINENO "$i" +done +rm -f f6b.lz f6bt.lz || framework_failure + +cat ingin.lz "${inD}" > ingint.lz || framework_failure +"${LZIPRECOVER}" --dump=damaged ingin.lz > copy || test_failed $LINENO +printf "g" | cmp copy - || test_failed $LINENO +"${LZIPRECOVER}" --dump=damaged ingint.lz > copy || test_failed $LINENO +printf "g" | cmp copy - || test_failed $LINENO +"${LZIPRECOVER}" --strip=damaged ingin.lz > copy || test_failed $LINENO +cmp in2.lz copy || test_failed $LINENO +"${LZIPRECOVER}" --strip=damaged ingint.lz > copy || test_failed $LINENO +cat "${in_lz}" "${in_lz}" "${inD}" | cmp copy - || test_failed $LINENO +cat ingin.lz > ingin2.lz || framework_failure +"${LZIPRECOVER}" --remove=damaged ingin2.lz || test_failed $LINENO +cmp in2.lz ingin2.lz || test_failed $LINENO +"${LZIPRECOVER}" --remove=damaged ingint.lz || test_failed $LINENO +cat "${in_lz}" "${in_lz}" "${inD}" | cmp ingint.lz - || test_failed $LINENO +rm -f ingin2.lz ingint.lz || framework_failure 
+ +# concatenate output from several files +"${LZIPRECOVER}" --dump=damaged "${f6b2_lz}" > copy || test_failed $LINENO +"${LZIPRECOVER}" --dump=damaged "${bad2_lz}" "${f6b2_lz}" > copy2 || + test_failed $LINENO +cat "${bad2_lz}" copy | cmp copy2 - || test_failed $LINENO +cat "${bad2_lz}" in > bad2t.lz || framework_failure +cat "${f6b2_lz}" in > f6bt.lz || framework_failure +"${LZIPRECOVER}" --dump=damaged bad2t.lz "${f6b2_lz}" "${bad2_lz}" \ +f6bt.lz > copy4 || test_failed $LINENO +cat "${bad2_lz}" copy "${bad2_lz}" copy | cmp copy4 - || test_failed $LINENO +"${LZIPRECOVER}" --dump=damaged "${f6b2_lz}" bad2t.lz f6bt.lz \ +"${bad2_lz}" > copy4 || test_failed $LINENO +cat copy "${bad2_lz}" copy "${bad2_lz}" | cmp copy4 - || test_failed $LINENO +# +"${LZIPRECOVER}" -q --strip=damaged "${bad2_lz}" "${f6b2_lz}" > copy || + test_failed $LINENO +cmp fox5.lz copy || test_failed $LINENO +"${LZIPRECOVER}" -q --strip=damaged bad2t.lz "${f6b2_lz}" > copy || + test_failed $LINENO +cmp fox5.lz copy || test_failed $LINENO +"${LZIPRECOVER}" -q --strip=damaged "${f6b2_lz}" bad2t.lz f6bt.lz > copy || + test_failed $LINENO +cat fox5.lz fox5.lz in | cmp copy - || test_failed $LINENO +"${LZIPRECOVER}" -q --strip=damaged "${f6b2_lz}" f6bt.lz bad2t.lz > copy || + test_failed $LINENO +cat fox5.lz fox5.lz | cmp copy - || test_failed $LINENO +"${LZIPRECOVER}" -q --strip=damaged f6bt.lz bad2t.lz > copy || + test_failed $LINENO +cmp fox5.lz copy || test_failed $LINENO +"${LZIPRECOVER}" -q --strip=damaged f6bt.lz "${in_lz}" > copy || + test_failed $LINENO +cat fox5.lz "${in_lz}" | cmp copy - || test_failed $LINENO +"${LZIPRECOVER}" --strip=damaged --strip=tdata f6bt.lz "${in_lz}" > copy || + test_failed $LINENO +cat fox5.lz "${in_lz}" | cmp copy - || test_failed $LINENO +# +cat "${f6b2_lz}" > f6b.lz || framework_failure +"${LZIPRECOVER}" -q --remove=damaged f6b.lz bad2t.lz f6bt.lz +[ $? 
= 2 ] || test_failed $LINENO +cat "${bad2_lz}" in | cmp bad2t.lz - || test_failed $LINENO +cmp fox5.lz f6b.lz || test_failed $LINENO +cat fox5.lz in | cmp f6bt.lz - || test_failed $LINENO +cat "${bad2_lz}" in > bad2t.lz || framework_failure +cat "${fox6_lz}" "${inD}" > fox6t.lz || framework_failure +cat "${f6b1_lz}" in > f6abt.lz || framework_failure +cat "${f6b2_lz}" > f6b.lz || framework_failure +cat "${f6b2_lz}" in > f6bt.lz || framework_failure +"${LZIPRECOVER}" -q --remove=d:t fox6t.lz f6abt.lz f6b.lz bad2t.lz f6bt.lz +[ $? = 2 ] || test_failed $LINENO +cat "${bad2_lz}" in | cmp bad2t.lz - || test_failed $LINENO +cat "${f6b1_lz}" in | cmp f6abt.lz - || test_failed $LINENO +cmp "${fox6_lz}" fox6t.lz || test_failed $LINENO +cmp fox5.lz f6b.lz || test_failed $LINENO +cmp fox5.lz f6bt.lz || test_failed $LINENO +rm -f fox6t.lz f6b.lz f6bt.lz bad2t.lz fox5.lz copy2 copy4 || framework_failure printf "\ntesting trailing data..." cat "${in_lz}" "${inD}" > int.lz || framework_failure -"${LZIPRECOVER}" --dump-tdata int.lz > copy +"${LZIPRECOVER}" --dump=tdata int.lz > copy || test_failed $LINENO cmp "${inD}" copy || test_failed $LINENO -rm -f copy -"${LZIPRECOVER}" --dump-tdata int.lz -o copy +rm -f copy || framework_failure +"${LZIPRECOVER}" --dump=tdat int.lz -o copy || test_failed $LINENO cmp "${inD}" copy || test_failed $LINENO cat "${fox6_lz}" "${inD}" > fox6t.lz || framework_failure cat "${inD}" "${inD}" > inD2 || framework_failure -"${LZIPRECOVER}" --dump-tdata int.lz fox6t.lz -f -o copy +"${LZIPRECOVER}" --dump=tda int.lz fox6t.lz -f -o copy || test_failed $LINENO cmp inD2 copy || test_failed $LINENO +rm -f inD2 || framework_failure +cat ingin.lz "${inD}" > ingint.lz || framework_failure +"${LZIPRECOVER}" -q --dump=td ingint.lz > /dev/null +[ $? 
= 2 ] || test_failed $LINENO +"${LZIPRECOVER}" -i --dump=t ingint.lz > copy || test_failed $LINENO +cmp "${inD}" copy || test_failed $LINENO -"${LZIPRECOVER}" --strip-tdata int.lz > copy +"${LZIPRECOVER}" --strip=tdata int.lz > copy || test_failed $LINENO cmp "${in_lz}" copy || test_failed $LINENO -rm -f copy -"${LZIPRECOVER}" --strip-tdata int.lz -o copy +rm -f copy || framework_failure +"${LZIPRECOVER}" --strip=tdata int.lz -o copy || test_failed $LINENO cmp "${in_lz}" copy || test_failed $LINENO -"${LZIPRECOVER}" --strip-tdata fox6t.lz -f -o copy +"${LZIPRECOVER}" --strip=tdata fox6t.lz -f -o copy || test_failed $LINENO cmp "${fox6_lz}" copy || test_failed $LINENO -"${LZIPRECOVER}" --strip-tdata int.lz int.lz -f -o copy +"${LZIPRECOVER}" --strip=tdata int.lz int.lz -f -o copy || test_failed $LINENO cmp in2.lz copy || test_failed $LINENO +rm -f in2.lz || framework_failure +"${LZIPRECOVER}" --strip=tdata int.lz fox6t.lz > copy || test_failed $LINENO +cat "${in_lz}" "${fox6_lz}" | cmp copy - || test_failed $LINENO +"${LZIPRECOVER}" -q --strip=tdata ingint.lz > /dev/null +[ $? = 2 ] || test_failed $LINENO +"${LZIPRECOVER}" -i --strip=tdata ingint.lz > copy || test_failed $LINENO +cmp ingin.lz copy || test_failed $LINENO -"${LZIPRECOVER}" --remove-tdata int.lz fox6t.lz +"${LZIPRECOVER}" --remove=tdata int.lz fox6t.lz || test_failed $LINENO cmp "${in_lz}" int.lz || test_failed $LINENO cmp "${fox6_lz}" fox6t.lz || test_failed $LINENO -"${LZIPRECOVER}" --remove-tdata int.lz +"${LZIPRECOVER}" --remove=tdata int.lz || test_failed $LINENO cmp "${in_lz}" int.lz || test_failed $LINENO -"${LZIPRECOVER}" --remove-tdata fox6t.lz +"${LZIPRECOVER}" --remove=tdata fox6t.lz || test_failed $LINENO cmp "${fox6_lz}" fox6t.lz || test_failed $LINENO +"${LZIPRECOVER}" -q --remove=tdata ingint.lz +[ $? 
= 2 ] || test_failed $LINENO +cmp -s ingin.lz ingint.lz && test_failed $LINENO +"${LZIPRECOVER}" -i --remove=tdata ingint.lz || test_failed $LINENO +cmp ingin.lz ingint.lz || test_failed $LINENO +rm -f int.lz fox6t.lz ingint.lz ingin.lz || framework_failure + +for i in "${f6s3_lz}" "${f6s4_lz}" "${f6s5_lz}" "${f6s6_lz}" ; do + "${LZIPRECOVER}" --strip=tdata "$i" > copy || test_failed $LINENO "$i" + "${LZIPRECOVER}" --dump=tdata "$i" > tdata || test_failed $LINENO "$i" + cmp "${fox6_lz}" copy || test_failed $LINENO "$i" + cat copy tdata | cmp "$i" - || test_failed $LINENO "$i" + cat "$i" "${inD}" > f6t.lz || framework_failure + "${LZIPRECOVER}" --strip=tdata f6t.lz > copy || test_failed $LINENO "$i" + "${LZIPRECOVER}" --dump=tdata f6t.lz > tdata || test_failed $LINENO "$i" + cmp "${fox6_lz}" copy || test_failed $LINENO "$i" + cat copy tdata | cmp f6t.lz - || test_failed $LINENO "$i" + "${LZIPRECOVER}" --remove=tdata f6t.lz || test_failed $LINENO "$i" + cmp "${fox6_lz}" f6t.lz || test_failed $LINENO "$i" + rm -f copy tdata f6t.lz || framework_failure +done + +printf "\ntesting --dump/remove/strip..." + +"${LZIPRECOVER}" -s "${num_lz}" -o num.lz || test_failed $LINENO +[ -e rec9num.lz ] || test_failed $LINENO +[ ! -e rec10num.lz ] || test_failed $LINENO +cat rec*num.lz | cmp "${num_lz}" - || test_failed $LINENO +for i in 1 2 3 4 5 6 7 8 9 ; do + "${LZIPRECOVER}" --dump=$i "${num_lz}" | cmp rec${i}num.lz - || + test_failed $LINENO $i + "${LZIPRECOVER}" --strip=^$i "${num_lz}" | cmp rec${i}num.lz - || + test_failed $LINENO $i + cat "${num_lz}" > num.lz || framework_failure + "${LZIPRECOVER}" --remove=^$i num.lz || test_failed $LINENO $i + cmp rec${i}num.lz num.lz || test_failed $LINENO $i +done +"${LZIPRECOVER}" -q --dump=1 in "${num_lz}" > out +[ $? = 2 ] || test_failed $LINENO +cmp rec1num.lz out || test_failed $LINENO +"${LZIPRECOVER}" -q --strip=^1 in "${num_lz}" > out +[ $? 
= 2 ] || test_failed $LINENO +cmp rec1num.lz out || test_failed $LINENO + +"${LZIPRECOVER}" --dump=r1 "${num_lz}" | cmp rec9num.lz - || + test_failed $LINENO +"${LZIPRECOVER}" --dump=d:r3 "${num_lz}" | cmp rec7num.lz - || + test_failed $LINENO +"${LZIPRECOVER}" --dump=r5:d "${num_lz}" | cmp rec5num.lz - || + test_failed $LINENO +"${LZIPRECOVER}" --dump=t:r9 "${num_lz}" | cmp rec1num.lz - || + test_failed $LINENO +"${LZIPRECOVER}" --strip=r^1:t "${num_lz}" | cmp rec9num.lz - || + test_failed $LINENO +"${LZIPRECOVER}" --strip=d:r^3:t "${num_lz}" | cmp rec7num.lz - || + test_failed $LINENO +"${LZIPRECOVER}" --strip=r^5:d:t "${num_lz}" | cmp rec5num.lz - || + test_failed $LINENO +"${LZIPRECOVER}" --strip=d:t:r^9 "${num_lz}" | cmp rec1num.lz - || + test_failed $LINENO + +"${LZIPRECOVER}" --dump=1,5 "${num_lz}" > out || test_failed $LINENO +cat rec1num.lz rec5num.lz | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" --dump=3,6 "${num_lz}" > out || test_failed $LINENO +cat rec3num.lz rec6num.lz | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" --dump=2-4 "${num_lz}" > out || test_failed $LINENO +cat rec2num.lz rec3num.lz rec4num.lz | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" --dump=4,6,8 "${num_lz}" > out || test_failed $LINENO +cat rec4num.lz rec6num.lz rec8num.lz | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" --strip=^1,5 "${num_lz}" > out || test_failed $LINENO +cat rec1num.lz rec5num.lz | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" --strip=^3,6 "${num_lz}" > out || test_failed $LINENO +cat rec3num.lz rec6num.lz | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" --strip=^2-4 "${num_lz}" > out || test_failed $LINENO +cat rec2num.lz rec3num.lz rec4num.lz | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" --strip=^4,6,8 "${num_lz}" > out || test_failed $LINENO +cat rec4num.lz rec6num.lz rec8num.lz | cmp out - || test_failed $LINENO + +# create a subset tarlz archive +"${LZIPRECOVER}" --dump=1-2:r1:t "${num_lz}" > out || test_failed $LINENO 
+cat rec1num.lz rec2num.lz rec9num.lz | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" --dump=4-5:r1:t "${num_lz}" > out || test_failed $LINENO +cat rec4num.lz rec5num.lz rec9num.lz | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" --dump=7-8:r1:t "${num_lz}" > out || test_failed $LINENO +cat rec7num.lz rec8num.lz rec9num.lz | cmp out - || test_failed $LINENO + +"${LZIPRECOVER}" --dump=1-9 "${num_lz}" | cmp "${num_lz}" - || + test_failed $LINENO +"${LZIPRECOVER}" --dump=r1-9 "${num_lz}" | cmp "${num_lz}" - || + test_failed $LINENO +"${LZIPRECOVER}" --dump=1-1000 "${num_lz}" | cmp "${num_lz}" - || + test_failed $LINENO +"${LZIPRECOVER}" --dump=r1-1000 "${num_lz}" | cmp "${num_lz}" - || + test_failed $LINENO +"${LZIPRECOVER}" --dump=1-4:r1-4:5 "${num_lz}" | cmp "${num_lz}" - || + test_failed $LINENO +"${LZIPRECOVER}" --dump=^10 "${num_lz}" | cmp "${num_lz}" - || + test_failed $LINENO +"${LZIPRECOVER}" --strip=^1-9 "${num_lz}" | cmp "${num_lz}" - || + test_failed $LINENO +"${LZIPRECOVER}" --strip=r^1-9 "${num_lz}" | cmp "${num_lz}" - || + test_failed $LINENO +"${LZIPRECOVER}" --strip=^1-1000 "${num_lz}" | cmp "${num_lz}" - || + test_failed $LINENO +"${LZIPRECOVER}" --strip=r^1-1000 "${num_lz}" | cmp "${num_lz}" - || + test_failed $LINENO +"${LZIPRECOVER}" --strip=^1-4:r^1-4:^5 "${num_lz}" | cmp "${num_lz}" - || + test_failed $LINENO +"${LZIPRECOVER}" --strip=10 "${num_lz}" | cmp "${num_lz}" - || + test_failed $LINENO + +"${LZIPRECOVER}" -i --dump=r1 "${nbt_lz}" | cmp rec9num.lz - || + test_failed $LINENO +"${LZIPRECOVER}" -i --dump=r3 "${nbt_lz}" | cmp rec7num.lz - || + test_failed $LINENO +"${LZIPRECOVER}" -i --dump=r7 "${nbt_lz}" | cmp rec4num.lz - || + test_failed $LINENO +"${LZIPRECOVER}" -i --strip=r^1:t "${nbt_lz}" | cmp rec9num.lz - || + test_failed $LINENO +"${LZIPRECOVER}" -i --strip=r^3:t "${nbt_lz}" | cmp rec7num.lz - || + test_failed $LINENO +"${LZIPRECOVER}" -i --strip=r^7:t "${nbt_lz}" | cmp rec4num.lz - || + test_failed $LINENO + 
+"${LZIPRECOVER}" -i --dump=4 -f -o out "${nbt_lz}" || test_failed $LINENO +printf "gap" | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" -i --dump=8 "${nbt_lz}" > out || test_failed $LINENO +printf "damaged" | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" -i --dump=tdata "${nbt_lz}" > out || test_failed $LINENO +printf "trailing data" | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" -i --dump=4:t "${nbt_lz}" > out || test_failed $LINENO +printf "gaptrailing data" | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" -i --dump=4,8:t "${nbt_lz}" > out || test_failed $LINENO +printf "gapdamagedtrailing data" | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" -i --dump=4,8 "${nbt_lz}" > out || test_failed $LINENO +printf "gapdamaged" | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" -i --dump=damaged "${nbt_lz}" > out || test_failed $LINENO +printf "gapdamaged" | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" -i --dump=d:t "${nbt_lz}" > out || test_failed $LINENO +printf "gapdamagedtrailing data" | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" -i --strip=^4:t -f -o out "${nbt_lz}" || test_failed $LINENO +printf "gap" | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" -i --strip=^8:t "${nbt_lz}" > out || test_failed $LINENO +printf "damaged" | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" -i --strip=1-11 "${nbt_lz}" > out || test_failed $LINENO +cmp empty out || test_failed $LINENO +"${LZIPRECOVER}" -i --strip=^4 "${nbt_lz}" > out || test_failed $LINENO +printf "gaptrailing data" | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" -i --strip=^4,8 "${nbt_lz}" > out || test_failed $LINENO +printf "gapdamagedtrailing data" | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" -i --strip=^4,8:t "${nbt_lz}" > out || test_failed $LINENO +printf "gapdamaged" | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" -i --strip=r^4,8:t "${nbt_lz}" > out || test_failed $LINENO +printf "gapdamaged" | cmp out - || test_failed $LINENO +"${LZIPRECOVER}" 
-i --strip=r^4,8 "${nbt_lz}" > out || test_failed $LINENO +printf "gapdamagedtrailing data" | cmp out - || test_failed $LINENO + +cat "${num_lz}" > num.lz || framework_failure +"${LZIPRECOVER}" --remove=1-3,5,7,9 num.lz || test_failed $LINENO +cat rec4num.lz rec6num.lz rec8num.lz | cmp num.lz - || test_failed $LINENO +cat "${num_lz}" > num.lz || framework_failure +"${LZIPRECOVER}" --remove=^4,6,8 num.lz || test_failed $LINENO +cat rec4num.lz rec6num.lz rec8num.lz | cmp num.lz - || test_failed $LINENO +cat "${num_lz}" > num.lz || framework_failure +"${LZIPRECOVER}" --remove=r1,3,5,7-9 num.lz || test_failed $LINENO +cat rec4num.lz rec6num.lz rec8num.lz | cmp num.lz - || test_failed $LINENO +cat "${num_lz}" > num.lz || framework_failure +"${LZIPRECOVER}" --remove=r^2,4,6 num.lz || test_failed $LINENO +cat rec4num.lz rec6num.lz rec8num.lz | cmp num.lz - || test_failed $LINENO + +cat "${nbt_lz}" > nbt.lz || framework_failure +"${LZIPRECOVER}" -i --remove=4,8:tdata nbt.lz || test_failed $LINENO +cmp "${num_lz}" nbt.lz || test_failed $LINENO +cat "${nbt_lz}" > nbt.lz || framework_failure +"${LZIPRECOVER}" -i --remove=r4,8:tdata nbt.lz || test_failed $LINENO +cmp "${num_lz}" nbt.lz || test_failed $LINENO +cat "${nbt_lz}" > nbt.lz || framework_failure +"${LZIPRECOVER}" --remove=damaged:tdata nbt.lz || test_failed $LINENO +cmp "${num_lz}" nbt.lz || test_failed $LINENO +rm -f rec*num.lz nbt.lz empty || framework_failure + +for i in 1 2 3 4 5 6 7 8 9 10 ; do + "${LZIPRECOVER}" -i --strip=1-$i "${nbt_lz}" > out || + test_failed $LINENO $i + cat "${nbt_lz}" > nbt.lz || framework_failure + "${LZIPRECOVER}" -i --remove=1-$i nbt.lz || test_failed $LINENO $i + cmp nbt.lz out || test_failed $LINENO $i +done +rm -f nbt.lz out || framework_failure echo if [ ${fail} = 0 ] ; then diff --git a/testsuite/fox.lz b/testsuite/fox.lz Binary files differ new file mode 100644 index 0000000..509da82 --- /dev/null +++ b/testsuite/fox.lz diff --git a/testsuite/fox6_bad6.lz b/testsuite/fox6_bad6.lz
Binary files differ new file mode 100644 index 0000000..085b2fd --- /dev/null +++ b/testsuite/fox6_bad6.lz diff --git a/testsuite/fox6_sc1.lz b/testsuite/fox6_sc1.lz Binary files differ new file mode 100644 index 0000000..278f8a8 --- /dev/null +++ b/testsuite/fox6_sc1.lz diff --git a/testsuite/fox6_sc2.lz b/testsuite/fox6_sc2.lz Binary files differ new file mode 100644 index 0000000..dc17461 --- /dev/null +++ b/testsuite/fox6_sc2.lz diff --git a/testsuite/fox6_sc3.lz b/testsuite/fox6_sc3.lz Binary files differ new file mode 100644 index 0000000..a602938 --- /dev/null +++ b/testsuite/fox6_sc3.lz diff --git a/testsuite/fox6_sc4.lz b/testsuite/fox6_sc4.lz Binary files differ new file mode 100644 index 0000000..d1a77f7 --- /dev/null +++ b/testsuite/fox6_sc4.lz diff --git a/testsuite/fox6_sc5.lz b/testsuite/fox6_sc5.lz Binary files differ new file mode 100644 index 0000000..35453c6 --- /dev/null +++ b/testsuite/fox6_sc5.lz diff --git a/testsuite/fox6_sc6.lz b/testsuite/fox6_sc6.lz Binary files differ new file mode 100644 index 0000000..c1fad92 --- /dev/null +++ b/testsuite/fox6_sc6.lz diff --git a/testsuite/numbers.lz b/testsuite/numbers.lz Binary files differ new file mode 100644 index 0000000..57460bc --- /dev/null +++ b/testsuite/numbers.lz diff --git a/testsuite/numbersbt.lz b/testsuite/numbersbt.lz Binary files differ new file mode 100644 index 0000000..019e54d --- /dev/null +++ b/testsuite/numbersbt.lz diff --git a/trailing_data.cc b/trailing_data.cc deleted file mode 100644 index e03b145..0000000 --- a/trailing_data.cc +++ /dev/null @@ -1,144 +0,0 @@ -/* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2018 Antonio Diaz Diaz. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version.
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#define _FILE_OFFSET_BITS 64 - -#include <cerrno> -#include <cstdio> -#include <cstring> -#include <string> -#include <vector> -#include <stdint.h> -#include <unistd.h> -#include <utime.h> -#include <sys/stat.h> - -#include "lzip.h" -#include "block.h" -#include "file_index.h" - - -int dump_tdata( const std::vector< std::string > & filenames, - const std::string & default_output_filename, const bool force, - const bool strip, const bool loose_trailing ) - { - if( default_output_filename.empty() ) outfd = STDOUT_FILENO; - else - { - output_filename = default_output_filename; - if( !open_outstream( force, true, false, false ) ) return 1; - } - unsigned long long total_size = 0; - int files = 0, retval = 0; - bool stdin_used = false; - for( unsigned i = 0; i < filenames.size(); ++i ) - { - const bool from_stdin = ( filenames[i] == "-" ); - if( from_stdin ) { if( stdin_used ) continue; else stdin_used = true; } - const char * const input_filename = - from_stdin ? "(stdin)" : filenames[i].c_str(); - struct stat in_stats; // not used - const int infd = from_stdin ? 
STDIN_FILENO : - open_instream( input_filename, &in_stats, true, true ); - if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; } - - const File_index file_index( infd, false, true, loose_trailing ); - if( file_index.retval() != 0 ) - { - show_file_error( input_filename, file_index.error().c_str() ); - if( retval < file_index.retval() ) retval = file_index.retval(); - close( infd ); - continue; - } - const unsigned long long cdata_size = file_index.cdata_size(); - const long long trailing_size = file_index.file_size() - cdata_size; - if( strip ) - { - total_size += cdata_size; ++files; - if( !safe_seek( infd, 0 ) || !copy_file( infd, outfd, cdata_size ) ) - cleanup_and_fail( 1 ); - } - else if( trailing_size > 0 ) - { - total_size += trailing_size; ++files; - if( !safe_seek( infd, cdata_size ) || !copy_file( infd, outfd ) ) - cleanup_and_fail( 1 ); - } - close( infd ); - } - if( verbosity >= 1 ) - { - if( strip ) - std::fprintf( stderr, "%llu bytes copied from %d file(s).\n", - total_size, files ); - else - std::fprintf( stderr, "%llu trailing bytes dumped from %d file(s).\n", - total_size, files ); - } - if( close_outstream( 0 ) != 0 ) return 1; - return retval; - } - - -int remove_tdata( const std::vector< std::string > & filenames, - const bool loose_trailing ) - { - unsigned long long total_size = 0; - int files = 0, retval = 0; - for( unsigned i = 0; i < filenames.size(); ++i ) - { - const char * const filename = filenames[i].c_str(); - struct stat in_stats; - const int infd = open_truncable_stream( filename, &in_stats ); - if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; } - - const File_index file_index( infd, false, true, loose_trailing ); - if( file_index.retval() != 0 ) - { - show_file_error( filename, file_index.error().c_str() ); - if( retval < file_index.retval() ) retval = file_index.retval(); - close( infd ); - continue; - } - const unsigned long long cdata_size = file_index.cdata_size(); - const long long trailing_size = 
file_index.file_size() - cdata_size; - if( trailing_size > 0 ) - { - int i; - do i = ftruncate( infd, cdata_size ); - while( i != 0 && errno == EINTR ); - if( i == 0 ) - { - struct utimbuf t; - t.actime = in_stats.st_atime; - t.modtime = in_stats.st_mtime; - utime( filename, &t ); - total_size += trailing_size; ++files; - } - else - { - show_file_error( filename, "Can't truncate file", errno ); - if( retval < 1 ) retval = 1; - } - } - close( infd ); - } - if( verbosity >= 1 ) - std::fprintf( stderr, "%llu trailing bytes removed from %d file(s).\n", - total_size, files ); - return retval; - } diff --git a/unzcrash.cc b/unzcrash.cc index a118b9d..d22b650 100644 --- a/unzcrash.cc +++ b/unzcrash.cc @@ -1,6 +1,6 @@ /* Unzcrash - Tests robustness of decompressors to corrupted data. Inspired by unzcrash.c from Julian Seward's bzip2. - Copyright (C) 2008-2018 Antonio Diaz Diaz. + Copyright (C) 2008-2019 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,6 +22,8 @@ (eg, bug) which caused unzcrash to panic. 
*/ +#define _FILE_OFFSET_BITS 64 + #include <algorithm> #include <cerrno> #include <climits> @@ -49,7 +51,6 @@ void show_error( const char * const msg, const int errcode = 0, namespace { -const char * const Program_name = "Unzcrash"; const char * const program_name = "unzcrash"; const char * invocation_name = 0; @@ -58,9 +59,8 @@ int verbosity = 0; void show_help() { - std::printf( "%s - Tests robustness of decompressors to corrupted data.\n", Program_name ); - std::printf( "\nUsage: %s [options] 'lzip -t' file.lz\n", invocation_name ); - std::printf( "\nBy default, unzcrash reads the specified file and then repeatedly\n" + std::printf( "Unzcrash tests the robustness of decompressors to corrupted data.\n" + "\nBy default, unzcrash reads the specified file and then repeatedly\n" "decompresses it, increasing 256 times each byte of the compressed data,\n" "so as to test all possible one-byte errors. Note that it may take years\n" "or even centuries to test all possible one-byte errors in a large file\n" @@ -86,7 +86,8 @@ void show_help() "\nIn order to compare the outputs, unzcrash needs a zcmp program able to\n" "understand the format being tested. For example the one provided by zutils.\n" "Use '--zcmp=false' to disable comparisons.\n" - "\nOptions:\n" + "\nUsage: %s [options] 'lzip -t' file.lz\n", invocation_name ); + std::printf( "\nOptions:\n" " -h, --help display this help and exit\n" " -V, --version output version information and exit\n" " -b, --bits=<range> test N-bit errors instead of full byte\n" |