diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2018-02-13 07:01:05 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2018-02-13 07:01:29 +0000 |
commit | f2780f2544d654fa46ad91189c1636bd6f64605d (patch) | |
tree | f2762bc24207fc925eec55398803ee9981fb433c | |
parent | Releasing debian version 1.19-4. (diff) | |
download | lziprecover-f2780f2544d654fa46ad91189c1636bd6f64605d.tar.xz lziprecover-f2780f2544d654fa46ad91189c1636bd6f64605d.zip |
Merging upstream version 1.20.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | ChangeLog | 38 | ||||
-rw-r--r-- | INSTALL | 2 | ||||
-rw-r--r-- | Makefile.in | 31 | ||||
-rw-r--r-- | NEWS | 53 | ||||
-rw-r--r-- | README | 4 | ||||
-rw-r--r-- | alone_to_lz.cc | 6 | ||||
-rw-r--r-- | arg_parser.cc | 2 | ||||
-rw-r--r-- | arg_parser.h | 2 | ||||
-rw-r--r-- | block.cc | 2 | ||||
-rw-r--r-- | block.h | 9 | ||||
-rwxr-xr-x | configure | 6 | ||||
-rw-r--r-- | decoder.cc | 72 | ||||
-rw-r--r-- | decoder.h | 11 | ||||
-rw-r--r-- | doc/lziprecover.1 | 19 | ||||
-rw-r--r-- | doc/lziprecover.info | 207 | ||||
-rw-r--r-- | doc/lziprecover.texi | 187 | ||||
-rw-r--r-- | file_index.cc | 42 | ||||
-rw-r--r-- | file_index.h | 9 | ||||
-rw-r--r-- | list.cc | 8 | ||||
-rw-r--r-- | lzip.h | 69 | ||||
-rw-r--r-- | main.cc | 382 | ||||
-rw-r--r-- | main_common.cc | 123 | ||||
-rw-r--r-- | merge.cc | 42 | ||||
-rw-r--r-- | mtester.cc | 2 | ||||
-rw-r--r-- | mtester.h | 2 | ||||
-rw-r--r-- | range_dec.cc | 31 | ||||
-rw-r--r-- | repair.cc | 42 | ||||
-rw-r--r-- | split.cc | 42 | ||||
-rwxr-xr-x | testsuite/check.sh | 111 | ||||
-rw-r--r-- | trailing_data.cc | 144 | ||||
-rw-r--r-- | unzcrash.cc | 213 |
31 files changed, 1235 insertions, 678 deletions
@@ -1,3 +1,20 @@ +2018-02-12 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.20 released. + * split.cc: Fixed splitting of files > 64 KiB broken since 1.16. + * main.cc: Added new option '--dump-tdata'. + * main.cc: Added new option '--remove-tdata'. + * main.cc: Added new option '--strip-tdata'. + * main.cc: Added new option '--loose-trailing'. + * Improved corrupt header detection to HD=3. + * main.cc: Show corrupt or truncated header in multimember file. + * Replaced 'bits/byte' with inverse compression ratio in output. + * Show progress of decompression at verbosity level 2 (-vv). + * Show progress of decompression only if stderr is a terminal. + * main.cc: Show final diagnostic when testing multiple files. + * decoder.cc (verify_trailer): Show stored sizes also in hex. + Show dictionary size at verbosity level 4 (-vvvv). + 2017-04-10 Antonio Diaz Diaz <antonio@gnu.org> * Version 1.19 released. @@ -44,8 +61,8 @@ * lzip.texi: Added chapter 'Trailing data'. * configure: Avoid warning on some shells when testing for g++. * Makefile.in: Detect the existence of install-info. - * testsuite/check.sh: Don't check error messages. - * testsuite/check.sh: A POSIX shell is required to run the tests. + * check.sh: Don't check error messages. + * check.sh: A POSIX shell is required to run the tests. 2015-05-28 Antonio Diaz Diaz <antonio@gnu.org> @@ -86,8 +103,7 @@ * split.cc: In verbose mode show names of files being created. * main.cc (show_header): Show header version if verbosity >= 4. * configure: Options now accept a separate argument. - * Makefile.in: Added new target 'install-as-lzip'. - * Makefile.in: Added new target 'install-bin'. + * Makefile.in: Added new targets 'install-as-lzip', 'install-bin'. * main.cc: Use 'setmode' instead of '_setmode' on Windows and OS/2. 2012-02-24 Antonio Diaz Diaz <ant_diaz@teleline.es> @@ -115,10 +131,10 @@ * Version 1.12 released. * lziprecover.cc: If '-v' is not specified show errors only. 
- * testsuite/unzcrash.cc: Use Arg_parser. - * testsuite/unzcrash.cc: Added new option '-b, --bits'. - * testsuite/unzcrash.cc: Added new option '-p, --position'. - * testsuite/unzcrash.cc: Added new option '-s, --size'. + * unzcrash.cc: Use Arg_parser. + * unzcrash.cc: Added new option '-b, --bits'. + * unzcrash.cc: Added new option '-p, --position'. + * unzcrash.cc: Added new option '-s, --size'. 2010-09-16 Antonio Diaz Diaz <ant_diaz@teleline.es> @@ -143,16 +159,16 @@ * Version 1.6 released. * Added man page for lziprecover. - * testsuite/check.sh: Test lziprecover. + * check.sh: Test lziprecover. 2009-01-24 Antonio Diaz Diaz <ant_diaz@teleline.es> * Version 1.4 released. * Added 'lziprecover', a member recoverer program. - * testsuite/unzcrash.cc: Test all 1-byte errors. + * unzcrash.cc: Test all 1-byte errors. -Copyright (C) 2009-2017 Antonio Diaz Diaz. +Copyright (C) 2009-2018 Antonio Diaz Diaz. This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute and @@ -65,7 +65,7 @@ After running 'configure', you can run 'make' and 'make install' as explained above. -Copyright (C) 2009-2017 Antonio Diaz Diaz. +Copyright (C) 2009-2018 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. 
diff --git a/Makefile.in b/Makefile.in index d3f1edb..76c06cc 100644 --- a/Makefile.in +++ b/Makefile.in @@ -8,7 +8,7 @@ SHELL = /bin/sh CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1 objs = arg_parser.o alone_to_lz.o block.o file_index.o list.o merge.o \ - mtester.o range_dec.o repair.o split.o decoder.o main.o + mtester.o range_dec.o repair.o split.o trailing_data.o decoder.o main.o unzobjs = arg_parser.o unzcrash.o @@ -36,20 +36,21 @@ unzcrash.o : unzcrash.cc %.o : %.cc $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< -$(objs) : Makefile -alone_to_lz.o : lzip.h mtester.h -arg_parser.o : arg_parser.h -block.o : block.h -decoder.o : lzip.h decoder.h -file_index.o : lzip.h block.h file_index.h -list.o : lzip.h block.h file_index.h -main.o : arg_parser.h lzip.h decoder.h block.h -merge.o : lzip.h decoder.h block.h file_index.h -mtester.o : lzip.h mtester.h -range_dec.o : lzip.h decoder.h block.h file_index.h -repair.o : lzip.h mtester.h block.h file_index.h -split.o : lzip.h block.h file_index.h -unzcrash.o : arg_parser.h Makefile +$(objs) : Makefile +alone_to_lz.o : lzip.h mtester.h +arg_parser.o : arg_parser.h +block.o : block.h +decoder.o : lzip.h decoder.h +file_index.o : lzip.h block.h file_index.h +list.o : lzip.h block.h file_index.h +main.o : arg_parser.h lzip.h decoder.h block.h main_common.cc +merge.o : lzip.h decoder.h block.h file_index.h +mtester.o : lzip.h mtester.h +range_dec.o : lzip.h decoder.h block.h file_index.h +repair.o : lzip.h mtester.h block.h file_index.h +split.o : lzip.h block.h file_index.h +trailing_data.o : lzip.h block.h file_index.h +unzcrash.o : Makefile arg_parser.h main_common.cc doc : info man @@ -1,17 +1,46 @@ -Changes in version 1.19: +Changes in version 1.20: -'--merge' is now able to fix files with thousands of scattered errors -per member by grouping the errors into clusters and then merging the -files as if each cluster were a single error. 
+Splitting was broken for files larger than 64 KiB because of a bug +introduced in version 1.16. -The option '-a, --trailing-error' now works with '-l, --list' and -'-D, --range-decompress'. +The options "--dump-tdata", "--remove-tdata", and "--strip-tdata" have +been added to ease the management of metadata stored as trailing data: -The output of option '-l, --list' has been simplified to make it easier -to read. + "--dump-tdata" dumps the trailing data (if any) of one or more regular + files to standard output. -In test mode, lziprecover now continues checking the rest of the files -if any input file is a terminal. + "--remove-tdata" removes the trailing data from regular files in place. -Trailing data are now shown both in hexadecimal and as a string of -printable ASCII characters. + "--strip-tdata" copies one or more regular files to standard output, + stripping the trailing data (if any) from each file. + +The option '--loose-trailing' has been added. + +The test used by lziprecover to discriminate trailing data from a corrupt +header in multimember or concatenated files has been improved to a +Hamming distance (HD) of 3, and the 3 bit flips must happen in different +magic bytes for the test to fail. As a consequence some kinds of files +no longer can be appended to a lzip file as trailing data unless the +'--loose-trailing' option is used when decompressing. +Lziprecover can be used to remove conflicting trailing data from a file. + +The contents of a corrupt or truncated header found in a multimember +file are now shown, after the error message, in the same format as +trailing data. + +The 'bits/byte' ratio has been replaced with the inverse compression +ratio in the output. + +The progress of decompression is now shown at verbosity level 2 (-vv) or +higher. + +Progress of decompression is only shown if stderr is a terminal. + +A final diagnostic is now shown at verbosity level 1 (-v) or higher if +any file fails the test when testing multiple files. 
+ +In case of (de)compressed size mismatch, the stored size is now also +shown in hexadecimal to ease visual comparison. + +The dictionary size is now shown at verbosity level 4 (-vvvv) when +decompressing or testing. @@ -17,7 +17,7 @@ archiving, taking into account both data integrity and decoder availability: * The lzip format provides very safe integrity checking and some data - recovery means. The lziprecover program can repair bit-flip errors + recovery means. The lziprecover program can repair bit flip errors (one of the most common forms of data corruption) in lzip files, and provides data recovery capabilities, including error-checked merging of damaged copies of a file. @@ -75,7 +75,7 @@ unzcrash.c from Julian Seward's bzip2. Type 'make unzcrash' in the lziprecover source directory to build it. Then try 'unzcrash --help'. -Copyright (C) 2009-2017 Antonio Diaz Diaz. +Copyright (C) 2009-2018 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/alone_to_lz.cc b/alone_to_lz.cc index e87a18a..e949f9d 100644 --- a/alone_to_lz.cc +++ b/alone_to_lz.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -85,7 +85,7 @@ int alone_to_lz( const int infd, const Pretty_print & pp ) long file_size = 0; uint8_t * const buffer = read_file( infd, &file_size, pp ); if( !buffer ) return 1; - if( pp.verbosity() >= 1 ) pp(); + if( verbosity >= 1 ) pp(); if( file_size < lzma_header_size ) { pp( "file is too short" ); std::free( buffer ); return 2; } @@ -144,6 +144,6 @@ int alone_to_lz( const int infd, const Pretty_print & pp ) } catch( std::bad_alloc ) { pp( "Not enough memory." 
); return 1; } catch( Error e ) { pp(); show_error( e.msg, errno ); return 1; } - if( pp.verbosity() >= 1 ) std::fputs( "done\n", stderr ); + if( verbosity >= 1 ) std::fputs( "done\n", stderr ); return 0; } diff --git a/arg_parser.cc b/arg_parser.cc index cc7d1e2..008ebc8 100644 --- a/arg_parser.cc +++ b/arg_parser.cc @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C++ version) - Copyright (C) 2006-2017 Antonio Diaz Diaz. + Copyright (C) 2006-2018 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided diff --git a/arg_parser.h b/arg_parser.h index 95b0320..f015881 100644 --- a/arg_parser.h +++ b/arg_parser.h @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C++ version) - Copyright (C) 2006-2017 Antonio Diaz Diaz. + Copyright (C) 2006-2018 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -54,10 +54,9 @@ public: // defined in range_dec.cc int range_decompress( const std::string & input_filename, const std::string & default_output_filename, - Block range, const int verbosity, const bool force, - const bool ignore_errors, const bool ignore_trailing, + Block range, const bool force, const bool ignore_errors, + const bool ignore_trailing, const bool loose_trailing, const bool to_stdout ); // defined in repair.cc -int debug_delay( const std::string & input_filename, Block range, - const int verbosity ); +int debug_delay( const std::string & input_filename, Block range ); @@ -1,12 +1,12 @@ #! /bin/sh # configure script for Lziprecover - Data recovery tool for the lzip format -# Copyright (C) 2009-2017 Antonio Diaz Diaz. +# Copyright (C) 2009-2018 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. pkgname=lziprecover -pkgversion=1.19 +pkgversion=1.20 progname=lziprecover srctrigger=doc/${pkgname}.texi @@ -168,7 +168,7 @@ echo "LDFLAGS = ${LDFLAGS}" rm -f Makefile cat > Makefile << EOF # Makefile for Lziprecover - Data recovery tool for the lzip format -# Copyright (C) 2009-2017 Antonio Diaz Diaz. +# Copyright (C) 2009-2018 Antonio Diaz Diaz. # This file was generated automatically by configure. Don't edit. # # This Makefile is free software: you have unlimited permission @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -34,23 +34,6 @@ const CRC32 crc32; -void Pretty_print::operator()( const char * const msg, FILE * const f ) const - { - if( verbosity_ >= 0 ) - { - if( first_post ) - { - first_post = false; - std::fprintf( f, " %s: ", name_.c_str() ); - for( unsigned i = name_.size(); i < longest_name; ++i ) - std::fputc( ' ', f ); - if( !msg ) std::fflush( f ); - } - if( msg ) std::fprintf( f, "%s\n", msg ); - } - } - - /* Returns the number of bytes really read. If (returned value < size) and (errno == 0), means EOF was reached. */ @@ -97,6 +80,7 @@ bool Range_decoder::read_block() at_stream_end = ( stream_pos < buffer_size ); partial_member_pos += pos; pos = 0; + show_dprogress(); } return pos < stream_pos; } @@ -130,7 +114,6 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const int size = rdec.read_data( trailer.data, File_trailer::size ); const unsigned long long data_size = data_position(); const unsigned long long member_size = rdec.member_position(); - const int verbosity = pp.verbosity(); bool error = false; if( size < File_trailer::size ) @@ -145,50 +128,61 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const while( size < File_trailer::size ) trailer.data[size++] = 0; } - if( trailer.data_crc() != crc() ) + const unsigned td_crc = trailer.data_crc(); + if( td_crc != crc() ) { error = true; if( verbosity >= 0 ) { pp(); - std::fprintf( stderr, "CRC mismatch; trailer says %08X, data CRC is %08X\n", - trailer.data_crc(), crc() ); + std::fprintf( stderr, "CRC mismatch; stored %08X, computed %08X\n", + td_crc, crc() ); } } - if( trailer.data_size() != data_size ) + const unsigned long long td_size = trailer.data_size(); + if( td_size != data_size ) { error = true; if( verbosity >= 0 ) { pp(); - std::fprintf( stderr, "Data size mismatch; trailer says %llu, data size is %llu (0x%llX)\n", - trailer.data_size(), 
data_size, data_size ); + std::fprintf( stderr, "Data size mismatch; stored %llu (0x%llX), computed %llu (0x%llX)\n", + td_size, td_size, data_size, data_size ); } } - if( trailer.member_size() != member_size ) + const unsigned long long tm_size = trailer.member_size(); + if( tm_size != member_size ) { error = true; if( verbosity >= 0 ) { pp(); - std::fprintf( stderr, "Member size mismatch; trailer says %llu, member size is %llu (0x%llX)\n", - trailer.member_size(), member_size, member_size ); + std::fprintf( stderr, "Member size mismatch; stored %llu (0x%llX), computed %llu (0x%llX)\n", + tm_size, tm_size, member_size, member_size ); } } - if( !error && verbosity >= 2 && data_size > 0 && member_size > 0 ) - std::fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ", - (double)data_size / member_size, - ( 8.0 * member_size ) / data_size, - 100.0 * ( 1.0 - ( (double)member_size / data_size ) ) ); - if( !error && verbosity >= 4 ) - std::fprintf( stderr, "CRC %08X, decompressed %9llu, compressed %8llu. ", - crc(), data_size, member_size ); - if( rdec.get_code() != 0 && !error && verbosity >= 1 ) + if( error ) return false; + if( verbosity >= 2 ) + { + if( verbosity >= 4 ) show_header( dictionary_size ); + if( data_size == 0 || member_size == 0 ) + std::fputs( "no data compressed. ", stderr ); + else + std::fprintf( stderr, "%6.3f:1, %5.2f%% ratio, %5.2f%% saved. ", + (double)data_size / member_size, + ( 100.0 * member_size ) / data_size, + 100.0 - ( ( 100.0 * member_size ) / data_size ) ); + if( verbosity >= 4 ) std::fprintf( stderr, "CRC %08X, ", td_crc ); + if( verbosity >= 3 ) + std::fprintf( stderr, "decompressed %9llu, compressed %8llu. 
", + data_size, member_size ); + } + if( rdec.get_code() != 0 && verbosity >= 1 ) { // corruption in the last 4 bytes of the EOS marker pp(); std::fprintf( stderr, "Range decoder final code is %08X\n", rdec.get_code() ); } - return !error; + return true; } @@ -284,7 +278,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) { rdec.load(); continue; } - if( pp.verbosity() >= 0 ) + if( verbosity >= 0 ) { pp(); std::fprintf( stderr, "Unsupported marker code '%d'\n", len ); @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -227,11 +227,7 @@ class LZ_decoder bool verify_trailer( const Pretty_print & pp ) const; uint8_t peek_prev() const - { - if( pos > 0 ) return buffer[pos-1]; - if( pos_wrapped ) return buffer[dictionary_size-1]; - return 0; // prev_byte of first byte - } + { return buffer[((pos > 0) ? pos : dictionary_size)-1]; } uint8_t peek( const unsigned distance ) const { @@ -296,7 +292,8 @@ public: crc_( 0xFFFFFFFFU ), outfd( ofd ), pos_wrapped( false ) - {} + // prev_byte of first byte; also for peek( 0 ) on corrupt file + { buffer[dictionary_size-1] = 0; } ~LZ_decoder() { delete[] buffer; } diff --git a/doc/lziprecover.1 b/doc/lziprecover.1 index 31440f8..d52e2e6 100644 --- a/doc/lziprecover.1 +++ b/doc/lziprecover.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1. -.TH LZIPRECOVER "1" "April 2017" "lziprecover 1.19" "User Commands" +.TH LZIPRECOVER "1" "February 2018" "lziprecover 1.20" "User Commands" .SH NAME lziprecover \- recovers data from damaged lzip files .SH SYNOPSIS @@ -20,6 +20,9 @@ files and test integrity of files. Lziprecover provides random access to the data in multimember files; it only decompresses the members containing the desired data. 
.PP +Lziprecover facilitates the management of metadata stored as trailing +data in lzip files. +.PP Lziprecover is not a replacement for regular backups, but a last line of defense for the case where the backups are also damaged. .SH OPTIONS @@ -77,6 +80,18 @@ test compressed file integrity .TP \fB\-v\fR, \fB\-\-verbose\fR be verbose (a 2nd \fB\-v\fR gives more) +.TP +\fB\-\-loose\-trailing\fR +allow trailing data seeming corrupt header +.TP +\fB\-\-dump\-tdata\fR +dump trailing data to standard output +.TP +\fB\-\-remove\-tdata\fR +remove trailing data from files in place +.TP +\fB\-\-strip\-tdata\fR +copy files to stdout without trailing data .PP If no file names are given, or if a file is '\-', lziprecover decompresses from standard input to standard output. @@ -92,7 +107,7 @@ Report bugs to lzip\-bug@nongnu.org .br Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html .SH COPYRIGHT -Copyright \(co 2017 Antonio Diaz Diaz. +Copyright \(co 2018 Antonio Diaz Diaz. License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> .br This is free software: you are free to change and redistribute it. diff --git a/doc/lziprecover.info b/doc/lziprecover.info index 4b3a8fb..55d044b 100644 --- a/doc/lziprecover.info +++ b/doc/lziprecover.info @@ -12,14 +12,14 @@ File: lziprecover.info, Node: Top, Next: Introduction, Up: (dir) Lziprecover Manual ****************** -This manual is for Lziprecover (version 1.19, 10 April 2017). +This manual is for Lziprecover (version 1.20, 12 February 2018). 
* Menu: * Introduction:: Purpose and features of lziprecover * Invoking lziprecover:: Command line interface * Data safety:: Protecting data from accidental loss -* Repairing files:: Fixing bit-flip and similar errors +* Repairing files:: Fixing bit flips and similar errors * Merging files:: Fixing several damaged copies * File names:: Names of the files produced by lziprecover * File format:: Detailed format of the compressed file @@ -30,7 +30,7 @@ This manual is for Lziprecover (version 1.19, 10 April 2017). * Concept index:: Index of concepts - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute and modify it. @@ -58,7 +58,7 @@ archiving, taking into account both data integrity and decoder availability: * The lzip format provides very safe integrity checking and some data - recovery means. The lziprecover program can repair bit-flip errors + recovery means. The lziprecover program can repair bit flip errors (one of the most common forms of data corruption) in lzip files, and provides data recovery capabilities, including error-checked merging of damaged copies of a file. *Note Data safety::. @@ -123,7 +123,7 @@ When decompressing or testing, '-' used as a FILE argument means standard input. It can be mixed with other FILES and is read just once, the first time it appears in the command line. - Lziprecover supports the following options: + lziprecover supports the following options: '-h' '--help' @@ -162,24 +162,25 @@ the first time it appears in the command line. Write decompressed data to standard output; keep input files unchanged. This option is needed when reading from a named pipe (fifo) or from a device. Use it also to recover as much of the - uncompressed data as possible when decompressing a corrupt file. + decompressed data as possible when decompressing a corrupt file. '-d' '--decompress' - Decompress the specified file(s). 
If a file does not exist or - can't be opened, lziprecover continues decompressing the rest of - the files. If a file fails to decompress, lziprecover exits - immediately without decompressing the rest of the files. + Decompress the specified files. If a file does not exist or can't + be opened, lziprecover continues decompressing the rest of the + files. If a file fails to decompress, or is a terminal, + lziprecover exits immediately without decompressing the rest of + the files. '-D RANGE' '--range-decompress=RANGE' Decompress only a range of bytes starting at decompressed byte - position 'BEGIN' and up to byte position 'END - 1'. This option - provides random access to the data in multimember files; it only - decompresses the members containing the desired data. In order to - guarantee the correctness of the data produced, all members - containing any part of the desired data are decompressed and their - integrity is verified. + position 'BEGIN' and up to byte position 'END - 1'. Byte + positions start at 0. This option provides random access to the + data in multimember files; it only decompresses the members + containing the desired data. In order to guarantee the correctness + of the data produced, all members containing any part of the + desired data are decompressed and their integrity is verified. Four formats of RANGE are recognized, 'BEGIN', 'BEGIN-END', 'BEGIN,SIZE', and ',SIZE'. If only BEGIN is specified, END is taken @@ -206,7 +207,7 @@ the first time it appears in the command line. '-l' '--list' Print the uncompressed size, compressed size and percentage saved - of the specified file(s). Trailing data are ignored. The values + of the specified files. Trailing data are ignored. The values produced are correct even for multimember files. If more than one file is given, a final line containing the cumulative sizes is printed. With '-v', the dictionary size, the number of members in @@ -268,12 +269,13 @@ the first time it appears in the command line. 
'-t' '--test' - Check integrity of the specified file(s), but don't decompress - them. This really performs a trial decompression and throws away - the result. Use it together with '-v' to see information about - the file(s). If a file fails the test, does not exist, can't be - opened, or is a terminal, lziprecover continues checking the rest - of the files. + Check integrity of the specified files, but don't decompress them. + This really performs a trial decompression and throws away the + result. Use it together with '-v' to see information about the + files. If a file fails the test, does not exist, can't be opened, + or is a terminal, lziprecover continues checking the rest of the + files. A final diagnostic is shown at verbosity level 1 or higher + if any file fails the test when testing multiple files. '-v' '--verbose' @@ -283,10 +285,46 @@ the first time it appears in the command line. size, trailer contents (CRC, data size, member size), and up to 6 bytes of trailing data (if any) both in hexadecimal and as a string of printable ASCII characters. + Two or more '-v' options show the progress of decompression. In other modes, increasing verbosity levels show final status, progress of operations, and extra information (for example, the failed areas). +'--loose-trailing' + When decompressing, testing or listing, allow trailing data whose + first bytes are so similar to the magic bytes of a lzip header + that they can be confused with a corrupt header. Use this option + if a file triggers a "corrupt header" error and the cause is not + indeed a corrupt header. + +'--dump-tdata' + Dump the trailing data (if any) of one or more regular files to + standard output, or to a file if the '--output' option is used. If + more than one file is given, the trailing data of all files are + concatenated. If a file does not exist, can't be opened, or is not + regular, lziprecover continues processing the rest of the files. 
+ If the dump fails in one file, lziprecover exits immediately + without processing the rest of the files. + +'--remove-tdata' + Remove the trailing data from regular files in place. The date of + each file is preserved if possible. If the removal fails in one + file, lziprecover continues processing the rest of the files. This + option may be dangerous if the file is corrupt or if the trailing + data contain a forbidden combination of characters. *Note Trailing + data::. Verify that 'lzip -cd file.lz | wc -c' and the + uncompressed size shown by 'lzip -l file.lz' match before + attempting the removal. + +'--strip-tdata' + Copy one or more regular files to standard output (or to a file if + the '--output' option is used), stripping the trailing data (if + any) from each file. If more than one file is given, the files are + concatenated. If a file does not exist, can't be opened, or is not + regular, lziprecover continues processing the rest of the files. + If a file fails to copy, lziprecover exits immediately without + processing the rest of the files. + Numbers given as arguments to options may be followed by a multiplier and an optional 'B' for "byte". @@ -336,8 +374,8 @@ scientific data, compressed it, and stored two copies on separate media. Years later you notice that both copies are corrupt. If you compressed with gzip and both copies suffer any damage in the -data stream, even if it is just one altered bit, the original data can't -be recovered. +data stream, even if it is just one altered bit, the original data can +only be recovered by an expert, if at all. If you used bzip2, and if the file is large enough to contain more than one compressed data block (usually larger than 900 kB @@ -363,7 +401,7 @@ Lziprecover can repair perfectly most files with small errors (up to one single-byte error per member), without the need of any extra redundance at all. If the reparation is successful, the repaired file will be identical bit for bit to the original. 
This makes lzip files resistant -to bit-flip, one of the most common forms of data corruption. +to bit flip, one of the most common forms of data corruption. The error may be located anywhere in the file except in the first 5 bytes of each member header or in the 'Member size' field of the @@ -372,9 +410,9 @@ can be easily repaired with a text editor like GNU Moe (*note File format::). If the error is in the member size, it is enough to ignore the message about 'bad member size' when decompressing. - Bit-flip happens when one bit in the file is changed from 0 to 1 or + Bit flip happens when one bit in the file is changed from 0 to 1 or vice versa. It may be caused by bad RAM or even by natural radiation. I -have seen a case of bit-flip in a file stored on an USB flash drive. +have seen a case of bit flip in a file stored on an USB flash drive. One byte may seem small, but most file corruptions not produced by transmission errors or I/O errors just affect one byte, or even one bit, @@ -547,10 +585,11 @@ member. Such trailing data may be: * Useful data added by the user; a cryptographically secure hash, a description of file contents, etc. It is safe to append any amount - of text to a lzip file as long as the text does not begin with the - string "LZIP", and does not contain any zero bytes (null - characters). Nonzero bytes and zero bytes can't be safely mixed in - trailing data. + of text to a lzip file as long as none of the first four bytes of + the text match the corresponding byte in the string "LZIP", and + the text does not contain any zero bytes (null characters). + Nonzero bytes and zero bytes can't be safely mixed in trailing + data. * Garbage added by some not totally successful copy operation. @@ -558,12 +597,17 @@ member. Such trailing data may be: and hash value (for a chosen hash) coincide with those of another file. - * In very rare cases, trailing data could be the corrupt header of - another member. 
In multimember or concatenated files the - probability of corruption happening in the magic bytes is 5 times - smaller than the probability of getting a false positive caused by - the corruption of the integrity information itself. Therefore it - can be considered to be below the noise level. + * In rare cases, trailing data could be the corrupt header of another + member. In multimember or concatenated files the probability of + corruption happening in the magic bytes is 5 times smaller than the + probability of getting a false positive caused by the corruption + of the integrity information itself. Therefore it can be + considered to be below the noise level. Additionally, the test + used by lziprecover to discriminate trailing data from a corrupt + header has a Hamming distance (HD) of 3, and the 3 bit flips must + happen in different magic bytes for the test to fail. In any case, + the option '--trailing-error' guarantees that any corrupt header + will be detected. Trailing data are in no way part of the lzip file format, but tools reading lzip files are expected to behave as correctly and usefully as @@ -574,6 +618,30 @@ like that of user-added data, they are expected to be ignored. In those cases where a file containing trailing data must be rejected, the option '--trailing-error' can be used. *Note --trailing-error::. + Lziprecover facilitates the management of metadata stored as trailing +data in lzip files. See the following examples: + +Example 1: Add a comment or description to a compressed file. + + # First append the comment as trailing data to a lzip file + echo 'This file contains this and that' >> file.lz + # This command prints the comment to standard output + lziprecover --dump-tdata file.lz + # This command outputs file.lz without the comment + lziprecover --strip-tdata file.lz + # This command removes the comment from file.lz + lziprecover --remove-tdata file.lz + + +Example 2: Add and verify a cryptographically secure hash. 
(This may be +convenient, but a separate copy of the hash must be kept in a safe place +to guarantee that both file and hash have not been maliciously +replaced). + + sha256sum < file.lz >> file.lz + lziprecover --strip-tdata file.lz | sha256sum -c \ + <(lziprecover --dump-tdata file.lz) + File: lziprecover.info, Node: Examples, Next: Unzcrash, Prev: Trailing data, Up: Top @@ -674,7 +742,9 @@ lziprecover source directory to build it. By default, unzcrash reads the specified file and then repeatedly decompresses it, increasing 256 times each byte of the compressed data, -so as to test all possible one-byte errors. +so as to test all possible one-byte errors. Note that it may take years +or even centuries to test all possible one-byte errors in a large file +(tens of MB). If the '--block' option is given, unzcrash reads the specified file and then repeatedly decompresses it, setting all bytes in each @@ -711,9 +781,9 @@ by 'zutils'. *Note Zcmp: (zutils)Zcmp, The format for running unzcrash is: - unzcrash [OPTIONS] "lzip -tv" FILENAME.lz + unzcrash [OPTIONS] 'lzip -t' FILE.lz - Unzcrash supports the following options: + unzcrash supports the following options: '-h' '--help' @@ -742,25 +812,35 @@ by 'zutils'. *Note Zcmp: (zutils)Zcmp, '-B[SIZE][,VALUE]' '--block[=SIZE][,VALUE]' - Test block errors of given SIZE aligned to a SIZE-byte boundary, - simulating a whole sector I/O error. Block SIZE defaults to 512 - bytes. VALUE defaults to 0. + Test block errors of given SIZE, simulating a whole sector I/O + error. Block SIZE defaults to 512 bytes. VALUE defaults to 0. By + default, only blocks aligned to a SIZE-byte boundary are tested, + but this may be changed with the '--delta' option. '-d N' '--delta=N' - Test only one of every N bytes, blocks or truncation sizes, - instead of all of them. + Test only one byte, block, or truncation size every N bytes, + instead of all of them. If the '--block' option is given, N + defaults to the block size. Else N defaults to 1. 
Values of N + smaller than the block size will result in overlapping blocks. + (Which is convenient for testing because there are usually too few + non-overlapping blocks in a file). '-e POSITION,VALUE' '--set-byte=POSITION,VALUE' Set byte at POSITION to VALUE in the internal buffer after reading - and testing FILENAME.lz but before the first test call to the + and testing FILE.lz but before the first test call to the decompressor. If VALUE is preceded by '+', it is added to the original value of the byte at POSITION. If VALUE is preceded by 'f' (flip), it is XORed with the original value of the byte at POSITION. This option can be used to run tests with a changed dictionary size, for example. +'-n' +'--no-verify' + Skip initial verification of FILE.lz and 'zcmp'. May speed up + things a lot when testing many (or large) known good files. + '-p BYTES' '--position=BYTES' First byte position to test in the file. Defaults to 0. Negative @@ -829,29 +909,32 @@ Concept index * introduction: Introduction. (line 6) * invoking: Invoking lziprecover. (line 6) * merging files: Merging files. (line 6) +* options: Invoking lziprecover. (line 6) * repairing files: Repairing files. (line 6) * trailing data: Trailing data. (line 6) * unzcrash: Unzcrash. (line 6) +* usage: Invoking lziprecover. (line 6) +* version: Invoking lziprecover. 
(line 6) Tag Table: Node: Top231 -Node: Introduction1269 -Node: Invoking lziprecover4646 -Ref: --trailing-error5296 -Node: Data safety12788 -Node: Repairing files14712 -Node: Merging files16635 -Node: File names19397 -Node: File format19861 -Node: Trailing data22289 -Node: Examples24195 -Ref: concat-example24626 -Ref: ddrescue-example25727 -Node: Unzcrash27017 -Node: Problems32021 -Node: Concept index32573 +Node: Introduction1273 +Node: Invoking lziprecover4650 +Ref: --trailing-error5300 +Node: Data safety14832 +Node: Repairing files16783 +Node: Merging files18706 +Node: File names21468 +Node: File format21932 +Node: Trailing data24360 +Node: Examples27595 +Ref: concat-example28026 +Ref: ddrescue-example29127 +Node: Unzcrash30417 +Node: Problems36055 +Node: Concept index36607 End Tag Table diff --git a/doc/lziprecover.texi b/doc/lziprecover.texi index ae3be14..a15e710 100644 --- a/doc/lziprecover.texi +++ b/doc/lziprecover.texi @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 10 April 2017 -@set VERSION 1.19 +@set UPDATED 12 February 2018 +@set VERSION 1.20 @dircategory Data Compression @direntry @@ -38,7 +38,7 @@ This manual is for Lziprecover (version @value{VERSION}, @value{UPDATED}). * Introduction:: Purpose and features of lziprecover * Invoking lziprecover:: Command line interface * Data safety:: Protecting data from accidental loss -* Repairing files:: Fixing bit-flip and similar errors +* Repairing files:: Fixing bit flips and similar errors * Merging files:: Fixing several damaged copies * File names:: Names of the files produced by lziprecover * File format:: Detailed format of the compressed file @@ -50,7 +50,7 @@ This manual is for Lziprecover (version @value{VERSION}, @value{UPDATED}). @end menu @sp 1 -Copyright @copyright{} 2009-2017 Antonio Diaz Diaz. +Copyright @copyright{} 2009-2018 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute and modify it. 
@@ -79,7 +79,7 @@ availability: @itemize @bullet @item The lzip format provides very safe integrity checking and some data -recovery means. The lziprecover program can repair bit-flip errors (one +recovery means. The lziprecover program can repair bit flip errors (one of the most common forms of data corruption) in lzip files, and provides data recovery capabilities, including error-checked merging of damaged copies of a file. @xref{Data safety}. @@ -111,8 +111,8 @@ the compressors in the lzip family; lzip, plzip, minilzip/lzlib, clzip and pdlzip. If the cause of file corruption is damaged media, the combination -@w{GNU ddrescue + lziprecover} is the best option for recovering data -from multiple damaged copies. @xref{ddrescue-example}, for an example. +@w{GNU ddrescue + lziprecover} is the best option for recovering data from +multiple damaged copies. @xref{ddrescue-example}, for an example. If a file is too damaged for lziprecover to repair it, all the recoverable data in all members of the file can be extracted with the @@ -139,6 +139,9 @@ undergone the process of decompression. @node Invoking lziprecover @chapter Invoking lziprecover @cindex invoking +@cindex options +@cindex usage +@cindex version The format for running lziprecover is: @@ -151,7 +154,7 @@ When decompressing or testing, @samp{-} used as a @var{file} argument means standard input. It can be mixed with other @var{files} and is read just once, the first time it appears in the command line. -Lziprecover supports the following options: +lziprecover supports the following options: @table @code @item -h @@ -191,25 +194,25 @@ lzma-alone file as follows: @itemx --stdout Write decompressed data to standard output; keep input files unchanged. This option is needed when reading from a named pipe (fifo) or from a -device. Use it also to recover as much of the uncompressed data as +device. Use it also to recover as much of the decompressed data as possible when decompressing a corrupt file. 
@item -d @itemx --decompress -Decompress the specified file(s). If a file does not exist or can't be -opened, lziprecover continues decompressing the rest of the files. If a -file fails to decompress, lziprecover exits immediately without +Decompress the specified files. If a file does not exist or can't be +opened, lziprecover continues decompressing the rest of the files. If a file +fails to decompress, or is a terminal, lziprecover exits immediately without decompressing the rest of the files. @item -D @var{range} @itemx --range-decompress=@var{range} Decompress only a range of bytes starting at decompressed byte position @samp{@var{begin}} and up to byte position @w{@samp{@var{end} - 1}}. -This option provides random access to the data in multimember files; it -only decompresses the members containing the desired data. In order to -guarantee the correctness of the data produced, all members containing -any part of the desired data are decompressed and their integrity is -verified. +Byte positions start at 0. This option provides random access to the +data in multimember files; it only decompresses the members containing +the desired data. In order to guarantee the correctness of the data +produced, all members containing any part of the desired data are +decompressed and their integrity is verified. Four formats of @var{range} are recognized, @samp{@var{begin}}, @samp{@var{begin}-@var{end}}, @samp{@var{begin},@var{size}}, and @@ -237,7 +240,7 @@ Keep (don't delete) input files during decompression. @item -l @itemx --list Print the uncompressed size, compressed size and percentage saved of the -specified file(s). Trailing data are ignored. The values produced are +specified files. Trailing data are ignored. The values produced are correct even for multimember files. If more than one file is given, a final line containing the cumulative sizes is printed. 
With @samp{-v}, the dictionary size, the number of members in the file, and the amount @@ -297,11 +300,13 @@ on the number of members in @samp{@var{file}}. @item -t @itemx --test -Check integrity of the specified file(s), but don't decompress them. -This really performs a trial decompression and throws away the result. -Use it together with @samp{-v} to see information about the file(s). If -a file fails the test, does not exist, can't be opened, or is a -terminal, lziprecover continues checking the rest of the files. +Check integrity of the specified files, but don't decompress them. This +really performs a trial decompression and throws away the result. Use it +together with @samp{-v} to see information about the files. If a file +fails the test, does not exist, can't be opened, or is a terminal, lziprecover +continues checking the rest of the files. A final diagnostic is shown at +verbosity level 1 or higher if any file fails the test when testing +multiple files. @item -v @itemx --verbose @@ -311,9 +316,43 @@ verbosity level, showing status, compression ratio, dictionary size, trailer contents (CRC, data size, member size), and up to 6 bytes of trailing data (if any) both in hexadecimal and as a string of printable ASCII characters.@* +Two or more @samp{-v} options show the progress of decompression.@* In other modes, increasing verbosity levels show final status, progress of operations, and extra information (for example, the failed areas). +@item --loose-trailing +When decompressing, testing or listing, allow trailing data whose first +bytes are so similar to the magic bytes of a lzip header that they can +be confused with a corrupt header. Use this option if a file triggers a +"corrupt header" error and the cause is not indeed a corrupt header. + +@item --dump-tdata +Dump the trailing data (if any) of one or more regular files to standard +output, or to a file if the @samp{--output} option is used. 
If more than +one file is given, the trailing data of all files are concatenated. If a +file does not exist, can't be opened, or is not regular, lziprecover +continues processing the rest of the files. If the dump fails in one +file, lziprecover exits immediately without processing the rest of the +files. + +@item --remove-tdata +Remove the trailing data from regular files in place. The date of each +file is preserved if possible. If the removal fails in one file, +lziprecover continues processing the rest of the files. This option may +be dangerous if the file is corrupt or if the trailing data contain a +forbidden combination of characters. @xref{Trailing data}. Verify that +@w{@samp{lzip -cd file.lz | wc -c}} and the uncompressed size shown by +@w{@samp{lzip -l file.lz}} match before attempting the removal. + +@item --strip-tdata +Copy one or more regular files to standard output (or to a file if the +@samp{--output} option is used), stripping the trailing data (if any) +from each file. If more than one file is given, the files are +concatenated. If a file does not exist, can't be opened, or is not +regular, lziprecover continues processing the rest of the files. If a +file fails to copy, lziprecover exits immediately without processing the +rest of the files. + @end table Numbers given as arguments to options may be followed by a multiplier @@ -365,12 +404,12 @@ compressed it, and stored two copies on separate media. Years later you notice that both copies are corrupt. If you compressed with gzip and both copies suffer any damage in the -data stream, even if it is just one altered bit, the original data can't -be recovered. +data stream, even if it is just one altered bit, the original data can +only be recovered by an expert, if at all. 
If you used bzip2, and if the file is large enough to contain more than -one compressed data block (usually larger than 900 kB uncompressed), and -if no block is damaged in both files, then the data can be manually +one compressed data block (usually larger than @w{900 kB} uncompressed), +and if no block is damaged in both files, then the data can be manually recovered by splitting the files with bzip2recover, verifying every block and then copying the right blocks in the right order into another file. @@ -391,7 +430,7 @@ Lziprecover can repair perfectly most files with small errors (up to one single-byte error per member), without the need of any extra redundance at all. If the reparation is successful, the repaired file will be identical bit for bit to the original. This makes lzip files resistant -to bit-flip, one of the most common forms of data corruption. +to bit flip, one of the most common forms of data corruption. The error may be located anywhere in the file except in the first 5 bytes of each member header or in the @samp{Member size} field of the @@ -400,9 +439,9 @@ can be easily repaired with a text editor like GNU Moe (@pxref{File format}). If the error is in the member size, it is enough to ignore the message about @samp{bad member size} when decompressing. -Bit-flip happens when one bit in the file is changed from 0 to 1 or vice +Bit flip happens when one bit in the file is changed from 0 to 1 or vice versa. It may be caused by bad RAM or even by natural radiation. I have -seen a case of bit-flip in a file stored on an USB flash drive. +seen a case of bit flip in a file stored on an USB flash drive. One byte may seem small, but most file corruptions not produced by transmission errors or I/O errors just affect one byte, or even one bit, @@ -463,7 +502,7 @@ into clusters and then merging the files as if each cluster were a single error. Here is a real case of successful merging. 
Two copies of the file -@samp{icecat-3.5.3-x86.tar.lz} (compressed size 9 MB) became corrupt +@samp{icecat-3.5.3-x86.tar.lz} (compressed size @w{9 MB}) became corrupt while stored on the same NAND flash device. One of the copies had 76 single-bit errors scattered in an area of 1020 bytes, and the other had 3028 such errors in an area of 31729 bytes. Lziprecover produced a @@ -592,9 +631,10 @@ padding zero bytes to a lzip file. @item Useful data added by the user; a cryptographically secure hash, a description of file contents, etc. It is safe to append any amount of -text to a lzip file as long as the text does not begin with the string -"LZIP", and does not contain any zero bytes (null characters). Nonzero -bytes and zero bytes can't be safely mixed in trailing data. +text to a lzip file as long as none of the first four bytes of the text +match the corresponding byte in the string "LZIP", and the text does not +contain any zero bytes (null characters). Nonzero bytes and zero bytes +can't be safely mixed in trailing data. @item Garbage added by some not totally successful copy operation. @@ -604,12 +644,16 @@ Malicious data added to the file in order to make its total size and hash value (for a chosen hash) coincide with those of another file. @item -In very rare cases, trailing data could be the corrupt header of another +In rare cases, trailing data could be the corrupt header of another member. In multimember or concatenated files the probability of corruption happening in the magic bytes is 5 times smaller than the probability of getting a false positive caused by the corruption of the integrity information itself. Therefore it can be considered to be below -the noise level. +the noise level. Additionally, the test used by lziprecover to discriminate +trailing data from a corrupt header has a Hamming distance (HD) of 3, +and the 3 bit flips must happen in different magic bytes for the test to +fail. 
In any case, the option @samp{--trailing-error} guarantees that +any corrupt header will be detected. @end itemize Trailing data are in no way part of the lzip file format, but tools @@ -621,6 +665,35 @@ that of user-added data, they are expected to be ignored. In those cases where a file containing trailing data must be rejected, the option @samp{--trailing-error} can be used. @xref{--trailing-error}. +Lziprecover facilitates the management of metadata stored as trailing +data in lzip files. See the following examples: + +@noindent +Example 1: Add a comment or description to a compressed file. + +@example +# First append the comment as trailing data to a lzip file +echo 'This file contains this and that' >> file.lz +# This command prints the comment to standard output +lziprecover --dump-tdata file.lz +# This command outputs file.lz without the comment +lziprecover --strip-tdata file.lz +# This command removes the comment from file.lz +lziprecover --remove-tdata file.lz +@end example + +@sp 1 +@noindent +Example 2: Add and verify a cryptographically secure hash. (This may be +convenient, but a separate copy of the hash must be kept in a safe place +to guarantee that both file and hash have not been maliciously replaced). + +@example +sha256sum < file.lz >> file.lz +lziprecover --strip-tdata file.lz | sha256sum -c \ + <(lziprecover --dump-tdata file.lz) +@end example + @node Examples @chapter A small tutorial with examples @@ -658,7 +731,7 @@ Do this instead @sp 1 @noindent -Example 4: Decompress @samp{file.lz} partially until 10 KiB of +Example 4: Decompress @samp{file.lz} partially until @w{10 KiB} of decompressed data are produced. @example @@ -756,7 +829,9 @@ lziprecover source directory to build it. By default, unzcrash reads the specified file and then repeatedly decompresses it, increasing 256 times each byte of the compressed data, -so as to test all possible one-byte errors. +so as to test all possible one-byte errors. 
Note that it may take years +or even centuries to test all possible one-byte errors in a large file +(tens of MB). If the @code{--block} option is given, unzcrash reads the specified file and then repeatedly decompresses it, setting all bytes in each @@ -801,10 +876,10 @@ See The format for running unzcrash is: @example -unzcrash [@var{options}] "lzip -tv" @var{filename}.lz +unzcrash [@var{options}] 'lzip -t' @var{file}.lz @end example -Unzcrash supports the following options: +unzcrash supports the following options: @table @code @item -h @@ -835,24 +910,34 @@ The number of N-bit errors per byte (N = 1 to 8) is: @item -B[@var{size}][,@var{value}] @itemx --block[=@var{size}][,@var{value}] -Test block errors of given @var{size} aligned to a @var{size}-byte -boundary, simulating a whole sector I/O error. Block @var{size} defaults -to 512 bytes. @var{value} defaults to 0. +Test block errors of given @var{size}, simulating a whole sector I/O +error. Block @var{size} defaults to 512 bytes. @var{value} defaults to +0. By default, only blocks aligned to a @var{size}-byte boundary are +tested, but this may be changed with the @code{--delta} option. @item -d @var{n} @itemx --delta=@var{n} -Test only one of every @var{n} bytes, blocks or truncation sizes, -instead of all of them. +Test only one byte, block, or truncation size every @var{n} bytes, +instead of all of them. If the @code{--block} option is given, @var{n} +defaults to the block size. Else @var{n} defaults to 1. Values of +@var{n} smaller than the block size will result in overlapping blocks. +(Which is convenient for testing because there are usually too few +non-overlapping blocks in a file). @item -e @var{position},@var{value} @itemx --set-byte=@var{position},@var{value} Set byte at @var{position} to @var{value} in the internal buffer after -reading and testing @var{filename}.lz but before the first test call to -the decompressor. 
If @var{value} is preceded by @samp{+}, it is added to -the original value of the byte at @var{position}. If @var{value} is -preceded by @samp{f} (flip), it is XORed with the original value of the -byte at @var{position}. This option can be used to run tests with a -changed dictionary size, for example. +reading and testing @var{file}.lz but before the first test call to the +decompressor. If @var{value} is preceded by @samp{+}, it is added to the +original value of the byte at @var{position}. If @var{value} is preceded +by @samp{f} (flip), it is XORed with the original value of the byte at +@var{position}. This option can be used to run tests with a changed +dictionary size, for example. + +@item -n +@itemx --no-verify +Skip initial verification of @var{file}.lz and @samp{zcmp}. May speed up +things a lot when testing many (or large) known good files. @item -p @var{bytes} @itemx --position=@var{bytes} diff --git a/file_index.cc b/file_index.cc index f2f81e7..b3d7d70 100644 --- a/file_index.cc +++ b/file_index.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -56,8 +56,9 @@ void File_index::set_num_error( const char * const msg, unsigned long long num ) // If successful, push last member and set pos to member header. 
-bool File_index::skip_trailing_data( const int fd, const bool ignore_bad_ds, - long long & pos ) +bool File_index::skip_trailing_data( const int fd, long long & pos, + const bool ignore_bad_ds, + const bool ignore_trailing, const bool loose_trailing ) { enum { block_size = 16384, buffer_size = block_size + File_trailer::size - 1 + File_header::size }; @@ -92,10 +93,13 @@ bool File_index::skip_trailing_data( const int fd, const bool ignore_bad_ds, if( !header.verify_magic() || !header.verify_version() || ( !ignore_bad_ds && !isvalid_ds( dictionary_size ) ) ) continue; if( (*(File_header *)( buffer + i )).verify_prefix( bsize - i ) ) - { - error_ = "Last member in input file is truncated or corrupt."; - retval_ = 2; return false; - } + { error_ = "Last member in input file is truncated or corrupt."; + retval_ = 2; return false; } + if( !loose_trailing && bsize - i >= File_header::size && + (*(File_header *)( buffer + i )).verify_corrupt() ) + { error_ = corrupt_mm_msg; retval_ = 2; return false; } + if( !ignore_trailing ) + { error_ = trailing_msg; retval_ = 2; return false; } pos = ipos + i - member_size; member_vector.push_back( Member( 0, trailer.data_size(), pos, member_size, dictionary_size ) ); @@ -114,7 +118,7 @@ bool File_index::skip_trailing_data( const int fd, const bool ignore_bad_ds, File_index::File_index( const int infd, const bool ignore_bad_ds, - const bool ignore_trailing ) + const bool ignore_trailing, const bool loose_trailing ) : isize( lseek( infd, 0, SEEK_END ) ), retval_( 0 ) { if( isize < 0 ) @@ -145,11 +149,10 @@ File_index::File_index( const int infd, const bool ignore_bad_ds, const unsigned long long member_size = trailer.member_size(); if( member_size < min_member_size || member_size > (unsigned long long)pos ) { - if( !member_vector.empty() ) - set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 ); - else if( skip_trailing_data( infd, ignore_bad_ds, pos ) ) - { if( ignore_trailing ) continue; - error_ = trailing_msg; 
retval_ = 2; return; } + if( member_vector.empty() ) + { if( skip_trailing_data( infd, pos, ignore_bad_ds, ignore_trailing, + loose_trailing ) ) continue; else return; } + set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 ); break; } if( seek_read( infd, header.data, File_header::size, @@ -159,11 +162,10 @@ File_index::File_index( const int infd, const bool ignore_bad_ds, if( !header.verify_magic() || !header.verify_version() || ( !ignore_bad_ds && !isvalid_ds( dictionary_size ) ) ) { - if( !member_vector.empty() ) - set_num_error( "Bad header at pos ", pos - member_size ); - else if( skip_trailing_data( infd, ignore_bad_ds, pos ) ) - { if( ignore_trailing ) continue; - error_ = trailing_msg; retval_ = 2; return; } + if( member_vector.empty() ) + { if( skip_trailing_data( infd, pos, ignore_bad_ds, ignore_trailing, + loose_trailing ) ) continue; else return; } + set_num_error( "Bad header at pos ", pos - member_size ); break; } pos -= member_size; @@ -261,8 +263,8 @@ File_index::File_index( const std::vector< int > & infd_vector, } } pos -= member_size; - member_vector.push_back( Member( 0, trailer.data_size(), - pos, member_size, 0 ) ); + member_vector.push_back( Member( 0, trailer.data_size(), pos, + member_size, 0 ) ); } error: if( pos != 0 || member_vector.empty() ) diff --git a/file_index.h b/file_index.h index 71e9852..da374ae 100644 --- a/file_index.h +++ b/file_index.h @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -37,13 +37,14 @@ class File_index void set_errno_error( const char * const msg ); void set_num_error( const char * const msg, unsigned long long num ); - bool skip_trailing_data( const int fd, const bool ignore_bad_ds, - long long & pos ); + bool skip_trailing_data( const int fd, long long & pos, + const bool ignore_bad_ds, + const bool ignore_trailing, const bool loose_trailing ); public: File_index() : error_( "No index" ), isize( 0 ), retval_( 2 ) {} File_index( const int infd, const bool ignore_bad_ds, - const bool ignore_trailing ); + const bool ignore_trailing, const bool loose_trailing ); File_index( const std::vector< int > & infd_vector, const long long fsize ); long members() const { return member_vector.size(); } @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -38,7 +38,7 @@ void list_line( const unsigned long long uncomp_size, { if( uncomp_size > 0 ) std::printf( "%15llu %15llu %6.2f%% %s\n", uncomp_size, comp_size, - 100.0 * ( 1.0 - ( (double)comp_size / uncomp_size ) ), + 100.0 - ( ( 100.0 * comp_size ) / uncomp_size ), input_filename ); else std::printf( "%15llu %15llu -INF%% %s\n", uncomp_size, comp_size, @@ -49,7 +49,7 @@ void list_line( const unsigned long long uncomp_size, int list_files( const std::vector< std::string > & filenames, - const int verbosity, const bool ignore_trailing ) + const bool ignore_trailing, const bool loose_trailing ) { unsigned long long total_comp = 0, total_uncomp = 0; int files = 0, retval = 0; @@ -66,7 +66,7 @@ int list_files( const std::vector< std::string > & filenames, open_instream( input_filename, &in_stats, true, true ); if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; } - const File_index file_index( infd, false, ignore_trailing ); + const File_index file_index( infd, false, ignore_trailing, loose_trailing ); close( infd ); if( file_index.retval() != 0 ) { @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -100,20 +100,22 @@ struct Len_model }; -class Pretty_print +// defined in main.cc +extern int verbosity; + +class Pretty_print // requires global var 'int verbosity' { std::string name_; + std::string padded_name; const char * const stdin_name; unsigned longest_name; - const int verbosity_; mutable bool first_post; public: - Pretty_print( const std::vector< std::string > & filenames, const int v ) - : stdin_name( "(stdin)" ), longest_name( 0 ), verbosity_( v ), - first_post( false ) + Pretty_print( const std::vector< std::string > & filenames ) + : stdin_name( "(stdin)" ), longest_name( 0 ), first_post( false ) { - if( verbosity_ <= 0 ) return; + if( verbosity <= 0 ) return; const unsigned stdin_name_len = std::strlen( stdin_name ); for( unsigned i = 0; i < filenames.size(); ++i ) { @@ -124,8 +126,8 @@ public: if( longest_name == 0 ) longest_name = stdin_name_len; } - Pretty_print( const std::string & filename, const int v ) - : stdin_name( "(stdin)" ), verbosity_( v ), first_post( false ) + Pretty_print( const std::string & filename ) + : stdin_name( "(stdin)" ), first_post( false ) { const unsigned stdin_name_len = std::strlen( stdin_name ); longest_name = ( filename == "-" ) ? 
stdin_name_len : filename.size(); @@ -137,12 +139,14 @@ public: { if( filename.size() && filename != "-" ) name_ = filename; else name_ = stdin_name; + padded_name = " "; padded_name += name_; padded_name += ": "; + if( name_.size() < longest_name ) + padded_name.append( longest_name - name_.size(), ' ' ); first_post = true; } void reset() const { if( name_.size() ) first_post = true; } const char * name() const { return name_.c_str(); } - int verbosity() const { return verbosity_; } void operator()( const char * const msg = 0, FILE * const f = stderr ) const; }; @@ -206,11 +210,19 @@ struct File_header void set_magic() { std::memcpy( data, magic_string, 4 ); data[4] = 1; } bool verify_magic() const { return ( std::memcmp( data, magic_string, 4 ) == 0 ); } - bool verify_prefix( const int size ) const // detect truncated header + + bool verify_prefix( const int sz ) const // detect (truncated) header { - for( int i = 0; i < size && i < 4; ++i ) + for( int i = 0; i < sz && i < 4; ++i ) if( data[i] != magic_string[i] ) return false; - return ( size > 0 ); + return ( sz > 0 ); + } + bool verify_corrupt() const // detect corrupt header + { + int matches = 0; + for( int i = 0; i < 4; ++i ) + if( data[i] == magic_string[i] ) ++matches; + return ( matches > 1 && matches < 4 ); } uint8_t version() const { return data[4]; } @@ -312,6 +324,7 @@ inline unsigned long long positive_diff( const unsigned long long x, const char * const bad_magic_msg = "Bad magic number (file not in lzip format)."; const char * const bad_dict_msg = "Invalid dictionary size in member header."; +const char * const corrupt_mm_msg = "Corrupt header in multimember file."; const char * const trailing_msg = "Trailing data not allowed."; // defined in alone_to_lz.cc @@ -327,7 +340,7 @@ int seek_read( const int fd, uint8_t * const buf, const int size, // defined in list.cc int list_files( const std::vector< std::string > & filenames, - const int verbosity, const bool ignore_trailing ); + const bool 
ignore_trailing, const bool loose_trailing ); // defined in main.cc extern std::string output_filename; // global vars for output file @@ -335,9 +348,11 @@ extern int outfd; struct stat; const char * bad_version( const unsigned version ); const char * format_ds( const unsigned dictionary_size ); -void show_header( const unsigned dictionary_size, const int vlevel = 3 ); +void show_header( const unsigned dictionary_size ); int open_instream( const char * const name, struct stat * const in_statsp, const bool no_ofile, const bool reg_only = false ); +int open_truncable_stream( const char * const name, + struct stat * const in_statsp ); bool open_outstream( const bool force, const bool from_stdin, const bool rw = false, const bool skipping = true ); bool file_exists( const std::string & filename ); @@ -353,6 +368,11 @@ void show_error2( const char * const msg1, const char * const name, const char * const msg2 ); void show_error4( const char * const msg1, const char * const name1, const char * const name2, const char * const msg2 ); +class Range_decoder; +void show_dprogress( const unsigned long long cfile_size = 0, + const unsigned long long partial_size = 0, + const Range_decoder * const d = 0, + const Pretty_print * const p = 0 ); // defined in merge.cc bool copy_file( const int infd, const int outfd, @@ -361,7 +381,7 @@ bool test_member_from_file( const int infd, const unsigned long long msize, long long * const failure_posp = 0 ); int merge_files( const std::vector< std::string > & filenames, const std::string & default_output_filename, - const int verbosity, const bool force ); + const bool force ); // defined in range_dec.cc bool safe_seek( const int fd, const long long pos ); @@ -369,15 +389,20 @@ bool safe_seek( const int fd, const long long pos ); // defined in repair.cc int repair_file( const std::string & input_filename, const std::string & default_output_filename, - const int verbosity, const bool force ); + const bool force ); int debug_repair( const 
std::string & input_filename, - const Bad_byte & bad_byte, const int verbosity ); + const Bad_byte & bad_byte ); int debug_decompress( const std::string & input_filename, - const Bad_byte & bad_byte, const int verbosity, - const bool show_packets ); + const Bad_byte & bad_byte, const bool show_packets ); // defined in split.cc bool verify_header( const File_header & header, const Pretty_print & pp ); int split_file( const std::string & input_filename, - const std::string & default_output_filename, - const int verbosity, const bool force ); + const std::string & default_output_filename, const bool force ); + +// defined in trailing_data.cc +int dump_tdata( const std::vector< std::string > & filenames, + const std::string & default_output_filename, const bool force, + const bool strip, const bool loose_trailing ); +int remove_tdata( const std::vector< std::string > & filenames, + const bool loose_trailing ); @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -66,6 +66,7 @@ #error "Environments where CHAR_BIT != 8 are not supported." 
#endif +int verbosity = 0; std::string output_filename; // global vars for output file int outfd = -1; @@ -73,7 +74,6 @@ namespace { const char * const Program_name = "Lziprecover"; const char * const program_name = "lziprecover"; -const char * const program_year = "2017"; const char * invocation_name = 0; const struct { const char * from; const char * to; } known_extensions[] = { @@ -82,10 +82,10 @@ const struct { const char * from; const char * to; } known_extensions[] = { { 0, 0 } }; enum Mode { m_none, m_alone_to_lz, m_debug_decompress, m_debug_delay, - m_debug_repair, m_decompress, m_list, m_merge, m_range_dec, - m_repair, m_show_packets, m_split, m_test }; + m_debug_repair, m_decompress, m_dump_tdata, m_list, m_merge, + m_range_dec, m_remove_tdata, m_repair, m_show_packets, m_split, + m_strip_tdata, m_test }; -int verbosity = 0; bool delete_output_on_interrupt = false; @@ -101,6 +101,8 @@ void show_help() "files and test integrity of files.\n" "\nLziprecover provides random access to the data in multimember files; it\n" "only decompresses the members containing the desired data.\n" + "\nLziprecover facilitates the management of metadata stored as trailing\n" + "data in lzip files.\n" "\nLziprecover is not a replacement for regular backups, but a last line of\n" "defense for the case where the backups are also damaged.\n" "\nUsage: %s [options] [files]\n", invocation_name ); @@ -122,7 +124,11 @@ void show_help() " -R, --repair try to repair a small error in file\n" " -s, --split split multimember file in single-member files\n" " -t, --test test compressed file integrity\n" - " -v, --verbose be verbose (a 2nd -v gives more)\n" ); + " -v, --verbose be verbose (a 2nd -v gives more)\n" + " --loose-trailing allow trailing data seeming corrupt header\n" + " --dump-tdata dump trailing data to standard output\n" + " --remove-tdata remove trailing data from files in place\n" + " --strip-tdata copy files to stdout without trailing data\n" ); if( verbosity >= 1 ) { 
std::printf( " -W, --debug-decompress=<pos>,<val> set pos to val and decompress to stdout\n" @@ -142,17 +148,22 @@ void show_help() "Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html\n" ); } +} // end namespace -void show_version() +void Pretty_print::operator()( const char * const msg, FILE * const f ) const { - std::printf( "%s %s\n", program_name, PROGVERSION ); - std::printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year ); - std::printf( "License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n" - "This is free software: you are free to change and redistribute it.\n" - "There is NO WARRANTY, to the extent permitted by law.\n" ); + if( verbosity >= 0 ) + { + if( first_post ) + { + first_post = false; + std::fputs( padded_name.c_str(), f ); + if( !msg ) std::fflush( f ); + } + if( msg ) std::fprintf( f, "%s\n", msg ); + } } -} // end namespace const char * bad_version( const unsigned version ) { @@ -182,70 +193,16 @@ const char * format_ds( const unsigned dictionary_size ) } -void show_header( const unsigned dictionary_size, const int vlevel ) +void show_header( const unsigned dictionary_size ) { - if( verbosity >= vlevel ) - std::fprintf( stderr, "dictionary %s. 
", format_ds( dictionary_size ) ); + std::fprintf( stderr, "dictionary %s, ", format_ds( dictionary_size ) ); } -namespace { -long long getnum( const char * const ptr, const long long llimit = -LLONG_MAX, - const long long ulimit = LLONG_MAX, - const char ** const tailp = 0 ) - { - char * tail; - errno = 0; - long long result = strtoll( ptr, &tail, 0 ); - if( tail == ptr ) - { - show_error( "Bad or missing numerical argument.", 0, true ); - std::exit( 1 ); - } +#include "main_common.cc" - if( !errno && tail[0] ) - { - char * const p = tail++; - int factor; - bool bsuf; // 'B' suffix is present - if( tail[0] == 'i' ) { ++tail; factor = 1024; } else factor = 1000; - if( tail[0] == 'B' ) { ++tail; bsuf = true; } else bsuf = false; - int exponent = -1; // -1 = bad multiplier - switch( *p ) - { - case 'Y': exponent = 8; break; - case 'Z': exponent = 7; break; - case 'E': exponent = 6; break; - case 'P': exponent = 5; break; - case 'T': exponent = 4; break; - case 'G': exponent = 3; break; - case 'M': exponent = 2; break; - case 'K': if( factor == 1024 ) exponent = 1; break; - case 'k': if( factor == 1000 ) exponent = 1; break; - case 'B': if( factor == 1000 && !bsuf ) exponent = 0; break; - default : if( tailp ) { tail = p; exponent = 0; } break; - } - if( exponent < 0 ) - { - show_error( "Bad multiplier in numerical argument.", 0, true ); - std::exit( 1 ); - } - for( int i = 0; i < exponent; ++i ) - { - if( LLONG_MAX / factor >= std::labs( result ) ) result *= factor; - else { errno = ERANGE; break; } - } - } - if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE; - if( errno ) - { - show_error( "Numerical argument out of limits." ); - std::exit( 1 ); - } - if( tailp ) *tailp = tail; - return result; - } +namespace { // Recognized formats: <begin> <begin>-<end> <begin>,<size> ,<size> // @@ -253,13 +210,13 @@ void parse_range( const char * const ptr, Block & range ) { const char * tail = ptr; long long value = - ( ptr[0] == ',' ) ? 
0 : getnum( ptr, 0, INT64_MAX - 1, &tail ); + ( ptr[0] == ',' ) ? 0 : getnum( ptr, 0, 0, INT64_MAX - 1, &tail ); if( tail[0] == 0 || tail[0] == ',' || tail[0] == '-' ) { range.pos( value ); if( tail[0] == 0 ) { range.size( INT64_MAX - value ); return; } const bool issize = ( tail[0] == ',' ); - value = getnum( tail + 1, 1, INT64_MAX ); // size + value = getnum( tail + 1, 0, 1, INT64_MAX ); // size if( issize || value > range.pos() ) { if( !issize ) value -= range.pos(); @@ -276,7 +233,7 @@ void parse_range( const char * const ptr, Block & range ) void parse_pos_value( const char * const ptr, Bad_byte & bad_byte ) { const char * tail; - bad_byte.pos = getnum( ptr, 0, INT64_MAX, &tail ); + bad_byte.pos = getnum( ptr, 0, 0, INT64_MAX, &tail ); if( tail[0] != ',' ) { show_error( "Bad separator between <pos> and <val>.", 0, true ); @@ -285,7 +242,7 @@ void parse_pos_value( const char * const ptr, Bad_byte & bad_byte ) if( tail[1] == '+' ) { ++tail; bad_byte.mode = Bad_byte::delta; } else if( tail[1] == 'f' ) { ++tail; bad_byte.mode = Bad_byte::flip; } else bad_byte.mode = Bad_byte::literal; - bad_byte.value = getnum( tail + 1, 0, 255 ); + bad_byte.value = getnum( tail + 1, 0, 0, 255 ); } @@ -322,6 +279,37 @@ int extension_index( const std::string & name ) return -1; } + +void set_a_outname( const std::string & name ) + { + output_filename = name; + if( name.size() > 5 && name.compare( name.size() - 5, 5, ".lzma" ) == 0 ) + output_filename.erase( name.size() - 2 ); + else if( name.size() > 4 && name.compare( name.size() - 4, 4, ".tlz" ) == 0 ) + output_filename.insert( name.size() - 2, "ar." 
); + else if( name.size() <= 3 || name.compare( name.size() - 3, 3, ".lz" ) != 0 ) + output_filename += known_extensions[0].from; + } + + +void set_d_outname( const std::string & name, const int eindex ) + { + if( eindex >= 0 ) + { + const std::string from( known_extensions[eindex].from ); + if( name.size() > from.size() ) + { + output_filename.assign( name, 0, name.size() - from.size() ); + output_filename += known_extensions[eindex].to; + return; + } + } + output_filename = name; output_filename += ".out"; + if( verbosity >= 1 ) + std::fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'\n", + program_name, name.c_str(), output_filename.c_str() ); + } + } // end namespace int open_instream( const char * const name, struct stat * const in_statsp, @@ -351,39 +339,29 @@ int open_instream( const char * const name, struct stat * const in_statsp, return infd; } -namespace { -void set_a_outname( const std::string & name ) +int open_truncable_stream( const char * const name, + struct stat * const in_statsp ) { - output_filename = name; - if( name.size() > 5 && name.compare( name.size() - 5, 5, ".lzma" ) == 0 ) - output_filename.erase( name.size() - 2 ); - else if( name.size() > 4 && name.compare( name.size() - 4, 4, ".tlz" ) == 0 ) - output_filename.insert( name.size() - 2, "ar." 
); - else if( name.size() <= 3 || name.compare( name.size() - 3, 3, ".lz" ) != 0 ) - output_filename += known_extensions[0].from; - } - - -void set_d_outname( const std::string & name, const int eindex ) - { - if( eindex >= 0 ) + int infd = open( name, O_RDWR | O_BINARY ); + if( infd < 0 ) + show_file_error( name, "Can't open input file", errno ); + else { - const std::string from( known_extensions[eindex].from ); - if( name.size() > from.size() ) + const int i = fstat( infd, in_statsp ); + const mode_t mode = in_statsp->st_mode; + if( i != 0 || !S_ISREG( mode ) ) { - output_filename.assign( name, 0, name.size() - from.size() ); - output_filename += known_extensions[eindex].to; - return; + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: File '%s' is not a regular file.\n", + program_name, name ); + close( infd ); + infd = -1; } } - output_filename = name; output_filename += ".out"; - if( verbosity >= 1 ) - std::fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'\n", - program_name, name.c_str(), output_filename.c_str() ); + return infd; } -} // end namespace bool open_outstream( const bool force, const bool from_stdin, const bool rw, const bool skipping ) @@ -504,9 +482,9 @@ unsigned char xdigit( const unsigned value ) bool show_trailing_data( const uint8_t * const data, const int size, const Pretty_print & pp, const bool all, - const bool ignore_trailing ) + const int ignore_trailing ) // -1 = show { - if( verbosity >= 4 || !ignore_trailing ) + if( verbosity >= 4 || ignore_trailing <= 0 ) { std::string msg; if( !all ) msg = "first bytes of "; @@ -522,14 +500,15 @@ bool show_trailing_data( const uint8_t * const data, const int size, { if( std::isprint( data[i] ) ) msg += data[i]; else msg += '.'; } msg += '\''; pp( msg.c_str() ); - if( !ignore_trailing ) show_file_error( pp.name(), trailing_msg ); + if( ignore_trailing == 0 ) show_file_error( pp.name(), trailing_msg ); } - return ignore_trailing; + return ( ignore_trailing > 0 ); } -int 
decompress( const int infd, const Pretty_print & pp, - const bool ignore_trailing, const bool testing ) +int decompress( const unsigned long long cfile_size, const int infd, + const Pretty_print & pp, const bool ignore_trailing, + const bool loose_trailing, const bool testing ) { int retval = 0; @@ -543,8 +522,13 @@ int decompress( const int infd, const Pretty_print & pp, const int size = rdec.read_data( header.data, File_header::size ); if( rdec.finished() ) // End Of File { - if( first_member || header.verify_prefix( size ) ) - { pp( "File ends unexpectedly at member header." ); retval = 2; } + if( first_member ) + { show_file_error( pp.name(), "File ends unexpectedly at member header." ); + retval = 2; } + else if( header.verify_prefix( size ) ) + { pp( "Truncated header in multimember file." ); + show_trailing_data( header.data, size, pp, true, -1 ); + retval = 2; } else if( size > 0 && !show_trailing_data( header.data, size, pp, true, ignore_trailing ) ) retval = 2; @@ -554,6 +538,10 @@ int decompress( const int infd, const Pretty_print & pp, { if( first_member ) { show_file_error( pp.name(), bad_magic_msg ); retval = 2; } + else if( !loose_trailing && header.verify_corrupt() ) + { pp( corrupt_mm_msg ); + show_trailing_data( header.data, size, pp, false, -1 ); + retval = 2; } else if( !show_trailing_data( header.data, size, pp, false, ignore_trailing ) ) retval = 2; break; @@ -564,10 +552,10 @@ int decompress( const int infd, const Pretty_print & pp, if( !isvalid_ds( dictionary_size ) ) { pp( bad_dict_msg ); retval = 2; break; } - if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) - { pp(); show_header( dictionary_size ); } + if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) pp(); LZ_decoder decoder( rdec, dictionary_size, outfd ); + show_dprogress( cfile_size, partial_file_pos, &rdec, &pp ); // init const int result = decoder.decode_member( pp ); partial_file_pos += rdec.member_position(); if( result != 0 ) @@ -615,7 +603,7 @@ int 
close_outstream( const struct stat * const in_statsp ) if( delete_output_on_interrupt ) close_and_set_permissions( in_statsp ); if( outfd >= 0 && close( outfd ) != 0 ) - { show_error( "Can't close stdout", errno ); return 1; } + { show_error( "Error closing stdout", errno ); return 1; } outfd = -1; return 0; } @@ -634,21 +622,6 @@ std::string insert_fixed( std::string name ) } -void show_error( const char * const msg, const int errcode, const bool help ) - { - if( verbosity < 0 ) return; - if( msg && msg[0] ) - { - std::fprintf( stderr, "%s: %s", program_name, msg ); - if( errcode > 0 ) std::fprintf( stderr, ": %s", std::strerror( errcode ) ); - std::fputc( '\n', stderr ); - } - if( help ) - std::fprintf( stderr, "Try '%s --help' for more information.\n", - invocation_name ); - } - - void show_file_error( const char * const filename, const char * const msg, const int errcode ) { @@ -659,14 +632,6 @@ void show_file_error( const char * const filename, const char * const msg, } -void internal_error( const char * const msg ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: internal error: %s\n", program_name, msg ); - std::exit( 3 ); - } - - void show_error2( const char * const msg1, const char * const name, const char * const msg2 ) { @@ -684,47 +649,83 @@ void show_error4( const char * const msg1, const char * const name1, } +void show_dprogress( const unsigned long long cfile_size, + const unsigned long long partial_size, + const Range_decoder * const d, + const Pretty_print * const p ) + { + static unsigned long long csize = 0; // file_size / 100 + static unsigned long long psize = 0; + static const Range_decoder * rdec = 0; + static const Pretty_print * pp = 0; + static int counter = 0; + static bool enabled = true; + + if( !enabled ) return; + if( p ) // initialize static vars + { + if( verbosity < 2 || !isatty( STDERR_FILENO ) ) { enabled = false; return; } + csize = cfile_size; psize = partial_size; rdec = d; pp = p; counter = 0; + } + if( rdec && pp && 
--counter <= 0 ) + { + const unsigned long long pos = psize + rdec->member_position(); + counter = 7; // update display every 114688 bytes + if( csize > 0 ) + std::fprintf( stderr, "%4llu%% %.1f MB\r", pos / csize, pos / 1000000.0 ); + else + std::fprintf( stderr, " %.1f MB\r", pos / 1000000.0 ); + pp->reset(); (*pp)(); // restore cursor position + } + } + + int main( const int argc, const char * const argv[] ) { Block range( 0, 0 ); Bad_byte bad_byte; std::string default_output_filename; std::vector< std::string > filenames; - int infd = -1; Mode program_mode = m_none; bool force = false; bool ignore_errors = false; bool ignore_trailing = true; bool keep_input_files = false; + bool loose_trailing = false; bool to_stdout = false; invocation_name = argv[0]; + enum { opt_dtd = 256, opt_lt, opt_rtd, opt_std }; const Arg_parser::Option options[] = { - { 'a', "trailing-error", Arg_parser::no }, - { 'A', "alone-to-lz", Arg_parser::no }, - { 'c', "stdout", Arg_parser::no }, - { 'd', "decompress", Arg_parser::no }, - { 'D', "range-decompress", Arg_parser::yes }, - { 'f', "force", Arg_parser::no }, - { 'h', "help", Arg_parser::no }, - { 'i', "ignore-errors", Arg_parser::no }, - { 'k', "keep", Arg_parser::no }, - { 'l', "list", Arg_parser::no }, - { 'm', "merge", Arg_parser::no }, - { 'n', "threads", Arg_parser::yes }, - { 'o', "output", Arg_parser::yes }, - { 'q', "quiet", Arg_parser::no }, - { 'R', "repair", Arg_parser::no }, - { 's', "split", Arg_parser::no }, - { 't', "test", Arg_parser::no }, - { 'v', "verbose", Arg_parser::no }, - { 'V', "version", Arg_parser::no }, - { 'W', "debug-decompress", Arg_parser::yes }, - { 'X', "show-packets", Arg_parser::maybe }, - { 'Y', "debug-delay", Arg_parser::yes }, - { 'Z', "debug-repair", Arg_parser::yes }, - { 0 , 0, Arg_parser::no } }; + { 'a', "trailing-error", Arg_parser::no }, + { 'A', "alone-to-lz", Arg_parser::no }, + { 'c', "stdout", Arg_parser::no }, + { 'd', "decompress", Arg_parser::no }, + { 'D', "range-decompress", 
Arg_parser::yes }, + { 'f', "force", Arg_parser::no }, + { 'h', "help", Arg_parser::no }, + { 'i', "ignore-errors", Arg_parser::no }, + { 'k', "keep", Arg_parser::no }, + { 'l', "list", Arg_parser::no }, + { 'm', "merge", Arg_parser::no }, + { 'n', "threads", Arg_parser::yes }, + { 'o', "output", Arg_parser::yes }, + { 'q', "quiet", Arg_parser::no }, + { 'R', "repair", Arg_parser::no }, + { 's', "split", Arg_parser::no }, + { 't', "test", Arg_parser::no }, + { 'v', "verbose", Arg_parser::no }, + { 'V', "version", Arg_parser::no }, + { 'W', "debug-decompress", Arg_parser::yes }, + { 'X', "show-packets", Arg_parser::maybe }, + { 'Y', "debug-delay", Arg_parser::yes }, + { 'Z', "debug-repair", Arg_parser::yes }, + { opt_dtd, "dump-tdata", Arg_parser::no }, + { opt_lt, "loose-trailing", Arg_parser::no }, + { opt_rtd, "remove-tdata", Arg_parser::no }, + { opt_std, "strip-tdata", Arg_parser::no }, + { 0 , 0, Arg_parser::no } }; const Arg_parser parser( argc, argv, options ); if( parser.error().size() ) // bad option @@ -767,6 +768,10 @@ int main( const int argc, const char * const argv[] ) parse_range( arg, range ); break; case 'Z': set_mode( program_mode, m_debug_repair ); parse_pos_value( arg, bad_byte ); break; + case opt_dtd: set_mode( program_mode, m_dump_tdata ); break; + case opt_lt: loose_trailing = true; break; + case opt_rtd: set_mode( program_mode, m_remove_tdata ); break; + case opt_std: set_mode( program_mode, m_strip_tdata ); break; default : internal_error( "uncaught option." 
); } } // end process options @@ -796,37 +801,48 @@ int main( const int argc, const char * const argv[] ) case m_alone_to_lz: break; case m_debug_decompress: one_file( filenames.size() ); - return debug_decompress( filenames[0], bad_byte, verbosity, false ); + return debug_decompress( filenames[0], bad_byte, false ); case m_debug_delay: one_file( filenames.size() ); - return debug_delay( filenames[0], range, verbosity ); + return debug_delay( filenames[0], range ); case m_debug_repair: one_file( filenames.size() ); - return debug_repair( filenames[0], bad_byte, verbosity ); + return debug_repair( filenames[0], bad_byte ); case m_decompress: break; + case m_dump_tdata: + case m_strip_tdata: + if( filenames.size() < 1 ) + { show_error( "You must specify at least 1 file.", 0, true ); return 1; } + if( default_output_filename.size() ) set_signals(); + return dump_tdata( filenames, default_output_filename, force, + program_mode == m_strip_tdata, loose_trailing ); case m_list: break; case m_merge: if( filenames.size() < 2 ) { show_error( "You must specify at least 2 files.", 0, true ); return 1; } set_signals(); - return merge_files( filenames, default_output_filename, verbosity, force ); + return merge_files( filenames, default_output_filename, force ); case m_range_dec: one_file( filenames.size() ); set_signals(); return range_decompress( filenames[0], default_output_filename, range, - verbosity, force, ignore_errors, ignore_trailing, to_stdout ); + force, ignore_errors, ignore_trailing, + loose_trailing, to_stdout ); + case m_remove_tdata: + if( filenames.size() < 1 ) + { show_error( "You must specify at least 1 file.", 0, true ); return 1; } + return remove_tdata( filenames, loose_trailing ); case m_repair: one_file( filenames.size() ); set_signals(); - return repair_file( filenames[0], default_output_filename, verbosity, - force ); + return repair_file( filenames[0], default_output_filename, force ); case m_show_packets: one_file( filenames.size() ); - return 
debug_decompress( filenames[0], bad_byte, verbosity, true ); + return debug_decompress( filenames[0], bad_byte, true ); case m_split: one_file( filenames.size() ); set_signals(); - return split_file( filenames[0], default_output_filename, verbosity, force ); + return split_file( filenames[0], default_output_filename, force ); case m_test: break; } } @@ -837,7 +853,7 @@ int main( const int argc, const char * const argv[] ) if( filenames.empty() ) filenames.push_back("-"); if( program_mode == m_list ) - return list_files( filenames, verbosity, ignore_trailing ); + return list_files( filenames, ignore_trailing, loose_trailing ); if( program_mode == m_test ) outfd = -1; @@ -848,13 +864,15 @@ int main( const int argc, const char * const argv[] ) ( filenames_given || default_output_filename.size() ) ) set_signals(); - Pretty_print pp( filenames, verbosity ); + Pretty_print pp( filenames ); + int failed_tests = 0; int retval = 0; bool stdin_used = false; for( unsigned i = 0; i < filenames.size(); ++i ) { std::string input_filename; + int infd; struct stat in_stats; output_filename.clear(); @@ -875,7 +893,7 @@ int main( const int argc, const char * const argv[] ) if( !open_outstream( force, true ) ) { if( retval < 1 ) retval = 1; - close( infd ); infd = -1; + close( infd ); continue; } } @@ -898,7 +916,7 @@ int main( const int argc, const char * const argv[] ) if( !open_outstream( force, false ) ) { if( retval < 1 ) retval = 1; - close( infd ); infd = -1; + close( infd ); continue; } } @@ -909,32 +927,42 @@ int main( const int argc, const char * const argv[] ) if( !check_tty( pp.name(), infd, program_mode ) ) { if( retval < 1 ) retval = 1; - if( program_mode == m_test ) { close( infd ); infd = -1; continue; } + if( program_mode == m_test ) { close( infd ); continue; } cleanup_and_fail( retval ); } const struct stat * const in_statsp = input_filename.size() ? &in_stats : 0; + const unsigned long long cfile_size = + ( in_statsp && S_ISREG( in_statsp->st_mode ) ) ? 
+ ( in_statsp->st_size + 99 ) / 100 : 0; int tmp; if( program_mode == m_alone_to_lz ) tmp = alone_to_lz( infd, pp ); else - tmp = decompress( infd, pp, ignore_trailing, program_mode == m_test ); + tmp = decompress( cfile_size, infd, pp, ignore_trailing, + loose_trailing, program_mode == m_test ); if( tmp > retval ) retval = tmp; - if( tmp && program_mode != m_test ) cleanup_and_fail( retval ); + if( tmp ) + { if( program_mode != m_test ) cleanup_and_fail( retval ); + else ++failed_tests; } if( delete_output_on_interrupt ) close_and_set_permissions( in_statsp ); if( input_filename.size() ) { - close( infd ); infd = -1; + close( infd ); if( !keep_input_files && !to_stdout && program_mode != m_test ) std::remove( input_filename.c_str() ); } } if( outfd >= 0 && close( outfd ) != 0 ) { - show_error( "Can't close stdout", errno ); + show_error( "Error closing stdout", errno ); if( retval < 1 ) retval = 1; } + if( failed_tests > 0 && verbosity >= 1 && filenames.size() > 1 ) + std::fprintf( stderr, "%s: warning: %d %s failed the test.\n", + program_name, failed_tests, + ( failed_tests == 1 ) ? "file" : "files" ); return retval; } diff --git a/main_common.cc b/main_common.cc new file mode 100644 index 0000000..3b9f677 --- /dev/null +++ b/main_common.cc @@ -0,0 +1,123 @@ +/* Lziprecover - Data recovery tool for the lzip format + Copyright (C) 2009-2018 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. 
If not, see <http://www.gnu.org/licenses/>. +*/ + +namespace { + +const char * const program_year = "2018"; + + +void show_version() + { + std::printf( "%s %s\n", program_name, PROGVERSION ); + std::printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year ); + std::printf( "License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n" + "This is free software: you are free to change and redistribute it.\n" + "There is NO WARRANTY, to the extent permitted by law.\n" ); + } + + +// Recognized formats: <num>[YZEPTGM][i][Bs], <num>k[Bs], <num>Ki[Bs] +// +long long getnum( const char * const ptr, const int hardbs, + const long long llimit = -LLONG_MAX, + const long long ulimit = LLONG_MAX, + const char ** const tailp = 0 ) + { + char * tail; + errno = 0; + long long result = strtoll( ptr, &tail, 0 ); + if( tail == ptr ) + { + show_error( "Bad or missing numerical argument.", 0, true ); + std::exit( 1 ); + } + + if( !errno && tail[0] ) + { + char * const p = tail++; + int factor = 1000; // default factor + int exponent = -1; // -1 = bad multiplier + char usuf = 0; // 'B' or 's' unit suffix is present + switch( *p ) + { + case 'Y': exponent = 8; break; + case 'Z': exponent = 7; break; + case 'E': exponent = 6; break; + case 'P': exponent = 5; break; + case 'T': exponent = 4; break; + case 'G': exponent = 3; break; + case 'M': exponent = 2; break; + case 'K': if( tail[0] == 'i' ) { ++tail; factor = 1024; exponent = 1; } break; + case 'k': if( tail[0] != 'i' ) exponent = 1; break; + case 'B': + case 's': usuf = *p; exponent = 0; break; + default : if( tailp ) { tail = p; exponent = 0; } break; + } + if( exponent > 1 && tail[0] == 'i' ) { ++tail; factor = 1024; } + if( exponent > 0 && usuf == 0 && ( tail[0] == 'B' || tail[0] == 's' ) ) + { usuf = tail[0]; ++tail; } + if( exponent < 0 || ( usuf == 's' && hardbs <= 0 ) || + ( !tailp && tail[0] != 0 ) ) + { + show_error( "Bad multiplier in numerical argument.", 0, true ); + std::exit( 1 ); + } + for( int 
i = 0; i < exponent; ++i ) + { + if( LLONG_MAX / factor >= llabs( result ) ) result *= factor; + else { errno = ERANGE; break; } + } + if( usuf == 's' ) + { + if( LLONG_MAX / hardbs >= llabs( result ) ) result *= hardbs; + else errno = ERANGE; + } + } + if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE; + if( errno ) + { + show_error( "Numerical argument out of limits." ); + std::exit( 1 ); + } + if( tailp ) *tailp = tail; + return result; + } + +} // end namespace + + +void show_error( const char * const msg, const int errcode, const bool help ) + { + if( verbosity < 0 ) return; + if( msg && msg[0] ) + { + std::fprintf( stderr, "%s: %s", program_name, msg ); + if( errcode > 0 ) std::fprintf( stderr, ": %s", std::strerror( errcode ) ); + std::fputc( '\n', stderr ); + } + if( help ) + std::fprintf( stderr, "Try '%s --help' for more information.\n", + invocation_name ); + } + + +void internal_error( const char * const msg ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: internal error: %s\n", program_name, msg ); + std::exit( 3 ); + } @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -201,8 +201,7 @@ long ipow( const unsigned base, const unsigned exponent ) int open_input_files( const std::vector< std::string > & filenames, std::vector< int > & infd_vector, - File_index & file_index, struct stat * const in_statsp, - const int verbosity ) + File_index & file_index, struct stat * const in_statsp ) { const int files = filenames.size(); for( int i = 0; i + 1 < files; ++i ) @@ -231,7 +230,7 @@ int open_input_files( const std::vector< std::string > & filenames, for( int i = 0; i < files; ++i ) { long long tmp; - const File_index fi( infd_vector[i], true, true ); + const File_index fi( infd_vector[i], true, true, true ); if( fi.retval() == 0 ) // file format is intact { if( good_fi < 0 ) { good_fi = i; file_index = fi; } @@ -290,8 +289,7 @@ int open_input_files( const std::vector< std::string > & filenames, } -void maybe_cluster_blocks( std::vector< Block > & block_vector, - const int verbosity ) +void maybe_cluster_blocks( std::vector< Block > & block_vector ) { const unsigned long old_size = block_vector.size(); if( old_size <= 16 ) return; @@ -335,8 +333,7 @@ bool color_done( const std::vector< int > & color_vector, const int i ) bool try_merge_member2( const long long mpos, const long long msize, const std::vector< Block > & block_vector, const std::vector< int > & color_vector, - const std::vector< int > & infd_vector, - const int verbosity ) + const std::vector< int > & infd_vector ) { const int blocks = block_vector.size(); const int files = infd_vector.size(); @@ -380,8 +377,7 @@ bool try_merge_member2( const long long mpos, const long long msize, bool try_merge_member( const long long mpos, const long long msize, const std::vector< Block > & block_vector, const std::vector< int > & color_vector, - const std::vector< int > & infd_vector, - const int verbosity ) + const std::vector< int > & infd_vector ) { 
const int blocks = block_vector.size(); const int files = infd_vector.size(); @@ -437,8 +433,7 @@ bool try_merge_member( const long long mpos, const long long msize, bool try_merge_member1( const long long mpos, const long long msize, const std::vector< Block > & block_vector, const std::vector< int > & color_vector, - const std::vector< int > & infd_vector, - const int verbosity ) + const std::vector< int > & infd_vector ) { if( block_vector.size() != 1 || block_vector[0].size() <= 1 ) return false; const long long pos = block_vector[0].pos(); @@ -524,10 +519,13 @@ bool test_member_from_file( const int infd, const unsigned long long msize, header.verify_version() && isvalid_ds( dictionary_size ) ) { LZ_decoder decoder( rdec, dictionary_size, -1 ); - Pretty_print dummy( "", -1 ); - - if( decoder.decode_member( dummy ) == 0 && - rdec.member_position() == msize ) return true; + const int old_verbosity = verbosity; + verbosity = -1; // suppress all messages + Pretty_print dummy( "" ); + const bool done = ( decoder.decode_member( dummy ) == 0 && + rdec.member_position() == msize ); + verbosity = old_verbosity; // restore verbosity level + if( done ) return true; } if( failure_posp ) *failure_posp = rdec.member_position(); return false; @@ -536,14 +534,14 @@ bool test_member_from_file( const int infd, const unsigned long long msize, int merge_files( const std::vector< std::string > & filenames, const std::string & default_output_filename, - const int verbosity, const bool force ) + const bool force ) { const int files = filenames.size(); std::vector< int > infd_vector( files ); File_index file_index; struct stat in_stats; const int retval = - open_input_files( filenames, infd_vector, file_index, &in_stats, verbosity ); + open_input_files( filenames, infd_vector, file_index, &in_stats ); if( retval >= 0 ) return retval; if( !safe_seek( infd_vector[0], 0 ) ) return 1; @@ -588,22 +586,22 @@ int merge_files( const std::vector< std::string > & filenames, { if( 
block_vector.size() > 1 ) { - maybe_cluster_blocks( block_vector, verbosity ); + maybe_cluster_blocks( block_vector ); done = try_merge_member2( mpos, msize, block_vector, color_vector, - infd_vector, verbosity ); + infd_vector ); print_pending_newline(); } if( !done ) { done = try_merge_member( mpos, msize, block_vector, color_vector, - infd_vector, verbosity ); + infd_vector ); print_pending_newline(); } } if( !done ) { done = try_merge_member1( mpos, msize, block_vector, color_vector, - infd_vector, verbosity ); + infd_vector ); print_pending_newline(); } if( !done ) @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/range_dec.cc b/range_dec.cc index e105aaa..803a540 100644 --- a/range_dec.cc +++ b/range_dec.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -51,13 +51,13 @@ int decompress_member( const int infd, const Pretty_print & pp, if( !isvalid_ds( dictionary_size ) ) { pp( "Invalid dictionary size in member header." 
); return 2; } - if( pp.verbosity() >= 2 ) { pp(); show_header( dictionary_size ); } + if( verbosity >= 2 ) pp(); LZ_decoder decoder( rdec, dictionary_size, outfd, outskip, outend ); const int result = decoder.decode_member( pp ); if( result != 0 ) { - if( pp.verbosity() >= 0 && result <= 2 ) + if( verbosity >= 0 && result <= 2 ) { pp(); std::fprintf( stderr, "%s at pos %llu\n", ( result == 2 ) ? @@ -66,7 +66,7 @@ int decompress_member( const int infd, const Pretty_print & pp, } return 2; } - if( pp.verbosity() >= 2 ) std::fputs( "done\n", stderr ); + if( verbosity >= 2 ) std::fputs( "done\n", stderr ); return 0; } @@ -109,16 +109,17 @@ bool safe_seek( const int fd, const long long pos ) int range_decompress( const std::string & input_filename, const std::string & default_output_filename, - Block range, const int verbosity, const bool force, - const bool ignore_errors, const bool ignore_trailing, + Block range, const bool force, const bool ignore_errors, + const bool ignore_trailing, const bool loose_trailing, const bool to_stdout ) { struct stat in_stats; const int infd = open_instream( input_filename.c_str(), &in_stats, true, true ); if( infd < 0 ) return 1; - Pretty_print pp( input_filename, verbosity ); - const File_index file_index( infd, ignore_errors, ignore_trailing ); + Pretty_print pp( input_filename ); + const File_index file_index( infd, ignore_errors, ignore_trailing, + loose_trailing ); if( file_index.retval() != 0 ) { show_file_error( input_filename.c_str(), file_index.error().c_str() ); return file_index.retval(); } @@ -126,18 +127,14 @@ int range_decompress( const std::string & input_filename, if( range.end() > file_index.udata_size() ) range.size( std::max( 0LL, file_index.udata_size() - range.pos() ) ); if( range.size() <= 0 ) - { if( verbosity >= 0 ) pp( "Nothing to do." ); return 0; } + { pp( "Nothing to do." 
); return 0; } if( verbosity >= 1 ) - { - if( verbosity >= 2 ) - std::fprintf( stderr, "Decompressed file size = %sB\n", - format_num( file_index.udata_size() ) ); - std::fprintf( stderr, "Decompressing range %sB to %sB (%sBytes)\n", + std::fprintf( stderr, "Decompressing range %sB to %sB (%sof %sBytes)\n", format_num( range.pos() ), format_num( range.pos() + range.size() ), - format_num( range.size() ) ); - } + format_num( range.size() ), + format_num( file_index.udata_size() ) ); if( to_stdout || default_output_filename.empty() ) outfd = STDOUT_FILENO; @@ -154,7 +151,7 @@ int range_decompress( const std::string & input_filename, const Block & db = file_index.dblock( i ); if( range.overlaps( db ) ) { - if( verbosity >= 3 ) + if( verbosity >= 3 && file_index.members() > 1 ) std::fprintf( stderr, "Decompressing member %3ld\n", i + 1 ); const long long outskip = std::max( 0LL, range.pos() - db.pos() ); const long long outend = std::min( db.size(), range.end() - db.pos() ); @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -107,7 +107,7 @@ int repair_dictionary_size( const long long msize, uint8_t * const mbuffer ) // Return value: -1 = master failed, 0 = begin reached, >0 = repaired pos long repair_member( const long long mpos, const long long msize, uint8_t * const mbuffer, const long begin, const long end, - const unsigned dictionary_size, const int verbosity ) + const unsigned dictionary_size ) { for( long pos = end; pos >= begin && pos > end - 50000; ) { @@ -140,14 +140,14 @@ long repair_member( const long long mpos, const long long msize, int repair_file( const std::string & input_filename, const std::string & default_output_filename, - const int verbosity, const bool force ) + const bool force ) { struct stat in_stats; const int infd = open_instream( input_filename.c_str(), &in_stats, true, true ); if( infd < 0 ) return 1; - Pretty_print pp( input_filename, verbosity ); - const File_index file_index( infd, true, true ); + Pretty_print pp( input_filename ); + const File_index file_index( infd, true, true, true ); if( file_index.retval() != 0 ) { show_file_error( input_filename.c_str(), file_index.error().c_str() ); return file_index.retval(); } @@ -186,10 +186,10 @@ int repair_file( const std::string & input_filename, pos = repair_dictionary_size( msize, mbuffer ); if( pos == 0 ) pos = repair_member( mpos, msize, mbuffer, File_header::size + 1, - File_header::size + 5, dictionary_size, verbosity ); + File_header::size + 5, dictionary_size ); if( pos == 0 ) pos = repair_member( mpos, msize, mbuffer, File_header::size + 6, - failure_pos, dictionary_size, verbosity ); + failure_pos, dictionary_size ); print_pending_newline(); } if( pos < 0 ) @@ -228,15 +228,14 @@ int repair_file( const std::string & input_filename, } -int debug_delay( const std::string & input_filename, Block range, - const int verbosity ) +int debug_delay( const std::string 
& input_filename, Block range ) { struct stat in_stats; // not used const int infd = open_instream( input_filename.c_str(), &in_stats, true, true ); if( infd < 0 ) return 1; - Pretty_print pp( input_filename, verbosity ); - const File_index file_index( infd, false, true ); + Pretty_print pp( input_filename ); + const File_index file_index( infd, false, true, true ); if( file_index.retval() != 0 ) { show_file_error( input_filename.c_str(), file_index.error().c_str() ); return file_index.retval(); } @@ -244,7 +243,7 @@ int debug_delay( const std::string & input_filename, Block range, if( range.end() > file_index.cdata_size() ) range.size( std::max( 0LL, file_index.cdata_size() - range.pos() ) ); if( range.size() <= 0 ) - { if( verbosity >= 0 ) pp( "Nothing to do." ); return 0; } + { pp( "Nothing to do." ); return 0; } for( long i = 0; i < file_index.members(); ++i ) { @@ -309,14 +308,14 @@ int debug_delay( const std::string & input_filename, Block range, int debug_repair( const std::string & input_filename, - const Bad_byte & bad_byte, const int verbosity ) + const Bad_byte & bad_byte ) { struct stat in_stats; // not used const int infd = open_instream( input_filename.c_str(), &in_stats, true, true ); if( infd < 0 ) return 1; - Pretty_print pp( input_filename, verbosity ); - const File_index file_index( infd, false, true ); + Pretty_print pp( input_filename ); + const File_index file_index( infd, false, true, true ); if( file_index.retval() != 0 ) { show_file_error( input_filename.c_str(), file_index.error().c_str() ); return file_index.retval(); } @@ -325,7 +324,7 @@ int debug_repair( const std::string & input_filename, for( ; idx < file_index.members(); ++idx ) if( file_index.mblock( idx ).includes( bad_byte.pos ) ) break; if( idx >= file_index.members() ) - { if( verbosity >= 0 ) pp( "Nothing to do." ); return 0; } + { pp( "Nothing to do." 
); return 0; } const long long mpos = file_index.mblock( idx ).pos(); const long long msize = file_index.mblock( idx ).size(); @@ -377,10 +376,10 @@ int debug_repair( const std::string & input_filename, long pos = repair_dictionary_size( msize, mbuffer ); if( pos == 0 ) pos = repair_member( mpos, msize, mbuffer, File_header::size + 1, - File_header::size + 5, dictionary_size, verbosity ); + File_header::size + 5, dictionary_size ); if( pos == 0 ) pos = repair_member( mpos, msize, mbuffer, File_header::size + 6, - failure_pos, dictionary_size, verbosity ); + failure_pos, dictionary_size ); print_pending_newline(); delete[] mbuffer; if( pos < 0 ) @@ -393,15 +392,14 @@ int debug_repair( const std::string & input_filename, int debug_decompress( const std::string & input_filename, - const Bad_byte & bad_byte, const int verbosity, - const bool show_packets ) + const Bad_byte & bad_byte, const bool show_packets ) { struct stat in_stats; const int infd = open_instream( input_filename.c_str(), &in_stats, true, true ); if( infd < 0 ) return 1; - Pretty_print pp( input_filename, verbosity ); - const File_index file_index( infd, false, true ); + Pretty_print pp( input_filename ); + const File_index file_index( infd, false, true, true ); if( file_index.retval() != 0 ) { show_file_error( input_filename.c_str(), file_index.error().c_str() ); return file_index.retval(); } @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2017 Antonio Diaz Diaz. + Copyright (C) 2009-2018 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -86,7 +86,7 @@ int find_magic( const uint8_t * const buffer, const int pos, const int size ) int do_split_file( const std::string & input_filename, uint8_t * & base_buffer, const std::string & default_output_filename, - const int verbosity, const bool force ) + const bool force ) { const int hsize = File_header::size; const int tsize = File_trailer::size; @@ -98,7 +98,15 @@ int do_split_file( const std::string & input_filename, uint8_t * & base_buffer, struct stat in_stats; const int infd = open_instream( input_filename.c_str(), &in_stats, true, true ); if( infd < 0 ) return 1; - Pretty_print pp( input_filename, verbosity ); + Pretty_print pp( input_filename ); + + // don't move this after seek_read + const File_index file_index( infd, true, true, true ); +// if( file_index.retval() != 0 ) pp( file_index.error().c_str() ); + const long max_members = file_index.retval() ? 999999 : file_index.members(); + int max_digits = 1; + for( long i = max_members; i >= 10; i /= 10 ) ++max_digits; + int size = seek_read( infd, buffer, buffer_size + hsize, 0 ) - hsize; bool at_stream_end = ( size < buffer_size ); if( size != buffer_size && errno ) @@ -107,17 +115,12 @@ int do_split_file( const std::string & input_filename, uint8_t * & base_buffer, { pp( "Input file is too short." ); return 2; } if( !verify_header( *(File_header *)buffer, pp ) ) return 2; - const File_index file_index( infd, true, true ); - if( file_index.retval() != 0 ) pp( file_index.error().c_str() ); - const long max_members = file_index.retval() ? 
999999 : file_index.members(); - int max_digits = 1; - for( long i = max_members; i >= 10; i /= 10 ) ++max_digits; - first_filename( input_filename, default_output_filename, max_digits ); if( !open_outstream( force, false, false, false ) ) { close( infd ); return 1; } unsigned long long partial_member_size = 0; + const bool ttyout = isatty( STDOUT_FILENO ); while( true ) { int pos = 0; @@ -135,7 +138,8 @@ int do_split_file( const std::string & input_filename, uint8_t * & base_buffer, if( close_outstream( &in_stats ) != 0 ) return 1; if( verbosity >= 1 ) { - std::printf( "Member '%s' done \r", output_filename.c_str() ); + std::printf( "Member '%s' done %c", output_filename.c_str(), + ttyout ? '\r' : '\n' ); std::fflush( stdout ); } if( !next_filename( max_digits ) ) @@ -184,29 +188,19 @@ int do_split_file( const std::string & input_filename, uint8_t * & base_buffer, bool verify_header( const File_header & header, const Pretty_print & pp ) { if( !header.verify_magic() ) - { - pp( "Bad magic number (file not in lzip format)." ); - return false; - } + { pp( bad_magic_msg ); return false; } if( !header.verify_version() ) - { - if( pp.verbosity() >= 0 ) - { pp(); - std::fprintf( stderr, "Version %d member format not supported.\n", - header.version() ); } - return false; - } + { pp( bad_version( header.version() ) ); return false; } return true; } int split_file( const std::string & input_filename, - const std::string & default_output_filename, - const int verbosity, const bool force ) + const std::string & default_output_filename, const bool force ) { uint8_t * base_buffer; const int retval = do_split_file( input_filename, base_buffer, - default_output_filename, verbosity, force ); + default_output_filename, force ); delete[] base_buffer; return retval; } diff --git a/testsuite/check.sh b/testsuite/check.sh index 3ce8f9e..76ad361 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! 
/bin/sh # check script for Lziprecover - Data recovery tool for the lzip format -# Copyright (C) 2009-2017 Antonio Diaz Diaz. +# Copyright (C) 2009-2018 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute and modify it. @@ -77,6 +77,8 @@ printf "testing lziprecover-%s..." "$2" [ $? = 2 ] || test_failed $LINENO "${LZIP}" -cdq < in [ $? = 2 ] || test_failed $LINENO +"${LZIP}" -dq -o in < "${in_lz}" +[ $? = 1 ] || test_failed $LINENO # these are for code coverage "${LZIP}" -lt "${in_lz}" 2> /dev/null [ $? = 1 ] || test_failed $LINENO @@ -111,6 +113,12 @@ printf "LZIP\001+.............................." | "${LZIP}" -t 2> /dev/null [ $? = 1 ] || test_failed $LINENO "${LZIPRECOVER}" -sq [ $? = 1 ] || test_failed $LINENO +"${LZIPRECOVER}" -q --dump-tdata +[ $? = 1 ] || test_failed $LINENO +"${LZIPRECOVER}" -q --strip-tdata +[ $? = 1 ] || test_failed $LINENO +"${LZIPRECOVER}" -q --remove-tdata +[ $? = 1 ] || test_failed $LINENO "${LZIPRECOVER}" -Aq in [ $? = 2 ] || test_failed $LINENO @@ -244,6 +252,54 @@ cmp "${inD}" copy || test_failed $LINENO printf "\ntesting bad input..." +headers='LZIp LZiP LZip LzIP LzIp LziP lZIP lZIp lZiP lzIP' +body='\001\014\000\203\377\373\377\377\300\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000$\000\000\000\000\000\000\000' +cat "${in_lz}" > in0.lz +printf "LZIP${body}" >> in0.lz +if "${LZIP}" -tq in0.lz ; then + for header in ${headers} ; do + printf "${header}${body}" > in0.lz # first member + "${LZIP}" -lq in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -tq in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -lq --loose-trailing in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -tq --loose-trailing in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} + cat "${in_lz}" > in0.lz + printf "${header}${body}" >> in0.lz # trailing data + "${LZIP}" -lq in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -tq in0.lz + [ $? 
= 2 ] || test_failed $LINENO ${header} + "${LZIP}" -lq --loose-trailing in0.lz + [ $? = 0 ] || test_failed $LINENO ${header} + "${LZIP}" -t --loose-trailing in0.lz + [ $? = 0 ] || test_failed $LINENO ${header} + "${LZIP}" -lq --loose-trailing --trailing-error in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIP}" -tq --loose-trailing --trailing-error in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIPRECOVER}" -q --dump-tdata in0.lz > /dev/null + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIPRECOVER}" -q --strip-tdata in0.lz > /dev/null + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIPRECOVER}" --dump-tdata --loose-trailing in0.lz > /dev/null + [ $? = 0 ] || test_failed $LINENO ${header} + "${LZIPRECOVER}" --strip-tdata --loose-trailing in0.lz > /dev/null + [ $? = 0 ] || test_failed $LINENO ${header} + "${LZIPRECOVER}" -q --remove-tdata in0.lz + [ $? = 2 ] || test_failed $LINENO ${header} + "${LZIPRECOVER}" --remove-tdata --loose-trailing in0.lz + [ $? = 0 ] || test_failed $LINENO ${header} + cmp "${in_lz}" in0.lz || test_failed $LINENO ${header} + done +else + printf "\nwarning: skipping header test: 'printf' does not work on your system." +fi +rm -f in0.lz + cat "${in_lz}" "${in_lz}" "${in_lz}" > in3.lz || framework_failure if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null && [ -e trunc.lz ] && cmp in2.lz trunc.lz > /dev/null 2>&1 ; then @@ -251,7 +307,7 @@ if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null && dd if=in3.lz of=trunc.lz bs=$i count=1 2> /dev/null "${LZIP}" -lq trunc.lz [ $? = 2 ] || test_failed $LINENO $i - "${LZIP}" -t trunc.lz 2> /dev/null + "${LZIP}" -tq trunc.lz [ $? = 2 ] || test_failed $LINENO $i "${LZIP}" -tq < trunc.lz [ $? = 2 ] || test_failed $LINENO $i @@ -263,6 +319,7 @@ if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null && else printf "\nwarning: skipping truncation test: 'dd' does not work on your system." 
fi +rm -f in3.lz trunc.lz cat "${in_lz}" > ingin.lz || framework_failure printf "g" >> ingin.lz || framework_failure @@ -275,6 +332,13 @@ cmp in copy || test_failed $LINENO "${LZIP}" -t < ingin.lz || test_failed $LINENO "${LZIP}" -d < ingin.lz > copy || test_failed $LINENO cmp in copy || test_failed $LINENO +"${LZIPRECOVER}" -q --dump-tdata ingin.lz +[ $? = 2 ] || test_failed $LINENO +"${LZIPRECOVER}" -q --strip-tdata ingin.lz +[ $? = 2 ] || test_failed $LINENO +"${LZIPRECOVER}" -q --remove-tdata ingin.lz +[ $? = 2 ] || test_failed $LINENO +rm -f ingin.lz printf "\ntesting --merge..." @@ -449,13 +513,46 @@ mv copy.lz copy.tlz || framework_failure printf "\ntesting --split..." -cat "${in_lz}" "${in_lz}" "${in_lz}" > copy || framework_failure -printf "garbage" >> copy || framework_failure -"${LZIPRECOVER}" -s -o copy.lz copy || test_failed $LINENO -for i in 1 2 3 ; do - "${LZIPRECOVER}" -cd rec${i}copy.lz > copy || test_failed $LINENO $i +cat "${in_lz}" "${in_lz}" "${in_lz}" "${in_lz}" "${in_lz}" "${in_lz}" \ + "${in_lz}" "${in_lz}" "${in_lz}" > in9.lz || framework_failure +printf "garbage" >> in9.lz || framework_failure +"${LZIPRECOVER}" -s in9.lz || test_failed $LINENO +for i in 1 2 3 4 5 6 7 8 9 ; do + "${LZIP}" -cd rec${i}in9.lz > copy || test_failed $LINENO $i cmp in copy || test_failed $LINENO $i done +cat rec*in9.lz | cmp in9.lz - || test_failed $LINENO + +printf "\ntesting trailing data..." 
+ +cat "${in_lz}" "${inD}" > int.lz || framework_failure +"${LZIPRECOVER}" --dump-tdata int.lz > copy +cmp "${inD}" copy || test_failed $LINENO +rm -f copy +"${LZIPRECOVER}" --dump-tdata int.lz -o copy +cmp "${inD}" copy || test_failed $LINENO +cat "${fox6_lz}" "${inD}" > fox6t.lz || framework_failure +cat "${inD}" "${inD}" > inD2 || framework_failure +"${LZIPRECOVER}" --dump-tdata int.lz fox6t.lz -f -o copy +cmp inD2 copy || test_failed $LINENO + +"${LZIPRECOVER}" --strip-tdata int.lz > copy +cmp "${in_lz}" copy || test_failed $LINENO +rm -f copy +"${LZIPRECOVER}" --strip-tdata int.lz -o copy +cmp "${in_lz}" copy || test_failed $LINENO +"${LZIPRECOVER}" --strip-tdata fox6t.lz -f -o copy +cmp "${fox6_lz}" copy || test_failed $LINENO +"${LZIPRECOVER}" --strip-tdata int.lz int.lz -f -o copy +cmp in2.lz copy || test_failed $LINENO + +"${LZIPRECOVER}" --remove-tdata int.lz fox6t.lz +cmp "${in_lz}" int.lz || test_failed $LINENO +cmp "${fox6_lz}" fox6t.lz || test_failed $LINENO +"${LZIPRECOVER}" --remove-tdata int.lz +cmp "${in_lz}" int.lz || test_failed $LINENO +"${LZIPRECOVER}" --remove-tdata fox6t.lz +cmp "${fox6_lz}" fox6t.lz || test_failed $LINENO echo if [ ${fail} = 0 ] ; then diff --git a/trailing_data.cc b/trailing_data.cc new file mode 100644 index 0000000..e03b145 --- /dev/null +++ b/trailing_data.cc @@ -0,0 +1,144 @@ +/* Lziprecover - Data recovery tool for the lzip format + Copyright (C) 2009-2018 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <cerrno> +#include <cstdio> +#include <cstring> +#include <string> +#include <vector> +#include <stdint.h> +#include <unistd.h> +#include <utime.h> +#include <sys/stat.h> + +#include "lzip.h" +#include "block.h" +#include "file_index.h" + + +int dump_tdata( const std::vector< std::string > & filenames, + const std::string & default_output_filename, const bool force, + const bool strip, const bool loose_trailing ) + { + if( default_output_filename.empty() ) outfd = STDOUT_FILENO; + else + { + output_filename = default_output_filename; + if( !open_outstream( force, true, false, false ) ) return 1; + } + unsigned long long total_size = 0; + int files = 0, retval = 0; + bool stdin_used = false; + for( unsigned i = 0; i < filenames.size(); ++i ) + { + const bool from_stdin = ( filenames[i] == "-" ); + if( from_stdin ) { if( stdin_used ) continue; else stdin_used = true; } + const char * const input_filename = + from_stdin ? "(stdin)" : filenames[i].c_str(); + struct stat in_stats; // not used + const int infd = from_stdin ? 
STDIN_FILENO : + open_instream( input_filename, &in_stats, true, true ); + if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; } + + const File_index file_index( infd, false, true, loose_trailing ); + if( file_index.retval() != 0 ) + { + show_file_error( input_filename, file_index.error().c_str() ); + if( retval < file_index.retval() ) retval = file_index.retval(); + close( infd ); + continue; + } + const unsigned long long cdata_size = file_index.cdata_size(); + const long long trailing_size = file_index.file_size() - cdata_size; + if( strip ) + { + total_size += cdata_size; ++files; + if( !safe_seek( infd, 0 ) || !copy_file( infd, outfd, cdata_size ) ) + cleanup_and_fail( 1 ); + } + else if( trailing_size > 0 ) + { + total_size += trailing_size; ++files; + if( !safe_seek( infd, cdata_size ) || !copy_file( infd, outfd ) ) + cleanup_and_fail( 1 ); + } + close( infd ); + } + if( verbosity >= 1 ) + { + if( strip ) + std::fprintf( stderr, "%llu bytes copied from %d file(s).\n", + total_size, files ); + else + std::fprintf( stderr, "%llu trailing bytes dumped from %d file(s).\n", + total_size, files ); + } + if( close_outstream( 0 ) != 0 ) return 1; + return retval; + } + + +int remove_tdata( const std::vector< std::string > & filenames, + const bool loose_trailing ) + { + unsigned long long total_size = 0; + int files = 0, retval = 0; + for( unsigned i = 0; i < filenames.size(); ++i ) + { + const char * const filename = filenames[i].c_str(); + struct stat in_stats; + const int infd = open_truncable_stream( filename, &in_stats ); + if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; } + + const File_index file_index( infd, false, true, loose_trailing ); + if( file_index.retval() != 0 ) + { + show_file_error( filename, file_index.error().c_str() ); + if( retval < file_index.retval() ) retval = file_index.retval(); + close( infd ); + continue; + } + const unsigned long long cdata_size = file_index.cdata_size(); + const long long trailing_size = 
file_index.file_size() - cdata_size; + if( trailing_size > 0 ) + { + int i; + do i = ftruncate( infd, cdata_size ); + while( i != 0 && errno == EINTR ); + if( i == 0 ) + { + struct utimbuf t; + t.actime = in_stats.st_atime; + t.modtime = in_stats.st_mtime; + utime( filename, &t ); + total_size += trailing_size; ++files; + } + else + { + show_file_error( filename, "Can't truncate file", errno ); + if( retval < 1 ) retval = 1; + } + } + close( infd ); + } + if( verbosity >= 1 ) + std::fprintf( stderr, "%llu trailing bytes removed from %d file(s).\n", + total_size, files ); + return retval; + } diff --git a/unzcrash.cc b/unzcrash.cc index 9a32b82..a118b9d 100644 --- a/unzcrash.cc +++ b/unzcrash.cc @@ -1,6 +1,6 @@ /* Unzcrash - Tests robustness of decompressors to corrupted data. Inspired by unzcrash.c from Julian Seward's bzip2. - Copyright (C) 2008-2017 Antonio Diaz Diaz. + Copyright (C) 2008-2018 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -44,12 +44,13 @@ #define INT64_MAX 0x7FFFFFFFFFFFFFFFLL #endif +void show_error( const char * const msg, const int errcode = 0, + const bool help = false ); namespace { const char * const Program_name = "Unzcrash"; const char * const program_name = "unzcrash"; -const char * const program_year = "2017"; const char * invocation_name = 0; int verbosity = 0; @@ -58,10 +59,12 @@ int verbosity = 0; void show_help() { std::printf( "%s - Tests robustness of decompressors to corrupted data.\n", Program_name ); - std::printf( "\nUsage: %s [options] \"lzip -tv\" filename.lz\n", invocation_name ); + std::printf( "\nUsage: %s [options] 'lzip -t' file.lz\n", invocation_name ); std::printf( "\nBy default, unzcrash reads the specified file and then repeatedly\n" "decompresses it, increasing 256 times each byte of the compressed data,\n" - "so as to test all possible one-byte errors.\n" + "so as to test all possible one-byte errors. 
Note that it may take years\n" + "or even centuries to test all possible one-byte errors in a large file\n" + "(tens of MB).\n" "\nIf the '--block' option is given, unzcrash reads the specified file\n" "and then repeatedly decompresses it, setting all bytes in each\n" "successive block to the value given, so as to test all possible full\n" @@ -88,8 +91,9 @@ void show_help() " -V, --version output version information and exit\n" " -b, --bits=<range> test N-bit errors instead of full byte\n" " -B, --block[=<size>][,<val>] test blocks of given size [512,0]\n" - " -d, --delta=<n> test one of every n bytes/blocks/truncations\n" + " -d, --delta=<n> test one byte/block/truncation every n bytes\n" " -e, --set-byte=<pos>,<val> set byte at position <pos> to value <val>\n" + " -n, --no-verify skip initial verification of file.lz\n" " -p, --position=<bytes> first byte position to test [default 0]\n" " -q, --quiet suppress all messages\n" " -s, --size=<bytes> number of byte positions to test [all]\n" @@ -107,106 +111,22 @@ void show_help() "Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html\n" ); } +} // end namespace -void show_version() - { - std::printf( "%s %s\n", program_name, PROGVERSION ); - std::printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year ); - std::printf( "License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n" - "This is free software: you are free to change and redistribute it.\n" - "There is NO WARRANTY, to the extent permitted by law.\n" ); - } - - -void show_error( const char * const msg, const int errcode = 0, - const bool help = false ) - { - if( verbosity < 0 ) return; - if( msg && msg[0] ) - { - std::fprintf( stderr, "%s: %s", program_name, msg ); - if( errcode > 0 ) std::fprintf( stderr, ": %s", std::strerror( errcode ) ); - std::fputc( '\n', stderr ); - } - if( help ) - std::fprintf( stderr, "Try '%s --help' for more information.\n", - invocation_name ); - } - - -void internal_error( const char * 
const msg ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: internal error: %s\n", program_name, msg ); - std::exit( 3 ); - } +#include "main_common.cc" -long long getnum( const char * const ptr, const long long llimit = -LLONG_MAX, - const long long ulimit = LLONG_MAX, - const char ** const tailp = 0 ) - { - char * tail; - errno = 0; - long long result = strtoll( ptr, &tail, 0 ); - if( tail == ptr ) - { - show_error( "Bad or missing numerical argument.", 0, true ); - std::exit( 1 ); - } - - if( !errno && tail[0] ) - { - char * const p = tail++; - int factor; - bool bsuf; // 'B' suffix is present - if( tail[0] == 'i' ) { ++tail; factor = 1024; } else factor = 1000; - if( tail[0] == 'B' ) { ++tail; bsuf = true; } else bsuf = false; - int exponent = -1; // -1 = bad multiplier - switch( *p ) - { - case 'Y': exponent = 8; break; - case 'Z': exponent = 7; break; - case 'E': exponent = 6; break; - case 'P': exponent = 5; break; - case 'T': exponent = 4; break; - case 'G': exponent = 3; break; - case 'M': exponent = 2; break; - case 'K': if( factor == 1024 ) exponent = 1; break; - case 'k': if( factor == 1000 ) exponent = 1; break; - case 'B': if( factor == 1000 && !bsuf ) exponent = 0; break; - default : if( tailp ) { tail = p; exponent = 0; } break; - } - if( exponent < 0 ) - { - show_error( "Bad multiplier in numerical argument.", 0, true ); - std::exit( 1 ); - } - for( int i = 0; i < exponent; ++i ) - { - if( LLONG_MAX / factor >= std::labs( result ) ) result *= factor; - else { errno = ERANGE; break; } - } - } - if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE; - if( errno ) - { - show_error( "Numerical argument out of limits." 
); - std::exit( 1 ); - } - if( tailp ) *tailp = tail; - return result; - } +namespace { void parse_block( const char * const ptr, long & size, uint8_t & value ) { const char * tail = ptr; if( tail[0] != ',' ) - size = getnum( ptr, 1, INT_MAX, &tail ); + size = getnum( ptr, 0, 1, INT_MAX, &tail ); if( tail[0] == ',' ) - value = getnum( tail + 1, 0, 255 ); + value = getnum( tail + 1, 0, 0, 255 ); else if( tail[0] ) { show_error( "Bad separator in argument of '--block'", 0, true ); @@ -237,7 +157,7 @@ struct Bad_byte void parse_pos_value( const char * const ptr, Bad_byte & bad_byte ) { const char * tail; - bad_byte.pos = getnum( ptr, 0, INT64_MAX, &tail ); + bad_byte.pos = getnum( ptr, 0, 0, INT64_MAX, &tail ); if( tail[0] != ',' ) { show_error( "Bad separator between <pos> and <val>.", 0, true ); @@ -246,7 +166,7 @@ void parse_pos_value( const char * const ptr, Bad_byte & bad_byte ) if( tail[1] == '+' ) { ++tail; bad_byte.mode = Bad_byte::delta; } else if( tail[1] == 'f' ) { ++tail; bad_byte.mode = Bad_byte::flip; } else bad_byte.mode = Bad_byte::literal; - bad_byte.value = getnum( tail + 1, 0, 255 ); + bad_byte.value = getnum( tail + 1, 0, 0, 255 ); } @@ -348,7 +268,7 @@ public: if( data[i] ) { std::printf( "%d", i + 1 ); - if( --c ) std::fputs( ",", stdout ); + if( --c ) std::fputc( ',', stdout ); } std::fputs( " bit errors.\n", stdout ); } @@ -378,27 +298,29 @@ int main( const int argc, const char * const argv[] ) const char * zcmp_program = "zcmp"; long pos = 0; long max_size = LONG_MAX; - long delta = 1; + long delta = 0; // to be set later long block_size = 512; Mode program_mode = m_byte; uint8_t block_value = 0; + bool verify = true; invocation_name = argv[0]; const Arg_parser::Option options[] = { - { 'h', "help", Arg_parser::no }, - { 'b', "bits", Arg_parser::yes }, - { 'B', "block", Arg_parser::maybe }, - { 'd', "delta", Arg_parser::yes }, - { 'e', "set-byte", Arg_parser::yes }, - { 'p', "position", Arg_parser::yes }, - { 'q', "quiet", Arg_parser::no }, - 
{ 's', "size", Arg_parser::yes }, - { 't', "truncate", Arg_parser::no }, - { 'v', "verbose", Arg_parser::no }, - { 'V', "version", Arg_parser::no }, - { 'z', "zcmp", Arg_parser::yes }, - { 0 , 0, Arg_parser::no } }; + { 'h', "help", Arg_parser::no }, + { 'b', "bits", Arg_parser::yes }, + { 'B', "block", Arg_parser::maybe }, + { 'd', "delta", Arg_parser::yes }, + { 'e', "set-byte", Arg_parser::yes }, + { 'n', "no-verify", Arg_parser::no }, + { 'p', "position", Arg_parser::yes }, + { 'q', "quiet", Arg_parser::no }, + { 's', "size", Arg_parser::yes }, + { 't', "truncate", Arg_parser::no }, + { 'v', "verbose", Arg_parser::no }, + { 'V', "version", Arg_parser::no }, + { 'z', "zcmp", Arg_parser::yes }, + { 0 , 0, Arg_parser::no } }; const Arg_parser parser( argc, argv, options ); if( parser.error().size() ) // bad option @@ -416,11 +338,12 @@ int main( const int argc, const char * const argv[] ) case 'b': if( !bits.parse( arg ) ) return 1; program_mode = m_byte; break; case 'B': if( arg[0] ) parse_block( arg, block_size, block_value ); program_mode = m_block; break; - case 'd': delta = getnum( arg, 1, INT_MAX ); break; + case 'd': delta = getnum( arg, block_size, 1, INT_MAX ); break; case 'e': parse_pos_value( arg, bad_byte ); break; - case 'p': pos = getnum( arg, -LONG_MAX, LONG_MAX ); break; + case 'n': verify = false; break; + case 'p': pos = getnum( arg, block_size, -LONG_MAX, LONG_MAX ); break; case 'q': verbosity = -1; break; - case 's': max_size = getnum( arg, -LONG_MAX, LONG_MAX ); break; + case 's': max_size = getnum( arg, block_size, -LONG_MAX, LONG_MAX ); break; case 't': program_mode = m_truncate; break; case 'v': if( verbosity < 4 ) ++verbosity; break; case 'V': show_version(); return 0; @@ -432,11 +355,12 @@ int main( const int argc, const char * const argv[] ) if( argind + 2 != parser.arguments() ) { if( verbosity >= 0 ) - std::fprintf( stderr, "Usage: %s \"lzip -tv\" filename.lz\n", - invocation_name ); + std::fprintf( stderr, "Usage: %s 'lzip -t' 
file.lz\n", invocation_name ); return 1; } + if( delta <= 0 ) delta = ( program_mode == m_block ) ? block_size : 1; + const char * const filename = parser.argument( argind + 1 ).c_str(); long file_size = 0; uint8_t * const buffer = read_file( filename, &file_size ); @@ -449,28 +373,31 @@ int main( const int argc, const char * const argv[] ) // verify original file if( verbosity >= 1 ) fprintf( stderr, "Testing file '%s'\n", filename ); - FILE * f = popen( command, "w" ); - if( !f ) - { show_error( "Can't open pipe to decompressor", errno ); return 1; } - if( (long)std::fwrite( buffer, 1, file_size, f ) != file_size ) - { show_error( "Can't write to decompressor", errno ); return 1; } - if( pclose( f ) != 0 ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Can't run '%s'.\n", program_name, command ); - return 1; - } - if( zcmp_command[0] ) + if( verify ) { - f = popen( zcmp_command, "w" ); + FILE * f = popen( command, "w" ); if( !f ) - { show_error( "Can't open pipe to zcmp command", errno ); return 1; } + { show_error( "Can't open pipe to decompressor", errno ); return 1; } if( (long)std::fwrite( buffer, 1, file_size, f ) != file_size ) - { show_error( "Can't write to zcmp command", errno ); return 1; } + { show_error( "Can't write to decompressor", errno ); return 1; } if( pclose( f ) != 0 ) { - show_error( "zcmp command failed. Skipping comparison" ); - zcmp_command[0] = 0; + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't run '%s'.\n", program_name, command ); + return 1; + } + if( zcmp_command[0] ) + { + f = popen( zcmp_command, "w" ); + if( !f ) + { show_error( "Can't open pipe to zcmp command", errno ); return 1; } + if( (long)std::fwrite( buffer, 1, file_size, f ) != file_size ) + { show_error( "Can't write to zcmp command", errno ); return 1; } + if( pclose( f ) != 0 ) + { + show_error( "zcmp command failed. 
Disabling comparisons" ); + zcmp_command[0] = 0; + } } } @@ -494,7 +421,7 @@ int main( const int argc, const char * const argv[] ) if( verbosity >= 0 ) std::fprintf( stderr, "length %ld\n", i ); ++positions; ++decompressions; - f = popen( command, "w" ); + FILE * f = popen( command, "w" ); if( !f ) { show_error( "Can't open pipe", errno ); return 1; } std::fwrite( buffer, 1, i, f ); if( pclose( f ) == 0 ) @@ -520,13 +447,13 @@ int main( const int argc, const char * const argv[] ) { uint8_t * block = (uint8_t *)std::malloc( block_size ); if( !block ) { show_error( "Not enough memory." ); return 1; } - for( long i = pos; i < end; i += std::min( block_size * delta, end - i ) ) + for( long i = pos; i < end; i += std::min( delta, end - i ) ) { const long size = std::min( block_size, file_size - i ); if( verbosity >= 0 ) std::fprintf( stderr, "block %ld,%ld\n", i, size ); ++positions; ++decompressions; - f = popen( command, "w" ); + FILE * f = popen( command, "w" ); if( !f ) { show_error( "Can't open pipe", errno ); return 1; } std::memcpy( block , buffer + i, size ); std::memset( buffer + i, block_value, size ); @@ -571,15 +498,17 @@ int main( const int argc, const char * const argv[] ) if( verbosity >= 2 ) std::fprintf( stderr, "0x%02X (0x%02X+0x%02X) ", buffer[i], byte, j ); - f = popen( command, "w" ); + FILE * f = popen( command, "w" ); if( !f ) { show_error( "Can't open pipe", errno ); return 1; } std::fwrite( buffer, 1, file_size, f ); if( pclose( f ) == 0 ) { ++successes; if( verbosity >= 0 ) - std::fprintf( stderr, "0x%02X (0x%02X+0x%02X) passed the test\n", - buffer[i], byte, j ); + { if( verbosity < 2 ) + std::fprintf( stderr, "0x%02X (0x%02X+0x%02X) ", + buffer[i], byte, j ); + std::fputs( "passed the test\n", stderr ); } if( zcmp_command[0] ) { f = popen( zcmp_command, "w" ); @@ -606,10 +535,12 @@ int main( const int argc, const char * const argv[] ) positions, mode_str[program_mode], decompressions, successes ); if( successes > 0 ) { - if( zcmp_command[0] ) 
+ if( zcmp_command[0] == 0 ) + std::fputs( "\n comparisons disabled\n", stderr ); + else if( failed_comparisons > 0 ) std::fprintf( stderr, ", of which\n%8ld comparisons failed\n", failed_comparisons ); - else std::fprintf( stderr, "\n comparisons disabled\n" ); + else std::fputs( "\n all comparisons passed\n", stderr ); } else std::fputc( '\n', stderr ); } |