diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2017-05-07 15:53:12 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2017-05-07 15:53:12 +0000 |
commit | 217f007824bd69712dada24a431c0f703d515fa3 (patch) | |
tree | f9e719e5800eda365dae0baf81f11a20467ac07f | |
parent | Releasing debian version 1.18-5. (diff) | |
download | lziprecover-217f007824bd69712dada24a431c0f703d515fa3.tar.xz lziprecover-217f007824bd69712dada24a431c0f703d515fa3.zip |
Merging upstream version 1.19.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | ChangeLog | 18 | ||||
-rw-r--r-- | INSTALL | 4 | ||||
-rw-r--r-- | Makefile.in | 16 | ||||
-rw-r--r-- | NEWS | 78 | ||||
-rw-r--r-- | README | 33 | ||||
-rw-r--r-- | alone_to_lz.cc | 10 | ||||
-rw-r--r-- | arg_parser.cc | 16 | ||||
-rw-r--r-- | arg_parser.h | 5 | ||||
-rw-r--r-- | block.cc | 2 | ||||
-rw-r--r-- | block.h | 5 | ||||
-rwxr-xr-x | configure | 21 | ||||
-rw-r--r-- | decoder.cc | 56 | ||||
-rw-r--r-- | decoder.h | 119 | ||||
-rw-r--r-- | doc/lziprecover.1 | 9 | ||||
-rw-r--r-- | doc/lziprecover.info | 180 | ||||
-rw-r--r-- | doc/lziprecover.texi | 144 | ||||
-rw-r--r-- | file_index.cc | 135 | ||||
-rw-r--r-- | file_index.h | 22 | ||||
-rw-r--r-- | list.cc | 122 | ||||
-rw-r--r-- | lzip.h | 61 | ||||
-rw-r--r-- | main.cc | 275 | ||||
-rw-r--r-- | merge.cc | 158 | ||||
-rw-r--r-- | mtester.cc | 118 | ||||
-rw-r--r-- | mtester.h | 4 | ||||
-rw-r--r-- | range_dec.cc | 92 | ||||
-rw-r--r-- | repair.cc | 94 | ||||
-rw-r--r-- | split.cc | 4 | ||||
-rwxr-xr-x | testsuite/check.sh | 513 | ||||
-rw-r--r-- | testsuite/test_bad4.lz | bin | 7376 -> 7376 bytes | |||
-rw-r--r-- | testsuite/test_bad5.lz | bin | 7376 -> 7376 bytes | |||
-rw-r--r-- | unzcrash.cc | 117 |
31 files changed, 1468 insertions, 963 deletions
@@ -1,3 +1,17 @@ +2017-04-10 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.19 released. + * merge.cc: Fix members with thousands of scattered errors. + * Option '-a' now works with '-l' and '-D'. + * The output of option '-l, --list' has been simplified. + * main.cc: Continue testing if any input file is a terminal. + * main.cc: Show trailing data in both hexadecimal and ASCII. + * file_index.cc: Improve detection of bad dict and trailing data. + * file_index.cc: Skip trailing data more efficiently. + * lzip.h: Unified messages for bad magic, trailing data, etc. + * New struct Bad_byte allows delta and flip modes for bad_value. + * unzcrash.cc: Added new option '-e, --set-byte'. + 2016-05-12 Antonio Diaz Diaz <antonio@gnu.org> * Version 1.18 released. @@ -91,7 +105,7 @@ range of bytes decompressing only the members containing the desired data. * Added new option '-l, --list' which prints correct total file - sizes and ratios even for multimember files. + sizes even for multimember files. * merge.cc repair.cc: Remove output file if recovery fails. * Changed quote characters in messages as advised by GNU Standards. * split.cc: Use Boyer-Moore algorithm to search for headers. @@ -138,7 +152,7 @@ * testsuite/unzcrash.cc: Test all 1-byte errors. -Copyright (C) 2009-2016 Antonio Diaz Diaz. +Copyright (C) 2009-2017 Antonio Diaz Diaz. This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute and @@ -6,7 +6,7 @@ standards compliant compiler. Gcc is available at http://gcc.gnu.org. Unzcrash needs a zcmp program able to understand the format being -tested. For example the one provided by zutils. +tested. For example the zcmp program provided by zutils. Zutils is available at http://www.nongnu.org/zutils/zutils.html Procedure @@ -65,7 +65,7 @@ After running 'configure', you can run 'make' and 'make install' as explained above. -Copyright (C) 2009-2016 Antonio Diaz Diaz. +Copyright (C) 2009-2017 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/Makefile.in b/Makefile.in index 258ecc1..d3f1edb 100644 --- a/Makefile.in +++ b/Makefile.in @@ -7,15 +7,16 @@ INSTALL_DIR = $(INSTALL) -d -m 755 SHELL = /bin/sh CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1 -objs = arg_parser.o alone_to_lz.o block.o file_index.o merge.o mtester.o \ - range_dec.o repair.o split.o decoder.o main.o +objs = arg_parser.o alone_to_lz.o block.o file_index.o list.o merge.o \ + mtester.o range_dec.o repair.o split.o decoder.o main.o unzobjs = arg_parser.o unzcrash.o .PHONY : all install install-bin install-info install-man \ install-strip install-compress install-strip-compress \ install-bin-strip install-info-compress install-man-compress \ - install-as-lzip uninstall uninstall-bin uninstall-info uninstall-man \ + install-as-lzip \ + uninstall uninstall-bin uninstall-info uninstall-man \ doc info man check dist clean distclean all : $(progname) @@ -41,6 +42,7 @@ arg_parser.o : arg_parser.h block.o : block.h decoder.o : lzip.h decoder.h file_index.o : lzip.h block.h file_index.h +list.o : lzip.h block.h file_index.h main.o : arg_parser.h lzip.h decoder.h block.h merge.o : lzip.h decoder.h block.h file_index.h mtester.o : lzip.h mtester.h @@ -131,17 +133,17 @@ dist : doc $(DISTNAME)/doc/$(progname).1 \ $(DISTNAME)/doc/$(pkgname).info \ $(DISTNAME)/doc/$(pkgname).texi \ + $(DISTNAME)/*.h \ + $(DISTNAME)/*.cc \ $(DISTNAME)/testsuite/check.sh \ $(DISTNAME)/testsuite/fox6.lz \ $(DISTNAME)/testsuite/fox6_bad[1-5].lz \ $(DISTNAME)/testsuite/fox6_bad1.txt \ $(DISTNAME)/testsuite/test.txt \ + $(DISTNAME)/testsuite/test21723.txt \ $(DISTNAME)/testsuite/test.txt.lz \ $(DISTNAME)/testsuite/test.txt.lzma \ - $(DISTNAME)/testsuite/test21723.txt \ - $(DISTNAME)/testsuite/test_bad[1-5].lz \ - $(DISTNAME)/*.h \ - $(DISTNAME)/*.cc + $(DISTNAME)/testsuite/test_bad[1-5].lz rm -f $(DISTNAME) lzip -v -9 $(DISTNAME).tar @@ -1,71 +1,17 @@ -Changes in version 1.18: +Changes in version 1.19: -The option "-a, --trailing-error", which makes lziprecover exit with -error status 2 if any remaining input is detected after decompressing -the last member, has been added. +'--merge' is now able to fix files with thousands of scattered errors +per member by grouping the errors into clusters and then merging the +files as if each cluster were a single error. -"--merge" now detects identical files by their CRC. +The option '-a, --trailing-error' now works with '-l, --list' and +'-D, --range-decompress'. -"--repair" now tries to detect gross damage in the file before -attempting to repair it. +The output of option '-l, --list' has been simplified to make it easier +to read. -"--repair" now can repair a damaged dictionary size in the header. +In test mode, lziprecover now continues checking the rest of the files +if any input file is a terminal. -"--repair" now tries bytes at member offsets 7 to 11 first because -errors in these bytes sometimes can't be detected until the end of the -member. - -Decompression time has been reduced by 2%. - -When decompressing or testing, up to 6 bytes of trailing data are -printed if "-vvvv" is specified. - -The test of the value remaining in the range decoder has been removed. -(After extensive testing it has been found useless to detect corruption -in the decompressed data. Eliminating it reduces the number of false -positives for corruption and makes error detection more accurate). - -When decompressing, the file specified with the '--output' option is now -deleted if the input is a terminal. - -"--merge", "--range-decompress", "--repair" and "--split" now preserve -dates, permissions, and, when possible, ownership of the files created -just as "--decompress" does. - -Dictionary size and size of trailing data (if any) are now printed when -"-lv" is specified. - -The new option "-A, --alone-to-lz", which converts lzma-alone files to -lzip format without recompressing, just adding a lzip header and -trailer, has been added. Only streamed files with default LZMA -properties can be converted; non-streamed lzma-alone files lack the end -of stream marker required in lzip files. - -The new option "-W, --debug-decompress=<pos>,<val>", which sets the byte -<pos> to the value <val> and then decompresses to stdout the resulting -corrupt data, has been added. - -The new option "-X, --show-packets", which shows the LZMA packets -(coding sequences) coded in a given file, has been added. - -The short name of option "--debug-delay" has been changed to "-Y". - -The short name of option "--debug-repair" has been changed to "-Z". - -The new options "-B, --block", "-d, --delta", "-t, --truncate" and "-z, ---zcmp" have been added to unzcrash. - -Unzcrash now can read files as large as RAM allows. - -Unzcrash now compares the output of the decompressor for the original -and corrupt files when the decompressor returns with zero status. For -this unzcrash needs a 'zcmp' program able to understand the format being -tested. For example the one provided by 'zutils'. - -Unzcrash now accepts negative position (relative to the end of file) and -negative size (relative to the rest of the file). - -The new chapter "Trailing data" has been added to the manual. - -A harmless check failure on Windows, caused by the failed comparison of -a message in text mode, has been fixed. +Trailing data are now shown both in hexadecimal and as a string of +printable ASCII characters. @@ -1,10 +1,13 @@ Description Lziprecover is a data recovery tool and decompressor for files in the -lzip compressed data format (.lz), able to repair slightly damaged -files, produce a correct file by merging the good parts of two or more -damaged copies, extract data from damaged files, decompress files and -test integrity of files. +lzip compressed data format (.lz). Lziprecover is able to repair +slightly damaged files, produce a correct file by merging the good parts +of two or more damaged copies, extract data from damaged files, +decompress files and test integrity of files. + +Lziprecover provides random access to the data in multimember files; it +only decompresses the members containing the desired data. Lziprecover is not a replacement for regular backups, but a last line of defense for the case where the backups are also damaged. @@ -20,11 +23,11 @@ availability: merging of damaged copies of a file. * The lzip format is as simple as possible (but not simpler). The - lzip manual provides the code of a simple decompressor along with a - detailed explanation of how it works, so that with the only help of - the lzip manual it would be possible for a digital archaeologist to - extract the data from a lzip file long after quantum computers - eventually render LZMA obsolete. + lzip manual provides the source code of a simple decompressor along + with a detailed explanation of how it works, so that with the only + help of the lzip manual it would be possible for a digital + archaeologist to extract the data from a lzip file long after + quantum computers eventually render LZMA obsolete. * Additionally the lzip reference implementation is copylefted, which guarantees that it will remain free forever. @@ -34,6 +37,10 @@ repair the nearer it is from the beginning of the file. Therefore, with the help of lziprecover, losing an entire archive just because of a corrupt byte near the beginning is a thing of the past. +For compressible data, multiple lzip-compressed copies have a better +chance of surviving intact than one uncompressed copy using the same +amount of storage space. + Lziprecover is able to recover or decompress files produced by any of the compressors in the lzip family; lzip, plzip, minilzip/lzlib, clzip and pdlzip. @@ -46,12 +53,6 @@ If a file is too damaged for lziprecover to repair it, all the recoverable data in all members of the file can be extracted in one step with the '-D' option. -Lziprecover provides random access to the data in multimember files; it -only decompresses the members containing the desired data. - -Lziprecover can print correct total file sizes and ratios even for -multimember files. - When recovering data, lziprecover takes as arguments the names of the damaged files and writes zero or more recovered files depending on the operation selected and whether the recovery succeeded or not. The @@ -74,7 +75,7 @@ unzcrash.c from Julian Seward's bzip2. Type 'make unzcrash' in the lziprecover source directory to build it. Then try 'unzcrash --help'. -Copyright (C) 2009-2016 Antonio Diaz Diaz. +Copyright (C) 2009-2017 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/alone_to_lz.cc b/alone_to_lz.cc index 22e3f29..e87a18a 100644 --- a/alone_to_lz.cc +++ b/alone_to_lz.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2016 Antonio Diaz Diaz. + Copyright (C) 2009-2017 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -91,7 +91,13 @@ int alone_to_lz( const int infd, const Pretty_print & pp ) { pp( "file is too short" ); std::free( buffer ); return 2; } if( buffer[0] != 93 ) // (45 * 2) + (9 * 0) + 3 { - pp( "file has non-default LZMA properties" ); + File_header & header = *(File_header *)buffer; + const unsigned dictionary_size = header.dictionary_size(); + if( header.verify_magic() && header.verify_version() && + isvalid_ds( dictionary_size ) ) + pp( "file is already in lzip format" ); + else + pp( "file has non-default LZMA properties" ); std::free( buffer ); return 2; } for( int i = 5; i < 13; ++i ) if( buffer[i] != 0xFF ) diff --git a/arg_parser.cc b/arg_parser.cc index 82972ad..cc7d1e2 100644 --- a/arg_parser.cc +++ b/arg_parser.cc @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C++ version) - Copyright (C) 2006-2016 Antonio Diaz Diaz. + Copyright (C) 2006-2017 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -42,7 +42,7 @@ bool Arg_parser::parse_long_option( const char * const opt, const char * const a else if( index < 0 ) index = i; // First nonexact match found else if( options[index].code != options[i].code || options[index].has_arg != options[i].has_arg ) - ambig = true; // Second or later nonexact match found + ambig = true; // Second or later nonexact match found } if( ambig && !exact ) @@ -142,7 +142,7 @@ Arg_parser::Arg_parser( const int argc, const char * const argv[], { if( argc < 2 || !argv || !options ) return; - std::vector< std::string > non_options; // skipped non-options + std::vector< const char * > non_options; // skipped non-options int argind = 1; // index in argv while( argind < argc ) @@ -163,17 +163,17 @@ Arg_parser::Arg_parser( const int argc, const char * const argv[], } else { - if( !in_order ) non_options.push_back( argv[argind++] ); - else { data.push_back( Record() ); data.back().argument = argv[argind++]; } + if( in_order ) data.push_back( Record( argv[argind++] ) ); + else non_options.push_back( argv[argind++] ); } } if( error_.size() ) data.clear(); else { for( unsigned i = 0; i < non_options.size(); ++i ) - { data.push_back( Record() ); data.back().argument.swap( non_options[i] ); } + data.push_back( Record( non_options[i] ) ); while( argind < argc ) - { data.push_back( Record() ); data.back().argument = argv[argind++]; } + data.push_back( Record( argv[argind++] ) ); } } @@ -192,5 +192,5 @@ Arg_parser::Arg_parser( const char * const opt, const char * const arg, parse_short_option( opt, arg, options, argind ); if( error_.size() ) data.clear(); } - else { data.push_back( Record() ); data.back().argument = opt; } + else data.push_back( Record( opt ) ); } diff --git a/arg_parser.h b/arg_parser.h index f45b9ac..95b0320 100644 --- a/arg_parser.h +++ b/arg_parser.h @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C++ version) - Copyright (C) 2006-2016 Antonio Diaz Diaz. + Copyright (C) 2006-2017 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -57,7 +57,8 @@ private: { int code; std::string argument; - explicit Record( const int c = 0 ) : code( c ) {} + explicit Record( const int c ) : code( c ) {} + explicit Record( const char * const arg ) : code( 0 ), argument( arg ) {} }; std::string error_; @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2016 Antonio Diaz Diaz. + Copyright (C) 2009-2017 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2016 Antonio Diaz Diaz. + Copyright (C) 2009-2017 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -55,7 +55,8 @@ public: int range_decompress( const std::string & input_filename, const std::string & default_output_filename, Block range, const int verbosity, const bool force, - const bool ignore, const bool to_stdout ); + const bool ignore_errors, const bool ignore_trailing, + const bool to_stdout ); // defined in repair.cc int debug_delay( const std::string & input_filename, Block range, @@ -1,12 +1,12 @@ #! /bin/sh # configure script for Lziprecover - Data recovery tool for the lzip format -# Copyright (C) 2009-2016 Antonio Diaz Diaz. +# Copyright (C) 2009-2017 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. pkgname=lziprecover -pkgversion=1.18 +pkgversion=1.19 progname=lziprecover srctrigger=doc/${pkgname}.texi @@ -26,11 +26,11 @@ CXXFLAGS='-Wall -W -O2' LDFLAGS= # checking whether we are using GNU C++. -if /bin/sh -c "${CXX} --version" > /dev/null 2>&1 ; then true -else +/bin/sh -c "${CXX} --version" > /dev/null 2>&1 || + { CXX=c++ - CXXFLAGS='-W -O2' -fi + CXXFLAGS=-O2 + } # Loop over all args args= @@ -52,9 +52,12 @@ while [ $# != 0 ] ; do # Process the options case ${option} in --help | -h) - echo "Usage: configure [options]" + echo "Usage: $0 [OPTION]... [VAR=VALUE]..." + echo + echo "To assign makefile variables (e.g., CXX, CXXFLAGS...), specify them as" + echo "arguments to configure in the form VAR=VALUE." echo - echo "Options: [defaults in brackets]" + echo "Options and variables: [defaults in brackets]" echo " -h, --help display this help and exit" echo " -V, --version output version information and exit" echo " --srcdir=DIR find the sources in DIR [. or ..]" @@ -165,7 +168,7 @@ echo "LDFLAGS = ${LDFLAGS}" rm -f Makefile cat > Makefile << EOF # Makefile for Lziprecover - Data recovery tool for the lzip format -# Copyright (C) 2009-2016 Antonio Diaz Diaz. +# Copyright (C) 2009-2017 Antonio Diaz Diaz. # This file was generated automatically by configure. Don't edit. # # This Makefile is free software: you have unlimited permission @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2016 Antonio Diaz Diaz. + Copyright (C) 2009-2017 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -181,7 +181,7 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const ( 8.0 * member_size ) / data_size, 100.0 * ( 1.0 - ( (double)member_size / data_size ) ) ); if( !error && verbosity >= 4 ) - std::fprintf( stderr, "data CRC %08X, data size %9llu, member size %8llu. ", + std::fprintf( stderr, "CRC %08X, decompressed %9llu, compressed %8llu. ", crc(), data_size, member_size ); if( rdec.get_code() != 0 && !error && verbosity >= 1 ) { // corruption in the last 4 bytes of the EOS marker @@ -204,7 +204,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) Bit_model bm_rep2[State::states]; Bit_model bm_len[State::states][pos_states]; Bit_model bm_dis_slot[len_states][1<<dis_slot_bits]; - Bit_model bm_dis[modeled_distances-end_dis_model]; + Bit_model bm_dis[modeled_distances-end_dis_model+1]; Bit_model bm_align[dis_align_size]; Len_model match_len_model; Len_model rep_len_model; @@ -220,25 +220,23 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) const int pos_state = data_position() & pos_state_mask; if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit { - const uint8_t prev_byte = peek_prev(); - if( state.is_char() ) - { - state.set_char1(); - put_byte( rdec.decode_tree8( bm_literal[get_lit_state(prev_byte)] ) ); - } + Bit_model * const bm = bm_literal[get_lit_state(peek_prev())]; + if( state.is_char_set_char() ) + put_byte( rdec.decode_tree8( bm ) ); else - { - state.set_char2(); - put_byte( rdec.decode_matched( bm_literal[get_lit_state(prev_byte)], - peek( rep0 ) ) ); - } + put_byte( rdec.decode_matched( bm, peek( rep0 ) ) ); } else // match or repeated match { int len; if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit { - if( rdec.decode_bit( bm_rep0[state()] ) != 0 ) // 3rd bit + if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit + { + if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit + { state.set_short_rep(); put_byte( peek( rep0 ) ); continue; } + } + else { unsigned distance; if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit @@ -254,34 +252,28 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) rep1 = rep0; rep0 = distance; } - else - { - if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit - { state.set_short_rep(); put_byte( peek( rep0 ) ); continue; } - } state.set_rep(); len = min_match_len + rdec.decode_len( rep_len_model, pos_state ); } else // match { - const unsigned rep0_saved = rep0; len = min_match_len + rdec.decode_len( match_len_model, pos_state ); - const int dis_slot = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] ); - if( dis_slot < start_dis_model ) rep0 = dis_slot; - else + unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] ); + if( distance >= start_dis_model ) { + const unsigned dis_slot = distance; const int direct_bits = ( dis_slot >> 1 ) - 1; - rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits; + distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits; if( dis_slot < end_dis_model ) - rep0 += rdec.decode_tree_reversed( bm_dis + rep0 - dis_slot - 1, - direct_bits ); + distance += rdec.decode_tree_reversed( + bm_dis + ( distance - dis_slot ), direct_bits ); else { - rep0 += rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits; - rep0 += rdec.decode_tree_reversed4( bm_align ); - if( rep0 == 0xFFFFFFFFU ) // marker found + distance += + rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits; + distance += rdec.decode_tree_reversed4( bm_align ); + if( distance == 0xFFFFFFFFU ) // marker found { - rep0 = rep0_saved; rdec.normalize(); flush_data(); if( len == min_match_len ) // End Of Stream marker @@ -301,7 +293,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) } } } - rep3 = rep2; rep2 = rep1; rep1 = rep0_saved; + rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance; state.set_match(); if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) ) { flush_data(); return 1; } @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2016 Antonio Diaz Diaz. + Copyright (C) 2009-2017 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -50,7 +50,9 @@ public: unsigned get_code() const { return code; } bool finished() { return pos >= stream_pos && !read_block(); } unsigned long long member_position() const { return partial_member_pos + pos; } - void reset_member_position() { partial_member_pos = -pos; } + + void reset_member_position() + { partial_member_pos = 0; partial_member_pos -= pos; } uint8_t get_byte() { @@ -61,15 +63,15 @@ public: int read_data( uint8_t * const outbuf, const int size ) { - int rest = size; - while( rest > 0 && !finished() ) + int sz = 0; + while( sz < size && !finished() ) { - const int rd = std::min( rest, stream_pos - pos ); - std::memcpy( outbuf + size - rest, buffer + pos, rd ); + const int rd = std::min( size - sz, stream_pos - pos ); + std::memcpy( outbuf + sz, buffer + pos, rd ); pos += rd; - rest -= rd; + sz += rd; } - return size - rest; + return sz; } void load() @@ -86,24 +88,23 @@ public: { range <<= 8; code = (code << 8) | get_byte(); } } - int decode( const int num_bits ) + unsigned decode( const int num_bits ) { - int symbol = 0; + unsigned symbol = 0; for( int i = num_bits; i > 0; --i ) { normalize(); range >>= 1; // symbol <<= 1; // if( code >= range ) { code -= range; symbol |= 1; } - const uint32_t mask = 0U - (code < range); - code -= range; - code += range & mask; - symbol = (symbol << 1) + (mask + 1); + const bool bit = ( code >= range ); + symbol = ( symbol << 1 ) + bit; + code -= range & ( 0U - bit ); } return symbol; } - int decode_bit( Bit_model & bm ) + unsigned decode_bit( Bit_model & bm ) { normalize(); const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability; @@ -122,18 +123,18 @@ public: } } - int decode_tree3( Bit_model bm[] ) + unsigned decode_tree3( Bit_model bm[] ) { - int symbol = 1; + unsigned symbol = 1; symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); return symbol & 7; } - int decode_tree6( Bit_model bm[] ) + unsigned decode_tree6( Bit_model bm[] ) { - int symbol = 1; + unsigned symbol = 1; symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); @@ -143,49 +144,47 @@ public: return symbol & 0x3F; } - int decode_tree8( Bit_model bm[] ) + unsigned decode_tree8( Bit_model bm[] ) { - int symbol = 1; - while( symbol < 0x100 ) + unsigned symbol = 1; + for( int i = 0; i < 8; ++i ) symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); return symbol & 0xFF; } - int decode_tree_reversed( Bit_model bm[], const int num_bits ) + unsigned decode_tree_reversed( Bit_model bm[], const int num_bits ) { - int model = 1; - int symbol = 0; + unsigned model = 1; + unsigned symbol = 0; for( int i = 0; i < num_bits; ++i ) { - const bool bit = decode_bit( bm[model] ); - model <<= 1; - if( bit ) { ++model; symbol |= (1 << i); } + const unsigned bit = decode_bit( bm[model] ); + model = ( model << 1 ) + bit; + symbol |= ( bit << i ); } return symbol; } - int decode_tree_reversed4( Bit_model bm[] ) + unsigned decode_tree_reversed4( Bit_model bm[] ) { - int model = 1; - int symbol = decode_bit( bm[model] ); - model = (model << 1) + symbol; - int bit = decode_bit( bm[model] ); - model = (model << 1) + bit; symbol |= (bit << 1); + unsigned symbol = decode_bit( bm[1] ); + unsigned model = 2 + symbol; + unsigned bit = decode_bit( bm[model] ); + model = ( model << 1 ) + bit; symbol |= ( bit << 1 ); bit = decode_bit( bm[model] ); - model = (model << 1) + bit; symbol |= (bit << 2); - if( decode_bit( bm[model] ) ) symbol |= 8; + model = ( model << 1 ) + bit; symbol |= ( bit << 2 ); + symbol |= ( decode_bit( bm[model] ) << 3 ); return symbol; } - int decode_matched( Bit_model bm[], int match_byte ) + unsigned decode_matched( Bit_model bm[], unsigned match_byte ) { Bit_model * const bm1 = bm + 0x100; - int symbol = 1; + unsigned symbol = 1; while( symbol < 0x100 ) { - match_byte <<= 1; - const int match_bit = match_byte & 0x100; - const int bit = decode_bit( bm1[match_bit+symbol] ); + const unsigned match_bit = ( match_byte <<= 1 ) & 0x100; + const unsigned bit = decode_bit( bm1[match_bit+symbol] ); symbol = ( symbol << 1 ) | bit; if( match_bit != bit << 8 ) { @@ -197,7 +196,7 @@ public: return symbol & 0xFF; } - int decode_len( Len_model & lm, const int pos_state ) + unsigned decode_len( Len_model & lm, const int pos_state ) { if( decode_bit( lm.choice1 ) == 0 ) return decode_tree3( lm.bm_low[pos_state] ); @@ -229,14 +228,15 @@ class LZ_decoder uint8_t peek_prev() const { - const unsigned i = ( ( pos > 0 ) ? pos : dictionary_size ) - 1; - return buffer[i]; + if( pos > 0 ) return buffer[pos-1]; + if( pos_wrapped ) return buffer[dictionary_size-1]; + return 0; // prev_byte of first byte } uint8_t peek( const unsigned distance ) const { - unsigned i = pos - distance - 1; - if( pos <= distance ) i += dictionary_size; + const unsigned i = ( ( pos > distance ) ? 0 : dictionary_size ) + + pos - distance - 1; return buffer[i]; } @@ -248,17 +248,26 @@ class LZ_decoder void copy_block( const unsigned distance, unsigned len ) { - unsigned i = pos - distance - 1; - bool fast; - if( pos <= distance ) - { i += dictionary_size; - fast = ( len <= dictionary_size - i && len <= i - pos ); } + unsigned lpos = pos, i = lpos - distance - 1; + bool fast, fast2; + if( lpos > distance ) + { + fast = ( len < dictionary_size - lpos ); + fast2 = ( fast && len <= lpos - i ); + } else - fast = ( len < dictionary_size - pos && len <= pos - i ); - if( fast ) // no wrap, no overlap { - std::memcpy( buffer + pos, buffer + i, len ); + i += dictionary_size; + fast = ( len < dictionary_size - i ); // (i == pos) may happen + fast2 = ( fast && len <= i - lpos ); + } + if( fast ) // no wrap + { pos += len; + if( fast2 ) // no wrap, no overlap + std::memcpy( buffer + lpos, buffer + i, len ); + else + for( ; len > 0; --len ) buffer[lpos++] = buffer[i++]; } else for( ; len > 0; --len ) { @@ -287,7 +296,7 @@ public: crc_( 0xFFFFFFFFU ), outfd( ofd ), pos_wrapped( false ) - { buffer[dictionary_size-1] = 0; } // prev_byte of first byte + {} ~LZ_decoder() { delete[] buffer; } diff --git a/doc/lziprecover.1 b/doc/lziprecover.1 index 97f564f..31440f8 100644 --- a/doc/lziprecover.1 +++ b/doc/lziprecover.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1. -.TH LZIPRECOVER "1" "May 2016" "lziprecover 1.18" "User Commands" +.TH LZIPRECOVER "1" "April 2017" "lziprecover 1.19" "User Commands" .SH NAME lziprecover \- recovers data from damaged lzip files .SH SYNOPSIS @@ -17,6 +17,9 @@ Lziprecover can also produce a correct file by merging the good parts of two or more damaged copies, extract data from damaged files, decompress files and test integrity of files. .PP +Lziprecover provides random access to the data in multimember files; it +only decompresses the members containing the desired data. +.PP Lziprecover is not a replacement for regular backups, but a last line of defense for the case where the backups are also damaged. .SH OPTIONS @@ -52,7 +55,7 @@ make '\-\-range\-decompress' ignore data errors keep (don't delete) input files .TP \fB\-l\fR, \fB\-\-list\fR -print total file sizes and ratios +print (un)compressed file sizes .TP \fB\-m\fR, \fB\-\-merge\fR correct errors in file using several copies @@ -89,7 +92,7 @@ Report bugs to lzip\-bug@nongnu.org .br Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html .SH COPYRIGHT -Copyright \(co 2016 Antonio Diaz Diaz. +Copyright \(co 2017 Antonio Diaz Diaz. License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> .br This is free software: you are free to change and redistribute it. diff --git a/doc/lziprecover.info b/doc/lziprecover.info index 17985d2..4b3a8fb 100644 --- a/doc/lziprecover.info +++ b/doc/lziprecover.info @@ -12,7 +12,7 @@ File: lziprecover.info, Node: Top, Next: Introduction, Up: (dir) Lziprecover Manual ****************** -This manual is for Lziprecover (version 1.18, 12 May 2016). +This manual is for Lziprecover (version 1.19, 10 April 2017). * Menu: @@ -30,7 +30,7 @@ This manual is for Lziprecover (version 1.18, 12 May 2016). * Concept index:: Index of concepts - Copyright (C) 2009-2016 Antonio Diaz Diaz. + Copyright (C) 2009-2017 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute and modify it. @@ -42,10 +42,13 @@ File: lziprecover.info, Node: Introduction, Next: Invoking lziprecover, Prev: ************** Lziprecover is a data recovery tool and decompressor for files in the -lzip compressed data format (.lz), able to repair slightly damaged -files, produce a correct file by merging the good parts of two or more -damaged copies, extract data from damaged files, decompress files and -test integrity of files. +lzip compressed data format (.lz). Lziprecover is able to repair +slightly damaged files, produce a correct file by merging the good parts +of two or more damaged copies, extract data from damaged files, +decompress files and test integrity of files. + + Lziprecover provides random access to the data in multimember files; +it only decompresses the members containing the desired data. Lziprecover is not a replacement for regular backups, but a last line of defense for the case where the backups are also damaged. @@ -61,11 +64,11 @@ availability: merging of damaged copies of a file. *Note Data safety::. * The lzip format is as simple as possible (but not simpler). The - lzip manual provides the code of a simple decompressor along with - a detailed explanation of how it works, so that with the only help - of the lzip manual it would be possible for a digital - archaeologist to extract the data from a lzip file long after - quantum computers eventually render LZMA obsolete. + lzip manual provides the source code of a simple decompressor + along with a detailed explanation of how it works, so that with + the only help of the lzip manual it would be possible for a + digital archaeologist to extract the data from a lzip file long + after quantum computers eventually render LZMA obsolete. * Additionally the lzip reference implementation is copylefted, which guarantees that it will remain free forever. @@ -94,12 +97,6 @@ garbage data may be produced at the end of each member): lziprecover -D0 -i -o file -q file.lz - Lziprecover provides random access to the data in multimember files; -it only decompresses the members containing the desired data. - - Lziprecover can print correct total file sizes and ratios even for -multimember files. - When recovering data, lziprecover takes as arguments the names of the damaged files and writes zero or more recovered files depending on the operation selected and whether the recovery succeeded or not. The @@ -108,6 +105,10 @@ damaged files themselves are never modified. When decompressing or testing file integrity, lziprecover behaves like lzip or lunzip. + LANGUAGE NOTE: Uncompressed = not compressed = plain data; it may +never have been compressed. Decompressed is used to refer to data which +have undergone the process of decompression. + File: lziprecover.info, Node: Invoking lziprecover, Next: Data safety, Prev: Introduction, Up: Top @@ -204,9 +205,18 @@ the first time it appears in the command line. '-l' '--list' - Print total file sizes and ratios. The values produced are correct - even for multimember files. Use it together with '-v' to see - information about the members in the file. + Print the uncompressed size, compressed size and percentage saved + of the specified file(s). Trailing data are ignored. The values + produced are correct even for multimember files. If more than one + file is given, a final line containing the cumulative sizes is + printed. With '-v', the dictionary size, the number of members in + the file, and the amount of trailing data (if any) are also + printed. With '-vv', the positions and sizes of each member in + multimember files are also printed. '-lq' can be used to verify + quickly (without decompressing) the structural integrity of the + specified files. (Use '--test' to verify the data integrity). + '-alq' additionally verifies that none of the specified files + contain trailing data. '-m' '--merge' @@ -234,11 +244,11 @@ the first time it appears in the command line. '-R' '--repair' - Try to repair a file with small errors (up to one byte error per - member). If successful, a repaired copy is written to the file - 'FILE_fixed.lz'. 'FILE' is not modified at all. The exit status - is 0 if the file could be repaired, 2 otherwise. See the chapter - 'Repairing files' (*note Repairing files::) for a complete + Try to repair a file with small errors (up to one single-byte + error per member). If successful, a repaired copy is written to + the file 'FILE_fixed.lz'. 'FILE' is not modified at all. The exit + status is 0 if the file could be repaired, 2 otherwise. See the + chapter 'Repairing files' (*note Repairing files::) for a complete description of the repair mode. '-s' @@ -261,8 +271,9 @@ the first time it appears in the command line. Check integrity of the specified file(s), but don't decompress them. This really performs a trial decompression and throws away the result. Use it together with '-v' to see information about - the file(s). If a file fails the test, lziprecover continues - checking the rest of the files. + the file(s). If a file fails the test, does not exist, can't be + opened, or is a terminal, lziprecover continues checking the rest + of the files. '-v' '--verbose' @@ -270,7 +281,11 @@ the first time it appears in the command line. When decompressing or testing, further -v's (up to 4) increase the verbosity level, showing status, compression ratio, dictionary size, trailer contents (CRC, data size, member size), and up to 6 - bytes of trailing data (if any). + bytes of trailing data (if any) both in hexadecimal and as a + string of printable ASCII characters. + In other modes, increasing verbosity levels show final status, + progress of operations, and extra information (for example, the + failed areas). Numbers given as arguments to options may be followed by a multiplier @@ -316,7 +331,7 @@ files::), if at least one backup copy of the file is made. separate media. How does lzip compare with gzip and bzip2 with respect to data -safety? Lets suppose that you made a backup of your valuable +safety? Let's suppose that you made a backup of your valuable scientific data, compressed it, and stored two copies on separate media. Years later you notice that both copies are corrupt. @@ -362,10 +377,11 @@ vice versa. It may be caused by bad RAM or even by natural radiation. I have seen a case of bit-flip in a file stored on an USB flash drive. One byte may seem small, but most file corruptions not produced by -I/O errors just affect one byte, or even one bit, of the file. Also, -unlike magnetic media, where errors usually affect a whole sector, -solid-state storage devices tend to produce single-byte errors, making -of lzip the perfect format for data stored on such devices. +transmission errors or I/O errors just affect one byte, or even one bit, +of the file. Also, unlike magnetic media, where errors usually affect a +whole sector, solid-state storage devices tend to produce single-byte +errors, making of lzip the perfect format for data stored on such +devices. Repairing a file can take some time. Small files or files with the error located near the beginning can be repaired in a few seconds. But @@ -395,11 +411,11 @@ the file. is damaged in all copies), or are adjacent and the boundary can't be determined, or if the copies have too many damaged areas. - All the copies must have the same size. If any of them is larger or -smaller than it should, either because it has been truncated or because -it got some garbage data appended at the end, it can be brought to the -correct size with the following command before merging it with the other -copies: + All the copies to be merged must have the same size. If any of them +is larger or smaller than it should, either because it has been +truncated or because it got some garbage data appended at the end, it +can be brought to the correct size with the following command before +merging it with the other copies: ddrescue -s<correct_size> -x<correct_size> file.lz correct_size_file.lz @@ -411,6 +427,29 @@ few MB) with small errors (one sector damaged per copy), the probability approaches 100 percent even with only two copies. (Supposing that the errors are randomly located inside each copy). + Some types of solid-state device (NAND flash, for example) can +produce bursts of scattered single-bit errors. Lziprecover is able to +merge files with thousands of such scattered errors by grouping the +errors into clusters and then merging the files as if each cluster were +a single error. + + Here is a real case of successful merging. Two copies of the file +'icecat-3.5.3-x86.tar.lz' (compressed size 9 MB) became corrupt while +stored on the same NAND flash device. One of the copies had 76 +single-bit errors scattered in an area of 1020 bytes, and the other had +3028 such errors in an area of 31729 bytes. Lziprecover produced a +correct file, identical to the original, in just 5 seconds: + + $ lziprecover -vvm a/icecat-3.5.3-x86.tar.lz b/icecat-3.5.3-x86.tar.lz + Merging member 1 of 1 (2552 errors) + 2552 errors have been grouped in 16 clusters. + Trying variation 2 of 2, block 2 + Input files merged successfully. + + Note that the number of errors reported by lziprecover (2552) is +lower than the number of corrupt bytes (3104) because contiguous +corrupt bytes are counted as a single multibyte error. + File: lziprecover.info, Node: File names, Next: File format, Prev: Merging files, Up: Top @@ -499,16 +538,21 @@ File: lziprecover.info, Node: Trailing data, Next: Examples, Prev: File forma 8 Extra data appended to the file ********************************* -Sometimes extra data is found appended to a lzip file after the last +Sometimes extra data are found appended to a lzip file after the last member. Such trailing data may be: * Padding added to make the file size a multiple of some block size, - for example when writing to a tape. - - * Garbage added by some not totally successful copy operation. + for example when writing to a tape. It is safe to append any + amount of padding zero bytes to a lzip file. * Useful data added by the user; a cryptographically secure hash, a - description of file contents, etc. + description of file contents, etc. It is safe to append any amount + of text to a lzip file as long as the text does not begin with the + string "LZIP", and does not contain any zero bytes (null + characters). Nonzero bytes and zero bytes can't be safely mixed in + trailing data. + + * Garbage added by some not totally successful copy operation. * Malicious data added to the file in order to make its total size and hash value (for a chosen hash) coincide with those of another @@ -521,8 +565,12 @@ member. Such trailing data may be: the corruption of the integrity information itself. Therefore it can be considered to be below the noise level. + Trailing data are in no way part of the lzip file format, but tools +reading lzip files are expected to behave as correctly and usefully as +possible in the presence of trailing data. + Trailing data can be safely ignored in most cases. In some cases, -like that of user-added data, it is expected to be ignored. In those +like that of user-added data, they are expected to be ignored. In those cases where a file containing trailing data must be rejected, the option '--trailing-error' can be used. *Note --trailing-error::. @@ -544,8 +592,8 @@ show status. lziprecover -tv file.lz -Example 3: The right way of concatenating compressed files. *Note -Trailing data::. +Example 3: The right way of concatenating the decompressed output of two +or more compressed files. *Note Trailing data::. Don't do this cat file1.lz file2.lz file3.lz | lziprecover -d @@ -703,6 +751,16 @@ by 'zutils'. *Note Zcmp: (zutils)Zcmp, Test only one of every N bytes, blocks or truncation sizes, instead of all of them. +'-e POSITION,VALUE' +'--set-byte=POSITION,VALUE' + Set byte at POSITION to VALUE in the internal buffer after reading + and testing FILENAME.lz but before the first test call to the + decompressor. If VALUE is preceded by '+', it is added to the + original value of the byte at POSITION. If VALUE is preceded by + 'f' (flip), it is XORed with the original value of the byte at + POSITION. This option can be used to run tests with a changed + dictionary size, for example. + '-p BYTES' '--position=BYTES' First byte position to test in the file. Defaults to 0. Negative @@ -779,21 +837,21 @@ Concept index Tag Table: Node: Top231 -Node: Introduction1267 -Node: Invoking lziprecover4525 -Ref: --trailing-error5175 -Node: Data safety11779 -Node: Repairing files13702 -Node: Merging files15602 -Node: File names17217 -Node: File format17681 -Node: Trailing data20109 -Node: Examples21492 -Ref: concat-example21923 -Ref: ddrescue-example22986 -Node: Unzcrash24276 -Node: Problems28786 -Node: Concept index29338 +Node: Introduction1269 +Node: Invoking lziprecover4646 +Ref: --trailing-error5296 +Node: Data safety12788 +Node: Repairing files14712 +Node: Merging files16635 +Node: File names19397 +Node: File format19861 +Node: Trailing data22289 +Node: Examples24195 +Ref: concat-example24626 +Ref: ddrescue-example25727 +Node: Unzcrash27017 +Node: Problems32021 +Node: Concept index32573 End Tag Table diff --git a/doc/lziprecover.texi b/doc/lziprecover.texi index 2702d70..ae3be14 100644 --- a/doc/lziprecover.texi +++ b/doc/lziprecover.texi @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 12 May 2016 -@set VERSION 1.18 +@set UPDATED 10 April 2017 +@set VERSION 1.19 @dircategory Data Compression @direntry @@ -50,7 +50,7 @@ This manual is for Lziprecover (version @value{VERSION}, @value{UPDATED}). @end menu @sp 1 -Copyright @copyright{} 2009-2016 Antonio Diaz Diaz. +Copyright @copyright{} 2009-2017 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute and modify it. @@ -61,10 +61,13 @@ to copy, distribute and modify it. @cindex introduction Lziprecover is a data recovery tool and decompressor for files in the -lzip compressed data format (.lz), able to repair slightly damaged -files, produce a correct file by merging the good parts of two or more -damaged copies, extract data from damaged files, decompress files and -test integrity of files. +lzip compressed data format (.lz). Lziprecover is able to repair +slightly damaged files, produce a correct file by merging the good parts +of two or more damaged copies, extract data from damaged files, +decompress files and test integrity of files. + +Lziprecover provides random access to the data in multimember files; it +only decompresses the members containing the desired data. Lziprecover is not a replacement for regular backups, but a last line of defense for the case where the backups are also damaged. @@ -83,10 +86,10 @@ copies of a file. @xref{Data safety}. @item The lzip format is as simple as possible (but not simpler). The lzip -manual provides the code of a simple decompressor along with a detailed -explanation of how it works, so that with the only help of the lzip -manual it would be possible for a digital archaeologist to extract the -data from a lzip file long after quantum computers eventually render +manual provides the source code of a simple decompressor along with a +detailed explanation of how it works, so that with the only help of the +lzip manual it would be possible for a digital archaeologist to extract +the data from a lzip file long after quantum computers eventually render LZMA obsolete. @item @@ -120,12 +123,6 @@ garbage data may be produced at the end of each member): lziprecover -D0 -i -o file -q file.lz @end example -Lziprecover provides random access to the data in multimember files; it -only decompresses the members containing the desired data. - -Lziprecover can print correct total file sizes and ratios even for -multimember files. - When recovering data, lziprecover takes as arguments the names of the damaged files and writes zero or more recovered files depending on the operation selected and whether the recovery succeeded or not. The @@ -134,6 +131,10 @@ damaged files themselves are never modified. When decompressing or testing file integrity, lziprecover behaves like lzip or lunzip. +LANGUAGE NOTE: Uncompressed = not compressed = plain data; it may never +have been compressed. Decompressed is used to refer to data which have +undergone the process of decompression. + @node Invoking lziprecover @chapter Invoking lziprecover @@ -235,9 +236,17 @@ Keep (don't delete) input files during decompression. @item -l @itemx --list -Print total file sizes and ratios. The values produced are correct even -for multimember files. Use it together with @samp{-v} to see information -about the members in the file. +Print the uncompressed size, compressed size and percentage saved of the +specified file(s). Trailing data are ignored. The values produced are +correct even for multimember files. If more than one file is given, a +final line containing the cumulative sizes is printed. With @samp{-v}, +the dictionary size, the number of members in the file, and the amount +of trailing data (if any) are also printed. With @samp{-vv}, the +positions and sizes of each member in multimember files are also +printed. @samp{-lq} can be used to verify quickly (without +decompressing) the structural integrity of the specified files. (Use +@samp{--test} to verify the data integrity). @samp{-alq} additionally +verifies that none of the specified files contain trailing data. @item -m @itemx --merge @@ -259,14 +268,13 @@ file. If converting a lzma-alone file from standard input and name of the converted file. (Or plain @samp{@var{file}} if it already ends in @samp{.lz} or @samp{.tlz}). - @item -q @itemx --quiet Quiet operation. Suppress all messages. @item -R @itemx --repair -Try to repair a file with small errors (up to one byte error per +Try to repair a file with small errors (up to one single-byte error per member). If successful, a repaired copy is written to the file @samp{@var{file}_fixed.lz}. @samp{@var{file}} is not modified at all. The exit status is 0 if the file could be repaired, 2 otherwise. See the @@ -292,8 +300,8 @@ on the number of members in @samp{@var{file}}. Check integrity of the specified file(s), but don't decompress them. This really performs a trial decompression and throws away the result. Use it together with @samp{-v} to see information about the file(s). If -a file fails the test, lziprecover continues checking the rest of the -files. +a file fails the test, does not exist, can't be opened, or is a +terminal, lziprecover continues checking the rest of the files. @item -v @itemx --verbose @@ -301,7 +309,10 @@ Verbose mode.@* When decompressing or testing, further -v's (up to 4) increase the verbosity level, showing status, compression ratio, dictionary size, trailer contents (CRC, data size, member size), and up to 6 bytes of -trailing data (if any). +trailing data (if any) both in hexadecimal and as a string of printable +ASCII characters.@* +In other modes, increasing verbosity levels show final status, progress +of operations, and extra information (for example, the failed areas). @end table @@ -349,7 +360,7 @@ The only remedy for total device failure is storing backup copies in separate media. How does lzip compare with gzip and bzip2 with respect to data safety? -Lets suppose that you made a backup of your valuable scientific data, +Let's suppose that you made a backup of your valuable scientific data, compressed it, and stored two copies on separate media. Years later you notice that both copies are corrupt. @@ -393,11 +404,12 @@ Bit-flip happens when one bit in the file is changed from 0 to 1 or vice versa. It may be caused by bad RAM or even by natural radiation. I have seen a case of bit-flip in a file stored on an USB flash drive. -One byte may seem small, but most file corruptions not produced by I/O -errors just affect one byte, or even one bit, of the file. Also, unlike -magnetic media, where errors usually affect a whole sector, solid-state -storage devices tend to produce single-byte errors, making of lzip the -perfect format for data stored on such devices. +One byte may seem small, but most file corruptions not produced by +transmission errors or I/O errors just affect one byte, or even one bit, +of the file. Also, unlike magnetic media, where errors usually affect a +whole sector, solid-state storage devices tend to produce single-byte +errors, making of lzip the perfect format for data stored on such +devices. Repairing a file can take some time. Small files or files with the error located near the beginning can be repaired in a few seconds. But @@ -426,11 +438,11 @@ The merge will fail if the damaged areas overlap (at least one byte is damaged in all copies), or are adjacent and the boundary can't be determined, or if the copies have too many damaged areas. -All the copies must have the same size. If any of them is larger or -smaller than it should, either because it has been truncated or because -it got some garbage data appended at the end, it can be brought to the -correct size with the following command before merging it with the other -copies: +All the copies to be merged must have the same size. If any of them is +larger or smaller than it should, either because it has been truncated +or because it got some garbage data appended at the end, it can be +brought to the correct size with the following command before merging it +with the other copies: @example ddrescue -s<correct_size> -x<correct_size> file.lz correct_size_file.lz @@ -444,6 +456,31 @@ few MB) with small errors (one sector damaged per copy), the probability approaches 100 percent even with only two copies. (Supposing that the errors are randomly located inside each copy). +Some types of solid-state device (NAND flash, for example) can produce +bursts of scattered single-bit errors. Lziprecover is able to merge +files with thousands of such scattered errors by grouping the errors +into clusters and then merging the files as if each cluster were a +single error. + +Here is a real case of successful merging. Two copies of the file +@samp{icecat-3.5.3-x86.tar.lz} (compressed size 9 MB) became corrupt +while stored on the same NAND flash device. One of the copies had 76 +single-bit errors scattered in an area of 1020 bytes, and the other had +3028 such errors in an area of 31729 bytes. Lziprecover produced a +correct file, identical to the original, in just 5 seconds: + +@example +$ lziprecover -vvm a/icecat-3.5.3-x86.tar.lz b/icecat-3.5.3-x86.tar.lz +Merging member 1 of 1 (2552 errors) + 2552 errors have been grouped in 16 clusters. + Trying variation 2 of 2, block 2 +Input files merged successfully. +@end example + +Note that the number of errors reported by lziprecover (2552) is lower +than the number of corrupt bytes (3104) because contiguous corrupt bytes +are counted as a single multibyte error. + @node File names @chapter Names of the files produced by lziprecover @@ -543,20 +580,24 @@ facilitates safe recovery of undamaged members from multimember files. @chapter Extra data appended to the file @cindex trailing data -Sometimes extra data is found appended to a lzip file after the last +Sometimes extra data are found appended to a lzip file after the last member. Such trailing data may be: @itemize @bullet @item Padding added to make the file size a multiple of some block size, for -example when writing to a tape. +example when writing to a tape. It is safe to append any amount of +padding zero bytes to a lzip file. @item -Garbage added by some not totally successful copy operation. +Useful data added by the user; a cryptographically secure hash, a +description of file contents, etc. It is safe to append any amount of +text to a lzip file as long as the text does not begin with the string +"LZIP", and does not contain any zero bytes (null characters). Nonzero +bytes and zero bytes can't be safely mixed in trailing data. @item -Useful data added by the user; a cryptographically secure hash, a -description of file contents, etc. +Garbage added by some not totally successful copy operation. @item Malicious data added to the file in order to make its total size and @@ -571,8 +612,12 @@ integrity information itself. Therefore it can be considered to be below the noise level. @end itemize +Trailing data are in no way part of the lzip file format, but tools +reading lzip files are expected to behave as correctly and usefully as +possible in the presence of trailing data. + Trailing data can be safely ignored in most cases. In some cases, like -that of user-added data, it is expected to be ignored. In those cases +that of user-added data, they are expected to be ignored. In those cases where a file containing trailing data must be rejected, the option @samp{--trailing-error} can be used. @xref{--trailing-error}. @@ -601,8 +646,8 @@ lziprecover -tv file.lz @sp 1 @anchor{concat-example} @noindent -Example 3: The right way of concatenating compressed files. -@xref{Trailing data}. +Example 3: The right way of concatenating the decompressed output of two +or more compressed files. @xref{Trailing data}. @example Don't do this @@ -753,7 +798,6 @@ See @uref{http://www.nongnu.org/zutils/manual/zutils_manual.html#Zcmp,,zcmp} @end ifhtml - The format for running unzcrash is: @example @@ -800,6 +844,16 @@ to 512 bytes. @var{value} defaults to 0. Test only one of every @var{n} bytes, blocks or truncation sizes, instead of all of them. +@item -e @var{position},@var{value} +@itemx --set-byte=@var{position},@var{value} +Set byte at @var{position} to @var{value} in the internal buffer after +reading and testing @var{filename}.lz but before the first test call to +the decompressor. If @var{value} is preceded by @samp{+}, it is added to +the original value of the byte at @var{position}. If @var{value} is +preceded by @samp{f} (flip), it is XORed with the original value of the +byte at @var{position}. This option can be used to run tests with a +changed dictionary size, for example. + @item -p @var{bytes} @itemx --position=@var{bytes} First byte position to test in the file. Defaults to 0. Negative values diff --git a/file_index.cc b/file_index.cc index 747c376..f2f81e7 100644 --- a/file_index.cc +++ b/file_index.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2016 Antonio Diaz Diaz. + Copyright (C) 2009-2017 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -15,6 +15,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#define _FILE_OFFSET_BITS 64 + #include <algorithm> #include <cerrno> #include <cstdio> @@ -44,17 +46,75 @@ void File_index::set_errno_error( const char * const msg ) retval_ = 1; } -void File_index::set_num_error( const char * const msg1, unsigned long long num, - const char * const msg2 ) +void File_index::set_num_error( const char * const msg, unsigned long long num ) { char buf[80]; - snprintf( buf, sizeof buf, "%s%llu%s", msg1, num, msg2 ); + snprintf( buf, sizeof buf, "%s%llu", msg, num ); error_ = buf; retval_ = 2; } -File_index::File_index( const int infd ) +// If successful, push last member and set pos to member header. +bool File_index::skip_trailing_data( const int fd, const bool ignore_bad_ds, + long long & pos ) + { + enum { block_size = 16384, + buffer_size = block_size + File_trailer::size - 1 + File_header::size }; + uint8_t buffer[buffer_size]; + if( pos < min_member_size ) return false; + int bsize = pos % block_size; // total bytes in buffer + if( bsize <= buffer_size - block_size ) bsize += block_size; + int search_size = bsize; // bytes to search for trailer + int rd_size = bsize; // bytes to read from file + unsigned long long ipos = pos - rd_size; // aligned to block_size + + while( true ) + { + if( seek_read( fd, buffer, rd_size, ipos ) != rd_size ) + { set_errno_error( "Error seeking member trailer: " ); return false; } + const uint8_t max_msb = ( ipos + search_size ) >> 56; + for( int i = search_size; i >= File_trailer::size; --i ) + if( buffer[i-1] <= max_msb ) // most significant byte of member_size + { + File_trailer & trailer = + *(File_trailer *)( buffer + i - File_trailer::size ); + const unsigned long long member_size = trailer.member_size(); + if( member_size == 0 ) + { while( i > File_trailer::size && buffer[i-9] == 0 ) --i; continue; } + if( member_size < min_member_size || member_size > ipos + i ) + continue; + File_header header; + if( seek_read( fd, header.data, File_header::size, + ipos + i - member_size ) != File_header::size ) + { set_errno_error( "Error reading member header: " ); return false; } + const unsigned dictionary_size = header.dictionary_size(); + if( !header.verify_magic() || !header.verify_version() || + ( !ignore_bad_ds && !isvalid_ds( dictionary_size ) ) ) continue; + if( (*(File_header *)( buffer + i )).verify_prefix( bsize - i ) ) + { + error_ = "Last member in input file is truncated or corrupt."; + retval_ = 2; return false; + } + pos = ipos + i - member_size; + member_vector.push_back( Member( 0, trailer.data_size(), pos, + member_size, dictionary_size ) ); + return true; + } + if( ipos <= 0 ) + { set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 ); + return false; } + bsize = buffer_size; + search_size = bsize - File_header::size; + rd_size = block_size; + ipos -= rd_size; + std::memcpy( buffer + rd_size, buffer, buffer_size - rd_size ); + } + } + + +File_index::File_index( const int infd, const bool ignore_bad_ds, + const bool ignore_trailing ) : isize( lseek( infd, 0, SEEK_END ) ), retval_( 0 ) { if( isize < 0 ) @@ -69,48 +129,46 @@ File_index::File_index( const int infd ) if( seek_read( infd, header.data, File_header::size, 0 ) != File_header::size ) { set_errno_error( "Error reading member header: " ); return; } if( !header.verify_magic() ) - { error_ = "Bad magic number (file not in lzip format)."; - retval_ = 2; return; } + { error_ = bad_magic_msg; retval_ = 2; return; } if( !header.verify_version() ) - { set_num_error( "Version ", header.version(), - " member format not supported." ); return; } + { error_ = bad_version( header.version() ); retval_ = 2; return; } + if( !ignore_bad_ds && !isvalid_ds( header.dictionary_size() ) ) + { error_ = bad_dict_msg; retval_ = 2; return; } - long long pos = isize; // always points to a header or to EOF + long long pos = isize; // always points to a header or to EOF while( pos >= min_member_size ) { File_trailer trailer; if( seek_read( infd, trailer.data, File_trailer::size, pos - File_trailer::size ) != File_trailer::size ) { set_errno_error( "Error reading member trailer: " ); break; } - const long long member_size = trailer.member_size(); - if( member_size < min_member_size || member_size > pos ) + const unsigned long long member_size = trailer.member_size(); + if( member_size < min_member_size || member_size > (unsigned long long)pos ) { - if( member_vector.empty() ) - { --pos; continue; } // maybe trailing data - set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 ); + if( !member_vector.empty() ) + set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 ); + else if( skip_trailing_data( infd, ignore_bad_ds, pos ) ) + { if( ignore_trailing ) continue; + error_ = trailing_msg; retval_ = 2; return; } break; } if( seek_read( infd, header.data, File_header::size, pos - member_size ) != File_header::size ) { set_errno_error( "Error reading member header: " ); break; } - if( !header.verify_magic() || !header.verify_version() ) - { - if( member_vector.empty() ) - { --pos; continue; } // maybe trailing data - set_num_error( "Bad header at pos ", pos - member_size ); - break; - } const unsigned dictionary_size = header.dictionary_size(); - if( member_vector.empty() && isize - pos > File_header::size && - seek_read( infd, header.data, File_header::size, pos ) == File_header::size && - header.verify_magic() && header.verify_version() ) + if( !header.verify_magic() || !header.verify_version() || + ( !ignore_bad_ds && !isvalid_ds( dictionary_size ) ) ) { - error_ = "Last member in input file is truncated or corrupt."; - retval_ = 2; break; + if( !member_vector.empty() ) + set_num_error( "Bad header at pos ", pos - member_size ); + else if( skip_trailing_data( infd, ignore_bad_ds, pos ) ) + { if( ignore_trailing ) continue; + error_ = trailing_msg; retval_ = 2; return; } + break; } pos -= member_size; - member_vector.push_back( Member( 0, trailer.data_size(), - pos, member_size, dictionary_size ) ); + member_vector.push_back( Member( 0, trailer.data_size(), pos, + member_size, dictionary_size ) ); } if( pos != 0 || member_vector.empty() ) { @@ -157,13 +215,12 @@ File_index::File_index( const std::vector< int > & infd_vector, if( header.verify_magic() && header.verify_version() ) done = true; } if( !done ) - { error_ = "Bad magic number (file not in lzip format)."; - retval_ = 2; return; } + { error_ = bad_magic_msg; retval_ = 2; return; } long long pos = isize; // always points to a header or to EOF while( pos >= min_member_size ) { - long long member_size; + unsigned long long member_size; File_trailer trailer; done = false; for( int it = 0; it < files && !done; ++it ) @@ -173,7 +230,7 @@ File_index::File_index( const std::vector< int > & infd_vector, pos - File_trailer::size ) != File_trailer::size ) { set_errno_error( "Error reading member trailer: " ); goto error; } member_size = trailer.member_size(); - if( member_size >= min_member_size && member_size <= pos ) + if( member_size >= min_member_size && member_size <= (unsigned long long)pos ) for( int ih = 0; ih < files && !done; ++ih ) { const int hfd = infd_vector[ih]; @@ -185,22 +242,24 @@ File_index::File_index( const std::vector< int > & infd_vector, } if( !done ) { - if( member_vector.empty() ) // maybe trailing data - { --pos; continue; } + if( member_vector.empty() ) { --pos; continue; } // maybe trailing data set_num_error( "Member size in trailer may be corrupt at pos ", pos - 8 ); break; } - if( member_vector.empty() && isize - pos > File_header::size ) + if( member_vector.empty() && isize > pos ) + { + const int size = std::min( (long long)File_header::size, isize - pos ); for( int i = 0; i < files; ++i ) { const int infd = infd_vector[i]; - if( seek_read( infd, header.data, File_header::size, pos ) == File_header::size && - header.verify_magic() && header.verify_version() ) + if( seek_read( infd, header.data, size, pos ) == size && + header.verify_prefix( size ) ) { error_ = "Last member in input file is truncated or corrupt."; retval_ = 2; goto error; } } + } pos -= member_size; member_vector.push_back( Member( 0, trailer.data_size(), pos, member_size, 0 ) ); diff --git a/file_index.h b/file_index.h index 2b127c3..71e9852 100644 --- a/file_index.h +++ b/file_index.h @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2016 Antonio Diaz Diaz. + Copyright (C) 2009-2017 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -36,12 +36,14 @@ class File_index int retval_; void set_errno_error( const char * const msg ); - void set_num_error( const char * const msg1, unsigned long long num, - const char * const msg2 = "" ); + void set_num_error( const char * const msg, unsigned long long num ); + bool skip_trailing_data( const int fd, const bool ignore_bad_ds, + long long & pos ); public: File_index() : error_( "No index" ), isize( 0 ), retval_( 2 ) {} - explicit File_index( const int infd ); + File_index( const int infd, const bool ignore_bad_ds, + const bool ignore_trailing ); File_index( const std::vector< int > & infd_vector, const long long fsize ); long members() const { return member_vector.size(); } @@ -58,13 +60,13 @@ public: } bool operator!=( const File_index & fi ) const { return !( *this == fi ); } - long long data_end() const - { if( member_vector.size() ) return member_vector.back().dblock.end(); - else return 0; } + long long udata_size() const + { if( member_vector.empty() ) return 0; + return member_vector.back().dblock.end(); } - long long file_end() const - { if( member_vector.size() ) return member_vector.back().mblock.end(); - else return 0; } + long long cdata_size() const + { if( member_vector.empty() ) return 0; + return member_vector.back().mblock.end(); } // total size including trailing data (if any) long long file_size() const @@ -0,0 +1,122 @@ +/* Lziprecover - Data recovery tool for the lzip format + Copyright (C) 2009-2017 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <cstdio> +#include <cstring> +#include <string> +#include <vector> +#include <stdint.h> +#include <unistd.h> +#include <sys/stat.h> + +#include "lzip.h" +#include "block.h" +#include "file_index.h" + + +namespace { + +void list_line( const unsigned long long uncomp_size, + const unsigned long long comp_size, + const char * const input_filename ) + { + if( uncomp_size > 0 ) + std::printf( "%15llu %15llu %6.2f%% %s\n", uncomp_size, comp_size, + 100.0 * ( 1.0 - ( (double)comp_size / uncomp_size ) ), + input_filename ); + else + std::printf( "%15llu %15llu -INF%% %s\n", uncomp_size, comp_size, + input_filename ); + } + +} // end namespace + + +int list_files( const std::vector< std::string > & filenames, + const int verbosity, const bool ignore_trailing ) + { + unsigned long long total_comp = 0, total_uncomp = 0; + int files = 0, retval = 0; + bool first_post = true; + bool stdin_used = false; + for( unsigned i = 0; i < filenames.size(); ++i ) + { + const bool from_stdin = ( filenames[i] == "-" ); + if( from_stdin ) { if( stdin_used ) continue; else stdin_used = true; } + const char * const input_filename = + from_stdin ? "(stdin)" : filenames[i].c_str(); + struct stat in_stats; // not used + const int infd = from_stdin ? STDIN_FILENO : + open_instream( input_filename, &in_stats, true, true ); + if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; } + + const File_index file_index( infd, false, ignore_trailing ); + close( infd ); + if( file_index.retval() != 0 ) + { + show_file_error( input_filename, file_index.error().c_str() ); + if( retval < file_index.retval() ) retval = file_index.retval(); + continue; + } + if( verbosity >= 0 ) + { + const unsigned long long udata_size = file_index.udata_size(); + const unsigned long long cdata_size = file_index.cdata_size(); + total_comp += cdata_size; total_uncomp += udata_size; ++files; + if( first_post ) + { + first_post = false; + if( verbosity >= 1 ) std::fputs( " dict memb trail ", stdout ); + std::fputs( " uncompressed compressed saved name\n", stdout ); + } + if( verbosity >= 1 ) + { + unsigned dictionary_size = 0; + for( long i = 0; i < file_index.members(); ++i ) + dictionary_size = + std::max( dictionary_size, file_index.dictionary_size( i ) ); + const long long trailing_size = file_index.file_size() - cdata_size; + std::printf( "%s %5ld %6lld ", format_ds( dictionary_size ), + file_index.members(), trailing_size ); + } + list_line( udata_size, cdata_size, input_filename ); + + if( verbosity >= 2 && file_index.members() > 1 ) + { + std::fputs( " member data_pos data_size member_pos member_size\n", stdout ); + for( long i = 0; i < file_index.members(); ++i ) + { + const Block & db = file_index.dblock( i ); + const Block & mb = file_index.mblock( i ); + std::printf( "%5ld %15llu %15llu %15llu %15llu\n", + i + 1, db.pos(), db.size(), mb.pos(), mb.size() ); + } + first_post = true; // reprint heading after list of members + } + std::fflush( stdout ); + } + } + if( verbosity >= 0 && files > 1 ) + { + if( verbosity >= 1 ) std::fputs( " ", stdout ); + list_line( total_uncomp, total_comp, "(totals)" ); + std::fflush( stdout ); + } + return retval; + } @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2016 Antonio Diaz Diaz. + Copyright (C) 2009-2017 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -30,8 +30,11 @@ public: static const int next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 }; st = next[st]; } - void set_char1() { st -= ( st < 4 ) ? st : 3; } // for st < 7 - void set_char2() { st -= ( st < 10 ) ? 3 : 6; } // for st >= 7 + bool is_char_set_char() + { + if( st < 7 ) { st -= ( st < 4 ) ? st : 3; return true; } + else { st -= ( st < 10 ) ? 3 : 6; return false; } + } void set_match() { st = ( st < 7 ) ? 7 : 10; } void set_rep() { st = ( st < 7 ) ? 8 : 11; } void set_short_rep() { st = ( st < 7 ) ? 9 : 11; } @@ -168,8 +171,10 @@ public: void update_buf( uint32_t & crc, const uint8_t * const buffer, const int size ) const { + uint32_t c = crc; for( int i = 0; i < size; ++i ) - crc = data[(crc^buffer[i])&0xFF] ^ ( crc >> 8 ); + c = data[(c^buffer[i])&0xFF] ^ ( c >> 8 ); + crc = c; } }; @@ -227,7 +232,7 @@ struct File_header { const unsigned base_size = 1 << data[5]; const unsigned fraction = base_size / 16; - for( int i = 7; i >= 1; --i ) + for( unsigned i = 7; i >= 1; --i ) if( base_size - ( i * fraction ) >= sz ) { data[5] |= ( i << 5 ); break; } } @@ -276,6 +281,23 @@ struct File_trailer }; +struct Bad_byte + { + enum Mode { literal, delta, flip }; + long long pos; + Mode mode; + uint8_t value; + + Bad_byte() : pos( -1 ), mode( literal ), value( 0 ) {} + uint8_t operator()( const uint8_t old_value ) const + { + if( mode == delta ) return old_value + value; + if( mode == flip ) return old_value ^ value; + return value; + } + }; + + struct Error { const char * const msg; @@ -288,6 +310,10 @@ inline unsigned long long positive_diff( const unsigned long long x, { return ( ( x > y ) ? x - y : 0 ); } +const char * const bad_magic_msg = "Bad magic number (file not in lzip format)."; +const char * const bad_dict_msg = "Invalid dictionary size in member header."; +const char * const trailing_msg = "Trailing data not allowed."; + // defined in alone_to_lz.cc int alone_to_lz( const int infd, const Pretty_print & pp ); @@ -299,10 +325,17 @@ long writeblock( const int fd, const uint8_t * const buf, const long size ); int seek_read( const int fd, uint8_t * const buf, const int size, const long long pos ); +// defined in list.cc +int list_files( const std::vector< std::string > & filenames, + const int verbosity, const bool ignore_trailing ); + // defined in main.cc extern std::string output_filename; // global vars for output file extern int outfd; - +struct stat; +const char * bad_version( const unsigned version ); +const char * format_ds( const unsigned dictionary_size ); +void show_header( const unsigned dictionary_size, const int vlevel = 3 ); int open_instream( const char * const name, struct stat * const in_statsp, const bool no_ofile, const bool reg_only = false ); bool open_outstream( const bool force, const bool from_stdin, @@ -311,9 +344,10 @@ bool file_exists( const std::string & filename ); void cleanup_and_fail( const int retval ); int close_outstream( const struct stat * const in_statsp ); std::string insert_fixed( std::string name ); -void show_header( const unsigned dictionary_size, const int vlevel = 3 ); void show_error( const char * const msg, const int errcode = 0, const bool help = false ); +void show_file_error( const char * const filename, const char * const msg, + const int errcode = 0 ); void internal_error( const char * const msg ); void show_error2( const char * const msg1, const char * const name, const char * const msg2 ); @@ -330,22 +364,17 @@ int merge_files( const std::vector< std::string > & filenames, const int verbosity, const bool force ); // defined in range_dec.cc -const char * format_num( unsigned long long num, - unsigned long long limit = -1ULL, - const int set_prefix = 0 ); bool safe_seek( const int fd, const long long pos ); -int list_files( const std::vector< std::string > & filenames, - const int verbosity ); // defined in repair.cc int repair_file( const std::string & input_filename, const std::string & default_output_filename, const int verbosity, const bool force ); -int debug_repair( const std::string & input_filename, const long long bad_pos, - const int verbosity, const uint8_t bad_value ); +int debug_repair( const std::string & input_filename, + const Bad_byte & bad_byte, const int verbosity ); int debug_decompress( const std::string & input_filename, - const long long bad_pos, const int verbosity, - const uint8_t bad_value, const bool show_packets ); + const Bad_byte & bad_byte, const int verbosity, + const bool show_packets ); // defined in split.cc bool verify_header( const File_header & header, const Pretty_print & pp ); @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2016 Antonio Diaz Diaz. + Copyright (C) 2009-2017 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -73,10 +73,10 @@ namespace { const char * const Program_name = "Lziprecover"; const char * const program_name = "lziprecover"; -const char * const program_year = "2016"; +const char * const program_year = "2017"; const char * invocation_name = 0; -struct { const char * from; const char * to; } const known_extensions[] = { +const struct { const char * from; const char * to; } known_extensions[] = { { ".lz", "" }, { ".tlz", ".tar" }, { 0, 0 } }; @@ -99,6 +99,8 @@ void show_help() "\nLziprecover can also produce a correct file by merging the good parts of\n" "two or more damaged copies, extract data from damaged files, decompress\n" "files and test integrity of files.\n" + "\nLziprecover provides random access to the data in multimember files; it\n" + "only decompresses the members containing the desired data.\n" "\nLziprecover is not a replacement for regular backups, but a last line of\n" "defense for the case where the backups are also damaged.\n" "\nUsage: %s [options] [files]\n", invocation_name ); @@ -113,7 +115,7 @@ void show_help() " -f, --force overwrite existing output files\n" " -i, --ignore-errors make '--range-decompress' ignore data errors\n" " -k, --keep keep (don't delete) input files\n" - " -l, --list print total file sizes and ratios\n" + " -l, --list print (un)compressed file sizes\n" " -m, --merge correct errors in file using several copies\n" " -o, --output=<file> place the output into <file>\n" " -q, --quiet suppress all messages\n" @@ -152,42 +154,64 @@ void show_version() } // end namespace +const char * bad_version( const unsigned version ) + { + static char buf[80]; + snprintf( buf, sizeof buf, "Version %u member format not supported.", + version ); + return buf; + } + + +const char * format_ds( const unsigned dictionary_size ) + { + enum { bufsize = 16, factor = 1024 }; + static char buf[bufsize]; + const char * const prefix[8] = + { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; + const char * p = ""; + const char * np = " "; + unsigned num = dictionary_size; + bool exact = ( num % factor == 0 ); + + for( int i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) + { num /= factor; if( num % factor != 0 ) exact = false; + p = prefix[i]; np = ""; } + snprintf( buf, bufsize, "%s%4u %sB", np, num, p ); + return buf; + } + + void show_header( const unsigned dictionary_size, const int vlevel ) { if( verbosity >= vlevel ) - { - const char * const prefix[8] = - { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; - enum { factor = 1024 }; - const char * p = ""; - const char * np = " "; - unsigned num = dictionary_size; - bool exact = ( num % factor == 0 ); - - for( int i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) - { num /= factor; if( num % factor != 0 ) exact = false; - p = prefix[i]; np = ""; } - std::fprintf( stderr, "dictionary size %s%4u %sB. ", np, num, p ); - } + std::fprintf( stderr, "dictionary %s. ", format_ds( dictionary_size ) ); } namespace { -// Returns the number of chars read, or 0 if error. -// -int parse_long_long( const char * const ptr, long long & value ) +long long getnum( const char * const ptr, const long long llimit = -LLONG_MAX, + const long long ulimit = LLONG_MAX, + const char ** const tailp = 0 ) { char * tail; errno = 0; - value = strtoll( ptr, &tail, 0 ); - if( tail == ptr || errno || value < 0 ) return 0; - int c = tail - ptr; + long long result = strtoll( ptr, &tail, 0 ); + if( tail == ptr ) + { + show_error( "Bad or missing numerical argument.", 0, true ); + std::exit( 1 ); + } - if( ptr[c] ) + if( !errno && tail[0] ) { - const int factor = ( ptr[c+1] == 'i' ) ? 1024 : 1000; - int exponent = 0; - switch( ptr[c] ) + char * const p = tail++; + int factor; + bool bsuf; // 'B' suffix is present + if( tail[0] == 'i' ) { ++tail; factor = 1024; } else factor = 1000; + if( tail[0] == 'B' ) { ++tail; bsuf = true; } else bsuf = false; + int exponent = -1; // -1 = bad multiplier + switch( *p ) { case 'Y': exponent = 8; break; case 'Z': exponent = 7; break; @@ -196,22 +220,30 @@ int parse_long_long( const char * const ptr, long long & value ) case 'T': exponent = 4; break; case 'G': exponent = 3; break; case 'M': exponent = 2; break; - case 'K': if( factor == 1024 ) exponent = 1; else return 0; break; - case 'k': if( factor == 1000 ) exponent = 1; else return 0; break; + case 'K': if( factor == 1024 ) exponent = 1; break; + case 'k': if( factor == 1000 ) exponent = 1; break; + case 'B': if( factor == 1000 && !bsuf ) exponent = 0; break; + default : if( tailp ) { tail = p; exponent = 0; } break; } - if( exponent > 0 ) + if( exponent < 0 ) { - ++c; - if( ptr[c] == 'i' ) { ++c; if( value ) format_num( 0, 0, -1 ); } - if( ptr[c] == 'B' ) ++c; - for( int i = 0; i < exponent; ++i ) - { - if( INT64_MAX / factor >= value ) value *= factor; - else return 0; - } + show_error( "Bad multiplier in numerical argument.", 0, true ); + std::exit( 1 ); + } + for( int i = 0; i < exponent; ++i ) + { + if( LLONG_MAX / factor >= std::labs( result ) ) result *= factor; + else { errno = ERANGE; break; } } } - return c; + if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE; + if( errno ) + { + show_error( "Numerical argument out of limits." ); + std::exit( 1 ); + } + if( tailp ) *tailp = tail; + return result; } @@ -219,17 +251,16 @@ int parse_long_long( const char * const ptr, long long & value ) // void parse_range( const char * const ptr, Block & range ) { - long long value = 0; - const bool size_only = ( ptr[0] == ',' ); - int c = size_only ? 0 : parse_long_long( ptr, value ); // pos - if( size_only || ( c && value >= 0 && value < INT64_MAX && - ( ptr[c] == 0 || ptr[c] == ',' || ptr[c] == '-' ) ) ) + const char * tail = ptr; + long long value = + ( ptr[0] == ',' ) ? 0 : getnum( ptr, 0, INT64_MAX - 1, &tail ); + if( tail[0] == 0 || tail[0] == ',' || tail[0] == '-' ) { range.pos( value ); - if( ptr[c] == 0 ) { range.size( INT64_MAX - value ); return; } - const bool issize = ( ptr[c] == ',' ); - c = parse_long_long( ptr + c + 1, value ); // size - if( c && value > 0 && ( issize || value > range.pos() ) ) + if( tail[0] == 0 ) { range.size( INT64_MAX - value ); return; } + const bool issize = ( tail[0] == ',' ); + value = getnum( tail + 1, 1, INT64_MAX ); // size + if( issize || value > range.pos() ) { if( !issize ) value -= range.pos(); if( INT64_MAX - range.pos() >= value ) { range.size( value ); return; } @@ -240,21 +271,21 @@ void parse_range( const char * const ptr, Block & range ) } -// Recognized format: <pos>,<value> +// Recognized formats: <pos>,<value> <pos>,+<value> <pos>,f<value> // -void parse_pos_value( const char * const ptr, long long & pos, uint8_t & value ) +void parse_pos_value( const char * const ptr, Bad_byte & bad_byte ) { - long long val = 0; - int c = parse_long_long( ptr, val ); // pos - if( c && val >= 0 && val < INT64_MAX && ptr[c] == ',' ) + const char * tail; + bad_byte.pos = getnum( ptr, 0, INT64_MAX, &tail ); + if( tail[0] != ',' ) { - pos = val; - c = parse_long_long( ptr + c + 1, val ); // value - if( c && val >= 0 && val < 256 ) - { value = val; return; } + show_error( "Bad separator between <pos> and <val>.", 0, true ); + std::exit( 1 ); } - show_error( "Bad file position or byte value.", 0, true ); - std::exit( 1 ); + if( tail[1] == '+' ) { ++tail; bad_byte.mode = Bad_byte::delta; } + else if( tail[1] == 'f' ) { ++tail; bad_byte.mode = Bad_byte::flip; } + else bad_byte.mode = Bad_byte::literal; + bad_byte.value = getnum( tail + 1, 0, 255 ); } @@ -281,12 +312,12 @@ void set_mode( Mode & program_mode, const Mode new_mode ) int extension_index( const std::string & name ) { - for( int i = 0; known_extensions[i].from; ++i ) + for( int eindex = 0; known_extensions[eindex].from; ++eindex ) { - const std::string ext( known_extensions[i].from ); + const std::string ext( known_extensions[eindex].from ); if( name.size() > ext.size() && name.compare( name.size() - ext.size(), ext.size(), ext ) == 0 ) - return i; + return eindex; } return -1; } @@ -298,11 +329,7 @@ int open_instream( const char * const name, struct stat * const in_statsp, { int infd = open( name, O_RDONLY | O_BINARY ); if( infd < 0 ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Can't open input file '%s': %s\n", - program_name, name, std::strerror( errno ) ); - } + show_file_error( name, "Can't open input file", errno ); else { const int i = fstat( infd, in_statsp ); @@ -338,15 +365,15 @@ void set_a_outname( const std::string & name ) } -void set_d_outname( const std::string & name, const int i ) +void set_d_outname( const std::string & name, const int eindex ) { - if( i >= 0 ) + if( eindex >= 0 ) { - const std::string from( known_extensions[i].from ); + const std::string from( known_extensions[eindex].from ); if( name.size() > from.size() ) { output_filename.assign( name, 0, name.size() - from.size() ); - output_filename += known_extensions[i].to; + output_filename += known_extensions[eindex].to; return; } } @@ -398,7 +425,8 @@ bool file_exists( const std::string & filename ) } -bool check_tty( const int infd, const Mode program_mode ) +bool check_tty( const char * const input_filename, const int infd, + const Mode program_mode ) { if( program_mode == m_alone_to_lz && isatty( outfd ) ) { @@ -407,7 +435,8 @@ bool check_tty( const int infd, const Mode program_mode ) } if( isatty( infd ) ) // all modes read compressed data { - show_error( "I won't read compressed data from a terminal.", 0, true ); + show_file_error( input_filename, + "I won't read compressed data from a terminal." ); return false; } return true; @@ -465,10 +494,10 @@ void close_and_set_permissions( const struct stat * const in_statsp ) } -unsigned char xdigit( const int value ) +unsigned char xdigit( const unsigned value ) { - if( value >= 0 && value <= 9 ) return '0' + value; - if( value >= 10 && value <= 15 ) return 'A' + value - 10; + if( value <= 9 ) return '0' + value; + if( value <= 15 ) return 'A' + value - 10; return 0; } @@ -482,26 +511,18 @@ bool show_trailing_data( const uint8_t * const data, const int size, std::string msg; if( !all ) msg = "first bytes of "; msg += "trailing data = "; - bool text = true; for( int i = 0; i < size; ++i ) - if( !std::isprint( data[i] ) ) { text = false; break; } - if( text ) - { - msg += '\''; - msg.append( (const char *)data, size ); - msg += '\''; - } - else { - for( int i = 0; i < size; ++i ) - { - if( i > 0 ) msg += ' '; - msg += xdigit( data[i] >> 4 ); - msg += xdigit( data[i] & 0x0F ); - } + msg += xdigit( data[i] >> 4 ); + msg += xdigit( data[i] & 0x0F ); + msg += ' '; } + msg += '\''; + for( int i = 0; i < size; ++i ) + { if( std::isprint( data[i] ) ) msg += data[i]; else msg += '.'; } + msg += '\''; pp( msg.c_str() ); - if( !ignore_trailing ) show_error( "Trailing data not allowed." ); + if( !ignore_trailing ) show_file_error( pp.name(), trailing_msg ); } return ignore_trailing; } @@ -532,22 +553,16 @@ int decompress( const int infd, const Pretty_print & pp, if( !header.verify_magic() ) { if( first_member ) - { pp( "Bad magic number (file not in lzip format)." ); retval = 2; } + { show_file_error( pp.name(), bad_magic_msg ); retval = 2; } else if( !show_trailing_data( header.data, size, pp, false, ignore_trailing ) ) retval = 2; break; } if( !header.verify_version() ) - { - if( verbosity >= 0 ) - { pp(); - std::fprintf( stderr, "Version %d member format not supported.\n", - header.version() ); } - retval = 2; break; - } + { pp( bad_version( header.version() ) ); retval = 2; break; } const unsigned dictionary_size = header.dictionary_size(); if( !isvalid_ds( dictionary_size ) ) - { pp( "Invalid dictionary size in member header." ); retval = 2; break; } + { pp( bad_dict_msg ); retval = 2; break; } if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) { pp(); show_header( dictionary_size ); } @@ -634,6 +649,16 @@ void show_error( const char * const msg, const int errcode, const bool help ) } +void show_file_error( const char * const filename, const char * const msg, + const int errcode ) + { + if( verbosity < 0 ) return; + std::fprintf( stderr, "%s: %s: %s", program_name, filename, msg ); + if( errcode > 0 ) std::fprintf( stderr, ": %s", std::strerror( errcode ) ); + std::fputc( '\n', stderr ); + } + + void internal_error( const char * const msg ) { if( verbosity >= 0 ) @@ -662,13 +687,11 @@ void show_error4( const char * const msg1, const char * const name1, int main( const int argc, const char * const argv[] ) { Block range( 0, 0 ); - long long bad_pos = -1; - std::string input_filename; + Bad_byte bad_byte; std::string default_output_filename; std::vector< std::string > filenames; int infd = -1; Mode program_mode = m_none; - uint8_t bad_value = 0; bool force = false; bool ignore_errors = false; bool ignore_trailing = true; @@ -712,8 +735,8 @@ int main( const int argc, const char * const argv[] ) { const int code = parser.code( argind ); if( !code ) break; // no more options - const std::string & arg = parser.argument( argind ); - const char * const ptr = arg.c_str(); + const std::string & sarg = parser.argument( argind ); + const char * const arg = sarg.c_str(); switch( code ) { case 'a': ignore_trailing = false; break; @@ -721,7 +744,7 @@ int main( const int argc, const char * const argv[] ) case 'c': to_stdout = true; break; case 'd': set_mode( program_mode, m_decompress ); break; case 'D': set_mode( program_mode, m_range_dec ); - parse_range( ptr, range ); break; + parse_range( arg, range ); break; case 'f': force = true; break; case 'h': show_help(); return 0; case 'i': ignore_errors = true; break; @@ -729,7 +752,7 @@ int main( const int argc, const char * const argv[] ) case 'l': set_mode( program_mode, m_list ); break; case 'm': set_mode( program_mode, m_merge ); break; case 'n': break; - case 'o': default_output_filename = arg; break; + case 'o': default_output_filename = sarg; break; case 'q': verbosity = -1; break; case 'R': set_mode( program_mode, m_repair ); break; case 's': set_mode( program_mode, m_split ); break; @@ -737,13 +760,13 @@ int main( const int argc, const char * const argv[] ) case 'v': if( verbosity < 4 ) ++verbosity; break; case 'V': show_version(); return 0; case 'W': set_mode( program_mode, m_debug_decompress ); - parse_pos_value( ptr, bad_pos, bad_value ); break; + parse_pos_value( arg, bad_byte ); break; case 'X': set_mode( program_mode, m_show_packets ); - if( ptr[0] ) parse_pos_value( ptr, bad_pos, bad_value ); break; + if( arg[0] ) { parse_pos_value( arg, bad_byte ); } break; case 'Y': set_mode( program_mode, m_debug_delay ); - parse_range( ptr, range ); break; + parse_range( arg, range ); break; case 'Z': set_mode( program_mode, m_debug_repair ); - parse_pos_value( ptr, bad_pos, bad_value ); break; + parse_pos_value( arg, bad_byte ); break; default : internal_error( "uncaught option." ); } } // end process options @@ -773,18 +796,15 @@ int main( const int argc, const char * const argv[] ) case m_alone_to_lz: break; case m_debug_decompress: one_file( filenames.size() ); - return debug_decompress( filenames[0], bad_pos, verbosity, bad_value, false ); + return debug_decompress( filenames[0], bad_byte, verbosity, false ); case m_debug_delay: one_file( filenames.size() ); return debug_delay( filenames[0], range, verbosity ); case m_debug_repair: one_file( filenames.size() ); - return debug_repair( filenames[0], bad_pos, verbosity, bad_value ); + return debug_repair( filenames[0], bad_byte, verbosity ); case m_decompress: break; - case m_list: - if( filenames.size() < 1 ) - { show_error( "You must specify at least 1 file.", 0, true ); return 1; } - return list_files( filenames, verbosity ); + case m_list: break; case m_merge: if( filenames.size() < 2 ) { show_error( "You must specify at least 2 files.", 0, true ); return 1; } @@ -794,7 +814,7 @@ int main( const int argc, const char * const argv[] ) one_file( filenames.size() ); set_signals(); return range_decompress( filenames[0], default_output_filename, range, - verbosity, force, ignore_errors, to_stdout ); + verbosity, force, ignore_errors, ignore_trailing, to_stdout ); case m_repair: one_file( filenames.size() ); set_signals(); @@ -802,7 +822,7 @@ int main( const int argc, const char * const argv[] ) force ); case m_show_packets: one_file( filenames.size() ); - return debug_decompress( filenames[0], bad_pos, verbosity, bad_value, true ); + return debug_decompress( filenames[0], bad_byte, verbosity, true ); case m_split: one_file( filenames.size() ); set_signals(); @@ -814,12 +834,16 @@ int main( const int argc, const char * const argv[] ) { show_error( "Not enough memory." ); cleanup_and_fail( 1 ); } catch( Error e ) { show_error( e.msg, errno ); cleanup_and_fail( 1 ); } + if( filenames.empty() ) filenames.push_back("-"); + + if( program_mode == m_list ) + return list_files( filenames, verbosity, ignore_trailing ); + if( program_mode == m_test ) outfd = -1; else if( program_mode != m_alone_to_lz && program_mode != m_decompress ) internal_error( "invalid decompressor operation." ); - if( filenames.empty() ) filenames.push_back("-"); if( !to_stdout && program_mode != m_test && ( filenames_given || default_output_filename.size() ) ) set_signals(); @@ -830,13 +854,13 @@ int main( const int argc, const char * const argv[] ) bool stdin_used = false; for( unsigned i = 0; i < filenames.size(); ++i ) { + std::string input_filename; struct stat in_stats; output_filename.clear(); if( filenames[i].empty() || filenames[i] == "-" ) { if( stdin_used ) continue; else stdin_used = true; - input_filename.clear(); infd = STDIN_FILENO; if( program_mode != m_test ) { @@ -881,14 +905,15 @@ int main( const int argc, const char * const argv[] ) } } - if( !check_tty( infd, program_mode ) ) + pp.set_name( input_filename ); + if( !check_tty( pp.name(), infd, program_mode ) ) { if( retval < 1 ) retval = 1; + if( program_mode == m_test ) { close( infd ); infd = -1; continue; } cleanup_and_fail( retval ); } const struct stat * const in_statsp = input_filename.size() ? &in_stats : 0; - pp.set_name( input_filename ); int tmp; if( program_mode == m_alone_to_lz ) tmp = alone_to_lz( infd, pp ); @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2016 Antonio Diaz Diaz. + Copyright (C) 2009-2017 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -37,6 +37,12 @@ namespace { +bool pending_newline = false; + +void print_pending_newline() + { if( pending_newline ) std::fputc( '\n', stdout ); pending_newline = false; } + + bool file_crc( uint32_t & crc, const int infd ) { const int buffer_size = 65536; @@ -100,6 +106,7 @@ void combine( std::vector< Block > & block_vector, std::vector< Block > & bv ) // positions in 'block_vector' are absolute file positions. +// blocks in 'block_vector' are ascending and don't overlap. bool diff_member( const long long mpos, const long long msize, const std::vector< int > & infd_vector, std::vector< Block > & block_vector, @@ -224,7 +231,7 @@ int open_input_files( const std::vector< std::string > & filenames, for( int i = 0; i < files; ++i ) { long long tmp; - const File_index fi( infd_vector[i] ); + const File_index fi( infd_vector[i], true, true ); if( fi.retval() == 0 ) // file format is intact { if( good_fi < 0 ) { good_fi = i; file_index = fi; } @@ -283,6 +290,92 @@ int open_input_files( const std::vector< std::string > & filenames, } +void maybe_cluster_blocks( std::vector< Block > & block_vector, + const int verbosity ) + { + const unsigned long old_size = block_vector.size(); + if( old_size <= 16 ) return; + do { + int min_gap = INT_MAX; + bool same = true; // all gaps have the same size + for( unsigned i = 1; i < block_vector.size(); ++i ) + { + const long long gap = block_vector[i].pos() - block_vector[i-1].end(); + if( gap < min_gap ) + { if( min_gap < INT_MAX ) same = false; min_gap = gap; } + else if( gap != min_gap ) same = false; + } + if( min_gap >= INT_MAX || same ) break; + for( unsigned i = block_vector.size() - 1; i > 0; --i ) + { + const long long gap = block_vector[i].pos() - block_vector[i-1].end(); + if( gap == min_gap ) + { + block_vector[i-1].size( block_vector[i-1].size() + gap + + block_vector[i].size() ); + block_vector.erase( block_vector.begin() + i ); + } + } + } while( block_vector.size() > 16 ); + if( verbosity >= 1 && old_size > block_vector.size() ) + std::printf( " %lu errors have been grouped in %lu clusters.\n", + old_size, (long)block_vector.size() ); + } + + +bool color_done( const std::vector< int > & color_vector, const int i ) + { + for( int j = i - 1; j >= 0; --j ) + if( color_vector[j] == color_vector[i] ) return true; + return false; + } + + + // try dividing blocks in 2 color groups at every gap +bool try_merge_member2( const long long mpos, const long long msize, + const std::vector< Block > & block_vector, + const std::vector< int > & color_vector, + const std::vector< int > & infd_vector, + const int verbosity ) + { + const int blocks = block_vector.size(); + const int files = infd_vector.size(); + const int variations = files * ( files - 1 ); + + for( int i1 = 0; i1 < files; ++i1 ) + for( int i2 = 0; i2 < files; ++i2 ) + { + if( i1 == i2 || color_vector[i1] == color_vector[i2] || + color_done( color_vector, i1 ) ) continue; + for( int bi = 0; bi < blocks; ++bi ) + if( !safe_seek( infd_vector[i2], block_vector[bi].pos() ) || + !safe_seek( outfd, block_vector[bi].pos() ) || + !copy_file( infd_vector[i2], outfd, block_vector[bi].size() ) ) + cleanup_and_fail( 1 ); + const int infd = infd_vector[i1]; + const int var = ( i1 * ( files - 1 ) ) + i2 - ( i2 > i1 ) + 1; + for( int bi = 0; bi + 1 < blocks; ++bi ) + { + if( verbosity >= 2 ) + { + std::printf( " Trying variation %d of %d, block %d \r", + var, variations, bi + 1 ); + std::fflush( stdout ); pending_newline = true; + } + if( !safe_seek( infd, block_vector[bi].pos() ) || + !safe_seek( outfd, block_vector[bi].pos() ) || + !copy_file( infd, outfd, block_vector[bi].size() ) || + !safe_seek( outfd, mpos ) ) + cleanup_and_fail( 1 ); + long long failure_pos = 0; + if( test_member_from_file( outfd, msize, &failure_pos ) ) return true; + if( mpos + failure_pos < block_vector[bi].end() ) break; + } + } + return false; + } + + // merge block by block bool try_merge_member( const long long mpos, const long long msize, const std::vector< Block > & block_vector, @@ -306,13 +399,13 @@ bool try_merge_member( const long long mpos, const long long msize, while( bi >= 0 ) { - if( verbosity >= 1 ) + if( verbosity >= 2 ) { long var = 0; for( int i = 0; i < blocks; ++i ) var = ( var * files ) + file_idx[i]; - std::printf( "Trying variation %ld of %ld \r", var + 1, variations ); - std::fflush( stdout ); + std::printf( " Trying variation %ld of %ld \r", var + 1, variations ); + std::fflush( stdout ); pending_newline = true; } while( bi < blocks ) { @@ -330,14 +423,8 @@ bool try_merge_member( const long long mpos, const long long msize, while( bi > 0 && mpos + failure_pos < block_vector[bi-1].pos() ) --bi; while( --bi >= 0 ) { - while( ++file_idx[bi] < files ) - { - const int color = color_vector[file_idx[bi]]; - bool done = true; - for( int i = file_idx[bi] - 1; i >= 0; --i ) - if( color_vector[i] == color ) { done = false; break; } - if( done ) break; - } + while( ++file_idx[bi] < files && + color_done( color_vector, file_idx[bi] ) ); if( file_idx[bi] < files ) break; file_idx[bi] = 0; } @@ -363,7 +450,8 @@ bool try_merge_member1( const long long mpos, const long long msize, for( int i1 = 0; i1 < files; ++i1 ) for( int i2 = 0; i2 < files; ++i2 ) { - if( i1 == i2 || color_vector[i1] == color_vector[i2] ) continue; + if( i1 == i2 || color_vector[i1] == color_vector[i2] || + color_done( color_vector, i1 ) ) continue; const int infd = infd_vector[i1]; if( !safe_seek( infd, pos ) || !safe_seek( infd_vector[i2], pos ) || @@ -371,13 +459,13 @@ bool try_merge_member1( const long long mpos, const long long msize, !copy_file( infd_vector[i2], outfd, size ) ) cleanup_and_fail( 1 ); const int var = ( i1 * ( files - 1 ) ) + i2 - ( i2 > i1 ) + 1; - for( long long i = 0; i < size; ++i ) + for( long long i = 0; i + 1 < size; ++i ) { - if( verbosity >= 1 ) + if( verbosity >= 2 ) { - std::printf( "Trying variation %d of %d, position %lld \r", + std::printf( " Trying variation %d of %d, position %lld \r", var, variations, pos + i ); - std::fflush( stdout ); + std::fflush( stdout ); pending_newline = true; } if( !safe_seek( outfd, pos + i ) || readblock( infd, &byte, 1 ) != 1 || @@ -471,6 +559,7 @@ int merge_files( const std::vector< std::string > & filenames, const long long msize = file_index.mblock( j ).size(); // vector of data blocks differing among the copies of the current member std::vector< Block > block_vector; + // different color means members are different std::vector< int > color_vector( files, 0 ); if( !diff_member( mpos, msize, infd_vector, block_vector, color_vector ) || !safe_seek( outfd, mpos ) ) @@ -480,31 +569,46 @@ int merge_files( const std::vector< std::string > & filenames, { if( file_index.members() > 1 && test_member_from_file( outfd, msize ) ) continue; - show_error( "Input files are (partially) identical. Merging is not possible." ); + if( verbosity >= 0 ) + std::fprintf( stderr, "Member %ld is damaged and identical in all files." + " Merging is not possible.\n", j + 1 ); cleanup_and_fail( 2 ); } - if( verbosity >= 1 && file_index.members() > 1 ) + if( verbosity >= 2 ) { - std::printf( "Merging member %ld of %ld\n", - j + 1, file_index.members() ); + std::printf( "Merging member %ld of %ld (%lu error%s)\n", + j + 1, file_index.members(), (long)block_vector.size(), + ( block_vector.size() == 1 ) ? "" : "s" ); std::fflush( stdout ); } bool done = false; if( file_index.members() > 1 || block_vector.size() > 1 ) { - done = try_merge_member( mpos, msize, block_vector, color_vector, - infd_vector, verbosity ); - if( !done && verbosity >= 1 ) std::fputc( '\n', stdout ); + if( block_vector.size() > 1 ) + { + maybe_cluster_blocks( block_vector, verbosity ); + done = try_merge_member2( mpos, msize, block_vector, color_vector, + infd_vector, verbosity ); + print_pending_newline(); + } + if( !done ) + { + done = try_merge_member( mpos, msize, block_vector, color_vector, + infd_vector, verbosity ); + print_pending_newline(); + } } if( !done ) + { done = try_merge_member1( mpos, msize, block_vector, color_vector, infd_vector, verbosity ); - if( verbosity >= 1 ) std::fputc( '\n', stdout ); + print_pending_newline(); + } if( !done ) { - if( verbosity >= 2 ) + if( verbosity >= 3 ) for( unsigned i = 0; i < block_vector.size(); ++i ) std::fprintf( stderr, "area %2d from position %6lld to %6lld\n", i + 1, block_vector[i].pos(), block_vector[i].end() - 1 ); @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2016 Antonio Diaz Diaz. + Copyright (C) 2009-2017 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -113,25 +113,23 @@ int LZ_mtester::test_member( const unsigned long pos_limit ) const int pos_state = data_position() & pos_state_mask; if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit { - const uint8_t prev_byte = peek_prev(); - if( state.is_char() ) - { - state.set_char1(); - put_byte( rdec.decode_tree8( bm_literal[get_lit_state(prev_byte)] ) ); - } + Bit_model * const bm = bm_literal[get_lit_state(peek_prev())]; + if( state.is_char_set_char() ) + put_byte( rdec.decode_tree8( bm ) ); else - { - state.set_char2(); - put_byte( rdec.decode_matched( bm_literal[get_lit_state(prev_byte)], - peek( rep0 ) ) ); - } + put_byte( rdec.decode_matched( bm, peek( rep0 ) ) ); } - else + else // match or repeated match { int len; if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit { - if( rdec.decode_bit( bm_rep0[state()] ) != 0 ) // 3rd bit + if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit + { + if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit + { state.set_short_rep(); put_byte( peek( rep0 ) ); continue; } + } + else { unsigned distance; if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit @@ -147,34 +145,28 @@ int LZ_mtester::test_member( const unsigned long pos_limit ) rep1 = rep0; rep0 = distance; } - else - { - if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit - { state.set_short_rep(); put_byte( peek( rep0 ) ); continue; } - } state.set_rep(); len = min_match_len + rdec.decode_len( rep_len_model, pos_state ); } - else + else // match { - const unsigned rep0_saved = rep0; len = min_match_len + rdec.decode_len( match_len_model, pos_state ); - const int dis_slot = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] ); - if( dis_slot < start_dis_model ) rep0 = dis_slot; - else + unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] ); + if( distance >= start_dis_model ) { + const unsigned dis_slot = distance; const int direct_bits = ( dis_slot >> 1 ) - 1; - rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits; + distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits; if( dis_slot < end_dis_model ) - rep0 += rdec.decode_tree_reversed( bm_dis + rep0 - dis_slot - 1, - direct_bits ); + distance += rdec.decode_tree_reversed( + bm_dis + ( distance - dis_slot ), direct_bits ); else { - rep0 += rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits; - rep0 += rdec.decode_tree_reversed4( bm_align ); - if( rep0 == 0xFFFFFFFFU ) // marker found + distance += + rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits; + distance += rdec.decode_tree_reversed4( bm_align ); + if( distance == 0xFFFFFFFFU ) // marker found { - rep0 = rep0_saved; rdec.normalize(); flush_data(); if( len == min_match_len ) // End Of Stream marker @@ -183,10 +175,10 @@ int LZ_mtester::test_member( const unsigned long pos_limit ) } return 4; } - if( rep0 > max_rep0 ) max_rep0 = rep0; + if( distance > max_rep0 ) max_rep0 = distance; } } - rep3 = rep2; rep2 = rep1; rep1 = rep0_saved; + rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance; state.set_match(); if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) ) { flush_data(); return 1; } @@ -212,11 +204,10 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos, const int pos_state = data_position() & pos_state_mask; if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit { - const uint8_t prev_byte = peek_prev(); - if( state.is_char() ) + Bit_model * const bm = bm_literal[get_lit_state(peek_prev())]; + if( state.is_char_set_char() ) { - state.set_char1(); - const uint8_t cur_byte = rdec.decode_tree8( bm_literal[get_lit_state(prev_byte)] ); + const uint8_t cur_byte = rdec.decode_tree8( bm ); put_byte( cur_byte ); if( show_packets ) std::printf( "%6llu %6llu literal %s\n", @@ -224,10 +215,8 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos, } else { - state.set_char2(); const uint8_t match_byte = peek( rep0 ); - const uint8_t cur_byte = - rdec.decode_matched( bm_literal[get_lit_state(prev_byte)], match_byte ); + const uint8_t cur_byte = rdec.decode_matched( bm, match_byte ); put_byte( cur_byte ); if( show_packets ) std::printf( "%6llu %6llu literal %s, match byte %6llu %s\n", @@ -241,7 +230,18 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos, if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit { int rep = 0; - if( rdec.decode_bit( bm_rep0[state()] ) != 0 ) // 3rd bit + if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit + { + if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit + { + if( show_packets ) + std::printf( "%6llu %6llu shortrep %s %6u (%6llu)\n", + mp, dp, format_byte( peek( rep0 ) ), + rep0 + 1, dp - rep0 - 1 ); + state.set_short_rep(); put_byte( peek( rep0 ) ); continue; + } + } + else { unsigned distance; if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit @@ -257,17 +257,6 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos, rep1 = rep0; rep0 = distance; } - else - { - if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit - { - if( show_packets ) - std::printf( "%6llu %6llu shortrep %s %6u (%6llu)\n", - mp, dp, format_byte( peek( rep0 ) ), - rep0 + 1, dp - rep0 - 1 ); - state.set_short_rep(); put_byte( peek( rep0 ) ); continue; - } - } state.set_rep(); len = min_match_len + rdec.decode_len( rep_len_model, pos_state ); if( show_packets ) @@ -276,24 +265,23 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos, } else // match { - const unsigned rep0_saved = rep0; len = min_match_len + rdec.decode_len( match_len_model, pos_state ); - const int dis_slot = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] ); - if( dis_slot < start_dis_model ) rep0 = dis_slot; - else + unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] ); + if( distance >= start_dis_model ) { + const unsigned dis_slot = distance; const int direct_bits = ( dis_slot >> 1 ) - 1; - rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits; + distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits; if( dis_slot < end_dis_model ) - rep0 += rdec.decode_tree_reversed( bm_dis + rep0 - dis_slot - 1, - direct_bits ); + distance += rdec.decode_tree_reversed( + bm_dis + ( distance - dis_slot ), direct_bits ); else { - rep0 += rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits; - rep0 += rdec.decode_tree_reversed4( bm_align ); - if( rep0 == 0xFFFFFFFFU ) // marker found + distance += + rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits; + distance += rdec.decode_tree_reversed4( bm_align ); + if( distance == 0xFFFFFFFFU ) // marker found { - rep0 = rep0_saved; rdec.normalize(); flush_data(); if( show_packets ) @@ -313,10 +301,10 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos, } return 4; } - if( rep0 > max_rep0 ) max_rep0 = rep0; + if( distance > max_rep0 ) max_rep0 = distance; } } - rep3 = rep2; rep2 = rep1; rep1 = rep0_saved; + rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance; state.set_match(); if( show_packets ) std::printf( "%6llu %6llu match %6u,%3d (%6lld)", @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2016 Antonio Diaz Diaz. + Copyright (C) 2009-2017 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -217,7 +217,7 @@ class LZ_mtester Bit_model bm_rep2[State::states]; Bit_model bm_len[State::states][pos_states]; Bit_model bm_dis_slot[len_states][1<<dis_slot_bits]; - Bit_model bm_dis[modeled_distances-end_dis_model]; + Bit_model bm_dis[modeled_distances-end_dis_model+1]; Bit_model bm_align[dis_align_size]; Len_model match_len_model; diff --git a/range_dec.cc b/range_dec.cc index eeb542a..e105aaa 100644 --- a/range_dec.cc +++ b/range_dec.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2016 Antonio Diaz Diaz. + Copyright (C) 2009-2017 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -71,62 +71,9 @@ int decompress_member( const int infd, const Pretty_print & pp, } -int list_file( const char * const input_filename, const Pretty_print & pp ) - { - struct stat in_stats; // not used - const int infd = open_instream( input_filename, &in_stats, true, true ); - if( infd < 0 ) return 1; - - const File_index file_index( infd ); - close( infd ); - if( file_index.retval() != 0 ) - { pp( file_index.error().c_str() ); return file_index.retval(); } - - if( pp.verbosity() >= 0 ) - { - const unsigned long long data_size = file_index.data_end(); - const unsigned long long file_size = file_index.file_end(); - unsigned dictionary_size = 0; - for( long i = 0; i < file_index.members(); ++i ) - dictionary_size = - std::max( dictionary_size, file_index.dictionary_size( i ) ); - pp( 0, stdout ); - show_header( dictionary_size, 1 ); - if( data_size > 0 && file_size > 0 ) - std::printf( "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ", - (double)data_size / file_size, - ( 8.0 * file_size ) / data_size, - 100.0 * ( 1.0 - ( (double)file_size / data_size ) ) ); - std::printf( "decompressed size %9llu, compressed size %8llu.\n", - data_size, file_size ); - - if( pp.verbosity() >= 1 && file_index.members() > 1 ) - { - std::printf( " Total members in file = %ld\n", file_index.members() ); - if( pp.verbosity() >= 2 ) - for( long i = 0; i < file_index.members(); ++i ) - { - const Block & db = file_index.dblock( i ); - const Block & mb = file_index.mblock( i ); - std::printf( " Member %3ld data pos %9llu data size %7llu " - "member pos %9llu member size %7llu.\n", i + 1, - db.pos(), db.size(), mb.pos(), mb.size() ); - } - } - const long long trailing_size = file_index.file_size() - file_index.file_end(); - if( pp.verbosity() >= 1 && trailing_size > 0 ) - std::printf( " %lld bytes of trailing data at end of file.\n", - trailing_size ); - } - return 0; - } - -} // end namespace - - const char * format_num( unsigned long long num, - unsigned long long limit, - const int set_prefix ) + unsigned long long limit = -1ULL, + const int set_prefix = 0 ) { const char * const si_prefix[8] = { "k", "M", "G", "T", "P", "E", "Z", "Y" }; @@ -150,6 +97,8 @@ const char * format_num( unsigned long long num, return buf; } +} // end namespace + bool safe_seek( const int fd, const long long pos ) { @@ -158,37 +107,24 @@ bool safe_seek( const int fd, const long long pos ) } -int list_files( const std::vector< std::string > & filenames, - const int verbosity ) - { - Pretty_print pp( filenames, verbosity ); - int retval = 0; - for( unsigned i = 0; i < filenames.size(); ++i ) - { - pp.set_name( filenames[i] ); - const int tmp = list_file( filenames[i].c_str(), pp ); - if( tmp > retval ) retval = tmp; - } - return retval; - } - - int range_decompress( const std::string & input_filename, const std::string & default_output_filename, Block range, const int verbosity, const bool force, - const bool ignore, const bool to_stdout ) + const bool ignore_errors, const bool ignore_trailing, + const bool to_stdout ) { struct stat in_stats; const int infd = open_instream( input_filename.c_str(), &in_stats, true, true ); if( infd < 0 ) return 1; Pretty_print pp( input_filename, verbosity ); - const File_index file_index( infd ); + const File_index file_index( infd, ignore_errors, ignore_trailing ); if( file_index.retval() != 0 ) - { pp( file_index.error().c_str() ); return file_index.retval(); } + { show_file_error( input_filename.c_str(), file_index.error().c_str() ); + return file_index.retval(); } - if( range.end() > file_index.data_end() ) - range.size( std::max( 0LL, file_index.data_end() - range.pos() ) ); + if( range.end() > file_index.udata_size() ) + range.size( std::max( 0LL, file_index.udata_size() - range.pos() ) ); if( range.size() <= 0 ) { if( verbosity >= 0 ) pp( "Nothing to do." ); return 0; } @@ -196,7 +132,7 @@ int range_decompress( const std::string & input_filename, { if( verbosity >= 2 ) std::fprintf( stderr, "Decompressed file size = %sB\n", - format_num( file_index.data_end() ) ); + format_num( file_index.udata_size() ) ); std::fprintf( stderr, "Decompressing range %sB to %sB (%sBytes)\n", format_num( range.pos() ), format_num( range.pos() + range.size() ), @@ -225,7 +161,7 @@ int range_decompress( const std::string & input_filename, const long long mpos = file_index.mblock( i ).pos(); if( !safe_seek( infd, mpos ) ) { retval = 1; break; } const int tmp = decompress_member( infd, pp, mpos, outskip, outend ); - if( tmp && ( tmp != 2 || !ignore ) ) + if( tmp && ( tmp != 2 || !ignore_errors ) ) cleanup_and_fail( tmp ); if( tmp > retval ) retval = tmp; pp.reset(); @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2016 Antonio Diaz Diaz. + Copyright (C) 2009-2017 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -37,6 +37,12 @@ namespace { +bool pending_newline = false; + +void print_pending_newline() + { if( pending_newline ) std::fputc( '\n', stdout ); pending_newline = false; } + + bool gross_damage( const long long msize, const uint8_t * const mbuffer ) { enum { maxlen = 6 }; // max number of consecutive identical bytes @@ -68,7 +74,7 @@ int repair_dictionary_size( const long long msize, uint8_t * const mbuffer ) File_header & header = *(File_header *)mbuffer; unsigned dictionary_size = header.dictionary_size(); File_trailer & trailer = - *(File_trailer *)( mbuffer + msize - File_trailer::size ); + *(File_trailer *)( mbuffer + msize - File_trailer::size ); const unsigned long long data_size = trailer.data_size(); const bool valid_ds = isvalid_ds( dictionary_size ); if( valid_ds && dictionary_size >= data_size ) return 0; // can't be bad @@ -112,10 +118,10 @@ long repair_member( const long long mpos, const long long msize, if( !master ) return -1; for( ; pos >= min_pos; --pos ) { - if( verbosity >= 1 ) + if( verbosity >= 2 ) { - std::printf( "Trying position %llu \r", mpos + pos ); - std::fflush( stdout ); + std::printf( " Trying position %llu \r", mpos + pos ); + std::fflush( stdout ); pending_newline = true; } for( int j = 0; j < 255; ++j ) { @@ -141,9 +147,10 @@ int repair_file( const std::string & input_filename, if( infd < 0 ) return 1; Pretty_print pp( input_filename, verbosity ); - const File_index file_index( infd ); + const File_index file_index( infd, true, true ); if( file_index.retval() != 0 ) - { pp( file_index.error().c_str() ); return file_index.retval(); } + { show_file_error( input_filename.c_str(), file_index.error().c_str() ); + return file_index.retval(); } output_filename = default_output_filename.empty() ? insert_fixed( input_filename ) : default_output_filename; @@ -161,7 +168,7 @@ int repair_file( const std::string & input_filename, { show_error( "Can't repair error in input file." ); cleanup_and_fail( 2 ); } - if( verbosity >= 1 ) // damaged member found + if( verbosity >= 2 ) // damaged member found { std::printf( "Repairing member %ld of %ld (failure pos = %llu)\n", i + 1, file_index.members(), mpos + failure_pos ); @@ -183,6 +190,7 @@ int repair_file( const std::string & input_filename, if( pos == 0 ) pos = repair_member( mpos, msize, mbuffer, File_header::size + 6, failure_pos, dictionary_size, verbosity ); + print_pending_newline(); } if( pos < 0 ) cleanup_and_fail( 1 ); @@ -200,7 +208,6 @@ int repair_file( const std::string & input_filename, cleanup_and_fail( 1 ); } } delete[] mbuffer; - if( verbosity >= 1 ) std::fputc( '\n', stdout ); if( pos == 0 ) { show_error( "Can't repair input file. Error is probably larger than 1 byte." ); @@ -229,12 +236,13 @@ int debug_delay( const std::string & input_filename, Block range, if( infd < 0 ) return 1; Pretty_print pp( input_filename, verbosity ); - const File_index file_index( infd ); + const File_index file_index( infd, false, true ); if( file_index.retval() != 0 ) - { pp( file_index.error().c_str() ); return file_index.retval(); } + { show_file_error( input_filename.c_str(), file_index.error().c_str() ); + return file_index.retval(); } - if( range.end() > file_index.file_end() ) - range.size( std::max( 0LL, file_index.file_end() - range.pos() ) ); + if( range.end() > file_index.cdata_size() ) + range.size( std::max( 0LL, file_index.cdata_size() - range.pos() ) ); if( range.size() <= 0 ) { if( verbosity >= 0 ) pp( "Nothing to do." ); return 0; } @@ -245,7 +253,7 @@ int debug_delay( const std::string & input_filename, Block range, const long long mpos = file_index.mblock( i ).pos(); const long long msize = file_index.mblock( i ).size(); const unsigned dictionary_size = file_index.dictionary_size( i ); - if( verbosity >= 1 ) + if( verbosity >= 2 ) { std::printf( "Finding max delay in member %ld of %ld (mpos = %llu, msize = %llu)\n", i + 1, file_index.members(), mpos, msize ); @@ -266,10 +274,10 @@ int debug_delay( const std::string & input_filename, Block range, const long partial_end = std::min( pos + 100, end ); for( ; pos < partial_end; ++pos ) { - if( verbosity >= 1 ) + if( verbosity >= 2 ) { - std::printf( "Delays in position %llu \r", mpos + pos ); - std::fflush( stdout ); + std::printf( " Delays at position %llu \r", mpos + pos ); + std::fflush( stdout ); pending_newline = true; } int value = -1; for( int j = 0; j < 256; ++j ) @@ -281,18 +289,18 @@ int debug_delay( const std::string & input_filename, Block range, const long delay = failure_pos - pos; if( delay > max_delay ) { max_delay = delay; value = mbuffer[pos]; } } - if( value >= 0 && verbosity >= 0 ) + if( value >= 0 && verbosity >= 2 ) { - std::printf( "New max delay %lu at position %llu (0x%02X)\n", + std::printf( " New max delay %lu at position %llu (0x%02X)\n", max_delay, mpos + pos, value ); - std::fflush( stdout ); + std::fflush( stdout ); pending_newline = false; } if( pos + max_delay >= msize ) { pos = end; break; } } delete master; } delete[] mbuffer; - if( verbosity >= 1 ) std::fputc( '\n', stdout ); + print_pending_newline(); } if( verbosity >= 1 ) std::fputs( "Done.\n", stdout ); @@ -300,21 +308,22 @@ int debug_delay( const std::string & input_filename, Block range, } -int debug_repair( const std::string & input_filename, const long long bad_pos, - const int verbosity, const uint8_t bad_value ) +int debug_repair( const std::string & input_filename, + const Bad_byte & bad_byte, const int verbosity ) { struct stat in_stats; // not used const int infd = open_instream( input_filename.c_str(), &in_stats, true, true ); if( infd < 0 ) return 1; Pretty_print pp( input_filename, verbosity ); - const File_index file_index( infd ); + const File_index file_index( infd, false, true ); if( file_index.retval() != 0 ) - { pp( file_index.error().c_str() ); return file_index.retval(); } + { show_file_error( input_filename.c_str(), file_index.error().c_str() ); + return file_index.retval(); } long idx = 0; for( ; idx < file_index.members(); ++idx ) - if( file_index.mblock( idx ).includes( bad_pos ) ) break; + if( file_index.mblock( idx ).includes( bad_byte.pos ) ) break; if( idx >= file_index.members() ) { if( verbosity >= 0 ) pp( "Nothing to do." ); return 0; } @@ -335,10 +344,11 @@ int debug_repair( const std::string & input_filename, const long long bad_pos, if( !mbuffer ) return 1; const File_header & header = *(File_header *)mbuffer; const unsigned dictionary_size = header.dictionary_size(); - const uint8_t good_value = mbuffer[bad_pos-mpos]; - mbuffer[bad_pos-mpos] = bad_value; + const uint8_t good_value = mbuffer[bad_byte.pos-mpos]; + const uint8_t bad_value = bad_byte( good_value ); + mbuffer[bad_byte.pos-mpos] = bad_value; long failure_pos = 0; - if( bad_pos != 5 || isvalid_ds( header.dictionary_size() ) ) + if( bad_byte.pos != 5 || isvalid_ds( header.dictionary_size() ) ) { const LZ_mtester * master = prepare_master( mbuffer, msize, 0, header.dictionary_size() ); @@ -354,12 +364,13 @@ int debug_repair( const std::string & input_filename, const long long bad_pos, } delete master; } - if( verbosity >= 1 ) + if( verbosity >= 2 ) { std::printf( "Test repairing member %ld of %ld (mpos = %llu, msize = %llu)\n" - " (damage pos = %llu (0x%02X->0x%02X), failure pos = %llu)\n", + " (damage pos = %llu (0x%02X->0x%02X), failure pos = %llu, delay = %lld )\n", idx + 1, file_index.members(), mpos, msize, - bad_pos, good_value, bad_value, mpos + failure_pos ); + bad_byte.pos, good_value, bad_value, mpos + failure_pos, + mpos + failure_pos - bad_byte.pos ); std::fflush( stdout ); } if( failure_pos >= msize ) failure_pos = msize - 1; @@ -370,10 +381,10 @@ int debug_repair( const std::string & input_filename, const long long bad_pos, if( pos == 0 ) pos = repair_member( mpos, msize, mbuffer, File_header::size + 6, failure_pos, dictionary_size, verbosity ); + print_pending_newline(); delete[] mbuffer; if( pos < 0 ) { show_error( "Can't prepare master." ); return 1; } - if( verbosity >= 1 ) std::fputc( '\n', stdout ); if( pos == 0 ) internal_error( "can't repair input file." ); if( verbosity >= 1 ) std::fputs( "Member repaired successfully.\n", stdout ); @@ -382,17 +393,18 @@ int debug_repair( const std::string & input_filename, const long long bad_pos, int debug_decompress( const std::string & input_filename, - const long long bad_pos, const int verbosity, - const uint8_t bad_value, const bool show_packets ) + const Bad_byte & bad_byte, const int verbosity, + const bool show_packets ) { struct stat in_stats; const int infd = open_instream( input_filename.c_str(), &in_stats, true, true ); if( infd < 0 ) return 1; Pretty_print pp( input_filename, verbosity ); - const File_index file_index( infd ); + const File_index file_index( infd, false, true ); if( file_index.retval() != 0 ) - { pp( file_index.error().c_str() ); return file_index.retval(); } + { show_file_error( input_filename.c_str(), file_index.error().c_str() ); + return file_index.retval(); } outfd = show_packets ? -1 : STDOUT_FILENO; int retval = 0; @@ -411,12 +423,14 @@ int debug_decompress( const std::string & input_filename, retval = 2; break; } uint8_t * const mbuffer = read_member( infd, mpos, msize ); if( !mbuffer ) { retval = 1; break; } - if( bad_pos >= 0 && file_index.mblock( i ).includes( bad_pos ) ) + if( bad_byte.pos >= 0 && file_index.mblock( i ).includes( bad_byte.pos ) ) { + const uint8_t good_value = mbuffer[bad_byte.pos-mpos]; + const uint8_t bad_value = bad_byte( good_value ); + mbuffer[bad_byte.pos-mpos] = bad_value; if( verbosity >= 1 && show_packets ) std::printf( "Byte at pos %llu changed from 0x%02X to 0x%02X\n", - bad_pos, mbuffer[bad_pos-mpos], bad_value ); - mbuffer[bad_pos-mpos] = bad_value; + bad_byte.pos, good_value, bad_value ); } LZ_mtester mtester( mbuffer, msize, dictionary_size, outfd ); const int result = mtester.debug_decode_member( dpos, mpos, show_packets ); @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2016 Antonio Diaz Diaz. + Copyright (C) 2009-2017 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -107,7 +107,7 @@ int do_split_file( const std::string & input_filename, uint8_t * & base_buffer, { pp( "Input file is too short." ); return 2; } if( !verify_header( *(File_header *)buffer, pp ) ) return 2; - const File_index file_index( infd ); + const File_index file_index( infd, true, true ); if( file_index.retval() != 0 ) pp( file_index.error().c_str() ); const long max_members = file_index.retval() ? 999999 : file_index.members(); int max_digits = 1; diff --git a/testsuite/check.sh b/testsuite/check.sh index 285d225..3ce8f9e 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! /bin/sh # check script for Lziprecover - Data recovery tool for the lzip format -# Copyright (C) 2009-2016 Antonio Diaz Diaz. +# Copyright (C) 2009-2017 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute and modify it. @@ -18,12 +18,12 @@ if [ ! -f "${LZIP}" ] || [ ! -x "${LZIP}" ] ; then exit 1 fi -if [ -e "${LZIP}" ] 2> /dev/null ; then true -else +[ -e "${LZIP}" ] 2> /dev/null || + { echo "$0: a POSIX shell is required to run the tests" echo "Try bash -c \"$0 $1 $2\"" exit 1 -fi + } if [ -d tmp ] ; then rm -rf tmp ; fi mkdir tmp @@ -46,6 +46,7 @@ bad3_lz="${testdir}"/test_bad3.lz bad4_lz="${testdir}"/test_bad4.lz bad5_lz="${testdir}"/test_bad5.lz fail=0 +test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; } # Description of test files for lziprecover: # fox6_bad1.lz: byte at offset 5 changed from 0x0C to 0x00 (DS) @@ -54,344 +55,406 @@ fail=0 # byte at offset 268 changed from 0x34 to 0x33 (mid stream) # byte at offset 327 changed from 0x2A to 0x2B (byte 7) # byte at offset 458 changed from 0xA0 to 0x20 (EOS marker) -# fox6_bad2.lz: [110-129] --> zeroed; -# fox6_bad3.lz: [180-379] --> zeroed; -# fox6_bad4.lz: [330-429] --> zeroed; -# fox6_bad5.lz: [380-479] --> zeroed; +# fox6_bad2.lz: [110-129] --> zeroed +# fox6_bad3.lz: [180-379] --> zeroed +# fox6_bad4.lz: [330-429] --> zeroed +# fox6_bad5.lz: [380-479] --> zeroed # test_bad1.lz: byte at offset 66 changed from 0xA6 to 0x46 # test_bad2.lz: [ 34- 65] --> copy of bytes [ 68- 99] -# test_bad3.lz: [ 512-1535] --> zeroed; [2560-3583] --> zeroed -# test_bad4.lz: [3072-4095] --> random data; [4608-5631] --> zeroed -# test_bad5.lz: [1024-2047] --> random data; [5120-6143] --> random data +# test_bad3.lz: [ 512-1535] --> zeroed [2560-3583] --> zeroed +# test_bad4.lz: [3072-4095] --> random errors [4608-5631] --> zeroed +# test_bad5.lz: [1024-2047] --> random errors [5120-6143] --> random data printf "testing lziprecover-%s..." "$2" +"${LZIP}" -lq in +[ $? = 2 ] || test_failed $LINENO "${LZIP}" -tq in -if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi +[ $? = 2 ] || test_failed $LINENO "${LZIP}" -tq < in -if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi +[ $? = 2 ] || test_failed $LINENO "${LZIP}" -cdq in -if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi +[ $? = 2 ] || test_failed $LINENO "${LZIP}" -cdq < in -if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi -dd if="${in_lz}" bs=1 count=6 2> /dev/null | "${LZIP}" -tq -if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi -dd if="${in_lz}" bs=1 count=20 2> /dev/null | "${LZIP}" -tq -if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi -"${LZIPRECOVER}" -lq -if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi +[ $? = 2 ] || test_failed $LINENO +# these are for code coverage +"${LZIP}" -lt "${in_lz}" 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" -cdl "${in_lz}" > out 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" -cdt "${in_lz}" > out 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" -t -- nx_file 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" --help > /dev/null || test_failed $LINENO +"${LZIP}" -n1 -V > /dev/null || test_failed $LINENO +"${LZIP}" -m 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" -z 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" --bad_option 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" --t 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" --test=2 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" --output= 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${LZIP}" --output 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +printf "LZIP\001-.............................." | "${LZIP}" -t 2> /dev/null +printf "LZIP\002-.............................." | "${LZIP}" -t 2> /dev/null +printf "LZIP\001+.............................." | "${LZIP}" -t 2> /dev/null "${LZIPRECOVER}" -mq "${bad1_lz}" -if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi +[ $? = 1 ] || test_failed $LINENO "${LZIPRECOVER}" -Rq -if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi +[ $? = 1 ] || test_failed $LINENO "${LZIPRECOVER}" -sq -if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi +[ $? = 1 ] || test_failed $LINENO "${LZIPRECOVER}" -Aq in -if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi -"${LZIPRECOVER}" -Aq < in > copy.lz # /dev/null returns 1 on OS/2 -if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi +[ $? = 2 ] || test_failed $LINENO +"${LZIPRECOVER}" -Aq < in > copy.lz # /dev/null returns 1 on OS/2 +[ $? = 2 ] || test_failed $LINENO "${LZIPRECOVER}" -Aq < "${in_lz}" > copy.lz -if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi +[ $? = 2 ] || test_failed $LINENO "${LZIPRECOVER}" -Aq "${in_lz}" -if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi +[ $? = 1 ] || test_failed $LINENO "${LZIPRECOVER}" -Akq "${in_lzma}" -if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi +[ $? = 1 ] || test_failed $LINENO "${LZIPRECOVER}" -Ac "${in_lzma}" > copy.lz -if [ $? = 0 ] && cmp "${in_lz}" copy.lz ; then printf . ; else printf - ; fail=1 ; fi +{ [ $? = 0 ] && cmp "${in_lz}" copy.lz ; } || test_failed $LINENO "${LZIPRECOVER}" -A < "${in_lzma}" > copy.lz -if [ $? = 0 ] && cmp "${in_lz}" copy.lz ; then printf . ; else printf - ; fail=1 ; fi +{ [ $? = 0 ] && cmp "${in_lz}" copy.lz ; } || test_failed $LINENO rm -f copy.lz cat "${in_lzma}" > copy.lzma || framework_failure "${LZIPRECOVER}" -Ak copy.lzma -if [ $? = 0 ] && cmp "${in_lz}" copy.lz ; then printf . ; else printf - ; fail=1 ; fi +{ [ $? = 0 ] && cmp "${in_lz}" copy.lz ; } || test_failed $LINENO printf "to be overwritten" > copy.lz || framework_failure "${LZIPRECOVER}" -Af copy.lzma -if [ $? = 0 ] && cmp "${in_lz}" copy.lz ; then printf . ; else printf - ; fail=1 ; fi +{ [ $? = 0 ] && cmp "${in_lz}" copy.lz ; } || test_failed $LINENO rm -f copy.lz cat "${in_lzma}" > copy.tlz || framework_failure "${LZIPRECOVER}" -Ak copy.tlz -if [ $? = 0 ] && cmp "${in_lz}" copy.tar.lz ; then printf . ; else printf - ; fail=1 ; fi +{ [ $? = 0 ] && cmp "${in_lz}" copy.tar.lz ; } || test_failed $LINENO printf "to be overwritten" > copy.tar.lz || framework_failure "${LZIPRECOVER}" -Af copy.tlz -if [ $? = 0 ] && cmp "${in_lz}" copy.tar.lz ; then printf . ; else printf - ; fail=1 ; fi +{ [ $? = 0 ] && cmp "${in_lz}" copy.tar.lz ; } || test_failed $LINENO rm -f copy.tar.lz cat "${in_lzma}" > anyothername || framework_failure "${LZIPRECOVER}" -A -o copy - anyothername - < "${in_lzma}" -if [ $? = 0 ] && cmp "${in_lz}" copy.lz && cmp "${in_lz}" anyothername.lz ; then - printf . ; else printf - ; fail=1 ; fi +{ [ $? = 0 ] && cmp "${in_lz}" copy.lz && cmp "${in_lz}" anyothername.lz ; } || + test_failed $LINENO rm -f copy.lz anyothername.lz printf "\ntesting decompression..." -"${LZIP}" -t "${in_lz}" -if [ $? = 0 ] ; then printf . ; else printf - ; fail=1 ; fi -"${LZIP}" -cd "${in_lz}" > copy || fail=1 -cmp in copy || fail=1 -printf . +"${LZIP}" -lq "${in_lz}" || test_failed $LINENO +"${LZIP}" -t "${in_lz}" || test_failed $LINENO +"${LZIP}" -cd "${in_lz}" > copy || test_failed $LINENO +cmp in copy || test_failed $LINENO rm -f copy cat "${in_lz}" > copy.lz || framework_failure -"${LZIP}" -dk copy.lz || fail=1 -cmp in copy || fail=1 +"${LZIP}" -dk copy.lz || test_failed $LINENO +cmp in copy || test_failed $LINENO printf "to be overwritten" > copy || framework_failure -"${LZIP}" -dq copy.lz -if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIP}" -d copy.lz 2> /dev/null +[ $? = 1 ] || test_failed $LINENO "${LZIP}" -df copy.lz -if [ $? = 0 ] && [ ! -e copy.lz ] && cmp in copy ; then - printf . ; else printf - ; fail=1 ; fi +{ [ $? = 0 ] && [ ! -e copy.lz ] && cmp in copy ; } || test_failed $LINENO printf "to be overwritten" > copy || framework_failure -"${LZIP}" -df -o copy < "${in_lz}" || fail=1 -cmp in copy || fail=1 -printf . +"${LZIP}" -df -o copy < "${in_lz}" || test_failed $LINENO +cmp in copy || test_failed $LINENO rm -f copy cat "${in_lz}" > anyothername || framework_failure -"${LZIP}" -d -o copy - anyothername - < "${in_lz}" -if [ $? = 0 ] && cmp in copy && cmp in anyothername.out ; then - printf . ; else printf - ; fail=1 ; fi +"${LZIP}" -dv --output copy - anyothername - < "${in_lz}" 2> /dev/null +{ [ $? = 0 ] && cmp in copy && cmp in anyothername.out ; } || + test_failed $LINENO rm -f copy anyothername.out +"${LZIP}" -lq in "${in_lz}" +[ $? = 2 ] || test_failed $LINENO +"${LZIP}" -lq nx_file.lz "${in_lz}" +[ $? = 1 ] || test_failed $LINENO "${LZIP}" -tq in "${in_lz}" -if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi -"${LZIP}" -tq foo.lz "${in_lz}" -if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi +[ $? = 2 ] || test_failed $LINENO +"${LZIP}" -tq nx_file.lz "${in_lz}" +[ $? = 1 ] || test_failed $LINENO "${LZIP}" -cdq in "${in_lz}" > copy -if [ $? = 2 ] && cat copy in | cmp in - ; then printf . ; else printf - ; fail=1 ; fi -"${LZIP}" -cdq foo.lz "${in_lz}" > copy -if [ $? = 1 ] && cmp in copy ; then printf . ; else printf - ; fail=1 ; fi +{ [ $? = 2 ] && cat copy in | cmp in - ; } || test_failed $LINENO +"${LZIP}" -cdq nx_file.lz "${in_lz}" > copy +{ [ $? = 1 ] && cmp in copy ; } || test_failed $LINENO rm -f copy cat "${in_lz}" > copy.lz || framework_failure +for i in 1 2 3 4 5 6 7 ; do + printf "g" >> copy.lz || framework_failure + "${LZIP}" -alvv copy.lz "${in_lz}" > /dev/null 2>&1 + [ $? = 2 ] || test_failed $LINENO $i + "${LZIP}" -atvvvv copy.lz "${in_lz}" 2> /dev/null + [ $? = 2 ] || test_failed $LINENO $i +done "${LZIP}" -dq in copy.lz -if [ $? = 2 ] && [ -e copy.lz ] && [ ! -e copy ] && [ ! -e in.out ] ; then - printf . ; else printf - ; fail=1 ; fi -"${LZIP}" -dq foo.lz copy.lz -if [ $? = 1 ] && [ ! -e copy.lz ] && [ ! -e foo ] && cmp in copy ; then - printf . ; else printf - ; fail=1 ; fi +{ [ $? = 2 ] && [ -e copy.lz ] && [ ! -e copy ] && [ ! -e in.out ] ; } || + test_failed $LINENO +"${LZIP}" -dq nx_file.lz copy.lz +{ [ $? = 1 ] && [ ! -e copy.lz ] && [ ! -e nx_file ] && cmp in copy ; } || + test_failed $LINENO cat in in > in2 || framework_failure -cat "${in_lz}" "${in_lz}" > copy2.lz || framework_failure -"${LZIP}" -t copy2.lz || fail=1 -"${LZIP}" -cd copy2.lz > copy2 || fail=1 -cmp in2 copy2 || fail=1 -printf . - -printf "garbage" >> copy2.lz || framework_failure +cat "${in_lz}" "${in_lz}" > in2.lz || framework_failure +"${LZIP}" -lq in2.lz || test_failed $LINENO +"${LZIP}" -t in2.lz || test_failed $LINENO +"${LZIP}" -cd in2.lz > copy2 || test_failed $LINENO +cmp in2 copy2 || test_failed $LINENO + +cat in2.lz > copy2.lz || framework_failure +printf "\ngarbage" >> copy2.lz || framework_failure +"${LZIP}" -tvvvv copy2.lz 2> /dev/null || test_failed $LINENO rm -f copy2 +"${LZIP}" -aD0 -q copy2.lz +[ $? = 2 ] || test_failed $LINENO +"${LZIP}" -alq copy2.lz +[ $? = 2 ] || test_failed $LINENO "${LZIP}" -atq copy2.lz -if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi +[ $? = 2 ] || test_failed $LINENO "${LZIP}" -atq < copy2.lz -if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi +[ $? = 2 ] || test_failed $LINENO "${LZIP}" -adkq copy2.lz -if [ $? = 2 ] && [ ! -e copy2 ] ; then printf . ; else printf - ; fail=1 ; fi +{ [ $? = 2 ] && [ ! -e copy2 ] ; } || test_failed $LINENO "${LZIP}" -adkq -o copy2 < copy2.lz -if [ $? = 2 ] && [ ! -e copy2 ] ; then printf . ; else printf - ; fail=1 ; fi +{ [ $? = 2 ] && [ ! -e copy2 ] ; } || test_failed $LINENO printf "to be overwritten" > copy2 || framework_failure -"${LZIP}" -df copy2.lz || fail=1 -cmp in2 copy2 || fail=1 -printf . - -"${LZIPRECOVER}" -D ,18000 "${in_lz}" > copy || fail=1 -"${LZIPRECOVER}" -D 18000 "${in_lz}" >> copy || fail=1 -cmp in copy || fail=1 -printf . -"${LZIPRECOVER}" -D 21723-22120 -fo copy "${in_lz}" || fail=1 -cmp "${inD}" copy || fail=1 -"${LZIPRECOVER}" -D 21723,397 "${in_lz}" > copy || fail=1 -cmp "${inD}" copy || fail=1 -printf . +"${LZIP}" -df copy2.lz || test_failed $LINENO +cmp in2 copy2 || test_failed $LINENO + +"${LZIPRECOVER}" -D ,18000 "${in_lz}" > copy || test_failed $LINENO +"${LZIPRECOVER}" -D 18000 "${in_lz}" >> copy || test_failed $LINENO +cmp in copy || test_failed $LINENO +"${LZIPRECOVER}" -D 21723-22120 -fo copy "${in_lz}" || test_failed $LINENO +cmp "${inD}" copy || test_failed $LINENO +"${LZIPRECOVER}" -D 21723,397 "${in_lz}" > copy || test_failed $LINENO +cmp "${inD}" copy || test_failed $LINENO "${LZIPRECOVER}" -D0 -iq "${f6b1_lz}" -fo copy -if [ $? = 2 ] && cmp "${f6b1}" copy ; then printf . ; else printf - ; fail=1 ; fi +{ [ $? = 2 ] && cmp "${f6b1}" copy ; } || test_failed $LINENO "${LZIPRECOVER}" -D0 -iq "${f6b1_lz}" > copy -if [ $? = 2 ] && cmp "${f6b1}" copy ; then printf . ; else printf - ; fail=1 ; fi +{ [ $? = 2 ] && cmp "${f6b1}" copy ; } || test_failed $LINENO + +printf "\ntesting bad input..." + +cat "${in_lz}" "${in_lz}" "${in_lz}" > in3.lz || framework_failure +if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null && + [ -e trunc.lz ] && cmp in2.lz trunc.lz > /dev/null 2>&1 ; then + for i in 6 20 14734 14753 14754 14755 14756 14757 14758 ; do + dd if=in3.lz of=trunc.lz bs=$i count=1 2> /dev/null + "${LZIP}" -lq trunc.lz + [ $? = 2 ] || test_failed $LINENO $i + "${LZIP}" -t trunc.lz 2> /dev/null + [ $? = 2 ] || test_failed $LINENO $i + "${LZIP}" -tq < trunc.lz + [ $? = 2 ] || test_failed $LINENO $i + "${LZIP}" -cdq trunc.lz > out + [ $? = 2 ] || test_failed $LINENO $i + "${LZIP}" -dq < trunc.lz > out + [ $? = 2 ] || test_failed $LINENO $i + done +else + printf "\nwarning: skipping truncation test: 'dd' does not work on your system." +fi + +cat "${in_lz}" > ingin.lz || framework_failure +printf "g" >> ingin.lz || framework_failure +cat "${in_lz}" >> ingin.lz || framework_failure +"${LZIP}" -lq ingin.lz +[ $? = 2 ] || test_failed $LINENO +"${LZIP}" -t ingin.lz || test_failed $LINENO +"${LZIP}" -cd ingin.lz > copy || test_failed $LINENO +cmp in copy || test_failed $LINENO +"${LZIP}" -t < ingin.lz || test_failed $LINENO +"${LZIP}" -d < ingin.lz > copy || test_failed $LINENO +cmp in copy || test_failed $LINENO printf "\ntesting --merge..." rm -f copy.lz "${LZIPRECOVER}" -m -o copy.lz "${fox6_lz}" "${f6b1_lz}" -if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi +{ [ $? = 0 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO "${LZIPRECOVER}" -m -o copy.lz "${f6b1_lz}" "${fox6_lz}" -if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi +{ [ $? = 0 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO "${LZIPRECOVER}" -m -o copy.lz "${bad1_lz}" "${bad2_lz}" "${bad1_lz}" -q -if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi +{ [ $? = 2 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO "${LZIPRECOVER}" -m -o copy.lz "${bad1_lz}" "${bad2_lz}" "${bad2_lz}" -q -if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi +{ [ $? = 2 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO cat "${bad2_lz}" > bad2.lz || framework_failure "${LZIPRECOVER}" -m -o copy.lz "${bad1_lz}" "${bad2_lz}" bad2.lz -q -if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi +{ [ $? = 2 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO rm -f bad2.lz "${LZIPRECOVER}" -m -o copy.lz "${f6b1_lz}" "${f6b5_lz}" -q -if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi +{ [ $? = 2 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO "${LZIPRECOVER}" -m -o copy.lz "${f6b3_lz}" "${f6b5_lz}" -q -if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi +{ [ $? = 2 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO "${LZIPRECOVER}" -m -o copy.lz "${bad3_lz}" "${bad4_lz}" -q -if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi +{ [ $? = 2 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO -"${LZIPRECOVER}" -mf -o copy.lz "${f6b1_lz}" "${f6b4_lz}" || fail=1 -cmp "${fox6_lz}" copy.lz || fail=1 -printf . -"${LZIPRECOVER}" -mf -o copy.lz "${f6b4_lz}" "${f6b1_lz}" || fail=1 -cmp "${fox6_lz}" copy.lz || fail=1 -printf . +"${LZIPRECOVER}" -mf -o copy.lz "${f6b1_lz}" "${f6b4_lz}" || test_failed $LINENO +cmp "${fox6_lz}" copy.lz || test_failed $LINENO +"${LZIPRECOVER}" -mf -o copy.lz "${f6b4_lz}" "${f6b1_lz}" || test_failed $LINENO +cmp "${fox6_lz}" copy.lz || test_failed $LINENO for i in "${f6b1_lz}" "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" ; do - "${LZIPRECOVER}" -mf -o copy.lz "${f6b2_lz}" "${i}" || fail=1 - cmp "${fox6_lz}" copy.lz || fail=1 - "${LZIPRECOVER}" -mf -o copy.lz "${i}" "${f6b2_lz}" || fail=1 - cmp "${fox6_lz}" copy.lz || fail=1 - printf . + "${LZIPRECOVER}" -mf -o copy.lz "${f6b2_lz}" "$i" || + test_failed $LINENO "$i" + cmp "${fox6_lz}" copy.lz || test_failed $LINENO "$i" + "${LZIPRECOVER}" -mf -o copy.lz "$i" "${f6b2_lz}" || + test_failed $LINENO "$i" + cmp "${fox6_lz}" copy.lz || test_failed $LINENO "$i" done for i in "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" ; do - "${LZIPRECOVER}" -mf -o copy.lz "${f6b1_lz}" "${f6b2_lz}" "${i}" || fail=1 - cmp "${fox6_lz}" copy.lz || fail=1 - "${LZIPRECOVER}" -mf -o copy.lz "${f6b1_lz}" "${i}" "${f6b2_lz}" || fail=1 - cmp "${fox6_lz}" copy.lz || fail=1 - "${LZIPRECOVER}" -mf -o copy.lz "${f6b2_lz}" "${f6b1_lz}" "${i}" || fail=1 - cmp "${fox6_lz}" copy.lz || fail=1 - "${LZIPRECOVER}" -mf -o copy.lz "${f6b2_lz}" "${i}" "${f6b1_lz}" || fail=1 - cmp "${fox6_lz}" copy.lz || fail=1 - "${LZIPRECOVER}" -mf -o copy.lz "${i}" "${f6b1_lz}" "${f6b2_lz}" || fail=1 - cmp "${fox6_lz}" copy.lz || fail=1 - "${LZIPRECOVER}" -mf -o copy.lz "${i}" "${f6b2_lz}" "${f6b1_lz}" || fail=1 - cmp "${fox6_lz}" copy.lz || fail=1 - printf . + "${LZIPRECOVER}" -mf -o copy.lz "${f6b1_lz}" "${f6b2_lz}" "$i" || + test_failed $LINENO "$i" + cmp "${fox6_lz}" copy.lz || test_failed $LINENO "$i" + "${LZIPRECOVER}" -mf -o copy.lz "${f6b1_lz}" "$i" "${f6b2_lz}" || + test_failed $LINENO "$i" + cmp "${fox6_lz}" copy.lz || test_failed $LINENO "$i" + "${LZIPRECOVER}" -mf -o copy.lz "${f6b2_lz}" "${f6b1_lz}" "$i" || + test_failed $LINENO "$i" + cmp "${fox6_lz}" copy.lz || test_failed $LINENO "$i" + "${LZIPRECOVER}" -mf -o copy.lz "${f6b2_lz}" "$i" "${f6b1_lz}" || + test_failed $LINENO "$i" + cmp "${fox6_lz}" copy.lz || test_failed $LINENO "$i" + "${LZIPRECOVER}" -mf -o copy.lz "$i" "${f6b1_lz}" "${f6b2_lz}" || + test_failed $LINENO "$i" + cmp "${fox6_lz}" copy.lz || test_failed $LINENO "$i" + "${LZIPRECOVER}" -mf -o copy.lz "$i" "${f6b2_lz}" "${f6b1_lz}" || + test_failed $LINENO "$i" + cmp "${fox6_lz}" copy.lz || test_failed $LINENO "$i" done -"${LZIPRECOVER}" -mf -o copy.lz "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" || fail=1 -cmp "${fox6_lz}" copy.lz || fail=1 -printf . -"${LZIPRECOVER}" -mf -o copy.lz "${f6b1_lz}" "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" || fail=1 -cmp "${fox6_lz}" copy.lz || fail=1 -printf . -"${LZIPRECOVER}" -mf -o copy.lz "${f6b2_lz}" "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" || fail=1 -cmp "${fox6_lz}" copy.lz || fail=1 -printf . -"${LZIPRECOVER}" -mf -o copy.lz "${f6b1_lz}" "${f6b2_lz}" "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" || fail=1 -cmp "${fox6_lz}" copy.lz || fail=1 -printf . - -"${LZIPRECOVER}" -mf -o copy.lz "${bad1_lz}" "${bad2_lz}" || fail=1 -cmp "${in_lz}" copy.lz || fail=1 -printf . -"${LZIPRECOVER}" -mf -o copy.lz "${bad2_lz}" "${bad1_lz}" || fail=1 -cmp "${in_lz}" copy.lz || fail=1 -printf . +"${LZIPRECOVER}" -mf -o copy.lz "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" || + test_failed $LINENO +cmp "${fox6_lz}" copy.lz || test_failed $LINENO +"${LZIPRECOVER}" -mf -o copy.lz "${f6b1_lz}" "${f6b3_lz}" "${f6b4_lz}" \ +"${f6b5_lz}" || test_failed $LINENO +cmp "${fox6_lz}" copy.lz || test_failed $LINENO +"${LZIPRECOVER}" -mf -o copy.lz "${f6b2_lz}" "${f6b3_lz}" "${f6b4_lz}" \ +"${f6b5_lz}" || test_failed $LINENO +cmp "${fox6_lz}" copy.lz || test_failed $LINENO +"${LZIPRECOVER}" -mf -o copy.lz "${f6b1_lz}" "${f6b2_lz}" "${f6b3_lz}" \ +"${f6b4_lz}" "${f6b5_lz}" || test_failed $LINENO +cmp "${fox6_lz}" copy.lz || test_failed $LINENO + +"${LZIPRECOVER}" -mf -o copy.lz "${bad1_lz}" "${bad2_lz}" || test_failed $LINENO +cmp "${in_lz}" copy.lz || test_failed $LINENO +"${LZIPRECOVER}" -mf -o copy.lz "${bad2_lz}" "${bad1_lz}" || test_failed $LINENO +cmp "${in_lz}" copy.lz || test_failed $LINENO cat "${bad1_lz}" "${in_lz}" "${bad1_lz}" "${bad1_lz}" > bad11.lz || framework_failure cat "${bad1_lz}" "${in_lz}" "${bad2_lz}" "${in_lz}" > bad12.lz || framework_failure cat "${bad2_lz}" "${in_lz}" "${bad2_lz}" "${bad2_lz}" > bad22.lz || framework_failure cat "${in_lz}" "${in_lz}" "${in_lz}" "${in_lz}" > copy4.lz || framework_failure -"${LZIPRECOVER}" -mf -o out4.lz bad11.lz bad12.lz bad22.lz || fail=1 -cmp out4.lz copy4.lz || fail=1 -printf . -"${LZIPRECOVER}" -mf -o out4.lz bad11.lz bad22.lz bad12.lz || fail=1 -cmp out4.lz copy4.lz || fail=1 -printf . -"${LZIPRECOVER}" -mf -o out4.lz bad12.lz bad11.lz bad22.lz || fail=1 -cmp out4.lz copy4.lz || fail=1 -printf . -"${LZIPRECOVER}" -mf -o out4.lz bad12.lz bad22.lz bad11.lz || fail=1 -cmp out4.lz copy4.lz || fail=1 -printf . -"${LZIPRECOVER}" -mf -o out4.lz bad22.lz bad11.lz bad12.lz || fail=1 -cmp out4.lz copy4.lz || fail=1 -printf . -"${LZIPRECOVER}" -mf -o out4.lz bad22.lz bad12.lz bad11.lz || fail=1 -cmp out4.lz copy4.lz || fail=1 -printf . +"${LZIPRECOVER}" -mf -o out4.lz bad11.lz bad12.lz bad22.lz || test_failed $LINENO +cmp out4.lz copy4.lz || test_failed $LINENO +"${LZIPRECOVER}" -mf -o out4.lz bad11.lz bad22.lz bad12.lz || test_failed $LINENO +cmp out4.lz copy4.lz || test_failed $LINENO +"${LZIPRECOVER}" -mf -o out4.lz bad12.lz bad11.lz bad22.lz || test_failed $LINENO +cmp out4.lz copy4.lz || test_failed $LINENO +"${LZIPRECOVER}" -mf -o out4.lz bad12.lz bad22.lz bad11.lz || test_failed $LINENO +cmp out4.lz copy4.lz || test_failed $LINENO +"${LZIPRECOVER}" -mf -o out4.lz bad22.lz bad11.lz bad12.lz || test_failed $LINENO +cmp out4.lz copy4.lz || test_failed $LINENO +"${LZIPRECOVER}" -mf -o out4.lz bad22.lz bad12.lz bad11.lz || test_failed $LINENO +cmp out4.lz copy4.lz || test_failed $LINENO for i in "${bad1_lz}" "${bad2_lz}" ; do for j in "${bad3_lz}" "${bad4_lz}" "${bad5_lz}" ; do - "${LZIPRECOVER}" -mf -o copy.lz "${i}" "${j}" || fail=1 - cmp "${in_lz}" copy.lz || fail=1 - "${LZIPRECOVER}" -mf -o copy.lz "${j}" "${i}" || fail=1 - cmp "${in_lz}" copy.lz || fail=1 + "${LZIPRECOVER}" -mf -o copy.lz "$i" "$j" || + test_failed $LINENO "$i $j" + cmp "${in_lz}" copy.lz || test_failed $LINENO "$i $j" + "${LZIPRECOVER}" -mf -o copy.lz "$j" "$i" || + test_failed $LINENO "$i $j" + cmp "${in_lz}" copy.lz || test_failed $LINENO "$i $j" done - printf . done -"${LZIPRECOVER}" -mf -o copy.lz "${bad3_lz}" "${bad4_lz}" "${bad5_lz}" || fail=1 -cmp "${in_lz}" copy.lz || fail=1 -printf . -"${LZIPRECOVER}" -mf -o copy.lz "${bad3_lz}" "${bad5_lz}" "${bad4_lz}" || fail=1 -cmp "${in_lz}" copy.lz || fail=1 -printf . -"${LZIPRECOVER}" -mf -o copy.lz "${bad4_lz}" "${bad3_lz}" "${bad5_lz}" || fail=1 -cmp "${in_lz}" copy.lz || fail=1 -printf . -"${LZIPRECOVER}" -mf -o copy.lz "${bad4_lz}" "${bad5_lz}" "${bad3_lz}" || fail=1 -cmp "${in_lz}" copy.lz || fail=1 -printf . -"${LZIPRECOVER}" -mf -o copy.lz "${bad5_lz}" "${bad3_lz}" "${bad4_lz}" || fail=1 -cmp "${in_lz}" copy.lz || fail=1 -printf . -"${LZIPRECOVER}" -mf -o copy.lz "${bad5_lz}" "${bad4_lz}" "${bad3_lz}" || fail=1 -cmp "${in_lz}" copy.lz || fail=1 -printf . +"${LZIPRECOVER}" -mf -o copy.lz "${bad3_lz}" "${bad4_lz}" "${bad5_lz}" || + test_failed $LINENO +cmp "${in_lz}" copy.lz || test_failed $LINENO +"${LZIPRECOVER}" -mf -o copy.lz "${bad3_lz}" "${bad5_lz}" "${bad4_lz}" || + test_failed $LINENO +cmp "${in_lz}" copy.lz || test_failed $LINENO +"${LZIPRECOVER}" -mf -o copy.lz "${bad4_lz}" "${bad3_lz}" "${bad5_lz}" || + test_failed $LINENO +cmp "${in_lz}" copy.lz || test_failed $LINENO +"${LZIPRECOVER}" -mf -o copy.lz "${bad4_lz}" "${bad5_lz}" "${bad3_lz}" || + test_failed $LINENO +cmp "${in_lz}" copy.lz || test_failed $LINENO +"${LZIPRECOVER}" -mf -o copy.lz "${bad5_lz}" "${bad3_lz}" "${bad4_lz}" || + test_failed $LINENO +cmp "${in_lz}" copy.lz || test_failed $LINENO +"${LZIPRECOVER}" -mf -o copy.lz "${bad5_lz}" "${bad4_lz}" "${bad3_lz}" || + test_failed $LINENO +cmp "${in_lz}" copy.lz || test_failed $LINENO cat "${bad3_lz}" "${bad4_lz}" "${bad5_lz}" "${in_lz}" > bad345.lz || framework_failure cat "${bad4_lz}" "${bad5_lz}" "${bad3_lz}" "${in_lz}" > bad453.lz || framework_failure cat "${bad5_lz}" "${bad3_lz}" "${bad4_lz}" "${in_lz}" > bad534.lz || framework_failure cat "${in_lz}" "${in_lz}" "${in_lz}" "${in_lz}" > copy4.lz || framework_failure -"${LZIPRECOVER}" -mf -o out4.lz bad345.lz bad453.lz bad534.lz || fail=1 -cmp out4.lz copy4.lz || fail=1 -printf . -"${LZIPRECOVER}" -mf -o out4.lz bad345.lz bad534.lz bad453.lz || fail=1 -cmp out4.lz copy4.lz || fail=1 -printf . -"${LZIPRECOVER}" -mf -o out4.lz bad453.lz bad345.lz bad534.lz || fail=1 -cmp out4.lz copy4.lz || fail=1 -printf . -"${LZIPRECOVER}" -mf -o out4.lz bad453.lz bad534.lz bad345.lz || fail=1 -cmp out4.lz copy4.lz || fail=1 -printf . -"${LZIPRECOVER}" -mf -o out4.lz bad534.lz bad345.lz bad453.lz || fail=1 -cmp out4.lz copy4.lz || fail=1 -printf . -"${LZIPRECOVER}" -mf -o out4.lz bad534.lz bad453.lz bad345.lz || fail=1 -cmp out4.lz copy4.lz || fail=1 -printf . +"${LZIPRECOVER}" -mf -o out4.lz bad345.lz bad453.lz bad534.lz || + test_failed $LINENO +cmp out4.lz copy4.lz || test_failed $LINENO +"${LZIPRECOVER}" -mf -o out4.lz bad345.lz bad534.lz bad453.lz || + test_failed $LINENO +cmp out4.lz copy4.lz || test_failed $LINENO +"${LZIPRECOVER}" -mf -o out4.lz bad453.lz bad345.lz bad534.lz || + test_failed $LINENO +cmp out4.lz copy4.lz || test_failed $LINENO +"${LZIPRECOVER}" -mf -o out4.lz bad453.lz bad534.lz bad345.lz || + test_failed $LINENO +cmp out4.lz copy4.lz || test_failed $LINENO +"${LZIPRECOVER}" -mf -o out4.lz bad534.lz bad345.lz bad453.lz || + test_failed $LINENO +cmp out4.lz copy4.lz || test_failed $LINENO +"${LZIPRECOVER}" -mf -o out4.lz bad534.lz bad453.lz bad345.lz || + test_failed $LINENO +cmp out4.lz copy4.lz || test_failed $LINENO printf "\ntesting --repair..." rm -f copy.lz -"${LZIPRECOVER}" -R -o copy.lz "${fox6_lz}" || fail=1 -if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIPRECOVER}" -R -o copy.lz "${fox6_lz}" || test_failed $LINENO +{ [ $? = 0 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO "${LZIPRECOVER}" -R -o copy.lz "${bad2_lz}" -q -if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi +{ [ $? = 2 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO "${LZIPRECOVER}" -R -o copy.lz "${bad3_lz}" -q -if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi +{ [ $? = 2 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO "${LZIPRECOVER}" -R -o copy.lz "${bad4_lz}" -q -if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi -"${LZIPRECOVER}" -Rf -o copy.lz "${f6b1_lz}" || fail=1 -cmp "${fox6_lz}" copy.lz || fail=1 -printf . -"${LZIPRECOVER}" -Rf -o copy.lz "${bad1_lz}" || fail=1 -cmp "${in_lz}" copy.lz || fail=1 -printf . +{ [ $? = 2 ] && [ ! -e copy.lz ] ; } || test_failed $LINENO +"${LZIPRECOVER}" -Rf -o copy.lz "${f6b1_lz}" || test_failed $LINENO +cmp "${fox6_lz}" copy.lz || test_failed $LINENO +"${LZIPRECOVER}" -Rf -o copy.lz "${bad1_lz}" || test_failed $LINENO +cmp "${in_lz}" copy.lz || test_failed $LINENO cat "${f6b1_lz}" > copy.tar.lz || framework_failure -"${LZIPRECOVER}" -R copy.tar.lz || fail=1 -if [ $? = 0 ] && [ -e copy_fixed.tar.lz ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIPRECOVER}" -R copy.tar.lz || test_failed $LINENO +{ [ $? = 0 ] && [ -e copy_fixed.tar.lz ] ; } || test_failed $LINENO mv copy.tar.lz copy.lz || framework_failure -"${LZIPRECOVER}" -R copy.lz || fail=1 -if [ $? = 0 ] && [ -e copy_fixed.lz ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIPRECOVER}" -R copy.lz || test_failed $LINENO +{ [ $? = 0 ] && [ -e copy_fixed.lz ] ; } || test_failed $LINENO mv copy.lz copy.tlz || framework_failure -"${LZIPRECOVER}" -R copy.tlz || fail=1 -if [ $? = 0 ] && [ -e copy_fixed.tlz ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIPRECOVER}" -R copy.tlz || test_failed $LINENO +{ [ $? = 0 ] && [ -e copy_fixed.tlz ] ; } || test_failed $LINENO printf "\ntesting --split..." cat "${in_lz}" "${in_lz}" "${in_lz}" > copy || framework_failure -printf "garbage" >> copy || fail=1 -"${LZIPRECOVER}" -s -o copy.lz copy || fail=1 -printf . +printf "garbage" >> copy || framework_failure +"${LZIPRECOVER}" -s -o copy.lz copy || test_failed $LINENO for i in 1 2 3 ; do - "${LZIPRECOVER}" -cd rec${i}copy.lz > copy || fail=1 - cmp in copy || fail=1 - printf . + "${LZIPRECOVER}" -cd rec${i}copy.lz > copy || test_failed $LINENO $i + cmp in copy || test_failed $LINENO $i done echo diff --git a/testsuite/test_bad4.lz b/testsuite/test_bad4.lz Binary files differindex 361df5e..ddb0d6b 100644 --- a/testsuite/test_bad4.lz +++ b/testsuite/test_bad4.lz diff --git a/testsuite/test_bad5.lz b/testsuite/test_bad5.lz Binary files differindex 1ed1566..6fab91c 100644 --- a/testsuite/test_bad5.lz +++ b/testsuite/test_bad5.lz diff --git a/unzcrash.cc b/unzcrash.cc index 3970638..9a32b82 100644 --- a/unzcrash.cc +++ b/unzcrash.cc @@ -1,6 +1,6 @@ /* Unzcrash - Tests robustness of decompressors to corrupted data. Inspired by unzcrash.c from Julian Seward's bzip2. - Copyright (C) 2008-2016 Antonio Diaz Diaz. + Copyright (C) 2008-2017 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -40,12 +40,16 @@ #error "Environments where CHAR_BIT != 8 are not supported." #endif +#ifndef INT64_MAX +#define INT64_MAX 0x7FFFFFFFFFFFFFFFLL +#endif + namespace { const char * const Program_name = "Unzcrash"; const char * const program_name = "unzcrash"; -const char * const program_year = "2016"; +const char * const program_year = "2017"; const char * invocation_name = 0; int verbosity = 0; @@ -55,14 +59,27 @@ void show_help() { std::printf( "%s - Tests robustness of decompressors to corrupted data.\n", Program_name ); std::printf( "\nUsage: %s [options] \"lzip -tv\" filename.lz\n", invocation_name ); - std::printf( "\nThis program reads the specified file and then repeatedly decompresses\n" - "it, increasing 256 times each byte of the compressed data, so as to test\n" - "all possible one-byte errors. This should not cause any invalid memory\n" - "accesses. If it does, please, report it as a bug.\n" + std::printf( "\nBy default, unzcrash reads the specified file and then repeatedly\n" + "decompresses it, increasing 256 times each byte of the compressed data,\n" + "so as to test all possible one-byte errors.\n" + "\nIf the '--block' option is given, unzcrash reads the specified file\n" + "and then repeatedly decompresses it, setting all bytes in each\n" + "successive block to the value given, so as to test all possible full\n" + "sector errors.\n" + "\nIf the '--truncate' option is given, unzcrash reads the specified\n" + "file and then repeatedly decompresses it, truncating the file to\n" + "increasing lengths, so as to test all possible truncation points.\n" + "\nNone of the three test modes described above should cause any invalid\n" + "memory accesses. If any of them does, please, report it as a bug to the\n" + "maintainers of the decompressor being tested.\n" "\nIf the decompressor returns with zero status, unzcrash compares the\n" "output of the decompressor for the original and corrupt files. If the\n" - "outputs differ, it means that the decompressor failed to recognize the\n" - "corruption and produced garbage output. Please, report it as a bug.\n" + "outputs differ, it means that the decompressor returned a false\n" + "negative; it failed to recognize the corruption and produced garbage\n" + "output. The only exception is when a multimember file is truncated just\n" + "after the last byte of a member, producing a shorter but valid\n" + "compressed file. Except in this latter case, please, report any false\n" + "negative as a bug.\n" "\nIn order to compare the outputs, unzcrash needs a zcmp program able to\n" "understand the format being tested. For example the one provided by zutils.\n" "Use '--zcmp=false' to disable comparisons.\n" @@ -72,6 +89,7 @@ void show_help() " -b, --bits=<range> test N-bit errors instead of full byte\n" " -B, --block[=<size>][,<val>] test blocks of given size [512,0]\n" " -d, --delta=<n> test one of every n bytes/blocks/truncations\n" + " -e, --set-byte=<pos>,<val> set byte at position <pos> to value <val>\n" " -p, --position=<bytes> first byte position to test [default 0]\n" " -q, --quiet suppress all messages\n" " -s, --size=<bytes> number of byte positions to test [all]\n" @@ -124,12 +142,13 @@ void internal_error( const char * const msg ) } -long getnum( const char * const ptr, const long llimit, const long ulimit, - const bool comma = false ) +long long getnum( const char * const ptr, const long long llimit = -LLONG_MAX, + const long long ulimit = LLONG_MAX, + const char ** const tailp = 0 ) { char * tail; errno = 0; - long result = strtol( ptr, &tail, 0 ); + long long result = strtoll( ptr, &tail, 0 ); if( tail == ptr ) { show_error( "Bad or missing numerical argument.", 0, true ); @@ -138,11 +157,14 @@ long getnum( const char * const ptr, const long llimit, const long ulimit, if( !errno && tail[0] ) { - const int factor = ( tail[1] == 'i' ) ? 1024 : 1000; + char * const p = tail++; + int factor; + bool bsuf; // 'B' suffix is present + if( tail[0] == 'i' ) { ++tail; factor = 1024; } else factor = 1000; + if( tail[0] == 'B' ) { ++tail; bsuf = true; } else bsuf = false; int exponent = -1; // -1 = bad multiplier - switch( tail[0] ) + switch( *p ) { - case ',': if( comma ) exponent = 0; break; case 'Y': exponent = 8; break; case 'Z': exponent = 7; break; case 'E': exponent = 6; break; @@ -152,6 +174,8 @@ long getnum( const char * const ptr, const long llimit, const long ulimit, case 'M': exponent = 2; break; case 'K': if( factor == 1024 ) exponent = 1; break; case 'k': if( factor == 1000 ) exponent = 1; break; + case 'B': if( factor == 1000 && !bsuf ) exponent = 0; break; + default : if( tailp ) { tail = p; exponent = 0; } break; } if( exponent < 0 ) { @@ -160,7 +184,7 @@ long getnum( const char * const ptr, const long llimit, const long ulimit, } for( int i = 0; i < exponent; ++i ) { - if( LONG_MAX / factor >= std::labs( result ) ) result *= factor; + if( LLONG_MAX / factor >= std::labs( result ) ) result *= factor; else { errno = ERANGE; break; } } } @@ -170,23 +194,64 @@ long getnum( const char * const ptr, const long llimit, const long ulimit, show_error( "Numerical argument out of limits." ); std::exit( 1 ); } + if( tailp ) *tailp = tail; return result; } void parse_block( const char * const ptr, long & size, uint8_t & value ) { - const char * const ptr2 = std::strchr( ptr, ',' ); + const char * tail = ptr; + + if( tail[0] != ',' ) + size = getnum( ptr, 1, INT_MAX, &tail ); + if( tail[0] == ',' ) + value = getnum( tail + 1, 0, 255 ); + else if( tail[0] ) + { + show_error( "Bad separator in argument of '--block'", 0, true ); + std::exit( 1 ); + } + } + + +struct Bad_byte + { + enum Mode { literal, delta, flip }; + long long pos; + Mode mode; + uint8_t value; - if( !ptr2 || ptr2 != ptr ) - size = getnum( ptr, 1, INT_MAX, true ); - if( ptr2 ) - value = getnum( ptr2 + 1, 0, 255 ); + Bad_byte() : pos( -1 ), mode( literal ), value( 0 ) {} + uint8_t operator()( const uint8_t old_value ) const + { + if( mode == delta ) return old_value + value; + if( mode == flip ) return old_value ^ value; + return value; + } + }; + + +// Recognized formats: <pos>,<value> <pos>,+<value> <pos>,f<value> +// +void parse_pos_value( const char * const ptr, Bad_byte & bad_byte ) + { + const char * tail; + bad_byte.pos = getnum( ptr, 0, INT64_MAX, &tail ); + if( tail[0] != ',' ) + { + show_error( "Bad separator between <pos> and <val>.", 0, true ); + std::exit( 1 ); + } + if( tail[1] == '+' ) { ++tail; bad_byte.mode = Bad_byte::delta; } + else if( tail[1] == 'f' ) { ++tail; bad_byte.mode = Bad_byte::flip; } + else bad_byte.mode = Bad_byte::literal; + bad_byte.value = getnum( tail + 1, 0, 255 ); } /* Returns the address of a malloc'd buffer containing the file data and - its size in '*size'. + the file size in '*size'. In case of error, returns 0 and does not modify '*size'. */ uint8_t * read_file( const char * const name, long * const size ) @@ -309,6 +374,7 @@ int main( const int argc, const char * const argv[] ) enum Mode { m_block, m_byte, m_truncate }; const char * mode_str[3] = { "block", "byte", "size" }; Bitset8 bits; // if Bitset8::parse not called test full byte + Bad_byte bad_byte; const char * zcmp_program = "zcmp"; long pos = 0; long max_size = LONG_MAX; @@ -324,6 +390,7 @@ int main( const int argc, const char * const argv[] ) { 'b', "bits", Arg_parser::yes }, { 'B', "block", Arg_parser::maybe }, { 'd', "delta", Arg_parser::yes }, + { 'e', "set-byte", Arg_parser::yes }, { 'p', "position", Arg_parser::yes }, { 'q', "quiet", Arg_parser::no }, { 's', "size", Arg_parser::yes }, @@ -331,7 +398,7 @@ int main( const int argc, const char * const argv[] ) { 'v', "verbose", Arg_parser::no }, { 'V', "version", Arg_parser::no }, { 'z', "zcmp", Arg_parser::yes }, - { 0 , 0, Arg_parser::no } }; + { 0 , 0, Arg_parser::no } }; const Arg_parser parser( argc, argv, options ); if( parser.error().size() ) // bad option @@ -350,6 +417,7 @@ int main( const int argc, const char * const argv[] ) case 'B': if( arg[0] ) parse_block( arg, block_size, block_value ); program_mode = m_block; break; case 'd': delta = getnum( arg, 1, INT_MAX ); break; + case 'e': parse_pos_value( arg, bad_byte ); break; case 'p': pos = getnum( arg, -LONG_MAX, LONG_MAX ); break; case 'q': verbosity = -1; break; case 's': max_size = getnum( arg, -LONG_MAX, LONG_MAX ); break; @@ -414,6 +482,11 @@ int main( const int argc, const char * const argv[] ) { show_error( "Nothing to do; domain is empty." ); return 0; } if( max_size < 0 ) max_size += file_size - pos; const long end = ( ( max_size < file_size - pos ) ? pos + max_size : file_size ); + if( bad_byte.pos >= file_size ) + { show_error( "Position of '--set-byte' is beyond end of file." ); + return 1; } + if( bad_byte.pos >= 0 ) + buffer[bad_byte.pos] = bad_byte( buffer[bad_byte.pos] ); long positions = 0, decompressions = 0, successes = 0, failed_comparisons = 0; if( program_mode == m_truncate ) for( long i = pos; i < end; i += std::min( delta, end - i ) ) |