diff options
-rw-r--r-- | ChangeLog | 21 | ||||
-rw-r--r-- | INSTALL | 7 | ||||
-rw-r--r-- | Makefile.in | 5 | ||||
-rw-r--r-- | NEWS | 83 | ||||
-rw-r--r-- | README | 12 | ||||
-rw-r--r-- | alone_to_lz.cc | 6 | ||||
-rw-r--r-- | arg_parser.cc | 15 | ||||
-rw-r--r-- | arg_parser.h | 23 | ||||
-rw-r--r-- | common.h | 43 | ||||
-rwxr-xr-x | configure | 6 | ||||
-rw-r--r-- | decoder.cc | 14 | ||||
-rw-r--r-- | decoder.h | 109 | ||||
-rw-r--r-- | doc/lziprecover.1 | 20 | ||||
-rw-r--r-- | doc/lziprecover.info | 175 | ||||
-rw-r--r-- | doc/lziprecover.texi | 138 | ||||
-rw-r--r-- | dump_remove.cc | 4 | ||||
-rw-r--r-- | list.cc | 67 | ||||
-rw-r--r-- | lunzcrash.cc | 164 | ||||
-rw-r--r-- | lzip.h | 39 | ||||
-rw-r--r-- | lzip_index.cc | 4 | ||||
-rw-r--r-- | lzip_index.h | 2 | ||||
-rw-r--r-- | main.cc | 236 | ||||
-rw-r--r-- | main_common.cc | 93 | ||||
-rw-r--r-- | md5.cc | 2 | ||||
-rw-r--r-- | md5.h | 2 | ||||
-rw-r--r-- | merge.cc | 2 | ||||
-rw-r--r-- | mtester.cc | 28 | ||||
-rw-r--r-- | mtester.h | 123 | ||||
-rw-r--r-- | nrep_stats.cc | 2 | ||||
-rw-r--r-- | range_dec.cc | 2 | ||||
-rw-r--r-- | repair.cc | 39 | ||||
-rw-r--r-- | reproduce.cc | 13 | ||||
-rw-r--r-- | split.cc | 2 | ||||
-rwxr-xr-x | testsuite/check.sh | 15 | ||||
-rw-r--r-- | unzcrash.cc | 344 |
35 files changed, 1161 insertions, 699 deletions
@@ -1,3 +1,20 @@ +2022-01-21 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.23 released. + * Decompression time has been reduced by 5-12% depending on the file. + * main_common.cc (getnum): Show option name and valid range if error. + * dump_remove.cc (dump_members): Check tty except for --dump=tdata. + * Option '-U, --unzcrash' now takes an argument ('1' or 'B<size>'). + * mtester.cc (duplicate_buffer): Use an external buffer. + * repair.cc (debug_decompress): Continue decoding on CRC mismatch. + * unzcrash.cc: Make zcmp_command a string of unlimited size. + Use execvp instead of popen to avoid invoking /bin/sh. + Print byte or block position in messages. + * New file common.h. + * Improve several descriptions in manual, '--help', and man page. + * lziprecover.texi: Change GNU Texinfo category to 'Compression'. + (Reported by Alfred M. Szmidt). + 2021-01-02 Antonio Diaz Diaz <antonio@gnu.org> * Version 1.22 released. @@ -45,7 +62,7 @@ * Print '\n' instead of '\r' if !isatty( 1 ) in merge, repair. * main.cc: Compile on DOS with DJGPP. * lziprecover.texi: New chapter 'Tarlz'. - * configure: Accept appending to CXXFLAGS, 'CXXFLAGS+=OPTIONS'. + * configure: Accept appending to CXXFLAGS; 'CXXFLAGS+=OPTIONS'. * INSTALL: Document use of CXXFLAGS+='-D __USE_MINGW_ANSI_STDIO'. * New test files fox.lz, fox6_sc[1-6].lz. @@ -209,7 +226,7 @@ * unzcrash.cc: Test all 1-byte errors. -Copyright (C) 2009-2021 Antonio Diaz Diaz. +Copyright (C) 2009-2022 Antonio Diaz Diaz. This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute, and @@ -1,7 +1,8 @@ Requirements ------------ -You will need a C++11 compiler. (gcc 3.3.6 or newer is recommended). -I use gcc 6.1.0 and 4.1.2, but the code should compile with any standards +You will need a C++98 compiler with support for 'long long'. +(gcc 3.3.6 or newer is recommended). 
+I use gcc 6.1.0 and 3.3.6, but the code should compile with any standards compliant compiler. Gcc is available at http://gcc.gnu.org. @@ -76,7 +77,7 @@ After running 'configure', you can run 'make' and 'make install' as explained above. -Copyright (C) 2009-2021 Antonio Diaz Diaz. +Copyright (C) 2009-2022 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute, and modify it. diff --git a/Makefile.in b/Makefile.in index ee413f0..a8bd012 100644 --- a/Makefile.in +++ b/Makefile.in @@ -23,10 +23,10 @@ unzobjs = arg_parser.o unzcrash.o all : $(progname) $(progname) : $(objs) - $(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ $(objs) + $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(objs) unzcrash : $(unzobjs) - $(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ $(unzobjs) + $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(unzobjs) main.o : main.cc $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< @@ -38,6 +38,7 @@ unzcrash.o : unzcrash.cc $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< $(objs) : Makefile +lzip.h : common.h alone_to_lz.o : lzip.h mtester.h arg_parser.o : arg_parser.h decoder.o : lzip.h decoder.h @@ -1,73 +1,28 @@ -Changes in version 1.22: +Changes in version 1.23: -The option '-e, --reproduce', which can recover a missing (zeroed) sector in -a lzip file, has been added. For it to work, two things are required: - - The same version of the lzip tool that created the file. - - A reference file containing the uncompressed data corresponding to the - missing compressed data of the zeroed sector, plus some context data - before and after them. -Thanks to Nissanka Gooneratne for his help in testing the reproduce mode. +Decompression time has been reduced by 5-12% depending on the file. -The options '--lzip-level', '--lzip-name', and '--reference-file', auxiliary -to '-e, --reproduce', have been added. +In case of error in a numerical argument to a command line option, lziprecover +now shows the name of the option and the range of valid values. 
-Option aliases '--dump-tdata', '--remove-tdata', and '--strip-tdata' have -been removed. +Options '--dump' and '--strip' now refuse to write compressed data to a +terminal except when dumping trailing data with '--dump=tdata'. -When decompressing or testing, lziprecover now reports an error if a file -name is empty (lziprecover -t ""). +The option '-U, --unzcrash' now requires an argument: '1' to test 1-bit +errors, or 'B<size>' to test zeroed blocks. -Option '-o, --output' now behaves like '-c, --stdout', but sending the -output unconditionally to a file instead of to standard output. See the new -description of '-o' in the manual. This change is backwards compatible only -when decompressing from standard input alone. Therefore commands like: - lziprecover -d -o foo - bar.lz < foo.lz -must now be split into: - lziprecover -d -o foo - < foo.lz - lziprecover -d bar.lz -or rewritten as: - lziprecover -d - bar.lz < foo.lz > foo +The memory tester now allocates the dictionary once per member instead of +doing it for each test. This makes '-U, --unzcrash' about two times faster +on my machine on files with an uncompressed size larger than about 30 MB. -When using '-c' or '-o', lziprecover now checks whether the output is a -terminal only once. +'-W, --debug-decompress' now continues decompressing the members following +the damaged member if it has been fully decompressed (just failed with a CRC +mismatch). -Lziprecover now does not even open the output file if the input file is a -terminal. +The tool unzcrash now uses execvp instead of popen to avoid invoking /bin/sh +and run faster. It also prints byte or block position in messages. -'--ignore-errors' now makes '--decompress' and '--test' ignore data errors -and continue decompressing the remaining members in the file, keeping input -files unchanged. +Several descriptions have been improved in manual, '--help', and man page. -'--ignore-errors --range-decompress' now decompresses a truncated last -member. 
It also returns 0 if only ignored errors (format errors or data -errors) are found. - -'--ignore-errors' now considers that any fragment of file starting with a -valid header and large enough to be a member is a (corrupt) member, not a -gap, even if it lacks a valid trailer. - -The words 'decompressed' and 'compressed' have been replaced with the -shorter 'out' and 'in' in the verbose output when decompressing or testing. - -Several compiler warnings have been fixed. (Reported by Nissanka Gooneratne). - -Option '--list' now reports corruption or truncation of the last header in a -multimenber file specifically instead of showing the generic message "Last -member in input file is truncated or corrupt." - -The debug options '-E, --debug-reproduce', '-M, --md5sum', and -'-U, --unzcrash' have been added. - -The commands needed to extract files from a tar.lz archive have been -documented in the manual, in the output of '--help', and in the man page. - -The new chapter 'Reproducing one sector' has been added to the manual. - -The new sections 'Merging with a backup' and 'Reproducing a mailbox' have -been added to the manual. - -The debug options for experts have been documented in the manual. - -Lzip 1.16 (or clzip 1.6) or newer is required to run the tests. - -9 new test files have been added to the testsuite. +The texinfo category of the manual has been changed from 'Data Compression' +to 'Compression' to match that of gzip. (Reported by Alfred M. Szmidt). @@ -2,10 +2,10 @@ Description Lziprecover is a data recovery tool and decompressor for files in the lzip compressed data format (.lz). Lziprecover is able to repair slightly damaged -files, produce a correct file by merging the good parts of two or more -damaged copies, reproduce a missing (zeroed) sector using a reference file, -extract data from damaged files, decompress files, and test integrity of -files. 
+files (up to one single-byte error per member), produce a correct file by +merging the good parts of two or more damaged copies, reproduce a missing +(zeroed) sector using a reference file, extract data from damaged files, +decompress files, and test integrity of files. Lziprecover can remove the damaged members from multimember files, for example multimember tar.lz archives. @@ -51,7 +51,7 @@ provides recovery capabilities like those of lziprecover, which is able to find and combine the good parts of several damaged copies. Lziprecover is able to recover or decompress files produced by any of the -compressors in the lzip family; lzip, plzip, minilzip/lzlib, clzip, and +compressors in the lzip family: lzip, plzip, minilzip/lzlib, clzip, and pdlzip. If the cause of file corruption is a damaged medium, the combination @@ -84,7 +84,7 @@ Julian Seward's bzip2. Type 'make unzcrash' in the lziprecover source directory to build it. Then try 'unzcrash --help'. -Copyright (C) 2009-2021 Antonio Diaz Diaz. +Copyright (C) 2009-2022 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute, and modify it. diff --git a/alone_to_lz.cc b/alone_to_lz.cc index 1f65dfe..9e5b330 100644 --- a/alone_to_lz.cc +++ b/alone_to_lz.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -35,9 +35,9 @@ namespace { -/* Returns the address of a malloc'd buffer containing the file data and +/* Return the address of a malloc'd buffer containing the file data and the file size in '*size'. The buffer is at least 20 bytes larger. - In case of error, returns 0 and does not modify '*size'. + In case of error, return 0 and do not modify '*size'. 
*/ uint8_t * read_file( const int infd, long * const size, const char * const filename ) diff --git a/arg_parser.cc b/arg_parser.cc index 2e40a13..59998ac 100644 --- a/arg_parser.cc +++ b/arg_parser.cc @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C++ version) - Copyright (C) 2006-2021 Antonio Diaz Diaz. + Copyright (C) 2006-2022 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -35,9 +35,10 @@ bool Arg_parser::parse_long_option( const char * const opt, const char * const a // Test all long options for either exact match or abbreviated matches. for( int i = 0; options[i].code != 0; ++i ) - if( options[i].name && std::strncmp( options[i].name, &opt[2], len ) == 0 ) + if( options[i].long_name && + std::strncmp( options[i].long_name, &opt[2], len ) == 0 ) { - if( std::strlen( options[i].name ) == len ) // Exact match found + if( std::strlen( options[i].long_name ) == len ) // Exact match found { index = i; exact = true; break; } else if( index < 0 ) index = i; // First nonexact match found else if( options[index].code != options[i].code || @@ -58,19 +59,19 @@ bool Arg_parser::parse_long_option( const char * const opt, const char * const a } ++argind; - data.push_back( Record( options[index].code ) ); + data.push_back( Record( options[index].code, options[index].long_name ) ); if( opt[len+2] ) // '--<long_option>=<argument>' syntax { if( options[index].has_arg == no ) { - error_ = "option '--"; error_ += options[index].name; + error_ = "option '--"; error_ += options[index].long_name; error_ += "' doesn't allow an argument"; return false; } if( options[index].has_arg == yes && !opt[len+3] ) { - error_ = "option '--"; error_ += options[index].name; + error_ = "option '--"; error_ += options[index].long_name; error_ += "' requires an argument"; return false; } @@ -82,7 +83,7 @@ bool Arg_parser::parse_long_option( const char * const opt, 
const char * const a { if( !arg || !arg[0] ) { - error_ = "option '--"; error_ += options[index].name; + error_ = "option '--"; error_ += options[index].long_name; error_ += "' requires an argument"; return false; } diff --git a/arg_parser.h b/arg_parser.h index 5629b90..e854838 100644 --- a/arg_parser.h +++ b/arg_parser.h @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C++ version) - Copyright (C) 2006-2021 Antonio Diaz Diaz. + Copyright (C) 2006-2022 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -23,9 +23,9 @@ In case of error, 'error' returns a non-empty error message. 'options' is an array of 'struct Option' terminated by an element - containing a code which is zero. A null name means a short-only - option. A code value outside the unsigned char range means a - long-only option. + containing a code which is zero. A null long_name means a short-only + option. A code value outside the unsigned char range means a long-only + option. Arg_parser normally makes it appear as if all the option arguments were specified before all the non-option arguments for the purposes @@ -48,7 +48,7 @@ public: struct Option { int code; // Short option letter or code ( code != 0 ) - const char * name; // Long option name (maybe null) + const char * long_name; // Long option name (maybe null) Has_arg has_arg; }; @@ -56,8 +56,12 @@ private: struct Record { int code; + std::string parsed_name; std::string argument; - explicit Record( const int c ) : code( c ) {} + explicit Record( const unsigned char c ) + : code( c ), parsed_name( "-" ) { parsed_name += c; } + Record( const int c, const char * const long_name ) + : code( c ), parsed_name( "--" ) { parsed_name += long_name; } explicit Record( const char * const arg ) : code( 0 ), argument( arg ) {} }; @@ -91,6 +95,13 @@ public: else return 0; } + // Full name of the option parsed (short or long). 
+ const std::string & parsed_name( const int i ) const + { + if( i >= 0 && i < arguments() ) return data[i].parsed_name; + else return empty_arg; + } + const std::string & argument( const int i ) const { if( i >= 0 && i < arguments() ) return data[i].argument; diff --git a/common.h b/common.h new file mode 100644 index 0000000..c3d0691 --- /dev/null +++ b/common.h @@ -0,0 +1,43 @@ +/* Lziprecover - Data recovery tool for the lzip format + Copyright (C) 2009-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +struct Bad_byte + { + enum Mode { literal, delta, flip }; + long long pos; + const char * option_name; + Mode mode; + uint8_t value; + + Bad_byte() : pos( -1 ), option_name( 0 ), mode( literal ), value( 0 ) {} + uint8_t operator()( const uint8_t old_value ) const + { + if( mode == delta ) return old_value + value; + if( mode == flip ) return old_value ^ value; + return value; + } + + void parse_bb( const char * const arg, const char * const pn ); + }; + + +// defined in main_common.cc +void show_error( const char * const msg, const int errcode = 0, + const bool help = false ); +void show_file_error( const char * const filename, const char * const msg, + const int errcode = 0 ); +void internal_error( const char * const msg ); @@ -1,12 +1,12 @@ #! 
/bin/sh # configure script for Lziprecover - Data recovery tool for the lzip format -# Copyright (C) 2009-2021 Antonio Diaz Diaz. +# Copyright (C) 2009-2022 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute, and modify it. pkgname=lziprecover -pkgversion=1.22 +pkgversion=1.23 progname=lziprecover srctrigger=doc/${pkgname}.texi @@ -167,7 +167,7 @@ echo "LDFLAGS = ${LDFLAGS}" rm -f Makefile cat > Makefile << EOF # Makefile for Lziprecover - Data recovery tool for the lzip format -# Copyright (C) 2009-2021 Antonio Diaz Diaz. +# Copyright (C) 2009-2022 Antonio Diaz Diaz. # This file was generated automatically by configure. Don't edit. # # This Makefile is free software: you have unlimited permission @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -34,8 +34,8 @@ const CRC32 crc32; -/* Returns the number of bytes really read. - If (returned value < size) and (errno == 0), means EOF was reached. +/* Return the number of bytes really read. + If (value returned < size) and (errno == 0), means EOF was reached. */ long long readblock( const int fd, uint8_t * const buf, const long long size ) { @@ -53,8 +53,8 @@ long long readblock( const int fd, uint8_t * const buf, const long long size ) } -/* Returns the number of bytes really written. - If (returned value < size), it is always an error. +/* Return the number of bytes really written. + If (value returned < size), it is always an error. 
*/ long long writeblock( const int fd, const uint8_t * const buf, const long long size ) @@ -248,11 +248,11 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) rep0 = distance; } state.set_rep(); - len = min_match_len + rdec.decode_len( rep_len_model, pos_state ); + len = rdec.decode_len( rep_len_model, pos_state ); } else // match { - len = min_match_len + rdec.decode_len( match_len_model, pos_state ); + len = rdec.decode_len( match_len_model, pos_state ); unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] ); if( distance >= start_dis_model ) { @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -149,37 +149,78 @@ public: } else { - range -= bound; code -= bound; + range -= bound; bm.probability -= bm.probability >> bit_model_move_bits; return 1; } } - unsigned decode_tree3( Bit_model bm[] ) + void decode_symbol_bit( Bit_model & bm, unsigned & symbol ) { - unsigned symbol = 2 | decode_bit( bm[1] ); - symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); - symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); - return symbol & 7; + normalize(); + symbol <<= 1; + const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability; + if( code < bound ) + { + range = bound; + bm.probability += + ( bit_model_total - bm.probability ) >> bit_model_move_bits; + } + else + { + code -= bound; + range -= bound; + bm.probability -= bm.probability >> bit_model_move_bits; + symbol |= 1; + } + } + + void decode_symbol_bit_reversed( Bit_model & bm, unsigned & model, + unsigned & symbol, const int i ) + { + normalize(); + model <<= 1; + const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability; + if( code < bound ) + { + range = bound; + bm.probability += + ( bit_model_total - 
bm.probability ) >> bit_model_move_bits; + } + else + { + code -= bound; + range -= bound; + bm.probability -= bm.probability >> bit_model_move_bits; + model |= 1; + symbol |= 1 << i; + } } unsigned decode_tree6( Bit_model bm[] ) { - unsigned symbol = 2 | decode_bit( bm[1] ); - symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); - symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); - symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); - symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); - symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); + unsigned symbol = 1; + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); return symbol & 0x3F; } unsigned decode_tree8( Bit_model bm[] ) { unsigned symbol = 1; - for( int i = 0; i < 8; ++i ) - symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); return symbol & 0xFF; } @@ -188,20 +229,18 @@ public: unsigned model = 1; unsigned symbol = 0; for( int i = 0; i < num_bits; ++i ) - { - const unsigned bit = decode_bit( bm[model] ); - model <<= 1; model += bit; - symbol |= ( bit << i ); - } + decode_symbol_bit_reversed( bm[model], model, symbol, i ); return symbol; } unsigned decode_tree_reversed4( Bit_model bm[] ) { - unsigned symbol = decode_bit( bm[1] ); - symbol += decode_bit( bm[2+symbol] ) << 1; - symbol += decode_bit( bm[4+symbol] ) << 2; - symbol += decode_bit( bm[8+symbol] ) << 3; + unsigned model = 1; + unsigned symbol = 0; + decode_symbol_bit_reversed( bm[model], model, symbol, 0 
); + decode_symbol_bit_reversed( bm[model], model, symbol, 1 ); + decode_symbol_bit_reversed( bm[model], model, symbol, 2 ); + decode_symbol_bit_reversed( bm[model], model, symbol, 3 ); return symbol; } @@ -216,8 +255,7 @@ public: symbol <<= 1; symbol |= bit; if( match_bit >> 8 != bit ) { - while( symbol < 0x100 ) - symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); + while( symbol < 0x100 ) decode_symbol_bit( bm[symbol], symbol ); break; } } @@ -226,11 +264,24 @@ public: unsigned decode_len( Len_model & lm, const int pos_state ) { + Bit_model * bm; + unsigned mask, offset, symbol = 1; + if( decode_bit( lm.choice1 ) == 0 ) - return decode_tree3( lm.bm_low[pos_state] ); + { bm = lm.bm_low[pos_state]; mask = 7; offset = 0; goto len3; } if( decode_bit( lm.choice2 ) == 0 ) - return len_low_symbols + decode_tree3( lm.bm_mid[pos_state] ); - return len_low_symbols + len_mid_symbols + decode_tree8( lm.bm_high ); + { bm = lm.bm_mid[pos_state]; mask = 7; offset = len_low_symbols; goto len3; } + bm = lm.bm_high; mask = 0xFF; offset = len_low_symbols + len_mid_symbols; + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); +len3: + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); + return ( symbol & mask ) + min_match_len + offset; } }; diff --git a/doc/lziprecover.1 b/doc/lziprecover.1 index eefa0b9..e05a366 100644 --- a/doc/lziprecover.1 +++ b/doc/lziprecover.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. 
-.TH LZIPRECOVER "1" "January 2021" "lziprecover 1.22" "User Commands" +.TH LZIPRECOVER "1" "January 2022" "lziprecover 1.23" "User Commands" .SH NAME lziprecover \- recovers data from damaged lzip files .SH SYNOPSIS @@ -8,15 +8,13 @@ lziprecover \- recovers data from damaged lzip files .SH DESCRIPTION Lziprecover is a data recovery tool and decompressor for files in the lzip compressed data format (.lz). Lziprecover is able to repair slightly damaged -files, produce a correct file by merging the good parts of two or more -damaged copies, reproduce a missing (zeroed) sector using a reference file, -extract data from damaged files, decompress files, and test integrity of -files. +files (up to one single\-byte error per member), produce a correct file by +merging the good parts of two or more damaged copies, reproduce a missing +(zeroed) sector using a reference file, extract data from damaged files, +decompress files, and test integrity of files. .PP -Lziprecover can repair perfectly most files with small errors (up to one -single\-byte error per member), without the need of any extra redundance -at all. Losing an entire archive just because of a corrupt byte near the -beginning is a thing of the past. +With the help of lziprecover, losing an entire archive just because of a +corrupt byte near the beginning is a thing of the past. .PP Lziprecover can remove the damaged members from multimember files, for example multimember tar.lz archives. @@ -119,14 +117,14 @@ To extract all the files from archive 'foo.tar.lz', use the commands .PP Exit status: 0 for a normal exit, 1 for environmental problems (file not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or -invalid input file, 3 for an internal consistency error (eg, bug) which +invalid input file, 3 for an internal consistency error (e.g., bug) which caused lziprecover to panic. 
.SH "REPORTING BUGS" Report bugs to lzip\-bug@nongnu.org .br Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html .SH COPYRIGHT -Copyright \(co 2021 Antonio Diaz Diaz. +Copyright \(co 2022 Antonio Diaz Diaz. License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> .br This is free software: you are free to change and redistribute it. diff --git a/doc/lziprecover.info b/doc/lziprecover.info index 49170df..112f65b 100644 --- a/doc/lziprecover.info +++ b/doc/lziprecover.info @@ -1,7 +1,7 @@ This is lziprecover.info, produced by makeinfo version 4.13+ from lziprecover.texi. -INFO-DIR-SECTION Data Compression +INFO-DIR-SECTION Compression START-INFO-DIR-ENTRY * Lziprecover: (lziprecover). Data recovery tool for the lzip format END-INFO-DIR-ENTRY @@ -12,7 +12,7 @@ File: lziprecover.info, Node: Top, Next: Introduction, Up: (dir) Lziprecover Manual ****************** -This manual is for Lziprecover (version 1.22, 2 January 2021). +This manual is for Lziprecover (version 1.23, 21 January 2022). * Menu: @@ -32,7 +32,7 @@ This manual is for Lziprecover (version 1.22, 2 January 2021). * Concept index:: Index of concepts - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute, and modify it. @@ -45,10 +45,10 @@ File: lziprecover.info, Node: Introduction, Next: Invoking lziprecover, Prev: Lziprecover is a data recovery tool and decompressor for files in the lzip compressed data format (.lz). Lziprecover is able to repair slightly damaged -files, produce a correct file by merging the good parts of two or more -damaged copies, reproduce a missing (zeroed) sector using a reference file, -extract data from damaged files, decompress files, and test integrity of -files. 
+files (up to one single-byte error per member), produce a correct file by +merging the good parts of two or more damaged copies, reproduce a missing +(zeroed) sector using a reference file, extract data from damaged files, +decompress files, and test integrity of files. Lziprecover can remove the damaged members from multimember files, for example multimember tar.lz archives. @@ -94,7 +94,7 @@ provides recovery capabilities like those of lziprecover, which is able to find and combine the good parts of several damaged copies. Lziprecover is able to recover or decompress files produced by any of the -compressors in the lzip family; lzip, plzip, minilzip/lzlib, clzip, and +compressors in the lzip family: lzip, plzip, minilzip/lzlib, clzip, and pdlzip. If the cause of file corruption is a damaged medium, the combination @@ -105,7 +105,7 @@ ddrescue-example2::, for examples. If a file is too damaged for lziprecover to repair it, all the recoverable data in all members of the file can be extracted with the following command (the resulting file may contain errors and some garbage -data may be produced at the end of each member): +data may be produced at the end of each damaged member): lziprecover -cd -i file.lz > file @@ -161,7 +161,7 @@ lziprecover decompresses from standard input to standard output. dictionary size of the resulting file (and therefore the amount of memory required to decompress it). Only streamed files with default LZMA properties can be converted; non-streamed lzma-alone files lack - the end of stream marker required in lzip files. + the "End Of Stream" marker required in lzip files. The name of the converted lzip file is derived from that of the original lzma-alone file as follows: @@ -176,15 +176,18 @@ lziprecover decompresses from standard input to standard output. unchanged. This option (or '-o') is needed when reading from a named pipe (fifo) or from a device. 
Use it also to recover as much of the decompressed data as possible when decompressing a corrupt file. '-c' - overrides '-o', but '-c' has no effect when merging, removing members, + overrides '-o'. '-c' has no effect when merging, removing members, repairing, reproducing, splitting, testing or listing. '-d' '--decompress' - Decompress the files specified. If a file does not exist or can't be - opened, lziprecover continues decompressing the rest of the files. If - a file fails to decompress, or is a terminal, lziprecover exits - immediately without decompressing the rest of the files. + Decompress the files specified. If a file does not exist, can't be + opened, or the destination file already exists and '--force' has not + been specified, lziprecover continues decompressing the rest of the + files and exits with error status 1. If a file fails to decompress, or + is a terminal, lziprecover exits immediately with error status 2 + without decompressing the rest of the files. A terminal is considered + an uncompressed file, and therefore invalid. '-D RANGE' '--range-decompress=RANGE' @@ -243,12 +246,12 @@ lziprecover decompresses from standard input to standard output. '-cd -i' method resyncs to the next member header after each error, and is immune to some format errors that make '-D0 -i' fail. The range decompressed may be smaller than the range requested, because of the - errors. + errors. The exit status is set to 0 unless other errors are found (I/O + errors, for example). Make '--list', '--dump', '--remove', and '--strip' ignore format errors. The sizes of the members with errors (especially the last) may - be wrong. The exit status is set to 0 unless other errors are found - (I/O errors, for example). + be wrong. '-k' '--keep' @@ -267,10 +270,12 @@ lziprecover decompresses from standard input to standard output. between members are shown. The member numbers shown coincide with the file numbers produced by '--split'. 
- '-lq' can be used to verify quickly (without decompressing) the - structural integrity of the files specified. (Use '--test' to verify - the data integrity). '-alq' additionally verifies that none of the - files specified contain trailing data. + If any file is damaged, does not exist, can't be opened, or is not + regular, the final exit status will be > 0. '-lq' can be used to verify + quickly (without decompressing) the structural integrity of the files + specified. (Use '--test' to verify the data integrity). '-alq' + additionally verifies that none of the files specified contain + trailing data. '-m' '--merge' @@ -361,7 +366,7 @@ lziprecover decompresses from standard input to standard output. If a file does not exist, can't be opened, or is not regular, lziprecover continues processing the rest of the files. If the dump fails in one file, lziprecover exits immediately without processing the - rest of the files. + rest of the files. Only '--dump=tdata' can write to a terminal. The argument to '--dump' is a colon-separated list of the following element specifiers; a member list (1,3-6), a reverse member list @@ -451,29 +456,39 @@ lziprecover decompresses from standard input to standard output. byte values. Print cumulative data for all files followed by the name of the first file with the longest sequence. -'-U' -'--unzcrash' - Test 1-bit errors in the LZMA stream of the input FILE like the - command 'unzcrash -b1 -p7 -s-20 'lzip -t' FILE' but in memory, and - therefore much faster. *Note Unzcrash::. This option tests all the - members independently in a multimember file, skipping headers and - trailers. If a decompression succeeds, the decompressed output is - compared with the original decompressed output of FILE using MD5 - digests. The compressed FILE must not contain errors and must - decompress correctly for the comparisons to work. 
+'-U 1|BSIZE' +'--unzcrash=1|BSIZE' + With argument '1', test 1-bit errors in the LZMA stream of the + compressed input FILE like the command + 'unzcrash -b1 -p7 -s-20 'lzip -t' FILE' but in memory, and therefore + much faster. *Note Unzcrash::. This option tests all the members + independently in a multimember file, skipping headers and trailers. If + a decompression succeeds, the decompressed output is compared with the + decompressed output of the original FILE using MD5 digests. FILE must + not contain errors and must decompress correctly for the comparisons to + work. + + With argument 'B', test zeroed sectors (blocks of bytes) in the LZMA + stream of the compressed input FILE like the command + 'unzcrash --block=SIZE -d1 -p7 -s-(SIZE+20) 'lzip -t' FILE' but in + memory, and therefore much faster. Testing and comparisons work just + like with the argument '1' explained above. By default '--unzcrash' only prints the interesting cases; CRC mismatches, size mismatches, unsupported marker codes, unexpected EOFs, apparently successful decompressions, and decoder errors detected - 50_000 or more bytes beyond the byte being tested. At verbosity level - 1 (-v) it also prints decoder errors detected 10_000 or more bytes - beyond the byte being tested. At verbosity level 2 (-vv) it prints all - cases. + 50_000 or more bytes beyond the byte (or the start of the block) being + tested. At verbosity level 1 (-v) it also prints decoder errors + detected 10_000 or more bytes beyond the byte being tested. At + verbosity level 2 (-vv) it prints all cases for 1-bit errors or the + decoder errors detected beyond the end of the block for zeroed blocks. '-W POSITION,VALUE' '--debug-decompress=POSITION,VALUE' Load the compressed FILE into memory, set the byte at POSITION to VALUE, and decompress the modified compressed data to standard output. + If the damaged member is decompressed fully (just fails with a CRC + mismatch), the members following it are also decompressed. 
'-X[POSITION,VALUE]' '--show-packets[=POSITION,VALUE]' @@ -517,7 +532,7 @@ Y yottabyte (10^24) | Yi yobibyte (2^80) Exit status: 0 for a normal exit, 1 for environmental problems (file not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid -input file, 3 for an internal consistency error (eg, bug) which caused +input file, 3 for an internal consistency error (e.g., bug) which caused lziprecover to panic. @@ -875,7 +890,7 @@ gmp-6.1.1.tar gmp-6.1.2.tar.lz 175 / 473 = 37% gmp-6.1.2.tar gmp-6.1.1.tar.lz 181 / 472 = 38.35% Note that the "performance of reproduce" is a probability, not a partial -recovery. The data is either fully recovered (with the probability X shown +recovery. The data is either recovered fully (with the probability X shown in the last column of the tables above) or not recovered at all (with probability 1 - X). @@ -1065,9 +1080,11 @@ when there is no longer anything to take away. represents a variable number of bytes. - A lzip file consists of a series of "members" (compressed data sets). -The members simply appear one after another in the file, with no additional -information before, between, or after them. + A lzip file consists of a series of independent "members" (compressed +data sets). The members simply appear one after another in the file, with no +additional information before, between, or after them. Each member can +encode in compressed form up to 16 EiB - 1 byte of uncompressed data. The +size of a multimember file is unlimited. Each member has the following structure: @@ -1095,21 +1112,22 @@ information before, between, or after them. Valid values for dictionary size range from 4 KiB to 512 MiB. 'LZMA stream' - The LZMA stream, finished by an end of stream marker. Uses default + The LZMA stream, finished by an "End Of Stream" marker. Uses default values for encoder properties. *Note Stream format: (lzip)Stream format, for a complete description. 
'CRC32 (4 bytes)' - Cyclic Redundancy Check (CRC) of the uncompressed original data. + Cyclic Redundancy Check (CRC) of the original uncompressed data. 'Data size (8 bytes)' - Size of the uncompressed original data. + Size of the original uncompressed data. 'Member size (8 bytes)' Total size of the member, including header and trailer. This field acts as a distributed index, allows the verification of stream integrity, - and facilitates safe recovery of undamaged members from multimember - files. + and facilitates the safe recovery of undamaged members from + multimember files. Member size should be limited to 2 PiB to prevent + the data size field from overflowing. @@ -1167,7 +1185,7 @@ Example 1: Add a comment or description to a compressed file. # This command prints the comment to standard output lziprecover --dump=tdata file.lz # This command outputs file.lz without the comment - lziprecover --strip=tdata file.lz + lziprecover --strip=tdata file.lz > stripped_file.lz # This command removes the comment from file.lz lziprecover --remove=tdata file.lz @@ -1209,7 +1227,7 @@ Example 4: The right way of concatenating the decompressed output of two or more compressed files. *Note Trailing data::. Don't do this - cat file1.lz file2.lz file3.lz | lziprecover -d + cat file1.lz file2.lz file3.lz | lziprecover -d - Do this instead lziprecover -cd file1.lz file2.lz file3.lz You may also concatenate the compressed files like this @@ -1292,7 +1310,10 @@ latter case, please, report any false negative as a bug. In order to compare the outputs, unzcrash needs a 'zcmp' program able to understand the format being tested. For example the 'zcmp' provided by -zutils. Use '--zcmp=false' to disable comparisons. *Note Zcmp: (zutils)Zcmp. +zutils. If the 'zcmp' program used does not understand the format being +tested, all the comparisons will fail because the compressed files will be +compared without being decompressed first. Use '--zcmp=false' to disable +comparisons. 
*Note Zcmp: (zutils)Zcmp. The format for running unzcrash is: @@ -1393,7 +1414,7 @@ tested must decompress it correctly for the comparisons to work. Exit status: 0 for a normal exit, 1 for environmental problems (file not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid -input file, 3 for an internal consistency error (eg, bug) which caused +input file, 3 for an internal consistency error (e.g., bug) which caused unzcrash to panic. @@ -1443,32 +1464,32 @@ Concept index Tag Table: -Node: Top231 -Node: Introduction1410 -Node: Invoking lziprecover5353 -Ref: --trailing-error6220 -Ref: range-format8391 -Ref: --reproduce8726 -Ref: --repair12904 -Node: Data safety24532 -Node: Merging with a backup26520 -Node: Reproducing a mailbox27784 -Node: Repairing one byte30285 -Node: Merging files32350 -Ref: performance-of-merge33520 -Ref: ddrescue-example35129 -Node: Reproducing one sector36416 -Ref: performance-of-reproduce40299 -Ref: ddrescue-example242974 -Node: Tarlz45394 -Node: File names49058 -Node: File format49515 -Node: Trailing data51964 -Node: Examples55186 -Ref: concat-example55762 -Node: Unzcrash57152 -Node: Problems63240 -Node: Concept index63792 +Node: Top226 +Node: Introduction1406 +Node: Invoking lziprecover5398 +Ref: --trailing-error6265 +Ref: range-format8644 +Ref: --reproduce8979 +Ref: --repair13278 +Node: Data safety25584 +Node: Merging with a backup27572 +Node: Reproducing a mailbox28836 +Node: Repairing one byte31337 +Node: Merging files33402 +Ref: performance-of-merge34572 +Ref: ddrescue-example36181 +Node: Reproducing one sector37468 +Ref: performance-of-reproduce41351 +Ref: ddrescue-example244026 +Node: Tarlz46446 +Node: File names50110 +Node: File format50567 +Node: Trailing data53258 +Node: Examples56499 +Ref: concat-example57075 +Node: Unzcrash58467 +Node: Problems64739 +Node: Concept index65291 End Tag Table diff --git a/doc/lziprecover.texi b/doc/lziprecover.texi index 6766403..7b3449e 100644 --- a/doc/lziprecover.texi +++ 
b/doc/lziprecover.texi @@ -6,10 +6,10 @@ @finalout @c %**end of header -@set UPDATED 2 January 2021 -@set VERSION 1.22 +@set UPDATED 21 January 2022 +@set VERSION 1.23 -@dircategory Data Compression +@dircategory Compression @direntry * Lziprecover: (lziprecover). Data recovery tool for the lzip format @end direntry @@ -53,7 +53,7 @@ This manual is for Lziprecover (version @value{VERSION}, @value{UPDATED}). @end menu @sp 1 -Copyright @copyright{} 2009-2021 Antonio Diaz Diaz. +Copyright @copyright{} 2009-2022 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute, and modify it. @@ -67,10 +67,10 @@ distribute, and modify it. @uref{http://www.nongnu.org/lzip/lziprecover.html,,Lziprecover} is a data recovery tool and decompressor for files in the lzip compressed data format (.lz). Lziprecover is able to repair slightly damaged -files, produce a correct file by merging the good parts of two or more -damaged copies, reproduce a missing (zeroed) sector using a reference file, -extract data from damaged files, decompress files, and test integrity of -files. +files (up to one single-byte error per member), produce a correct file by +merging the good parts of two or more damaged copies, reproduce a missing +(zeroed) sector using a reference file, extract data from damaged files, +decompress files, and test integrity of files. Lziprecover can remove the damaged members from multimember files, for example multimember tar.lz archives. @@ -100,8 +100,8 @@ The lzip format is as simple as possible (but not simpler). The lzip manual provides the source code of a simple decompressor along with a detailed explanation of how it works, so that with the only help of the lzip manual it would be possible for a digital archaeologist to extract -the data from a lzip file long after quantum computers eventually render -LZMA obsolete. +the data from a lzip file long after quantum computers eventually +render LZMA obsolete. 
@item Additionally the lzip reference implementation is copylefted, which @@ -121,7 +121,7 @@ provides recovery capabilities like those of lziprecover, which is able to find and combine the good parts of several damaged copies. Lziprecover is able to recover or decompress files produced by any of the -compressors in the lzip family; lzip, plzip, minilzip/lzlib, clzip, and +compressors in the lzip family: lzip, plzip, minilzip/lzlib, clzip, and pdlzip. If the cause of file corruption is a damaged medium, the combination @@ -132,7 +132,7 @@ from damaged lzip files. @xref{ddrescue-example}, and If a file is too damaged for lziprecover to repair it, all the recoverable data in all members of the file can be extracted with the following command (the resulting file may contain errors and some garbage data may be produced -at the end of each member): +at the end of each damaged member): @example lziprecover -cd -i file.lz > file @@ -200,8 +200,8 @@ Convert lzma-alone files to lzip format without recompressing, just adding a lzip header and trailer. The conversion minimizes the dictionary size of the resulting file (and therefore the amount of memory required to decompress it). Only streamed files with default LZMA -properties can be converted; non-streamed lzma-alone files lack the end -of stream marker required in lzip files. +properties can be converted; non-streamed lzma-alone files lack the "End +Of Stream" marker required in lzip files. The name of the converted lzip file is derived from that of the original lzma-alone file as follows: @@ -217,16 +217,19 @@ lzma-alone file as follows: Write decompressed data to standard output; keep input files unchanged. This option (or @samp{-o}) is needed when reading from a named pipe (fifo) or from a device. Use it also to recover as much of the decompressed data as -possible when decompressing a corrupt file. 
@samp{-c} overrides @samp{-o}, -but @samp{-c} has no effect when merging, removing members, repairing, +possible when decompressing a corrupt file. @samp{-c} overrides @samp{-o}. +@samp{-c} has no effect when merging, removing members, repairing, reproducing, splitting, testing or listing. @item -d @itemx --decompress -Decompress the files specified. If a file does not exist or can't be -opened, lziprecover continues decompressing the rest of the files. If a file -fails to decompress, or is a terminal, lziprecover exits immediately without -decompressing the rest of the files. +Decompress the files specified. If a file does not exist, can't be opened, +or the destination file already exists and @samp{--force} has not been +specified, lziprecover continues decompressing the rest of the files and +exits with error status 1. If a file fails to decompress, or is a terminal, +lziprecover exits immediately with error status 2 without decompressing the +rest of the files. A terminal is considered an uncompressed file, and +therefore invalid. @item -D @var{range} @itemx --range-decompress=@var{range} @@ -287,12 +290,12 @@ data in all members of @samp{file.lz} without having to split it first. The @w{@samp{-cd -i}} method resyncs to the next member header after each error, and is immune to some format errors that make @w{@samp{-D0 -i}} fail. The range decompressed may be smaller than the range requested, because of the -errors. +errors. The exit status is set to 0 unless other errors are found (I/O +errors, for example). Make @samp{--list}, @samp{--dump}, @samp{--remove}, and @samp{--strip} ignore format errors. The sizes of the members with errors (specially the -last) may be wrong. The exit status is set to 0 unless other errors are -found (I/O errors, for example). +last) may be wrong. @item -k @itemx --keep @@ -308,13 +311,13 @@ size, the number of members in the file, and the amount of trailing data (if any) are also printed. 
With @samp{-vv}, the positions and sizes of each member in multimember files are also printed. With @samp{-i}, format errors are ignored, and with @samp{-ivv}, gaps between members are shown. The -member numbers shown coincide with the file numbers produced by -@samp{--split}. +member numbers shown coincide with the file numbers produced by @samp{--split}. -@samp{-lq} can be used to verify quickly (without decompressing) the -structural integrity of the files specified. (Use @samp{--test} to verify -the data integrity). @samp{-alq} additionally verifies that none of the -files specified contain trailing data. +If any file is damaged, does not exist, can't be opened, or is not regular, +the final exit status will be @w{> 0}. @samp{-lq} can be used to verify +quickly (without decompressing) the structural integrity of the files +specified. (Use @samp{--test} to verify the data integrity). @samp{-alq} +additionally verifies that none of the files specified contain trailing data. @item -m @itemx --merge @@ -404,7 +407,7 @@ one file is given, the elements dumped from all files are concatenated. If a file does not exist, can't be opened, or is not regular, lziprecover continues processing the rest of the files. If the dump fails in one file, lziprecover exits immediately without processing the -rest of the files. +rest of the files. Only @samp{--dump=tdata} can write to a terminal. The argument to @samp{--dump} is a colon-separated list of the following element specifiers; a member list (1,3-6), a reverse member list @@ -495,29 +498,39 @@ specified, print the frequency of repeated sequences of all possible byte values. Print cumulative data for all files followed by the name of the first file with the longest sequence. -@item -U -@itemx --unzcrash -Test 1-bit errors in the LZMA stream of the input @var{file} like the -command @w{@samp{unzcrash -b1 -p7 -s-20 'lzip -t' @var{file}}} but in -memory, and therefore much faster. @xref{Unzcrash}. 
This option tests all -the members independently in a multimember file, skipping headers and -trailers. If a decompression succeeds, the decompressed output is compared -with the original decompressed output of @var{file} using MD5 digests. The -compressed @var{file} must not contain errors and must decompress correctly -for the comparisons to work. +@item -U 1|B@var{size} +@itemx --unzcrash=1|B@var{size} +With argument @samp{1}, test 1-bit errors in the LZMA stream of the +compressed input @var{file} like the command +@w{@samp{unzcrash -b1 -p7 -s-20 'lzip -t' @var{file}}} but in memory, and +therefore much faster. @xref{Unzcrash}. This option tests all the members +independently in a multimember file, skipping headers and trailers. If a +decompression succeeds, the decompressed output is compared with the +decompressed output of the original @var{file} using MD5 digests. @var{file} +must not contain errors and must decompress correctly for the comparisons to +work. + +With argument @samp{B}, test zeroed sectors (blocks of bytes) in the LZMA +stream of the compressed input @var{file} like the command +@w{@samp{unzcrash --block=@var{size} -d1 -p7 -s-(@var{size}+20) 'lzip -t' @var{file}}} +but in memory, and therefore much faster. Testing and comparisons work just +like with the argument @samp{1} explained above. By default @samp{--unzcrash} only prints the interesting cases; CRC mismatches, size mismatches, unsupported marker codes, unexpected EOFs, apparently successful decompressions, and decoder errors detected 50_000 or -more bytes beyond the byte being tested. At verbosity level 1 (-v) it also -prints decoder errors detected 10_000 or more bytes beyond the byte being -tested. At verbosity level 2 (-vv) it prints all cases. +more bytes beyond the byte (or the start of the block) being tested. At +verbosity level 1 (-v) it also prints decoder errors detected 10_000 or more +bytes beyond the byte being tested. 
At verbosity level 2 (-vv) it prints all +cases for 1-bit errors or the decoder errors detected beyond the end of the +block for zeroed blocks. @item -W @var{position},@var{value} @itemx --debug-decompress=@var{position},@var{value} Load the compressed @var{file} into memory, set the byte at @var{position} to @var{value}, and decompress the modified compressed data to standard -output. +output. If the damaged member is decompressed fully (just fails with a CRC +mismatch), the members following it are also decompressed. @item -X[@var{position},@var{value}] @itemx --show-packets[=@var{position},@var{value}] @@ -563,9 +576,9 @@ Table of SI and binary prefixes (unit multipliers): @sp 1 Exit status: 0 for a normal exit, 1 for environmental problems (file not -found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or -invalid input file, 3 for an internal consistency error (eg, bug) which -caused lziprecover to panic. +found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid +input file, 3 for an internal consistency error (e.g., bug) which caused +lziprecover to panic. @node Data safety @@ -944,7 +957,7 @@ real backups of my own working directory: @end multitable Note that the "performance of reproduce" is a probability, not a partial -recovery. The data is either fully recovered (with the probability X shown +recovery. The data is either recovered fully (with the probability X shown in the last column of the tables above) or not recovered at all (with probability @w{1 - X}). @@ -1158,9 +1171,11 @@ represents one byte; a box like this: represents a variable number of bytes. @sp 1 -A lzip file consists of a series of "members" (compressed data sets). -The members simply appear one after another in the file, with no -additional information before, between, or after them. +A lzip file consists of a series of independent "members" (compressed data +sets). 
The members simply appear one after another in the file, with no +additional information before, between, or after them. Each member can +encode in compressed form up to @w{16 EiB - 1 byte} of uncompressed data. +The size of a multimember file is unlimited. Each member has the following structure: @@ -1190,7 +1205,7 @@ Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB@* Valid values for dictionary size range from 4 KiB to 512 MiB. @item LZMA stream -The LZMA stream, finished by an end of stream marker. Uses default values +The LZMA stream, finished by an "End Of Stream" marker. Uses default values for encoder properties. @ifnothtml @xref{Stream format,,,lzip}, @@ -1202,15 +1217,17 @@ See for a complete description. @item CRC32 (4 bytes) -Cyclic Redundancy Check (CRC) of the uncompressed original data. +Cyclic Redundancy Check (CRC) of the original uncompressed data. @item Data size (8 bytes) -Size of the uncompressed original data. +Size of the original uncompressed data. @item Member size (8 bytes) Total size of the member, including header and trailer. This field acts as a distributed index, allows the verification of stream integrity, and -facilitates safe recovery of undamaged members from multimember files. +facilitates the safe recovery of undamaged members from multimember files. +Member size should be limited to @w{2 PiB} to prevent the data size field +from overflowing. @end table @@ -1277,7 +1294,7 @@ echo 'This file contains this and that' >> file.lz # This command prints the comment to standard output lziprecover --dump=tdata file.lz # This command outputs file.lz without the comment -lziprecover --strip=tdata file.lz +lziprecover --strip=tdata file.lz > stripped_file.lz # This command removes the comment from file.lz lziprecover --remove=tdata file.lz @end example @@ -1333,7 +1350,7 @@ more compressed files. @xref{Trailing data}. 
@example Don't do this - cat file1.lz file2.lz file3.lz | lziprecover -d + cat file1.lz file2.lz file3.lz | lziprecover -d - Do this instead lziprecover -cd file1.lz file2.lz file3.lz You may also concatenate the compressed files like this @@ -1429,7 +1446,10 @@ case, please, report any false negative as a bug. In order to compare the outputs, unzcrash needs a @samp{zcmp} program able to understand the format being tested. For example the @samp{zcmp} provided by @uref{http://www.nongnu.org/zutils/manual/zutils_manual.html#Zcmp,,zutils}. -Use @samp{--zcmp=false} to disable comparisons. +If the @samp{zcmp} program used does not understand the format being tested, +all the comparisons will fail because the compressed files will be compared +without being decompressed first. Use @samp{--zcmp=false} to disable +comparisons. @ifnothtml @xref{Zcmp,,,zutils}. @end ifnothtml @@ -1540,7 +1560,7 @@ unzcrash and zcmp to use the same decompressor with a command like Exit status: 0 for a normal exit, 1 for environmental problems (file not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or -invalid input file, 3 for an internal consistency error (eg, bug) which +invalid input file, 3 for an internal consistency error (e.g., bug) which caused unzcrash to panic. diff --git a/dump_remove.cc b/dump_remove.cc index d33551f..37f7f00 100644 --- a/dump_remove.cc +++ b/dump_remove.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -48,6 +48,8 @@ int dump_members( const std::vector< std::string > & filenames, set_signal_handler(); if( !open_outstream( force, false, false, false ) ) return 1; } + if( ( strip || !member_list.tdata || member_list.damaged || member_list.range() ) && + !check_tty_out() ) return 1; // check tty except for --dump=tdata unsigned long long copied_size = 0, stripped_size = 0; unsigned long long copied_tsize = 0, stripped_tsize = 0; long members = 0, smembers = 0; @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -76,47 +76,44 @@ int list_files( const std::vector< std::string > & filenames, set_retval( retval, lzip_index.retval() ); continue; } - if( verbosity >= 0 ) + if( verbosity < 0 ) continue; + const unsigned long long udata_size = lzip_index.udata_size(); + const unsigned long long cdata_size = lzip_index.cdata_size(); + total_comp += cdata_size; total_uncomp += udata_size; ++files; + const long members = lzip_index.members(); + if( first_post ) { - const unsigned long long udata_size = lzip_index.udata_size(); - const unsigned long long cdata_size = lzip_index.cdata_size(); - total_comp += cdata_size; total_uncomp += udata_size; ++files; - const long members = lzip_index.members(); - if( first_post ) - { - first_post = false; - if( verbosity >= 1 ) std::fputs( " dict memb trail ", stdout ); - std::fputs( " uncompressed compressed saved name\n", stdout ); - } - if( verbosity >= 1 ) - std::printf( "%s %5ld %6lld ", - format_ds( lzip_index.dictionary_size() ), members, - lzip_index.file_size() - cdata_size ); - list_line( udata_size, cdata_size, input_filename ); + first_post = 
false; + if( verbosity >= 1 ) std::fputs( " dict memb trail ", stdout ); + std::fputs( " uncompressed compressed saved name\n", stdout ); + } + if( verbosity >= 1 ) + std::printf( "%s %5ld %6lld ", format_ds( lzip_index.dictionary_size() ), + members, lzip_index.file_size() - cdata_size ); + list_line( udata_size, cdata_size, input_filename ); - if( verbosity >= 2 && ( members > 1 || - ( members == 1 && lzip_index.mblock( 0 ).pos() > 0 ) ) ) + if( verbosity >= 2 && ( members > 1 || + ( members == 1 && lzip_index.mblock( 0 ).pos() > 0 ) ) ) + { + std::fputs( " member data_pos data_size member_pos member_size\n", stdout ); + long long prev_end = 0; + for( long i = 0, gaps = 0; i < members; ++i ) { - std::fputs( " member data_pos data_size member_pos member_size\n", stdout ); - long long prev_end = 0; - for( long i = 0, gaps = 0; i < members; ++i ) + const Block & db = lzip_index.dblock( i ); + const Block & mb = lzip_index.mblock( i ); + if( mb.pos() > prev_end ) { - const Block & db = lzip_index.dblock( i ); - const Block & mb = lzip_index.mblock( i ); - if( mb.pos() > prev_end ) - { - std::printf( " gap - - %14llu %14llu\n", - prev_end, mb.pos() - prev_end ); - ++gaps; - } - std::printf( "%6ld %14llu %14llu %14llu %14llu\n", - i + gaps + 1, db.pos(), db.size(), mb.pos(), mb.size() ); - prev_end = mb.end(); + std::printf( " gap - - %14llu %14llu\n", + prev_end, mb.pos() - prev_end ); + ++gaps; } - first_post = true; // reprint heading after list of members + std::printf( "%6ld %14llu %14llu %14llu %14llu\n", + i + gaps + 1, db.pos(), db.size(), mb.pos(), mb.size() ); + prev_end = mb.end(); } - std::fflush( stdout ); + first_post = true; // reprint heading after list of members } + std::fflush( stdout ); } if( verbosity >= 0 && files > 1 ) { diff --git a/lunzcrash.cc b/lunzcrash.cc index b07b748..577d355 100644 --- a/lunzcrash.cc +++ b/lunzcrash.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. 
+ Copyright (C) 2009-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -69,24 +69,26 @@ bool compare_member( const uint8_t * const mbuffer, const long long msize, } -int test_member_rest( const LZ_mtester & master, long * const failure_posp, +int test_member_rest( const LZ_mtester & master, uint8_t * const buffer2, + long * const failure_posp, const unsigned long long byte_pos ) { - LZ_mtester mtester( master ); - mtester.duplicate_buffer(); + LZ_mtester mtester( master ); // tester with external buffer + mtester.duplicate_buffer( buffer2 ); int result = mtester.test_member( LLONG_MAX, LLONG_MAX, stdout, byte_pos ); - if( result == 0 && !mtester.finished() ) result = -1; + if( result == 0 && !mtester.finished() ) result = -1; // false negative if( result != 0 ) *failure_posp = mtester.member_position(); return result; } -long next_pct_pos( const Lzip_index & lzip_index, const int i, const int pct ) +long next_pct_pos( const Lzip_index & lzip_index, const int i, const int pct, + const int sector_size = 0 ) { if( pct <= 0 ) return 0; - const long long cdata_size = lzip_index.cdata_size(); + const long long cdata_size = lzip_index.cdata_size() - sector_size; const long long mpos = lzip_index.mblock( i ).pos(); - const long long msize = lzip_index.mblock( i ).size(); + const long long msize = lzip_index.mblock( i ).size() - sector_size; long long pct_pos = (long long)( cdata_size / ( 100.0 / pct ) ); if( pct_pos <= mpos ) pct_pos = 0; @@ -101,18 +103,17 @@ long next_pct_pos( const Lzip_index & lzip_index, const int i, const int pct ) /* Test 1-bit errors in LZMA streams in file. Unless verbosity >= 1, print only the bytes with interesting results. 
*/ -int lunzcrash( const std::string & input_filename ) +int lunzcrash_bit( const char * const input_filename ) { struct stat in_stats; // not used - const int infd = - open_instream( input_filename.c_str(), &in_stats, false, true ); + const int infd = open_instream( input_filename, &in_stats, false, true ); if( infd < 0 ) return 1; const Lzip_index lzip_index( infd, true, true ); if( lzip_index.retval() != 0 ) - { show_file_error( input_filename.c_str(), lzip_index.error().c_str() ); + { show_file_error( input_filename, lzip_index.error().c_str() ); return lzip_index.retval(); } - if( verbosity >= 2 ) printf( "Testing file '%s'\n", input_filename.c_str() ); + if( verbosity >= 2 ) printf( "Testing file '%s'\n", input_filename ); const long long cdata_size = lzip_index.cdata_size(); long positions = 0, decompressions = 0, successes = 0, failed_comparisons = 0; @@ -125,14 +126,15 @@ int lunzcrash( const std::string & input_filename ) uint8_t * const mbuffer = read_member( infd, mpos, msize ); if( !mbuffer ) return 1; uint8_t md5_orig[16]; - if( !verify_member( mbuffer, msize, dictionary_size, - input_filename.c_str(), md5_orig ) ) return 2; + if( !verify_member( mbuffer, msize, dictionary_size, input_filename, + md5_orig ) ) return 2; long pct_pos = next_pct_pos( lzip_index, i, pct ); long pos = Lzip_header::size + 1, printed = 0; // last pos printed const long end = msize - 20; if( verbosity == 0 ) // give a clue of the range being tested std::printf( "Testing bytes %llu to %llu\n", mpos + pos, mpos + end - 1 ); LZ_mtester master( mbuffer, msize, dictionary_size ); + uint8_t * const buffer2 = new uint8_t[dictionary_size]; for( ; pos < end; ++pos ) { const long pos_limit = pos - 16; @@ -150,17 +152,20 @@ int lunzcrash( const std::string & input_filename ) ++decompressions; mbuffer[pos] ^= mask; long failure_pos = 0; - const int result = test_member_rest( master, &failure_pos, + const int result = test_member_rest( master, buffer2, &failure_pos, ( printed < pos ) ? 
mpos + pos : 0 ); - if( result == 0 ) + if( result <= 0 ) { ++successes; if( verbosity >= 0 ) { if( printed < pos ) { std::printf( "byte %llu\n", mpos + pos ); printed = pos; } - std::printf( "0x%02X (0x%02X^0x%02X) passed the test\n", - mbuffer[pos], byte, mask ); + std::printf( "0x%02X (0x%02X^0x%02X) passed the test%s", + mbuffer[pos], byte, mask, ( result < 0 ) ? "" : "\n" ); + if( result < 0 ) + std::printf( ", but only consumed %lu bytes of %llu\n", + failure_pos, msize ); } if( !compare_member( mbuffer, msize, dictionary_size, mpos + pos, md5_orig ) ) ++failed_comparisons; @@ -191,18 +196,133 @@ int lunzcrash( const std::string & input_filename ) mbuffer[pos] ^= mask; } } + delete[] buffer2; + if( !compare_member( mbuffer, msize, dictionary_size, mpos + pos, md5_orig ) ) + internal_error( "Some byte was not properly restored." ); delete[] mbuffer; } if( verbosity >= 0 ) { - std::printf( "\n%8ld bytes tested\n%8ld total decompressions" - "\n%8ld decompressions returned with zero status", + std::printf( "\n%9ld bytes tested\n%9ld total decompressions" + "\n%9ld decompressions returned with zero status", positions, decompressions, successes ); if( successes > 0 ) { if( failed_comparisons > 0 ) - std::printf( ", of which\n%8ld comparisons failed\n", + std::printf( ", of which\n%9ld comparisons failed\n", + failed_comparisons ); + else std::fputs( "\n all comparisons passed\n", stdout ); + } + else std::fputc( '\n', stdout ); + } + return 0; + } + + +/* Test zeroed blocks of given size in LZMA streams in file. + Unless verbosity >= 1, print only the bytes with interesting results. 
*/ +int lunzcrash_block( const char * const input_filename, const int sector_size ) + { + struct stat in_stats; // not used + const int infd = open_instream( input_filename, &in_stats, false, true ); + if( infd < 0 ) return 1; + + const Lzip_index lzip_index( infd, true, true ); + if( lzip_index.retval() != 0 ) + { show_file_error( input_filename, lzip_index.error().c_str() ); + return lzip_index.retval(); } + if( verbosity >= 2 ) printf( "Testing file '%s'\n", input_filename ); + + const long long cdata_size = lzip_index.cdata_size(); + long decompressions = 0, successes = 0, failed_comparisons = 0; + int pct = ( cdata_size >= 1000 && isatty( STDERR_FILENO ) ) ? 0 : 100; + uint8_t * const block = new uint8_t[sector_size]; + for( long i = 0; i < lzip_index.members(); ++i ) + { + const long long mpos = lzip_index.mblock( i ).pos(); + const long long msize = lzip_index.mblock( i ).size(); + long pos = Lzip_header::size + 1; + const long end = msize - sector_size - 20; + if( end <= pos ) continue; // sector_size larger than LZMA stream + const unsigned dictionary_size = lzip_index.dictionary_size( i ); + uint8_t * const mbuffer = read_member( infd, mpos, msize ); + if( !mbuffer ) return 1; + uint8_t md5_orig[16]; + if( !verify_member( mbuffer, msize, dictionary_size, input_filename, + md5_orig ) ) return 2; + long pct_pos = next_pct_pos( lzip_index, i, pct, sector_size ); + if( verbosity >= 0 ) // give a clue of the range being tested + std::printf( "Testing blocks of size %u from pos %llu to %llu\n", + sector_size, mpos + pos, mpos + end - 1 ); + LZ_mtester master( mbuffer, msize, dictionary_size ); + uint8_t * const buffer2 = new uint8_t[dictionary_size]; + for( ; pos < end; ++pos ) + { + const long pos_limit = pos - 16; + if( pos_limit > 0 && master.test_member( pos_limit ) != -1 ) + { show_error( "Can't advance master." 
); return 1; } + if( verbosity >= 0 && pos >= pct_pos ) + { std::fprintf( stderr, "\r%3u%% done\r", pct ); ++pct; + pct_pos = next_pct_pos( lzip_index, i, pct, sector_size ); } + std::memcpy( block, mbuffer + pos, sector_size ); // save block + std::memset( mbuffer + pos, 0, sector_size ); + ++decompressions; + long failure_pos = 0; + const int result = + test_member_rest( master, buffer2, &failure_pos, mpos + pos ); + if( result <= 0 ) + { + ++successes; + if( verbosity >= 0 ) + { + std::printf( "block %llu,%u passed the test%s", + mpos + pos, sector_size, ( result < 0 ) ? "" : "\n" ); + if( result < 0 ) + std::printf( ", but only consumed %lu bytes of %llu\n", + failure_pos, msize ); + } + if( !compare_member( mbuffer, msize, dictionary_size, mpos + pos, + md5_orig ) ) ++failed_comparisons; + } + else if( result == 1 ) + { + if( verbosity >= 3 || + ( verbosity >= 2 && failure_pos - pos >= sector_size ) || + ( verbosity >= 1 && failure_pos - pos >= 10000 ) || + ( verbosity >= 0 && failure_pos - pos >= 50000 ) ) + std::printf( "block %llu,%u\nDecoder error at pos %llu\n", + mpos + pos, sector_size, mpos + failure_pos ); + } + else if( result == 3 || result == 4 ) // test_member printed the error + {} + else if( verbosity >= 0 ) + { + std::printf( "block %llu,%u\n", mpos + pos, sector_size ); + if( result == 2 ) + std::printf( "File ends unexpectedly at pos %llu\n", + mpos + failure_pos ); + else + std::printf( "Unknown error code '%d'\n", result ); + } + std::memcpy( mbuffer + pos, block, sector_size ); // restore block + } + delete[] buffer2; + if( !compare_member( mbuffer, msize, dictionary_size, mpos + pos, md5_orig ) ) + internal_error( "Block was not properly restored." 
); + delete[] mbuffer; + } + delete[] block; + + if( verbosity >= 0 ) + { + std::printf( "\n%9ld blocks tested\n%9ld total decompressions" + "\n%9ld decompressions returned with zero status", + decompressions, decompressions, successes ); + if( successes > 0 ) + { + if( failed_comparisons > 0 ) + std::printf( ", of which\n%9ld comparisons failed\n", failed_comparisons ); else std::fputs( "\n all comparisons passed\n", stdout ); } @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -15,6 +15,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include "common.h" + class State { int st; @@ -30,11 +32,7 @@ public: static const int next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 }; st = next[st]; } - bool is_char_set_char() - { - if( st < 7 ) { st -= ( st < 4 ) ? st : 3; return true; } - else { st -= ( st < 10 ) ? 3 : 6; return false; } - } + bool is_char_set_char() { set_char(); return st < 4; } void set_match() { st = ( st < 7 ) ? 7 : 10; } void set_rep() { st = ( st < 7 ) ? 8 : 11; } void set_short_rep() { st = ( st < 7 ) ? 
9 : 11; } @@ -172,6 +170,7 @@ public: void update_byte( uint32_t & crc, const uint8_t byte ) const { crc = data[(crc^byte)&0xFF] ^ ( crc >> 8 ); } + // about as fast as it is possible without messing with endianness void update_buf( uint32_t & crc, const uint8_t * const buffer, const int size ) const { @@ -319,23 +318,6 @@ struct Lzip_trailer }; -struct Bad_byte - { - enum Mode { literal, delta, flip }; - long long pos; - Mode mode; - uint8_t value; - - Bad_byte() : pos( -1 ), mode( literal ), value( 0 ) {} - uint8_t operator()( const uint8_t old_value ) const - { - if( mode == delta ) return old_value + value; - if( mode == flip ) return old_value ^ value; - return value; - } - }; - - #ifndef INT64_MAX #define INT64_MAX 0x7FFFFFFFFFFFFFFFLL #endif @@ -381,7 +363,7 @@ struct Member_list // members/gaps/tdata to be dumped/removed/stripped std::vector< Block > range_vector, rrange_vector; Member_list() : damaged( false ), tdata( false ), in( true ), rin( true ) {} - void parse( const char * p ); + void parse_ml( const char * p, const char * const option_name ); bool range() const { return range_vector.size() || rrange_vector.size(); } @@ -451,7 +433,8 @@ int seek_read( const int fd, uint8_t * const buf, const int size, const long long pos ); // defined in lunzcrash.cc -int lunzcrash( const std::string & input_filename ); +int lunzcrash_bit( const char * const input_filename ); +int lunzcrash_block( const char * const input_filename, const int sector_size ); int md5sum_files( const std::vector< std::string > & filenames ); // defined in main.cc @@ -470,14 +453,10 @@ bool open_outstream( const bool force, const bool protect, const bool rw = false, const bool skipping = true ); bool file_exists( const std::string & filename ); void cleanup_and_fail( const int retval ); +bool check_tty_out(); void set_signal_handler(); int close_outstream( const struct stat * const in_statsp ); std::string insert_fixed( std::string name ); -void show_error( const char * const msg, const 
int errcode = 0, - const bool help = false ); -void show_file_error( const char * const filename, const char * const msg, - const int errcode = 0 ); -void internal_error( const char * const msg ); void show_2file_error( const char * const msg1, const char * const name1, const char * const name2, const char * const msg2 ); class Range_decoder; diff --git a/lzip_index.cc b/lzip_index.cc index 66eb30d..eff4d05 100644 --- a/lzip_index.cc +++ b/lzip_index.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -347,7 +347,7 @@ error: } -// Returns members + gaps [+ trailing data]. +// Return members + gaps [+ trailing data]. long Lzip_index::blocks( const bool count_tdata ) const { long n = member_vector.size() + ( count_tdata && cdata_size() < file_size() ); diff --git a/lzip_index.h b/lzip_index.h index 717c06c..0b8ace1 100644 --- a/lzip_index.h +++ b/lzip_index.h @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Exit status: 0 for a normal exit, 1 for environmental problems (file not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid input file, 3 for an internal consistency error - (eg, bug) which caused lziprecover to panic. 
+ (e.g., bug) which caused lziprecover to panic. */ #define _FILE_OFFSET_BITS 64 @@ -39,9 +39,9 @@ #include <unistd.h> #include <utime.h> #include <sys/stat.h> -#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__) +#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__ #include <io.h> -#if defined(__MSVCRT__) +#if defined __MSVCRT__ #define fchmod(x,y) 0 #define fchown(x,y,z) 0 #define SIGHUP SIGTERM @@ -53,7 +53,7 @@ #define S_IWOTH 0 #endif #endif -#if defined(__DJGPP__) +#if defined __DJGPP__ #define S_ISSOCK(x) 0 #define S_ISVTX 0 #endif @@ -71,6 +71,11 @@ #error "Environments where CHAR_BIT != 8 are not supported." #endif +#if ( defined SIZE_MAX && SIZE_MAX < UINT_MAX ) || \ + ( defined SSIZE_MAX && SSIZE_MAX < INT_MAX ) +#error "Environments where 'size_t' is narrower than 'int' are not supported." +#endif + int verbosity = 0; const char * const program_name = "lziprecover"; @@ -89,7 +94,8 @@ const struct { const char * from; const char * to; } known_extensions[] = { enum Mode { m_none, m_alone_to_lz, m_debug_decompress, m_debug_delay, m_debug_repair, m_decompress, m_dump, m_list, m_md5sum, m_merge, m_nrep_stats, m_range_dec, m_remove, m_repair, m_reproduce, - m_show_packets, m_split, m_strip, m_test, m_unzcrash }; + m_show_packets, m_split, m_strip, m_test, m_unzcrash_bit, + m_unzcrash_block }; /* Variable used in signal handler context. It is not declared volatile because the handler never returns. */ @@ -100,14 +106,12 @@ void show_help() { std::printf( "Lziprecover is a data recovery tool and decompressor for files in the lzip\n" "compressed data format (.lz). 
Lziprecover is able to repair slightly damaged\n" - "files, produce a correct file by merging the good parts of two or more\n" - "damaged copies, reproduce a missing (zeroed) sector using a reference file,\n" - "extract data from damaged files, decompress files, and test integrity of\n" - "files.\n" - "\nLziprecover can repair perfectly most files with small errors (up to one\n" - "single-byte error per member), without the need of any extra redundance\n" - "at all. Losing an entire archive just because of a corrupt byte near the\n" - "beginning is a thing of the past.\n" + "files (up to one single-byte error per member), produce a correct file by\n" + "merging the good parts of two or more damaged copies, reproduce a missing\n" + "(zeroed) sector using a reference file, extract data from damaged files,\n" + "decompress files, and test integrity of files.\n" + "\nWith the help of lziprecover, losing an entire archive just because of a\n" + "corrupt byte near the beginning is a thing of the past.\n" "\nLziprecover can remove the damaged members from multimember files, for\n" "example multimember tar.lz archives.\n" "\nLziprecover provides random access to the data in multimember files; it only\n" @@ -150,7 +154,7 @@ void show_help() " -E, --debug-reproduce=<range>[,ss] set range to 0 and try to reproduce file\n" " -M, --md5sum print the MD5 digests of the input files\n" " -S, --nrep-stats[=<val>] print stats of N-byte repeated sequences\n" - " -U, --unzcrash test 1-bit errors in the input file\n" + " -U, --unzcrash=1|B<size> test 1-bit or block errors in input file\n" " -W, --debug-decompress=<pos>,<val> set pos to val and decompress to stdout\n" " -X, --show-packets[=<pos>,<val>] show in stdout the decoded LZMA packets\n" " -Y, --debug-delay=<range> find max error detection delay in <range>\n" @@ -164,7 +168,7 @@ void show_help() "'tar -xf foo.tar.lz' or 'lziprecover -cd foo.tar.lz | tar -xf -'.\n" "\nExit status: 0 for a normal exit, 1 for environmental problems 
(file\n" "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" - "invalid input file, 3 for an internal consistency error (eg, bug) which\n" + "invalid input file, 3 for an internal consistency error (e.g., bug) which\n" "caused lziprecover to panic.\n" "\nReport bugs to lzip-bug@nongnu.org\n" "Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html\n" ); @@ -174,16 +178,14 @@ void show_help() void Pretty_print::operator()( const char * const msg, FILE * const f ) const { - if( verbosity >= 0 ) + if( verbosity < 0 ) return; + if( first_post ) { - if( first_post ) - { - first_post = false; - std::fputs( padded_name.c_str(), f ); - if( !msg ) std::fflush( f ); - } - if( msg ) std::fprintf( f, "%s\n", msg ); + first_post = false; + std::fputs( padded_name.c_str(), f ); + if( !msg ) std::fflush( f ); } + if( msg ) std::fprintf( f, "%s\n", msg ); } @@ -225,41 +227,41 @@ void show_header( const unsigned dictionary_size ) // Colon-separated list of "damaged", "tdata", [r][^]<list> (1 1,3-5,8) -void Member_list::parse( const char * p ) +void Member_list::parse_ml( const char * arg, const char * const option_name ) { while( true ) { - const char * tp = p; // points to terminator; ':' or null + const char * tp = arg; // points to terminator (':' or '\0') while( *tp && *tp != ':' ) ++tp; - const unsigned len = tp - p; - if( std::isalpha( *(const unsigned char *)p ) ) + const unsigned len = tp - arg; + if( std::islower( *(const unsigned char *)arg ) ) { - if( len <= 7 && std::strncmp( "damaged", p, len ) == 0 ) + if( len <= 7 && std::strncmp( "damaged", arg, len ) == 0 ) { damaged = true; goto next; } - if( len <= 5 && std::strncmp( "tdata", p, len ) == 0 ) + if( len <= 5 && std::strncmp( "tdata", arg, len ) == 0 ) { tdata = true; goto next; } } { - const bool reverse = ( *p == 'r' ); - if( reverse ) ++p; - if( *p == '^' ) { ++p; if( reverse ) rin = false; else in = false; } + const bool reverse = ( *arg == 'r' ); + if( reverse ) ++arg; + if( 
*arg == '^' ) { ++arg; if( reverse ) rin = false; else in = false; } std::vector< Block > * rvp = reverse ? &rrange_vector : &range_vector; - while( std::isdigit( *(const unsigned char *)p ) ) + while( std::isdigit( *(const unsigned char *)arg ) ) { const char * tail; - const int pos = getnum( p, 0, 1, INT_MAX, &tail ) - 1; + const int pos = getnum( arg, option_name, 0, 1, INT_MAX, &tail ) - 1; if( rvp->size() && pos < rvp->back().end() ) break; const int size = (*tail == '-') ? - getnum( tail + 1, 0, pos + 1, INT_MAX, &tail ) - pos : 1; + getnum( tail + 1, option_name, 0, pos + 1, INT_MAX, &tail ) - pos : 1; rvp->push_back( Block( pos, size ) ); if( tail == tp ) goto next; - if( *tail == ',' ) p = tail + 1; else break; + if( *tail == ',' ) arg = tail + 1; else break; } } show_error( "Invalid list of members." ); std::exit( 1 ); next: - if( *(p = tp) != 0 ) ++p; else return; + if( *(arg = tp) != 0 ) ++arg; else return; } } @@ -268,70 +270,60 @@ namespace { // Recognized formats: <digit> 'a' m[<match_length>] // -int parse_lzip_level( const char * const p ) +int parse_lzip_level( const char * const arg, const char * const option_name ) { - if( *p == 'a' || std::isdigit( *(const unsigned char *)p ) ) return *p; - if( *p != 'm' ) + if( *arg == 'a' || std::isdigit( *(const unsigned char *)arg ) ) return *arg; + if( *arg != 'm' ) { - show_error( "Bad argument in option '--lzip-level'.", 0, true ); + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Bad argument in option '%s'.\n", + program_name, option_name ); std::exit( 1 ); } - if( p[1] == 0 ) return -1; - return -getnum( p + 1, 0, min_match_len_limit, max_match_len ); + if( arg[1] == 0 ) return -1; + return -getnum( arg + 1, option_name, 0, min_match_len_limit, max_match_len ); } /* Recognized format: <range>[,<sector_size>] range formats: <begin> <begin>-<end> <begin>,<size> ,<size> */ -void parse_range( const char * const ptr, Block & range, - int * const sector_sizep = 0 ) +void parse_range( const char * const 
arg, const char * const pn, + Block & range, int * const sector_sizep = 0 ) { - const char * tail = ptr; + const char * tail = arg; long long value = - ( ptr[0] == ',' ) ? 0 : getnum( ptr, 0, 0, INT64_MAX - 1, &tail ); + ( arg[0] == ',' ) ? 0 : getnum( arg, pn, 0, 0, INT64_MAX - 1, &tail ); if( tail[0] == 0 || tail[0] == ',' || tail[0] == '-' ) { range.pos( value ); if( tail[0] == 0 ) { range.size( INT64_MAX - value ); return; } const bool is_size = ( tail[0] == ',' ); if( sector_sizep && tail[1] == ',' ) { value = INT64_MAX - value; ++tail; } - else value = getnum( tail + 1, 0, 1, INT64_MAX, &tail ); // size - if( is_size || value > range.pos() ) + else value = getnum( tail + 1, pn, 0, 1, INT64_MAX, &tail ); // size + if( !is_size && value <= range.pos() ) { - if( !is_size ) value -= range.pos(); - if( INT64_MAX - range.pos() >= value ) - { - range.size( value ); - if( sector_sizep && tail[0] == ',' ) - *sector_sizep = getnum( tail + 1, 0, 8, INT_MAX ); - return; - } + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Begin must be < end in range argument " + "of option '%s'.\n", program_name, pn ); + std::exit( 1 ); + } + if( !is_size ) value -= range.pos(); + if( INT64_MAX - value >= range.pos() ) + { + range.size( value ); + if( sector_sizep && tail[0] == ',' ) + *sector_sizep = getnum( tail + 1, pn, 0, 8, INT_MAX ); + return; } } - show_error( "Bad decompression range.", 0, true ); + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Bad decompression range in option '%s'.\n", + program_name, pn ); std::exit( 1 ); } -// Recognized formats: <pos>,<value> <pos>,+<value> <pos>,f<value> -// -void parse_pos_value( const char * const ptr, Bad_byte & bad_byte ) - { - const char * tail; - bad_byte.pos = getnum( ptr, 0, 0, INT64_MAX, &tail ); - if( tail[0] != ',' ) - { - show_error( "Bad separator between <pos> and <val>.", 0, true ); - std::exit( 1 ); - } - if( tail[1] == '+' ) { ++tail; bad_byte.mode = Bad_byte::delta; } - else if( tail[1] == 'f' ) { ++tail; 
bad_byte.mode = Bad_byte::flip; } - else bad_byte.mode = Bad_byte::literal; - bad_byte.value = getnum( tail + 1, 0, 0, 255 ); - } - - void one_file( const int files ) { if( files != 1 ) @@ -353,6 +345,23 @@ void set_mode( Mode & program_mode, const Mode new_mode ) } +void parse_u( const char * const arg, const char * const option_name, + Mode & program_mode, int & sector_size ) + { + if( arg[0] == '1' ) set_mode( program_mode, m_unzcrash_bit ); + else if( arg[0] == 'B' ) + { set_mode( program_mode, m_unzcrash_block ); + sector_size = getnum( arg + 1, option_name, 0, 1, INT_MAX ); } + else + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Bad argument for option '%s'.\n", + program_name, option_name ); + std::exit( 1 ); + } + } + + int extension_index( const std::string & name ) { for( int eindex = 0; known_extensions[eindex].from; ++eindex ) @@ -506,6 +515,17 @@ void cleanup_and_fail( const int retval ) std::exit( retval ); } + +bool check_tty_out() + { + if( isatty( outfd ) ) + { show_file_error( output_filename.size() ? + output_filename.c_str() : "(stdout)", + "I won't write compressed data to a terminal." ); + return false; } + return true; + } + namespace { extern "C" void signal_handler( int ) @@ -521,21 +541,14 @@ bool check_tty_in( const char * const input_filename, const int infd, if( isatty( infd ) ) // all modes read compressed data { show_file_error( input_filename, "I won't read compressed data from a terminal." ); - close( infd ); set_retval( retval, 1 ); + close( infd ); set_retval( retval, 2 ); if( program_mode != m_test ) cleanup_and_fail( retval ); return false; } return true; } bool check_tty_out( const Mode program_mode ) - { - if( program_mode == m_alone_to_lz && isatty( outfd ) ) - { show_file_error( output_filename.size() ? - output_filename.c_str() : "(stdout)", - "I won't write compressed data to a terminal." 
); - return false; } - return true; - } + { return program_mode != m_alone_to_lz || ::check_tty_out(); } // Set permissions, owner, and times. @@ -611,9 +624,10 @@ int decompress( const unsigned long long cfile_size, const int infd, const bool ignore_trailing, const bool loose_trailing, const bool testing ) { - int retval = 0; unsigned long long partial_file_pos = 0; Range_decoder rdec( infd ); + int retval = 0; + for( bool first_member = true; ; first_member = false ) { Lzip_header header; @@ -708,16 +722,6 @@ std::string insert_fixed( std::string name ) } -void show_file_error( const char * const filename, const char * const msg, - const int errcode ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: %s: %s%s%s\n", program_name, filename, msg, - ( errcode > 0 ) ? ": " : "", - ( errcode > 0 ) ? std::strerror( errcode ) : "" ); - } - - void show_2file_error( const char * const msg1, const char * const name1, const char * const name2, const char * const msg2 ) { @@ -765,7 +769,6 @@ int main( const int argc, const char * const argv[] ) Bad_byte bad_byte; Member_list member_list; std::string default_output_filename; - std::vector< std::string > filenames; const char * lzip_name = "lzip"; // default is lzip const char * reference_filename = 0; Mode program_mode = m_none; @@ -805,7 +808,7 @@ int main( const int argc, const char * const argv[] ) { 's', "split", Arg_parser::no }, { 'S', "nrep-stats", Arg_parser::maybe }, { 't', "test", Arg_parser::no }, - { 'U', "unzcrash", Arg_parser::no }, + { 'U', "unzcrash", Arg_parser::yes }, { 'v', "verbose", Arg_parser::no }, { 'V', "version", Arg_parser::no }, { 'W', "debug-decompress", Arg_parser::yes }, @@ -830,6 +833,7 @@ int main( const int argc, const char * const argv[] ) { const int code = parser.code( argind ); if( !code ) break; // no more options + const char * const pn = parser.parsed_name( argind ).c_str(); const std::string & sarg = parser.argument( argind ); const char * const arg = sarg.c_str(); switch( code ) 
@@ -839,10 +843,10 @@ int main( const int argc, const char * const argv[] ) case 'c': to_stdout = true; break; case 'd': set_mode( program_mode, m_decompress ); break; case 'D': set_mode( program_mode, m_range_dec ); - parse_range( arg, range ); break; + parse_range( arg, pn, range ); break; case 'e': set_mode( program_mode, m_reproduce ); break; case 'E': set_mode( program_mode, m_reproduce ); - parse_range( arg, range, &sector_size ); break; + parse_range( arg, pn, range, &sector_size ); break; case 'f': force = true; break; case 'h': show_help(); return 0; case 'i': ignore_errors = true; break; @@ -856,35 +860,35 @@ int main( const int argc, const char * const argv[] ) case 'q': verbosity = -1; break; case 'R': set_mode( program_mode, m_repair ); break; case 's': set_mode( program_mode, m_split ); break; - case 'S': if( arg[0] ) repeated_byte = getnum( arg, 0, 0, 255 ); + case 'S': if( arg[0] ) repeated_byte = getnum( arg, pn, 0, 0, 255 ); set_mode( program_mode, m_nrep_stats ); break; case 't': set_mode( program_mode, m_test ); break; - case 'U': set_mode( program_mode, m_unzcrash ); break; + case 'U': parse_u( arg, pn, program_mode, sector_size ); break; case 'v': if( verbosity < 4 ) ++verbosity; break; case 'V': show_version(); return 0; case 'W': set_mode( program_mode, m_debug_decompress ); - parse_pos_value( arg, bad_byte ); break; + bad_byte.parse_bb( arg, pn ); break; case 'X': set_mode( program_mode, m_show_packets ); - if( arg[0] ) { parse_pos_value( arg, bad_byte ); } break; + if( arg[0] ) { bad_byte.parse_bb( arg, pn ); } break; case 'Y': set_mode( program_mode, m_debug_delay ); - parse_range( arg, range ); break; + parse_range( arg, pn, range ); break; case 'Z': set_mode( program_mode, m_debug_repair ); - parse_pos_value( arg, bad_byte ); break; + bad_byte.parse_bb( arg, pn ); break; case opt_du: set_mode( program_mode, m_dump ); - member_list.parse( arg ); break; + member_list.parse_ml( arg, pn ); break; case opt_lt: loose_trailing = true; break; - case 
opt_lzl: lzip_level = parse_lzip_level( arg ); break; + case opt_lzl: lzip_level = parse_lzip_level( arg, pn ); break; case opt_lzn: lzip_name = arg; break; case opt_ref: reference_filename = arg; break; case opt_re: set_mode( program_mode, m_remove ); - member_list.parse( arg ); break; + member_list.parse_ml( arg, pn ); break; case opt_st: set_mode( program_mode, m_strip ); - member_list.parse( arg ); break; + member_list.parse_ml( arg, pn ); break; default : internal_error( "uncaught option." ); } } // end process options -#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__) +#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__ setmode( STDIN_FILENO, O_BINARY ); setmode( STDOUT_FILENO, O_BINARY ); #endif @@ -895,6 +899,7 @@ int main( const int argc, const char * const argv[] ) return 1; } + std::vector< std::string > filenames; bool filenames_given = false; for( ; argind < parser.arguments(); ++argind ) { @@ -963,9 +968,12 @@ int main( const int argc, const char * const argv[] ) one_file( filenames.size() ); return split_file( filenames[0], default_output_filename, force ); case m_test: break; - case m_unzcrash: + case m_unzcrash_bit: + one_file( filenames.size() ); + return lunzcrash_bit( filenames[0].c_str() ); + case m_unzcrash_block: one_file( filenames.size() ); - return lunzcrash( filenames[0] ); + return lunzcrash_block( filenames[0].c_str(), sector_size ); } } catch( std::bad_alloc & ) { show_error( mem_msg ); cleanup_and_fail( 1 ); } diff --git a/main_common.cc b/main_common.cc index 386a5b1..8f56a13 100644 --- a/main_common.cc +++ b/main_common.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,7 +17,7 @@ namespace { -const char * const program_year = "2021"; +const char * const program_year = "2022"; const char * const mem_msg = "Not enough memory."; void show_version() @@ -30,19 +30,58 @@ void show_version() } +// separate large numbers >= 100_000 in groups of 3 digits using '_' +const char * format_num3( long long num ) + { + const char * const si_prefix = "kMGTPEZY"; + const char * const binary_prefix = "KMGTPEZY"; + enum { buffers = 8, bufsize = 4 * sizeof (long long) }; + static char buffer[buffers][bufsize]; // circle of static buffers for printf + static int current = 0; + + char * const buf = buffer[current++]; current %= buffers; + char * p = buf + bufsize - 1; // fill the buffer backwards + *p = 0; // terminator + const bool negative = num < 0; + if( negative ) num = -num; + if( num > 1024 ) + { + char prefix = 0; // try binary first, then si + for( int i = 0; i < 8 && num >= 1024 && num % 1024 == 0; ++i ) + { num /= 1024; prefix = binary_prefix[i]; } + if( prefix ) *(--p) = 'i'; + else + for( int i = 0; i < 8 && num >= 1000 && num % 1000 == 0; ++i ) + { num /= 1000; prefix = si_prefix[i]; } + if( prefix ) *(--p) = prefix; + } + const bool split = num >= 100000; + + for( int i = 0; ; ) + { + *(--p) = num % 10 + '0'; num /= 10; if( num == 0 ) break; + if( split && ++i >= 3 ) { i = 0; *(--p) = '_'; } + } + if( negative ) *(--p) = '-'; + return p; + } + + // Recognized formats: <num>[YZEPTGM][i][Bs], <num>k[Bs], <num>Ki[Bs] // -long long getnum( const char * const ptr, const int hardbs, - const long long llimit = -LLONG_MAX, +long long getnum( const char * const arg, const char * const option_name, + const int hardbs, const long long llimit = -LLONG_MAX, const long long ulimit = LLONG_MAX, const char ** const tailp = 0 ) { char * tail; errno = 0; - long long result = strtoll( ptr, &tail, 0 ); - if( tail 
== ptr ) + long long result = strtoll( arg, &tail, 0 ); + if( tail == arg ) { - show_error( "Bad or missing numerical argument.", 0, true ); + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Bad or missing numerical argument in " + "option '%s'.\n", program_name, option_name ); std::exit( 1 ); } @@ -73,7 +112,9 @@ long long getnum( const char * const ptr, const int hardbs, if( exponent < 0 || ( usuf == 's' && hardbs <= 0 ) || ( !tailp && tail[0] != 0 ) ) { - show_error( "Bad multiplier in numerical argument.", 0, true ); + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Bad multiplier in numerical argument of " + "option '%s'.\n", program_name, option_name ); std::exit( 1 ); } for( int i = 0; i < exponent; ++i ) @@ -90,7 +131,10 @@ long long getnum( const char * const ptr, const int hardbs, if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE; if( errno ) { - show_error( "Numerical argument out of limits." ); + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Numerical argument out of limits [%s,%s] " + "in option '%s'.\n", program_name, format_num3( llimit ), + format_num3( ulimit ), option_name ); std::exit( 1 ); } if( tailp ) *tailp = tail; @@ -100,6 +144,27 @@ long long getnum( const char * const ptr, const int hardbs, } // end namespace +// Recognized formats: <pos>,<value> <pos>,+<value> <pos>,f<value> +// +void Bad_byte::parse_bb( const char * const arg, const char * const pn ) + { + option_name = pn; + const char * tail; + pos = getnum( arg, option_name, 0, 0, LLONG_MAX, &tail ); + if( tail[0] != ',' ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Bad separator between <pos> and <val> in " + "argument of option '%s'.\n", program_name, option_name ); + std::exit( 1 ); + } + if( tail[1] == '+' ) { ++tail; mode = delta; } + else if( tail[1] == 'f' ) { ++tail; mode = flip; } + else mode = literal; + value = getnum( tail + 1, option_name, 0, 0, 255 ); + } + + void show_error( const char * const msg, const int errcode, const bool 
help ) { if( verbosity < 0 ) return; @@ -113,6 +178,16 @@ void show_error( const char * const msg, const int errcode, const bool help ) } +void show_file_error( const char * const filename, const char * const msg, + const int errcode ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: %s: %s%s%s\n", program_name, filename, msg, + ( errcode > 0 ) ? ": " : "", + ( errcode > 0 ) ? std::strerror( errcode ) : "" ); + } + + void internal_error( const char * const msg ) { if( verbosity >= 0 ) @@ -1,6 +1,6 @@ /* Functions to compute MD5 message digest of memory blocks according to the definition of MD5 in RFC 1321 from April 1992. - Copyright (C) 2020, 2021 Antonio Diaz Diaz. + Copyright (C) 2020-2022 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -1,6 +1,6 @@ /* Functions to compute MD5 message digest of memory blocks according to the definition of MD5 in RFC 1321 from April 1992. - Copyright (C) 2020, 2021 Antonio Diaz Diaz. + Copyright (C) 2020-2022 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -64,14 +64,14 @@ void LZ_mtester::print_block( const int len ) } -void LZ_mtester::duplicate_buffer() +void LZ_mtester::duplicate_buffer( uint8_t * const buffer2 ) { - uint8_t * const tmp = new uint8_t[dictionary_size]; if( data_position() > 0 ) - std::memcpy( tmp, buffer, std::min( data_position(), - (unsigned long long)dictionary_size ) ); - else tmp[dictionary_size-1] = 0; // prev_byte of first byte - buffer = tmp; + std::memcpy( buffer2, buffer, std::min( data_position(), + (unsigned long long)dictionary_size ) ); + else buffer2[dictionary_size-1] = 0; // prev_byte of first byte + buffer = buffer2; + buffer_is_external = true; } @@ -103,7 +103,7 @@ bool LZ_mtester::verify_trailer( FILE * const f, unsigned long long byte_pos ) return false; } const unsigned long long data_size = data_position(); - const unsigned long long member_size = member_position(); + const unsigned long long member_size = rdec.member_position(); bool error = false; const unsigned td_crc = trailer->data_crc(); @@ -190,11 +190,11 @@ int LZ_mtester::test_member( const unsigned long long mpos_limit, rep0 = distance; } state.set_rep(); - len = min_match_len + rdec.decode_len( rep_len_model, pos_state ); + len = rdec.decode_len( rep_len_model, pos_state ); } else // match { - len = min_match_len + rdec.decode_len( match_len_model, pos_state ); + len = rdec.decode_len( match_len_model, pos_state ); unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] ); if( distance >= start_dis_model ) { @@ -230,11 +230,11 @@ int LZ_mtester::test_member( const unsigned long long mpos_limit, if( rep0 > max_rep0 ) max_rep0 = rep0; state.set_match(); if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) ) - { flush_data(); return 1; } + { if( outfd >= 0 ) { flush_data(); } return 1; } } copy_block( rep0, len ); } - flush_data(); + if( outfd >= 0 ) 
flush_data(); return 2; } @@ -312,14 +312,14 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos, rep0 = distance; } state.set_rep(); - len = min_match_len + rdec.decode_len( rep_len_model, pos_state ); + len = rdec.decode_len( rep_len_model, pos_state ); if( show_packets ) std::printf( "%6llu %6llu rep%c %6u,%3d (%6llu)", mp, dp, rep + '0', rep0 + 1, len, dp - rep0 - 1 ); } else // match { - len = min_match_len + rdec.decode_len( match_len_model, pos_state ); + len = rdec.decode_len( match_len_model, pos_state ); unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] ); if( distance >= start_dis_model ) { @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,7 +17,7 @@ class Range_mtester { - const uint8_t * const buffer; // input buffer + const uint8_t * const buffer; // input buffer const long long buffer_size; long long pos; // current pos in buffer uint32_t code; @@ -96,37 +96,78 @@ public: } else { - range -= bound; code -= bound; + range -= bound; bm.probability -= bm.probability >> bit_model_move_bits; return 1; } } - unsigned decode_tree3( Bit_model bm[] ) + void decode_symbol_bit( Bit_model & bm, unsigned & symbol ) + { + normalize(); + symbol <<= 1; + const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability; + if( code < bound ) + { + range = bound; + bm.probability += + ( bit_model_total - bm.probability ) >> bit_model_move_bits; + } + else + { + code -= bound; + range -= bound; + bm.probability -= bm.probability >> bit_model_move_bits; + symbol |= 1; + } + } + + void decode_symbol_bit_reversed( Bit_model & bm, unsigned & model, + unsigned & symbol, const int i ) { - unsigned symbol = 2 | decode_bit( bm[1] ); - symbol = ( symbol << 1 ) | 
decode_bit( bm[symbol] ); - symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); - return symbol & 7; + normalize(); + model <<= 1; + const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability; + if( code < bound ) + { + range = bound; + bm.probability += + ( bit_model_total - bm.probability ) >> bit_model_move_bits; + } + else + { + code -= bound; + range -= bound; + bm.probability -= bm.probability >> bit_model_move_bits; + model |= 1; + symbol |= 1 << i; + } } unsigned decode_tree6( Bit_model bm[] ) { - unsigned symbol = 2 | decode_bit( bm[1] ); - symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); - symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); - symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); - symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); - symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); + unsigned symbol = 1; + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); return symbol & 0x3F; } unsigned decode_tree8( Bit_model bm[] ) { unsigned symbol = 1; - for( int i = 0; i < 8; ++i ) - symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); return symbol & 0xFF; } @@ -135,20 +176,18 @@ public: unsigned model = 1; unsigned symbol = 0; for( int i = 0; i < num_bits; ++i ) - { - const unsigned bit = decode_bit( bm[model] ); - model <<= 1; model += bit; - symbol |= ( bit << i ); - } + decode_symbol_bit_reversed( bm[model], model, symbol, i ); return symbol; } unsigned decode_tree_reversed4( 
Bit_model bm[] ) { - unsigned symbol = decode_bit( bm[1] ); - symbol += decode_bit( bm[2+symbol] ) << 1; - symbol += decode_bit( bm[4+symbol] ) << 2; - symbol += decode_bit( bm[8+symbol] ) << 3; + unsigned model = 1; + unsigned symbol = 0; + decode_symbol_bit_reversed( bm[model], model, symbol, 0 ); + decode_symbol_bit_reversed( bm[model], model, symbol, 1 ); + decode_symbol_bit_reversed( bm[model], model, symbol, 2 ); + decode_symbol_bit_reversed( bm[model], model, symbol, 3 ); return symbol; } @@ -163,8 +202,7 @@ public: symbol <<= 1; symbol |= bit; if( match_bit >> 8 != bit ) { - while( symbol < 0x100 ) - symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); + while( symbol < 0x100 ) decode_symbol_bit( bm[symbol], symbol ); break; } } @@ -173,11 +211,24 @@ public: unsigned decode_len( Len_model & lm, const int pos_state ) { + Bit_model * bm; + unsigned mask, offset, symbol = 1; + if( decode_bit( lm.choice1 ) == 0 ) - return decode_tree3( lm.bm_low[pos_state] ); + { bm = lm.bm_low[pos_state]; mask = 7; offset = 0; goto len3; } if( decode_bit( lm.choice2 ) == 0 ) - return len_low_symbols + decode_tree3( lm.bm_mid[pos_state] ); - return len_low_symbols + len_mid_symbols + decode_tree8( lm.bm_high ); + { bm = lm.bm_mid[pos_state]; mask = 7; offset = len_low_symbols; goto len3; } + bm = lm.bm_high; mask = 0xFF; offset = len_low_symbols + len_mid_symbols; + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); +len3: + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); + decode_symbol_bit( bm[symbol], symbol ); + return ( symbol & mask ) + min_match_len + offset; } }; @@ -206,6 +257,7 @@ class LZ_mtester unsigned max_packet_size_; // maximum packet size found unsigned max_marker_size_; // maximum marker size found bool pos_wrapped; + bool buffer_is_external; Bit_model 
bm_literal[1<<literal_context_bits][0x300]; Bit_model bm_match[State::states][pos_states]; @@ -306,11 +358,11 @@ public: max_rep0( 0 ), max_packet_size_( 0 ), max_marker_size_( 0 ), - pos_wrapped( false ) + pos_wrapped( false ), buffer_is_external( false ) // prev_byte of first byte; also for peek( 0 ) on corrupt file { buffer[dictionary_size-1] = 0; } - ~LZ_mtester() { delete[] buffer; } + ~LZ_mtester() { if( !buffer_is_external ) delete[] buffer; } unsigned crc() const { return crc_ ^ 0xFFFFFFFFU; } unsigned long long data_position() const { return partial_data_pos + pos; } @@ -324,13 +376,14 @@ public: unsigned max_packet_size() const { return max_packet_size_; } unsigned max_marker_size() const { return max_marker_size_; } - const uint8_t * get_buffers( const uint8_t ** prev_bufferp, - int * sizep, int * prev_sizep ) const + const uint8_t * get_buffers( const uint8_t ** const prev_bufferp, + int * const sizep, int * const prev_sizep ) const { *sizep = ( pos_wrapped && pos == 0 ) ? dictionary_size : pos; *prev_sizep = ( pos_wrapped && pos > 0 ) ? dictionary_size - pos : 0; *prev_bufferp = buffer + pos; return buffer; } - void duplicate_buffer(); + void duplicate_buffer( uint8_t * const buffer2 ); + // these two functions set max_rep0 int test_member( const unsigned long long mpos_limit = LLONG_MAX, const unsigned long long dpos_limit = LLONG_MAX, diff --git a/nrep_stats.cc b/nrep_stats.cc index 2f335e6..1f249ff 100644 --- a/nrep_stats.cc +++ b/nrep_stats.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/range_dec.cc b/range_dec.cc index 24ac5e8..ea7f7e7 100644 --- a/range_dec.cc +++ b/range_dec.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -107,10 +107,11 @@ const LZ_mtester * prepare_master( const uint8_t * const buffer, } -bool test_member_rest( const LZ_mtester & master, long * const failure_posp = 0 ) +bool test_member_rest( const LZ_mtester & master, uint8_t * const buffer2, + long * const failure_posp = 0 ) { - LZ_mtester mtester( master ); - mtester.duplicate_buffer(); + LZ_mtester mtester( master ); // tester with external buffer + mtester.duplicate_buffer( buffer2 ); if( mtester.test_member() == 0 && mtester.finished() ) return true; if( failure_posp ) *failure_posp = mtester.member_position(); return false; @@ -122,13 +123,14 @@ long repair_member( const long long mpos, const long long msize, uint8_t * const mbuffer, const long begin, const long end, const unsigned dictionary_size, const char terminator ) { + uint8_t * const buffer2 = new uint8_t[dictionary_size]; for( long pos = end; pos >= begin && pos > end - 50000; ) { const long min_pos = std::max( begin, pos - 100 ); const unsigned long pos_limit = std::max( min_pos - 16, 0L ); const LZ_mtester * master = prepare_master( mbuffer, msize, pos_limit, dictionary_size ); - if( !master ) return -1; + if( 
!master ) { delete[] buffer2; return -1; } for( ; pos >= min_pos; --pos ) { if( verbosity >= 2 ) @@ -139,12 +141,14 @@ long repair_member( const long long mpos, const long long msize, for( int j = 0; j < 255; ++j ) { ++mbuffer[pos]; - if( test_member_rest( *master ) ) { delete master; return pos; } + if( test_member_rest( *master, buffer2 ) ) + { delete master; delete[] buffer2; return pos; } } ++mbuffer[pos]; } delete master; } + delete[] buffer2; return 0; } @@ -297,6 +301,7 @@ int debug_delay( const std::string & input_filename, Block range, } uint8_t * const mbuffer = read_member( infd, mpos, msize ); if( !mbuffer ) return 1; + uint8_t * const buffer2 = new uint8_t[dictionary_size]; long pos = std::max( range.pos() - mpos, Lzip_header::size + 1LL ); const long end = std::min( range.end() - mpos, msize ); long max_delay = 0; @@ -305,8 +310,8 @@ int debug_delay( const std::string & input_filename, Block range, const unsigned long pos_limit = std::max( pos - 16, 0L ); const LZ_mtester * master = prepare_master( mbuffer, msize, pos_limit, dictionary_size ); - if( !master ) - { show_error( "Can't prepare master." ); return 1; } + if( !master ) { show_error( "Can't prepare master." 
); + delete[] buffer2; delete[] mbuffer; return 1; } const long partial_end = std::min( pos + 100, end ); for( ; pos < partial_end; ++pos ) { @@ -321,7 +326,7 @@ int debug_delay( const std::string & input_filename, Block range, ++mbuffer[pos]; if( j == 255 ) break; long failure_pos = 0; - if( test_member_rest( *master, &failure_pos ) ) continue; + if( test_member_rest( *master, buffer2, &failure_pos ) ) continue; const long delay = failure_pos - pos; if( delay > max_delay ) { max_delay = delay; value = mbuffer[pos]; } } @@ -335,6 +340,7 @@ int debug_delay( const std::string & input_filename, Block range, } delete master; } + delete[] buffer2; delete[] mbuffer; print_pending_newline( terminator ); } @@ -386,19 +392,15 @@ int debug_repair( const std::string & input_filename, long failure_pos = 0; if( bad_byte.pos != 5 || isvalid_ds( header.dictionary_size() ) ) { - const LZ_mtester * master = - prepare_master( mbuffer, msize, 0, header.dictionary_size() ); - if( !master ) - { show_error( "Can't prepare master." ); delete[] mbuffer; return 1; } - if( test_member_rest( *master, &failure_pos ) ) + LZ_mtester mtester( mbuffer, msize, header.dictionary_size() ); + if( mtester.test_member() == 0 && mtester.finished() ) { if( verbosity >= 1 ) std::fputs( "Member decompressed with no errors.\n", stdout ); - delete master; delete[] mbuffer; return 0; } - delete master; + failure_pos = mtester.member_position(); } if( verbosity >= 2 ) { @@ -435,6 +437,7 @@ int debug_repair( const std::string & input_filename, the packet, not counting the data present in the range decoder before and after the decoding. The max marker size of a 'Sync Flush marker' does not include the 5 bytes read by rdec.load). + if bad_byte.pos >= cdata_size, bad_byte is ignored. 
*/ int debug_decompress( const std::string & input_filename, const Bad_byte & bad_byte, const bool show_packets ) @@ -499,7 +502,9 @@ int debug_decompress( const std::string & input_filename, std::printf( "%s at pos %llu\n", ( result == 2 ) ? "File ends unexpectedly" : "Decoder error", mpos + mtester.member_position() ); - retval = 2; break; + retval = 2; + if( result != 3 || !mtester.finished() || mtester.data_position() != + (unsigned long long)lzip_index.dblock( i ).size() ) break; } if( i + 1 < lzip_index.members() && show_packets ) std::fputc( '\n', stdout ); diff --git a/reproduce.cc b/reproduce.cc index 40104b7..58a0c5d 100644 --- a/reproduce.cc +++ b/reproduce.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -52,7 +52,7 @@ int fatal_retval = 0; int fatal( const int retval ) { if( fatal_retval == 0 ) fatal_retval = retval; return retval; } -// Returns the position of the damaged area in the member, or -1 if error. +// Return the position of the damaged area in the member, or -1 if error. long long zeroed_sector_pos( const char * const input_filename, const uint8_t * const mbuffer, const long long msize, long long * const sizep, uint8_t * const valuep ) @@ -121,7 +121,7 @@ const LZ_mtester * prepare_master2( const uint8_t * const mbuffer, beginning of the file or to the beginning of the dictionary. Choose the match nearest to the beginning of the file. As a fallback, locate the longest partial match at least 512 bytes long. - Returns the offset in file of the first undecoded byte, or -1 if no match. */ + Return the offset in file of the first undecoded byte, or -1 if no match. 
*/ long long match_file( const LZ_mtester & master, const uint8_t * const rbuf, const long long rsize, const char * const reference_filename ) @@ -222,8 +222,9 @@ void show_fork_error( const char * const prog_name ) } -/* Returns -1 if child not terminated, 1 in case of error, or exit status of - child process 'pid'. */ +/* Return -1 if child not terminated, 1 in case of error, or exit status of + child process 'pid'. +*/ int child_status( const pid_t pid, const char * const name ) { int status; @@ -245,7 +246,7 @@ int child_status( const pid_t pid, const char * const name ) } -// Returns exit status of child process 'pid', or 1 in case of error. +// Return exit status of child process 'pid', or 1 in case of error. // int wait_for_child( const pid_t pid, const char * const name ) { @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/testsuite/check.sh b/testsuite/check.sh index e78b7f7..af8d787 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! /bin/sh # check script for Lziprecover - Data recovery tool for the lzip format -# Copyright (C) 2009-2021 Antonio Diaz Diaz. +# Copyright (C) 2009-2022 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute, and modify it. 
@@ -270,15 +270,21 @@ lines=$("${LZIP}" -tvv "${in_em}" 2>&1 | wc -l) || test_failed $LINENO lines=$("${LZIP}" -lvv "${in_em}" | wc -l) || test_failed $LINENO [ "${lines}" -eq 11 ] || test_failed $LINENO "${lines}" +"${LZIP}" -cd "${fox_lz}" > fox || test_failed $LINENO cat "${in_lz}" > copy.lz || framework_failure "${LZIP}" -dk copy.lz || test_failed $LINENO cmp in copy || test_failed $LINENO -printf "to be overwritten" > copy || framework_failure -"${LZIP}" -d copy.lz 2> /dev/null +cat fox > copy || framework_failure +cat "${in_lz}" > out.lz || framework_failure +rm -f out || framework_failure +"${LZIP}" -d copy.lz out.lz 2> /dev/null # skip copy, decompress out [ $? = 1 ] || test_failed $LINENO +cmp fox copy || test_failed $LINENO +cmp in out || test_failed $LINENO "${LZIP}" -df copy.lz || test_failed $LINENO [ ! -e copy.lz ] || test_failed $LINENO cmp in copy || test_failed $LINENO +rm -f out || framework_failure printf "to be overwritten" > copy || framework_failure "${LZIP}" -df -o copy < "${in_lz}" || test_failed $LINENO @@ -308,7 +314,7 @@ rm -f copy anyothername.out || framework_failure [ $? = 1 ] || test_failed $LINENO "${LZIP}" -cdq in "${in_lz}" > copy [ $? = 2 ] || test_failed $LINENO -cat copy in | cmp in - || test_failed $LINENO +cat copy in | cmp in - || test_failed $LINENO # copy must be empty "${LZIP}" -cdq nx_file.lz "${in_lz}" > copy [ $? = 1 ] || test_failed $LINENO cmp in copy || test_failed $LINENO @@ -448,7 +454,6 @@ for i in fox_v2.lz fox_s11.lz fox_de20.lz \ [ $? = 2 ] || test_failed $LINENO $i done -"${LZIP}" -cd "${fox_lz}" > fox || test_failed $LINENO for i in fox_bcrc.lz fox_crc0.lz fox_das46.lz fox_mes81.lz ; do "${LZIP}" -cdq "${testdir}"/$i > out [ $? = 2 ] || test_failed $LINENO $i diff --git a/unzcrash.cc b/unzcrash.cc index d897021..0c92af8 100644 --- a/unzcrash.cc +++ b/unzcrash.cc @@ -1,6 +1,6 @@ /* Unzcrash - Tests robustness of decompressors to corrupted data. Inspired by unzcrash.c from Julian Seward's bzip2. 
- Copyright (C) 2008-2021 Antonio Diaz Diaz. + Copyright (C) 2008-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,36 +19,36 @@ Exit status: 0 for a normal exit, 1 for environmental problems (file not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid input file, 3 for an internal consistency error - (eg, bug) which caused unzcrash to panic. + (e.g., bug) which caused unzcrash to panic. */ #define _FILE_OFFSET_BITS 64 #include <algorithm> #include <cerrno> -#include <climits> +#include <climits> // SSIZE_MAX #include <csignal> #include <cstdio> #include <cstdlib> #include <cstring> #include <string> #include <vector> -#include <stdint.h> +#include <stdint.h> // SIZE_MAX #include <unistd.h> +#include <sys/wait.h> #include "arg_parser.h" +#include "common.h" #if CHAR_BIT != 8 #error "Environments where CHAR_BIT != 8 are not supported." #endif -#ifndef INT64_MAX -#define INT64_MAX 0x7FFFFFFFFFFFFFFFLL +#if ( defined SIZE_MAX && SIZE_MAX < ULONG_MAX ) || \ + ( defined SSIZE_MAX && SSIZE_MAX < LONG_MAX ) +#error "Environments where 'size_t' is narrower than 'long' are not supported." 
#endif -void show_error( const char * const msg, const int errcode = 0, - const bool help = false ); - namespace { const char * const program_name = "unzcrash"; @@ -103,7 +103,7 @@ void show_help() "A negative size is relative to the rest of the file.\n" "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" - "invalid input file, 3 for an internal consistency error (eg, bug) which\n" + "invalid input file, 3 for an internal consistency error (e.g., bug) which\n" "caused unzcrash to panic.\n" "\nReport bugs to lzip-bug@nongnu.org\n" "Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html\n" ); @@ -111,66 +111,32 @@ void show_help() } // end namespace - #include "main_common.cc" - namespace { -void parse_block( const char * const ptr, long & size, uint8_t & value ) +void parse_block( const char * const arg, const char * const option_name, + long & size, uint8_t & value ) { - const char * tail = ptr; + const char * tail = arg; if( tail[0] != ',' ) - size = getnum( ptr, 0, 1, INT_MAX, &tail ); + size = getnum( arg, option_name, 0, 1, INT_MAX, &tail ); if( tail[0] == ',' ) - value = getnum( tail + 1, 0, 0, 255 ); + value = getnum( tail + 1, option_name, 0, 0, 255 ); else if( tail[0] ) { - show_error( "Bad separator in argument of '--block'", 0, true ); - std::exit( 1 ); - } - } - - -struct Bad_byte - { - enum Mode { literal, delta, flip }; - long long pos; - Mode mode; - uint8_t value; - - Bad_byte() : pos( -1 ), mode( literal ), value( 0 ) {} - uint8_t operator()( const uint8_t old_value ) const - { - if( mode == delta ) return old_value + value; - if( mode == flip ) return old_value ^ value; - return value; - } - }; - - -// Recognized formats: <pos>,<value> <pos>,+<value> <pos>,f<value> -// -void parse_pos_value( const char * const ptr, Bad_byte & bad_byte ) - { - const char * tail; - bad_byte.pos = getnum( ptr, 0, 0, INT64_MAX, &tail ); - if( tail[0] != ',' ) - 
{ - show_error( "Bad separator between <pos> and <val>.", 0, true ); + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Bad separator between <size> and <value> in " + "argument of option '%s'.\n", program_name, option_name ); std::exit( 1 ); } - if( tail[1] == '+' ) { ++tail; bad_byte.mode = Bad_byte::delta; } - else if( tail[1] == 'f' ) { ++tail; bad_byte.mode = Bad_byte::flip; } - else bad_byte.mode = Bad_byte::literal; - bad_byte.value = getnum( tail + 1, 0, 0, 255 ); } -/* Returns the address of a malloc'd buffer containing the file data and +/* Return the address of a malloc'd buffer containing the file data and the file size in '*size'. - In case of error, returns 0 and does not modify '*size'. + In case of error, return 0 and do not modify '*size'. */ uint8_t * read_file( const char * const name, long * const size ) { @@ -228,7 +194,7 @@ public: { return ( i >= 1 && i <= 8 && data[i-1] ); } // Recognized formats: 1 1,2,3 1-4 1,3-5,8 1-3,5-8 - bool parse( const char * p ) + bool parse_bs( const char * p ) { for( int i = 0; i < 8; ++i ) data[i] = false; while( true ) @@ -283,6 +249,116 @@ int differing_bits( const uint8_t byte1, const uint8_t byte2 ) return count; } + +/* Return the number of bytes really written. + If (value returned < size), it is always an error. 
+*/ +long writeblock( const int fd, const uint8_t * const buf, const long size ) + { + long sz = 0; + errno = 0; + while( sz < size ) + { + const long n = write( fd, buf + sz, size - sz ); + if( n > 0 ) sz += n; + else if( n < 0 && errno != EINTR ) break; + errno = 0; + } + return sz; + } + + +void show_exec_error( const char * const prog_name ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't exec '%s': %s\n", + program_name, prog_name, std::strerror( errno ) ); + } + + +void show_fork_error( const char * const prog_name ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't fork '%s': %s\n", + program_name, prog_name, std::strerror( errno ) ); + } + + +int wait_for_child( const pid_t pid, const char * const name ) + { + int status; + while( waitpid( pid, &status, 0 ) == -1 ) + { + if( errno != EINTR ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Error waiting termination of '%s': %s\n", + program_name, name, std::strerror( errno ) ); + return -1; + } + } + if( WIFEXITED( status ) ) + { const int ret = WEXITSTATUS( status ); if( ret != 255 ) return ret; } + return -1; + } + + +bool word_split( const char * const command, std::vector< std::string > & args ) + { + const unsigned long old_size = args.size(); + for( const char * p = command; *p; ) + { + while( *p && std::isspace( *p ) ) ++p; // strip leading space + if( !*p ) break; + if( *p == '\'' || *p == '"' ) // quoted name + { + const char quote = *p; + const char * const begin = ++p; // skip leading quote + while( *p && *p != quote ) ++p; + if( !*p || begin == p ) return false; // umbalanced or empty + args.push_back( std::string( begin, p - begin ) ); + ++p; continue; // skip trailing quote + } + const char * const begin = p++; + while( *p && !std::isspace( *p ) ) ++p; + args.push_back( std::string( begin, p - begin ) ); + } + return args.size() > old_size; + } + + +// return -1 if fatal error, 0 if OK, >0 if error +int fork_and_feed( const uint8_t * const buffer, const long 
buffer_size, + const char ** const argv, const bool verify = false ) + { + int fda[2]; // pipe to child + if( pipe( fda ) < 0 ) + { show_error( "Can't create pipe", errno ); return -1; } + + const pid_t pid = vfork(); + if( pid < 0 ) // parent + { show_fork_error( argv[0] ); return -1; } + else if( pid > 0 ) // parent (feed data to child) + { + if( close( fda[0] ) != 0 ) + { show_error( "Error closing unused pipe", errno ); return -1; } + if( writeblock( fda[1], buffer, buffer_size ) != buffer_size && verify ) + { show_error( "Can't write to child process", errno ); return -1; } + if( close( fda[1] ) != 0 ) + { show_error( "Error closing pipe", errno ); return -1; } + } + else if( pid == 0 ) // child + { + if( dup2( fda[0], STDIN_FILENO ) >= 0 && + close( fda[0] ) == 0 && close( fda[1] ) == 0 ) + execvp( argv[0], (char **)argv ); + show_exec_error( argv[0] ); + _exit( 255 ); // 255 means fatal error in wait_for_child + } + + return wait_for_child( pid, argv[0] ); + } + } // end namespace @@ -290,7 +366,7 @@ int main( const int argc, const char * const argv[] ) { enum Mode { m_block, m_byte, m_truncate }; const char * mode_str[3] = { "block", "byte", "size" }; - Bitset8 bits; // if Bitset8::parse not called test full byte + Bitset8 bits; // if Bitset8::parse_bs not called test full byte Bad_byte bad_byte; const char * zcmp_program = "zcmp"; long pos = 0; @@ -328,19 +404,20 @@ int main( const int argc, const char * const argv[] ) { const int code = parser.code( argind ); if( !code ) break; // no more options + const char * const pn = parser.parsed_name( argind ).c_str(); const char * const arg = parser.argument( argind ).c_str(); switch( code ) { case 'h': show_help(); return 0; - case 'b': if( !bits.parse( arg ) ) return 1; program_mode = m_byte; break; - case 'B': if( arg[0] ) parse_block( arg, block_size, block_value ); + case 'b': if( !bits.parse_bs( arg ) ) return 1; program_mode = m_byte; break; + case 'B': if( arg[0] ) parse_block( arg, pn, block_size, 
block_value ); program_mode = m_block; break; - case 'd': delta = getnum( arg, block_size, 1, INT_MAX ); break; - case 'e': parse_pos_value( arg, bad_byte ); break; + case 'd': delta = getnum( arg, pn, block_size, 1, INT_MAX ); break; + case 'e': bad_byte.parse_bb( arg, pn ); break; case 'n': verify = false; break; - case 'p': pos = getnum( arg, block_size, -LONG_MAX, LONG_MAX ); break; + case 'p': pos = getnum( arg, pn, block_size, -LONG_MAX, LONG_MAX ); break; case 'q': verbosity = -1; break; - case 's': max_size = getnum( arg, block_size, -LONG_MAX, LONG_MAX ); break; + case 's': max_size = getnum( arg, pn, block_size, -LONG_MAX, LONG_MAX ); break; case 't': program_mode = m_truncate; break; case 'v': if( verbosity < 4 ) ++verbosity; break; case 'V': show_version(); return 0; @@ -349,7 +426,7 @@ int main( const int argc, const char * const argv[] ) } } // end process options - if( argind + 2 != parser.arguments() ) + if( parser.arguments() - argind != 2 ) { if( verbosity >= 0 ) std::fprintf( stderr, "Usage: %s 'lzip -t' file.lz\n", invocation_name ); @@ -358,42 +435,68 @@ int main( const int argc, const char * const argv[] ) if( delta <= 0 ) delta = ( program_mode == m_block ) ? 
block_size : 1; + const char * const command = parser.argument( argind ).c_str(); + std::vector< std::string > command_args; + if( !word_split( command, command_args ) ) + { show_file_error( command, "Invalid command" ); return 1; } + const char ** const command_argv = new const char *[command_args.size()+1]; + for( unsigned i = 0; i < command_args.size(); ++i ) + command_argv[i] = command_args[i].c_str(); + command_argv[command_args.size()] = 0; + const char * const filename = parser.argument( argind + 1 ).c_str(); long file_size = 0; uint8_t * const buffer = read_file( filename, &file_size ); if( !buffer ) return 1; - const char * const command = parser.argument( argind ).c_str(); - char zcmp_command[1024] = { 0 }; + std::string zcmp_command; + std::vector< std::string > zcmp_args; + const char ** zcmp_argv = 0; if( std::strcmp( zcmp_program, "false" ) != 0 ) - snprintf( zcmp_command, sizeof zcmp_command, "%s '%s' -", - zcmp_program, filename ); + { + zcmp_command = zcmp_program; + zcmp_command += " '"; zcmp_command += filename; zcmp_command += "' -"; + if( !word_split( zcmp_command.c_str(), zcmp_args ) ) + { show_file_error( zcmp_command.c_str(), "Invalid zcmp command" ); + return 1; } + zcmp_argv = new const char *[zcmp_args.size()+1]; + for( unsigned i = 0; i < zcmp_args.size(); ++i ) + zcmp_argv[i] = zcmp_args[i].c_str(); + zcmp_argv[zcmp_args.size()] = 0; + } // verify original file if( verbosity >= 1 ) fprintf( stderr, "Testing file '%s'\n", filename ); if( verify ) { - FILE * f = popen( command, "w" ); - if( !f ) - { show_error( "Can't open pipe to decompressor", errno ); return 1; } - if( (long)std::fwrite( buffer, 1, file_size, f ) != file_size ) - { show_error( "Can't write to decompressor", errno ); return 1; } - if( pclose( f ) != 0 ) + const int ret = fork_and_feed( buffer, file_size, command_argv, true ); + if( ret != 0 ) { if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Can't run '%s'.\n", program_name, command ); + { + if( ret < 0 ) + 
std::fprintf( stderr, "%s: Can't run '%s'.\n", program_name, command ); + else + std::fprintf( stderr, "%s: \"%s\" failed (%d).\n", + program_name, command, ret ); + } return 1; } - if( zcmp_command[0] ) + if( zcmp_command.size() ) { - f = popen( zcmp_command, "w" ); - if( !f ) - { show_error( "Can't open pipe to zcmp command", errno ); return 1; } - if( (long)std::fwrite( buffer, 1, file_size, f ) != file_size ) - { show_error( "Can't write to zcmp command", errno ); return 1; } - if( pclose( f ) != 0 ) + const int ret = fork_and_feed( buffer, file_size, zcmp_argv, true ); + if( ret != 0 ) { - show_error( "zcmp command failed. Disabling comparisons" ); - zcmp_command[0] = 0; + if( verbosity >= 0 ) + { + if( ret < 0 ) + std::fprintf( stderr, "%s: Can't run '%s'.\n", + program_name, zcmp_command.c_str() ); + else + std::fprintf( stderr, "%s: \"%s\" failed (%d). Disabling comparisons.\n", + program_name, zcmp_command.c_str(), ret ); + } + if( ret < 0 ) return 1; + zcmp_command.clear(); } } } @@ -407,31 +510,32 @@ int main( const int argc, const char * const argv[] ) if( max_size < 0 ) max_size += file_size - pos; const long end = ( ( max_size < file_size - pos ) ? pos + max_size : file_size ); if( bad_byte.pos >= file_size ) - { show_error( "Position of '--set-byte' is beyond end of file." 
); - return 1; } + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Position is beyond end of file " + "in option '%s'.\n", program_name, bad_byte.option_name ); + return 1; + } if( bad_byte.pos >= 0 ) buffer[bad_byte.pos] = bad_byte( buffer[bad_byte.pos] ); long positions = 0, decompressions = 0, successes = 0, failed_comparisons = 0; if( program_mode == m_truncate ) for( long i = pos; i < end; i += std::min( delta, end - i ) ) { - if( verbosity >= 0 ) - std::fprintf( stderr, "length %ld\n", i ); + if( verbosity >= 1 ) std::fprintf( stderr, "length %ld\n", i ); ++positions; ++decompressions; - FILE * f = popen( command, "w" ); - if( !f ) { show_error( "Can't open pipe", errno ); return 1; } - std::fwrite( buffer, 1, i, f ); - if( pclose( f ) == 0 ) + const int ret = fork_and_feed( buffer, i, command_argv ); + if( ret < 0 ) return 1; + if( ret == 0 ) { ++successes; if( verbosity >= 0 ) - std::fputs( "passed the test\n", stderr ); - if( zcmp_command[0] ) + std::fprintf( stderr, "length %ld passed the test\n", i ); + if( zcmp_command.size() ) { - f = popen( zcmp_command, "w" ); - if( !f ) { show_error( "Can't open pipe", errno ); return 1; } - std::fwrite( buffer, 1, i, f ); - if( pclose( f ) != 0 ) + const int ret = fork_and_feed( buffer, i, zcmp_argv ); + if( ret < 0 ) return 1; + if( ret > 0 ) { ++failed_comparisons; if( verbosity >= 0 ) @@ -447,25 +551,22 @@ int main( const int argc, const char * const argv[] ) for( long i = pos; i < end; i += std::min( delta, end - i ) ) { const long size = std::min( block_size, file_size - i ); - if( verbosity >= 0 ) - std::fprintf( stderr, "block %ld,%ld\n", i, size ); + if( verbosity >= 1 ) std::fprintf( stderr, "block %ld,%ld\n", i, size ); ++positions; ++decompressions; - FILE * f = popen( command, "w" ); - if( !f ) { show_error( "Can't open pipe", errno ); return 1; } - std::memcpy( block , buffer + i, size ); + std::memcpy( block, buffer + i, size ); std::memset( buffer + i, block_value, size ); - std::fwrite( 
buffer, 1, file_size, f ); - if( pclose( f ) == 0 ) + const int ret = fork_and_feed( buffer, file_size, command_argv ); + if( ret < 0 ) return 1; + if( ret == 0 ) { ++successes; if( verbosity >= 0 ) - std::fputs( "passed the test\n", stderr ); - if( zcmp_command[0] ) + std::fprintf( stderr, "block %ld,%ld passed the test\n", i, size ); + if( zcmp_command.size() ) { - f = popen( zcmp_command, "w" ); - if( !f ) { show_error( "Can't open pipe", errno ); return 1; } - std::fwrite( buffer, 1, file_size, f ); - if( pclose( f ) != 0 ) + const int ret = fork_and_feed( buffer, file_size, zcmp_argv ); + if( ret < 0 ) return 1; + if( ret > 0 ) { ++failed_comparisons; if( verbosity >= 0 ) @@ -482,8 +583,7 @@ int main( const int argc, const char * const argv[] ) if( verbosity >= 1 ) bits.print(); for( long i = pos; i < end; i += std::min( delta, end - i ) ) { - if( verbosity >= 0 ) - std::fprintf( stderr, "byte %ld\n", i ); + if( verbosity >= 1 ) std::fprintf( stderr, "byte %ld\n", i ); ++positions; const uint8_t byte = buffer[i]; for( int j = 1; j < 256; ++j ) @@ -495,23 +595,21 @@ int main( const int argc, const char * const argv[] ) if( verbosity >= 2 ) std::fprintf( stderr, "0x%02X (0x%02X+0x%02X) ", buffer[i], byte, j ); - FILE * f = popen( command, "w" ); - if( !f ) { show_error( "Can't open pipe", errno ); return 1; } - std::fwrite( buffer, 1, file_size, f ); - if( pclose( f ) == 0 ) + const int ret = fork_and_feed( buffer, file_size, command_argv ); + if( ret < 0 ) return 1; + if( ret == 0 ) { ++successes; if( verbosity >= 0 ) { if( verbosity < 2 ) // else already printed above std::fprintf( stderr, "0x%02X (0x%02X+0x%02X) ", buffer[i], byte, j ); - std::fputs( "passed the test\n", stderr ); } - if( zcmp_command[0] ) + std::fprintf( stderr, "byte %ld passed the test\n", i ); } + if( zcmp_command.size() ) { - f = popen( zcmp_command, "w" ); - if( !f ) { show_error( "Can't open pipe", errno ); return 1; } - std::fwrite( buffer, 1, file_size, f ); - if( pclose( f ) != 0 ) 
+ const int ret = fork_and_feed( buffer, file_size, zcmp_argv ); + if( ret < 0 ) return 1; + if( ret > 0 ) { ++failed_comparisons; if( verbosity >= 0 ) @@ -532,7 +630,7 @@ int main( const int argc, const char * const argv[] ) positions, mode_str[program_mode], decompressions, successes ); if( successes > 0 ) { - if( zcmp_command[0] == 0 ) + if( zcmp_command.empty() ) std::fputs( "\n comparisons disabled\n", stderr ); else if( failed_comparisons > 0 ) std::fprintf( stderr, ", of which\n%8ld comparisons failed\n", |