diff options
-rw-r--r-- | ChangeLog | 7 | ||||
-rw-r--r-- | NEWS | 26 | ||||
-rwxr-xr-x | configure | 2 | ||||
-rw-r--r-- | decoder.cc | 14 | ||||
-rw-r--r-- | doc/lziprecover.1 | 4 | ||||
-rw-r--r-- | doc/lziprecover.info | 28 | ||||
-rw-r--r-- | doc/lziprecover.texi | 10 | ||||
-rw-r--r-- | file_index.cc | 2 | ||||
-rw-r--r-- | lzip.h | 4 | ||||
-rw-r--r-- | main.cc | 62 | ||||
-rw-r--r-- | merge.cc | 4 | ||||
-rw-r--r-- | mtester.cc | 169 | ||||
-rw-r--r-- | mtester.h | 39 | ||||
-rw-r--r-- | range_dec.cc | 29 | ||||
-rw-r--r-- | repair.cc | 234 | ||||
-rwxr-xr-x | testsuite/check.sh | 10 | ||||
-rw-r--r-- | unzcrash.cc | 6 |
17 files changed, 451 insertions, 199 deletions
@@ -1,3 +1,10 @@ +2015-06-30 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.18-pre1 released. + * repair.cc (repair_file): Detect gross damage before repairing. + * repair.cc: Try bytes at offsets 7 and 8 first. + * Added new option '-x, --show-packets'. + 2015-05-28 Antonio Diaz Diaz <antonio@gnu.org> * Version 1.17 released. @@ -1,21 +1,11 @@ -Changes in version 1.17: +Changes in version 1.18: -Merging files now uses an algorithm similar to the ones used to solve -the "Master Mind" game, which makes it much faster. Up to 2 orders of -magnitude faster depending on number of files and number of errors. -Please, report as a bug any files correctly merged by lziprecover 1.16 -that this version can't merge. +"--repair" now tries to detect gross damage in the file before +attempting to repair it. -Repair time has been reduced by 15%. +"--repair" now tries bytes at member offsets 7 and 8 first because +errors in these bytes sometimes can't be detected until the end of the +member. -The new option "-y, --debug-delay", which finds the max error detection -delay in a given range of positions, has been added. - -The new option "-z, --debug-repair", which test repairs a one-byte error -at a given position, has been added. - -The targets "install-compress", "install-strip-compress", -"install-info-compress" and "install-man-compress" have been added to -the Makefile. - -The chapter "File names" has been added to the manual. +The new option "-x, --show-packets", which shows the LZMA packets +(coding sequences) coded in a given file, has been added. @@ -6,7 +6,7 @@ # to copy, distribute and modify it. pkgname=lziprecover -pkgversion=1.17 +pkgversion=1.18-pre1 progname=lziprecover srctrigger=doc/${pkgname}.texi @@ -43,7 +43,7 @@ void Pretty_print::operator()( const char * const msg, FILE * const f ) const first_post = false; std::fprintf( f, " %s: ", name_.c_str() ); for( unsigned i = 0; i < longest_name - name_.size(); ++i ) - std::fprintf( f, " " ); + std::fputc( ' ', f ); if( !msg ) std::fflush( f ); } if( msg ) std::fprintf( f, "%s\n", msg ); @@ -154,7 +154,7 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const if( pp.verbosity() >= 0 ) { pp(); - std::fprintf( stderr, "CRC mismatch; trailer says %08X, data CRC is %08X.\n", + std::fprintf( stderr, "CRC mismatch; trailer says %08X, data CRC is %08X\n", trailer.data_crc(), crc() ); } } @@ -164,7 +164,7 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const if( pp.verbosity() >= 0 ) { pp(); - std::fprintf( stderr, "Data size mismatch; trailer says %llu, data size is %llu (0x%llX).\n", + std::fprintf( stderr, "Data size mismatch; trailer says %llu, data size is %llu (0x%llX)\n", trailer.data_size(), data_position(), data_position() ); } } @@ -174,7 +174,7 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const if( pp.verbosity() >= 0 ) { pp(); - std::fprintf( stderr, "Member size mismatch; trailer says %llu, member size is %llu (0x%llX).\n", + std::fprintf( stderr, "Member size mismatch; trailer says %llu, member size is %llu (0x%llX)\n", trailer.member_size(), member_size, member_size ); } } @@ -231,7 +231,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) peek( rep0 ) ) ); } } - else + else /* match or repeated match */ { int len; if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit @@ -260,7 +260,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) state.set_rep(); len = min_match_len + rdec.decode_len( rep_len_model, pos_state ); } - else + else /* match */ { const unsigned rep0_saved = rep0; len = min_match_len + rdec.decode_len( match_len_model, pos_state ); @@ -293,7 +293,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) if( pp.verbosity() >= 0 ) { pp(); - std::fprintf( stderr, "Unsupported marker code '%d'.\n", len ); + std::fprintf( stderr, "Unsupported marker code '%d'\n", len ); } return 4; } diff --git a/doc/lziprecover.1 b/doc/lziprecover.1 index b420b76..99b61dd 100644 --- a/doc/lziprecover.1 +++ b/doc/lziprecover.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1. -.TH LZIPRECOVER "1" "May 2015" "lziprecover 1.17" "User Commands" +.TH LZIPRECOVER "1" "June 2015" "lziprecover 1.18-pre1" "User Commands" .SH NAME lziprecover \- recovers data from damaged lzip files .SH SYNOPSIS @@ -30,7 +30,7 @@ send decompressed output to standard output decompress .TP \fB\-D\fR, \fB\-\-range\-decompress=\fR<range> -decompress only a range of bytes (N\-M) +decompress a range of bytes (N\-M) to stdout .TP \fB\-f\fR, \fB\-\-force\fR overwrite existing output files diff --git a/doc/lziprecover.info b/doc/lziprecover.info index cb9517a..654e60c 100644 --- a/doc/lziprecover.info +++ b/doc/lziprecover.info @@ -12,7 +12,7 @@ File: lziprecover.info, Node: Top, Next: Introduction, Up: (dir) Lziprecover Manual ****************** -This manual is for Lziprecover (version 1.17, 28 May 2015). +This manual is for Lziprecover (version 1.18-pre1, 30 June 2015). * Menu: @@ -274,7 +274,7 @@ files::), if at least one backup copy of the file is made. separate media. How does lzip compare with gzip and bzip2 with respect to data -safety? Lets suppose that you made a backup copy of your valuable +safety? Lets suppose that you made a backup of your valuable scientific data, compressed it, and stored two copies on separate media. Years later you notice that both copies are corrupt. @@ -652,18 +652,18 @@ Concept index Tag Table: Node: Top231 -Node: Introduction1208 -Node: Invoking lziprecover4304 -Node: Data safety9737 -Node: Repairing files11666 -Node: Merging files13568 -Node: File names15409 -Node: File format15873 -Node: Examples18277 -Ref: ddrescue-example19523 -Node: Unzcrash20779 -Node: Problems23333 -Node: Concept index23885 +Node: Introduction1214 +Node: Invoking lziprecover4310 +Node: Data safety9743 +Node: Repairing files11667 +Node: Merging files13569 +Node: File names15410 +Node: File format15874 +Node: Examples18278 +Ref: ddrescue-example19524 +Node: Unzcrash20780 +Node: Problems23334 +Node: Concept index23886 End Tag Table diff --git a/doc/lziprecover.texi b/doc/lziprecover.texi index 3f6e0aa..29045e7 100644 --- a/doc/lziprecover.texi +++ b/doc/lziprecover.texi @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 28 May 2015 -@set VERSION 1.17 +@set UPDATED 30 June 2015 +@set VERSION 1.18-pre1 @dircategory Data Compression @direntry @@ -302,9 +302,9 @@ The only remedy for total device failure is storing backup copies in separate media. How does lzip compare with gzip and bzip2 with respect to data safety? -Lets suppose that you made a backup copy of your valuable scientific -data, compressed it, and stored two copies on separate media. Years -later you notice that both copies are corrupt. +Lets suppose that you made a backup of your valuable scientific data, +compressed it, and stored two copies on separate media. Years later you +notice that both copies are corrupt. If you compressed with gzip and both copies suffer any damage in the data stream, even if it is just one altered bit, the original data can't diff --git a/file_index.cc b/file_index.cc index 49511bf..a1a0f30 100644 --- a/file_index.cc +++ b/file_index.cc @@ -40,7 +40,7 @@ int seek_read( const int fd, uint8_t * const buf, const int size, void File_index::set_errno_error( const char * const msg ) { - error_ = msg; error_ += std::strerror( errno ); error_ += '.'; + error_ = msg; error_ += std::strerror( errno ); retval_ = 1; } @@ -289,6 +289,7 @@ int seek_read( const int fd, uint8_t * const buf, const int size, // defined in main.cc int open_instream( const char * const name, struct stat * const in_statsp, const bool no_ofile, const bool reg_only = false ); +bool file_exists( const std::string & filename ); int open_outstream_rw( const std::string & output_filename, const bool force ); void show_header( const unsigned dictionary_size ); void show_error( const char * const msg, const int errcode = 0, @@ -322,6 +323,9 @@ int repair_file( const std::string & input_filename, const bool force ); int debug_repair( const std::string & input_filename, const long long bad_pos, const int verbosity, const uint8_t bad_value ); +int debug_show_packets( const std::string & input_filename, + const long long bad_pos, const int verbosity, + const uint8_t bad_value ); // defined in split.cc int split_file( const std::string & input_filename, @@ -79,7 +79,7 @@ struct { const char * from; const char * to; } const known_extensions[] = { { 0, 0 } }; enum Mode { m_none, m_debug_delay, m_debug_repair, m_decompress, m_list, - m_merge, m_range_dec, m_repair, m_split, m_test }; + m_merge, m_range_dec, m_repair, m_show_packets, m_split, m_test }; std::string output_filename; int outfd = -1; @@ -106,7 +106,7 @@ void show_help() " -V, --version output version information and exit\n" " -c, --stdout send decompressed output to standard output\n" " -d, --decompress decompress\n" - " -D, --range-decompress=<range> decompress only a range of bytes (N-M)\n" + " -D, --range-decompress=<range> decompress a range of bytes (N-M) to stdout\n" " -f, --force overwrite existing output files\n" " -i, --ignore-errors make '--range-decompress' ignore data errors\n" " -k, --keep keep (don't delete) input files\n" @@ -120,8 +120,9 @@ void show_help() " -v, --verbose be verbose (a 2nd -v gives more)\n" ); if( verbosity >= 1 ) { - std::printf( " -y, --debug-delay=<range> find max error detection delay in <range>\n" - " -z, --debug-repair=<pos>,<val> test repair one-byte error at <pos>\n" ); + std::printf( " -x, --show-packets[=<pos>,<val>] show in stdout the decoded LZMA packets\n" + " -y, --debug-delay=<range> find max error detection delay in <range>\n" + " -z, --debug-repair=<pos>,<val> test repair one-byte error at <pos>\n" ); } std::printf( "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" @@ -292,7 +293,7 @@ int open_instream( const char * const name, struct stat * const in_statsp, if( infd < 0 ) { if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Can't open input file '%s': %s.\n", + std::fprintf( stderr, "%s: Can't open input file '%s': %s\n", program_name, name, std::strerror( errno ) ); } else @@ -332,7 +333,7 @@ void set_d_outname( const std::string & name, const int i ) } output_filename = name; output_filename += ".out"; if( verbosity >= 1 ) - std::fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'.\n", + std::fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'\n", program_name, name.c_str(), output_filename.c_str() ); } @@ -349,7 +350,7 @@ bool open_outstream( const bool force ) std::fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n", program_name, output_filename.c_str() ); else - std::fprintf( stderr, "%s: Can't create output file '%s': %s.\n", + std::fprintf( stderr, "%s: Can't create output file '%s': %s\n", program_name, output_filename.c_str(), std::strerror( errno ) ); } return ( outfd >= 0 ); @@ -504,23 +505,20 @@ int decompress( const int infd, const Pretty_print & pp, const bool testing ) if( verbosity >= 0 && result <= 2 ) { pp(); - if( result == 2 ) - std::fprintf( stderr, "File ends unexpectedly at pos %llu.\n", - partial_file_pos ); - else - std::fprintf( stderr, "Decoder error at pos %llu.\n", - partial_file_pos ); + std::fprintf( stderr, "%s at pos %llu\n", ( result == 2 ) ? + "File ends unexpectedly" : "Decoder error", + partial_file_pos ); } retval = 2; break; } if( verbosity >= 2 ) - { std::fprintf( stderr, testing ? "ok\n" : "done\n" ); pp.reset(); } + { std::fputs( testing ? "ok\n" : "done\n", stderr ); pp.reset(); } } } catch( std::bad_alloc ) { pp( "Not enough memory." ); retval = 1; } catch( Error e ) { pp(); show_error( e.msg, errno ); retval = 1; } if( verbosity == 1 && retval == 0 ) - std::fprintf( stderr, testing ? "ok\n" : "done\n" ); + std::fputs( testing ? "ok\n" : "done\n", stderr ); return retval; } @@ -542,6 +540,21 @@ void set_signals() } // end namespace +bool file_exists( const std::string & filename ) + { + struct stat st; + if( stat( filename.c_str(), &st ) == 0 ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Output file '%s' already exists." + " Use '--force' to overwrite it.\n", + program_name, filename.c_str() ); + return true; + } + return false; + } + + int open_outstream_rw( const std::string & output_filename, const bool force ) { int flags = O_CREAT | O_RDWR | O_BINARY; @@ -555,7 +568,7 @@ int open_outstream_rw( const std::string & output_filename, const bool force ) " Use '--force' to overwrite it.\n", program_name, output_filename.c_str() ); else - std::fprintf( stderr, "%s: Can't create output file '%s': %s.\n", + std::fprintf( stderr, "%s: Can't create output file '%s': %s\n", program_name, output_filename.c_str(), std::strerror( errno ) ); } return outfd; @@ -570,8 +583,8 @@ void show_error( const char * const msg, const int errcode, const bool help ) { std::fprintf( stderr, "%s: %s", program_name, msg ); if( errcode > 0 ) - std::fprintf( stderr, ": %s.", std::strerror( errcode ) ); - std::fprintf( stderr, "\n" ); + std::fprintf( stderr, ": %s", std::strerror( errcode ) ); + std::fputc( '\n', stderr ); } if( help ) std::fprintf( stderr, "Try '%s --help' for more information.\n", @@ -599,7 +612,7 @@ void show_error2( const char * const msg1, const char * const name, int main( const int argc, const char * const argv[] ) { Block range( 0, 0 ); - long long bad_pos = 0; + long long bad_pos = -1; std::string input_filename; std::string default_output_filename; std::vector< std::string > filenames; @@ -631,6 +644,7 @@ int main( const int argc, const char * const argv[] ) { 't', "test", Arg_parser::no }, { 'v', "verbose", Arg_parser::no }, { 'V', "version", Arg_parser::no }, + { 'x', "show-packets", Arg_parser::maybe }, { 'y', "debug-delay", Arg_parser::yes }, { 'z', "debug-repair", Arg_parser::yes }, { 0 , 0, Arg_parser::no } }; @@ -665,6 +679,9 @@ int main( const int argc, const char * const argv[] ) case 't': set_mode( program_mode, m_test ); break; case 'v': if( verbosity < 4 ) ++verbosity; break; case 'V': show_version(); return 0; + case 'x': set_mode( program_mode, m_show_packets ); + if( arg.size() ) + parse_pos_value( arg.c_str(), bad_pos, bad_value ); break; case 'y': set_mode( program_mode, m_debug_delay ); parse_range( arg.c_str(), range ); break; case 'z': set_mode( program_mode, m_debug_repair ); @@ -720,8 +737,11 @@ int main( const int argc, const char * const argv[] ) one_file( filenames.size() ); if( default_output_filename.empty() ) default_output_filename = insert_fixed( filenames[0] ); - return repair_file( filenames[0], default_output_filename, - verbosity, force ); + return repair_file( filenames[0], default_output_filename, verbosity, + force ); + case m_show_packets: + one_file( filenames.size() ); + return debug_show_packets( filenames[0], bad_pos, verbosity, bad_value ); case m_split: one_file( filenames.size() ); return split_file( filenames[0], default_output_filename, verbosity, force ); @@ -480,12 +480,12 @@ int merge_files( const std::vector< std::string > & filenames, { done = try_merge_member( mpos, msize, block_vector, color_vector, infd_vector, output_filename, outfd, verbosity ); - if( !done && verbosity >= 1 ) std::fputs( "\n", stdout ); + if( !done && verbosity >= 1 ) std::fputc( '\n', stdout ); } if( !done ) done = try_merge_member1( mpos, msize, block_vector, color_vector, infd_vector, output_filename, outfd, verbosity ); - if( verbosity >= 1 ) std::fputs( "\n", stdout ); + if( verbosity >= 1 ) std::fputc( '\n', stdout ); if( !done ) { if( verbosity >= 2 ) @@ -32,6 +32,24 @@ #include "mtester.h" +namespace { + +const char * format_byte( const uint8_t byte ) + { + enum { buffers = 8, bufsize = 16 }; + static char buffer[buffers][bufsize]; // circle of static buffers for printf + static int current = 0; + char * const buf = buffer[current++]; current %= buffers; + if( ( byte >= 0x20 && byte <= 0x7E ) || byte >= 0xA0 ) + snprintf( buf, bufsize, "'%c' (0x%02X)", byte, byte ); + else + snprintf( buf, bufsize, " (0x%02X)", byte ); + return buf; + } + +} // end namespace + + void LZ_mtester::flush_data() { if( pos > stream_pos ) @@ -56,6 +74,19 @@ bool LZ_mtester::verify_trailer() } +void LZ_mtester::print_block( const int len ) + { + std::fputs( " \"", stdout ); + for( int i = len - 1; i >= 0; --i ) + { + uint8_t byte = peek( i ); + if( byte < 0x20 || ( byte > 0x7E && byte < 0xA0 ) ) byte = '.'; + std::fputc( byte, stdout ); + } + std::fputs( "\"\n", stdout ); + } + + void LZ_mtester::duplicate_buffer() { uint8_t * const tmp = new uint8_t[buffer_size]; @@ -80,7 +111,7 @@ int LZ_mtester::test_member( const long pos_limit ) const int pos_state = data_position() & pos_state_mask; if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit { - const uint8_t prev_byte = get_prev_byte(); + const uint8_t prev_byte = peek_prev(); if( state.is_char() ) { state.set_char1(); @@ -90,7 +121,7 @@ int LZ_mtester::test_member( const long pos_limit ) { state.set_char2(); put_byte( rdec.decode_matched( bm_literal[get_lit_state(prev_byte)], - get_byte( rep0 ) ) ); + peek( rep0 ) ) ); } } else @@ -117,7 +148,7 @@ int LZ_mtester::test_member( const long pos_limit ) else { if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit - { state.set_short_rep(); put_byte( get_byte( rep0 ) ); continue; } + { state.set_short_rep(); put_byte( peek( rep0 ) ); continue; } } state.set_rep(); len = min_match_len + rdec.decode_len( rep_len_model, pos_state ); @@ -165,6 +196,136 @@ int LZ_mtester::test_member( const long pos_limit ) } +/* Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF, + 3 = trailer error, 4 = unknown marker found. */ +int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos, + const bool show_packets ) + { + rdec.load(); + while( !rdec.finished() ) + { + const unsigned long long dp = data_position() + dpos; + const unsigned long long mp = member_position() + mpos - 4; + const int pos_state = data_position() & pos_state_mask; + if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit + { + const uint8_t prev_byte = peek_prev(); + if( state.is_char() ) + { + state.set_char1(); + const uint8_t cur_byte = rdec.decode_tree8( bm_literal[get_lit_state(prev_byte)] ); + put_byte( cur_byte ); + if( show_packets ) + std::printf( "%6llu %6llu literal %s\n", + mp, dp, format_byte( cur_byte ) ); + } + else + { + state.set_char2(); + const uint8_t match_byte = peek( rep0 ); + const uint8_t cur_byte = + rdec.decode_matched( bm_literal[get_lit_state(prev_byte)], match_byte ); + put_byte( cur_byte ); + if( show_packets ) + std::printf( "%6llu %6llu literal %s, match byte %6llu %s\n", + mp, dp, format_byte( cur_byte ), dp - rep0 - 1, + format_byte( match_byte ) ); + } + } + else /* match or repeated match */ + { + int len; + if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit + { + int rep = 0; + if( rdec.decode_bit( bm_rep0[state()] ) != 0 ) // 3rd bit + { + unsigned distance; + if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit + { distance = rep1; rep = 1; } + else + { + if( rdec.decode_bit( bm_rep2[state()] ) == 0 ) // 5th bit + { distance = rep2; rep = 2; } + else + { distance = rep3; rep3 = rep2; rep = 3; } + rep2 = rep1; + } + rep1 = rep0; + rep0 = distance; + } + else + { + if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit + { + if( show_packets ) + std::printf( "%6llu %6llu shortrep %s %6u (%6llu)\n", + mp, dp, format_byte( peek( rep0 ) ), + rep0 + 1, dp - rep0 - 1 ); + state.set_short_rep(); put_byte( peek( rep0 ) ); continue; + } + } + state.set_rep(); + len = min_match_len + rdec.decode_len( rep_len_model, pos_state ); + if( show_packets ) + std::printf( "%6llu %6llu rep%c %6u,%3d (%6llu)", + mp, dp, rep + '0', rep0 + 1, len, dp - rep0 - 1 ); + } + else /* match */ + { + const unsigned rep0_saved = rep0; + len = min_match_len + rdec.decode_len( match_len_model, pos_state ); + const int dis_slot = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] ); + if( dis_slot < start_dis_model ) rep0 = dis_slot; + else + { + const int direct_bits = ( dis_slot >> 1 ) - 1; + rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits; + if( dis_slot < end_dis_model ) + rep0 += rdec.decode_tree_reversed( bm_dis + rep0 - dis_slot - 1, + direct_bits ); + else + { + rep0 += rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits; + rep0 += rdec.decode_tree_reversed4( bm_align ); + if( rep0 == 0xFFFFFFFFU ) // marker found + { + rep0 = rep0_saved; + rdec.normalize(); + flush_data(); + if( show_packets ) + std::printf( "%6llu %6llu marker code '%d'\n", mp, dp, len ); + if( len == min_match_len ) // End Of Stream marker + { + if( show_packets ) + std::printf( "%6llu %6llu member trailer\n", + mpos + member_position(), dpos + data_position() ); + if( verify_trailer() ) return 0; + if( show_packets ) std::fputs( "trailer error\n", stdout ); + return 3; + } + return 4; + } + } + } + rep3 = rep2; rep2 = rep1; rep1 = rep0_saved; + state.set_match(); + if( show_packets ) + std::printf( "%6llu %6llu match %6u,%3d (%6lld)", + mp, dp, rep0 + 1, len, dp - rep0 - 1 ); + if( rep0 >= dictionary_size || rep0 >= data_position() ) + { flush_data(); if( show_packets ) std::fputc( '\n', stdout ); + return 1; } + } + copy_block( rep0, len ); + if( show_packets ) print_block( len ); + } + } + flush_data(); + return 2; + } + + uint8_t * read_member( const int infd, const long long mpos, const long long msize ) { @@ -184,7 +345,7 @@ const LZ_mtester * prepare_master( const uint8_t * const buffer, const long buffer_size, const long pos_limit ) { - File_header & header = *(File_header *)buffer; + const File_header & header = *(File_header *)buffer; const unsigned dictionary_size = header.dictionary_size(); if( header.verify_magic() && header.verify_version() && dictionary_size >= min_dictionary_size && @@ -37,16 +37,16 @@ public: at_stream_end( false ) {} - void load() - { - for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte(); - code &= range; // make sure that first byte is discarded - } - bool code_is_zero() const { return ( code == 0 ); } bool finished() { return pos >= buffer_size; } long member_position() const { return pos; } + uint8_t get_byte() + { + if( finished() ) return 0xAA; // make code != 0 + return buffer[pos++]; + } + const File_trailer * get_trailer() { if( buffer_size - pos < File_trailer::size ) return 0; @@ -55,10 +55,10 @@ public: return p; } - uint8_t get_byte() + void load() { - if( finished() ) return 0xAA; // make code != 0 - return buffer[pos++]; + for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte(); + code &= range; // make sure that first byte is discarded } void normalize() @@ -195,13 +195,13 @@ class LZ_mtester Range_mtester rdec; const unsigned dictionary_size; const int buffer_size; - uint8_t * buffer; // output buffer - int pos; // current pos in buffer - int stream_pos; // first byte not yet written to file + uint8_t * buffer; /* output buffer */ + int pos; /* current pos in buffer */ + int stream_pos; /* first byte not yet written to file */ uint32_t crc_; - unsigned rep0; // rep[0-3] latest four distances - unsigned rep1; // used for efficient coding of - unsigned rep2; // repeated distances + unsigned rep0; /* rep[0-3] latest four distances */ + unsigned rep1; /* used for efficient coding of */ + unsigned rep2; /* repeated distances */ unsigned rep3; State state; @@ -219,18 +219,17 @@ class LZ_mtester Len_model match_len_model; Len_model rep_len_model; - unsigned long long stream_position() const - { return partial_data_pos + stream_pos; } void flush_data(); bool verify_trailer(); + void print_block( const int len ); - uint8_t get_prev_byte() const + uint8_t peek_prev() const { const int i = ( ( pos > 0 ) ? pos : buffer_size ) - 1; return buffer[i]; } - uint8_t get_byte( const int distance ) const + uint8_t peek( const int distance ) const { int i = pos - distance - 1; if( i < 0 ) i += buffer_size; @@ -289,6 +288,8 @@ public: void duplicate_buffer(); int test_member( const long pos_limit = LONG_MAX ); + int debug_decode_member( const long long dpos, const long long mpos, + const bool show_packets ); }; diff --git a/range_dec.cc b/range_dec.cc index 062427a..d4a2b2c 100644 --- a/range_dec.cc +++ b/range_dec.cc @@ -72,16 +72,13 @@ int decompress_member( const int infd, const int outfd, if( pp.verbosity() >= 0 && result <= 2 ) { pp(); - if( result == 2 ) - std::fprintf( stderr, "File ends unexpectedly at pos %llu.\n", - mpos + rdec.member_position() ); - else - std::fprintf( stderr, "Decoder error at pos %llu.\n", - mpos + rdec.member_position() ); + std::fprintf( stderr, "%s at pos %llu\n", ( result == 2 ) ? + "File ends unexpectedly" : "Decoder error", + mpos + rdec.member_position() ); } return 2; } - if( pp.verbosity() >= 2 ) std::fprintf( stderr, "done\n" ); + if( pp.verbosity() >= 2 ) std::fputs( "done\n", stderr ); } catch( std::bad_alloc ) { pp( "Not enough memory." ); return 1; } catch( Error e ) { pp(); show_error( e.msg, errno ); return 1; } @@ -115,7 +112,7 @@ int list_file( const char * const input_filename, const Pretty_print & pp ) if( pp.verbosity() >= 1 && file_index.members() > 1 ) { - std::printf( " Total members in file = %ld.\n", file_index.members() ); + std::printf( " Total members in file = %ld\n", file_index.members() ); if( pp.verbosity() >= 2 ) for( long i = 0; i < file_index.members(); ++i ) { @@ -141,18 +138,21 @@ const char * format_num( unsigned long long num, { "k", "M", "G", "T", "P", "E", "Z", "Y" }; const char * const binary_prefix[8] = { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; + enum { buffers = 8, bufsize = 32 }; + static char buffer[buffers][bufsize]; // circle of static buffers for printf + static int current = 0; static bool si = true; - static char buf[32]; if( set_prefix ) si = ( set_prefix > 0 ); const unsigned factor = ( si ? 1000 : 1024 ); + char * const buf = buffer[current++]; current %= buffers; const char * const * prefix = ( si ? si_prefix : binary_prefix ); const char * p = ""; bool exact = ( num % factor == 0 ); for( int i = 0; i < 8 && ( num > limit || ( exact && num >= factor ) ); ++i ) { num /= factor; if( num % factor != 0 ) exact = false; p = prefix[i]; } - snprintf( buf, sizeof buf, "%llu %s", num, p ); + snprintf( buf, bufsize, "%llu %s", num, p ); return buf; } @@ -203,9 +203,10 @@ int range_decompress( const std::string & input_filename, if( verbosity >= 2 ) std::fprintf( stderr, "Decompressed file size = %sB\n", format_num( file_index.data_end() ) ); - std::fprintf( stderr, "Decompressing range %sB", format_num( range.pos() ) ); - std::fprintf( stderr, " to %sB ", format_num( range.pos() + range.size() ) ); - std::fprintf( stderr, "(%sBytes)\n", format_num( range.size() ) ); + std::fprintf( stderr, "Decompressing range %sB to %sB (%sBytes)\n", + format_num( range.pos() ), + format_num( range.pos() + range.size() ), + format_num( range.size() ) ); } int outfd = -1; @@ -241,6 +242,6 @@ int range_decompress( const std::string & input_filename, cleanup_and_fail( output_filename, -1, 1 ); } if( verbosity >= 2 && retval == 0 ) - std::fprintf( stderr, "Byte range decompressed successfully.\n" ); + std::fputs( "Byte range decompressed successfully.\n", stderr ); return retval; } @@ -36,6 +36,23 @@ namespace { +bool gross_damage( const long long msize, const uint8_t * const mbuffer ) + { + enum { maxlen = 6 }; // max number of consecutive identical bytes + long i = File_header::size; + const long end = msize - File_trailer::size - maxlen; + uint8_t byte; + while( i < end ) + { + byte = mbuffer[i]; + int len = 0; // does not count the first byte + while( mbuffer[++i] == byte && ++len < maxlen ) {} + if( len >= maxlen ) return true; + } + return false; + } + + int seek_write( const int fd, const uint8_t * const buf, const int size, const long long pos ) { @@ -44,6 +61,35 @@ int seek_write( const int fd, const uint8_t * const buf, const int size, return 0; } +// Return value: -1 = master failed, 0 = begin reached, >0 = repaired pos +long repair_member( const long long mpos, const long long msize, + uint8_t * const mbuffer, const long begin, const long end, + const int verbosity ) + { + for( long pos = end; pos >= begin && pos > end - 50000; ) + { + const long min_pos = std::max( begin, pos - 100 ); + const LZ_mtester * master = prepare_master( mbuffer, msize, min_pos - 16 ); + if( !master ) return -1; + for( ; pos >= min_pos; --pos ) + { + if( verbosity >= 1 ) + { + std::printf( "Trying position %llu \r", mpos + pos ); + std::fflush( stdout ); + } + for( int j = 0; j < 255; ++j ) + { + ++mbuffer[pos]; + if( test_member_rest( *master ) ) { delete master; return pos; } + } + ++mbuffer[pos]; + } + delete master; + } + return 0; + } + } // end namespace @@ -51,7 +97,7 @@ int repair_file( const std::string & input_filename, const std::string & output_filename, const int verbosity, const bool force ) { - struct stat in_stats; + struct stat in_stats; // not used const int infd = open_instream( input_filename.c_str(), &in_stats, true, true ); if( infd < 0 ) return 1; @@ -60,6 +106,7 @@ int repair_file( const std::string & input_filename, if( file_index.retval() != 0 ) { pp( file_index.error().c_str() ); return file_index.retval(); } + if( !force && file_exists( output_filename ) ) return 1; int outfd = -1; for( long i = 0; i < file_index.members(); ++i ) { @@ -69,7 +116,6 @@ int repair_file( const std::string & input_filename, cleanup_and_fail( output_filename, outfd, 1 ); long long failure_pos = 0; if( try_decompress_member( infd, msize, &failure_pos ) ) continue; - if( failure_pos >= msize - 8 ) failure_pos = msize - 8 - 1; if( failure_pos < File_header::size ) { show_error( "Can't repair error in input file." ); cleanup_and_fail( output_filename, outfd, 2 ); } @@ -80,51 +126,38 @@ int repair_file( const std::string & input_filename, i + 1, file_index.members(), mpos + failure_pos ); std::fflush( stdout ); } + if( failure_pos >= msize - 8 ) failure_pos = msize - 8 - 1; uint8_t * const mbuffer = read_member( infd, mpos, msize ); if( !mbuffer ) cleanup_and_fail( output_filename, outfd, 1 ); - long pos = failure_pos; - bool done = false; - while( pos >= File_header::size && pos > failure_pos - 50000 && !done ) + long pos = 0; + if( !gross_damage( msize, mbuffer ) ) { - const long min_pos = std::max( (long)File_header::size, pos - 100 ); - const LZ_mtester * master = prepare_master( mbuffer, msize, min_pos - 16 ); - if( !master ) - cleanup_and_fail( output_filename, outfd, 1 ); - for( ; pos >= min_pos && !done; --pos ) + pos = repair_member( mpos, msize, mbuffer, File_header::size + 1, + File_header::size + 2, verbosity ); + if( pos == 0 ) + pos = repair_member( mpos, msize, mbuffer, File_header::size + 3, + failure_pos, verbosity ); + } + if( pos < 0 ) + cleanup_and_fail( output_filename, outfd, 1 ); + if( pos > 0 ) + { + if( outfd < 0 ) // first damaged member repaired { - if( verbosity >= 1 ) - { - std::printf( "Trying position %llu \r", mpos + pos ); - std::fflush( stdout ); - } - for( int j = 0; j < 256; ++j ) - { - ++mbuffer[pos]; - if( j == 255 ) break; - if( test_member_rest( *master ) ) - { - done = true; - if( outfd < 0 ) // first damaged member repaired - { - if( !safe_seek( infd, 0 ) ) return 1; - outfd = open_outstream_rw( output_filename, force ); - if( outfd < 0 ) { close( infd ); return 1; } - if( !copy_file( infd, outfd ) ) // copy whole file - cleanup_and_fail( output_filename, outfd, 1 ); - } - if( seek_write( outfd, mbuffer + pos, 1, mpos + pos ) != 1 ) - { show_error( "Error writing output file", errno ); - cleanup_and_fail( output_filename, outfd, 1 ); } - break; - } - } + if( !safe_seek( infd, 0 ) ) return 1; + outfd = open_outstream_rw( output_filename, true ); + if( outfd < 0 ) { close( infd ); return 1; } + if( !copy_file( infd, outfd ) ) // copy whole file + cleanup_and_fail( output_filename, outfd, 1 ); } - delete master; + if( seek_write( outfd, mbuffer + pos, 1, mpos + pos ) != 1 ) + { show_error( "Error writing output file", errno ); + cleanup_and_fail( output_filename, outfd, 1 ); } } delete[] mbuffer; - if( verbosity >= 1 ) std::fputs( "\n", stdout ); - if( !done ) + if( verbosity >= 1 ) std::fputc( '\n', stdout ); + if( pos == 0 ) { show_error( "Can't repair input file. Error is probably larger than 1 byte." ); cleanup_and_fail( output_filename, outfd, 2 ); @@ -151,7 +184,7 @@ int repair_file( const std::string & input_filename, int debug_delay( const std::string & input_filename, Block range, const int verbosity ) { - struct stat in_stats; + struct stat in_stats; // not used const int infd = open_instream( input_filename.c_str(), &in_stats, true, true ); if( infd < 0 ) return 1; @@ -171,15 +204,14 @@ int debug_delay( const std::string & input_filename, Block range, if( !range.overlaps( mb ) ) continue; const long long mpos = file_index.mblock( i ).pos(); const long long msize = file_index.mblock( i ).size(); - if( verbosity >= 1 ) // damaged member found + if( verbosity >= 1 ) { std::printf( "Finding max delay in member %ld of %ld (mpos = %llu, msize = %llu)\n", i + 1, file_index.members(), mpos, msize ); std::fflush( stdout ); } uint8_t * const mbuffer = read_member( infd, mpos, msize ); - if( !mbuffer ) - { show_error( "Can't read member." ); return 1; } + if( !mbuffer ) return 1; long pos = std::max( range.pos() - mpos, File_header::size + 1LL ); const long end = std::min( range.end() - mpos, msize ); long max_delay = 0; @@ -217,7 +249,7 @@ int debug_delay( const std::string & input_filename, Block range, delete master; } delete[] mbuffer; - if( verbosity >= 1 ) std::fputs( "\n", stdout ); + if( verbosity >= 1 ) std::fputc( '\n', stdout ); } if( verbosity >= 1 ) std::fputs( "Done.\n", stdout ); @@ -228,7 +260,7 @@ int debug_delay( const std::string & input_filename, Block range, int debug_repair( const std::string & input_filename, const long long bad_pos, const int verbosity, const uint8_t bad_value ) { - struct stat in_stats; + struct stat in_stats; // not used const int infd = open_instream( input_filename.c_str(), &in_stats, true, true ); if( infd < 0 ) return 1; @@ -247,8 +279,7 @@ int debug_repair( const std::string & input_filename, const long long bad_pos, const long long msize = file_index.mblock( idx ).size(); { long long failure_pos = 0; - if( !safe_seek( infd, mpos ) ) - { show_error( "Can't seek to member." ); return 1; } + if( !safe_seek( infd, mpos ) ) return 1; if( !try_decompress_member( infd, msize, &failure_pos ) ) { if( verbosity >= 0 ) @@ -258,62 +289,101 @@ int debug_repair( const std::string & input_filename, const long long bad_pos, } } uint8_t * const mbuffer = read_member( infd, mpos, msize ); - if( !mbuffer ) - { show_error( "Can't read member." ); return 1; } - const uint8_t good_value = mbuffer[bad_pos]; - mbuffer[bad_pos] = bad_value; + if( !mbuffer ) return 1; + const uint8_t good_value = mbuffer[bad_pos-mpos]; + mbuffer[bad_pos-mpos] = bad_value; long failure_pos = 0; { const LZ_mtester * master = prepare_master( mbuffer, msize, 0 ); if( !master ) - { show_error( "Can't prepare master." ); return 1; } + { show_error( "Can't prepare master." ); delete[] mbuffer; return 1; } if( test_member_rest( *master, &failure_pos ) ) { if( verbosity >= 1 ) std::fputs( "Member decompressed with no errors.\n", stdout ); + delete master; + delete[] mbuffer; return 0; } + delete master; if( verbosity >= 1 ) { std::printf( "Test repairing member %ld of %ld (mpos = %llu, msize = %llu)\n" " (damage pos = %llu (0x%02X->0x%02X), failure pos = %llu)\n", idx + 1, file_index.members(), mpos, msize, - mpos + bad_pos, good_value, bad_value, mpos + failure_pos ); + bad_pos, good_value, bad_value, mpos + failure_pos ); std::fflush( stdout ); } } - long pos = failure_pos; - bool done = false; - while( pos >= File_header::size && pos > failure_pos - 50000 && !done ) + if( failure_pos >= msize ) failure_pos = msize - 1; + long pos = repair_member( mpos, msize, mbuffer, File_header::size + 1, + File_header::size + 2, verbosity ); + if( pos == 0 ) + pos = repair_member( mpos, msize, mbuffer, File_header::size + 3, + failure_pos, verbosity ); + delete[] mbuffer; + if( pos < 0 ) + { show_error( "Can't prepare master." ); return 1; } + if( verbosity >= 1 ) std::fputc( '\n', stdout ); + if( pos == 0 ) internal_error( "can't repair input file." ); + if( verbosity >= 1 ) + std::fputs( "Member repaired successfully.\n", stdout ); + return 0; + } + + +int debug_show_packets( const std::string & input_filename, + const long long bad_pos, const int verbosity, + const uint8_t bad_value ) + { + struct stat in_stats; // not used + const int infd = open_instream( input_filename.c_str(), &in_stats, true, true ); + if( infd < 0 ) return 1; + + Pretty_print pp( input_filename, verbosity ); + const File_index file_index( infd ); + if( file_index.retval() != 0 ) + { pp( file_index.error().c_str() ); return file_index.retval(); } + + int retval = 0; + for( long i = 0; i < file_index.members(); ++i ) { - const long min_pos = std::max( (long)File_header::size, pos - 100 ); - const LZ_mtester * master = prepare_master( mbuffer, msize, min_pos - 16 ); - if( !master ) - { show_error( "Can't prepare master." ); return 1; } - for( ; pos >= min_pos && !done; --pos ) + const long long dpos = file_index.dblock( i ).pos(); + const long long mpos = file_index.mblock( i ).pos(); + const long long msize = file_index.mblock( i ).size(); + if( verbosity >= 1 ) + std::printf( "Decoding LZMA packets in member %ld of %ld (mpos = %llu, msize = %llu)\n" + " mpos dpos\n", + i + 1, file_index.members(), mpos, msize ); + uint8_t * const mbuffer = read_member( infd, mpos, msize ); + if( !mbuffer ) return 1; + const File_header & header = *(File_header *)mbuffer; + const unsigned dictionary_size = header.dictionary_size(); + if( !header.verify_magic() || !header.verify_version() || + dictionary_size < min_dictionary_size || + dictionary_size > max_dictionary_size ) + { show_error( "Header error." ); return 2; } + if( bad_pos >= 0 && file_index.mblock( i ).includes( bad_pos ) ) { if( verbosity >= 1 ) - { - std::printf( "Trying position %llu \r", mpos + pos ); - std::fflush( stdout ); - } - for( int j = 0; j < 256; ++j ) - { - ++mbuffer[pos]; - if( j == 255 ) break; - if( test_member_rest( *master ) ) { done = true; break; } - } + std::printf( "Byte at pos %llu changed from 0x%02X to 0x%02X\n", + bad_pos, mbuffer[bad_pos-mpos], bad_value ); + mbuffer[bad_pos-mpos] = bad_value; } - delete master; - } - delete[] mbuffer; - if( verbosity >= 1 ) std::fputs( "\n", stdout ); - if( !done ) - { - show_error( "Can't repair input file. There is a bug somewhere." ); - return 3; + LZ_mtester mtester( mbuffer, msize, dictionary_size ); + const int result = mtester.debug_decode_member( dpos, mpos, true ); + delete[] mbuffer; + if( result != 0 ) + { + if( verbosity >= 0 && result <= 2 ) + std::printf( "%s at pos %llu\n", ( result == 2 ) ? + "File ends unexpectedly" : "Decoder error", + mpos + mtester.member_position() ); + retval = 2; break; + } + if( i + 1 < file_index.members() ) std::fputc( '\n', stdout ); } - if( verbosity >= 1 ) - std::fputs( "Member repaired successfully.\n", stdout ); - return 0; + + if( verbosity >= 1 ) std::fputs( "Done.\n", stdout ); + return retval; } diff --git a/testsuite/check.sh b/testsuite/check.sh index 1df77a3..8a9c81d 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -105,7 +105,6 @@ printf . cat in in > in2 || framework_failure cat "${in_lz}" "${in_lz}" > copy2.lz || framework_failure "${LZIP}" -t copy2.lz || fail=1 -printf . "${LZIP}" -cd copy2.lz > copy2 || fail=1 cmp in2 copy2 || fail=1 printf . @@ -210,19 +209,14 @@ printf . "${LZIPRECOVER}" -mf -o copy.lz "${bad3_lz}" "${bad4_lz}" "${bad5_lz}" || fail=1 cmp "${in_lz}" copy.lz || fail=1 -printf . "${LZIPRECOVER}" -mf -o copy.lz "${bad3_lz}" "${bad5_lz}" "${bad4_lz}" || fail=1 cmp "${in_lz}" copy.lz || fail=1 -printf . "${LZIPRECOVER}" -mf -o copy.lz "${bad4_lz}" "${bad3_lz}" "${bad5_lz}" || fail=1 cmp "${in_lz}" copy.lz || fail=1 -printf . "${LZIPRECOVER}" -mf -o copy.lz "${bad4_lz}" "${bad5_lz}" "${bad3_lz}" || fail=1 cmp "${in_lz}" copy.lz || fail=1 -printf . "${LZIPRECOVER}" -mf -o copy.lz "${bad5_lz}" "${bad3_lz}" "${bad4_lz}" || fail=1 cmp "${in_lz}" copy.lz || fail=1 -printf . "${LZIPRECOVER}" -mf -o copy.lz "${bad5_lz}" "${bad4_lz}" "${bad3_lz}" || fail=1 cmp "${in_lz}" copy.lz || fail=1 printf . @@ -250,6 +244,10 @@ rm -f copy.lz if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi "${LZIPRECOVER}" -R -o copy.lz "${bad2_lz}" -q if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIPRECOVER}" -R -o copy.lz "${bad3_lz}" -q +if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIPRECOVER}" -R -o copy.lz "${bad4_lz}" -q +if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi "${LZIPRECOVER}" -Rf -o copy.lz "${f5b1_lz}" || fail=1 cmp "${fox5_lz}" copy.lz || fail=1 "${LZIPRECOVER}" -Rf -o copy.lz "${bad1_lz}" || fail=1 diff --git a/unzcrash.cc b/unzcrash.cc index 49f67aa..50262b9 100644 --- a/unzcrash.cc +++ b/unzcrash.cc @@ -96,7 +96,7 @@ void show_error( const char * const msg, const int errcode = 0, std::fprintf( stderr, "%s: %s", program_name, msg ); if( errcode > 0 ) std::fprintf( stderr, ": %s", std::strerror( errcode ) ); - std::fprintf( stderr, "\n" ); + std::fputc( '\n', stderr ); } if( help ) std::fprintf( stderr, "Try '%s --help' for more information.\n", @@ -293,7 +293,7 @@ int main( const int argc, const char * const argv[] ) if( !f ) { if( verbosity >= 0 ) - std::fprintf( stderr, "Can't open file '%s' for reading\n", + std::fprintf( stderr, "Can't open file '%s' for reading.\n", parser.argument( argind + 1 ).c_str() ); return 1; } @@ -316,7 +316,7 @@ int main( const int argc, const char * const argv[] ) if( wr != size || pclose( f ) != 0 ) { if( verbosity >= 0 ) - std::fprintf( stderr, "Could not run '%s' : %s.\n", + std::fprintf( stderr, "Could not run '%s': %s\n", parser.argument( argind ).c_str(), std::strerror( errno ) ); return 1; } |