From 0763626b1d5a396a8d78985c9a445763686f92f8 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 7 Nov 2015 12:45:50 +0100 Subject: Merging upstream version 1.16~pre2. Signed-off-by: Daniel Baumann --- ChangeLog | 5 + Makefile.in | 19 ++-- NEWS | 3 + configure | 2 +- decoder.cc | 23 ++-- decoder.h | 2 +- doc/lziprecover.1 | 2 +- doc/lziprecover.info | 41 ++++--- doc/lziprecover.texi | 25 +++-- file_index.cc | 13 ++- file_index.h | 8 +- lzip.h | 8 +- main.cc | 21 ++-- merge.cc | 45 ++++---- mtester.cc | 209 +++++++++++++++++++++++++++++++++++ mtester.h | 300 +++++++++++++++++++++++++++++++++++++++++++++++++++ range_dec.cc | 10 +- repair.cc | 88 +++++++-------- split.cc | 4 +- 19 files changed, 685 insertions(+), 143 deletions(-) create mode 100644 mtester.cc create mode 100644 mtester.h diff --git a/ChangeLog b/ChangeLog index 1e94bae..85b179e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2014-05-25 Antonio Diaz Diaz + + * Version 1.16-pre2 released. + * New class LZ_mtester makes repair much faster. + 2014-04-05 Antonio Diaz Diaz * Version 1.16-pre1 released. diff --git a/Makefile.in b/Makefile.in index 54e61a3..f0181b8 100644 --- a/Makefile.in +++ b/Makefile.in @@ -6,8 +6,8 @@ INSTALL_DATA = $(INSTALL) -m 644 INSTALL_DIR = $(INSTALL) -d -m 755 SHELL = /bin/sh -objs = arg_parser.o file_index.o merge.o range_dec.o repair.o split.o \ - decoder.o main.o +objs = arg_parser.o file_index.o merge.o mtester.o range_dec.o repair.o \ + split.o decoder.o main.o unzobjs = arg_parser.o unzcrash.o @@ -18,22 +18,22 @@ unzobjs = arg_parser.o unzcrash.o all : $(progname) $(progname) : $(objs) - $(CXX) $(LDFLAGS) -o $@ $(objs) + $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(objs) $(progname)_profiled : $(objs) - $(CXX) $(LDFLAGS) -pg -o $@ $(objs) + $(CXX) $(CXXFLAGS) $(LDFLAGS) -pg -o $@ $(objs) unzcrash : $(unzobjs) - $(CXX) $(LDFLAGS) -o $@ $(unzobjs) + $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(unzobjs) main.o : main.cc - $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< + $(CXX) $(CXXFLAGS) $(CPPFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< unzcrash.o : testsuite/unzcrash.cc - $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< + $(CXX) $(CXXFLAGS) $(CPPFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< %.o : %.cc - $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< + $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c -o $@ $< $(objs) : Makefile arg_parser.o : arg_parser.h @@ -41,8 +41,9 @@ decoder.o : lzip.h decoder.h file_index.o : lzip.h file_index.h main.o : arg_parser.h lzip.h decoder.h merge.o : lzip.h decoder.h file_index.h +mtester.o : lzip.h mtester.h range_dec.o : lzip.h decoder.h file_index.h -repair.o : lzip.h file_index.h +repair.o : lzip.h file_index.h mtester.h split.o : lzip.h unzcrash.o : arg_parser.h Makefile diff --git a/NEWS b/NEWS index 8066951..bff5bae 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,8 @@ Changes in version 1.16: +Repairing of single-byte errors is now about 10 times faster depending +on file size and position of error. + Copying of file dates, permissions, and ownership now behaves like "cp -p". (If the user ID or the group ID can't be duplicated, the file permission bits S_ISUID and S_ISGID are cleared). diff --git a/configure b/configure index 6701961..92f078c 100755 --- a/configure +++ b/configure @@ -6,7 +6,7 @@ # to copy, distribute and modify it. pkgname=lziprecover -pkgversion=1.16-pre1 +pkgversion=1.16-pre2 progname=lziprecover srctrigger=doc/${pkgname}.texi diff --git a/decoder.cc b/decoder.cc index c0defc8..75d70d0 100644 --- a/decoder.cc +++ b/decoder.cc @@ -54,19 +54,20 @@ void Pretty_print::operator()( const char * const msg, FILE * const f ) const /* Returns the number of bytes really read. If (returned value < size) and (errno == 0), means EOF was reached. */ -int readblock( const int fd, uint8_t * const buf, const int size ) +long readblock( const int fd, uint8_t * const buf, const long size ) { - int rest = size; + long pos = 0; errno = 0; - while( rest > 0 ) + while( pos < size ) { - const int n = read( fd, buf + size - rest, rest ); - if( n > 0 ) rest -= n; + const int sz = std::min( 65536L, size - pos ); + const int n = read( fd, buf + pos, sz ); + if( n > 0 ) pos += n; else if( n == 0 ) break; // EOF else if( errno != EINTR ) break; errno = 0; } - return size - rest; + return pos; } @@ -75,16 +76,16 @@ int readblock( const int fd, uint8_t * const buf, const int size ) */ int writeblock( const int fd, const uint8_t * const buf, const int size ) { - int rest = size; + int pos = 0; errno = 0; - while( rest > 0 ) + while( pos < size ) { - const int n = write( fd, buf + size - rest, rest ); - if( n > 0 ) rest -= n; + const int n = write( fd, buf + pos, size - pos ); + if( n > 0 ) pos += n; else if( n < 0 && errno != EINTR ) break; errno = 0; } - return size - rest; + return pos; } diff --git a/decoder.h b/decoder.h index 4230c5a..30c28f5 100644 --- a/decoder.h +++ b/decoder.h @@ -281,7 +281,7 @@ public: stream_pos( 0 ), crc_( 0xFFFFFFFFU ), outfd( ofd ) - { buffer[buffer_size-1] = 0; } // prev_byte of first_byte + { buffer[buffer_size-1] = 0; } // prev_byte of first byte ~LZ_decoder() { delete[] buffer; } diff --git a/doc/lziprecover.1 b/doc/lziprecover.1 index 3a6300f..6dbc3ec 100644 --- a/doc/lziprecover.1 +++ b/doc/lziprecover.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1. -.TH LZIPRECOVER "1" "April 2014" "lziprecover 1.16-pre1" "User Commands" +.TH LZIPRECOVER "1" "May 2014" "lziprecover 1.16-pre2" "User Commands" .SH NAME lziprecover \- recovers data from damaged lzip files .SH SYNOPSIS diff --git a/doc/lziprecover.info b/doc/lziprecover.info index 1248e6f..7bc75f0 100644 --- a/doc/lziprecover.info +++ b/doc/lziprecover.info @@ -12,7 +12,7 @@ File: lziprecover.info, Node: Top, Next: Introduction, Up: (dir) Lziprecover Manual ****************** -This manual is for Lziprecover (version 1.16-pre1, 5 April 2014). +This manual is for Lziprecover (version 1.16-pre2, 25 May 2014). * Menu: @@ -248,17 +248,26 @@ File: lziprecover.info, Node: Repairing files, Next: Merging files, Prev: Inv 3 Repairing files ***************** -Lziprecover is able to repair files with small errors (up to one byte -error per member). The error may be located anywhere in the file except -in the header (first 6 bytes of each member) or in the 'Member size' -field of the trailer (last 8 bytes of each member). This makes lzip -files resistant to bit-flip, one of the most common forms of data +Lziprecover is usually able to repair files with small errors (up to one +byte error per member). The error may be located anywhere in the file +except in the header (first 6 bytes of each member) or in the 'Member +size' field of the trailer (last 8 bytes of each member). This makes +lzip files resistant to bit-flip, one of the most common forms of data corruption. Bit-flip happens when one bit in the file is changed from 0 to 1 or vice versa. It may be caused by bad RAM or even by natural radiation. I have seen a case of bit-flip in a file stored in an USB flash drive. + Repairing a file can take some time. Small files or files with the +error located near the beginning can be repaired in a few seconds. But +repairing a large file compressed with a large dictionary size and with +the error located far from the beginning, can take hours. + + On the other hand, errors located near the beginning of the file +cause much more loss of data than errors located near the end. So +lziprecover repairs more efficiently the worst errors. +  File: lziprecover.info, Node: Merging files, Next: File format, Prev: Repairing files, Up: Top @@ -552,16 +561,16 @@ Concept index  Tag Table: Node: Top226 -Node: Introduction1100 -Node: Invoking lziprecover3858 -Node: Repairing files9296 -Node: Merging files10015 -Node: File format11786 -Node: Examples14296 -Ref: ddrescue-example15497 -Node: Unzcrash16606 -Node: Problems18978 -Node: Concept index19528 +Node: Introduction1099 +Node: Invoking lziprecover3857 +Node: Repairing files9295 +Node: Merging files10485 +Node: File format12256 +Node: Examples14766 +Ref: ddrescue-example15967 +Node: Unzcrash17076 +Node: Problems19448 +Node: Concept index19998  End Tag Table diff --git a/doc/lziprecover.texi b/doc/lziprecover.texi index be4fc27..00fbc8e 100644 --- a/doc/lziprecover.texi +++ b/doc/lziprecover.texi @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 5 April 2014 -@set VERSION 1.16-pre1 +@set UPDATED 25 May 2014 +@set VERSION 1.16-pre2 @dircategory Data Compression @direntry @@ -278,17 +278,26 @@ caused lziprecover to panic. @chapter Repairing files @cindex repairing files -Lziprecover is able to repair files with small errors (up to one byte -error per member). The error may be located anywhere in the file except -in the header (first 6 bytes of each member) or in the @samp{Member -size} field of the trailer (last 8 bytes of each member). This makes -lzip files resistant to bit-flip, one of the most common forms of data -corruption. +Lziprecover is usually able to repair files with small errors (up to one +byte error per member). The error may be located anywhere in the file +except in the header (first 6 bytes of each member) or in the +@samp{Member size} field of the trailer (last 8 bytes of each member). +This makes lzip files resistant to bit-flip, one of the most common +forms of data corruption. Bit-flip happens when one bit in the file is changed from 0 to 1 or vice versa. It may be caused by bad RAM or even by natural radiation. I have seen a case of bit-flip in a file stored in an USB flash drive. +Repairing a file can take some time. Small files or files with the error +located near the beginning can be repaired in a few seconds. But +repairing a large file compressed with a large dictionary size and with +the error located far from the beginning, can take hours. + +On the other hand, errors located near the beginning of the file cause +much more loss of data than errors located near the end. So lziprecover +repairs more efficiently the worst errors. + @node Merging files @chapter Merging files diff --git a/file_index.cc b/file_index.cc index cdb4031..b4f5420 100644 --- a/file_index.cc +++ b/file_index.cc @@ -28,6 +28,15 @@ #include "file_index.h" +int seek_read( const int fd, uint8_t * const buf, const int size, + const long long pos ) + { + if( lseek( fd, pos, SEEK_SET ) == pos ) + return readblock( fd, buf, size ); + return 0; + } + + Block Block::split( const long long pos ) { if( pos > pos_ && pos < end() ) @@ -120,7 +129,7 @@ File_index::File_index( const int infd ) return; } std::reverse( member_vector.begin(), member_vector.end() ); - for( unsigned i = 0; i < member_vector.size() - 1; ++i ) + for( unsigned long i = 0; i < member_vector.size() - 1; ++i ) { const long long end = member_vector[i].dblock.end(); if( end < 0 || end > INT64_MAX ) @@ -214,7 +223,7 @@ error: return; } std::reverse( member_vector.begin(), member_vector.end() ); - for( unsigned i = 0; i < member_vector.size() - 1; ++i ) + for( unsigned long i = 0; i < member_vector.size() - 1; ++i ) { const long long end = member_vector[i].dblock.end(); if( end < 0 || end > INT64_MAX ) diff --git a/file_index.h b/file_index.h index 8acb60f..7fee1e6 100644 --- a/file_index.h +++ b/file_index.h @@ -76,7 +76,7 @@ public: explicit File_index( const int infd ); File_index( const std::vector< int > & infd_vector, const long long fsize ); - int members() const { return member_vector.size(); } + long members() const { return member_vector.size(); } const std::string & error() const { return error_; } int retval() const { return retval_; } @@ -84,7 +84,7 @@ public: { if( retval_ || fi.retval_ || isize != fi.isize || member_vector.size() != fi.member_vector.size() ) return false; - for( unsigned i = 0; i < member_vector.size(); ++i ) + for( unsigned long i = 0; i < member_vector.size(); ++i ) if( member_vector[i] != fi.member_vector[i] ) return false; return true; } @@ -102,8 +102,8 @@ public: long long file_size() const { if( isize >= 0 ) return isize; else return 0; } - const Block & dblock( const int i ) const + const Block & dblock( const long i ) const { return member_vector[i].dblock; } - const Block & mblock( const int i ) const + const Block & mblock( const long i ) const { return member_vector[i].mblock; } }; diff --git a/lzip.h b/lzip.h index 483835f..cd44b42 100644 --- a/lzip.h +++ b/lzip.h @@ -279,9 +279,13 @@ inline unsigned long long positive_diff( const unsigned long long x, // defined in decoder.cc -int readblock( const int fd, uint8_t * const buf, const int size ); +long readblock( const int fd, uint8_t * const buf, const long size ); int writeblock( const int fd, const uint8_t * const buf, const int size ); +// defined in file_index.cc +int seek_read( const int fd, uint8_t * const buf, const int size, + const long long pos ); + // defined in main.cc int open_instream( const char * const name, struct stat * const in_statsp, const bool no_ofile, const bool reg_only = false ); @@ -314,8 +318,6 @@ int range_decompress( const std::string & input_filename, const bool force, const bool ignore, const bool to_stdout ); // defined in repair.cc -int seek_read( const int fd, uint8_t * const buf, const int size, - const long long pos ); int repair_file( const std::string & input_filename, const std::string & output_filename, const int verbosity, const bool force ); diff --git a/main.cc b/main.cc index e5b1cbc..81a13aa 100644 --- a/main.cc +++ b/main.cc @@ -256,17 +256,6 @@ bool open_outstream( const bool force ) } -bool check_tty( const int infd ) - { - if( isatty( infd ) ) - { - show_error( "I won't read compressed data from a terminal.", 0, true ); - return false; - } - return true; - } - - void cleanup_and_fail( const int retval ) { if( delete_output_on_interrupt ) @@ -591,6 +580,7 @@ int main( const int argc, const char * const argv[] ) if( filenames.back() != "-" ) filenames_given = true; } + try { switch( program_mode ) { case m_none: internal_error( "invalid operation." ); break; @@ -620,6 +610,9 @@ int main( const int argc, const char * const argv[] ) return split_file( filenames[0], default_output_filename, verbosity, force ); case m_test: break; } + } + catch( std::bad_alloc ) { show_error( "Not enough memory." ); return 1; } + catch( Error e ) { show_error( e.msg, errno ); return 1; } if( program_mode == m_test ) outfd = -1; @@ -683,7 +676,11 @@ int main( const int argc, const char * const argv[] ) } } - if( !check_tty( infd ) ) return 1; + if( isatty( infd ) ) + { + show_error( "I won't read compressed data from a terminal.", 0, true ); + return 1; + } if( output_filename.size() && !to_stdout && program_mode != m_test ) delete_output_on_interrupt = true; diff --git a/merge.cc b/merge.cc index 08a3d0e..95b9318 100644 --- a/merge.cc +++ b/merge.cc @@ -75,6 +75,7 @@ void combine( std::vector< Block > & block_vector, std::vector< Block > & bv ) } +// positions in 'block_vector' are absolute file positions. bool diff_member( const long long mpos, const long long msize, const std::vector< int > & infd_vector, std::vector< Block > & block_vector ) @@ -220,7 +221,7 @@ int open_input_files( const std::vector< std::string > & filenames, { const int infd = infd_vector[i]; bool error = false; - for( int j = 0; j < file_index.members(); ++j ) + for( long j = 0; j < file_index.members(); ++j ) { const long long mpos = file_index.mblock( j ).pos(); const long long msize = file_index.mblock( j ).size(); @@ -284,26 +285,21 @@ bool copy_file( const int infd, const int outfd, const long long max_size ) bool try_decompress_member( const int fd, const unsigned long long msize, long long * failure_posp ) { - try { - Range_decoder rdec( fd ); - File_header header; - rdec.read_data( header.data, File_header::size ); - if( !rdec.finished() && // End Of File - header.verify_magic() && header.verify_version() && - header.dictionary_size() >= min_dictionary_size && - header.dictionary_size() <= max_dictionary_size ) - { - LZ_decoder decoder( header, rdec, -1 ); - Pretty_print dummy( "", -1 ); + Range_decoder rdec( fd ); + File_header header; + rdec.read_data( header.data, File_header::size ); + if( !rdec.finished() && // End Of File + header.verify_magic() && header.verify_version() && + header.dictionary_size() >= min_dictionary_size && + header.dictionary_size() <= max_dictionary_size ) + { + LZ_decoder decoder( header, rdec, -1 ); + Pretty_print dummy( "", -1 ); - if( decoder.decode_member( dummy ) == 0 && - rdec.member_position() == msize ) return true; - if( failure_posp ) *failure_posp = rdec.member_position(); - } + if( decoder.decode_member( dummy ) == 0 && + rdec.member_position() == msize ) return true; + if( failure_posp ) *failure_posp = rdec.member_position(); } - catch( std::bad_alloc ) - { show_error( "Not enough memory." ); std::exit( 1 ); } - catch( Error e ) {} return false; } @@ -325,7 +321,7 @@ int merge_files( const std::vector< std::string > & filenames, if( !copy_file( infd_vector[0], outfd ) ) // copy whole file cleanup_and_fail( output_filename, outfd, 1 ); - for( int j = 0; j < file_index.members(); ++j ) + for( long j = 0; j < file_index.members(); ++j ) { const long long mpos = file_index.mblock( j ).pos(); const long long msize = file_index.mblock( j ).size(); @@ -360,7 +356,7 @@ int merge_files( const std::vector< std::string > & filenames, if( verbosity >= 1 && file_index.members() > 1 ) { - std::printf( "Merging member %d\n", j + 1 ); + std::printf( "Merging member %ld\n", j + 1 ); std::fflush( stdout ); } const int base_variations = ipow( files, block_vector.size() ); @@ -378,11 +374,10 @@ int merge_files( const std::vector< std::string > & filenames, { const int infd = infd_vector[tmp % files]; tmp /= files; - if( lseek( infd, block_vector[i].pos(), SEEK_SET ) < 0 || - lseek( outfd, block_vector[i].pos(), SEEK_SET ) < 0 || + if( !safe_seek( infd, block_vector[i].pos() ) || + !safe_seek( outfd, block_vector[i].pos() ) || !copy_file( infd, outfd, block_vector[i].size() ) ) - { show_error( "Error reading output file", errno ); - cleanup_and_fail( output_filename, outfd, 1 ); } + cleanup_and_fail( output_filename, outfd, 1 ); } if( !safe_seek( outfd, mpos ) ) cleanup_and_fail( output_filename, outfd, 1 ); diff --git a/mtester.cc b/mtester.cc new file mode 100644 index 0000000..3fd2563 --- /dev/null +++ b/mtester.cc @@ -0,0 +1,209 @@ +/* Lziprecover - Data recovery tool for lzip files + Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "lzip.h" +#include "mtester.h" + + +void LZ_mtester::flush_data() + { + if( pos > stream_pos ) + { + const int size = pos - stream_pos; + crc32.update_buf( crc_, buffer + stream_pos, size ); + if( pos >= buffer_size ) { partial_data_pos += pos; pos = 0; } + stream_pos = pos; + } + } + + +bool LZ_mtester::verify_trailer() + { + const File_trailer * trailer = rdec.get_trailer(); + if( !trailer ) return false; + + return ( rdec.code_is_zero() && + trailer->data_crc() == crc() && + trailer->data_size() == data_position() && + trailer->member_size() == (unsigned long)member_position() ); + } + + +void LZ_mtester::duplicate_buffer() + { + uint8_t * const tmp = new uint8_t[buffer_size]; + if( data_position() > 0 ) + std::memcpy( tmp, buffer, std::min( data_position(), + (unsigned long long)buffer_size ) ); + else tmp[buffer_size-1] = 0; // prev_byte of first byte + buffer = tmp; + } + + +/* Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF, + 3 = trailer error, 4 = unknown marker found, + -1 = pos_limit reached. */ +int LZ_mtester::test_member( const long pos_limit ) + { + if( pos_limit < File_header::size + 5 ) return -1; + if( member_position() == File_header::size ) rdec.load(); + while( !rdec.finished() ) + { + if( member_position() >= pos_limit ) { flush_data(); return -1; } + const int pos_state = data_position() & pos_state_mask; + if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit + { + const uint8_t prev_byte = get_prev_byte(); + if( state.is_char() ) + { + state.set_char1(); + put_byte( rdec.decode_tree8( bm_literal[get_lit_state(prev_byte)] ) ); + } + else + { + state.set_char2(); + put_byte( rdec.decode_matched( bm_literal[get_lit_state(prev_byte)], + get_byte( rep0 ) ) ); + } + } + else + { + int len; + if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit + { + if( rdec.decode_bit( bm_rep0[state()] ) != 0 ) // 3rd bit + { + unsigned distance; + if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit + distance = rep1; + else + { + if( rdec.decode_bit( bm_rep2[state()] ) == 0 ) // 5th bit + distance = rep2; + else + { distance = rep3; rep3 = rep2; } + rep2 = rep1; + } + rep1 = rep0; + rep0 = distance; + } + else + { + if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit + { state.set_short_rep(); put_byte( get_byte( rep0 ) ); continue; } + } + state.set_rep(); + len = min_match_len + rdec.decode_len( rep_len_model, pos_state ); + } + else + { + const unsigned rep0_saved = rep0; + len = min_match_len + rdec.decode_len( match_len_model, pos_state ); + const int dis_slot = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] ); + if( dis_slot < start_dis_model ) rep0 = dis_slot; + else + { + const int direct_bits = ( dis_slot >> 1 ) - 1; + rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits; + if( dis_slot < end_dis_model ) + rep0 += rdec.decode_tree_reversed( bm_dis + rep0 - dis_slot - 1, + direct_bits ); + else + { + rep0 += rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits; + rep0 += rdec.decode_tree_reversed4( bm_align ); + if( rep0 == 0xFFFFFFFFU ) // Marker found + { + rep0 = rep0_saved; + rdec.normalize(); + flush_data(); + if( len == min_match_len ) // End Of Stream marker + { + if( verify_trailer() ) return 0; else return 3; + } + return 4; + } + } + } + rep3 = rep2; rep2 = rep1; rep1 = rep0_saved; + state.set_match(); + if( rep0 >= dictionary_size || rep0 >= data_position() ) + { flush_data(); return 1; } + } + copy_block( rep0, len ); + } + } + flush_data(); + return 2; + } + + +uint8_t * read_member( const int infd, const long long mpos, + const long long msize ) + { + if( msize <= 0 || msize > LONG_MAX ) + { show_error( "Member is larger than LONG_MAX." ); return 0; } + if( !safe_seek( infd, mpos ) ) return 0; + uint8_t * const buffer = new uint8_t[msize]; + + if( readblock( infd, buffer, msize ) != msize ) + { show_error( "Error reading input file", errno ); + delete[] buffer; return 0; } + return buffer; + } + + +const LZ_mtester * prepare_master( const uint8_t * const buffer, + const long buffer_size, + const long pos_limit ) + { + File_header & header = *(File_header *)buffer; + const unsigned dictionary_size = header.dictionary_size(); + if( header.verify_magic() && header.verify_version() && + dictionary_size >= min_dictionary_size && + dictionary_size <= max_dictionary_size ) + { + LZ_mtester * const master = + new LZ_mtester( buffer, buffer_size, dictionary_size ); + if( master->test_member( pos_limit ) == -1 ) return master; + delete master; + } + return 0; + } + + +bool test_member_rest( const LZ_mtester & master, long * const failure_posp ) + { + LZ_mtester mtester( master ); + mtester.duplicate_buffer(); + if( mtester.test_member() == 0 && mtester.finished() ) return true; + if( failure_posp ) *failure_posp = mtester.member_position(); + return false; + } diff --git a/mtester.h b/mtester.h new file mode 100644 index 0000000..3ff3fcb --- /dev/null +++ b/mtester.h @@ -0,0 +1,300 @@ +/* Lziprecover - Data recovery tool for lzip files + Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +class Range_mtester + { + const uint8_t * const buffer; // input buffer + const long buffer_size; + long pos; // current pos in buffer + uint32_t code; + uint32_t range; + bool at_stream_end; + + void operator=( const Range_mtester & ); // declared as private + +public: + Range_mtester( const uint8_t * const buf, const long buf_size ) + : + buffer( buf ), + buffer_size( buf_size ), + pos( File_header::size ), + code( 0 ), + range( 0xFFFFFFFFU ), + at_stream_end( false ) + {} + + void load() + { + for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte(); + code &= range; // make sure that first byte is discarded + } + + bool code_is_zero() const { return ( code == 0 ); } + bool finished() { return pos >= buffer_size; } + long member_position() const { return pos; } + + const File_trailer * get_trailer() + { + if( buffer_size - pos < File_trailer::size ) return 0; + const File_trailer * const p = (File_trailer *)(buffer + pos); + pos += File_trailer::size; + return p; + } + + uint8_t get_byte() + { + if( finished() ) return 0xAA; // make code != 0 + return buffer[pos++]; + } + + void normalize() + { + if( range <= 0x00FFFFFFU ) + { range <<= 8; code = (code << 8) | get_byte(); } + } + + int decode( const int num_bits ) + { + int symbol = 0; + for( int i = num_bits; i > 0; --i ) + { + normalize(); + range >>= 1; +// symbol <<= 1; +// if( code >= range ) { code -= range; symbol |= 1; } + const uint32_t mask = 0U - (code < range); + code -= range; + code += range & mask; + symbol = (symbol << 1) + (mask + 1); + } + return symbol; + } + + int decode_bit( Bit_model & bm ) + { + normalize(); + const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability; + if( code < bound ) + { + range = bound; + bm.probability += (bit_model_total - bm.probability) >> bit_model_move_bits; + return 0; + } + else + { + range -= bound; + code -= bound; + bm.probability -= bm.probability >> bit_model_move_bits; + return 1; + } + } + + int decode_tree3( Bit_model bm[] ) + { + int symbol = 1; + symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); + symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); + symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); + return symbol & 7; + } + + int decode_tree6( Bit_model bm[] ) + { + int symbol = 1; + symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); + symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); + symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); + symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); + symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); + symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); + return symbol & 0x3F; + } + + int decode_tree8( Bit_model bm[] ) + { + int symbol = 1; + while( symbol < 0x100 ) + symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); + return symbol & 0xFF; + } + + int decode_tree_reversed( Bit_model bm[], const int num_bits ) + { + int model = 1; + int symbol = 0; + for( int i = 0; i < num_bits; ++i ) + { + const bool bit = decode_bit( bm[model] ); + model <<= 1; + if( bit ) { ++model; symbol |= (1 << i); } + } + return symbol; + } + + int decode_tree_reversed4( Bit_model bm[] ) + { + int model = 1; + int symbol = decode_bit( bm[model] ); + model = (model << 1) + symbol; + int bit = decode_bit( bm[model] ); + model = (model << 1) + bit; symbol |= (bit << 1); + bit = decode_bit( bm[model] ); + model = (model << 1) + bit; symbol |= (bit << 2); + if( decode_bit( bm[model] ) ) symbol |= 8; + return symbol; + } + + int decode_matched( Bit_model bm[], int match_byte ) + { + Bit_model * const bm1 = bm + 0x100; + int symbol = 1; + while( symbol < 0x100 ) + { + match_byte <<= 1; + const int match_bit = match_byte & 0x100; + const int bit = decode_bit( bm1[match_bit+symbol] ); + symbol = ( symbol << 1 ) | bit; + if( match_bit != bit << 8 ) + { + while( symbol < 0x100 ) + symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); + break; + } + } + return symbol & 0xFF; + } + + int decode_len( Len_model & lm, const int pos_state ) + { + if( decode_bit( lm.choice1 ) == 0 ) + return decode_tree3( lm.bm_low[pos_state] ); + if( decode_bit( lm.choice2 ) == 0 ) + return len_low_symbols + decode_tree3( lm.bm_mid[pos_state] ); + return len_low_symbols + len_mid_symbols + decode_tree8( lm.bm_high ); + } + }; + + +class LZ_mtester + { + unsigned long long partial_data_pos; + Range_mtester rdec; + const unsigned dictionary_size; + const int buffer_size; + uint8_t * buffer; // output buffer + int pos; // current pos in buffer + int stream_pos; // first byte not yet written to file + uint32_t crc_; + unsigned rep0; // rep[0-3] latest four distances + unsigned rep1; // used for efficient coding of + unsigned rep2; // repeated distances + unsigned rep3; + State state; + + Bit_model bm_literal[1< 0 ) ? pos : buffer_size ) - 1; + return buffer[i]; + } + + uint8_t get_byte( const int distance ) const + { + int i = pos - distance - 1; + if( i < 0 ) i += buffer_size; + return buffer[i]; + } + + void put_byte( const uint8_t b ) + { + buffer[pos] = b; + if( ++pos >= buffer_size ) flush_data(); + } + + void copy_block( const int distance, int len ) + { + int i = pos - distance - 1; + if( i < 0 ) i += buffer_size; + if( len < buffer_size - std::max( pos, i ) && len <= std::abs( pos - i ) ) + { + std::memcpy( buffer + pos, buffer + i, len ); // no wrap, no overlap + pos += len; + } + else for( ; len > 0; --len ) + { + buffer[pos] = buffer[i]; + if( ++pos >= buffer_size ) flush_data(); + if( ++i >= buffer_size ) i = 0; + } + } + + void operator=( const LZ_mtester & ); // declared as private + +public: + LZ_mtester( const uint8_t * const ibuf, const long ibuf_size, + const int dict_size ) + : + partial_data_pos( 0 ), + rdec( ibuf, ibuf_size ), + dictionary_size( dict_size ), + buffer_size( std::max( 65536U, dictionary_size ) ), + buffer( new uint8_t[buffer_size] ), + pos( 0 ), + stream_pos( 0 ), + crc_( 0xFFFFFFFFU ), + rep0( 0 ), + rep1( 0 ), + rep2( 0 ), + rep3( 0 ) + { buffer[buffer_size-1] = 0; } // prev_byte of first byte + + ~LZ_mtester() { delete[] buffer; } + + unsigned crc() const { return crc_ ^ 0xFFFFFFFFU; } + unsigned long long data_position() const { return partial_data_pos + pos; } + bool finished() { return rdec.finished(); } + long member_position() const { return rdec.member_position(); } + + void duplicate_buffer(); + int test_member( const long pos_limit = LONG_MAX ); + }; + + +uint8_t * read_member( const int infd, const long long mpos, + const long long msize ); +const LZ_mtester * prepare_master( const uint8_t * const buffer, + const long buffer_size, + const long pos_limit ); +bool test_member_rest( const LZ_mtester & master, long * const failure_posp = 0 ); diff --git a/range_dec.cc b/range_dec.cc index 2c6c342..111405d 100644 --- a/range_dec.cc +++ b/range_dec.cc @@ -203,13 +203,13 @@ int list_file( const char * const input_filename, const Pretty_print & pp ) if( pp.verbosity() >= 1 && file_index.members() > 1 ) { - std::printf( " Total members in file = %d.\n", file_index.members() ); + std::printf( " Total members in file = %ld.\n", file_index.members() ); if( pp.verbosity() >= 2 ) - for( int i = 0; i < file_index.members(); ++i ) + for( long i = 0; i < file_index.members(); ++i ) { const Block & db = file_index.dblock( i ); const Block & mb = file_index.mblock( i ); - std::printf( " Member %3d data pos %9llu data size %7llu " + std::printf( " Member %3ld data pos %9llu data size %7llu " "member pos %9llu member size %7llu.\n", i + 1, db.pos(), db.size(), mb.pos(), mb.size() ); } @@ -282,13 +282,13 @@ int range_decompress( const std::string & input_filename, if( outfd < 0 ) return 1; } int retval = 0; - for( int i = 0; i < file_index.members(); ++i ) + for( long i = 0; i < file_index.members(); ++i ) { const Block & db = file_index.dblock( i ); if( range.overlaps( db ) ) { if( verbosity >= 3 ) - std::fprintf( stderr, "Decompressing member %3d\n", i + 1 ); + std::fprintf( stderr, "Decompressing member %3ld\n", i + 1 ); const long long outskip = std::max( 0LL, range.pos() - db.pos() ); const long long outend = std::min( db.size(), range.end() - db.pos() ); const long long mpos = file_index.mblock( i ).pos(); diff --git a/repair.cc b/repair.cc index 92a417f..0048bcf 100644 --- a/repair.cc +++ b/repair.cc @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -29,15 +30,7 @@ #include "lzip.h" #include "file_index.h" - - -int seek_read( const int fd, uint8_t * const buf, const int size, - const long long pos ) - { - if( lseek( fd, pos, SEEK_SET ) == pos ) - return readblock( fd, buf, size ); - return 0; - } +#include "mtester.h" int seek_write( const int fd, const uint8_t * const buf, const int size, @@ -63,7 +56,7 @@ int repair_file( const std::string & input_filename, { pp( file_index.error().c_str() ); return file_index.retval(); } int outfd = -1; - for( int i = 0; i < file_index.members(); ++i ) + for( long i = 0; i < file_index.members(); ++i ) { const long long mpos = file_index.mblock( i ).pos(); const long long msize = file_index.mblock( i ).size(); @@ -76,50 +69,59 @@ int repair_file( const std::string & input_filename, { show_error( "Can't repair error in input file." ); cleanup_and_fail( output_filename, outfd, 2 ); } - if( outfd < 0 ) // first damaged member found + if( verbosity >= 1 ) // damaged member found { - if( !safe_seek( infd, 0 ) ) return 1; - outfd = open_outstream_rw( output_filename, force ); - if( outfd < 0 ) { close( infd ); return 1; } - if( !copy_file( infd, outfd ) ) // copy whole file - cleanup_and_fail( output_filename, outfd, 1 ); - } - - if( verbosity >= 1 ) - { - std::printf( "Repairing member %d\n", i + 1 ); + std::printf( "Repairing member %ld (failure pos = %llu)\n", + i + 1, mpos + failure_pos ); std::fflush( stdout ); } - const long long min_pos = - std::max( (long long)File_header::size, failure_pos - 1000 ); + uint8_t * const mbuffer = read_member( infd, mpos, msize ); + if( !mbuffer ) + cleanup_and_fail( output_filename, outfd, 1 ); + long pos = failure_pos; bool done = false; - for( long long pos = failure_pos; pos >= min_pos && !done ; --pos ) + while( pos >= File_header::size && pos > failure_pos - 20000 && !done ) { - if( verbosity >= 1 ) - { - std::printf( "Trying position %llu \r", mpos + pos ); - std::fflush( stdout ); - } - uint8_t byte; - if( seek_read( outfd, &byte, 1, mpos + pos ) != 1 ) - { show_error( "Error reading output file", errno ); - cleanup_and_fail( output_filename, outfd, 1 ); } - for( int i = 0; i < 256; ++i ) + const long min_pos = std::max( (long)File_header::size, pos - 1000 ); + const LZ_mtester * master = prepare_master( mbuffer, msize, min_pos - 16 ); + if( !master ) + cleanup_and_fail( output_filename, outfd, 1 ); + for( ; pos >= min_pos && !done ; --pos ) { - ++byte; - if( seek_write( outfd, &byte, 1, mpos + pos ) != 1 || - lseek( outfd, mpos, SEEK_SET ) < 0 ) - { show_error( "Error writing output file", errno ); - cleanup_and_fail( output_filename, outfd, 1 ); } - if( i == 255 ) break; - if( try_decompress_member( outfd, msize ) ) - { done = true; break; } + if( verbosity >= 1 ) + { + std::printf( "Trying position %llu \r", mpos + pos ); + std::fflush( stdout ); + } + for( int j = 0; j < 256; ++j ) + { + ++mbuffer[pos]; + if( j == 255 ) break; + if( test_member_rest( *master ) ) + { + done = true; + if( outfd < 0 ) // first damaged member repaired + { + if( !safe_seek( infd, 0 ) ) return 1; + outfd = open_outstream_rw( output_filename, force ); + if( outfd < 0 ) { close( infd ); return 1; } + if( !copy_file( infd, outfd ) ) // copy whole file + cleanup_and_fail( output_filename, outfd, 1 ); + } + if( seek_write( outfd, mbuffer + pos, 1, mpos + pos ) != 1 ) + { show_error( "Error writing output file", errno ); + cleanup_and_fail( output_filename, outfd, 1 ); } + break; + } + } } + delete master; } + delete[] mbuffer; if( verbosity >= 1 ) std::printf( "\n" ); if( !done ) { - show_error( "Error is larger than 1 byte. Can't repair input file." ); + show_error( "Can't repair input file. Error is probably larger than 1 byte." ); cleanup_and_fail( output_filename, outfd, 2 ); } } diff --git a/split.cc b/split.cc index 8eafd82..fbf0676 100644 --- a/split.cc +++ b/split.cc @@ -129,9 +129,9 @@ int do_split_file( const std::string & input_filename, uint8_t * & base_buffer, const File_index file_index( infd ); if( file_index.retval() != 0 ) pp( file_index.error().c_str() ); - const int max_members = file_index.retval() ? 999999 : file_index.members(); + const long max_members = file_index.retval() ? 999999 : file_index.members(); int max_digits = 1; - for( int i = max_members; i >= 10; i /= 10 ) ++max_digits; + for( long i = max_members; i >= 10; i /= 10 ) ++max_digits; std::string output_filename; first_filename( input_filename, default_output_filename, output_filename, -- cgit v1.2.3