summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 5
-rw-r--r-- Makefile.in 19
-rw-r--r-- NEWS 3
-rwxr-xr-x configure 2
-rw-r--r-- decoder.cc 23
-rw-r--r-- decoder.h 2
-rw-r--r-- doc/lziprecover.1 2
-rw-r--r-- doc/lziprecover.info 41
-rw-r--r-- doc/lziprecover.texi 25
-rw-r--r-- file_index.cc 13
-rw-r--r-- file_index.h 8
-rw-r--r-- lzip.h 8
-rw-r--r-- main.cc 21
-rw-r--r-- merge.cc 45
-rw-r--r-- mtester.cc 209
-rw-r--r-- mtester.h 300
-rw-r--r-- range_dec.cc 10
-rw-r--r-- repair.cc 88
-rw-r--r-- split.cc 4
19 files changed, 685 insertions, 143 deletions
diff --git a/ChangeLog b/ChangeLog
index 1e94bae..85b179e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2014-05-25 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 1.16-pre2 released.
+ * New class LZ_mtester makes repair much faster.
+
2014-04-05 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.16-pre1 released.
diff --git a/Makefile.in b/Makefile.in
index 54e61a3..f0181b8 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -6,8 +6,8 @@ INSTALL_DATA = $(INSTALL) -m 644
INSTALL_DIR = $(INSTALL) -d -m 755
SHELL = /bin/sh
-objs = arg_parser.o file_index.o merge.o range_dec.o repair.o split.o \
- decoder.o main.o
+objs = arg_parser.o file_index.o merge.o mtester.o range_dec.o repair.o \
+ split.o decoder.o main.o
unzobjs = arg_parser.o unzcrash.o
@@ -18,22 +18,22 @@ unzobjs = arg_parser.o unzcrash.o
all : $(progname)
$(progname) : $(objs)
- $(CXX) $(LDFLAGS) -o $@ $(objs)
+ $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(objs)
$(progname)_profiled : $(objs)
- $(CXX) $(LDFLAGS) -pg -o $@ $(objs)
+ $(CXX) $(CXXFLAGS) $(LDFLAGS) -pg -o $@ $(objs)
unzcrash : $(unzobjs)
- $(CXX) $(LDFLAGS) -o $@ $(unzobjs)
+ $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(unzobjs)
main.o : main.cc
- $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
+ $(CXX) $(CXXFLAGS) $(CPPFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
unzcrash.o : testsuite/unzcrash.cc
- $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
+ $(CXX) $(CXXFLAGS) $(CPPFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
%.o : %.cc
- $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $<
+ $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c -o $@ $<
$(objs) : Makefile
arg_parser.o : arg_parser.h
@@ -41,8 +41,9 @@ decoder.o : lzip.h decoder.h
file_index.o : lzip.h file_index.h
main.o : arg_parser.h lzip.h decoder.h
merge.o : lzip.h decoder.h file_index.h
+mtester.o : lzip.h mtester.h
range_dec.o : lzip.h decoder.h file_index.h
-repair.o : lzip.h file_index.h
+repair.o : lzip.h file_index.h mtester.h
split.o : lzip.h
unzcrash.o : arg_parser.h Makefile
diff --git a/NEWS b/NEWS
index 8066951..bff5bae 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,8 @@
Changes in version 1.16:
+Repairing of single-byte errors is now about 10 times faster depending
+on file size and position of error.
+
Copying of file dates, permissions, and ownership now behaves like "cp -p".
(If the user ID or the group ID can't be duplicated, the file permission
bits S_ISUID and S_ISGID are cleared).
diff --git a/configure b/configure
index 6701961..92f078c 100755
--- a/configure
+++ b/configure
@@ -6,7 +6,7 @@
# to copy, distribute and modify it.
pkgname=lziprecover
-pkgversion=1.16-pre1
+pkgversion=1.16-pre2
progname=lziprecover
srctrigger=doc/${pkgname}.texi
diff --git a/decoder.cc b/decoder.cc
index c0defc8..75d70d0 100644
--- a/decoder.cc
+++ b/decoder.cc
@@ -54,19 +54,20 @@ void Pretty_print::operator()( const char * const msg, FILE * const f ) const
/* Returns the number of bytes really read.
If (returned value < size) and (errno == 0), means EOF was reached.
*/
-int readblock( const int fd, uint8_t * const buf, const int size )
+long readblock( const int fd, uint8_t * const buf, const long size )
{
- int rest = size;
+ long pos = 0;
errno = 0;
- while( rest > 0 )
+ while( pos < size )
{
- const int n = read( fd, buf + size - rest, rest );
- if( n > 0 ) rest -= n;
+ const int sz = std::min( 65536L, size - pos );
+ const int n = read( fd, buf + pos, sz );
+ if( n > 0 ) pos += n;
else if( n == 0 ) break; // EOF
else if( errno != EINTR ) break;
errno = 0;
}
- return size - rest;
+ return pos;
}
@@ -75,16 +76,16 @@ int readblock( const int fd, uint8_t * const buf, const int size )
*/
int writeblock( const int fd, const uint8_t * const buf, const int size )
{
- int rest = size;
+ int pos = 0;
errno = 0;
- while( rest > 0 )
+ while( pos < size )
{
- const int n = write( fd, buf + size - rest, rest );
- if( n > 0 ) rest -= n;
+ const int n = write( fd, buf + pos, size - pos );
+ if( n > 0 ) pos += n;
else if( n < 0 && errno != EINTR ) break;
errno = 0;
}
- return size - rest;
+ return pos;
}
diff --git a/decoder.h b/decoder.h
index 4230c5a..30c28f5 100644
--- a/decoder.h
+++ b/decoder.h
@@ -281,7 +281,7 @@ public:
stream_pos( 0 ),
crc_( 0xFFFFFFFFU ),
outfd( ofd )
- { buffer[buffer_size-1] = 0; } // prev_byte of first_byte
+ { buffer[buffer_size-1] = 0; } // prev_byte of first byte
~LZ_decoder() { delete[] buffer; }
diff --git a/doc/lziprecover.1 b/doc/lziprecover.1
index 3a6300f..6dbc3ec 100644
--- a/doc/lziprecover.1
+++ b/doc/lziprecover.1
@@ -1,5 +1,5 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1.
-.TH LZIPRECOVER "1" "April 2014" "lziprecover 1.16-pre1" "User Commands"
+.TH LZIPRECOVER "1" "May 2014" "lziprecover 1.16-pre2" "User Commands"
.SH NAME
lziprecover \- recovers data from damaged lzip files
.SH SYNOPSIS
diff --git a/doc/lziprecover.info b/doc/lziprecover.info
index 1248e6f..7bc75f0 100644
--- a/doc/lziprecover.info
+++ b/doc/lziprecover.info
@@ -12,7 +12,7 @@ File: lziprecover.info, Node: Top, Next: Introduction, Up: (dir)
Lziprecover Manual
******************
-This manual is for Lziprecover (version 1.16-pre1, 5 April 2014).
+This manual is for Lziprecover (version 1.16-pre2, 25 May 2014).
* Menu:
@@ -248,17 +248,26 @@ File: lziprecover.info, Node: Repairing files, Next: Merging files, Prev: Inv
3 Repairing files
*****************
-Lziprecover is able to repair files with small errors (up to one byte
-error per member). The error may be located anywhere in the file except
-in the header (first 6 bytes of each member) or in the 'Member size'
-field of the trailer (last 8 bytes of each member). This makes lzip
-files resistant to bit-flip, one of the most common forms of data
+Lziprecover is usually able to repair files with small errors (up to one
+byte error per member). The error may be located anywhere in the file
+except in the header (first 6 bytes of each member) or in the 'Member
+size' field of the trailer (last 8 bytes of each member). This makes
+lzip files resistant to bit-flip, one of the most common forms of data
corruption.
Bit-flip happens when one bit in the file is changed from 0 to 1 or
vice versa. It may be caused by bad RAM or even by natural radiation. I
have seen a case of bit-flip in a file stored in an USB flash drive.
+ Repairing a file can take some time. Small files or files with the
+error located near the beginning can be repaired in a few seconds. But
+repairing a large file compressed with a large dictionary size and with
+the error located far from the beginning, can take hours.
+
+ On the other hand, errors located near the beginning of the file
+cause much more loss of data than errors located near the end. So
+lziprecover repairs more efficiently the worst errors.
+

File: lziprecover.info, Node: Merging files, Next: File format, Prev: Repairing files, Up: Top
@@ -552,16 +561,16 @@ Concept index

Tag Table:
Node: Top226
-Node: Introduction1100
-Node: Invoking lziprecover3858
-Node: Repairing files9296
-Node: Merging files10015
-Node: File format11786
-Node: Examples14296
-Ref: ddrescue-example15497
-Node: Unzcrash16606
-Node: Problems18978
-Node: Concept index19528
+Node: Introduction1099
+Node: Invoking lziprecover3857
+Node: Repairing files9295
+Node: Merging files10485
+Node: File format12256
+Node: Examples14766
+Ref: ddrescue-example15967
+Node: Unzcrash17076
+Node: Problems19448
+Node: Concept index19998

End Tag Table
diff --git a/doc/lziprecover.texi b/doc/lziprecover.texi
index be4fc27..00fbc8e 100644
--- a/doc/lziprecover.texi
+++ b/doc/lziprecover.texi
@@ -6,8 +6,8 @@
@finalout
@c %**end of header
-@set UPDATED 5 April 2014
-@set VERSION 1.16-pre1
+@set UPDATED 25 May 2014
+@set VERSION 1.16-pre2
@dircategory Data Compression
@direntry
@@ -278,17 +278,26 @@ caused lziprecover to panic.
@chapter Repairing files
@cindex repairing files
-Lziprecover is able to repair files with small errors (up to one byte
-error per member). The error may be located anywhere in the file except
-in the header (first 6 bytes of each member) or in the @samp{Member
-size} field of the trailer (last 8 bytes of each member). This makes
-lzip files resistant to bit-flip, one of the most common forms of data
-corruption.
+Lziprecover is usually able to repair files with small errors (up to one
+byte error per member). The error may be located anywhere in the file
+except in the header (first 6 bytes of each member) or in the
+@samp{Member size} field of the trailer (last 8 bytes of each member).
+This makes lzip files resistant to bit-flip, one of the most common
+forms of data corruption.
Bit-flip happens when one bit in the file is changed from 0 to 1 or vice
versa. It may be caused by bad RAM or even by natural radiation. I have
seen a case of bit-flip in a file stored in an USB flash drive.
+Repairing a file can take some time. Small files or files with the error
+located near the beginning can be repaired in a few seconds. But
+repairing a large file compressed with a large dictionary size and with
+the error located far from the beginning can take hours.
+
+On the other hand, errors located near the beginning of the file cause
+much more loss of data than errors located near the end. So lziprecover
+repairs the worst errors more efficiently.
+
@node Merging files
@chapter Merging files
diff --git a/file_index.cc b/file_index.cc
index cdb4031..b4f5420 100644
--- a/file_index.cc
+++ b/file_index.cc
@@ -28,6 +28,15 @@
#include "file_index.h"
+int seek_read( const int fd, uint8_t * const buf, const int size,
+ const long long pos )
+ {
+ if( lseek( fd, pos, SEEK_SET ) == pos )
+ return readblock( fd, buf, size );
+ return 0;
+ }
+
+
Block Block::split( const long long pos )
{
if( pos > pos_ && pos < end() )
@@ -120,7 +129,7 @@ File_index::File_index( const int infd )
return;
}
std::reverse( member_vector.begin(), member_vector.end() );
- for( unsigned i = 0; i < member_vector.size() - 1; ++i )
+ for( unsigned long i = 0; i < member_vector.size() - 1; ++i )
{
const long long end = member_vector[i].dblock.end();
if( end < 0 || end > INT64_MAX )
@@ -214,7 +223,7 @@ error:
return;
}
std::reverse( member_vector.begin(), member_vector.end() );
- for( unsigned i = 0; i < member_vector.size() - 1; ++i )
+ for( unsigned long i = 0; i < member_vector.size() - 1; ++i )
{
const long long end = member_vector[i].dblock.end();
if( end < 0 || end > INT64_MAX )
diff --git a/file_index.h b/file_index.h
index 8acb60f..7fee1e6 100644
--- a/file_index.h
+++ b/file_index.h
@@ -76,7 +76,7 @@ public:
explicit File_index( const int infd );
File_index( const std::vector< int > & infd_vector, const long long fsize );
- int members() const { return member_vector.size(); }
+ long members() const { return member_vector.size(); }
const std::string & error() const { return error_; }
int retval() const { return retval_; }
@@ -84,7 +84,7 @@ public:
{
if( retval_ || fi.retval_ || isize != fi.isize ||
member_vector.size() != fi.member_vector.size() ) return false;
- for( unsigned i = 0; i < member_vector.size(); ++i )
+ for( unsigned long i = 0; i < member_vector.size(); ++i )
if( member_vector[i] != fi.member_vector[i] ) return false;
return true;
}
@@ -102,8 +102,8 @@ public:
long long file_size() const
{ if( isize >= 0 ) return isize; else return 0; }
- const Block & dblock( const int i ) const
+ const Block & dblock( const long i ) const
{ return member_vector[i].dblock; }
- const Block & mblock( const int i ) const
+ const Block & mblock( const long i ) const
{ return member_vector[i].mblock; }
};
diff --git a/lzip.h b/lzip.h
index 483835f..cd44b42 100644
--- a/lzip.h
+++ b/lzip.h
@@ -279,9 +279,13 @@ inline unsigned long long positive_diff( const unsigned long long x,
// defined in decoder.cc
-int readblock( const int fd, uint8_t * const buf, const int size );
+long readblock( const int fd, uint8_t * const buf, const long size );
int writeblock( const int fd, const uint8_t * const buf, const int size );
+// defined in file_index.cc
+int seek_read( const int fd, uint8_t * const buf, const int size,
+ const long long pos );
+
// defined in main.cc
int open_instream( const char * const name, struct stat * const in_statsp,
const bool no_ofile, const bool reg_only = false );
@@ -314,8 +318,6 @@ int range_decompress( const std::string & input_filename,
const bool force, const bool ignore, const bool to_stdout );
// defined in repair.cc
-int seek_read( const int fd, uint8_t * const buf, const int size,
- const long long pos );
int repair_file( const std::string & input_filename,
const std::string & output_filename, const int verbosity,
const bool force );
diff --git a/main.cc b/main.cc
index e5b1cbc..81a13aa 100644
--- a/main.cc
+++ b/main.cc
@@ -256,17 +256,6 @@ bool open_outstream( const bool force )
}
-bool check_tty( const int infd )
- {
- if( isatty( infd ) )
- {
- show_error( "I won't read compressed data from a terminal.", 0, true );
- return false;
- }
- return true;
- }
-
-
void cleanup_and_fail( const int retval )
{
if( delete_output_on_interrupt )
@@ -591,6 +580,7 @@ int main( const int argc, const char * const argv[] )
if( filenames.back() != "-" ) filenames_given = true;
}
+ try {
switch( program_mode )
{
case m_none: internal_error( "invalid operation." ); break;
@@ -620,6 +610,9 @@ int main( const int argc, const char * const argv[] )
return split_file( filenames[0], default_output_filename, verbosity, force );
case m_test: break;
}
+ }
+ catch( std::bad_alloc ) { show_error( "Not enough memory." ); return 1; }
+ catch( Error e ) { show_error( e.msg, errno ); return 1; }
if( program_mode == m_test )
outfd = -1;
@@ -683,7 +676,11 @@ int main( const int argc, const char * const argv[] )
}
}
- if( !check_tty( infd ) ) return 1;
+ if( isatty( infd ) )
+ {
+ show_error( "I won't read compressed data from a terminal.", 0, true );
+ return 1;
+ }
if( output_filename.size() && !to_stdout && program_mode != m_test )
delete_output_on_interrupt = true;
diff --git a/merge.cc b/merge.cc
index 08a3d0e..95b9318 100644
--- a/merge.cc
+++ b/merge.cc
@@ -75,6 +75,7 @@ void combine( std::vector< Block > & block_vector, std::vector< Block > & bv )
}
+// positions in 'block_vector' are absolute file positions.
bool diff_member( const long long mpos, const long long msize,
const std::vector< int > & infd_vector,
std::vector< Block > & block_vector )
@@ -220,7 +221,7 @@ int open_input_files( const std::vector< std::string > & filenames,
{
const int infd = infd_vector[i];
bool error = false;
- for( int j = 0; j < file_index.members(); ++j )
+ for( long j = 0; j < file_index.members(); ++j )
{
const long long mpos = file_index.mblock( j ).pos();
const long long msize = file_index.mblock( j ).size();
@@ -284,26 +285,21 @@ bool copy_file( const int infd, const int outfd, const long long max_size )
bool try_decompress_member( const int fd, const unsigned long long msize,
long long * failure_posp )
{
- try {
- Range_decoder rdec( fd );
- File_header header;
- rdec.read_data( header.data, File_header::size );
- if( !rdec.finished() && // End Of File
- header.verify_magic() && header.verify_version() &&
- header.dictionary_size() >= min_dictionary_size &&
- header.dictionary_size() <= max_dictionary_size )
- {
- LZ_decoder decoder( header, rdec, -1 );
- Pretty_print dummy( "", -1 );
+ Range_decoder rdec( fd );
+ File_header header;
+ rdec.read_data( header.data, File_header::size );
+ if( !rdec.finished() && // End Of File
+ header.verify_magic() && header.verify_version() &&
+ header.dictionary_size() >= min_dictionary_size &&
+ header.dictionary_size() <= max_dictionary_size )
+ {
+ LZ_decoder decoder( header, rdec, -1 );
+ Pretty_print dummy( "", -1 );
- if( decoder.decode_member( dummy ) == 0 &&
- rdec.member_position() == msize ) return true;
- if( failure_posp ) *failure_posp = rdec.member_position();
- }
+ if( decoder.decode_member( dummy ) == 0 &&
+ rdec.member_position() == msize ) return true;
+ if( failure_posp ) *failure_posp = rdec.member_position();
}
- catch( std::bad_alloc )
- { show_error( "Not enough memory." ); std::exit( 1 ); }
- catch( Error e ) {}
return false;
}
@@ -325,7 +321,7 @@ int merge_files( const std::vector< std::string > & filenames,
if( !copy_file( infd_vector[0], outfd ) ) // copy whole file
cleanup_and_fail( output_filename, outfd, 1 );
- for( int j = 0; j < file_index.members(); ++j )
+ for( long j = 0; j < file_index.members(); ++j )
{
const long long mpos = file_index.mblock( j ).pos();
const long long msize = file_index.mblock( j ).size();
@@ -360,7 +356,7 @@ int merge_files( const std::vector< std::string > & filenames,
if( verbosity >= 1 && file_index.members() > 1 )
{
- std::printf( "Merging member %d\n", j + 1 );
+ std::printf( "Merging member %ld\n", j + 1 );
std::fflush( stdout );
}
const int base_variations = ipow( files, block_vector.size() );
@@ -378,11 +374,10 @@ int merge_files( const std::vector< std::string > & filenames,
{
const int infd = infd_vector[tmp % files];
tmp /= files;
- if( lseek( infd, block_vector[i].pos(), SEEK_SET ) < 0 ||
- lseek( outfd, block_vector[i].pos(), SEEK_SET ) < 0 ||
+ if( !safe_seek( infd, block_vector[i].pos() ) ||
+ !safe_seek( outfd, block_vector[i].pos() ) ||
!copy_file( infd, outfd, block_vector[i].size() ) )
- { show_error( "Error reading output file", errno );
- cleanup_and_fail( output_filename, outfd, 1 ); }
+ cleanup_and_fail( output_filename, outfd, 1 );
}
if( !safe_seek( outfd, mpos ) )
cleanup_and_fail( output_filename, outfd, 1 );
diff --git a/mtester.cc b/mtester.cc
new file mode 100644
index 0000000..3fd2563
--- /dev/null
+++ b/mtester.cc
@@ -0,0 +1,209 @@
+/* Lziprecover - Data recovery tool for lzip files
+ Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <algorithm>
+#include <cerrno>
+#include <climits>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+#include <vector>
+#include <stdint.h>
+#include <unistd.h>
+
+#include "lzip.h"
+#include "mtester.h"
+
+
+void LZ_mtester::flush_data()
+ {
+ if( pos > stream_pos )
+ {
+ const int size = pos - stream_pos;
+ crc32.update_buf( crc_, buffer + stream_pos, size );
+ if( pos >= buffer_size ) { partial_data_pos += pos; pos = 0; }
+ stream_pos = pos;
+ }
+ }
+
+
+bool LZ_mtester::verify_trailer()
+ {
+ const File_trailer * trailer = rdec.get_trailer();
+ if( !trailer ) return false;
+
+ return ( rdec.code_is_zero() &&
+ trailer->data_crc() == crc() &&
+ trailer->data_size() == data_position() &&
+ trailer->member_size() == (unsigned long)member_position() );
+ }
+
+
+void LZ_mtester::duplicate_buffer()
+ {
+ uint8_t * const tmp = new uint8_t[buffer_size];
+ if( data_position() > 0 )
+ std::memcpy( tmp, buffer, std::min( data_position(),
+ (unsigned long long)buffer_size ) );
+ else tmp[buffer_size-1] = 0; // prev_byte of first byte
+ buffer = tmp;
+ }
+
+
+/* Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF,
+ 3 = trailer error, 4 = unknown marker found,
+ -1 = pos_limit reached. */
+int LZ_mtester::test_member( const long pos_limit )
+ {
+ if( pos_limit < File_header::size + 5 ) return -1;
+ if( member_position() == File_header::size ) rdec.load();
+ while( !rdec.finished() )
+ {
+ if( member_position() >= pos_limit ) { flush_data(); return -1; }
+ const int pos_state = data_position() & pos_state_mask;
+ if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit
+ {
+ const uint8_t prev_byte = get_prev_byte();
+ if( state.is_char() )
+ {
+ state.set_char1();
+ put_byte( rdec.decode_tree8( bm_literal[get_lit_state(prev_byte)] ) );
+ }
+ else
+ {
+ state.set_char2();
+ put_byte( rdec.decode_matched( bm_literal[get_lit_state(prev_byte)],
+ get_byte( rep0 ) ) );
+ }
+ }
+ else
+ {
+ int len;
+ if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit
+ {
+ if( rdec.decode_bit( bm_rep0[state()] ) != 0 ) // 3rd bit
+ {
+ unsigned distance;
+ if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit
+ distance = rep1;
+ else
+ {
+ if( rdec.decode_bit( bm_rep2[state()] ) == 0 ) // 5th bit
+ distance = rep2;
+ else
+ { distance = rep3; rep3 = rep2; }
+ rep2 = rep1;
+ }
+ rep1 = rep0;
+ rep0 = distance;
+ }
+ else
+ {
+ if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
+ { state.set_short_rep(); put_byte( get_byte( rep0 ) ); continue; }
+ }
+ state.set_rep();
+ len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
+ }
+ else
+ {
+ const unsigned rep0_saved = rep0;
+ len = min_match_len + rdec.decode_len( match_len_model, pos_state );
+ const int dis_slot = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
+ if( dis_slot < start_dis_model ) rep0 = dis_slot;
+ else
+ {
+ const int direct_bits = ( dis_slot >> 1 ) - 1;
+ rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
+ if( dis_slot < end_dis_model )
+ rep0 += rdec.decode_tree_reversed( bm_dis + rep0 - dis_slot - 1,
+ direct_bits );
+ else
+ {
+ rep0 += rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
+ rep0 += rdec.decode_tree_reversed4( bm_align );
+ if( rep0 == 0xFFFFFFFFU ) // Marker found
+ {
+ rep0 = rep0_saved;
+ rdec.normalize();
+ flush_data();
+ if( len == min_match_len ) // End Of Stream marker
+ {
+ if( verify_trailer() ) return 0; else return 3;
+ }
+ return 4;
+ }
+ }
+ }
+ rep3 = rep2; rep2 = rep1; rep1 = rep0_saved;
+ state.set_match();
+ if( rep0 >= dictionary_size || rep0 >= data_position() )
+ { flush_data(); return 1; }
+ }
+ copy_block( rep0, len );
+ }
+ }
+ flush_data();
+ return 2;
+ }
+
+
+uint8_t * read_member( const int infd, const long long mpos,
+ const long long msize )
+ {
+ if( msize <= 0 || msize > LONG_MAX )
+ { show_error( "Member is larger than LONG_MAX." ); return 0; }
+ if( !safe_seek( infd, mpos ) ) return 0;
+ uint8_t * const buffer = new uint8_t[msize];
+
+ if( readblock( infd, buffer, msize ) != msize )
+ { show_error( "Error reading input file", errno );
+ delete[] buffer; return 0; }
+ return buffer;
+ }
+
+
+const LZ_mtester * prepare_master( const uint8_t * const buffer,
+ const long buffer_size,
+ const long pos_limit )
+ {
+ File_header & header = *(File_header *)buffer;
+ const unsigned dictionary_size = header.dictionary_size();
+ if( header.verify_magic() && header.verify_version() &&
+ dictionary_size >= min_dictionary_size &&
+ dictionary_size <= max_dictionary_size )
+ {
+ LZ_mtester * const master =
+ new LZ_mtester( buffer, buffer_size, dictionary_size );
+ if( master->test_member( pos_limit ) == -1 ) return master;
+ delete master;
+ }
+ return 0;
+ }
+
+
+bool test_member_rest( const LZ_mtester & master, long * const failure_posp )
+ {
+ LZ_mtester mtester( master );
+ mtester.duplicate_buffer();
+ if( mtester.test_member() == 0 && mtester.finished() ) return true;
+ if( failure_posp ) *failure_posp = mtester.member_position();
+ return false;
+ }
diff --git a/mtester.h b/mtester.h
new file mode 100644
index 0000000..3ff3fcb
--- /dev/null
+++ b/mtester.h
@@ -0,0 +1,300 @@
+/* Lziprecover - Data recovery tool for lzip files
+ Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+class Range_mtester
+ {
+ const uint8_t * const buffer; // input buffer
+ const long buffer_size;
+ long pos; // current pos in buffer
+ uint32_t code;
+ uint32_t range;
+ bool at_stream_end;
+
+ void operator=( const Range_mtester & ); // declared as private
+
+public:
+ Range_mtester( const uint8_t * const buf, const long buf_size )
+ :
+ buffer( buf ),
+ buffer_size( buf_size ),
+ pos( File_header::size ),
+ code( 0 ),
+ range( 0xFFFFFFFFU ),
+ at_stream_end( false )
+ {}
+
+ void load()
+ {
+ for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte();
+ code &= range; // make sure that first byte is discarded
+ }
+
+ bool code_is_zero() const { return ( code == 0 ); }
+ bool finished() { return pos >= buffer_size; }
+ long member_position() const { return pos; }
+
+ const File_trailer * get_trailer()
+ {
+ if( buffer_size - pos < File_trailer::size ) return 0;
+ const File_trailer * const p = (File_trailer *)(buffer + pos);
+ pos += File_trailer::size;
+ return p;
+ }
+
+ uint8_t get_byte()
+ {
+ if( finished() ) return 0xAA; // make code != 0
+ return buffer[pos++];
+ }
+
+ void normalize()
+ {
+ if( range <= 0x00FFFFFFU )
+ { range <<= 8; code = (code << 8) | get_byte(); }
+ }
+
+ int decode( const int num_bits )
+ {
+ int symbol = 0;
+ for( int i = num_bits; i > 0; --i )
+ {
+ normalize();
+ range >>= 1;
+// symbol <<= 1;
+// if( code >= range ) { code -= range; symbol |= 1; }
+ const uint32_t mask = 0U - (code < range);
+ code -= range;
+ code += range & mask;
+ symbol = (symbol << 1) + (mask + 1);
+ }
+ return symbol;
+ }
+
+ int decode_bit( Bit_model & bm )
+ {
+ normalize();
+ const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
+ if( code < bound )
+ {
+ range = bound;
+ bm.probability += (bit_model_total - bm.probability) >> bit_model_move_bits;
+ return 0;
+ }
+ else
+ {
+ range -= bound;
+ code -= bound;
+ bm.probability -= bm.probability >> bit_model_move_bits;
+ return 1;
+ }
+ }
+
+ int decode_tree3( Bit_model bm[] )
+ {
+ int symbol = 1;
+ symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
+ symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
+ symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
+ return symbol & 7;
+ }
+
+ int decode_tree6( Bit_model bm[] )
+ {
+ int symbol = 1;
+ symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
+ symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
+ symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
+ symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
+ symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
+ symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
+ return symbol & 0x3F;
+ }
+
+ int decode_tree8( Bit_model bm[] )
+ {
+ int symbol = 1;
+ while( symbol < 0x100 )
+ symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
+ return symbol & 0xFF;
+ }
+
+ int decode_tree_reversed( Bit_model bm[], const int num_bits )
+ {
+ int model = 1;
+ int symbol = 0;
+ for( int i = 0; i < num_bits; ++i )
+ {
+ const bool bit = decode_bit( bm[model] );
+ model <<= 1;
+ if( bit ) { ++model; symbol |= (1 << i); }
+ }
+ return symbol;
+ }
+
+ int decode_tree_reversed4( Bit_model bm[] )
+ {
+ int model = 1;
+ int symbol = decode_bit( bm[model] );
+ model = (model << 1) + symbol;
+ int bit = decode_bit( bm[model] );
+ model = (model << 1) + bit; symbol |= (bit << 1);
+ bit = decode_bit( bm[model] );
+ model = (model << 1) + bit; symbol |= (bit << 2);
+ if( decode_bit( bm[model] ) ) symbol |= 8;
+ return symbol;
+ }
+
+ int decode_matched( Bit_model bm[], int match_byte )
+ {
+ Bit_model * const bm1 = bm + 0x100;
+ int symbol = 1;
+ while( symbol < 0x100 )
+ {
+ match_byte <<= 1;
+ const int match_bit = match_byte & 0x100;
+ const int bit = decode_bit( bm1[match_bit+symbol] );
+ symbol = ( symbol << 1 ) | bit;
+ if( match_bit != bit << 8 )
+ {
+ while( symbol < 0x100 )
+ symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
+ break;
+ }
+ }
+ return symbol & 0xFF;
+ }
+
+ int decode_len( Len_model & lm, const int pos_state )
+ {
+ if( decode_bit( lm.choice1 ) == 0 )
+ return decode_tree3( lm.bm_low[pos_state] );
+ if( decode_bit( lm.choice2 ) == 0 )
+ return len_low_symbols + decode_tree3( lm.bm_mid[pos_state] );
+ return len_low_symbols + len_mid_symbols + decode_tree8( lm.bm_high );
+ }
+ };
+
+
+class LZ_mtester
+ {
+ unsigned long long partial_data_pos;
+ Range_mtester rdec;
+ const unsigned dictionary_size;
+ const int buffer_size;
+ uint8_t * buffer; // output buffer
+ int pos; // current pos in buffer
+ int stream_pos; // first byte not yet written to file
+ uint32_t crc_;
+ unsigned rep0; // rep[0-3] latest four distances
+ unsigned rep1; // used for efficient coding of
+ unsigned rep2; // repeated distances
+ unsigned rep3;
+ State state;
+
+ Bit_model bm_literal[1<<literal_context_bits][0x300];
+ Bit_model bm_match[State::states][pos_states];
+ Bit_model bm_rep[State::states];
+ Bit_model bm_rep0[State::states];
+ Bit_model bm_rep1[State::states];
+ Bit_model bm_rep2[State::states];
+ Bit_model bm_len[State::states][pos_states];
+ Bit_model bm_dis_slot[len_states][1<<dis_slot_bits];
+ Bit_model bm_dis[modeled_distances-end_dis_model];
+ Bit_model bm_align[dis_align_size];
+
+ Len_model match_len_model;
+ Len_model rep_len_model;
+
+ unsigned long long stream_position() const
+ { return partial_data_pos + stream_pos; }
+ void flush_data();
+ bool verify_trailer();
+
+ uint8_t get_prev_byte() const
+ {
+ const int i = ( ( pos > 0 ) ? pos : buffer_size ) - 1;
+ return buffer[i];
+ }
+
+ uint8_t get_byte( const int distance ) const
+ {
+ int i = pos - distance - 1;
+ if( i < 0 ) i += buffer_size;
+ return buffer[i];
+ }
+
+ void put_byte( const uint8_t b )
+ {
+ buffer[pos] = b;
+ if( ++pos >= buffer_size ) flush_data();
+ }
+
+ void copy_block( const int distance, int len )
+ {
+ int i = pos - distance - 1;
+ if( i < 0 ) i += buffer_size;
+ if( len < buffer_size - std::max( pos, i ) && len <= std::abs( pos - i ) )
+ {
+ std::memcpy( buffer + pos, buffer + i, len ); // no wrap, no overlap
+ pos += len;
+ }
+ else for( ; len > 0; --len )
+ {
+ buffer[pos] = buffer[i];
+ if( ++pos >= buffer_size ) flush_data();
+ if( ++i >= buffer_size ) i = 0;
+ }
+ }
+
+ void operator=( const LZ_mtester & ); // declared as private
+
+public:
+ LZ_mtester( const uint8_t * const ibuf, const long ibuf_size,
+ const int dict_size )
+ :
+ partial_data_pos( 0 ),
+ rdec( ibuf, ibuf_size ),
+ dictionary_size( dict_size ),
+ buffer_size( std::max( 65536U, dictionary_size ) ),
+ buffer( new uint8_t[buffer_size] ),
+ pos( 0 ),
+ stream_pos( 0 ),
+ crc_( 0xFFFFFFFFU ),
+ rep0( 0 ),
+ rep1( 0 ),
+ rep2( 0 ),
+ rep3( 0 )
+ { buffer[buffer_size-1] = 0; } // prev_byte of first byte
+
+ ~LZ_mtester() { delete[] buffer; }
+
+ unsigned crc() const { return crc_ ^ 0xFFFFFFFFU; }
+ unsigned long long data_position() const { return partial_data_pos + pos; }
+ bool finished() { return rdec.finished(); }
+ long member_position() const { return rdec.member_position(); }
+
+ void duplicate_buffer();
+ int test_member( const long pos_limit = LONG_MAX );
+ };
+
+
+uint8_t * read_member( const int infd, const long long mpos,
+ const long long msize );
+const LZ_mtester * prepare_master( const uint8_t * const buffer,
+ const long buffer_size,
+ const long pos_limit );
+bool test_member_rest( const LZ_mtester & master, long * const failure_posp = 0 );
diff --git a/range_dec.cc b/range_dec.cc
index 2c6c342..111405d 100644
--- a/range_dec.cc
+++ b/range_dec.cc
@@ -203,13 +203,13 @@ int list_file( const char * const input_filename, const Pretty_print & pp )
if( pp.verbosity() >= 1 && file_index.members() > 1 )
{
- std::printf( " Total members in file = %d.\n", file_index.members() );
+ std::printf( " Total members in file = %ld.\n", file_index.members() );
if( pp.verbosity() >= 2 )
- for( int i = 0; i < file_index.members(); ++i )
+ for( long i = 0; i < file_index.members(); ++i )
{
const Block & db = file_index.dblock( i );
const Block & mb = file_index.mblock( i );
- std::printf( " Member %3d data pos %9llu data size %7llu "
+ std::printf( " Member %3ld data pos %9llu data size %7llu "
"member pos %9llu member size %7llu.\n", i + 1,
db.pos(), db.size(), mb.pos(), mb.size() );
}
@@ -282,13 +282,13 @@ int range_decompress( const std::string & input_filename,
if( outfd < 0 ) return 1; }
int retval = 0;
- for( int i = 0; i < file_index.members(); ++i )
+ for( long i = 0; i < file_index.members(); ++i )
{
const Block & db = file_index.dblock( i );
if( range.overlaps( db ) )
{
if( verbosity >= 3 )
- std::fprintf( stderr, "Decompressing member %3d\n", i + 1 );
+ std::fprintf( stderr, "Decompressing member %3ld\n", i + 1 );
const long long outskip = std::max( 0LL, range.pos() - db.pos() );
const long long outend = std::min( db.size(), range.end() - db.pos() );
const long long mpos = file_index.mblock( i ).pos();
diff --git a/repair.cc b/repair.cc
index 92a417f..0048bcf 100644
--- a/repair.cc
+++ b/repair.cc
@@ -20,6 +20,7 @@
#include <cerrno>
#include <climits>
#include <cstdio>
+#include <cstdlib>
#include <cstring>
#include <string>
#include <vector>
@@ -29,15 +30,7 @@
#include "lzip.h"
#include "file_index.h"
-
-
-int seek_read( const int fd, uint8_t * const buf, const int size,
- const long long pos )
- {
- if( lseek( fd, pos, SEEK_SET ) == pos )
- return readblock( fd, buf, size );
- return 0;
- }
+#include "mtester.h"
int seek_write( const int fd, const uint8_t * const buf, const int size,
@@ -63,7 +56,7 @@ int repair_file( const std::string & input_filename,
{ pp( file_index.error().c_str() ); return file_index.retval(); }
int outfd = -1;
- for( int i = 0; i < file_index.members(); ++i )
+ for( long i = 0; i < file_index.members(); ++i )
{
const long long mpos = file_index.mblock( i ).pos();
const long long msize = file_index.mblock( i ).size();
@@ -76,50 +69,59 @@ int repair_file( const std::string & input_filename,
{ show_error( "Can't repair error in input file." );
cleanup_and_fail( output_filename, outfd, 2 ); }
- if( outfd < 0 ) // first damaged member found
+ if( verbosity >= 1 ) // damaged member found
{
- if( !safe_seek( infd, 0 ) ) return 1;
- outfd = open_outstream_rw( output_filename, force );
- if( outfd < 0 ) { close( infd ); return 1; }
- if( !copy_file( infd, outfd ) ) // copy whole file
- cleanup_and_fail( output_filename, outfd, 1 );
- }
-
- if( verbosity >= 1 )
- {
- std::printf( "Repairing member %d\n", i + 1 );
+ std::printf( "Repairing member %ld (failure pos = %llu)\n",
+ i + 1, mpos + failure_pos );
std::fflush( stdout );
}
- const long long min_pos =
- std::max( (long long)File_header::size, failure_pos - 1000 );
+ uint8_t * const mbuffer = read_member( infd, mpos, msize );
+ if( !mbuffer )
+ cleanup_and_fail( output_filename, outfd, 1 );
+ long pos = failure_pos;
bool done = false;
- for( long long pos = failure_pos; pos >= min_pos && !done ; --pos )
+ while( pos >= File_header::size && pos > failure_pos - 20000 && !done )
{
- if( verbosity >= 1 )
- {
- std::printf( "Trying position %llu \r", mpos + pos );
- std::fflush( stdout );
- }
- uint8_t byte;
- if( seek_read( outfd, &byte, 1, mpos + pos ) != 1 )
- { show_error( "Error reading output file", errno );
- cleanup_and_fail( output_filename, outfd, 1 ); }
- for( int i = 0; i < 256; ++i )
+ const long min_pos = std::max( (long)File_header::size, pos - 1000 );
+ const LZ_mtester * master = prepare_master( mbuffer, msize, min_pos - 16 );
+ if( !master )
+ cleanup_and_fail( output_filename, outfd, 1 );
+ for( ; pos >= min_pos && !done ; --pos )
{
- ++byte;
- if( seek_write( outfd, &byte, 1, mpos + pos ) != 1 ||
- lseek( outfd, mpos, SEEK_SET ) < 0 )
- { show_error( "Error writing output file", errno );
- cleanup_and_fail( output_filename, outfd, 1 ); }
- if( i == 255 ) break;
- if( try_decompress_member( outfd, msize ) )
- { done = true; break; }
+ if( verbosity >= 1 )
+ {
+ std::printf( "Trying position %llu \r", mpos + pos );
+ std::fflush( stdout );
+ }
+ for( int j = 0; j < 256; ++j )
+ {
+ ++mbuffer[pos];
+ if( j == 255 ) break;
+ if( test_member_rest( *master ) )
+ {
+ done = true;
+ if( outfd < 0 ) // first damaged member repaired
+ {
+ if( !safe_seek( infd, 0 ) ) return 1;
+ outfd = open_outstream_rw( output_filename, force );
+ if( outfd < 0 ) { close( infd ); return 1; }
+ if( !copy_file( infd, outfd ) ) // copy whole file
+ cleanup_and_fail( output_filename, outfd, 1 );
+ }
+ if( seek_write( outfd, mbuffer + pos, 1, mpos + pos ) != 1 )
+ { show_error( "Error writing output file", errno );
+ cleanup_and_fail( output_filename, outfd, 1 ); }
+ break;
+ }
+ }
}
+ delete master;
}
+ delete[] mbuffer;
if( verbosity >= 1 ) std::printf( "\n" );
if( !done )
{
- show_error( "Error is larger than 1 byte. Can't repair input file." );
+ show_error( "Can't repair input file. Error is probably larger than 1 byte." );
cleanup_and_fail( output_filename, outfd, 2 );
}
}
diff --git a/split.cc b/split.cc
index 8eafd82..fbf0676 100644
--- a/split.cc
+++ b/split.cc
@@ -129,9 +129,9 @@ int do_split_file( const std::string & input_filename, uint8_t * & base_buffer,
const File_index file_index( infd );
if( file_index.retval() != 0 ) pp( file_index.error().c_str() );
- const int max_members = file_index.retval() ? 999999 : file_index.members();
+ const long max_members = file_index.retval() ? 999999 : file_index.members();
int max_digits = 1;
- for( int i = max_members; i >= 10; i /= 10 ) ++max_digits;
+ for( long i = max_members; i >= 10; i /= 10 ) ++max_digits;
std::string output_filename;
first_filename( input_filename, default_output_filename, output_filename,