Adding upstream version 1.16~pre2. (tag: upstream/1.16_pre2)

Signed-off-by: Daniel Baumann <mail@daniel-baumann.ch>
author: Daniel Baumann <mail@daniel-baumann.ch> 2015-11-07 11:45:45 +0000
committer: Daniel Baumann <mail@daniel-baumann.ch> 2015-11-07 11:45:45 +0000
commit: 5e8398a39d8758cb4dee9a43f92ac958277e0ebd (patch)
tree: 10ba2517467532e4a002f47cc32732f1f335eae0
parent: Adding upstream version 1.16~pre1. (diff)
download: lziprecover-upstream/1.16_pre2.tar.xz
lziprecover-upstream/1.16_pre2.zip
19 files changed, 685 insertions, 143 deletions
diff --git a/ChangeLog b/ChangeLog
index 1e94bae..85b179e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2014-05-25  Antonio Diaz Diaz  <antonio@gnu.org>
+
+	* Version 1.16-pre2 released.
+	* New class LZ_mtester makes repair much faster.
+
 2014-04-05  Antonio Diaz Diaz  <antonio@gnu.org>
 
 	* Version 1.16-pre1 released.
diff --git a/Makefile.in b/Makefile.in
index 54e61a3..f0181b8 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -6,8 +6,8 @@ INSTALL_DATA = $(INSTALL) -m 644
 INSTALL_DIR = $(INSTALL) -d -m 755
 SHELL = /bin/sh
 
-objs = arg_parser.o file_index.o merge.o range_dec.o repair.o split.o \
-       decoder.o main.o
+objs = arg_parser.o file_index.o merge.o mtester.o range_dec.o repair.o \
+       split.o decoder.o main.o
 unzobjs = arg_parser.o unzcrash.o
 
 
@@ -18,22 +18,22 @@ unzobjs = arg_parser.o unzcrash.o
 all : $(progname)
 
 $(progname) : $(objs)
-	$(CXX) $(LDFLAGS) -o $@ $(objs)
+	$(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(objs)
 
 $(progname)_profiled : $(objs)
-	$(CXX) $(LDFLAGS) -pg -o $@ $(objs)
+	$(CXX) $(CXXFLAGS) $(LDFLAGS) -pg -o $@ $(objs)
 
 unzcrash : $(unzobjs)
-	$(CXX) $(LDFLAGS) -o $@ $(unzobjs)
+	$(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(unzobjs)
 
 main.o : main.cc
-	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
+	$(CXX) $(CXXFLAGS) $(CPPFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
 
 unzcrash.o : testsuite/unzcrash.cc
-	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
+	$(CXX) $(CXXFLAGS) $(CPPFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
 
 %.o : %.cc
-	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $<
+	$(CXX) $(CXXFLAGS) $(CPPFLAGS) -c -o $@ $<
 
 $(objs)      : Makefile
 arg_parser.o : arg_parser.h
@@ -41,8 +41,9 @@ decoder.o    : lzip.h decoder.h
 file_index.o : lzip.h file_index.h
 main.o       : arg_parser.h lzip.h decoder.h
 merge.o      : lzip.h decoder.h file_index.h
+mtester.o    : lzip.h mtester.h
 range_dec.o  : lzip.h decoder.h file_index.h
-repair.o     : lzip.h file_index.h
+repair.o     : lzip.h file_index.h mtester.h
 split.o      : lzip.h
 unzcrash.o   : arg_parser.h Makefile
 
diff --git a/NEWS b/NEWS
index 8066951..bff5bae 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,8 @@
 Changes in version 1.16:
 
+Repairing of single-byte errors is now about 10 times faster depending
+on file size and position of error.
+
 Copying of file dates, permissions, and ownership now behaves like "cp -p".
 (If the user ID or the group ID can't be duplicated, the file permission
 bits S_ISUID and S_ISGID are cleared).
diff --git a/configure b/configure
index 6701961..92f078c 100755
--- a/configure
+++ b/configure
@@ -6,7 +6,7 @@
 # to copy, distribute and modify it.
 
 pkgname=lziprecover
-pkgversion=1.16-pre1
+pkgversion=1.16-pre2
 progname=lziprecover
 srctrigger=doc/${pkgname}.texi
 
diff --git a/decoder.cc b/decoder.cc
index c0defc8..75d70d0 100644
--- a/decoder.cc
+++ b/decoder.cc
@@ -54,19 +54,20 @@ void Pretty_print::operator()( const char * const msg, FILE * const f ) const
 /* Returns the number of bytes really read.
    If (returned value < size) and (errno == 0), means EOF was reached.
 */
-int readblock( const int fd, uint8_t * const buf, const int size )
+long readblock( const int fd, uint8_t * const buf, const long size )
   {
-  int rest = size;
+  long pos = 0;
   errno = 0;
-  while( rest > 0 )
+  while( pos < size )
     {
-    const int n = read( fd, buf + size - rest, rest );
-    if( n > 0 ) rest -= n;
+    const int sz = std::min( 65536L, size - pos );
+    const int n = read( fd, buf + pos, sz );
+    if( n > 0 ) pos += n;
     else if( n == 0 ) break;				// EOF
     else if( errno != EINTR ) break;
     errno = 0;
     }
-  return size - rest;
+  return pos;
   }
 
 
@@ -75,16 +76,16 @@ int readblock( const int fd, uint8_t * const buf, const int size )
 */
 int writeblock( const int fd, const uint8_t * const buf, const int size )
   {
-  int rest = size;
+  int pos = 0;
   errno = 0;
-  while( rest > 0 )
+  while( pos < size )
     {
-    const int n = write( fd, buf + size - rest, rest );
-    if( n > 0 ) rest -= n;
+    const int n = write( fd, buf + pos, size - pos );
+    if( n > 0 ) pos += n;
     else if( n < 0 && errno != EINTR ) break;
     errno = 0;
     }
-  return size - rest;
+  return pos;
   }
 
 
diff --git a/decoder.h b/decoder.h
index 4230c5a..30c28f5 100644
--- a/decoder.h
+++ b/decoder.h
@@ -281,7 +281,7 @@ public:
     stream_pos( 0 ),
     crc_( 0xFFFFFFFFU ),
     outfd( ofd )
-    { buffer[buffer_size-1] = 0; }	// prev_byte of first_byte
+    { buffer[buffer_size-1] = 0; }	// prev_byte of first byte
 
   ~LZ_decoder() { delete[] buffer; }
 
diff --git a/doc/lziprecover.1 b/doc/lziprecover.1
index 3a6300f..6dbc3ec 100644
--- a/doc/lziprecover.1
+++ b/doc/lziprecover.1
@@ -1,5 +1,5 @@
 .\" DO NOT MODIFY THIS FILE!  It was generated by help2man 1.37.1.
-.TH LZIPRECOVER "1" "April 2014" "lziprecover 1.16-pre1" "User Commands"
+.TH LZIPRECOVER "1" "May 2014" "lziprecover 1.16-pre2" "User Commands"
 .SH NAME
 lziprecover \- recovers data from damaged lzip files
 .SH SYNOPSIS
diff --git a/doc/lziprecover.info b/doc/lziprecover.info
index 1248e6f..7bc75f0 100644
--- a/doc/lziprecover.info
+++ b/doc/lziprecover.info
@@ -12,7 +12,7 @@ File: lziprecover.info,  Node: Top,  Next: Introduction,  Up: (dir)
 Lziprecover Manual
 ******************
 
-This manual is for Lziprecover (version 1.16-pre1, 5 April 2014).
+This manual is for Lziprecover (version 1.16-pre2, 25 May 2014).
 
 * Menu:
 
@@ -248,17 +248,26 @@ File: lziprecover.info,  Node: Repairing files,  Next: Merging files,  Prev: Inv
 3 Repairing files
 *****************
 
-Lziprecover is able to repair files with small errors (up to one byte
-error per member). The error may be located anywhere in the file except
-in the header (first 6 bytes of each member) or in the 'Member size'
-field of the trailer (last 8 bytes of each member). This makes lzip
-files resistant to bit-flip, one of the most common forms of data
+Lziprecover is usually able to repair files with small errors (up to one
+byte error per member). The error may be located anywhere in the file
+except in the header (first 6 bytes of each member) or in the 'Member
+size' field of the trailer (last 8 bytes of each member).  This makes
+lzip files resistant to bit-flip, one of the most common forms of data
 corruption.
 
    Bit-flip happens when one bit in the file is changed from 0 to 1 or
 vice versa. It may be caused by bad RAM or even by natural radiation. I
 have seen a case of bit-flip in a file stored in an USB flash drive.
 
+   Repairing a file can take some time. Small files or files with the
+error located near the beginning can be repaired in a few seconds. But
+repairing a large file compressed with a large dictionary size and with
+the error located far from the beginning, can take hours.
+
+   On the other hand, errors located near the beginning of the file
+cause much more loss of data than errors located near the end. So
+lziprecover repairs more efficiently the worst errors.
+
 
 File: lziprecover.info,  Node: Merging files,  Next: File format,  Prev: Repairing files,  Up: Top
 
@@ -552,16 +561,16 @@ Concept index
 
 Tag Table:
 Node: Top226
-Node: Introduction1100
-Node: Invoking lziprecover3858
-Node: Repairing files9296
-Node: Merging files10015
-Node: File format11786
-Node: Examples14296
-Ref: ddrescue-example15497
-Node: Unzcrash16606
-Node: Problems18978
-Node: Concept index19528
+Node: Introduction1099
+Node: Invoking lziprecover3857
+Node: Repairing files9295
+Node: Merging files10485
+Node: File format12256
+Node: Examples14766
+Ref: ddrescue-example15967
+Node: Unzcrash17076
+Node: Problems19448
+Node: Concept index19998
 
 End Tag Table
 
diff --git a/doc/lziprecover.texi b/doc/lziprecover.texi
index be4fc27..00fbc8e 100644
--- a/doc/lziprecover.texi
+++ b/doc/lziprecover.texi
@@ -6,8 +6,8 @@
 @finalout
 @c %**end of header
 
-@set UPDATED 5 April 2014
-@set VERSION 1.16-pre1
+@set UPDATED 25 May 2014
+@set VERSION 1.16-pre2
 
 @dircategory Data Compression
 @direntry
@@ -278,17 +278,26 @@ caused lziprecover to panic.
 @chapter Repairing files
 @cindex repairing files
 
-Lziprecover is able to repair files with small errors (up to one byte
-error per member). The error may be located anywhere in the file except
-in the header (first 6 bytes of each member) or in the @samp{Member
-size} field of the trailer (last 8 bytes of each member). This makes
-lzip files resistant to bit-flip, one of the most common forms of data
-corruption.
+Lziprecover is usually able to repair files with small errors (up to one
+byte error per member). The error may be located anywhere in the file
+except in the header (first 6 bytes of each member) or in the
+@samp{Member size} field of the trailer (last 8 bytes of each member).
+This makes lzip files resistant to bit-flip, one of the most common
+forms of data corruption.
 
 Bit-flip happens when one bit in the file is changed from 0 to 1 or vice
 versa. It may be caused by bad RAM or even by natural radiation. I have
 seen a case of bit-flip in a file stored in an USB flash drive.
 
+Repairing a file can take some time. Small files or files with the error
+located near the beginning can be repaired in a few seconds. But
+repairing a large file compressed with a large dictionary size and with
+the error located far from the beginning, can take hours.
+
+On the other hand, errors located near the beginning of the file cause
+much more loss of data than errors located near the end. So lziprecover
+repairs more efficiently the worst errors.
+
 
 @node Merging files
 @chapter Merging files
diff --git a/file_index.cc b/file_index.cc
index cdb4031..b4f5420 100644
--- a/file_index.cc
+++ b/file_index.cc
@@ -28,6 +28,15 @@
 #include "file_index.h"
 
 
+int seek_read( const int fd, uint8_t * const buf, const int size,
+               const long long pos )	// read up to 'size' bytes from offset 'pos' of 'fd'
+  {
+  if( lseek( fd, pos, SEEK_SET ) == pos )	// read only if the seek landed exactly on 'pos'
+    return readblock( fd, buf, size );		// result of readblock (bytes really read)
+  return 0;					// seek failed: report that nothing was read
+  }
+
+
 Block Block::split( const long long pos )
   {
   if( pos > pos_ && pos < end() )
@@ -120,7 +129,7 @@ File_index::File_index( const int infd )
     return;
     }
   std::reverse( member_vector.begin(), member_vector.end() );
-  for( unsigned i = 0; i < member_vector.size() - 1; ++i )
+  for( unsigned long i = 0; i < member_vector.size() - 1; ++i )
     {
     const long long end = member_vector[i].dblock.end();
     if( end < 0 || end > INT64_MAX )
@@ -214,7 +223,7 @@ error:
     return;
     }
   std::reverse( member_vector.begin(), member_vector.end() );
-  for( unsigned i = 0; i < member_vector.size() - 1; ++i )
+  for( unsigned long i = 0; i < member_vector.size() - 1; ++i )
     {
     const long long end = member_vector[i].dblock.end();
     if( end < 0 || end > INT64_MAX )
diff --git a/file_index.h b/file_index.h
index 8acb60f..7fee1e6 100644
--- a/file_index.h
+++ b/file_index.h
@@ -76,7 +76,7 @@ public:
   explicit File_index( const int infd );
   File_index( const std::vector< int > & infd_vector, const long long fsize );
 
-  int members() const { return member_vector.size(); }
+  long members() const { return member_vector.size(); }
   const std::string & error() const { return error_; }
   int retval() const { return retval_; }
 
@@ -84,7 +84,7 @@ public:
     {
     if( retval_ || fi.retval_ || isize != fi.isize ||
         member_vector.size() != fi.member_vector.size() ) return false;
-    for( unsigned i = 0; i < member_vector.size(); ++i )
+    for( unsigned long i = 0; i < member_vector.size(); ++i )
       if( member_vector[i] != fi.member_vector[i] ) return false;
     return true;
     }
@@ -102,8 +102,8 @@ public:
   long long file_size() const
     { if( isize >= 0 ) return isize; else return 0; }
 
-  const Block & dblock( const int i ) const
+  const Block & dblock( const long i ) const
     { return member_vector[i].dblock; }
-  const Block & mblock( const int i ) const
+  const Block & mblock( const long i ) const
     { return member_vector[i].mblock; }
   };
diff --git a/lzip.h b/lzip.h
index 483835f..cd44b42 100644
--- a/lzip.h
+++ b/lzip.h
@@ -279,9 +279,13 @@ inline unsigned long long positive_diff( const unsigned long long x,
 
 
 // defined in decoder.cc
-int readblock( const int fd, uint8_t * const buf, const int size );
+long readblock( const int fd, uint8_t * const buf, const long size );
 int writeblock( const int fd, const uint8_t * const buf, const int size );
 
+// defined in file_index.cc
+int seek_read( const int fd, uint8_t * const buf, const int size,
+               const long long pos );
+
 // defined in main.cc
 int open_instream( const char * const name, struct stat * const in_statsp,
                    const bool no_ofile, const bool reg_only = false );
@@ -314,8 +318,6 @@ int range_decompress( const std::string & input_filename,
                       const bool force, const bool ignore, const bool to_stdout );
 
 // defined in repair.cc
-int seek_read( const int fd, uint8_t * const buf, const int size,
-               const long long pos );
 int repair_file( const std::string & input_filename,
                  const std::string & output_filename, const int verbosity,
                  const bool force );
diff --git a/main.cc b/main.cc
index e5b1cbc..81a13aa 100644
--- a/main.cc
+++ b/main.cc
@@ -256,17 +256,6 @@ bool open_outstream( const bool force )
   }
 
 
-bool check_tty( const int infd )
-  {
-  if( isatty( infd ) )
-    {
-    show_error( "I won't read compressed data from a terminal.", 0, true );
-    return false;
-    }
-  return true;
-  }
-
-
 void cleanup_and_fail( const int retval )
   {
   if( delete_output_on_interrupt )
@@ -591,6 +580,7 @@ int main( const int argc, const char * const argv[] )
     if( filenames.back() != "-" ) filenames_given = true;
     }
 
+  try {
   switch( program_mode )
     {
     case m_none: internal_error( "invalid operation." ); break;
@@ -620,6 +610,9 @@ int main( const int argc, const char * const argv[] )
       return split_file( filenames[0], default_output_filename, verbosity, force );
     case m_test: break;
     }
+    }
+  catch( std::bad_alloc ) { show_error( "Not enough memory." ); return 1; }
+  catch( Error e ) { show_error( e.msg, errno ); return 1; }
 
   if( program_mode == m_test )
     outfd = -1;
@@ -683,7 +676,11 @@ int main( const int argc, const char * const argv[] )
         }
       }
 
-    if( !check_tty( infd ) ) return 1;
+    if( isatty( infd ) )
+      {
+      show_error( "I won't read compressed data from a terminal.", 0, true );
+      return 1;
+      }
 
     if( output_filename.size() && !to_stdout && program_mode != m_test )
       delete_output_on_interrupt = true;
diff --git a/merge.cc b/merge.cc
index 08a3d0e..95b9318 100644
--- a/merge.cc
+++ b/merge.cc
@@ -75,6 +75,7 @@ void combine( std::vector< Block > & block_vector, std::vector< Block > & bv )
   }
 
 
+// positions in 'block_vector' are absolute file positions.
 bool diff_member( const long long mpos, const long long msize,
                   const std::vector< int > & infd_vector,
                   std::vector< Block > & block_vector )
@@ -220,7 +221,7 @@ int open_input_files( const std::vector< std::string > & filenames,
     {
     const int infd = infd_vector[i];
     bool error = false;
-    for( int j = 0; j < file_index.members(); ++j )
+    for( long j = 0; j < file_index.members(); ++j )
       {
       const long long mpos = file_index.mblock( j ).pos();
       const long long msize = file_index.mblock( j ).size();
@@ -284,26 +285,21 @@ bool copy_file( const int infd, const int outfd, const long long max_size )
 bool try_decompress_member( const int fd, const unsigned long long msize,
                             long long * failure_posp )
   {
-  try {
-    Range_decoder rdec( fd );
-    File_header header;
-    rdec.read_data( header.data, File_header::size );
-    if( !rdec.finished() &&			// End Of File
-        header.verify_magic() && header.verify_version() &&
-        header.dictionary_size() >= min_dictionary_size &&
-        header.dictionary_size() <= max_dictionary_size )
-      {
-      LZ_decoder decoder( header, rdec, -1 );
-      Pretty_print dummy( "", -1 );
+  Range_decoder rdec( fd );
+  File_header header;
+  rdec.read_data( header.data, File_header::size );
+  if( !rdec.finished() &&			// End Of File
+      header.verify_magic() && header.verify_version() &&
+      header.dictionary_size() >= min_dictionary_size &&
+      header.dictionary_size() <= max_dictionary_size )
+    {
+    LZ_decoder decoder( header, rdec, -1 );
+    Pretty_print dummy( "", -1 );
 
-      if( decoder.decode_member( dummy ) == 0 &&
-          rdec.member_position() == msize ) return true;
-      if( failure_posp ) *failure_posp = rdec.member_position();
-      }
+    if( decoder.decode_member( dummy ) == 0 &&
+        rdec.member_position() == msize ) return true;
+    if( failure_posp ) *failure_posp = rdec.member_position();
     }
-  catch( std::bad_alloc )
-    { show_error( "Not enough memory." ); std::exit( 1 ); }
-  catch( Error e ) {}
   return false;
   }
 
@@ -325,7 +321,7 @@ int merge_files( const std::vector< std::string > & filenames,
   if( !copy_file( infd_vector[0], outfd ) )		// copy whole file
     cleanup_and_fail( output_filename, outfd, 1 );
 
-  for( int j = 0; j < file_index.members(); ++j )
+  for( long j = 0; j < file_index.members(); ++j )
     {
     const long long mpos = file_index.mblock( j ).pos();
     const long long msize = file_index.mblock( j ).size();
@@ -360,7 +356,7 @@ int merge_files( const std::vector< std::string > & filenames,
 
     if( verbosity >= 1 && file_index.members() > 1 )
       {
-      std::printf( "Merging member %d\n", j + 1 );
+      std::printf( "Merging member %ld\n", j + 1 );
       std::fflush( stdout );
       }
     const int base_variations = ipow( files, block_vector.size() );
@@ -378,11 +374,10 @@ int merge_files( const std::vector< std::string > & filenames,
         {
         const int infd = infd_vector[tmp % files];
         tmp /= files;
-        if( lseek( infd, block_vector[i].pos(), SEEK_SET ) < 0 ||
-            lseek( outfd, block_vector[i].pos(), SEEK_SET ) < 0 ||
+        if( !safe_seek( infd, block_vector[i].pos() ) ||
+            !safe_seek( outfd, block_vector[i].pos() ) ||
             !copy_file( infd, outfd, block_vector[i].size() ) )
-          { show_error( "Error reading output file", errno );
-            cleanup_and_fail( output_filename, outfd, 1 ); }
+          cleanup_and_fail( output_filename, outfd, 1 );
         }
       if( !safe_seek( outfd, mpos ) )
         cleanup_and_fail( output_filename, outfd, 1 );
diff --git a/mtester.cc b/mtester.cc
new file mode 100644
index 0000000..3fd2563
--- /dev/null
+++ b/mtester.cc
@@ -0,0 +1,209 @@
+/*  Lziprecover - Data recovery tool for lzip files
+    Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz.
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <algorithm>
+#include <cerrno>
+#include <climits>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+#include <vector>
+#include <stdint.h>
+#include <unistd.h>
+
+#include "lzip.h"
+#include "mtester.h"
+
+
+void LZ_mtester::flush_data()	// pass pending decoded bytes to the CRC; no file is written
+  {
+  if( pos > stream_pos )
+    {
+    const int size = pos - stream_pos;	// bytes decoded since the previous flush
+    crc32.update_buf( crc_, buffer + stream_pos, size );
+    if( pos >= buffer_size ) { partial_data_pos += pos; pos = 0; }	// buffer full: wrap to its start
+    stream_pos = pos;			// everything up to 'pos' has been processed
+    }
+  }
+
+
+bool LZ_mtester::verify_trailer()	// true if the member trailer agrees with what was decoded
+  {
+  const File_trailer * trailer = rdec.get_trailer();
+  if( !trailer ) return false;		// fewer than File_trailer::size input bytes remain
+
+  return ( rdec.code_is_zero() &&	// all four checks must hold
+           trailer->data_crc() == crc() &&
+           trailer->data_size() == data_position() &&
+           trailer->member_size() == (unsigned long)member_position() );
+  }
+
+
+void LZ_mtester::duplicate_buffer()	// point 'buffer' at a freshly allocated copy
+  {
+  uint8_t * const tmp = new uint8_t[buffer_size];
+  if( data_position() > 0 )		// copy the decoded bytes, at most one full buffer
+    std::memcpy( tmp, buffer, std::min( data_position(),
+                                        (unsigned long long)buffer_size ) );
+  else tmp[buffer_size-1] = 0;		// prev_byte of first byte
+  buffer = tmp;				// the previous buffer is not freed here
+  }
+
+
+/* Decodes the member from the input buffer, stopping at 'pos_limit'.
+   Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF,
+   3 = trailer error, 4 = unknown marker found, -1 = pos_limit reached. */
+int LZ_mtester::test_member( const long pos_limit )
+  {
+  if( pos_limit < File_header::size + 5 ) return -1;	// limit precedes header + the 5 bytes read by load()
+  if( member_position() == File_header::size ) rdec.load();	// input still at its initial position
+  while( !rdec.finished() )
+    {
+    if( member_position() >= pos_limit ) { flush_data(); return -1; }	// stop at the limit
+    const int pos_state = data_position() & pos_state_mask;
+    if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 )	// 1st bit
+      {
+      const uint8_t prev_byte = get_prev_byte();
+      if( state.is_char() )
+        {
+        state.set_char1();
+        put_byte( rdec.decode_tree8( bm_literal[get_lit_state(prev_byte)] ) );
+        }
+      else
+        {
+        state.set_char2();
+        put_byte( rdec.decode_matched( bm_literal[get_lit_state(prev_byte)],
+                                       get_byte( rep0 ) ) );
+        }
+      }
+    else
+      {
+      int len;
+      if( rdec.decode_bit( bm_rep[state()] ) != 0 )		// 2nd bit
+        {
+        if( rdec.decode_bit( bm_rep0[state()] ) != 0 )		// 3rd bit
+          {
+          unsigned distance;
+          if( rdec.decode_bit( bm_rep1[state()] ) == 0 )	// 4th bit
+            distance = rep1;
+          else
+            {
+            if( rdec.decode_bit( bm_rep2[state()] ) == 0 )	// 5th bit
+              distance = rep2;
+            else
+              { distance = rep3; rep3 = rep2; }
+            rep2 = rep1;
+            }
+          rep1 = rep0;
+          rep0 = distance;
+          }
+        else
+          {
+          if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
+            { state.set_short_rep(); put_byte( get_byte( rep0 ) ); continue; }
+          }
+        state.set_rep();
+        len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
+        }
+      else
+        {
+        const unsigned rep0_saved = rep0;	// restored if a marker is found
+        len = min_match_len + rdec.decode_len( match_len_model, pos_state );
+        const int dis_slot = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
+        if( dis_slot < start_dis_model ) rep0 = dis_slot;
+        else
+          {
+          const int direct_bits = ( dis_slot >> 1 ) - 1;
+          rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
+          if( dis_slot < end_dis_model )
+            rep0 += rdec.decode_tree_reversed( bm_dis + rep0 - dis_slot - 1,
+                                               direct_bits );
+          else
+            {
+            rep0 += rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
+            rep0 += rdec.decode_tree_reversed4( bm_align );
+            if( rep0 == 0xFFFFFFFFU )		// Marker found
+              {
+              rep0 = rep0_saved;
+              rdec.normalize();
+              flush_data();
+              if( len == min_match_len )	// End Of Stream marker
+                {
+                if( verify_trailer() ) return 0; else return 3;
+                }
+              return 4;
+              }
+            }
+          }
+        rep3 = rep2; rep2 = rep1; rep1 = rep0_saved;
+        state.set_match();
+        if( rep0 >= dictionary_size || rep0 >= data_position() )	// distance outside dictionary or decoded data
+          { flush_data(); return 1; }
+        }
+      copy_block( rep0, len );
+      }
+    }
+  flush_data();
+  return 2;				// input ran out before an End Of Stream marker
+  }
+
+
+uint8_t * read_member( const int infd, const long long mpos,
+                       const long long msize )	// read 'msize' bytes at 'mpos' into a new buffer
+  {
+  if( msize <= 0 || msize > LONG_MAX )	// NOTE(review): msize <= 0 is reported with the same message
+    { show_error( "Member is larger than LONG_MAX." ); return 0; }
+  if( !safe_seek( infd, mpos ) ) return 0;
+  uint8_t * const buffer = new uint8_t[msize];	// returned on success; not freed here in that case
+
+  if( readblock( infd, buffer, msize ) != msize )	// short read: release the buffer and return 0
+    { show_error( "Error reading input file", errno );
+      delete[] buffer; return 0; }
+  return buffer;
+  }
+
+
+const LZ_mtester * prepare_master( const uint8_t * const buffer,
+                                   const long buffer_size,
+                                   const long pos_limit )	// new tester advanced to 'pos_limit', or 0
+  {
+  File_header & header = *(File_header *)buffer;	// NOTE(review): read before buffer_size is checked
+  const unsigned dictionary_size = header.dictionary_size();
+  if( header.verify_magic() && header.verify_version() &&
+      dictionary_size >= min_dictionary_size &&
+      dictionary_size <= max_dictionary_size )
+    {
+    LZ_mtester * const master =
+      new LZ_mtester( buffer, buffer_size, dictionary_size );
+    if( master->test_member( pos_limit ) == -1 ) return master;	// -1 = pos_limit reached
+    delete master;			// any other result: discard the tester
+    }
+  return 0;
+  }
+
+
+bool test_member_rest( const LZ_mtester & master, long * const failure_posp )	// test from master's state onwards
+  {
+  LZ_mtester mtester( master );		// memberwise copy: still points at master's output buffer
+  mtester.duplicate_buffer();		// switch the copy to its own buffer before decoding
+  if( mtester.test_member() == 0 && mtester.finished() ) return true;	// OK and all input consumed
+  if( failure_posp ) *failure_posp = mtester.member_position();	// input position where testing stopped
+  return false;
+  }
diff --git a/mtester.h b/mtester.h
new file mode 100644
index 0000000..3ff3fcb
--- /dev/null
+++ b/mtester.h
@@ -0,0 +1,300 @@
+/*  Lziprecover - Data recovery tool for lzip files
+    Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz.
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+class Range_mtester		// range decoder that takes its input from a memory buffer
+  {
+  const uint8_t * const buffer;	// input buffer
+  const long buffer_size;
+  long pos;			// current pos in buffer
+  uint32_t code;
+  uint32_t range;
+  bool at_stream_end;		// set to false by the constructor; not read in this class
+
+  void operator=( const Range_mtester & );	// declared as private
+
+public:
+  Range_mtester( const uint8_t * const buf, const long buf_size )
+    :
+    buffer( buf ),
+    buffer_size( buf_size ),
+    pos( File_header::size ),	// input starts right after the header
+    code( 0 ),
+    range( 0xFFFFFFFFU ),
+    at_stream_end( false )
+    {}
+
+  void load()			// read the first 5 input bytes into 'code'
+    {
+    for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte();
+    code &= range;		// make sure that first byte is discarded
+    }
+
+  bool code_is_zero() const { return ( code == 0 ); }
+  bool finished() { return pos >= buffer_size; }	// true once all input is consumed
+  long member_position() const { return pos; }
+
+  const File_trailer * get_trailer()	// trailer at 'pos', or 0 if too few bytes remain
+    {
+    if( buffer_size - pos < File_trailer::size ) return 0;
+    const File_trailer * const p = (File_trailer *)(buffer + pos);
+    pos += File_trailer::size;		// the trailer bytes count as consumed
+    return p;
+    }
+
+  uint8_t get_byte()
+    {
+    if( finished() ) return 0xAA;		// make code != 0
+    return buffer[pos++];
+    }
+
+  void normalize()		// shift in one more input byte when 'range' fits in 24 bits
+    {
+    if( range <= 0x00FFFFFFU )
+      { range <<= 8; code = (code << 8) | get_byte(); }
+    }
+
+  int decode( const int num_bits )	// decode 'num_bits' bits without a probability model
+    {
+    int symbol = 0;
+    for( int i = num_bits; i > 0; --i )
+      {
+      normalize();
+      range >>= 1;
+//      symbol <<= 1;
+//      if( code >= range ) { code -= range; symbol |= 1; }
+      const uint32_t mask = 0U - (code < range);	// all ones if code < range, else 0
+      code -= range;
+      code += range & mask;		// undo the subtraction when code was < range
+      symbol = (symbol << 1) + (mask + 1);	// appends 0 when code was < range, else 1
+      }
+    return symbol;
+    }
+
+  int decode_bit( Bit_model & bm )	// decode one bit and adapt bm.probability
+    {
+    normalize();
+    const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
+    if( code < bound )
+      {
+      range = bound;
+      bm.probability += (bit_model_total - bm.probability) >> bit_model_move_bits;
+      return 0;
+      }
+    else
+      {
+      range -= bound;
+      code -= bound;
+      bm.probability -= bm.probability >> bit_model_move_bits;
+      return 1;
+      }
+    }
+
+  int decode_tree3( Bit_model bm[] )	// 3-bit symbol, most significant bit first
+    {
+    int symbol = 1;
+    symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
+    symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
+    symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
+    return symbol & 7;
+    }
+
+  int decode_tree6( Bit_model bm[] )	// 6-bit symbol, most significant bit first
+    {
+    int symbol = 1;
+    symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
+    symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
+    symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
+    symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
+    symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
+    symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
+    return symbol & 0x3F;
+    }
+
+  int decode_tree8( Bit_model bm[] )	// 8-bit symbol, most significant bit first
+    {
+    int symbol = 1;
+    while( symbol < 0x100 )
+      symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
+    return symbol & 0xFF;
+    }
+
+  int decode_tree_reversed( Bit_model bm[], const int num_bits )	// least significant bit first
+    {
+    int model = 1;
+    int symbol = 0;
+    for( int i = 0; i < num_bits; ++i )
+      {
+      const bool bit = decode_bit( bm[model] );
+      model <<= 1;
+      if( bit ) { ++model; symbol |= (1 << i); }
+      }
+    return symbol;
+    }
+
+  int decode_tree_reversed4( Bit_model bm[] )	// unrolled 4-bit case, least significant bit first
+    {
+    int model = 1;
+    int symbol = decode_bit( bm[model] );
+    model = (model << 1) + symbol;
+    int bit = decode_bit( bm[model] );
+    model = (model << 1) + bit; symbol |= (bit << 1);
+    bit = decode_bit( bm[model] );
+    model = (model << 1) + bit; symbol |= (bit << 2);
+    if( decode_bit( bm[model] ) ) symbol |= 8;
+    return symbol;
+    }
+
+  int decode_matched( Bit_model bm[], int match_byte )	// literal decoded with 'match_byte' as context
+    {
+    Bit_model * const bm1 = bm + 0x100;
+    int symbol = 1;
+    while( symbol < 0x100 )
+      {
+      match_byte <<= 1;
+      const int match_bit = match_byte & 0x100;
+      const int bit = decode_bit( bm1[match_bit+symbol] );
+      symbol = ( symbol << 1 ) | bit;
+      if( match_bit != bit << 8 )	// first mismatch: finish with the plain models
+        {
+        while( symbol < 0x100 )
+          symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
+        break;
+        }
+      }
+    return symbol & 0xFF;
+    }
+
+  int decode_len( Len_model & lm, const int pos_state )	// length symbol from the low, mid or high tree
+    {
+    if( decode_bit( lm.choice1 ) == 0 )
+      return decode_tree3( lm.bm_low[pos_state] );
+    if( decode_bit( lm.choice2 ) == 0 )
+      return len_low_symbols + decode_tree3( lm.bm_mid[pos_state] );
+    return len_low_symbols + len_mid_symbols + decode_tree8( lm.bm_high );
+    }
+  };
+
+
+class LZ_mtester		// decodes a member held in memory and checks it; writes no output file
+  {
+  unsigned long long partial_data_pos;	// bytes accounted for by earlier buffer wraps
+  Range_mtester rdec;
+  const unsigned dictionary_size;
+  const int buffer_size;
+  uint8_t * buffer;		// output buffer
+  int pos;			// current pos in buffer
+  int stream_pos;		// first byte not yet passed to the CRC by flush_data
+  uint32_t crc_;
+  unsigned rep0;		// rep[0-3] latest four distances
+  unsigned rep1;		// used for efficient coding of
+  unsigned rep2;		// repeated distances
+  unsigned rep3;
+  State state;
+
+  Bit_model bm_literal[1<<literal_context_bits][0x300];
+  Bit_model bm_match[State::states][pos_states];
+  Bit_model bm_rep[State::states];
+  Bit_model bm_rep0[State::states];
+  Bit_model bm_rep1[State::states];
+  Bit_model bm_rep2[State::states];
+  Bit_model bm_len[State::states][pos_states];
+  Bit_model bm_dis_slot[len_states][1<<dis_slot_bits];
+  Bit_model bm_dis[modeled_distances-end_dis_model];
+  Bit_model bm_align[dis_align_size];
+
+  Len_model match_len_model;
+  Len_model rep_len_model;
+
+  unsigned long long stream_position() const
+    { return partial_data_pos + stream_pos; }
+  void flush_data();
+  bool verify_trailer();
+
+  uint8_t get_prev_byte() const	// byte before 'pos', wrapping to the end of the buffer
+    {
+    const int i = ( ( pos > 0 ) ? pos : buffer_size ) - 1;
+    return buffer[i];
+    }
+
+  uint8_t get_byte( const int distance ) const	// byte 'distance' + 1 positions behind 'pos'
+    {
+    int i = pos - distance - 1;
+    if( i < 0 ) i += buffer_size;	// wrap around the circular buffer
+    return buffer[i];
+    }
+
+  void put_byte( const uint8_t b )
+    {
+    buffer[pos] = b;
+    if( ++pos >= buffer_size ) flush_data();	// buffer full: flush_data wraps 'pos' to 0
+    }
+
+  void copy_block( const int distance, int len )	// append 'len' bytes copied from 'distance' + 1 back
+    {
+    int i = pos - distance - 1;
+    if( i < 0 ) i += buffer_size;
+    if( len < buffer_size - std::max( pos, i ) && len <= std::abs( pos - i ) )
+      {
+      std::memcpy( buffer + pos, buffer + i, len );	// no wrap, no overlap
+      pos += len;
+      }
+    else for( ; len > 0; --len )	// byte by byte: handles wrap and overlapping ranges
+      {
+      buffer[pos] = buffer[i];
+      if( ++pos >= buffer_size ) flush_data();
+      if( ++i >= buffer_size ) i = 0;
+      }
+    }
+
+  void operator=( const LZ_mtester & );		// declared as private
+
+public:
+  LZ_mtester( const uint8_t * const ibuf, const long ibuf_size,
+              const int dict_size )
+    :
+    partial_data_pos( 0 ),
+    rdec( ibuf, ibuf_size ),
+    dictionary_size( dict_size ),
+    buffer_size( std::max( 65536U, dictionary_size ) ),	// at least 64 KiB
+    buffer( new uint8_t[buffer_size] ),
+    pos( 0 ),
+    stream_pos( 0 ),
+    crc_( 0xFFFFFFFFU ),
+    rep0( 0 ),
+    rep1( 0 ),
+    rep2( 0 ),
+    rep3( 0 )
+    { buffer[buffer_size-1] = 0; }	// prev_byte of first byte
+
+  ~LZ_mtester() { delete[] buffer; }	// NOTE(review): a copy shares 'buffer' until duplicate_buffer()
+
+  unsigned crc() const { return crc_ ^ 0xFFFFFFFFU; }
+  unsigned long long data_position() const { return partial_data_pos + pos; }	// bytes decoded so far
+  bool finished() { return rdec.finished(); }
+  long member_position() const { return rdec.member_position(); }	// current input position
+
+  void duplicate_buffer();
+  int test_member( const long pos_limit = LONG_MAX );
+  };
+
+
+uint8_t * read_member( const int infd, const long long mpos,
+                       const long long msize );
+const LZ_mtester * prepare_master( const uint8_t * const buffer,
+                                   const long buffer_size,
+                                   const long pos_limit );
+bool test_member_rest( const LZ_mtester & master, long * const failure_posp = 0 );
diff --git a/range_dec.cc b/range_dec.cc
index 2c6c342..111405d 100644
--- a/range_dec.cc
+++ b/range_dec.cc
@@ -203,13 +203,13 @@ int list_file( const char * const input_filename, const Pretty_print & pp )
 
     if( pp.verbosity() >= 1 && file_index.members() > 1 )
       {
-      std::printf( "    Total members in file = %d.\n", file_index.members() );
+      std::printf( "    Total members in file = %ld.\n", file_index.members() );
       if( pp.verbosity() >= 2 )
-        for( int i = 0; i < file_index.members(); ++i )
+        for( long i = 0; i < file_index.members(); ++i )
           {
           const Block & db = file_index.dblock( i );
           const Block & mb = file_index.mblock( i );
-          std::printf( "    Member %3d   data pos %9llu   data size %7llu   "
+          std::printf( "    Member %3ld   data pos %9llu   data size %7llu   "
                        "member pos %9llu   member size %7llu.\n", i + 1,
                        db.pos(), db.size(), mb.pos(), mb.size() );
           }
@@ -282,13 +282,13 @@ int range_decompress( const std::string & input_filename,
       if( outfd < 0 ) return 1; }
 
   int retval = 0;
-  for( int i = 0; i < file_index.members(); ++i )
+  for( long i = 0; i < file_index.members(); ++i )
     {
     const Block & db = file_index.dblock( i );
     if( range.overlaps( db ) )
       {
       if( verbosity >= 3 )
-        std::fprintf( stderr, "Decompressing member %3d\n", i + 1 );
+        std::fprintf( stderr, "Decompressing member %3ld\n", i + 1 );
       const long long outskip = std::max( 0LL, range.pos() - db.pos() );
       const long long outend = std::min( db.size(), range.end() - db.pos() );
       const long long mpos = file_index.mblock( i ).pos();
diff --git a/repair.cc b/repair.cc
index 92a417f..0048bcf 100644
--- a/repair.cc
+++ b/repair.cc
@@ -20,6 +20,7 @@
 #include <cerrno>
 #include <climits>
 #include <cstdio>
+#include <cstdlib>
 #include <cstring>
 #include <string>
 #include <vector>
@@ -29,15 +30,7 @@
 
 #include "lzip.h"
 #include "file_index.h"
-
-
-int seek_read( const int fd, uint8_t * const buf, const int size,
-               const long long pos )
-  {
-  if( lseek( fd, pos, SEEK_SET ) == pos )
-    return readblock( fd, buf, size );
-  return 0;
-  }
+#include "mtester.h"
 
 
 int seek_write( const int fd, const uint8_t * const buf, const int size,
@@ -63,7 +56,7 @@ int repair_file( const std::string & input_filename,
     { pp( file_index.error().c_str() ); return file_index.retval(); }
 
   int outfd = -1;
-  for( int i = 0; i < file_index.members(); ++i )
+  for( long i = 0; i < file_index.members(); ++i )
     {
     const long long mpos = file_index.mblock( i ).pos();
     const long long msize = file_index.mblock( i ).size();
@@ -76,50 +69,59 @@ int repair_file( const std::string & input_filename,
       { show_error( "Can't repair error in input file." );
         cleanup_and_fail( output_filename, outfd, 2 ); }
 
-    if( outfd < 0 )			// first damaged member found
+    if( verbosity >= 1 )		// damaged member found
       {
-      if( !safe_seek( infd, 0 ) ) return 1;
-      outfd = open_outstream_rw( output_filename, force );
-      if( outfd < 0 ) { close( infd ); return 1; }
-      if( !copy_file( infd, outfd ) )		// copy whole file
-        cleanup_and_fail( output_filename, outfd, 1 );
-      }
-
-    if( verbosity >= 1 )
-      {
-      std::printf( "Repairing member %d\n", i + 1 );
+      std::printf( "Repairing member %ld  (failure pos = %llu)\n",
+                   i + 1, mpos + failure_pos );
       std::fflush( stdout );
       }
-    const long long min_pos =
-      std::max( (long long)File_header::size, failure_pos - 1000 );
+    uint8_t * const mbuffer = read_member( infd, mpos, msize );
+    if( !mbuffer )
+      cleanup_and_fail( output_filename, outfd, 1 );
+    long pos = failure_pos;
     bool done = false;
-    for( long long pos = failure_pos; pos >= min_pos && !done ; --pos )
+    while( pos >= File_header::size && pos > failure_pos - 20000 && !done )
       {
-      if( verbosity >= 1 )
-        {
-        std::printf( "Trying position %llu \r", mpos + pos );
-        std::fflush( stdout );
-        }
-      uint8_t byte;
-      if( seek_read( outfd, &byte, 1, mpos + pos ) != 1 )
-        { show_error( "Error reading output file", errno );
-          cleanup_and_fail( output_filename, outfd, 1 ); }
-      for( int i = 0; i < 256; ++i )
+      const long min_pos = std::max( (long)File_header::size, pos - 1000 );
+      const LZ_mtester * master = prepare_master( mbuffer, msize, min_pos - 16 );
+      if( !master )
+        cleanup_and_fail( output_filename, outfd, 1 );
+      for( ; pos >= min_pos && !done ; --pos )
         {
-        ++byte;
-        if( seek_write( outfd, &byte, 1, mpos + pos ) != 1 ||
-            lseek( outfd, mpos, SEEK_SET ) < 0 )
-          { show_error( "Error writing output file", errno );
-            cleanup_and_fail( output_filename, outfd, 1 ); }
-        if( i == 255 ) break;
-        if( try_decompress_member( outfd, msize ) )
-          { done = true; break; }
+        if( verbosity >= 1 )
+          {
+          std::printf( "Trying position %llu \r", mpos + pos );
+          std::fflush( stdout );
+          }
+        for( int j = 0; j < 256; ++j )
+          {
+          ++mbuffer[pos];
+          if( j == 255 ) break;
+          if( test_member_rest( *master ) )
+            {
+            done = true;
+            if( outfd < 0 )		// first damaged member repaired
+              {
+              if( !safe_seek( infd, 0 ) ) return 1;
+              outfd = open_outstream_rw( output_filename, force );
+              if( outfd < 0 ) { close( infd ); return 1; }
+              if( !copy_file( infd, outfd ) )		// copy whole file
+                cleanup_and_fail( output_filename, outfd, 1 );
+              }
+            if( seek_write( outfd, mbuffer + pos, 1, mpos + pos ) != 1 )
+              { show_error( "Error writing output file", errno );
+                cleanup_and_fail( output_filename, outfd, 1 ); }
+            break;
+            }
+          }
         }
+      delete master;
       }
+    delete[] mbuffer;
     if( verbosity >= 1 ) std::printf( "\n" );
     if( !done )
       {
-      show_error( "Error is larger than 1 byte. Can't repair input file." );
+      show_error( "Can't repair input file. Error is probably larger than 1 byte." );
       cleanup_and_fail( output_filename, outfd, 2 );
       }
     }
diff --git a/split.cc b/split.cc
index 8eafd82..fbf0676 100644
--- a/split.cc
+++ b/split.cc
@@ -129,9 +129,9 @@ int do_split_file( const std::string & input_filename, uint8_t * & base_buffer,
 
   const File_index file_index( infd );
   if( file_index.retval() != 0 ) pp( file_index.error().c_str() );
-  const int max_members = file_index.retval() ? 999999 : file_index.members();
+  const long max_members = file_index.retval() ? 999999 : file_index.members();
   int max_digits = 1;
-  for( int i = max_members; i >= 10; i /= 10 ) ++max_digits;
+  for( long i = max_members; i >= 10; i /= 10 ) ++max_digits;
 
   std::string output_filename;
   first_filename( input_filename, default_output_filename, output_filename,
author	Daniel Baumann <mail@daniel-baumann.ch>	2015-11-07 11:45:45 +0000
committer	Daniel Baumann <mail@daniel-baumann.ch>	2015-11-07 11:45:45 +0000
commit	5e8398a39d8758cb4dee9a43f92ac958277e0ebd (patch)
tree	10ba2517467532e4a002f47cc32732f1f335eae0
parent	Adding upstream version 1.16~pre1. (diff)
download	lziprecover-upstream/1.16_pre2.tar.xz lziprecover-upstream/1.16_pre2.zip