/* Lziprecover - Data recovery tool for lzipped files
Copyright (C) 2009, 2010, 2011 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _FILE_OFFSET_BITS 64
#include <algorithm>
#include <cerrno>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <string>
#include <vector>
#include <stdint.h>
#include <unistd.h>
#include <sys/stat.h>
#include "lzip.h"
#include "decoder.h"
namespace {
// A contiguous range of bytes, described by its first position and its
// length. The caller must keep pos + size <= LLONG_MAX.
class Block
  {
  long long pos_;
  long long size_;

public:
  Block( const long long position, const long long length ) throw()
    : pos_( position ), size_( length ) {}

  // Read accessors. end() is one past the last byte of the range.
  long long pos() const throw() { return pos_; }
  long long size() const throw() { return size_; }
  long long end() const throw() { return pos_ + size_; }

  // Write accessors.
  void pos( const long long position ) throw() { pos_ = position; }
  void size( const long long length ) throw() { size_ = length; }

  // Grows this block by one byte and removes one byte from the front of
  // 'next' (its position advances by one, its size shrinks by one).
  void shift( Block & next ) throw()
    {
    size_ += 1;
    next.pos_ += 1;
    next.size_ -= 1;
    }
  };
// Copies the file open on infd_vector[0] to 'outfd' and, at the same time,
// compares all the input files byte by byte. One Block is appended to
// 'block_vector' for every area where at least one file differs from the
// first one. A block is closed as soon as two consecutive bytes are equal
// in all files; those two bytes are not part of the block. A block still
// open at end of file extends to the end of the data read.
// Returns true on success, false after a read or write error (an error
// message has already been shown in that case).
// 'infd_vector' must not be empty.
bool copy_and_diff_file( const std::vector< int > & infd_vector,
                         const int outfd, std::vector< Block > & block_vector )
  {
  const int buffer_size = 65536;
  // One buffer per input file, owned by the vector so that nothing leaks
  // if an allocation fails or the function returns early.
  std::vector< std::vector< uint8_t > >
    buffer_vector( infd_vector.size(), std::vector< uint8_t >( buffer_size ) );
  // b.pos() == 0 means "no block is currently open". As a consequence a
  // difference at offset 0 can't open a block.
  // NOTE(review): presumably harmless because the callers verify the magic
  // bytes of every file first -- confirm.
  Block b( 0, 0 );
  long long partial_pos = 0;		// file offset of buffer[0]
  int equal_bytes = 0;			// consecutive equal bytes inside a block
  bool error = false;

  while( true )
    {
    const int rd = readblock( infd_vector[0], &buffer_vector[0][0], buffer_size );
    if( rd != buffer_size && errno )
      { show_error( "Error reading input file", errno ); error = true; break; }
    if( rd > 0 )
      {
      // every other file must supply exactly the same number of bytes
      for( unsigned int i = 1; i < infd_vector.size(); ++i )
        if( readblock( infd_vector[i], &buffer_vector[i][0], rd ) != rd )
          { show_error( "Error reading input file", errno );
            error = true; break; }
      if( error ) break;
      const int wr = writeblock( outfd, &buffer_vector[0][0], rd );
      if( wr != rd )
        { show_error( "Error writing output file", errno );
          error = true; break; }

      // Every byte is examined exactly once. (Advancing the index both in
      // an inner scan and in the enclosing loop would skip the byte that
      // follows the end of a block.)
      for( int i = 0; i < rd; ++i )
        {
        bool differ = false;
        for( unsigned int j = 1; j < infd_vector.size(); ++j )
          if( buffer_vector[0][i] != buffer_vector[j][i] )
            { differ = true; break; }

        if( b.pos() == 0 )			// outside of a block
          { if( differ ) b.pos( partial_pos + i ); }	// begin block
        else if( differ ) equal_bytes = 0;	// block continues
        else if( ++equal_bytes >= 2 )		// end block
          {
          b.size( partial_pos + i - ( equal_bytes - 1 ) - b.pos() );
          block_vector.push_back( b );
          b.pos( 0 );
          equal_bytes = 0;
          }
        }
      partial_pos += rd;
      }
    if( rd < buffer_size ) break;			// EOF
    }

  if( b.pos() > 0 )			// finish last block
    {
    b.size( partial_pos - b.pos() );
    block_vector.push_back( b );
    }
  return !error;
  }
// Returns 'base' raised to 'exponent', saturated to INT_MAX when the exact
// result does not fit in an int.
// ipow( x, 0 ) is 1 for every x (including 0); ipow( 0, n ) is 0 for n > 0.
int ipow( const unsigned int base, const unsigned int exponent ) throw()
  {
  if( exponent == 0 ) return 1;
  // 0 and 1 are fixed points. Handling them here also keeps the division
  // below from dividing by zero and avoids a pointless loop for base 1.
  if( base <= 1 ) return base;
  int result = 1;
  for( unsigned int i = 0; i < exponent; ++i )
    {
    // multiply only while the product is known to fit in an int
    if( INT_MAX / base >= static_cast< unsigned int >( result ) )
      result *= base;
    else { result = INT_MAX; break; }
    }
  return result;
  }
// Opens and validates all the input files.
// 'infd_vector' must already hold filenames.size() elements; it receives
// the file descriptors. 'isize' receives the common size of the files.
// Return value:
//   -1  all files are open, positioned at offset 0, and none of them
//       decompresses correctly (recovery has to be attempted),
//    0  one of the files has no errors (recovery is not needed),
//    1  a file can't be opened or seeked, two files are the same, or the
//       sizes differ,
//    2  the first file is too short or a file fails verify_single_member.
// NOTE(review): descriptors already opened are not closed on the error
// paths; presumably the caller terminates the program -- confirm.
int open_input_files( const std::vector< std::string > & filenames,
                      std::vector< int > & infd_vector, long long & isize )
  {
  // Reject duplicated names. Every pair is compared, not just the pairs
  // that include the first file.
  bool identical = false;
  for( unsigned int i = 0; i + 1 < filenames.size() && !identical; ++i )
    for( unsigned int j = i + 1; j < filenames.size(); ++j )
      if( filenames[i] == filenames[j] )
        { identical = true; break; }

  if( !identical )
    {
    // The stat data of every opened file is kept so that each new file
    // can be compared (device + inode) with all the files opened before.
    std::vector< struct stat > stats_vector( filenames.size() );
    for( unsigned int i = 0; i < filenames.size() && !identical; ++i )
      {
      infd_vector[i] =
        open_instream( filenames[i], &stats_vector[i], true, true );
      if( infd_vector[i] < 0 ) return 1;
      for( unsigned int j = 0; j < i; ++j )
        if( stats_vector[j].st_ino == stats_vector[i].st_ino &&
            stats_vector[j].st_dev == stats_vector[i].st_dev )
          { identical = true; break; }
      }
    }
  if( identical ) { show_error( "Two input files are the same." ); return 1; }

  // All the files must be seekable and have the same size.
  isize = 0;
  for( unsigned int i = 0; i < filenames.size(); ++i )
    {
    const long long tmp = lseek( infd_vector[i], 0, SEEK_END );
    if( tmp < 0 )
      {
      if( verbosity >= 0 )
        std::fprintf( stderr, "File `%s' is not seekable.\n", filenames[i].c_str() );
      return 1;
      }
    if( i == 0 )
      {
      isize = tmp;
      if( isize < 36 ) { show_error( "Input file is too short." ); return 2; }
      }
    else if( isize != tmp )
      { show_error( "Sizes of input files are different." ); return 1; }
    }

  for( unsigned int i = 0; i < filenames.size(); ++i )
    if( !verify_single_member( infd_vector[i], isize ) )
      return 2;

  // If any copy decompresses correctly there is nothing to recover.
  // Every file is rewound both before and after the attempt.
  for( unsigned int i = 0; i < filenames.size(); ++i )
    {
    if( lseek( infd_vector[i], 0, SEEK_SET ) < 0 )
      { show_error( "Seek error in input file", errno ); return 1; }
    if( try_decompress( infd_vector[i], isize ) )
      {
      if( verbosity >= 1 )
        std::printf( "File `%s' has no errors. Recovery is not needed.\n",
                     filenames[i].c_str() );
      return 0;
      }
    if( lseek( infd_vector[i], 0, SEEK_SET ) < 0 )
      { show_error( "Seek error in input file", errno ); return 1; }
    }
  return -1;
  }
} // end namespace
// Closes 'outfd' (unless it is negative), deletes the file named
// 'output_filename' and terminates the program with exit status 'retval'.
// A warning is shown if the deletion fails for a reason other than the
// file not existing. This function does not return.
void cleanup_and_fail( const std::string & output_filename,
                       const int outfd, const int retval ) throw()
  {
  if( !( outfd < 0 ) )
    close( outfd );
  const bool removal_failed = ( std::remove( output_filename.c_str() ) != 0 );
  if( removal_failed && errno != ENOENT )
    show_error( "WARNING: deletion of output file (apparently) failed." );
  std::exit( retval );
  }
bool copy_file( const int infd, const int outfd, const long long size )
{
long long rest = size;
const int buffer_size = 65536;
uint8_t * const buffer = new uint8_t[buffer_size];
bool error = false;
while( true )
{
const int block_size = std::min( (long long)buffer_size, rest );
if( block_size <= 0 ) break;
const int rd = readblock( infd, buffer, block_size );
if( rd != block_size && errno )
{ show_error( "Error reading input file", errno ); error = true; break; }
if( rd > 0 )
{
const int wr = writeblock( outfd, buffer, rd );
if( wr != rd )
{ show_error( "Error writing output file", errno );
error = true; break; }
rest -= rd;
}
if( rd < block_size ) break; // EOF
}
delete[] buffer;
return !error;
}
bool try_decompress( const int fd, const long long file_size,
long long * failure_posp )
{
try {
Range_decoder rdec( fd );
File_header header;
rdec.reset_member_position();
for( int i = 0; i < File_header::size; ++i )
header.data[i] = rdec.get_byte();
if( !rdec.finished() && // End Of File
header.verify_magic() &&
header.version() == 1 &&
header.dictionary_size() >= min_dictionary_size &&
header.dictionary_size() <= max_dictionary_size )
{
LZ_decoder decoder( header, rdec, -1 );
std::vector< std::string > dummy_filenames;
Pretty_print dummy( dummy_filenames, -1 );
if( decoder.decode_member( dummy ) == 0 &&
rdec.member_position() == file_size ) return true;
if( failure_posp ) *failure_posp = rdec.member_position();
}
}
catch( std::bad_alloc )
{
show_error( "Not enough memory. Find a machine with more memory." );
std::exit( 1 );
}
catch( Error e ) {}
return false;
}
// Returns true if 'header' has a valid magic number and its version is 1.
// Otherwise explains the problem on stderr and returns false.
bool verify_header( const File_header & header )
  {
  if( header.verify_magic() )
    {
    if( header.version() == 1 ) return true;
    if( header.version() == 0 )
      show_error( "Version 0 member format can't be recovered." );
    else if( verbosity >= 0 )
      std::fprintf( stderr, "Version %d member format not supported.\n",
                    header.version() );
    }
  else
    show_error( "Bad magic number (file not in lzip format)." );
  return false;
  }
// Checks that the file open on 'fd' consists of exactly one member:
// the header at offset 0 must pass verify_header and the member size
// stored in the trailer at the end of the file must equal 'file_size'.
// Shows a message and returns false otherwise. The file position of 'fd'
// is changed.
bool verify_single_member( const int fd, const long long file_size )
  {
  File_header header;
  if( lseek( fd, 0, SEEK_SET ) < 0 ||
      readblock( fd, header.data, File_header::size ) != File_header::size )
    { show_error( "Error reading member header", errno ); return false; }
  if( !verify_header( header ) ) return false;

  File_trailer trailer;
  if( lseek( fd, -File_trailer::size(), SEEK_END ) < 0 ||
      readblock( fd, trailer.data, File_trailer::size() ) != File_trailer::size() )
    { show_error( "Error reading member trailer", errno ); return false; }
  const long long member_size = trailer.member_size();
  if( member_size != file_size )
    {
    // The stored size comes from a possibly damaged file. Only a positive
    // size smaller than the file can point at the header of a last member;
    // checking the sign first also keeps '-member_size' from overflowing.
    if( member_size > 0 && member_size < file_size &&
        lseek( fd, -member_size, SEEK_END ) > 0 &&
        readblock( fd, header.data, File_header::size ) == File_header::size &&
        verify_header( header ) )
      show_error( "Input file has more than 1 member. Split it first." );
    else
      show_error( "Member size in input file trailer is corrupt." );
    return false;
    }
  return true;
  }
int merge_files( const std::vector< std::string > & filenames,
const std::string & output_filename, const bool force )
{
std::vector< int > infd_vector( filenames.size() );
long long isize = 0;
const int retval = open_input_files( filenames, infd_vector, isize );
if( retval >= 0 ) return retval;
const int outfd = open_outstream_rw( output_filename, force );
if( outfd < 0 ) return 1;
// vector of data blocks differing among the copies of the input file.
std::vector< Block > block_vector;
if( !copy_and_diff_file( infd_vector, outfd, block_vector ) )
cleanup_and_fail( output_filename, outfd, 1 );
if( !block_vector.size() )
{ show_error( "Input files are identical. Recovery is not possible." );
cleanup_and_fail( output_filename, outfd, 2 ); }
const bool single_block = ( block_vector.size() == 1 );
if( single_block && block_vector[0].size() < 2 )
{ show_error( "Input files have the same byte damaged."
" Try repairing one of them." );
cleanup_and_fail( output_filename, outfd, 2 ); }
if( ipow( filenames.size(), block_vector.size() ) >= INT_MAX ||
( single_block &&
ipow( filenames.size(), 2 ) >= INT_MAX / block_vector[0].size() ) )
{ show_error( "Input files are too damaged. Recovery is not possible." );
cleanup_and_fail( output_filename, outfd, 2 ); }
const int shifts = ( single_block ? block_vector[0].size() - 1 : 1 );
if( single_block )
{
Block b( block_vector[0].pos() + 1, block_vector[0].size() - 1 );
block_vector[0].size( 1 );
block_vector.push_back( b );
}
const int base_variations = ipow( filenames.size(), block_vector.size() );
const int variations = ( base_variations * shifts ) - 2;
bool done = false;
for( int var = 1; var <= variations; ++var )
{
if( verbosity >= 1 )
{
std::printf( "Trying variation %d of %d \r", var, variations );
std::fflush( stdout );
}
int tmp = var;
for( unsigned int i = 0; i < block_vector.size(); ++i )
{
const int infd = infd_vector[tmp % filenames.size()];
tmp /= filenames.size();
if( lseek( infd, block_vector[i].pos(), SEEK_SET ) < 0 ||
lseek( outfd, block_vector[i].pos(), SEEK_SET ) < 0 ||
!copy_file( infd, outfd, block_vector[i].size() ) )
{ show_error( "Error reading output file", errno );
cleanup_and_fail( output_filename, outfd, 1 ); }
}
if( lseek( outfd, 0, SEEK_SET ) < 0 )
{ show_error( "Seek error in output file", errno );
cleanup_and_fail( output_filename, outfd, 1 ); }
if( try_decompress( outfd, isize ) )
{ done = true; break; }
if( var % base_variations == 0 ) block_vector[0].shift( block_vector[1] );
}
if( verbosity >= 1 ) std::printf( "\n" );
if( close( outfd ) != 0 )
{
show_error( "Error closing output file", errno );
cleanup_and_fail( output_filename, -1, 1 );
}
if( !done )
{
show_error( "Some error areas overlap. Can't recover input file." );
cleanup_and_fail( output_filename, -1, 2 );
}
if( verbosity >= 1 )
std::printf( "Input files merged successfully.\n" );
return 0;
}