/* Lziprecover - Data recovery tool for lzipped files Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #define _FILE_OFFSET_BITS 64 #include #include #include #include #include #include #include #include #include #include #include "lzip.h" #include "decoder.h" #include "file_index.h" namespace { bool copy_and_diff_file( const std::vector< int > & infd_vector, const int outfd, std::vector< Block > & block_vector ) { const int buffer_size = 65536; std::vector< uint8_t * > buffer_vector( infd_vector.size() ); for( unsigned i = 0; i < infd_vector.size(); ++i ) buffer_vector[i] = new uint8_t[buffer_size]; Block b( 0, 0 ); long long partial_pos = 0; int equal_bytes = 0; bool error = false; while( true ) { const int rd = readblock( infd_vector[0], buffer_vector[0], buffer_size ); if( rd != buffer_size && errno ) { show_error( "Error reading input file", errno ); error = true; break; } if( rd > 0 ) { for( unsigned i = 1; i < infd_vector.size(); ++i ) if( readblock( infd_vector[i], buffer_vector[i], rd ) != rd ) { show_error( "Error reading input file", errno ); error = true; break; } if( error ) break; const int wr = writeblock( outfd, buffer_vector[0], rd ); if( wr != rd ) { show_error( "Error writing output file", errno ); error = true; break; } for( int i = 0; i < rd; ++i ) { while( i < rd && b.pos() == 0 ) { for( unsigned j = 1; j < infd_vector.size(); ++j ) if( buffer_vector[0][i] != buffer_vector[j][i] ) { b.pos( partial_pos + i ); break; } // begin block ++i; } while( i < rd && b.pos() > 0 ) { ++equal_bytes; for( unsigned j = 1; j < infd_vector.size(); ++j ) if( buffer_vector[0][i] != buffer_vector[j][i] ) { equal_bytes = 0; break; } if( equal_bytes >= 2 ) // end block { b.size( partial_pos + i - ( equal_bytes - 1 ) - b.pos() ); block_vector.push_back( b ); b.pos( 0 ); equal_bytes = 0; } ++i; } } partial_pos += rd; } if( rd < buffer_size ) break; // EOF } if( b.pos() > 0 ) // finish last block { b.size( partial_pos - b.pos() ); block_vector.push_back( b ); } for( unsigned i = 0; i < infd_vector.size(); ++i ) delete[] buffer_vector[i]; return !error; } int ipow( const unsigned base, const unsigned exponent ) { unsigned result = 1; for( unsigned i = 0; i < exponent; ++i ) { if( INT_MAX / base >= result ) result *= base; else { result = INT_MAX; break; } } return result; } int open_input_files( const std::vector< std::string > & filenames, std::vector< int > & infd_vector, long long & isize, const int verbosity ) { bool identical = false; for( unsigned i = 1; i < filenames.size(); ++i ) if( filenames[0] == filenames[i] ) { identical = true; break; } if( !identical ) for( unsigned i = 0; i < filenames.size(); ++i ) { struct stat in_stats; ino_t st_ino0 = 0; dev_t st_dev0 = 0; infd_vector[i] = open_instream( filenames[i], &in_stats, true, true ); if( infd_vector[i] < 0 ) return 1; if( i == 0 ) { st_ino0 = in_stats.st_ino; st_dev0 = in_stats.st_dev; } else if( st_ino0 == in_stats.st_ino && st_dev0 == in_stats.st_dev ) { identical = true; break; } } if( identical ) { show_error( "Two input files are the same." ); return 2; } isize = 0; for( unsigned i = 0; i < filenames.size(); ++i ) { const long long tmp = lseek( infd_vector[i], 0, SEEK_END ); if( tmp < 0 ) { if( verbosity >= 0 ) std::fprintf( stderr, "File '%s' is not seekable.\n", filenames[i].c_str() ); return 1; } if( i == 0 ) { isize = tmp; if( isize < min_member_size ) { show_error( "Input file is too short." ); return 2; } } else if( isize != tmp ) { show_error( "Sizes of input files are different." ); return 2; } } for( unsigned i = 0; i < filenames.size(); ++i ) if( !verify_single_member( infd_vector[i], isize, verbosity ) ) return 2; for( unsigned i = 0; i < filenames.size(); ++i ) { if( lseek( infd_vector[i], 0, SEEK_SET ) < 0 ) { show_error( "Seek error in input file", errno ); return 1; } if( try_decompress( infd_vector[i], isize ) ) { if( verbosity >= 1 ) std::printf( "File '%s' has no errors. Recovery is not needed.\n", filenames[i].c_str() ); return 0; } if( lseek( infd_vector[i], 0, SEEK_SET ) < 0 ) { show_error( "Seek error in input file", errno ); return 1; } } return -1; } } // end namespace void cleanup_and_fail( const std::string & output_filename, const int outfd, const int retval ) { if( outfd >= 0 ) close( outfd ); if( std::remove( output_filename.c_str() ) != 0 && errno != ENOENT ) show_error( "WARNING: deletion of output file (apparently) failed." ); std::exit( retval ); } // max_size < 0 means no size limit. bool copy_file( const int infd, const int outfd, const long long max_size ) { const int buffer_size = 65536; // remaining number of bytes to copy long long rest = ( ( max_size >= 0 ) ? max_size : buffer_size ); uint8_t * const buffer = new uint8_t[buffer_size]; bool error = false; while( rest > 0 ) { const int size = std::min( (long long)buffer_size, rest ); if( max_size >= 0 ) rest -= size; const int rd = readblock( infd, buffer, size ); if( rd != size && errno ) { show_error( "Error reading input file", errno ); error = true; break; } if( rd > 0 ) { const int wr = writeblock( outfd, buffer, rd ); if( wr != rd ) { show_error( "Error writing output file", errno ); error = true; break; } } if( rd < size ) break; // EOF } delete[] buffer; return !error; } bool try_decompress( const int fd, const unsigned long long file_size, long long * failure_posp ) { try { Range_decoder rdec( fd ); File_header header; rdec.read_data( header.data, File_header::size ); if( !rdec.finished() && // End Of File header.verify_magic() && header.version() == 1 && header.dictionary_size() >= min_dictionary_size && header.dictionary_size() <= max_dictionary_size ) { LZ_decoder decoder( header, rdec, -1 ); Pretty_print dummy( "", -1 ); if( decoder.decode_member( dummy ) == 0 && rdec.member_position() == file_size ) return true; if( failure_posp ) *failure_posp = rdec.member_position(); } } catch( std::bad_alloc ) { show_error( "Not enough memory. Find a machine with more memory." ); std::exit( 1 ); } catch( Error e ) {} return false; } bool verify_header( const File_header & header, const int verbosity ) { if( !header.verify_magic() ) { show_error( "Bad magic number (file not in lzip format)." ); return false; } if( header.version() == 0 ) { show_error( "Version 0 member format can't be recovered." ); return false; } if( header.version() != 1 ) { if( verbosity >= 0 ) std::fprintf( stderr, "Version %d member format not supported.\n", header.version() ); return false; } return true; } bool verify_single_member( const int fd, const long long file_size, const int verbosity ) { File_header header; if( lseek( fd, 0, SEEK_SET ) < 0 || readblock( fd, header.data, File_header::size ) != File_header::size ) { show_error( "Error reading member header", errno ); return false; } if( !verify_header( header, verbosity ) ) return false; File_trailer trailer; if( lseek( fd, -File_trailer::size(), SEEK_END ) < 0 || readblock( fd, trailer.data, File_trailer::size() ) != File_trailer::size() ) { show_error( "Error reading member trailer", errno ); return false; } const long long member_size = trailer.member_size(); if( member_size != file_size ) { if( member_size < file_size && lseek( fd, -member_size, SEEK_END ) > 0 && readblock( fd, header.data, File_header::size ) == File_header::size && verify_header( header, verbosity ) ) show_error( "Input file has more than 1 member. Split it first." ); else show_error( "Member size in input file trailer is corrupt." ); return false; } return true; } int merge_files( const std::vector< std::string > & filenames, const std::string & output_filename, const int verbosity, const bool force ) { std::vector< int > infd_vector( filenames.size() ); long long isize = 0; const int retval = open_input_files( filenames, infd_vector, isize, verbosity ); if( retval >= 0 ) return retval; const int outfd = open_outstream_rw( output_filename, force ); if( outfd < 0 ) return 1; // vector of data blocks differing among the copies of the input file. std::vector< Block > block_vector; if( !copy_and_diff_file( infd_vector, outfd, block_vector ) ) cleanup_and_fail( output_filename, outfd, 1 ); if( block_vector.size() == 0 ) { show_error( "Input files are identical. Recovery is not possible." ); cleanup_and_fail( output_filename, outfd, 2 ); } const bool single_block = ( block_vector.size() == 1 ); if( single_block && block_vector[0].size() < 2 ) { show_error( "Input files have the same byte damaged." " Try repairing one of them." ); cleanup_and_fail( output_filename, outfd, 2 ); } if( ipow( filenames.size(), block_vector.size() ) >= INT_MAX || ( single_block && ipow( filenames.size(), 2 ) >= INT_MAX / block_vector[0].size() ) ) { show_error( "Input files are too damaged. Recovery is not possible." ); cleanup_and_fail( output_filename, outfd, 2 ); } const int shifts = ( single_block ? block_vector[0].size() - 1 : 1 ); if( single_block ) { Block b( block_vector[0].pos() + 1, block_vector[0].size() - 1 ); block_vector[0].size( 1 ); block_vector.push_back( b ); } const int base_variations = ipow( filenames.size(), block_vector.size() ); const int variations = ( base_variations * shifts ) - 2; bool done = false; for( int var = 1; var <= variations; ++var ) { if( verbosity >= 1 ) { std::printf( "Trying variation %d of %d \r", var, variations ); std::fflush( stdout ); } int tmp = var; for( unsigned i = 0; i < block_vector.size(); ++i ) { const int infd = infd_vector[tmp % filenames.size()]; tmp /= filenames.size(); if( lseek( infd, block_vector[i].pos(), SEEK_SET ) < 0 || lseek( outfd, block_vector[i].pos(), SEEK_SET ) < 0 || !copy_file( infd, outfd, block_vector[i].size() ) ) { show_error( "Error reading output file", errno ); cleanup_and_fail( output_filename, outfd, 1 ); } } if( lseek( outfd, 0, SEEK_SET ) < 0 ) { show_error( "Seek error in output file", errno ); cleanup_and_fail( output_filename, outfd, 1 ); } if( try_decompress( outfd, isize ) ) { done = true; break; } if( var % base_variations == 0 ) block_vector[0].shift( block_vector[1] ); } if( verbosity >= 1 ) std::printf( "\n" ); if( close( outfd ) != 0 ) { show_error( "Error closing output file", errno ); cleanup_and_fail( output_filename, -1, 1 ); } if( !done ) { show_error( "Some error areas overlap. Can't recover input file." ); cleanup_and_fail( output_filename, -1, 2 ); } if( verbosity >= 1 ) std::printf( "Input files merged successfully.\n" ); return 0; }