diff options
Diffstat (limited to 'merge.cc')
-rw-r--r-- | merge.cc | 200 |
1 files changed, 148 insertions, 52 deletions
@@ -78,18 +78,22 @@ void combine( std::vector< Block > & block_vector, std::vector< Block > & bv ) // positions in 'block_vector' are absolute file positions. bool diff_member( const long long mpos, const long long msize, const std::vector< int > & infd_vector, - std::vector< Block > & block_vector ) + std::vector< Block > & block_vector, + std::vector< int > & color_vector ) { const int files = infd_vector.size(); const int buffer_size = 65536; uint8_t * const buffer1 = new uint8_t[buffer_size]; uint8_t * const buffer2 = new uint8_t[buffer_size]; + int next_color = 1; bool error = false; - for( int i1 = 0; i1 + 1 < files && !error; ++i1 ) + for( int i1 = 0; i1 < files && !error; ++i1 ) { for( int i2 = i1 + 1; i2 < files && !error; ++i2 ) { + if( color_vector[i1] != 0 && color_vector[i1] == color_vector[i2] ) + continue; std::vector< Block > bv; long long partial_pos = 0; const int fd1 = infd_vector[i1], fd2 = infd_vector[i2]; @@ -98,7 +102,7 @@ bool diff_member( const long long mpos, const long long msize, if( !safe_seek( fd1, mpos ) || !safe_seek( fd2, mpos ) ) { error = true; break; } - while( msize > partial_pos ) + while( partial_pos < msize ) { const int size = std::min( (long long)buffer_size, msize - partial_pos ); const int rd = readblock( fd1, buffer1, size ); @@ -133,21 +137,32 @@ bool diff_member( const long long mpos, const long long msize, Block b( mpos + begin, partial_pos - prev_equal - begin ); bv.push_back( b ); } + if( bv.empty() ) // members are identical, set to same color + { + if( color_vector[i1] == 0 ) + { + if( color_vector[i2] != 0 ) color_vector[i1] = color_vector[i2]; + else color_vector[i1] = color_vector[i2] = next_color++; + } + else if( color_vector[i2] == 0 ) color_vector[i2] = color_vector[i1]; + else internal_error( "different colors assigned to identical members." ); + } combine( block_vector, bv ); } + if( color_vector[i1] == 0 ) color_vector[i1] = next_color++; } delete[] buffer2; delete[] buffer1; return !error; } -int ipow( const unsigned base, const unsigned exponent ) +long ipow( const unsigned base, const unsigned exponent ) { - unsigned result = 1; + unsigned long result = 1; for( unsigned i = 0; i < exponent; ++i ) { - if( INT_MAX / base >= result ) result *= base; - else { result = INT_MAX; break; } + if( LONG_MAX / base >= result ) result *= base; + else { result = LONG_MAX; break; } } return result; } @@ -239,6 +254,116 @@ int open_input_files( const std::vector< std::string > & filenames, return -1; } + +bool try_merge_member( const long long mpos, const long long msize, + const std::vector< Block > & block_vector, + const std::vector< int > & color_vector, + const std::vector< int > & infd_vector, + const std::string & output_filename, + const int outfd, const int verbosity ) + { + const int blocks = block_vector.size(); + const int files = infd_vector.size(); + const long variations = ipow( files, blocks ); + if( variations >= LONG_MAX ) + { + if( files > 2 ) + show_error( "Too many damaged blocks. Try merging fewer files." ); + else + show_error( "Too many damaged blocks. Merging is not possible." ); + cleanup_and_fail( output_filename, outfd, 2 ); + } + int bi = 0; // block index + std::vector< int > file_idx( blocks, 0 ); // file to read each block from + + while( bi >= 0 ) + { + if( verbosity >= 1 ) + { + long var = 0; + for( int i = 0; i < blocks; ++i ) + var = ( var * files ) + file_idx[i]; + std::printf( "Trying variation %ld of %ld \r", var + 1, variations ); + std::fflush( stdout ); + } + while( bi < blocks ) + { + const int infd = infd_vector[file_idx[bi]]; + if( !safe_seek( infd, block_vector[bi].pos() ) || + !safe_seek( outfd, block_vector[bi].pos() ) || + !copy_file( infd, outfd, block_vector[bi].size() ) ) + cleanup_and_fail( output_filename, outfd, 1 ); + ++bi; + } + if( !safe_seek( outfd, mpos ) ) + cleanup_and_fail( output_filename, outfd, 1 ); + long long failure_pos = 0; + if( try_decompress_member( outfd, msize, &failure_pos ) ) return true; + while( bi > 0 && mpos + failure_pos < block_vector[bi-1].pos() ) --bi; + while( --bi >= 0 ) + { + while( ++file_idx[bi] < files ) + { + const int color = color_vector[file_idx[bi]]; + bool done = true; + for( int i = file_idx[bi] - 1; i >= 0; --i ) + if( color_vector[i] == color ) { done = false; break; } + if( done ) break; + } + if( file_idx[bi] < files ) break; + file_idx[bi] = 0; + } + } + return false; + } + + +bool try_merge_member1( const long long mpos, const long long msize, + const std::vector< Block > & block_vector, + const std::vector< int > & color_vector, + const std::vector< int > & infd_vector, + const std::string & output_filename, + const int outfd, const int verbosity ) + { + if( block_vector.size() != 1 || block_vector[0].size() <= 1 ) return false; + const long long pos = block_vector[0].pos(); + const long long size = block_vector[0].size(); + const int files = infd_vector.size(); + const int variations = files * ( files - 1 ); + uint8_t byte; + + for( int i1 = 0; i1 < files; ++i1 ) + for( int i2 = 0; i2 < files; ++i2 ) + { + if( i1 == i2 || color_vector[i1] == color_vector[i2] ) continue; + const int infd = infd_vector[i1]; + if( !safe_seek( infd, pos ) || + !safe_seek( infd_vector[i2], pos ) || + !safe_seek( outfd, pos ) || + !copy_file( infd_vector[i2], outfd, size ) ) + cleanup_and_fail( output_filename, outfd, 1 ); + const int var = ( i1 * ( files - 1 ) ) + i2 - ( i2 > i1 ) + 1; + for( long long i = 0; i < size; ++i ) + { + if( verbosity >= 1 ) + { + std::printf( "Trying variation %d of %d, position %lld \r", + var, variations, pos + i ); + std::fflush( stdout ); + } + if( !safe_seek( outfd, pos + i ) || + readblock( infd, &byte, 1 ) != 1 || + writeblock( outfd, &byte, 1 ) != 1 || + !safe_seek( outfd, mpos ) ) + cleanup_and_fail( output_filename, outfd, 1 ); + long long failure_pos = 0; + if( try_decompress_member( outfd, msize, &failure_pos ) ) return true; + if( mpos + failure_pos <= pos + i ) break; + } + } + return false; + } + } // end namespace @@ -327,7 +452,8 @@ int merge_files( const std::vector< std::string > & filenames, const long long msize = file_index.mblock( j ).size(); // vector of data blocks differing among the copies of the current member std::vector< Block > block_vector; - if( !diff_member( mpos, msize, infd_vector, block_vector ) || + std::vector< int > color_vector( files, 0 ); + if( !diff_member( mpos, msize, infd_vector, block_vector, color_vector ) || !safe_seek( outfd, mpos ) ) cleanup_and_fail( output_filename, outfd, 1 ); @@ -335,63 +461,33 @@ int merge_files( const std::vector< std::string > & filenames, { if( file_index.members() > 1 && try_decompress_member( outfd, msize ) ) continue; - show_error( "Input files are (partially) identical. Recovery is not possible." ); + show_error( "Input files are (partially) identical. Merging is not possible." ); cleanup_and_fail( output_filename, outfd, 2 ); } - const int size0 = block_vector[0].size(); - const bool single_block = ( block_vector.size() == 1 ); - if( ipow( files, block_vector.size() ) >= INT_MAX || - ( single_block && ipow( files, 2 ) >= INT_MAX / size0 ) ) - { show_error( "Input files are too damaged. Recovery is not possible." ); - cleanup_and_fail( output_filename, outfd, 2 ); } - - const int shifts = ( single_block && size0 > 1 ) ? size0 - 1 : 1; - if( single_block && size0 > 1 ) - { - Block b( block_vector[0].pos() + 1, size0 - 1 ); - block_vector[0].size( 1 ); - block_vector.push_back( b ); - } - if( verbosity >= 1 && file_index.members() > 1 ) { - std::printf( "Merging member %ld\n", j + 1 ); + std::printf( "Merging member %ld of %ld\n", + j + 1, (long)file_index.members() ); std::fflush( stdout ); } - const int base_variations = ipow( files, block_vector.size() ); - const int variations = base_variations * shifts; + bool done = false; - for( int var = 0; var < variations; ++var ) + if( file_index.members() > 1 || block_vector.size() > 1 ) { - if( verbosity >= 1 ) - { - std::printf( "Trying variation %d of %d \r", var + 1, variations ); - std::fflush( stdout ); - } - int tmp = var; - for( unsigned i = 0; i < block_vector.size(); ++i ) - { - const int infd = infd_vector[tmp % files]; - tmp /= files; - if( !safe_seek( infd, block_vector[i].pos() ) || - !safe_seek( outfd, block_vector[i].pos() ) || - !copy_file( infd, outfd, block_vector[i].size() ) ) - cleanup_and_fail( output_filename, outfd, 1 ); - } - if( !safe_seek( outfd, mpos ) ) - cleanup_and_fail( output_filename, outfd, 1 ); - if( try_decompress_member( outfd, msize ) ) - { done = true; break; } - if( var > 0 && var % base_variations == 0 ) - block_vector[0].shift( block_vector[1] ); + done = try_merge_member( mpos, msize, block_vector, color_vector, + infd_vector, output_filename, outfd, verbosity ); + if( !done && verbosity >= 1 ) std::fputs( "\n", stdout ); } - if( verbosity >= 1 ) std::printf( "\n" ); + if( !done ) + done = try_merge_member1( mpos, msize, block_vector, color_vector, + infd_vector, output_filename, outfd, verbosity ); + if( verbosity >= 1 ) std::fputs( "\n", stdout ); if( !done ) { if( verbosity >= 2 ) for( unsigned i = 0; i < block_vector.size(); ++i ) - std::fprintf( stderr, "area %2d from offset %6lld to %6lld\n", i + 1, + std::fprintf( stderr, "area %2d from position %6lld to %6lld\n", i + 1, block_vector[i].pos(), block_vector[i].end() - 1 ); show_error( "Some error areas overlap. Can't recover input file." ); cleanup_and_fail( output_filename, outfd, 2 ); @@ -404,6 +500,6 @@ int merge_files( const std::vector< std::string > & filenames, cleanup_and_fail( output_filename, -1, 1 ); } if( verbosity >= 1 ) - std::printf( "Input files merged successfully.\n" ); + std::fputs( "Input files merged successfully.\n", stdout ); return 0; } |