diff options
Diffstat (limited to 'split.cc')
-rw-r--r-- | split.cc | 194 |
1 files changed, 66 insertions, 128 deletions
@@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2018 Antonio Diaz Diaz. + Copyright (C) 2009-2019 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -29,8 +29,7 @@ #include <sys/stat.h> #include "lzip.h" -#include "block.h" -#include "file_index.h" +#include "lzip_index.h" namespace { @@ -50,6 +49,11 @@ void first_filename( const std::string & input_filename, bool next_filename( const int max_digits ) { + if( verbosity >= 1 ) + { + std::printf( "Member '%s' done \n", output_filename.c_str() ); + std::fflush( stdout ); + } int b = output_filename.size(); while( b > 0 && output_filename[b-1] != '/' ) --b; for( int i = b + max_digits + 2; i > b + 2; --i ) // "rec<max_digits>" @@ -60,147 +64,81 @@ bool next_filename( const int max_digits ) return false; } - -// Search forward from 'pos' for "LZIP" (Boyer-Moore algorithm) -// Returns pos of found string or 'pos+size' if not found. -// -int find_magic( const uint8_t * const buffer, const int pos, const int size ) - { - const unsigned char table[256] = { - 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, - 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, - 4,4,4,4,4,4,4,4,4,1,4,4,3,4,4,4,4,4,4,4,4,4,4,4,4,4,2,4,4,4,4,4, - 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, - 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, - 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, - 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, - 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4 }; - - for( int i = pos; i <= pos + size - 4; i += table[buffer[i+3]] ) - if( buffer[i] == 'L' && buffer[i+1] == 'Z' && - buffer[i+2] == 'I' && buffer[i+3] == 'P' ) - return i; // magic string found - return pos + size; - } +} // end namespace -int do_split_file( const std::string & input_filename, uint8_t * & base_buffer, - const std::string & default_output_filename, - const bool force ) +int split_file( const std::string & input_filename, + const std::string & default_output_filename, const bool force ) { - const int hsize = File_header::size; - const int tsize = File_trailer::size; - const int buffer_size = 65536; - const int base_buffer_size = tsize + buffer_size + hsize; - base_buffer = new uint8_t[base_buffer_size]; - uint8_t * const buffer = base_buffer + tsize; - struct stat in_stats; const int infd = open_instream( input_filename.c_str(), &in_stats, true, true ); if( infd < 0 ) return 1; - Pretty_print pp( input_filename ); - - // don't move this after seek_read - const File_index file_index( infd, true, true, true ); -// if( file_index.retval() != 0 ) pp( file_index.error().c_str() ); - const long max_members = file_index.retval() ? 999999 : file_index.members(); - int max_digits = 1; - for( long i = max_members; i >= 10; i /= 10 ) ++max_digits; - - int size = seek_read( infd, buffer, buffer_size + hsize, 0 ) - hsize; - bool at_stream_end = ( size < buffer_size ); - if( size != buffer_size && errno ) - { show_error( "Read error", errno ); return 1; } - if( size < min_member_size ) - { pp( "Input file is too short." ); return 2; } - if( !verify_header( *(File_header *)buffer, pp ) ) return 2; - - first_filename( input_filename, default_output_filename, max_digits ); - if( !open_outstream( force, false, false, false ) ) - { close( infd ); return 1; } - unsigned long long partial_member_size = 0; - const bool ttyout = isatty( STDOUT_FILENO ); - while( true ) + Lzip_index lzip_index( infd, true, true, true, true ); + if( lzip_index.retval() != 0 ) { - int pos = 0; - for( int newpos = 1; newpos <= size; ++newpos ) + show_file_error( input_filename.c_str(), lzip_index.error().c_str() ); + return lzip_index.retval(); + } + // verify last member + const Block b = lzip_index.mblock( lzip_index.members() - 1 ); + long long mpos = b.pos(); + long long msize = b.size(); + long long failure_pos = 0; + if( !safe_seek( infd, mpos ) ) return 1; + if( test_member_from_file( infd, msize, &failure_pos ) == 1 ) + { // corrupt or fake trailer + while( true ) { - newpos = find_magic( buffer, newpos, size + 4 - newpos ); - if( newpos <= size ) - { - const File_trailer & trailer = *(File_trailer *)(base_buffer + newpos); - if( partial_member_size + newpos - pos == trailer.member_size() ) - { // header found - const int wr = writeblock( outfd, buffer + pos, newpos - pos ); - if( wr != newpos - pos ) - { show_error( "Write error", errno ); return 1; } - if( close_outstream( &in_stats ) != 0 ) return 1; - if( verbosity >= 1 ) - { - std::printf( "Member '%s' done %c", output_filename.c_str(), - ttyout ? '\r' : '\n' ); - std::fflush( stdout ); - } - if( !next_filename( max_digits ) ) - { show_error( "Too many members in file." ); close( infd ); return 1; } - if( !open_outstream( force, false, false, false ) ) - { close( infd ); return 1; } - partial_member_size = 0; - pos = newpos; - } - } + mpos += failure_pos; msize -= failure_pos; + if( msize < min_member_size ) break; // trailing data + if( !safe_seek( infd, mpos ) ) return 1; + if( test_member_from_file( infd, msize, &failure_pos ) != 1 ) break; } - - if( at_stream_end ) + lzip_index = Lzip_index( infd, true, true, true, true, mpos ); + if( lzip_index.retval() != 0 ) { - const int wr = writeblock( outfd, buffer + pos, size + hsize - pos ); - if( wr != size + hsize - pos ) - { show_error( "Write error", errno ); return 1; } - break; + show_file_error( input_filename.c_str(), lzip_index.error().c_str() ); + return lzip_index.retval(); } - if( pos < buffer_size ) + } + + if( !safe_seek( infd, 0 ) ) return 1; + int max_digits = 1; + for( long i = lzip_index.blocks( true ); i >= 10; i /= 10 ) ++max_digits; + first_filename( input_filename, default_output_filename, max_digits ); + + long long stream_pos = 0; // first pos not yet written to file + set_signal_handler(); + for( long i = 0; i < lzip_index.members(); ++i ) + { + const Block & mb = lzip_index.mblock( i ); + if( mb.pos() > stream_pos ) // gap { - partial_member_size += buffer_size - pos; - const int wr = writeblock( outfd, buffer + pos, buffer_size - pos ); - if( wr != buffer_size - pos ) - { show_error( "Write error", errno ); return 1; } + if( !open_outstream( force, false, false, false ) ) + { close( infd ); return 1; } + if( !copy_file( infd, outfd, mb.pos() - stream_pos ) || + close_outstream( &in_stats ) != 0 ) + cleanup_and_fail( 1 ); + next_filename( max_digits ); } - std::memcpy( base_buffer, base_buffer + buffer_size, tsize + hsize ); - size = readblock( infd, buffer + hsize, buffer_size ); - at_stream_end = ( size < buffer_size ); - if( size != buffer_size && errno ) - { show_error( "Read error", errno ); return 1; } + if( !open_outstream( force, false, false, false ) ) // member + { close( infd ); return 1; } + if( !copy_file( infd, outfd, mb.size() ) || + close_outstream( &in_stats ) != 0 ) + cleanup_and_fail( 1 ); + next_filename( max_digits ); + stream_pos = mb.end(); } - close( infd ); - if( close_outstream( &in_stats ) != 0 ) return 1; - if( verbosity >= 1 ) + if( lzip_index.file_size() > stream_pos ) // trailing data { - std::printf( "Member '%s' done \n", output_filename.c_str() ); - std::fflush( stdout ); + if( !open_outstream( force, false, false, false ) ) + { close( infd ); return 1; } + if( !copy_file( infd, outfd, lzip_index.file_size() - stream_pos ) || + close_outstream( &in_stats ) != 0 ) + cleanup_and_fail( 1 ); + next_filename( max_digits ); } + close( infd ); return 0; } - -} // end namespace - - -bool verify_header( const File_header & header, const Pretty_print & pp ) - { - if( !header.verify_magic() ) - { pp( bad_magic_msg ); return false; } - if( !header.verify_version() ) - { pp( bad_version( header.version() ) ); return false; } - return true; - } - - -int split_file( const std::string & input_filename, - const std::string & default_output_filename, const bool force ) - { - uint8_t * base_buffer; - const int retval = do_split_file( input_filename, base_buffer, - default_output_filename, force ); - delete[] base_buffer; - return retval; - } |