summary refs log tree commit diff stats
path: root/split.cc
diff options
context:
space:
mode:
Diffstat (limited to 'split.cc')
-rw-r--r-- split.cc 194
1 files changed, 66 insertions, 128 deletions
diff --git a/split.cc b/split.cc
index 3e697eb..6d0b04f 100644
--- a/split.cc
+++ b/split.cc
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2018 Antonio Diaz Diaz.
+ Copyright (C) 2009-2019 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -29,8 +29,7 @@
#include <sys/stat.h>
#include "lzip.h"
-#include "block.h"
-#include "file_index.h"
+#include "lzip_index.h"
namespace {
@@ -50,6 +49,11 @@ void first_filename( const std::string & input_filename,
bool next_filename( const int max_digits )
{
+ if( verbosity >= 1 )
+ {
+ std::printf( "Member '%s' done \n", output_filename.c_str() );
+ std::fflush( stdout );
+ }
int b = output_filename.size();
while( b > 0 && output_filename[b-1] != '/' ) --b;
for( int i = b + max_digits + 2; i > b + 2; --i ) // "rec<max_digits>"
@@ -60,147 +64,81 @@ bool next_filename( const int max_digits )
return false;
}
-
-// Search forward from 'pos' for "LZIP" (Boyer-Moore algorithm)
-// Returns pos of found string or 'pos+size' if not found.
-//
-int find_magic( const uint8_t * const buffer, const int pos, const int size )
- {
- const unsigned char table[256] = {
- 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
- 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
- 4,4,4,4,4,4,4,4,4,1,4,4,3,4,4,4,4,4,4,4,4,4,4,4,4,4,2,4,4,4,4,4,
- 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
- 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
- 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
- 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
- 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4 };
-
- for( int i = pos; i <= pos + size - 4; i += table[buffer[i+3]] )
- if( buffer[i] == 'L' && buffer[i+1] == 'Z' &&
- buffer[i+2] == 'I' && buffer[i+3] == 'P' )
- return i; // magic string found
- return pos + size;
- }
+} // end namespace
-int do_split_file( const std::string & input_filename, uint8_t * & base_buffer,
- const std::string & default_output_filename,
- const bool force )
+int split_file( const std::string & input_filename,
+ const std::string & default_output_filename, const bool force )
{
- const int hsize = File_header::size;
- const int tsize = File_trailer::size;
- const int buffer_size = 65536;
- const int base_buffer_size = tsize + buffer_size + hsize;
- base_buffer = new uint8_t[base_buffer_size];
- uint8_t * const buffer = base_buffer + tsize;
-
struct stat in_stats;
const int infd = open_instream( input_filename.c_str(), &in_stats, true, true );
if( infd < 0 ) return 1;
- Pretty_print pp( input_filename );
-
- // don't move this after seek_read
- const File_index file_index( infd, true, true, true );
-// if( file_index.retval() != 0 ) pp( file_index.error().c_str() );
- const long max_members = file_index.retval() ? 999999 : file_index.members();
- int max_digits = 1;
- for( long i = max_members; i >= 10; i /= 10 ) ++max_digits;
-
- int size = seek_read( infd, buffer, buffer_size + hsize, 0 ) - hsize;
- bool at_stream_end = ( size < buffer_size );
- if( size != buffer_size && errno )
- { show_error( "Read error", errno ); return 1; }
- if( size < min_member_size )
- { pp( "Input file is too short." ); return 2; }
- if( !verify_header( *(File_header *)buffer, pp ) ) return 2;
-
- first_filename( input_filename, default_output_filename, max_digits );
- if( !open_outstream( force, false, false, false ) )
- { close( infd ); return 1; }
- unsigned long long partial_member_size = 0;
- const bool ttyout = isatty( STDOUT_FILENO );
- while( true )
+ Lzip_index lzip_index( infd, true, true, true, true );
+ if( lzip_index.retval() != 0 )
{
- int pos = 0;
- for( int newpos = 1; newpos <= size; ++newpos )
+ show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
+ return lzip_index.retval();
+ }
+ // verify last member
+ const Block b = lzip_index.mblock( lzip_index.members() - 1 );
+ long long mpos = b.pos();
+ long long msize = b.size();
+ long long failure_pos = 0;
+ if( !safe_seek( infd, mpos ) ) return 1;
+ if( test_member_from_file( infd, msize, &failure_pos ) == 1 )
+ { // corrupt or fake trailer
+ while( true )
{
- newpos = find_magic( buffer, newpos, size + 4 - newpos );
- if( newpos <= size )
- {
- const File_trailer & trailer = *(File_trailer *)(base_buffer + newpos);
- if( partial_member_size + newpos - pos == trailer.member_size() )
- { // header found
- const int wr = writeblock( outfd, buffer + pos, newpos - pos );
- if( wr != newpos - pos )
- { show_error( "Write error", errno ); return 1; }
- if( close_outstream( &in_stats ) != 0 ) return 1;
- if( verbosity >= 1 )
- {
- std::printf( "Member '%s' done %c", output_filename.c_str(),
- ttyout ? '\r' : '\n' );
- std::fflush( stdout );
- }
- if( !next_filename( max_digits ) )
- { show_error( "Too many members in file." ); close( infd ); return 1; }
- if( !open_outstream( force, false, false, false ) )
- { close( infd ); return 1; }
- partial_member_size = 0;
- pos = newpos;
- }
- }
+ mpos += failure_pos; msize -= failure_pos;
+ if( msize < min_member_size ) break; // trailing data
+ if( !safe_seek( infd, mpos ) ) return 1;
+ if( test_member_from_file( infd, msize, &failure_pos ) != 1 ) break;
}
-
- if( at_stream_end )
+ lzip_index = Lzip_index( infd, true, true, true, true, mpos );
+ if( lzip_index.retval() != 0 )
{
- const int wr = writeblock( outfd, buffer + pos, size + hsize - pos );
- if( wr != size + hsize - pos )
- { show_error( "Write error", errno ); return 1; }
- break;
+ show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
+ return lzip_index.retval();
}
- if( pos < buffer_size )
+ }
+
+ if( !safe_seek( infd, 0 ) ) return 1;
+ int max_digits = 1;
+ for( long i = lzip_index.blocks( true ); i >= 10; i /= 10 ) ++max_digits;
+ first_filename( input_filename, default_output_filename, max_digits );
+
+ long long stream_pos = 0; // first pos not yet written to file
+ set_signal_handler();
+ for( long i = 0; i < lzip_index.members(); ++i )
+ {
+ const Block & mb = lzip_index.mblock( i );
+ if( mb.pos() > stream_pos ) // gap
{
- partial_member_size += buffer_size - pos;
- const int wr = writeblock( outfd, buffer + pos, buffer_size - pos );
- if( wr != buffer_size - pos )
- { show_error( "Write error", errno ); return 1; }
+ if( !open_outstream( force, false, false, false ) )
+ { close( infd ); return 1; }
+ if( !copy_file( infd, outfd, mb.pos() - stream_pos ) ||
+ close_outstream( &in_stats ) != 0 )
+ cleanup_and_fail( 1 );
+ next_filename( max_digits );
}
- std::memcpy( base_buffer, base_buffer + buffer_size, tsize + hsize );
- size = readblock( infd, buffer + hsize, buffer_size );
- at_stream_end = ( size < buffer_size );
- if( size != buffer_size && errno )
- { show_error( "Read error", errno ); return 1; }
+ if( !open_outstream( force, false, false, false ) ) // member
+ { close( infd ); return 1; }
+ if( !copy_file( infd, outfd, mb.size() ) ||
+ close_outstream( &in_stats ) != 0 )
+ cleanup_and_fail( 1 );
+ next_filename( max_digits );
+ stream_pos = mb.end();
}
- close( infd );
- if( close_outstream( &in_stats ) != 0 ) return 1;
- if( verbosity >= 1 )
+ if( lzip_index.file_size() > stream_pos ) // trailing data
{
- std::printf( "Member '%s' done \n", output_filename.c_str() );
- std::fflush( stdout );
+ if( !open_outstream( force, false, false, false ) )
+ { close( infd ); return 1; }
+ if( !copy_file( infd, outfd, lzip_index.file_size() - stream_pos ) ||
+ close_outstream( &in_stats ) != 0 )
+ cleanup_and_fail( 1 );
+ next_filename( max_digits );
}
+ close( infd );
return 0;
}
-
-} // end namespace
-
-
-bool verify_header( const File_header & header, const Pretty_print & pp )
- {
- if( !header.verify_magic() )
- { pp( bad_magic_msg ); return false; }
- if( !header.verify_version() )
- { pp( bad_version( header.version() ) ); return false; }
- return true;
- }
-
-
-int split_file( const std::string & input_filename,
- const std::string & default_output_filename, const bool force )
- {
- uint8_t * base_buffer;
- const int retval = do_split_file( input_filename, base_buffer,
- default_output_filename, force );
- delete[] base_buffer;
- return retval;
- }