summary refs log tree commit diff stats
path: root/file_index.cc
diff options
context:
space:
mode:
Diffstat (limited to 'file_index.cc')
-rw-r--r-- file_index.cc 135
1 files changed, 97 insertions, 38 deletions
diff --git a/file_index.cc b/file_index.cc
index 747c376..f2f81e7 100644
--- a/file_index.cc
+++ b/file_index.cc
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for the lzip format
- Copyright (C) 2009-2016 Antonio Diaz Diaz.
+ Copyright (C) 2009-2017 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -15,6 +15,8 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#define _FILE_OFFSET_BITS 64
+
#include <algorithm>
#include <cerrno>
#include <cstdio>
@@ -44,17 +46,75 @@ void File_index::set_errno_error( const char * const msg )
retval_ = 1;
}
-void File_index::set_num_error( const char * const msg1, unsigned long long num,
- const char * const msg2 )
+void File_index::set_num_error( const char * const msg, unsigned long long num ) // record msg followed by num as the error text
{
char buf[80];
- snprintf( buf, sizeof buf, "%s%llu%s", msg1, num, msg2 );
+ snprintf( buf, sizeof buf, "%s%llu", msg, num ); // output is bounded by sizeof buf
error_ = buf;
retval_ = 2;
}
-File_index::File_index( const int infd )
+// If successful, push last member and set pos to member header. Every other exit returns false.
+bool File_index::skip_trailing_data( const int fd, const bool ignore_bad_ds,
+ long long & pos )
+ {
+ enum { block_size = 16384,
+ buffer_size = block_size + File_trailer::size - 1 + File_header::size }; // one block plus the bytes carried over between reads
+ uint8_t buffer[buffer_size];
+ if( pos < min_member_size ) return false; // fails here without recording an error
+ int bsize = pos % block_size; // total bytes in buffer
+ if( bsize <= buffer_size - block_size ) bsize += block_size; // short tail: include one more whole block
+ int search_size = bsize; // bytes to search for trailer
+ int rd_size = bsize; // bytes to read from file
+ unsigned long long ipos = pos - rd_size; // aligned to block_size
+
+ while( true )
+ {
+ if( seek_read( fd, buffer, rd_size, ipos ) != rd_size ) // a short read is treated as failure
+ { set_errno_error( "Error seeking member trailer: " ); return false; }
+ const uint8_t max_msb = ( ipos + search_size ) >> 56; // top byte of the largest size accepted below
+ for( int i = search_size; i >= File_trailer::size; --i ) // i = candidate end of a trailer, scanned backwards
+ if( buffer[i-1] <= max_msb ) // most significant byte of member_size
+ {
+ File_trailer & trailer =
+ *(File_trailer *)( buffer + i - File_trailer::size ); // view the bytes ending at i as a trailer
+ const unsigned long long member_size = trailer.member_size();
+ if( member_size == 0 )
+ { while( i > File_trailer::size && buffer[i-9] == 0 ) --i; continue; } // presumably fast-forwards over a run of zero bytes
+ if( member_size < min_member_size || member_size > ipos + i ) // size cannot fit before this file offset
+ continue;
+ File_header header;
+ if( seek_read( fd, header.data, File_header::size,
+ ipos + i - member_size ) != File_header::size ) // read where the candidate member would start
+ { set_errno_error( "Error reading member header: " ); return false; }
+ const unsigned dictionary_size = header.dictionary_size();
+ if( !header.verify_magic() || !header.verify_version() ||
+ ( !ignore_bad_ds && !isvalid_ds( dictionary_size ) ) ) continue; // header rejected: keep searching
+ if( (*(File_header *)( buffer + i )).verify_prefix( bsize - i ) ) // bytes right after the trailer pass the header-prefix check
+ {
+ error_ = "Last member in input file is truncated or corrupt.";
+ retval_ = 2; return false;
+ }
+ pos = ipos + i - member_size; // offset the accepted header was read from
+ member_vector.push_back( Member( 0, trailer.data_size(), pos,
+ member_size, dictionary_size ) );
+ return true;
+ }
+ if( ipos <= 0 ) // ipos is unsigned, so this is ipos == 0: nothing earlier left to read
+ { set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 );
+ return false; }
+ bsize = buffer_size;
+ search_size = bsize - File_header::size; // keeps buffer + i + File_header::size within bsize
+ rd_size = block_size;
+ ipos -= rd_size; // step back one block
+ std::memcpy( buffer + rd_size, buffer, buffer_size - rd_size ); // keep the old buffer's leading bytes just past the block read next
+ }
+ }
+
+
+File_index::File_index( const int infd, const bool ignore_bad_ds,
+ const bool ignore_trailing )
: isize( lseek( infd, 0, SEEK_END ) ), retval_( 0 )
{
if( isize < 0 )
@@ -69,48 +129,46 @@ File_index::File_index( const int infd )
if( seek_read( infd, header.data, File_header::size, 0 ) != File_header::size )
{ set_errno_error( "Error reading member header: " ); return; }
if( !header.verify_magic() )
- { error_ = "Bad magic number (file not in lzip format).";
- retval_ = 2; return; }
+ { error_ = bad_magic_msg; retval_ = 2; return; }
if( !header.verify_version() )
- { set_num_error( "Version ", header.version(),
- " member format not supported." ); return; }
+ { error_ = bad_version( header.version() ); retval_ = 2; return; }
+ if( !ignore_bad_ds && !isvalid_ds( header.dictionary_size() ) )
+ { error_ = bad_dict_msg; retval_ = 2; return; }
- long long pos = isize; // always points to a header or to EOF
+ long long pos = isize; // always points to a header or to EOF
while( pos >= min_member_size )
{
File_trailer trailer;
if( seek_read( infd, trailer.data, File_trailer::size,
pos - File_trailer::size ) != File_trailer::size )
{ set_errno_error( "Error reading member trailer: " ); break; }
- const long long member_size = trailer.member_size();
- if( member_size < min_member_size || member_size > pos )
+ const unsigned long long member_size = trailer.member_size();
+ if( member_size < min_member_size || member_size > (unsigned long long)pos )
{
- if( member_vector.empty() )
- { --pos; continue; } // maybe trailing data
- set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 );
+ if( !member_vector.empty() )
+ set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 );
+ else if( skip_trailing_data( infd, ignore_bad_ds, pos ) )
+ { if( ignore_trailing ) continue;
+ error_ = trailing_msg; retval_ = 2; return; }
break;
}
if( seek_read( infd, header.data, File_header::size,
pos - member_size ) != File_header::size )
{ set_errno_error( "Error reading member header: " ); break; }
- if( !header.verify_magic() || !header.verify_version() )
- {
- if( member_vector.empty() )
- { --pos; continue; } // maybe trailing data
- set_num_error( "Bad header at pos ", pos - member_size );
- break;
- }
const unsigned dictionary_size = header.dictionary_size();
- if( member_vector.empty() && isize - pos > File_header::size &&
- seek_read( infd, header.data, File_header::size, pos ) == File_header::size &&
- header.verify_magic() && header.verify_version() )
+ if( !header.verify_magic() || !header.verify_version() ||
+ ( !ignore_bad_ds && !isvalid_ds( dictionary_size ) ) )
{
- error_ = "Last member in input file is truncated or corrupt.";
- retval_ = 2; break;
+ if( !member_vector.empty() )
+ set_num_error( "Bad header at pos ", pos - member_size );
+ else if( skip_trailing_data( infd, ignore_bad_ds, pos ) )
+ { if( ignore_trailing ) continue;
+ error_ = trailing_msg; retval_ = 2; return; }
+ break;
}
pos -= member_size;
- member_vector.push_back( Member( 0, trailer.data_size(),
- pos, member_size, dictionary_size ) );
+ member_vector.push_back( Member( 0, trailer.data_size(), pos,
+ member_size, dictionary_size ) );
}
if( pos != 0 || member_vector.empty() )
{
@@ -157,13 +215,12 @@ File_index::File_index( const std::vector< int > & infd_vector,
if( header.verify_magic() && header.verify_version() ) done = true;
}
if( !done )
- { error_ = "Bad magic number (file not in lzip format).";
- retval_ = 2; return; }
+ { error_ = bad_magic_msg; retval_ = 2; return; }
long long pos = isize; // always points to a header or to EOF
while( pos >= min_member_size )
{
- long long member_size;
+ unsigned long long member_size;
File_trailer trailer;
done = false;
for( int it = 0; it < files && !done; ++it )
@@ -173,7 +230,7 @@ File_index::File_index( const std::vector< int > & infd_vector,
pos - File_trailer::size ) != File_trailer::size )
{ set_errno_error( "Error reading member trailer: " ); goto error; }
member_size = trailer.member_size();
- if( member_size >= min_member_size && member_size <= pos )
+ if( member_size >= min_member_size && member_size <= (unsigned long long)pos )
for( int ih = 0; ih < files && !done; ++ih )
{
const int hfd = infd_vector[ih];
@@ -185,22 +242,24 @@ File_index::File_index( const std::vector< int > & infd_vector,
}
if( !done )
{
- if( member_vector.empty() ) // maybe trailing data
- { --pos; continue; }
+ if( member_vector.empty() ) { --pos; continue; } // maybe trailing data
set_num_error( "Member size in trailer may be corrupt at pos ", pos - 8 );
break;
}
- if( member_vector.empty() && isize - pos > File_header::size )
+ if( member_vector.empty() && isize > pos )
+ {
+ const int size = std::min( (long long)File_header::size, isize - pos );
for( int i = 0; i < files; ++i )
{
const int infd = infd_vector[i];
- if( seek_read( infd, header.data, File_header::size, pos ) == File_header::size &&
- header.verify_magic() && header.verify_version() )
+ if( seek_read( infd, header.data, size, pos ) == size &&
+ header.verify_prefix( size ) )
{
error_ = "Last member in input file is truncated or corrupt.";
retval_ = 2; goto error;
}
}
+ }
pos -= member_size;
member_vector.push_back( Member( 0, trailer.data_size(),
pos, member_size, 0 ) );