diff options
Diffstat (limited to 'file_index.cc')
-rw-r--r-- | file_index.cc | 135 |
1 files changed, 97 insertions, 38 deletions
diff --git a/file_index.cc b/file_index.cc index 747c376..f2f81e7 100644 --- a/file_index.cc +++ b/file_index.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for the lzip format - Copyright (C) 2009-2016 Antonio Diaz Diaz. + Copyright (C) 2009-2017 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -15,6 +15,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#define _FILE_OFFSET_BITS 64 + #include <algorithm> #include <cerrno> #include <cstdio> @@ -44,17 +46,75 @@ void File_index::set_errno_error( const char * const msg ) retval_ = 1; } -void File_index::set_num_error( const char * const msg1, unsigned long long num, - const char * const msg2 ) +void File_index::set_num_error( const char * const msg, unsigned long long num ) { char buf[80]; - snprintf( buf, sizeof buf, "%s%llu%s", msg1, num, msg2 ); + snprintf( buf, sizeof buf, "%s%llu", msg, num ); error_ = buf; retval_ = 2; } -File_index::File_index( const int infd ) +// If successful, push last member and set pos to member header. +bool File_index::skip_trailing_data( const int fd, const bool ignore_bad_ds, + long long & pos ) + { + enum { block_size = 16384, + buffer_size = block_size + File_trailer::size - 1 + File_header::size }; + uint8_t buffer[buffer_size]; + if( pos < min_member_size ) return false; + int bsize = pos % block_size; // total bytes in buffer + if( bsize <= buffer_size - block_size ) bsize += block_size; + int search_size = bsize; // bytes to search for trailer + int rd_size = bsize; // bytes to read from file + unsigned long long ipos = pos - rd_size; // aligned to block_size + + while( true ) + { + if( seek_read( fd, buffer, rd_size, ipos ) != rd_size ) + { set_errno_error( "Error seeking member trailer: " ); return false; } + const uint8_t max_msb = ( ipos + search_size ) >> 56; + for( int i = search_size; i >= File_trailer::size; --i ) + if( buffer[i-1] <= max_msb ) // most significant byte of member_size + { + File_trailer & trailer = + *(File_trailer *)( buffer + i - File_trailer::size ); + const unsigned long long member_size = trailer.member_size(); + if( member_size == 0 ) + { while( i > File_trailer::size && buffer[i-9] == 0 ) --i; continue; } + if( member_size < min_member_size || member_size > ipos + i ) + continue; + File_header header; + if( seek_read( fd, header.data, File_header::size, + ipos + i - member_size ) != File_header::size ) + { set_errno_error( "Error reading member header: " ); return false; } + const unsigned dictionary_size = header.dictionary_size(); + if( !header.verify_magic() || !header.verify_version() || + ( !ignore_bad_ds && !isvalid_ds( dictionary_size ) ) ) continue; + if( (*(File_header *)( buffer + i )).verify_prefix( bsize - i ) ) + { + error_ = "Last member in input file is truncated or corrupt."; + retval_ = 2; return false; + } + pos = ipos + i - member_size; + member_vector.push_back( Member( 0, trailer.data_size(), pos, + member_size, dictionary_size ) ); + return true; + } + if( ipos <= 0 ) + { set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 ); + return false; } + bsize = buffer_size; + search_size = bsize - File_header::size; + rd_size = block_size; + ipos -= rd_size; + std::memcpy( buffer + rd_size, buffer, buffer_size - rd_size ); + } + } + + +File_index::File_index( const int infd, const bool ignore_bad_ds, + const bool ignore_trailing ) : isize( lseek( infd, 0, SEEK_END ) ), retval_( 0 ) { if( isize < 0 ) @@ -69,48 +129,46 @@ File_index::File_index( const int infd ) if( seek_read( infd, header.data, File_header::size, 0 ) != File_header::size ) { set_errno_error( "Error reading member header: " ); return; } if( !header.verify_magic() ) - { error_ = "Bad magic number (file not in lzip format)."; - retval_ = 2; return; } + { error_ = bad_magic_msg; retval_ = 2; return; } if( !header.verify_version() ) - { set_num_error( "Version ", header.version(), - " member format not supported." ); return; } + { error_ = bad_version( header.version() ); retval_ = 2; return; } + if( !ignore_bad_ds && !isvalid_ds( header.dictionary_size() ) ) + { error_ = bad_dict_msg; retval_ = 2; return; } - long long pos = isize; // always points to a header or to EOF + long long pos = isize; // always points to a header or to EOF while( pos >= min_member_size ) { File_trailer trailer; if( seek_read( infd, trailer.data, File_trailer::size, pos - File_trailer::size ) != File_trailer::size ) { set_errno_error( "Error reading member trailer: " ); break; } - const long long member_size = trailer.member_size(); - if( member_size < min_member_size || member_size > pos ) + const unsigned long long member_size = trailer.member_size(); + if( member_size < min_member_size || member_size > (unsigned long long)pos ) { - if( member_vector.empty() ) - { --pos; continue; } // maybe trailing data - set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 ); + if( !member_vector.empty() ) + set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 ); + else if( skip_trailing_data( infd, ignore_bad_ds, pos ) ) + { if( ignore_trailing ) continue; + error_ = trailing_msg; retval_ = 2; return; } break; } if( seek_read( infd, header.data, File_header::size, pos - member_size ) != File_header::size ) { set_errno_error( "Error reading member header: " ); break; } - if( !header.verify_magic() || !header.verify_version() ) - { - if( member_vector.empty() ) - { --pos; continue; } // maybe trailing data - set_num_error( "Bad header at pos ", pos - member_size ); - break; - } const unsigned dictionary_size = header.dictionary_size(); - if( member_vector.empty() && isize - pos > File_header::size && - seek_read( infd, header.data, File_header::size, pos ) == File_header::size && - header.verify_magic() && header.verify_version() ) + if( !header.verify_magic() || !header.verify_version() || + ( !ignore_bad_ds && !isvalid_ds( dictionary_size ) ) ) { - error_ = "Last member in input file is truncated or corrupt."; - retval_ = 2; break; + if( !member_vector.empty() ) + set_num_error( "Bad header at pos ", pos - member_size ); + else if( skip_trailing_data( infd, ignore_bad_ds, pos ) ) + { if( ignore_trailing ) continue; + error_ = trailing_msg; retval_ = 2; return; } + break; } pos -= member_size; - member_vector.push_back( Member( 0, trailer.data_size(), - pos, member_size, dictionary_size ) ); + member_vector.push_back( Member( 0, trailer.data_size(), pos, + member_size, dictionary_size ) ); } if( pos != 0 || member_vector.empty() ) { @@ -157,13 +215,12 @@ File_index::File_index( const std::vector< int > & infd_vector, if( header.verify_magic() && header.verify_version() ) done = true; } if( !done ) - { error_ = "Bad magic number (file not in lzip format)."; - retval_ = 2; return; } + { error_ = bad_magic_msg; retval_ = 2; return; } long long pos = isize; // always points to a header or to EOF while( pos >= min_member_size ) { - long long member_size; + unsigned long long member_size; File_trailer trailer; done = false; for( int it = 0; it < files && !done; ++it ) @@ -173,7 +230,7 @@ File_index::File_index( const std::vector< int > & infd_vector, pos - File_trailer::size ) != File_trailer::size ) { set_errno_error( "Error reading member trailer: " ); goto error; } member_size = trailer.member_size(); - if( member_size >= min_member_size && member_size <= pos ) + if( member_size >= min_member_size && member_size <= (unsigned long long)pos ) for( int ih = 0; ih < files && !done; ++ih ) { const int hfd = infd_vector[ih]; @@ -185,22 +242,24 @@ File_index::File_index( const std::vector< int > & infd_vector, } if( !done ) { - if( member_vector.empty() ) // maybe trailing data - { --pos; continue; } + if( member_vector.empty() ) { --pos; continue; } // maybe trailing data set_num_error( "Member size in trailer may be corrupt at pos ", pos - 8 ); break; } - if( member_vector.empty() && isize - pos > File_header::size ) + if( member_vector.empty() && isize > pos ) + { + const int size = std::min( (long long)File_header::size, isize - pos ); for( int i = 0; i < files; ++i ) { const int infd = infd_vector[i]; - if( seek_read( infd, header.data, File_header::size, pos ) == File_header::size && - header.verify_magic() && header.verify_version() ) + if( seek_read( infd, header.data, size, pos ) == size && + header.verify_prefix( size ) ) { error_ = "Last member in input file is truncated or corrupt."; retval_ = 2; goto error; } } + } pos -= member_size; member_vector.push_back( Member( 0, trailer.data_size(), pos, member_size, 0 ) ); |