From 40096590262a76e88eebba97605fade30029428a Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 May 2017 17:54:46 +0200 Subject: Adding upstream version 1.6. Signed-off-by: Daniel Baumann --- file_index.cc | 130 ++++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 91 insertions(+), 39 deletions(-) (limited to 'file_index.cc') diff --git a/file_index.cc b/file_index.cc index 0e48833..581b516 100644 --- a/file_index.cc +++ b/file_index.cc @@ -1,5 +1,5 @@ /* Plzip - Parallel compressor compatible with lzip - Copyright (C) 2009-2016 Antonio Diaz Diaz. + Copyright (C) 2009-2017 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -15,6 +15,8 @@ along with this program. If not, see . */ +#define _FILE_OFFSET_BITS 64 + #include #include #include @@ -29,6 +31,8 @@ #include "file_index.h" +namespace { + int seek_read( const int fd, uint8_t * const buf, const int size, const long long pos ) { @@ -37,6 +41,8 @@ int seek_read( const int fd, uint8_t * const buf, const int size, return 0; } +} // end namespace + void File_index::set_errno_error( const char * const msg ) { @@ -44,20 +50,75 @@ void File_index::set_errno_error( const char * const msg ) retval_ = 1; } -void File_index::set_num_error( const char * const msg1, unsigned long long num, - const char * const msg2 ) +void File_index::set_num_error( const char * const msg, unsigned long long num ) { char buf[80]; - snprintf( buf, sizeof buf, "%s%llu%s", msg1, num, msg2 ); + snprintf( buf, sizeof buf, "%s%llu", msg, num ); error_ = buf; - retval_ = member_vector.empty() ? 1 : 2; // maybe trailing data + retval_ = 2; } -File_index::File_index( const int infd, const bool ignore_garbage ) - : retval_( 0 ) +// If successful, push last member and set pos to member header. +bool File_index::skip_trailing_data( const int fd, long long & pos ) + { + enum { block_size = 16384, + buffer_size = block_size + File_trailer::size - 1 + File_header::size }; + uint8_t buffer[buffer_size]; + if( pos < min_member_size ) return false; + int bsize = pos % block_size; // total bytes in buffer + if( bsize <= buffer_size - block_size ) bsize += block_size; + int search_size = bsize; // bytes to search for trailer + int rd_size = bsize; // bytes to read from file + unsigned long long ipos = pos - rd_size; // aligned to block_size + + while( true ) + { + if( seek_read( fd, buffer, rd_size, ipos ) != rd_size ) + { set_errno_error( "Error seeking member trailer: " ); return false; } + const uint8_t max_msb = ( ipos + search_size ) >> 56; + for( int i = search_size; i >= File_trailer::size; --i ) + if( buffer[i-1] <= max_msb ) // most significant byte of member_size + { + File_trailer & trailer = + *(File_trailer *)( buffer + i - File_trailer::size ); + const unsigned long long member_size = trailer.member_size(); + if( member_size == 0 ) + { while( i > File_trailer::size && buffer[i-9] == 0 ) --i; continue; } + if( member_size < min_member_size || member_size > ipos + i ) + continue; + File_header header; + if( seek_read( fd, header.data, File_header::size, + ipos + i - member_size ) != File_header::size ) + { set_errno_error( "Error reading member header: " ); return false; } + const unsigned dictionary_size = header.dictionary_size(); + if( !header.verify_magic() || !header.verify_version() || + !isvalid_ds( dictionary_size ) ) continue; + if( (*(File_header *)( buffer + i )).verify_prefix( bsize - i ) ) + { + error_ = "Last member in input file is truncated or corrupt."; + retval_ = 2; return false; + } + pos = ipos + i - member_size; + member_vector.push_back( Member( 0, trailer.data_size(), pos, + member_size, dictionary_size ) ); + return true; + } + if( ipos <= 0 ) + { set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 ); + return false; } + bsize = buffer_size; + search_size = bsize - File_header::size; + rd_size = block_size; + ipos -= rd_size; + std::memcpy( buffer + rd_size, buffer, buffer_size - rd_size ); + } + } + + +File_index::File_index( const int infd, const bool ignore_trailing ) + : isize( lseek( infd, 0, SEEK_END ) ), retval_( 0 ) { - const long long isize = lseek( infd, 0, SEEK_END ); if( isize < 0 ) { set_errno_error( "Input file is not seekable: " ); return; } if( isize < min_member_size ) @@ -70,55 +131,46 @@ File_index::File_index( const int infd, const bool ignore_garbage ) if( seek_read( infd, header.data, File_header::size, 0 ) != File_header::size ) { set_errno_error( "Error reading member header: " ); return; } if( !header.verify_magic() ) - { error_ = "Bad magic number (file not in lzip format)."; - retval_ = 2; return; } + { error_ = bad_magic_msg; retval_ = 2; return; } if( !header.verify_version() ) - { set_num_error( "Version ", header.version(), - " member format not supported." ); retval_ = 2; return; } + { error_ = bad_version( header.version() ); retval_ = 2; return; } + if( !isvalid_ds( header.dictionary_size() ) ) + { error_ = bad_dict_msg; retval_ = 2; return; } - long long pos = isize; // always points to a header or to EOF - const long long max_garbage = 1024; + long long pos = isize; // always points to a header or to EOF while( pos >= min_member_size ) { File_trailer trailer; if( seek_read( infd, trailer.data, File_trailer::size, pos - File_trailer::size ) != File_trailer::size ) { set_errno_error( "Error reading member trailer: " ); break; } - const long long member_size = trailer.member_size(); - if( member_size < min_member_size || member_size > pos ) + const unsigned long long member_size = trailer.member_size(); + if( member_size < min_member_size || member_size > (unsigned long long)pos ) { - if( member_vector.empty() && isize - pos < max_garbage ) - { - if( ignore_garbage ) { --pos; continue; } // maybe trailing data - error_ = "Trailing data not allowed."; retval_ = 2; return; - } - set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 ); + if( !member_vector.empty() ) + set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 ); + else if( skip_trailing_data( infd, pos ) ) + { if( ignore_trailing ) continue; + error_ = trailing_msg; retval_ = 2; return; } break; } if( seek_read( infd, header.data, File_header::size, pos - member_size ) != File_header::size ) { set_errno_error( "Error reading member header: " ); break; } - if( !header.verify_magic() || !header.verify_version() ) - { - if( member_vector.empty() && isize - pos < max_garbage ) - { - if( ignore_garbage ) { --pos; continue; } // maybe trailing data - error_ = "Trailing data not allowed."; retval_ = 2; return; - } - set_num_error( "Bad header at pos ", pos - member_size ); - break; - } const unsigned dictionary_size = header.dictionary_size(); - if( member_vector.empty() && isize - pos > File_header::size && - seek_read( infd, header.data, File_header::size, pos ) == File_header::size && - header.verify_magic() && header.verify_version() ) + if( !header.verify_magic() || !header.verify_version() || + !isvalid_ds( dictionary_size ) ) { - error_ = "Last member in input file is truncated or corrupt."; - retval_ = 2; break; + if( !member_vector.empty() ) + set_num_error( "Bad header at pos ", pos - member_size ); + else if( skip_trailing_data( infd, pos ) ) + { if( ignore_trailing ) continue; + error_ = trailing_msg; retval_ = 2; return; } + break; } pos -= member_size; - member_vector.push_back( Member( 0, trailer.data_size(), - pos, member_size, dictionary_size ) ); + member_vector.push_back( Member( 0, trailer.data_size(), pos, + member_size, dictionary_size ) ); } if( pos != 0 || member_vector.empty() ) { -- cgit v1.2.3