diff options
Diffstat (limited to '')
-rw-r--r-- | lzip_index.cc | 110 |
1 files changed, 65 insertions, 45 deletions
diff --git a/lzip_index.cc b/lzip_index.cc index d9c810c..fe79f5b 100644 --- a/lzip_index.cc +++ b/lzip_index.cc @@ -1,18 +1,18 @@ -/* Plzip - Massively parallel implementation of lzip - Copyright (C) 2009-2019 Antonio Diaz Diaz. +/* Plzip - Massively parallel implementation of lzip + Copyright (C) 2009-2021 Antonio Diaz Diaz. - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ #define _FILE_OFFSET_BITS 64 @@ -23,7 +23,6 @@ #include <cstring> #include <string> #include <vector> -#include <pthread.h> #include <stdint.h> #include <unistd.h> @@ -44,6 +43,19 @@ int seek_read( const int fd, uint8_t * const buf, const int size, } // end namespace +bool Lzip_index::check_header_error( const Lzip_header & header, + const bool first ) + { + if( !header.verify_magic() ) + { error_ = bad_magic_msg; retval_ = 2; if( first ) bad_magic_ = true; + return true; } + if( !header.verify_version() ) + { error_ = bad_version( header.version() ); retval_ = 2; return true; } + if( !isvalid_ds( header.dictionary_size() ) ) + { error_ = bad_dict_msg; retval_ = 2; return true; } + return false; + } + void Lzip_index::set_errno_error( const char * const msg ) { error_ = msg; error_ += std::strerror( errno ); @@ -59,14 +71,24 @@ void Lzip_index::set_num_error( const char * const msg, unsigned long long num ) } +bool Lzip_index::read_header( const int fd, Lzip_header & header, + const long long pos ) + { + if( seek_read( fd, header.data, Lzip_header::size, pos ) != Lzip_header::size ) + { set_errno_error( "Error reading member header: " ); return false; } + return true; + } + + // If successful, push last member and set pos to member header. -bool Lzip_index::skip_trailing_data( const int fd, long long & pos, - const bool ignore_trailing, const bool loose_trailing ) +bool Lzip_index::skip_trailing_data( const int fd, unsigned long long & pos, + const bool ignore_trailing, + const bool loose_trailing ) { + if( pos < min_member_size ) return false; enum { block_size = 16384, buffer_size = block_size + Lzip_trailer::size - 1 + Lzip_header::size }; uint8_t buffer[buffer_size]; - if( pos < min_member_size ) return false; int bsize = pos % block_size; // total bytes in buffer if( bsize <= buffer_size - block_size ) bsize += block_size; int search_size = bsize; // bytes to search for trailer @@ -89,26 +111,30 @@ bool Lzip_index::skip_trailing_data( const int fd, long long & pos, if( member_size > ipos + i || !trailer.verify_consistency() ) continue; Lzip_header header; - if( seek_read( fd, header.data, Lzip_header::size, - ipos + i - member_size ) != Lzip_header::size ) - { set_errno_error( "Error reading member header: " ); return false; } - const unsigned dictionary_size = header.dictionary_size(); - if( !header.verify_magic() || !header.verify_version() || - !isvalid_ds( dictionary_size ) ) continue; - if( (*(const Lzip_header *)( buffer + i )).verify_prefix( bsize - i ) ) - { error_ = "Last member in input file is truncated or corrupt."; - retval_ = 2; return false; } - if( !loose_trailing && bsize - i >= Lzip_header::size && - (*(const Lzip_header *)( buffer + i )).verify_corrupt() ) + if( !read_header( fd, header, ipos + i - member_size ) ) return false; + if( !header.verify() ) continue; + const Lzip_header & header2 = *(const Lzip_header *)( buffer + i ); + const bool full_h2 = bsize - i >= Lzip_header::size; + if( header2.verify_prefix( bsize - i ) ) // last member + { + if( !full_h2 ) error_ = "Last member in input file is truncated."; + else if( !check_header_error( header2, false ) ) + error_ = "Last member in input file is truncated or corrupt."; + retval_ = 2; return false; + } + if( !loose_trailing && full_h2 && header2.verify_corrupt() ) { error_ = corrupt_mm_msg; retval_ = 2; return false; } if( !ignore_trailing ) { error_ = trailing_msg; retval_ = 2; return false; } pos = ipos + i - member_size; + const unsigned dictionary_size = header.dictionary_size(); member_vector.push_back( Member( 0, trailer.data_size(), pos, member_size, dictionary_size ) ); + if( dictionary_size_ < dictionary_size ) + dictionary_size_ = dictionary_size; return true; } - if( ipos <= 0 ) + if( ipos == 0 ) { set_num_error( "Bad trailer at pos ", pos - Lzip_trailer::size ); return false; } bsize = buffer_size; @@ -122,7 +148,8 @@ bool Lzip_index::skip_trailing_data( const int fd, long long & pos, Lzip_index::Lzip_index( const int infd, const bool ignore_trailing, const bool loose_trailing ) - : insize( lseek( infd, 0, SEEK_END ) ), retval_( 0 ) + : insize( lseek( infd, 0, SEEK_END ) ), retval_( 0 ), dictionary_size_( 0 ), + bad_magic_( false ) { if( insize < 0 ) { set_errno_error( "Input file is not seekable: " ); return; } @@ -133,16 +160,10 @@ Lzip_index::Lzip_index( const int infd, const bool ignore_trailing, retval_ = 2; return; } Lzip_header header; - if( seek_read( infd, header.data, Lzip_header::size, 0 ) != Lzip_header::size ) - { set_errno_error( "Error reading member header: " ); return; } - if( !header.verify_magic() ) - { error_ = bad_magic_msg; retval_ = 2; return; } - if( !header.verify_version() ) - { error_ = bad_version( header.version() ); retval_ = 2; return; } - if( !isvalid_ds( header.dictionary_size() ) ) - { error_ = bad_dict_msg; retval_ = 2; return; } + if( !read_header( infd, header, 0 ) ) return; + if( check_header_error( header, true ) ) return; - long long pos = insize; // always points to a header or to EOF + unsigned long long pos = insize; // always points to a header or to EOF while( pos >= min_member_size ) { Lzip_trailer trailer; @@ -150,7 +171,7 @@ Lzip_index::Lzip_index( const int infd, const bool ignore_trailing, pos - Lzip_trailer::size ) != Lzip_trailer::size ) { set_errno_error( "Error reading member trailer: " ); break; } const unsigned long long member_size = trailer.member_size(); - if( member_size > (unsigned long long)pos || !trailer.verify_consistency() ) + if( member_size > pos || !trailer.verify_consistency() ) // bad trailer { if( member_vector.empty() ) { if( skip_trailing_data( infd, pos, ignore_trailing, loose_trailing ) ) @@ -158,12 +179,8 @@ Lzip_index::Lzip_index( const int infd, const bool ignore_trailing, set_num_error( "Bad trailer at pos ", pos - Lzip_trailer::size ); break; } - if( seek_read( infd, header.data, Lzip_header::size, - pos - member_size ) != Lzip_header::size ) - { set_errno_error( "Error reading member header: " ); break; } - const unsigned dictionary_size = header.dictionary_size(); - if( !header.verify_magic() || !header.verify_version() || - !isvalid_ds( dictionary_size ) ) + if( !read_header( infd, header, pos - member_size ) ) break; + if( !header.verify() ) // bad header { if( member_vector.empty() ) { if( skip_trailing_data( infd, pos, ignore_trailing, loose_trailing ) ) @@ -172,8 +189,11 @@ Lzip_index::Lzip_index( const int infd, const bool ignore_trailing, break; } pos -= member_size; + const unsigned dictionary_size = header.dictionary_size(); member_vector.push_back( Member( 0, trailer.data_size(), pos, member_size, dictionary_size ) ); + if( dictionary_size_ < dictionary_size ) + dictionary_size_ = dictionary_size; } if( pos != 0 || member_vector.empty() ) { |