summaryrefslogtreecommitdiffstats
path: root/lzip_index.cc
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--lzip_index.cc110
1 files changed, 65 insertions, 45 deletions
diff --git a/lzip_index.cc b/lzip_index.cc
index d9c810c..fe79f5b 100644
--- a/lzip_index.cc
+++ b/lzip_index.cc
@@ -1,18 +1,18 @@
-/* Plzip - Massively parallel implementation of lzip
- Copyright (C) 2009-2019 Antonio Diaz Diaz.
+/* Plzip - Massively parallel implementation of lzip
+ Copyright (C) 2009-2021 Antonio Diaz Diaz.
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 2 of the License, or
- (at your option) any later version.
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _FILE_OFFSET_BITS 64
@@ -23,7 +23,6 @@
#include <cstring>
#include <string>
#include <vector>
-#include <pthread.h>
#include <stdint.h>
#include <unistd.h>
@@ -44,6 +43,19 @@ int seek_read( const int fd, uint8_t * const buf, const int size,
} // end namespace
+bool Lzip_index::check_header_error( const Lzip_header & header,
+ const bool first )
+ {
+ if( !header.verify_magic() )
+ { error_ = bad_magic_msg; retval_ = 2; if( first ) bad_magic_ = true;
+ return true; }
+ if( !header.verify_version() )
+ { error_ = bad_version( header.version() ); retval_ = 2; return true; }
+ if( !isvalid_ds( header.dictionary_size() ) )
+ { error_ = bad_dict_msg; retval_ = 2; return true; }
+ return false;
+ }
+
void Lzip_index::set_errno_error( const char * const msg )
{
error_ = msg; error_ += std::strerror( errno );
@@ -59,14 +71,24 @@ void Lzip_index::set_num_error( const char * const msg, unsigned long long num )
}
+bool Lzip_index::read_header( const int fd, Lzip_header & header,
+ const long long pos )
+ {
+ if( seek_read( fd, header.data, Lzip_header::size, pos ) != Lzip_header::size )
+ { set_errno_error( "Error reading member header: " ); return false; }
+ return true;
+ }
+
+
// If successful, push last member and set pos to member header.
-bool Lzip_index::skip_trailing_data( const int fd, long long & pos,
- const bool ignore_trailing, const bool loose_trailing )
+bool Lzip_index::skip_trailing_data( const int fd, unsigned long long & pos,
+ const bool ignore_trailing,
+ const bool loose_trailing )
{
+ if( pos < min_member_size ) return false;
enum { block_size = 16384,
buffer_size = block_size + Lzip_trailer::size - 1 + Lzip_header::size };
uint8_t buffer[buffer_size];
- if( pos < min_member_size ) return false;
int bsize = pos % block_size; // total bytes in buffer
if( bsize <= buffer_size - block_size ) bsize += block_size;
int search_size = bsize; // bytes to search for trailer
@@ -89,26 +111,30 @@ bool Lzip_index::skip_trailing_data( const int fd, long long & pos,
if( member_size > ipos + i || !trailer.verify_consistency() )
continue;
Lzip_header header;
- if( seek_read( fd, header.data, Lzip_header::size,
- ipos + i - member_size ) != Lzip_header::size )
- { set_errno_error( "Error reading member header: " ); return false; }
- const unsigned dictionary_size = header.dictionary_size();
- if( !header.verify_magic() || !header.verify_version() ||
- !isvalid_ds( dictionary_size ) ) continue;
- if( (*(const Lzip_header *)( buffer + i )).verify_prefix( bsize - i ) )
- { error_ = "Last member in input file is truncated or corrupt.";
- retval_ = 2; return false; }
- if( !loose_trailing && bsize - i >= Lzip_header::size &&
- (*(const Lzip_header *)( buffer + i )).verify_corrupt() )
+ if( !read_header( fd, header, ipos + i - member_size ) ) return false;
+ if( !header.verify() ) continue;
+ const Lzip_header & header2 = *(const Lzip_header *)( buffer + i );
+ const bool full_h2 = bsize - i >= Lzip_header::size;
+ if( header2.verify_prefix( bsize - i ) ) // last member
+ {
+ if( !full_h2 ) error_ = "Last member in input file is truncated.";
+ else if( !check_header_error( header2, false ) )
+ error_ = "Last member in input file is truncated or corrupt.";
+ retval_ = 2; return false;
+ }
+ if( !loose_trailing && full_h2 && header2.verify_corrupt() )
{ error_ = corrupt_mm_msg; retval_ = 2; return false; }
if( !ignore_trailing )
{ error_ = trailing_msg; retval_ = 2; return false; }
pos = ipos + i - member_size;
+ const unsigned dictionary_size = header.dictionary_size();
member_vector.push_back( Member( 0, trailer.data_size(), pos,
member_size, dictionary_size ) );
+ if( dictionary_size_ < dictionary_size )
+ dictionary_size_ = dictionary_size;
return true;
}
- if( ipos <= 0 )
+ if( ipos == 0 )
{ set_num_error( "Bad trailer at pos ", pos - Lzip_trailer::size );
return false; }
bsize = buffer_size;
@@ -122,7 +148,8 @@ bool Lzip_index::skip_trailing_data( const int fd, long long & pos,
Lzip_index::Lzip_index( const int infd, const bool ignore_trailing,
const bool loose_trailing )
- : insize( lseek( infd, 0, SEEK_END ) ), retval_( 0 )
+ : insize( lseek( infd, 0, SEEK_END ) ), retval_( 0 ), dictionary_size_( 0 ),
+ bad_magic_( false )
{
if( insize < 0 )
{ set_errno_error( "Input file is not seekable: " ); return; }
@@ -133,16 +160,10 @@ Lzip_index::Lzip_index( const int infd, const bool ignore_trailing,
retval_ = 2; return; }
Lzip_header header;
- if( seek_read( infd, header.data, Lzip_header::size, 0 ) != Lzip_header::size )
- { set_errno_error( "Error reading member header: " ); return; }
- if( !header.verify_magic() )
- { error_ = bad_magic_msg; retval_ = 2; return; }
- if( !header.verify_version() )
- { error_ = bad_version( header.version() ); retval_ = 2; return; }
- if( !isvalid_ds( header.dictionary_size() ) )
- { error_ = bad_dict_msg; retval_ = 2; return; }
+ if( !read_header( infd, header, 0 ) ) return;
+ if( check_header_error( header, true ) ) return;
- long long pos = insize; // always points to a header or to EOF
+ unsigned long long pos = insize; // always points to a header or to EOF
while( pos >= min_member_size )
{
Lzip_trailer trailer;
@@ -150,7 +171,7 @@ Lzip_index::Lzip_index( const int infd, const bool ignore_trailing,
pos - Lzip_trailer::size ) != Lzip_trailer::size )
{ set_errno_error( "Error reading member trailer: " ); break; }
const unsigned long long member_size = trailer.member_size();
- if( member_size > (unsigned long long)pos || !trailer.verify_consistency() )
+ if( member_size > pos || !trailer.verify_consistency() ) // bad trailer
{
if( member_vector.empty() )
{ if( skip_trailing_data( infd, pos, ignore_trailing, loose_trailing ) )
@@ -158,12 +179,8 @@ Lzip_index::Lzip_index( const int infd, const bool ignore_trailing,
set_num_error( "Bad trailer at pos ", pos - Lzip_trailer::size );
break;
}
- if( seek_read( infd, header.data, Lzip_header::size,
- pos - member_size ) != Lzip_header::size )
- { set_errno_error( "Error reading member header: " ); break; }
- const unsigned dictionary_size = header.dictionary_size();
- if( !header.verify_magic() || !header.verify_version() ||
- !isvalid_ds( dictionary_size ) )
+ if( !read_header( infd, header, pos - member_size ) ) break;
+ if( !header.verify() ) // bad header
{
if( member_vector.empty() )
{ if( skip_trailing_data( infd, pos, ignore_trailing, loose_trailing ) )
@@ -172,8 +189,11 @@ Lzip_index::Lzip_index( const int infd, const bool ignore_trailing,
break;
}
pos -= member_size;
+ const unsigned dictionary_size = header.dictionary_size();
member_vector.push_back( Member( 0, trailer.data_size(), pos,
member_size, dictionary_size ) );
+ if( dictionary_size_ < dictionary_size )
+ dictionary_size_ = dictionary_size;
}
if( pos != 0 || member_vector.empty() )
{