diff options
Diffstat (limited to 'file_index.c')
-rw-r--r-- | file_index.c | 268 |
1 files changed, 268 insertions, 0 deletions
diff --git a/file_index.c b/file_index.c new file mode 100644 index 0000000..e737608 --- /dev/null +++ b/file_index.c @@ -0,0 +1,268 @@ +/* Lunzip - Decompressor for the lzip format + Copyright (C) 2010-2017 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <errno.h> +#include <stdbool.h> +#include <stdio.h> +#include <string.h> +#include <stdint.h> +#include <stdlib.h> +#include <unistd.h> + +#include "lzip.h" +#include "file_index.h" + + +static int seek_read( const int fd, uint8_t * const buf, const int size, + const long long pos ) + { + if( lseek( fd, pos, SEEK_SET ) == pos ) + return readblock( fd, buf, size ); + return 0; + } + + +static bool add_error( struct File_index * const fi, const char * const msg ) + { + const int len = strlen( msg ); + void * tmp = resize_buffer( fi->error, fi->error_size + len + 1 ); + if( !tmp ) return false; + fi->error = (char *)tmp; + strncpy( fi->error + fi->error_size, msg, len + 1 ); + fi->error_size += len; + return true; + } + + +static bool push_back_member( struct File_index * const fi, + const long long dp, const long long ds, + const long long mp, const long long ms, + const unsigned dict_size ) + { + struct Member * p; + void * tmp = resize_buffer( fi->member_vector, + ( fi->members + 1 ) * sizeof fi->member_vector[0] ); + if( !tmp ) + { add_error( fi, "Not enough memory." ); fi->retval = 1; return false; } + fi->member_vector = (struct Member *)tmp; + p = &(fi->member_vector[fi->members]); + init_member( p, dp, ds, mp, ms, dict_size ); + ++fi->members; + return true; + } + + +static void Fi_free_member_vector( struct File_index * const fi ) + { + if( fi->member_vector ) + { free( fi->member_vector ); fi->member_vector = 0; } + fi->members = 0; + } + + +static void Fi_reverse_member_vector( struct File_index * const fi ) + { + struct Member tmp; + long i; + for( i = 0; i < fi->members / 2; ++i ) + { + tmp = fi->member_vector[i]; + fi->member_vector[i] = fi->member_vector[fi->members-i-1]; + fi->member_vector[fi->members-i-1] = tmp; + } + } + + +static void Fi_set_errno_error( struct File_index * const fi, + const char * const msg ) + { + add_error( fi, msg ); add_error( fi, strerror( errno ) ); + fi->retval = 1; + } + +static void Fi_set_num_error( struct File_index * const fi, + const char * const msg, unsigned long long num ) + { + char buf[80]; + snprintf( buf, sizeof buf, "%s%llu", msg, num ); + add_error( fi, buf ); + fi->retval = 2; + } + + +/* If successful, push last member and set pos to member header. */ +static bool Fi_skip_trailing_data( struct File_index * const fi, + const int fd, long long * const pos ) + { + enum { block_size = 16384, + buffer_size = block_size + Ft_size - 1 + Fh_size }; + uint8_t buffer[buffer_size]; + int bsize = *pos % block_size; /* total bytes in buffer */ + int search_size, rd_size; + unsigned long long ipos; + int i; + if( bsize <= buffer_size - block_size ) bsize += block_size; + search_size = bsize; /* bytes to search for trailer */ + rd_size = bsize; /* bytes to read from file */ + ipos = *pos - rd_size; /* aligned to block_size */ + if( *pos < min_member_size ) return false; + + while( true ) + { + const uint8_t max_msb = ( ipos + search_size ) >> 56; + if( seek_read( fd, buffer, rd_size, ipos ) != rd_size ) + { Fi_set_errno_error( fi, "Error seeking member trailer: " ); + return false; } + for( i = search_size; i >= Ft_size; --i ) + if( buffer[i-1] <= max_msb ) /* most significant byte of member_size */ + { + File_header header; + File_trailer * trailer = (File_trailer *)( buffer + i - Ft_size ); + const unsigned long long member_size = Ft_get_member_size( *trailer ); + unsigned dictionary_size; + if( member_size == 0 ) + { while( i > Ft_size && buffer[i-9] == 0 ) --i; continue; } + if( member_size < min_member_size || member_size > ipos + i ) + continue; + if( seek_read( fd, header, Fh_size, + ipos + i - member_size ) != Fh_size ) + { Fi_set_errno_error( fi, "Error reading member header: " ); + return false; } + dictionary_size = Fh_get_dictionary_size( header ); + if( !Fh_verify_magic( header ) || !Fh_verify_version( header ) || + !isvalid_ds( dictionary_size ) ) continue; + if( Fh_verify_prefix( buffer + i, bsize - i ) ) + { + add_error( fi, "Last member in input file is truncated or corrupt." ); + fi->retval = 2; return false; + } + *pos = ipos + i - member_size; + return push_back_member( fi, 0, Ft_get_data_size( *trailer ), *pos, + member_size, dictionary_size ); + } + if( ipos <= 0 ) + { Fi_set_num_error( fi, "Member size in trailer is corrupt at pos ", + *pos - 8 ); + return false; } + bsize = buffer_size; + search_size = bsize - Fh_size; + rd_size = block_size; + ipos -= rd_size; + memcpy( buffer + rd_size, buffer, buffer_size - rd_size ); + } + } + + +bool Fi_init( struct File_index * const fi, const int infd, + const bool ignore_trailing ) + { + File_header header; + long long pos; + long i; + fi->member_vector = 0; + fi->error = 0; + fi->isize = lseek( infd, 0, SEEK_END ); + fi->members = 0; + fi->error_size = 0; + fi->retval = 0; + if( fi->isize < 0 ) + { Fi_set_errno_error( fi, "Input file is not seekable: " ); return false; } + if( fi->isize < min_member_size ) + { add_error( fi, "Input file is too short." ); fi->retval = 2; + return false; } + if( fi->isize > INT64_MAX ) + { add_error( fi, "Input file is too long (2^63 bytes or more)." ); + fi->retval = 2; return false; } + + if( seek_read( infd, header, Fh_size, 0 ) != Fh_size ) + { Fi_set_errno_error( fi, "Error reading member header: " ); return false; } + if( !Fh_verify_magic( header ) ) + { add_error( fi, bad_magic_msg ); fi->retval = 2; return false; } + if( !Fh_verify_version( header ) ) + { add_error( fi, bad_version( Fh_version( header ) ) ); fi->retval = 2; + return false; } + if( !isvalid_ds( Fh_get_dictionary_size( header ) ) ) + { add_error( fi, bad_dict_msg ); fi->retval = 2; return false; } + + pos = fi->isize; /* always points to a header or to EOF */ + while( pos >= min_member_size ) + { + File_trailer trailer; + unsigned long long member_size; + unsigned dictionary_size; + if( seek_read( infd, trailer, Ft_size, pos - Ft_size ) != Ft_size ) + { Fi_set_errno_error( fi, "Error reading member trailer: " ); break; } + member_size = Ft_get_member_size( trailer ); + if( member_size < min_member_size || member_size > (unsigned long long)pos ) + { + if( fi->members > 0 ) + Fi_set_num_error( fi, "Member size in trailer is corrupt at pos ", + pos - 8 ); + else if( Fi_skip_trailing_data( fi, infd, &pos ) ) + { if( ignore_trailing ) continue; + add_error( fi, trailing_msg ); fi->retval = 2; return false; } + break; + } + if( seek_read( infd, header, Fh_size, pos - member_size ) != Fh_size ) + { Fi_set_errno_error( fi, "Error reading member header: " ); break; } + dictionary_size = Fh_get_dictionary_size( header ); + if( !Fh_verify_magic( header ) || !Fh_verify_version( header ) || + !isvalid_ds( dictionary_size ) ) + { + if( fi->members > 0 ) + Fi_set_num_error( fi, "Bad header at pos ", pos - member_size ); + else if( Fi_skip_trailing_data( fi, infd, &pos ) ) + { if( ignore_trailing ) continue; + add_error( fi, trailing_msg ); fi->retval = 2; return false; } + break; + } + pos -= member_size; + if( !push_back_member( fi, 0, Ft_get_data_size( trailer ), pos, + member_size, dictionary_size ) ) + return false; + } + if( pos != 0 || fi->members <= 0 ) + { + Fi_free_member_vector( fi ); + if( fi->retval == 0 ) + { add_error( fi, "Can't create file index." ); fi->retval = 2; } + return false; + } + Fi_reverse_member_vector( fi ); + for( i = 0; i < fi->members - 1; ++i ) + { + const long long end = block_end( fi->member_vector[i].dblock ); + if( end < 0 || end > INT64_MAX ) + { + Fi_free_member_vector( fi ); + add_error( fi, "Data in input file is too long (2^63 bytes or more)." ); + fi->retval = 2; return false; + } + fi->member_vector[i+1].dblock.pos = end; + } + return true; + } + + +void Fi_free( struct File_index * const fi ) + { + Fi_free_member_vector( fi ); + if( fi->error ) { free( fi->error ); fi->error = 0; } + fi->error_size = 0; + } |