diff options
Diffstat (limited to '')
-rw-r--r-- | archive_reader.cc | 224 |
1 files changed, 224 insertions, 0 deletions
diff --git a/archive_reader.cc b/archive_reader.cc new file mode 100644 index 0000000..496c33b --- /dev/null +++ b/archive_reader.cc @@ -0,0 +1,224 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2020 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <algorithm> +#include <cerrno> +#include <climits> +#include <cstring> +#include <string> +#include <vector> +#include <stdint.h> +#include <unistd.h> +#include <lzlib.h> + +#include "tarlz.h" +#include "lzip_index.h" +#include "archive_reader.h" + + +int Archive_reader_base::parse_records( Extended & extended, + const Tar_header header, + Resizable_buffer & rbuf, + const bool permissive ) + { + const long long edsize = parse_octal( header + size_o, size_l ); + const long long bufsize = round_up( edsize ); + if( edsize <= 0 || edsize >= 1LL << 33 || bufsize >= INT_MAX ) + return 1; // overflow or no extended data + if( !rbuf.resize( bufsize ) ) return 1; // extended records buffer + int retval = read( (uint8_t *)rbuf(), bufsize ); + if( retval == 0 && !extended.parse( rbuf(), edsize, permissive ) ) + retval = 2; + return retval; + } + + +/* Read 'size' uncompressed bytes, decompressing the input if needed. + Return value: 0 = OK, 1 = damaged member, 2 = fatal error. */ +int Archive_reader::read( uint8_t * const buf, const int size ) + { + if( fatal_ ) return err( 2 ); + if( first_read ) // check format + { + first_read = false; + uncompressed_seekable = ad.seekable && !ad.indexed && + ad.lzip_index.file_size() > 3 * header_size; + if( size != header_size ) + internal_error( "size != header_size on first call." ); + const int rd = readblock( ad.infd, buf, size ); + if( rd != size && errno ) + return err( 2, "Error reading archive", errno, rd ); + const Lzip_header & header = (*(const Lzip_header *)buf); + const bool islz = ( rd >= min_member_size && header.verify_magic() && + header.verify_version() && + isvalid_ds( header.dictionary_size() ) ); + const bool istar = ( rd == size && verify_ustar_chksum( buf ) ); + const bool iseof = + ( !islz && !istar && rd == size && block_is_zero( buf, size ) ); + bool maybe_lz = islz; // maybe corrupt tar.lz + if( !islz && !istar && !iseof ) // corrupt or invalid format + { + const bool lz_ext = has_lz_ext( ad.name ); + show_file_error( ad.namep, lz_ext ? posix_lz_msg : posix_msg ); + if( lz_ext && rd >= min_member_size ) maybe_lz = true; + else return err( 1 ); + } + if( !maybe_lz ) // uncompressed + { if( rd == size ) return 0; + return err( 2, "EOF reading archive", 0, rd ); } + uncompressed_seekable = false; // compressed + decoder = LZ_decompress_open(); + if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok ) + return err( 2, mem_msg ); + if( LZ_decompress_write( decoder, buf, rd ) != rd ) + internal_error( "library error (LZ_decompress_write)." ); + const int ret = read( buf, size ); if( ret != 0 ) return ret; + if( verify_ustar_chksum( buf ) || block_is_zero( buf, size ) ) return 0; + return err( 2, islz ? posix_lz_msg : "" ); + } + + if( !decoder ) // uncompressed + { + const int rd = readblock( ad.infd, buf, size ); + if( rd == size ) return 0; else return err( 2, end_msg, 0, rd ); + } + const int ibuf_size = 16384; + uint8_t ibuf[ibuf_size]; + int sz = 0; + while( sz < size ) + { + const int rd = LZ_decompress_read( decoder, buf + sz, size - sz ); + if( rd < 0 ) + { + const unsigned long long old_pos = LZ_decompress_total_in_size( decoder ); + if( LZ_decompress_sync_to_member( decoder ) < 0 ) + internal_error( "library error (LZ_decompress_sync_to_member)." ); + e_skip_ = true; set_error_status( 2 ); + const unsigned long long new_pos = LZ_decompress_total_in_size( decoder ); + // lzlib < 1.8 does not update total_in_size when syncing to member + if( new_pos >= old_pos && new_pos < LLONG_MAX ) + return err( 1, "", 0, sz, true ); + return err( 2, "Skipping to next header failed. " + "Lzlib 1.8 or newer required.", 0, sz ); + } + if( rd == 0 && LZ_decompress_finished( decoder ) == 1 ) + { return err( 2, end_msg, 0, sz ); } + sz += rd; + if( sz < size && !at_eof && LZ_decompress_write_size( decoder ) > 0 ) + { + const int rsize = std::min( ibuf_size, LZ_decompress_write_size( decoder ) ); + const int rd = readblock( ad.infd, ibuf, rsize ); + if( LZ_decompress_write( decoder, ibuf, rd ) != rd ) + internal_error( "library error (LZ_decompress_write)." ); + if( rd < rsize ) + { + at_eof = true; LZ_decompress_finish( decoder ); + if( errno ) return err( 2, "Error reading archive.", errno, sz ); + } + } + } + return 0; + } + + +int Archive_reader::skip_member( const Extended & extended ) + { + long long rest = round_up( extended.file_size() ); // size + padding + if( uncompressed_seekable && lseek( ad.infd, rest, SEEK_CUR ) > 0 ) + return 0; + const int bufsize = 32 * header_size; + uint8_t buf[bufsize]; + while( rest > 0 ) // skip tar member + { + const int rsize = ( rest >= bufsize ) ? bufsize : rest; + const int ret = read( buf, rsize ); + if( ret != 0 ) return ret; + rest -= rsize; + } + return 0; + } + + +void Archive_reader_i::set_member( const long i ) + { + LZ_decompress_reset( decoder ); // prepare for new member + data_pos_ = ad.lzip_index.dblock( i ).pos(); + mdata_end = ad.lzip_index.dblock( i ).end(); + archive_pos = ad.lzip_index.mblock( i ).pos(); + member_id = i; + } + + +/* Read 'size' decompressed bytes from the archive. + Return value: 0 = OK, 1 = damaged member, 2 = fatal error. */ +int Archive_reader_i::read( uint8_t * const buf, const int size ) + { + int sz = 0; + + while( sz < size ) + { + const int rd = LZ_decompress_read( decoder, buf + sz, size - sz ); + if( rd < 0 ) + return err( 1, LZ_strerror( LZ_decompress_errno( decoder ) ) ); + if( rd == 0 && LZ_decompress_finished( decoder ) == 1 ) + return err( 2, end_msg ); + sz += rd; data_pos_ += rd; + if( sz < size && LZ_decompress_write_size( decoder ) > 0 ) + { + const long long ibuf_size = 16384; + uint8_t ibuf[ibuf_size]; + const long long member_end = ad.lzip_index.mblock( member_id ).end(); + const long long rest = ( ( archive_pos < member_end ) ? + member_end : ad.lzip_index.cdata_size() ) - archive_pos; + const int rsize = std::min( LZ_decompress_write_size( decoder ), + (int)std::min( ibuf_size, rest ) ); + if( rsize <= 0 ) LZ_decompress_finish( decoder ); + else + { + const int rd = preadblock( ad.infd, ibuf, rsize, archive_pos ); + if( LZ_decompress_write( decoder, ibuf, rd ) != rd ) + internal_error( "library error (LZ_decompress_write)." ); + archive_pos += rd; + if( rd < rsize ) + { + LZ_decompress_finish( decoder ); + if( errno ) return err( 2, "Error reading archive" ); + } + } + } + } + return 0; + } + + +int Archive_reader_i::skip_member( const Extended & extended ) + { + long long rest = round_up( extended.file_size() ); // size + padding + if( data_pos_ + rest == mdata_end ) { data_pos_ = mdata_end; return 0; } + const int bufsize = 32 * header_size; + uint8_t buf[bufsize]; + while( rest > 0 ) // skip tar member + { + const int rsize = ( rest >= bufsize ) ? bufsize : rest; + const int ret = read( buf, rsize ); + if( ret != 0 ) return ret; + rest -= rsize; + } + return 0; + } |