From cb1387c92038634c063ee06a24e249b87525f519 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Tue, 23 Jan 2024 06:08:19 +0100 Subject: Merging upstream version 0.25. Signed-off-by: Daniel Baumann --- extended.cc | 141 +++++++++++++++++++++++++++++++----------------------------- 1 file changed, 74 insertions(+), 67 deletions(-) (limited to 'extended.cc') diff --git a/extended.cc b/extended.cc index f05d15f..0dfba9b 100644 --- a/extended.cc +++ b/extended.cc @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2022 Antonio Diaz Diaz. + Copyright (C) 2013-2024 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,6 @@ #include #include #include -#include #include "tarlz.h" @@ -30,34 +29,31 @@ const CRC32 crc32c( true ); namespace { -unsigned long long record_size( const unsigned keyword_size, - const unsigned long value_size ) +unsigned record_size( const unsigned keyword_size, const unsigned value_size ) { /* length + ' ' + keyword + '=' + value + '\n' minimize length; prefer "99<97_bytes>" to "100<97_bytes>" */ - unsigned long long size = 1 + keyword_size + 1 + value_size + 1; + unsigned size = 1 + keyword_size + 1 + value_size + 1; size += decimal_digits( decimal_digits( size ) + size ); return size; } -unsigned long long parse_decimal( const char * const ptr, - const char ** const tailp, - const unsigned long long size ) +long long parse_decimal( const char * const ptr, const char ** const tailp, + const int size, const unsigned long long limit = LLONG_MAX ) { unsigned long long result = 0; - unsigned long long i = 0; + int i = 0; while( i < size && std::isspace( (unsigned char)ptr[i] ) ) ++i; - if( !std::isdigit( (unsigned char)ptr[i] ) ) - { if( tailp ) *tailp = ptr; return 0; } + if( !std::isdigit( (unsigned char)ptr[i] ) ) { *tailp = ptr; return -1; } for( ; i < size && std::isdigit( (unsigned char)ptr[i] ); ++i ) { const unsigned long long prev = result; result *= 10; result += ptr[i] - '0'; - if( result < prev || result > LLONG_MAX ) // overflow - { if( tailp ) *tailp = ptr; return 0; } + if( result < prev || result > limit || result > LLONG_MAX ) // overflow + { *tailp = ptr; return -1; } } - if( tailp ) *tailp = ptr + i; + *tailp = ptr + i; return result; } @@ -77,7 +73,7 @@ uint32_t parse_record_crc( const char * const ptr ) } -unsigned char xdigit( const unsigned value ) +unsigned char xdigit( const unsigned value ) // hex digit for 'value' { if( value <= 9 ) return '0' + value; if( value <= 15 ) return 'A' + value - 10; @@ -90,27 +86,26 @@ void print_hex( char * const buf, int size, unsigned long long num ) void print_decimal( char * const buf, int size, unsigned long long num ) { while( --size >= 0 ) { buf[size] = num % 10 + '0'; num /= 10; } } -unsigned long long print_size_keyword( char * const buf, - const unsigned long long size, const char * keyword ) +int print_size_keyword( char * const buf, const int size, const char * keyword ) { // "size keyword=value\n" - unsigned long long pos = decimal_digits( size ); + int pos = decimal_digits( size ); print_decimal( buf, pos, size ); buf[pos++] = ' '; while( *keyword ) { buf[pos++] = *keyword; ++keyword; } buf[pos++] = '='; return pos; } -bool print_record( char * const buf, const unsigned long long size, - const char * keyword, const std::string & value ) +bool print_record( char * const buf, const int size, const char * keyword, + const std::string & value ) { - unsigned long long pos = print_size_keyword( buf, size, keyword ); + int pos = print_size_keyword( buf, size, keyword ); std::memcpy( buf + pos, value.c_str(), value.size() ); pos += value.size(); buf[pos++] = '\n'; return pos == size; } -bool print_record( char * const buf, const int size, - const char * keyword, const unsigned long long value ) +bool print_record( char * const buf, const int size, const char * keyword, + const unsigned long long value ) { int pos = print_size_keyword( buf, size, keyword ); const int vd = decimal_digits( value ); @@ -118,8 +113,8 @@ bool print_record( char * const buf, const int size, return pos == size; } -bool print_record( char * const buf, const int size, - const char * keyword, const Etime & value ) +bool print_record( char * const buf, const int size, const char * keyword, + const Etime & value ) { int pos = print_size_keyword( buf, size, keyword ); pos += value.print( buf + pos ); buf[pos++] = '\n'; @@ -154,12 +149,12 @@ unsigned Etime::print( char * const buf ) const } bool Etime::parse( const char * const ptr, const char ** const tailp, - const long long size ) + const int size ) { char * tail; errno = 0; long long s = strtoll( ptr, &tail, 10 ); - if( tail == ptr || errno || + if( tail == ptr || tail - ptr > size || errno || ( *tail != 0 && *tail != '\n' && *tail != '.' ) ) return false; int ns = 0; if( *tail == '.' ) // parse nanoseconds and any extra digits @@ -182,6 +177,8 @@ const std::string Extended::crc_record( "22 GNU.crc32=00000000\n" ); void Extended::calculate_sizes() const { + if( linkpath_.size() > max_edata_size || path_.size() > max_edata_size ) + { full_size_ = -3; return; } linkpath_recsize_ = linkpath_.size() ? record_size( 8, linkpath_.size() ) : 0; path_recsize_ = path_.size() ? record_size( 4, path_.size() ) : 0; file_size_recsize_ = @@ -192,19 +189,21 @@ void Extended::calculate_sizes() const atime_.out_of_ustar_range() ? record_size( 5, atime_.decimal_size() ) : 0; mtime_recsize_ = mtime_.out_of_ustar_range() ? record_size( 5, mtime_.decimal_size() ) : 0; - edsize_ = linkpath_recsize_ + path_recsize_ + file_size_recsize_ + - uid_recsize_ + gid_recsize_ + atime_recsize_ + mtime_recsize_ + - crc_record.size(); + const long long tmp = linkpath_recsize_ + path_recsize_ + + file_size_recsize_ + uid_recsize_ + gid_recsize_ + + atime_recsize_ + mtime_recsize_ + crc_record.size(); + if( tmp > max_edata_size ) { full_size_ = -3; return; } + edsize_ = tmp; padded_edsize_ = round_up( edsize_ ); + if( padded_edsize_ > max_edata_size ) { full_size_ = -3; return; } full_size_ = header_size + padded_edsize_; } // print a diagnostic for each unknown keyword once per keyword -void Extended::unknown_keyword( const char * const buf, - const unsigned long long size ) const +void Extended::unknown_keyword( const char * const buf, const int size ) const { - unsigned long long eq_pos = 0; // position of '=' in buf + int eq_pos = 0; // position of '=' in buf while( eq_pos < size && buf[eq_pos] != '=' ) ++eq_pos; const std::string keyword( buf, eq_pos ); for( unsigned i = 0; i < unknown_keywords.size(); ++i ) @@ -215,13 +214,12 @@ void Extended::unknown_keyword( const char * const buf, } -// Return the size of the extended block, -1 if error, -2 if out of memory. -long long Extended::format_block( Resizable_buffer & rbuf ) const +/* Return the size of the extended block, or 0 if empty. + Return -1 if error, -2 if out of memory, -3 if block too long. */ +int Extended::format_block( Resizable_buffer & rbuf ) const { - if( empty() ) return 0; // no extended data - const unsigned long long bufsize = full_size(); // recalculate sizes - if( edsize_ <= 0 ) return 0; // no extended data - if( edsize_ >= 1LL << 33 ) return -1; // too much extended data + const int bufsize = full_size(); // recalculate sizes if needed + if( bufsize <= 0 ) return bufsize; // error or no extended data if( !rbuf.resize( bufsize ) ) return -2; // extended block buffer uint8_t * const header = rbuf.u8(); // extended header char * const buf = rbuf() + header_size; // extended records @@ -232,7 +230,7 @@ long long Extended::format_block( Resizable_buffer & rbuf ) const if( path_recsize_ && !print_record( buf, path_recsize_, "path", path_ ) ) return -1; - long long pos = path_recsize_; + int pos = path_recsize_; if( linkpath_recsize_ && !print_record( buf + pos, linkpath_recsize_, "linkpath", linkpath_ ) ) return -1; @@ -268,24 +266,37 @@ long long Extended::format_block( Resizable_buffer & rbuf ) const } -bool Extended::parse( const char * const buf, const unsigned long long edsize, +const char * Extended::full_size_error() const + { + const char * const eferec_msg = "Error formatting extended records."; + switch( full_size_ ) + { + case -1: return eferec_msg; + case -2: return mem_msg2; + case -3: return longrec_msg; + default: internal_error( "invalid call to full_size_error." ); + return 0; // keep compiler quiet + } + } + + +bool Extended::parse( const char * const buf, const int edsize, const bool permissive ) { - reset(); full_size_ = -1; // invalidate cached sizes - for( unsigned long long pos = 0; pos < edsize; ) // parse records + reset(); full_size_ = -4; // invalidate cached sizes + for( int pos = 0; pos < edsize; ) // parse records { const char * tail; - const unsigned long long rsize = - parse_decimal( buf + pos, &tail, edsize - pos ); - if( rsize == 0 || rsize > edsize - pos || - tail[0] != ' ' || buf[pos+rsize-1] != '\n' ) return false; + const int rsize = + parse_decimal( buf + pos, &tail, edsize - pos, edsize - pos ); + if( rsize <= 0 || tail[0] != ' ' || buf[pos+rsize-1] != '\n' ) return false; ++tail; // point to keyword // rest = length of (keyword + '=' + value) without the final newline - const unsigned long long rest = ( buf + ( pos + rsize - 1 ) ) - tail; + const int rest = ( buf + ( pos + rsize - 1 ) ) - tail; if( rest > 5 && std::memcmp( tail, "path=", 5 ) == 0 ) { if( path_.size() && !permissive ) return false; - unsigned long long len = rest - 5; + int len = rest - 5; while( len > 1 && tail[5+len-1] == '/' ) --len; // trailing '/' path_.assign( tail + 5, len ); // this also truncates path_ at the first embedded null character @@ -294,30 +305,30 @@ bool Extended::parse( const char * const buf, const unsigned long long edsize, else if( rest > 9 && std::memcmp( tail, "linkpath=", 9 ) == 0 ) { if( linkpath_.size() && !permissive ) return false; - unsigned long long len = rest - 9; + int len = rest - 9; while( len > 1 && tail[9+len-1] == '/' ) --len; // trailing '/' linkpath_.assign( tail + 9, len ); } else if( rest > 5 && std::memcmp( tail, "size=", 5 ) == 0 ) { if( file_size_ != 0 && !permissive ) return false; - file_size_ = parse_decimal( tail + 5, &tail, rest - 5 ); - // parse error or size fits in ustar header - if( file_size_ < 1LL << 33 || file_size_ > max_file_size || - tail != buf + ( pos + rsize - 1 ) ) return false; + file_size_ = parse_decimal( tail + 5, &tail, rest - 5, max_file_size ); + // overflow, parse error, or size fits in ustar header + if( file_size_ < 1LL << 33 || tail != buf + ( pos + rsize - 1 ) ) + return false; } else if( rest > 4 && std::memcmp( tail, "uid=", 4 ) == 0 ) { if( uid_ >= 0 && !permissive ) return false; uid_ = parse_decimal( tail + 4, &tail, rest - 4 ); - // parse error or uid fits in ustar header + // overflow, parse error, or uid fits in ustar header if( uid_ < 1 << 21 || tail != buf + ( pos + rsize - 1 ) ) return false; } else if( rest > 4 && std::memcmp( tail, "gid=", 4 ) == 0 ) { if( gid_ >= 0 && !permissive ) return false; gid_ = parse_decimal( tail + 4, &tail, rest - 4 ); - // parse error or gid fits in ustar header + // overflow, parse error, or gid fits in ustar header if( gid_ < 1 << 21 || tail != buf + ( pos + rsize - 1 ) ) return false; } else if( rest > 6 && std::memcmp( tail, "atime=", 6 ) == 0 ) @@ -335,7 +346,7 @@ bool Extended::parse( const char * const buf, const unsigned long long edsize, else if( rest > 10 && std::memcmp( tail, "GNU.crc32=", 10 ) == 0 ) { if( crc_present_ && !permissive ) return false; - if( rsize != crc_record.size() ) return false; + if( rsize != (int)crc_record.size() ) return false; crc_present_ = true; const uint32_t stored_crc = parse_record_crc( tail + 10 ); const uint32_t computed_crc = @@ -343,7 +354,7 @@ bool Extended::parse( const char * const buf, const unsigned long long edsize, if( stored_crc != computed_crc ) { if( verbosity >= 2 ) - std::fprintf( stderr, "CRC32C = %08X\n", (unsigned)computed_crc ); + std::fprintf( stderr, "CRC32-C = %08X\n", (unsigned)computed_crc ); return false; } } @@ -367,7 +378,7 @@ void Extended::fill_from_ustar( const Tar_header header ) if( len > 0 ) { linkpath_.assign( (const char *)header + linkname_o, len ); - full_size_ = -1; + full_size_ = -4; } } @@ -399,17 +410,13 @@ void Extended::fill_from_ustar( const Tar_header header ) /* Return file size from record or from ustar header, and reset file_size_. - Used for fast parsing of headers in uncompressed archives. -*/ + Used for fast parsing of headers in uncompressed archives. */ long long Extended::get_file_size_and_reset( const Tar_header header ) { const long long tmp = file_size_; file_size( 0 ); // reset full_size_ const Typeflag typeflag = (Typeflag)header[typeflag_o]; - if( typeflag == tf_regular || typeflag == tf_hiperf ) - { - if( tmp == 0 ) return parse_octal( header + size_o, size_l ); - else return tmp; - } - return 0; + if( typeflag != tf_regular && typeflag != tf_hiperf ) return 0; + if( tmp > 0 ) return tmp; + return parse_octal( header + size_o, size_l ); } -- cgit v1.2.3