From aa4d2adf37f7449dd1a99df517de0a9ee97867bd Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 23 Jan 2019 18:42:00 +0100 Subject: Adding upstream version 0.9. Signed-off-by: Daniel Baumann --- extract.cc | 335 +++++++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 239 insertions(+), 96 deletions(-) (limited to 'extract.cc') diff --git a/extract.cc b/extract.cc index 58cda61..e25f5b6 100644 --- a/extract.cc +++ b/extract.cc @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2018 Antonio Diaz Diaz. + Copyright (C) 2013-2019 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,9 @@ #define _FILE_OFFSET_BITS 64 #include +#include #include +#include #include #include #include @@ -36,13 +38,15 @@ #include "arg_parser.h" #include "lzip.h" +#include "lzip_index.h" #include "tarlz.h" namespace { +Resizable_buffer grbuf( initial_line_length ); int gretval = 0; -bool has_lz_ext; // global var for archive_read +bool has_lz_ext; // global var for archive_read void skip_warn( const bool reset = false ) // avoid duplicate warnings { @@ -83,13 +87,6 @@ bool make_path( const std::string & name ) } -inline bool block_is_zero( const uint8_t * const buf, const int size ) - { - for( int i = 0; i < size; ++i ) if( buf[i] != 0 ) return false; - return true; - } - - // Return value: 0 = OK, 1 = damaged member, 2 = fatal error. // If sizep and error, return in *sizep the number of bytes read. // The first 6 bytes of the archive must be intact for islz to be meaningful. @@ -114,6 +111,7 @@ int archive_read( const int infd, uint8_t * const buf, const int size, { show_error( "Error reading archive", errno ); fatal = true; return 2; } const Lzip_header & header = (*(const Lzip_header *)buf); bool islz = ( rd >= min_member_size && header.verify_magic() && + header.verify_version() && isvalid_ds( header.dictionary_size() ) ); const bool istar = ( rd == size && verify_ustar_chksum( buf ) ); const bool iseof = @@ -160,8 +158,8 @@ int archive_read( const int infd, uint8_t * const buf, const int size, skip_warn(); gretval = 2; return 1; } if( rd == 0 && LZ_decompress_finished( decoder ) == 1 ) - { LZ_decompress_close( decoder ); - show_error( "Archive ends unexpectedly." ); fatal = true; return 2; } + { LZ_decompress_close( decoder ); + show_error( "Archive ends unexpectedly." ); fatal = true; return 2; } sz += rd; if( sizep ) *sizep = sz; if( sz == size && LZ_decompress_finished( decoder ) == 1 && LZ_decompress_close( decoder ) < 0 ) @@ -185,12 +183,14 @@ int archive_read( const int infd, uint8_t * const buf, const int size, } -const char * mode_string( const Tar_header header ) +enum { mode_string_size = 10, + group_string_size = 1 + uname_l + 1 + gname_l + 1 }; // 67 + +void format_mode_string( const Tar_header header, char buf[mode_string_size] ) { - static char buf[11]; const Typeflag typeflag = (Typeflag)header[typeflag_o]; - std::memcpy( buf, "----------", sizeof buf - 1 ); + std::memcpy( buf, "----------", mode_string_size ); switch( typeflag ) { case tf_regular: break; @@ -203,7 +203,7 @@ const char * mode_string( const Tar_header header ) case tf_hiperf: buf[0] = 'C'; break; default: buf[0] = '?'; } - const mode_t mode = strtoul( header + mode_o, 0, 8 ); // 12 bits + const mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits const bool setuid = mode & S_ISUID; const bool setgid = mode & S_ISGID; const bool sticky = mode & S_ISVTX; @@ -219,46 +219,79 @@ const char * mode_string( const Tar_header header ) if( mode & S_IWOTH ) buf[8] = 'w'; if( mode & S_IXOTH ) buf[9] = sticky ? 't' : 'x'; else if( sticky ) buf[9] = 'T'; - return buf; } -const char * user_group_string( const Tar_header header ) +int format_user_group_string( const Tar_header header, + char buf[group_string_size] ) { - enum { bufsize = uname_l + 1 + gname_l + 1 }; - static char buf[bufsize]; - + int len; if( header[uname_o] && header[gname_o] ) - snprintf( buf, bufsize, "%.32s/%.32s", header + uname_o, header + gname_o ); + len = snprintf( buf, group_string_size, + " %.32s/%.32s", header + uname_o, header + gname_o ); else { - const int uid = strtoul( header + uid_o, 0, 8 ); - const int gid = strtoul( header + gid_o, 0, 8 ); - snprintf( buf, bufsize, "%u/%u", uid, gid ); + const unsigned uid = parse_octal( header + uid_o, uid_l ); + const unsigned gid = parse_octal( header + gid_o, gid_l ); + len = snprintf( buf, group_string_size, " %u/%u", uid, gid ); } - return buf; + return len; } +} // end namespace -void show_member_name( const Extended & extended, const Tar_header header, - const int vlevel ) +bool block_is_zero( const uint8_t * const buf, const int size ) { - if( verbosity < vlevel ) return; - if( verbosity > vlevel ) + for( int i = 0; i < size; ++i ) if( buf[i] != 0 ) return false; + return true; + } + + +void format_member_name( const Extended & extended, const Tar_header header, + Resizable_buffer & rbuf, const bool long_format ) + { + if( long_format ) { - const time_t mtime = strtoull( header + mtime_o, 0, 8 ); // 33 bits - const struct tm * const tm = localtime( &mtime ); + format_mode_string( header, rbuf() ); + const int group_string_len = + format_user_group_string( header, rbuf() + mode_string_size ); + const int offset = mode_string_size + group_string_len; + const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits + struct tm tms; + const struct tm * tm = localtime_r( &mtime, &tms ); + if( !tm ) + { time_t z = 0; tm = localtime_r( &z, &tms ); if( !tm ) tm = &tms; } const Typeflag typeflag = (Typeflag)header[typeflag_o]; const bool islink = ( typeflag == tf_link || typeflag == tf_symlink ); const char * const link_string = !islink ? "" : ( ( typeflag == tf_link ) ? " link to " : " -> " ); - std::printf( "%s %s %9llu %4d-%02u-%02u %02u:%02u %s%s%s\n", - mode_string( header ), user_group_string( header ), - extended.size, 1900 + tm->tm_year, 1 + tm->tm_mon, - tm->tm_mday, tm->tm_hour, tm->tm_min, extended.path.c_str(), - link_string, !islink ? "" : extended.linkpath.c_str() ); + for( int i = 0; i < 2; ++i ) + { + const int len = snprintf( rbuf() + offset, rbuf.size() - offset, + " %9llu %4d-%02u-%02u %02u:%02u %s%s%s\n", + extended.size, 1900 + tm->tm_year, 1 + tm->tm_mon, + tm->tm_mday, tm->tm_hour, tm->tm_min, extended.path.c_str(), + link_string, !islink ? "" : extended.linkpath.c_str() ); + if( (int)rbuf.size() > len + offset ) break; + else rbuf.resize( len + offset + 1 ); + } + } + else + { + if( rbuf.size() < extended.path.size() + 2 ) + rbuf.resize( extended.path.size() + 2 ); + snprintf( rbuf(), rbuf.size(), "%s\n", extended.path.c_str() ); } - else std::printf( "%s\n", extended.path.c_str() ); + } + +namespace { + +void show_member_name( const Extended & extended, const Tar_header header, + const int vlevel, Resizable_buffer & rbuf ) + { + if( verbosity < vlevel ) return; + format_member_name( extended, header, rbuf, verbosity > vlevel ); + std::fputs( rbuf(), stdout ); std::fflush( stdout ); } @@ -266,7 +299,7 @@ void show_member_name( const Extended & extended, const Tar_header header, int list_member( const int infd, const Extended & extended, const Tar_header header, const bool skip ) { - if( !skip ) show_member_name( extended, header, 0 ); + if( !skip ) show_member_name( extended, header, 0, grbuf ); const unsigned bufsize = 32 * header_size; uint8_t buf[bufsize]; @@ -304,13 +337,13 @@ int extract_member( const int infd, const Extended & extended, show_file_error( filename, "Contains a '..' component, skipping." ); return list_member( infd, extended, header, true ); } - const mode_t mode = strtoul( header + mode_o, 0, 8 ); // 12 bits - const time_t mtime = strtoull( header + mtime_o, 0, 8 ); // 33 bits + const mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits + const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits const Typeflag typeflag = (Typeflag)header[typeflag_o]; const bool islink = ( typeflag == tf_link || typeflag == tf_symlink ); int outfd = -1; - show_member_name( extended, header, 1 ); + show_member_name( extended, header, 1, grbuf ); std::remove( filename ); make_path( filename ); switch( typeflag ) @@ -352,8 +385,9 @@ int extract_member( const int infd, const Extended & extended, case tf_chardev: case tf_blockdev: { - const unsigned dev = makedev( strtoul( header + devmajor_o, 0, 8 ), - strtoul( header + devminor_o, 0, 8 ) ); + const unsigned dev = + makedev( parse_octal( header + devmajor_o, devmajor_l ), + parse_octal( header + devminor_o, devminor_l ) ); const int dmode = ( typeflag == tf_chardev ? S_IFCHR : S_IFBLK ) | mode; if( mknod( filename, dmode, dev ) != 0 ) { @@ -376,8 +410,8 @@ int extract_member( const int infd, const Extended & extended, return 2; } - const uid_t uid = (uid_t)strtoul( header + uid_o, 0, 8 ); - const gid_t gid = (gid_t)strtoul( header + gid_o, 0, 8 ); + const uid_t uid = (uid_t)parse_octal( header + uid_o, uid_l ); + const gid_t gid = (gid_t)parse_octal( header + gid_o, gid_l ); if( !islink && chown( filename, uid, gid ) != 0 && errno != EPERM && errno != EINVAL ) { @@ -423,6 +457,7 @@ int extract_member( const int infd, const Extended & extended, return 0; } +} // end namespace // Removes any amount of leading "./" and '/' strings. const char * remove_leading_slash( const char * const filename ) @@ -464,78 +499,163 @@ bool compare_tslash( const char * const name1, const char * const name2 ) return ( !*p && !*q ); } -} // end namespace +namespace { +unsigned long long parse_decimal( const char * const ptr, + const char ** const tailp, + const unsigned long long size ) + { + unsigned long long result = 0; + unsigned long long i = 0; + while( i < size && std::isspace( ptr[i] ) ) ++i; + if( !std::isdigit( (unsigned char)ptr[i] ) ) + { if( tailp ) *tailp = ptr; return 0; } + for( ; i < size && std::isdigit( (unsigned char)ptr[i] ); ++i ) + { + const unsigned long long prev = result; + result *= 10; result += ptr[i] - '0'; + if( result < prev || result > LLONG_MAX ) // overflow + { if( tailp ) *tailp = ptr; return 0; } + } + if( tailp ) *tailp = ptr + i; + return result; + } -bool Extended::parse( const int infd, const Tar_header header, - const bool permissive ) + +uint32_t parse_record_crc( const char * const ptr ) { - const unsigned long long edsize = strtoull( header + size_o, 0, 8 ); + uint32_t crc = 0; + for( int i = 0; i < 8; ++i ) + { + crc <<= 4; + if( ptr[i] >= '0' && ptr[i] <= '9' ) crc += ptr[i] - '0'; + else if( ptr[i] >= 'A' && ptr[i] <= 'F' ) crc += ptr[i] + 10 - 'A'; + else if( ptr[i] >= 'a' && ptr[i] <= 'f' ) crc += ptr[i] + 10 - 'a'; + else { crc = 0; break; } // invalid digit in crc string + } + return crc; + } + + +bool parse_records( const int infd, Extended & extended, + const Tar_header header, const bool permissive ) + { + const unsigned long long edsize = parse_octal( header + size_o, size_l ); const unsigned long long bufsize = round_up( edsize ); if( bufsize == 0 || edsize == 0 || edsize >= 1ULL << 33 ) return false; // overflow or no extended data char * const buf = new char[bufsize]; // extended records buffer - if( archive_read( infd, (uint8_t *)buf, bufsize ) != 0 ) goto error; + const bool ret = ( archive_read( infd, (uint8_t *)buf, bufsize ) == 0 && + extended.parse( buf, edsize, permissive ) ); + delete[] buf; + return ret; + } + +} // end namespace + + +/* Returns the number of bytes really read. + If (returned value < size) and (errno == 0), means EOF was reached. +*/ +int readblock( const int fd, uint8_t * const buf, const int size ) + { + int sz = 0; + errno = 0; + while( sz < size ) + { + const int n = read( fd, buf + sz, size - sz ); + if( n > 0 ) sz += n; + else if( n == 0 ) break; // EOF + else if( errno != EINTR ) break; + errno = 0; + } + return sz; + } + + +/* Returns the number of bytes really written. + If (returned value < size), it is always an error. +*/ +int writeblock( const int fd, const uint8_t * const buf, const int size ) + { + int sz = 0; + errno = 0; + while( sz < size ) + { + const int n = write( fd, buf + sz, size - sz ); + if( n > 0 ) sz += n; + else if( n < 0 && errno != EINTR ) break; + errno = 0; + } + return sz; + } + + +unsigned long long parse_octal( const uint8_t * const ptr, const int size ) + { + unsigned long long result = 0; + int i = 0; + while( i < size && std::isspace( ptr[i] ) ) ++i; + for( ; i < size && ptr[i] >= '0' && ptr[i] <= '7'; ++i ) + { result <<= 3; result += ptr[i] - '0'; } + return result; + } + + +bool Extended::parse( const char * const buf, const unsigned long long edsize, + const bool permissive ) + { for( unsigned long long pos = 0; pos < edsize; ) // parse records { - char * tail; - const unsigned long long rsize = strtoull( buf + pos, &tail, 10 ); + const char * tail; + const unsigned long long rsize = + parse_decimal( buf + pos, &tail, edsize - pos ); if( rsize == 0 || rsize > edsize - pos || tail[0] != ' ' || - buf[pos+rsize-1] != '\n' ) goto error; + buf[pos+rsize-1] != '\n' ) return false; ++tail; // point to keyword - // length of (keyword + '=' + value) without the final newline - const unsigned long long rest = ( buf + pos + rsize - 1 ) - tail; + // rest = length of (keyword + '=' + value) without the final newline + const unsigned long long rest = ( buf + ( pos + rsize - 1 ) ) - tail; if( rest > 5 && std::memcmp( tail, "path=", 5 ) == 0 ) - { if( path.size() && !permissive ) goto error; + { if( path.size() && !permissive ) return false; path.assign( tail + 5, rest - 5 ); } else if( rest > 9 && std::memcmp( tail, "linkpath=", 9 ) == 0 ) - { if( linkpath.size() && !permissive ) goto error; + { if( linkpath.size() && !permissive ) return false; linkpath.assign( tail + 9, rest - 9 ); } else if( rest > 5 && std::memcmp( tail, "size=", 5 ) == 0 ) { - if( size != 0 && !permissive ) goto error; - size = 0; - for( unsigned long long i = 5; i < rest; ++i ) - { - if( tail[i] < '0' || tail[i] > '9' ) goto error; - const unsigned long long prev = size; - size = size * 10 + ( tail[i] - '0' ); - if( size < prev ) goto error; // overflow - } - if( size < 1ULL << 33 ) goto error; // size fits in ustar header + if( size != 0 && !permissive ) return false; + size = parse_decimal( tail + 5, &tail, rest - 5 ); + // parse error or size fits in ustar header + if( size < 1ULL << 33 || tail != buf + ( pos + rsize - 1 ) ) return false; } else if( rest > 10 && std::memcmp( tail, "GNU.crc32=", 10 ) == 0 ) { - if( crc_present && !permissive ) goto error; - if( rsize != 22 ) goto error; - char * t; - const uint32_t stored_crc = strtoul( tail + 10, &t, 16 ); - if( t - tail - 10 != 8 || t[0] != '\n' ) goto error; + if( crc_present && !permissive ) return false; + if( rsize != 22 ) return false; + const uint32_t stored_crc = parse_record_crc( tail + 10 ); const uint32_t computed_crc = crc32c.windowed_crc( (const uint8_t *)buf, pos + rsize - 9, edsize ); crc_present = true; - if( stored_crc != computed_crc ) goto error; + if( stored_crc != computed_crc ) return false; } pos += rsize; } - delete[] buf; return true; -error: - delete[] buf; - return false; } int decode( const std::string & archive_name, const Arg_parser & parser, - const int filenames, const bool keep_damaged, const bool listing, - const bool missing_crc, const bool permissive ) + const int filenames, const int num_workers, const int debug_level, + const bool keep_damaged, const bool listing, const bool missing_crc, + const bool permissive ) { const int infd = archive_name.size() ? open_instream( archive_name ) : STDIN_FILENO; if( infd < 0 ) return 1; - // execute -C options and mark filenames to be extracted or listed - std::vector< bool > name_pending( parser.arguments(), false ); + // Execute -C options and mark filenames to be extracted or listed. + // name_pending is of type char instead of bool to allow concurrent update. + std::vector< char > name_pending( parser.arguments(), false ); for( int i = 0; i < parser.arguments(); ++i ) { const int code = parser.code( i ); @@ -549,34 +669,57 @@ int decode( const std::string & archive_name, const Arg_parser & parser, if( !code ) name_pending[i] = true; } - has_lz_ext = + if( listing && num_workers > 0 ) // multi-threaded --list + { + const Lzip_index lzip_index( infd, true, false ); + const long members = lzip_index.members(); + if( lzip_index.retval() == 0 && ( members >= 3 || + ( members >= 2 && lzip_index.dblock( members - 1 ).size() > 1024 ) ) ) + { //show_file_error( archive_name.c_str(), "Is compressed seekable" ); + return list_lz( parser, name_pending, lzip_index, filenames, + debug_level, infd, std::min( (long)num_workers, members ), + missing_crc, permissive ); } + lseek( infd, 0, SEEK_SET ); + } + + has_lz_ext = // global var for archive_read ( archive_name.size() > 3 && archive_name.compare( archive_name.size() - 3, 3, ".lz" ) == 0 ) || ( archive_name.size() > 4 && archive_name.compare( archive_name.size() - 4, 4, ".tlz" ) == 0 ); - Extended extended; // metadata from extended records + Extended extended; // metadata from extended records int retval = 0; - bool prev_extended = false; // prev header was extended - while( true ) // process one member per iteration + bool prev_extended = false; // prev header was extended + while( true ) // process one tar member per iteration { - uint8_t buf[header_size]; - const int ret = archive_read( infd, buf, header_size ); + Tar_header header; + const int ret = archive_read( infd, header, header_size ); if( ret == 2 ) return 2; - if( ret != 0 || !verify_ustar_chksum( buf ) ) + if( ret != 0 || !verify_ustar_chksum( header ) ) { - if( ret == 0 && block_is_zero( buf, header_size ) ) break; // EOF + if( ret == 0 && block_is_zero( header, header_size ) ) break; // EOF skip_warn(); gretval = 2; continue; } - skip_warn( true ); // reset warning + skip_warn( true ); // reset warning - const char * const header = (const char *)buf; const Typeflag typeflag = (Typeflag)header[typeflag_o]; + if( typeflag == tf_global ) + { + if( prev_extended ) + { show_error( "Format violation: global header after extended header." ); + return 2; } + Extended dummy; // global headers are parsed and ignored + if( !parse_records( infd, dummy, header, true ) ) + { show_error( "Error in global extended records. Skipping to next header." ); + gretval = 2; } + continue; + } if( typeflag == tf_extended ) { if( prev_extended && !permissive ) { show_error( "Format violation: consecutive extended headers found." - /*" Use --permissive."*/, 0, true ); return 2; } - if( !extended.parse( infd, header, permissive ) ) + /*" Use --permissive.", 0, true*/ ); return 2; } + if( !parse_records( infd, extended, header, permissive ) ) { show_error( "Error in extended records. Skipping to next header." ); extended.reset(); gretval = 2; } else if( !extended.crc_present && missing_crc ) @@ -586,7 +729,7 @@ int decode( const std::string & archive_name, const Arg_parser & parser, } prev_extended = false; - if( extended.linkpath.empty() ) + if( extended.linkpath.empty() ) // copy linkpath from ustar header { for( int i = 0; i < linkname_l && header[linkname_o+i]; ++i ) extended.linkpath += header[linkname_o+i]; @@ -595,7 +738,7 @@ int decode( const std::string & archive_name, const Arg_parser & parser, extended.linkpath.resize( extended.linkpath.size() - 1 ); } - if( extended.path.empty() ) + if( extended.path.empty() ) // copy path from ustar header { char stored_name[prefix_l+1+name_l+1]; int len = 0; @@ -624,7 +767,7 @@ int decode( const std::string & archive_name, const Arg_parser & parser, if( extended.size == 0 && ( typeflag == tf_regular || typeflag == tf_hiperf ) ) - extended.size = strtoull( header + size_o, 0, 8 ); + extended.size = parse_octal( header + size_o, size_l ); if( listing || skip ) retval = list_member( infd, extended, header, skip ); -- cgit v1.2.3