diff options
Diffstat (limited to 'extract.cc')
-rw-r--r-- | extract.cc | 398 |
1 files changed, 243 insertions, 155 deletions
@@ -28,6 +28,10 @@ #include <unistd.h> #include <utime.h> #include <sys/stat.h> +#include <sys/types.h> +#if defined(__GNU_LIBRARY__) +#include <sys/sysmacros.h> // for makedev +#endif #include <lzlib.h> #include "arg_parser.h" @@ -38,6 +42,17 @@ namespace { int gretval = 0; +bool has_lz_ext; // global var for archive_read + +void skip_warn( const bool reset = false ) // avoid duplicate warnings + { + static bool skipping = false; + + if( reset ) skipping = false; + else if( !skipping ) + { skipping = true; show_error( "Skipping to next header." ); } + } + bool make_path( const std::string & name ) { @@ -68,42 +83,6 @@ bool make_path( const std::string & name ) } -// Returns in buf the first rd bytes of the second lzip member or -// the first 512 bytes of the second tar member, and sets islz if lzip member -bool skip_first_member( const int infd, uint8_t * const buf, - int & rd, bool & islz ) - { - while( true ) - { - for( int i = 0; i < rd; ++i ) - if( buf[i] == 'L' && (*(Lzip_header *)( buf + i )).verify_prefix( rd - i ) ) - { - const int ts = rd - i; // tail size - std::memmove( buf, buf + i, ts ); - if( ts >= (int)sizeof lzip_magic ) - { rd = ts; islz = true; return true; } - int rd2 = readblock( infd, buf + ts, header_size - ts ); - if( rd2 != header_size - ts && errno ) - { show_error( "Error reading archive", errno ); return false; } - if( ts + rd2 >= min_member_size && - (*(Lzip_header *)buf).verify_magic() ) - { rd = ts + rd2; islz = true; return true; } - std::memmove( buf, buf + ts, rd2 ); - int rd3 = readblock( infd, buf + rd2, header_size - rd2 ); - if( rd3 != header_size - rd2 && errno ) - { show_error( "Error reading archive", errno ); return false; } - rd = rd2 + rd3; i = -1; - } - if( rd < header_size ) return false; // eof - if( rd == header_size && verify_ustar_chksum( buf ) ) - { islz = false; return true; } - rd = readblock( infd, buf, header_size ); - if( rd != header_size && errno ) - { show_error( "Error reading archive", errno ); return false; } - } - } - - inline bool block_is_zero( const uint8_t * const buf, const int size ) { for( int i = 0; i < size; ++i ) if( buf[i] != 0 ) return false; @@ -111,58 +90,83 @@ inline bool block_is_zero( const uint8_t * const buf, const int size ) } -bool archive_read( const int infd, uint8_t * const buf, const int size ) +// Return value: 0 = OK, 1 = damaged member, 2 = fatal error. +// If sizep and error, return in *sizep the number of bytes read. +// The first 6 bytes of the archive must be intact for islz to be meaningful. +int archive_read( const int infd, uint8_t * const buf, const int size, + int * const sizep = 0 ) { static LZ_Decoder * decoder = 0; - static bool first_call = true; static bool at_eof = false; + static bool fatal = false; + static bool first_call = true; + if( sizep ) *sizep = 0; + if( fatal ) return 2; if( first_call ) // check format { first_call = false; if( size != header_size ) internal_error( "size != header_size on first call." ); - int rd = readblock( infd, buf, size ); + const int rd = readblock( infd, buf, size ); + if( sizep ) *sizep = rd; if( rd != size && errno ) - { show_error( "Error reading archive", errno ); return false; } - bool islz = - ( rd >= min_member_size && (*(Lzip_header *)buf).verify_magic() ); + { show_error( "Error reading archive", errno ); fatal = true; return 2; } + const Lzip_header & header = (*(const Lzip_header *)buf); + bool islz = ( rd >= min_member_size && header.verify_magic() && + isvalid_ds( header.dictionary_size() ) ); const bool istar = ( rd == size && verify_ustar_chksum( buf ) ); const bool iseof = ( !islz && !istar && rd == size && block_is_zero( buf, size ) ); - if( !islz && !istar && !iseof ) + if( !islz && !istar && !iseof ) // corrupt or invalid format { - show_error( "This does not look like a tar archive." ); - show_error( "Skipping to next header." ); -// std::fprintf( stderr, "%07o\n", ustar_chksum( buf ) ); - gretval = 2; - if( !skip_first_member( infd, buf, rd, islz ) ) return false; + show_error( "This does not look like a POSIX tar archive." ); + if( has_lz_ext ) islz = true; + if( verbosity >= 2 && !islz && rd == size ) + std::fprintf( stderr, "ustar chksum = %07o\n", ustar_chksum( buf ) ); + if( !islz ) return 1; } - if( !islz ) return true; // uncompressed + if( !islz ) // uncompressed + { if( rd == size ) return 0; fatal = true; return 2; } decoder = LZ_decompress_open(); // compressed if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok ) { show_error( "Not enough memory." ); - LZ_decompress_close( decoder ); return false; } + LZ_decompress_close( decoder ); fatal = true; return 2; } if( LZ_decompress_write( decoder, buf, rd ) != rd ) internal_error( "library error (LZ_decompress_write)." ); - if( !archive_read( infd, buf, size ) ) return false; - if( verify_ustar_chksum( buf ) || block_is_zero( buf, size ) ) return true; - show_error( "This does not look like a tar archive." ); - show_error( "Skipping to next header." ); - gretval = 2; - if( LZ_decompress_sync_to_member( decoder ) < 0 ) - internal_error( "library error (LZ_decompress_sync_to_member)." ); + const int res = archive_read( infd, buf, size, sizep ); + if( res != 0 ) { if( res == 2 ) fatal = true; return res; } + if( verify_ustar_chksum( buf ) || block_is_zero( buf, size ) ) return 0; + show_error( "This does not look like a POSIX tar.lz archive." ); + fatal = true; return 2; } if( !decoder ) // uncompressed - { if( readblock( infd, buf, size ) == size ) return true; - show_error( "Archive ends unexpectedly." ); return false; } + { + const int rd = readblock( infd, buf, size ); if( rd == size ) return 0; + if( sizep ) *sizep = rd; + show_error( "Archive ends unexpectedly." ); fatal = true; return 2; + } const int ibuf_size = 16384; uint8_t ibuf[ibuf_size]; int sz = 0; while( sz < size ) { - if( !at_eof && LZ_decompress_write_size( decoder ) > 0 ) + const int rd = LZ_decompress_read( decoder, buf + sz, size - sz ); + if( rd < 0 ) + { + if( LZ_decompress_sync_to_member( decoder ) < 0 ) + internal_error( "library error (LZ_decompress_sync_to_member)." ); + skip_warn(); gretval = 2; return 1; + } + if( rd == 0 && LZ_decompress_finished( decoder ) == 1 ) + { LZ_decompress_close( decoder ); + show_error( "Archive ends unexpectedly." ); fatal = true; return 2; } + sz += rd; if( sizep ) *sizep = sz; + if( sz == size && LZ_decompress_finished( decoder ) == 1 && + LZ_decompress_close( decoder ) < 0 ) + { show_error( "LZ_decompress_close failed." ); fatal = true; return 2; } + if( sz < size && !at_eof && LZ_decompress_write_size( decoder ) > 0 ) { const int rsize = std::min( ibuf_size, LZ_decompress_write_size( decoder ) ); const int rd = readblock( infd, ibuf, rsize ); @@ -172,27 +176,12 @@ bool archive_read( const int infd, uint8_t * const buf, const int size ) { at_eof = true; LZ_decompress_finish( decoder ); if( errno ) - { show_error( "Error reading archive", errno ); return false; } + { show_error( "Error reading archive", errno ); fatal = true; + return 2; } } } - const int rd = LZ_decompress_read( decoder, buf + sz, size - sz ); - if( rd < 0 ) - { - show_error( "Skipping to next header." ); - gretval = 2; - if( LZ_decompress_sync_to_member( decoder ) < 0 ) - internal_error( "library error (LZ_decompress_sync_to_member)." ); - continue; - } - if( rd == 0 && LZ_decompress_finished( decoder ) == 1 ) - { LZ_decompress_close( decoder ); - show_error( "Archive ends unexpectedly." ); return false; } - sz += rd; - if( sz == size && LZ_decompress_finished( decoder ) == 1 && - LZ_decompress_close( decoder ) < 0 ) - { show_error( "LZ_decompress_close failed." ); return false; } } - return true; + return 0; } @@ -251,55 +240,44 @@ const char * user_group_string( const Tar_header header ) } -const char * link_string( const Tar_header header ) - { - enum { bufsize = 9 + linkname_l + 1 }; - static char buf[bufsize]; - const Typeflag typeflag = (Typeflag)header[typeflag_o]; - - if( typeflag == tf_link ) - snprintf( buf, bufsize, " link to %.100s", header + linkname_o ); - else if( typeflag == tf_symlink ) - snprintf( buf, bufsize, " -> %.100s", header + linkname_o ); - else buf[0] = 0; - return buf; - } - - -void show_member_name( const char * const filename, const Tar_header header, +void show_member_name( const Extended & extended, const Tar_header header, const int vlevel ) { if( verbosity < vlevel ) return; if( verbosity > vlevel ) { const time_t mtime = strtoull( header + mtime_o, 0, 8 ); // 33 bits - struct tm * tm = localtime( &mtime ); - std::printf( "%s %s %9llu %4d-%02u-%02u %02u:%02u %s%s\n", + const struct tm * const tm = localtime( &mtime ); + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + const bool islink = ( typeflag == tf_link || typeflag == tf_symlink ); + const char * const link_string = !islink ? "" : + ( ( typeflag == tf_link ) ? " link to " : " -> " ); + std::printf( "%s %s %9llu %4d-%02u-%02u %02u:%02u %s%s%s\n", mode_string( header ), user_group_string( header ), - strtoull( header + size_o, 0, 8 ), 1900 + tm->tm_year, - 1 + tm->tm_mon, tm->tm_mday, tm->tm_hour, tm->tm_min, - filename, link_string( header ) ); + extended.size, 1900 + tm->tm_year, 1 + tm->tm_mon, + tm->tm_mday, tm->tm_hour, tm->tm_min, extended.path.c_str(), + link_string, !islink ? "" : extended.linkpath.c_str() ); } - else std::printf( "%s\n", filename ); + else std::printf( "%s\n", extended.path.c_str() ); std::fflush( stdout ); } -int list_member( const int infd, const char * const filename, - const unsigned long long file_size, const Tar_header header, - const bool skip ) +int list_member( const int infd, const Extended & extended, + const Tar_header header, const bool skip ) { - if( !skip ) show_member_name( filename, header, 0 ); + if( !skip ) show_member_name( extended, header, 0 ); const unsigned bufsize = 32 * header_size; uint8_t buf[bufsize]; - unsigned long long rest = file_size; - const int rem = file_size % header_size; + unsigned long long rest = extended.size; + const int rem = extended.size % header_size; const int padding = rem ? header_size - rem : 0; while( rest > 0 ) { const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding; - if( !archive_read( infd, buf, rsize ) ) return 2; + const int ret = archive_read( infd, buf, rsize ); + if( ret != 0 ) { if( ret == 2 ) return 2; else break; } if( rest < bufsize ) break; rest -= rsize; } @@ -317,13 +295,14 @@ bool contains_dotdot( const char * const filename ) } -int extract_member( const int infd, const char * const filename, - const unsigned long long file_size, const Tar_header header ) +int extract_member( const int infd, const Extended & extended, + const Tar_header header, const bool keep_damaged ) { + const char * const filename = extended.path.c_str(); if( contains_dotdot( filename ) ) { show_file_error( filename, "Contains a '..' component, skipping." ); - return list_member( infd, filename, file_size, header, true ); + return list_member( infd, extended, header, true ); } const mode_t mode = strtoul( header + mode_o, 0, 8 ); // 12 bits const time_t mtime = strtoull( header + mtime_o, 0, 8 ); // 33 bits @@ -331,7 +310,7 @@ int extract_member( const int infd, const char * const filename, const bool islink = ( typeflag == tf_link || typeflag == tf_symlink ); int outfd = -1; - show_member_name( filename, header, 1 ); + show_member_name( extended, header, 1 ); std::remove( filename ); make_path( filename ); switch( typeflag ) @@ -345,14 +324,12 @@ int extract_member( const int infd, const char * const filename, case tf_link: case tf_symlink: { - char linkname[linkname_l+1]; - std::memcpy( linkname, header + linkname_o, linkname_l ); - linkname[linkname_l] = 0; + const char * const linkname = extended.linkpath.c_str(); /* if( contains_dotdot( linkname ) ) { show_file_error( filename, "Link destination contains a '..' component, skipping." ); - return list_member( infd, filename, file_size, header, false ); + return list_member( infd, extended, header, false ); }*/ const bool hard = typeflag == tf_link; if( ( hard && link( linkname, filename ) != 0 ) || @@ -410,15 +387,25 @@ int extract_member( const int infd, const char * const filename, const unsigned bufsize = 32 * header_size; uint8_t buf[bufsize]; - unsigned long long rest = file_size; - const int rem = file_size % header_size; + unsigned long long rest = extended.size; + const int rem = extended.size % header_size; const int padding = rem ? header_size - rem : 0; while( rest > 0 ) { const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding; - if( !archive_read( infd, buf, rsize ) ) - { if( outfd >= 0 ) { close( outfd ); std::remove( filename ); } - return 2; } + int rd; + const int ret = archive_read( infd, buf, rsize, &rd ); + if( ret != 0 ) + { + if( outfd >= 0 ) + { + if( keep_damaged ) + { writeblock( outfd, buf, std::min( rest, (unsigned long long)rd ) ); + close( outfd ); } + else { close( outfd ); std::remove( filename ); } + } + if( ret == 2 ) return 2; else return 0; + } const int wsize = ( rest >= bufsize ) ? bufsize : rest; if( outfd >= 0 && writeblock( outfd, buf, wsize ) != wsize ) { show_file_error( filename, "Error writing file", errno ); return 2; } @@ -437,6 +424,7 @@ int extract_member( const int infd, const char * const filename, } +// Removes any amount of leading "./" and '/' strings. const char * remove_leading_slash( const char * const filename ) { static bool first_post = true; @@ -479,73 +467,173 @@ bool compare_tslash( const char * const name1, const char * const name2 ) } // end namespace +bool Extended::parse( const int infd, const Tar_header header, + const bool permissive ) + { + const unsigned long long edsize = strtoull( header + size_o, 0, 8 ); + const unsigned long long bufsize = round_up( edsize ); + if( bufsize == 0 || edsize == 0 || edsize >= 1ULL << 33 ) + return false; // overflow or no extended data + char * const buf = new char[bufsize]; // extended records buffer + if( archive_read( infd, (uint8_t *)buf, bufsize ) != 0 ) goto error; + for( unsigned long long pos = 0; pos < edsize; ) // parse records + { + char * tail; + const unsigned long long rsize = strtoull( buf + pos, &tail, 10 ); + if( rsize == 0 || rsize > edsize - pos || tail[0] != ' ' || + buf[pos+rsize-1] != '\n' ) goto error; + ++tail; // point to keyword + // length of (keyword + '=' + value) without the final newline + const unsigned long long rest = ( buf + pos + rsize - 1 ) - tail; + if( rest > 5 && std::memcmp( tail, "path=", 5 ) == 0 ) + { if( path.size() && !permissive ) goto error; + path.assign( tail + 5, rest - 5 ); } + else if( rest > 9 && std::memcmp( tail, "linkpath=", 9 ) == 0 ) + { if( linkpath.size() && !permissive ) goto error; + linkpath.assign( tail + 9, rest - 9 ); } + else if( rest > 5 && std::memcmp( tail, "size=", 5 ) == 0 ) + { + if( size != 0 && !permissive ) goto error; + size = 0; + for( unsigned long long i = 5; i < rest; ++i ) + { + if( tail[i] < '0' || tail[i] > '9' ) goto error; + const unsigned long long prev = size; + size = size * 10 + ( tail[i] - '0' ); + if( size < prev ) goto error; // overflow + } + if( size < 1ULL << 33 ) goto error; // size fits in ustar header + } + else if( rest > 10 && std::memcmp( tail, "GNU.crc32=", 10 ) == 0 ) + { + if( crc_present && !permissive ) goto error; + if( rsize != 22 ) goto error; + char * t; + const uint32_t stored_crc = strtoul( tail + 10, &t, 16 ); + if( t - tail - 10 != 8 || t[0] != '\n' ) goto error; + const uint32_t computed_crc = + crc32c.windowed_crc( (const uint8_t *)buf, pos + rsize - 9, edsize ); + crc_present = true; + if( stored_crc != computed_crc ) goto error; + } + pos += rsize; + } + delete[] buf; + return true; +error: + delete[] buf; + return false; + } + + int decode( const std::string & archive_name, const Arg_parser & parser, - const int filenames, const bool listing ) + const int filenames, const bool keep_damaged, const bool listing, + const bool missing_crc, const bool permissive ) { const int infd = archive_name.size() ? open_instream( archive_name ) : STDIN_FILENO; if( infd < 0 ) return 1; + // execute -C options and mark filenames to be extracted or listed std::vector< bool > name_pending( parser.arguments(), false ); for( int i = 0; i < parser.arguments(); ++i ) { const int code = parser.code( i ); if( code == 'C' && !listing ) { - const char * const filename = parser.argument( i ).c_str(); - if( chdir( filename ) != 0 ) - { show_file_error( filename, "Error changing working directory", errno ); + const char * const dir = parser.argument( i ).c_str(); + if( chdir( dir ) != 0 ) + { show_file_error( dir, "Error changing working directory", errno ); return 1; } } if( !code ) name_pending[i] = true; } + has_lz_ext = + ( archive_name.size() > 3 && + archive_name.compare( archive_name.size() - 3, 3, ".lz" ) == 0 ) || + ( archive_name.size() > 4 && + archive_name.compare( archive_name.size() - 4, 4, ".tlz" ) == 0 ); + Extended extended; // metadata from extended records int retval = 0; - bool skipping = false; + bool prev_extended = false; // prev header was extended while( true ) // process one member per iteration { uint8_t buf[header_size]; - if( !archive_read( infd, buf, header_size ) ) return 2; - if( !verify_ustar_chksum( buf ) ) + const int ret = archive_read( infd, buf, header_size ); + if( ret == 2 ) return 2; + if( ret != 0 || !verify_ustar_chksum( buf ) ) { - if( block_is_zero( buf, header_size ) ) break; - gretval = 2; - if( !skipping ) - { skipping = true; show_error( "Skipping to next header." ); } - continue; + if( ret == 0 && block_is_zero( buf, header_size ) ) break; // EOF + skip_warn(); gretval = 2; continue; } - skipping = false; + skip_warn( true ); // reset warning const char * const header = (const char *)buf; - enum { max_filename_size = prefix_l + 1 + name_l + 1 }; - char stored_name[max_filename_size]; - int len = 0; - while( len < prefix_l && header[prefix_o+len] ) - { stored_name[len] = header[prefix_o+len]; ++len; } - if( len && header[name_o] ) stored_name[len++] = '/'; - for( int i = 0; i < name_l && header[name_o+i]; ++i ) - { stored_name[len] = header[name_o+i]; ++len; } - while( len > 0 && stored_name[len-1] == '/' ) --len; // trailing '/' - stored_name[len] = 0; - const char * const filename = remove_leading_slash( stored_name ); + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + if( typeflag == tf_extended ) + { + if( prev_extended && !permissive ) + { show_error( "Format violation: consecutive extended headers found." + /*" Use --permissive."*/, 0, true ); return 2; } + if( !extended.parse( infd, header, permissive ) ) + { show_error( "Error in extended records. Skipping to next header." ); + extended.reset(); gretval = 2; } + else if( !extended.crc_present && missing_crc ) + { show_error( "Missing CRC in extended records.", 0, true ); return 2; } + prev_extended = true; + continue; + } + prev_extended = false; + + if( extended.linkpath.empty() ) + { + for( int i = 0; i < linkname_l && header[linkname_o+i]; ++i ) + extended.linkpath += header[linkname_o+i]; + while( extended.linkpath.size() > 1 && // trailing '/' + extended.linkpath[extended.linkpath.size()-1] == '/' ) + extended.linkpath.resize( extended.linkpath.size() - 1 ); + } + + if( extended.path.empty() ) + { + char stored_name[prefix_l+1+name_l+1]; + int len = 0; + while( len < prefix_l && header[prefix_o+len] ) + { stored_name[len] = header[prefix_o+len]; ++len; } + if( len && header[name_o] ) stored_name[len++] = '/'; + for( int i = 0; i < name_l && header[name_o+i]; ++i ) + { stored_name[len] = header[name_o+i]; ++len; } + while( len > 0 && stored_name[len-1] == '/' ) --len; // trailing '/' + stored_name[len] = 0; + extended.path = remove_leading_slash( stored_name ); + } + const char * const filename = extended.path.c_str(); bool skip = filenames > 0; if( skip ) for( int i = 0; i < parser.arguments(); ++i ) - if( parser.code( i ) == 0 && - ( compare_prefix_dir( parser.argument( i ).c_str(), filename ) || - compare_tslash( filename, parser.argument( i ).c_str() ) ) ) - { skip = false; name_pending[i] = false; break; } + if( parser.code( i ) == 0 ) + { + const char * const name = + remove_leading_slash( parser.argument( i ).c_str() ); + if( compare_prefix_dir( name, filename ) || + compare_tslash( name, filename ) ) + { skip = false; name_pending[i] = false; break; } + } + + if( extended.size == 0 && + ( typeflag == tf_regular || typeflag == tf_hiperf ) ) + extended.size = strtoull( header + size_o, 0, 8 ); - const Typeflag typeflag = (Typeflag)header[typeflag_o]; - const unsigned long long file_size = - ( typeflag == tf_regular || typeflag == tf_hiperf ) ? - strtoull( header + size_o, 0, 8 ) : 0; if( listing || skip ) - retval = list_member( infd, filename, file_size, header, skip ); + retval = list_member( infd, extended, header, skip ); else - retval = extract_member( infd, filename, file_size, header ); - if( retval ) return retval; + retval = extract_member( infd, extended, header, keep_damaged ); + extended.reset(); + if( retval ) + { show_error( "Error is not recoverable: exiting now." ); + return retval; } } for( int i = 0; i < parser.arguments(); ++i ) |