diff options
Diffstat (limited to 'extract.cc')
-rw-r--r-- | extract.cc | 201 |
1 files changed, 168 insertions, 33 deletions
@@ -26,13 +26,14 @@ #include <cstring> #include <string> #include <vector> +#include <pthread.h> #include <stdint.h> #include <unistd.h> #include <utime.h> #include <sys/stat.h> #include <sys/types.h> #if defined(__GNU_LIBRARY__) -#include <sys/sysmacros.h> // for makedev +#include <sys/sysmacros.h> // for major, minor, makedev #endif #include <lzlib.h> @@ -44,15 +45,17 @@ namespace { Resizable_buffer grbuf( initial_line_length ); +bool archive_is_uncompressed_seekable = false; bool has_lz_ext; // global var for archive_read -void skip_warn( const bool reset = false ) // avoid duplicate warnings +bool skip_warn( const bool reset = false ) // avoid duplicate warnings { static bool skipping = false; if( reset ) skipping = false; else if( !skipping ) - { skipping = true; show_error( "Skipping to next header." ); } + { skipping = true; show_error( "Skipping to next header." ); return true; } + return false; } @@ -122,7 +125,8 @@ int archive_read( const int infd, uint8_t * const buf, const int size, } if( !islz ) // uncompressed { if( rd == size ) return 0; fatal = true; return 2; } - decoder = LZ_decompress_open(); // compressed + archive_is_uncompressed_seekable = false; // compressed + decoder = LZ_decompress_open(); if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok ) { show_error( mem_msg ); LZ_decompress_close( decoder ); fatal = true; return 2; } @@ -251,7 +255,7 @@ void format_member_name( const Extended & extended, const Tar_header header, format_mode_string( header, rbuf() ); const int group_string_len = format_user_group_string( header, rbuf() + mode_string_size ); - const int offset = mode_string_size + group_string_len; + int offset = mode_string_size + group_string_len; const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits struct tm tms; const struct tm * tm = localtime_r( &mtime, &tms ); @@ -261,13 +265,20 @@ void format_member_name( const Extended & extended, const Tar_header header, const bool islink = ( typeflag == tf_link || typeflag == tf_symlink ); const char * const link_string = !islink ? "" : ( ( typeflag == tf_link ) ? " link to " : " -> " ); + if( typeflag == tf_chardev || typeflag == tf_blockdev ) + offset += snprintf( rbuf() + offset, rbuf.size() - offset, " %5u,%u", + (unsigned)parse_octal( header + devmajor_o, devmajor_l ), + (unsigned)parse_octal( header + devminor_o, devminor_l ) ); + else + offset += snprintf( rbuf() + offset, rbuf.size() - offset, " %9llu", + extended.file_size() ); for( int i = 0; i < 2; ++i ) { const int len = snprintf( rbuf() + offset, rbuf.size() - offset, - " %9llu %4d-%02u-%02u %02u:%02u %s%s%s\n", - extended.file_size(), 1900 + tm->tm_year, 1 + tm->tm_mon, - tm->tm_mday, tm->tm_hour, tm->tm_min, extended.path().c_str(), - link_string, !islink ? "" : extended.linkpath().c_str() ); + " %4d-%02u-%02u %02u:%02u %s%s%s\n", + 1900 + tm->tm_year, 1 + tm->tm_mon, tm->tm_mday, + tm->tm_hour, tm->tm_min, extended.path().c_str(), + link_string, islink ? extended.linkpath().c_str() : "" ); if( (int)rbuf.size() > len + offset || !rbuf.resize( len + offset + 1 ) ) break; } @@ -292,16 +303,15 @@ void show_member_name( const Extended & extended, const Tar_header header, } -int list_member( const int infd, const Extended & extended, - const Tar_header header, const bool skip ) +int skip_member( const int infd, const Extended & extended ) { - if( !skip ) show_member_name( extended, header, 0, grbuf ); - - const unsigned bufsize = 32 * header_size; - uint8_t buf[bufsize]; unsigned long long rest = extended.file_size(); const int rem = rest % header_size; const int padding = rem ? header_size - rem : 0; + if( archive_is_uncompressed_seekable && + lseek( infd, rest + padding, SEEK_CUR ) > 0 ) return 0; + const unsigned bufsize = 32 * header_size; + uint8_t buf[bufsize]; while( rest > 0 ) { const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding; @@ -314,6 +324,130 @@ int list_member( const int infd, const Extended & extended, } +void show_file_diff( const char * const filename, const char * const msg ) + { + if( verbosity >= 0 ) std::fprintf( stderr, "%s: %s\n", filename, msg ); + } + + +int compare_member( const int infd1, const Extended & extended, + const Tar_header header, const bool ignore_ids ) + { + show_member_name( extended, header, 1, grbuf ); + unsigned long long rest = extended.file_size(); + const char * const filename = extended.path().c_str(); + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + bool diff = false, size_differs = false, type_differs = true; + struct stat st; + if( lstat( filename, &st ) != 0 ) + show_file_error( filename, "Warning: Can't stat", errno ); + else if( ( typeflag == tf_regular || typeflag == tf_hiperf ) && + !S_ISREG( st.st_mode ) ) + show_file_diff( filename, "Is not a regular file" ); + else if( typeflag == tf_symlink && !S_ISLNK( st.st_mode ) ) + show_file_diff( filename, "Is not a symlink" ); + else if( typeflag == tf_chardev && !S_ISCHR( st.st_mode ) ) + show_file_diff( filename, "Is not a character device" ); + else if( typeflag == tf_blockdev && !S_ISBLK( st.st_mode ) ) + show_file_diff( filename, "Is not a block device" ); + else if( typeflag == tf_directory && !S_ISDIR( st.st_mode ) ) + show_file_diff( filename, "Is not a directory" ); + else if( typeflag == tf_fifo && !S_ISFIFO( st.st_mode ) ) + show_file_diff( filename, "Is not a FIFO" ); + else + { + type_differs = false; + if( typeflag != tf_symlink ) + { + const mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits + if( mode != ( st.st_mode & ( S_ISUID | S_ISGID | S_ISVTX | + S_IRWXU | S_IRWXG | S_IRWXO ) ) ) + { show_file_diff( filename, "Mode differs" ); diff = true; } + } + if( !ignore_ids ) + { + if( (uid_t)parse_octal( header + uid_o, uid_l ) != st.st_uid ) + { show_file_diff( filename, "Uid differs" ); diff = true; } + if( (gid_t)parse_octal( header + gid_o, gid_l ) != st.st_gid ) + { show_file_diff( filename, "Gid differs" ); diff = true; } + } + if( typeflag != tf_symlink ) + { + const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits + if( mtime != st.st_mtime ) + { show_file_diff( filename, "Mod time differs" ); diff = true; } + if( ( typeflag == tf_regular || typeflag == tf_hiperf ) && + (off_t)rest != st.st_size ) // don't compare contents + { show_file_diff( filename, "Size differs" ); size_differs = true; } + if( ( typeflag == tf_chardev || typeflag == tf_blockdev ) && + ( parse_octal( header + devmajor_o, devmajor_l ) != major( st.st_rdev ) || + parse_octal( header + devminor_o, devminor_l ) != minor( st.st_rdev ) ) ) + { show_file_diff( filename, "Device number differs" ); diff = true; } + } + else + { + char * const buf = new char[st.st_size+1]; + long len = readlink( filename, buf, st.st_size ); + bool e = ( len != st.st_size ); + if( !e ) { buf[len] = 0; if( extended.linkpath() != buf ) e = true; } + delete[] buf; + if( e ) { show_file_diff( filename, "Symlink differs" ); diff = true; } + } + } + if( diff || size_differs || type_differs ) + { diff = false; set_error_status( 1 ); } + if( rest == 0 ) return 0; + if( ( typeflag != tf_regular && typeflag != tf_hiperf ) || + size_differs || type_differs ) return skip_member( infd1, extended ); + // else compare file contents + const int rem = rest % header_size; + const int padding = rem ? header_size - rem : 0; + const unsigned bufsize = 32 * header_size; + uint8_t buf1[bufsize]; + uint8_t buf2[bufsize]; + const int infd2 = open_instream( filename ); + if( infd2 < 0 ) + { set_error_status( 1 ); return skip_member( infd1, extended ); } + int retval = 0; + while( rest > 0 ) + { + const int rsize1 = ( rest >= bufsize ) ? bufsize : rest + padding; + const int rsize2 = ( rest >= bufsize ) ? bufsize : rest; + const int ret = archive_read( infd1, buf1, rsize1 ); + if( ret != 0 ) { if( ret == 2 ) retval = 2; diff = true; break; } + if( !diff ) + { + const int rd = readblock( infd2, buf2, rsize2 ); + if( rd != rsize2 ) + { + if( errno ) show_file_error( filename, "Read error", errno ); + else show_file_diff( filename, "EOF found in file" ); + diff = true; + } + else + { + int i = 0; while( i < rsize2 && buf1[i] == buf2[i] ) ++i; + if( i < rsize2 ) + { show_file_diff( filename, "Contents differ" ); diff = true; } + } + } + if( rest < bufsize ) break; + rest -= rsize1; + } + if( diff ) set_error_status( 1 ); + close( infd2 ); + return retval; + } + + +int list_member( const int infd, const Extended & extended, + const Tar_header header ) + { + show_member_name( extended, header, 0, grbuf ); + return skip_member( infd, extended ); + } + + bool contains_dotdot( const char * const filename ) { for( int i = 0; filename[i]; ++i ) @@ -331,7 +465,7 @@ int extract_member( const int infd, const Extended & extended, if( contains_dotdot( filename ) ) { show_file_error( filename, "Contains a '..' component, skipping." ); - return list_member( infd, extended, header, true ); + return skip_member( infd, extended ); } const mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits @@ -354,12 +488,6 @@ int extract_member( const int infd, const Extended & extended, case tf_symlink: { const char * const linkname = extended.linkpath().c_str(); -/* if( contains_dotdot( linkname ) ) - { - show_file_error( filename, - "Link destination contains a '..' component, skipping." ); - return list_member( infd, extended, header, false ); - }*/ const bool hard = typeflag == tf_link; if( ( hard && link( linkname, filename ) != 0 ) || ( !hard && symlink( linkname, filename ) != 0 ) ) @@ -545,20 +673,21 @@ unsigned long long parse_octal( const uint8_t * const ptr, const int size ) int decode( const std::string & archive_name, const Arg_parser & parser, const int filenames, const int num_workers, const int debug_level, - const bool keep_damaged, const bool listing, const bool missing_crc, + const Program_mode program_mode, const bool ignore_ids, + const bool keep_damaged, const bool missing_crc, const bool permissive ) { const int infd = archive_name.size() ? open_instream( archive_name ) : STDIN_FILENO; if( infd < 0 ) return 1; - // Execute -C options and mark filenames to be extracted or listed. + // Execute -C options and mark filenames to be compared, extracted or listed. // name_pending is of type char instead of bool to allow concurrent update. std::vector< char > name_pending( parser.arguments(), false ); for( int i = 0; i < parser.arguments(); ++i ) { const int code = parser.code( i ); - if( code == 'C' && !listing ) + if( code == 'C' && program_mode != m_list ) { const char * const dir = parser.argument( i ).c_str(); if( chdir( dir ) != 0 ) @@ -569,7 +698,7 @@ int decode( const std::string & archive_name, const Arg_parser & parser, } // multi-threaded --list is faster even with 1 thread and 1 file in archive - if( listing && num_workers > 0 ) + if( program_mode == m_list && num_workers > 0 ) { const Lzip_index lzip_index( infd, true, false ); // only regular files const long members = lzip_index.members(); @@ -580,7 +709,9 @@ int decode( const std::string & archive_name, const Arg_parser & parser, infd, std::min( (long)num_workers, members ), missing_crc, permissive ); } - lseek( infd, 0, SEEK_SET ); + if( lseek( infd, 0, SEEK_SET ) == 0 && lzip_index.retval() != 0 && + lzip_index.file_size() > 3 * header_size ) + archive_is_uncompressed_seekable = true; // unless compressed corrupt } has_lz_ext = // global var for archive_read @@ -599,9 +730,9 @@ int decode( const std::string & archive_name, const Arg_parser & parser, if( ret != 0 || !verify_ustar_chksum( header ) ) { if( ret == 0 && block_is_zero( header, header_size ) ) break; // EOF - if( verbosity >= 2 ) + if( skip_warn() && verbosity >= 2 ) std::fprintf( stderr, "ustar chksum = %07o\n", ustar_chksum( header ) ); - skip_warn(); set_error_status( 2 ); continue; + set_error_status( 2 ); continue; } skip_warn( true ); // reset warning @@ -676,8 +807,12 @@ int decode( const std::string & archive_name, const Arg_parser & parser, ( typeflag == tf_regular || typeflag == tf_hiperf ) ) extended.file_size( parse_octal( header + size_o, size_l ) ); - if( listing || skip ) - retval = list_member( infd, extended, header, skip ); + if( skip ) + retval = skip_member( infd, extended ); + else if( program_mode == m_list ) + retval = list_member( infd, extended, header ); + else if( program_mode == m_diff ) + retval = compare_member( infd, extended, header, ignore_ids ); else retval = extract_member( infd, extended, header, keep_damaged ); extended.reset(); @@ -690,7 +825,7 @@ int decode( const std::string & archive_name, const Arg_parser & parser, if( !parser.code( i ) && parser.argument( i ).size() && name_pending[i] ) { show_file_error( parser.argument( i ).c_str(), "Not found in archive." ); - set_error_status( 1 ); + retval = 1; } - return final_exit_status( retval ); + return final_exit_status( retval, program_mode != m_diff ); } |