summary | refs | log | tree | commit | diff | stats
path: root/extract.cc
diff options
context:
space:
mode:
Diffstat (limited to 'extract.cc')
-rw-r--r-- extract.cc 201
1 files changed, 168 insertions, 33 deletions
diff --git a/extract.cc b/extract.cc
index 63f58a5..2307060 100644
--- a/extract.cc
+++ b/extract.cc
@@ -26,13 +26,14 @@
#include <cstring>
#include <string>
#include <vector>
+#include <pthread.h>
#include <stdint.h>
#include <unistd.h>
#include <utime.h>
#include <sys/stat.h>
#include <sys/types.h>
#if defined(__GNU_LIBRARY__)
-#include <sys/sysmacros.h> // for makedev
+#include <sys/sysmacros.h> // for major, minor, makedev
#endif
#include <lzlib.h>
@@ -44,15 +45,17 @@
namespace {
Resizable_buffer grbuf( initial_line_length );
+bool archive_is_uncompressed_seekable = false; // set in decode, cleared in archive_read for compressed input; lets skip_member lseek over data
bool has_lz_ext; // global var for archive_read
-void skip_warn( const bool reset = false ) // avoid duplicate warnings
+bool skip_warn( const bool reset = false ) // avoid duplicate warnings; returns true only when it shows the warning
{
static bool skipping = false;
if( reset ) skipping = false;
else if( !skipping )
- { skipping = true; show_error( "Skipping to next header." ); }
+ { skipping = true; show_error( "Skipping to next header." ); return true; }
+ return false; // reset requested, or the warning was already shown
}
@@ -122,7 +125,8 @@ int archive_read( const int infd, uint8_t * const buf, const int size,
}
if( !islz ) // uncompressed
{ if( rd == size ) return 0; fatal = true; return 2; }
- decoder = LZ_decompress_open(); // compressed
+ archive_is_uncompressed_seekable = false; // compressed
+ decoder = LZ_decompress_open();
if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok )
{ show_error( mem_msg );
LZ_decompress_close( decoder ); fatal = true; return 2; }
@@ -251,7 +255,7 @@ void format_member_name( const Extended & extended, const Tar_header header,
format_mode_string( header, rbuf() );
const int group_string_len =
format_user_group_string( header, rbuf() + mode_string_size );
- const int offset = mode_string_size + group_string_len;
+ int offset = mode_string_size + group_string_len;
const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits
struct tm tms;
const struct tm * tm = localtime_r( &mtime, &tms );
@@ -261,13 +265,20 @@ void format_member_name( const Extended & extended, const Tar_header header,
const bool islink = ( typeflag == tf_link || typeflag == tf_symlink );
const char * const link_string = !islink ? "" :
( ( typeflag == tf_link ) ? " link to " : " -> " );
+ if( typeflag == tf_chardev || typeflag == tf_blockdev )
+ offset += snprintf( rbuf() + offset, rbuf.size() - offset, " %5u,%u",
+ (unsigned)parse_octal( header + devmajor_o, devmajor_l ),
+ (unsigned)parse_octal( header + devminor_o, devminor_l ) );
+ else
+ offset += snprintf( rbuf() + offset, rbuf.size() - offset, " %9llu",
+ extended.file_size() );
for( int i = 0; i < 2; ++i )
{
const int len = snprintf( rbuf() + offset, rbuf.size() - offset,
- " %9llu %4d-%02u-%02u %02u:%02u %s%s%s\n",
- extended.file_size(), 1900 + tm->tm_year, 1 + tm->tm_mon,
- tm->tm_mday, tm->tm_hour, tm->tm_min, extended.path().c_str(),
- link_string, !islink ? "" : extended.linkpath().c_str() );
+ " %4d-%02u-%02u %02u:%02u %s%s%s\n",
+ 1900 + tm->tm_year, 1 + tm->tm_mon, tm->tm_mday,
+ tm->tm_hour, tm->tm_min, extended.path().c_str(),
+ link_string, islink ? extended.linkpath().c_str() : "" );
if( (int)rbuf.size() > len + offset || !rbuf.resize( len + offset + 1 ) )
break;
}
@@ -292,16 +303,15 @@ void show_member_name( const Extended & extended, const Tar_header header,
}
-int list_member( const int infd, const Extended & extended,
- const Tar_header header, const bool skip )
+int skip_member( const int infd, const Extended & extended )
{
- if( !skip ) show_member_name( extended, header, 0, grbuf );
-
- const unsigned bufsize = 32 * header_size;
- uint8_t buf[bufsize];
unsigned long long rest = extended.file_size();
const int rem = rest % header_size;
const int padding = rem ? header_size - rem : 0;
+ if( archive_is_uncompressed_seekable &&
+ lseek( infd, rest + padding, SEEK_CUR ) > 0 ) return 0;
+ const unsigned bufsize = 32 * header_size;
+ uint8_t buf[bufsize];
while( rest > 0 )
{
const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding;
@@ -314,6 +324,130 @@ int list_member( const int infd, const Extended & extended,
}
+void show_file_diff( const char * const filename, const char * const msg )
+ { // report one difference as "<filename>: <msg>" on stderr
+ if( verbosity >= 0 ) std::fprintf( stderr, "%s: %s\n", filename, msg ); // silent when verbosity < 0
+ }
+
+
+int compare_member( const int infd1, const Extended & extended,
+ const Tar_header header, const bool ignore_ids )
+ { // compare one archive member (data read from infd1) with the file named extended.path()
+ show_member_name( extended, header, 1, grbuf );
+ unsigned long long rest = extended.file_size(); // member data bytes still to be consumed from the archive
+ const char * const filename = extended.path().c_str();
+ const Typeflag typeflag = (Typeflag)header[typeflag_o];
+ bool diff = false, size_differs = false, type_differs = true; // type_differs is cleared only if lstat succeeds and the type matches
+ struct stat st;
+ if( lstat( filename, &st ) != 0 ) // lstat: examine a symlink itself, not its target
+ show_file_error( filename, "Warning: Can't stat", errno ); // type_differs stays true, so contents are not compared
+ else if( ( typeflag == tf_regular || typeflag == tf_hiperf ) &&
+ !S_ISREG( st.st_mode ) )
+ show_file_diff( filename, "Is not a regular file" );
+ else if( typeflag == tf_symlink && !S_ISLNK( st.st_mode ) )
+ show_file_diff( filename, "Is not a symlink" );
+ else if( typeflag == tf_chardev && !S_ISCHR( st.st_mode ) )
+ show_file_diff( filename, "Is not a character device" );
+ else if( typeflag == tf_blockdev && !S_ISBLK( st.st_mode ) )
+ show_file_diff( filename, "Is not a block device" );
+ else if( typeflag == tf_directory && !S_ISDIR( st.st_mode ) )
+ show_file_diff( filename, "Is not a directory" );
+ else if( typeflag == tf_fifo && !S_ISFIFO( st.st_mode ) )
+ show_file_diff( filename, "Is not a FIFO" );
+ else
+ { // none of the type mismatches above was detected; compare metadata
+ type_differs = false;
+ if( typeflag != tf_symlink ) // mode is not compared for symlinks
+ {
+ const mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits
+ if( mode != ( st.st_mode & ( S_ISUID | S_ISGID | S_ISVTX |
+ S_IRWXU | S_IRWXG | S_IRWXO ) ) )
+ { show_file_diff( filename, "Mode differs" ); diff = true; }
+ }
+ if( !ignore_ids ) // the caller can disable the uid/gid checks
+ {
+ if( (uid_t)parse_octal( header + uid_o, uid_l ) != st.st_uid )
+ { show_file_diff( filename, "Uid differs" ); diff = true; }
+ if( (gid_t)parse_octal( header + gid_o, gid_l ) != st.st_gid )
+ { show_file_diff( filename, "Gid differs" ); diff = true; }
+ }
+ if( typeflag != tf_symlink )
+ {
+ const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits
+ if( mtime != st.st_mtime )
+ { show_file_diff( filename, "Mod time differs" ); diff = true; }
+ if( ( typeflag == tf_regular || typeflag == tf_hiperf ) &&
+ (off_t)rest != st.st_size ) // don't compare contents
+ { show_file_diff( filename, "Size differs" ); size_differs = true; }
+ if( ( typeflag == tf_chardev || typeflag == tf_blockdev ) &&
+ ( parse_octal( header + devmajor_o, devmajor_l ) != major( st.st_rdev ) ||
+ parse_octal( header + devminor_o, devminor_l ) != minor( st.st_rdev ) ) )
+ { show_file_diff( filename, "Device number differs" ); diff = true; }
+ }
+ else // symlink: compare the link target with the archived linkpath
+ {
+ char * const buf = new char[st.st_size+1]; // +1 for the terminating 0 added below
+ long len = readlink( filename, buf, st.st_size ); // readlink does not null-terminate
+ bool e = ( len != st.st_size ); // a read error or a length mismatch counts as a difference
+ if( !e ) { buf[len] = 0; if( extended.linkpath() != buf ) e = true; }
+ delete[] buf;
+ if( e ) { show_file_diff( filename, "Symlink differs" ); diff = true; }
+ }
+ }
+ if( diff || size_differs || type_differs )
+ { diff = false; set_error_status( 1 ); } // diff is reset so the contents loop below can reuse it
+ if( rest == 0 ) return 0; // no data blocks to consume
+ if( ( typeflag != tf_regular && typeflag != tf_hiperf ) ||
+ size_differs || type_differs ) return skip_member( infd1, extended ); // only consume the member data
+ // else compare file contents
+ const int rem = rest % header_size;
+ const int padding = rem ? header_size - rem : 0; // archive data is padded to a multiple of header_size
+ const unsigned bufsize = 32 * header_size;
+ uint8_t buf1[bufsize]; // chunk read from the archive
+ uint8_t buf2[bufsize]; // chunk read from the file
+ const int infd2 = open_instream( filename );
+ if( infd2 < 0 )
+ { set_error_status( 1 ); return skip_member( infd1, extended ); }
+ int retval = 0;
+ while( rest > 0 )
+ {
+ const int rsize1 = ( rest >= bufsize ) ? bufsize : rest + padding; // the last archive read also consumes the padding
+ const int rsize2 = ( rest >= bufsize ) ? bufsize : rest; // the file has no padding
+ const int ret = archive_read( infd1, buf1, rsize1 );
+ if( ret != 0 ) { if( ret == 2 ) retval = 2; diff = true; break; } // stop on archive read error; only ret == 2 is propagated
+ if( !diff ) // after the first difference keep draining the archive but stop reading the file
+ {
+ const int rd = readblock( infd2, buf2, rsize2 );
+ if( rd != rsize2 )
+ {
+ if( errno ) show_file_error( filename, "Read error", errno ); // NOTE(review): assumes readblock leaves errno == 0 on a short read at EOF - confirm
+ else show_file_diff( filename, "EOF found in file" );
+ diff = true;
+ }
+ else
+ {
+ int i = 0; while( i < rsize2 && buf1[i] == buf2[i] ) ++i; // index of the first differing byte, or rsize2
+ if( i < rsize2 )
+ { show_file_diff( filename, "Contents differ" ); diff = true; }
+ }
+ }
+ if( rest < bufsize ) break; // that was the last (partial) chunk
+ rest -= rsize1;
+ }
+ if( diff ) set_error_status( 1 );
+ close( infd2 );
+ return retval; // 0, or 2 if archive_read returned 2
+ }
+
+
+int list_member( const int infd, const Extended & extended,
+ const Tar_header header )
+ { // list one member: show its name, then skip over its data
+ show_member_name( extended, header, 0, grbuf );
+ return skip_member( infd, extended ); // the return value of skip_member is passed through
+ }
+
+
bool contains_dotdot( const char * const filename )
{
for( int i = 0; filename[i]; ++i )
@@ -331,7 +465,7 @@ int extract_member( const int infd, const Extended & extended,
if( contains_dotdot( filename ) )
{
show_file_error( filename, "Contains a '..' component, skipping." );
- return list_member( infd, extended, header, true );
+ return skip_member( infd, extended );
}
const mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits
const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits
@@ -354,12 +488,6 @@ int extract_member( const int infd, const Extended & extended,
case tf_symlink:
{
const char * const linkname = extended.linkpath().c_str();
-/* if( contains_dotdot( linkname ) )
- {
- show_file_error( filename,
- "Link destination contains a '..' component, skipping." );
- return list_member( infd, extended, header, false );
- }*/
const bool hard = typeflag == tf_link;
if( ( hard && link( linkname, filename ) != 0 ) ||
( !hard && symlink( linkname, filename ) != 0 ) )
@@ -545,20 +673,21 @@ unsigned long long parse_octal( const uint8_t * const ptr, const int size )
int decode( const std::string & archive_name, const Arg_parser & parser,
const int filenames, const int num_workers, const int debug_level,
- const bool keep_damaged, const bool listing, const bool missing_crc,
+ const Program_mode program_mode, const bool ignore_ids,
+ const bool keep_damaged, const bool missing_crc,
const bool permissive )
{
const int infd = archive_name.size() ?
open_instream( archive_name ) : STDIN_FILENO;
if( infd < 0 ) return 1;
- // Execute -C options and mark filenames to be extracted or listed.
+ // Execute -C options and mark filenames to be compared, extracted or listed.
// name_pending is of type char instead of bool to allow concurrent update.
std::vector< char > name_pending( parser.arguments(), false );
for( int i = 0; i < parser.arguments(); ++i )
{
const int code = parser.code( i );
- if( code == 'C' && !listing )
+ if( code == 'C' && program_mode != m_list )
{
const char * const dir = parser.argument( i ).c_str();
if( chdir( dir ) != 0 )
@@ -569,7 +698,7 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
}
// multi-threaded --list is faster even with 1 thread and 1 file in archive
- if( listing && num_workers > 0 )
+ if( program_mode == m_list && num_workers > 0 )
{
const Lzip_index lzip_index( infd, true, false ); // only regular files
const long members = lzip_index.members();
@@ -580,7 +709,9 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
infd, std::min( (long)num_workers, members ),
missing_crc, permissive );
}
- lseek( infd, 0, SEEK_SET );
+ if( lseek( infd, 0, SEEK_SET ) == 0 && lzip_index.retval() != 0 &&
+ lzip_index.file_size() > 3 * header_size )
+ archive_is_uncompressed_seekable = true; // unless compressed corrupt
}
has_lz_ext = // global var for archive_read
@@ -599,9 +730,9 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
if( ret != 0 || !verify_ustar_chksum( header ) )
{
if( ret == 0 && block_is_zero( header, header_size ) ) break; // EOF
- if( verbosity >= 2 )
+ if( skip_warn() && verbosity >= 2 )
std::fprintf( stderr, "ustar chksum = %07o\n", ustar_chksum( header ) );
- skip_warn(); set_error_status( 2 ); continue;
+ set_error_status( 2 ); continue;
}
skip_warn( true ); // reset warning
@@ -676,8 +807,12 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
( typeflag == tf_regular || typeflag == tf_hiperf ) )
extended.file_size( parse_octal( header + size_o, size_l ) );
- if( listing || skip )
- retval = list_member( infd, extended, header, skip );
+ if( skip )
+ retval = skip_member( infd, extended );
+ else if( program_mode == m_list )
+ retval = list_member( infd, extended, header );
+ else if( program_mode == m_diff )
+ retval = compare_member( infd, extended, header, ignore_ids );
else
retval = extract_member( infd, extended, header, keep_damaged );
extended.reset();
@@ -690,7 +825,7 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
if( !parser.code( i ) && parser.argument( i ).size() && name_pending[i] )
{
show_file_error( parser.argument( i ).c_str(), "Not found in archive." );
- set_error_status( 1 );
+ retval = 1;
}
- return final_exit_status( retval );
+ return final_exit_status( retval, program_mode != m_diff );
}