summary | refs | log | tree | commit | diff | stats
path: root/extract.cc
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- extract.cc 335
1 files changed, 239 insertions, 96 deletions
diff --git a/extract.cc b/extract.cc
index 58cda61..e25f5b6 100644
--- a/extract.cc
+++ b/extract.cc
@@ -1,5 +1,5 @@
/* Tarlz - Archiver with multimember lzip compression
- Copyright (C) 2013-2018 Antonio Diaz Diaz.
+ Copyright (C) 2013-2019 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,7 +18,9 @@
#define _FILE_OFFSET_BITS 64
#include <algorithm>
+#include <cctype>
#include <cerrno>
+#include <climits>
#include <cstdio>
#include <cstdlib>
#include <cstring>
@@ -36,13 +38,15 @@
#include "arg_parser.h"
#include "lzip.h"
+#include "lzip_index.h"
#include "tarlz.h"
namespace {
+Resizable_buffer grbuf( initial_line_length );
int gretval = 0;
-bool has_lz_ext; // global var for archive_read
+bool has_lz_ext; // global var for archive_read
void skip_warn( const bool reset = false ) // avoid duplicate warnings
{
@@ -83,13 +87,6 @@ bool make_path( const std::string & name )
}
-inline bool block_is_zero( const uint8_t * const buf, const int size )
- {
- for( int i = 0; i < size; ++i ) if( buf[i] != 0 ) return false;
- return true;
- }
-
-
// Return value: 0 = OK, 1 = damaged member, 2 = fatal error.
// If sizep and error, return in *sizep the number of bytes read.
// The first 6 bytes of the archive must be intact for islz to be meaningful.
@@ -114,6 +111,7 @@ int archive_read( const int infd, uint8_t * const buf, const int size,
{ show_error( "Error reading archive", errno ); fatal = true; return 2; }
const Lzip_header & header = (*(const Lzip_header *)buf);
bool islz = ( rd >= min_member_size && header.verify_magic() &&
+ header.verify_version() &&
isvalid_ds( header.dictionary_size() ) );
const bool istar = ( rd == size && verify_ustar_chksum( buf ) );
const bool iseof =
@@ -160,8 +158,8 @@ int archive_read( const int infd, uint8_t * const buf, const int size,
skip_warn(); gretval = 2; return 1;
}
if( rd == 0 && LZ_decompress_finished( decoder ) == 1 )
- { LZ_decompress_close( decoder );
- show_error( "Archive ends unexpectedly." ); fatal = true; return 2; }
+ { LZ_decompress_close( decoder );
+ show_error( "Archive ends unexpectedly." ); fatal = true; return 2; }
sz += rd; if( sizep ) *sizep = sz;
if( sz == size && LZ_decompress_finished( decoder ) == 1 &&
LZ_decompress_close( decoder ) < 0 )
@@ -185,12 +183,14 @@ int archive_read( const int infd, uint8_t * const buf, const int size,
}
-const char * mode_string( const Tar_header header )
+enum { mode_string_size = 10,
+ group_string_size = 1 + uname_l + 1 + gname_l + 1 }; // 67
+
+void format_mode_string( const Tar_header header, char buf[mode_string_size] )
{
- static char buf[11];
const Typeflag typeflag = (Typeflag)header[typeflag_o];
- std::memcpy( buf, "----------", sizeof buf - 1 );
+ std::memcpy( buf, "----------", mode_string_size );
switch( typeflag )
{
case tf_regular: break;
@@ -203,7 +203,7 @@ const char * mode_string( const Tar_header header )
case tf_hiperf: buf[0] = 'C'; break;
default: buf[0] = '?';
}
- const mode_t mode = strtoul( header + mode_o, 0, 8 ); // 12 bits
+ const mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits
const bool setuid = mode & S_ISUID;
const bool setgid = mode & S_ISGID;
const bool sticky = mode & S_ISVTX;
@@ -219,46 +219,79 @@ const char * mode_string( const Tar_header header )
if( mode & S_IWOTH ) buf[8] = 'w';
if( mode & S_IXOTH ) buf[9] = sticky ? 't' : 'x';
else if( sticky ) buf[9] = 'T';
- return buf;
}
-const char * user_group_string( const Tar_header header )
+int format_user_group_string( const Tar_header header,
+ char buf[group_string_size] )
{
- enum { bufsize = uname_l + 1 + gname_l + 1 };
- static char buf[bufsize];
-
+ int len;
if( header[uname_o] && header[gname_o] )
- snprintf( buf, bufsize, "%.32s/%.32s", header + uname_o, header + gname_o );
+ len = snprintf( buf, group_string_size,
+ " %.32s/%.32s", header + uname_o, header + gname_o );
else
{
- const int uid = strtoul( header + uid_o, 0, 8 );
- const int gid = strtoul( header + gid_o, 0, 8 );
- snprintf( buf, bufsize, "%u/%u", uid, gid );
+ const unsigned uid = parse_octal( header + uid_o, uid_l );
+ const unsigned gid = parse_octal( header + gid_o, gid_l );
+ len = snprintf( buf, group_string_size, " %u/%u", uid, gid );
}
- return buf;
+ return len;
}
+} // end namespace
-void show_member_name( const Extended & extended, const Tar_header header,
- const int vlevel )
+bool block_is_zero( const uint8_t * const buf, const int size )
{
- if( verbosity < vlevel ) return;
- if( verbosity > vlevel )
+ for( int i = 0; i < size; ++i ) if( buf[i] != 0 ) return false;
+ return true;
+ }
+
+
+void format_member_name( const Extended & extended, const Tar_header header,
+ Resizable_buffer & rbuf, const bool long_format )
+ {
+ if( long_format )
{
- const time_t mtime = strtoull( header + mtime_o, 0, 8 ); // 33 bits
- const struct tm * const tm = localtime( &mtime );
+ format_mode_string( header, rbuf() );
+ const int group_string_len =
+ format_user_group_string( header, rbuf() + mode_string_size );
+ const int offset = mode_string_size + group_string_len;
+ const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits
+ struct tm tms;
+ const struct tm * tm = localtime_r( &mtime, &tms );
+ if( !tm )
+ { time_t z = 0; tm = localtime_r( &z, &tms ); if( !tm ) tm = &tms; }
const Typeflag typeflag = (Typeflag)header[typeflag_o];
const bool islink = ( typeflag == tf_link || typeflag == tf_symlink );
const char * const link_string = !islink ? "" :
( ( typeflag == tf_link ) ? " link to " : " -> " );
- std::printf( "%s %s %9llu %4d-%02u-%02u %02u:%02u %s%s%s\n",
- mode_string( header ), user_group_string( header ),
- extended.size, 1900 + tm->tm_year, 1 + tm->tm_mon,
- tm->tm_mday, tm->tm_hour, tm->tm_min, extended.path.c_str(),
- link_string, !islink ? "" : extended.linkpath.c_str() );
+ for( int i = 0; i < 2; ++i )
+ {
+ const int len = snprintf( rbuf() + offset, rbuf.size() - offset,
+ " %9llu %4d-%02u-%02u %02u:%02u %s%s%s\n",
+ extended.size, 1900 + tm->tm_year, 1 + tm->tm_mon,
+ tm->tm_mday, tm->tm_hour, tm->tm_min, extended.path.c_str(),
+ link_string, !islink ? "" : extended.linkpath.c_str() );
+ if( (int)rbuf.size() > len + offset ) break;
+ else rbuf.resize( len + offset + 1 );
+ }
+ }
+ else
+ {
+ if( rbuf.size() < extended.path.size() + 2 )
+ rbuf.resize( extended.path.size() + 2 );
+ snprintf( rbuf(), rbuf.size(), "%s\n", extended.path.c_str() );
}
- else std::printf( "%s\n", extended.path.c_str() );
+ }
+
+namespace {
+
+void show_member_name( const Extended & extended, const Tar_header header,
+ const int vlevel, Resizable_buffer & rbuf )
+ {
+ if( verbosity < vlevel ) return;
+ format_member_name( extended, header, rbuf, verbosity > vlevel );
+ std::fputs( rbuf(), stdout );
std::fflush( stdout );
}
@@ -266,7 +299,7 @@ void show_member_name( const Extended & extended, const Tar_header header,
int list_member( const int infd, const Extended & extended,
const Tar_header header, const bool skip )
{
- if( !skip ) show_member_name( extended, header, 0 );
+ if( !skip ) show_member_name( extended, header, 0, grbuf );
const unsigned bufsize = 32 * header_size;
uint8_t buf[bufsize];
@@ -304,13 +337,13 @@ int extract_member( const int infd, const Extended & extended,
show_file_error( filename, "Contains a '..' component, skipping." );
return list_member( infd, extended, header, true );
}
- const mode_t mode = strtoul( header + mode_o, 0, 8 ); // 12 bits
- const time_t mtime = strtoull( header + mtime_o, 0, 8 ); // 33 bits
+ const mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits
+ const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits
const Typeflag typeflag = (Typeflag)header[typeflag_o];
const bool islink = ( typeflag == tf_link || typeflag == tf_symlink );
int outfd = -1;
- show_member_name( extended, header, 1 );
+ show_member_name( extended, header, 1, grbuf );
std::remove( filename );
make_path( filename );
switch( typeflag )
@@ -352,8 +385,9 @@ int extract_member( const int infd, const Extended & extended,
case tf_chardev:
case tf_blockdev:
{
- const unsigned dev = makedev( strtoul( header + devmajor_o, 0, 8 ),
- strtoul( header + devminor_o, 0, 8 ) );
+ const unsigned dev =
+ makedev( parse_octal( header + devmajor_o, devmajor_l ),
+ parse_octal( header + devminor_o, devminor_l ) );
const int dmode = ( typeflag == tf_chardev ? S_IFCHR : S_IFBLK ) | mode;
if( mknod( filename, dmode, dev ) != 0 )
{
@@ -376,8 +410,8 @@ int extract_member( const int infd, const Extended & extended,
return 2;
}
- const uid_t uid = (uid_t)strtoul( header + uid_o, 0, 8 );
- const gid_t gid = (gid_t)strtoul( header + gid_o, 0, 8 );
+ const uid_t uid = (uid_t)parse_octal( header + uid_o, uid_l );
+ const gid_t gid = (gid_t)parse_octal( header + gid_o, gid_l );
if( !islink && chown( filename, uid, gid ) != 0 &&
errno != EPERM && errno != EINVAL )
{
@@ -423,6 +457,7 @@ int extract_member( const int infd, const Extended & extended,
return 0;
}
+} // end namespace
// Removes any amount of leading "./" and '/' strings.
const char * remove_leading_slash( const char * const filename )
@@ -464,78 +499,163 @@ bool compare_tslash( const char * const name1, const char * const name2 )
return ( !*p && !*q );
}
-} // end namespace
+namespace {
+// Parses an unsigned decimal number from the first 'size' bytes of 'ptr',
+// skipping any leading whitespace.
+// On success returns the value (never larger than LLONG_MAX) and, if 'tailp'
+// is not null, stores in *tailp a pointer to the first byte not consumed.
+// On failure (no digit found within 'size' bytes, or value larger than
+// LLONG_MAX) returns 0 and, if 'tailp' is not null, stores 'ptr' in *tailp.
+unsigned long long parse_decimal( const char * const ptr,
+                                  const char ** const tailp,
+                                  const unsigned long long size )
+  {
+  const unsigned long long limit = LLONG_MAX;
+  unsigned long long result = 0;
+  unsigned long long i = 0;
+  // cast to unsigned char; a negative char passed to <cctype> is undefined
+  while( i < size && std::isspace( (unsigned char)ptr[i] ) ) ++i;
+  // test the bound first so that ptr[size] is never read
+  if( i >= size || !std::isdigit( (unsigned char)ptr[i] ) )
+    { if( tailp ) *tailp = ptr; return 0; }
+  for( ; i < size && std::isdigit( (unsigned char)ptr[i] ); ++i )
+    {
+    const unsigned digit = ptr[i] - '0';
+    // check before multiplying; a wrapped product can still look valid
+    if( result > ( limit - digit ) / 10 )		// overflow
+      { if( tailp ) *tailp = ptr; return 0; }
+    result = result * 10 + digit;
+    }
+  if( tailp ) *tailp = ptr + i;
+  return result;
+  }
-bool Extended::parse( const int infd, const Tar_header header,
- const bool permissive )
+
+uint32_t parse_record_crc( const char * const ptr )
{
- const unsigned long long edsize = strtoull( header + size_o, 0, 8 );
+ uint32_t crc = 0;
+ for( int i = 0; i < 8; ++i )
+ {
+ crc <<= 4;
+ if( ptr[i] >= '0' && ptr[i] <= '9' ) crc += ptr[i] - '0';
+ else if( ptr[i] >= 'A' && ptr[i] <= 'F' ) crc += ptr[i] + 10 - 'A';
+ else if( ptr[i] >= 'a' && ptr[i] <= 'f' ) crc += ptr[i] + 10 - 'a';
+ else { crc = 0; break; } // invalid digit in crc string
+ }
+ return crc;
+ }
+
+
+bool parse_records( const int infd, Extended & extended,
+ const Tar_header header, const bool permissive )
+ {
+ const unsigned long long edsize = parse_octal( header + size_o, size_l );
const unsigned long long bufsize = round_up( edsize );
if( bufsize == 0 || edsize == 0 || edsize >= 1ULL << 33 )
return false; // overflow or no extended data
char * const buf = new char[bufsize]; // extended records buffer
- if( archive_read( infd, (uint8_t *)buf, bufsize ) != 0 ) goto error;
+ const bool ret = ( archive_read( infd, (uint8_t *)buf, bufsize ) == 0 &&
+ extended.parse( buf, edsize, permissive ) );
+ delete[] buf;
+ return ret;
+ }
+
+} // end namespace
+
+
+/* Reads from 'fd' into 'buf' until 'size' bytes have been stored, EOF is
+   found, or read fails with an error other than EINTR.
+   Returns the number of bytes really read.
+   If (returned value < size) and (errno == 0), means EOF was reached.
+*/
+int readblock( const int fd, uint8_t * const buf, const int size )
+  {
+  int done = 0;
+  errno = 0;
+  for( ; done < size; errno = 0 )		// errno is cleared before each retry
+    {
+    const int rd = read( fd, buf + done, size - done );
+    if( rd == 0 ) break;				// EOF
+    if( rd < 0 && errno != EINTR ) break;		// keep errno for the caller
+    if( rd > 0 ) done += rd;
+    }
+  return done;
+  }
+
+
+/* Writes 'size' bytes from 'buf' to 'fd', retrying after short writes and
+   after EINTR, and stopping at the first other error.
+   Returns the number of bytes really written.
+   If (returned value < size), it is always an error.
+*/
+int writeblock( const int fd, const uint8_t * const buf, const int size )
+  {
+  int written = 0;
+  errno = 0;
+  for( ; written < size; errno = 0 )	// errno is cleared before each retry
+    {
+    const int wr = write( fd, buf + written, size - written );
+    if( wr > 0 ) { written += wr; continue; }
+    if( wr < 0 && errno != EINTR ) break;		// keep errno for the caller
+    }
+  return written;
+  }
+
+
+// Decodes the octal number stored in the first 'size' bytes of 'ptr'.
+// Leading whitespace is skipped and decoding stops at the first byte that is
+// not an octal digit. Returns 0 if no octal digit is found.
+unsigned long long parse_octal( const uint8_t * const ptr, const int size )
+  {
+  int pos = 0;
+  while( pos < size && std::isspace( ptr[pos] ) ) ++pos;
+  unsigned long long value = 0;
+  while( pos < size && ptr[pos] >= '0' && ptr[pos] <= '7' )
+    {
+    value = ( value << 3 ) + ( ptr[pos] - '0' );	// 3 bits per octal digit
+    ++pos;
+    }
+  return value;
+  }
+
+
+bool Extended::parse( const char * const buf, const unsigned long long edsize,
+ const bool permissive )
+ {
for( unsigned long long pos = 0; pos < edsize; ) // parse records
{
- char * tail;
- const unsigned long long rsize = strtoull( buf + pos, &tail, 10 );
+ const char * tail;
+ const unsigned long long rsize =
+ parse_decimal( buf + pos, &tail, edsize - pos );
if( rsize == 0 || rsize > edsize - pos || tail[0] != ' ' ||
- buf[pos+rsize-1] != '\n' ) goto error;
+ buf[pos+rsize-1] != '\n' ) return false;
++tail; // point to keyword
- // length of (keyword + '=' + value) without the final newline
- const unsigned long long rest = ( buf + pos + rsize - 1 ) - tail;
+ // rest = length of (keyword + '=' + value) without the final newline
+ const unsigned long long rest = ( buf + ( pos + rsize - 1 ) ) - tail;
if( rest > 5 && std::memcmp( tail, "path=", 5 ) == 0 )
- { if( path.size() && !permissive ) goto error;
+ { if( path.size() && !permissive ) return false;
path.assign( tail + 5, rest - 5 ); }
else if( rest > 9 && std::memcmp( tail, "linkpath=", 9 ) == 0 )
- { if( linkpath.size() && !permissive ) goto error;
+ { if( linkpath.size() && !permissive ) return false;
linkpath.assign( tail + 9, rest - 9 ); }
else if( rest > 5 && std::memcmp( tail, "size=", 5 ) == 0 )
{
- if( size != 0 && !permissive ) goto error;
- size = 0;
- for( unsigned long long i = 5; i < rest; ++i )
- {
- if( tail[i] < '0' || tail[i] > '9' ) goto error;
- const unsigned long long prev = size;
- size = size * 10 + ( tail[i] - '0' );
- if( size < prev ) goto error; // overflow
- }
- if( size < 1ULL << 33 ) goto error; // size fits in ustar header
+ if( size != 0 && !permissive ) return false;
+ size = parse_decimal( tail + 5, &tail, rest - 5 );
+ // parse error or size fits in ustar header
+ if( size < 1ULL << 33 || tail != buf + ( pos + rsize - 1 ) ) return false;
}
else if( rest > 10 && std::memcmp( tail, "GNU.crc32=", 10 ) == 0 )
{
- if( crc_present && !permissive ) goto error;
- if( rsize != 22 ) goto error;
- char * t;
- const uint32_t stored_crc = strtoul( tail + 10, &t, 16 );
- if( t - tail - 10 != 8 || t[0] != '\n' ) goto error;
+ if( crc_present && !permissive ) return false;
+ if( rsize != 22 ) return false;
+ const uint32_t stored_crc = parse_record_crc( tail + 10 );
const uint32_t computed_crc =
crc32c.windowed_crc( (const uint8_t *)buf, pos + rsize - 9, edsize );
crc_present = true;
- if( stored_crc != computed_crc ) goto error;
+ if( stored_crc != computed_crc ) return false;
}
pos += rsize;
}
- delete[] buf;
return true;
-error:
- delete[] buf;
- return false;
}
int decode( const std::string & archive_name, const Arg_parser & parser,
- const int filenames, const bool keep_damaged, const bool listing,
- const bool missing_crc, const bool permissive )
+ const int filenames, const int num_workers, const int debug_level,
+ const bool keep_damaged, const bool listing, const bool missing_crc,
+ const bool permissive )
{
const int infd = archive_name.size() ?
open_instream( archive_name ) : STDIN_FILENO;
if( infd < 0 ) return 1;
- // execute -C options and mark filenames to be extracted or listed
- std::vector< bool > name_pending( parser.arguments(), false );
+ // Execute -C options and mark filenames to be extracted or listed.
+ // name_pending is of type char instead of bool to allow concurrent update.
+ std::vector< char > name_pending( parser.arguments(), false );
for( int i = 0; i < parser.arguments(); ++i )
{
const int code = parser.code( i );
@@ -549,34 +669,57 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
if( !code ) name_pending[i] = true;
}
- has_lz_ext =
+ if( listing && num_workers > 0 ) // multi-threaded --list
+ {
+ const Lzip_index lzip_index( infd, true, false );
+ const long members = lzip_index.members();
+ if( lzip_index.retval() == 0 && ( members >= 3 ||
+ ( members >= 2 && lzip_index.dblock( members - 1 ).size() > 1024 ) ) )
+ { //show_file_error( archive_name.c_str(), "Is compressed seekable" );
+ return list_lz( parser, name_pending, lzip_index, filenames,
+ debug_level, infd, std::min( (long)num_workers, members ),
+ missing_crc, permissive ); }
+ lseek( infd, 0, SEEK_SET );
+ }
+
+ has_lz_ext = // global var for archive_read
( archive_name.size() > 3 &&
archive_name.compare( archive_name.size() - 3, 3, ".lz" ) == 0 ) ||
( archive_name.size() > 4 &&
archive_name.compare( archive_name.size() - 4, 4, ".tlz" ) == 0 );
- Extended extended; // metadata from extended records
+ Extended extended; // metadata from extended records
int retval = 0;
- bool prev_extended = false; // prev header was extended
- while( true ) // process one member per iteration
+ bool prev_extended = false; // prev header was extended
+ while( true ) // process one tar member per iteration
{
- uint8_t buf[header_size];
- const int ret = archive_read( infd, buf, header_size );
+ Tar_header header;
+ const int ret = archive_read( infd, header, header_size );
if( ret == 2 ) return 2;
- if( ret != 0 || !verify_ustar_chksum( buf ) )
+ if( ret != 0 || !verify_ustar_chksum( header ) )
{
- if( ret == 0 && block_is_zero( buf, header_size ) ) break; // EOF
+ if( ret == 0 && block_is_zero( header, header_size ) ) break; // EOF
skip_warn(); gretval = 2; continue;
}
- skip_warn( true ); // reset warning
+ skip_warn( true ); // reset warning
- const char * const header = (const char *)buf;
const Typeflag typeflag = (Typeflag)header[typeflag_o];
+ if( typeflag == tf_global )
+ {
+ if( prev_extended )
+ { show_error( "Format violation: global header after extended header." );
+ return 2; }
+ Extended dummy; // global headers are parsed and ignored
+ if( !parse_records( infd, dummy, header, true ) )
+ { show_error( "Error in global extended records. Skipping to next header." );
+ gretval = 2; }
+ continue;
+ }
if( typeflag == tf_extended )
{
if( prev_extended && !permissive )
{ show_error( "Format violation: consecutive extended headers found."
- /*" Use --permissive."*/, 0, true ); return 2; }
- if( !extended.parse( infd, header, permissive ) )
+ /*" Use --permissive.", 0, true*/ ); return 2; }
+ if( !parse_records( infd, extended, header, permissive ) )
{ show_error( "Error in extended records. Skipping to next header." );
extended.reset(); gretval = 2; }
else if( !extended.crc_present && missing_crc )
@@ -586,7 +729,7 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
}
prev_extended = false;
- if( extended.linkpath.empty() )
+ if( extended.linkpath.empty() ) // copy linkpath from ustar header
{
for( int i = 0; i < linkname_l && header[linkname_o+i]; ++i )
extended.linkpath += header[linkname_o+i];
@@ -595,7 +738,7 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
extended.linkpath.resize( extended.linkpath.size() - 1 );
}
- if( extended.path.empty() )
+ if( extended.path.empty() ) // copy path from ustar header
{
char stored_name[prefix_l+1+name_l+1];
int len = 0;
@@ -624,7 +767,7 @@ int decode( const std::string & archive_name, const Arg_parser & parser,
if( extended.size == 0 &&
( typeflag == tf_regular || typeflag == tf_hiperf ) )
- extended.size = strtoull( header + size_o, 0, 8 );
+ extended.size = parse_octal( header + size_o, size_l );
if( listing || skip )
retval = list_member( infd, extended, header, skip );