/* Tarlz - Archiver with multimember lzip compression
Copyright (C) 2013-2018 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _FILE_OFFSET_BITS 64
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#if defined(__GNU_LIBRARY__)
#include // for makedev
#endif
#include
#include "arg_parser.h"
#include "lzip.h"
#include "tarlz.h"
namespace {
// Exit status accumulated from recoverable problems: damaged data sets it
// to 2 and names not found in the archive raise it to 1. decode() returns
// it when no fatal error has happened.
int gretval = 0;
// Set by decode() when the archive name ends in ".lz" or ".tlz";
// archive_read then treats an unrecognizable first block as lzip data.
bool has_lz_ext; // global var for archive_read
// Emits the "Skipping to next header." warning only once per run of
// damaged data. Calling it with reset = true re-arms the warning without
// printing anything.
void skip_warn( const bool reset = false )
{
  static bool already_warned = false;	// true while the warning is muted

  if( reset ) { already_warned = false; return; }
  if( already_warned ) return;
  already_warned = true;
  show_error( "Skipping to next header." );
}
// Creates every missing directory leading to the last component of 'name'
// (the last component itself is not created).
// Returns false if an existing path component is not a directory or if a
// directory can't be created; returns true otherwise.
bool make_path( const std::string & name )
{
  const mode_t dir_mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;

  // Length of the parent-directory part: drop trailing slashes, then the
  // last component, then the slashes that preceded it.
  unsigned parent_len = name.size();
  while( parent_len > 0 && name[parent_len-1] == '/' ) --parent_len;
  while( parent_len > 0 && name[parent_len-1] != '/' ) --parent_len;
  while( parent_len > 0 && name[parent_len-1] == '/' ) --parent_len;

  for( unsigned pos = 0; pos < parent_len; )
  {
    while( pos < parent_len && name[pos] == '/' ) ++pos;	// skip slashes
    const unsigned start = pos;
    while( pos < parent_len && name[pos] != '/' ) ++pos;	// skip component
    if( start == pos ) continue;			// nothing left to create

    const std::string prefix = name.substr( 0, pos );
    struct stat st;
    if( stat( prefix.c_str(), &st ) != 0 )
    {
      if( mkdir( prefix.c_str(), dir_mode ) != 0 ) return false;
    }
    else if( !S_ISDIR( st.st_mode ) ) return false;
  }
  return true;
}
// Returns true if the first 'size' bytes of 'buf' are all zero
// (also true when size <= 0, as no byte is examined).
inline bool block_is_zero( const uint8_t * const buf, const int size )
{
  int pos = 0;
  while( pos < size && buf[pos] == 0 ) ++pos;	// stop at first nonzero byte
  return pos >= size;
}
// Reads the next 'size' bytes of archive data into 'buf', decompressing
// them if the archive is lzip-compressed.
// The first call must request exactly header_size bytes; it inspects that
// block to decide between compressed and uncompressed input, and all the
// later calls follow the path chosen then. The state lives in function
// statics that are never reset, and after a fatal error every call
// returns 2.
// Return value: 0 = OK, 1 = damaged member, 2 = fatal error.
// If sizep and error, return in *sizep the number of bytes read.
// The first 6 bytes of the archive must be intact for islz to be meaningful.
int archive_read( const int infd, uint8_t * const buf, const int size,
                  int * const sizep = 0 )
{
  static LZ_Decoder * decoder = 0;
  static bool at_eof = false;
  static bool fatal = false;
  static bool first_call = true;
  // True once the decoder has been closed at the end of the compressed
  // data. The pointer is zeroed at the same time so that the freed decoder
  // is never handed to lzlib again.
  static bool decoder_closed = false;

  if( sizep ) *sizep = 0;
  if( fatal ) return 2;
  if( first_call )					// check format
  {
    first_call = false;
    if( size != header_size )
      internal_error( "size != header_size on first call." );
    const int rd = readblock( infd, buf, size );
    if( sizep ) *sizep = rd;
    if( rd != size && errno )
      { show_error( "Error reading archive", errno ); fatal = true; return 2; }
    const Lzip_header & header = (*(const Lzip_header *)buf);
    bool islz = ( rd >= min_member_size && header.verify_magic() &&
                  isvalid_ds( header.dictionary_size() ) );
    const bool istar = ( rd == size && verify_ustar_chksum( buf ) );
    const bool iseof =
      ( !islz && !istar && rd == size && block_is_zero( buf, size ) );
    if( !islz && !istar && !iseof )		// corrupt or invalid format
    {
      show_error( "This does not look like a POSIX tar archive." );
      if( has_lz_ext ) islz = true;	// trust the file name extension
      if( verbosity >= 2 && !islz && rd == size )
        std::fprintf( stderr, "ustar chksum = %07o\n", ustar_chksum( buf ) );
      if( !islz ) return 1;
    }
    if( !islz )						// uncompressed
      { if( rd == size ) return 0; fatal = true; return 2; }
    decoder = LZ_decompress_open();			// compressed
    if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok )
    {
      show_error( "Not enough memory." );
      LZ_decompress_close( decoder ); decoder = 0;
      fatal = true; return 2;
    }
    // feed the bytes already read to the decoder, then read the first
    // decompressed header through the normal path below
    if( LZ_decompress_write( decoder, buf, rd ) != rd )
      internal_error( "library error (LZ_decompress_write)." );
    const int res = archive_read( infd, buf, size, sizep );
    if( res != 0 ) { if( res == 2 ) fatal = true; return res; }
    if( verify_ustar_chksum( buf ) || block_is_zero( buf, size ) ) return 0;
    show_error( "This does not look like a POSIX tar.lz archive." );
    fatal = true; return 2;
  }

  if( decoder_closed )		// compressed data already exhausted
    { show_error( "Archive ends unexpectedly." ); fatal = true; return 2; }

  if( !decoder )					// uncompressed
  {
    const int rd = readblock( infd, buf, size ); if( rd == size ) return 0;
    if( sizep ) *sizep = rd;
    show_error( "Archive ends unexpectedly." ); fatal = true; return 2;
  }

  const int ibuf_size = 16384;
  uint8_t ibuf[ibuf_size];
  int sz = 0;
  while( sz < size )
  {
    const int rd = LZ_decompress_read( decoder, buf + sz, size - sz );
    if( rd < 0 )			// damaged member; resynchronize
    {
      if( LZ_decompress_sync_to_member( decoder ) < 0 )
        internal_error( "library error (LZ_decompress_sync_to_member)." );
      skip_warn(); gretval = 2; return 1;
    }
    if( rd == 0 && LZ_decompress_finished( decoder ) == 1 )
    {
      LZ_decompress_close( decoder ); decoder = 0; decoder_closed = true;
      show_error( "Archive ends unexpectedly." ); fatal = true; return 2;
    }
    sz += rd; if( sizep ) *sizep = sz;
    if( sz == size && LZ_decompress_finished( decoder ) == 1 )
    {
      // All the data have been delivered; release the decoder now and
      // remember it, so that a later call fails cleanly.
      const bool close_failed = ( LZ_decompress_close( decoder ) < 0 );
      decoder = 0; decoder_closed = true;
      if( close_failed )
        { show_error( "LZ_decompress_close failed." ); fatal = true; return 2; }
    }
    if( sz < size && !at_eof && LZ_decompress_write_size( decoder ) > 0 )
    {
      const int rsize = std::min( ibuf_size, LZ_decompress_write_size( decoder ) );
      const int rd = readblock( infd, ibuf, rsize );
      if( LZ_decompress_write( decoder, ibuf, rd ) != rd )
        internal_error( "library error (LZ_decompress_write)." );
      if( rd < rsize )			// end of input or read error
      {
        at_eof = true; LZ_decompress_finish( decoder );
        if( errno )
          { show_error( "Error reading archive", errno ); fatal = true;
            return 2; }
      }
    }
  }
  return 0;
}
const char * mode_string( const Tar_header header )
{
static char buf[11];
const Typeflag typeflag = (Typeflag)header[typeflag_o];
std::memcpy( buf, "----------", sizeof buf - 1 );
switch( typeflag )
{
case tf_regular: break;
case tf_link: buf[0] = 'h'; break;
case tf_symlink: buf[0] = 'l'; break;
case tf_chardev: buf[0] = 'c'; break;
case tf_blockdev: buf[0] = 'b'; break;
case tf_directory: buf[0] = 'd'; break;
case tf_fifo: buf[0] = 'p'; break;
case tf_hiperf: buf[0] = 'C'; break;
default: buf[0] = '?';
}
const mode_t mode = strtoul( header + mode_o, 0, 8 ); // 12 bits
const bool setuid = mode & S_ISUID;
const bool setgid = mode & S_ISGID;
const bool sticky = mode & S_ISVTX;
if( mode & S_IRUSR ) buf[1] = 'r';
if( mode & S_IWUSR ) buf[2] = 'w';
if( mode & S_IXUSR ) buf[3] = setuid ? 's' : 'x';
else if( setuid ) buf[3] = 'S';
if( mode & S_IRGRP ) buf[4] = 'r';
if( mode & S_IWGRP ) buf[5] = 'w';
if( mode & S_IXGRP ) buf[6] = setgid ? 's' : 'x';
else if( setgid ) buf[6] = 'S';
if( mode & S_IROTH ) buf[7] = 'r';
if( mode & S_IWOTH ) buf[8] = 'w';
if( mode & S_IXOTH ) buf[9] = sticky ? 't' : 'x';
else if( sticky ) buf[9] = 'T';
return buf;
}
// Returns "user/group" for the member described by 'header'. The names
// stored in the header are used when both are present; otherwise the
// numeric ids from the octal uid/gid fields are printed. The result lives
// in a static buffer that is overwritten by the next call.
const char * user_group_string( const Tar_header header )
{
  enum { bufsize = uname_l + 1 + gname_l + 1 };
  static char buf[bufsize];

  const bool have_names = ( header[uname_o] != 0 && header[gname_o] != 0 );
  if( !have_names )
  {
    const int uid = strtoul( header + uid_o, 0, 8 );
    const int gid = strtoul( header + gid_o, 0, 8 );
    snprintf( buf, bufsize, "%u/%u", uid, gid );
    return buf;
  }
  snprintf( buf, bufsize, "%.32s/%.32s", header + uname_o, header + gname_o );
  return buf;
}
// Prints the member to stdout according to the verbosity level:
// nothing if verbosity < vlevel, just the path if verbosity == vlevel, and
// a long listing line (mode, owner, size, modification time, path and link
// destination) if verbosity > vlevel. stdout is flushed after printing.
void show_member_name( const Extended & extended, const Tar_header header,
                       const int vlevel )
{
  if( verbosity < vlevel ) return;
  if( verbosity > vlevel )
  {
    const time_t mtime = strtoull( header + mtime_o, 0, 8 );	// 33 bits
    const struct tm * tm = localtime( &mtime );
    struct tm epoch;
    if( !tm )		// localtime may fail; show the epoch instead of crashing
    {
      std::memset( &epoch, 0, sizeof epoch );
      epoch.tm_year = 70; epoch.tm_mday = 1;		// 1970-01-01 00:00
      tm = &epoch;
    }
    const Typeflag typeflag = (Typeflag)header[typeflag_o];
    const bool islink = ( typeflag == tf_link || typeflag == tf_symlink );
    const char * const link_string = !islink ? "" :
      ( ( typeflag == tf_link ) ? " link to " : " -> " );
    std::printf( "%s %s %9llu %4d-%02u-%02u %02u:%02u %s%s%s\n",
                 mode_string( header ), user_group_string( header ),
                 extended.size, 1900 + tm->tm_year, 1 + tm->tm_mon,
                 tm->tm_mday, tm->tm_hour, tm->tm_min, extended.path.c_str(),
                 link_string, !islink ? "" : extended.linkpath.c_str() );
  }
  else std::printf( "%s\n", extended.path.c_str() );
  std::fflush( stdout );
}
// Shows the member name (unless 'skip' is true) and reads past the member
// data, including the padding that completes the last block.
// Returns 2 if archive_read reports a fatal error, else 0 (a damaged
// member just ends the reading early).
int list_member( const int infd, const Extended & extended,
                 const Tar_header header, const bool skip )
{
  if( !skip ) show_member_name( extended, header, 0 );

  const unsigned bufsize = 32 * header_size;
  uint8_t buf[bufsize];
  const int tail_bytes = extended.size % header_size;
  const int padding = tail_bytes ? header_size - tail_bytes : 0;

  // Full buffers are read while they fit; the final, shorter read also
  // takes in the padding.
  for( unsigned long long rest = extended.size; rest > 0; rest -= bufsize )
  {
    const bool last_chunk = ( rest < bufsize );
    const int rsize = last_chunk ? rest + padding : bufsize;
    const int ret = archive_read( infd, buf, rsize );
    if( ret == 2 ) return 2;
    if( ret != 0 || last_chunk ) break;
  }
  return 0;
}
// Returns true if 'filename' has a component that is exactly "..", i.e.
// two dots placed at the start of the name or right after a '/', and
// followed by a '/' or by the end of the name.
bool contains_dotdot( const char * const filename )
{
  bool component_start = true;		// true at the first char of a component
  for( const char * p = filename; *p; ++p )
  {
    if( component_start && p[0] == '.' && p[1] == '.' &&
        ( p[2] == 0 || p[2] == '/' ) )
      return true;
    component_start = ( *p == '/' );
  }
  return false;
}
int extract_member( const int infd, const Extended & extended,
const Tar_header header, const bool keep_damaged )
{
const char * const filename = extended.path.c_str();
if( contains_dotdot( filename ) )
{
show_file_error( filename, "Contains a '..' component, skipping." );
return list_member( infd, extended, header, true );
}
const mode_t mode = strtoul( header + mode_o, 0, 8 ); // 12 bits
const time_t mtime = strtoull( header + mtime_o, 0, 8 ); // 33 bits
const Typeflag typeflag = (Typeflag)header[typeflag_o];
const bool islink = ( typeflag == tf_link || typeflag == tf_symlink );
int outfd = -1;
show_member_name( extended, header, 1 );
std::remove( filename );
make_path( filename );
switch( typeflag )
{
case tf_regular:
case tf_hiperf:
outfd = open_outstream( filename );
if( outfd < 0 ) return 2;
chmod( filename, mode ); // ignore errors
break;
case tf_link:
case tf_symlink:
{
const char * const linkname = extended.linkpath.c_str();
/* if( contains_dotdot( linkname ) )
{
show_file_error( filename,
"Link destination contains a '..' component, skipping." );
return list_member( infd, extended, header, false );
}*/
const bool hard = typeflag == tf_link;
if( ( hard && link( linkname, filename ) != 0 ) ||
( !hard && symlink( linkname, filename ) != 0 ) )
{
if( verbosity >= 0 )
std::fprintf( stderr, "Can't %slink file '%s' to '%s': %s.\n",
hard ? "" : "sym", linkname, filename,
std::strerror( errno ) );
return 2;
}
} break;
case tf_directory:
if( mkdir( filename, mode ) != 0 && errno != EEXIST )
{
show_file_error( filename, "Can't create directory", errno );
return 2;
}
break;
case tf_chardev:
case tf_blockdev:
{
const unsigned dev = makedev( strtoul( header + devmajor_o, 0, 8 ),
strtoul( header + devminor_o, 0, 8 ) );
const int dmode = ( typeflag == tf_chardev ? S_IFCHR : S_IFBLK ) | mode;
if( mknod( filename, dmode, dev ) != 0 )
{
show_file_error( filename, "Can't create device node", errno );
return 2;
}
break;
}
case tf_fifo:
if( mkfifo( filename, mode ) != 0 && errno != EEXIST )
{
show_file_error( filename, "Can't create FIFO file", errno );
return 2;
}
break;
default:
if( verbosity >= 0 )
std::fprintf( stderr, "File type '%c' not supported for file '%s'.\n",
typeflag, filename );
return 2;
}
const uid_t uid = (uid_t)strtoul( header + uid_o, 0, 8 );
const gid_t gid = (gid_t)strtoul( header + gid_o, 0, 8 );
if( !islink && chown( filename, uid, gid ) != 0 &&
errno != EPERM && errno != EINVAL )
{
show_file_error( filename, "Can't change file owner", errno );
return 2;
}
const unsigned bufsize = 32 * header_size;
uint8_t buf[bufsize];
unsigned long long rest = extended.size;
const int rem = extended.size % header_size;
const int padding = rem ? header_size - rem : 0;
while( rest > 0 )
{
const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding;
int rd;
const int ret = archive_read( infd, buf, rsize, &rd );
if( ret != 0 )
{
if( outfd >= 0 )
{
if( keep_damaged )
{ writeblock( outfd, buf, std::min( rest, (unsigned long long)rd ) );
close( outfd ); }
else { close( outfd ); std::remove( filename ); }
}
if( ret == 2 ) return 2; else return 0;
}
const int wsize = ( rest >= bufsize ) ? bufsize : rest;
if( outfd >= 0 && writeblock( outfd, buf, wsize ) != wsize )
{ show_file_error( filename, "Error writing file", errno ); return 2; }
rest -= wsize;
}
if( outfd >= 0 && close( outfd ) != 0 )
{ show_file_error( filename, "Error closing file", errno ); return 2; }
if( !islink )
{
struct utimbuf t;
t.actime = mtime;
t.modtime = mtime;
utime( filename, &t ); // ignore errors
}
return 0;
}
// Skips any sequence of leading "./" and '/' in 'filename' and returns a
// pointer to what follows ("." if nothing is left). The first time that
// something is removed, a message quoting the removed prefix is shown.
const char * remove_leading_slash( const char * const filename )
{
  static bool first_post = true;	// the message is shown only once

  const char * p = filename;
  for( ;; )
  {
    if( *p == '/' ) ++p;
    else if( p[0] == '.' && p[1] == '/' ) p += 2;
    else break;
  }
  if( first_post && p != filename )
  {
    first_post = false;
    const std::string removed( filename, p - filename );
    const std::string msg =
      "Removing leading '" + removed + "' from member names.";
    show_error( msg.c_str() );
  }
  return *p ? p : ".";
}
// Returns true if 'dir' is a parent directory of 'name': 'dir' must be a
// non-empty prefix of 'name' that either ends with '/' or is followed by
// '/' in 'name'.
bool compare_prefix_dir( const char * const dir, const char * const name )
{
  const char * d = dir;
  const char * n = name;
  while( *d && *d == *n ) { ++d; ++n; }		// walk the common prefix
  if( *d != 0 || d == dir ) return false;	// not a prefix, or empty dir
  return d[-1] == '/' || *n == '/';
}
// Returns true if the two file names are equal once the slashes that
// follow their common prefix are ignored (e.g. "dir/" matches "dir").
bool compare_tslash( const char * const name1, const char * const name2 )
{
  unsigned common = 0;			// length of the common prefix
  while( name1[common] && name1[common] == name2[common] ) ++common;

  const char * rest1 = name1 + common;
  const char * rest2 = name2 + common;
  for( ; *rest1 == '/'; ++rest1 ) {}
  for( ; *rest2 == '/'; ++rest2 ) {}
  return *rest1 == 0 && *rest2 == 0;
}
} // end namespace
// Reads from the archive the extended records announced by 'header' and
// stores the recognized ones (path, linkpath, size, GNU.crc32) in this
// object. Unknown keywords are ignored.
// Each record has the form "<length> <keyword>=<value>\n", where <length>
// is the decimal size of the whole record.
// Unless 'permissive' is true, a repeated keyword is an error.
// Returns false if there are no extended data, if they are too large, if
// they can't be read, or if any record is malformed. On failure the
// object may have been partially modified.
bool Extended::parse( const int infd, const Tar_header header,
                      const bool permissive )
{
  const unsigned long long edsize = strtoull( header + size_o, 0, 8 );
  const unsigned long long bufsize = round_up( edsize );
  if( bufsize == 0 || edsize == 0 || edsize >= 1ULL << 33 )
    return false;			// overflow or no extended data
  // One extra byte keeps the buffer NUL-terminated, so that the number
  // parsers below can't run past its end.
  char * const buf = new char[bufsize+1];	// extended records buffer
  buf[bufsize] = 0;
  if( archive_read( infd, (uint8_t *)buf, bufsize ) != 0 ) goto error;
  for( unsigned long long pos = 0; pos < edsize; )	// parse records
  {
    char * tail;
    const unsigned long long rsize = strtoull( buf + pos, &tail, 10 );
    if( rsize == 0 || rsize > edsize - pos || tail[0] != ' ' ||
        buf[pos+rsize-1] != '\n' ) goto error;
    ++tail;				// point to keyword
    const char * const last = buf + pos + rsize - 1;	// final newline
    // The declared length must at least cover its own length prefix; else
    // the subtraction below would wrap around to a huge value.
    if( tail > last ) goto error;
    // length of (keyword + '=' + value) without the final newline
    const unsigned long long rest = last - tail;
    if( rest > 5 && std::memcmp( tail, "path=", 5 ) == 0 )
      { if( path.size() && !permissive ) goto error;
        path.assign( tail + 5, rest - 5 ); }
    else if( rest > 9 && std::memcmp( tail, "linkpath=", 9 ) == 0 )
      { if( linkpath.size() && !permissive ) goto error;
        linkpath.assign( tail + 9, rest - 9 ); }
    else if( rest > 5 && std::memcmp( tail, "size=", 5 ) == 0 )
    {
      if( size != 0 && !permissive ) goto error;
      size = 0;
      for( unsigned long long i = 5; i < rest; ++i )
      {
        if( tail[i] < '0' || tail[i] > '9' ) goto error;
        const unsigned digit = tail[i] - '0';
        // exact overflow test; comparing with the previous value misses
        // some wraparounds of the multiplication
        if( size > ( ~0ULL - digit ) / 10 ) goto error;
        size = size * 10 + digit;
      }
      // a size that fits in the ustar header is rejected here
      if( size < 1ULL << 33 ) goto error;
    }
    else if( rest > 10 && std::memcmp( tail, "GNU.crc32=", 10 ) == 0 )
    {
      if( crc_present && !permissive ) goto error;
      if( rsize != 22 ) goto error;	// "22 GNU.crc32=" + 8 digits + '\n'
      char * t;
      const uint32_t stored_crc = strtoul( tail + 10, &t, 16 );
      if( t - tail - 10 != 8 || t[0] != '\n' ) goto error;
      const uint32_t computed_crc =
        crc32c.windowed_crc( (const uint8_t *)buf, pos + rsize - 9, edsize );
      crc_present = true;
      if( stored_crc != computed_crc ) goto error;
    }
    pos += rsize;
  }
  delete[] buf;
  return true;
error:
  delete[] buf;
  return false;
}
// Lists or extracts the members of a tar or tar.lz archive.
// archive_name: file to read; standard input is used if it is empty.
// parser: parsed command line; arguments with code 'C' are directories to
//   change to (ignored when listing), and arguments with code 0 are the
//   member names to process.
// filenames: if > 0, only the members matching one of the names given in
//   the command line are processed; the rest are skipped.
// keep_damaged: passed to extract_member.
// listing: list the members instead of extracting them.
// missing_crc: treat extended records without a CRC as a fatal error.
// permissive: passed to Extended::parse; also allows consecutive extended
//   headers.
// Returns 0 on success, 1 if the archive or a directory can't be opened,
// 2 on fatal error, or the value of gretval if only recoverable problems
// were found.
int decode( const std::string & archive_name, const Arg_parser & parser,
const int filenames, const bool keep_damaged, const bool listing,
const bool missing_crc, const bool permissive )
{
const int infd = archive_name.size() ?
open_instream( archive_name ) : STDIN_FILENO;
if( infd < 0 ) return 1;
// execute -C options and mark filenames to be extracted or listed
std::vector< bool > name_pending( parser.arguments(), false );
for( int i = 0; i < parser.arguments(); ++i )
{
const int code = parser.code( i );
if( code == 'C' && !listing )
{
const char * const dir = parser.argument( i ).c_str();
if( chdir( dir ) != 0 )
{ show_file_error( dir, "Error changing working directory", errno );
return 1; }
}
if( !code ) name_pending[i] = true;
}
// remember whether the name ends in ".lz" or ".tlz" (used by archive_read)
has_lz_ext =
( archive_name.size() > 3 &&
archive_name.compare( archive_name.size() - 3, 3, ".lz" ) == 0 ) ||
( archive_name.size() > 4 &&
archive_name.compare( archive_name.size() - 4, 4, ".tlz" ) == 0 );
Extended extended; // metadata from extended records
int retval = 0;
bool prev_extended = false; // prev header was extended
while( true ) // process one member per iteration
{
uint8_t buf[header_size];
const int ret = archive_read( infd, buf, header_size );
if( ret == 2 ) return 2;
// a damaged or invalid header is skipped; a block of zeros ends the archive
if( ret != 0 || !verify_ustar_chksum( buf ) )
{
if( ret == 0 && block_is_zero( buf, header_size ) ) break; // EOF
skip_warn(); gretval = 2; continue;
}
skip_warn( true ); // reset warning
const char * const header = (const char *)buf;
const Typeflag typeflag = (Typeflag)header[typeflag_o];
// an extended header only supplies metadata for the header that follows
if( typeflag == tf_extended )
{
if( prev_extended && !permissive )
{ show_error( "Format violation: consecutive extended headers found."
/*" Use --permissive."*/, 0, true ); return 2; }
if( !extended.parse( infd, header, permissive ) )
{ show_error( "Error in extended records. Skipping to next header." );
extended.reset(); gretval = 2; }
else if( !extended.crc_present && missing_crc )
{ show_error( "Missing CRC in extended records.", 0, true ); return 2; }
prev_extended = true;
continue;
}
prev_extended = false;
// take the link name from the ustar header if no extended record gave it
if( extended.linkpath.empty() )
{
for( int i = 0; i < linkname_l && header[linkname_o+i]; ++i )
extended.linkpath += header[linkname_o+i];
while( extended.linkpath.size() > 1 && // trailing '/'
extended.linkpath[extended.linkpath.size()-1] == '/' )
extended.linkpath.resize( extended.linkpath.size() - 1 );
}
// likewise build the member name as prefix + '/' + name from the header
if( extended.path.empty() )
{
char stored_name[prefix_l+1+name_l+1];
int len = 0;
while( len < prefix_l && header[prefix_o+len] )
{ stored_name[len] = header[prefix_o+len]; ++len; }
if( len && header[name_o] ) stored_name[len++] = '/';
for( int i = 0; i < name_l && header[name_o+i]; ++i )
{ stored_name[len] = header[name_o+i]; ++len; }
while( len > 0 && stored_name[len-1] == '/' ) --len; // trailing '/'
stored_name[len] = 0;
extended.path = remove_leading_slash( stored_name );
}
const char * const filename = extended.path.c_str();
// when names were given, skip the members that match none of them
bool skip = filenames > 0;
if( skip )
for( int i = 0; i < parser.arguments(); ++i )
if( parser.code( i ) == 0 )
{
const char * const name =
remove_leading_slash( parser.argument( i ).c_str() );
if( compare_prefix_dir( name, filename ) ||
compare_tslash( name, filename ) )
{ skip = false; name_pending[i] = false; break; }
}
// the size from the header is used only for regular files lacking an
// extended size record
if( extended.size == 0 &&
( typeflag == tf_regular || typeflag == tf_hiperf ) )
extended.size = strtoull( header + size_o, 0, 8 );
if( listing || skip )
retval = list_member( infd, extended, header, skip );
else
retval = extract_member( infd, extended, header, keep_damaged );
extended.reset();
if( retval )
{ show_error( "Error is not recoverable: exiting now." );
return retval; }
}
// report the names given in the command line that matched no member
for( int i = 0; i < parser.arguments(); ++i )
if( parser.code( i ) == 0 && name_pending[i] )
{
show_file_error( parser.argument( i ).c_str(), "Not found in archive." );
if( gretval < 1 ) gretval = 1;
}
if( !retval && gretval )
{ show_error( "Exiting with failure status due to previous errors." );
retval = gretval; }
return retval;
}