/* Tarlz - Archiver with multimember lzip compression
Copyright (C) 2013-2019 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _FILE_OFFSET_BITS 64
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include // for major, minor, makedev
#include
#include "arg_parser.h"
#include "lzip_index.h"
#include "tarlz.h"
namespace {
// Scratch buffer passed to show_member_name() and parse_records() by the
// functions in this file.
Resizable_buffer grbuf;
// True when skip_member() may lseek() over member data instead of reading it.
// Set by decode() in list mode; cleared by archive_read() once the archive is
// found to be compressed.
bool archive_is_uncompressed_seekable = false;
// Set by decode() from has_lz_ext( archive_name ) before the first read.
bool archive_has_lz_ext; // local var for archive_read
// Prints "Skipping to next header." at most once per run of bad headers.
// Called with reset == true it re-arms the warning and returns false.
// Otherwise it returns true only on the call that actually printed.
bool skip_warn( const bool reset = false )	// avoid duplicate warnings
  {
  static bool already_warned = false;

  if( reset ) { already_warned = false; return false; }
  if( already_warned ) return false;
  already_warned = true;
  show_error( "Skipping to next header." );
  return true;
  }
// Creates every missing directory leading to the last component of 'name'.
// The last component itself is never created. Returns false if an existing
// prefix is not a directory or if mkdir fails; true otherwise.
bool make_path( const std::string & name )
  {
  const mode_t dir_mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;

  // Locate the end of the directory part of 'name'.
  unsigned dir_end = name.size();
  for( ; dir_end > 0 && name[dir_end-1] == '/'; --dir_end ) {}	// trailing slashes
  for( ; dir_end > 0 && name[dir_end-1] != '/'; --dir_end ) {}	// last component
  for( ; dir_end > 0 && name[dir_end-1] == '/'; --dir_end ) {}	// separators

  for( unsigned pos = 0; pos < dir_end; )
    {
    if( name[pos] == '/' ) { ++pos; continue; }		// skip separators
    while( pos < dir_end && name[pos] != '/' ) ++pos;	// end of component
    const std::string prefix = name.substr( 0, pos );
    struct stat st;
    if( stat( prefix.c_str(), &st ) != 0 )
      { if( mkdir( prefix.c_str(), dir_mode ) != 0 ) return false; }
    else if( !S_ISDIR( st.st_mode ) ) return false;
    }
  return true;
  }
// Reads 'size' bytes of archive data from 'infd' into 'buf'. On the first
// call the block read is inspected to decide whether the archive is lzip
// compressed; if so, this and later calls return decompressed data.
// All state (decoder, EOF flag, fatal flag, first-call flag) is kept in
// function-local statics, so it persists across calls.
// Return value: 0 = OK, 1 = damaged member, 2 = fatal error.
// If sizep and error, return in *sizep the number of bytes read.
// The first 6 bytes of the archive must be intact for islz to be meaningful.
int archive_read( const char * const archive_namep, const int infd,
uint8_t * const buf, const int size, int * const sizep = 0 )
{
static LZ_Decoder * decoder = 0;
static bool at_eof = false;
static bool fatal = false;
static bool first_call = true;
if( sizep ) *sizep = 0;
// once a fatal error has been seen, every later call fails immediately
if( fatal ) return 2;
if( first_call ) // check format
{
first_call = false;
// the format check below needs exactly one tar header block
if( size != header_size )
internal_error( "size != header_size on first call." );
const int rd = readblock( infd, buf, size );
if( sizep ) *sizep = rd;
if( rd != size && errno )
{ show_file_error( archive_namep, "Error reading archive", errno );
fatal = true; return 2; }
// classify the first block: lzip member, ustar header, or zeroed EOF block
const Lzip_header & header = (*(const Lzip_header *)buf);
bool islz = ( rd >= min_member_size && header.verify_magic() &&
header.verify_version() &&
isvalid_ds( header.dictionary_size() ) );
const bool istar = ( rd == size && verify_ustar_chksum( buf ) );
const bool iseof =
( !islz && !istar && rd == size && block_is_zero( buf, size ) );
if( !islz && !istar && !iseof ) // corrupt or invalid format
{
show_file_error( archive_namep, posix_msg );
// an unrecognized block is still treated as lzip data if the archive
// name has an lzip extension and enough bytes were read
if( archive_has_lz_ext && rd >= min_member_size ) islz = true;
if( !islz ) return 1;
}
if( !islz ) // uncompressed
{ if( rd == size ) return 0; fatal = true; return 2; }
archive_is_uncompressed_seekable = false; // compressed
decoder = LZ_decompress_open();
if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok )
{ show_error( mem_msg );
LZ_decompress_close( decoder ); fatal = true; return 2; }
// feed the bytes already read to the decoder, then call again (first_call is
// now false) to obtain the first decompressed block in 'buf'
if( LZ_decompress_write( decoder, buf, rd ) != rd )
internal_error( "library error (LZ_decompress_write)." );
const int res = archive_read( archive_namep, infd, buf, size, sizep );
if( res != 0 ) { if( res == 2 ) fatal = true; return res; }
// the first decompressed block must be a ustar header or a zeroed block
if( verify_ustar_chksum( buf ) || block_is_zero( buf, size ) ) return 0;
show_file_error( archive_namep, posix_lz_msg );
fatal = true; return 2;
}
if( !decoder ) // uncompressed
{
const int rd = readblock( infd, buf, size ); if( rd == size ) return 0;
if( sizep ) *sizep = rd;
show_file_error( archive_namep, end_msg ); fatal = true; return 2;
}
// compressed: alternate between draining the decoder into 'buf' and
// refilling it from 'infd' until 'size' bytes have been produced
const int ibuf_size = 16384;
uint8_t ibuf[ibuf_size];
int sz = 0;
while( sz < size )
{
const int rd = LZ_decompress_read( decoder, buf + sz, size - sz );
if( rd < 0 )
{
// decoding error: resynchronize to the next member and report damage
if( LZ_decompress_sync_to_member( decoder ) < 0 )
internal_error( "library error (LZ_decompress_sync_to_member)." );
skip_warn(); set_error_status( 2 ); return 1;
}
// no more data and decoder finished before 'size' bytes were produced
if( rd == 0 && LZ_decompress_finished( decoder ) == 1 )
{ LZ_decompress_close( decoder );
show_file_error( archive_namep, end_msg ); fatal = true; return 2; }
sz += rd; if( sizep ) *sizep = sz;
if( sz < size && !at_eof && LZ_decompress_write_size( decoder ) > 0 )
{
const int rsize = std::min( ibuf_size, LZ_decompress_write_size( decoder ) );
// note: this 'rd' shadows the outer 'rd' inside this inner block
const int rd = readblock( infd, ibuf, rsize );
if( LZ_decompress_write( decoder, ibuf, rd ) != rd )
internal_error( "library error (LZ_decompress_write)." );
if( rd < rsize )
{
// short read: mark end of input; a nonzero errno means a read error
at_eof = true; LZ_decompress_finish( decoder );
if( errno )
{ show_file_error( archive_namep, "Error reading archive", errno );
fatal = true; return 2; }
}
}
}
return 0;
}
int skip_member( const char * const archive_namep, const int infd,
const Extended & extended )
{
long long rest = extended.file_size();
const int rem = rest % header_size;
if( rem ) rest += header_size - rem; // padding
if( archive_is_uncompressed_seekable && lseek( infd, rest, SEEK_CUR ) > 0 )
return 0;
const int bufsize = 32 * header_size;
uint8_t buf[bufsize];
while( rest > 0 )
{
const int rsize = ( rest >= bufsize ) ? bufsize : rest;
const int ret = archive_read( archive_namep, infd, buf, rsize );
if( ret != 0 ) { if( ret == 2 ) return 2; else break; }
rest -= rsize;
}
return 0;
}
// Prints "<filename>: <msg>" to stdout and flushes it, unless verbosity is
// negative.
void show_file_diff( const char * const filename, const char * const msg )
  {
  if( verbosity < 0 ) return;
  std::printf( "%s: %s\n", filename, msg );
  std::fflush( stdout );
  }
// Compares one archive member against the file of the same name in the
// file system, reporting each difference through show_file_diff and
// recording it with set_error_status( 1 ).
// Metadata (type, mode, ids unless ignore_ids, mtime, size, device numbers,
// symlink target) is checked first; the contents of regular files are then
// compared block by block unless type or size already differ.
// Return value: 1 if show_member_name fails, 2 if archive_read reports a
// fatal error while comparing contents, else 0 or the result of skip_member.
int compare_member( const char * const archive_namep, const int infd1,
const Extended & extended, const Tar_header header,
const bool ignore_ids )
{
if( !show_member_name( extended, header, 1, grbuf ) ) return 1;
long long rest = extended.file_size();
const char * const filename = extended.path().c_str();
const Typeflag typeflag = (Typeflag)header[typeflag_o];
// type_differs starts true so that a failed hstat also skips the contents
bool diff = false, size_differs = false, type_differs = true;
struct stat st;
if( hstat( filename, &st ) != 0 )
show_file_error( filename, "Warning: Can't stat", errno );
else if( ( typeflag == tf_regular || typeflag == tf_hiperf ) &&
!S_ISREG( st.st_mode ) )
show_file_diff( filename, "Is not a regular file" );
else if( typeflag == tf_symlink && !S_ISLNK( st.st_mode ) )
show_file_diff( filename, "Is not a symlink" );
else if( typeflag == tf_chardev && !S_ISCHR( st.st_mode ) )
show_file_diff( filename, "Is not a character device" );
else if( typeflag == tf_blockdev && !S_ISBLK( st.st_mode ) )
show_file_diff( filename, "Is not a block device" );
else if( typeflag == tf_directory && !S_ISDIR( st.st_mode ) )
show_file_diff( filename, "Is not a directory" );
else if( typeflag == tf_fifo && !S_ISFIFO( st.st_mode ) )
show_file_diff( filename, "Is not a FIFO" );
else
{
// file type matches the header: compare the remaining metadata
type_differs = false;
if( typeflag != tf_symlink )
{
const mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits
if( mode != ( st.st_mode & ( S_ISUID | S_ISGID | S_ISVTX |
S_IRWXU | S_IRWXG | S_IRWXO ) ) )
{ show_file_diff( filename, "Mode differs" ); diff = true; }
}
if( !ignore_ids )
{
if( (uid_t)parse_octal( header + uid_o, uid_l ) != st.st_uid )
{ show_file_diff( filename, "Uid differs" ); diff = true; }
if( (gid_t)parse_octal( header + gid_o, gid_l ) != st.st_gid )
{ show_file_diff( filename, "Gid differs" ); diff = true; }
}
if( typeflag != tf_symlink )
{
// modification time is not compared for directories
if( typeflag != tf_directory )
{
const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits
if( mtime != st.st_mtime )
{ show_file_diff( filename, "Mod time differs" ); diff = true; }
}
if( ( typeflag == tf_regular || typeflag == tf_hiperf ) &&
rest != st.st_size ) // don't compare contents
{ show_file_diff( filename, "Size differs" ); size_differs = true; }
if( ( typeflag == tf_chardev || typeflag == tf_blockdev ) &&
( parse_octal( header + devmajor_o, devmajor_l ) !=
(unsigned)major( st.st_rdev ) ||
parse_octal( header + devminor_o, devminor_l ) !=
(unsigned)minor( st.st_rdev ) ) )
{ show_file_diff( filename, "Device number differs" ); diff = true; }
}
else
{
// symlink: read the link target and compare it with the archived linkpath
char * const buf = new char[st.st_size+1];
long len = readlink( filename, buf, st.st_size );
bool e = ( len != st.st_size );
if( !e )
{
while( len > 1 && buf[len-1] == '/' ) --len; // trailing '/'
buf[len] = 0;
if( extended.linkpath() != buf ) e = true;
}
delete[] buf;
if( e ) { show_file_diff( filename, "Symlink differs" ); diff = true; }
}
}
// record metadata differences now; 'diff' is reused below for the contents
if( diff || size_differs || type_differs )
{ diff = false; set_error_status( 1 ); }
if( rest <= 0 ) return 0;
if( ( typeflag != tf_regular && typeflag != tf_hiperf ) ||
size_differs || type_differs )
return skip_member( archive_namep, infd1, extended );
// else compare file contents
const int rem = rest % header_size;
const int padding = rem ? header_size - rem : 0;
const int bufsize = 32 * header_size;
uint8_t buf1[bufsize];
uint8_t buf2[bufsize];
const int infd2 = open_instream( filename );
if( infd2 < 0 )
{ set_error_status( 1 );
return skip_member( archive_namep, infd1, extended ); }
int retval = 0;
while( rest > 0 )
{
// the last archive read also consumes the padding; the file read does not
const int rsize1 = ( rest >= bufsize ) ? bufsize : rest + padding;
const int rsize2 = ( rest >= bufsize ) ? bufsize : rest;
const int ret = archive_read( archive_namep, infd1, buf1, rsize1 );
if( ret != 0 ) { if( ret == 2 ) retval = 2; diff = true; break; }
// after the first difference, keep reading the archive but stop comparing
if( !diff )
{
const int rd = readblock( infd2, buf2, rsize2 );
if( rd != rsize2 )
{
if( errno ) show_file_error( filename, "Read error", errno );
else show_file_diff( filename, "EOF found in file" );
diff = true;
}
else
{
int i = 0; while( i < rsize2 && buf1[i] == buf2[i] ) ++i;
if( i < rsize2 )
{ show_file_diff( filename, "Contents differ" ); diff = true; }
}
}
// a partial block was the last one
if( rest < bufsize ) break;
rest -= rsize1;
}
if( diff ) set_error_status( 1 );
close( infd2 );
return retval;
}
// Shows the name of the member and then skips over its data.
// Return value: 1 if show_member_name fails, else the result of skip_member.
int list_member( const char * const archive_namep, const int infd,
                 const Extended & extended, const Tar_header header )
  {
  const bool shown = show_member_name( extended, header, 0, grbuf );
  if( shown ) return skip_member( archive_namep, infd, extended );
  return 1;
  }
// Returns true if dotdot_at_i reports a match at any position of 'filename'.
bool contains_dotdot( const char * const filename )
  {
  int pos = 0;
  while( filename[pos] != 0 )
    {
    if( dotdot_at_i( filename, pos ) ) return true;
    ++pos;
    }
  return false;
  }
// Extracts one archive member to the file system.
// Names containing a '..' component are refused and their data skipped.
// Any existing file of the same name is removed, missing parent directories
// are created, and then the file, link, directory, device node or FIFO is
// created according to the typeflag of 'header'. Owner, mode and
// modification time are applied where the code below does so (mode and
// utime errors are ignored; chown errors EPERM and EINVAL are ignored).
// If archive_read fails while copying data, the partial output file is kept
// (with the bytes read so far appended) when keep_damaged is true, else it
// is removed.
// The output descriptor is closed on every return path.
// Return value: 0 = OK (or member skipped/damaged), 1 = show_member_name
// failed, 2 = fatal error.
int extract_member( const char * const archive_namep, const int infd,
                    const Extended & extended, const Tar_header header,
                    const bool keep_damaged )
  {
  const char * const filename = extended.path().c_str();
  if( contains_dotdot( filename ) )
    {
    show_file_error( filename, "Contains a '..' component, skipping." );
    return skip_member( archive_namep, infd, extended );
    }
  const mode_t mode = parse_octal( header + mode_o, mode_l );	// 12 bits
  const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits
  const Typeflag typeflag = (Typeflag)header[typeflag_o];
  const bool islink = ( typeflag == tf_link || typeflag == tf_symlink );
  int outfd = -1;			// only opened for regular files

  if( !show_member_name( extended, header, 1, grbuf ) ) return 1;
  std::remove( filename );		// replace any existing file
  make_path( filename );		// failure shows up in the creation below
  switch( typeflag )
    {
    case tf_regular:
    case tf_hiperf:
      outfd = open_outstream( filename );
      if( outfd < 0 ) return 2;
      break;
    case tf_link:
    case tf_symlink:
      {
      const char * const linkname = extended.linkpath().c_str();
      const bool hard = typeflag == tf_link;
      if( ( hard && link( linkname, filename ) != 0 ) ||
          ( !hard && symlink( linkname, filename ) != 0 ) )
        {
        if( verbosity >= 0 )
          std::fprintf( stderr, "Can't %slink file '%s' to '%s': %s.\n",
                        hard ? "" : "sym", linkname, filename,
                        std::strerror( errno ) );
        return 2;
        }
      } break;
    case tf_directory:
      if( mkdir( filename, mode ) != 0 && errno != EEXIST )
        {
        show_file_error( filename, "Can't create directory", errno );
        return 2;
        }
      break;
    case tf_chardev:
    case tf_blockdev:
      {
      // dev_t keeps the full device number; 'unsigned' may be narrower
      const dev_t dev =
        makedev( parse_octal( header + devmajor_o, devmajor_l ),
                 parse_octal( header + devminor_o, devminor_l ) );
      const int dmode = ( typeflag == tf_chardev ? S_IFCHR : S_IFBLK ) | mode;
      if( mknod( filename, dmode, dev ) != 0 )
        {
        show_file_error( filename, "Can't create device node", errno );
        return 2;
        }
      break;
      }
    case tf_fifo:
      if( mkfifo( filename, mode ) != 0 && errno != EEXIST )
        {
        show_file_error( filename, "Can't create FIFO file", errno );
        return 2;
        }
      break;
    default:
      if( verbosity >= 0 )
        std::fprintf( stderr, "File type '%c' not supported for file '%s'.\n",
                      typeflag, filename );
      return 2;
    }

  const uid_t uid = (uid_t)parse_octal( header + uid_o, uid_l );
  const gid_t gid = (gid_t)parse_octal( header + gid_o, gid_l );
  if( !islink && chown( filename, uid, gid ) != 0 &&
      errno != EPERM && errno != EINVAL )
    {
    show_file_error( filename, "Can't change file owner", errno );
    if( outfd >= 0 ) close( outfd );	// don't leak the output descriptor
    return 2;
    }

  if( typeflag == tf_regular || typeflag == tf_hiperf )
    fchmod( outfd, mode );			// ignore errors

  const int bufsize = 32 * header_size;
  uint8_t buf[bufsize];
  long long rest = extended.file_size();
  const int rem = rest % header_size;
  const int padding = rem ? header_size - rem : 0;
  while( rest > 0 )
    {
    // the last read also consumes the padding; only 'rest' bytes are written
    const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding;
    int rd;
    const int ret = archive_read( archive_namep, infd, buf, rsize, &rd );
    if( ret != 0 )
      {
      if( outfd >= 0 )
        {
        if( keep_damaged )
          { writeblock( outfd, buf, std::min( rest, (long long)rd ) );
            close( outfd ); }
        else { close( outfd ); std::remove( filename ); }
        }
      if( ret == 2 ) return 2; else return 0;
      }
    const int wsize = ( rest >= bufsize ) ? bufsize : rest;
    if( outfd >= 0 && writeblock( outfd, buf, wsize ) != wsize )
      {
      show_file_error( filename, "Error writing file", errno );
      close( outfd );			// don't leak the output descriptor
      return 2;
      }
    rest -= wsize;
    }
  if( outfd >= 0 && close( outfd ) != 0 )
    { show_file_error( filename, "Error closing file", errno ); return 2; }
  if( !islink )
    {
    struct utimbuf t;
    t.actime = mtime;
    t.modtime = mtime;
    utime( filename, &t );			// ignore errors
    }
  return 0;
  }
// Reads the extended records announced by 'header' into 'rbuf' and parses
// them into 'extended'. Returns false if the announced size is not positive
// or is too large, if the buffer can't be resized, if reading the archive
// fails, or if parsing fails.
bool parse_records( const char * const archive_namep, const int infd,
                    Extended & extended, const Tar_header header,
                    Resizable_buffer & rbuf, const bool permissive )
  {
  const long long edsize = parse_octal( header + size_o, size_l );
  const long long bufsize = round_up( edsize );
  const bool no_data = ( edsize <= 0 );
  const bool too_large = ( edsize >= 1LL << 33 || bufsize >= INT_MAX );
  if( no_data || too_large ) return false;	// overflow or no extended data

  if( !rbuf.resize( bufsize ) ) return false;	// extended records buffer

  uint8_t * const records = (uint8_t *)rbuf();
  if( archive_read( archive_namep, infd, records, bufsize ) != 0 )
    return false;
  return extended.parse( rbuf(), edsize, permissive );
  }
} // end namespace
// Entry point for listing, comparing or extracting an archive, as selected
// by program_mode. Reads the archive named archive_name (or standard input
// if the name is empty) one tar header at a time and dispatches each member
// to skip_member, list_member, compare_member or extract_member.
// Returns 1 directly if the archive can't be opened or a -C directory change
// fails; otherwise returns final_exit_status() applied to the accumulated
// result.
int decode( const std::string & archive_name, const Arg_parser & parser,
const int filenames, const int num_workers, const int debug_level,
const Program_mode program_mode, const bool ignore_ids,
const bool keep_damaged, const bool missing_crc,
const bool permissive )
{
const bool from_stdin = archive_name.empty();
const char * const archive_namep =
from_stdin ? "(stdin)" : archive_name.c_str();
const int infd = from_stdin ? STDIN_FILENO : open_instream( archive_name );
if( infd < 0 ) return 1;
// Execute -C options and mark filenames to be compared, extracted or listed.
// name_pending is of type char instead of bool to allow concurrent update.
std::vector< char > name_pending( parser.arguments(), false );
for( int i = 0; i < parser.arguments(); ++i )
{
const int code = parser.code( i );
// -C is not executed when only listing
if( code == 'C' && program_mode != m_list )
{
const char * const dir = parser.argument( i ).c_str();
if( chdir( dir ) != 0 )
{ show_file_error( dir, "Error changing working directory", errno );
return 1; }
}
// non-option, non-empty, non-excluded arguments are names to look for
if( !code && parser.argument( i ).size() &&
!Exclude::excluded( parser.argument( i ).c_str() ) )
name_pending[i] = true;
}
// multi-threaded --list is faster even with 1 thread and 1 file in archive
if( program_mode == m_list && num_workers > 0 )
{
const Lzip_index lzip_index( infd, true, false ); // only regular files
const long members = lzip_index.members();
if( lzip_index.retval() == 0 && members >= 2 ) // one file + eof
{
// show_file_error( archive_namep, "Is compressed seekable" );
return list_lz( archive_namep, parser, name_pending, lzip_index,
filenames, debug_level, infd,
std::min( (long)num_workers, members ),
missing_crc, permissive );
}
// no valid lzip index: rewind and, if the file is large enough, let
// skip_member() seek over member data
if( lseek( infd, 0, SEEK_SET ) == 0 && lzip_index.retval() != 0 &&
lzip_index.file_size() > 3 * header_size )
archive_is_uncompressed_seekable = true; // unless compressed corrupt
}
archive_has_lz_ext = has_lz_ext( archive_name ); // var for archive_read
Extended extended; // metadata from extended records
int retval = 0;
bool prev_extended = false; // prev header was extended
while( true ) // process one tar header per iteration
{
Tar_header header;
const int ret = archive_read( archive_namep, infd, header, header_size );
if( ret == 2 ) { retval = 2; break; }
if( ret != 0 || !verify_ustar_chksum( header ) )
{
// a zeroed block read without error marks the end of the archive
if( ret == 0 && block_is_zero( header, header_size ) )
{
if( !prev_extended || permissive ) break; // EOF
show_file_error( archive_namep, fv_msg1 );
retval = 2; break;
}
// damaged or invalid header: warn once and try the next block
if( skip_warn() && verbosity >= 2 )
std::fprintf( stderr, "ustar chksum = %07o\n", ustar_chksum( header ) );
set_error_status( 2 ); continue;
}
skip_warn( true ); // reset warning
const Typeflag typeflag = (Typeflag)header[typeflag_o];
if( typeflag == tf_global )
{
if( prev_extended && !permissive )
{ show_file_error( archive_namep, fv_msg2 ); retval = 2; break; }
Extended dummy; // global headers are parsed and ignored
if( !parse_records( archive_namep, infd, dummy, header, grbuf, true ) )
{ show_file_error( archive_namep,
"Error in global extended records. Skipping to next header." );
set_error_status( 2 ); }
continue;
}
if( typeflag == tf_extended )
{
if( prev_extended && !permissive )
{ show_file_error( archive_namep, fv_msg3 ); retval = 2; break; }
if( !parse_records( archive_namep, infd, extended, header, grbuf,
permissive ) )
{ show_file_error( archive_namep,
"Error in extended records. Skipping to next header." );
extended.reset(); set_error_status( 2 ); }
else if( !extended.crc_present() && missing_crc )
{ show_file_error( archive_namep, mcrc_msg ); retval = 2; break; }
prev_extended = true;
continue;
}
// ordinary member header: complete the metadata and dispatch by mode
prev_extended = false;
extended.fill_from_ustar( header ); // copy metadata from header
if( check_skip_filename( parser, name_pending, extended.path().c_str(),
filenames ) )
retval = skip_member( archive_namep, infd, extended );
else if( program_mode == m_list )
retval = list_member( archive_namep, infd, extended, header );
else if( program_mode == m_diff )
retval = compare_member( archive_namep, infd, extended, header, ignore_ids );
else retval = extract_member( archive_namep, infd, extended, header,
keep_damaged );
extended.reset();
if( retval )
{ show_error( "Error is not recoverable: exiting now." ); break; }
}
// a close error is only reported if no earlier error was recorded
if( close( infd ) != 0 && !retval )
{ show_file_error( archive_namep, "Error closing archive", errno );
retval = 1; }
// report requested names that are still marked as pending
if( retval == 0 ) for( int i = 0; i < parser.arguments(); ++i )
if( !parser.code( i ) && parser.argument( i ).size() && name_pending[i] )
{
show_file_error( parser.argument( i ).c_str(), "Not found in archive." );
retval = 1;
}
return final_exit_status( retval, program_mode != m_diff );
}