/* Tarlz - Archiver with multimember lzip compression
Copyright (C) 2013-2019 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _FILE_OFFSET_BITS 64
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "arg_parser.h"
#include "lzip_index.h"
#include "tarlz.h"
// Reads up to 'size' bytes from 'fd' into 'buf', starting at the absolute
// file offset 'pos'. Reads interrupted by a signal (EINTR) are retried.
// Returns the number of bytes really read.
// If (returned value < size) and (errno == 0), EOF was reached.
// If (returned value < size) and (errno != 0), pread failed.
//
int preadblock( const int fd, uint8_t * const buf, const int size,
                const long long pos )
  {
  int done = 0;				// bytes stored in buf so far
  errno = 0;
  // errno is cleared after every successful or interrupted read
  for( ; done < size; errno = 0 )
    {
    const int n = pread( fd, buf + done, size - done, pos + done );
    if( n > 0 ) { done += n; continue; }
    if( n == 0 || errno != EINTR ) break;	// EOF or real error
    }
  return done;
  }
// Writes 'size' bytes from 'buf' to 'fd', starting at the absolute file
// offset 'pos'. Writes interrupted by a signal (EINTR) are retried.
// Returns the number of bytes really written.
// If (returned value < size), it is always an error.
//
int pwriteblock( const int fd, const uint8_t * const buf, const int size,
                 const long long pos )
  {
  int done = 0;				// bytes written so far
  errno = 0;
  // errno is cleared after every pass that does not end the loop
  for( ; done < size; errno = 0 )
    {
    const int n = pwrite( fd, buf + done, size - done, pos + done );
    if( n > 0 ) { done += n; continue; }
    if( n < 0 && errno != EINTR ) break;	// real error
    }
  return done;
  }
namespace {
// Terminates the program with exit status 'retval'.
// May be called from the main thread or from any sub-thread, because the
// common helper functions used by all of them call it in case of error.
//
void cleanup_and_fail( const int retval = 2 )
  {
  // The lock is taken and never released, so only the first caller
  // goes on to call std::exit.
  static pthread_mutex_t exit_guard = PTHREAD_MUTEX_INITIALIZER;
  pthread_mutex_lock( &exit_guard );	// result ignored to avoid a loop
  std::exit( retval );
  }
// Initializes '*mutex' with default attributes.
// On failure, reports the pthread error code and terminates the program.
void xinit_mutex( pthread_mutex_t * const mutex )
  {
  const int status = pthread_mutex_init( mutex, 0 );
  if( status == 0 ) return;
  show_error( "pthread_mutex_init", status );
  cleanup_and_fail();
  }
// Initializes '*cond' with default attributes.
// On failure, reports the pthread error code and terminates the program.
void xinit_cond( pthread_cond_t * const cond )
  {
  const int status = pthread_cond_init( cond, 0 );
  if( status == 0 ) return;
  show_error( "pthread_cond_init", status );
  cleanup_and_fail();
  }
// Destroys '*mutex'.
// On failure, reports the pthread error code and terminates the program.
void xdestroy_mutex( pthread_mutex_t * const mutex )
  {
  const int status = pthread_mutex_destroy( mutex );
  if( status == 0 ) return;
  show_error( "pthread_mutex_destroy", status );
  cleanup_and_fail();
  }
// Destroys '*cond'.
// On failure, reports the pthread error code and terminates the program.
void xdestroy_cond( pthread_cond_t * const cond )
  {
  const int status = pthread_cond_destroy( cond );
  if( status == 0 ) return;
  show_error( "pthread_cond_destroy", status );
  cleanup_and_fail();
  }
// Locks '*mutex'.
// On failure, reports the pthread error code and terminates the program.
void xlock( pthread_mutex_t * const mutex )
  {
  const int status = pthread_mutex_lock( mutex );
  if( status == 0 ) return;
  show_error( "pthread_mutex_lock", status );
  cleanup_and_fail();
  }
// Unlocks '*mutex'.
// On failure, reports the pthread error code and terminates the program.
void xunlock( pthread_mutex_t * const mutex )
  {
  const int status = pthread_mutex_unlock( mutex );
  if( status == 0 ) return;
  show_error( "pthread_mutex_unlock", status );
  cleanup_and_fail();
  }
// Waits on '*cond' using '*mutex' (see pthread_cond_wait).
// On failure, reports the pthread error code and terminates the program.
void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex )
  {
  const int status = pthread_cond_wait( cond, mutex );
  if( status == 0 ) return;
  show_error( "pthread_cond_wait", status );
  cleanup_and_fail();
  }
// Signals '*cond' (see pthread_cond_signal).
// On failure, reports the pthread error code and terminates the program.
void xsignal( pthread_cond_t * const cond )
  {
  const int status = pthread_cond_signal( cond );
  if( status == 0 ) return;
  show_error( "pthread_cond_signal", status );
  cleanup_and_fail();
  }
// Broadcasts '*cond' (see pthread_cond_broadcast).
// On failure, reports the pthread error code and terminates the program.
void xbroadcast( pthread_cond_t * const cond )
  {
  const int status = pthread_cond_broadcast( cond );
  if( status == 0 ) return;
  show_error( "pthread_cond_broadcast", status );
  cleanup_and_fail();
  }
// Unit of output passed from a worker to the muxer: either the text to
// print for one tar member (name and metadata) or an error message.
struct Packet
  {
  enum Status { ok, member_done, error };

  long member_id;	// lzip member containing the header of this tar member
  std::string line;	// member name and metadata ready to print
  Status status;

  // 'text' is copied into 'line'; the status defaults to 'ok'.
  Packet( const long id, const char * const text, const Status st = ok )
    : member_id( id ),
      line( text ),
      status( st )
    {}
  };
class Packet_courier // moves packets around
{
public:
unsigned ocheck_counter;
unsigned owait_counter;
private:
long error_member_id; // first lzip member with error/misalign/eof
int deliver_worker_id; // worker queue currently delivering packets
int master_worker_id; // worker in charge if error/misalignment/eof
std::vector< std::queue< Packet * > > opacket_queues;
int num_working; // number of workers still running
const int num_workers; // number of workers
const unsigned out_slots; // max output packets per queue
pthread_mutex_t omutex;
pthread_cond_t oav_or_exit; // output packet available or all workers exited
std::vector< pthread_cond_t > slot_av; // output slot available
pthread_cond_t check_master;
Packet_courier( const Packet_courier & ); // declared as private
void operator=( const Packet_courier & ); // declared as private
public:
Packet_courier( const int workers, const int slots )
: ocheck_counter( 0 ), owait_counter( 0 ),
error_member_id( -1 ), deliver_worker_id( 0 ), master_worker_id( -1 ),
opacket_queues( workers ), num_working( workers ),
num_workers( workers ), out_slots( slots ), slot_av( workers )
{
xinit_mutex( &omutex ); xinit_cond( &oav_or_exit );
for( unsigned i = 0; i < slot_av.size(); ++i ) xinit_cond( &slot_av[i] );
xinit_cond( &check_master );
}
~Packet_courier()
{
xdestroy_cond( &check_master );
for( unsigned i = 0; i < slot_av.size(); ++i ) xdestroy_cond( &slot_av[i] );
xdestroy_cond( &oav_or_exit ); xdestroy_mutex( &omutex );
}
bool mastership_granted() const { return master_worker_id >= 0; }
bool request_mastership( const long member_id, const int worker_id )
{
xlock( &omutex );
if( mastership_granted() ) // already granted
{ xunlock( &omutex ); return ( master_worker_id == worker_id ); }
if( error_member_id < 0 || error_member_id > member_id )
error_member_id = member_id;
while( !mastership_granted() && ( worker_id != deliver_worker_id ||
!opacket_queues[deliver_worker_id].empty() ) )
xwait( &check_master, &omutex );
if( !mastership_granted() && worker_id == deliver_worker_id &&
opacket_queues[deliver_worker_id].empty() )
{
master_worker_id = worker_id; // grant mastership
for( int i = 0; i < num_workers; ++i ) // delete all packets
while( !opacket_queues[i].empty() )
opacket_queues[i].pop();
xbroadcast( &check_master );
xunlock( &omutex );
return true;
}
xunlock( &omutex );
return false; // mastership granted to another worker
}
void worker_finished()
{
// notify muxer when last worker exits
xlock( &omutex );
if( --num_working == 0 ) xsignal( &oav_or_exit );
xunlock( &omutex );
}
// collect a packet from a worker
bool collect_packet( Packet * const opacket, const int worker_id )
{
xlock( &omutex );
if( ( mastership_granted() && master_worker_id != worker_id ) ||
( error_member_id >= 0 && error_member_id < opacket->member_id ) )
{ xunlock( &omutex ); return false; } // reject packet
while( opacket_queues[worker_id].size() >= out_slots )
xwait( &slot_av[worker_id], &omutex );
opacket_queues[worker_id].push( opacket );
if( worker_id == deliver_worker_id ) xsignal( &oav_or_exit );
xunlock( &omutex );
return true;
}
// deliver a packet to muxer
// if packet.status == Packet::member_done, move to next queue
Packet * deliver_packet()
{
Packet * opacket = 0;
xlock( &omutex );
++ocheck_counter;
while( opacket_queues[deliver_worker_id].empty() && num_working > 0 )
{
++owait_counter;
if( !mastership_granted() && error_member_id >= 0 )
xbroadcast( &check_master ); // mastership requested not yet granted
xwait( &oav_or_exit, &omutex );
}
if( !opacket_queues[deliver_worker_id].empty() )
{
opacket = opacket_queues[deliver_worker_id].front();
opacket_queues[deliver_worker_id].pop();
if( opacket_queues[deliver_worker_id].size() + 1 == out_slots )
xsignal( &slot_av[deliver_worker_id] );
if( opacket->status == Packet::member_done && !mastership_granted() )
{ if( ++deliver_worker_id >= num_workers ) deliver_worker_id = 0; }
}
xunlock( &omutex );
return opacket;
}
bool finished() // all packets delivered to muxer
{
if( num_working != 0 ) return false;
for( int i = 0; i < num_workers; ++i )
if( !opacket_queues[i].empty() ) return false;
return true;
}
};
/* Reads exactly 'size' decompressed bytes into 'buf', feeding 'decoder'
   with compressed data read from 'infd' at 'file_pos' as needed.
   'file_pos' is advanced by the number of compressed bytes consumed.
   Compressed data is read up to 'member_end' while file_pos is below it,
   and up to 'cdata_size' afterwards.
   Return value: -1 = member_end exceeded, 0 = OK,
                  1 = damaged member, 2 = fatal error.
   If the return value is > 0, '*msg' points to a description of the error.
   If sizep, *sizep is kept updated with the number of bytes stored in
   'buf', so it is valid also when an error is returned. */
int archive_read_lz( LZ_Decoder * const decoder, const int infd,
                     long long & file_pos, const long long member_end,
                     const long long cdata_size, uint8_t * const buf,
                     const int size,
                     const char ** msg, int * const sizep = 0 )
  {
  int sz = 0;
  if( sizep ) *sizep = 0;

  while( sz < size )
    {
    const int rd = LZ_decompress_read( decoder, buf + sz, size - sz );
    if( rd < 0 )
      { *msg = LZ_strerror( LZ_decompress_errno( decoder ) ); return 1; }
    if( rd == 0 && LZ_decompress_finished( decoder ) == 1 )
      { *msg = "Archive ends unexpectedly."; return 2; }
    sz += rd; if( sizep ) *sizep = sz;
    if( sz < size && LZ_decompress_write_size( decoder ) > 0 )
      {
      const long long ibuf_size = 16384;		// try 65536
      uint8_t ibuf[ibuf_size];
      const long long rest = ( file_pos < member_end ) ?
        member_end - file_pos : cdata_size - file_pos;
      const int rsize = std::min( LZ_decompress_write_size( decoder ),
                                  (int)std::min( ibuf_size, rest ) );
      if( rsize <= 0 ) LZ_decompress_finish( decoder );
      else
        {
        const int got = preadblock( infd, ibuf, rsize, file_pos );
        // Save errno now: the library calls below are allowed to change
        // it, and it is the only way to tell a read error from EOF.
        const int read_errno = errno;
        if( LZ_decompress_write( decoder, ibuf, got ) != got )
          internal_error( "library error (LZ_decompress_write)." );
        file_pos += got;
        if( got < rsize )			// EOF or read error
          {
          LZ_decompress_finish( decoder );
          if( read_errno )
            { errno = read_errno; *msg = "Error reading archive"; return 2; }
          }
        }
      }
    }
  return ( file_pos > member_end ) ? -1 : 0;
  }
/* Sends to 'courier' the packet with the line to print for one tar
   member, then decodes and discards the data of the member (including
   padding) to reach the next header.
   If the member ends exactly at 'mdata_end', the packet is marked
   'member_done' and the data is not decoded; data_pos is set to mdata_end.
   If the member extends beyond 'mdata_end', mastership is requested.
   Return value: -1 = OK and this worker has become master,
                  0 = OK,
                  1 = damaged member (from archive_read_lz),
                  2 = fatal error (from archive_read_lz), or mastership
                      denied, in which case '*msg' is left untouched. */
int list_member_lz( LZ_Decoder * const decoder, const int infd,
                    long long & file_pos, const long long member_end,
                    const long long cdata_size, long long & data_pos,
                    const long long mdata_end, Packet_courier & courier,
                    const Extended & extended, const Tar_header header,
                    Resizable_buffer & rbuf, const long member_id,
                    const int worker_id, const char ** msg, const bool skip )
  {
  unsigned long long rest = extended.file_size();
  const int rem = rest % header_size;
  const int padding = rem ? header_size - rem : 0;
  const long long data_rest = mdata_end - ( data_pos + rest + padding );
  bool master = false;

  if( data_rest < 0 )		// tar member exceeds lzip member end
    {
    if( courier.request_mastership( member_id, worker_id ) ) master = true;
    else return 2;
    }
  // a skipped member still produces an (empty) packet
  if( verbosity < 0 || skip ) rbuf()[0] = 0;
  else format_member_name( extended, header, rbuf, verbosity > 0 );
  Packet * const opacket = new Packet( member_id, rbuf(),
                         data_rest ? Packet::ok : Packet::member_done );
  // a packet refused by the courier is still ours; free it
  if( !courier.collect_packet( opacket, worker_id ) ) delete opacket;
  if( !data_rest ) { data_pos = mdata_end; return 0; }

  const unsigned bufsize = 32 * header_size;
  uint8_t buf[bufsize];
  while( rest > 0 )		// decode and discard the member data
    {
    // the last chunk also carries the padding
    const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding;
    const int ret = archive_read_lz( decoder, infd, file_pos, member_end,
                                     cdata_size, buf, rsize, msg );
    if( ret > 0 ) return ret;
    data_pos += rsize;
    if( rest < bufsize ) break;
    rest -= rsize;
    }
  return ( master ? -1 : 0 );
  }
/* Reads the extended records announced by 'header' and parses them into
   'extended'. On success data_pos is advanced by the size of the records
   rounded up with round_up.
   Return value: 0 = OK, or nothing done because the size in the header
                     is 0 or too large,
                 1 = damaged member or records that fail to parse,
                 2 = fatal error,
                -1 = member_end exceeded (from archive_read_lz).
   '*msg' is set by archive_read_lz on read errors and here if memory is
   exhausted; it is not set when the records fail to parse. */
int parse_records_lz( LZ_Decoder * const decoder, const int infd,
                      long long & file_pos, const long long member_end,
                      const long long cdata_size, long long & data_pos,
                      Extended & extended, const Tar_header header,
                      const char ** msg, const bool permissive )
  {
  const unsigned long long edsize = parse_octal( header + size_o, size_l );
  const unsigned long long bufsize = round_up( edsize );
  // archive_read_lz takes the size as an int; a larger bufsize would be
  // truncated and the records would be parsed from an unfilled buffer.
  if( bufsize == 0 || edsize == 0 || bufsize >= 1ULL << 31 )
    return 0;			// overflow or no extended data
  // do not let an allocation failure escape from a thread as an exception
  char * const buf = new( std::nothrow ) char[bufsize];	// records buffer
  if( !buf ) { *msg = "Not enough memory."; return 2; }
  int retval = archive_read_lz( decoder, infd, file_pos, member_end,
                                cdata_size, (uint8_t *)buf, bufsize, msg );
  if( retval == 0 )
    { if( extended.parse( buf, edsize, permissive ) ) data_pos += bufsize;
      else retval = 1; }
  delete[] buf;
  return retval;
  }
// Arguments passed to each worker thread (dworker_l) through the
// 'void *' parameter of pthread_create. list_lz fills one per worker.
struct Worker_arg
{
const Lzip_index * lzip_index;	// positions and sizes of the lzip members
Packet_courier * courier;	// receives the packets produced by the worker
const Arg_parser * parser;	// non-option arguments are names to look for
std::vector< char > * name_pending;	// per argument; cleared when matched
int worker_id;			// first lzip member processed by this worker
int num_workers;		// distance between members of the same worker
int infd;			// file descriptor of the archive
int filenames;			// if > 0, list only members matching a name
bool missing_crc;		// extended records without CRC are an error
bool permissive;		// passed to the extended records parser
};
// read lzip members from archive, list their tar members, and
// give the produced packets to courier.
extern "C" void * dworker_l( void * arg )
{
const Worker_arg & tmp = *(const Worker_arg *)arg;
const Lzip_index & lzip_index = *tmp.lzip_index;
Packet_courier & courier = *tmp.courier;
const Arg_parser & parser = *tmp.parser;
std::vector< char > & name_pending = *tmp.name_pending;
const int worker_id = tmp.worker_id;
const int num_workers = tmp.num_workers;
const int infd = tmp.infd;
const int filenames = tmp.filenames;
const int missing_crc = tmp.missing_crc;
const bool permissive = tmp.permissive;
LZ_Decoder * const decoder = LZ_decompress_open();
if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok )
{ show_error( "Not enough memory." ); cleanup_and_fail(); }
const long long cdata_size = lzip_index.cdata_size();
Resizable_buffer rbuf( initial_line_length );
bool master = false;
for( long i = worker_id; !master && i < lzip_index.members(); i += num_workers )
{
long long data_pos = lzip_index.dblock( i ).pos();
const long long mdata_end = lzip_index.dblock( i ).end();
long long data_end = mdata_end;
long long file_pos = lzip_index.mblock( i ).pos();
long long member_end = lzip_index.mblock( i ).end();
Extended extended; // metadata from extended records
int retval = 0;
bool prev_extended = false; // prev header was extended
LZ_decompress_reset( decoder ); // prepare for new member
while( true ) // process one tar member per iteration
{
if( data_pos >= data_end ) break;
Tar_header header;
const char * msg = 0;
const int ret = archive_read_lz( decoder, infd, file_pos, member_end,
cdata_size, header, header_size, &msg );
if( ret != 0 )
{
if( !courier.request_mastership( i, worker_id ) ) goto done;
master = true;
if( ret > 0 )
{
Packet * const opacket = new Packet( i, msg, Packet::error );
courier.collect_packet( opacket, worker_id );
goto done;
}
// member_end exceeded, process rest of file
else { data_end = lzip_index.udata_size(); member_end = cdata_size; }
}
data_pos += header_size;
if( !verify_ustar_chksum( header ) )
{
if( !courier.request_mastership( i, worker_id ) ) goto done;
master = true;
if( block_is_zero( header, header_size ) ) break; // EOF
Packet * const opacket = new Packet( i,
( data_pos > header_size ) ? "Corrupt or invalid header." :
"This does not look like a POSIX tar.lz archive.", Packet::error );
courier.collect_packet( opacket, worker_id );
goto done;
}
const Typeflag typeflag = (Typeflag)header[typeflag_o];
if( typeflag == tf_global )
{
if( prev_extended )
{ show_error( "Format violation: global header after extended header." );
cleanup_and_fail(); }
Extended dummy; // global headers are parsed and ignored
const int ret = parse_records_lz( decoder, infd, file_pos, member_end,
cdata_size, data_pos, dummy, header, &msg, true );
if( ret != 0 )
{
if( !courier.request_mastership( i, worker_id ) ) goto done;
master = true;
if( ret > 0 )
{
if( !msg ) msg = "Error in global extended records.";
Packet * const opacket = new Packet( i, msg, Packet::error );
courier.collect_packet( opacket, worker_id );
if( ret == 2 ) goto done;
}
// member_end exceeded, process rest of file
else { data_end = lzip_index.udata_size(); member_end = cdata_size; }
}
continue;
}
if( typeflag == tf_extended )
{
int ret = 0;
if( prev_extended && !permissive )
{ msg = "Format violation: consecutive extended headers found.";
ret = 2; }
else ret = parse_records_lz( decoder, infd, file_pos, member_end,
cdata_size, data_pos, extended, header, &msg, permissive );
if( ret == 0 && !extended.crc_present() && missing_crc )
{ msg = "Missing CRC in extended records."; ret = 2; }
if( ret != 0 )
{
if( !courier.request_mastership( i, worker_id ) ) goto done;
master = true;
if( ret > 0 )
{
if( !msg ) msg = "Error in extended records.";
Packet * const opacket = new Packet( i, msg, Packet::error );
courier.collect_packet( opacket, worker_id );
extended.reset();
if( ret == 2 ) goto done;
}
// member_end exceeded, process rest of file
else { data_end = lzip_index.udata_size(); member_end = cdata_size; }
}
prev_extended = true;
continue;
}
prev_extended = false;
if( extended.linkpath().empty() ) // copy linkpath from ustar header
{
int len = 0;
while( len < linkname_l && header[linkname_o+len] ) ++len;
while( len > 1 && header[linkname_o+len-1] == '/' ) --len; // trailing '/'
if( len > 0 )
{
const uint8_t c = header[linkname_o+len]; header[linkname_o+len] = 0;
extended.linkpath( (const char *)header + linkname_o );
header[linkname_o+len] = c;
}
}
if( extended.path().empty() ) // copy path from ustar header
{
char stored_name[prefix_l+1+name_l+1];
int len = 0;
while( len < prefix_l && header[prefix_o+len] )
{ stored_name[len] = header[prefix_o+len]; ++len; }
if( len && header[name_o] ) stored_name[len++] = '/';
for( int i = 0; i < name_l && header[name_o+i]; ++i )
{ stored_name[len] = header[name_o+i]; ++len; }
while( len > 0 && stored_name[len-1] == '/' ) --len; // trailing '/'
stored_name[len] = 0;
extended.path( remove_leading_slash( stored_name ) );
}
const char * const filename = extended.path().c_str();
bool skip = filenames > 0;
if( skip )
for( int i = 0; i < parser.arguments(); ++i )
if( parser.code( i ) == 0 )
{
const char * const name =
remove_leading_slash( parser.argument( i ).c_str() );
if( compare_prefix_dir( name, filename ) ||
compare_tslash( name, filename ) )
{ skip = false; name_pending[i] = false; break; }
}
if( extended.file_size() == 0 &&
( typeflag == tf_regular || typeflag == tf_hiperf ) )
extended.file_size( parse_octal( header + size_o, size_l ) );
retval = list_member_lz( decoder, infd, file_pos, member_end,
cdata_size, data_pos, mdata_end, courier,
extended, header, rbuf, i, worker_id, &msg, skip );
extended.reset();
if( retval < 0 ) // member_end exceeded, process rest of file
{ master = true;
data_end = lzip_index.udata_size(); member_end = cdata_size; }
else if( retval > 0 )
{ show_error( msg );
show_error( "Error is not recoverable: exiting now." );
cleanup_and_fail(); }
}
}
if( LZ_decompress_close( decoder ) < 0 )
{
Packet * const opacket = new Packet( lzip_index.members(),
"LZ_decompress_close failed.", Packet::error );
courier.collect_packet( opacket, worker_id );
}
done:
courier.worker_finished();
return 0;
}
// Takes the packets from courier in delivery order until none is left
// and all workers have exited. Member lines are printed on stdout;
// an error packet is shown as a diagnostic and terminates the program.
// If no worker has been granted mastership when the packets run out,
// the archive is reported as ending unexpectedly.
void muxer( Packet_courier & courier )
  {
  for( Packet * pkt = courier.deliver_packet(); pkt != 0;
       pkt = courier.deliver_packet() )
    {
    if( pkt->status == Packet::error )
      { show_error( pkt->line.c_str() ); cleanup_and_fail(); }
    if( pkt->line.size() )
      {
      std::fputs( pkt->line.c_str(), stdout );
      std::fflush( stdout );
      }
    delete pkt;
    }
  // a null packet means the queue is empty and all workers exited
  if( courier.mastership_granted() ) return;
  show_error( "Archive ends unexpectedly." );	// no worker found EOF blocks
  cleanup_and_fail();
  }
} // end namespace
// init the courier, then start the workers and call the muxer.
int list_lz( const Arg_parser & parser, std::vector< char > & name_pending,
const Lzip_index & lzip_index, const int filenames,
const int debug_level, const int infd, const int num_workers,
const bool missing_crc, const bool permissive )
{
const int out_slots = 65536; // max small files (<=512B) in 64 MiB
Packet_courier courier( num_workers, out_slots );
Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers];
pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers];
if( !worker_args || !worker_threads )
{ show_error( "Not enough memory." ); cleanup_and_fail(); }
for( int i = 0; i < num_workers; ++i )
{
worker_args[i].lzip_index = &lzip_index;
worker_args[i].courier = &courier;
worker_args[i].parser = &parser;
worker_args[i].name_pending = &name_pending;
worker_args[i].worker_id = i;
worker_args[i].num_workers = num_workers;
worker_args[i].infd = infd;
worker_args[i].filenames = filenames;
worker_args[i].missing_crc = missing_crc;
worker_args[i].permissive = permissive;
const int errcode =
pthread_create( &worker_threads[i], 0, dworker_l, &worker_args[i] );
if( errcode )
{ show_error( "Can't create worker threads", errcode ); cleanup_and_fail(); }
}
muxer( courier );
for( int i = num_workers - 1; i >= 0; --i )
{
const int errcode = pthread_join( worker_threads[i], 0 );
if( errcode )
{ show_error( "Can't join worker threads", errcode ); cleanup_and_fail(); }
}
delete[] worker_threads;
delete[] worker_args;
int retval = 0;
for( int i = 0; i < parser.arguments(); ++i )
if( parser.code( i ) == 0 && name_pending[i] )
{
show_file_error( parser.argument( i ).c_str(), "Not found in archive." );
retval = 1;
}
if( debug_level & 1 )
std::fprintf( stderr,
"muxer tried to consume from workers %8u times\n"
"muxer had to wait %8u times\n",
courier.ocheck_counter,
courier.owait_counter );
if( !courier.finished() ) internal_error( "courier not finished." );
return retval;
}