diff options
Diffstat (limited to 'list_lz.cc')
-rw-r--r-- | list_lz.cc | 699 |
1 files changed, 699 insertions, 0 deletions
diff --git a/list_lz.cc b/list_lz.cc new file mode 100644 index 0000000..d41d2b7 --- /dev/null +++ b/list_lz.cc @@ -0,0 +1,699 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2019 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <algorithm> +#include <cerrno> +#include <climits> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <queue> +#include <string> +#include <vector> +#include <pthread.h> +#include <stdint.h> +#include <unistd.h> +#include <lzlib.h> + +#include "arg_parser.h" +#include "lzip.h" +#include "lzip_index.h" +#include "tarlz.h" + + +// Returns the number of bytes really read. +// If (returned value < size) and (errno == 0), means EOF was reached. +// +int preadblock( const int fd, uint8_t * const buf, const int size, + const long long pos ) + { + int sz = 0; + errno = 0; + while( sz < size ) + { + const int n = pread( fd, buf + sz, size - sz, pos + sz ); + if( n > 0 ) sz += n; + else if( n == 0 ) break; // EOF + else if( errno != EINTR ) break; + errno = 0; + } + return sz; + } + + +// Returns the number of bytes really written. +// If (returned value < size), it is always an error. +// +int pwriteblock( const int fd, const uint8_t * const buf, const int size, + const long long pos ) + { + int sz = 0; + errno = 0; + while( sz < size ) + { + const int n = pwrite( fd, buf + sz, size - sz, pos + sz ); + if( n > 0 ) sz += n; + else if( n < 0 && errno != EINTR ) break; + errno = 0; + } + return sz; + } + + +namespace { + +// This can be called from any thread, main thread or sub-threads alike, +// since they all call common helper functions that call cleanup_and_fail() +// in case of an error. +// +void cleanup_and_fail( const int retval = 2 ) + { + // only one thread can delete and exit + static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + + pthread_mutex_lock( &mutex ); // ignore errors to avoid loop + std::exit( retval ); + } + + +void xinit_mutex( pthread_mutex_t * const mutex ) + { + const int errcode = pthread_mutex_init( mutex, 0 ); + if( errcode ) + { show_error( "pthread_mutex_init", errcode ); cleanup_and_fail(); } + } + +void xinit_cond( pthread_cond_t * const cond ) + { + const int errcode = pthread_cond_init( cond, 0 ); + if( errcode ) + { show_error( "pthread_cond_init", errcode ); cleanup_and_fail(); } + } + + +void xdestroy_mutex( pthread_mutex_t * const mutex ) + { + const int errcode = pthread_mutex_destroy( mutex ); + if( errcode ) + { show_error( "pthread_mutex_destroy", errcode ); cleanup_and_fail(); } + } + +void xdestroy_cond( pthread_cond_t * const cond ) + { + const int errcode = pthread_cond_destroy( cond ); + if( errcode ) + { show_error( "pthread_cond_destroy", errcode ); cleanup_and_fail(); } + } + + +void xlock( pthread_mutex_t * const mutex ) + { + const int errcode = pthread_mutex_lock( mutex ); + if( errcode ) + { show_error( "pthread_mutex_lock", errcode ); cleanup_and_fail(); } + } + + +void xunlock( pthread_mutex_t * const mutex ) + { + const int errcode = pthread_mutex_unlock( mutex ); + if( errcode ) + { show_error( "pthread_mutex_unlock", errcode ); cleanup_and_fail(); } + } + + +void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex ) + { + const int errcode = pthread_cond_wait( cond, mutex ); + if( errcode ) + { show_error( "pthread_cond_wait", errcode ); cleanup_and_fail(); } + } + + +void xsignal( pthread_cond_t * const cond ) + { + const int errcode = pthread_cond_signal( cond ); + if( errcode ) + { show_error( "pthread_cond_signal", errcode ); cleanup_and_fail(); } + } + + +void xbroadcast( pthread_cond_t * const cond ) + { + const int errcode = pthread_cond_broadcast( cond ); + if( errcode ) + { show_error( "pthread_cond_broadcast", errcode ); cleanup_and_fail(); } + } + + +struct Packet // member name and metadata or error message + { + enum Status { ok, member_done, error }; + long member_id; // lzip member containing the header of this tar member + std::string line; // member name and metadata ready to print + Status status; + Packet( const long i, const char * const msg, const Status s = ok ) + : member_id( i ), line( msg ), status( s ) {} + }; + + +class Packet_courier // moves packets around + { +public: + unsigned ocheck_counter; + unsigned owait_counter; +private: + long error_member_id; // first lzip member with error/misalign/eof + int deliver_worker_id; // worker queue currently delivering packets + int master_worker_id; // worker in charge if error/misalignment/eof + std::vector< std::queue< Packet * > > opacket_queues; + int num_working; // number of workers still running + const int num_workers; // number of workers + const unsigned out_slots; // max output packets per queue + pthread_mutex_t omutex; + pthread_cond_t oav_or_exit; // output packet available or all workers exited + std::vector< pthread_cond_t > slot_av; // output slot available + pthread_cond_t check_master; + + Packet_courier( const Packet_courier & ); // declared as private + void operator=( const Packet_courier & ); // declared as private + +public: + Packet_courier( const int workers, const int slots ) + : ocheck_counter( 0 ), owait_counter( 0 ), + error_member_id( -1 ), deliver_worker_id( 0 ), master_worker_id( -1 ), + opacket_queues( workers ), num_working( workers ), + num_workers( workers ), out_slots( slots ), slot_av( workers ) + { + xinit_mutex( &omutex ); xinit_cond( &oav_or_exit ); + for( unsigned i = 0; i < slot_av.size(); ++i ) xinit_cond( &slot_av[i] ); + xinit_cond( &check_master ); + } + + ~Packet_courier() + { + xdestroy_cond( &check_master ); + for( unsigned i = 0; i < slot_av.size(); ++i ) xdestroy_cond( &slot_av[i] ); + xdestroy_cond( &oav_or_exit ); xdestroy_mutex( &omutex ); + } + + bool mastership_granted() const { return master_worker_id >= 0; } + + bool request_mastership( const long member_id, const int worker_id ) + { + xlock( &omutex ); + if( mastership_granted() ) // already granted + { xunlock( &omutex ); return ( master_worker_id == worker_id ); } + if( error_member_id < 0 || error_member_id > member_id ) + error_member_id = member_id; + while( !mastership_granted() && ( worker_id != deliver_worker_id || + !opacket_queues[deliver_worker_id].empty() ) ) + xwait( &check_master, &omutex ); + if( !mastership_granted() && worker_id == deliver_worker_id && + opacket_queues[deliver_worker_id].empty() ) + { + master_worker_id = worker_id; // grant mastership + for( int i = 0; i < num_workers; ++i ) // delete all packets + while( !opacket_queues[i].empty() ) + opacket_queues[i].pop(); + xbroadcast( &check_master ); + xunlock( &omutex ); + return true; + } + xunlock( &omutex ); + return false; // mastership granted to another worker + } + + void worker_finished() + { + // notify muxer when last worker exits + xlock( &omutex ); + if( --num_working == 0 ) xsignal( &oav_or_exit ); + xunlock( &omutex ); + } + + // collect a packet from a worker + bool collect_packet( Packet * const opacket, const int worker_id ) + { + xlock( &omutex ); + if( ( mastership_granted() && master_worker_id != worker_id ) || + ( error_member_id >= 0 && error_member_id < opacket->member_id ) ) + { xunlock( &omutex ); return false; } // reject packet + while( opacket_queues[worker_id].size() >= out_slots ) + xwait( &slot_av[worker_id], &omutex ); + opacket_queues[worker_id].push( opacket ); + if( worker_id == deliver_worker_id ) xsignal( &oav_or_exit ); + xunlock( &omutex ); + return true; + } + + // deliver a packet to muxer + // if packet.status == Packet::member_done, move to next queue + Packet * deliver_packet() + { + Packet * opacket = 0; + xlock( &omutex ); + ++ocheck_counter; + while( opacket_queues[deliver_worker_id].empty() && num_working > 0 ) + { + ++owait_counter; + if( !mastership_granted() && error_member_id >= 0 ) + xbroadcast( &check_master ); // mastership requested not yet granted + xwait( &oav_or_exit, &omutex ); + } + if( !opacket_queues[deliver_worker_id].empty() ) + { + opacket = opacket_queues[deliver_worker_id].front(); + opacket_queues[deliver_worker_id].pop(); + if( opacket_queues[deliver_worker_id].size() + 1 == out_slots ) + xsignal( &slot_av[deliver_worker_id] ); + if( opacket->status == Packet::member_done && !mastership_granted() ) + { if( ++deliver_worker_id >= num_workers ) deliver_worker_id = 0; } + } + xunlock( &omutex ); + return opacket; + } + + bool finished() // all packets delivered to muxer + { + if( num_working != 0 ) return false; + for( int i = 0; i < num_workers; ++i ) + if( !opacket_queues[i].empty() ) return false; + return true; + } + }; + + +/* Return value: -1 = member_end exceeded, 0 = OK, + 1 = damaged member, 2 = fatal error. + If sizep and error, return in *sizep the number of bytes read. */ +int archive_read_lz( LZ_Decoder * const decoder, const int infd, + long long & file_pos, const long long member_end, + const long long cdata_size, uint8_t * const buf, + const int size, + const char ** msg, int * const sizep = 0 ) + { + int sz = 0; + + if( sizep ) *sizep = 0; + while( sz < size ) + { + const int rd = LZ_decompress_read( decoder, buf + sz, size - sz ); + if( rd < 0 ) + { *msg = LZ_strerror( LZ_decompress_errno( decoder ) ); return 1; } + if( rd == 0 && LZ_decompress_finished( decoder ) == 1 ) + { *msg = "Archive ends unexpectedly."; return 2; } + sz += rd; if( sizep ) *sizep = sz; + if( sz < size && LZ_decompress_write_size( decoder ) > 0 ) + { + const long long ibuf_size = 16384; // try 65536 + uint8_t ibuf[ibuf_size]; + const long long rest = ( file_pos < member_end ) ? + member_end - file_pos : cdata_size - file_pos; + const int rsize = std::min( LZ_decompress_write_size( decoder ), + (int)std::min( ibuf_size, rest ) ); + if( rsize <= 0 ) LZ_decompress_finish( decoder ); + else + { + const int rd = preadblock( infd, ibuf, rsize, file_pos ); + if( LZ_decompress_write( decoder, ibuf, rd ) != rd ) + internal_error( "library error (LZ_decompress_write)." ); + file_pos += rd; + if( rd < rsize ) + { + LZ_decompress_finish( decoder ); + if( errno ) { *msg = "Error reading archive"; return 2; } + } + } + } + } + return ( file_pos > member_end ) ? -1 : 0; + } + + +int list_member_lz( LZ_Decoder * const decoder, const int infd, + long long & file_pos, const long long member_end, + const long long cdata_size, long long & data_pos, + const long long mdata_end, Packet_courier & courier, + const Extended & extended, const Tar_header header, + Resizable_buffer & rbuf, const long member_id, + const int worker_id, const char ** msg, const bool skip ) + { + unsigned long long rest = extended.size; + const int rem = extended.size % header_size; + const int padding = rem ? header_size - rem : 0; + const long long data_rest = mdata_end - ( data_pos + rest + padding ); + bool master = false; + + if( data_rest < 0 ) // tar member exceeds lzip member end + { + if( courier.request_mastership( member_id, worker_id ) ) master = true; + else return 2; + } + + if( verbosity < 0 || skip ) rbuf()[0] = 0; + else format_member_name( extended, header, rbuf, verbosity > 0 ); + Packet * const opacket = new Packet( member_id, rbuf(), + data_rest ? Packet::ok : Packet::member_done ); + courier.collect_packet( opacket, worker_id ); + if( !data_rest ) { data_pos = mdata_end; return 0; } + + const unsigned bufsize = 32 * header_size; + uint8_t buf[bufsize]; + while( rest > 0 ) + { + const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding; + const int ret = archive_read_lz( decoder, infd, file_pos, member_end, + cdata_size, buf, rsize, msg ); + if( ret > 0 ) return ret; + data_pos += rsize; + if( rest < bufsize ) break; + rest -= rsize; + } + return ( master ? -1 : 0 ); + } + + +int parse_records_lz( LZ_Decoder * const decoder, const int infd, + long long & file_pos, const long long member_end, + const long long cdata_size, long long & data_pos, + Extended & extended, const Tar_header header, + const char ** msg, const bool permissive ) + { + const unsigned long long edsize = parse_octal( header + size_o, size_l ); + const unsigned long long bufsize = round_up( edsize ); + if( bufsize == 0 || edsize == 0 || edsize >= 1ULL << 33 ) + return false; // overflow or no extended data + char * const buf = new char[bufsize]; // extended records buffer + int retval = archive_read_lz( decoder, infd, file_pos, member_end, + cdata_size, (uint8_t *)buf, bufsize, msg ); + if( retval == 0 ) + { if( extended.parse( buf, edsize, permissive ) ) data_pos += bufsize; + else retval = 1; } + delete[] buf; + return retval; + } + + +struct Worker_arg + { + const Lzip_index * lzip_index; + Packet_courier * courier; + const Arg_parser * parser; + std::vector< char > * name_pending; + int worker_id; + int num_workers; + int infd; + int filenames; + bool missing_crc; + bool permissive; + }; + + + // read lzip members from archive, list their tar members, and + // give the produced packets to courier. +extern "C" void * dworker_l( void * arg ) + { + const Worker_arg & tmp = *(const Worker_arg *)arg; + const Lzip_index & lzip_index = *tmp.lzip_index; + Packet_courier & courier = *tmp.courier; + const Arg_parser & parser = *tmp.parser; + std::vector< char > & name_pending = *tmp.name_pending; + const int worker_id = tmp.worker_id; + const int num_workers = tmp.num_workers; + const int infd = tmp.infd; + const int filenames = tmp.filenames; + const int missing_crc = tmp.missing_crc; + const bool permissive = tmp.permissive; + + LZ_Decoder * const decoder = LZ_decompress_open(); + if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok ) + { show_error( "Not enough memory." ); cleanup_and_fail(); } + + const long long cdata_size = lzip_index.cdata_size(); + Resizable_buffer rbuf( initial_line_length ); + bool master = false; + for( long i = worker_id; !master && i < lzip_index.members(); i += num_workers ) + { + long long data_pos = lzip_index.dblock( i ).pos(); + const long long mdata_end = lzip_index.dblock( i ).end(); + long long data_end = mdata_end; + long long file_pos = lzip_index.mblock( i ).pos(); + long long member_end = lzip_index.mblock( i ).end(); + + Extended extended; // metadata from extended records + int retval = 0; + bool prev_extended = false; // prev header was extended + LZ_decompress_reset( decoder ); // prepare for new member + while( true ) // process one tar member per iteration + { + if( data_pos >= data_end ) break; + Tar_header header; + const char * msg = 0; + const int ret = archive_read_lz( decoder, infd, file_pos, member_end, + cdata_size, header, header_size, &msg ); + if( ret != 0 ) + { + if( !courier.request_mastership( i, worker_id ) ) goto done; + master = true; + if( ret > 0 ) + { + Packet * const opacket = new Packet( i, msg, Packet::error ); + courier.collect_packet( opacket, worker_id ); + goto done; + } + // member_end exceeded, process rest of file + else { data_end = lzip_index.udata_size(); member_end = cdata_size; } + } + data_pos += header_size; + if( !verify_ustar_chksum( header ) ) + { + if( !courier.request_mastership( i, worker_id ) ) goto done; + master = true; + if( block_is_zero( header, header_size ) ) break; // EOF + Packet * const opacket = new Packet( i, + ( data_pos > header_size ) ? "Corrupt or invalid header." : + "This does not look like a POSIX tar.lz archive.", Packet::error ); + courier.collect_packet( opacket, worker_id ); + goto done; + } + + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + if( typeflag == tf_global ) + { + if( prev_extended ) + { show_error( "Format violation: global header after extended header." ); + cleanup_and_fail(); } + Extended dummy; // global headers are parsed and ignored + const int ret = parse_records_lz( decoder, infd, file_pos, member_end, + cdata_size, data_pos, dummy, header, &msg, true ); + if( ret != 0 ) + { + if( !courier.request_mastership( i, worker_id ) ) goto done; + master = true; + if( ret > 0 ) + { + if( !msg ) msg = "Error in global extended records."; + Packet * const opacket = new Packet( i, msg, Packet::error ); + courier.collect_packet( opacket, worker_id ); + if( ret == 2 ) goto done; + } + // member_end exceeded, process rest of file + else { data_end = lzip_index.udata_size(); member_end = cdata_size; } + } + continue; + } + if( typeflag == tf_extended ) + { + int ret = 0; + if( prev_extended && !permissive ) + { msg = "Format violation: consecutive extended headers found."; + ret = 2; } + else ret = parse_records_lz( decoder, infd, file_pos, member_end, + cdata_size, data_pos, extended, header, &msg, permissive ); + if( ret == 0 && !extended.crc_present && missing_crc ) + { msg = "Missing CRC in extended records."; ret = 2; } + if( ret != 0 ) + { + if( !courier.request_mastership( i, worker_id ) ) goto done; + master = true; + if( ret > 0 ) + { + if( !msg ) msg = "Error in extended records."; + Packet * const opacket = new Packet( i, msg, Packet::error ); + courier.collect_packet( opacket, worker_id ); + extended.reset(); + if( ret == 2 ) goto done; + } + // member_end exceeded, process rest of file + else { data_end = lzip_index.udata_size(); member_end = cdata_size; } + } + prev_extended = true; + continue; + } + prev_extended = false; + + if( extended.linkpath.empty() ) // copy linkpath from ustar header + { + for( int i = 0; i < linkname_l && header[linkname_o+i]; ++i ) + extended.linkpath += header[linkname_o+i]; + while( extended.linkpath.size() > 1 && // trailing '/' + extended.linkpath[extended.linkpath.size()-1] == '/' ) + extended.linkpath.resize( extended.linkpath.size() - 1 ); + } + + if( extended.path.empty() ) // copy path from ustar header + { + char stored_name[prefix_l+1+name_l+1]; + int len = 0; + while( len < prefix_l && header[prefix_o+len] ) + { stored_name[len] = header[prefix_o+len]; ++len; } + if( len && header[name_o] ) stored_name[len++] = '/'; + for( int i = 0; i < name_l && header[name_o+i]; ++i ) + { stored_name[len] = header[name_o+i]; ++len; } + while( len > 0 && stored_name[len-1] == '/' ) --len; // trailing '/' + stored_name[len] = 0; + extended.path = remove_leading_slash( stored_name ); + } + const char * const filename = extended.path.c_str(); + + bool skip = filenames > 0; + if( skip ) + for( int i = 0; i < parser.arguments(); ++i ) + if( parser.code( i ) == 0 ) + { + const char * const name = + remove_leading_slash( parser.argument( i ).c_str() ); + if( compare_prefix_dir( name, filename ) || + compare_tslash( name, filename ) ) + { skip = false; name_pending[i] = false; break; } + } + + if( extended.size == 0 && + ( typeflag == tf_regular || typeflag == tf_hiperf ) ) + extended.size = parse_octal( header + size_o, size_l ); + + retval = list_member_lz( decoder, infd, file_pos, member_end, + cdata_size, data_pos, mdata_end, courier, + extended, header, rbuf, i, worker_id, &msg, skip ); + extended.reset(); + if( retval < 0 ) // member_end exceeded, process rest of file + { master = true; + data_end = lzip_index.udata_size(); member_end = cdata_size; } + else if( retval > 0 ) + { show_error( msg ); + show_error( "Error is not recoverable: exiting now." ); + cleanup_and_fail(); } + } + } + if( LZ_decompress_close( decoder ) < 0 ) + { + Packet * const opacket = new Packet( lzip_index.members(), + "LZ_decompress_close failed.", Packet::error ); + courier.collect_packet( opacket, worker_id ); + } +done: + courier.worker_finished(); + return 0; + } + + + // get from courier the processed and sorted packets, and print + // the member lines on stdout or the diagnostics on stderr. +void muxer( Packet_courier & courier ) + { + while( true ) + { + Packet * const opacket = courier.deliver_packet(); + if( !opacket ) break; // queue is empty. all workers exited + + if( opacket->status == Packet::error ) + { show_error( opacket->line.c_str() ); cleanup_and_fail(); } + if( opacket->line.size() ) + { std::fputs( opacket->line.c_str(), stdout ); + std::fflush( stdout ); } + delete opacket; + } + if( !courier.mastership_granted() ) // no worker found EOF blocks + { show_error( "Archive ends unexpectedly." ); cleanup_and_fail(); } + } + +} // end namespace + + + // init the courier, then start the workers and call the muxer. +int list_lz( const Arg_parser & parser, std::vector< char > & name_pending, + const Lzip_index & lzip_index, const int filenames, + const int debug_level, const int infd, const int num_workers, + const bool missing_crc, const bool permissive ) + { + const int out_slots = 100; + Packet_courier courier( num_workers, out_slots ); + + Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers]; + pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers]; + if( !worker_args || !worker_threads ) + { show_error( "Not enough memory." ); cleanup_and_fail(); } + for( int i = 0; i < num_workers; ++i ) + { + worker_args[i].lzip_index = &lzip_index; + worker_args[i].courier = &courier; + worker_args[i].parser = &parser; + worker_args[i].name_pending = &name_pending; + worker_args[i].worker_id = i; + worker_args[i].num_workers = num_workers; + worker_args[i].infd = infd; + worker_args[i].filenames = filenames; + worker_args[i].missing_crc = missing_crc; + worker_args[i].permissive = permissive; + const int errcode = + pthread_create( &worker_threads[i], 0, dworker_l, &worker_args[i] ); + if( errcode ) + { show_error( "Can't create worker threads", errcode ); cleanup_and_fail(); } + } + + muxer( courier ); + + for( int i = num_workers - 1; i >= 0; --i ) + { + const int errcode = pthread_join( worker_threads[i], 0 ); + if( errcode ) + { show_error( "Can't join worker threads", errcode ); cleanup_and_fail(); } + } + delete[] worker_threads; + delete[] worker_args; + + int retval = 0; + for( int i = 0; i < parser.arguments(); ++i ) + if( parser.code( i ) == 0 && name_pending[i] ) + { + show_file_error( parser.argument( i ).c_str(), "Not found in archive." ); + retval = 1; + } + + if( debug_level & 1 ) + std::fprintf( stderr, + "muxer tried to consume from workers %8u times\n" + "muxer had to wait %8u times\n", + courier.ocheck_counter, + courier.owait_counter ); + + if( !courier.finished() ) internal_error( "courier not finished." ); + return retval; + } |