diff options
Diffstat (limited to 'list_lz.cc')
-rw-r--r-- | list_lz.cc | 359 |
1 files changed, 174 insertions, 185 deletions
@@ -149,7 +149,7 @@ bool check_skip_filename( const Arg_parser & parser, std::vector< char > & name_pending, const char * const filename, const int filenames ) { - if( Exclude::excluded( filename ) ) return true; // skip excluded + if( Exclude::excluded( filename ) ) return true; // skip excluded files bool skip = filenames > 0; if( skip ) for( int i = 0; i < parser.arguments(); ++i ) @@ -165,6 +165,90 @@ bool check_skip_filename( const Arg_parser & parser, } +/* Return value: 0 = OK, 1 = damaged member, 2 = fatal error. */ +int archive_read_lz( LZ_Decoder * const decoder, const int infd, + long long & file_pos, const long long member_end, + const long long cdata_size, uint8_t * const buf, + const int size, const char ** msg ) + { + int sz = 0; + + while( sz < size ) + { + const int rd = LZ_decompress_read( decoder, buf + sz, size - sz ); + if( rd < 0 ) + { *msg = LZ_strerror( LZ_decompress_errno( decoder ) ); return 1; } + if( rd == 0 && LZ_decompress_finished( decoder ) == 1 ) + { *msg = end_msg; return 2; } + sz += rd; + if( sz < size && LZ_decompress_write_size( decoder ) > 0 ) + { + const long long ibuf_size = 16384; // try 65536 + uint8_t ibuf[ibuf_size]; + const long long rest = ( file_pos < member_end ) ? + member_end - file_pos : cdata_size - file_pos; + const int rsize = std::min( LZ_decompress_write_size( decoder ), + (int)std::min( ibuf_size, rest ) ); + if( rsize <= 0 ) LZ_decompress_finish( decoder ); + else + { + const int rd = preadblock( infd, ibuf, rsize, file_pos ); + if( LZ_decompress_write( decoder, ibuf, rd ) != rd ) + internal_error( "library error (LZ_decompress_write)." ); + file_pos += rd; + if( rd < rsize ) + { + LZ_decompress_finish( decoder ); + if( errno ) { *msg = "Error reading archive"; return 2; } + } + } + } + } + return 0; + } + + +int parse_records_lz( LZ_Decoder * const decoder, const int infd, + long long & file_pos, const long long member_end, + const long long cdata_size, long long & data_pos, + Extended & extended, const Tar_header header, + Resizable_buffer & rbuf, const char ** msg, + const bool permissive ) + { + const long long edsize = parse_octal( header + size_o, size_l ); + const long long bufsize = round_up( edsize ); + if( edsize <= 0 || edsize >= 1LL << 33 || bufsize >= INT_MAX ) + return 1; // overflow or no extended data + if( !rbuf.resize( bufsize ) ) return 1; // extended records buffer + int retval = archive_read_lz( decoder, infd, file_pos, member_end, + cdata_size, (uint8_t *)rbuf(), bufsize, msg ); + if( retval == 0 ) + { if( extended.parse( rbuf(), edsize, permissive ) ) data_pos += bufsize; + else retval = 1; } + return retval; + } + + +int skip_member_lz( LZ_Decoder * const decoder, const int infd, + long long & file_pos, const long long member_end, + const long long cdata_size, long long & data_pos, + long long rest, const char ** msg ) + { + const int bufsize = 32 * header_size; + uint8_t buf[bufsize]; + while( rest > 0 ) // skip tar member + { + const int rsize = ( rest >= bufsize ) ? bufsize : rest; + const int ret = archive_read_lz( decoder, infd, file_pos, member_end, + cdata_size, buf, rsize, msg ); + if( ret != 0 ) return ret; + data_pos += rsize; + rest -= rsize; + } + return 0; + } + + namespace { struct Packet // member name and metadata or error message @@ -195,6 +279,7 @@ private: pthread_cond_t oav_or_exit; // output packet available or all workers exited std::vector< pthread_cond_t > slot_av; // output slot available pthread_cond_t check_master; + bool eof_found_; Packet_courier( const Packet_courier & ); // declared as private void operator=( const Packet_courier & ); // declared as private @@ -204,7 +289,8 @@ public: : ocheck_counter( 0 ), owait_counter( 0 ), error_member_id( -1 ), deliver_worker_id( 0 ), master_worker_id( -1 ), opacket_queues( workers ), num_working( workers ), - num_workers( workers ), out_slots( slots ), slot_av( workers ) + num_workers( workers ), out_slots( slots ), slot_av( workers ), + eof_found_( false ) { xinit_mutex( &omutex ); xinit_cond( &oav_or_exit ); for( unsigned i = 0; i < slot_av.size(); ++i ) xinit_cond( &slot_av[i] ); @@ -218,6 +304,9 @@ public: xdestroy_cond( &oav_or_exit ); xdestroy_mutex( &omutex ); } + bool eof_found() const { return eof_found_; } + void report_eof() { eof_found_ = true; } + bool mastership_granted() const { return master_worker_id >= 0; } bool request_mastership( const long member_id, const int worker_id ) @@ -255,12 +344,15 @@ public: /* Collect a packet from a worker. If a packet is rejected, the worker must terminate. */ - bool collect_packet( const Packet * const opacket, const int worker_id ) + bool collect_packet( const int worker_id, const long member_id, + const char * const msg, + const Packet::Status status = Packet::ok ) { + const Packet * const opacket = new Packet( member_id, msg, status ); xlock( &omutex ); if( ( mastership_granted() && master_worker_id != worker_id ) || ( error_member_id >= 0 && error_member_id < opacket->member_id ) ) - { xunlock( &omutex ); return false; } // reject packet + { xunlock( &omutex ); delete opacket; return false; } // reject packet while( opacket_queues[worker_id].size() >= out_slots ) xwait( &slot_av[worker_id], &omutex ); opacket_queues[worker_id].push( opacket ); @@ -310,53 +402,6 @@ public: }; -/* Return value: -1 = member_end exceeded, 0 = OK, - 1 = damaged member, 2 = fatal error. - If sizep and error, return in *sizep the number of bytes read. */ -int archive_read_lz( LZ_Decoder * const decoder, const int infd, - long long & file_pos, const long long member_end, - const long long cdata_size, uint8_t * const buf, - const int size, - const char ** msg, int * const sizep = 0 ) - { - int sz = 0; - - if( sizep ) *sizep = 0; - while( sz < size ) - { - const int rd = LZ_decompress_read( decoder, buf + sz, size - sz ); - if( rd < 0 ) - { *msg = LZ_strerror( LZ_decompress_errno( decoder ) ); return 1; } - if( rd == 0 && LZ_decompress_finished( decoder ) == 1 ) - { *msg = "Archive ends unexpectedly."; return 2; } - sz += rd; if( sizep ) *sizep = sz; - if( sz < size && LZ_decompress_write_size( decoder ) > 0 ) - { - const long long ibuf_size = 16384; // try 65536 - uint8_t ibuf[ibuf_size]; - const long long rest = ( file_pos < member_end ) ? - member_end - file_pos : cdata_size - file_pos; - const int rsize = std::min( LZ_decompress_write_size( decoder ), - (int)std::min( ibuf_size, rest ) ); - if( rsize <= 0 ) LZ_decompress_finish( decoder ); - else - { - const int rd = preadblock( infd, ibuf, rsize, file_pos ); - if( LZ_decompress_write( decoder, ibuf, rd ) != rd ) - internal_error( "library error (LZ_decompress_write)." ); - file_pos += rd; - if( rd < rsize ) - { - LZ_decompress_finish( decoder ); - if( errno ) { *msg = "Error reading archive"; return 2; } - } - } - } - } - return ( file_pos > member_end ) ? -1 : 0; - } - - int list_member_lz( LZ_Decoder * const decoder, const int infd, long long & file_pos, const long long member_end, const long long cdata_size, long long & data_pos, @@ -365,61 +410,22 @@ int list_member_lz( LZ_Decoder * const decoder, const int infd, Resizable_buffer & rbuf, const long member_id, const int worker_id, const char ** msg, const bool skip ) { - unsigned long long rest = extended.file_size(); + long long rest = extended.file_size(); const int rem = rest % header_size; - const int padding = rem ? header_size - rem : 0; - const long long data_rest = mdata_end - ( data_pos + rest + padding ); - bool master = false; - - if( data_rest < 0 ) // tar member exceeds lzip member end - { - if( courier.request_mastership( member_id, worker_id ) ) master = true; - else { *msg = "tar member exceeds lzip member end"; return 2; } - } + if( rem ) rest += header_size - rem; // padding + const long long data_rest = mdata_end - ( data_pos + rest ); if( verbosity < 0 || skip ) rbuf()[0] = 0; else if( !format_member_name( extended, header, rbuf, verbosity > 0 ) ) { *msg = mem_msg; return 1; } - const Packet * const opacket = new Packet( member_id, rbuf(), - data_rest ? Packet::ok : Packet::member_done ); - if( !courier.collect_packet( opacket, worker_id ) ) + if( !courier.collect_packet( worker_id, member_id, rbuf(), + data_rest ? Packet::ok : Packet::member_done ) ) { *msg = "other worker found an error"; return 1; } - if( !data_rest ) { data_pos = mdata_end; return 0; } - - const unsigned bufsize = 32 * header_size; - uint8_t buf[bufsize]; - while( rest > 0 ) - { - const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding; - const int ret = archive_read_lz( decoder, infd, file_pos, member_end, - cdata_size, buf, rsize, msg ); - if( ret > 0 ) return ret; - data_pos += rsize; - if( rest < bufsize ) break; - rest -= rsize; - } - return ( master ? -1 : 0 ); - } - - -int parse_records_lz( LZ_Decoder * const decoder, const int infd, - long long & file_pos, const long long member_end, - const long long cdata_size, long long & data_pos, - Extended & extended, const Tar_header header, - Resizable_buffer & rbuf, const char ** msg, - const bool permissive ) - { - const unsigned long long edsize = parse_octal( header + size_o, size_l ); - const unsigned long long bufsize = round_up( edsize ); - if( edsize == 0 || edsize >= 1ULL << 33 || bufsize == 0 || bufsize >= INT_MAX ) - return 1; // overflow or no extended data - if( !rbuf.resize( bufsize ) ) return 1; // extended records buffer - int retval = archive_read_lz( decoder, infd, file_pos, member_end, - cdata_size, (uint8_t *)rbuf(), bufsize, msg ); - if( retval == 0 ) - { if( extended.parse( rbuf(), edsize, permissive ) ) data_pos += bufsize; - else retval = 1; } - return retval; + if( data_rest ) + return skip_member_lz( decoder, infd, file_pos, member_end, cdata_size, + data_pos, rest, msg ); + data_pos = mdata_end; + return 0; } @@ -467,21 +473,30 @@ extern "C" void * tworker( void * arg ) const long long mdata_end = lzip_index.dblock( i ).end(); long long data_end = mdata_end; long long file_pos = lzip_index.mblock( i ).pos(); - long long member_end = lzip_index.mblock( i ).end(); + const long long member_end = lzip_index.mblock( i ).end(); if( data_pos >= data_end ) // empty lzip member { - const Packet * const opacket = new Packet( i, "", Packet::member_done ); - if( !courier.collect_packet( opacket, worker_id ) ) goto done; - continue; + if( courier.collect_packet( worker_id, i, "", Packet::member_done ) ) + continue; else break; } Extended extended; // metadata from extended records - int retval = 0; bool prev_extended = false; // prev header was extended LZ_decompress_reset( decoder ); // prepare for new member - while( true ) // process one tar member per iteration + while( true ) // process one tar header per iteration { - if( data_pos >= data_end ) break; + if( data_pos >= data_end ) + { + if( data_pos == data_end && !prev_extended ) break; + // member end exceeded or ends in extended, process rest of file + if( !courier.request_mastership( i, worker_id ) ) goto done; + master = true; + if( data_end < lzip_index.udata_size() ) + data_end = lzip_index.udata_size(); + else + { courier.collect_packet( worker_id, i, end_msg, Packet::error ); + goto done; } + } Tar_header header; const char * msg = 0; const int ret = archive_read_lz( decoder, infd, file_pos, member_end, @@ -490,81 +505,59 @@ extern "C" void * tworker( void * arg ) { if( !courier.request_mastership( i, worker_id ) ) goto done; master = true; - if( ret > 0 ) - { - const Packet * const opacket = new Packet( i, msg, Packet::error ); - courier.collect_packet( opacket, worker_id ); - goto done; - } - // member_end exceeded, process rest of file - else { data_end = lzip_index.udata_size(); member_end = cdata_size; } + courier.collect_packet( worker_id, i, msg, Packet::error ); + goto done; } data_pos += header_size; if( !verify_ustar_chksum( header ) ) { if( !courier.request_mastership( i, worker_id ) ) goto done; master = true; - if( block_is_zero( header, header_size ) ) break; // EOF - const Packet * const opacket = new Packet( i, - ( data_pos > header_size ) ? "Corrupt or invalid header." : - "This does not look like a POSIX tar.lz archive.", Packet::error ); - courier.collect_packet( opacket, worker_id ); + if( block_is_zero( header, header_size ) ) // EOF + { + if( !prev_extended || permissive ) courier.report_eof(); + else courier.collect_packet( worker_id, i, fv_msg1, Packet::error ); + goto done; + } + courier.collect_packet( worker_id, i, ( data_pos > header_size ) ? + bad_hdr_msg : posix_lz_msg, Packet::error ); goto done; } const Typeflag typeflag = (Typeflag)header[typeflag_o]; if( typeflag == tf_global ) { - if( prev_extended ) - { show_error( "Format violation: global header after extended header." ); - cleanup_and_fail( 2 ); } + if( prev_extended && !permissive ) + { courier.collect_packet( worker_id, i, fv_msg2, Packet::error ); + goto done; } Extended dummy; // global headers are parsed and ignored - const int ret = parse_records_lz( decoder, infd, file_pos, member_end, - cdata_size, data_pos, dummy, header, - rbuf, &msg, true ); - if( ret != 0 ) + if( parse_records_lz( decoder, infd, file_pos, member_end, cdata_size, + data_pos, dummy, header, rbuf, &msg, true ) == 0 ) { - if( !courier.request_mastership( i, worker_id ) ) goto done; - master = true; - if( ret > 0 ) - { - if( !msg ) msg = "Error in global extended records."; - const Packet * const opacket = new Packet( i, msg, Packet::error ); - courier.collect_packet( opacket, worker_id ); + if( data_pos == data_end && // end of lzip member + !courier.collect_packet( worker_id, i, "", Packet::member_done ) ) goto done; - } - // member_end exceeded, process rest of file - else { data_end = lzip_index.udata_size(); member_end = cdata_size; } + continue; } - continue; + if( courier.request_mastership( i, worker_id ) ) + courier.collect_packet( worker_id, i, msg ? msg : gblrec_msg, + Packet::error ); + goto done; } if( typeflag == tf_extended ) { int ret = 0; - if( prev_extended && !permissive ) - { msg = "Format violation: consecutive extended headers found."; - ret = 2; } + if( prev_extended && !permissive ) { msg = fv_msg3; ret = 2; } else ret = parse_records_lz( decoder, infd, file_pos, member_end, cdata_size, data_pos, extended, header, rbuf, &msg, permissive ); if( ret == 0 && !extended.crc_present() && missing_crc ) - { msg = "Missing CRC in extended records."; ret = 2; } - if( ret != 0 ) - { - if( !courier.request_mastership( i, worker_id ) ) goto done; - master = true; - if( ret > 0 ) - { - if( !msg ) msg = "Error in extended records."; - const Packet * const opacket = new Packet( i, msg, Packet::error ); - courier.collect_packet( opacket, worker_id ); - goto done; - } - // member_end exceeded, process rest of file - else { data_end = lzip_index.udata_size(); member_end = cdata_size; } - } - prev_extended = true; - continue; + { msg = mcrc_msg; ret = 2; } + if( ret == 0 ) { prev_extended = true; continue; } + if( courier.request_mastership( i, worker_id ) ) + courier.collect_packet( worker_id, i, msg ? msg : extrec_msg, + Packet::error ); + goto done; } prev_extended = false; @@ -573,28 +566,18 @@ extern "C" void * tworker( void * arg ) const bool skip = check_skip_filename( parser, name_pending, extended.path().c_str(), filenames ); - retval = list_member_lz( decoder, infd, file_pos, member_end, cdata_size, - data_pos, mdata_end, courier, extended, - header, rbuf, i, worker_id, &msg, skip ); + if( list_member_lz( decoder, infd, file_pos, member_end, cdata_size, + data_pos, mdata_end, courier, extended, + header, rbuf, i, worker_id, &msg, skip ) != 0 ) + { courier.collect_packet( worker_id, i, msg, Packet::error ); + goto done; } extended.reset(); - if( retval < 0 ) // member_end exceeded, process rest of file - { master = true; - data_end = lzip_index.udata_size(); member_end = cdata_size; } - else if( retval > 0 ) - { - const Packet * const opacket = new Packet( i, msg, Packet::error ); - courier.collect_packet( opacket, worker_id ); - goto done; - } } } done: if( LZ_decompress_close( decoder ) < 0 ) - { - const Packet * const opacket = new Packet( lzip_index.members(), - "LZ_decompress_close failed.", Packet::error ); - courier.collect_packet( opacket, worker_id ); - } + courier.collect_packet( worker_id, lzip_index.members(), + "LZ_decompress_close failed.", Packet::error ); courier.worker_finished(); return 0; } @@ -602,7 +585,7 @@ done: /* Get from courier the processed and sorted packets, and print the member lines on stdout or the diagnostics on stderr. */ -void muxer( Packet_courier & courier ) +void muxer( const char * const archive_namep, Packet_courier & courier ) { while( true ) { @@ -610,23 +593,25 @@ void muxer( Packet_courier & courier ) if( !opacket ) break; // queue is empty. all workers exited if( opacket->status == Packet::error ) - { show_error( opacket->line.c_str() ); cleanup_and_fail( 2 ); } + { show_file_error( archive_namep, opacket->line.c_str() ); + cleanup_and_fail( 2 ); } if( opacket->line.size() ) { std::fputs( opacket->line.c_str(), stdout ); std::fflush( stdout ); } delete opacket; } - if( !courier.mastership_granted() ) // no worker found EOF blocks - { show_error( "Archive ends unexpectedly." ); cleanup_and_fail( 2 ); } + if( !courier.eof_found() ) // no worker found EOF blocks + { show_file_error( archive_namep, end_msg ); cleanup_and_fail( 2 ); } } } // end namespace // init the courier, then start the workers and call the muxer. -int list_lz( const Arg_parser & parser, std::vector< char > & name_pending, - const Lzip_index & lzip_index, const int filenames, - const int debug_level, const int infd, const int num_workers, - const bool missing_crc, const bool permissive ) +int list_lz( const char * const archive_namep, const Arg_parser & parser, + std::vector< char > & name_pending, const Lzip_index & lzip_index, + const int filenames, const int debug_level, const int infd, + const int num_workers, const bool missing_crc, + const bool permissive ) { const int out_slots = 65536; // max small files (<=512B) in 64 MiB @@ -655,7 +640,7 @@ int list_lz( const Arg_parser & parser, std::vector< char > & name_pending, { show_error( "Can't create worker threads", errcode ); cleanup_and_fail(); } } - muxer( courier ); + muxer( archive_namep, courier ); for( int i = num_workers - 1; i >= 0; --i ) { @@ -667,7 +652,11 @@ int list_lz( const Arg_parser & parser, std::vector< char > & name_pending, delete[] worker_args; int retval = 0; - for( int i = 0; i < parser.arguments(); ++i ) + if( close( infd ) != 0 ) + { show_file_error( archive_namep, "Error closing archive", errno ); + retval = 1; } + + if( retval == 0 ) for( int i = 0; i < parser.arguments(); ++i ) if( !parser.code( i ) && parser.argument( i ).size() && name_pending[i] ) { show_file_error( parser.argument( i ).c_str(), "Not found in archive." ); |