diff options
Diffstat (limited to 'decode_lz.cc')
-rw-r--r-- | decode_lz.cc | 280 |
1 files changed, 237 insertions, 43 deletions
diff --git a/decode_lz.cc b/decode_lz.cc index 71c699b..4fc3d80 100644 --- a/decode_lz.cc +++ b/decode_lz.cc @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2020 Antonio Diaz Diaz. + Copyright (C) 2013-2021 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -29,7 +29,12 @@ #include <pthread.h> #include <stdint.h> #include <unistd.h> +#include <utime.h> #include <sys/stat.h> +#if !defined __FreeBSD__ && !defined __OpenBSD__ && !defined __NetBSD__ && \ + !defined __DragonFly__ && !defined __APPLE__ && !defined __OS2__ +#include <sys/sysmacros.h> // for major, minor, makedev +#endif #include <lzlib.h> #include "arg_parser.h" @@ -43,47 +48,8 @@ - the other workers return. - the muxer drains the queue and returns. */ -/* Returns the number of bytes really read. - If (returned value < size) and (errno == 0), means EOF was reached. -*/ -int preadblock( const int fd, uint8_t * const buf, const int size, - const long long pos ) - { - int sz = 0; - errno = 0; - while( sz < size ) - { - const int n = pread( fd, buf + sz, size - sz, pos + sz ); - if( n > 0 ) sz += n; - else if( n == 0 ) break; // EOF - else if( errno != EINTR ) break; - errno = 0; - } - return sz; - } - - namespace { -/* Returns the number of bytes really written. - If (returned value < size), it is always an error. -*//* -int pwriteblock( const int fd, const uint8_t * const buf, const int size, - const long long pos ) - { - int sz = 0; - errno = 0; - while( sz < size ) - { - const int n = pwrite( fd, buf + sz, size - sz, pos + sz ); - if( n > 0 ) sz += n; - else if( n < 0 && errno != EINTR ) break; - errno = 0; - } - return sz; - } -*/ - const char * const other_msg = "Other worker found an error."; struct Packet // member name and metadata or error message @@ -237,6 +203,35 @@ public: }; +// prevent two threads from extracting the same file at the same time +class Name_monitor + { + std::vector< unsigned > crc_vector; + std::vector< std::string > name_vector; + pthread_mutex_t mutex; + +public: + Name_monitor( const int num_workers ) + : crc_vector( num_workers ), name_vector( num_workers ) + { if( num_workers > 0 ) xinit_mutex( &mutex ); } + + bool reserve_name( const unsigned worker_id, const std::string & filename ) + { + // compare the CRCs of the names, verify collisions comparing the names + const unsigned crc = + crc32c.compute_crc( (const uint8_t *)filename.c_str(), filename.size() ); + xlock( &mutex ); + for( unsigned i = 0; i < crc_vector.size(); ++i ) + if( crc_vector[i] == crc && crc != 0 && i != worker_id && + name_vector[i] == filename ) + { xunlock( &mutex ); return false; } // filename already reserved + crc_vector[worker_id] = crc; name_vector[worker_id] = filename; + xunlock( &mutex ); + return true; + } + }; + + const char * skip_member_lz( Archive_reader_i & ar, Packet_courier & courier, const Extended & extended, const long member_id, const int worker_id ) @@ -310,11 +305,201 @@ const char * list_member_lz( Archive_reader_i & ar, Packet_courier & courier, } +const char * extract_member_lz( const Cl_options & cl_opts, + Archive_reader_i & ar, Packet_courier & courier, + const Extended & extended, const Tar_header header, + Resizable_buffer & rbuf, const long member_id, + const int worker_id, Name_monitor & name_monitor ) + { + // skip member if another copy is already being extracted by another thread + if( !name_monitor.reserve_name( worker_id, extended.path() ) ) + return skip_member_lz( ar, courier, extended, member_id, worker_id ); + const char * const filename = extended.path().c_str(); + if( contains_dotdot( filename ) ) + { + if( verbosity >= 0 ) + { std::string estr( extended.path() ); + estr += ": Contains a '..' component, skipping."; + if( !courier.collect_packet( member_id, worker_id, estr.c_str(), + Packet::diag ) ) return other_msg; } + return skip_member_lz( ar, courier, extended, member_id, worker_id ); + } + mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits + if( geteuid() != 0 && !cl_opts.preserve_permissions ) mode &= ~get_umask(); + const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + const bool islink = ( typeflag == tf_link || typeflag == tf_symlink ); + int outfd = -1; + + if( verbosity < 1 ) rbuf()[0] = 0; + else if( !format_member_name( extended, header, rbuf, verbosity > 1 ) ) + return mem_msg; + if( rbuf()[0] && !courier.collect_packet( member_id, worker_id, rbuf(), + Packet::ok ) ) return other_msg; + if( typeflag != tf_directory ) std::remove( filename ); + if( !make_path( filename ) && verbosity >= 0 ) + { std::string estr( extended.path() ); + estr += ": warning: Failed to create intermediate directory."; + if( !courier.collect_packet( member_id, worker_id, estr.c_str(), + Packet::diag ) ) return other_msg; } + switch( typeflag ) + { + case tf_regular: + case tf_hiperf: + outfd = open_outstream( filename, true, &rbuf ); + if( outfd < 0 ) + { + if( verbosity >= 0 && !courier.collect_packet( member_id, worker_id, + rbuf(), Packet::diag ) ) return other_msg; + set_error_status( 2 ); + return skip_member_lz( ar, courier, extended, member_id, worker_id ); + } + break; + case tf_link: + case tf_symlink: + { + const char * const linkname = extended.linkpath().c_str(); + const bool hard = typeflag == tf_link; + if( ( hard && link( linkname, filename ) != 0 ) || + ( !hard && symlink( linkname, filename ) != 0 ) ) + { + if( verbosity >= 0 ) + { + const int saved_errno = errno; + const int size = + snprintf( rbuf(), rbuf.size(), "Can't %slink '%s' to '%s': %s.\n", + hard ? "" : "sym", linkname, filename, + std::strerror( saved_errno ) ); + if( size > 0 && (unsigned)size > rbuf.size() && rbuf.resize( size ) ) + snprintf( rbuf(), rbuf.size(), "Can't %slink '%s' to '%s': %s.\n", + hard ? "" : "sym", linkname, filename, + std::strerror( saved_errno ) ); + if( verbosity >= 0 && !courier.collect_packet( member_id, worker_id, + rbuf(), Packet::diag ) ) return other_msg; + } + set_error_status( 2 ); + } + } break; + case tf_directory: + { + struct stat st; + bool exists = ( stat( filename, &st ) == 0 ); + if( exists && !S_ISDIR( st.st_mode ) ) + { exists = false; std::remove( filename ); } + if( !exists && mkdir( filename, mode ) != 0 && errno != EEXIST ) + { + if( verbosity >= 0 ) + { snprintf( rbuf(), rbuf.size(), "%s: Can't create directory: %s\n", + filename, std::strerror( errno ) ); + if( !courier.collect_packet( member_id, worker_id, rbuf(), + Packet::diag ) ) return other_msg; } + set_error_status( 2 ); + } + } break; + case tf_chardev: + case tf_blockdev: + { + const unsigned dev = + makedev( parse_octal( header + devmajor_o, devmajor_l ), + parse_octal( header + devminor_o, devminor_l ) ); + const int dmode = ( typeflag == tf_chardev ? S_IFCHR : S_IFBLK ) | mode; + if( mknod( filename, dmode, dev ) != 0 ) + { + if( verbosity >= 0 ) + { snprintf( rbuf(), rbuf.size(), "%s: Can't create device node: %s\n", + filename, std::strerror( errno ) ); + if( !courier.collect_packet( member_id, worker_id, rbuf(), + Packet::diag ) ) return other_msg; } + set_error_status( 2 ); + } + break; + } + case tf_fifo: + if( mkfifo( filename, mode ) != 0 && errno != EEXIST ) + { + if( verbosity >= 0 ) + { snprintf( rbuf(), rbuf.size(), "%s: Can't create FIFO file: %s\n", + filename, std::strerror( errno ) ); + if( !courier.collect_packet( member_id, worker_id, rbuf(), + Packet::diag ) ) return other_msg; } + set_error_status( 2 ); + } + break; + default: + if( verbosity >= 0 ) + { snprintf( rbuf(), rbuf.size(), + "File type '%c' not supported for file '%s'.\n", + typeflag, filename ); + if( !courier.collect_packet( member_id, worker_id, rbuf(), + Packet::diag ) ) return other_msg; } + set_error_status( 2 ); + } + + const uid_t uid = (uid_t)parse_octal( header + uid_o, uid_l ); + const gid_t gid = (gid_t)parse_octal( header + gid_o, gid_l ); + if( !islink && chown( filename, uid, gid ) != 0 && + errno != EPERM && errno != EINVAL ) + { + if( verbosity >= 0 ) + { snprintf( rbuf(), rbuf.size(), "%s: Can't change file owner: %s\n", + filename, std::strerror( errno ) ); + if( !courier.collect_packet( member_id, worker_id, rbuf(), + Packet::diag ) ) return other_msg; } + set_error_status( 2 ); + } + + if( typeflag == tf_regular || typeflag == tf_directory || + typeflag == tf_hiperf ) fchmod( outfd, mode ); // ignore errors + + const int bufsize = 32 * header_size; + uint8_t buf[bufsize]; + long long rest = extended.file_size(); + const int rem = rest % header_size; + const int padding = rem ? header_size - rem : 0; + while( rest > 0 ) + { + const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding; + const int ret = ar.read( buf, rsize ); + if( ret != 0 ) + { + if( outfd >= 0 ) + { + if( cl_opts.keep_damaged ) + { writeblock( outfd, buf, std::min( rest, (long long)ar.e_size() ) ); + close( outfd ); } + else { close( outfd ); std::remove( filename ); } + } + return ar.e_msg(); + } + const int wsize = ( rest >= bufsize ) ? bufsize : rest; + if( outfd >= 0 && writeblock( outfd, buf, wsize ) != wsize ) + { snprintf( rbuf(), rbuf.size(), "%s: Error writing file: %s\n", + filename, std::strerror( errno ) ); return rbuf(); } + rest -= wsize; + } + if( outfd >= 0 && close( outfd ) != 0 ) + { snprintf( rbuf(), rbuf.size(), "%s: Error closing file: %s\n", + filename, std::strerror( errno ) ); return rbuf(); } + if( !islink ) + { + struct utimbuf t; + t.actime = mtime; + t.modtime = mtime; + utime( filename, &t ); // ignore errors + } + if( ar.at_member_end() && + !courier.collect_packet( member_id, worker_id, "", Packet::member_done ) ) + return other_msg; + return 0; + } + + struct Worker_arg { const Cl_options * cl_opts; const Archive_descriptor * ad; Packet_courier * courier; + Name_monitor * name_monitor; std::vector< char > * name_pending; int worker_id; int num_workers; @@ -330,6 +515,7 @@ extern "C" void * dworker( void * arg ) const Cl_options & cl_opts = *tmp.cl_opts; const Archive_descriptor & ad = *tmp.ad; Packet_courier & courier = *tmp.courier; + Name_monitor & name_monitor = *tmp.name_monitor; std::vector< char > & name_pending = *tmp.name_pending; const int worker_id = tmp.worker_id; const int num_workers = tmp.num_workers; @@ -429,9 +615,12 @@ extern "C" void * dworker( void * arg ) msg = skip_member_lz( ar, courier, extended, i, worker_id ); else if( cl_opts.program_mode == m_list ) msg = list_member_lz( ar, courier, extended, header, rbuf, i, worker_id ); - else msg = compare_member_lz( cl_opts, ar, courier, extended, header, - rbuf, i, worker_id ); - if( msg ) + else if( cl_opts.program_mode == m_diff ) + msg = compare_member_lz( cl_opts, ar, courier, extended, header, + rbuf, i, worker_id ); + else msg = extract_member_lz( cl_opts, ar, courier, extended, header, + rbuf, i, worker_id, name_monitor ); + if( msg ) // fatal error { if( courier.request_mastership( i, worker_id ) ) courier.collect_packet( i, worker_id, msg, Packet::error ); goto done; } @@ -481,6 +670,9 @@ int decode_lz( const Cl_options & cl_opts, const Archive_descriptor & ad, const int out_slots = 65536; // max small files (<=512B) in 64 MiB const int num_workers = // limited to number of members std::min( (long)cl_opts.num_workers, ad.lzip_index.members() ); + if( cl_opts.program_mode == m_extract ) get_umask(); // cache the umask + Name_monitor + name_monitor( ( cl_opts.program_mode == m_extract ) ? num_workers : 0 ); /* If an error happens after any threads have been started, exit must be called before courier goes out of scope. */ @@ -494,6 +686,7 @@ int decode_lz( const Cl_options & cl_opts, const Archive_descriptor & ad, worker_args[i].cl_opts = &cl_opts; worker_args[i].ad = &ad; worker_args[i].courier = &courier; + worker_args[i].name_monitor = &name_monitor; worker_args[i].name_pending = &name_pending; worker_args[i].worker_id = i; worker_args[i].num_workers = num_workers; @@ -531,6 +724,7 @@ int decode_lz( const Cl_options & cl_opts, const Archive_descriptor & ad, courier.ocheck_counter, courier.owait_counter ); + Exclude::clear(); // avoid error with gcc 3.3.6 if( !courier.finished() ) internal_error( "courier not finished." ); return final_exit_status( retval, cl_opts.program_mode != m_diff ); } |