summary | refs | log | tree | commit | diff | stats
path: root/decode_lz.cc
diff options
context:
space:
mode:
Diffstat (limited to 'decode_lz.cc')
-rw-r--r-- decode_lz.cc 280
1 files changed, 237 insertions, 43 deletions
diff --git a/decode_lz.cc b/decode_lz.cc
index 71c699b..4fc3d80 100644
--- a/decode_lz.cc
+++ b/decode_lz.cc
@@ -1,5 +1,5 @@
/* Tarlz - Archiver with multimember lzip compression
- Copyright (C) 2013-2020 Antonio Diaz Diaz.
+ Copyright (C) 2013-2021 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -29,7 +29,12 @@
#include <pthread.h>
#include <stdint.h>
#include <unistd.h>
+#include <utime.h>
#include <sys/stat.h>
+#if !defined __FreeBSD__ && !defined __OpenBSD__ && !defined __NetBSD__ && \
+ !defined __DragonFly__ && !defined __APPLE__ && !defined __OS2__
+#include <sys/sysmacros.h> // for major, minor, makedev
+#endif
#include <lzlib.h>
#include "arg_parser.h"
@@ -43,47 +48,8 @@
- the other workers return.
- the muxer drains the queue and returns. */
-/* Returns the number of bytes really read.
- If (returned value < size) and (errno == 0), means EOF was reached.
-*/
-int preadblock( const int fd, uint8_t * const buf, const int size,
- const long long pos )
- {
- int sz = 0;
- errno = 0;
- while( sz < size )
- {
- const int n = pread( fd, buf + sz, size - sz, pos + sz );
- if( n > 0 ) sz += n;
- else if( n == 0 ) break; // EOF
- else if( errno != EINTR ) break;
- errno = 0;
- }
- return sz;
- }
-
-
namespace {
-/* Returns the number of bytes really written.
- If (returned value < size), it is always an error.
-*//*
-int pwriteblock( const int fd, const uint8_t * const buf, const int size,
- const long long pos )
- {
- int sz = 0;
- errno = 0;
- while( sz < size )
- {
- const int n = pwrite( fd, buf + sz, size - sz, pos + sz );
- if( n > 0 ) sz += n;
- else if( n < 0 && errno != EINTR ) break;
- errno = 0;
- }
- return sz;
- }
-*/
-
const char * const other_msg = "Other worker found an error.";
struct Packet // member name and metadata or error message
@@ -237,6 +203,35 @@ public:
};
+// Remembers the name each worker is extracting so that two threads never
+// extract the same file at the same time.
+class Name_monitor
+  {
+  std::vector< unsigned > crc_vector;        // CRC of the name held by each worker
+  std::vector< std::string > name_vector;    // name held by each worker
+  pthread_mutex_t mutex;
+
+public:
+  // One slot per worker. The mutex is initialized only if there are workers.
+  Name_monitor( const int num_workers )
+    : crc_vector( num_workers ), name_vector( num_workers )
+    { if( num_workers > 0 ) xinit_mutex( &mutex ); }
+
+  /* Stores 'filename' in the slot of 'worker_id' and returns true, unless
+     the slot of a different worker already holds the same name, in which
+     case nothing is stored and false is returned. */
+  bool reserve_name( const unsigned worker_id, const std::string & filename )
+    {
+    // the CRCs are compared first; the names are compared only on a CRC match
+    const unsigned name_crc =
+      crc32c.compute_crc( (const uint8_t *)filename.c_str(), filename.size() );
+    bool reserved = true;
+    xlock( &mutex );
+    for( unsigned slot = 0; slot < crc_vector.size(); ++slot )
+      {
+      if( slot == worker_id || name_crc == 0 ) continue;
+      if( crc_vector[slot] != name_crc ) continue;
+      if( name_vector[slot] == filename )    // filename already reserved
+        { reserved = false; break; }
+      }
+    if( reserved )
+      { crc_vector[worker_id] = name_crc; name_vector[worker_id] = filename; }
+    xunlock( &mutex );
+    return reserved;
+    }
+  };
+
+
const char * skip_member_lz( Archive_reader_i & ar, Packet_courier & courier,
const Extended & extended, const long member_id,
const int worker_id )
@@ -310,11 +305,201 @@ const char * list_member_lz( Archive_reader_i & ar, Packet_courier & courier,
}
+/* Extract the member described by 'extended' and 'header' from 'ar'.
+   The member is skipped (see skip_member_lz) if its name is reserved by
+   another worker, if the name contains a '..' component, or if the output
+   file of a regular member can't be opened.
+   Diagnostics are queued in 'courier' as Packet::diag packets.
+   Failures creating the file or changing its owner are reported, recorded
+   with set_error_status( 2 ), and extraction continues.
+   Returns 0 if the caller may continue with the next member, else a
+   message describing the error: other_msg if 'courier' refuses a packet,
+   mem_msg if the member name can't be formatted, the message of 'ar' on
+   read error, or a message stored in 'rbuf' on write or close error. */
+const char * extract_member_lz( const Cl_options & cl_opts,
+                                Archive_reader_i & ar, Packet_courier & courier,
+                                const Extended & extended, const Tar_header header,
+                                Resizable_buffer & rbuf, const long member_id,
+                                const int worker_id, Name_monitor & name_monitor )
+  {
+  // skip member if another copy is already being extracted by another thread
+  if( !name_monitor.reserve_name( worker_id, extended.path() ) )
+    return skip_member_lz( ar, courier, extended, member_id, worker_id );
+  const char * const filename = extended.path().c_str();
+  if( contains_dotdot( filename ) )
+    {
+    if( verbosity >= 0 )
+      { std::string estr( extended.path() );
+        estr += ": Contains a '..' component, skipping.";
+        if( !courier.collect_packet( member_id, worker_id, estr.c_str(),
+                                     Packet::diag ) ) return other_msg; }
+    return skip_member_lz( ar, courier, extended, member_id, worker_id );
+    }
+  mode_t mode = parse_octal( header + mode_o, mode_l );        // 12 bits
+  if( geteuid() != 0 && !cl_opts.preserve_permissions ) mode &= ~get_umask();
+  const time_t mtime = parse_octal( header + mtime_o, mtime_l );    // 33 bits
+  const Typeflag typeflag = (Typeflag)header[typeflag_o];
+  const bool islink = ( typeflag == tf_link || typeflag == tf_symlink );
+  int outfd = -1;        // remains -1 except for regular files
+
+  // the member name is reported only if verbosity >= 1
+  if( verbosity < 1 ) rbuf()[0] = 0;
+  else if( !format_member_name( extended, header, rbuf, verbosity > 1 ) )
+    return mem_msg;
+  if( rbuf()[0] && !courier.collect_packet( member_id, worker_id, rbuf(),
+                                            Packet::ok ) ) return other_msg;
+  if( typeflag != tf_directory ) std::remove( filename );
+  if( !make_path( filename ) && verbosity >= 0 )
+    { std::string estr( extended.path() );
+      estr += ": warning: Failed to create intermediate directory.";
+      if( !courier.collect_packet( member_id, worker_id, estr.c_str(),
+                                   Packet::diag ) ) return other_msg; }
+  switch( typeflag )
+    {
+    case tf_regular:
+    case tf_hiperf:
+      outfd = open_outstream( filename, true, &rbuf );
+      if( outfd < 0 )
+        {
+        if( verbosity >= 0 && !courier.collect_packet( member_id, worker_id,
+            rbuf(), Packet::diag ) ) return other_msg;
+        set_error_status( 2 );
+        return skip_member_lz( ar, courier, extended, member_id, worker_id );
+        }
+      break;
+    case tf_link:
+    case tf_symlink:
+      {
+      const char * const linkname = extended.linkpath().c_str();
+      const bool hard = typeflag == tf_link;
+      if( ( hard && link( linkname, filename ) != 0 ) ||
+          ( !hard && symlink( linkname, filename ) != 0 ) )
+        {
+        if( verbosity >= 0 )
+          {
+          const int saved_errno = errno;
+          const int size =
+            snprintf( rbuf(), rbuf.size(), "Can't %slink '%s' to '%s': %s.\n",
+                      hard ? "" : "sym", linkname, filename,
+                      std::strerror( saved_errno ) );
+          /* snprintf returns the length of the message excluding the
+             terminating null. A value >= rbuf.size() means that the message
+             was truncated and that size + 1 bytes are needed to hold it. */
+          if( size > 0 && (unsigned)size >= rbuf.size() &&
+              rbuf.resize( size + 1 ) )
+            snprintf( rbuf(), rbuf.size(), "Can't %slink '%s' to '%s': %s.\n",
+                      hard ? "" : "sym", linkname, filename,
+                      std::strerror( saved_errno ) );
+          if( !courier.collect_packet( member_id, worker_id, rbuf(),
+                                       Packet::diag ) ) return other_msg;
+          }
+        set_error_status( 2 );
+        }
+      } break;
+    case tf_directory:
+      {
+      struct stat st;
+      bool exists = ( stat( filename, &st ) == 0 );
+      // replace an existing non-directory with the directory
+      if( exists && !S_ISDIR( st.st_mode ) )
+        { exists = false; std::remove( filename ); }
+      if( !exists && mkdir( filename, mode ) != 0 && errno != EEXIST )
+        {
+        if( verbosity >= 0 )
+          { snprintf( rbuf(), rbuf.size(), "%s: Can't create directory: %s\n",
+                      filename, std::strerror( errno ) );
+            if( !courier.collect_packet( member_id, worker_id, rbuf(),
+                                         Packet::diag ) ) return other_msg; }
+        set_error_status( 2 );
+        }
+      } break;
+    case tf_chardev:
+    case tf_blockdev:
+      {
+      const unsigned dev =
+        makedev( parse_octal( header + devmajor_o, devmajor_l ),
+                 parse_octal( header + devminor_o, devminor_l ) );
+      const int dmode = ( typeflag == tf_chardev ? S_IFCHR : S_IFBLK ) | mode;
+      if( mknod( filename, dmode, dev ) != 0 )
+        {
+        if( verbosity >= 0 )
+          { snprintf( rbuf(), rbuf.size(), "%s: Can't create device node: %s\n",
+                      filename, std::strerror( errno ) );
+            if( !courier.collect_packet( member_id, worker_id, rbuf(),
+                                         Packet::diag ) ) return other_msg; }
+        set_error_status( 2 );
+        }
+      break;
+      }
+    case tf_fifo:
+      if( mkfifo( filename, mode ) != 0 && errno != EEXIST )
+        {
+        if( verbosity >= 0 )
+          { snprintf( rbuf(), rbuf.size(), "%s: Can't create FIFO file: %s\n",
+                      filename, std::strerror( errno ) );
+            if( !courier.collect_packet( member_id, worker_id, rbuf(),
+                                         Packet::diag ) ) return other_msg; }
+        set_error_status( 2 );
+        }
+      break;
+    default:
+      if( verbosity >= 0 )
+        { snprintf( rbuf(), rbuf.size(),
+                    "File type '%c' not supported for file '%s'.\n",
+                    typeflag, filename );
+          if( !courier.collect_packet( member_id, worker_id, rbuf(),
+                                       Packet::diag ) ) return other_msg; }
+      set_error_status( 2 );
+    }
+
+  const uid_t uid = (uid_t)parse_octal( header + uid_o, uid_l );
+  const gid_t gid = (gid_t)parse_octal( header + gid_o, gid_l );
+  // EPERM and EINVAL from chown are tolerated silently
+  if( !islink && chown( filename, uid, gid ) != 0 &&
+      errno != EPERM && errno != EINVAL )
+    {
+    if( verbosity >= 0 )
+      { snprintf( rbuf(), rbuf.size(), "%s: Can't change file owner: %s\n",
+                  filename, std::strerror( errno ) );
+        if( !courier.collect_packet( member_id, worker_id, rbuf(),
+                                     Packet::diag ) ) return other_msg; }
+    set_error_status( 2 );
+    }
+
+  /* Only regular files (tf_regular, tf_hiperf) have an open descriptor here.
+     For any other type outfd is -1 and fchmod could only fail with EBADF;
+     a directory keeps the mode that was passed to mkdir. */
+  if( outfd >= 0 ) fchmod( outfd, mode );        // ignore errors
+
+  const int bufsize = 32 * header_size;
+  uint8_t buf[bufsize];
+  long long rest = extended.file_size();
+  const int rem = rest % header_size;
+  const int padding = rem ? header_size - rem : 0;
+  while( rest > 0 )
+    {
+    // the last read also consumes the padding that follows the file data
+    const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding;
+    const int ret = ar.read( buf, rsize );
+    if( ret != 0 )
+      {
+      if( outfd >= 0 )
+        {
+        // keep_damaged: save the data obtained before the error (presumably
+        // ar.e_size() bytes) instead of removing the partial file
+        if( cl_opts.keep_damaged )
+          { writeblock( outfd, buf, std::min( rest, (long long)ar.e_size() ) );
+            close( outfd ); }
+        else { close( outfd ); std::remove( filename ); }
+        }
+      return ar.e_msg();
+      }
+    // write only the file data, not the padding
+    const int wsize = ( rest >= bufsize ) ? bufsize : rest;
+    if( outfd >= 0 && writeblock( outfd, buf, wsize ) != wsize )
+      { snprintf( rbuf(), rbuf.size(), "%s: Error writing file: %s\n",
+                  filename, std::strerror( errno ) ); return rbuf(); }
+    rest -= wsize;
+    }
+  if( outfd >= 0 && close( outfd ) != 0 )
+    { snprintf( rbuf(), rbuf.size(), "%s: Error closing file: %s\n",
+                filename, std::strerror( errno ) ); return rbuf(); }
+  if( !islink )
+    {
+    // set both access and modification times to the mtime of the member
+    struct utimbuf t;
+    t.actime = mtime;
+    t.modtime = mtime;
+    utime( filename, &t );            // ignore errors
+    }
+  if( ar.at_member_end() &&
+      !courier.collect_packet( member_id, worker_id, "", Packet::member_done ) )
+    return other_msg;
+  return 0;
+  }
+
+
struct Worker_arg
{
const Cl_options * cl_opts;
const Archive_descriptor * ad;
Packet_courier * courier;
+ Name_monitor * name_monitor;
std::vector< char > * name_pending;
int worker_id;
int num_workers;
@@ -330,6 +515,7 @@ extern "C" void * dworker( void * arg )
const Cl_options & cl_opts = *tmp.cl_opts;
const Archive_descriptor & ad = *tmp.ad;
Packet_courier & courier = *tmp.courier;
+ Name_monitor & name_monitor = *tmp.name_monitor;
std::vector< char > & name_pending = *tmp.name_pending;
const int worker_id = tmp.worker_id;
const int num_workers = tmp.num_workers;
@@ -429,9 +615,12 @@ extern "C" void * dworker( void * arg )
msg = skip_member_lz( ar, courier, extended, i, worker_id );
else if( cl_opts.program_mode == m_list )
msg = list_member_lz( ar, courier, extended, header, rbuf, i, worker_id );
- else msg = compare_member_lz( cl_opts, ar, courier, extended, header,
- rbuf, i, worker_id );
- if( msg )
+ else if( cl_opts.program_mode == m_diff )
+ msg = compare_member_lz( cl_opts, ar, courier, extended, header,
+ rbuf, i, worker_id );
+ else msg = extract_member_lz( cl_opts, ar, courier, extended, header,
+ rbuf, i, worker_id, name_monitor );
+ if( msg ) // fatal error
{ if( courier.request_mastership( i, worker_id ) )
courier.collect_packet( i, worker_id, msg, Packet::error );
goto done; }
@@ -481,6 +670,9 @@ int decode_lz( const Cl_options & cl_opts, const Archive_descriptor & ad,
const int out_slots = 65536; // max small files (<=512B) in 64 MiB
const int num_workers = // limited to number of members
std::min( (long)cl_opts.num_workers, ad.lzip_index.members() );
+ if( cl_opts.program_mode == m_extract ) get_umask(); // cache the umask
+ Name_monitor
+ name_monitor( ( cl_opts.program_mode == m_extract ) ? num_workers : 0 );
/* If an error happens after any threads have been started, exit must be
called before courier goes out of scope. */
@@ -494,6 +686,7 @@ int decode_lz( const Cl_options & cl_opts, const Archive_descriptor & ad,
worker_args[i].cl_opts = &cl_opts;
worker_args[i].ad = &ad;
worker_args[i].courier = &courier;
+ worker_args[i].name_monitor = &name_monitor;
worker_args[i].name_pending = &name_pending;
worker_args[i].worker_id = i;
worker_args[i].num_workers = num_workers;
@@ -531,6 +724,7 @@ int decode_lz( const Cl_options & cl_opts, const Archive_descriptor & ad,
courier.ocheck_counter,
courier.owait_counter );
+ Exclude::clear(); // avoid error with gcc 3.3.6
if( !courier.finished() ) internal_error( "courier not finished." );
return final_exit_status( retval, cl_opts.program_mode != m_diff );
}