diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2021-01-27 16:07:35 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2021-01-27 16:08:24 +0000 |
commit | 2a00d366f34bfdfa7e5a3019b4753bc94a80748d (patch) | |
tree | fad13d976fa52e336b4bb0b85eff6de1350c9906 /extract.cc | |
parent | Releasing debian version 0.17-1. (diff) | |
download | tarlz-2a00d366f34bfdfa7e5a3019b4753bc94a80748d.tar.xz tarlz-2a00d366f34bfdfa7e5a3019b4753bc94a80748d.zip |
Merging upstream version 0.19.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'extract.cc')
-rw-r--r-- | extract.cc | 527 |
1 files changed, 0 insertions, 527 deletions
diff --git a/extract.cc b/extract.cc deleted file mode 100644 index 0b9b1d3..0000000 --- a/extract.cc +++ /dev/null @@ -1,527 +0,0 @@ -/* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2020 Antonio Diaz Diaz. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#define _FILE_OFFSET_BITS 64 - -#include <algorithm> -#include <cctype> -#include <cerrno> -#include <climits> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include <string> -#include <vector> -#include <pthread.h> -#include <stdint.h> -#include <unistd.h> -#include <utime.h> -#include <sys/stat.h> -#include <sys/types.h> -#if !defined __FreeBSD__ && !defined __OpenBSD__ && !defined __NetBSD__ && \ - !defined __DragonFly__ && !defined __APPLE__ -#include <sys/sysmacros.h> // for major, minor, makedev -#endif -#include <lzlib.h> - -#include "arg_parser.h" -#include "tarlz.h" -#include "lzip_index.h" -#include "archive_reader.h" - - -namespace { - -Resizable_buffer grbuf; - -bool skip_warn( const bool reset = false ) // avoid duplicate warnings - { - static bool skipping = false; - - if( reset ) skipping = false; - else if( !skipping ) - { skipping = true; show_error( "Skipping to next header." ); return true; } - return false; - } - - -void read_error( const Archive_reader & ar ) - { - show_file_error( ar.ad.namep, ar.e_msg(), ar.e_code() ); - if( ar.e_skip() ) skip_warn(); - } - - -bool make_path( const std::string & name ) - { - const mode_t mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH; - unsigned end = name.size(); // first slash before last component - - while( end > 0 && name[end-1] == '/' ) --end; // remove trailing slashes - while( end > 0 && name[end-1] != '/' ) --end; // remove last component - while( end > 0 && name[end-1] == '/' ) --end; // remove more slashes - - unsigned index = 0; - while( index < end ) - { - while( index < end && name[index] == '/' ) ++index; - unsigned first = index; - while( index < end && name[index] != '/' ) ++index; - if( first < index ) - { - const std::string partial( name, 0, index ); - struct stat st; - if( stat( partial.c_str(), &st ) == 0 ) - { if( !S_ISDIR( st.st_mode ) ) return false; } - else if( mkdir( partial.c_str(), mode ) != 0 ) - return false; - } - } - return true; - } - - -int skip_member( Archive_reader & ar, const Extended & extended ) - { - const int ret = ar.skip_member( extended ); - if( ret != 0 ) { read_error( ar ); if( ret == 2 ) return 2; } - return 0; - } - - -int compare_member( const Cl_options & cl_opts, Archive_reader & ar, - const Extended & extended, const Tar_header header ) - { - if( !show_member_name( extended, header, 1, grbuf ) ) return 1; - std::string estr, ostr; - const bool stat_differs = - !compare_file_type( estr, ostr, cl_opts, extended, header ); - if( estr.size() ) std::fputs( estr.c_str(), stderr ); - if( ostr.size() ) { std::fputs( ostr.c_str(), stdout ); std::fflush( stdout ); } - if( extended.file_size() <= 0 ) return 0; - const Typeflag typeflag = (Typeflag)header[typeflag_o]; - if( ( typeflag != tf_regular && typeflag != tf_hiperf ) || stat_differs ) - return skip_member( ar, extended ); - // else compare file contents - const char * const filename = extended.path().c_str(); - const int infd2 = open_instream( filename ); - if( infd2 < 0 ) - { set_error_status( 1 ); return skip_member( ar, extended ); } - int retval = compare_file_contents( estr, ostr, ar, extended.file_size(), - filename, infd2 ); - if( retval ) { read_error( ar ); if( retval != 2 ) retval = 0; } - else { if( estr.size() ) std::fputs( estr.c_str(), stderr ); - if( ostr.size() ) - { std::fputs( ostr.c_str(), stdout ); std::fflush( stdout ); } } - return retval; - } - - -int list_member( Archive_reader & ar, - const Extended & extended, const Tar_header header ) - { - if( !show_member_name( extended, header, 0, grbuf ) ) return 1; - return skip_member( ar, extended ); - } - - -bool contains_dotdot( const char * const filename ) - { - for( int i = 0; filename[i]; ++i ) - if( dotdot_at_i( filename, i ) ) return true; - return false; - } - - -mode_t get_umask() - { - static mode_t mask = 0; // read once, cache the result - static bool first_call = true; - if( first_call ) { first_call = false; mask = umask( 0 ); umask( mask ); - mask &= S_IRWXU | S_IRWXG | S_IRWXO; } - return mask; - } - - -int extract_member( const Cl_options & cl_opts, Archive_reader & ar, - const Extended & extended, const Tar_header header ) - { - const char * const filename = extended.path().c_str(); - if( contains_dotdot( filename ) ) - { - show_file_error( filename, "Contains a '..' component, skipping." ); - return skip_member( ar, extended ); - } - mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits - if( geteuid() != 0 && !cl_opts.preserve_permissions ) mode &= ~get_umask(); - const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits - const Typeflag typeflag = (Typeflag)header[typeflag_o]; - const bool islink = ( typeflag == tf_link || typeflag == tf_symlink ); - int outfd = -1; - - if( !show_member_name( extended, header, 1, grbuf ) ) return 1; - std::remove( filename ); - make_path( filename ); - switch( typeflag ) - { - case tf_regular: - case tf_hiperf: - outfd = open_outstream( filename ); - if( outfd < 0 ) return 2; - break; - case tf_link: - case tf_symlink: - { - const char * const linkname = extended.linkpath().c_str(); - const bool hard = typeflag == tf_link; - if( ( hard && link( linkname, filename ) != 0 ) || - ( !hard && symlink( linkname, filename ) != 0 ) ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "Can't %slink '%s' to '%s': %s.\n", - hard ? "" : "sym", linkname, filename, - std::strerror( errno ) ); - return 2; - } - } break; - case tf_directory: - if( mkdir( filename, mode ) != 0 && errno != EEXIST ) - { - show_file_error( filename, "Can't create directory", errno ); - return 2; - } - break; - case tf_chardev: - case tf_blockdev: - { - const unsigned dev = - makedev( parse_octal( header + devmajor_o, devmajor_l ), - parse_octal( header + devminor_o, devminor_l ) ); - const int dmode = ( typeflag == tf_chardev ? S_IFCHR : S_IFBLK ) | mode; - if( mknod( filename, dmode, dev ) != 0 ) - { - show_file_error( filename, "Can't create device node", errno ); - return 2; - } - break; - } - case tf_fifo: - if( mkfifo( filename, mode ) != 0 && errno != EEXIST ) - { - show_file_error( filename, "Can't create FIFO file", errno ); - return 2; - } - break; - default: - if( verbosity >= 0 ) - std::fprintf( stderr, "File type '%c' not supported for file '%s'.\n", - typeflag, filename ); - return 2; - } - - const uid_t uid = (uid_t)parse_octal( header + uid_o, uid_l ); - const gid_t gid = (gid_t)parse_octal( header + gid_o, gid_l ); - if( !islink && chown( filename, uid, gid ) != 0 && - errno != EPERM && errno != EINVAL ) - { - show_file_error( filename, "Can't change file owner", errno ); - return 2; - } - - if( typeflag == tf_regular || typeflag == tf_hiperf ) - fchmod( outfd, mode ); // ignore errors - - const int bufsize = 32 * header_size; - uint8_t buf[bufsize]; - long long rest = extended.file_size(); - const int rem = rest % header_size; - const int padding = rem ? header_size - rem : 0; - while( rest > 0 ) - { - const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding; - const int ret = ar.read( buf, rsize ); - if( ret != 0 ) - { - read_error( ar ); - if( outfd >= 0 ) - { - if( cl_opts.keep_damaged ) - { writeblock( outfd, buf, std::min( rest, (long long)ar.e_size() ) ); - close( outfd ); } - else { close( outfd ); std::remove( filename ); } - } - if( ret == 2 ) return 2; else return 0; - } - const int wsize = ( rest >= bufsize ) ? bufsize : rest; - if( outfd >= 0 && writeblock( outfd, buf, wsize ) != wsize ) - { show_file_error( filename, "Error writing file", errno ); return 2; } - rest -= wsize; - } - if( outfd >= 0 && close( outfd ) != 0 ) - { show_file_error( filename, "Error closing file", errno ); return 2; } - if( !islink ) - { - struct utimbuf t; - t.actime = mtime; - t.modtime = mtime; - utime( filename, &t ); // ignore errors - } - return 0; - } - - -void format_file_diff( std::string & ostr, const char * const filename, - const char * const msg ) - { - if( verbosity < 0 ) return; - { ostr += filename; ostr += ": "; ostr += msg; ostr += '\n'; } - } - -} // end namespace - - -bool compare_file_type( std::string & estr, std::string & ostr, - const Cl_options & cl_opts, - const Extended & extended, const Tar_header header ) - { - const char * const filename = extended.path().c_str(); - const Typeflag typeflag = (Typeflag)header[typeflag_o]; - struct stat st; - bool diff = false, size_differs = false, type_differs = true; - if( hstat( filename, &st, cl_opts.dereference ) != 0 ) - format_file_error( estr, filename, "Warning: Can't stat", errno ); - else if( ( typeflag == tf_regular || typeflag == tf_hiperf ) && - !S_ISREG( st.st_mode ) ) - format_file_diff( ostr, filename, "Is not a regular file" ); - else if( typeflag == tf_symlink && !S_ISLNK( st.st_mode ) ) - format_file_diff( ostr, filename, "Is not a symlink" ); - else if( typeflag == tf_chardev && !S_ISCHR( st.st_mode ) ) - format_file_diff( ostr, filename, "Is not a character device" ); - else if( typeflag == tf_blockdev && !S_ISBLK( st.st_mode ) ) - format_file_diff( ostr, filename, "Is not a block device" ); - else if( typeflag == tf_directory && !S_ISDIR( st.st_mode ) ) - format_file_diff( ostr, filename, "Is not a directory" ); - else if( typeflag == tf_fifo && !S_ISFIFO( st.st_mode ) ) - format_file_diff( ostr, filename, "Is not a FIFO" ); - else - { - type_differs = false; - if( typeflag != tf_symlink ) - { - const mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits - if( mode != ( st.st_mode & ( S_ISUID | S_ISGID | S_ISVTX | - S_IRWXU | S_IRWXG | S_IRWXO ) ) ) - { format_file_diff( ostr, filename, "Mode differs" ); diff = true; } - } - if( !cl_opts.ignore_ids ) - { - if( (uid_t)parse_octal( header + uid_o, uid_l ) != st.st_uid ) - { format_file_diff( ostr, filename, "Uid differs" ); diff = true; } - if( (gid_t)parse_octal( header + gid_o, gid_l ) != st.st_gid ) - { format_file_diff( ostr, filename, "Gid differs" ); diff = true; } - } - if( typeflag != tf_symlink ) - { - if( typeflag != tf_directory ) - { - const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits - if( mtime != st.st_mtime ) - { format_file_diff( ostr, filename, "Mod time differs" ); diff = true; } - } - if( ( typeflag == tf_regular || typeflag == tf_hiperf ) && - extended.file_size() != st.st_size ) // don't compare contents - { format_file_diff( ostr, filename, "Size differs" ); size_differs = true; } - if( ( typeflag == tf_chardev || typeflag == tf_blockdev ) && - ( parse_octal( header + devmajor_o, devmajor_l ) != - (unsigned)major( st.st_rdev ) || - parse_octal( header + devminor_o, devminor_l ) != - (unsigned)minor( st.st_rdev ) ) ) - { format_file_diff( ostr, filename, "Device number differs" ); diff = true; } - } - else - { - char * const buf = new char[st.st_size+1]; - long len = readlink( filename, buf, st.st_size ); - bool e = ( len != st.st_size ); - if( !e ) - { - while( len > 1 && buf[len-1] == '/' ) --len; // trailing '/' - buf[len] = 0; - if( extended.linkpath() != buf ) e = true; - } - delete[] buf; - if( e ) { format_file_diff( ostr, filename, "Symlink differs" ); diff = true; } - } - } - if( diff || size_differs || type_differs ) set_error_status( 1 ); - return !( size_differs || type_differs ); - } - - -bool compare_file_contents( std::string & estr, std::string & ostr, - Archive_reader_base & ar, const long long file_size, - const char * const filename, const int infd2 ) - { - long long rest = file_size; - const int rem = rest % header_size; - const int padding = rem ? header_size - rem : 0; - const int bufsize = 32 * header_size; - uint8_t buf1[bufsize]; - uint8_t buf2[bufsize]; - int retval = 0; - bool diff = false; - estr.clear(); ostr.clear(); - while( rest > 0 ) - { - const int rsize1 = ( rest >= bufsize ) ? bufsize : rest + padding; - const int rsize2 = ( rest >= bufsize ) ? bufsize : rest; - if( ( retval = ar.read( buf1, rsize1 ) ) != 0 ) { diff = true; break; } - if( !diff ) - { - const int rd = readblock( infd2, buf2, rsize2 ); - if( rd != rsize2 ) - { - if( errno ) format_file_error( estr, filename, "Read error", errno ); - else format_file_diff( ostr, filename, "EOF found in file" ); - diff = true; - } - else - { - int i = 0; while( i < rsize2 && buf1[i] == buf2[i] ) ++i; - if( i < rsize2 ) - { format_file_diff( ostr, filename, "Contents differ" ); diff = true; } - } - } - if( rest < bufsize ) break; - rest -= rsize1; - } - close( infd2 ); - if( diff ) set_error_status( 1 ); - return retval; - } - - -int decode( const Cl_options & cl_opts ) - { - // open archive before changing working directory - const Archive_descriptor ad( cl_opts.archive_name ); - if( ad.infd < 0 ) return 1; - - // Execute -C options and mark filenames to be compared, extracted or listed. - // name_pending is of type char instead of bool to allow concurrent update. - std::vector< char > name_pending( cl_opts.parser.arguments(), false ); - for( int i = 0; i < cl_opts.parser.arguments(); ++i ) - { - const int code = cl_opts.parser.code( i ); - if( code == 'C' && cl_opts.program_mode != m_list ) - { - const char * const dir = cl_opts.parser.argument( i ).c_str(); - if( chdir( dir ) != 0 ) - { show_file_error( dir, "Error changing working directory", errno ); - return 1; } - } - if( !code && cl_opts.parser.argument( i ).size() && - !Exclude::excluded( cl_opts.parser.argument( i ).c_str() ) ) - name_pending[i] = true; - } - - // multi-threaded --list is faster even with 1 thread and 1 file in archive - // (but multi-threaded --diff probably needs at least 2 of each) - if( ( cl_opts.program_mode == m_diff || cl_opts.program_mode == m_list ) && - cl_opts.num_workers > 0 ) - { - if( ad.indexed && ad.lzip_index.members() >= 2 ) // one file + eof - { - // show_file_error( ad.namep, "Is compressed seekable" ); - return decode_lz( cl_opts, ad, name_pending ); - } - } - - Archive_reader ar( ad ); - Extended extended; // metadata from extended records - int retval = 0; - bool prev_extended = false; // prev header was extended - while( true ) // process one tar header per iteration - { - Tar_header header; - const int ret = ar.read( header, header_size ); - if( ret != 0 ) { read_error( ar ); if( ret == 2 ) { retval = 2; break; } } - if( ret != 0 || !verify_ustar_chksum( header ) ) - { - if( ret == 0 && block_is_zero( header, header_size ) ) - { - if( !prev_extended || cl_opts.permissive ) break; // EOF - show_file_error( ad.namep, fv_msg1 ); - retval = 2; break; - } - if( skip_warn() && verbosity >= 2 ) - std::fprintf( stderr, "ustar chksum = %07o\n", ustar_chksum( header ) ); - set_error_status( 2 ); continue; - } - skip_warn( true ); // reset warning - - const Typeflag typeflag = (Typeflag)header[typeflag_o]; - if( typeflag == tf_global ) - { - if( prev_extended && !cl_opts.permissive ) - { show_file_error( ad.namep, fv_msg2 ); retval = 2; break; } - Extended dummy; // global headers are parsed and ignored - const int ret = ar.parse_records( dummy, header, grbuf, true ); - if( ret != 0 ) { show_file_error( ad.namep, gblrec_msg ); skip_warn(); - set_error_status( ret ); } - continue; - } - if( typeflag == tf_extended ) - { - if( prev_extended && !cl_opts.permissive ) - { show_file_error( ad.namep, fv_msg3 ); retval = 2; break; } - const int ret = ar.parse_records( extended, header, grbuf, - cl_opts.permissive ); - if( ret != 0 ) { show_file_error( ad.namep, extrec_msg ); skip_warn(); - extended.reset(); set_error_status( ret ); } - else if( !extended.crc_present() && cl_opts.missing_crc ) - { show_file_error( ad.namep, mcrc_msg ); retval = 2; break; } - prev_extended = true; - continue; - } - prev_extended = false; - - extended.fill_from_ustar( header ); // copy metadata from header - - if( check_skip_filename( cl_opts, name_pending, extended.path().c_str() ) ) - retval = skip_member( ar, extended ); - else if( cl_opts.program_mode == m_list ) - retval = list_member( ar, extended, header ); - else if( cl_opts.program_mode == m_diff ) - retval = compare_member( cl_opts, ar, extended, header ); - else retval = extract_member( cl_opts, ar, extended, header ); - extended.reset(); - if( retval ) - { show_error( "Error is not recoverable: exiting now." ); break; } - } - - if( close( ad.infd ) != 0 && !retval ) - { show_file_error( ad.namep, "Error closing archive", errno ); - retval = 1; } - - if( retval == 0 ) - for( int i = 0; i < cl_opts.parser.arguments(); ++i ) - if( nonempty_arg( cl_opts.parser, i ) && name_pending[i] ) - { show_file_error( cl_opts.parser.argument( i ).c_str(), - "Not found in archive." ); retval = 1; } - return final_exit_status( retval, cl_opts.program_mode != m_diff ); - } |