summary | refs | log | tree | commit | diff | stats
path: root/extract.cc
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2021-01-27 16:07:35 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2021-01-27 16:08:24 +0000
commit: 2a00d366f34bfdfa7e5a3019b4753bc94a80748d (patch)
tree: fad13d976fa52e336b4bb0b85eff6de1350c9906 /extract.cc
parent: Releasing debian version 0.17-1. (diff)
download: tarlz-2a00d366f34bfdfa7e5a3019b4753bc94a80748d.tar.xz
download: tarlz-2a00d366f34bfdfa7e5a3019b4753bc94a80748d.zip
Merging upstream version 0.19.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'extract.cc')
-rw-r--r-- extract.cc 527
1 files changed, 0 insertions, 527 deletions
diff --git a/extract.cc b/extract.cc
deleted file mode 100644
index 0b9b1d3..0000000
--- a/extract.cc
+++ /dev/null
@@ -1,527 +0,0 @@
-/* Tarlz - Archiver with multimember lzip compression
- Copyright (C) 2013-2020 Antonio Diaz Diaz.
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#define _FILE_OFFSET_BITS 64
-
-#include <algorithm>
-#include <cctype>
-#include <cerrno>
-#include <climits>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <string>
-#include <vector>
-#include <pthread.h>
-#include <stdint.h>
-#include <unistd.h>
-#include <utime.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#if !defined __FreeBSD__ && !defined __OpenBSD__ && !defined __NetBSD__ && \
- !defined __DragonFly__ && !defined __APPLE__
-#include <sys/sysmacros.h> // for major, minor, makedev
-#endif
-#include <lzlib.h>
-
-#include "arg_parser.h"
-#include "tarlz.h"
-#include "lzip_index.h"
-#include "archive_reader.h"
-
-
-namespace {
-
-// Buffer shared by the functions in this file; it is the one handed to
-// show_member_name() and to Archive_reader::parse_records().
-Resizable_buffer grbuf;
-
-/* Prints "Skipping to next header." only the first time it is called
-   after a reset, so that a run of bad headers produces one warning.
-   skip_warn( true ) re-arms the warning and prints nothing.
-   Returns true only if this call printed the warning. */
-bool skip_warn( const bool reset = false )
-  {
-  static bool warned = false;		// true once the warning is out
-
-  if( reset ) { warned = false; return false; }
-  if( warned ) return false;
-  warned = true;
-  show_error( "Skipping to next header." );
-  return true;
-  }
-
-
-/* Reports the error currently held by 'ar' (message and code) against the
-   archive name. If the reader flags the error as skippable (e_skip), the
-   "skipping" warning is issued through skip_warn(). */
-void read_error( const Archive_reader & ar )
-  {
-  show_file_error( ar.ad.namep, ar.e_msg(), ar.e_code() );
-  if( !ar.e_skip() ) return;
-  skip_warn();
-  }
-
-
-/* Creates every directory leading to 'name' that does not exist yet.
-   The last component of 'name' is never created. Each missing directory
-   is made with mode rwxr-xr-x (as filtered by mkdir itself).
-   Returns false if a prefix exists but is not a directory, or if mkdir
-   fails; returns true otherwise, including when there is nothing to do. */
-bool make_path( const std::string & name )
-  {
-  const mode_t dir_mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
-
-  // 'limit' becomes the length of the directory part of 'name'
-  unsigned limit = name.size();
-  while( limit > 0 && name[limit-1] == '/' ) --limit;	// trailing slashes
-  while( limit > 0 && name[limit-1] != '/' ) --limit;	// last component
-  while( limit > 0 && name[limit-1] == '/' ) --limit;	// slashes before it
-
-  for( unsigned pos = 0; pos < limit; )
-    {
-    if( name[pos] == '/' ) { ++pos; continue; }		// separator
-    while( pos < limit && name[pos] != '/' ) ++pos;	// end of component
-    const std::string prefix = name.substr( 0, pos );
-    struct stat st;
-    if( stat( prefix.c_str(), &st ) == 0 )
-      {
-      if( !S_ISDIR( st.st_mode ) ) return false;	// something in the way
-      continue;
-      }
-    if( mkdir( prefix.c_str(), dir_mode ) != 0 ) return false;
-    }
-  return true;
-  }
-
-
-/* Asks the reader to skip over the data of the current member.
-   Any nonzero status from the reader is reported with read_error().
-   Returns 2 if the reader returned 2, else 0. */
-int skip_member( Archive_reader & ar, const Extended & extended )
-  {
-  const int status = ar.skip_member( extended );
-  if( status == 0 ) return 0;
-  read_error( ar );
-  return ( status == 2 ) ? 2 : 0;
-  }
-
-
-// Sends pending error text to stderr and pending difference text to
-// stdout (flushing stdout). Empty strings are not written.
-void emit_messages( const std::string & estr, const std::string & ostr )
-  {
-  if( estr.size() ) std::fputs( estr.c_str(), stderr );
-  if( ostr.size() ) { std::fputs( ostr.c_str(), stdout ); std::fflush( stdout ); }
-  }
-
-
-/* Compares one archive member with the file of the same name in the file
-   system. The metadata is compared first (compare_file_type). The contents
-   are compared only for regular files whose type and size match; in every
-   other case the member data is skipped.
-   Returns 1 if show_member_name fails, otherwise 0 or the result of
-   skip_member().
-   NOTE(review): compare_file_contents is defined in this file with a bool
-   return type, so the status received here can only be 0 or 1 and the
-   '== 2' test below can never succeed. Confirm whether a status of 2 from
-   the reader was meant to reach the caller. */
-int compare_member( const Cl_options & cl_opts, Archive_reader & ar,
-                    const Extended & extended, const Tar_header header )
-  {
-  if( !show_member_name( extended, header, 1, grbuf ) ) return 1;
-
-  std::string estr, ostr;
-  const bool same_kind =
-    compare_file_type( estr, ostr, cl_opts, extended, header );
-  emit_messages( estr, ostr );
-  if( extended.file_size() <= 0 ) return 0;		// no data to compare
-
-  const Typeflag typeflag = (Typeflag)header[typeflag_o];
-  const bool is_file = ( typeflag == tf_regular || typeflag == tf_hiperf );
-  if( !is_file || !same_kind ) return skip_member( ar, extended );
-
-  // compare file contents
-  const char * const filename = extended.path().c_str();
-  const int infd2 = open_instream( filename );
-  if( infd2 < 0 )
-    { set_error_status( 1 ); return skip_member( ar, extended ); }
-  const int status = compare_file_contents( estr, ostr, ar,
-                       extended.file_size(), filename, infd2 );
-  if( status == 0 ) { emit_messages( estr, ostr ); return 0; }
-  read_error( ar );
-  return ( status == 2 ) ? 2 : 0;
-  }
-
-
-/* Shows the name of the current member and skips its data.
-   Returns 1 if show_member_name fails, else the result of skip_member(). */
-int list_member( Archive_reader & ar,
-                 const Extended & extended, const Tar_header header )
-  {
-  if( show_member_name( extended, header, 0, grbuf ) )
-    return skip_member( ar, extended );
-  return 1;
-  }
-
-
-/* Returns true if dotdot_at_i() reports a match at any position of
-   'filename', false if it matches nowhere (or the name is empty). */
-bool contains_dotdot( const char * const filename )
-  {
-  int pos = 0;
-  while( filename[pos] != 0 )
-    {
-    if( dotdot_at_i( filename, pos ) ) return true;
-    ++pos;
-    }
-  return false;
-  }
-
-
-/* Returns the permission bits of the process umask.
-   The mask is queried on the first call only; later calls return the
-   value stored then, even if the umask has been changed meanwhile. */
-mode_t get_umask()
-  {
-  static bool cached = false;
-  static mode_t saved = 0;
-
-  if( cached ) return saved;
-  cached = true;
-  const mode_t current = umask( 0 );	// umask() returns the previous mask
-  umask( current );			// put the real mask back
-  saved = current & ( S_IRWXU | S_IRWXG | S_IRWXO );
-  return saved;
-  }
-
-
-/* Extracts the current archive member to the file system.
-   A name containing a '..' component is refused and its data skipped.
-   Otherwise whatever exists under that name is removed, missing parent
-   directories are created, and an object of the type given by the header
-   typeflag is made. Owner, mode and modification time are then set from
-   the header where applicable (owner and times are not set for links).
-   The member data is always read from 'ar'; it is written out only when
-   a regular file was opened.
-   Returns 0 on success, or after a read error for which the reader did
-   not return 2; 1 if show_member_name fails; 2 on any other failure
-   (decode() stops at the first nonzero result).
-   The output descriptor is closed on every path that returns after it
-   was opened. */
-int extract_member( const Cl_options & cl_opts, Archive_reader & ar,
-                    const Extended & extended, const Tar_header header )
-  {
-  const char * const filename = extended.path().c_str();
-  if( contains_dotdot( filename ) )
-    {
-    show_file_error( filename, "Contains a '..' component, skipping." );
-    return skip_member( ar, extended );
-    }
-  mode_t mode = parse_octal( header + mode_o, mode_l );		// 12 bits
-  // the umask is honored unless running as root or told to preserve modes
-  if( geteuid() != 0 && !cl_opts.preserve_permissions ) mode &= ~get_umask();
-  const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits
-  const Typeflag typeflag = (Typeflag)header[typeflag_o];
-  const bool islink = ( typeflag == tf_link || typeflag == tf_symlink );
-  int outfd = -1;			// opened only for regular files
-
-  if( !show_member_name( extended, header, 1, grbuf ) ) return 1;
-  std::remove( filename );		// result is not checked
-  make_path( filename );		// result is not checked
-  switch( typeflag )
-    {
-    case tf_regular:
-    case tf_hiperf:
-      outfd = open_outstream( filename );
-      if( outfd < 0 ) return 2;
-      break;
-    case tf_link:
-    case tf_symlink:
-      {
-      const char * const linkname = extended.linkpath().c_str();
-      const bool hard = typeflag == tf_link;
-      if( ( hard && link( linkname, filename ) != 0 ) ||
-          ( !hard && symlink( linkname, filename ) != 0 ) )
-        {
-        if( verbosity >= 0 )
-          std::fprintf( stderr, "Can't %slink '%s' to '%s': %s.\n",
-                        hard ? "" : "sym", linkname, filename,
-                        std::strerror( errno ) );
-        return 2;
-        }
-      } break;
-    case tf_directory:
-      // an already existing directory is accepted
-      if( mkdir( filename, mode ) != 0 && errno != EEXIST )
-        {
-        show_file_error( filename, "Can't create directory", errno );
-        return 2;
-        }
-      break;
-    case tf_chardev:
-    case tf_blockdev:
-      {
-      // dev_t may be wider than unsigned; keep the full device number
-      const dev_t dev =
-        makedev( parse_octal( header + devmajor_o, devmajor_l ),
-                 parse_octal( header + devminor_o, devminor_l ) );
-      const mode_t dmode =
-        ( typeflag == tf_chardev ? S_IFCHR : S_IFBLK ) | mode;
-      if( mknod( filename, dmode, dev ) != 0 )
-        {
-        show_file_error( filename, "Can't create device node", errno );
-        return 2;
-        }
-      break;
-      }
-    case tf_fifo:
-      if( mkfifo( filename, mode ) != 0 && errno != EEXIST )
-        {
-        show_file_error( filename, "Can't create FIFO file", errno );
-        return 2;
-        }
-      break;
-    default:
-      if( verbosity >= 0 )
-        std::fprintf( stderr, "File type '%c' not supported for file '%s'.\n",
-                      typeflag, filename );
-      return 2;
-    }
-
-  const uid_t uid = (uid_t)parse_octal( header + uid_o, uid_l );
-  const gid_t gid = (gid_t)parse_octal( header + gid_o, gid_l );
-  // EPERM and EINVAL from chown are tolerated
-  if( !islink && chown( filename, uid, gid ) != 0 &&
-      errno != EPERM && errno != EINVAL )
-    {
-    show_file_error( filename, "Can't change file owner", errno );
-    if( outfd >= 0 ) close( outfd );	// report first, close may change errno
-    return 2;
-    }
-
-  if( typeflag == tf_regular || typeflag == tf_hiperf )
-    fchmod( outfd, mode );						// ignore errors
-
-  // The data area is padded to a multiple of header_size; the padding is
-  // read together with the last chunk but never written.
-  const int bufsize = 32 * header_size;
-  uint8_t buf[bufsize];
-  long long rest = extended.file_size();
-  const int rem = rest % header_size;
-  const int padding = rem ? header_size - rem : 0;
-  while( rest > 0 )
-    {
-    const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding;
-    const int ret = ar.read( buf, rsize );
-    if( ret != 0 )
-      {
-      read_error( ar );
-      if( outfd >= 0 )
-        {
-        if( cl_opts.keep_damaged )	// save what the reader delivered
-          { writeblock( outfd, buf, std::min( rest, (long long)ar.e_size() ) );
-            close( outfd ); }
-        else { close( outfd ); std::remove( filename ); }
-        }
-      if( ret == 2 ) return 2; else return 0;
-      }
-    const int wsize = ( rest >= bufsize ) ? bufsize : rest;
-    if( outfd >= 0 && writeblock( outfd, buf, wsize ) != wsize )
-      {
-      show_file_error( filename, "Error writing file", errno );
-      close( outfd );			// don't leak the descriptor
-      return 2;
-      }
-    rest -= wsize;
-    }
-  if( outfd >= 0 && close( outfd ) != 0 )
-    { show_file_error( filename, "Error closing file", errno ); return 2; }
-  if( !islink )
-    {
-    struct utimbuf t;
-    t.actime = mtime;
-    t.modtime = mtime;
-    utime( filename, &t );			// ignore errors
-    }
-  return 0;
-  }
-
-
-/* Appends the line "<filename>: <msg>\n" to 'ostr'.
-   Nothing is appended when 'verbosity' is negative. */
-void format_file_diff( std::string & ostr, const char * const filename,
-                       const char * const msg )
-  {
-  if( verbosity >= 0 )
-    ostr.append( filename ).append( ": " ).append( msg ).append( 1, '\n' );
-  }
-
-} // end namespace
-
-
-/* Compares the metadata of an archive member with the file of the same
-   name in the file system.
-   A stat failure is described in 'estr'; every difference found is
-   described in 'ostr'. The error status is set to 1 if the file can't be
-   examined or if anything differs.
-   Checked, in this order: file type, mode (not for symlinks), uid and gid
-   (unless cl_opts.ignore_ids), modification time (not for symlinks nor
-   directories), size (regular files), device numbers (device nodes), and
-   link target (symlinks).
-   Returns false if the file can't be examined or if its type or size
-   differ from the member; returns true otherwise, even if other
-   attributes differ. */
-bool compare_file_type( std::string & estr, std::string & ostr,
-                        const Cl_options & cl_opts,
-                        const Extended & extended, const Tar_header header )
-  {
-  const char * const filename = extended.path().c_str();
-  const Typeflag typeflag = (Typeflag)header[typeflag_o];
-  const bool is_file = ( typeflag == tf_regular || typeflag == tf_hiperf );
-  const bool is_symlink = ( typeflag == tf_symlink );
-  struct stat st;
-
-  if( hstat( filename, &st, cl_opts.dereference ) != 0 )
-    {
-    format_file_error( estr, filename, "Warning: Can't stat", errno );
-    set_error_status( 1 );
-    return false;
-    }
-
-  // first make sure that the file is of the kind stored in the archive
-  const char * type_msg = 0;
-  if( is_file && !S_ISREG( st.st_mode ) )
-    type_msg = "Is not a regular file";
-  else if( is_symlink && !S_ISLNK( st.st_mode ) )
-    type_msg = "Is not a symlink";
-  else if( typeflag == tf_chardev && !S_ISCHR( st.st_mode ) )
-    type_msg = "Is not a character device";
-  else if( typeflag == tf_blockdev && !S_ISBLK( st.st_mode ) )
-    type_msg = "Is not a block device";
-  else if( typeflag == tf_directory && !S_ISDIR( st.st_mode ) )
-    type_msg = "Is not a directory";
-  else if( typeflag == tf_fifo && !S_ISFIFO( st.st_mode ) )
-    type_msg = "Is not a FIFO";
-  if( type_msg )
-    {
-    format_file_diff( ostr, filename, type_msg );
-    set_error_status( 1 );
-    return false;
-    }
-
-  bool diff = false;			// some attribute differs
-  bool size_differs = false;		// makes a contents comparison useless
-
-  if( !is_symlink )
-    {
-    const mode_t mode = parse_octal( header + mode_o, mode_l );	// 12 bits
-    const mode_t mode_bits =
-      S_ISUID | S_ISGID | S_ISVTX | S_IRWXU | S_IRWXG | S_IRWXO;
-    if( mode != ( st.st_mode & mode_bits ) )
-      { format_file_diff( ostr, filename, "Mode differs" ); diff = true; }
-    }
-  if( !cl_opts.ignore_ids )
-    {
-    if( (uid_t)parse_octal( header + uid_o, uid_l ) != st.st_uid )
-      { format_file_diff( ostr, filename, "Uid differs" ); diff = true; }
-    if( (gid_t)parse_octal( header + gid_o, gid_l ) != st.st_gid )
-      { format_file_diff( ostr, filename, "Gid differs" ); diff = true; }
-    }
-  if( is_symlink )
-    {
-    // read the target of the link and compare it with the stored one
-    std::vector< char > target( st.st_size + 1 );
-    long len = readlink( filename, &target[0], st.st_size );
-    bool differs = ( len != st.st_size );
-    if( !differs )
-      {
-      while( len > 1 && target[len-1] == '/' ) --len;		// trailing '/'
-      target[len] = 0;
-      differs = ( extended.linkpath() != &target[0] );
-      }
-    if( differs )
-      { format_file_diff( ostr, filename, "Symlink differs" ); diff = true; }
-    }
-  else
-    {
-    if( typeflag != tf_directory )
-      {
-      const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits
-      if( mtime != st.st_mtime )
-        { format_file_diff( ostr, filename, "Mod time differs" ); diff = true; }
-      }
-    if( is_file && extended.file_size() != st.st_size )
-      { format_file_diff( ostr, filename, "Size differs" ); size_differs = true; }
-    if( ( typeflag == tf_chardev || typeflag == tf_blockdev ) &&
-        ( parse_octal( header + devmajor_o, devmajor_l ) !=
-          (unsigned)major( st.st_rdev ) ||
-          parse_octal( header + devminor_o, devminor_l ) !=
-          (unsigned)minor( st.st_rdev ) ) )
-      { format_file_diff( ostr, filename, "Device number differs" ); diff = true; }
-    }
-
-  if( diff || size_differs ) set_error_status( 1 );
-  return !size_differs;
-  }
-
-
-/* Compares 'file_size' bytes of member data read from 'ar' with the
-   contents of the file open on 'infd2'. 'estr' and 'ostr' are cleared
-   first; a read error on the file goes to 'estr', a premature EOF or a
-   difference in contents goes to 'ostr'. After the first difference the
-   rest of the member data is still read from the archive, but no longer
-   compared. 'infd2' is always closed. The error status is set to 1 if a
-   difference or an error was found.
-   Returns the status of the last read from the archive converted to bool;
-   false means that all the member data was read.
-   NOTE(review): 'status' is an int but the function returns bool, so a
-   status of 2 reaches the caller as 'true' (1). Confirm whether the
-   return type was meant to be int. */
-bool compare_file_contents( std::string & estr, std::string & ostr,
-                            Archive_reader_base & ar, const long long file_size,
-                            const char * const filename, const int infd2 )
-  {
-  const int bufsize = 32 * header_size;
-  uint8_t abuf[bufsize];			// data from the archive
-  uint8_t fbuf[bufsize];			// data from the file
-  // the archive data is padded to a multiple of header_size
-  const int rem = file_size % header_size;
-  const int padding = rem ? header_size - rem : 0;
-  int status = 0;
-  bool diff = false;
-
-  estr.clear(); ostr.clear();
-  for( long long rest = file_size; rest > 0; rest -= bufsize )
-    {
-    const bool last = ( rest < bufsize );	// short final chunk
-    const int asize = last ? rest + padding : bufsize;
-    const int fsize = last ? rest : bufsize;
-    status = ar.read( abuf, asize );
-    if( status != 0 ) { diff = true; break; }
-    if( !diff )
-      {
-      if( readblock( infd2, fbuf, fsize ) != fsize )
-        {
-        if( errno ) format_file_error( estr, filename, "Read error", errno );
-        else format_file_diff( ostr, filename, "EOF found in file" );
-        diff = true;
-        }
-      else if( std::memcmp( abuf, fbuf, fsize ) != 0 )
-        { format_file_diff( ostr, filename, "Contents differ" ); diff = true; }
-      }
-    if( last ) break;
-    }
-  close( infd2 );
-  if( diff ) set_error_status( 1 );
-  return status;
-  }
-
-
-/* Entry point for the modes that read an archive (compare, extract, list).
-   Opens the archive, executes the -C options (except when listing), marks
-   the non-option arguments that must be found in the archive, and then
-   processes the archive one tar header at a time.
-   When comparing or listing with at least one worker thread, an indexed
-   archive with two or more lzip members is handed over to decode_lz().
-   Arguments still pending at the end are reported as not found.
-   Returns the value of final_exit_status(). */
-int decode( const Cl_options & cl_opts )
-  {
-  // open archive before changing working directory
-  const Archive_descriptor ad( cl_opts.archive_name );
-  if( ad.infd < 0 ) return 1;
-
-  // Execute -C options and mark filenames to be compared, extracted or listed.
-  // name_pending is of type char instead of bool to allow concurrent update.
-  const int num_args = cl_opts.parser.arguments();
-  std::vector< char > name_pending( num_args, false );
-  for( int i = 0; i < num_args; ++i )
-    {
-    const int code = cl_opts.parser.code( i );
-    const std::string & arg = cl_opts.parser.argument( i );
-    if( code == 'C' && cl_opts.program_mode != m_list )
-      {
-      const char * const dir = arg.c_str();
-      if( chdir( dir ) != 0 )
-        { show_file_error( dir, "Error changing working directory", errno );
-          return 1; }
-      }
-    if( code == 0 && arg.size() && !Exclude::excluded( arg.c_str() ) )
-      name_pending[i] = true;
-    }
-
-  // multi-threaded --list is faster even with 1 thread and 1 file in archive
-  // (but multi-threaded --diff probably needs at least 2 of each)
-  const bool threaded_mode =
-    ( cl_opts.program_mode == m_diff || cl_opts.program_mode == m_list );
-  if( threaded_mode && cl_opts.num_workers > 0 &&
-      ad.indexed && ad.lzip_index.members() >= 2 )	// one file + eof
-    return decode_lz( cl_opts, ad, name_pending );
-
-  Archive_reader ar( ad );
-  Extended extended;			// metadata from extended records
-  int retval = 0;
-  bool prev_extended = false;		// prev header was extended
-  while( true )				// process one tar header per iteration
-    {
-    Tar_header header;
-    const int rd = ar.read( header, header_size );
-    if( rd != 0 )
-      {
-      read_error( ar );
-      if( rd == 2 ) { retval = 2; break; }
-      }
-    if( rd != 0 || !verify_ustar_chksum( header ) )	// not a valid header
-      {
-      if( rd == 0 && block_is_zero( header, header_size ) )	// EOF block
-        {
-        // an extended header must not be the last thing in the archive
-        if( prev_extended && !cl_opts.permissive )
-          { show_file_error( ad.namep, fv_msg1 ); retval = 2; }
-        break;
-        }
-      if( skip_warn() && verbosity >= 2 )
-        std::fprintf( stderr, "ustar chksum = %07o\n", ustar_chksum( header ) );
-      set_error_status( 2 );
-      continue;
-      }
-    skip_warn( true );			// reset warning
-
-    const Typeflag typeflag = (Typeflag)header[typeflag_o];
-    if( typeflag == tf_global )
-      {
-      if( prev_extended && !cl_opts.permissive )
-        { show_file_error( ad.namep, fv_msg2 ); retval = 2; break; }
-      Extended dummy;		// global headers are parsed and ignored
-      const int status = ar.parse_records( dummy, header, grbuf, true );
-      if( status != 0 )
-        {
-        show_file_error( ad.namep, gblrec_msg );
-        skip_warn();
-        set_error_status( status );
-        }
-      continue;
-      }
-    if( typeflag == tf_extended )
-      {
-      if( prev_extended && !cl_opts.permissive )
-        { show_file_error( ad.namep, fv_msg3 ); retval = 2; break; }
-      const int status = ar.parse_records( extended, header, grbuf,
-                                           cl_opts.permissive );
-      if( status != 0 )
-        {
-        show_file_error( ad.namep, extrec_msg );
-        skip_warn();
-        extended.reset();
-        set_error_status( status );
-        }
-      else if( !extended.crc_present() && cl_opts.missing_crc )
-        { show_file_error( ad.namep, mcrc_msg ); retval = 2; break; }
-      prev_extended = true;
-      continue;
-      }
-    prev_extended = false;
-
-    extended.fill_from_ustar( header );	// copy metadata from header
-
-    if( check_skip_filename( cl_opts, name_pending, extended.path().c_str() ) )
-      retval = skip_member( ar, extended );
-    else switch( cl_opts.program_mode )
-      {
-      case m_list: retval = list_member( ar, extended, header ); break;
-      case m_diff:
-        retval = compare_member( cl_opts, ar, extended, header ); break;
-      default: retval = extract_member( cl_opts, ar, extended, header );
-      }
-    extended.reset();
-    if( retval )
-      { show_error( "Error is not recoverable: exiting now." ); break; }
-    }
-
-  if( close( ad.infd ) != 0 && !retval )
-    { show_file_error( ad.namep, "Error closing archive", errno );
-      retval = 1; }
-
-  if( retval == 0 )
-    for( int i = 0; i < num_args; ++i )
-      {
-      if( !nonempty_arg( cl_opts.parser, i ) || !name_pending[i] ) continue;
-      show_file_error( cl_opts.parser.argument( i ).c_str(),
-                       "Not found in archive." );
-      retval = 1;
-      }
-  return final_exit_status( retval, cl_opts.program_mode != m_diff );
-  }