diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-14 12:57:29 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-14 12:57:29 +0000 |
commit | 29146f385a524ad6a4b1b127cc3d9641a8fe0adc (patch) | |
tree | 1caea11496a3d9e0333cdf649d9f9be6d5a67b78 /decode.cc | |
parent | Initial commit. (diff) | |
download | tarlz-29146f385a524ad6a4b1b127cc3d9641a8fe0adc.tar.xz tarlz-29146f385a524ad6a4b1b127cc3d9641a8fe0adc.zip |
Adding upstream version 0.25.upstream/0.25upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'decode.cc')
-rw-r--r-- | decode.cc | 533 |
1 files changed, 533 insertions, 0 deletions
diff --git a/decode.cc b/decode.cc new file mode 100644 index 0000000..bcac4c8 --- /dev/null +++ b/decode.cc @@ -0,0 +1,533 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2024 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <algorithm> +#include <cctype> +#include <cerrno> +#include <cstdio> +#include <fcntl.h> +#include <stdint.h> // for lzlib.h +#include <unistd.h> +#include <utime.h> +#include <sys/stat.h> +#if !defined __FreeBSD__ && !defined __OpenBSD__ && !defined __NetBSD__ && \ + !defined __DragonFly__ && !defined __APPLE__ && !defined __OS2__ +#include <sys/sysmacros.h> // for major, minor, makedev +#else +#include <sys/types.h> // for major, minor, makedev +#endif +#include <lzlib.h> + +#include "tarlz.h" +#include "arg_parser.h" +#include "lzip_index.h" +#include "archive_reader.h" +#include "decode.h" + +#ifndef O_DIRECTORY +#define O_DIRECTORY 0 +#endif + +namespace { + +Resizable_buffer grbuf; + +bool skip_warn( const bool reset = false ) // avoid duplicate warnings + { + static bool skipping = false; + + if( reset ) skipping = false; + else if( !skipping ) + { skipping = true; show_error( "Skipping to next header." ); return true; } + return false; + } + + +void read_error( const Archive_reader & ar ) + { + show_file_error( ar.ad.namep, ar.e_msg(), ar.e_code() ); + if( ar.e_skip() ) skip_warn(); + } + + +int skip_member( Archive_reader & ar, const Extended & extended, + const Typeflag typeflag ) + { + if( data_may_follow( typeflag ) ) + { const int ret = ar.skip_member( extended ); + if( ret != 0 ) { read_error( ar ); if( ar.fatal() ) return ret; } } + return 0; + } + + +int compare_member( const Cl_options & cl_opts, Archive_reader & ar, + const Extended & extended, const Tar_header header ) + { + if( !show_member_name( extended, header, 1, grbuf ) ) return 1; + std::string estr, ostr; + const bool stat_differs = + !compare_file_type( estr, ostr, cl_opts, extended, header ); + if( estr.size() ) std::fputs( estr.c_str(), stderr ); + if( ostr.size() ) { std::fputs( ostr.c_str(), stdout ); std::fflush( stdout ); } + if( extended.file_size() <= 0 ) return 0; + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + if( ( typeflag != tf_regular && typeflag != tf_hiperf ) || stat_differs ) + return skip_member( ar, extended, typeflag ); + // else compare file contents + const char * const filename = extended.path().c_str(); + const int infd2 = open_instream( filename ); + if( infd2 < 0 ) + { set_error_status( 1 ); return skip_member( ar, extended, typeflag ); } + int retval = compare_file_contents( estr, ostr, ar, extended.file_size(), + filename, infd2 ); + if( retval ) { read_error( ar ); if( !ar.fatal() ) retval = 0; } + else { if( estr.size() ) std::fputs( estr.c_str(), stderr ); + if( ostr.size() ) + { std::fputs( ostr.c_str(), stdout ); std::fflush( stdout ); } } + return retval; + } + + +int list_member( Archive_reader & ar, + const Extended & extended, const Tar_header header ) + { + if( !show_member_name( extended, header, 0, grbuf ) ) return 1; + return skip_member( ar, extended, (Typeflag)header[typeflag_o] ); + } + + +int extract_member( const Cl_options & cl_opts, Archive_reader & ar, + const Extended & extended, const Tar_header header ) + { + const char * const filename = extended.path().c_str(); + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + if( contains_dotdot( filename ) ) + { + show_file_error( filename, dotdot_msg ); + return skip_member( ar, extended, typeflag ); + } + mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits + if( geteuid() != 0 && !cl_opts.preserve_permissions ) mode &= ~get_umask(); + int outfd = -1; + + if( !show_member_name( extended, header, 1, grbuf ) ) return 1; + // remove file (or empty dir) before extraction to prevent following links + std::remove( filename ); + if( !make_dirs( filename ) ) + { + show_file_error( filename, intdir_msg, errno ); + set_error_status( 1 ); + return skip_member( ar, extended, typeflag ); + } + + switch( typeflag ) + { + case tf_regular: + case tf_hiperf: + outfd = open_outstream( filename ); + if( outfd < 0 ) + { set_error_status( 1 ); return skip_member( ar, extended, typeflag ); } + break; + case tf_link: + case tf_symlink: + { + const char * const linkname = extended.linkpath().c_str(); + const bool hard = typeflag == tf_link; + if( ( hard && link( linkname, filename ) != 0 ) || + ( !hard && symlink( linkname, filename ) != 0 ) ) + { + print_error( errno, cantln_msg, hard ? "" : "sym", linkname, filename ); + set_error_status( 1 ); + } + } break; + case tf_directory: + if( mkdir( filename, mode ) != 0 && errno != EEXIST ) + { + show_file_error( filename, mkdir_msg, errno ); + set_error_status( 1 ); + } + break; + case tf_chardev: + case tf_blockdev: + { + const unsigned dev = + makedev( parse_octal( header + devmajor_o, devmajor_l ), + parse_octal( header + devminor_o, devminor_l ) ); + const int dmode = ( typeflag == tf_chardev ? S_IFCHR : S_IFBLK ) | mode; + if( mknod( filename, dmode, dev ) != 0 ) + { + show_file_error( filename, mknod_msg, errno ); + set_error_status( 1 ); + } + break; + } + case tf_fifo: + if( mkfifo( filename, mode ) != 0 ) + { + show_file_error( filename, mkfifo_msg, errno ); + set_error_status( 1 ); + } + break; + default: + print_error( 0, uftype_msg, filename, typeflag ); + set_error_status( 2 ); + return skip_member( ar, extended, typeflag ); + } + + const bool islink = ( typeflag == tf_link || typeflag == tf_symlink ); + errno = 0; + if( !islink && + ( !uid_gid_in_range( extended.get_uid(), extended.get_gid() ) || + chown( filename, extended.get_uid(), extended.get_gid() ) != 0 ) ) + { + if( outfd >= 0 ) mode &= ~( S_ISUID | S_ISGID | S_ISVTX ); + // chown in many cases returns with EPERM, which can be safely ignored. + if( errno != EPERM && errno != EINVAL ) + { show_file_error( filename, chown_msg, errno ); set_error_status( 1 ); } + } + + if( outfd >= 0 ) fchmod( outfd, mode ); // ignore errors + + if( data_may_follow( typeflag ) ) + { + const int bufsize = 32 * header_size; + uint8_t buf[bufsize]; + long long rest = extended.file_size(); + const int rem = rest % header_size; + const int padding = rem ? header_size - rem : 0; + while( rest > 0 ) + { + const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding; + const int ret = ar.read( buf, rsize ); + if( ret != 0 ) + { + read_error( ar ); + if( outfd >= 0 ) + { + if( cl_opts.keep_damaged ) + { writeblock( outfd, buf, std::min( rest, (long long)ar.e_size() ) ); + close( outfd ); } + else { close( outfd ); std::remove( filename ); } + } + if( ar.fatal() ) return ret; else return 0; + } + const int wsize = ( rest >= bufsize ) ? bufsize : rest; + if( outfd >= 0 && writeblock( outfd, buf, wsize ) != wsize ) + { show_file_error( filename, werr_msg, errno ); return 1; } + rest -= wsize; + } + } + if( outfd >= 0 && close( outfd ) != 0 ) + { show_file_error( filename, eclosf_msg, errno ); return 1; } + if( !islink ) + { + struct utimbuf t; + t.actime = extended.atime().sec(); + t.modtime = extended.mtime().sec(); + utime( filename, &t ); // ignore errors + } + return 0; + } + + +void format_file_diff( std::string & ostr, const char * const filename, + const char * const msg ) + { if( verbosity >= 0 ) + { ostr += filename; ostr += ": "; ostr += msg; ostr += '\n'; } } + + +bool option_C_present( const Arg_parser & parser ) + { + for( int i = 0; i < parser.arguments(); ++i ) + if( parser.code( i ) == 'C' ) return true; + return false; + } + + +bool option_C_after_filename( const Arg_parser & parser ) + { + for( int i = 0; i < parser.arguments(); ++i ) + if( nonempty_arg( parser, i ) ) + while( ++i < parser.arguments() ) + if( parser.code( i ) == 'C' ) return true; + return false; + } + +} // end namespace + + +mode_t get_umask() + { + static mode_t mask = 0; // read once, cache the result + static bool first_call = true; + if( first_call ) { first_call = false; mask = umask( 0 ); umask( mask ); + mask &= S_IRWXU | S_IRWXG | S_IRWXO; } + return mask; + } + + +bool compare_file_type( std::string & estr, std::string & ostr, + const Cl_options & cl_opts, + const Extended & extended, const Tar_header header ) + { + const char * const filename = extended.path().c_str(); + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + struct stat st; + bool diff = false, size_differs = false, type_differs = true; + if( hstat( filename, &st, cl_opts.dereference ) != 0 ) + format_file_error( estr, filename, "warning: can't stat", errno ); + else if( ( typeflag == tf_regular || typeflag == tf_hiperf ) && + !S_ISREG( st.st_mode ) ) + format_file_diff( ostr, filename, "Is not a regular file" ); + else if( typeflag == tf_symlink && !S_ISLNK( st.st_mode ) ) + format_file_diff( ostr, filename, "Is not a symlink" ); + else if( typeflag == tf_chardev && !S_ISCHR( st.st_mode ) ) + format_file_diff( ostr, filename, "Is not a character device" ); + else if( typeflag == tf_blockdev && !S_ISBLK( st.st_mode ) ) + format_file_diff( ostr, filename, "Is not a block device" ); + else if( typeflag == tf_directory && !S_ISDIR( st.st_mode ) ) + format_file_diff( ostr, filename, "Is not a directory" ); + else if( typeflag == tf_fifo && !S_ISFIFO( st.st_mode ) ) + format_file_diff( ostr, filename, "Is not a FIFO" ); + else + { + type_differs = false; + if( typeflag != tf_symlink && !cl_opts.ignore_metadata ) + { + const mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits + if( mode != ( st.st_mode & ( S_ISUID | S_ISGID | S_ISVTX | + S_IRWXU | S_IRWXG | S_IRWXO ) ) ) + { format_file_diff( ostr, filename, "Mode differs" ); diff = true; } + } + if( !cl_opts.ignore_ids && !cl_opts.ignore_metadata ) + { + if( extended.get_uid() != (long long)st.st_uid ) + { format_file_diff( ostr, filename, "Uid differs" ); diff = true; } + if( extended.get_gid() != (long long)st.st_gid ) + { format_file_diff( ostr, filename, "Gid differs" ); diff = true; } + } + if( typeflag != tf_symlink ) + { + if( typeflag != tf_directory && !cl_opts.ignore_metadata && + extended.mtime().sec() != (long long)st.st_mtime ) + { + if( (time_t)extended.mtime().sec() == st.st_mtime ) + { if( !cl_opts.ignore_overflow ) { diff = true; + format_file_diff( ostr, filename, "Mod time overflow" ); } } + else { diff = true; + format_file_diff( ostr, filename, "Mod time differs" ); } + } + if( ( typeflag == tf_regular || typeflag == tf_hiperf ) && + extended.file_size() != st.st_size ) // don't compare contents + { format_file_diff( ostr, filename, "Size differs" ); size_differs = true; } + if( ( typeflag == tf_chardev || typeflag == tf_blockdev ) && + ( parse_octal( header + devmajor_o, devmajor_l ) != + (unsigned)major( st.st_rdev ) || + parse_octal( header + devminor_o, devminor_l ) != + (unsigned)minor( st.st_rdev ) ) ) + { format_file_diff( ostr, filename, "Device number differs" ); diff = true; } + } + else + { + char * const buf = new char[st.st_size+1]; + long len = readlink( filename, buf, st.st_size ); + bool e = ( len != st.st_size ); + if( !e ) + { + while( len > 1 && buf[len-1] == '/' ) --len; // trailing '/' + buf[len] = 0; + if( extended.linkpath() != buf ) e = true; + } + delete[] buf; + if( e ) { format_file_diff( ostr, filename, "Symlink differs" ); diff = true; } + } + } + if( diff || size_differs || type_differs ) set_error_status( 1 ); + return !( size_differs || type_differs ); + } + + +bool compare_file_contents( std::string & estr, std::string & ostr, + Archive_reader_base & ar, const long long file_size, + const char * const filename, const int infd2 ) + { + long long rest = file_size; + const int rem = rest % header_size; + const int padding = rem ? header_size - rem : 0; + const int bufsize = 32 * header_size; + uint8_t buf1[bufsize]; + uint8_t buf2[bufsize]; + int retval = 0; + bool diff = false; + estr.clear(); ostr.clear(); + while( rest > 0 ) + { + const int rsize1 = ( rest >= bufsize ) ? bufsize : rest + padding; + const int rsize2 = ( rest >= bufsize ) ? bufsize : rest; + if( ( retval = ar.read( buf1, rsize1 ) ) != 0 ) { diff = true; break; } + if( !diff ) + { + const int rd = readblock( infd2, buf2, rsize2 ); + if( rd != rsize2 ) + { + if( errno ) format_file_error( estr, filename, "Read error", errno ); + else format_file_diff( ostr, filename, "EOF found in file" ); + diff = true; + } + else + { + int i = 0; while( i < rsize2 && buf1[i] == buf2[i] ) ++i; + if( i < rsize2 ) + { format_file_diff( ostr, filename, "Contents differ" ); diff = true; } + } + } + if( rest < bufsize ) break; + rest -= rsize1; + } + close( infd2 ); + if( diff ) set_error_status( 1 ); + return retval; + } + + +int decode( const Cl_options & cl_opts ) + { + if( !grbuf.size() ) { show_error( mem_msg ); return 1; } + // open archive before changing working directory + const Archive_descriptor ad( cl_opts.archive_name ); + if( ad.infd < 0 ) return 1; + + const bool c_present = option_C_present( cl_opts.parser ) && + cl_opts.program_mode != m_list; + const bool c_after_name = c_present && + option_C_after_filename( cl_opts.parser ); + // save current working directory for sequential decoding + const int chdir_fd = c_after_name ? open( ".", O_RDONLY | O_DIRECTORY ) : -1; + if( c_after_name && chdir_fd < 0 ) + { show_error( "Can't save current working directory", errno ); return 1; } + if( c_present && !c_after_name ) // execute all -C options + for( int i = 0; i < cl_opts.parser.arguments(); ++i ) + { + if( cl_opts.parser.code( i ) != 'C' ) continue; + const char * const dir = cl_opts.parser.argument( i ).c_str(); + if( chdir( dir ) != 0 ) + { show_file_error( dir, chdir_msg, errno ); return 1; } + } + /* Mark filenames to be compared, extracted or listed. + name_pending is of type char instead of bool to allow concurrent update. */ + std::vector< char > name_pending( cl_opts.parser.arguments(), false ); + for( int i = 0; i < cl_opts.parser.arguments(); ++i ) + if( nonempty_arg( cl_opts.parser, i ) && // skip opts, empty names + !Exclude::excluded( cl_opts.parser.argument( i ).c_str() ) ) + name_pending[i] = true; + + /* multi-threaded --list is faster even with 1 thread and 1 file in archive + but multi-threaded --diff and --extract probably need at least 2 of each. + CWD is not per-thread; multi-threaded decode can't be used if a + -C option appears after a file name in the command line. */ + if( cl_opts.num_workers > 0 && !c_after_name && ad.indexed && + ad.lzip_index.members() >= 2 ) // 2 lzip members may be 1 file + EOA + return decode_lz( cl_opts, ad, name_pending ); + + Archive_reader ar( ad ); // serial reader + Extended extended; // metadata from extended records + int retval = 0; + bool prev_extended = false; // prev header was extended + while( true ) // process one tar header per iteration + { + Tar_header header; + const int ret = ar.read( header, header_size ); + if( ret != 0 ) { read_error( ar ); if( ar.fatal() ) { retval = ret; break; } } + if( ret != 0 || !check_ustar_chksum( header ) ) // error or EOA + { + if( ret == 0 && block_is_zero( header, header_size ) ) // EOA + { + if( !prev_extended || cl_opts.permissive ) break; + show_file_error( ad.namep, fv_msg1 ); + retval = 2; break; + } + if( skip_warn() && verbosity >= 2 ) + std::fprintf( stderr, "ustar chksum = %07o\n", ustar_chksum( header ) ); + set_error_status( 2 ); continue; + } + skip_warn( true ); // reset warning + + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + if( typeflag == tf_global ) + { + if( prev_extended && !cl_opts.permissive ) + { show_file_error( ad.namep, fv_msg2 ); retval = 2; break; } + Extended dummy; // global headers are parsed and ignored + const int ret = ar.parse_records( dummy, header, grbuf, gblrec_msg, true ); + if( ret != 0 ) + { show_file_error( ad.namep, ar.e_msg(), ar.e_code() ); + if( ar.fatal() ) { retval = ret; break; } + skip_warn(); set_error_status( ret ); } + continue; + } + if( typeflag == tf_extended ) + { + if( prev_extended && !cl_opts.permissive ) + { show_file_error( ad.namep, fv_msg3 ); retval = 2; break; } + const int ret = ar.parse_records( extended, header, grbuf, extrec_msg, + cl_opts.permissive ); + if( ret != 0 ) + { show_file_error( ad.namep, ar.e_msg(), ar.e_code() ); + if( ar.fatal() ) { retval = ret; break; } + skip_warn(); extended.reset(); set_error_status( ret ); } + else if( !extended.crc_present() && cl_opts.missing_crc ) + { show_file_error( ad.namep, miscrc_msg ); retval = 2; break; } + prev_extended = true; continue; + } + prev_extended = false; + + extended.fill_from_ustar( header ); // copy metadata from header + + try { + // members without name are skipped except when listing + if( check_skip_filename( cl_opts, name_pending, extended.path().c_str(), + chdir_fd ) ) retval = skip_member( ar, extended, typeflag ); + else + { + print_removed_prefix( extended.removed_prefix ); + if( cl_opts.program_mode == m_list ) + retval = list_member( ar, extended, header ); + else if( extended.path().empty() ) + retval = skip_member( ar, extended, typeflag ); + else if( cl_opts.program_mode == m_diff ) + retval = compare_member( cl_opts, ar, extended, header ); + else retval = extract_member( cl_opts, ar, extended, header ); + } + } + catch( Chdir_error & ) { retval = 1; } + extended.reset(); + if( retval ) + { show_error( "Error is not recoverable: exiting now." ); break; } + } + + if( close( ad.infd ) != 0 && retval == 0 ) + { show_file_error( ad.namep, eclosa_msg, errno ); retval = 1; } + + if( retval == 0 ) + for( int i = 0; i < cl_opts.parser.arguments(); ++i ) + if( nonempty_arg( cl_opts.parser, i ) && name_pending[i] ) + { show_file_error( cl_opts.parser.argument( i ).c_str(), nfound_msg ); + retval = 1; } + return final_exit_status( retval, cl_opts.program_mode != m_diff ); + } |