/* Tarlz - Archiver with multimember lzip compression Copyright (C) 2013-2019 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #define _FILE_OFFSET_BITS 64 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__GNU_LIBRARY__) #include // for makedev #endif #include #include "arg_parser.h" #include "lzip_index.h" #include "tarlz.h" namespace { Resizable_buffer grbuf( initial_line_length ); int gretval = 0; bool has_lz_ext; // global var for archive_read void skip_warn( const bool reset = false ) // avoid duplicate warnings { static bool skipping = false; if( reset ) skipping = false; else if( !skipping ) { skipping = true; show_error( "Skipping to next header." ); } } bool make_path( const std::string & name ) { const mode_t mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH; unsigned end = name.size(); // first slash before last component while( end > 0 && name[end-1] == '/' ) --end; // remove trailing slashes while( end > 0 && name[end-1] != '/' ) --end; // remove last component while( end > 0 && name[end-1] == '/' ) --end; // remove more slashes unsigned index = 0; while( index < end ) { while( index < end && name[index] == '/' ) ++index; unsigned first = index; while( index < end && name[index] != '/' ) ++index; if( first < index ) { const std::string partial( name, 0, index ); struct stat st; if( stat( partial.c_str(), &st ) == 0 ) { if( !S_ISDIR( st.st_mode ) ) return false; } else if( mkdir( partial.c_str(), mode ) != 0 ) return false; } } return true; } // Return value: 0 = OK, 1 = damaged member, 2 = fatal error. // If sizep and error, return in *sizep the number of bytes read. // The first 6 bytes of the archive must be intact for islz to be meaningful. int archive_read( const int infd, uint8_t * const buf, const int size, int * const sizep = 0 ) { static LZ_Decoder * decoder = 0; static bool at_eof = false; static bool fatal = false; static bool first_call = true; if( sizep ) *sizep = 0; if( fatal ) return 2; if( first_call ) // check format { first_call = false; if( size != header_size ) internal_error( "size != header_size on first call." ); const int rd = readblock( infd, buf, size ); if( sizep ) *sizep = rd; if( rd != size && errno ) { show_error( "Error reading archive", errno ); fatal = true; return 2; } const Lzip_header & header = (*(const Lzip_header *)buf); bool islz = ( rd >= min_member_size && header.verify_magic() && header.verify_version() && isvalid_ds( header.dictionary_size() ) ); const bool istar = ( rd == size && verify_ustar_chksum( buf ) ); const bool iseof = ( !islz && !istar && rd == size && block_is_zero( buf, size ) ); if( !islz && !istar && !iseof ) // corrupt or invalid format { show_error( "This does not look like a POSIX tar archive." ); if( has_lz_ext ) islz = true; if( verbosity >= 2 && !islz && rd == size ) std::fprintf( stderr, "ustar chksum = %07o\n", ustar_chksum( buf ) ); if( !islz ) return 1; } if( !islz ) // uncompressed { if( rd == size ) return 0; fatal = true; return 2; } decoder = LZ_decompress_open(); // compressed if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok ) { show_error( "Not enough memory." ); LZ_decompress_close( decoder ); fatal = true; return 2; } if( LZ_decompress_write( decoder, buf, rd ) != rd ) internal_error( "library error (LZ_decompress_write)." ); const int res = archive_read( infd, buf, size, sizep ); if( res != 0 ) { if( res == 2 ) fatal = true; return res; } if( verify_ustar_chksum( buf ) || block_is_zero( buf, size ) ) return 0; show_error( "This does not look like a POSIX tar.lz archive." ); fatal = true; return 2; } if( !decoder ) // uncompressed { const int rd = readblock( infd, buf, size ); if( rd == size ) return 0; if( sizep ) *sizep = rd; show_error( "Archive ends unexpectedly." ); fatal = true; return 2; } const int ibuf_size = 16384; uint8_t ibuf[ibuf_size]; int sz = 0; while( sz < size ) { const int rd = LZ_decompress_read( decoder, buf + sz, size - sz ); if( rd < 0 ) { if( LZ_decompress_sync_to_member( decoder ) < 0 ) internal_error( "library error (LZ_decompress_sync_to_member)." ); skip_warn(); gretval = 2; return 1; } if( rd == 0 && LZ_decompress_finished( decoder ) == 1 ) { LZ_decompress_close( decoder ); show_error( "Archive ends unexpectedly." ); fatal = true; return 2; } sz += rd; if( sizep ) *sizep = sz; if( sz == size && LZ_decompress_finished( decoder ) == 1 && LZ_decompress_close( decoder ) < 0 ) { show_error( "LZ_decompress_close failed." ); fatal = true; return 2; } if( sz < size && !at_eof && LZ_decompress_write_size( decoder ) > 0 ) { const int rsize = std::min( ibuf_size, LZ_decompress_write_size( decoder ) ); const int rd = readblock( infd, ibuf, rsize ); if( LZ_decompress_write( decoder, ibuf, rd ) != rd ) internal_error( "library error (LZ_decompress_write)." ); if( rd < rsize ) { at_eof = true; LZ_decompress_finish( decoder ); if( errno ) { show_error( "Error reading archive", errno ); fatal = true; return 2; } } } } return 0; } enum { mode_string_size = 10, group_string_size = 1 + uname_l + 1 + gname_l + 1 }; // 67 void format_mode_string( const Tar_header header, char buf[mode_string_size] ) { const Typeflag typeflag = (Typeflag)header[typeflag_o]; std::memcpy( buf, "----------", mode_string_size ); switch( typeflag ) { case tf_regular: break; case tf_link: buf[0] = 'h'; break; case tf_symlink: buf[0] = 'l'; break; case tf_chardev: buf[0] = 'c'; break; case tf_blockdev: buf[0] = 'b'; break; case tf_directory: buf[0] = 'd'; break; case tf_fifo: buf[0] = 'p'; break; case tf_hiperf: buf[0] = 'C'; break; default: buf[0] = '?'; } const mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits const bool setuid = mode & S_ISUID; const bool setgid = mode & S_ISGID; const bool sticky = mode & S_ISVTX; if( mode & S_IRUSR ) buf[1] = 'r'; if( mode & S_IWUSR ) buf[2] = 'w'; if( mode & S_IXUSR ) buf[3] = setuid ? 's' : 'x'; else if( setuid ) buf[3] = 'S'; if( mode & S_IRGRP ) buf[4] = 'r'; if( mode & S_IWGRP ) buf[5] = 'w'; if( mode & S_IXGRP ) buf[6] = setgid ? 's' : 'x'; else if( setgid ) buf[6] = 'S'; if( mode & S_IROTH ) buf[7] = 'r'; if( mode & S_IWOTH ) buf[8] = 'w'; if( mode & S_IXOTH ) buf[9] = sticky ? 't' : 'x'; else if( sticky ) buf[9] = 'T'; } int format_user_group_string( const Tar_header header, char buf[group_string_size] ) { int len; if( header[uname_o] && header[gname_o] ) len = snprintf( buf, group_string_size, " %.32s/%.32s", header + uname_o, header + gname_o ); else { const unsigned uid = parse_octal( header + uid_o, uid_l ); const unsigned gid = parse_octal( header + gid_o, gid_l ); len = snprintf( buf, group_string_size, " %u/%u", uid, gid ); } return len; } } // end namespace bool block_is_zero( const uint8_t * const buf, const int size ) { for( int i = 0; i < size; ++i ) if( buf[i] != 0 ) return false; return true; } void format_member_name( const Extended & extended, const Tar_header header, Resizable_buffer & rbuf, const bool long_format ) { if( long_format ) { format_mode_string( header, rbuf() ); const int group_string_len = format_user_group_string( header, rbuf() + mode_string_size ); const int offset = mode_string_size + group_string_len; const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits struct tm tms; const struct tm * tm = localtime_r( &mtime, &tms ); if( !tm ) { time_t z = 0; tm = localtime_r( &z, &tms ); if( !tm ) tm = &tms; } const Typeflag typeflag = (Typeflag)header[typeflag_o]; const bool islink = ( typeflag == tf_link || typeflag == tf_symlink ); const char * const link_string = !islink ? "" : ( ( typeflag == tf_link ) ? " link to " : " -> " ); for( int i = 0; i < 2; ++i ) { const int len = snprintf( rbuf() + offset, rbuf.size() - offset, " %9llu %4d-%02u-%02u %02u:%02u %s%s%s\n", extended.file_size(), 1900 + tm->tm_year, 1 + tm->tm_mon, tm->tm_mday, tm->tm_hour, tm->tm_min, extended.path().c_str(), link_string, !islink ? "" : extended.linkpath().c_str() ); if( (int)rbuf.size() > len + offset ) break; else rbuf.resize( len + offset + 1 ); } } else { if( rbuf.size() < extended.path().size() + 2 ) rbuf.resize( extended.path().size() + 2 ); snprintf( rbuf(), rbuf.size(), "%s\n", extended.path().c_str() ); } } namespace { void show_member_name( const Extended & extended, const Tar_header header, const int vlevel, Resizable_buffer & rbuf ) { if( verbosity < vlevel ) return; format_member_name( extended, header, rbuf, verbosity > vlevel ); std::fputs( rbuf(), stdout ); std::fflush( stdout ); } int list_member( const int infd, const Extended & extended, const Tar_header header, const bool skip ) { if( !skip ) show_member_name( extended, header, 0, grbuf ); const unsigned bufsize = 32 * header_size; uint8_t buf[bufsize]; unsigned long long rest = extended.file_size(); const int rem = rest % header_size; const int padding = rem ? header_size - rem : 0; while( rest > 0 ) { const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding; const int ret = archive_read( infd, buf, rsize ); if( ret != 0 ) { if( ret == 2 ) return 2; else break; } if( rest < bufsize ) break; rest -= rsize; } return 0; } bool contains_dotdot( const char * const filename ) { for( int i = 0; filename[i]; ++i ) if( filename[i] == '.' && filename[i+1] == '.' && ( i == 0 || filename[i-1] == '/' ) && ( filename[i+2] == 0 || filename[i+2] == '/' ) ) return true; return false; } int extract_member( const int infd, const Extended & extended, const Tar_header header, const bool keep_damaged ) { const char * const filename = extended.path().c_str(); if( contains_dotdot( filename ) ) { show_file_error( filename, "Contains a '..' component, skipping." ); return list_member( infd, extended, header, true ); } const mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits const Typeflag typeflag = (Typeflag)header[typeflag_o]; const bool islink = ( typeflag == tf_link || typeflag == tf_symlink ); int outfd = -1; show_member_name( extended, header, 1, grbuf ); std::remove( filename ); make_path( filename ); switch( typeflag ) { case tf_regular: case tf_hiperf: outfd = open_outstream( filename ); if( outfd < 0 ) return 2; chmod( filename, mode ); // ignore errors break; case tf_link: case tf_symlink: { const char * const linkname = extended.linkpath().c_str(); /* if( contains_dotdot( linkname ) ) { show_file_error( filename, "Link destination contains a '..' component, skipping." ); return list_member( infd, extended, header, false ); }*/ const bool hard = typeflag == tf_link; if( ( hard && link( linkname, filename ) != 0 ) || ( !hard && symlink( linkname, filename ) != 0 ) ) { if( verbosity >= 0 ) std::fprintf( stderr, "Can't %slink file '%s' to '%s': %s.\n", hard ? "" : "sym", linkname, filename, std::strerror( errno ) ); return 2; } } break; case tf_directory: if( mkdir( filename, mode ) != 0 && errno != EEXIST ) { show_file_error( filename, "Can't create directory", errno ); return 2; } break; case tf_chardev: case tf_blockdev: { const unsigned dev = makedev( parse_octal( header + devmajor_o, devmajor_l ), parse_octal( header + devminor_o, devminor_l ) ); const int dmode = ( typeflag == tf_chardev ? S_IFCHR : S_IFBLK ) | mode; if( mknod( filename, dmode, dev ) != 0 ) { show_file_error( filename, "Can't create device node", errno ); return 2; } break; } case tf_fifo: if( mkfifo( filename, mode ) != 0 && errno != EEXIST ) { show_file_error( filename, "Can't create FIFO file", errno ); return 2; } break; default: if( verbosity >= 0 ) std::fprintf( stderr, "File type '%c' not supported for file '%s'.\n", typeflag, filename ); return 2; } const uid_t uid = (uid_t)parse_octal( header + uid_o, uid_l ); const gid_t gid = (gid_t)parse_octal( header + gid_o, gid_l ); if( !islink && chown( filename, uid, gid ) != 0 && errno != EPERM && errno != EINVAL ) { show_file_error( filename, "Can't change file owner", errno ); return 2; } const unsigned bufsize = 32 * header_size; uint8_t buf[bufsize]; unsigned long long rest = extended.file_size(); const int rem = rest % header_size; const int padding = rem ? header_size - rem : 0; while( rest > 0 ) { const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding; int rd; const int ret = archive_read( infd, buf, rsize, &rd ); if( ret != 0 ) { if( outfd >= 0 ) { if( keep_damaged ) { writeblock( outfd, buf, std::min( rest, (unsigned long long)rd ) ); close( outfd ); } else { close( outfd ); std::remove( filename ); } } if( ret == 2 ) return 2; else return 0; } const int wsize = ( rest >= bufsize ) ? bufsize : rest; if( outfd >= 0 && writeblock( outfd, buf, wsize ) != wsize ) { show_file_error( filename, "Error writing file", errno ); return 2; } rest -= wsize; } if( outfd >= 0 && close( outfd ) != 0 ) { show_file_error( filename, "Error closing file", errno ); return 2; } if( !islink ) { struct utimbuf t; t.actime = mtime; t.modtime = mtime; utime( filename, &t ); // ignore errors } return 0; } } // end namespace // Removes any amount of leading "./" and '/' strings. const char * remove_leading_slash( const char * const filename ) { static bool first_post = true; const char * p = filename; while( *p == '/' || ( *p == '.' && p[1] == '/' ) ) ++p; if( p != filename && first_post ) { first_post = false; std::string msg( "Removing leading '" ); msg.append( filename, p - filename ); msg += "' from member names."; show_error( msg.c_str() ); } if( *p == 0 ) p = "."; return p; } // return true if dir is a parent directory of name bool compare_prefix_dir( const char * const dir, const char * const name ) { int len = 0; while( dir[len] && dir[len] == name[len] ) ++len; return ( !dir[len] && len > 0 && ( dir[len-1] == '/' || name[len] == '/' ) ); } // compare two file names ignoring trailing slashes bool compare_tslash( const char * const name1, const char * const name2 ) { const char * p = name1; const char * q = name2; while( *p && *p == *q ) { ++p; ++q; } while( *p == '/' ) ++p; while( *q == '/' ) ++q; return ( !*p && !*q ); } namespace { bool parse_records( const int infd, Extended & extended, const Tar_header header, const bool permissive ) { const unsigned long long edsize = parse_octal( header + size_o, size_l ); const unsigned long long bufsize = round_up( edsize ); if( bufsize == 0 || edsize == 0 || edsize >= 1ULL << 33 ) return false; // overflow or no extended data char * const buf = new char[bufsize]; // extended records buffer const bool ret = ( archive_read( infd, (uint8_t *)buf, bufsize ) == 0 && extended.parse( buf, edsize, permissive ) ); delete[] buf; return ret; } } // end namespace /* Returns the number of bytes really read. If (returned value < size) and (errno == 0), means EOF was reached. */ int readblock( const int fd, uint8_t * const buf, const int size ) { int sz = 0; errno = 0; while( sz < size ) { const int n = read( fd, buf + sz, size - sz ); if( n > 0 ) sz += n; else if( n == 0 ) break; // EOF else if( errno != EINTR ) break; errno = 0; } return sz; } /* Returns the number of bytes really written. If (returned value < size), it is always an error. */ int writeblock( const int fd, const uint8_t * const buf, const int size ) { int sz = 0; errno = 0; while( sz < size ) { const int n = write( fd, buf + sz, size - sz ); if( n > 0 ) sz += n; else if( n < 0 && errno != EINTR ) break; errno = 0; } return sz; } unsigned long long parse_octal( const uint8_t * const ptr, const int size ) { unsigned long long result = 0; int i = 0; while( i < size && std::isspace( ptr[i] ) ) ++i; for( ; i < size && ptr[i] >= '0' && ptr[i] <= '7'; ++i ) { result <<= 3; result += ptr[i] - '0'; } return result; } int decode( const std::string & archive_name, const Arg_parser & parser, const int filenames, const int num_workers, const int debug_level, const bool keep_damaged, const bool listing, const bool missing_crc, const bool permissive ) { const int infd = archive_name.size() ? open_instream( archive_name ) : STDIN_FILENO; if( infd < 0 ) return 1; // Execute -C options and mark filenames to be extracted or listed. // name_pending is of type char instead of bool to allow concurrent update. std::vector< char > name_pending( parser.arguments(), false ); for( int i = 0; i < parser.arguments(); ++i ) { const int code = parser.code( i ); if( code == 'C' && !listing ) { const char * const dir = parser.argument( i ).c_str(); if( chdir( dir ) != 0 ) { show_file_error( dir, "Error changing working directory", errno ); return 1; } } if( !code ) name_pending[i] = true; } if( listing && num_workers > 0 ) // multi-threaded --list { const Lzip_index lzip_index( infd, true, false ); const long members = lzip_index.members(); if( lzip_index.retval() == 0 && ( members >= 3 || ( members >= 2 && lzip_index.dblock( members - 1 ).size() > 1024 ) ) ) { //show_file_error( archive_name.c_str(), "Is compressed seekable" ); return list_lz( parser, name_pending, lzip_index, filenames, debug_level, infd, std::min( (long)num_workers, members ), missing_crc, permissive ); } lseek( infd, 0, SEEK_SET ); } has_lz_ext = // global var for archive_read ( archive_name.size() > 3 && archive_name.compare( archive_name.size() - 3, 3, ".lz" ) == 0 ) || ( archive_name.size() > 4 && archive_name.compare( archive_name.size() - 4, 4, ".tlz" ) == 0 ); Extended extended; // metadata from extended records int retval = 0; bool prev_extended = false; // prev header was extended while( true ) // process one tar member per iteration { Tar_header header; const int ret = archive_read( infd, header, header_size ); if( ret == 2 ) return 2; if( ret != 0 || !verify_ustar_chksum( header ) ) { if( ret == 0 && block_is_zero( header, header_size ) ) break; // EOF skip_warn(); gretval = 2; continue; } skip_warn( true ); // reset warning const Typeflag typeflag = (Typeflag)header[typeflag_o]; if( typeflag == tf_global ) { if( prev_extended ) { show_error( "Format violation: global header after extended header." ); return 2; } Extended dummy; // global headers are parsed and ignored if( !parse_records( infd, dummy, header, true ) ) { show_error( "Error in global extended records. Skipping to next header." ); gretval = 2; } continue; } if( typeflag == tf_extended ) { if( prev_extended && !permissive ) { show_error( "Format violation: consecutive extended headers found." /*" Use --permissive.", 0, true*/ ); return 2; } if( !parse_records( infd, extended, header, permissive ) ) { show_error( "Error in extended records. Skipping to next header." ); extended.reset(); gretval = 2; } else if( !extended.crc_present() && missing_crc ) { show_error( "Missing CRC in extended records.", 0, true ); return 2; } prev_extended = true; continue; } prev_extended = false; if( extended.linkpath().empty() ) // copy linkpath from ustar header { int len = 0; while( len < linkname_l && header[linkname_o+len] ) ++len; while( len > 1 && header[linkname_o+len-1] == '/' ) --len; // trailing '/' if( len > 0 ) { const uint8_t c = header[linkname_o+len]; header[linkname_o+len] = 0; extended.linkpath( (const char *)header + linkname_o ); header[linkname_o+len] = c; } } if( extended.path().empty() ) // copy path from ustar header { char stored_name[prefix_l+1+name_l+1]; int len = 0; while( len < prefix_l && header[prefix_o+len] ) { stored_name[len] = header[prefix_o+len]; ++len; } if( len && header[name_o] ) stored_name[len++] = '/'; for( int i = 0; i < name_l && header[name_o+i]; ++i ) { stored_name[len] = header[name_o+i]; ++len; } while( len > 0 && stored_name[len-1] == '/' ) --len; // trailing '/' stored_name[len] = 0; extended.path( remove_leading_slash( stored_name ) ); } const char * const filename = extended.path().c_str(); bool skip = filenames > 0; if( skip ) for( int i = 0; i < parser.arguments(); ++i ) if( parser.code( i ) == 0 ) { const char * const name = remove_leading_slash( parser.argument( i ).c_str() ); if( compare_prefix_dir( name, filename ) || compare_tslash( name, filename ) ) { skip = false; name_pending[i] = false; break; } } if( extended.file_size() == 0 && ( typeflag == tf_regular || typeflag == tf_hiperf ) ) extended.file_size( parse_octal( header + size_o, size_l ) ); if( listing || skip ) retval = list_member( infd, extended, header, skip ); else retval = extract_member( infd, extended, header, keep_damaged ); extended.reset(); if( retval ) { show_error( "Error is not recoverable: exiting now." ); return retval; } } for( int i = 0; i < parser.arguments(); ++i ) if( parser.code( i ) == 0 && name_pending[i] ) { show_file_error( parser.argument( i ).c_str(), "Not found in archive." ); if( gretval < 1 ) gretval = 1; } if( !retval && gretval ) { show_error( "Exiting with failure status due to previous errors." ); retval = gretval; } return retval; }