diff options
Diffstat (limited to '')
-rw-r--r-- | extract.cc | 561 |
1 files changed, 561 insertions, 0 deletions
diff --git a/extract.cc b/extract.cc new file mode 100644 index 0000000..67f4a20 --- /dev/null +++ b/extract.cc @@ -0,0 +1,561 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2018 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <algorithm> +#include <cerrno> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <string> +#include <vector> +#include <stdint.h> +#include <unistd.h> +#include <utime.h> +#include <sys/stat.h> +#include <lzlib.h> + +#include "arg_parser.h" +#include "lzip.h" +#include "tarlz.h" + + +namespace { + +int gretval = 0; + +bool make_path( const std::string & name ) + { + const mode_t mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH; + unsigned end = name.size(); // first slash before last component + + while( end > 0 && name[end-1] == '/' ) --end; // remove trailing slashes + while( end > 0 && name[end-1] != '/' ) --end; // remove last component + while( end > 0 && name[end-1] == '/' ) --end; // remove more slashes + + unsigned index = 0; + while( index < end ) + { + while( index < end && name[index] == '/' ) ++index; + unsigned first = index; + while( index < end && name[index] != '/' ) ++index; + if( first < index ) + { + const std::string partial( name, 0, index ); + struct stat st; + if( stat( partial.c_str(), &st ) == 0 ) + { if( !S_ISDIR( st.st_mode ) ) return false; } + else if( mkdir( partial.c_str(), mode ) != 0 ) + return false; + } + } + return true; + } + + +// Returns in buf the first rd bytes of the second lzip member or +// the first 512 bytes of the second tar member, and sets islz if lzip member +bool skip_first_member( const int infd, uint8_t * const buf, + int & rd, bool & islz ) + { + while( true ) + { + for( int i = 0; i < rd; ++i ) + if( buf[i] == 'L' && (*(Lzip_header *)( buf + i )).verify_prefix( rd - i ) ) + { + const int ts = rd - i; // tail size + std::memmove( buf, buf + i, ts ); + if( ts >= (int)sizeof lzip_magic ) + { rd = ts; islz = true; return true; } + int rd2 = readblock( infd, buf + ts, header_size - ts ); + if( rd2 != header_size - ts && errno ) + { show_error( "Error reading archive", errno ); return false; } + if( ts + rd2 >= min_member_size && + (*(Lzip_header *)buf).verify_magic() ) + { rd = ts + rd2; islz = true; return true; } + std::memmove( buf, buf + ts, rd2 ); + int rd3 = readblock( infd, buf + rd2, header_size - rd2 ); + if( rd3 != header_size - rd2 && errno ) + { show_error( "Error reading archive", errno ); return false; } + rd = rd2 + rd3; i = -1; + } + if( rd < header_size ) return false; // eof + if( rd == header_size && verify_ustar_chksum( buf ) ) + { islz = false; return true; } + rd = readblock( infd, buf, header_size ); + if( rd != header_size && errno ) + { show_error( "Error reading archive", errno ); return false; } + } + } + + +inline bool block_is_zero( const uint8_t * const buf, const int size ) + { + for( int i = 0; i < size; ++i ) if( buf[i] != 0 ) return false; + return true; + } + + +bool archive_read( const int infd, uint8_t * const buf, const int size ) + { + static LZ_Decoder * decoder = 0; + static bool first_call = true; + static bool at_eof = false; + + if( first_call ) // check format + { + first_call = false; + if( size != header_size ) + internal_error( "size != header_size on first call." ); + int rd = readblock( infd, buf, size ); + if( rd != size && errno ) + { show_error( "Error reading archive", errno ); return false; } + bool islz = + ( rd >= min_member_size && (*(Lzip_header *)buf).verify_magic() ); + const bool istar = ( rd == size && verify_ustar_chksum( buf ) ); + const bool iseof = + ( !islz && !istar && rd == size && block_is_zero( buf, size ) ); + if( !islz && !istar && !iseof ) + { + show_error( "This does not look like a tar archive." ); + show_error( "Skipping to next header." ); +// std::fprintf( stderr, "%07o\n", ustar_chksum( buf ) ); + gretval = 2; + if( !skip_first_member( infd, buf, rd, islz ) ) return false; + } + if( !islz ) return true; // uncompressed + decoder = LZ_decompress_open(); // compressed + if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok ) + { show_error( "Not enough memory." ); + LZ_decompress_close( decoder ); return false; } + if( LZ_decompress_write( decoder, buf, rd ) != rd ) + internal_error( "library error (LZ_decompress_write)." ); + if( !archive_read( infd, buf, size ) ) return false; + if( verify_ustar_chksum( buf ) || block_is_zero( buf, size ) ) return true; + show_error( "This does not look like a tar archive." ); + show_error( "Skipping to next header." ); + gretval = 2; + if( LZ_decompress_sync_to_member( decoder ) < 0 ) + internal_error( "library error (LZ_decompress_sync_to_member)." ); + } + + if( !decoder ) // uncompressed + { if( readblock( infd, buf, size ) == size ) return true; + show_error( "Archive ends unexpectedly." ); return false; } + const int ibuf_size = 16384; + uint8_t ibuf[ibuf_size]; + int sz = 0; + while( sz < size ) + { + if( !at_eof && LZ_decompress_write_size( decoder ) > 0 ) + { + const int rsize = std::min( ibuf_size, LZ_decompress_write_size( decoder ) ); + const int rd = readblock( infd, ibuf, rsize ); + if( LZ_decompress_write( decoder, ibuf, rd ) != rd ) + internal_error( "library error (LZ_decompress_write)." ); + if( rd < rsize ) + { + at_eof = true; LZ_decompress_finish( decoder ); + if( errno ) + { show_error( "Error reading archive", errno ); return false; } + } + } + const int rd = LZ_decompress_read( decoder, buf + sz, size - sz ); + if( rd < 0 ) + { + show_error( "Skipping to next header." ); + gretval = 2; + if( LZ_decompress_sync_to_member( decoder ) < 0 ) + internal_error( "library error (LZ_decompress_sync_to_member)." ); + continue; + } + if( rd == 0 && LZ_decompress_finished( decoder ) == 1 ) + { LZ_decompress_close( decoder ); + show_error( "Archive ends unexpectedly." ); return false; } + sz += rd; + if( sz == size && LZ_decompress_finished( decoder ) == 1 && + LZ_decompress_close( decoder ) < 0 ) + { show_error( "LZ_decompress_close failed." ); return false; } + } + return true; + } + + +const char * mode_string( const Tar_header header ) + { + static char buf[11]; + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + + std::memcpy( buf, "----------", sizeof buf - 1 ); + switch( typeflag ) + { + case tf_regular: break; + case tf_link: buf[0] = 'h'; break; + case tf_symlink: buf[0] = 'l'; break; + case tf_chardev: buf[0] = 'c'; break; + case tf_blockdev: buf[0] = 'b'; break; + case tf_directory: buf[0] = 'd'; break; + case tf_fifo: buf[0] = 'p'; break; + case tf_hiperf: buf[0] = 'C'; break; + default: buf[0] = '?'; + } + const mode_t mode = strtoul( header + mode_o, 0, 8 ); // 12 bits + const bool setuid = mode & S_ISUID; + const bool setgid = mode & S_ISGID; + const bool sticky = mode & S_ISVTX; + if( mode & S_IRUSR ) buf[1] = 'r'; + if( mode & S_IWUSR ) buf[2] = 'w'; + if( mode & S_IXUSR ) buf[3] = setuid ? 's' : 'x'; + else if( setuid ) buf[3] = 'S'; + if( mode & S_IRGRP ) buf[4] = 'r'; + if( mode & S_IWGRP ) buf[5] = 'w'; + if( mode & S_IXGRP ) buf[6] = setgid ? 's' : 'x'; + else if( setgid ) buf[6] = 'S'; + if( mode & S_IROTH ) buf[7] = 'r'; + if( mode & S_IWOTH ) buf[8] = 'w'; + if( mode & S_IXOTH ) buf[9] = sticky ? 't' : 'x'; + else if( sticky ) buf[9] = 'T'; + return buf; + } + + +const char * user_group_string( const Tar_header header ) + { + enum { bufsize = uname_l + 1 + gname_l + 1 }; + static char buf[bufsize]; + + if( header[uname_o] && header[gname_o] ) + snprintf( buf, bufsize, "%.32s/%.32s", header + uname_o, header + gname_o ); + else + { + const int uid = strtoul( header + uid_o, 0, 8 ); + const int gid = strtoul( header + gid_o, 0, 8 ); + snprintf( buf, bufsize, "%u/%u", uid, gid ); + } + return buf; + } + + +const char * link_string( const Tar_header header ) + { + enum { bufsize = 9 + linkname_l + 1 }; + static char buf[bufsize]; + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + + if( typeflag == tf_link ) + snprintf( buf, bufsize, " link to %.100s", header + linkname_o ); + else if( typeflag == tf_symlink ) + snprintf( buf, bufsize, " -> %.100s", header + linkname_o ); + else buf[0] = 0; + return buf; + } + + +void show_member_name( const char * const filename, const Tar_header header, + const int vlevel ) + { + if( verbosity < vlevel ) return; + if( verbosity > vlevel ) + { + const time_t mtime = strtoull( header + mtime_o, 0, 8 ); // 33 bits + struct tm * tm = localtime( &mtime ); + std::printf( "%s %s %9llu %4d-%02u-%02u %02u:%02u %s%s\n", + mode_string( header ), user_group_string( header ), + strtoull( header + size_o, 0, 8 ), 1900 + tm->tm_year, + 1 + tm->tm_mon, tm->tm_mday, tm->tm_hour, tm->tm_min, + filename, link_string( header ) ); + } + else std::printf( "%s\n", filename ); + std::fflush( stdout ); + } + + +int list_member( const int infd, const char * const filename, + const unsigned long long file_size, const Tar_header header, + const bool skip ) + { + if( !skip ) show_member_name( filename, header, 0 ); + + const unsigned bufsize = 32 * header_size; + uint8_t buf[bufsize]; + unsigned long long rest = file_size; + const int rem = file_size % header_size; + const int padding = rem ? header_size - rem : 0; + while( rest > 0 ) + { + const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding; + if( !archive_read( infd, buf, rsize ) ) return 2; + if( rest < bufsize ) break; + rest -= rsize; + } + return 0; + } + + +bool contains_dotdot( const char * const filename ) + { + for( int i = 0; filename[i]; ++i ) + if( filename[i] == '.' && filename[i+1] == '.' && + ( i == 0 || filename[i-1] == '/' ) && + ( filename[i+2] == 0 || filename[i+2] == '/' ) ) return true; + return false; + } + + +int extract_member( const int infd, const char * const filename, + const unsigned long long file_size, const Tar_header header ) + { + if( contains_dotdot( filename ) ) + { + show_file_error( filename, "Contains a '..' component, skipping." ); + return list_member( infd, filename, file_size, header, true ); + } + const mode_t mode = strtoul( header + mode_o, 0, 8 ); // 12 bits + const time_t mtime = strtoull( header + mtime_o, 0, 8 ); // 33 bits + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + const bool islink = ( typeflag == tf_link || typeflag == tf_symlink ); + int outfd = -1; + + show_member_name( filename, header, 1 ); + std::remove( filename ); + make_path( filename ); + switch( typeflag ) + { + case tf_regular: + case tf_hiperf: + outfd = open_outstream( filename ); + if( outfd < 0 ) return 2; + chmod( filename, mode ); // ignore errors + break; + case tf_link: + case tf_symlink: + { + char linkname[linkname_l+1]; + std::memcpy( linkname, header + linkname_o, linkname_l ); + linkname[linkname_l] = 0; +/* if( contains_dotdot( linkname ) ) + { + show_file_error( filename, + "Link destination contains a '..' component, skipping." ); + return list_member( infd, filename, file_size, header, false ); + }*/ + const bool hard = typeflag == tf_link; + if( ( hard && link( linkname, filename ) != 0 ) || + ( !hard && symlink( linkname, filename ) != 0 ) ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "Can't %slink file '%s' to '%s': %s.\n", + hard ? "" : "sym", linkname, filename, + std::strerror( errno ) ); + return 2; + } + } break; + case tf_directory: + if( mkdir( filename, mode ) != 0 && errno != EEXIST ) + { + show_file_error( filename, "Can't create directory", errno ); + return 2; + } + break; + case tf_chardev: + case tf_blockdev: + { + const unsigned dev = makedev( strtoul( header + devmajor_o, 0, 8 ), + strtoul( header + devminor_o, 0, 8 ) ); + const int dmode = ( typeflag == tf_chardev ? S_IFCHR : S_IFBLK ) | mode; + if( mknod( filename, dmode, dev ) != 0 ) + { + show_file_error( filename, "Can't create device node", errno ); + return 2; + } + break; + } + case tf_fifo: + if( mkfifo( filename, mode ) != 0 && errno != EEXIST ) + { + show_file_error( filename, "Can't create FIFO file", errno ); + return 2; + } + break; + default: + if( verbosity >= 0 ) + std::fprintf( stderr, "File type '%c' not supported for file '%s'.\n", + typeflag, filename ); + return 2; + } + + const uid_t uid = (uid_t)strtoul( header + uid_o, 0, 8 ); + const gid_t gid = (gid_t)strtoul( header + gid_o, 0, 8 ); + if( !islink && chown( filename, uid, gid ) != 0 && + errno != EPERM && errno != EINVAL ) + { + show_file_error( filename, "Can't change file owner", errno ); + return 2; + } + + const unsigned bufsize = 32 * header_size; + uint8_t buf[bufsize]; + unsigned long long rest = file_size; + const int rem = file_size % header_size; + const int padding = rem ? header_size - rem : 0; + while( rest > 0 ) + { + const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding; + if( !archive_read( infd, buf, rsize ) ) + { if( outfd >= 0 ) { close( outfd ); std::remove( filename ); } + return 2; } + const int wsize = ( rest >= bufsize ) ? bufsize : rest; + if( outfd >= 0 && writeblock( outfd, buf, wsize ) != wsize ) + { show_file_error( filename, "Error writing file", errno ); return 2; } + rest -= wsize; + } + if( outfd >= 0 && close( outfd ) != 0 ) + { show_file_error( filename, "Error closing file", errno ); return 2; } + if( !islink ) + { + struct utimbuf t; + t.actime = mtime; + t.modtime = mtime; + utime( filename, &t ); // ignore errors + } + return 0; + } + + +const char * remove_leading_slash( const char * const filename ) + { + static bool first_post = true; + const char * p = filename; + + while( *p == '/' || ( *p == '.' && p[1] == '/' ) ) ++p; + if( p != filename && first_post ) + { + first_post = false; + std::string msg( "Removing leading '" ); + msg.append( filename, p - filename ); + msg += "' from member names."; + show_error( msg.c_str() ); + } + if( *p == 0 ) p = "."; + return p; + } + + +// return true if dir is a parent directory of name +bool compare_prefix_dir( const char * const dir, const char * const name ) + { + int len = 0; + while( dir[len] && dir[len] == name[len] ) ++len; + return ( !dir[len] && len > 0 && ( dir[len-1] == '/' || name[len] == '/' ) ); + } + + +// compare two file names ignoring trailing slashes +bool compare_tslash( const char * const name1, const char * const name2 ) + { + const char * p = name1; + const char * q = name2; + while( *p && *p == *q ) { ++p; ++q; } + while( *p == '/' ) ++p; + while( *q == '/' ) ++q; + return ( !*p && !*q ); + } + +} // end namespace + + +int decode( const std::string & archive_name, const Arg_parser & parser, + const int filenames, const bool listing ) + { + const int infd = archive_name.size() ? + open_instream( archive_name ) : STDIN_FILENO; + if( infd < 0 ) return 1; + + std::vector< bool > name_pending( parser.arguments(), false ); + for( int i = 0; i < parser.arguments(); ++i ) + { + const int code = parser.code( i ); + if( code == 'C' && !listing ) + { + const char * const filename = parser.argument( i ).c_str(); + if( chdir( filename ) != 0 ) + { show_file_error( filename, "Error changing working directory", errno ); + return 1; } + } + if( !code ) name_pending[i] = true; + } + + int retval = 0; + bool skipping = false; + while( true ) // process one member per iteration + { + uint8_t buf[header_size]; + if( !archive_read( infd, buf, header_size ) ) return 2; + if( !verify_ustar_chksum( buf ) ) + { + if( block_is_zero( buf, header_size ) ) break; + gretval = 2; + if( !skipping ) + { skipping = true; show_error( "Skipping to next header." ); } + continue; + } + skipping = false; + + const char * const header = (const char *)buf; + enum { max_filename_size = prefix_l + 1 + name_l + 1 }; + char stored_name[max_filename_size]; + int len = 0; + while( len < prefix_l && header[prefix_o+len] ) + { stored_name[len] = header[prefix_o+len]; ++len; } + if( len && header[name_o] ) stored_name[len++] = '/'; + for( int i = 0; i < name_l && header[name_o+i]; ++i ) + { stored_name[len] = header[name_o+i]; ++len; } + while( len > 0 && stored_name[len-1] == '/' ) --len; // trailing '/' + stored_name[len] = 0; + const char * const filename = remove_leading_slash( stored_name ); + + bool skip = filenames > 0; + if( skip ) + for( int i = 0; i < parser.arguments(); ++i ) + if( parser.code( i ) == 0 && + ( compare_prefix_dir( parser.argument( i ).c_str(), filename ) || + compare_tslash( filename, parser.argument( i ).c_str() ) ) ) + { skip = false; name_pending[i] = false; break; } + + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + const unsigned long long file_size = + ( typeflag == tf_regular || typeflag == tf_hiperf ) ? + strtoull( header + size_o, 0, 8 ) : 0; + if( listing || skip ) + retval = list_member( infd, filename, file_size, header, skip ); + else + retval = extract_member( infd, filename, file_size, header ); + if( retval ) return retval; + } + + for( int i = 0; i < parser.arguments(); ++i ) + if( parser.code( i ) == 0 && name_pending[i] ) + { + show_file_error( parser.argument( i ).c_str(), "Not found in archive." ); + if( gretval < 1 ) gretval = 1; + } + if( !retval && gretval ) + { show_error( "Exiting with failure status due to previous errors." ); + retval = gretval; } + return retval; + } |