/* Tarlz - Archiver with multimember lzip compression Copyright (C) 2013-2019 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #define _FILE_OFFSET_BITS 64 #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__GNU_LIBRARY__) #include // for major, minor #endif #include #include #include #include #include "arg_parser.h" #include "tarlz.h" const CRC32 crc32c( true ); int cl_owner = -1; // global vars needed by add_member int cl_group = -1; int cl_data_size = 0; Solidity solidity = no_solid; namespace { LZ_Encoder * encoder = 0; // local vars needed by add_member const char * archive_namep = 0; unsigned long long partial_data_size = 0; // current block size int outfd = -1; int gretval = 0; int seek_read( const int fd, uint8_t * const buf, const int size, const long long pos ) { if( lseek( fd, pos, SEEK_SET ) == pos ) return readblock( fd, buf, size ); return 0; } // infd and outfd can refer to the same file if copying to a lower file // position or if source and destination blocks don't overlap. // max_size < 0 means no size limit. bool copy_file( const int infd, const int outfd, const long long max_size = -1 ) { const int buffer_size = 65536; // remaining number of bytes to copy long long rest = ( ( max_size >= 0 ) ? max_size : buffer_size ); long long copied_size = 0; uint8_t * const buffer = new uint8_t[buffer_size]; bool error = false; while( rest > 0 ) { const int size = std::min( (long long)buffer_size, rest ); if( max_size >= 0 ) rest -= size; const int rd = readblock( infd, buffer, size ); if( rd != size && errno ) { show_error( "Error reading input file", errno ); error = true; break; } if( rd > 0 ) { const int wr = writeblock( outfd, buffer, rd ); if( wr != rd ) { show_error( "Error writing output file", errno ); error = true; break; } copied_size += rd; } if( rd < size ) break; // EOF } delete[] buffer; return ( !error && ( max_size < 0 || copied_size == max_size ) ); } /* Check archive type. If success, leave fd file pos at 0. If remove_eof, leave fd file pos at beginning of the EOF blocks. */ bool check_appendable( const int fd, const bool remove_eof ) { struct stat st; if( fstat( fd, &st ) != 0 || !S_ISREG( st.st_mode ) ) return false; if( lseek( fd, 0, SEEK_SET ) != 0 ) return false; enum { bufsize = header_size + ( header_size / 8 ) }; uint8_t buf[bufsize]; int rd = readblock( fd, buf, bufsize ); if( rd == 0 && errno == 0 ) return true; // append to empty archive if( rd < min_member_size || ( rd != bufsize && errno ) ) return false; const Lzip_header * const p = (const Lzip_header *)buf; // shut up gcc if( !p->verify_magic() || !p->verify_version() ) return false; LZ_Decoder * decoder = LZ_decompress_open(); // decompress first header if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok || LZ_decompress_write( decoder, buf, rd ) != rd || ( rd = LZ_decompress_read( decoder, buf, header_size ) ) != header_size ) { LZ_decompress_close( decoder ); return false; } LZ_decompress_close( decoder ); const bool maybe_eof = ( buf[0] == 0 ); if( !verify_ustar_chksum( buf ) && !maybe_eof ) return false; const long long end = lseek( fd, 0, SEEK_END ); if( end < min_member_size ) return false; Lzip_trailer trailer; if( seek_read( fd, trailer.data, Lzip_trailer::size, end - Lzip_trailer::size ) != Lzip_trailer::size ) return false; const long long member_size = trailer.member_size(); if( member_size < min_member_size || member_size > end || ( maybe_eof && member_size != end ) ) return false; Lzip_header header; if( seek_read( fd, header.data, Lzip_header::size, end - member_size ) != Lzip_header::size ) return false; if( !header.verify_magic() || !header.verify_version() || !isvalid_ds( header.dictionary_size() ) ) return false; const unsigned long long data_size = trailer.data_size(); if( data_size < header_size || data_size > 32256 ) return false; const unsigned data_crc = trailer.data_crc(); const CRC32 crc32; uint32_t crc = 0xFFFFFFFFU; for( unsigned i = 0; i < data_size; ++i ) crc32.update_byte( crc, 0 ); crc ^= 0xFFFFFFFFU; if( crc != data_crc ) return false; const long long pos = remove_eof ? end - member_size : 0; return ( lseek( fd, pos, SEEK_SET ) == pos ); } class File_is_the_archive { dev_t archive_dev; ino_t archive_ino; bool initialized; public: File_is_the_archive() : initialized( false ) {} bool init( const int fd ) { struct stat st; if( fstat( fd, &st ) != 0 ) return false; if( S_ISREG( st.st_mode ) ) { archive_dev = st.st_dev; archive_ino = st.st_ino; initialized = true; } return true; } bool operator()( const struct stat & st ) const { return initialized && archive_dev == st.st_dev && archive_ino == st.st_ino; } } file_is_the_archive; bool archive_write( const uint8_t * const buf, const int size ) { if( !encoder ) // uncompressed return ( writeblock( outfd, buf, size ) == size ); enum { obuf_size = 65536 }; uint8_t obuf[obuf_size]; int sz = 0; if( size <= 0 ) LZ_compress_finish( encoder ); // flush encoder while( sz < size || size <= 0 ) { const int wr = LZ_compress_write( encoder, buf + sz, size - sz ); if( wr < 0 ) internal_error( "library error (LZ_compress_write)." ); sz += wr; if( sz >= size && size > 0 ) break; // minimize dictionary size const int rd = LZ_compress_read( encoder, obuf, obuf_size ); if( rd < 0 ) internal_error( "library error (LZ_compress_read)." ); if( rd == 0 && sz >= size ) break; if( writeblock( outfd, obuf, rd ) != rd ) return false; } if( LZ_compress_finished( encoder ) == 1 && LZ_compress_restart_member( encoder, LLONG_MAX ) < 0 ) internal_error( "library error (LZ_compress_restart_member)." ); return true; } void init_tar_header( Tar_header header ) // set magic and version { std::memset( header, 0, header_size ); std::memcpy( header + magic_o, ustar_magic, magic_l - 1 ); header[version_o] = header[version_o+1] = '0'; } unsigned char xdigit( const unsigned value ) { if( value <= 9 ) return '0' + value; if( value <= 15 ) return 'A' + value - 10; return 0; } void print_hex( char * const buf, int size, unsigned long long num ) { while( --size >= 0 ) { buf[size] = xdigit( num & 0x0F ); num >>= 4; } } void print_octal( uint8_t * const buf, int size, unsigned long long num ) { while( --size >= 0 ) { buf[size] = '0' + ( num % 8 ); num /= 8; } } bool write_extended( const Extended & extended ) { const int path_rec = extended.recsize_path(); const int lpath_rec = extended.recsize_linkpath(); const int size_rec = extended.recsize_file_size(); const unsigned long long edsize = extended.edsize(); const unsigned long long bufsize = extended.edsize_pad(); if( edsize >= 1ULL << 33 ) return false; // too much extended data if( bufsize == 0 ) return edsize == 0; // overflow or no extended data char * const buf = new char[bufsize+1]; // extended records buffer unsigned long long pos = path_rec; // goto can't cross these const unsigned crc_size = Extended::crc_record.size(); if( path_rec && snprintf( buf, path_rec + 1, "%d path=%s\n", path_rec, extended.path().c_str() ) != path_rec ) goto error; if( lpath_rec && snprintf( buf + pos, lpath_rec + 1, "%d linkpath=%s\n", lpath_rec, extended.linkpath().c_str() ) != lpath_rec ) goto error; pos += lpath_rec; if( size_rec && snprintf( buf + pos, size_rec + 1, "%d size=%llu\n", size_rec, extended.file_size() ) != size_rec ) goto error; pos += size_rec; std::memcpy( buf + pos, Extended::crc_record.c_str(), crc_size ); pos += crc_size; if( pos != edsize ) goto error; print_hex( buf + edsize - 9, 8, crc32c.windowed_crc( (const uint8_t *)buf, edsize - 9, edsize ) ); std::memset( buf + edsize, 0, bufsize - edsize ); // wipe padding Tar_header header; // extended header init_tar_header( header ); header[typeflag_o] = tf_extended; // fill only required fields print_octal( header + size_o, size_l - 1, edsize ); print_octal( header + chksum_o, chksum_l - 1, ustar_chksum( header ) ); if( !archive_write( header, header_size ) ) goto error; for( pos = 0; pos < bufsize; ) // write extended records to archive { int size = std::min( bufsize - pos, 1ULL << 20 ); if( !archive_write( (const uint8_t *)buf + pos, size ) ) goto error; pos += size; } delete[] buf; return true; error: delete[] buf; return false; } const char * remove_leading_dotdot( const char * const filename ) { static std::string prefix; const char * p = filename; for( int i = 0; filename[i]; ++i ) if( filename[i] == '.' && filename[i+1] == '.' && ( i == 0 || filename[i-1] == '/' ) && ( filename[i+2] == 0 || filename[i+2] == '/' ) ) p = filename + i + 2; while( *p == '/' || ( *p == '.' && p[1] == '/' ) ) ++p; if( p != filename ) { std::string msg( filename, p - filename ); if( prefix != msg ) { prefix = msg; msg = "Removing leading '"; msg += prefix; msg += "' from member names."; show_error( msg.c_str() ); } } if( *p == 0 ) p = "."; return p; } // Return true if it stores filename in the ustar header. bool store_name( const char * const filename, Extended & extended, Tar_header header, const bool force_extended_name ) { const char * const stored_name = remove_leading_dotdot( filename ); if( !force_extended_name ) // try storing filename in the ustar header { const int len = std::strlen( stored_name ); enum { max_len = prefix_l + 1 + name_l }; // prefix + '/' + name if( len <= name_l ) // stored_name fits in name { std::memcpy( header + name_o, stored_name, len ); return true; } if( len <= max_len ) // find shortest prefix for( int i = len - name_l - 1; i < len && i <= prefix_l; ++i ) if( stored_name[i] == '/' ) // stored_name can be split { std::memcpy( header + name_o, stored_name + i + 1, len - i - 1 ); std::memcpy( header + prefix_o, stored_name, i ); return true; } } // store filename in extended record, leave name zeroed in ustar header extended.path( stored_name ); return false; } int add_member( const char * const filename, const struct stat *, const int flag, struct FTW * ) { struct stat st; if( lstat( filename, &st ) != 0 ) { show_file_error( filename, "Can't stat input file", errno ); gretval = 1; return 0; } if( file_is_the_archive( st ) ) { show_file_error( archive_namep, "File is the archive; not dumped." ); return 0; } Extended extended; // metadata for extended records Tar_header header; init_tar_header( header ); bool force_extended_name = false; const mode_t mode = st.st_mode; print_octal( header + mode_o, mode_l - 1, mode & ( S_ISUID | S_ISGID | S_ISVTX | S_IRWXU | S_IRWXG | S_IRWXO ) ); const uid_t uid = ( cl_owner >= 0 ) ? (uid_t)cl_owner : st.st_uid; const gid_t gid = ( cl_group >= 0 ) ? (gid_t)cl_group : st.st_gid; if( uid >= 2 << 20 || gid >= 2 << 20 ) { show_file_error( filename, "uid or gid is larger than 2_097_151." ); gretval = 1; return 0; } print_octal( header + uid_o, uid_l - 1, uid ); print_octal( header + gid_o, gid_l - 1, gid ); const long long mtime = st.st_mtime; // shut up gcc if( mtime < 0 || mtime >= 1LL << 33 ) { show_file_error( filename, "mtime is out of ustar range [0, 8_589_934_591]." ); gretval = 1; return 0; } print_octal( header + mtime_o, mtime_l - 1, mtime ); unsigned long long file_size = 0; Typeflag typeflag; if( S_ISREG( mode ) ) { typeflag = tf_regular; file_size = st.st_size; } else if( S_ISDIR( mode ) ) { typeflag = tf_directory; if( flag == FTW_DNR ) { show_file_error( filename, "Can't open directory", errno ); gretval = 1; return 0; } } else if( S_ISLNK( mode ) ) { typeflag = tf_symlink; long len; if( st.st_size <= linkname_l ) len = readlink( filename, (char *)header + linkname_o, linkname_l ); else { char * const buf = new char[st.st_size+1]; len = readlink( filename, buf, st.st_size ); if( len == st.st_size ) { buf[len] = 0; extended.linkpath( buf ); force_extended_name = true; } delete[] buf; } if( len != st.st_size ) { show_file_error( filename, "Error reading link", (len < 0) ? errno : 0 ); gretval = 1; return 0; } } else if( S_ISCHR( mode ) || S_ISBLK( mode ) ) { typeflag = S_ISCHR( mode ) ? tf_chardev : tf_blockdev; if( major( st.st_dev ) >= 2 << 20 || minor( st.st_dev ) >= 2 << 20 ) { show_file_error( filename, "devmajor or devminor is larger than 2_097_151." ); gretval = 1; return 0; } print_octal( header + devmajor_o, devmajor_l - 1, major( st.st_dev ) ); print_octal( header + devminor_o, devminor_l - 1, minor( st.st_dev ) ); } else if( S_ISFIFO( mode ) ) typeflag = tf_fifo; else { show_file_error( filename, "Unknown file type." ); gretval = 2; return 0; } header[typeflag_o] = typeflag; const struct passwd * const pw = getpwuid( uid ); if( pw && pw->pw_name ) std::strncpy( (char *)header + uname_o, pw->pw_name, uname_l - 1 ); const struct group * const gr = getgrgid( gid ); if( gr && gr->gr_name ) std::strncpy( (char *)header + gname_o, gr->gr_name, gname_l - 1 ); if( file_size >= 1ULL << 33 ) { extended.file_size( file_size ); force_extended_name = true; } else print_octal( header + size_o, size_l - 1, file_size ); store_name( filename, extended, header, force_extended_name ); print_octal( header + chksum_o, chksum_l - 1, ustar_chksum( header ) ); const int infd = file_size ? open_instream( filename ) : -1; if( file_size && infd < 0 ) { gretval = 1; return 0; } if( encoder && solidity == bsolid ) { const unsigned long long member_size = header_size + extended.full_size() + round_up( file_size ); const unsigned long long target_size = cl_data_size; if( partial_data_size >= target_size || ( partial_data_size >= min_data_size && partial_data_size + member_size / 2 > target_size ) ) { partial_data_size = member_size; if( !archive_write( 0, 0 ) ) { show_error( "Error flushing encoder", errno ); return 1; } } else partial_data_size += member_size; } if( !extended.empty() && !write_extended( extended ) ) { show_error( "Error writing extended header", errno ); return 1; } if( !archive_write( header, header_size ) ) { show_error( "Error writing ustar header", errno ); return 1; } if( file_size ) { enum { bufsize = 32 * header_size }; uint8_t buf[bufsize]; unsigned long long rest = file_size; while( rest > 0 ) { int size = std::min( rest, (unsigned long long)bufsize ); const int rd = readblock( infd, buf, size ); rest -= rd; if( rd != size ) { if( verbosity >= 0 ) std::fprintf( stderr, "File '%s' ends unexpectedly at pos %llu\n", filename, file_size - rest ); close( infd ); return 1; } if( rest == 0 ) // last read { const int rem = file_size % header_size; if( rem > 0 ) { const int padding = header_size - rem; std::memset( buf + size, 0, padding ); size += padding; } } if( !archive_write( buf, size ) ) { show_error( "Error writing archive", errno ); close( infd ); return 1; } } if( close( infd ) != 0 ) { show_file_error( filename, "Error closing file", errno ); return 1; } } if( encoder && solidity == no_solid && !archive_write( 0, 0 ) ) { show_error( "Error flushing encoder", errno ); return 1; } if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename ); return 0; } } // end namespace unsigned ustar_chksum( const uint8_t * const header ) { unsigned chksum = chksum_l * 0x20; // treat chksum field as spaces for( int i = 0; i < chksum_o; ++i ) chksum += header[i]; for( int i = chksum_o + chksum_l; i < header_size; ++i ) chksum += header[i]; return chksum; } bool verify_ustar_chksum( const uint8_t * const header ) { return ( verify_ustar_magic( header ) && ustar_chksum( header ) == parse_octal( header + chksum_o, chksum_l ) ); } int concatenate( const std::string & archive_name, const Arg_parser & parser, const int filenames ) { if( !filenames ) { if( verbosity >= 1 ) show_error( "Nothing to concatenate." ); return 0; } if( archive_name.empty() ) { show_error( "'--concatenate' is incompatible with '-f -'.", 0, true ); return 1; } if( ( outfd = open_outstream( archive_name, false ) ) < 0 ) return 1; if( !file_is_the_archive.init( outfd ) ) { show_file_error( archive_name.c_str(), "Can't stat", errno ); return 1; } int retval = 0; for( int i = 0; i < parser.arguments(); ++i ) // copy archives { if( parser.code( i ) ) continue; // skip options const char * const filename = parser.argument( i ).c_str(); const int infd = open_instream( filename ); if( infd < 0 ) { show_file_error( filename, "Can't open input file", errno ); retval = 1; break; } if( !check_appendable( infd, false ) ) { show_file_error( filename, "Not an appendable tar.lz archive." ); close( infd ); retval = 2; break; } struct stat st; if( fstat( infd, &st ) == 0 && file_is_the_archive( st ) ) { show_file_error( filename, "File is the archive; not concatenated." ); close( infd ); continue; } if( !check_appendable( outfd, true ) ) { show_error( "This does not look like an appendable tar.lz archive." ); close( infd ); retval = 2; break; } if( !copy_file( infd, outfd ) || close( infd ) != 0 ) { show_file_error( filename, "Error copying archive", errno ); retval = 1; break; } if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename ); } if( close( outfd ) != 0 && !retval ) { show_error( "Error closing archive", errno ); retval = 1; } return retval; } int encode( const std::string & archive_name, const Arg_parser & parser, const int filenames, const int level, const bool append ) { struct Lzma_options { int dictionary_size; // 4 KiB .. 512 MiB int match_len_limit; // 5 .. 273 }; const Lzma_options option_mapping[] = { { 65535, 16 }, // -0 { 1 << 20, 5 }, // -1 { 3 << 19, 6 }, // -2 { 1 << 21, 8 }, // -3 { 3 << 20, 12 }, // -4 { 1 << 22, 20 }, // -5 { 1 << 23, 36 }, // -6 { 1 << 24, 68 }, // -7 { 3 << 23, 132 }, // -8 { 1 << 25, 273 } }; // -9 const bool compressed = ( level >= 0 && level <= 9 ); if( !append ) { if( !filenames ) { show_error( "Cowardly refusing to create an empty archive.", 0, true ); return 1; } if( archive_name.empty() ) outfd = STDOUT_FILENO; else if( ( outfd = open_outstream( archive_name ) ) < 0 ) return 1; } else { if( !filenames ) { if( verbosity >= 1 ) show_error( "Nothing to append." ); return 0; } if( archive_name.empty() ) { show_error( "'--append' is incompatible with '-f -'.", 0, true ); return 1; } if( !compressed ) { show_error( "'--append' is incompatible with '--uncompressed'.", 0, true ); return 1; } if( ( outfd = open_outstream( archive_name, false ) ) < 0 ) return 1; if( !check_appendable( outfd, true ) ) { show_error( "This does not look like an appendable tar.lz archive." ); return 2; } } archive_namep = archive_name.size() ? archive_name.c_str() : "(stdout)"; if( !file_is_the_archive.init( outfd ) ) { show_file_error( archive_namep, "Can't stat", errno ); return 1; } if( compressed ) { const int dictionary_size = option_mapping[level].dictionary_size; if( cl_data_size <= 0 ) { if( level == 0 ) cl_data_size = 1 << 20; else cl_data_size = 2 * dictionary_size; } encoder = LZ_compress_open( dictionary_size, option_mapping[level].match_len_limit, LLONG_MAX ); if( !encoder || LZ_compress_errno( encoder ) != LZ_ok ) { if( !encoder || LZ_compress_errno( encoder ) == LZ_mem_error ) show_error( "Not enough memory. Try a lower compression level." ); else internal_error( "invalid argument to encoder." ); return 1; } } int retval = 0; for( int i = 0; i < parser.arguments(); ++i ) // write members { const int code = parser.code( i ); const std::string & arg = parser.argument( i ); const char * filename = arg.c_str(); if( code == 'C' && chdir( filename ) != 0 ) { show_file_error( filename, "Error changing working directory", errno ); retval = 1; break; } if( code ) continue; // skip options std::string deslashed; // arg without trailing slashes unsigned len = arg.size(); while( len > 1 && arg[len-1] == '/' ) --len; if( len < arg.size() ) { deslashed.assign( arg, 0, len ); filename = deslashed.c_str(); } struct stat st; if( lstat( filename, &st ) != 0 ) { show_file_error( filename, "Can't stat input file", errno ); if( gretval < 1 ) gretval = 1; } else if( ( retval = nftw( filename, add_member, 16, FTW_PHYS ) ) != 0 ) break; // write error else if( encoder && solidity == dsolid && !archive_write( 0, 0 ) ) { show_error( "Error flushing encoder", errno ); retval = 1; } } if( !retval ) // write End-Of-Archive records { enum { bufsize = 2 * header_size }; uint8_t buf[bufsize]; std::memset( buf, 0, bufsize ); if( encoder && ( solidity == asolid || solidity == bsolid ) && !archive_write( 0, 0 ) ) { show_error( "Error flushing encoder", errno ); retval = 1; } else if( !archive_write( buf, bufsize ) || ( encoder && !archive_write( 0, 0 ) ) ) // flush encoder { show_error( "Error writing end-of-archive blocks", errno ); retval = 1; } } if( encoder && LZ_compress_close( encoder ) < 0 ) { show_error( "LZ_compress_close failed." ); retval = 1; } if( close( outfd ) != 0 && !retval ) { show_error( "Error closing archive", errno ); retval = 1; } if( retval && archive_name.size() && !append ) std::remove( archive_name.c_str() ); if( !retval && gretval ) { show_error( "Exiting with failure status due to previous errors." ); retval = gretval; } return retval; }