/* Tarlz - Archiver with multimember lzip compression
Copyright (C) 2013-2019 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define _FILE_OFFSET_BITS 64
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#if defined(__GNU_LIBRARY__)
#include // for major, minor
#endif
#include
#include
#include
#include
#include "arg_parser.h"
#include "tarlz.h"
// Program-wide CRC32 object, constructed with argument 'true'.
// NOTE(review): the meaning of the flag (presumably selects the CRC32-C
// polynomial) is defined by class CRC32 in tarlz.h -- confirm there.
const CRC32 crc32c( true );
// Owner id forced from the command line; a negative value makes fill_headers
// use the uid reported by lstat for each file.
int cl_owner = -1; // global vars needed by add_member
// Group id forced from the command line; a negative value makes fill_headers
// use the gid reported by lstat for each file.
int cl_group = -1;
// Target amount of data per compressed block (used by block_is_full).
// A value <= 0 is replaced by a level-dependent default in encode.
int cl_data_size = 0;
// Selects the points at which the encoder is flushed while creating the
// archive (compared against bsolid, no_solid, dsolid, asolid and solid).
Solidity solidity = bsolid;
namespace {
// Compressor used by archive_write; a null pointer means that the archive
// is written uncompressed.
LZ_Encoder * encoder = 0; // local vars needed by add_member
// Name of the archive used in diagnostics; set by encode before any member
// is added.
const char * archive_namep = 0;
// Running total maintained by block_is_full for the block being filled.
unsigned long long partial_data_size = 0; // size of current block
// Scratch buffer into which write_extended formats the extended block.
Resizable_buffer grbuf( 2 * header_size ); // extended header + data
// File descriptor of the archive being written by archive_write.
int goutfd = -1;
// Highest value passed so far to set_error_status; read by
// final_exit_status.
int error_status = 0;
// Remembers the device and inode numbers of the archive, when the archive is
// a regular file, so that the archive can be recognized among the files
// being processed.
class File_is_the_archive
  {
  dev_t archive_dev;
  ino_t archive_ino;
  bool initialized;		// true once a regular archive has been recorded

public:
  File_is_the_archive() : initialized( false ) {}

  // Record the identity of the file open on 'fd'.
  // Returns false only if fstat fails. A file that is not regular is
  // accepted, but nothing is recorded for it.
  bool init( const int fd )
    {
    struct stat archive_st;
    if( fstat( fd, &archive_st ) != 0 ) return false;
    if( !S_ISREG( archive_st.st_mode ) ) return true;
    archive_dev = archive_st.st_dev;
    archive_ino = archive_st.st_ino;
    initialized = true;
    return true;
    }

  // Return true if 'st' has the device and inode recorded by init.
  bool operator()( const struct stat & st ) const
    {
    if( !initialized ) return false;
    return st.st_dev == archive_dev && st.st_ino == archive_ino;
    }
  } file_is_the_archive;
// Return true if an option with code 'C' appears in the command line after
// the first relative file name (a non-option, non-empty argument that does
// not begin with '/').
bool option_C_after_relative_filename( const Arg_parser & parser )
  {
  const int num_args = parser.arguments();
  int idx = 0;
  for( ; idx < num_args; ++idx )	// find the first relative file name
    {
    if( parser.code( idx ) ) continue;		// skip options
    const std::string & name = parser.argument( idx );
    if( name.size() && name[0] != '/' ) break;
    }
  for( ++idx; idx < num_args; ++idx )	// look for a later 'C' option
    if( parser.code( idx ) == 'C' ) return true;
  return false;
  }
// Position 'fd' at absolute offset 'pos' and read up to 'size' bytes into
// 'buf'. Returns the result of readblock, or 0 if the seek does not land
// on 'pos'.
int seek_read( const int fd, uint8_t * const buf, const int size,
               const long long pos )
  {
  const bool positioned = ( lseek( fd, pos, SEEK_SET ) == pos );
  if( !positioned ) return 0;
  return readblock( fd, buf, size );
  }
// Copy data from infd to outfd in chunks of up to 64 KiB.
// infd and outfd can refer to the same file if copying to a lower file
// position or if source and destination blocks don't overlap.
// max_size < 0 means no size limit (copy until EOF).
// Returns true if no error happened and, when a limit was given, exactly
// max_size bytes were copied.
bool copy_file( const int infd, const int outfd, const long long max_size = -1 )
  {
  const int buffer_size = 65536;
  const bool limited = ( max_size >= 0 );
  // bytes still to be copied; stays at buffer_size when there is no limit
  long long rest = limited ? max_size : buffer_size;
  long long copied_size = 0;
  bool error = false;
  uint8_t * const buffer = new uint8_t[buffer_size];

  for( ; rest > 0; )
    {
    const int size = ( rest < buffer_size ) ? rest : buffer_size;
    if( limited ) rest -= size;
    const int rd = readblock( infd, buffer, size );
    if( rd != size && errno )
      { show_error( "Error reading input file", errno ); error = true; break; }
    if( rd > 0 && writeblock( outfd, buffer, rd ) != rd )
      { show_error( "Error writing output file", errno );
        error = true; break; }
    if( rd > 0 ) copied_size += rd;
    if( rd < size ) break;			// EOF
    }

  delete[] buffer;
  if( error ) return false;
  return !limited || copied_size == max_size;
  }
/* Check archive type. If success, leave fd file pos at 0.
If remove_eof, leave fd file pos at beginning of the EOF blocks.
Returns true if fd is an empty regular file or looks like a tar.lz archive
whose last lzip member contains only zero bytes; returns false on any
read/seek error or failed check. */
bool check_appendable( const int fd, const bool remove_eof )
{
struct stat st;
// only regular files can be appended to
if( fstat( fd, &st ) != 0 || !S_ISREG( st.st_mode ) ) return false;
if( lseek( fd, 0, SEEK_SET ) != 0 ) return false;
// read enough compressed data from the start to decode one tar header
enum { bufsize = header_size + ( header_size / 8 ) };
uint8_t buf[bufsize];
int rd = readblock( fd, buf, bufsize );
if( rd == 0 && errno == 0 ) return true; // append to empty archive
if( rd < min_member_size || ( rd != bufsize && errno ) ) return false;
// the file must begin with a valid lzip header
const Lzip_header * const p = (const Lzip_header *)buf; // shut up gcc
if( !p->verify_magic() || !p->verify_version() ) return false;
// buf is reused: compressed input goes in, the first header_size
// decompressed bytes come out
LZ_Decoder * decoder = LZ_decompress_open(); // decompress first header
if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok ||
LZ_decompress_write( decoder, buf, rd ) != rd ||
( rd = LZ_decompress_read( decoder, buf, header_size ) ) != header_size )
{ LZ_decompress_close( decoder ); return false; }
LZ_decompress_close( decoder );
// a first block starting with a zero byte may be an EOF block; any other
// first block must be a ustar header with a valid checksum
const bool maybe_eof = ( buf[0] == 0 );
if( !verify_ustar_chksum( buf ) && !maybe_eof ) return false;
// inspect the last lzip member, starting from its trailer at end of file
const long long end = lseek( fd, 0, SEEK_END );
if( end < min_member_size ) return false;
Lzip_trailer trailer;
if( seek_read( fd, trailer.data, Lzip_trailer::size,
end - Lzip_trailer::size ) != Lzip_trailer::size )
return false;
// if the first block looked like EOF, the last member must be the whole file
const long long member_size = trailer.member_size();
if( member_size < min_member_size || member_size > end ||
( maybe_eof && member_size != end ) ) return false;
// the header of the last member must also be valid
Lzip_header header;
if( seek_read( fd, header.data, Lzip_header::size,
end - member_size ) != Lzip_header::size ) return false;
if( !header.verify_magic() || !header.verify_version() ||
!isvalid_ds( header.dictionary_size() ) ) return false;
// the last member must hold between header_size and 32256 bytes of data
const unsigned long long data_size = trailer.data_size();
if( data_size < header_size || data_size > 32256 ) return false;
// the member is not decompressed; instead the CRC of data_size zero bytes
// is computed and compared with the CRC stored in the trailer
const unsigned data_crc = trailer.data_crc();
const CRC32 crc32;
uint32_t crc = 0xFFFFFFFFU;
for( unsigned i = 0; i < data_size; ++i ) crc32.update_byte( crc, 0 );
crc ^= 0xFFFFFFFFU;
if( crc != data_crc ) return false;
// leave the file positioned at the start of the last member, or at 0
const long long pos = remove_eof ? end - member_size : 0;
return ( lseek( fd, pos, SEEK_SET ) == pos );
}
// Write 'size' bytes from 'buf' to the archive open on goutfd, passing them
// through the encoder when there is one.
// A call with size <= 0 flushes the encoder: the current lzip member is
// finished, its remaining output is written, and a new member is started.
// Returns false if writing to goutfd fails. Errors reported by the
// compression library are passed to internal_error.
bool archive_write( const uint8_t * const buf, const int size )
{
// remembers whether the previous call was a flush, so that two consecutive
// flushes do not produce an empty member
static bool flushed = true; // avoid flushing empty lzip members
if( size <= 0 && flushed ) return true;
flushed = ( size <= 0 );
if( !encoder ) // uncompressed
return ( writeblock( goutfd, buf, size ) == size );
enum { obuf_size = 65536 };
uint8_t obuf[obuf_size];
int sz = 0; // number of bytes of buf accepted by the encoder
if( size <= 0 ) LZ_compress_finish( encoder ); // flush encoder
// alternate feeding the encoder and draining its output; when flushing,
// keep draining until the encoder returns no more data
while( sz < size || size <= 0 )
{
const int wr = LZ_compress_write( encoder, buf + sz, size - sz );
if( wr < 0 ) internal_error( "library error (LZ_compress_write)." );
sz += wr;
// once all the input has been accepted, return without reading output
if( sz >= size && size > 0 ) break; // minimize dictionary size
const int rd = LZ_compress_read( encoder, obuf, obuf_size );
if( rd < 0 ) internal_error( "library error (LZ_compress_read)." );
if( rd == 0 && sz >= size ) break;
if( writeblock( goutfd, obuf, rd ) != rd ) return false;
}
// a finished member means a flush completed; start the next member
if( LZ_compress_finished( encoder ) == 1 &&
LZ_compress_restart_member( encoder, LLONG_MAX ) < 0 )
internal_error( "library error (LZ_compress_restart_member)." );
return true;
}
// Format the extended block for 'extended' into grbuf and write it to the
// archive in pieces of at most 1 MiB.
// Returns false if formatting fails or if any write fails.
bool write_extended( const Extended & extended )
  {
  const long long ebsize = extended.format_block( grbuf );
  if( ebsize < 0 ) return false;
  const long long max_chunk = 1LL << 20;
  long long pos = 0;
  while( pos < ebsize )			// write extended block to archive
    {
    const long long left = ebsize - pos;
    const int chunk = ( left < max_chunk ) ? left : max_chunk;
    if( !archive_write( (const uint8_t *)grbuf() + pos, chunk ) )
      return false;
    pos += chunk;
    }
  return true;
  }
// Store the member name derived from 'filename' (after removing leading
// "./", '/' and ".." prefixes).
// Return true if it stores filename in the ustar header, either whole in the
// name field or split at a '/' between the prefix and name fields.
// Return false if the name is stored in the extended record instead, which
// happens when force_extended_name is set or the name does not fit.
bool store_name( const char * const filename, Extended & extended,
                 Tar_header header, const bool force_extended_name )
  {
  const char * const stored_name = remove_leading_dotslash( filename, true );
  if( force_extended_name )
    { extended.path( stored_name ); return false; }

  const int len = std::strlen( stored_name );
  if( len <= name_l )				// the whole name fits in name
    { std::memcpy( header + name_o, stored_name, len ); return true; }

  enum { max_len = prefix_l + 1 + name_l };	// prefix + '/' + name
  if( len <= max_len )
    {
    // try the split points from the shortest possible prefix upwards
    const int first = len - name_l - 1;
    for( int slash = first; slash < len && slash <= prefix_l; ++slash )
      {
      if( stored_name[slash] != '/' ) continue;
      const char * const name_part = stored_name + slash + 1;
      std::memcpy( header + name_o, name_part, len - slash - 1 );
      std::memcpy( header + prefix_o, stored_name, slash );
      return true;
      }
    }

  // the name can't be stored in the ustar header; leave it zeroed there
  extended.path( stored_name );
  return false;
  }
// Add one tar member to the archive. This function is passed to nftw by
// encode, so it is called once for each file found in the tree.
// Writes the extended header (if any), the ustar header, and the file data
// padded with zeros to a multiple of header_size.
// Returns 0 to continue with the next file (also when this file is skipped
// because its headers can't be filled or it can't be opened), or 1 on a
// write or read error, which makes encode stop.
// The input file descriptor is closed on every path that opened it.
int add_member( const char * const filename, const struct stat *,
                const int flag, struct FTW * )
  {
  unsigned long long file_size = 0;
  Extended extended;		// metadata for extended records
  Tar_header header;
  if( !fill_headers( filename, extended, header, file_size, flag ) ) return 0;
  // only files with data need to be opened
  const int infd = file_size ? open_instream( filename ) : -1;
  if( file_size && infd < 0 ) { set_error_status( 1 ); return 0; }

  // flush the current block if it is full, then write the headers;
  // stop at the first step that fails
  const char * errmsg = 0;
  if( encoder && solidity == bsolid &&
      block_is_full( extended, file_size, partial_data_size ) &&
      !archive_write( 0, 0 ) )
    errmsg = "Error flushing encoder";
  else if( !write_extended( extended ) )
    errmsg = "Error writing extended header";
  else if( !archive_write( header, header_size ) )
    errmsg = "Error writing ustar header";
  if( errmsg )
    {
    show_error( errmsg, errno );
    if( infd >= 0 ) close( infd );	// don't leak the input file
    return 1;
    }

  if( file_size )
    {
    enum { bufsize = 32 * header_size };
    uint8_t buf[bufsize];
    unsigned long long rest = file_size;
    while( rest > 0 )
      {
      int size = std::min( rest, (unsigned long long)bufsize );
      const int rd = readblock( infd, buf, size );
      rest -= rd;
      if( rd != size )		// file is shorter than the size in the header
        {
        if( verbosity >= 0 )
          std::fprintf( stderr, "File '%s' ends unexpectedly at pos %llu\n",
                        filename, file_size - rest );
        close( infd ); return 1;
        }
      if( rest == 0 )				// last read
        {
        // pad the data with zeros up to a multiple of header_size
        const int rem = file_size % header_size;
        if( rem > 0 )
          { const int padding = header_size - rem;
            std::memset( buf + size, 0, padding ); size += padding; }
        }
      if( !archive_write( buf, size ) )
        { show_error( "Error writing archive", errno ); close( infd );
          return 1; }
      }
    if( close( infd ) != 0 )
      { show_file_error( filename, "Error closing file", errno ); return 1; }
    }
  // with no_solid, each member goes in its own lzip member
  if( encoder && solidity == no_solid && !archive_write( 0, 0 ) )
    { show_error( "Error flushing encoder", errno ); return 1; }
  if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename );
  return 0;
  }
} // end namespace
/* Removes any amount of leading "./" and '/' strings from filename.
   Optionally also removes prefixes containing a ".." component.
   Returns a pointer into filename past the removed prefix, or "." if a
   non-empty filename is left empty. The first time each distinct prefix is
   removed, a message is shown. */
const char * remove_leading_dotslash( const char * const filename,
                                      const bool dotdot )
  {
  // prevent two threads from modifying the list of prefixes at the same time
  static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
  static std::vector< std::string > prefixes;	// list of prefixes
  const char * p = filename;

  if( dotdot )			// skip past the last ".." component
    for( const char * s = filename; *s; ++s )
      {
      if( s[0] != '.' || s[1] != '.' ) continue;
      const bool starts_component = ( s == filename || s[-1] == '/' );
      const bool ends_component = ( s[2] == 0 || s[2] == '/' );
      if( starts_component && ends_component ) p = s + 2;
      }

  for( ; ; ++p )		// skip leading '/' and "./"
    {
    if( *p == '/' ) continue;
    if( *p == '.' && p[1] == '/' ) continue;
    break;
    }

  if( p != filename )
    {
    const std::string prefix( filename, p - filename );
    xlock( &mutex );
    bool known = false;
    for( unsigned i = 0; i < prefixes.size() && !known; ++i )
      known = ( prefixes[i] == prefix );
    if( !known )		// report each removed prefix only once
      {
      prefixes.push_back( prefix );
      const std::string msg =
        "Removing leading '" + prefix + "' from member names.";
      show_error( msg.c_str() );
      }
    xunlock( &mutex );
    }

  if( *p == 0 && *filename != 0 ) p = ".";
  return p;
  }
// Fill the ustar header (and, when needed, the extended records) for
// 'filename' from the data returned by lstat.
//   filename   file to be archived
//   extended   receives the records that don't fit in the ustar header
//              (long link target, large file size, long name)
//   header     ustar header to be filled
//   file_size  set to the size of the file if it is a regular file
//   flag       type flag received from nftw; FTW_DNR marks an unreadable
//              directory
// Returns false if the file must be skipped: it can't be stat'ed, it is the
// archive itself, some value is out of the ustar range, it is an unreadable
// directory, its link can't be read, or its type is unknown. Most of these
// cases also set the error status.
// A failure to find the user or group name sets the error status but does
// not skip the file.
bool fill_headers( const char * const filename, Extended & extended,
                   Tar_header header, unsigned long long & file_size,
                   const int flag )
  {
  struct stat st;
  if( lstat( filename, &st ) != 0 )
    { show_file_error( filename, "Can't stat input file", errno );
      set_error_status( 1 ); return false; }
  if( file_is_the_archive( st ) )
    { show_file_error( archive_namep, "File is the archive; not dumped." );
      return false; }
  init_tar_header( header );
  bool force_extended_name = false;

  const mode_t mode = st.st_mode;
  print_octal( header + mode_o, mode_l - 1,
               mode & ( S_ISUID | S_ISGID | S_ISVTX |
                        S_IRWXU | S_IRWXG | S_IRWXO ) );
  // owner and group given in the command line override those of the file
  const uid_t uid = ( cl_owner >= 0 ) ? (uid_t)cl_owner : st.st_uid;
  const gid_t gid = ( cl_group >= 0 ) ? (gid_t)cl_group : st.st_gid;
  if( uid >= 2 << 20 || gid >= 2 << 20 )
    { show_file_error( filename, "uid or gid is larger than 2_097_151." );
      set_error_status( 1 ); return false; }
  print_octal( header + uid_o, uid_l - 1, uid );
  print_octal( header + gid_o, gid_l - 1, gid );
  const long long mtime = st.st_mtime;			// shut up gcc
  if( mtime < 0 || mtime >= 1LL << 33 )
    { show_file_error( filename, "mtime is out of ustar range [0, 8_589_934_591]." );
      set_error_status( 1 ); return false; }
  print_octal( header + mtime_o, mtime_l - 1, mtime );

  Typeflag typeflag;
  if( S_ISREG( mode ) ) { typeflag = tf_regular; file_size = st.st_size; }
  else if( S_ISDIR( mode ) )
    {
    typeflag = tf_directory;
    if( flag == FTW_DNR )
      { show_file_error( filename, "Can't open directory", errno );
        set_error_status( 1 ); return false; }
    }
  else if( S_ISLNK( mode ) )
    {
    typeflag = tf_symlink;
    long len;
    if( st.st_size <= linkname_l )	// link target fits in the ustar header
      len = readlink( filename, (char *)header + linkname_o, linkname_l );
    else				// store link target in extended record
      {
      char * const buf = new char[st.st_size+1];
      len = readlink( filename, buf, st.st_size );
      if( len == st.st_size )
        { buf[len] = 0; extended.linkpath( buf ); force_extended_name = true; }
      delete[] buf;
      }
    if( len != st.st_size )
      { show_file_error( filename, "Error reading link", (len < 0) ? errno : 0 );
        set_error_status( 1 ); return false; }
    }
  else if( S_ISCHR( mode ) || S_ISBLK( mode ) )
    {
    typeflag = S_ISCHR( mode ) ? tf_chardev : tf_blockdev;
    // st_rdev is the device that this special file represents; st_dev is
    // the device containing the file and must not be stored here
    if( major( st.st_rdev ) >= 2 << 20 || minor( st.st_rdev ) >= 2 << 20 )
      { show_file_error( filename, "devmajor or devminor is larger than 2_097_151." );
        set_error_status( 1 ); return false; }
    print_octal( header + devmajor_o, devmajor_l - 1, major( st.st_rdev ) );
    print_octal( header + devminor_o, devminor_l - 1, minor( st.st_rdev ) );
    }
  else if( S_ISFIFO( mode ) ) typeflag = tf_fifo;
  else { show_file_error( filename, "Unknown file type." );
         set_error_status( 2 ); return false; }
  header[typeflag_o] = typeflag;

  // errno is cleared first so that a missing database entry is not reported
  // with the error code of an earlier call
  errno = 0;
  const struct passwd * const pw = getpwuid( uid );
  if( pw && pw->pw_name )
    std::strncpy( (char *)header + uname_o, pw->pw_name, uname_l - 1 );
  else { show_file_error( filename, "Can't read user name from database", errno );
         set_error_status( 1 ); }
  errno = 0;
  const struct group * const gr = getgrgid( gid );
  if( gr && gr->gr_name )
    std::strncpy( (char *)header + gname_o, gr->gr_name, gname_l - 1 );
  else { show_file_error( filename, "Can't read group name from database", errno );
         set_error_status( 1 ); }

  // sizes of 8 GiB or more go in the extended records
  if( file_size >= 1ULL << 33 )
    { extended.file_size( file_size ); force_extended_name = true; }
  else print_octal( header + size_o, size_l - 1, file_size );
  store_name( filename, extended, header, force_extended_name );
  // the checksum is computed last, once all the other fields are in place
  print_octal( header + chksum_o, chksum_l - 1, ustar_chksum( header ) );
  return true;
  }
// Decide whether the current block must be closed before adding a member
// made of a header, the extended records in 'extended', and 'file_size'
// bytes of data rounded up.
// The block is full if it has already reached the target size
// (cl_data_size), or if it holds at least min_data_size and adding half of
// the new member would exceed the target.
// Updates partial_data_size: it restarts at the size of the new member when
// the block is full, else the size of the new member is added to it.
bool block_is_full( const Extended & extended,
                    const unsigned long long file_size,
                    unsigned long long & partial_data_size )
  {
  const unsigned long long member_size =
    header_size + extended.full_size() + round_up( file_size );
  const unsigned long long target_size = cl_data_size;
  const bool target_reached = ( partial_data_size >= target_size );
  const bool would_overshoot =
    ( partial_data_size >= min_data_size &&
      partial_data_size + member_size / 2 > target_size );
  const bool full = ( target_reached || would_overshoot );
  if( full ) partial_data_size = member_size;
  else partial_data_size += member_size;
  return full;
  }
// Raise the recorded error status to 'retval' if 'retval' is larger than
// the value already recorded.
void set_error_status( const int retval )
  {
  // prevent two threads from modifying the error_status at the same time
  static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
  xlock( &mutex );
  if( retval > error_status ) error_status = retval;
  xunlock( &mutex );
  }
// Return the exit status of the program: 'retval' if it is nonzero or if no
// error status was recorded, else the recorded error status (after showing
// a message).
int final_exit_status( int retval )
  {
  if( retval != 0 || error_status == 0 ) return retval;
  show_error( "Exiting with failure status due to previous errors." );
  return error_status;
  }
unsigned ustar_chksum( const uint8_t * const header )
{
unsigned chksum = chksum_l * 0x20; // treat chksum field as spaces
for( int i = 0; i < chksum_o; ++i ) chksum += header[i];
for( int i = chksum_o + chksum_l; i < header_size; ++i ) chksum += header[i];
return chksum;
}
// Return true if 'header' has a valid ustar magic and the value stored in
// its chksum field matches the checksum computed from its contents.
bool verify_ustar_chksum( const uint8_t * const header )
  {
  if( !verify_ustar_magic( header ) ) return false;
  return ustar_chksum( header ) == parse_octal( header + chksum_o, chksum_l );
  }
int concatenate( const std::string & archive_name, const Arg_parser & parser,
const int filenames )
{
if( !filenames )
{ if( verbosity >= 1 ) show_error( "Nothing to concatenate." ); return 0; }
if( archive_name.empty() )
{ show_error( "'--concatenate' is incompatible with '-f -'.", 0, true );
return 1; }
const int outfd = open_outstream( archive_name, false );
if( outfd < 0 ) return 1;
if( !file_is_the_archive.init( outfd ) )
{ show_file_error( archive_name.c_str(), "Can't stat", errno ); return 1; }
int retval = 0;
for( int i = 0; i < parser.arguments(); ++i ) // copy archives
{
if( parser.code( i ) ) continue; // skip options
if( parser.argument( i ).empty() ) continue; // skip empty names
const char * const filename = parser.argument( i ).c_str();
const int infd = open_instream( filename );
if( infd < 0 )
{ show_file_error( filename, "Can't open input file", errno );
retval = 1; break; }
if( !check_appendable( infd, false ) )
{ show_file_error( filename, "Not an appendable tar.lz archive." );
close( infd ); retval = 2; break; }
struct stat st;
if( fstat( infd, &st ) == 0 && file_is_the_archive( st ) )
{ show_file_error( filename, "File is the archive; not concatenated." );
close( infd ); continue; }
if( !check_appendable( outfd, true ) )
{ show_error( "This does not look like an appendable tar.lz archive." );
close( infd ); retval = 2; break; }
if( !copy_file( infd, outfd ) || close( infd ) != 0 )
{ show_file_error( filename, "Error copying archive", errno );
retval = 1; break; }
if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename );
}
if( close( outfd ) != 0 && !retval )
{ show_error( "Error closing archive", errno ); retval = 1; }
return retval;
}
// Create a new archive, or append to an existing one, with the files named
// in the command line.
//   archive_name  name of the archive; empty means standard output
//   parser        parsed command line; 'C' options change the working
//                 directory and non-option arguments are the files to add
//   filenames     number of file names given
//   level         compression level; a value outside 0..9 means that the
//                 archive is written uncompressed
//   num_workers   if > 0 and the solidity and the command line allow it,
//                 the work is handed over to encode_lz
//   debug_level   passed through to encode_lz
//   append        append to an existing archive instead of creating one
// Returns the value of final_exit_status for the result of the operation,
// except for the early returns: 0 when there is nothing to append, 1 for
// invalid arguments or failures opening the archive or the encoder, 2 if the
// archive is not appendable, or the value returned by encode_lz.
int encode( const std::string & archive_name, const Arg_parser & parser,
            const int filenames, const int level, const int num_workers,
            const int debug_level, const bool append )
  {
  struct Lzma_options
    {
    int dictionary_size;		// 4 KiB .. 512 MiB
    int match_len_limit;		// 5 .. 273
    };
  const Lzma_options option_mapping[] =
    {
    {   65535,  16 },		// -0
    { 1 << 20,   5 },		// -1
    { 3 << 19,   6 },		// -2
    { 1 << 21,   8 },		// -3
    { 3 << 20,  12 },		// -4
    { 1 << 22,  20 },		// -5
    { 1 << 23,  36 },		// -6
    { 1 << 24,  68 },		// -7
    { 3 << 23, 132 },		// -8
    { 1 << 25, 273 } };		// -9
  const bool compressed = ( level >= 0 && level <= 9 );

  if( !append )
    {
    if( !filenames )
      { show_error( "Cowardly refusing to create an empty archive.", 0, true );
        return 1; }
    if( archive_name.empty() ) goutfd = STDOUT_FILENO;
    else if( ( goutfd = open_outstream( archive_name ) ) < 0 ) return 1;
    }
  else
    {
    if( !filenames )
      { if( verbosity >= 1 ) show_error( "Nothing to append." ); return 0; }
    if( archive_name.empty() )
      { show_error( "'--append' is incompatible with '-f -'.", 0, true );
        return 1; }
    if( !compressed )
      { show_error( "'--append' is incompatible with '--uncompressed'.", 0, true );
        return 1; }
    if( ( goutfd = open_outstream( archive_name, false ) ) < 0 ) return 1;
    // leaves goutfd positioned at the EOF blocks, which are overwritten
    if( !check_appendable( goutfd, true ) )
      { show_error( "This does not look like an appendable tar.lz archive." );
        return 2; }
    }

  archive_namep = archive_name.size() ? archive_name.c_str() : "(stdout)";
  if( !file_is_the_archive.init( goutfd ) )
    { show_file_error( archive_namep, "Can't stat", errno ); return 1; }

  if( compressed )
    {
    const int dictionary_size = option_mapping[level].dictionary_size;
    if( cl_data_size <= 0 )		// choose the default block size
      {
      if( level == 0 ) cl_data_size = 1 << 20;
      else cl_data_size = 2 * dictionary_size;
      }
    /* CWD is not per-thread; multi-threaded --create can't be used if a
       -C option appears after a relative filename in the command line. */
    if( solidity != asolid && solidity != solid && num_workers > 0 &&
        !option_C_after_relative_filename( parser ) )
      {
      // show_file_error( archive_namep, "Multi-threaded --create" );
      return encode_lz( archive_namep, parser, dictionary_size,
                        option_mapping[level].match_len_limit, num_workers,
                        goutfd, debug_level );
      }
    encoder = LZ_compress_open( dictionary_size,
                option_mapping[level].match_len_limit, LLONG_MAX );
    if( !encoder || LZ_compress_errno( encoder ) != LZ_ok )
      {
      if( !encoder || LZ_compress_errno( encoder ) == LZ_mem_error )
        show_error( mem_msg2 );
      else
        internal_error( "invalid argument to encoder." );
      return 1;
      }
    }

  int retval = 0;
  for( int i = 0; i < parser.arguments(); ++i )		// parse command line
    {
    const int code = parser.code( i );
    const std::string & arg = parser.argument( i );
    const char * filename = arg.c_str();
    if( code == 'C' && chdir( filename ) != 0 )
      { show_file_error( filename, "Error changing working directory", errno );
        retval = 1; break; }
    if( code ) continue;				// skip options
    if( parser.argument( i ).empty() ) continue;	// skip empty names
    std::string deslashed;		// arg without trailing slashes
    unsigned len = arg.size();
    while( len > 1 && arg[len-1] == '/' ) --len;
    if( len < arg.size() )
      { deslashed.assign( arg, 0, len ); filename = deslashed.c_str(); }
    struct stat st;
    if( lstat( filename, &st ) != 0 )	// filename from command line
      { show_file_error( filename, "Can't stat input file", errno );
        set_error_status( 1 ); }
    else if( ( retval = nftw( filename, add_member, 16, FTW_PHYS ) ) != 0 )
      break;						// write error
    else if( encoder && solidity == dsolid && !archive_write( 0, 0 ) )
      {
      // stop here like any other write error; continuing would let the
      // next nftw call overwrite retval and hide the failure
      show_error( "Error flushing encoder", errno ); retval = 1; break;
      }
    }

  if( !retval )				// write End-Of-Archive records
    {
    enum { bufsize = 2 * header_size };
    uint8_t buf[bufsize];
    std::memset( buf, 0, bufsize );
    // flush first so that the EOF blocks go in their own lzip member
    if( encoder &&
        ( solidity == asolid || ( solidity == bsolid && partial_data_size ) ) &&
        !archive_write( 0, 0 ) )
      { show_error( "Error flushing encoder", errno ); retval = 1; }
    else if( !archive_write( buf, bufsize ) ||
             ( encoder && !archive_write( 0, 0 ) ) )	// flush encoder
      { show_error( "Error writing end-of-archive blocks", errno );
        retval = 1; }
    }
  if( encoder && LZ_compress_close( encoder ) < 0 )
    { show_error( "LZ_compress_close failed." ); retval = 1; }
  if( close( goutfd ) != 0 && !retval )
    { show_error( "Error closing archive", errno ); retval = 1; }
  return final_exit_status( retval );
  }