summary refs log tree commit diff stats
path: root/create.cc
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- create.cc 306
1 files changed, 267 insertions, 39 deletions
diff --git a/create.cc b/create.cc
index c5bed25..ba7d10a 100644
--- a/create.cc
+++ b/create.cc
@@ -28,6 +28,10 @@
#include <stdint.h>
#include <unistd.h>
#include <sys/stat.h>
+#include <sys/types.h>
+#if defined(__GNU_LIBRARY__)
+#include <sys/sysmacros.h> // for major, minor
+#endif
#include <ftw.h>
#include <grp.h>
#include <pwd.h>
@@ -37,6 +41,9 @@
#include "lzip.h"
#include "tarlz.h"
+
+const CRC32C crc32c;
+
int cl_owner = -1; // global vars needed by add_member
int cl_group = -1;
int cl_solid = 0; // 1 = dsolid, 2 = asolid, 3 = solid
@@ -44,6 +51,7 @@ int cl_solid = 0; // 1 = dsolid, 2 = asolid, 3 = solid
namespace {
LZ_Encoder * encoder = 0; // local vars needed by add_member
+const char * archive_namep = 0;
int outfd = -1;
int gretval = 0;
@@ -55,31 +63,67 @@ int seek_read( const int fd, uint8_t * const buf, const int size,
return 0;
}
-// Check archive type, remove EOF blocks, and leave outfd file pos at EOF
-bool check_appendable()
+// Copy data from infd to outfd until EOF, or at most max_size bytes when
+// max_size >= 0 (a negative max_size means no size limit).
+// infd and outfd can refer to the same file if copying to a lower file
+// position or if source and destination blocks don't overlap.
+// Returns true if no error happened and, when a limit was given, exactly
+// max_size bytes were copied.
+bool copy_file( const int infd, const int outfd, const long long max_size = -1 )
+  {
+  enum { chunk_size = 65536 };
+  const bool limited = ( max_size >= 0 );
+  // bytes still to be copied; stays constant (and positive) when unlimited
+  long long pending = limited ? max_size : chunk_size;
+  long long total = 0;
+  bool failed = false;
+  uint8_t * const chunk = new uint8_t[chunk_size];
+
+  while( pending > 0 && !failed )
+    {
+    const int wanted = std::min( (long long)chunk_size, pending );
+    if( limited ) pending -= wanted;
+    const int got = readblock( infd, chunk, wanted );
+    if( got != wanted && errno )
+      { show_error( "Error reading input file", errno ); failed = true; }
+    else if( got > 0 && writeblock( outfd, chunk, got ) != got )
+      { show_error( "Error writing output file", errno ); failed = true; }
+    else
+      {
+      if( got > 0 ) total += got;
+      if( got < wanted ) break;			// EOF
+      }
+    }
+  delete[] chunk;
+  return !failed && ( !limited || total == max_size );
+  }
+
+
+/* Check archive type. If success, leave fd file pos at 0.
+ If remove_eof, leave fd file pos at beginning of the EOF blocks. */
+bool check_appendable( const int fd, const bool remove_eof )
{
struct stat st;
- if( fstat( outfd, &st ) != 0 || !S_ISREG( st.st_mode ) ) return false;
- uint8_t buf[header_size];
- int rd = readblock( outfd, buf, header_size );
+ if( fstat( fd, &st ) != 0 || !S_ISREG( st.st_mode ) ) return false;
+ if( lseek( fd, 0, SEEK_SET ) != 0 ) return false;
+ enum { bufsize = header_size + ( header_size / 8 ) };
+ uint8_t buf[bufsize];
+ int rd = readblock( fd, buf, bufsize );
if( rd == 0 && errno == 0 ) return true; // append to empty archive
- if( rd < min_member_size || ( rd != header_size && errno ) ) return false;
- const Lzip_header * const p = (Lzip_header *)buf; // shut up gcc
+ if( rd < min_member_size || ( rd != bufsize && errno ) ) return false;
+ const Lzip_header * const p = (const Lzip_header *)buf; // shut up gcc
if( !p->verify_magic() ) return false;
LZ_Decoder * decoder = LZ_decompress_open(); // decompress first header
if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok ||
LZ_decompress_write( decoder, buf, rd ) != rd ||
- ( rd = LZ_decompress_read( decoder, buf, header_size ) ) <
- magic_o + magic_l )
+ ( rd = LZ_decompress_read( decoder, buf, header_size ) ) != header_size )
{ LZ_decompress_close( decoder ); return false; }
LZ_decompress_close( decoder );
const bool maybe_eof = ( buf[0] == 0 );
if( !verify_ustar_chksum( buf ) && !maybe_eof ) return false;
- const long long end = lseek( outfd, 0, SEEK_END );
+ const long long end = lseek( fd, 0, SEEK_END );
if( end < min_member_size ) return false;
Lzip_trailer trailer;
- if( seek_read( outfd, trailer.data, Lzip_trailer::size,
+ if( seek_read( fd, trailer.data, Lzip_trailer::size,
end - Lzip_trailer::size ) != Lzip_trailer::size )
return false;
const long long member_size = trailer.member_size();
@@ -87,9 +131,8 @@ bool check_appendable()
( maybe_eof && member_size != end ) ) return false;
Lzip_header header;
- if( seek_read( outfd, header.data, Lzip_header::size,
- end - member_size ) != Lzip_header::size )
- return false;
+ if( seek_read( fd, header.data, Lzip_header::size,
+ end - member_size ) != Lzip_header::size ) return false;
if( !header.verify_magic() || !isvalid_ds( header.dictionary_size() ) )
return false;
@@ -102,12 +145,33 @@ bool check_appendable()
crc ^= 0xFFFFFFFFU;
if( crc != data_crc ) return false;
- if( lseek( outfd, end - member_size, SEEK_SET ) != end - member_size ||
- ftruncate( outfd, end - member_size ) != 0 ) return false;
- return true;
+ const long long pos = remove_eof ? end - member_size : 0;
+ return ( lseek( fd, pos, SEEK_SET ) == pos );
}
+// Remembers the device and inode of the archive open on outfd so that
+// other files can be compared against it. The comparison only ever
+// succeeds after init() has seen a regular file on outfd.
+class File_is_archive
+  {
+  dev_t archive_dev;
+  ino_t archive_ino;
+  bool initialized;
+public:
+  File_is_archive() : initialized( false ) {}
+
+  // Returns false only if outfd can't be examined with fstat.
+  bool init()
+    {
+    struct stat info;
+    if( fstat( outfd, &info ) != 0 ) return false;
+    if( !S_ISREG( info.st_mode ) ) return true;	// nothing to remember
+    archive_dev = info.st_dev;
+    archive_ino = info.st_ino;
+    initialized = true;
+    return true;
+    }
+
+  // True if 'st' has the same device and inode as the remembered archive.
+  bool operator()( const struct stat & st ) const
+    {
+    if( !initialized ) return false;
+    return st.st_dev == archive_dev && st.st_ino == archive_ino;
+    }
+  } file_is_archive;
+
+
bool archive_write( const uint8_t * const buf, const int size )
{
if( !encoder ) // uncompressed
@@ -121,9 +185,10 @@ bool archive_write( const uint8_t * const buf, const int size )
const int wr = LZ_compress_write( encoder, buf + sz, size - sz );
if( wr < 0 ) internal_error( "library error (LZ_compress_write)." );
sz += wr;
+ if( sz >= size && size > 0 ) break; // minimize dictionary size
const int rd = LZ_compress_read( encoder, obuf, obuf_size );
if( rd < 0 ) internal_error( "library error (LZ_compress_read)." );
- if( rd == 0 && sz == size ) break;
+ if( rd == 0 && sz >= size ) break;
if( writeblock( outfd, obuf, rd ) != rd ) return false;
}
if( LZ_compress_finished( encoder ) == 1 &&
@@ -133,11 +198,98 @@ bool archive_write( const uint8_t * const buf, const int size )
}
+// Zero the whole header, then set the magic and version fields.
+void init_tar_header( Tar_header header )
+  {
+  std::memset( header, 0, header_size );
+  std::memcpy( header + magic_o, ustar_magic, magic_l - 1 );
+  header[version_o] = '0';
+  header[version_o+1] = '0';
+  }
+
+
+// Return the upper-case hexadecimal digit for a value in the range 0 to 15,
+// or 0 if the value is out of range.
+unsigned char xdigit( const unsigned value )
+  {
+  return ( value <= 15 ) ? "0123456789ABCDEF"[value] : 0;
+  }
+
+// Store the 'size' least significant hexadecimal digits of 'num' in 'buf',
+// most significant digit first. No terminating null is written.
+void print_hex( char * const buf, int size, unsigned long long num )
+  {
+  for( int i = size - 1; i >= 0; --i )
+    { buf[i] = xdigit( num & 0x0F ); num >>= 4; }
+  }
+
void print_octal( char * const buf, int size, unsigned long long num )
{
while( --size >= 0 ) { buf[size] = '0' + ( num % 8 ); num /= 8; }
}
+// Return the number of decimal digits needed to print 'value' (1 for 0).
+unsigned decimal_digits( unsigned long long value )
+  {
+  unsigned count = 0;
+  do { ++count; value /= 10; } while( value != 0 );
+  return count;
+  }
+
+// Return the total length of an extended record "<length> keyword=value\n",
+// where <length> is the decimal length of the whole record, including the
+// digits of <length> itself.
+unsigned long long record_size( const unsigned keyword_size,
+                                const unsigned long long value_size )
+  {
+  // everything after the length field: ' ' + keyword + '=' + value + '\n'
+  const unsigned long long payload = ( keyword_size + 2 ) + value_size + 1;
+  // adding the digits of the length may itself add one more digit
+  const unsigned long long guess = decimal_digits( payload ) + payload;
+  return decimal_digits( guess ) + payload;
+  }
+/* Write to the archive an extended header (typeflag tf_extended) followed
+   by the extended records built from 'extended': "path" and "linkpath"
+   if non-empty, "size" if greater than 0, and always a final "GNU.crc32"
+   record. Each record has the form "<length> <keyword>=<value>\n".
+   Return true on success. Return false if the records are too large, if a
+   record can't be formatted to its precomputed length, or if archive_write
+   fails. */
+bool write_extended( const Extended & extended )
+ {
+ const int path_rec = extended.path.size() ? // record length, 0 if absent
+ record_size( 4, extended.path.size() ) : 0;
+ const int lpath_rec = extended.linkpath.size() ? // record length, 0 if absent
+ record_size( 8, extended.linkpath.size() ) : 0;
+ const int size_rec = ( extended.size > 0 ) ? // record length, 0 if absent
+ record_size( 4, decimal_digits( extended.size ) ) : 0;
+ const unsigned long long edsize = path_rec + lpath_rec + size_rec + 22; // 22 = length of the GNU.crc32 record
+ const unsigned long long bufsize = round_up( edsize ); // presumably rounds up to whole tar blocks -- TODO confirm round_up
+ if( edsize >= 1ULL << 33 ) return false; // too much extended data
+ if( bufsize == 0 ) return edsize == 0; // overflow or no extended data
+ char * const buf = new char[bufsize+1]; // extended records buffer; +1 for the null written by snprintf
+ unsigned long long pos = path_rec; // goto can't cross this
+ if( path_rec && snprintf( buf, path_rec + 1, "%d path=%s\n",
+ path_rec, extended.path.c_str() ) != path_rec )
+ goto error;
+ if( lpath_rec && snprintf( buf + pos, lpath_rec + 1, "%d linkpath=%s\n",
+ lpath_rec, extended.linkpath.c_str() ) != lpath_rec )
+ goto error;
+ pos += lpath_rec;
+ if( size_rec && snprintf( buf + pos, size_rec + 1, "%d size=%llu\n",
+ size_rec, extended.size ) != size_rec )
+ goto error;
+ pos += size_rec;
+ if( snprintf( buf + pos, 23, "22 GNU.crc32=00000000\n" ) != 22 ) goto error; // CRC record with placeholder digits
+ pos += 22;
+ if( pos != edsize ) goto error; // records must add up to the precomputed size
+ print_hex( buf + edsize - 9, 8, // overwrite the 8 placeholder zeros before the final newline
+ crc32c.windowed_crc( (const uint8_t *)buf, edsize - 9, edsize ) ); // NOTE(review): windowed_crc semantics are not visible here
+ std::memset( buf + edsize, 0, bufsize - edsize ); // wipe padding
+ Tar_header header; // extended header
+ init_tar_header( header );
+ header[typeflag_o] = tf_extended; // fill only required fields
+ print_octal( header + size_o, size_l - 1, edsize ); // size of the records, without padding
+ print_octal( header + chksum_o, chksum_l - 1,
+ ustar_chksum( (const uint8_t *)header ) );
+ if( !archive_write( (const uint8_t *)header, header_size ) ) goto error;
+ for( pos = 0; pos < bufsize; ) // write extended records to archive
+ {
+ int size = std::min( bufsize - pos, 1ULL << 20 ); // at most 1 MiB per archive_write call
+ if( !archive_write( (const uint8_t *)buf + pos, size ) ) goto error;
+ pos += size;
+ }
+ delete[] buf;
+ return true;
+error: // common exit for every failure after 'buf' was allocated
+ delete[] buf;
+ return false;
+ }
+
const char * remove_leading_dotdot( const char * const filename )
{
@@ -164,24 +316,31 @@ const char * remove_leading_dotdot( const char * const filename )
}
-bool split_name( const char * const filename, Tar_header header )
+// Return true if filename fits in the ustar header.
+bool store_name( const char * const filename, Extended & extended,
+ Tar_header header )
{
const char * const stored_name = remove_leading_dotdot( filename );
const int len = std::strlen( stored_name );
enum { max_len = prefix_l + 1 + name_l }; // prefix + '/' + name
+
+ // first try storing filename in the ustar header
if( len <= name_l ) // stored_name fits in name
{ std::memcpy( header + name_o, stored_name, len ); return true; }
if( len <= max_len ) // find shortest prefix
for( int i = len - name_l - 1; i < len && i <= prefix_l; ++i )
- if( stored_name[i] == '/' )
+ if( stored_name[i] == '/' ) // stored_name can be split
{
std::memcpy( header + name_o, stored_name + i + 1, len - i - 1 );
std::memcpy( header + prefix_o, stored_name, i );
return true;
}
+ // store filename in extended record, leave name zeroed in ustar header
+ extended.path = stored_name;
return false;
}
+
int add_member( const char * const filename, const struct stat *,
const int flag, struct FTW * )
{
@@ -189,11 +348,13 @@ int add_member( const char * const filename, const struct stat *,
if( lstat( filename, &st ) != 0 )
{ show_file_error( filename, "Can't stat input file", errno );
gretval = 1; return 0; }
+ if( file_is_archive( st ) )
+ { show_file_error( archive_namep, "File is the archive; not dumped." );
+ return 0; }
+ Extended extended; // metadata for extended records
Tar_header header;
- std::memset( header, 0, header_size );
- if( !split_name( filename, header ) )
- { show_file_error( filename, "File name is too long." );
- gretval = 2; return 0; }
+ init_tar_header( header );
+ store_name( filename, extended, header );
const mode_t mode = st.st_mode;
print_octal( header + mode_o, mode_l - 1,
@@ -201,10 +362,17 @@ int add_member( const char * const filename, const struct stat *,
S_IRWXU | S_IRWXG | S_IRWXO ) );
const uid_t uid = ( cl_owner >= 0 ) ? (uid_t)cl_owner : st.st_uid;
const gid_t gid = ( cl_group >= 0 ) ? (gid_t)cl_group : st.st_gid;
+ if( uid >= 2 << 20 || gid >= 2 << 20 )
+ { show_file_error( filename, "uid or gid is larger than 2_097_151." );
+ gretval = 1; return 0; }
print_octal( header + uid_o, uid_l - 1, uid );
print_octal( header + gid_o, gid_l - 1, gid );
+ const long long mtime = st.st_mtime; // shut up gcc
+ if( mtime < 0 || mtime >= 1LL << 33 )
+ { show_file_error( filename, "mtime is out of ustar range [0, 8_589_934_591]." );
+ gretval = 1; return 0; }
+ print_octal( header + mtime_o, mtime_l - 1, mtime );
unsigned long long file_size = 0;
- print_octal( header + mtime_o, mtime_l - 1, st.st_mtime );
Typeflag typeflag;
if( S_ISREG( mode ) ) { typeflag = tf_regular; file_size = st.st_size; }
else if( S_ISDIR( mode ) )
@@ -217,16 +385,26 @@ int add_member( const char * const filename, const struct stat *,
else if( S_ISLNK( mode ) )
{
typeflag = tf_symlink;
- if( st.st_size > linkname_l ||
- readlink( filename, header + linkname_o, linkname_l ) != st.st_size )
+ long len;
+ if( st.st_size <= linkname_l )
+ len = readlink( filename, header + linkname_o, linkname_l );
+ else
{
- show_file_error( filename, "Link destination name is too long." );
- gretval = 2; return 0;
+ char * const buf = new char[st.st_size+1];
+ len = readlink( filename, buf, st.st_size );
+ if( len == st.st_size ) { buf[len] = 0; extended.linkpath = buf; }
+ delete[] buf;
}
+ if( len != st.st_size )
+ { show_file_error( filename, "Error reading link", (len < 0) ? errno : 0 );
+ gretval = 1; return 0; }
}
else if( S_ISCHR( mode ) || S_ISBLK( mode ) )
{
typeflag = S_ISCHR( mode ) ? tf_chardev : tf_blockdev;
+ if( major( st.st_dev ) >= 2 << 20 || minor( st.st_dev ) >= 2 << 20 )
+ { show_file_error( filename, "devmajor or devminor is larger than 2_097_151." );
+ gretval = 1; return 0; }
print_octal( header + devmajor_o, devmajor_l - 1, major( st.st_dev ) );
print_octal( header + devminor_o, devminor_l - 1, minor( st.st_dev ) );
}
@@ -234,22 +412,23 @@ int add_member( const char * const filename, const struct stat *,
else { show_file_error( filename, "Unknown file type." );
gretval = 2; return 0; }
header[typeflag_o] = typeflag;
- std::memcpy( header + magic_o, ustar_magic, magic_l - 1 );
- header[version_o] = header[version_o+1] = '0';
const struct passwd * const pw = getpwuid( uid );
if( pw && pw->pw_name )
std::strncpy( header + uname_o, pw->pw_name, uname_l - 1 );
const struct group * const gr = getgrgid( gid );
if( gr && gr->gr_name )
std::strncpy( header + gname_o, gr->gr_name, gname_l - 1 );
- print_octal( header + size_o, size_l - 1, file_size );
+ if( file_size >= 1ULL << 33 ) extended.size = file_size;
+ else print_octal( header + size_o, size_l - 1, file_size );
print_octal( header + chksum_o, chksum_l - 1,
ustar_chksum( (const uint8_t *)header ) );
const int infd = file_size ? open_instream( filename ) : -1;
if( file_size && infd < 0 ) { gretval = 1; return 0; }
+ if( !extended.empty() && !write_extended( extended ) )
+ { show_error( "Error writing extended header", errno ); return 1; }
if( !archive_write( (const uint8_t *)header, header_size ) )
- { show_error( "Error writing archive header", errno ); return 1; }
+ { show_error( "Error writing ustar header", errno ); return 1; }
if( file_size )
{
enum { bufsize = 32 * header_size };
@@ -304,6 +483,49 @@ bool verify_ustar_chksum( const uint8_t * const buf )
ustar_chksum( buf ) == strtoul( (const char *)buf + chksum_o, 0, 8 ) ); }
+/* Append the archives named by the non-option arguments of 'parser' to the
+   archive 'archive_name'. Every input must pass check_appendable. An input
+   that is the output archive itself is reported and skipped. Before each
+   copy the output is verified again and positioned at the beginning of its
+   EOF blocks, which the copied archive then overwrites.
+   Returns 0 on success or if 'filenames' is 0, 1 on usage or I/O errors,
+   and 2 if an input or the output is not an appendable tar.lz archive.
+   Processing stops at the first error. */
+int concatenate( const std::string & archive_name, const Arg_parser & parser,
+                 const int filenames )
+  {
+  if( !filenames )
+    { if( verbosity >= 1 ) show_error( "Nothing to concatenate." ); return 0; }
+  if( archive_name.empty() )
+    { show_error( "'--concatenate' is incompatible with '-f -'.", 0, true );
+      return 1; }
+  if( ( outfd = open_outstream( archive_name, false ) ) < 0 ) return 1;
+  if( !file_is_archive.init() )
+    { show_file_error( archive_name.c_str(), "Can't stat", errno ); return 1; }
+
+  int retval = 0;
+  for( int i = 0; i < parser.arguments(); ++i )		// copy archives
+    {
+    if( parser.code( i ) ) continue;			// skip options
+    const char * const filename = parser.argument( i ).c_str();
+    const int infd = open_instream( filename );
+    if( infd < 0 )
+      { show_file_error( filename, "Can't open input file", errno );
+        retval = 1; break; }
+    if( !check_appendable( infd, false ) )
+      { show_file_error( filename, "Not an appendable tar.lz archive." );
+        close( infd ); retval = 2; break; }
+    struct stat st;
+    if( fstat( infd, &st ) == 0 && file_is_archive( st ) )
+      { show_file_error( filename, "File is the archive; not concatenated." );
+        close( infd ); continue; }
+    if( !check_appendable( outfd, true ) )
+      { show_error( "This does not look like an appendable tar.lz archive." );
+        close( infd ); retval = 2; break; }
+    // Always close infd, even if the copy failed, so that the descriptor
+    // is not leaked. Save errno first because close may overwrite it.
+    const bool copied = copy_file( infd, outfd );
+    const int copy_errno = errno;
+    const bool closed = ( close( infd ) == 0 );
+    if( !copied || !closed )
+      { show_file_error( filename, "Error copying archive",
+                         copied ? errno : copy_errno );
+        retval = 1; break; }
+    if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename );
+    }
+
+  if( close( outfd ) != 0 && !retval )
+    { show_error( "Error closing archive", errno ); retval = 1; }
+  return retval;
+  }
+
+
int encode( const std::string & archive_name, const Arg_parser & parser,
const int filenames, const int level, const bool append )
{
@@ -345,11 +567,15 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
{ show_error( "'--append' is incompatible with '--uncompressed'.", 0, true );
return 1; }
if( ( outfd = open_outstream( archive_name, false ) ) < 0 ) return 1;
- if( !check_appendable() )
+ if( !check_appendable( outfd, true ) )
{ show_error( "This does not look like an appendable tar.lz archive." );
return 2; }
}
+ archive_namep = archive_name.size() ? archive_name.c_str() : "(stdout)";
+ if( !file_is_archive.init() )
+ { show_file_error( archive_namep, "Can't stat", errno ); return 1; }
+
if( compressed )
{
encoder = LZ_compress_open( option_mapping[level].dictionary_size,
@@ -365,7 +591,6 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
}
int retval = 0;
- std::string deslashed; // arg without trailing slashes
for( int i = 0; i < parser.arguments(); ++i ) // write members
{
const int code = parser.code( i );
@@ -375,6 +600,7 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
{ show_file_error( filename, "Error changing working directory", errno );
retval = 1; break; }
if( code ) continue; // skip options
+ std::string deslashed; // arg without trailing slashes
unsigned len = arg.size();
while( len > 1 && arg[len-1] == '/' ) --len;
if( len < arg.size() )
@@ -391,16 +617,18 @@ int encode( const std::string & archive_name, const Arg_parser & parser,
if( !retval ) // write End-Of-Archive records
{
- uint8_t buf[header_size];
- std::memset( buf, 0, header_size );
+ enum { bufsize = 2 * header_size };
+ uint8_t buf[bufsize];
+ std::memset( buf, 0, bufsize );
if( encoder && cl_solid == 2 && !archive_write( 0, 0 ) ) // flush encoder
{ show_error( "Error flushing encoder", errno ); retval = 1; }
- else if( !archive_write( buf, header_size ) ||
- !archive_write( buf, header_size ) ||
+ else if( !archive_write( buf, bufsize ) ||
( encoder && !archive_write( 0, 0 ) ) ) // flush encoder
{ show_error( "Error writing end-of-archive blocks", errno );
retval = 1; }
}
+ if( encoder && LZ_compress_close( encoder ) < 0 )
+ { show_error( "LZ_compress_close failed." ); retval = 1; }
if( close( outfd ) != 0 && !retval )
{ show_error( "Error closing archive", errno ); retval = 1; }
if( retval && archive_name.size() && !append )