From 01348f04bc92f307f5f61dd9f9f4c8d7746336f5 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 1 Feb 2019 00:06:40 +0100 Subject: Adding upstream version 0.10. Signed-off-by: Daniel Baumann --- create.cc | 140 ++++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 76 insertions(+), 64 deletions(-) (limited to 'create.cc') diff --git a/create.cc b/create.cc index 7310aee..813923a 100644 --- a/create.cc +++ b/create.cc @@ -38,20 +38,21 @@ #include #include "arg_parser.h" -#include "lzip.h" #include "tarlz.h" -const CRC32C crc32c; +const CRC32 crc32c( true ); int cl_owner = -1; // global vars needed by add_member int cl_group = -1; +int cl_data_size = 0; Solidity solidity = no_solid; namespace { LZ_Encoder * encoder = 0; // local vars needed by add_member const char * archive_namep = 0; +unsigned long long partial_data_size = 0; // current block size int outfd = -1; int gretval = 0; @@ -150,17 +151,18 @@ bool check_appendable( const int fd, const bool remove_eof ) } -class File_is_archive +class File_is_the_archive { dev_t archive_dev; ino_t archive_ino; bool initialized; + public: - File_is_archive() : initialized( false ) {} - bool init() + File_is_the_archive() : initialized( false ) {} + bool init( const int fd ) { struct stat st; - if( fstat( outfd, &st ) != 0 ) return false; + if( fstat( fd, &st ) != 0 ) return false; if( S_ISREG( st.st_mode ) ) { archive_dev = st.st_dev; archive_ino = st.st_ino; initialized = true; } return true; @@ -169,7 +171,7 @@ public: { return initialized && archive_dev == st.st_dev && archive_ino == st.st_ino; } - } file_is_archive; + } file_is_the_archive; bool archive_write( const uint8_t * const buf, const int size ) @@ -223,50 +225,32 @@ void print_octal( uint8_t * const buf, int size, unsigned long long num ) while( --size >= 0 ) { buf[size] = '0' + ( num % 8 ); num /= 8; } } -unsigned decimal_digits( unsigned long long value ) - { - unsigned digits = 1; - while( value >= 10 ) { value /= 10; ++digits; } - return digits; - } - -int record_size( const unsigned keyword_size, const unsigned long value_size ) - { - // size = ' ' + keyword + '=' + value + '\n' - unsigned long long size = 1 + keyword_size + 1 + value_size + 1; - const unsigned d1 = decimal_digits( size ); - size += decimal_digits( d1 + size ); - if( size >= INT_MAX ) size = 0; // overflows snprintf size - return size; - } - bool write_extended( const Extended & extended ) { - const int path_rec = extended.path.size() ? - record_size( 4, extended.path.size() ) : 0; - const int lpath_rec = extended.linkpath.size() ? - record_size( 8, extended.linkpath.size() ) : 0; - const int size_rec = ( extended.size > 0 ) ? - record_size( 4, decimal_digits( extended.size ) ) : 0; - const unsigned long long edsize = path_rec + lpath_rec + size_rec + 22; - const unsigned long long bufsize = round_up( edsize ); + const int path_rec = extended.recsize_path(); + const int lpath_rec = extended.recsize_linkpath(); + const int size_rec = extended.recsize_file_size(); + const unsigned long long edsize = extended.edsize(); + const unsigned long long bufsize = extended.edsize_pad(); if( edsize >= 1ULL << 33 ) return false; // too much extended data if( bufsize == 0 ) return edsize == 0; // overflow or no extended data char * const buf = new char[bufsize+1]; // extended records buffer - unsigned long long pos = path_rec; // goto can't cross this + unsigned long long pos = path_rec; // goto can't cross these + const unsigned crc_size = Extended::crc_record.size(); + if( path_rec && snprintf( buf, path_rec + 1, "%d path=%s\n", - path_rec, extended.path.c_str() ) != path_rec ) + path_rec, extended.path().c_str() ) != path_rec ) goto error; if( lpath_rec && snprintf( buf + pos, lpath_rec + 1, "%d linkpath=%s\n", - lpath_rec, extended.linkpath.c_str() ) != lpath_rec ) + lpath_rec, extended.linkpath().c_str() ) != lpath_rec ) goto error; pos += lpath_rec; if( size_rec && snprintf( buf + pos, size_rec + 1, "%d size=%llu\n", - size_rec, extended.size ) != size_rec ) + size_rec, extended.file_size() ) != size_rec ) goto error; pos += size_rec; - if( snprintf( buf + pos, 23, "22 GNU.crc32=00000000\n" ) != 22 ) goto error; - pos += 22; + std::memcpy( buf + pos, Extended::crc_record.c_str(), crc_size ); + pos += crc_size; if( pos != edsize ) goto error; print_hex( buf + edsize - 9, 8, crc32c.windowed_crc( (const uint8_t *)buf, edsize - 9, edsize ) ); @@ -316,27 +300,29 @@ const char * remove_leading_dotdot( const char * const filename ) } -// Return true if filename fits in the ustar header. +// Return true if it stores filename in the ustar header. bool store_name( const char * const filename, Extended & extended, - Tar_header header ) + Tar_header header, const bool force_extended_name ) { const char * const stored_name = remove_leading_dotdot( filename ); - const int len = std::strlen( stored_name ); - enum { max_len = prefix_l + 1 + name_l }; // prefix + '/' + name - - // first try storing filename in the ustar header - if( len <= name_l ) // stored_name fits in name - { std::memcpy( header + name_o, stored_name, len ); return true; } - if( len <= max_len ) // find shortest prefix - for( int i = len - name_l - 1; i < len && i <= prefix_l; ++i ) - if( stored_name[i] == '/' ) // stored_name can be split - { - std::memcpy( header + name_o, stored_name + i + 1, len - i - 1 ); - std::memcpy( header + prefix_o, stored_name, i ); - return true; - } + + if( !force_extended_name ) // try storing filename in the ustar header + { + const int len = std::strlen( stored_name ); + enum { max_len = prefix_l + 1 + name_l }; // prefix + '/' + name + if( len <= name_l ) // stored_name fits in name + { std::memcpy( header + name_o, stored_name, len ); return true; } + if( len <= max_len ) // find shortest prefix + for( int i = len - name_l - 1; i < len && i <= prefix_l; ++i ) + if( stored_name[i] == '/' ) // stored_name can be split + { + std::memcpy( header + name_o, stored_name + i + 1, len - i - 1 ); + std::memcpy( header + prefix_o, stored_name, i ); + return true; + } + } // store filename in extended record, leave name zeroed in ustar header - extended.path = stored_name; + extended.path( stored_name ); return false; } @@ -348,13 +334,13 @@ int add_member( const char * const filename, const struct stat *, if( lstat( filename, &st ) != 0 ) { show_file_error( filename, "Can't stat input file", errno ); gretval = 1; return 0; } - if( file_is_archive( st ) ) + if( file_is_the_archive( st ) ) { show_file_error( archive_namep, "File is the archive; not dumped." ); return 0; } Extended extended; // metadata for extended records Tar_header header; init_tar_header( header ); - store_name( filename, extended, header ); + bool force_extended_name = false; const mode_t mode = st.st_mode; print_octal( header + mode_o, mode_l - 1, @@ -392,7 +378,8 @@ int add_member( const char * const filename, const struct stat *, { char * const buf = new char[st.st_size+1]; len = readlink( filename, buf, st.st_size ); - if( len == st.st_size ) { buf[len] = 0; extended.linkpath = buf; } + if( len == st.st_size ) + { buf[len] = 0; extended.linkpath( buf ); force_extended_name = true; } delete[] buf; } if( len != st.st_size ) @@ -418,12 +405,30 @@ int add_member( const char * const filename, const struct stat *, const struct group * const gr = getgrgid( gid ); if( gr && gr->gr_name ) std::strncpy( (char *)header + gname_o, gr->gr_name, gname_l - 1 ); - if( file_size >= 1ULL << 33 ) extended.size = file_size; + if( file_size >= 1ULL << 33 ) + { extended.file_size( file_size ); force_extended_name = true; } else print_octal( header + size_o, size_l - 1, file_size ); + store_name( filename, extended, header, force_extended_name ); print_octal( header + chksum_o, chksum_l - 1, ustar_chksum( header ) ); const int infd = file_size ? open_instream( filename ) : -1; if( file_size && infd < 0 ) { gretval = 1; return 0; } + if( encoder && solidity == bsolid ) + { + const unsigned long long member_size = + header_size + extended.full_size() + round_up( file_size ); + const unsigned long long target_size = cl_data_size; + if( partial_data_size >= target_size || + ( partial_data_size >= min_data_size && + partial_data_size + member_size / 2 > target_size ) ) + { + partial_data_size = member_size; + if( !archive_write( 0, 0 ) ) + { show_error( "Error flushing encoder", errno ); return 1; } + } + else partial_data_size += member_size; + } + if( !extended.empty() && !write_extended( extended ) ) { show_error( "Error writing extended header", errno ); return 1; } if( !archive_write( header, header_size ) ) @@ -491,7 +496,7 @@ int concatenate( const std::string & archive_name, const Arg_parser & parser, { show_error( "'--concatenate' is incompatible with '-f -'.", 0, true ); return 1; } if( ( outfd = open_outstream( archive_name, false ) ) < 0 ) return 1; - if( !file_is_archive.init() ) + if( !file_is_the_archive.init( outfd ) ) { show_file_error( archive_name.c_str(), "Can't stat", errno ); return 1; } int retval = 0; @@ -507,7 +512,7 @@ int concatenate( const std::string & archive_name, const Arg_parser & parser, { show_file_error( filename, "Not an appendable tar.lz archive." ); close( infd ); retval = 2; break; } struct stat st; - if( fstat( infd, &st ) == 0 && file_is_archive( st ) ) + if( fstat( infd, &st ) == 0 && file_is_the_archive( st ) ) { show_file_error( filename, "File is the archive; not concatenated." ); close( infd ); continue; } if( !check_appendable( outfd, true ) ) @@ -572,12 +577,18 @@ int encode( const std::string & archive_name, const Arg_parser & parser, } archive_namep = archive_name.size() ? archive_name.c_str() : "(stdout)"; - if( !file_is_archive.init() ) + if( !file_is_the_archive.init( outfd ) ) { show_file_error( archive_namep, "Can't stat", errno ); return 1; } if( compressed ) { - encoder = LZ_compress_open( option_mapping[level].dictionary_size, + const int dictionary_size = option_mapping[level].dictionary_size; + if( cl_data_size <= 0 ) + { + if( level == 0 ) cl_data_size = 1 << 20; + else cl_data_size = 2 * dictionary_size; + } + encoder = LZ_compress_open( dictionary_size, option_mapping[level].match_len_limit, LLONG_MAX ); if( !encoder || LZ_compress_errno( encoder ) != LZ_ok ) { @@ -619,7 +630,8 @@ int encode( const std::string & archive_name, const Arg_parser & parser, enum { bufsize = 2 * header_size }; uint8_t buf[bufsize]; std::memset( buf, 0, bufsize ); - if( encoder && solidity == asolid && !archive_write( 0, 0 ) ) + if( encoder && ( solidity == asolid || solidity == bsolid ) && + !archive_write( 0, 0 ) ) { show_error( "Error flushing encoder", errno ); retval = 1; } else if( !archive_write( buf, bufsize ) || ( encoder && !archive_write( 0, 0 ) ) ) // flush encoder -- cgit v1.2.3