From cf054c2e80b8f5b9c6207c17a105e3f4c0bf1bcd Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 27 Feb 2019 20:14:39 +0100 Subject: Merging upstream version 0.13. Signed-off-by: Daniel Baumann --- ChangeLog | 7 +++ Makefile.in | 2 + NEWS | 28 +++------ configure | 2 +- create.cc | 55 +++++++++------- create_lz.cc | 44 ++++++------- doc/tarlz.1 | 2 +- doc/tarlz.info | 2 +- doc/tarlz.texi | 4 +- extended.cc | 78 +++++++++++++++++------ extract.cc | 72 ++++++--------------- list_lz.cc | 150 +++++++++++++++++++++----------------------- tarlz.h | 20 +++++- testsuite/check.sh | 39 +++++++++++- testsuite/test3_em1.tar.lz | Bin 0 -> 392 bytes testsuite/test3_em2.tar.lz | Bin 0 -> 392 bytes testsuite/test3_em3.tar.lz | Bin 0 -> 392 bytes testsuite/test3_em4.tar.lz | Bin 0 -> 392 bytes testsuite/test3_em5.tar.lz | Bin 0 -> 392 bytes testsuite/test3_em6.tar.lz | Bin 0 -> 500 bytes testsuite/ts_in_link.tar.lz | Bin 0 -> 509 bytes 21 files changed, 280 insertions(+), 225 deletions(-) create mode 100644 testsuite/test3_em1.tar.lz create mode 100644 testsuite/test3_em2.tar.lz create mode 100644 testsuite/test3_em3.tar.lz create mode 100644 testsuite/test3_em4.tar.lz create mode 100644 testsuite/test3_em5.tar.lz create mode 100644 testsuite/test3_em6.tar.lz create mode 100644 testsuite/ts_in_link.tar.lz diff --git a/ChangeLog b/ChangeLog index 5c4cb09..6c6884a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2019-02-27 Antonio Diaz Diaz + + * Version 0.13 released. + * create_lz.cc (cworker): Fix skipping of unreadable files. + * list_lz.cc: Fix listing of archives containing empty lzip members. + * create.cc (fill_headers): Store negative mtime as zero. + 2019-02-22 Antonio Diaz Diaz * Version 0.12 released. 
diff --git a/Makefile.in b/Makefile.in index a5ce4cf..cf5519d 100644 --- a/Makefile.in +++ b/Makefile.in @@ -136,10 +136,12 @@ dist : doc $(DISTNAME)/testsuite/test_bad[12].txt.tar.lz \ $(DISTNAME)/testsuite/test3.tar.lz \ $(DISTNAME)/testsuite/test3_eof[123].tar.lz \ + $(DISTNAME)/testsuite/test3_em[1-6].tar.lz \ $(DISTNAME)/testsuite/tlz_in_tar[12].tar \ $(DISTNAME)/testsuite/tar_in_tlz[12].tar.lz \ $(DISTNAME)/testsuite/test3_dir.tar.lz \ $(DISTNAME)/testsuite/test3_dot.tar.lz \ + $(DISTNAME)/testsuite/ts_in_link.tar.lz \ $(DISTNAME)/testsuite/t155.tar.lz \ $(DISTNAME)/testsuite/test3_bad[1-6].tar.lz \ $(DISTNAME)/testsuite/dotdot[1-5].tar.lz \ diff --git a/NEWS b/NEWS index 9a6a14d..07f4272 100644 --- a/NEWS +++ b/NEWS @@ -1,23 +1,11 @@ -Changes in version 0.12: +Changes in version 0.13: -When dumping a character special file or a block special file, the devmajor -and devminor fields were incorrectly filled with the values corresponding to -the device containing the special file instead of the values corresponding -to the special file itself. +Skipping of unreadable files during multi-threaded archive creation with +per-file compression has been fixed. Tarlz did produce empty lzip members, +and sometimes left the last files out of the archive. -If when creating an archive tarlz can't find a user or group name in the -database, it now saves just the numerical uid/gid instead of exiting with -error status. +Multi-threaded listing of tar.lz archives containing empty lzip members has +been fixed. It listed members out of order and sometimes hung. -When listing verbosely a character special file or a block special file, the -devmajor and devminor values are now shown. - -The new option '-d, --diff', which reports differences between archive and -file system, has been added. - -The new option '--ignore-ids', which tells '-d, --diff' to ignore -differences in owner and group IDs, has been added. This option is useful -when comparing an --anonymous archive. 
- -Listing of large seekable uncompressed archives is now much faster because -tarlz now skips over member data instead of reading it. +When creating an archive, negative modification times are now stored as zero +(1970-01-01 00:00:00 UTC). Negative times are not portable. diff --git a/configure b/configure index ed888bf..2d701da 100755 --- a/configure +++ b/configure @@ -6,7 +6,7 @@ # to copy, distribute and modify it. pkgname=tarlz -pkgversion=0.12 +pkgversion=0.13 progname=tarlz srctrigger=doc/${pkgname}.texi diff --git a/create.cc b/create.cc index 0eaa183..cba638a 100644 --- a/create.cc +++ b/create.cc @@ -92,15 +92,6 @@ bool option_C_after_relative_filename( const Arg_parser & parser ) } -bool writeblock_wrapper( const int outfd, const uint8_t * const buffer, - const int size ) - { - if( writeblock( outfd, buffer, size ) != size ) - { show_file_error( archive_namep, "Write error", errno ); return false; } - return true; - } - - // infd and outfd can refer to the same file if copying to a lower file // position or if source and destination blocks don't overlap. // max_size < 0 means no size limit. @@ -310,6 +301,15 @@ int add_member( const char * const filename, const struct stat *, } // end namespace +bool writeblock_wrapper( const int outfd, const uint8_t * const buffer, + const int size ) + { + if( writeblock( outfd, buffer, size ) != size ) + { show_file_error( archive_namep, "Write error", errno ); return false; } + return true; + } + + /* Removes any amount of leading "./" and '/' strings from filename. Optionally also removes prefixes containing a ".." component. */ const char * remove_leading_dotslash( const char * const filename, @@ -322,10 +322,7 @@ const char * remove_leading_dotslash( const char * const filename, if( dotdot ) for( int i = 0; filename[i]; ++i ) - if( filename[i] == '.' && filename[i+1] == '.' 
&& - ( i == 0 || filename[i-1] == '/' ) && - ( filename[i+2] == 0 || filename[i+2] == '/' ) ) - p = filename + i + 2; + if( dotdot_at_i( filename, i ) ) p = filename + i + 2; while( *p == '/' || ( *p == '.' && p[1] == '/' ) ) ++p; if( p != filename ) { @@ -371,8 +368,8 @@ bool fill_headers( const char * const filename, Extended & extended, set_error_status( 1 ); return false; } print_octal( header + uid_o, uid_l - 1, uid ); print_octal( header + gid_o, gid_l - 1, gid ); - const long long mtime = st.st_mtime; // shut up gcc about time_t - if( mtime < 0 || mtime >= 1LL << 33 ) + const unsigned long long mtime = (st.st_mtime >= 0) ? st.st_mtime : 0; + if( mtime >= 1ULL << 33 ) { show_file_error( filename, "mtime is out of ustar range [0, 8_589_934_591]." ); set_error_status( 1 ); return false; } print_octal( header + mtime_o, mtime_l - 1, mtime ); @@ -388,25 +385,35 @@ bool fill_headers( const char * const filename, Extended & extended, else if( S_ISLNK( mode ) ) { typeflag = tf_symlink; - long len; + long len, sz; if( st.st_size <= linkname_l ) - len = readlink( filename, (char *)header + linkname_o, linkname_l ); + { + len = sz = readlink( filename, (char *)header + linkname_o, linkname_l ); + while( len > 1 && header[linkname_o+len-1] == '/' ) // trailing '/' + { --len; header[linkname_o+len] = 0; } + } else { char * const buf = new char[st.st_size+1]; - len = readlink( filename, buf, st.st_size ); - if( len == st.st_size ) - { buf[len] = 0; extended.linkpath( buf ); force_extended_name = true; } + len = sz = readlink( filename, buf, st.st_size ); + if( sz == st.st_size ) + { + while( len > 1 && buf[len-1] == '/' ) --len; // trailing '/' + if( len <= linkname_l ) std::memcpy( header + linkname_o, buf, len ); + else { buf[len] = 0; extended.linkpath( buf ); + force_extended_name = true; } + } delete[] buf; } - if( len != st.st_size ) - { show_file_error( filename, "Error reading link", (len < 0) ? 
errno : 0 ); + if( sz != st.st_size ) + { show_file_error( filename, "Error reading link", (sz < 0) ? errno : 0 ); set_error_status( 1 ); return false; } } else if( S_ISCHR( mode ) || S_ISBLK( mode ) ) { typeflag = S_ISCHR( mode ) ? tf_chardev : tf_blockdev; - if( major( st.st_rdev ) >= 2 << 20 || minor( st.st_rdev ) >= 2 << 20 ) + if( (unsigned)major( st.st_rdev ) >= 2 << 20 || + (unsigned)minor( st.st_rdev ) >= 2 << 20 ) { show_file_error( filename, "devmajor or devminor is larger than 2_097_151." ); set_error_status( 1 ); return false; } print_octal( header + devmajor_o, devmajor_l - 1, major( st.st_rdev ) ); @@ -593,7 +600,7 @@ int encode( const std::string & archive_name, const Arg_parser & parser, !option_C_after_relative_filename( parser ) ) { // show_file_error( archive_namep, "Multi-threaded --create" ); - return encode_lz( archive_namep, parser, dictionary_size, + return encode_lz( parser, dictionary_size, option_mapping[level].match_len_limit, num_workers, goutfd, debug_level ); } diff --git a/create_lz.cc b/create_lz.cc index 797427b..e2aaf3d 100644 --- a/create_lz.cc +++ b/create_lz.cc @@ -368,7 +368,11 @@ void loop_encode( const uint8_t * const ibuf, const int isize, courier.collect_packet( new Opacket( obuf, opos ), worker_id ); opos = 0; obuf = new( std::nothrow ) uint8_t[max_packet_size]; if( !obuf ) { show_error( mem_msg2 ); cleanup_and_fail(); } - if( LZ_compress_finished( encoder ) == 1 ) break; + if( LZ_compress_finished( encoder ) == 1 ) + { + if( LZ_compress_restart_member( encoder, LLONG_MAX ) >= 0 ) break; + show_error( "LZ_compress_restart_member failed." ); cleanup_and_fail(); + } } } if( ipos > isize ) internal_error( "ipacket size exceeded in worker." 
); @@ -401,27 +405,27 @@ extern "C" void * cworker( void * arg ) if( !rbuf.size() ) { show_error( mem_msg2 ); cleanup_and_fail(); } int opos = 0; + bool flushed = true; // avoid producing empty lzip members while( true ) { const Ipacket * const ipacket = courier.distribute_packet( worker_id ); if( !ipacket ) break; // no more packets to process - if( ipacket->filename.empty() ) // end of group, flush encoder + if( ipacket->filename.empty() ) // end of group { - if( !encoder ) { delete ipacket; continue; } // nothing to flush - loop_encode( 0, 0, data, opos, courier, encoder, worker_id, true ); + if( !flushed ) // this lzip member is not empty + loop_encode( 0, 0, data, opos, courier, encoder, worker_id, true ); courier.collect_packet( new Opacket, worker_id ); // end of member token - if( LZ_compress_restart_member( encoder, LLONG_MAX ) < 0 ) - { show_error( "LZ_compress_restart_member failed." ); cleanup_and_fail(); } - delete ipacket; continue; + flushed = true; delete ipacket; continue; } const int infd = ipacket->file_size ? open_instream( ipacket->filename.c_str() ) : -1; - if( ipacket->file_size && infd < 0 ) + if( ipacket->file_size && infd < 0 ) // can't read file data { delete[] ipacket->header; delete ipacket->extended; delete ipacket; - set_error_status( 1 ); continue; } + set_error_status( 1 ); continue; } // skip file - if( !encoder ) + flushed = false; + if( !encoder ) // init encoder just before using it { data = new( std::nothrow ) uint8_t[max_packet_size]; encoder = LZ_compress_open( dictionary_size, match_len_limit, LLONG_MAX ); @@ -494,17 +498,15 @@ extern "C" void * cworker( void * arg ) /* Get from courier the processed and sorted packets, and write their contents to the output archive. 
*/ -void muxer( Packet_courier & courier, const char * const archive_name, - const int outfd ) +void muxer( Packet_courier & courier, const int outfd ) { while( true ) { const Opacket * const opacket = courier.deliver_packet(); if( !opacket ) break; // queue is empty. all workers exited - if( writeblock( outfd, opacket->data, opacket->size ) != opacket->size ) - { show_file_error( archive_name, "Write error", errno ); - cleanup_and_fail(); } + if( !writeblock_wrapper( outfd, opacket->data, opacket->size ) ) + cleanup_and_fail(); delete[] opacket->data; delete opacket; } @@ -514,9 +516,9 @@ void muxer( Packet_courier & courier, const char * const archive_name, // init the courier, then start the grouper and the workers and call the muxer -int encode_lz( const char * const archive_name, const Arg_parser & parser, - const int dictionary_size, const int match_len_limit, - const int num_workers, const int outfd, const int debug_level ) +int encode_lz( const Arg_parser & parser, const int dictionary_size, + const int match_len_limit, const int num_workers, + const int outfd, const int debug_level ) { const int in_slots = 65536; // max small files (<=512B) in 64 MiB const int total_in_slots = ( INT_MAX / num_workers >= in_slots ) ? 
@@ -552,7 +554,7 @@ int encode_lz( const char * const archive_name, const Arg_parser & parser, { show_error( "Can't create worker threads", errcode ); cleanup_and_fail(); } } - muxer( courier, archive_name, outfd ); + muxer( courier, outfd ); for( int i = num_workers - 1; i >= 0; --i ) { @@ -575,9 +577,7 @@ int encode_lz( const char * const archive_name, const Arg_parser & parser, 0xA3, 0xB7, 0x80, 0x0C, 0x82, 0xDB, 0xFF, 0xFF, 0x9F, 0xF0, 0x00, 0x00, 0x2E, 0xAF, 0xB5, 0xEF, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; - if( writeblock( outfd, eof_member, eof_member_size ) != eof_member_size ) - { show_error( "Error writing end-of-archive blocks", errno ); - retval = 1; } + if( !writeblock_wrapper( outfd, eof_member, eof_member_size ) ) retval = 1; if( close( outfd ) != 0 && !retval ) { show_error( "Error closing archive", errno ); retval = 1; } diff --git a/doc/tarlz.1 b/doc/tarlz.1 index ff418c5..47be9a8 100644 --- a/doc/tarlz.1 +++ b/doc/tarlz.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1. -.TH TARLZ "1" "February 2019" "tarlz 0.12" "User Commands" +.TH TARLZ "1" "February 2019" "tarlz 0.13" "User Commands" .SH NAME tarlz \- creates tar archives with multimember lzip compression .SH SYNOPSIS diff --git a/doc/tarlz.info b/doc/tarlz.info index 349e420..fa8666c 100644 --- a/doc/tarlz.info +++ b/doc/tarlz.info @@ -11,7 +11,7 @@ File: tarlz.info, Node: Top, Next: Introduction, Up: (dir) Tarlz Manual ************ -This manual is for Tarlz (version 0.12, 22 February 2019). +This manual is for Tarlz (version 0.13, 27 February 2019). 
* Menu: diff --git a/doc/tarlz.texi b/doc/tarlz.texi index 56c4dab..47f01a2 100644 --- a/doc/tarlz.texi +++ b/doc/tarlz.texi @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 22 February 2019 -@set VERSION 0.12 +@set UPDATED 27 February 2019 +@set VERSION 0.13 @dircategory Data Compression @direntry diff --git a/extended.cc b/extended.cc index 5440de7..5931be2 100644 --- a/extended.cc +++ b/extended.cc @@ -86,23 +86,6 @@ uint32_t parse_record_crc( const char * const ptr ) return crc; } -} // end namespace - - -const std::string Extended::crc_record( "22 GNU.crc32=00000000\n" ); - -void Extended::calculate_sizes() const - { - linkpath_recsize_ = linkpath_.size() ? record_size( 8, linkpath_.size() ) : 0; - path_recsize_ = path_.size() ? record_size( 4, path_.size() ) : 0; - file_size_recsize_ = - ( file_size_ > 0 ) ? record_size( 4, decimal_digits( file_size_ ) ) : 0; - edsize_ = linkpath_recsize_ + path_recsize_ + file_size_recsize_ + - crc_record.size(); - padded_edsize_ = round_up( edsize_ ); - full_size_ = header_size + padded_edsize_; - } - unsigned char xdigit( const unsigned value ) { @@ -144,6 +127,23 @@ bool print_record( char * const buf, const int size, return pos == size; } +} // end namespace + + +const std::string Extended::crc_record( "22 GNU.crc32=00000000\n" ); + +void Extended::calculate_sizes() const + { + linkpath_recsize_ = linkpath_.size() ? record_size( 8, linkpath_.size() ) : 0; + path_recsize_ = path_.size() ? record_size( 4, path_.size() ) : 0; + file_size_recsize_ = + ( file_size_ > 0 ) ? record_size( 4, decimal_digits( file_size_ ) ) : 0; + edsize_ = linkpath_recsize_ + path_recsize_ + file_size_recsize_ + + crc_record.size(); + padded_edsize_ = round_up( edsize_ ); + full_size_ = header_size + padded_edsize_; + } + // Returns the extended block size, or -1 if error. 
long long Extended::format_block( Resizable_buffer & rbuf ) const @@ -206,8 +206,12 @@ bool Extended::parse( const char * const buf, const unsigned long long edsize, path_.assign( remove_leading_dotslash( path_.c_str() ) ); } else if( rest > 9 && std::memcmp( tail, "linkpath=", 9 ) == 0 ) - { if( linkpath_.size() && !permissive ) return false; - linkpath_.assign( tail + 9, rest - 9 ); } + { + if( linkpath_.size() && !permissive ) return false; + unsigned long long len = rest - 9; + while( len > 1 && tail[9+len-1] == '/' ) --len; // trailing '/' + linkpath_.assign( tail + 9, len ); + } else if( rest > 5 && std::memcmp( tail, "size=", 5 ) == 0 ) { if( file_size_ != 0 && !permissive ) return false; @@ -235,3 +239,39 @@ bool Extended::parse( const char * const buf, const unsigned long long edsize, } return true; } + + +// if needed, copy linkpath, path and file_size from ustar header +void Extended::fill_from_ustar( const Tar_header header ) + { + if( linkpath_.empty() ) // copy linkpath from ustar header + { + int len = 0; + while( len < linkname_l && header[linkname_o+len] ) ++len; + while( len > 1 && header[linkname_o+len-1] == '/' ) --len; // trailing '/' + if( len > 0 ) + { + linkpath_.assign( (const char *)header + linkname_o, len ); + full_size_ = -1; + } + } + + if( path_.empty() ) // copy path from ustar header + { + char stored_name[prefix_l+1+name_l+1]; + int len = 0; + while( len < prefix_l && header[prefix_o+len] ) + { stored_name[len] = header[prefix_o+len]; ++len; } + if( len && header[name_o] ) stored_name[len++] = '/'; + for( int i = 0; i < name_l && header[name_o+i]; ++i ) + { stored_name[len] = header[name_o+i]; ++len; } + while( len > 0 && stored_name[len-1] == '/' ) --len; // trailing '/' + stored_name[len] = 0; + path( remove_leading_dotslash( stored_name ) ); + } + + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + if( file_size_ == 0 && // copy file_size from ustar header + ( typeflag == tf_regular || typeflag == tf_hiperf ) ) + 
file_size( parse_octal( header + size_o, size_l ) ); + } diff --git a/extract.cc b/extract.cc index 2307060..04d974a 100644 --- a/extract.cc +++ b/extract.cc @@ -373,15 +373,20 @@ int compare_member( const int infd1, const Extended & extended, } if( typeflag != tf_symlink ) { - const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits - if( mtime != st.st_mtime ) - { show_file_diff( filename, "Mod time differs" ); diff = true; } + if( typeflag != tf_directory ) + { + const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits + if( mtime != st.st_mtime ) + { show_file_diff( filename, "Mod time differs" ); diff = true; } + } if( ( typeflag == tf_regular || typeflag == tf_hiperf ) && (off_t)rest != st.st_size ) // don't compare contents { show_file_diff( filename, "Size differs" ); size_differs = true; } if( ( typeflag == tf_chardev || typeflag == tf_blockdev ) && - ( parse_octal( header + devmajor_o, devmajor_l ) != major( st.st_rdev ) || - parse_octal( header + devminor_o, devminor_l ) != minor( st.st_rdev ) ) ) + ( parse_octal( header + devmajor_o, devmajor_l ) != + (unsigned)major( st.st_rdev ) || + parse_octal( header + devminor_o, devminor_l ) != + (unsigned)minor( st.st_rdev ) ) ) { show_file_diff( filename, "Device number differs" ); diff = true; } } else @@ -389,7 +394,12 @@ int compare_member( const int infd1, const Extended & extended, char * const buf = new char[st.st_size+1]; long len = readlink( filename, buf, st.st_size ); bool e = ( len != st.st_size ); - if( !e ) { buf[len] = 0; if( extended.linkpath() != buf ) e = true; } + if( !e ) + { + while( len > 1 && buf[len-1] == '/' ) --len; // trailing '/' + buf[len] = 0; + if( extended.linkpath() != buf ) e = true; + } delete[] buf; if( e ) { show_file_diff( filename, "Symlink differs" ); diff = true; } } @@ -451,9 +461,7 @@ int list_member( const int infd, const Extended & extended, bool contains_dotdot( const char * const filename ) { for( int i = 0; filename[i]; ++i ) - if( 
filename[i] == '.' && filename[i+1] == '.' && - ( i == 0 || filename[i-1] == '/' ) && - ( filename[i+2] == 0 || filename[i+2] == '/' ) ) return true; + if( dotdot_at_i( filename, i ) ) return true; return false; } @@ -763,50 +771,10 @@ int decode( const std::string & archive_name, const Arg_parser & parser, } prev_extended = false; - if( extended.linkpath().empty() ) // copy linkpath from ustar header - { - int len = 0; - while( len < linkname_l && header[linkname_o+len] ) ++len; - while( len > 1 && header[linkname_o+len-1] == '/' ) --len; // trailing '/' - if( len > 0 ) - { - const uint8_t c = header[linkname_o+len]; header[linkname_o+len] = 0; - extended.linkpath( (const char *)header + linkname_o ); - header[linkname_o+len] = c; - } - } - - if( extended.path().empty() ) // copy path from ustar header - { - char stored_name[prefix_l+1+name_l+1]; - int len = 0; - while( len < prefix_l && header[prefix_o+len] ) - { stored_name[len] = header[prefix_o+len]; ++len; } - if( len && header[name_o] ) stored_name[len++] = '/'; - for( int i = 0; i < name_l && header[name_o+i]; ++i ) - { stored_name[len] = header[name_o+i]; ++len; } - while( len > 0 && stored_name[len-1] == '/' ) --len; // trailing '/' - stored_name[len] = 0; - extended.path( remove_leading_dotslash( stored_name ) ); - } - const char * const filename = extended.path().c_str(); - - bool skip = filenames > 0; - if( skip ) - for( int i = 0; i < parser.arguments(); ++i ) - if( !parser.code( i ) && parser.argument( i ).size() ) - { - const char * const name = - remove_leading_dotslash( parser.argument( i ).c_str() ); - if( compare_prefix_dir( name, filename ) || - compare_tslash( name, filename ) ) - { skip = false; name_pending[i] = false; break; } - } - - if( extended.file_size() == 0 && - ( typeflag == tf_regular || typeflag == tf_hiperf ) ) - extended.file_size( parse_octal( header + size_o, size_l ) ); + extended.fill_from_ustar( header ); // copy metadata from header + const bool skip = check_skip_filename( 
parser, name_pending, + extended.path().c_str(), filenames ); if( skip ) retval = skip_member( infd, extended ); else if( program_mode == m_list ) diff --git a/list_lz.cc b/list_lz.cc index 8e5f5fc..6e15db5 100644 --- a/list_lz.cc +++ b/list_lz.cc @@ -145,13 +145,32 @@ void xbroadcast( pthread_cond_t * const cond ) } +bool check_skip_filename( const Arg_parser & parser, + std::vector< char > & name_pending, + const char * const filename, const int filenames ) + { + bool skip = filenames > 0; + if( skip ) + for( int i = 0; i < parser.arguments(); ++i ) + if( !parser.code( i ) && parser.argument( i ).size() ) + { + const char * const name = + remove_leading_dotslash( parser.argument( i ).c_str() ); + if( compare_prefix_dir( name, filename ) || + compare_tslash( name, filename ) ) + { skip = false; name_pending[i] = false; break; } + } + return skip; + } + + namespace { struct Packet // member name and metadata or error message { enum Status { ok, member_done, error }; long member_id; // lzip member containing the header of this tar member - std::string line; // member name and metadata ready to print + std::string line; // member name and metadata ready to print, if any Status status; Packet( const long i, const char * const msg, const Status s = ok ) : member_id( i ), line( msg ), status( s ) {} @@ -167,7 +186,7 @@ private: long error_member_id; // first lzip member with error/misalign/eof int deliver_worker_id; // worker queue currently delivering packets int master_worker_id; // worker in charge if error/misalignment/eof - std::vector< std::queue< Packet * > > opacket_queues; + std::vector< std::queue< const Packet * > > opacket_queues; int num_working; // number of workers still running const int num_workers; // number of workers const unsigned out_slots; // max output packets per queue @@ -233,8 +252,9 @@ public: xunlock( &omutex ); } - // collect a packet from a worker - bool collect_packet( Packet * const opacket, const int worker_id ) + /* Collect a packet 
from a worker. + If a packet is rejected, the worker must terminate. */ + bool collect_packet( const Packet * const opacket, const int worker_id ) { xlock( &omutex ); if( ( mastership_granted() && master_worker_id != worker_id ) || @@ -249,27 +269,31 @@ public: } /* Deliver a packet to muxer. - If packet.status == Packet::member_done, move to next queue. */ - Packet * deliver_packet() + If packet.status == Packet::member_done, move to next queue. + If packet.line.empty(), wait again (empty lzip member). */ + const Packet * deliver_packet() { - Packet * opacket = 0; + const Packet * opacket = 0; xlock( &omutex ); ++ocheck_counter; - while( opacket_queues[deliver_worker_id].empty() && num_working > 0 ) - { - ++owait_counter; - if( !mastership_granted() && error_member_id >= 0 ) - xbroadcast( &check_master ); // mastership requested not yet granted - xwait( &oav_or_exit, &omutex ); - } - if( !opacket_queues[deliver_worker_id].empty() ) + while( true ) { + while( opacket_queues[deliver_worker_id].empty() && num_working > 0 ) + { + ++owait_counter; + if( !mastership_granted() && error_member_id >= 0 ) + xbroadcast( &check_master ); // mastership requested not yet granted + xwait( &oav_or_exit, &omutex ); + } + if( opacket_queues[deliver_worker_id].empty() ) break; opacket = opacket_queues[deliver_worker_id].front(); opacket_queues[deliver_worker_id].pop(); if( opacket_queues[deliver_worker_id].size() + 1 == out_slots ) xsignal( &slot_av[deliver_worker_id] ); if( opacket->status == Packet::member_done && !mastership_granted() ) { if( ++deliver_worker_id >= num_workers ) deliver_worker_id = 0; } + if( !opacket->line.empty() ) break; + delete opacket; opacket = 0; } xunlock( &omutex ); return opacket; @@ -349,14 +373,15 @@ int list_member_lz( LZ_Decoder * const decoder, const int infd, if( data_rest < 0 ) // tar member exceeds lzip member end { if( courier.request_mastership( member_id, worker_id ) ) master = true; - else return 2; + else { *msg = "tar member exceeds lzip 
member end"; return 2; } } if( verbosity < 0 || skip ) rbuf()[0] = 0; else format_member_name( extended, header, rbuf, verbosity > 0 ); - Packet * const opacket = new Packet( member_id, rbuf(), - data_rest ? Packet::ok : Packet::member_done ); - courier.collect_packet( opacket, worker_id ); + const Packet * const opacket = new Packet( member_id, rbuf(), + data_rest ? Packet::ok : Packet::member_done ); + if( !courier.collect_packet( opacket, worker_id ) ) + { *msg = "other worker found an error"; return 1; } if( !data_rest ) { data_pos = mdata_end; return 0; } const unsigned bufsize = 32 * header_size; @@ -441,6 +466,12 @@ extern "C" void * tworker( void * arg ) long long data_end = mdata_end; long long file_pos = lzip_index.mblock( i ).pos(); long long member_end = lzip_index.mblock( i ).end(); + if( data_pos >= data_end ) // empty lzip member + { + const Packet * const opacket = new Packet( i, "", Packet::member_done ); + if( !courier.collect_packet( opacket, worker_id ) ) goto done; + continue; + } Extended extended; // metadata from extended records int retval = 0; @@ -459,7 +490,7 @@ extern "C" void * tworker( void * arg ) master = true; if( ret > 0 ) { - Packet * const opacket = new Packet( i, msg, Packet::error ); + const Packet * const opacket = new Packet( i, msg, Packet::error ); courier.collect_packet( opacket, worker_id ); goto done; } @@ -472,7 +503,7 @@ extern "C" void * tworker( void * arg ) if( !courier.request_mastership( i, worker_id ) ) goto done; master = true; if( block_is_zero( header, header_size ) ) break; // EOF - Packet * const opacket = new Packet( i, + const Packet * const opacket = new Packet( i, ( data_pos > header_size ) ? "Corrupt or invalid header." 
: "This does not look like a POSIX tar.lz archive.", Packet::error ); courier.collect_packet( opacket, worker_id ); @@ -495,9 +526,9 @@ extern "C" void * tworker( void * arg ) if( ret > 0 ) { if( !msg ) msg = "Error in global extended records."; - Packet * const opacket = new Packet( i, msg, Packet::error ); + const Packet * const opacket = new Packet( i, msg, Packet::error ); courier.collect_packet( opacket, worker_id ); - if( ret == 2 ) goto done; + goto done; } // member_end exceeded, process rest of file else { data_end = lzip_index.udata_size(); member_end = cdata_size; } @@ -521,10 +552,9 @@ extern "C" void * tworker( void * arg ) if( ret > 0 ) { if( !msg ) msg = "Error in extended records."; - Packet * const opacket = new Packet( i, msg, Packet::error ); + const Packet * const opacket = new Packet( i, msg, Packet::error ); courier.collect_packet( opacket, worker_id ); - extended.reset(); - if( ret == 2 ) goto done; + goto done; } // member_end exceeded, process rest of file else { data_end = lzip_index.udata_size(); member_end = cdata_size; } @@ -534,70 +564,33 @@ extern "C" void * tworker( void * arg ) } prev_extended = false; - if( extended.linkpath().empty() ) // copy linkpath from ustar header - { - int len = 0; - while( len < linkname_l && header[linkname_o+len] ) ++len; - while( len > 1 && header[linkname_o+len-1] == '/' ) --len; // trailing '/' - if( len > 0 ) - { - const uint8_t c = header[linkname_o+len]; header[linkname_o+len] = 0; - extended.linkpath( (const char *)header + linkname_o ); - header[linkname_o+len] = c; - } - } - - if( extended.path().empty() ) // copy path from ustar header - { - char stored_name[prefix_l+1+name_l+1]; - int len = 0; - while( len < prefix_l && header[prefix_o+len] ) - { stored_name[len] = header[prefix_o+len]; ++len; } - if( len && header[name_o] ) stored_name[len++] = '/'; - for( int i = 0; i < name_l && header[name_o+i]; ++i ) - { stored_name[len] = header[name_o+i]; ++len; } - while( len > 0 && stored_name[len-1] 
== '/' ) --len; // trailing '/' - stored_name[len] = 0; - extended.path( remove_leading_dotslash( stored_name ) ); - } - const char * const filename = extended.path().c_str(); - - bool skip = filenames > 0; - if( skip ) - for( int i = 0; i < parser.arguments(); ++i ) - if( !parser.code( i ) && parser.argument( i ).size() ) - { - const char * const name = - remove_leading_dotslash( parser.argument( i ).c_str() ); - if( compare_prefix_dir( name, filename ) || - compare_tslash( name, filename ) ) - { skip = false; name_pending[i] = false; break; } - } + extended.fill_from_ustar( header ); // copy metadata from header - if( extended.file_size() == 0 && - ( typeflag == tf_regular || typeflag == tf_hiperf ) ) - extended.file_size( parse_octal( header + size_o, size_l ) ); + const bool skip = check_skip_filename( parser, name_pending, + extended.path().c_str(), filenames ); - retval = list_member_lz( decoder, infd, file_pos, member_end, - cdata_size, data_pos, mdata_end, courier, - extended, header, rbuf, i, worker_id, &msg, skip ); + retval = list_member_lz( decoder, infd, file_pos, member_end, cdata_size, + data_pos, mdata_end, courier, extended, + header, rbuf, i, worker_id, &msg, skip ); extended.reset(); if( retval < 0 ) // member_end exceeded, process rest of file { master = true; data_end = lzip_index.udata_size(); member_end = cdata_size; } else if( retval > 0 ) - { show_error( msg ); - show_error( "Error is not recoverable: exiting now." 
); - cleanup_and_fail( 2 ); } + { + const Packet * const opacket = new Packet( i, msg, Packet::error ); + courier.collect_packet( opacket, worker_id ); + goto done; + } } } +done: if( LZ_decompress_close( decoder ) < 0 ) { - Packet * const opacket = new Packet( lzip_index.members(), + const Packet * const opacket = new Packet( lzip_index.members(), "LZ_decompress_close failed.", Packet::error ); courier.collect_packet( opacket, worker_id ); } -done: courier.worker_finished(); return 0; } @@ -609,14 +602,13 @@ void muxer( Packet_courier & courier ) { while( true ) { - Packet * const opacket = courier.deliver_packet(); + const Packet * const opacket = courier.deliver_packet(); if( !opacket ) break; // queue is empty. all workers exited if( opacket->status == Packet::error ) { show_error( opacket->line.c_str() ); cleanup_and_fail( 2 ); } if( opacket->line.size() ) - { std::fputs( opacket->line.c_str(), stdout ); - std::fflush( stdout ); } + { std::fputs( opacket->line.c_str(), stdout ); std::fflush( stdout ); } delete opacket; } if( !courier.mastership_granted() ) // no worker found EOF blocks diff --git a/tarlz.h b/tarlz.h index 18a078d..10a2dd5 100644 --- a/tarlz.h +++ b/tarlz.h @@ -62,6 +62,14 @@ inline unsigned long long round_up( const unsigned long long size ) } +inline bool dotdot_at_i( const char * const filename, const int i ) + { + return ( filename[i] == '.' && filename[i+1] == '.' 
&& + ( i == 0 || filename[i-1] == '/' ) && + ( filename[i+2] == 0 || filename[i+2] == '/' ) ); + } + + enum { initial_line_length = 1000 }; // must be >= 87 for format_member_name class Resizable_buffer @@ -141,6 +149,7 @@ public: long long format_block( Resizable_buffer & rbuf ) const; bool parse( const char * const buf, const unsigned long long edsize, const bool permissive ); + void fill_from_ustar( const Tar_header header ); }; @@ -300,6 +309,8 @@ extern int cl_owner; extern int cl_group; extern int cl_data_size; extern Solidity solidity; +bool writeblock_wrapper( const int outfd, const uint8_t * const buffer, + const int size ); const char * remove_leading_dotslash( const char * const filename, const bool dotdot = false ); bool fill_headers( const char * const filename, Extended & extended, @@ -320,9 +331,9 @@ int encode( const std::string & archive_name, const Arg_parser & parser, const int debug_level, const bool append ); // defined in create_lz.cc -int encode_lz( const char * const archive_name, const Arg_parser & parser, - const int dictionary_size, const int match_len_limit, - const int num_workers, const int outfd, const int debug_level ); +int encode_lz( const Arg_parser & parser, const int dictionary_size, + const int match_len_limit, const int num_workers, + const int outfd, const int debug_level ); // defined in extract.cc enum Program_mode { m_none, m_append, m_concatenate, m_create, m_diff, @@ -351,6 +362,9 @@ void xunlock( pthread_mutex_t * const mutex ); void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex ); void xsignal( pthread_cond_t * const cond ); void xbroadcast( pthread_cond_t * const cond ); +bool check_skip_filename( const Arg_parser & parser, + std::vector< char > & name_pending, + const char * const filename, const int filenames ); class Lzip_index; int list_lz( const Arg_parser & parser, std::vector< char > & name_pending, const Lzip_index & lzip_index, const int filenames, diff --git a/testsuite/check.sh 
b/testsuite/check.sh index f8b65c3..aea9fbf 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -68,6 +68,7 @@ lzlib_1_11() { [ ${lwarn} = 0 ] && # t155.tar[.lz]: directory + links + file + eof, all with 155 char names # tar_in_tlz1.tar.lz 2 members (test.txt.tar test3.tar) 3 lzip members # tar_in_tlz2.tar.lz 2 members (test.txt.tar test3.tar) 5 lzip members +# ts_in_link.tar.lz: 4 symbolic links (link[1-4]) to / /dir/ dir/ dir(107/) # test_bad1.tar.lz: truncated at offset 6000 (of 7495) # test_bad2.tar.lz: byte at offset 6000 changed from 0x56 to 0x46 # test3.tar: 3 members (foo bar baz) + 2 zeroed 512-byte blocks @@ -89,6 +90,8 @@ lzlib_1_11() { [ ${lwarn} = 0 ] && # test3_eof1.tar.lz: test3.tar.lz without eof blocks # test3_eof2.tar.lz: test3.tar.lz with only one eof block # test3_eof3.tar.lz: test3.tar.lz with one zeroed block between foo and bar +# test3_em?.tar.lz: test3.tar.lz with one empty lzip member at each position +# test3_em6.tar.lz: test3.tar.lz preceded by four empty lzip members # tlz_in_tar1.tar: 1 member (test3.tar.lz) first magic damaged # tlz_in_tar2.tar: 2 members (foo test3.tar.lz) first magic damaged # ug32chars.tar.lz: 1 member (foo) with 32-character owner and group names @@ -263,7 +266,8 @@ cmp cfoo foo || test_failed $LINENO [ ! -e bar ] || test_failed $LINENO [ ! 
-e baz ] || test_failed $LINENO rm -f foo bar baz || framework_failure -# + +# test --list and --extract tar in tar.lz for i in "${tarint1_lz}" "${tarint2_lz}" ; do for j in 0 2 6 ; do "${TARLZ}" -tf "$i" --threads=$j > out$j || @@ -284,6 +288,31 @@ for i in "${tarint1_lz}" "${tarint2_lz}" ; do rm -f test.txt.tar test3.tar || framework_failure done +# test --list and --extract with empty lzip members +for i in 1 2 3 4 5 6 ; do + for j in 0 2 6 ; do + "${TARLZ}" -tf "${testdir}"/test3_em${i}.tar.lz --threads=$j \ + > out$j || test_failed $LINENO "$i $j" + "${TARLZ}" -tvf "${testdir}"/test3_em${i}.tar.lz --threads=$j \ + > outv$j || test_failed $LINENO "$i $j" + done + diff -u out0 out2 || test_failed $LINENO $i + diff -u out0 out6 || test_failed $LINENO $i + diff -u out2 out6 || test_failed $LINENO $i + diff -u outv0 outv2 || test_failed $LINENO $i + diff -u outv0 outv6 || test_failed $LINENO $i + diff -u outv2 outv6 || test_failed $LINENO $i + rm -f out0 out2 out6 outv0 outv2 outv6 || framework_failure + for j in 0 2 6 ; do + "${TARLZ}" -xf "${testdir}"/test3_em${i}.tar.lz --threads=$j || + test_failed $LINENO "$i $j" + cmp cfoo foo || test_failed $LINENO "$i $j" + cmp cbar bar || test_failed $LINENO "$i $j" + cmp cbaz baz || test_failed $LINENO "$i $j" + rm -f foo bar baz || framework_failure + done +done + # test --concatenate cat "${in_tar_lz}" > out.tar.lz || framework_failure "${TARLZ}" -Af out.tar.lz "${test3_lz}" || test_failed $LINENO @@ -490,12 +519,16 @@ if ln dummy_file dummy_link 2> /dev/null && ln dir1/dir2/dir3/in dir1/dir2/dir3/"${name_100}" || framework_failure ln dir1/dir2/dir3/in "${path_100}" || framework_failure ln dir1/dir2/dir3/in "${path_106}" || framework_failure + ln -s dir2/ dir1/dir2_link || framework_failure ln -s in dir1/dir2/dir3/link || framework_failure ln -s "${name_100}" dir1/dir2/dir3/link_100 || framework_failure "${TARLZ}" -0 -cf out.tar.lz dir1 || test_failed $LINENO + "${TARLZ}" -df out.tar.lz || test_failed $LINENO rm -rf 
dir1 || framework_failure "${TARLZ}" -xf out.tar.lz || test_failed $LINENO + "${TARLZ}" -df out.tar.lz || test_failed $LINENO cmp "${in}" dir1/dir2/dir3/in || test_failed $LINENO + cmp "${in}" dir1/dir2_link/dir3/in || test_failed $LINENO cmp "${in}" dir1/dir2/dir3/"${name_100}" || test_failed $LINENO cmp "${in}" "${path_100}" || test_failed $LINENO cmp "${in}" "${path_106}" || test_failed $LINENO @@ -511,6 +544,10 @@ if ln dummy_file dummy_link 2> /dev/null && "${TARLZ}" -0 -q -c ../tmp/dir1 | "${TARLZ}" -x || test_failed $LINENO diff -ru tmp/dir1 dir1 || test_failed $LINENO rm -rf tmp/dir1 dir1 || framework_failure + "${TARLZ}" -xf "${testdir}"/ts_in_link.tar.lz || test_failed $LINENO + "${TARLZ}" -df "${testdir}"/ts_in_link.tar.lz --ignore-ids || + test_failed $LINENO + rm -f link1 link2 link3 link4 || framework_failure else printf "\nwarning: skipping link test: 'ln' does not work on your system." fi diff --git a/testsuite/test3_em1.tar.lz b/testsuite/test3_em1.tar.lz new file mode 100644 index 0000000..0aa8724 Binary files /dev/null and b/testsuite/test3_em1.tar.lz differ diff --git a/testsuite/test3_em2.tar.lz b/testsuite/test3_em2.tar.lz new file mode 100644 index 0000000..4fe4e5d Binary files /dev/null and b/testsuite/test3_em2.tar.lz differ diff --git a/testsuite/test3_em3.tar.lz b/testsuite/test3_em3.tar.lz new file mode 100644 index 0000000..49e2eab Binary files /dev/null and b/testsuite/test3_em3.tar.lz differ diff --git a/testsuite/test3_em4.tar.lz b/testsuite/test3_em4.tar.lz new file mode 100644 index 0000000..95df508 Binary files /dev/null and b/testsuite/test3_em4.tar.lz differ diff --git a/testsuite/test3_em5.tar.lz b/testsuite/test3_em5.tar.lz new file mode 100644 index 0000000..706beb5 Binary files /dev/null and b/testsuite/test3_em5.tar.lz differ diff --git a/testsuite/test3_em6.tar.lz b/testsuite/test3_em6.tar.lz new file mode 100644 index 0000000..806884d Binary files /dev/null and b/testsuite/test3_em6.tar.lz differ diff --git 
a/testsuite/ts_in_link.tar.lz b/testsuite/ts_in_link.tar.lz new file mode 100644 index 0000000..dff816c Binary files /dev/null and b/testsuite/ts_in_link.tar.lz differ -- cgit v1.2.3