From 29867477a1c50b8cbea6212b8dd649a052778bf0 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 13 Apr 2019 10:59:10 +0200 Subject: Adding upstream version 0.15. Signed-off-by: Daniel Baumann --- ChangeLog | 6 + Makefile.in | 23 ++- NEWS | 23 +-- README | 3 +- configure | 2 +- create.cc | 130 ++++++++-------- create_lz.cc | 27 ++-- delete.cc | 223 +++++++++++++++++++++++++++ delete_lz.cc | 167 ++++++++++++++++++++ doc/tarlz.1 | 9 +- doc/tarlz.info | 78 ++++++---- doc/tarlz.texi | 55 ++++--- exclude.cc | 5 + extended.cc | 8 +- extract.cc | 172 ++++++++++----------- list_lz.cc | 359 +++++++++++++++++++++---------------------- lzip_index.cc | 6 +- lzip_index.h | 2 +- main.cc | 15 +- tarlz.h | 87 ++++++++--- testsuite/check.sh | 362 +++++++++++++++++++++++++++++++++++++------- testsuite/t155_fv1.tar | Bin 0 -> 10240 bytes testsuite/t155_fv1.tar.lz | Bin 0 -> 914 bytes testsuite/t155_fv2.tar | Bin 0 -> 10240 bytes testsuite/t155_fv2.tar.lz | Bin 0 -> 1042 bytes testsuite/t155_fv3.tar | Bin 0 -> 10240 bytes testsuite/t155_fv3.tar.lz | Bin 0 -> 915 bytes testsuite/t155_fv4.tar.lz | Bin 0 -> 1031 bytes testsuite/t155_fv5.tar.lz | Bin 0 -> 1173 bytes testsuite/t155_fv6.tar.lz | Bin 0 -> 1031 bytes testsuite/test3_dir.tar | Bin 0 -> 4096 bytes testsuite/test3_eof1.tar | Bin 0 -> 3072 bytes testsuite/test3_eof2.tar | Bin 0 -> 3584 bytes testsuite/test3_eof3.tar | Bin 0 -> 4608 bytes testsuite/test3_eof4.tar | Bin 0 -> 4096 bytes testsuite/test3_eof4.tar.lz | Bin 0 -> 535 bytes testsuite/test3_eof5.tar.lz | Bin 0 -> 535 bytes testsuite/test3_gh1.tar | Bin 0 -> 5120 bytes testsuite/test3_gh1.tar.lz | Bin 0 -> 574 bytes testsuite/test3_gh2.tar | Bin 0 -> 5120 bytes testsuite/test3_gh2.tar.lz | Bin 0 -> 607 bytes testsuite/test3_gh3.tar | Bin 0 -> 5120 bytes testsuite/test3_gh3.tar.lz | Bin 0 -> 645 bytes testsuite/test3_gh4.tar | Bin 0 -> 5120 bytes testsuite/test3_gh4.tar.lz | Bin 0 -> 795 bytes testsuite/test3_gh5.tar.lz | Bin 0 -> 574 bytes testsuite/test3_gh6.tar.lz | Bin 0 -> 521 bytes testsuite/test3_sm1.tar.lz | Bin 0 -> 579 bytes testsuite/test3_sm2.tar.lz | Bin 0 -> 612 bytes testsuite/test3_sm3.tar.lz | Bin 0 -> 650 bytes testsuite/test3_sm4.tar.lz | Bin 0 -> 798 bytes 51 files changed, 1255 insertions(+), 507 deletions(-) create mode 100644 delete.cc create mode 100644 delete_lz.cc create mode 100644 testsuite/t155_fv1.tar create mode 100644 testsuite/t155_fv1.tar.lz create mode 100644 testsuite/t155_fv2.tar create mode 100644 testsuite/t155_fv2.tar.lz create mode 100644 testsuite/t155_fv3.tar create mode 100644 testsuite/t155_fv3.tar.lz create mode 100644 testsuite/t155_fv4.tar.lz create mode 100644 testsuite/t155_fv5.tar.lz create mode 100644 testsuite/t155_fv6.tar.lz create mode 100644 testsuite/test3_dir.tar create mode 100644 testsuite/test3_eof1.tar create mode 100644 testsuite/test3_eof2.tar create mode 100644 testsuite/test3_eof3.tar create mode 100644 testsuite/test3_eof4.tar create mode 100644 testsuite/test3_eof4.tar.lz create mode 100644 testsuite/test3_eof5.tar.lz create mode 100644 testsuite/test3_gh1.tar create mode 100644 testsuite/test3_gh1.tar.lz create mode 100644 testsuite/test3_gh2.tar create mode 100644 testsuite/test3_gh2.tar.lz create mode 100644 testsuite/test3_gh3.tar create mode 100644 testsuite/test3_gh3.tar.lz create mode 100644 testsuite/test3_gh4.tar create mode 100644 testsuite/test3_gh4.tar.lz create mode 100644 testsuite/test3_gh5.tar.lz create mode 100644 testsuite/test3_gh6.tar.lz create mode 100644 testsuite/test3_sm1.tar.lz create mode 100644 testsuite/test3_sm2.tar.lz create mode 100644 testsuite/test3_sm3.tar.lz create mode 100644 testsuite/test3_sm4.tar.lz diff --git a/ChangeLog b/ChangeLog index 4845761..a296969 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2019-04-11 Antonio Diaz Diaz + + * Version 0.15 released. + * Added new option '--delete' (uncompressed and --no-solid archives). + * list_lz.cc: Fixed MT listing of archives with format violations. + 2019-03-12 Antonio Diaz Diaz * Version 0.14 released. diff --git a/Makefile.in b/Makefile.in index bdd5fe1..9756ab6 100644 --- a/Makefile.in +++ b/Makefile.in @@ -8,8 +8,8 @@ LIBS = -llz -lpthread SHELL = /bin/sh CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1 -objs = arg_parser.o lzip_index.o create.o create_lz.o exclude.o extended.o \ - extract.o list_lz.o main.o +objs = arg_parser.o lzip_index.o create.o create_lz.o delete.o delete_lz.o \ + exclude.o extended.o extract.o list_lz.o main.o .PHONY : all install install-bin install-info install-man \ @@ -33,6 +33,8 @@ $(objs) : Makefile arg_parser.o : arg_parser.h create.o : arg_parser.h tarlz.h create_lz.o : arg_parser.h tarlz.h +delete.o : arg_parser.h lzip_index.h tarlz.h +delete_lz.o : arg_parser.h lzip_index.h tarlz.h exclude.o : tarlz.h extended.o : tarlz.h extract.o : arg_parser.h lzip_index.h tarlz.h @@ -126,26 +128,33 @@ dist : doc $(DISTNAME)/testsuite/test.txt.tar \ $(DISTNAME)/testsuite/test_bad1.txt.tar \ $(DISTNAME)/testsuite/test_bad[12].txt \ - $(DISTNAME)/testsuite/t155.tar \ $(DISTNAME)/testsuite/rfoo \ $(DISTNAME)/testsuite/rbar \ $(DISTNAME)/testsuite/rbaz \ $(DISTNAME)/testsuite/test3.tar \ + $(DISTNAME)/testsuite/test3_eof[1-4].tar \ + $(DISTNAME)/testsuite/test3_gh[1-4].tar \ $(DISTNAME)/testsuite/test3_bad[1-5].tar \ + $(DISTNAME)/testsuite/test3_dir.tar \ + $(DISTNAME)/testsuite/t155.tar \ + $(DISTNAME)/testsuite/t155_fv[1-3].tar \ $(DISTNAME)/testsuite/eof.tar \ $(DISTNAME)/testsuite/test.txt.lz \ $(DISTNAME)/testsuite/test.txt.tar.lz \ $(DISTNAME)/testsuite/test_bad[12].txt.tar.lz \ $(DISTNAME)/testsuite/test3.tar.lz \ - $(DISTNAME)/testsuite/test3_eof[123].tar.lz \ + $(DISTNAME)/testsuite/test3_eof[1-5].tar.lz \ $(DISTNAME)/testsuite/test3_em[1-6].tar.lz \ - $(DISTNAME)/testsuite/tlz_in_tar[12].tar \ - $(DISTNAME)/testsuite/tar_in_tlz[12].tar.lz \ + $(DISTNAME)/testsuite/test3_gh[1-6].tar.lz \ + $(DISTNAME)/testsuite/test3_sm[1-4].tar.lz \ + $(DISTNAME)/testsuite/test3_bad[1-6].tar.lz \ $(DISTNAME)/testsuite/test3_dir.tar.lz \ $(DISTNAME)/testsuite/test3_dot.tar.lz \ + $(DISTNAME)/testsuite/tar_in_tlz[12].tar.lz \ + $(DISTNAME)/testsuite/tlz_in_tar[12].tar \ $(DISTNAME)/testsuite/ts_in_link.tar.lz \ $(DISTNAME)/testsuite/t155.tar.lz \ - $(DISTNAME)/testsuite/test3_bad[1-6].tar.lz \ + $(DISTNAME)/testsuite/t155_fv[1-6].tar.lz \ $(DISTNAME)/testsuite/dotdot[1-5].tar.lz \ $(DISTNAME)/testsuite/ug32767.tar.lz \ $(DISTNAME)/testsuite/ug32chars.tar.lz \ diff --git a/NEWS b/NEWS index 2b736ee..4c45385 100644 --- a/NEWS +++ b/NEWS @@ -1,17 +1,10 @@ -Changes in version 0.14: +Changes in version 0.15: -The new option '--exclude', which excludes files matching a shell pattern, -has been added. +The new option '--delete', which deletes files and directories from an +archive in place, has been added. It currently can delete only from +uncompressed archives and from archives with individually compressed files +('--no-solid' archives). -The new option '-h, --dereference', which instructs tarlz to follow symbolic -links during archive creation, appending or comparison, has been added. -(The short option name '-h' no longer means '--help'). - -Concatenation and appending to uncompressed archives and to standard output -have been implemented. - -The new option '--out-slots', setting the number of output packets buffered -per worker thread during multi-threaded creation and appending to compressed -archives, has been added. Increasing the number of packets may increase -compression speed if the files being archived are larger than 64 MiB -compressed, but requires more memory. +Multi-threaded listing of compressed archives with format violations (for +example, an extended header without the corresponding ustar header) has been +fixed. diff --git a/README b/README index f1149a2..c0ab721 100644 --- a/README +++ b/README @@ -22,7 +22,8 @@ archive, but it has the following advantages: parallel, multiplying the decompression speed. * New members can be appended to the archive (by removing the EOF - member) just like to an uncompressed tar archive. + member), and unwanted members can be deleted from the archive. Just + like an uncompressed tar archive. * It is a safe posix-style backup format. In case of corruption, tarlz can extract all the undamaged members from the tar.lz diff --git a/configure b/configure index 75442cf..bd98b61 100755 --- a/configure +++ b/configure @@ -6,7 +6,7 @@ # to copy, distribute and modify it. pkgname=tarlz -pkgversion=0.14 +pkgversion=0.15 progname=tarlz srctrigger=doc/${pkgname}.texi diff --git a/create.cc b/create.cc index f70b99f..b8fc336 100644 --- a/create.cc +++ b/create.cc @@ -92,37 +92,6 @@ bool option_C_after_relative_filename( const Arg_parser & parser ) } -// infd and outfd can refer to the same file if copying to a lower file -// position or if source and destination blocks don't overlap. -// max_size < 0 means no size limit. -bool copy_file( const int infd, const int outfd, const long long max_size = -1 ) - { - const int buffer_size = 65536; - // remaining number of bytes to copy - long long rest = ( ( max_size >= 0 ) ? max_size : buffer_size ); - long long copied_size = 0; - uint8_t * const buffer = new uint8_t[buffer_size]; - bool error = false; - - while( rest > 0 ) - { - const int size = std::min( (long long)buffer_size, rest ); - if( max_size >= 0 ) rest -= size; - const int rd = readblock( infd, buffer, size ); - if( rd != size && errno ) - { show_error( "Error reading input file", errno ); error = true; break; } - if( rd > 0 ) - { - if( !writeblock_wrapper( outfd, buffer, rd ) ) { error = true; break; } - copied_size += rd; - } - if( rd < size ) break; // EOF - } - delete[] buffer; - return ( !error && ( max_size < 0 || copied_size == max_size ) ); - } - - /* Check archive type. Return position of EOF blocks or -1 if failure. If remove_eof, leave fd file pos at beginning of the EOF blocks. Else, leave fd file pos at 0. */ @@ -185,12 +154,12 @@ long long check_uncompressed_appendable( const int fd, const bool remove_eof ) struct stat st; // fd must be regular if( fstat( fd, &st ) != 0 || !S_ISREG( st.st_mode ) ) return -1; if( lseek( fd, 0, SEEK_SET ) != 0 ) return -1; - if( st.st_size == 0 ) return 0; // append to empty archive + if( st.st_size <= 0 ) return 0; // append to empty archive long long eof_pos = 0; Extended extended; // metadata from extended records Resizable_buffer rbuf; // extended records buffer bool prev_extended = false; // prev header was extended - while( true ) // process one tar member per iteration + while( true ) // process one tar header per iteration { Tar_header header; const int rd = readblock( fd, header, header_size ); @@ -202,12 +171,12 @@ long long check_uncompressed_appendable( const int fd, const bool remove_eof ) if( typeflag == tf_extended || typeflag == tf_global ) { if( prev_extended ) return -1; - const unsigned long long edsize = parse_octal( header + size_o, size_l ); - const unsigned long long bufsize = round_up( edsize ); - if( edsize == 0 || edsize >= 1ULL << 33 || bufsize >= INT_MAX ) + const long long edsize = parse_octal( header + size_o, size_l ); + const long long bufsize = round_up( edsize ); + if( edsize <= 0 || edsize >= 1LL << 33 || bufsize >= INT_MAX ) return -1; // overflow or no extended data if( !rbuf.resize( bufsize ) ) return -1; - if( readblock( fd, (uint8_t *)rbuf(), bufsize ) != (int)bufsize ) + if( readblock( fd, (uint8_t *)rbuf(), bufsize ) != bufsize ) return -1; if( typeflag == tf_extended ) { if( !extended.parse( rbuf(), edsize, false ) ) return -1; @@ -303,8 +272,8 @@ bool store_name( const char * const filename, Extended & extended, int add_member( const char * const filename, const struct stat *, const int flag, struct FTW * ) { - if( Exclude::excluded( filename ) ) return 0; // skip excluded - unsigned long long file_size = 0; + if( Exclude::excluded( filename ) ) return 0; // skip excluded files + long long file_size; Extended extended; // metadata for extended records Tar_header header; if( !fill_headers( filename, extended, header, file_size, flag ) ) return 0; @@ -319,12 +288,12 @@ int add_member( const char * const filename, const struct stat *, return 1; if( file_size ) { - enum { bufsize = 32 * header_size }; + const long long bufsize = 32 * header_size; uint8_t buf[bufsize]; - unsigned long long rest = file_size; + long long rest = file_size; while( rest > 0 ) { - int size = std::min( rest, (unsigned long long)bufsize ); + int size = std::min( rest, bufsize ); const int rd = readblock( infd, buf, size ); rest -= rd; if( rd != size ) @@ -354,6 +323,37 @@ int add_member( const char * const filename, const struct stat *, } // end namespace +// infd and outfd can refer to the same file if copying to a lower file +// position or if source and destination blocks don't overlap. +// max_size < 0 means no size limit. +bool copy_file( const int infd, const int outfd, const long long max_size ) + { + const long long buffer_size = 65536; + // remaining number of bytes to copy + long long rest = ( ( max_size >= 0 ) ? max_size : buffer_size ); + long long copied_size = 0; + uint8_t * const buffer = new uint8_t[buffer_size]; + bool error = false; + + while( rest > 0 ) + { + const int size = std::min( buffer_size, rest ); + if( max_size >= 0 ) rest -= size; + const int rd = readblock( infd, buffer, size ); + if( rd != size && errno ) + { show_error( "Error reading input file", errno ); error = true; break; } + if( rd > 0 ) + { + if( !writeblock_wrapper( outfd, buffer, rd ) ) { error = true; break; } + copied_size += rd; + } + if( rd < size ) break; // EOF + } + delete[] buffer; + return ( !error && ( max_size < 0 || copied_size == max_size ) ); + } + + bool writeblock_wrapper( const int outfd, const uint8_t * const buffer, const int size ) { @@ -417,8 +417,7 @@ const char * remove_leading_dotslash( const char * const filename, bool fill_headers( const char * const filename, Extended & extended, - Tar_header header, unsigned long long & file_size, - const int flag ) + Tar_header header, long long & file_size, const int flag ) { struct stat st; if( hstat( filename, &st ) != 0 ) @@ -447,7 +446,7 @@ bool fill_headers( const char * const filename, Extended & extended, set_error_status( 1 ); return false; } print_octal( header + mtime_o, mtime_l - 1, mtime ); Typeflag typeflag; - if( S_ISREG( mode ) ) { typeflag = tf_regular; file_size = st.st_size; } + if( S_ISREG( mode ) ) typeflag = tf_regular; else if( S_ISDIR( mode ) ) { typeflag = tf_directory; @@ -508,7 +507,9 @@ bool fill_headers( const char * const filename, Extended & extended, std::strncpy( (char *)header + gname_o, gr->gr_name, gname_l - 1 ); /* else { show_file_error( filename, "Can't read group name from database", errno ); set_error_status( 1 ); } */ // numerical only - if( file_size >= 1ULL << 33 ) + file_size = ( typeflag == tf_regular && st.st_size > 0 && + st.st_size <= max_file_size ) ? st.st_size : 0; + if( file_size >= 1LL << 33 ) { extended.file_size( file_size ); force_extended_name = true; } else print_octal( header + size_o, size_l - 1, file_size ); store_name( filename, extended, header, force_extended_name ); @@ -521,7 +522,7 @@ bool block_is_full( const Extended & extended, const unsigned long long file_size, unsigned long long & partial_data_size ) { - const unsigned long long member_size = + const unsigned long long member_size = // may overflow 'long long' header_size + extended.full_size() + round_up( file_size ); const unsigned long long target_size = cl_data_size; if( partial_data_size >= target_size || @@ -574,18 +575,18 @@ bool has_lz_ext( const std::string & name ) } -int concatenate( std::string archive_name, const Arg_parser & parser, +int concatenate( const std::string & archive_name, const Arg_parser & parser, const int filenames ) { if( !filenames ) { if( verbosity >= 1 ) show_error( "Nothing to concatenate." ); return 0; } const bool to_stdout = archive_name.empty(); + archive_namep = to_stdout ? "(stdout)" : archive_name.c_str(); const int outfd = to_stdout ? STDOUT_FILENO : open_outstream( archive_name, false ); if( outfd < 0 ) return 1; - if( to_stdout ) archive_name = "(stdout)"; - else if( !file_is_the_archive.init( outfd ) ) - { show_file_error( archive_name.c_str(), "Can't stat", errno ); return 1; } + if( !to_stdout && !file_is_the_archive.init( outfd ) ) + { show_file_error( archive_namep, "Can't stat", errno ); return 1; } int compressed; // tri-state bool if( to_stdout ) compressed = -1; // unknown else @@ -598,7 +599,7 @@ int concatenate( std::string archive_name, const Arg_parser & parser, pos = check_uncompressed_appendable( outfd, true ); if( pos > 0 ) compressed = false; else if( pos < 0 ) - { show_file_error( archive_name.c_str(), compressed ? + { show_file_error( archive_namep, compressed ? "This does not look like an appendable tar.lz archive." : "This does not look like an appendable tar archive." ); return 2; } @@ -612,7 +613,7 @@ int concatenate( std::string archive_name, const Arg_parser & parser, if( parser.code( i ) ) continue; // skip options if( parser.argument( i ).empty() ) continue; // skip empty names const char * const filename = parser.argument( i ).c_str(); - if( Exclude::excluded( filename ) ) continue; // skip excluded + if( Exclude::excluded( filename ) ) continue; // skip excluded files const int infd = open_instream( filename ); if( infd < 0 ) { retval = 1; break; } struct stat st; @@ -644,7 +645,7 @@ int concatenate( std::string archive_name, const Arg_parser & parser, if( eof_pending && !write_eof_records( outfd, compressed ) && !retval ) retval = 1; if( close( outfd ) != 0 && !retval ) - { show_file_error( archive_name.c_str(), "Error closing archive", errno ); + { show_file_error( archive_namep, "Error closing archive", errno ); retval = 1; } return retval; } @@ -673,21 +674,23 @@ int encode( const std::string & archive_name, const Arg_parser & parser, { 3 << 23, 132 }, // -8 { 1 << 25, 273 } }; // -9 const bool compressed = ( level >= 0 && level <= 9 ); + const bool to_stdout = archive_name.empty(); + archive_namep = to_stdout ? "(stdout)" : archive_name.c_str(); - if( archive_name.size() && !compressed && has_lz_ext( archive_name ) ) - { show_file_error( archive_name.c_str(), + if( !to_stdout && !compressed && has_lz_ext( archive_name ) ) + { show_file_error( archive_namep, "Uncompressed mode incompatible with .lz extension." ); return 2; } if( !filenames ) { - if( !append && archive_name.size() ) // create archive + if( !append && !to_stdout ) // create archive { show_error( "Cowardly refusing to create an empty archive.", 0, true ); return 1; } else // create/append to stdout or append to archive { if( verbosity >= 1 ) show_error( "Nothing to append." ); return 0; } } - if( archive_name.empty() ) // create/append to stdout + if( to_stdout ) // create/append to stdout goutfd = STDOUT_FILENO; else if( !append ) // create archive { if( ( goutfd = open_outstream( archive_name ) ) < 0 ) return 1; } @@ -695,14 +698,13 @@ int encode( const std::string & archive_name, const Arg_parser & parser, { if( ( goutfd = open_outstream( archive_name, false ) ) < 0 ) return 1; if( compressed && check_appendable( goutfd, true ) < 0 ) - { show_file_error( archive_name.c_str(), + { show_file_error( archive_namep, "This does not look like an appendable tar.lz archive." ); return 2; } if( !compressed && check_uncompressed_appendable( goutfd, true ) < 0 ) - { show_file_error( archive_name.c_str(), + { show_file_error( archive_namep, "This does not look like an appendable tar archive." ); return 2; } } - archive_namep = archive_name.size() ? archive_name.c_str() : "(stdout)"; if( !file_is_the_archive.init( goutfd ) ) { show_file_error( archive_namep, "Can't stat", errno ); return 1; } @@ -720,7 +722,7 @@ int encode( const std::string & archive_name, const Arg_parser & parser, !option_C_after_relative_filename( parser ) ) { // show_file_error( archive_namep, "Multi-threaded --create" ); - return encode_lz( parser, dictionary_size, + return encode_lz( archive_namep, parser, dictionary_size, option_mapping[level].match_len_limit, num_workers, goutfd, out_slots, debug_level, dereference ); } @@ -752,7 +754,7 @@ int encode( const std::string & archive_name, const Arg_parser & parser, while( len > 1 && arg[len-1] == '/' ) --len; if( len < arg.size() ) { deslashed.assign( arg, 0, len ); filename = deslashed.c_str(); } - if( Exclude::excluded( filename ) ) continue; // skip excluded + if( Exclude::excluded( filename ) ) continue; // skip excluded files struct stat st; if( lstat( filename, &st ) != 0 ) // filename from command line { show_file_error( filename, "Can't stat input file", errno ); @@ -778,7 +780,7 @@ int encode( const std::string & archive_name, const Arg_parser & parser, if( encoder && LZ_compress_close( encoder ) < 0 ) { show_error( "LZ_compress_close failed." ); retval = 1; } if( close( goutfd ) != 0 && !retval ) - { show_file_error( archive_name.c_str(), "Error closing archive", errno ); + { show_file_error( archive_namep, "Error closing archive", errno ); retval = 1; } return final_exit_status( retval ); } diff --git a/create_lz.cc b/create_lz.cc index e72839e..9cfdedd 100644 --- a/create_lz.cc +++ b/create_lz.cc @@ -83,13 +83,13 @@ public: struct Ipacket // filename, file size and headers { - const unsigned long long file_size; + const long long file_size; const std::string filename; // filename.empty() means end of lzip member const Extended * const extended; const uint8_t * const header; Ipacket() : file_size( 0 ), extended( 0 ), header( 0 ) {} - Ipacket( const char * const name, const unsigned long long s, + Ipacket( const char * const name, const long long s, const Extended * const ext, const uint8_t * const head ) : file_size( s ), filename( name ), extended( ext ), header( head ) {} }; @@ -260,8 +260,8 @@ public: int add_member_lz( const char * const filename, const struct stat *, const int flag, struct FTW * ) { - if( Exclude::excluded( filename ) ) return 0; // skip excluded - unsigned long long file_size = 0; + if( Exclude::excluded( filename ) ) return 0; // skip excluded files + long long file_size; // metadata for extended records Extended * const extended = new( std::nothrow ) Extended; uint8_t * const header = extended ? new( std::nothrow ) Tar_header : 0; @@ -315,7 +315,7 @@ extern "C" void * grouper( void * arg ) while( len > 1 && arg[len-1] == '/' ) --len; if( len < arg.size() ) { deslashed.assign( arg, 0, len ); filename = deslashed.c_str(); } - if( Exclude::excluded( filename ) ) continue; // skip excluded + if( Exclude::excluded( filename ) ) continue; // skip excluded files struct stat st; if( lstat( filename, &st ) != 0 ) // filename from command line { show_file_error( filename, "Can't stat input file", errno ); @@ -463,12 +463,12 @@ extern "C" void * cworker( void * arg ) if( ipacket->file_size ) { - enum { bufsize = 32 * header_size }; + const long long bufsize = 32 * header_size; uint8_t buf[bufsize]; - unsigned long long rest = ipacket->file_size; + long long rest = ipacket->file_size; while( rest > 0 ) { - int size = std::min( rest, (unsigned long long)bufsize ); + int size = std::min( rest, bufsize ); const int rd = readblock( infd, buf, size ); rest -= rd; if( rd != size ) @@ -521,10 +521,10 @@ void muxer( Packet_courier & courier, const int outfd ) // init the courier, then start the grouper and the workers and call the muxer -int encode_lz( const Arg_parser & parser, const int dictionary_size, - const int match_len_limit, const int num_workers, - const int outfd, const int out_slots, const int debug_level, - const bool dereference ) +int encode_lz( const char * const archive_namep, const Arg_parser & parser, + const int dictionary_size, const int match_len_limit, + const int num_workers, const int outfd, const int out_slots, + const int debug_level, const bool dereference ) { const int in_slots = 65536; // max small files (<=512B) in 64 MiB const int total_in_slots = ( INT_MAX / num_workers >= in_slots ) ? @@ -579,7 +579,8 @@ int encode_lz( const Arg_parser & parser, const int dictionary_size, int retval = !write_eof_records( outfd, true ); if( close( outfd ) != 0 && !retval ) - { show_error( "Error closing archive", errno ); retval = 1; } + { show_file_error( archive_namep, "Error closing archive", errno ); + retval = 1; } if( debug_level & 1 ) std::fprintf( stderr, diff --git a/delete.cc b/delete.cc new file mode 100644 index 0000000..2e87833 --- /dev/null +++ b/delete.cc @@ -0,0 +1,223 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2019 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "arg_parser.h" +#include "lzip_index.h" +#include "tarlz.h" + + +namespace { + +bool parse_records( const int infd, Extended & extended, + const Tar_header header, Resizable_buffer & rbuf, + const bool permissive ) + { + const long long edsize = parse_octal( header + size_o, size_l ); + const long long bufsize = round_up( edsize ); + if( edsize <= 0 || edsize >= 1LL << 33 || bufsize >= INT_MAX ) + return false; // overflow or no extended data + if( !rbuf.resize( bufsize ) ) return false; // extended records buffer + return ( readblock( infd, (uint8_t *)rbuf(), bufsize ) == bufsize && + extended.parse( rbuf(), edsize, permissive ) ); + } + +} // end namespace + + +bool safe_seek( const int fd, const long long pos ) + { + if( lseek( fd, pos, SEEK_SET ) == pos ) return true; + show_error( "Seek error", errno ); return false; + } + + +int tail_copy( const char * const archive_namep, const Arg_parser & parser, + std::vector< char > & name_pending, + const Lzip_index & lzip_index, const long long istream_pos, + const int infd, const int outfd, int retval ) + { + const long long rest = lzip_index.file_size() - istream_pos; + if( istream_pos > 0 && rest > 0 && + ( !safe_seek( infd, istream_pos ) || + !copy_file( infd, outfd, rest ) ) ) + { show_file_error( archive_namep, "Error during tail copy." ); + return retval ? retval : 1; } + const long long ostream_pos = lseek( outfd, 0, SEEK_CUR ); + if( ostream_pos < 0 ) { show_error( "Seek error", errno ); retval = 1; } + else if( ostream_pos > 0 && ostream_pos < lzip_index.file_size() ) + { + int result; + do result = ftruncate( outfd, ostream_pos ); + while( result != 0 && errno == EINTR ); + if( result != 0 ) + { + show_file_error( archive_namep, "Can't truncate archive", errno ); + if( retval < 1 ) retval = 1; + } + } + + if( ( close( outfd ) != 0 || close( infd ) != 0 ) && !retval ) + { show_file_error( archive_namep, "Error closing archive", errno ); + retval = 1; } + + if( retval == 0 ) for( int i = 0; i < parser.arguments(); ++i ) + if( !parser.code( i ) && parser.argument( i ).size() && name_pending[i] ) + { + show_file_error( parser.argument( i ).c_str(), "Not found in archive." ); + retval = 1; + } + return retval; + } + + +/* Deleting from a corrupt archive must not worsen the corruption. Stop and + tail-copy as soon as corruption is found. */ +int delete_members( const std::string & archive_name, const Arg_parser & parser, + const int filenames, const bool missing_crc, + const bool permissive ) + { + if( !filenames ) + { if( verbosity >= 1 ) show_error( "Nothing to delete." ); return 0; } + if( archive_name.empty() ) + { show_error( "Deleting from stdin not implemented yet." ); return 1; } + const char * const archive_namep = archive_name.c_str(); + const int infd = open_instream( archive_name ); + if( infd < 0 ) return 1; + const int outfd = open_outstream( archive_name, false ); + if( outfd < 0 ) { close( infd ); return 1; } + + // mark member names to be deleted + std::vector< char > name_pending( parser.arguments(), false ); + for( int i = 0; i < parser.arguments(); ++i ) + if( !parser.code( i ) && parser.argument( i ).size() && + !Exclude::excluded( parser.argument( i ).c_str() ) ) + name_pending[i] = true; + + const Lzip_index lzip_index( infd, true, false ); // only regular files + if( lzip_index.retval() == 0 ) // compressed + return delete_members_lz( archive_namep, parser, name_pending, lzip_index, + filenames, infd, outfd, missing_crc, permissive ); + if( lseek( infd, 0, SEEK_SET ) != 0 ) + { show_file_error( archive_namep, "Archive is not seekable." ); return 1; } + if( lzip_index.file_size() < 3 * header_size ) + { show_file_error( archive_namep, posix_msg ); return 2; } + // archive is uncompressed seekable, unless compressed corrupt + + Resizable_buffer rbuf; + long long istream_pos = 0; // source of next data move + long long member_begin = 0; // first pos of current tar member + Extended extended; // metadata from extended records + int retval = 0; + bool prev_extended = false; // prev header was extended + while( true ) // process one tar header per iteration + { + if( !prev_extended && ( member_begin = lseek( infd, 0, SEEK_CUR ) ) < 0 ) + { show_error( "Seek error", errno ); retval = 1; break; } + Tar_header header; + const int rd = readblock( infd, header, header_size ); + if( rd == 0 && errno == 0 ) // missing EOF blocks + { show_file_error( archive_namep, end_msg ); retval = 2; break; } + if( rd != header_size ) + { show_file_error( archive_namep, "Read error", errno ); + retval = 2; break; } + if( !verify_ustar_chksum( header ) ) + { + if( block_is_zero( header, header_size ) ) // EOF + { + if( prev_extended && !permissive ) + { show_file_error( archive_namep, fv_msg1 ); retval = 2; } + break; + } + show_file_error( archive_namep, "Corrupt header in archive." ); + retval = 2; break; + } + + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + if( typeflag == tf_global ) + { + if( prev_extended && !permissive ) + { show_file_error( archive_namep, fv_msg2 ); retval = 2; break; } + Extended dummy; // global headers are parsed and ignored + if( !parse_records( infd, dummy, header, rbuf, true ) ) + { show_file_error( archive_namep, gblrec_msg ); retval = 2; break; } + continue; + } + if( typeflag == tf_extended ) + { + if( prev_extended && !permissive ) + { show_file_error( archive_namep, fv_msg3 ); retval = 2; break; } + if( !parse_records( infd, extended, header, rbuf, permissive ) ) + { show_file_error( archive_namep, extrec_msg ); retval = 2; break; } + else if( !extended.crc_present() && missing_crc ) + { show_file_error( archive_namep, mcrc_msg ); retval = 2; break; } + prev_extended = true; + continue; + } + prev_extended = false; + + extended.fill_from_ustar( header ); // copy metadata from header + + { // skip member + long long rest = extended.file_size(); + const int rem = rest % header_size; + if( rem ) rest += header_size - rem; // padding + if( lseek( infd, rest, SEEK_CUR ) <= 0 ) + { show_file_error( archive_namep, "Seek error", errno ); + retval = 1; break; } + } + + if( !check_skip_filename( parser, name_pending, extended.path().c_str(), + filenames ) ) // delete tar member + { + if( !show_member_name( extended, header, 1, rbuf ) ) + { retval = 1; break; } + const long long pos = lseek( infd, 0, SEEK_CUR ); + if( pos <= 0 || pos <= member_begin || member_begin < istream_pos ) + { show_file_error( archive_namep, "Seek error", errno ); + retval = 1; break; } + const long long size = member_begin - istream_pos; + if( size > 0 ) // move pending data each time a member is deleted + { + if( istream_pos == 0 ) + { if( !safe_seek( outfd, size ) ) { retval = 1; break; } } + else if( !safe_seek( infd, istream_pos ) || + !copy_file( infd, outfd, size ) || + !safe_seek( infd, pos ) ) { retval = 1; break; } + } + istream_pos = pos; + } + extended.reset(); + } + + return tail_copy( archive_namep, parser, name_pending, lzip_index, + istream_pos, infd, outfd, retval ); + } diff --git a/delete_lz.cc b/delete_lz.cc new file mode 100644 index 0000000..11c3a14 --- /dev/null +++ b/delete_lz.cc @@ -0,0 +1,167 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2019 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "arg_parser.h" +#include "lzip_index.h" +#include "tarlz.h" + + +/* Deleting from a corrupt archive must not worsen the corruption. Stop and + tail-copy as soon as corruption is found. */ +int delete_members_lz( const char * const archive_namep, + const Arg_parser & parser, + std::vector< char > & name_pending, + const Lzip_index & lzip_index, + const int filenames, const int infd, const int outfd, + const bool missing_crc, const bool permissive ) + { + Resizable_buffer rbuf; + LZ_Decoder * const decoder = LZ_decompress_open(); + if( !rbuf.size() || !decoder || LZ_decompress_errno( decoder ) != LZ_ok ) + { show_error( mem_msg ); return 1; } + + long long istream_pos = 0; // source of next data move + const long long cdata_size = lzip_index.cdata_size(); + int retval = 0; + for( long i = 0; i < lzip_index.members(); ++i ) + { + const long long mdata_pos = lzip_index.dblock( i ).pos(); + long long data_pos = mdata_pos; + const long long mdata_end = lzip_index.dblock( i ).end(); + if( data_pos >= mdata_end ) continue; // empty lzip member + const long long member_pos = lzip_index.mblock( i ).pos(); + long long file_pos = member_pos; + const long long member_end = lzip_index.mblock( i ).end(); + + long long member_begin = 0; // first pos of current tar member + Extended extended; // metadata from extended records + bool prev_extended = false; // prev header was extended + LZ_decompress_reset( decoder ); // prepare for new member + if( !safe_seek( infd, member_pos ) ) { retval = 1; break; } + while( true ) // process one tar header per iteration + { + if( data_pos >= mdata_end ) + { + if( data_pos == mdata_end && !prev_extended ) break; + // member end exceeded or ends in extended + show_file_error( archive_namep, "Member misalignment found." ); + retval = 2; goto done; + } + if( !prev_extended ) member_begin = data_pos; + Tar_header header; + const char * msg = 0; + retval = archive_read_lz( decoder, infd, file_pos, member_end, + cdata_size, header, header_size, &msg ); + if( retval != 0 ) { show_file_error( archive_namep, msg ); goto done; } + data_pos += header_size; + if( !verify_ustar_chksum( header ) ) + { + if( block_is_zero( header, header_size ) ) // EOF + { + if( prev_extended && !permissive ) + { show_file_error( archive_namep, fv_msg1 ); retval = 2; } + goto done; + } + show_file_error( archive_namep, ( data_pos > header_size ) ? + bad_hdr_msg : posix_lz_msg ); + retval = 2; + goto done; + } + + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + if( typeflag == tf_global ) + { + if( prev_extended && !permissive ) + { show_file_error( archive_namep, fv_msg2 ); retval = 2; goto done; } + Extended dummy; // global headers are parsed and ignored + retval = parse_records_lz( decoder, infd, file_pos, member_end, + cdata_size, data_pos, dummy, header, + rbuf, &msg, true ); + if( retval == 0 ) continue; + show_file_error( archive_namep, msg ? msg : gblrec_msg ); + goto done; + } + if( typeflag == tf_extended ) + { + if( prev_extended && !permissive ) { msg = fv_msg3; retval = 2; } + else retval = parse_records_lz( decoder, infd, file_pos, member_end, + cdata_size, data_pos, extended, header, + rbuf, &msg, permissive ); + if( retval == 0 && !extended.crc_present() && missing_crc ) + { msg = mcrc_msg; retval = 2; } + if( retval == 0 ) { prev_extended = true; continue; } + show_file_error( archive_namep, msg ? msg : extrec_msg ); + goto done; + } + prev_extended = false; + + extended.fill_from_ustar( header ); // copy metadata from header + + long long rest = extended.file_size(); + const int rem = rest % header_size; + if( rem ) rest += header_size - rem; // padding + if( data_pos + rest >= mdata_end ) data_pos += rest; + else // skip tar member + if( ( retval = skip_member_lz( decoder, infd, file_pos, member_end, + cdata_size, data_pos, rest, &msg ) ) != 0 ) + goto done; + + if( !check_skip_filename( parser, name_pending, extended.path().c_str(), + filenames ) ) // delete tar member + { + // verify that members match + if( member_begin != mdata_pos || data_pos != mdata_end ) + { show_file_error( extended.path().c_str(), + "Can't delete: not individually compressed." ); + retval = 2; extended.reset(); continue; } + if( !show_member_name( extended, header, 1, rbuf ) ) + { retval = 1; goto done; } + const long long size = member_pos - istream_pos; + if( size > 0 ) // move pending data each time a member is deleted + { + if( istream_pos == 0 ) + { if( !safe_seek( outfd, size ) ) { retval = 1; break; } } + else if( !safe_seek( infd, istream_pos ) || + !copy_file( infd, outfd, size ) ) { retval = 1; break; } + } + istream_pos = member_end; + } + extended.reset(); + } + } +done: + if( LZ_decompress_close( decoder ) < 0 && !retval ) + { show_error( "LZ_decompress_close failed." ); retval = 1; } + // tail copy keeps trailing data + return tail_copy( archive_namep, parser, name_pending, lzip_index, + istream_pos, infd, outfd, retval ); + } diff --git a/doc/tarlz.1 b/doc/tarlz.1 index a17e58d..4308fc2 100644 --- a/doc/tarlz.1 +++ b/doc/tarlz.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1. -.TH TARLZ "1" "March 2019" "tarlz 0.14" "User Commands" +.TH TARLZ "1" "April 2019" "tarlz 0.15" "User Commands" .SH NAME tarlz \- creates tar archives with multimember lzip compression .SH SYNOPSIS @@ -31,7 +31,7 @@ display this help and exit output version information and exit .TP \fB\-A\fR, \fB\-\-concatenate\fR -append tar.lz archives to the end of an archive +append archives to the end of an archive .TP \fB\-B\fR, \fB\-\-data\-size=\fR set target size of input data blocks [2x8=16 MiB] @@ -48,6 +48,9 @@ find differences between archive and file system \fB\-\-ignore\-ids\fR ignore differences in owner and group IDs .TP +\fB\-\-delete\fR +delete files/directories from an archive +.TP \fB\-\-exclude=\fR exclude files matching a shell pattern .TP @@ -73,7 +76,7 @@ list the contents of an archive verbosely list files processed .TP \fB\-x\fR, \fB\-\-extract\fR -extract files from an archive +extract files/directories from an archive .TP \fB\-0\fR .. \fB\-9\fR set compression level [default 6] diff --git a/doc/tarlz.info b/doc/tarlz.info index fc1f092..f49bb43 100644 --- a/doc/tarlz.info +++ b/doc/tarlz.info @@ -11,7 +11,7 @@ File: tarlz.info, Node: Top, Next: Introduction, Up: (dir) Tarlz Manual ************ -This manual is for Tarlz (version 0.14, 12 March 2019). +This manual is for Tarlz (version 0.15, 11 April 2019). * Menu: @@ -59,7 +59,8 @@ archive, but it has the following advantages: parallel, multiplying the decompression speed. * New members can be appended to the archive (by removing the EOF - member) just like to an uncompressed tar archive. + member), and unwanted members can be deleted from the archive. Just + like an uncompressed tar archive. * It is a safe posix-style backup format. In case of corruption, tarlz can extract all the undamaged members from the tar.lz @@ -88,8 +89,11 @@ The format for running tarlz is: tarlz [OPTIONS] [FILES] -On archive creation or appending tarlz archives the files specified, but -removes from member names any leading and trailing slashes and any +All operations except '--concatenate' operate on whole trees if any +FILE is a directory. + + On archive creation or appending tarlz archives the files specified, +but removes from member names any leading and trailing slashes and any filename prefixes containing a '..' component. On extraction, leading and trailing slashes are also removed from member names, and archive members containing a '..' component in the filename are skipped. Tarlz @@ -176,6 +180,15 @@ equivalent to '-1 --solid' Make '--diff' ignore differences in owner and group IDs. This option is useful when comparing an '--anonymous' archive. +'--delete' + Delete the specified files and directories from an archive in + place. It currently can delete only from uncompressed archives and + from archives with individually compressed files ('--no-solid' + archives). To delete a directory without deleting the files under + it, use 'tarlz --delete -f foo --exclude='dir/*' dir'. Deleting in + place may be dangerous. A corrupt archive, a power cut, or an I/O + error may cause data loss. + '--exclude=PATTERN' Exclude files matching a shell pattern like '*.o'. A file is considered to match if any component of the filename matches. For @@ -240,8 +253,10 @@ equivalent to '-1 --solid' '-x' '--extract' - Extract files from an archive. If FILES are given, extract only - the FILES given. Else extract all the files in the archive. + Extract files from an archive. If FILES are given, extract only the + FILES given. Else extract all the files in the archive. To extract + a directory without extracting the files under it, use + 'tarlz -xf foo --exclude='dir/*' dir'. '-0 .. -9' Set the compression level for '--create' and '--append'. The @@ -597,7 +612,7 @@ characters in the array contain non-null characters including the last character. Each numeric field contains a leading space- or zero-filled, optionally null-terminated octal number using digits from the ISO/IEC 646:1991 (ASCII) standard. Tarlz is able to decode numeric fields 1 -byte larger than standard ustar by not requiring a terminating null +byte longer than standard ustar by not requiring a terminating null character.  @@ -607,10 +622,10 @@ File: tarlz.info, Node: Amendments to pax format, Next: Multi-threaded tar, P ****************************************** Tarlz is meant to reliably detect invalid or corrupt metadata during -extraction and to not create safety risks in the archives it creates. In -order to achieve these goals, tarlz makes some changes to the variant -of the pax format that it uses. This chapter describes these changes -and the concrete reasons to implement them. +decoding, and to create safe archives where corrupt metadata can be +reliably detected. In order to achieve these goals, tarlz makes some +changes to the variant of the pax format that it uses. This chapter +describes these changes and the concrete reasons to implement them. 4.1 Add a CRC of the extended records @@ -659,9 +674,9 @@ overridden by extended records. size larger than 8 GiB or a link name longer than 100 bytes), tarlz moves the filename also to the extended header to prevent an ustar tool from trying to extract the file or link. This also makes easier during -parallel extraction or listing the detection of a tar member split -between two lzip members at the boundary between the extended header -and the ustar header. +parallel decoding the detection of a tar member split between two lzip +members at the boundary between the extended header and the ustar +header. 4.3 As simple as possible (but not simpler) @@ -673,6 +688,10 @@ of a file exceed the limits of the ustar format. Adding extended headers to each member just to record subsecond timestamps seems wasteful for a backup format. + Global pax headers are tolerated, but not supported; they are parsed +and ignored. Some operations may not behave as expected if the archive +contains global headers. + 4.4 Avoid misconversions to/from UTF-8 ====================================== @@ -817,9 +836,10 @@ Example 6: Extract all files from archive 'archive.tar.lz'. tarlz -xf archive.tar.lz -Example 7: Extract files 'a' and 'c' from archive 'archive.tar.lz'. +Example 7: Extract files 'a' and 'c', and the whole tree under +directory 'dir1' from archive 'archive.tar.lz'. - tarlz -xf archive.tar.lz a c + tarlz -xf archive.tar.lz a c dir1 Example 8: Copy the contents of directory 'sourcedir' to the directory @@ -869,19 +889,19 @@ Concept index Tag Table: Node: Top223 Node: Introduction1086 -Node: Invoking tarlz3280 -Ref: --data-size5339 -Ref: --bsolid11442 -Node: File format15072 -Ref: key_crc3219892 -Node: Amendments to pax format25309 -Ref: crc3225833 -Ref: flawed-compat26858 -Node: Multi-threaded tar29225 -Node: Minimum archive sizes31764 -Node: Examples33897 -Node: Problems35566 -Node: Concept index36092 +Node: Invoking tarlz3337 +Ref: --data-size5489 +Ref: --bsolid12172 +Node: File format15802 +Ref: key_crc3220622 +Node: Amendments to pax format26039 +Ref: crc3226580 +Ref: flawed-compat27605 +Node: Multi-threaded tar30128 +Node: Minimum archive sizes32667 +Node: Examples34800 +Node: Problems36517 +Node: Concept index37043  End Tag Table diff --git a/doc/tarlz.texi b/doc/tarlz.texi index da7abfa..3d2f668 100644 --- a/doc/tarlz.texi +++ b/doc/tarlz.texi @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 12 March 2019 -@set VERSION 0.14 +@set UPDATED 11 April 2019 +@set VERSION 0.15 @dircategory Data Compression @direntry @@ -84,7 +84,8 @@ parallel, multiplying the decompression speed. @item New members can be appended to the archive (by removing the EOF -member) just like to an uncompressed tar archive. +member), and unwanted members can be deleted from the archive. Just +like an uncompressed tar archive. @item It is a safe posix-style backup format. In case of corruption, @@ -121,6 +122,9 @@ tarlz [@var{options}] [@var{files}] @end example @noindent +All operations except @samp{--concatenate} operate on whole trees if any +@var{file} is a directory. + On archive creation or appending tarlz archives the files specified, but removes from member names any leading and trailing slashes and any filename prefixes containing a @samp{..} component. On extraction, leading and @@ -206,6 +210,15 @@ run from the root directory to perform the comparison. Make @samp{--diff} ignore differences in owner and group IDs. This option is useful when comparing an @samp{--anonymous} archive. +@item --delete +Delete the specified files and directories from an archive in place. It +currently can delete only from uncompressed archives and from archives with +individually compressed files (@samp{--no-solid} archives). To delete a +directory without deleting the files under it, use +@w{@code{tarlz --delete -f foo --exclude='dir/*' dir}}. Deleting in place +may be dangerous. A corrupt archive, a power cut, or an I/O error may cause +data loss. + @item --exclude=@var{pattern} Exclude files matching a shell pattern like @samp{*.o}. A file is considered to match if any component of the filename matches. For example, @samp{*.o} @@ -266,8 +279,10 @@ Verbosely list files processed. @item -x @itemx --extract -Extract files from an archive. If @var{files} are given, extract only -the @var{files} given. Else extract all the files in the archive. +Extract files from an archive. If @var{files} are given, extract only the +@var{files} given. Else extract all the files in the archive. To extract a +directory without extracting the files under it, use +@w{@code{tarlz -xf foo --exclude='dir/*' dir}}. @item -0 .. -9 Set the compression level for @samp{--create} and @samp{--append}. The @@ -474,7 +489,9 @@ If several extended headers precede an ustar header, only the last extended header takes effect. The other extended headers are ignored. Similarly, if several records with the same keyword appear in the same block of extended records, only the last record for the repeated keyword -takes effect. The other records for the repeated keyword are ignored. +takes effect. The other records for the repeated keyword are ignored.@* +A global header inserted between an extended header and an ustar header.@* +An extended header just before the EOF blocks. @end ignore @sp 1 @@ -654,7 +671,7 @@ and gname are null-terminated character strings except when all characters in the array contain non-null characters including the last character. Each numeric field contains a leading space- or zero-filled, optionally null-terminated octal number using digits from the ISO/IEC 646:1991 (ASCII) -standard. Tarlz is able to decode numeric fields 1 byte larger than standard +standard. Tarlz is able to decode numeric fields 1 byte longer than standard ustar by not requiring a terminating null character. @@ -663,10 +680,10 @@ ustar by not requiring a terminating null character. @cindex Amendments to pax format Tarlz is meant to reliably detect invalid or corrupt metadata during -extraction and to not create safety risks in the archives it creates. In -order to achieve these goals, tarlz makes some changes to the variant of the -pax format that it uses. This chapter describes these changes and the -concrete reasons to implement them. +decoding, and to create safe archives where corrupt metadata can be reliably +detected. In order to achieve these goals, tarlz makes some changes to the +variant of the pax format that it uses. This chapter describes these changes +and the concrete reasons to implement them. @sp 1 @anchor{crc32} @@ -713,9 +730,9 @@ extended records. If an extended header is required for any reason (for example a file size larger than @w{8 GiB} or a link name longer than 100 bytes), tarlz moves the filename also to the extended header to prevent an ustar tool from trying to -extract the file or link. This also makes easier during parallel extraction -or listing the detection of a tar member split between two lzip members at -the boundary between the extended header and the ustar header. +extract the file or link. This also makes easier during parallel decoding +the detection of a tar member split between two lzip members at the boundary +between the extended header and the ustar header. @sp 1 @section As simple as possible (but not simpler) @@ -726,6 +743,10 @@ exceed the limits of the ustar format. Adding extended headers to each member just to record subsecond timestamps seems wasteful for a backup format. +Global pax headers are tolerated, but not supported; they are parsed and +ignored. Some operations may not behave as expected if the archive contains +global headers. + @sp 1 @section Avoid misconversions to/from UTF-8 @@ -886,11 +907,11 @@ tarlz -xf archive.tar.lz @sp 1 @noindent -Example 7: Extract files @samp{a} and @samp{c} from archive -@samp{archive.tar.lz}. +Example 7: Extract files @samp{a} and @samp{c}, and the whole tree under +directory @samp{dir1} from archive @samp{archive.tar.lz}. @example -tarlz -xf archive.tar.lz a c +tarlz -xf archive.tar.lz a c dir1 @end example @sp 1 diff --git a/exclude.cc b/exclude.cc index fc9d67d..1bdae6e 100644 --- a/exclude.cc +++ b/exclude.cc @@ -47,7 +47,12 @@ bool Exclude::excluded( const char * const filename ) while( *p ) { for( unsigned i = 0; i < patterns.size(); ++i ) +#ifdef FNM_LEADING_DIR if( fnmatch( patterns[i].c_str(), p, FNM_LEADING_DIR ) == 0 ) return true; +#else + if( fnmatch( patterns[i].c_str(), p, 0 ) == 0 || + fnmatch( ( patterns[i] + "/*" ).c_str(), p, 0 ) == 0 ) return true; +#endif while( *p && *p != '/' ) ++p; // skip component while( *p == '/' ) ++p; // skip slashes } diff --git a/extended.cc b/extended.cc index 39b0f5a..0e647c6 100644 --- a/extended.cc +++ b/extended.cc @@ -64,7 +64,7 @@ unsigned long long parse_decimal( const char * const ptr, { const unsigned long long prev = result; result *= 10; result += ptr[i] - '0'; - if( result < prev || result > LLONG_MAX ) // overflow + if( result < prev || result > max_file_size ) // overflow { if( tailp ) *tailp = ptr; return 0; } } if( tailp ) *tailp = ptr + i; @@ -219,7 +219,7 @@ bool Extended::parse( const char * const buf, const unsigned long long edsize, if( file_size_ != 0 && !permissive ) return false; file_size_ = parse_decimal( tail + 5, &tail, rest - 5 ); // parse error or size fits in ustar header - if( file_size_ < 1ULL << 33 || tail != buf + ( pos + rsize - 1 ) ) + if( file_size_ < 1LL << 33 || tail != buf + ( pos + rsize - 1 ) ) return false; } else if( rest > 10 && std::memcmp( tail, "GNU.crc32=", 10 ) == 0 ) @@ -281,9 +281,9 @@ void Extended::fill_from_ustar( const Tar_header header ) /* Returns file size from record or from ustar header, and resets file_size_. Used for fast parsing of headers in uncompressed archives. */ -unsigned long long Extended::get_file_size_and_reset( const Tar_header header ) +long long Extended::get_file_size_and_reset( const Tar_header header ) { - const unsigned long long tmp = file_size_; + const long long tmp = file_size_; file_size( 0 ); const Typeflag typeflag = (Typeflag)header[typeflag_o]; if( typeflag == tf_regular || typeflag == tf_hiperf ) diff --git a/extract.cc b/extract.cc index 7d4b3ae..bd2b624 100644 --- a/extract.cc +++ b/extract.cc @@ -91,8 +91,8 @@ bool make_path( const std::string & name ) // Return value: 0 = OK, 1 = damaged member, 2 = fatal error. // If sizep and error, return in *sizep the number of bytes read. // The first 6 bytes of the archive must be intact for islz to be meaningful. -int archive_read( const int infd, uint8_t * const buf, const int size, - int * const sizep = 0 ) +int archive_read( const char * const archive_namep, const int infd, + uint8_t * const buf, const int size, int * const sizep = 0 ) { static LZ_Decoder * decoder = 0; static bool at_eof = false; @@ -109,7 +109,8 @@ int archive_read( const int infd, uint8_t * const buf, const int size, const int rd = readblock( infd, buf, size ); if( sizep ) *sizep = rd; if( rd != size && errno ) - { show_error( "Error reading archive", errno ); fatal = true; return 2; } + { show_file_error( archive_namep, "Error reading archive", errno ); + fatal = true; return 2; } const Lzip_header & header = (*(const Lzip_header *)buf); bool islz = ( rd >= min_member_size && header.verify_magic() && header.verify_version() && @@ -119,7 +120,7 @@ int archive_read( const int infd, uint8_t * const buf, const int size, ( !islz && !istar && rd == size && block_is_zero( buf, size ) ); if( !islz && !istar && !iseof ) // corrupt or invalid format { - show_error( "This does not look like a POSIX tar archive." ); + show_file_error( archive_namep, posix_msg ); if( archive_has_lz_ext && rd >= min_member_size ) islz = true; if( !islz ) return 1; } @@ -132,10 +133,10 @@ int archive_read( const int infd, uint8_t * const buf, const int size, LZ_decompress_close( decoder ); fatal = true; return 2; } if( LZ_decompress_write( decoder, buf, rd ) != rd ) internal_error( "library error (LZ_decompress_write)." ); - const int res = archive_read( infd, buf, size, sizep ); + const int res = archive_read( archive_namep, infd, buf, size, sizep ); if( res != 0 ) { if( res == 2 ) fatal = true; return res; } if( verify_ustar_chksum( buf ) || block_is_zero( buf, size ) ) return 0; - show_error( "This does not look like a POSIX tar.lz archive." ); + show_file_error( archive_namep, posix_lz_msg ); fatal = true; return 2; } @@ -143,7 +144,7 @@ int archive_read( const int infd, uint8_t * const buf, const int size, { const int rd = readblock( infd, buf, size ); if( rd == size ) return 0; if( sizep ) *sizep = rd; - show_error( "Archive ends unexpectedly." ); fatal = true; return 2; + show_file_error( archive_namep, end_msg ); fatal = true; return 2; } const int ibuf_size = 16384; uint8_t ibuf[ibuf_size]; @@ -159,11 +160,8 @@ int archive_read( const int infd, uint8_t * const buf, const int size, } if( rd == 0 && LZ_decompress_finished( decoder ) == 1 ) { LZ_decompress_close( decoder ); - show_error( "Archive ends unexpectedly." ); fatal = true; return 2; } + show_file_error( archive_namep, end_msg ); fatal = true; return 2; } sz += rd; if( sizep ) *sizep = sz; - if( sz == size && LZ_decompress_finished( decoder ) == 1 && - LZ_decompress_close( decoder ) < 0 ) - { show_error( "LZ_decompress_close failed." ); fatal = true; return 2; } if( sz < size && !at_eof && LZ_decompress_write_size( decoder ) > 0 ) { const int rsize = std::min( ibuf_size, LZ_decompress_write_size( decoder ) ); @@ -174,8 +172,8 @@ int archive_read( const int infd, uint8_t * const buf, const int size, { at_eof = true; LZ_decompress_finish( decoder ); if( errno ) - { show_error( "Error reading archive", errno ); fatal = true; - return 2; } + { show_file_error( archive_namep, "Error reading archive", errno ); + fatal = true; return 2; } } } } @@ -292,7 +290,6 @@ bool format_member_name( const Extended & extended, const Tar_header header, return true; } -namespace { bool show_member_name( const Extended & extended, const Tar_header header, const int vlevel, Resizable_buffer & rbuf ) @@ -307,22 +304,23 @@ bool show_member_name( const Extended & extended, const Tar_header header, return true; } +namespace { -int skip_member( const int infd, const Extended & extended ) +int skip_member( const char * const archive_namep, const int infd, + const Extended & extended ) { - unsigned long long rest = extended.file_size(); + long long rest = extended.file_size(); const int rem = rest % header_size; - const int padding = rem ? header_size - rem : 0; - if( archive_is_uncompressed_seekable && - lseek( infd, rest + padding, SEEK_CUR ) > 0 ) return 0; - const unsigned bufsize = 32 * header_size; + if( rem ) rest += header_size - rem; // padding + if( archive_is_uncompressed_seekable && lseek( infd, rest, SEEK_CUR ) > 0 ) + return 0; + const int bufsize = 32 * header_size; uint8_t buf[bufsize]; while( rest > 0 ) { - const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding; - const int ret = archive_read( infd, buf, rsize ); + const int rsize = ( rest >= bufsize ) ? bufsize : rest; + const int ret = archive_read( archive_namep, infd, buf, rsize ); if( ret != 0 ) { if( ret == 2 ) return 2; else break; } - if( rest < bufsize ) break; rest -= rsize; } return 0; @@ -336,11 +334,12 @@ void show_file_diff( const char * const filename, const char * const msg ) } -int compare_member( const int infd1, const Extended & extended, - const Tar_header header, const bool ignore_ids ) +int compare_member( const char * const archive_namep, const int infd1, + const Extended & extended, const Tar_header header, + const bool ignore_ids ) { if( !show_member_name( extended, header, 1, grbuf ) ) return 1; - unsigned long long rest = extended.file_size(); + long long rest = extended.file_size(); const char * const filename = extended.path().c_str(); const Typeflag typeflag = (Typeflag)header[typeflag_o]; bool diff = false, size_differs = false, type_differs = true; @@ -386,7 +385,7 @@ int compare_member( const int infd1, const Extended & extended, { show_file_diff( filename, "Mod time differs" ); diff = true; } } if( ( typeflag == tf_regular || typeflag == tf_hiperf ) && - (off_t)rest != st.st_size ) // don't compare contents + rest != st.st_size ) // don't compare contents { show_file_diff( filename, "Size differs" ); size_differs = true; } if( ( typeflag == tf_chardev || typeflag == tf_blockdev ) && ( parse_octal( header + devmajor_o, devmajor_l ) != @@ -412,24 +411,26 @@ int compare_member( const int infd1, const Extended & extended, } if( diff || size_differs || type_differs ) { diff = false; set_error_status( 1 ); } - if( rest == 0 ) return 0; + if( rest <= 0 ) return 0; if( ( typeflag != tf_regular && typeflag != tf_hiperf ) || - size_differs || type_differs ) return skip_member( infd1, extended ); + size_differs || type_differs ) + return skip_member( archive_namep, infd1, extended ); // else compare file contents const int rem = rest % header_size; const int padding = rem ? header_size - rem : 0; - const unsigned bufsize = 32 * header_size; + const int bufsize = 32 * header_size; uint8_t buf1[bufsize]; uint8_t buf2[bufsize]; const int infd2 = open_instream( filename ); if( infd2 < 0 ) - { set_error_status( 1 ); return skip_member( infd1, extended ); } + { set_error_status( 1 ); + return skip_member( archive_namep, infd1, extended ); } int retval = 0; while( rest > 0 ) { const int rsize1 = ( rest >= bufsize ) ? bufsize : rest + padding; const int rsize2 = ( rest >= bufsize ) ? bufsize : rest; - const int ret = archive_read( infd1, buf1, rsize1 ); + const int ret = archive_read( archive_namep, infd1, buf1, rsize1 ); if( ret != 0 ) { if( ret == 2 ) retval = 2; diff = true; break; } if( !diff ) { @@ -456,11 +457,11 @@ int compare_member( const int infd1, const Extended & extended, } -int list_member( const int infd, const Extended & extended, - const Tar_header header ) +int list_member( const char * const archive_namep, const int infd, + const Extended & extended, const Tar_header header ) { if( !show_member_name( extended, header, 0, grbuf ) ) return 1; - return skip_member( infd, extended ); + return skip_member( archive_namep, infd, extended ); } @@ -472,14 +473,15 @@ bool contains_dotdot( const char * const filename ) } -int extract_member( const int infd, const Extended & extended, - const Tar_header header, const bool keep_damaged ) +int extract_member( const char * const archive_namep, const int infd, + const Extended & extended, const Tar_header header, + const bool keep_damaged ) { const char * const filename = extended.path().c_str(); if( contains_dotdot( filename ) ) { show_file_error( filename, "Contains a '..' component, skipping." ); - return skip_member( infd, extended ); + return skip_member( archive_namep, infd, extended ); } const mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits const time_t mtime = parse_octal( header + mtime_o, mtime_l ); // 33 bits @@ -557,22 +559,22 @@ int extract_member( const int infd, const Extended & extended, return 2; } - const unsigned bufsize = 32 * header_size; + const int bufsize = 32 * header_size; uint8_t buf[bufsize]; - unsigned long long rest = extended.file_size(); + long long rest = extended.file_size(); const int rem = rest % header_size; const int padding = rem ? header_size - rem : 0; while( rest > 0 ) { const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding; int rd; - const int ret = archive_read( infd, buf, rsize, &rd ); + const int ret = archive_read( archive_namep, infd, buf, rsize, &rd ); if( ret != 0 ) { if( outfd >= 0 ) { if( keep_damaged ) - { writeblock( outfd, buf, std::min( rest, (unsigned long long)rd ) ); + { writeblock( outfd, buf, std::min( rest, (long long)rd ) ); close( outfd ); } else { close( outfd ); std::remove( filename ); } } @@ -620,16 +622,16 @@ bool compare_tslash( const char * const name1, const char * const name2 ) namespace { -bool parse_records( const int infd, Extended & extended, - const Tar_header header, Resizable_buffer & rbuf, - const bool permissive ) +bool parse_records( const char * const archive_namep, const int infd, + Extended & extended, const Tar_header header, + Resizable_buffer & rbuf, const bool permissive ) { - const unsigned long long edsize = parse_octal( header + size_o, size_l ); - const unsigned long long bufsize = round_up( edsize ); - if( edsize == 0 || edsize >= 1ULL << 33 || bufsize == 0 || bufsize >= INT_MAX ) + const long long edsize = parse_octal( header + size_o, size_l ); + const long long bufsize = round_up( edsize ); + if( edsize <= 0 || edsize >= 1LL << 33 || bufsize >= INT_MAX ) return false; // overflow or no extended data if( !rbuf.resize( bufsize ) ) return false; // extended records buffer - return ( archive_read( infd, (uint8_t *)rbuf(), bufsize ) == 0 && + return ( archive_read( archive_namep, infd, (uint8_t *)rbuf(), bufsize ) == 0 && extended.parse( rbuf(), edsize, permissive ) ); } @@ -690,8 +692,10 @@ int decode( const std::string & archive_name, const Arg_parser & parser, const bool keep_damaged, const bool missing_crc, const bool permissive ) { - const int infd = archive_name.size() ? - open_instream( archive_name ) : STDIN_FILENO; + const bool from_stdin = archive_name.empty(); + const char * const archive_namep = + from_stdin ? "(stdin)" : archive_name.c_str(); + const int infd = from_stdin ? STDIN_FILENO : open_instream( archive_name ); if( infd < 0 ) return 1; // Execute -C options and mark filenames to be compared, extracted or listed. @@ -719,9 +723,10 @@ int decode( const std::string & archive_name, const Arg_parser & parser, const long members = lzip_index.members(); if( lzip_index.retval() == 0 && members >= 2 ) // one file + eof { - // show_file_error( archive_name.c_str(), "Is compressed seekable" ); - return list_lz( parser, name_pending, lzip_index, filenames, debug_level, - infd, std::min( (long)num_workers, members ), + // show_file_error( archive_namep, "Is compressed seekable" ); + return list_lz( archive_namep, parser, name_pending, lzip_index, + filenames, debug_level, infd, + std::min( (long)num_workers, members ), missing_crc, permissive ); } if( lseek( infd, 0, SEEK_SET ) == 0 && lzip_index.retval() != 0 && @@ -733,19 +738,18 @@ int decode( const std::string & archive_name, const Arg_parser & parser, Extended extended; // metadata from extended records int retval = 0; bool prev_extended = false; // prev header was extended - while( true ) // process one tar member per iteration + while( true ) // process one tar header per iteration { Tar_header header; - const int ret = archive_read( infd, header, header_size ); - if( ret == 2 ) return 2; + const int ret = archive_read( archive_namep, infd, header, header_size ); + if( ret == 2 ) { retval = 2; break; } if( ret != 0 || !verify_ustar_chksum( header ) ) { if( ret == 0 && block_is_zero( header, header_size ) ) { - if( !prev_extended ) break; // EOF - show_file_error( archive_name.c_str(), - "Format violation: extended header followed by EOF blocks." ); - return 2; + if( !prev_extended || permissive ) break; // EOF + show_file_error( archive_namep, fv_msg1 ); + retval = 2; break; } if( skip_warn() && verbosity >= 2 ) std::fprintf( stderr, "ustar chksum = %07o\n", ustar_chksum( header ) ); @@ -756,13 +760,11 @@ int decode( const std::string & archive_name, const Arg_parser & parser, const Typeflag typeflag = (Typeflag)header[typeflag_o]; if( typeflag == tf_global ) { - if( prev_extended ) - { show_file_error( archive_name.c_str(), - "Format violation: extended header followed by global header." ); - return 2; } + if( prev_extended && !permissive ) + { show_file_error( archive_namep, fv_msg2 ); retval = 2; break; } Extended dummy; // global headers are parsed and ignored - if( !parse_records( infd, dummy, header, grbuf, true ) ) - { show_file_error( archive_name.c_str(), + if( !parse_records( archive_namep, infd, dummy, header, grbuf, true ) ) + { show_file_error( archive_namep, "Error in global extended records. Skipping to next header." ); set_error_status( 2 ); } continue; @@ -770,16 +772,14 @@ int decode( const std::string & archive_name, const Arg_parser & parser, if( typeflag == tf_extended ) { if( prev_extended && !permissive ) - { show_file_error( archive_name.c_str(), - "Format violation: consecutive extended headers found." - /*" Use --permissive.", 0, true*/ ); return 2; } - if( !parse_records( infd, extended, header, grbuf, permissive ) ) - { show_file_error( archive_name.c_str(), + { show_file_error( archive_namep, fv_msg3 ); retval = 2; break; } + if( !parse_records( archive_namep, infd, extended, header, grbuf, + permissive ) ) + { show_file_error( archive_namep, "Error in extended records. Skipping to next header." ); extended.reset(); set_error_status( 2 ); } else if( !extended.crc_present() && missing_crc ) - { show_file_error( archive_name.c_str(), - "Missing CRC in extended records." ); return 2; } + { show_file_error( archive_namep, mcrc_msg ); retval = 2; break; } prev_extended = true; continue; } @@ -787,23 +787,25 @@ int decode( const std::string & archive_name, const Arg_parser & parser, extended.fill_from_ustar( header ); // copy metadata from header - const bool skip = check_skip_filename( parser, name_pending, - extended.path().c_str(), filenames ); - if( skip ) - retval = skip_member( infd, extended ); + if( check_skip_filename( parser, name_pending, extended.path().c_str(), + filenames ) ) + retval = skip_member( archive_namep, infd, extended ); else if( program_mode == m_list ) - retval = list_member( infd, extended, header ); + retval = list_member( archive_namep, infd, extended, header ); else if( program_mode == m_diff ) - retval = compare_member( infd, extended, header, ignore_ids ); - else - retval = extract_member( infd, extended, header, keep_damaged ); + retval = compare_member( archive_namep, infd, extended, header, ignore_ids ); + else retval = extract_member( archive_namep, infd, extended, header, + keep_damaged ); extended.reset(); if( retval ) - { show_error( "Error is not recoverable: exiting now." ); - return retval; } + { show_error( "Error is not recoverable: exiting now." ); break; } } - for( int i = 0; i < parser.arguments(); ++i ) + if( close( infd ) != 0 && !retval ) + { show_file_error( archive_namep, "Error closing archive", errno ); + retval = 1; } + + if( retval == 0 ) for( int i = 0; i < parser.arguments(); ++i ) if( !parser.code( i ) && parser.argument( i ).size() && name_pending[i] ) { show_file_error( parser.argument( i ).c_str(), "Not found in archive." ); diff --git a/list_lz.cc b/list_lz.cc index 6a95bab..e636ee0 100644 --- a/list_lz.cc +++ b/list_lz.cc @@ -149,7 +149,7 @@ bool check_skip_filename( const Arg_parser & parser, std::vector< char > & name_pending, const char * const filename, const int filenames ) { - if( Exclude::excluded( filename ) ) return true; // skip excluded + if( Exclude::excluded( filename ) ) return true; // skip excluded files bool skip = filenames > 0; if( skip ) for( int i = 0; i < parser.arguments(); ++i ) @@ -165,6 +165,90 @@ bool check_skip_filename( const Arg_parser & parser, } +/* Return value: 0 = OK, 1 = damaged member, 2 = fatal error. */ +int archive_read_lz( LZ_Decoder * const decoder, const int infd, + long long & file_pos, const long long member_end, + const long long cdata_size, uint8_t * const buf, + const int size, const char ** msg ) + { + int sz = 0; + + while( sz < size ) + { + const int rd = LZ_decompress_read( decoder, buf + sz, size - sz ); + if( rd < 0 ) + { *msg = LZ_strerror( LZ_decompress_errno( decoder ) ); return 1; } + if( rd == 0 && LZ_decompress_finished( decoder ) == 1 ) + { *msg = end_msg; return 2; } + sz += rd; + if( sz < size && LZ_decompress_write_size( decoder ) > 0 ) + { + const long long ibuf_size = 16384; // try 65536 + uint8_t ibuf[ibuf_size]; + const long long rest = ( file_pos < member_end ) ? + member_end - file_pos : cdata_size - file_pos; + const int rsize = std::min( LZ_decompress_write_size( decoder ), + (int)std::min( ibuf_size, rest ) ); + if( rsize <= 0 ) LZ_decompress_finish( decoder ); + else + { + const int rd = preadblock( infd, ibuf, rsize, file_pos ); + if( LZ_decompress_write( decoder, ibuf, rd ) != rd ) + internal_error( "library error (LZ_decompress_write)." ); + file_pos += rd; + if( rd < rsize ) + { + LZ_decompress_finish( decoder ); + if( errno ) { *msg = "Error reading archive"; return 2; } + } + } + } + } + return 0; + } + + +int parse_records_lz( LZ_Decoder * const decoder, const int infd, + long long & file_pos, const long long member_end, + const long long cdata_size, long long & data_pos, + Extended & extended, const Tar_header header, + Resizable_buffer & rbuf, const char ** msg, + const bool permissive ) + { + const long long edsize = parse_octal( header + size_o, size_l ); + const long long bufsize = round_up( edsize ); + if( edsize <= 0 || edsize >= 1LL << 33 || bufsize >= INT_MAX ) + return 1; // overflow or no extended data + if( !rbuf.resize( bufsize ) ) return 1; // extended records buffer + int retval = archive_read_lz( decoder, infd, file_pos, member_end, + cdata_size, (uint8_t *)rbuf(), bufsize, msg ); + if( retval == 0 ) + { if( extended.parse( rbuf(), edsize, permissive ) ) data_pos += bufsize; + else retval = 1; } + return retval; + } + + +int skip_member_lz( LZ_Decoder * const decoder, const int infd, + long long & file_pos, const long long member_end, + const long long cdata_size, long long & data_pos, + long long rest, const char ** msg ) + { + const int bufsize = 32 * header_size; + uint8_t buf[bufsize]; + while( rest > 0 ) // skip tar member + { + const int rsize = ( rest >= bufsize ) ? bufsize : rest; + const int ret = archive_read_lz( decoder, infd, file_pos, member_end, + cdata_size, buf, rsize, msg ); + if( ret != 0 ) return ret; + data_pos += rsize; + rest -= rsize; + } + return 0; + } + + namespace { struct Packet // member name and metadata or error message @@ -195,6 +279,7 @@ private: pthread_cond_t oav_or_exit; // output packet available or all workers exited std::vector< pthread_cond_t > slot_av; // output slot available pthread_cond_t check_master; + bool eof_found_; Packet_courier( const Packet_courier & ); // declared as private void operator=( const Packet_courier & ); // declared as private @@ -204,7 +289,8 @@ public: : ocheck_counter( 0 ), owait_counter( 0 ), error_member_id( -1 ), deliver_worker_id( 0 ), master_worker_id( -1 ), opacket_queues( workers ), num_working( workers ), - num_workers( workers ), out_slots( slots ), slot_av( workers ) + num_workers( workers ), out_slots( slots ), slot_av( workers ), + eof_found_( false ) { xinit_mutex( &omutex ); xinit_cond( &oav_or_exit ); for( unsigned i = 0; i < slot_av.size(); ++i ) xinit_cond( &slot_av[i] ); @@ -218,6 +304,9 @@ public: xdestroy_cond( &oav_or_exit ); xdestroy_mutex( &omutex ); } + bool eof_found() const { return eof_found_; } + void report_eof() { eof_found_ = true; } + bool mastership_granted() const { return master_worker_id >= 0; } bool request_mastership( const long member_id, const int worker_id ) @@ -255,12 +344,15 @@ public: /* Collect a packet from a worker. If a packet is rejected, the worker must terminate. */ - bool collect_packet( const Packet * const opacket, const int worker_id ) + bool collect_packet( const int worker_id, const long member_id, + const char * const msg, + const Packet::Status status = Packet::ok ) { + const Packet * const opacket = new Packet( member_id, msg, status ); xlock( &omutex ); if( ( mastership_granted() && master_worker_id != worker_id ) || ( error_member_id >= 0 && error_member_id < opacket->member_id ) ) - { xunlock( &omutex ); return false; } // reject packet + { xunlock( &omutex ); delete opacket; return false; } // reject packet while( opacket_queues[worker_id].size() >= out_slots ) xwait( &slot_av[worker_id], &omutex ); opacket_queues[worker_id].push( opacket ); @@ -310,53 +402,6 @@ public: }; -/* Return value: -1 = member_end exceeded, 0 = OK, - 1 = damaged member, 2 = fatal error. - If sizep and error, return in *sizep the number of bytes read. */ -int archive_read_lz( LZ_Decoder * const decoder, const int infd, - long long & file_pos, const long long member_end, - const long long cdata_size, uint8_t * const buf, - const int size, - const char ** msg, int * const sizep = 0 ) - { - int sz = 0; - - if( sizep ) *sizep = 0; - while( sz < size ) - { - const int rd = LZ_decompress_read( decoder, buf + sz, size - sz ); - if( rd < 0 ) - { *msg = LZ_strerror( LZ_decompress_errno( decoder ) ); return 1; } - if( rd == 0 && LZ_decompress_finished( decoder ) == 1 ) - { *msg = "Archive ends unexpectedly."; return 2; } - sz += rd; if( sizep ) *sizep = sz; - if( sz < size && LZ_decompress_write_size( decoder ) > 0 ) - { - const long long ibuf_size = 16384; // try 65536 - uint8_t ibuf[ibuf_size]; - const long long rest = ( file_pos < member_end ) ? - member_end - file_pos : cdata_size - file_pos; - const int rsize = std::min( LZ_decompress_write_size( decoder ), - (int)std::min( ibuf_size, rest ) ); - if( rsize <= 0 ) LZ_decompress_finish( decoder ); - else - { - const int rd = preadblock( infd, ibuf, rsize, file_pos ); - if( LZ_decompress_write( decoder, ibuf, rd ) != rd ) - internal_error( "library error (LZ_decompress_write)." ); - file_pos += rd; - if( rd < rsize ) - { - LZ_decompress_finish( decoder ); - if( errno ) { *msg = "Error reading archive"; return 2; } - } - } - } - } - return ( file_pos > member_end ) ? -1 : 0; - } - - int list_member_lz( LZ_Decoder * const decoder, const int infd, long long & file_pos, const long long member_end, const long long cdata_size, long long & data_pos, @@ -365,61 +410,22 @@ int list_member_lz( LZ_Decoder * const decoder, const int infd, Resizable_buffer & rbuf, const long member_id, const int worker_id, const char ** msg, const bool skip ) { - unsigned long long rest = extended.file_size(); + long long rest = extended.file_size(); const int rem = rest % header_size; - const int padding = rem ? header_size - rem : 0; - const long long data_rest = mdata_end - ( data_pos + rest + padding ); - bool master = false; - - if( data_rest < 0 ) // tar member exceeds lzip member end - { - if( courier.request_mastership( member_id, worker_id ) ) master = true; - else { *msg = "tar member exceeds lzip member end"; return 2; } - } + if( rem ) rest += header_size - rem; // padding + const long long data_rest = mdata_end - ( data_pos + rest ); if( verbosity < 0 || skip ) rbuf()[0] = 0; else if( !format_member_name( extended, header, rbuf, verbosity > 0 ) ) { *msg = mem_msg; return 1; } - const Packet * const opacket = new Packet( member_id, rbuf(), - data_rest ? Packet::ok : Packet::member_done ); - if( !courier.collect_packet( opacket, worker_id ) ) + if( !courier.collect_packet( worker_id, member_id, rbuf(), + data_rest ? Packet::ok : Packet::member_done ) ) { *msg = "other worker found an error"; return 1; } - if( !data_rest ) { data_pos = mdata_end; return 0; } - - const unsigned bufsize = 32 * header_size; - uint8_t buf[bufsize]; - while( rest > 0 ) - { - const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding; - const int ret = archive_read_lz( decoder, infd, file_pos, member_end, - cdata_size, buf, rsize, msg ); - if( ret > 0 ) return ret; - data_pos += rsize; - if( rest < bufsize ) break; - rest -= rsize; - } - return ( master ? -1 : 0 ); - } - - -int parse_records_lz( LZ_Decoder * const decoder, const int infd, - long long & file_pos, const long long member_end, - const long long cdata_size, long long & data_pos, - Extended & extended, const Tar_header header, - Resizable_buffer & rbuf, const char ** msg, - const bool permissive ) - { - const unsigned long long edsize = parse_octal( header + size_o, size_l ); - const unsigned long long bufsize = round_up( edsize ); - if( edsize == 0 || edsize >= 1ULL << 33 || bufsize == 0 || bufsize >= INT_MAX ) - return 1; // overflow or no extended data - if( !rbuf.resize( bufsize ) ) return 1; // extended records buffer - int retval = archive_read_lz( decoder, infd, file_pos, member_end, - cdata_size, (uint8_t *)rbuf(), bufsize, msg ); - if( retval == 0 ) - { if( extended.parse( rbuf(), edsize, permissive ) ) data_pos += bufsize; - else retval = 1; } - return retval; + if( data_rest ) + return skip_member_lz( decoder, infd, file_pos, member_end, cdata_size, + data_pos, rest, msg ); + data_pos = mdata_end; + return 0; } @@ -467,21 +473,30 @@ extern "C" void * tworker( void * arg ) const long long mdata_end = lzip_index.dblock( i ).end(); long long data_end = mdata_end; long long file_pos = lzip_index.mblock( i ).pos(); - long long member_end = lzip_index.mblock( i ).end(); + const long long member_end = lzip_index.mblock( i ).end(); if( data_pos >= data_end ) // empty lzip member { - const Packet * const opacket = new Packet( i, "", Packet::member_done ); - if( !courier.collect_packet( opacket, worker_id ) ) goto done; - continue; + if( courier.collect_packet( worker_id, i, "", Packet::member_done ) ) + continue; else break; } Extended extended; // metadata from extended records - int retval = 0; bool prev_extended = false; // prev header was extended LZ_decompress_reset( decoder ); // prepare for new member - while( true ) // process one tar member per iteration + while( true ) // process one tar header per iteration { - if( data_pos >= data_end ) break; + if( data_pos >= data_end ) + { + if( data_pos == data_end && !prev_extended ) break; + // member end exceeded or ends in extended, process rest of file + if( !courier.request_mastership( i, worker_id ) ) goto done; + master = true; + if( data_end < lzip_index.udata_size() ) + data_end = lzip_index.udata_size(); + else + { courier.collect_packet( worker_id, i, end_msg, Packet::error ); + goto done; } + } Tar_header header; const char * msg = 0; const int ret = archive_read_lz( decoder, infd, file_pos, member_end, @@ -490,81 +505,59 @@ extern "C" void * tworker( void * arg ) { if( !courier.request_mastership( i, worker_id ) ) goto done; master = true; - if( ret > 0 ) - { - const Packet * const opacket = new Packet( i, msg, Packet::error ); - courier.collect_packet( opacket, worker_id ); - goto done; - } - // member_end exceeded, process rest of file - else { data_end = lzip_index.udata_size(); member_end = cdata_size; } + courier.collect_packet( worker_id, i, msg, Packet::error ); + goto done; } data_pos += header_size; if( !verify_ustar_chksum( header ) ) { if( !courier.request_mastership( i, worker_id ) ) goto done; master = true; - if( block_is_zero( header, header_size ) ) break; // EOF - const Packet * const opacket = new Packet( i, - ( data_pos > header_size ) ? "Corrupt or invalid header." : - "This does not look like a POSIX tar.lz archive.", Packet::error ); - courier.collect_packet( opacket, worker_id ); + if( block_is_zero( header, header_size ) ) // EOF + { + if( !prev_extended || permissive ) courier.report_eof(); + else courier.collect_packet( worker_id, i, fv_msg1, Packet::error ); + goto done; + } + courier.collect_packet( worker_id, i, ( data_pos > header_size ) ? + bad_hdr_msg : posix_lz_msg, Packet::error ); goto done; } const Typeflag typeflag = (Typeflag)header[typeflag_o]; if( typeflag == tf_global ) { - if( prev_extended ) - { show_error( "Format violation: global header after extended header." ); - cleanup_and_fail( 2 ); } + if( prev_extended && !permissive ) + { courier.collect_packet( worker_id, i, fv_msg2, Packet::error ); + goto done; } Extended dummy; // global headers are parsed and ignored - const int ret = parse_records_lz( decoder, infd, file_pos, member_end, - cdata_size, data_pos, dummy, header, - rbuf, &msg, true ); - if( ret != 0 ) + if( parse_records_lz( decoder, infd, file_pos, member_end, cdata_size, + data_pos, dummy, header, rbuf, &msg, true ) == 0 ) { - if( !courier.request_mastership( i, worker_id ) ) goto done; - master = true; - if( ret > 0 ) - { - if( !msg ) msg = "Error in global extended records."; - const Packet * const opacket = new Packet( i, msg, Packet::error ); - courier.collect_packet( opacket, worker_id ); + if( data_pos == data_end && // end of lzip member + !courier.collect_packet( worker_id, i, "", Packet::member_done ) ) goto done; - } - // member_end exceeded, process rest of file - else { data_end = lzip_index.udata_size(); member_end = cdata_size; } + continue; } - continue; + if( courier.request_mastership( i, worker_id ) ) + courier.collect_packet( worker_id, i, msg ? msg : gblrec_msg, + Packet::error ); + goto done; } if( typeflag == tf_extended ) { int ret = 0; - if( prev_extended && !permissive ) - { msg = "Format violation: consecutive extended headers found."; - ret = 2; } + if( prev_extended && !permissive ) { msg = fv_msg3; ret = 2; } else ret = parse_records_lz( decoder, infd, file_pos, member_end, cdata_size, data_pos, extended, header, rbuf, &msg, permissive ); if( ret == 0 && !extended.crc_present() && missing_crc ) - { msg = "Missing CRC in extended records."; ret = 2; } - if( ret != 0 ) - { - if( !courier.request_mastership( i, worker_id ) ) goto done; - master = true; - if( ret > 0 ) - { - if( !msg ) msg = "Error in extended records."; - const Packet * const opacket = new Packet( i, msg, Packet::error ); - courier.collect_packet( opacket, worker_id ); - goto done; - } - // member_end exceeded, process rest of file - else { data_end = lzip_index.udata_size(); member_end = cdata_size; } - } - prev_extended = true; - continue; + { msg = mcrc_msg; ret = 2; } + if( ret == 0 ) { prev_extended = true; continue; } + if( courier.request_mastership( i, worker_id ) ) + courier.collect_packet( worker_id, i, msg ? msg : extrec_msg, + Packet::error ); + goto done; } prev_extended = false; @@ -573,28 +566,18 @@ extern "C" void * tworker( void * arg ) const bool skip = check_skip_filename( parser, name_pending, extended.path().c_str(), filenames ); - retval = list_member_lz( decoder, infd, file_pos, member_end, cdata_size, - data_pos, mdata_end, courier, extended, - header, rbuf, i, worker_id, &msg, skip ); + if( list_member_lz( decoder, infd, file_pos, member_end, cdata_size, + data_pos, mdata_end, courier, extended, + header, rbuf, i, worker_id, &msg, skip ) != 0 ) + { courier.collect_packet( worker_id, i, msg, Packet::error ); + goto done; } extended.reset(); - if( retval < 0 ) // member_end exceeded, process rest of file - { master = true; - data_end = lzip_index.udata_size(); member_end = cdata_size; } - else if( retval > 0 ) - { - const Packet * const opacket = new Packet( i, msg, Packet::error ); - courier.collect_packet( opacket, worker_id ); - goto done; - } } } done: if( LZ_decompress_close( decoder ) < 0 ) - { - const Packet * const opacket = new Packet( lzip_index.members(), - "LZ_decompress_close failed.", Packet::error ); - courier.collect_packet( opacket, worker_id ); - } + courier.collect_packet( worker_id, lzip_index.members(), + "LZ_decompress_close failed.", Packet::error ); courier.worker_finished(); return 0; } @@ -602,7 +585,7 @@ done: /* Get from courier the processed and sorted packets, and print the member lines on stdout or the diagnostics on stderr. */ -void muxer( Packet_courier & courier ) +void muxer( const char * const archive_namep, Packet_courier & courier ) { while( true ) { @@ -610,23 +593,25 @@ void muxer( Packet_courier & courier ) if( !opacket ) break; // queue is empty. all workers exited if( opacket->status == Packet::error ) - { show_error( opacket->line.c_str() ); cleanup_and_fail( 2 ); } + { show_file_error( archive_namep, opacket->line.c_str() ); + cleanup_and_fail( 2 ); } if( opacket->line.size() ) { std::fputs( opacket->line.c_str(), stdout ); std::fflush( stdout ); } delete opacket; } - if( !courier.mastership_granted() ) // no worker found EOF blocks - { show_error( "Archive ends unexpectedly." ); cleanup_and_fail( 2 ); } + if( !courier.eof_found() ) // no worker found EOF blocks + { show_file_error( archive_namep, end_msg ); cleanup_and_fail( 2 ); } } } // end namespace // init the courier, then start the workers and call the muxer. -int list_lz( const Arg_parser & parser, std::vector< char > & name_pending, - const Lzip_index & lzip_index, const int filenames, - const int debug_level, const int infd, const int num_workers, - const bool missing_crc, const bool permissive ) +int list_lz( const char * const archive_namep, const Arg_parser & parser, + std::vector< char > & name_pending, const Lzip_index & lzip_index, + const int filenames, const int debug_level, const int infd, + const int num_workers, const bool missing_crc, + const bool permissive ) { const int out_slots = 65536; // max small files (<=512B) in 64 MiB @@ -655,7 +640,7 @@ int list_lz( const Arg_parser & parser, std::vector< char > & name_pending, { show_error( "Can't create worker threads", errcode ); cleanup_and_fail(); } } - muxer( courier ); + muxer( archive_namep, courier ); for( int i = num_workers - 1; i >= 0; --i ) { @@ -667,7 +652,11 @@ int list_lz( const Arg_parser & parser, std::vector< char > & name_pending, delete[] worker_args; int retval = 0; - for( int i = 0; i < parser.arguments(); ++i ) + if( close( infd ) != 0 ) + { show_file_error( archive_namep, "Error closing archive", errno ); + retval = 1; } + + if( retval == 0 ) for( int i = 0; i < parser.arguments(); ++i ) if( !parser.code( i ) && parser.argument( i ).size() && name_pending[i] ) { show_file_error( parser.argument( i ).c_str(), "Not found in archive." ); diff --git a/lzip_index.cc b/lzip_index.cc index d443fa0..6c599a2 100644 --- a/lzip_index.cc +++ b/lzip_index.cc @@ -69,7 +69,7 @@ void Lzip_index::set_num_error( const char * const msg, unsigned long long num ) // If successful, push last member and set pos to member header. -bool Lzip_index::skip_trailing_data( const int fd, long long & pos, +bool Lzip_index::skip_trailing_data( const int fd, unsigned long long & pos, const bool ignore_trailing, const bool loose_trailing ) { enum { block_size = 16384, @@ -151,7 +151,7 @@ Lzip_index::Lzip_index( const int infd, const bool ignore_trailing, if( !isvalid_ds( header.dictionary_size() ) ) { error_ = bad_dict_msg; retval_ = 2; return; } - long long pos = insize; // always points to a header or to EOF + unsigned long long pos = insize; // always points to a header or to EOF while( pos >= min_member_size ) { Lzip_trailer trailer; @@ -159,7 +159,7 @@ Lzip_index::Lzip_index( const int infd, const bool ignore_trailing, pos - Lzip_trailer::size ) != Lzip_trailer::size ) { set_errno_error( "Error reading member trailer: " ); break; } const unsigned long long member_size = trailer.member_size(); - if( member_size > (unsigned long long)pos || !trailer.verify_consistency() ) + if( member_size > pos || !trailer.verify_consistency() ) { if( member_vector.empty() ) { if( skip_trailing_data( infd, pos, ignore_trailing, loose_trailing ) ) diff --git a/lzip_index.h b/lzip_index.h index 9ff6ee9..08758da 100644 --- a/lzip_index.h +++ b/lzip_index.h @@ -55,7 +55,7 @@ class Lzip_index void set_errno_error( const char * const msg ); void set_num_error( const char * const msg, unsigned long long num ); - bool skip_trailing_data( const int fd, long long & pos, + bool skip_trailing_data( const int fd, unsigned long long & pos, const bool ignore_trailing, const bool loose_trailing ); public: diff --git a/main.cc b/main.cc index 8dcd2b2..d881b19 100644 --- a/main.cc +++ b/main.cc @@ -85,12 +85,13 @@ void show_help( const long num_online ) std::printf( "\nOptions:\n" " --help display this help and exit\n" " -V, --version output version information and exit\n" - " -A, --concatenate append tar.lz archives to the end of an archive\n" + " -A, --concatenate append archives to the end of an archive\n" " -B, --data-size= set target size of input data blocks [2x8=16 MiB]\n" " -c, --create create a new archive\n" " -C, --directory= change to directory \n" " -d, --diff find differences between archive and file system\n" " --ignore-ids ignore differences in owner and group IDs\n" + " --delete delete files/directories from an archive\n" " --exclude= exclude files matching a shell pattern\n" " -f, --file= use archive file \n" " -h, --dereference follow symlinks; archive the files they point to\n" @@ -99,7 +100,7 @@ void show_help( const long num_online ) " -r, --append append files to the end of an archive\n" " -t, --list list the contents of an archive\n" " -v, --verbose verbosely list files processed\n" - " -x, --extract extract files from an archive\n" + " -x, --extract extract files/directories from an archive\n" " -0 .. -9 set compression level [default 6]\n" " --uncompressed don't compress the archive created\n" " --asolid create solidly compressed appendable archive\n" @@ -310,9 +311,9 @@ int main( const int argc, const char * const argv[] ) { show_error( "Bad library version. At least lzlib 1.0 is required." ); return 1; } - enum { opt_ano = 256, opt_aso, opt_bso, opt_crc, opt_dbg, opt_dso, opt_exc, - opt_grp, opt_hlp, opt_id, opt_kd, opt_nso, opt_out, opt_own, opt_per, - opt_sol, opt_un }; + enum { opt_ano = 256, opt_aso, opt_bso, opt_crc, opt_dbg, opt_del, opt_dso, + opt_exc, opt_grp, opt_hlp, opt_id, opt_kd, opt_nso, opt_out, opt_own, + opt_per, opt_sol, opt_un }; const Arg_parser::Option options[] = { { '0', 0, Arg_parser::no }, @@ -344,6 +345,7 @@ int main( const int argc, const char * const argv[] ) { opt_aso, "asolid", Arg_parser::no }, { opt_bso, "bsolid", Arg_parser::no }, { opt_dbg, "debug", Arg_parser::yes }, + { opt_del, "delete", Arg_parser::no }, { opt_dso, "dsolid", Arg_parser::no }, { opt_exc, "exclude", Arg_parser::yes }, { opt_grp, "group", Arg_parser::yes }, @@ -402,6 +404,7 @@ int main( const int argc, const char * const argv[] ) case opt_bso: solidity = bsolid; break; case opt_crc: missing_crc = true; break; case opt_dbg: debug_level = getnum( arg, 0, 3 ); break; + case opt_del: set_mode( program_mode, m_delete ); break; case opt_dso: solidity = dsolid; break; case opt_exc: Exclude::add_pattern( sarg ); break; case opt_grp: set_group( arg ); break; @@ -433,6 +436,8 @@ int main( const int argc, const char * const argv[] ) num_workers, out_slots, debug_level, program_mode == m_append, dereference ); case m_concatenate: return concatenate( archive_name, parser, filenames ); + case m_delete: return delete_members( archive_name, parser, filenames, + missing_crc, permissive ); case m_diff: case m_extract: case m_list: return decode( archive_name, parser, filenames, diff --git a/tarlz.h b/tarlz.h index 196ecb1..0eb0572 100644 --- a/tarlz.h +++ b/tarlz.h @@ -15,6 +15,7 @@ along with this program. If not, see . */ +#define max_file_size ( LLONG_MAX - header_size ) enum { header_size = 512 }; typedef uint8_t Tar_header[header_size]; @@ -104,7 +105,7 @@ class Extended // stores metadata from/for extended records { std::string linkpath_; // these are the real metadata std::string path_; - unsigned long long file_size_; + long long file_size_; // >= 0 && <= max_file_size // cached sizes; if full_size_ < 0 they must be recalculated mutable long long edsize_; // extended data size @@ -137,15 +138,15 @@ public: const std::string & linkpath() const { return linkpath_; } const std::string & path() const { return path_; } - unsigned long long file_size() const { return file_size_; } - unsigned long long get_file_size_and_reset( const Tar_header header ); + long long file_size() const { return file_size_; } + long long get_file_size_and_reset( const Tar_header header ); void linkpath( const char * const lp ) { linkpath_ = lp; full_size_ = -1; } void path( const char * const p ) { path_ = p; full_size_ = -1; } - void file_size( const unsigned long long fs ) - { file_size_ = fs; full_size_ = -1; } + void file_size( const long long fs ) { full_size_ = -1; + file_size_ = ( fs >= 0 && fs <= max_file_size ) ? fs : 0; } - unsigned long long full_size() const + long long full_size() const { if( full_size_ < 0 ) calculate_sizes(); return full_size_; } bool crc_present() const { return crc_present_; } @@ -303,8 +304,18 @@ const char * const bad_magic_msg = "Bad magic number (file not in lzip format)." const char * const bad_dict_msg = "Invalid dictionary size in member header."; const char * const corrupt_mm_msg = "Corrupt header in multimember file."; const char * const trailing_msg = "Trailing data not allowed."; +const char * const bad_hdr_msg = "Corrupt or invalid tar header."; +const char * const gblrec_msg = "Error in global extended records."; +const char * const extrec_msg = "Error in extended records."; +const char * const mcrc_msg = "Missing CRC in extended records."; +const char * const end_msg = "Archive ends unexpectedly."; const char * const mem_msg = "Not enough memory."; const char * const mem_msg2 = "Not enough memory. Try a lower compression level."; +const char * const fv_msg1 = "Format violation: extended header followed by EOF blocks."; +const char * const fv_msg2 = "Format violation: extended header followed by global header."; +const char * const fv_msg3 = "Format violation: consecutive extended headers found."; +const char * const posix_msg = "This does not look like a POSIX tar archive."; +const char * const posix_lz_msg = "This does not look like a POSIX tar.lz archive."; // defined in create.cc enum Solidity { no_solid, bsolid, dsolid, asolid, solid }; @@ -312,14 +323,14 @@ extern int cl_owner; extern int cl_group; extern int cl_data_size; extern Solidity solidity; +bool copy_file( const int infd, const int outfd, const long long max_size = -1 ); bool writeblock_wrapper( const int outfd, const uint8_t * const buffer, const int size ); bool write_eof_records( const int outfd, const bool compressed ); const char * remove_leading_dotslash( const char * const filename, const bool dotdot = false ); bool fill_headers( const char * const filename, Extended & extended, - Tar_header header, unsigned long long & file_size, - const int flag ); + Tar_header header, long long & file_size, const int flag ); bool block_is_full( const Extended & extended, const unsigned long long file_size, unsigned long long & partial_data_size ); @@ -329,7 +340,7 @@ unsigned ustar_chksum( const uint8_t * const header ); bool verify_ustar_chksum( const uint8_t * const header ); bool has_lz_ext( const std::string & name ); class Arg_parser; -int concatenate( std::string archive_name, const Arg_parser & parser, +int concatenate( const std::string & archive_name, const Arg_parser & parser, const int filenames ); int encode( const std::string & archive_name, const Arg_parser & parser, const int filenames, const int level, const int num_workers, @@ -337,10 +348,29 @@ int encode( const std::string & archive_name, const Arg_parser & parser, const bool dereference ); // defined in create_lz.cc -int encode_lz( const Arg_parser & parser, const int dictionary_size, - const int match_len_limit, const int num_workers, - const int outfd, const int out_slots, const int debug_level, - const bool dereference ); +int encode_lz( const char * const archive_namep, const Arg_parser & parser, + const int dictionary_size, const int match_len_limit, + const int num_workers, const int outfd, const int out_slots, + const int debug_level, const bool dereference ); + +// defined in delete.cc +class Lzip_index; +bool safe_seek( const int fd, const long long pos ); +int tail_copy( const char * const archive_namep, const Arg_parser & parser, + std::vector< char > & name_pending, + const Lzip_index & lzip_index, const long long istream_pos, + const int infd, const int outfd, int retval ); +int delete_members( const std::string & archive_name, const Arg_parser & parser, + const int filenames, const bool missing_crc, + const bool permissive ); + +// defined in delete_lz.cc +int delete_members_lz( const char * const archive_namep, + const Arg_parser & parser, + std::vector< char > & name_pending, + const Lzip_index & lzip_index, + const int filenames, const int infd, const int outfd, + const bool missing_crc, const bool permissive ); // defined in exclude.cc namespace Exclude { @@ -349,11 +379,13 @@ bool excluded( const char * const filename ); } // end namespace Exclude // defined in extract.cc -enum Program_mode { m_none, m_append, m_concatenate, m_create, m_diff, - m_extract, m_list }; +enum Program_mode { m_none, m_append, m_concatenate, m_create, m_delete, + m_diff, m_extract, m_list }; bool block_is_zero( const uint8_t * const buf, const int size ); bool format_member_name( const Extended & extended, const Tar_header header, Resizable_buffer & rbuf, const bool long_format ); +bool show_member_name( const Extended & extended, const Tar_header header, + const int vlevel, Resizable_buffer & rbuf ); bool compare_prefix_dir( const char * const dir, const char * const name ); bool compare_tslash( const char * const name1, const char * const name2 ); int readblock( const int fd, uint8_t * const buf, const int size ); @@ -378,11 +410,26 @@ void xbroadcast( pthread_cond_t * const cond ); bool check_skip_filename( const Arg_parser & parser, std::vector< char > & name_pending, const char * const filename, const int filenames ); -class Lzip_index; -int list_lz( const Arg_parser & parser, std::vector< char > & name_pending, - const Lzip_index & lzip_index, const int filenames, - const int debug_level, const int infd, const int num_workers, - const bool missing_crc, const bool permissive ); +struct LZ_Decoder; +int archive_read_lz( LZ_Decoder * const decoder, const int infd, + long long & file_pos, const long long member_end, + const long long cdata_size, uint8_t * const buf, + const int size, const char ** msg ); +int parse_records_lz( LZ_Decoder * const decoder, const int infd, + long long & file_pos, const long long member_end, + const long long cdata_size, long long & data_pos, + Extended & extended, const Tar_header header, + Resizable_buffer & rbuf, const char ** msg, + const bool permissive ); +int skip_member_lz( LZ_Decoder * const decoder, const int infd, + long long & file_pos, const long long member_end, + const long long cdata_size, long long & data_pos, + long long rest, const char ** msg ); +int list_lz( const char * const archive_namep, const Arg_parser & parser, + std::vector< char > & name_pending, const Lzip_index & lzip_index, + const int filenames, const int debug_level, const int infd, + const int num_workers, const bool missing_crc, + const bool permissive ); // defined in lzip_index.cc int seek_read( const int fd, uint8_t * const buf, const int size, diff --git a/testsuite/check.sh b/testsuite/check.sh index 29330cf..00a56cd 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -36,6 +36,7 @@ inbad1="${testdir}"/test_bad1.txt inbad2="${testdir}"/test_bad2.txt test3="${testdir}"/test3.tar test3_lz="${testdir}"/test3.tar.lz +test3dir="${testdir}"/test3_dir.tar test3dir_lz="${testdir}"/test3_dir.tar.lz test3dot_lz="${testdir}"/test3_dot.tar.lz tarint1_lz="${testdir}"/tar_in_tlz1.tar.lz @@ -67,13 +68,14 @@ lzlib_1_11() { [ ${lwarn} = 0 ] && # Description of test files for tarlz: # test.txt.tar.lz: 1 member (test.txt). # t155.tar[.lz]: directory + links + file + eof, all with 155 char names +# t155_fv?.tar[.lz]: like t155.tar but with 3 kinds of format violations # tar_in_tlz1.tar.lz 2 members (test.txt.tar test3.tar) 3 lzip members # tar_in_tlz2.tar.lz 2 members (test.txt.tar test3.tar) 5 lzip members # ts_in_link.tar.lz: 4 symbolic links (link[1-4]) to / /dir/ dir/ dir(107/) # test_bad1.tar.lz: truncated at offset 6000 (of 7495) # test_bad2.tar.lz: byte at offset 6000 changed from 0x56 to 0x46 -# test3.tar: 3 members (foo bar baz) + 2 zeroed 512-byte blocks -# test3_dir.tar.lz: like test3.tar.lz but members /dir/foo /dir/bar /dir/baz +# test3.tar[.lz]: 3 members (foo bar baz) + 2 zeroed 512-byte blocks +# test3_dir.tar[.lz] like test3.tar but members /dir/foo /dir/bar /dir/baz # test3_dot.tar.lz: 3 times 3 members ./foo ././bar ./././baz # the 3 central members with filename in extended header # test3_bad1.tar: byte at offset 259 changed from 't' to '0' (magic) @@ -88,11 +90,19 @@ lzlib_1_11() { [ ${lwarn} = 0 ] && # test3_bad4.tar.lz: combined damage of test3_bad2.tar.lz and test3_bad3.tar.lz # test3_bad5.tar.lz: [71-134] --> zeroed (first trailer + seconf header) # test3_bad6.tar.lz: 510 zeros prepended to test3.tar.lz (header in two blocks) +# test3_eof?.tar: like test3_eof?.tar.lz but uncompressed # test3_eof1.tar.lz: test3.tar.lz without eof blocks # test3_eof2.tar.lz: test3.tar.lz with only one eof block # test3_eof3.tar.lz: test3.tar.lz with one zeroed block between foo and bar +# test3_eof4.tar.lz: test3.tar.lz ended by extended header without eof blocks +# test3_eof5.tar.lz: test3.tar.lz split ext first member, without eof blocks # test3_em?.tar.lz: test3.tar.lz with one empty lzip member at each position # test3_em6.tar.lz: test3.tar.lz preceded by four empty lzip members +# test3_gh?.tar: test3.tar with global header at each position +# test3_gh?.tar.lz: test3.tar.lz with global before bar split in 4 ways +# test3_gh5.tar.lz: test3.tar.lz with global in lzip member before foo +# test3_gh6.tar.lz: test3.tar.lz with global before foo in same member +# test3_sm?.tar.lz: test3.tar.lz with extended bar member split in 4 ways # tlz_in_tar1.tar: 1 member (test3.tar.lz) first magic damaged # tlz_in_tar2.tar: 2 members (foo test3.tar.lz) first magic damaged # ug32chars.tar.lz: 1 member (foo) with 32-character owner and group names @@ -172,6 +182,15 @@ rm -f test.txt || framework_failure cmp "${in}" test.txt || test_failed $LINENO rm -f test.txt || framework_failure +# test3 reference files for diff +"${TARLZ}" -tf "${test3}" > list3 || test_failed $LINENO +"${TARLZ}" -tvf "${test3}" > vlist3 || test_failed $LINENO +"${TARLZ}" -tf "${test3_lz}" > out || test_failed $LINENO +diff -u list3 out || test_failed $LINENO +"${TARLZ}" -tvf "${test3_lz}" > out || test_failed $LINENO +diff -u vlist3 out || test_failed $LINENO +rm -f out || framework_failure + # test3 reference files for cmp cat "${testdir}"/rfoo > cfoo || framework_failure cat "${testdir}"/rbar > cbar || framework_failure @@ -182,7 +201,10 @@ cmp cfoo foo || test_failed $LINENO cmp cbar bar || test_failed $LINENO cmp cbaz baz || test_failed $LINENO rm -f foo bar baz || framework_failure -"${TARLZ}" -q -tf "${test3_lz}" ./foo ./bar ./baz || test_failed $LINENO +"${TARLZ}" -tvf "${test3_lz}" ./foo ./bar ./baz > out 2> /dev/null || + test_failed $LINENO +diff -u vlist3 out || test_failed $LINENO +rm -f out || framework_failure "${TARLZ}" -q -xf "${test3_lz}" ./foo ./bar ./baz || test_failed $LINENO cmp cfoo foo || test_failed $LINENO cmp cbar bar || test_failed $LINENO @@ -209,17 +231,27 @@ cmp cfoo foo || test_failed $LINENO cmp cbar bar || test_failed $LINENO cmp cbaz baz || test_failed $LINENO rm -f foo bar baz || framework_failure -"${TARLZ}" -q -xf "${test3dir_lz}" --missing-crc || test_failed $LINENO -cmp cfoo dir/foo || test_failed $LINENO -cmp cbar dir/bar || test_failed $LINENO -cmp cbaz dir/baz || test_failed $LINENO -rm -rf dir || framework_failure -"${TARLZ}" -q -tf "${test3dir_lz}" dir/foo dir/bar dir/baz || test_failed $LINENO -"${TARLZ}" -q -xf "${test3dir_lz}" dir/foo dir/bar dir/baz || test_failed $LINENO -cmp cfoo dir/foo || test_failed $LINENO -cmp cbar dir/bar || test_failed $LINENO -cmp cbaz dir/baz || test_failed $LINENO -rm -rf dir || framework_failure + +for i in "${test3dir}" "${test3dir_lz}" ; do + "${TARLZ}" -q -tf "$i" --missing-crc || test_failed $LINENO "$i" + "${TARLZ}" -q -xf "$i" --missing-crc || test_failed $LINENO "$i" + cmp cfoo dir/foo || test_failed $LINENO "$i" + cmp cbar dir/bar || test_failed $LINENO "$i" + cmp cbaz dir/baz || test_failed $LINENO "$i" + rm -rf dir || framework_failure + "${TARLZ}" -q -tf "$i" dir || test_failed $LINENO "$i" + "${TARLZ}" -q -xf "$i" dir || test_failed $LINENO "$i" + cmp cfoo dir/foo || test_failed $LINENO "$i" + cmp cbar dir/bar || test_failed $LINENO "$i" + cmp cbaz dir/baz || test_failed $LINENO "$i" + rm -rf dir || framework_failure + "${TARLZ}" -q -tf "$i" dir/foo dir/baz || test_failed $LINENO "$i" + "${TARLZ}" -q -xf "$i" dir/foo dir/baz || test_failed $LINENO "$i" + cmp cfoo dir/foo || test_failed $LINENO "$i" + [ ! -e dir/bar ] || test_failed $LINENO "$i" + cmp cbaz dir/baz || test_failed $LINENO "$i" + rm -rf dir || framework_failure +done # --exclude "${TARLZ}" -xf "${test3}" --exclude='f*o' --exclude=baz || test_failed $LINENO @@ -237,63 +269,115 @@ cmp cfoo dir/foo || test_failed $LINENO [ ! -e dir/bar ] || test_failed $LINENO cmp cbaz dir/baz || test_failed $LINENO rm -rf dir || framework_failure +"${TARLZ}" -q -xf "${test3dir_lz}" --exclude=dir/bar || test_failed $LINENO +cmp cfoo dir/foo || test_failed $LINENO +[ ! -e dir/bar ] || test_failed $LINENO +cmp cbaz dir/baz || test_failed $LINENO +rm -rf dir || framework_failure "${TARLZ}" -q -xf "${test3dir_lz}" --exclude=dir || test_failed $LINENO [ ! -e dir ] || test_failed $LINENO rm -rf dir || framework_failure +"${TARLZ}" -q -xf "${test3dir_lz}" --exclude='dir/*' || test_failed $LINENO +[ ! -e dir ] || test_failed $LINENO +rm -rf dir || framework_failure +"${TARLZ}" -q -xf "${test3dir_lz}" --exclude='[bf][ao][orz]' || + test_failed $LINENO +[ ! -e dir ] || test_failed $LINENO +rm -rf dir || framework_failure "${TARLZ}" -q -xf "${test3dir_lz}" --exclude='*o' dir/foo || test_failed $LINENO [ ! -e dir ] || test_failed $LINENO rm -rf dir || framework_failure # eof -"${TARLZ}" -q -tf "${testdir}"/test3_eof1.tar.lz +"${TARLZ}" -tvf "${testdir}"/test3_eof1.tar > out 2> /dev/null [ $? = 2 ] || test_failed $LINENO -"${TARLZ}" -q -tf "${testdir}"/test3_eof2.tar.lz || test_failed $LINENO -"${TARLZ}" -q -tf "${testdir}"/test3_eof3.tar.lz || test_failed $LINENO -"${TARLZ}" -q -n0 -tf "${testdir}"/test3_eof1.tar.lz +diff -u vlist3 out || test_failed $LINENO +"${TARLZ}" -tvf "${testdir}"/test3_eof2.tar > out || test_failed $LINENO +diff -u vlist3 out || test_failed $LINENO +"${TARLZ}" -q -tf "${testdir}"/test3_eof3.tar || test_failed $LINENO +"${TARLZ}" -tvf "${testdir}"/test3_eof4.tar > out 2> /dev/null [ $? = 2 ] || test_failed $LINENO -"${TARLZ}" -q -n0 -tf "${testdir}"/test3_eof2.tar.lz || test_failed $LINENO -"${TARLZ}" -q -n0 -tf "${testdir}"/test3_eof3.tar.lz || test_failed $LINENO +diff -u vlist3 out || test_failed $LINENO +for i in 0 2 6 ; do + "${TARLZ}" -n$i -tvf "${testdir}"/test3_eof1.tar.lz > out 2> /dev/null + [ $? = 2 ] || test_failed $LINENO $i + diff -u vlist3 out || test_failed $LINENO + "${TARLZ}" -n$i -tvf "${testdir}"/test3_eof2.tar.lz > out || + test_failed $LINENO $i + diff -u vlist3 out || test_failed $LINENO + "${TARLZ}" -q -n$i -tf "${testdir}"/test3_eof3.tar.lz || + test_failed $LINENO $i + "${TARLZ}" -n$i -tvf "${testdir}"/test3_eof4.tar.lz > out 2> /dev/null + [ $? = 2 ] || test_failed $LINENO $i + diff -u vlist3 out || test_failed $LINENO + "${TARLZ}" -n$i -tvf "${testdir}"/test3_eof5.tar.lz > out 2> /dev/null + [ $? = 2 ] || test_failed $LINENO $i + diff -u vlist3 out || test_failed $LINENO +done +rm -f out || framework_failure # -"${TARLZ}" -q -xf "${testdir}"/test3_eof1.tar.lz +"${TARLZ}" -q -xf "${testdir}"/test3_eof1.tar [ $? = 2 ] || test_failed $LINENO cmp cfoo foo || test_failed $LINENO cmp cbar bar || test_failed $LINENO cmp cbaz baz || test_failed $LINENO rm -f foo bar baz || framework_failure -"${TARLZ}" -xf "${testdir}"/test3_eof2.tar.lz || test_failed $LINENO +"${TARLZ}" -xf "${testdir}"/test3_eof2.tar || test_failed $LINENO cmp cfoo foo || test_failed $LINENO cmp cbar bar || test_failed $LINENO cmp cbaz baz || test_failed $LINENO rm -f foo bar baz || framework_failure -"${TARLZ}" -xf "${testdir}"/test3_eof3.tar.lz || test_failed $LINENO +"${TARLZ}" -xf "${testdir}"/test3_eof3.tar || test_failed $LINENO cmp cfoo foo || test_failed $LINENO [ ! -e bar ] || test_failed $LINENO [ ! -e baz ] || test_failed $LINENO rm -f foo bar baz || framework_failure -# -"${TARLZ}" -q -n0 -xf "${testdir}"/test3_eof1.tar.lz +"${TARLZ}" -q -xf "${testdir}"/test3_eof4.tar [ $? = 2 ] || test_failed $LINENO cmp cfoo foo || test_failed $LINENO cmp cbar bar || test_failed $LINENO cmp cbaz baz || test_failed $LINENO rm -f foo bar baz || framework_failure -"${TARLZ}" -n0 -xf "${testdir}"/test3_eof2.tar.lz || test_failed $LINENO -cmp cfoo foo || test_failed $LINENO -cmp cbar bar || test_failed $LINENO -cmp cbaz baz || test_failed $LINENO -rm -f foo bar baz || framework_failure -"${TARLZ}" -n0 -xf "${testdir}"/test3_eof3.tar.lz || test_failed $LINENO -cmp cfoo foo || test_failed $LINENO -[ ! -e bar ] || test_failed $LINENO -[ ! -e baz ] || test_failed $LINENO -rm -f foo bar baz || framework_failure +# +for i in 0 2 6 ; do + "${TARLZ}" -q -n$i -xf "${testdir}"/test3_eof1.tar.lz + [ $? = 2 ] || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure + "${TARLZ}" -n$i -xf "${testdir}"/test3_eof2.tar.lz || + test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure + "${TARLZ}" -n$i -xf "${testdir}"/test3_eof3.tar.lz || + test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + [ ! -e bar ] || test_failed $LINENO $i + [ ! -e baz ] || test_failed $LINENO $i + rm -f foo bar baz || framework_failure + "${TARLZ}" -q -n$i -xf "${testdir}"/test3_eof4.tar.lz + [ $? = 2 ] || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure + "${TARLZ}" -q -n$i -xf "${testdir}"/test3_eof5.tar.lz + [ $? = 2 ] || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure +done # test --list and --extract tar in tar.lz for i in "${tarint1_lz}" "${tarint2_lz}" ; do for j in 0 2 6 ; do - "${TARLZ}" -tf "$i" --threads=$j > out$j || + "${TARLZ}" -tf "$i" -n$j > out$j || test_failed $LINENO "$i $j" - "${TARLZ}" -tvf "$i" --threads=$j > outv$j || + "${TARLZ}" -tvf "$i" -n$j > outv$j || test_failed $LINENO "$i $j" done diff -u out0 out2 || test_failed $LINENO $i @@ -309,23 +393,35 @@ for i in "${tarint1_lz}" "${tarint2_lz}" ; do rm -f test.txt.tar test3.tar || framework_failure done -# test --list and --extract with empty lzip members -for i in 1 2 3 4 5 6 ; do +# test --list and --extract with global headers uncompressed +for i in gh1 gh2 gh3 gh4 ; do + "${TARLZ}" -tf "${testdir}"/test3_${i}.tar > out || + test_failed $LINENO $i + diff -u list3 out || test_failed $LINENO $i + "${TARLZ}" -tvf "${testdir}"/test3_${i}.tar > out || + test_failed $LINENO $i + diff -u vlist3 out || test_failed $LINENO $i + "${TARLZ}" -xf "${testdir}"/test3_${i}.tar || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz out || framework_failure +done + +# test --list and --extract with empty lzip members, global headers and +# extended tar members split among lzip members +for i in em1 em2 em3 em4 em5 em6 gh1 gh2 gh3 gh4 gh5 gh6 sm1 sm2 sm3 sm4 ; do for j in 0 2 6 ; do - "${TARLZ}" -tf "${testdir}"/test3_em${i}.tar.lz --threads=$j \ - > out$j || test_failed $LINENO "$i $j" - "${TARLZ}" -tvf "${testdir}"/test3_em${i}.tar.lz --threads=$j \ - > outv$j || test_failed $LINENO "$i $j" + "${TARLZ}" -tf "${testdir}"/test3_${i}.tar.lz -n$j > out || + test_failed $LINENO "$i $j" + diff -u list3 out || test_failed $LINENO "$i $j" + "${TARLZ}" -tvf "${testdir}"/test3_${i}.tar.lz -n$j > out || + test_failed $LINENO "$i $j" + diff -u vlist3 out || test_failed $LINENO "$i $j" done - diff -u out0 out2 || test_failed $LINENO $i - diff -u out0 out6 || test_failed $LINENO $i - diff -u out2 out6 || test_failed $LINENO $i - diff -u outv0 outv2 || test_failed $LINENO $i - diff -u outv0 outv6 || test_failed $LINENO $i - diff -u outv2 outv6 || test_failed $LINENO $i - rm -f out0 out2 out6 outv0 outv2 outv6 || framework_failure + rm -f out || framework_failure for j in 0 2 6 ; do - "${TARLZ}" -xf "${testdir}"/test3_em${i}.tar.lz --threads=$j || + "${TARLZ}" -xf "${testdir}"/test3_${i}.tar.lz -n$j || test_failed $LINENO "$i $j" cmp cfoo foo || test_failed $LINENO "$i $j" cmp cbar bar || test_failed $LINENO "$i $j" @@ -334,7 +430,7 @@ for i in 1 2 3 4 5 6 ; do done done -# test --concatenate +# test --concatenate compressed cat "${in}" > out.tar.lz || framework_failure # invalid tar.lz "${TARLZ}" -Aqf out.tar.lz "${test3_lz}" [ $? = 2 ] || test_failed $LINENO @@ -378,7 +474,7 @@ touch aout.tar.lz || framework_failure # --exclude cmp out.tar.lz aout.tar.lz || test_failed $LINENO rm -f out.tar.lz aout.tar.lz || framework_failure -# --uncompressed +# test --concatenate uncompressed cat "${in}" > out.tar || framework_failure # invalid tar "${TARLZ}" -Aqf out.tar "${test3}" [ $? = 2 ] || test_failed $LINENO @@ -535,6 +631,98 @@ cmp cfoo foo || test_failed $LINENO [ ! -e baz ] || test_failed $LINENO rm -f out.tar foo bar baz || framework_failure +# test --delete +for e in "" .lz ; do + "${TARLZ}" -A "${in_tar}"$e "${test3}"$e > out.tar$e || test_failed $LINENO $e + "${TARLZ}" -f out.tar$e --delete test.txt || test_failed $LINENO $e + cmp "${test3}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -f out.tar$e --delete || test_failed $LINENO $e # delete nothing + cmp "${test3}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -qf out.tar$e --delete nx_file + [ $? = 1 ] || test_failed $LINENO $e + cmp "${test3}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -A "${in_tar}"$e "${test3dir}"$e > out.tar$e || test_failed $LINENO $e + "${TARLZ}" -qf out.tar$e --delete test.txt || test_failed $LINENO $e + cmp "${test3dir}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -A "${in_tar}"$e "${test3dir}"$e > out.tar$e || test_failed $LINENO $e + "${TARLZ}" -qf out.tar$e --delete dir || test_failed $LINENO $e + cmp "${in_tar}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -A "${in_tar}"$e "${test3dir}"$e > out.tar$e || test_failed $LINENO $e + "${TARLZ}" -qf out.tar$e --del dir/foo dir/bar dir/baz || test_failed $LINENO $e + cmp "${in_tar}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -A "${in_tar}"$e "${test3dir}"$e > out.tar$e || test_failed $LINENO $e + "${TARLZ}" -qf out.tar$e --del dir/foo dir/baz || test_failed $LINENO $e + cmp "${in_tar}"$e out.tar$e > /dev/null && test_failed $LINENO $e + "${TARLZ}" -qf out.tar$e --del dir/bar || test_failed $LINENO $e + cmp "${in_tar}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -A "${in_tar}"$e "${test3}"$e > out.tar$e || test_failed $LINENO $e + "${TARLZ}" -f out.tar$e --delete foo bar baz || test_failed $LINENO $e + cmp "${in_tar}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -A "${in_tar}"$e "${test3}"$e > out.tar$e || test_failed $LINENO $e + "${TARLZ}" -f out.tar$e --del test.txt foo bar baz || test_failed $LINENO $e + cmp "${eof}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -A "${in_tar}"$e "${test3}"$e > out.tar$e || test_failed $LINENO $e + for i in test.txt foo bar baz ; do + "${TARLZ}" -f out.tar$e --delete $i || test_failed $LINENO "$e $i" + done + cmp "${eof}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -A "${in_tar}"$e "${test3}"$e > out.tar$e || test_failed $LINENO $e + for i in baz bar foo test.txt ; do + "${TARLZ}" -f out.tar$e --delete $i || test_failed $LINENO "$e $i" + done + cmp "${eof}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -A "${in_tar}"$e "${test3}"$e > out.tar$e || test_failed $LINENO $e + for i in foo bar test.txt baz ; do + "${TARLZ}" -f out.tar$e --delete $i || test_failed $LINENO "$e $i" + done + cmp "${eof}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -A "${in_tar}"$e "${t155}"$e "${test3}"$e > out.tar$e || + test_failed $LINENO $e + "${TARLZ}" -f out.tar$e --del baz foo test.txt bar || test_failed $LINENO $e + cmp "${t155}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -f out.tar$e --delete link || test_failed $LINENO $e + "${TARLZ}" -q -tf out.tar$e || test_failed $LINENO + cmp "${t155}"$e out.tar$e > /dev/null && test_failed $LINENO $e + rm -f out.tar$e || framework_failure +done + +# test --delete individual member after collective member +cat cfoo > foo || framework_failure +cat cbar > bar || framework_failure +cat cbaz > baz || framework_failure +cat "${in}" > test.txt || framework_failure +"${TARLZ}" -0 -cf out.tar.lz foo bar baz --asolid || test_failed $LINENO +"${TARLZ}" -0 -rf out.tar.lz test.txt || test_failed $LINENO +rm -f foo bar baz test.txt || framework_failure +"${TARLZ}" -f out.tar.lz --delete test.txt || test_failed $LINENO +"${TARLZ}" -xf out.tar.lz || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +cmp cbar bar || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +[ ! -e test.txt ] || test_failed $LINENO +rm -f out.tar.lz foo bar baz test.txt || framework_failure + +# test --delete with empty lzip member, global header +for i in 1 2 3 4 5 6 ; do + cat "${testdir}"/test3_em${i}.tar.lz > out.tar.lz || framework_failure + for j in foo bar baz ; do + "${TARLZ}" -f out.tar.lz --delete $j || test_failed $LINENO "$i $j" + done + rm -f out.tar.lz || framework_failure +done +cat "${testdir}"/test3_gh5.tar.lz > out.tar.lz || framework_failure +for i in foo bar baz ; do + "${TARLZ}" -f out.tar.lz --delete $i || test_failed $LINENO $i +done +rm -f out.tar.lz || framework_failure +for i in 1 2 3 4 ; do + cat "${testdir}"/test3_gh${i}.tar > out.tar || framework_failure + for j in foo bar baz ; do + "${TARLZ}" -f out.tar --delete $j || test_failed $LINENO "$i $j" + done + rm -f out.tar || framework_failure +done + # test --dereference touch dummy_file || framework_failure if ln dummy_file dummy_link 2> /dev/null && @@ -612,7 +800,8 @@ cmp out.tar.lz aout.tar.lz || test_failed $LINENO [ $? = 2 ] || test_failed $LINENO cmp out.tar.lz aout.tar.lz || test_failed $LINENO rm -f out.tar.lz aout.tar.lz || framework_failure -# + +# --uncompressed "${TARLZ}" --un -cf out.tar foo bar baz || test_failed $LINENO "${TARLZ}" --un -cf aout.tar foo || test_failed $LINENO "${TARLZ}" --un -rf aout.tar foo bar baz --exclude foo || test_failed $LINENO @@ -686,6 +875,15 @@ else "${TARLZ}" -df "${test3_lz}" --ignore-ids || test_failed $LINENO "${TARLZ}" -df "${test3_lz}" --exclude '*' || test_failed $LINENO "${TARLZ}" -df "${in_tar_lz}" --exclude '*' || test_failed $LINENO + rm -f bar || framework_failure + "${TARLZ}" -df "${test3_lz}" foo baz --ignore-ids || test_failed $LINENO + "${TARLZ}" -df "${test3_lz}" --exclude bar --ignore-ids || + test_failed $LINENO + rm -f foo baz || framework_failure + "${TARLZ}" -q -xf "${test3dir_lz}" || test_failed $LINENO + "${TARLZ}" -q -df "${test3dir_lz}" --ignore-ids || test_failed $LINENO + "${TARLZ}" -q -df "${test3dir_lz}" dir --ignore-ids || test_failed $LINENO + rm -rf dir || framework_failure fi rm -f out.tar aout.tar foo bar baz || framework_failure @@ -786,6 +984,8 @@ rm -f foo || framework_failure printf "\ntesting bad input..." +mkdir dir1 || framework_failure +cd dir1 || framework_failure "${TARLZ}" -q -xf "${testdir}"/dotdot1.tar.lz || test_failed $LINENO [ ! -e ../dir ] || test_failed $LINENO "${TARLZ}" -q -xf "${testdir}"/dotdot2.tar.lz || test_failed $LINENO @@ -796,6 +996,8 @@ printf "\ntesting bad input..." [ ! -e dir ] || test_failed $LINENO "${TARLZ}" -q -xf "${testdir}"/dotdot5.tar.lz || test_failed $LINENO [ ! -e dir ] || test_failed $LINENO +cd .. || framework_failure +rm -rf dir1 || framework_failure dd if="${in_tar}" of=truncated.tar bs=1000 count=1 2> /dev/null "${TARLZ}" -q -tf truncated.tar > /dev/null @@ -805,6 +1007,58 @@ dd if="${in_tar}" of=truncated.tar bs=1000 count=1 2> /dev/null [ ! -e test.txt ] || test_failed $LINENO rm -f truncated.tar || framework_failure +# test --delete with split 'bar' tar member +for i in 1 2 3 4 ; do + cat "${testdir}"/test3_sm${i}.tar.lz > out.tar.lz || framework_failure + for j in bar baz ; do + "${TARLZ}" -q -f out.tar.lz --delete $j + [ $? = 2 ] || test_failed $LINENO "$i $j" + done + cmp "${testdir}"/test3_sm${i}.tar.lz out.tar.lz || test_failed $LINENO $i + "${TARLZ}" -q -f out.tar.lz --delete foo + [ $? = 2 ] || test_failed $LINENO $i + "${TARLZ}" -xf out.tar.lz || test_failed $LINENO + [ ! -e foo ] || test_failed $LINENO + cmp cbar bar || test_failed $LINENO + cmp cbaz baz || test_failed $LINENO + rm -f out.tar.lz foo bar baz || framework_failure +done + +# test format violations +if [ "${ln_works}" = yes ] ; then + mkdir dir1 || framework_failure + "${TARLZ}" -C dir1 -xf "${t155}" || test_failed $LINENO +fi +for i in 1 2 3 ; do + "${TARLZ}" -q -tf "${testdir}"/t155_fv${i}.tar + [ $? = 2 ] || test_failed $LINENO $i + "${TARLZ}" -q -tf "${testdir}"/t155_fv${i}.tar --permissive || + test_failed $LINENO $i + if [ "${ln_works}" = yes ] ; then + mkdir dir2 || framework_failure + "${TARLZ}" -C dir2 -xf "${testdir}"/t155_fv${i}.tar --permissive || + test_failed $LINENO $i + diff -ru dir1 dir2 || test_failed $LINENO $i + rm -rf dir2 || framework_failure + fi +done +for i in 1 2 3 4 5 6 ; do + "${TARLZ}" -q -tf "${testdir}"/t155_fv${i}.tar.lz + [ $? = 2 ] || test_failed $LINENO $i + "${TARLZ}" -q -tf "${testdir}"/t155_fv${i}.tar.lz --permissive || + test_failed $LINENO $i + if [ "${ln_works}" = yes ] ; then + mkdir dir2 || framework_failure + "${TARLZ}" -C dir2 -xf "${testdir}"/t155_fv${i}.tar.lz --permissive || + test_failed $LINENO $i + diff -ru dir1 dir2 || test_failed $LINENO $i + rm -rf dir2 || framework_failure + fi +done +if [ "${ln_works}" = yes ] ; then + rm -rf dir1 || framework_failure +fi + # test compressed and --keep-damaged rm -f test.txt || framework_failure for i in "${inbad1}" "${inbad2}" ; do diff --git a/testsuite/t155_fv1.tar b/testsuite/t155_fv1.tar new file mode 100644 index 0000000..1ef64c3 Binary files /dev/null and b/testsuite/t155_fv1.tar differ diff --git a/testsuite/t155_fv1.tar.lz b/testsuite/t155_fv1.tar.lz new file mode 100644 index 0000000..896925e Binary files /dev/null and b/testsuite/t155_fv1.tar.lz differ diff --git a/testsuite/t155_fv2.tar b/testsuite/t155_fv2.tar new file mode 100644 index 0000000..f732b30 Binary files /dev/null and b/testsuite/t155_fv2.tar differ diff --git a/testsuite/t155_fv2.tar.lz b/testsuite/t155_fv2.tar.lz new file mode 100644 index 0000000..b380105 Binary files /dev/null and b/testsuite/t155_fv2.tar.lz differ diff --git a/testsuite/t155_fv3.tar b/testsuite/t155_fv3.tar new file mode 100644 index 0000000..fe5db13 Binary files /dev/null and b/testsuite/t155_fv3.tar differ diff --git a/testsuite/t155_fv3.tar.lz b/testsuite/t155_fv3.tar.lz new file mode 100644 index 0000000..aa24c0a Binary files /dev/null and b/testsuite/t155_fv3.tar.lz differ diff --git a/testsuite/t155_fv4.tar.lz b/testsuite/t155_fv4.tar.lz new file mode 100644 index 0000000..e3ae9c3 Binary files /dev/null and b/testsuite/t155_fv4.tar.lz differ diff --git a/testsuite/t155_fv5.tar.lz b/testsuite/t155_fv5.tar.lz new file mode 100644 index 0000000..966015a Binary files /dev/null and b/testsuite/t155_fv5.tar.lz differ diff --git a/testsuite/t155_fv6.tar.lz b/testsuite/t155_fv6.tar.lz new file mode 100644 index 0000000..bc83237 Binary files /dev/null and b/testsuite/t155_fv6.tar.lz differ diff --git a/testsuite/test3_dir.tar b/testsuite/test3_dir.tar new file mode 100644 index 0000000..e0c2b29 Binary files /dev/null and b/testsuite/test3_dir.tar differ diff --git a/testsuite/test3_eof1.tar b/testsuite/test3_eof1.tar new file mode 100644 index 0000000..175b807 Binary files /dev/null and b/testsuite/test3_eof1.tar differ diff --git a/testsuite/test3_eof2.tar b/testsuite/test3_eof2.tar new file mode 100644 index 0000000..458be1e Binary files /dev/null and b/testsuite/test3_eof2.tar differ diff --git a/testsuite/test3_eof3.tar b/testsuite/test3_eof3.tar new file mode 100644 index 0000000..3003a93 Binary files /dev/null and b/testsuite/test3_eof3.tar differ diff --git a/testsuite/test3_eof4.tar b/testsuite/test3_eof4.tar new file mode 100644 index 0000000..4012fea Binary files /dev/null and b/testsuite/test3_eof4.tar differ diff --git a/testsuite/test3_eof4.tar.lz b/testsuite/test3_eof4.tar.lz new file mode 100644 index 0000000..1593feb Binary files /dev/null and b/testsuite/test3_eof4.tar.lz differ diff --git a/testsuite/test3_eof5.tar.lz b/testsuite/test3_eof5.tar.lz new file mode 100644 index 0000000..156bd3a Binary files /dev/null and b/testsuite/test3_eof5.tar.lz differ diff --git a/testsuite/test3_gh1.tar b/testsuite/test3_gh1.tar new file mode 100644 index 0000000..f969561 Binary files /dev/null and b/testsuite/test3_gh1.tar differ diff --git a/testsuite/test3_gh1.tar.lz b/testsuite/test3_gh1.tar.lz new file mode 100644 index 0000000..d38f46b Binary files /dev/null and b/testsuite/test3_gh1.tar.lz differ diff --git a/testsuite/test3_gh2.tar b/testsuite/test3_gh2.tar new file mode 100644 index 0000000..f5f0c31 Binary files /dev/null and b/testsuite/test3_gh2.tar differ diff --git a/testsuite/test3_gh2.tar.lz b/testsuite/test3_gh2.tar.lz new file mode 100644 index 0000000..48f18dd Binary files /dev/null and b/testsuite/test3_gh2.tar.lz differ diff --git a/testsuite/test3_gh3.tar b/testsuite/test3_gh3.tar new file mode 100644 index 0000000..e0d3a9d Binary files /dev/null and b/testsuite/test3_gh3.tar differ diff --git a/testsuite/test3_gh3.tar.lz b/testsuite/test3_gh3.tar.lz new file mode 100644 index 0000000..89a31a6 Binary files /dev/null and b/testsuite/test3_gh3.tar.lz differ diff --git a/testsuite/test3_gh4.tar b/testsuite/test3_gh4.tar new file mode 100644 index 0000000..0655c31 Binary files /dev/null and b/testsuite/test3_gh4.tar differ diff --git a/testsuite/test3_gh4.tar.lz b/testsuite/test3_gh4.tar.lz new file mode 100644 index 0000000..5b9f605 Binary files /dev/null and b/testsuite/test3_gh4.tar.lz differ diff --git a/testsuite/test3_gh5.tar.lz b/testsuite/test3_gh5.tar.lz new file mode 100644 index 0000000..b8f4abe Binary files /dev/null and b/testsuite/test3_gh5.tar.lz differ diff --git a/testsuite/test3_gh6.tar.lz b/testsuite/test3_gh6.tar.lz new file mode 100644 index 0000000..7be9aca Binary files /dev/null and b/testsuite/test3_gh6.tar.lz differ diff --git a/testsuite/test3_sm1.tar.lz b/testsuite/test3_sm1.tar.lz new file mode 100644 index 0000000..6eb3947 Binary files /dev/null and b/testsuite/test3_sm1.tar.lz differ diff --git a/testsuite/test3_sm2.tar.lz b/testsuite/test3_sm2.tar.lz new file mode 100644 index 0000000..f312fcb Binary files /dev/null and b/testsuite/test3_sm2.tar.lz differ diff --git a/testsuite/test3_sm3.tar.lz b/testsuite/test3_sm3.tar.lz new file mode 100644 index 0000000..82ceb18 Binary files /dev/null and b/testsuite/test3_sm3.tar.lz differ diff --git a/testsuite/test3_sm4.tar.lz b/testsuite/test3_sm4.tar.lz new file mode 100644 index 0000000..601a640 Binary files /dev/null and b/testsuite/test3_sm4.tar.lz differ -- cgit v1.2.3